angr/patcherex

Non-existent labels added during reassembly

symike opened this issue · 8 comments

Using the reassembler backend with 0 patches (i.e. try to just generate a new binary from the old one with no modifications), the temporary .s file that's generated by patcherex refers to labels that are not defined, usually "label_0". I've used the reassembler backend in the way described in the README example.

The test program is a simple 64-bit ELF binary, the result of a C program compiled with gcc on 64-bit Linux. I attached the C source, binary, and resulting temporary assembly* that patcherex generates. The problem is not limited to this binary: labels are missing for every executable that I've tried (all 64-bit ELF).

*Caveat: the temporary .s file has one immediate value manually changed (at 0x400439, from 0xfffffff0 to 0xfffffffffffffff0) so that the assembler (clang, the one used by the assemble script in compilerex -- though the same behaviour is seen with other assemblers) does not complain (related to #2).

I believe the label_XX labels are inserted by patcherex (or a submodule like compilerex), and shouldn't need to be linked from any library, but label_0 doesn't appear anywhere.

	.section	.init
	.align	8
	#Procedure 0x4003c8
	# _init: adjusts %rsp by 8, loads a quadword through .label_0(%rip), and
	# calls .label_2 only when that quadword is non-zero; it then restores
	# %rsp and returns.

	# 0x4003c8:	subq	$8, %rsp [IMM, REG]
	.globl _init
	.type _init, @function
_init:
	subq	$8, %rsp
	# 0x4003cc:	movq	0x200c25(%rip), %rax [MEM, REG]
	# NOTE(review): .label_0 is referenced here, but no `.label_0:` definition
	# appears in this listing. The original operand resolves to
	# 0x4003d3 + 0x200c25 = 0x600ff8, and none of the data sections emitted in
	# this listing covers that address.
	movq	.label_0(%rip),  %rax
	# 0x4003d3:	testq	%rax, %rax [REG, REG]
	testq	%rax, %rax
	# 0x4003d6:	je	0x4003dd [IMM <CODEREF>]
	# Skip the call when the loaded quadword is zero.
	je	.label_1
	# 0x4003d8:	callq	0x400420 [IMM <CODEREF>]
	callq	.label_2
	# 0x4003dd:	addq	$8, %rsp [IMM, REG]
.label_1:
	addq	$8, %rsp
	# 0x4003e1:	retq	 []
	retq	
	.section	.plt.got
	.align	32
	#Procedure 0x400420
	# One-instruction stub: an indirect jump through the quadword at .label_0.
	# _init reaches it via `callq .label_2`.
	# NOTE(review): .label_0 has no definition in this listing, so this
	# reference cannot be resolved from the listing alone.

	# 0x400420:	jmpq	*0x200bd2(%rip) [MEM]
.label_2:
	jmpq	*.label_0(%rip)
	.section	.text
	.align	16
	#Procedure 0x400430
	# _start: zeroes %ebp, copies the incoming %rdx to %r9, pops the top of
	# the stack into %rsi, passes the resulting %rsp in %rdx, aligns %rsp down
	# to a 16-byte boundary, pushes %rax and %rsp, and calls
	# __libc_start_main with main in %rdi, __libc_csu_init in %rcx and
	# __libc_csu_fini in %r8.
	# __libc_start_main is not defined in this listing.
	.globl _start
	.type _start, @function
_start:

	# 0x400430:	xorl	%ebp, %ebp [REG, REG]
	xorl	%ebp, %ebp
	# 0x400432:	movq	%rdx, %r9 [REG, REG]
	movq	%rdx, %r9
	# 0x400435:	popq	%rsi [REG]
	popq	%rsi
	# 0x400436:	movq	%rsp, %rdx [REG, REG]
	movq	%rsp, %rdx
	# 0x400439:	andq	$0xfffffff0, %rsp [IMM, REG]
	# The immediate below was hand-edited by the reporter (0xfffffff0 ->
	# 0xfffffffffffffff0) so that the assembler accepts the line; see the
	# issue text.
	andq	$0xfffffffffffffff0, %rsp
	# 0x40043d:	pushq	%rax [REG]
	pushq	%rax
	# 0x40043e:	pushq	%rsp [REG]
	pushq	%rsp
	# 0x40043f:	movq	$0x4005f0, %r8 [IMM <CODEREF>, REG]
	movq	$__libc_csu_fini,  %r8
	# 0x400446:	movq	$0x400580, %rcx [IMM <CODEREF>, REG]
	movq	$__libc_csu_init,  %rcx
	# 0x40044d:	movq	$0x400526, %rdi [IMM <CODEREF>, REG]
	movq	$main,  %rdi
	# 0x400454:	callq	0x400410 [IMM <CODEREF>]
	callq	__libc_start_main
	.section	.text
	.align	16
	#Procedure 0x400459
	# A lone hlt at 0x400459, the address immediately after _start's call at
	# 0x400454. No instruction in this listing branches to sub_400459.
	.globl sub_400459
	.type sub_400459, @function
sub_400459:

	# 0x400459:	hlt	 []
	hlt	
	.section	.text
	.align	16
	#Procedure 0x400460
	# deregister_tm_clones: loads the address of label_4 into %eax, subtracts
	# (see the note on the subq below), and returns through .label_3 when the
	# result is <= 0xe (unsigned) or when %rax, reloaded with the constant 0,
	# tests zero. As written the second branch is always taken, so the
	# `jmpq *%rax` path is not reached.

	# 0x400460:	movl	$0x60103f, %eax [IMM <DATAREF>, REG]
	.globl deregister_tm_clones
	.type deregister_tm_clones, @function
deregister_tm_clones:
	movl	$label_4,  %eax
	# 0x400465:	pushq	%rbp [REG]
	pushq	%rbp
	# 0x400466:	subq	$0x601038, %rax [IMM <DATAREF>, REG]
	# NOTE(review): the original instruction (comment above) subtracts the
	# immediate $0x601038, but the emitted line below has no `$` prefix, so
	# it is a memory operand that reads from __TMC_END__ instead of
	# subtracting its address. Compare `subq $__TMC_END__, %rsi` in
	# register_tm_clones.
	subq	__TMC_END__,  %rax
	# 0x40046c:	cmpq	$0xe, %rax [IMM, REG]
	cmpq	$0xe, %rax
	# 0x400470:	movq	%rsp, %rbp [REG, REG]
	movq	%rsp, %rbp
	# 0x400473:	jbe	0x400490 [IMM <CODEREF>]
	jbe	.label_3
	# 0x400475:	movl	$0, %eax [IMM, REG]
	movl	$0, %eax
	# 0x40047a:	testq	%rax, %rax [REG, REG]
	testq	%rax, %rax
	# 0x40047d:	je	0x400490 [IMM <CODEREF>]
	je	.label_3
	# 0x40047f:	popq	%rbp [REG]
	popq	%rbp
	# 0x400480:	movl	$0x601038, %edi [IMM <DATAREF>, REG]
	movl	$__TMC_END__,  %edi
	# 0x400485:	jmpq	*%rax [REG]
	jmpq	*%rax
	# 0x400487:	nopw	(%rax, %rax) [MEM]
	nopw	(%rax, %rax)
	# 0x400490:	popq	%rbp [REG]
.label_3:
	popq	%rbp
	# 0x400491:	retq	 []
	retq	
	.section	.text
	.align	16
	#Procedure 0x4004a0
	# register_tm_clones: computes ($__TMC_END__ - $__TMC_END__), shifts it
	# right arithmetically by 3, then halves it as a signed value (the
	# shrq $0x3f / addq pair adds the sign bit before the final sarq $1).
	# It returns through .label_5 when that result is zero or when %rax,
	# reloaded with the constant 0, tests zero. As written the second branch
	# is always taken, so the `jmpq *%rax` path is not reached.

	# 0x4004a0:	movl	$0x601038, %esi [IMM <DATAREF>, REG]
	.globl register_tm_clones
	.type register_tm_clones, @function
register_tm_clones:
	movl	$__TMC_END__,  %esi
	# 0x4004a5:	pushq	%rbp [REG]
	pushq	%rbp
	# 0x4004a6:	subq	$0x601038, %rsi [IMM <DATAREF>, REG]
	subq	$__TMC_END__,  %rsi
	# 0x4004ad:	sarq	$3, %rsi [IMM, REG]
	sarq	$3, %rsi
	# 0x4004b1:	movq	%rsp, %rbp [REG, REG]
	movq	%rsp, %rbp
	# 0x4004b4:	movq	%rsi, %rax [REG, REG]
	movq	%rsi, %rax
	# 0x4004b7:	shrq	$0x3f, %rax [IMM, REG]
	shrq	$0x3f, %rax
	# 0x4004bb:	addq	%rax, %rsi [REG, REG]
	addq	%rax, %rsi
	# 0x4004be:	sarq	$1, %rsi [IMM, REG]
	sarq	$1, %rsi
	# 0x4004c1:	je	0x4004d8 [IMM <CODEREF>]
	je	.label_5
	# 0x4004c3:	movl	$0, %eax [IMM, REG]
	movl	$0, %eax
	# 0x4004c8:	testq	%rax, %rax [REG, REG]
	testq	%rax, %rax
	# 0x4004cb:	je	0x4004d8 [IMM <CODEREF>]
	je	.label_5
	# 0x4004cd:	popq	%rbp [REG]
	popq	%rbp
	# 0x4004ce:	movl	$0x601038, %edi [IMM <DATAREF>, REG]
	movl	$__TMC_END__,  %edi
	# 0x4004d3:	jmpq	*%rax [REG]
	jmpq	*%rax
	# 0x4004d5:	nopl	(%rax) [MEM]
	nopl	(%rax)
	# 0x4004d8:	popq	%rbp [REG]
.label_5:
	popq	%rbp
	# 0x4004d9:	retq	 []
	retq	
	.section	.text
	.align	16
	#Procedure 0x4004e0
	# __do_global_dtors_aux: tests the byte at __TMC_END__. When it is zero,
	# calls deregister_tm_clones and then stores 1 to that byte; when it is
	# already non-zero, returns immediately.

	# 0x4004e0:	cmpb	$0, 0x200b51(%rip) [IMM, MEM]
	.globl __do_global_dtors_aux
	.type __do_global_dtors_aux, @function
__do_global_dtors_aux:
	cmpb	$0, __TMC_END__(%rip)
	# 0x4004e7:	jne	0x4004fa [IMM <CODEREF>]
	jne	.label_6
	# 0x4004e9:	pushq	%rbp [REG]
	pushq	%rbp
	# 0x4004ea:	movq	%rsp, %rbp [REG, REG]
	movq	%rsp, %rbp
	# 0x4004ed:	callq	0x400460 [IMM <CODEREF>]
	callq	deregister_tm_clones
	# 0x4004f2:	popq	%rbp [REG]
	popq	%rbp
	# 0x4004f3:	movb	$1, 0x200b3e(%rip) [IMM, MEM]
	movb	$1, __TMC_END__(%rip)
	# 0x4004fa:	retq	 []
.label_6:
	retq	
	.section	.text
	.align	16
	#Procedure 0x400500
	# frame_dummy: when the quadword at __JCR_END__ is zero, jumps to
	# register_tm_clones. Otherwise it loads the constant 0 into %eax and,
	# because that tests zero, jumps back to .label_8 and takes the same jump;
	# the `callq *%rax` path is not reached as written.
	# NOTE(review): __JCR_END__ (original address 0x600e20) has no definition
	# in this listing.

	# 0x400500:	movl	$0x600e20, %edi [IMM <DATAREF>, REG]
	.globl frame_dummy
	.type frame_dummy, @function
frame_dummy:
	movl	$__JCR_END__,  %edi
	# 0x400505:	cmpq	$0, (%rdi) [IMM, MEM]
	cmpq	$0, (%rdi)
	# 0x400509:	jne	0x400510 [IMM <CODEREF>]
	jne	.label_7
	# 0x40050b:	jmp	0x4004a0 [IMM <CODEREF>]
.label_8:
	jmp	register_tm_clones
	# 0x400510:	movl	$0, %eax [IMM, REG]
.label_7:
	movl	$0, %eax
	# 0x400515:	testq	%rax, %rax [REG, REG]
	testq	%rax, %rax
	# 0x400518:	je	0x40050b [IMM <CODEREF>]
	je	.label_8
	# 0x40051a:	pushq	%rbp [REG]
	pushq	%rbp
	# 0x40051b:	movq	%rsp, %rbp [REG, REG]
	movq	%rsp, %rbp
	# 0x40051e:	callq	*%rax [REG]
	callq	*%rax
	# 0x400520:	popq	%rbp [REG]
	popq	%rbp
	# 0x400521:	jmp	0x4004a0 [IMM <CODEREF>]
	jmp	register_tm_clones
	.section	.text
	.align	16
	#Procedure 0x400526
	# main: sets up a 0x20-byte frame, saves %edi and %rsi into it, stores
	# 0x499602d2 at -0xc(%rbp) and passes it to printf with the format string
	# at label_9 ("%d\n"). It then stores 0x423a35bd at -8(%rbp), subtracts
	# it from the first value, stores the difference at -4(%rbp), prints that
	# the same way, and returns 0 in %eax.
	# printf is not defined in this listing.

	# 0x400526:	pushq	%rbp [REG]
	.globl main
	.type main, @function
main:
	pushq	%rbp
	# 0x400527:	movq	%rsp, %rbp [REG, REG]
	movq	%rsp, %rbp
	# 0x40052a:	subq	$0x20, %rsp [IMM, REG]
	subq	$0x20, %rsp
	# 0x40052e:	movl	%edi, -0x14(%rbp) [REG, MEM]
	movl	%edi, -0x14(%rbp)
	# 0x400531:	movq	%rsi, -0x20(%rbp) [REG, MEM]
	movq	%rsi, -0x20(%rbp)
	# 0x400535:	movl	$0x499602d2, -0xc(%rbp) [IMM, MEM]
	movl	$0x499602d2, -0xc(%rbp)
	# 0x40053c:	movl	-0xc(%rbp), %eax [MEM, REG]
	movl	-0xc(%rbp), %eax
	# 0x40053f:	movl	%eax, %esi [REG, REG]
	movl	%eax, %esi
	# 0x400541:	movl	$0x400604, %edi [IMM <DATAREF>, REG]
	movl	$label_9,  %edi
	# 0x400546:	movl	$0, %eax [IMM, REG]
	movl	$0, %eax
	# 0x40054b:	callq	0x400400 [IMM <CODEREF>]
	callq	printf
	# 0x400550:	movl	$0x423a35bd, -8(%rbp) [IMM, MEM]
	movl	$0x423a35bd, -8(%rbp)
	# 0x400557:	movl	-0xc(%rbp), %eax [MEM, REG]
	movl	-0xc(%rbp), %eax
	# 0x40055a:	subl	-8(%rbp), %eax [MEM, REG]
	subl	-8(%rbp), %eax
	# 0x40055d:	movl	%eax, -4(%rbp) [REG, MEM]
	movl	%eax, -4(%rbp)
	# 0x400560:	movl	-4(%rbp), %eax [MEM, REG]
	movl	-4(%rbp), %eax
	# 0x400563:	movl	%eax, %esi [REG, REG]
	movl	%eax, %esi
	# 0x400565:	movl	$0x400604, %edi [IMM <DATAREF>, REG]
	movl	$label_9,  %edi
	# 0x40056a:	movl	$0, %eax [IMM, REG]
	movl	$0, %eax
	# 0x40056f:	callq	0x400400 [IMM <CODEREF>]
	callq	printf
	# 0x400574:	movl	$0, %eax [IMM, REG]
	movl	$0, %eax
	# 0x400579:	leave	 []
	leave	
	# 0x40057a:	retq	 []
	retq	
	.section	.text
	.align	16
	#Procedure 0x400580
	# __libc_csu_init: saves %r15, %r14, %r13, %r12, %rbp and %rbx, keeps the
	# incoming %edi/%rsi/%rdx in %r15d/%r14/%r13, and computes
	# %rbp = (__init_array_end - __init_array_start) >> 3. It calls _init,
	# then, when %rbp is non-zero, calls each pointer at (%r12, %rbx, 8) for
	# %rbx = 0 .. %rbp-1 with the saved values restored into %edi/%rsi/%rdx.
	# Finally it restores the saved registers and returns.
	# NOTE(review): __init_array_end (original address
	# 0x40059a + 0x20087e = 0x600e18) has no definition in this listing.

	# 0x400580:	pushq	%r15 [REG]
	.globl __libc_csu_init
	.type __libc_csu_init, @function
__libc_csu_init:
	pushq	%r15
	# 0x400582:	pushq	%r14 [REG]
	pushq	%r14
	# 0x400584:	movl	%edi, %r15d [REG, REG]
	movl	%edi, %r15d
	# 0x400587:	pushq	%r13 [REG]
	pushq	%r13
	# 0x400589:	pushq	%r12 [REG]
	pushq	%r12
	# 0x40058b:	leaq	0x20087e(%rip), %r12 [MEM, REG]
	leaq	__init_array_start(%rip),  %r12
	# 0x400592:	pushq	%rbp [REG]
	pushq	%rbp
	# 0x400593:	leaq	0x20087e(%rip), %rbp [MEM, REG]
	leaq	__init_array_end(%rip),  %rbp
	# 0x40059a:	pushq	%rbx [REG]
	pushq	%rbx
	# 0x40059b:	movq	%rsi, %r14 [REG, REG]
	movq	%rsi, %r14
	# 0x40059e:	movq	%rdx, %r13 [REG, REG]
	movq	%rdx, %r13
	# 0x4005a1:	subq	%r12, %rbp [REG, REG]
	subq	%r12, %rbp
	# 0x4005a4:	subq	$8, %rsp [IMM, REG]
	subq	$8, %rsp
	# 0x4005a8:	sarq	$3, %rbp [IMM, REG]
	sarq	$3, %rbp
	# 0x4005ac:	callq	0x4003c8 [IMM <CODEREF>]
	callq	_init
	# 0x4005b1:	testq	%rbp, %rbp [REG, REG]
	testq	%rbp, %rbp
	# 0x4005b4:	je	0x4005d6 [IMM <CODEREF>]
	# Skip the loop entirely when the computed entry count is zero.
	je	.label_10
	# 0x4005b6:	xorl	%ebx, %ebx [REG, REG]
	xorl	%ebx, %ebx
	# 0x4005b8:	nopl	(%rax, %rax) [MEM]
	nopl	(%rax, %rax)
	# 0x4005c0:	movq	%r13, %rdx [REG, REG]
.label_11:
	movq	%r13, %rdx
	# 0x4005c3:	movq	%r14, %rsi [REG, REG]
	movq	%r14, %rsi
	# 0x4005c6:	movl	%r15d, %edi [REG, REG]
	movl	%r15d, %edi
	# 0x4005c9:	callq	*(%r12, %rbx, 8) [MEM]
	callq	*(%r12, %rbx, 8)
	# 0x4005cd:	addq	$1, %rbx [IMM, REG]
	addq	$1, %rbx
	# 0x4005d1:	cmpq	%rbp, %rbx [REG, REG]
	cmpq	%rbp, %rbx
	# 0x4005d4:	jne	0x4005c0 [IMM <CODEREF>]
	jne	.label_11
	# 0x4005d6:	addq	$8, %rsp [IMM, REG]
.label_10:
	addq	$8, %rsp
	# 0x4005da:	popq	%rbx [REG]
	popq	%rbx
	# 0x4005db:	popq	%rbp [REG]
	popq	%rbp
	# 0x4005dc:	popq	%r12 [REG]
	popq	%r12
	# 0x4005de:	popq	%r13 [REG]
	popq	%r13
	# 0x4005e0:	popq	%r14 [REG]
	popq	%r14
	# 0x4005e2:	popq	%r15 [REG]
	popq	%r15
	# 0x4005e4:	retq	 []
	retq	
	.section	.text
	.align	16
	#Procedure 0x4005f0
	# __libc_csu_fini: returns immediately. Its address is loaded into %r8 by
	# _start before the call to __libc_start_main.

	# 0x4005f0:	retq	 []
	.globl __libc_csu_fini
	.type __libc_csu_fini, @function
__libc_csu_fini:
	retq	
	.section	.fini
	.align	4
	#Procedure 0x4005f4
	# _fini: adjusts %rsp down by 8 and back up by 8, then returns; it does
	# nothing else.

	# 0x4005f4:	subq	$8, %rsp [IMM, REG]
	.globl _fini
	.type _fini, @function
_fini:
	subq	$8, %rsp
	# 0x4005f8:	addq	$8, %rsp [IMM, REG]
	addq	$8, %rsp
	# 0x4005fc:	retq	 []
	retq	
	# Data emitted after the code. Each `# data @ ADDR` comment gives the
	# address the following bytes had in the original binary.
	# NOTE(review): the code in this listing references .label_0 (0x600ff8),
	# __init_array_end (0x600e18) and __JCR_END__ (0x600e20), but nothing
	# below defines those symbols or emits data at those addresses.
	.section .plt.got
	.align 32
	# data @ 0x400428
	# .label_21 carries no data and is not referenced by any instruction in
	# this listing.
	.label_21:
	.section .text
	.align 16
	# data @ 0x4005f2
	# .label_22 carries no data and is not referenced by any instruction in
	# this listing.
	.label_22:
	.section .rodata
	.align 32
	# data @ 0x400600
	.byte 1
	.byte 0
	.byte 2
	.byte 0
	# data @ 0x400604
	# Format string passed to both printf calls in main.
label_9:
	.asciz "%d\n"
	.section .data
	.align 16
	# data @ 0x600e10
	# A single pointer to frame_dummy; __libc_csu_init iterates from this
	# symbol.
	.globl __init_array_start
	.type __init_array_start, @notype
__init_array_start:
	.quad frame_dummy
	.section .data
	.align 8
	# data @ 0x601028
	# Sixteen zero bytes (0x601028 .. 0x601037).
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.section .bss
	.align 8
	# data @ 0x601038
	# One byte, read and written by __do_global_dtors_aux.
	.globl __TMC_END__
	.type __TMC_END__, @object
__TMC_END__:
	.byte 0x0
	# data @ 0x601039
	# Six zero bytes, then label_4 at 0x60103f, matching the original
	# `movl $0x60103f, %eax` in deregister_tm_clones.
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
label_4:
	.byte 0
	# data @ 0x601040
		.globl _end
	.type _end, @notype
_end:

print.zip

This is because you are using reassembler on binaries compiled with libc, and the public versions of reassembler and compilerex do not support those well.

We will release quite a few updates of reassembler, patcherex and compilerex after next week, which essentially address many issues, including this one.

PS: now every time I'm using the reassembler, I have to check out some old branches of angr, patcherex, and compilerex...

For now, a temporary workaround is to manually remove unnecessary functions like _init. You can manually call remove_unnecessary_stuff() on a Reassembler backend to do that (here), or just manually modify the generated assembly code.

Thanks @ltfish!

I tried your suggested workaround, invoking backend._binary.remove_unnecessary_stuff() right before calling backend.save(). It helped get rid of a bunch of undefined references, but the linker still complained about a nonexistent label_0. There's indeed one reference left, and this is in a section that isn't in the "unnecessary" sections that are removed by a call to remove_unnecessary_stuff. It looks similar to a section in the original assembly I posted:


  # 0x400420: jmpq  *0x200bd2(%rip) [MEM]
.label_2:
  jmpq  *.label_0(%rip)
  .section  .text
  .align  16
  #Procedure 0x400459
  .globl sub_400459
  .type sub_400459, @function

If the updates you plan to push are functionally equivalent to your workaround, then it seems that the extra label_XX issue would still exist?

I believe label_2 (and the succeeding basic block which contains label_0) should be removed together with the _init function. It's part of the _init function.

Are any new releases ready? Even if they aren't perfect to fix this?

Hello! Is there any update on this? I believe this is exactly the same issue that I am having. After some more debugging, I have narrowed the problem down, potentially, to assemble.sh

# Toolchain used by assemble(): the clang, ld and objcopy binaries under
# $DIR/bin. $DIR is not set in this excerpt; it must be defined before these
# lines run.
export CC=$DIR/bin/clang
export LD=$DIR/bin/ld
export CXX=$DIR/bin/clang++
export OBJCOPY=$DIR/bin/objcopy
# Link flags: no standard libraries or startup files (-nostdlib), a static
# link, and -mcgc_i386 forwarded to the linker via -Wl. Any LDFLAGS already
# in the environment is appended.
export LDFLAGS="-nostdlib -static -Wl,-mcgc_i386 $LDFLAGS"

export PATH="$DIR/bin:$PATH"

# assemble ARGS...: run $CC with $LDFLAGS followed by the given arguments.
function assemble() {
    $CC \
    $LDFLAGS $@ ;
}

# Forward every script argument to assemble().
assemble $@

incorrectly linking and loading the library, which is where all of those "undefined" references come from? It seems that compile.sh has both ELF and CGC compilation methods, as shown below

### ELF
# Active configuration: build with the gcc found on PATH.
CC=gcc

# compile ARGS...: run $CC with $DIR/include as a system include directory,
# lib/ as an include directory, gnu99, -O3 and the warning flags below,
# followed by $CFLAGS, $LDFLAGS, the given arguments, and $LDLIBS.
# $DIR, $CFLAGS, $LDFLAGS and $LDLIBS are not set in this excerpt.
function compile() {
    $CC -isystem$DIR/include -Ilib/ -std=gnu99 -O3 -Wall -Wno-unused-variable -Wextra -Wshadow -Wwrite-strings -Wpointer-arith -Wstrict-overflow=4 $CFLAGS $LDFLAGS $@ $LDLIBS;
}

### CGC
# Alternative configuration using the clang toolchain under $DIR/bin.
# Every line of it is commented out, so it has no effect as written.

#export CC=$DIR/bin/clang
#export LD=$DIR/bin/ld
#export CXX=$DIR/bin/clang++
#export OBJCOPY=$DIR/bin/objcopy

#export LDFLAGS="-nostdlib -static -Wl,-mcgc_i386 $LDFLAGS"
##export LDFLAGS="-nostdlib -static -Wl, $LDFLAGS"
#export  CFLAGS="-nostdlib -fno-builtin -nostdinc -isystem$DIR/include $CFLAGS"
#export LDLIBS="-L$DIR/lib -lboolector -llgl -lc -lcgc $LDLIBS"

#export PATH="$DIR/bin:$PATH"

#function compile() {
#    $CC -Ilib/ \
#        -std=gnu99 -Wall -Wno-incompatible-pointer-types-discards-qualifiers -Wno-unused-variable -Wno-unused-parameter -Wno-sign-compare -Wextra -Wshadow -Wwrite-strings -Wpointer-arith -Wstrict-overflow=4  \
#        $CFLAGS $LDFLAGS $@ $LDLIBS;
#}

# Forward every script argument to the active (ELF) compile().
compile $@

Do we need to do something similar for assembling as well? Thank you for any update.

This issue has been marked as stale because it has no recent activity. Please comment or add the pinned tag to prevent this issue from being closed.

This issue has been closed due to inactivity.