Non-existent labels added during reassembly
symike opened this issue · 8 comments
Using the reassembler backend with 0 patches (i.e. just trying to generate a new binary from the old one with no modifications), the temporary .s file that's generated by patcherex refers to labels that are not defined, usually "label_0". I've used the reassembler backend in the way described in the README example.
The test program is a simple 64-bit ELF binary, the result of a C program compiled with gcc on 64-bit Linux. I attached the C source, binary, and resulting temporary assembly* that patcherex generates. This is not limited to this binary, but missing labels are in every executable that I've tried (all 64-bit ELF).
*The caveat is that the temporary .s file has one immediate value manually changed (at 0x400439, from 0xfffffff0 to 0xfffffffffffffff0) so that the assembler (clang, the one used by the assemble script in compilerex -- though the same behaviour is seen with other assemblers) does not complain (related to #2).
I believe the label_XX labels are inserted by patcherex (or a submodule like compilerex), and shouldn't need to be linked from any library, but label_0 doesn't appear anywhere.
# _init (originally at 0x4003c8): reserves 8 bytes of stack, loads a quadword
# through .label_0, calls .label_2 only if that value is non-zero, then
# releases the stack slot and returns.
.section .init
.align 8
#Procedure 0x4003c8
# 0x4003c8: subq $8, %rsp [IMM, REG]
.globl _init
.type _init, @function
_init:
subq $8, %rsp
# 0x4003cc: movq 0x200c25(%rip), %rax [MEM, REG]
# NOTE(review): .label_0 is referenced here but never defined in this listing.
# The original operand resolves to 0x4003d3 + 0x200c25 = 0x600ff8, and no data
# item is emitted for that address; presumably a GOT slot -- confirm.
movq .label_0(%rip), %rax
# 0x4003d3: testq %rax, %rax [REG, REG]
testq %rax, %rax
# 0x4003d6: je 0x4003dd [IMM <CODEREF>]
# Skip the call when the loaded value is zero.
je .label_1
# 0x4003d8: callq 0x400420 [IMM <CODEREF>]
# .label_2 is the .plt.got stub originally at 0x400420.
callq .label_2
# 0x4003dd: addq $8, %rsp [IMM, REG]
.label_1:
addq $8, %rsp
# 0x4003e1: retq []
retq
# .plt.got stub (originally at 0x400420): a single indirect jump through the
# quadword at .label_0. It is the call target used by _init.
.section .plt.got
.align 32
#Procedure 0x400420
# 0x400420: jmpq *0x200bd2(%rip) [MEM]
.label_2:
# NOTE(review): .label_0 has no definition in this listing. Assuming the usual
# 6-byte encoding of this jump, the original operand resolves to
# 0x400426 + 0x200bd2 = 0x600ff8, the same address _init loads from -- confirm.
jmpq *.label_0(%rip)
# _start (originally at 0x400430): program entry stub. Clears %ebp, aligns the
# stack to 16 bytes, loads the addresses of main, __libc_csu_init and
# __libc_csu_fini into %rdi, %rcx and %r8, and calls __libc_start_main.
# There is no instruction after the call in this procedure.
.section .text
.align 16
#Procedure 0x400430
.globl _start
.type _start, @function
_start:
# 0x400430: xorl %ebp, %ebp [REG, REG]
xorl %ebp, %ebp
# 0x400432: movq %rdx, %r9 [REG, REG]
# Preserve the incoming %rdx in %r9 before %rdx is overwritten below.
movq %rdx, %r9
# 0x400435: popq %rsi [REG]
# %rsi = top word of the entry stack (presumably argc -- confirm).
popq %rsi
# 0x400436: movq %rsp, %rdx [REG, REG]
# %rdx = stack pointer after the pop (presumably argv -- confirm).
movq %rsp, %rdx
# 0x400439: andq $0xfffffff0, %rsp [IMM, REG]
# NOTE: the immediate below was widened by hand from the generated 0xfffffff0
# (see the comment line above) so that the assembler accepts it; it clears the
# low four bits of %rsp, i.e. rounds the stack pointer down to 16 bytes.
andq $0xfffffffffffffff0, %rsp
# 0x40043d: pushq %rax [REG]
pushq %rax
# 0x40043e: pushq %rsp [REG]
pushq %rsp
# 0x40043f: movq $0x4005f0, %r8 [IMM <CODEREF>, REG]
movq $__libc_csu_fini, %r8
# 0x400446: movq $0x400580, %rcx [IMM <CODEREF>, REG]
movq $__libc_csu_init, %rcx
# 0x40044d: movq $0x400526, %rdi [IMM <CODEREF>, REG]
movq $main, %rdi
# 0x400454: callq 0x400410 [IMM <CODEREF>]
# The original call target 0x400410 has been symbolized as __libc_start_main.
callq __libc_start_main
# sub_400459: a lone hlt instruction that the reassembler split out as its own
# procedure. Its original address 0x400459 comes right after the 5-byte callq
# at 0x400454 in _start (presumably the guard that traps if that call ever
# returns -- confirm).
.section .text
.align 16
#Procedure 0x400459
.globl sub_400459
.type sub_400459, @function
sub_400459:
# 0x400459: hlt []
hlt
# deregister_tm_clones (originally at 0x400460).
# Computes %rax = label_4 - __TMC_END__ (addresses 0x60103f and 0x601038 in the
# original binary) and returns immediately when the result is <= 0xe (unsigned).
# Otherwise it loads the constant 0 into %eax, returns if it is zero, and only
# if non-zero tail-jumps through %rax with %edi = address of __TMC_END__.
# Clobbers: %rax, %edi (on the tail-jump path), flags. %rbp is saved/restored.
.section .text
.align 16
#Procedure 0x400460
# 0x400460: movl $0x60103f, %eax [IMM <DATAREF>, REG]
.globl deregister_tm_clones
.type deregister_tm_clones, @function
deregister_tm_clones:
movl $label_4, %eax
# 0x400465: pushq %rbp [REG]
pushq %rbp
# 0x400466: subq $0x601038, %rax [IMM <DATAREF>, REG]
# FIX: the operand must be the *address* of __TMC_END__ as an immediate ($),
# matching the original instruction and register_tm_clones. Without the '$'
# the assembler emits a memory operand and subtracts the quadword stored at
# __TMC_END__ instead.
subq $__TMC_END__, %rax
# 0x40046c: cmpq $0xe, %rax [IMM, REG]
cmpq $0xe, %rax
# 0x400470: movq %rsp, %rbp [REG, REG]
# movq does not modify flags, so the jbe below still tests the cmpq result.
movq %rsp, %rbp
# 0x400473: jbe 0x400490 [IMM <CODEREF>]
jbe .label_3
# 0x400475: movl $0, %eax [IMM, REG]
movl $0, %eax
# 0x40047a: testq %rax, %rax [REG, REG]
testq %rax, %rax
# 0x40047d: je 0x400490 [IMM <CODEREF>]
# %rax was just set to 0, so as written this branch is always taken.
je .label_3
# 0x40047f: popq %rbp [REG]
popq %rbp
# 0x400480: movl $0x601038, %edi [IMM <DATAREF>, REG]
movl $__TMC_END__, %edi
# 0x400485: jmpq *%rax [REG]
jmpq *%rax
# 0x400487: nopw (%rax, %rax) [MEM]
# Padding between the tail jump and the return path.
nopw (%rax, %rax)
# 0x400490: popq %rbp [REG]
.label_3:
popq %rbp
# 0x400491: retq []
retq
# register_tm_clones (originally at 0x4004a0).
# Computes %rsi = (__TMC_END__ - __TMC_END__) / 8, then halves it with
# rounding toward zero (adds the sign bit before the final arithmetic shift).
# Returns if the result is zero; otherwise loads the constant 0 into %eax,
# returns if that is zero, and only if non-zero tail-jumps through %rax with
# %edi = address of __TMC_END__. %rbp is saved and restored on every path.
.section .text
.align 16
#Procedure 0x4004a0
# 0x4004a0: movl $0x601038, %esi [IMM <DATAREF>, REG]
.globl register_tm_clones
.type register_tm_clones, @function
register_tm_clones:
movl $__TMC_END__, %esi
# 0x4004a5: pushq %rbp [REG]
pushq %rbp
# 0x4004a6: subq $0x601038, %rsi [IMM <DATAREF>, REG]
# Both operands were symbolized to the same address, so %rsi becomes 0 here.
subq $__TMC_END__, %rsi
# 0x4004ad: sarq $3, %rsi [IMM, REG]
sarq $3, %rsi
# 0x4004b1: movq %rsp, %rbp [REG, REG]
movq %rsp, %rbp
# 0x4004b4: movq %rsi, %rax [REG, REG]
movq %rsi, %rax
# 0x4004b7: shrq $0x3f, %rax [IMM, REG]
# %rax = sign bit of %rsi (0 or 1).
shrq $0x3f, %rax
# 0x4004bb: addq %rax, %rsi [REG, REG]
addq %rax, %rsi
# 0x4004be: sarq $1, %rsi [IMM, REG]
# The je below consumes the zero flag produced by this shift.
sarq $1, %rsi
# 0x4004c1: je 0x4004d8 [IMM <CODEREF>]
je .label_5
# 0x4004c3: movl $0, %eax [IMM, REG]
movl $0, %eax
# 0x4004c8: testq %rax, %rax [REG, REG]
testq %rax, %rax
# 0x4004cb: je 0x4004d8 [IMM <CODEREF>]
# %rax was just set to 0, so as written this branch is always taken.
je .label_5
# 0x4004cd: popq %rbp [REG]
popq %rbp
# 0x4004ce: movl $0x601038, %edi [IMM <DATAREF>, REG]
movl $__TMC_END__, %edi
# 0x4004d3: jmpq *%rax [REG]
jmpq *%rax
# 0x4004d5: nopl (%rax) [MEM]
# Padding between the tail jump and the return path.
nopl (%rax)
# 0x4004d8: popq %rbp [REG]
.label_5:
popq %rbp
# 0x4004d9: retq []
retq
# __do_global_dtors_aux (originally at 0x4004e0).
# Uses the byte at __TMC_END__ as a flag: if it is already non-zero the
# function returns at once; otherwise it calls deregister_tm_clones and then
# stores 1 into that byte, so the call happens only while the flag is zero.
.section .text
.align 16
#Procedure 0x4004e0
# 0x4004e0: cmpb $0, 0x200b51(%rip) [IMM, MEM]
.globl __do_global_dtors_aux
.type __do_global_dtors_aux, @function
__do_global_dtors_aux:
# Original operand: 0x4004e7 + 0x200b51 = 0x601038, the address of __TMC_END__.
cmpb $0, __TMC_END__(%rip)
# 0x4004e7: jne 0x4004fa [IMM <CODEREF>]
jne .label_6
# 0x4004e9: pushq %rbp [REG]
pushq %rbp
# 0x4004ea: movq %rsp, %rbp [REG, REG]
movq %rsp, %rbp
# 0x4004ed: callq 0x400460 [IMM <CODEREF>]
callq deregister_tm_clones
# 0x4004f2: popq %rbp [REG]
popq %rbp
# 0x4004f3: movb $1, 0x200b3e(%rip) [IMM, MEM]
# Original operand: 0x4004fa + 0x200b3e = 0x601038, the same flag byte.
movb $1, __TMC_END__(%rip)
# 0x4004fa: retq []
.label_6:
retq
# frame_dummy (originally at 0x400500).
# Loads the address symbolized as __JCR_END__ into %edi and checks the
# quadword stored there. If it is zero, tail-jumps to register_tm_clones.
# If it is non-zero, loads the constant 0 into %eax; when that is zero it also
# tail-jumps to register_tm_clones, otherwise it calls through %rax first and
# then jumps to register_tm_clones.
.section .text
.align 16
#Procedure 0x400500
# 0x400500: movl $0x600e20, %edi [IMM <DATAREF>, REG]
.globl frame_dummy
.type frame_dummy, @function
frame_dummy:
# NOTE(review): no definition of __JCR_END__ (original address 0x600e20)
# appears in this listing -- confirm it is provided elsewhere at link time.
movl $__JCR_END__, %edi
# 0x400505: cmpq $0, (%rdi) [IMM, MEM]
cmpq $0, (%rdi)
# 0x400509: jne 0x400510 [IMM <CODEREF>]
jne .label_7
# 0x40050b: jmp 0x4004a0 [IMM <CODEREF>]
.label_8:
jmp register_tm_clones
# 0x400510: movl $0, %eax [IMM, REG]
.label_7:
movl $0, %eax
# 0x400515: testq %rax, %rax [REG, REG]
testq %rax, %rax
# 0x400518: je 0x40050b [IMM <CODEREF>]
# %rax was just set to 0, so as written this branch is always taken.
je .label_8
# 0x40051a: pushq %rbp [REG]
pushq %rbp
# 0x40051b: movq %rsp, %rbp [REG, REG]
movq %rsp, %rbp
# 0x40051e: callq *%rax [REG]
callq *%rax
# 0x400520: popq %rbp [REG]
popq %rbp
# 0x400521: jmp 0x4004a0 [IMM <CODEREF>]
jmp register_tm_clones
# main (originally at 0x400526).
# Stack frame (32 bytes below %rbp):
#   -0x14(%rbp) = incoming %edi, -0x20(%rbp) = incoming %rsi
#   -0xc(%rbp)  = 0x499602d2 (1234567890)
#   -8(%rbp)    = 0x423a35bd (1111111101)
#   -4(%rbp)    = -0xc(%rbp) - -8(%rbp)
# Calls printf twice with the format string at label_9 ("%d\n"): first with
# the value at -0xc(%rbp), then with the difference. Returns 0 in %eax.
.section .text
.align 16
#Procedure 0x400526
# 0x400526: pushq %rbp [REG]
.globl main
.type main, @function
main:
pushq %rbp
# 0x400527: movq %rsp, %rbp [REG, REG]
movq %rsp, %rbp
# 0x40052a: subq $0x20, %rsp [IMM, REG]
subq $0x20, %rsp
# 0x40052e: movl %edi, -0x14(%rbp) [REG, MEM]
movl %edi, -0x14(%rbp)
# 0x400531: movq %rsi, -0x20(%rbp) [REG, MEM]
movq %rsi, -0x20(%rbp)
# 0x400535: movl $0x499602d2, -0xc(%rbp) [IMM, MEM]
movl $0x499602d2, -0xc(%rbp)
# 0x40053c: movl -0xc(%rbp), %eax [MEM, REG]
movl -0xc(%rbp), %eax
# 0x40053f: movl %eax, %esi [REG, REG]
movl %eax, %esi
# 0x400541: movl $0x400604, %edi [IMM <DATAREF>, REG]
movl $label_9, %edi
# 0x400546: movl $0, %eax [IMM, REG]
# %al = 0: no vector registers are used for this variadic call (SysV AMD64).
movl $0, %eax
# 0x40054b: callq 0x400400 [IMM <CODEREF>]
callq printf
# 0x400550: movl $0x423a35bd, -8(%rbp) [IMM, MEM]
movl $0x423a35bd, -8(%rbp)
# 0x400557: movl -0xc(%rbp), %eax [MEM, REG]
movl -0xc(%rbp), %eax
# 0x40055a: subl -8(%rbp), %eax [MEM, REG]
subl -8(%rbp), %eax
# 0x40055d: movl %eax, -4(%rbp) [REG, MEM]
movl %eax, -4(%rbp)
# 0x400560: movl -4(%rbp), %eax [MEM, REG]
movl -4(%rbp), %eax
# 0x400563: movl %eax, %esi [REG, REG]
movl %eax, %esi
# 0x400565: movl $0x400604, %edi [IMM <DATAREF>, REG]
movl $label_9, %edi
# 0x40056a: movl $0, %eax [IMM, REG]
# %al = 0 again for the second variadic call.
movl $0, %eax
# 0x40056f: callq 0x400400 [IMM <CODEREF>]
callq printf
# 0x400574: movl $0, %eax [IMM, REG]
# Return value 0.
movl $0, %eax
# 0x400579: leave []
leave
# 0x40057a: retq []
retq
# __libc_csu_init (originally at 0x400580).
# Saves its three incoming arguments (%edi, %rsi, %rdx) in %r15d, %r14, %r13,
# calls _init, then walks the table of quadword function pointers between
# __init_array_start and __init_array_end, calling each entry with those
# three arguments restored.
# Register roles inside the loop:
#   %r12 = table base, %rbp = entry count, %rbx = current index.
# All six callee-saved registers it uses are pushed on entry and popped on exit.
.section .text
.align 16
#Procedure 0x400580
# 0x400580: pushq %r15 [REG]
.globl __libc_csu_init
.type __libc_csu_init, @function
__libc_csu_init:
pushq %r15
# 0x400582: pushq %r14 [REG]
pushq %r14
# 0x400584: movl %edi, %r15d [REG, REG]
movl %edi, %r15d
# 0x400587: pushq %r13 [REG]
pushq %r13
# 0x400589: pushq %r12 [REG]
pushq %r12
# 0x40058b: leaq 0x20087e(%rip), %r12 [MEM, REG]
# Original operand: 0x400592 + 0x20087e = 0x600e10.
leaq __init_array_start(%rip), %r12
# 0x400592: pushq %rbp [REG]
pushq %rbp
# 0x400593: leaq 0x20087e(%rip), %rbp [MEM, REG]
# Original operand: 0x40059a + 0x20087e = 0x600e18.
# NOTE(review): no definition of __init_array_end appears in this listing --
# confirm it is provided elsewhere at link time.
leaq __init_array_end(%rip), %rbp
# 0x40059a: pushq %rbx [REG]
pushq %rbx
# 0x40059b: movq %rsi, %r14 [REG, REG]
movq %rsi, %r14
# 0x40059e: movq %rdx, %r13 [REG, REG]
movq %rdx, %r13
# 0x4005a1: subq %r12, %rbp [REG, REG]
# %rbp = size of the table in bytes.
subq %r12, %rbp
# 0x4005a4: subq $8, %rsp [IMM, REG]
subq $8, %rsp
# 0x4005a8: sarq $3, %rbp [IMM, REG]
# %rbp = number of 8-byte entries.
sarq $3, %rbp
# 0x4005ac: callq 0x4003c8 [IMM <CODEREF>]
callq _init
# 0x4005b1: testq %rbp, %rbp [REG, REG]
testq %rbp, %rbp
# 0x4005b4: je 0x4005d6 [IMM <CODEREF>]
# Empty table: skip the loop.
je .label_10
# 0x4005b6: xorl %ebx, %ebx [REG, REG]
xorl %ebx, %ebx
# 0x4005b8: nopl (%rax, %rax) [MEM]
# Padding before the loop head.
nopl (%rax, %rax)
# 0x4005c0: movq %r13, %rdx [REG, REG]
.label_11:
movq %r13, %rdx
# 0x4005c3: movq %r14, %rsi [REG, REG]
movq %r14, %rsi
# 0x4005c6: movl %r15d, %edi [REG, REG]
movl %r15d, %edi
# 0x4005c9: callq *(%r12, %rbx, 8) [MEM]
callq *(%r12, %rbx, 8)
# 0x4005cd: addq $1, %rbx [IMM, REG]
addq $1, %rbx
# 0x4005d1: cmpq %rbp, %rbx [REG, REG]
cmpq %rbp, %rbx
# 0x4005d4: jne 0x4005c0 [IMM <CODEREF>]
jne .label_11
# 0x4005d6: addq $8, %rsp [IMM, REG]
.label_10:
addq $8, %rsp
# 0x4005da: popq %rbx [REG]
popq %rbx
# 0x4005db: popq %rbp [REG]
popq %rbp
# 0x4005dc: popq %r12 [REG]
popq %r12
# 0x4005de: popq %r13 [REG]
popq %r13
# 0x4005e0: popq %r14 [REG]
popq %r14
# 0x4005e2: popq %r15 [REG]
popq %r15
# 0x4005e4: retq []
retq
# __libc_csu_fini (originally at 0x4005f0): returns immediately.
# _start passes its address to __libc_start_main in %r8.
.section .text
.align 16
#Procedure 0x4005f0
# 0x4005f0: retq []
.globl __libc_csu_fini
.type __libc_csu_fini, @function
__libc_csu_fini:
retq
# _fini (originally at 0x4005f4): reserves 8 bytes of stack, releases them
# again, and returns; it performs no other work.
.section .fini
.align 4
#Procedure 0x4005f4
# 0x4005f4: subq $8, %rsp [IMM, REG]
.globl _fini
.type _fini, @function
_fini:
subq $8, %rsp
# 0x4005f8: addq $8, %rsp [IMM, REG]
addq $8, %rsp
# 0x4005fc: retq []
retq
# Data emitted by the reassembler, grouped by section.
# NOTE(review): labels are emitted only for the addresses listed below. The
# code in this listing also refers to 0x600ff8 (.label_0), 0x600e18
# (__init_array_end) and 0x600e20 (__JCR_END__), none of which is defined here.
.section .plt.got
.align 32
# data @ 0x400428
# .label_21 marks an empty item and is not referenced anywhere in this listing.
.label_21:
.section .text
.align 16
# data @ 0x4005f2
# .label_22 marks an empty item and is not referenced anywhere in this listing.
.label_22:
.section .rodata
.align 32
# data @ 0x400600
.byte 1
.byte 0
.byte 2
.byte 0
# data @ 0x400604
# Format string passed to printf by main.
label_9:
.asciz "%d\n"
.section .data
.align 16
# data @ 0x600e10
# Function-pointer table walked by __libc_csu_init; it holds a single entry.
.globl __init_array_start
.type __init_array_start, @notype
__init_array_start:
.quad frame_dummy
.section .data
.align 8
# data @ 0x601028
# 16 zero bytes covering 0x601028-0x601037.
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.section .bss
.align 8
# data @ 0x601038
# Byte used as a flag by __do_global_dtors_aux (tested for zero, then set to 1).
.globl __TMC_END__
.type __TMC_END__, @object
__TMC_END__:
.byte 0x0
# data @ 0x601039
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
# label_4 sits 7 bytes after __TMC_END__, i.e. at 0x60103f, the address
# deregister_tm_clones loads into %eax.
label_4:
.byte 0
# data @ 0x601040
.globl _end
.type _end, @notype
_end:
This is because you are using the reassembler on binaries compiled with libc, and the public versions of reassembler and compilerex do not have good support for those.
We will release quite a few updates of reassembler, patcherex and compilerex after next week, which essentially address many issues, including this one.
PS: now every time I'm using the reassembler, I have to check out some old branches of angr, patcherex, and compilerex...
For now, a temporary workaround is to manually remove unnecessary functions like `_init`. You can manually call `remove_unnecessary_stuff()` on a `Reassembler` backend to do that (here), or just manually modify the generated assembly code.
Thanks @ltfish!
I tried your suggested workaround, invoking `backend._binary.remove_unnecessary_stuff()` right before calling `backend.save()`. It helped get rid of a bunch of undefined references, but the linker still complained about a nonexistent `label_0`. There's indeed one reference left, and it is in a section that isn't among the "unnecessary" sections removed by a call to `remove_unnecessary_stuff`. It looks similar to a section in the original assembly I posted:
# 0x400420: jmpq *0x200bd2(%rip) [MEM]
.label_2:
jmpq *.label_0(%rip)
.section .text
.align 16
#Procedure 0x400459
.globl sub_400459
.type sub_400459, @function
If the updates you plan to push are functionally equivalent to your workaround, then it seems that the extra `label_XX` issue would still exist?
I believe `label_2` (and the succeeding basic block, which contains `label_0`) should be removed together with the `_init` function. It's part of the `_init` function.
Are any new releases ready? Even if they aren't perfect to fix this?
Hello! Is there any potential update for this? I believe this is exactly the same issue that I am having. Upon doing some more debugging, I have narrowed down the problem potentially to the assemble.sh
# Toolchain setup for assembling with the bundled CGC clang/binutils.
# $DIR is expected to be set before this point (it is not defined in this excerpt).
# FIX: the CC and LD lines previously ended in a stray backtick, which opens an
# unterminated command substitution and breaks parsing of the script.
export CC="$DIR/bin/clang"
export LD="$DIR/bin/ld"
export CXX="$DIR/bin/clang++"
export OBJCOPY="$DIR/bin/objcopy"
# NOTE: -nostdlib/-static/-mcgc_i386 target CGC binaries, not ordinary libc-linked ELF.
export LDFLAGS="-nostdlib -static -Wl,-mcgc_i386 $LDFLAGS"
export PATH="$DIR/bin:$PATH"
# assemble ARGS...: run the compiler driver with LDFLAGS followed by the caller's arguments.
function assemble() {
# LDFLAGS is intentionally unquoted so that it splits into separate flags;
# "$@" is quoted so that arguments containing whitespace (e.g. paths) stay intact.
"$CC" \
$LDFLAGS "$@" ;
}
assemble "$@"
incorrectly linking and loading the library, which is where all of those "undefined" references come from? It seems like compile.sh has both ELF and CGC compilation method as shown below
### ELF
# Active configuration: build with the system gcc.
# $DIR is expected to be set before this point (it is not defined in this excerpt).
CC=gcc
# compile ARGS...: run $CC with the project warning flags, then CFLAGS/LDFLAGS,
# the caller's arguments, and LDLIBS last.
function compile() {
# CFLAGS, LDFLAGS and LDLIBS are intentionally unquoted so they split into
# separate flags; "$@" is quoted so arguments containing whitespace stay intact.
"$CC" "-isystem$DIR/include" -Ilib/ -std=gnu99 -O3 -Wall -Wno-unused-variable -Wextra -Wshadow -Wwrite-strings -Wpointer-arith -Wstrict-overflow=4 $CFLAGS $LDFLAGS "$@" $LDLIBS;
}
### CGC
# Disabled alternative configuration for the CGC toolchain, kept for reference.
#export CC=$DIR/bin/clang
#export LD=$DIR/bin/ld
#export CXX=$DIR/bin/clang++
#export OBJCOPY=$DIR/bin/objcopy
#export LDFLAGS="-nostdlib -static -Wl,-mcgc_i386 $LDFLAGS"
##export LDFLAGS="-nostdlib -static -Wl, $LDFLAGS"
#export CFLAGS="-nostdlib -fno-builtin -nostdinc -isystem$DIR/include $CFLAGS"
#export LDLIBS="-L$DIR/lib -lboolector -llgl -lc -lcgc $LDLIBS"
#export PATH="$DIR/bin:$PATH"
#function compile() {
# $CC -Ilib/ \
# -std=gnu99 -Wall -Wno-incompatible-pointer-types-discards-qualifiers -Wno-unused-variable -Wno-unused-parameter -Wno-sign-compare -Wextra -Wshadow -Wwrite-strings -Wpointer-arith -Wstrict-overflow=4 \
# $CFLAGS $LDFLAGS $@ $LDLIBS;
#}
compile "$@"
do we need to do a similar thing for assembling as well? Thank you for any update.
This issue has been marked as `stale` because it has no recent activity. Please comment or add the `pinned` tag to prevent this issue from being closed.
This issue has been closed due to inactivity.