angr/pyvex

Invalid temporary number 43 from block.py (CFGFast)

Closed this issue · 1 comment

Hi there :)

I am using angr, and I tried to run CFGFast on /bin/cat:

p = angr.Project('/bin/cat')
p.analyses.CFGFast()

PyVex raises IndexError: 43 from /usr/lib/python3.7/site-packages/pyvex/block.py, line 574:

    572         if tmp < 0 or tmp > self.types_used:
    573             l.debug("Invalid temporary number %d", tmp)
--> 574             raise IndexError(tmp)
    575         return self.types[tmp]

Versions info:

[~]$ uname -a
Linux yam 4.18.16-arch1-1-ARCH #1 SMP PREEMPT Sat Oct 20 22:06:45 UTC 2018 x86_64 GNU/Linux
[~]$ python --version
Python 3.7.1
[~]$ pip freeze | egrep "(pyvex|angr)"
angr==8.18.10.25
pyvex==8.18.10.25

Full Traceback:

IndexError                                Traceback (most recent call last)
<ipython-input-22-b758fac0fa51> in <module>
      1 p = angr.Project('/bin/cat')
----> 2 p.analyses.CFGFast()

/usr/lib/python3.7/site-packages/angr/analyses/analysis.py in __call__(self, *args, **kwargs)
    106 
    107         oself._show_progressbar = show_progressbar
--> 108         oself.__init__(*args, **kwargs)
    109         return oself
    110 

/usr/lib/python3.7/site-packages/angr/analyses/cfg/cfg_fast.py in __init__(self, binary, regions, pickle_intermediate_results, symbols, function_prologues, resolve_indirect_jumps, force_segment, force_complete_scan, indirect_jump_target_limit, collect_data_references, extra_cross_references, normalize, start_at_entry, function_starts, extra_memory_regions, data_type_guessing_handlers, arch_options, indirect_jump_resolvers, base_state, exclude_sparse_regions, skip_specific_regions, heuristic_plt_resolving, start, end, **extra_arch_options)
   1011 
   1012         # Start working!
-> 1013         self._analyze()
   1014 
   1015     #

/usr/lib/python3.7/site-packages/angr/analyses/forward_analysis.py in _analyze(self)
    550             # An example is the CFG recovery.
    551 
--> 552             self._analysis_core_baremetal()
    553 
    554         else:

/usr/lib/python3.7/site-packages/angr/analyses/forward_analysis.py in _analysis_core_baremetal(self)
    675             self._job_info_queue = self._job_info_queue[1:]
    676 
--> 677             self._process_job_and_get_successors(job_info)
    678 
    679             # Short-cut for aborting the analysis

/usr/lib/python3.7/site-packages/angr/analyses/forward_analysis.py in _process_job_and_get_successors(self, job_info)
    693         job = job_info.job
    694 
--> 695         successors = self._get_successors(job)
    696 
    697         all_new_jobs = [ ]

/usr/lib/python3.7/site-packages/angr/analyses/cfg/cfg_fast.py in _get_successors(self, job)
   1398         #    l.debug("Tracing new exit %#x", addr)
   1399 
-> 1400         jobs = self._scan_block(job)
   1401 
   1402         # l.debug("... got %d jobs: %s", len(jobs), jobs)

/usr/lib/python3.7/site-packages/angr/analyses/cfg/cfg_fast.py in _scan_block(self, cfg_job)
   1647 
   1648         else:
-> 1649             entries = self._scan_irsb(cfg_job, current_func_addr)
   1650 
   1651         return entries

/usr/lib/python3.7/site-packages/angr/analyses/cfg/cfg_fast.py in _scan_irsb(self, cfg_job, current_func_addr)
   1753         """
   1754 
-> 1755         addr, function_addr, cfg_node, irsb = self._generate_cfgnode(cfg_job, current_func_addr)
   1756 
   1757         # Add edges going to this node in function graphs

/usr/lib/python3.7/site-packages/angr/analyses/cfg/cfg_fast.py in _generate_cfgnode(self, cfg_job, current_function_addr)
   3520             try:
   3521                 lifted_block = self._lift(addr, size=distance, opt_level=self._iropt_level, collect_data_refs=True)
-> 3522                 irsb = lifted_block.vex_nostmt
   3523                 irsb_string = lifted_block.bytes[:irsb.size]
   3524             except SimTranslationError:

/usr/lib/python3.7/site-packages/angr/block.py in vex_nostmt(self)
    173             arch=self.arch,
    174             skip_stmts=True,
--> 175             collect_data_refs=self._collect_data_refs,
    176         )
    177         return self._vex_nostmt

/usr/lib/python3.7/site-packages/angr/engines/vex/engine.py in lift(self, state, clemory, insn_bytes, arch, addr, size, num_inst, traceflags, thumb, opt_level, strict_block_end, skip_stmts, collect_data_refs)
    541                                   strict_block_end=strict_block_end,
    542                                   skip_stmts=skip_stmts,
--> 543                                   collect_data_refs=collect_data_refs,
    544                                   )
    545 

/usr/lib/python3.7/site-packages/pyvex/lifting/__init__.py in lift(data, addr, arch, max_bytes, max_inst, bytes_offset, opt_level, traceflags, strict_block_end, inner, skip_stmts, collect_data_refs)
    150                         strict_block_end=strict_block_end,
    151                         skip_stmts=False,
--> 152                         collect_data_refs=collect_data_refs,
    153                         )
    154 

/usr/lib/python3.7/site-packages/pyvex/lifting/__init__.py in lift(data, addr, arch, max_bytes, max_inst, bytes_offset, opt_level, traceflags, strict_block_end, inner, skip_stmts, collect_data_refs)
    176             if more_irsb.size:
    177                 # Successfully decoded more bytes
--> 178                 final_irsb.extend(more_irsb)
    179         elif max_bytes == 0:
    180             # We have no more bytes left. Mark the jumpkind of the IRSB as Ijk_Boring

/usr/lib/python3.7/site-packages/pyvex/block.py in extend(self, extendwith)
    218                     stmt_.tmp = convert_tmp(stmt_.tmp)
    219                 for e in stmt_.args:
--> 220                     convert_expr(e)
    221             elif stmttype is CAS:
    222                 if stmt_.oldLo not in invalid_vals: stmt_.oldLo = convert_tmp(stmt_.oldLo)

/usr/lib/python3.7/site-packages/pyvex/block.py in convert_expr(expr_)
    203             """
    204             if type(expr_) is RdTmp:
--> 205                 return RdTmp.get_instance(convert_tmp(expr_.tmp))
    206             return expr_
    207 

/usr/lib/python3.7/site-packages/pyvex/block.py in convert_tmp(tmp)
    191             """
    192             if tmp not in conversion_dict:
--> 193                 tmp_type = extendwith.tyenv.lookup(tmp)
    194                 conversion_dict[tmp] = self.tyenv.add(tmp_type)
    195             return conversion_dict[tmp]

/usr/lib/python3.7/site-packages/pyvex/block.py in lookup(self, tmp)
    572         if tmp < 0 or tmp > self.types_used:
    573             l.debug("Invalid temporary number %d", tmp)
--> 574             raise IndexError(tmp)
    575         return self.types[tmp]
    576 

IndexError: 43

Resolved. This had to do with the interaction between an endbr64 instruction and an fld instruction, and with our mechanism for unifying the results of different IR lifters being incomplete with respect to dirty helpers.

In the future, can you provide a more reproducible testcase? Just uploading your binaries (including your libc, which was where the issue was) would be super helpful. Since this was inside pyvex, which contains no state, you could have reduced this to a testcase which was just a call into pyvex. I had to spin up a new arch install (thanks docker) to reproduce this.