/IDAPython-Example

IDAPython Example

Primary language: Python. License: Apache License 2.0 (Apache-2.0)

IDAPython 8.2 Example

Get the file type

import ida_ida
# Query the INF_FILE_FORMAT_NAME string from the database information.
# The value is only the result of this expression (it is not stored or
# printed here), so evaluate it in the IDA Python console or wrap it in print().
ida_ida.getinf_str(ida_ida.INF_FILE_FORMAT_NAME)

Get the compiler type

import idaapi
import ida_ida
def get_compiler_type():
    """Return a human-readable name for the compiler recorded in the database.

    The compiler id is read from the database information structure and
    masked with ``idc.COMP_MASK`` before being looked up in a local table.

    :return: the compiler name, or ``'Unknown'`` when the masked id is not
        one of the ids listed in the table below.
    """
    # This snippet only imports idaapi and ida_ida at the top, so bring idc
    # into scope here instead of relying on it being a pre-existing global.
    import idc

    comp_dict = {
        0x00: 'Unknown',
        0x01: 'Visual C++',
        0x02: 'Borland C++',
        0x03: 'Watcom C++',
        0x06: 'GNU C++',
        0x07: 'Visual Age C++',
        0x08: 'Delphi',
    }

    info = idaapi.get_inf_structure()
    # Keep only the compiler-id bits; the remaining bits are masked off.
    comp_type = info.cc.id & idc.COMP_MASK
    # Ids missing from the table (e.g. 0x04, 0x05) fall back to 'Unknown'
    # instead of raising KeyError.
    return comp_dict.get(comp_type, 'Unknown')

Get architecture

import idaapi

info = idaapi.get_inf_structure()

# Address size of the database: 64-bit is checked first, then 32-bit,
# and anything else is reported as 16-bit.
if info.is_64bit():
    bits = 64
elif info.is_32bit():
    bits = 32
else:
    bits = 16

# Prefer the is_be() accessor; when the structure does not provide it,
# fall back to the `mf` field.
try:
    is_be = info.is_be()
except AttributeError:
    is_be = info.mf
endian = "big" if is_be else "little"

# The processor-name attribute is looked up under both spellings so the
# snippet does not fail if only one of them is present.
# NOTE(review): confirm which spelling your IDA version exposes.
proc_name = getattr(info, 'procname', None) or getattr(info, 'procName', 'unknown')

# The f-prefix is required; without it the placeholders are printed literally.
print(f'Processor: {proc_name}, {bits}bit, {endian} endian')

Get image base, end address, and size

# Collect the segment start addresses first; the last entry is used as the
# final segment of the image.
segs = list(Segments())
image_base = idaapi.get_imagebase()
# The image ends where the last listed segment ends.
image_end = idc.get_segm_end(segs[-1])
# Size is measured from the image base up to that end address.
image_size = image_end - image_base

Get segment name, start address, and end address

import idautils
import idc
def print_section_list():
    """Print one line per segment: its name, start address and end address.

    Both addresses are printed in lowercase hexadecimal without a prefix.
    """
    for seg_start in idautils.Segments():
        seg_name = idc.get_segm_name(seg_start)
        seg_end = idc.get_segm_end(seg_start)
        print(f'{seg_name} {seg_start:x} {seg_end:x}')


print_section_list()

Get function start address and end address

# Address currently under the cursor.
ea = idc.get_screen_ea()
# Function object containing that address.
fn = idaapi.get_func(ea)
# Read both bounds of the function in one step.
fn_start_ea, fn_end_ea = fn.start_ea, fn.end_ea

Get an instruction size

# Size of the item located at `ea`; `ea` must already be defined, e.g. by
# the idc.get_screen_ea() snippet earlier in this file.
# NOTE(review): the bare name presumably relies on the IDA console having
# idc's functions in scope -- otherwise call idc.get_item_size(ea).
get_item_size(ea)

Disassemble one instruction

# Disassembly text for the item at `ea`; the result is only the value of
# this expression, so wrap it in print() when running it as a script.
# NOTE(review): the bare name presumably relies on the IDA console having
# idc's functions in scope -- otherwise call idc.GetDisasm(ea).
GetDisasm(ea)

Get operand string from an instruction at an address

# Text of operand 0 and operand 1 of the instruction at `ea`.
# The second argument is the operand index.
# NOTE(review): presumably idc.print_operand, which returns the string
# rather than printing it -- confirm and wrap in print() if needed.
print_operand(ea, 0)
print_operand(ea, 1)

Get operand value from an instruction at an address

# Value of operand 0 and operand 1 of the instruction at `ea`.
# The second argument is the operand index.
# NOTE(review): presumably idc.get_operand_value; the results are not
# stored or printed here.
get_operand_value(ea, 0)
get_operand_value(ea, 1)

Get a string at an address in a static image

def get_string(ea):
    """Read a zero-terminated string of single bytes starting at ``ea``.

    Each byte is converted with ``chr()`` and reading stops at the first
    zero byte, which is not included in the result.

    :param ea: address of the first character.
    :return: the characters found before the terminating zero byte.
    """
    chars = []
    cursor = ea
    # Walk forward one byte at a time until the terminator is reached.
    while (value := idc.get_wide_byte(cursor)) != 0:
        chars.append(chr(value))
        cursor += 1
    return "".join(chars)

Get Entrypoint

# Start (entry point) address recorded in the database information.
# The value is only the result of this expression; it is not stored or printed.
ida_ida.inf_get_start_ea()	

Print instructions in a function using capstone disassembler

import ida_funcs
import ida_kernwin
import idautils
import ida_bytes
from capstone import Cs, CS_ARCH_X86, CS_MODE_64

# Function containing the address under the cursor.
ea = ida_kernwin.get_screen_ea()
fn = ida_funcs.get_func(ea)

# Capstone is configured for x86 in 64-bit mode; adjust the arch/mode
# constants when analysing a different target.
md = Cs(CS_ARCH_X86, CS_MODE_64)
md.detail = True

for ea in idautils.Heads(fn.start_ea, fn.end_ea):
    ins = idautils.DecodeInstruction(ea)
    if ins is None:
        # Heads() yields data items as well as instructions; only
        # instructions can be decoded, so skip everything else.
        continue
    byts = ida_bytes.get_bytes(ea, ins.size)
    if not byts:
        # The bytes could not be read; nothing to disassemble.
        continue
    # Upper-case hex dump of the raw instruction bytes.
    byts_str = byts.hex().upper()
    for cs_ins in md.disasm(byts, ea):
        print(f'{ea:016X} {cs_ins.mnemonic} {cs_ins.op_str} # {byts_str}')

Get cross references of stack variables in a function

import idc, ida_ua, idautils, ida_bytes, ida_funcs
from typing import Dict

def find_stack_members(func_ea):
    """Collect the stack-frame members of a function.

    :param func_ea: address passed to ``idc.get_func_attr`` to fetch the
        function's frame id.
    :return: a tuple ``(members, base)`` where ``members`` maps each member
        offset to its name and ``base`` is the offset of the member named
        ``' r'`` (the hidden return-address member).
    :raises ValueError: if the frame has no member named ``' r'``.
    """
    members = {}
    base = None
    frame = idc.get_func_attr(func_ea, idc.FUNCATTR_FRAME)
    for member_offset, member_name, _ in idautils.StructMembers(frame):
        members[member_offset] = member_name
        if member_name == ' r':
            base = member_offset
    # Compare against None explicitly: an offset of 0 is a valid base and
    # must not be reported as a failure.
    if base is None:
        raise ValueError("Failed identifying the stack's base address using the return address hidden stack member")
    return members, base


def find_stack_xrefs(func_offset) -> Dict[str, list[tuple[int, int]]]:
    """
    Get the cross references of each stack variable of a function.

    Only operands 0 and 1 are examined (via ``is_stkvar0`` / ``is_stkvar1``),
    and at most one operand is recorded per instruction (operand 0 wins).

    :param func_offset: any address inside the function of interest.
    :return: mapping of variable name to a list of
        ``(instruction address, operand number)`` tuples.
    :raises ValueError: propagated from ``find_stack_members`` when the
        frame's return-address member cannot be found.
    """
    func_ea = ida_funcs.get_func(func_offset).start_ea
    result = {}
    members, stack_base = find_stack_members(func_ea)
    for func_item in idautils.FuncItems(func_ea):
        flags = ida_bytes.get_full_flags(func_item)
        if ida_bytes.is_stkvar0(flags):
            stkvar = 0
        elif ida_bytes.is_stkvar1(flags):
            stkvar = 1
        else:
            continue
        insn = ida_ua.insn_t()
        # decode_insn() returns 0 when nothing could be decoded; the operand
        # array would not be meaningful in that case.
        if not ida_ua.decode_insn(insn, func_item):
            continue
        op = insn.ops[stkvar]
        stack_offset = op.addr + idc.get_spd(func_item) + stack_base
        # The computed offset may not coincide with the start of a frame
        # member; skip such references instead of failing with KeyError.
        member = members.get(stack_offset)
        if member is None:
            continue
        result.setdefault(member, []).append((func_item, stkvar))
    return result


if __name__ == "__main__":
    # Report the stack-variable references of the function at the cursor:
    # the variable name first, then one line per referencing operand.
    result = find_stack_xrefs(idc.here())
    for member in result:
        print(f"{member}")
        lst = result[member]
        for addr, opn in lst:
            print(f'{addr:x} op#:{opn}')

Get the text that IDA Pro generates in the Disassembly View (including lines that are not stored as comments)

import ida_kernwin
import idc
import ida_funcs

# get the function at the cursor
ea = idc.here()
pfn = ida_funcs.get_func(ea)

# get the function start address
fn_start = pfn.start_ea

# get the lines of Disassembly View of the first instruction such as __unwind { // __CxxFrameHandler3
text = ida_kernwin.disasm_text_t()
# The third argument is the end address of the range to render, so
# idc.get_item_end must be called with the start address; passing the
# function object itself is not an address.
ida_kernwin.gen_disasm_text(text, fn_start, idc.get_item_end(fn_start), False)
for disasm_line in text:
    print(disasm_line.line)

Convert incorrectly analyzed data into code

import idautils
import idc
import ida_kernwin
import ida_funcs
import ida_bytes
import ida_ua
    
def fix_data_to_code(ea):
    """
    Fix incorrectly analyzed data to code.
    Example:
    .text:0000000140013257                 jz      loc_1400137B0
    .text:0000000140013257 ; ---------------------------------------------------------------------------
    .text:000000014001325D byte_14001325D  db 4Ch                  ; CODE XREF: sub_140012DB0+4A1↑j
    .text:000000014001325D ;   } // starts at 140013031
    .text:000000014001325E ; ---------------------------------------------------------------------------
    .text:000000014001325E
    .text:000000014001325E loc_14001325E:                          ; DATA XREF: .rdata:0000000140288EA8↓o
    .text:000000014001325E ;   try {
    .text:000000014001325E                 mov     ecx, esi

    When we try to make code by IDAPython create_insn function directly at 14001325D, it will not change anything.
    Instead we first need to undefine the next instruction at 14001325E and then make 14001325D as code.

    :param ea: any address inside the function to repair. If no function
        contains it, a message is printed and nothing is changed.
    """
    pfn = ida_funcs.get_func(ea)
    if pfn is None:
        print(f'no function found @ {ea:x}')
        return

    # The bounds are captured once, before any item is modified.
    fn_end = pfn.end_ea
    ea = pfn.start_ea

    # Walk every item of the function by size.
    # FuncItems is not used because it omits the data items.
    while ea < fn_end:
        disasm = idc.GetDisasm(ea)
        # Because is_code and is_data function do not work well
        # the disasm text is used to check if the item is code or data.
        if disasm.startswith('db'):
            print(f'set as code @ {ea:x} {disasm}')
            # ea + idc.get_item_size(ea) is used to get the next address
            # because next_head function does not work well.
            next_ea = ea + idc.get_item_size(ea)
            while idc.GetDisasm(next_ea).startswith('db'):
                next_ea += 1

            print(f'executiong ida_bytes.del_items({next_ea:x})')
            ida_bytes.del_items(next_ea)
            if ida_ua.create_insn(ea):
                # auto_wait is mandatory to make IDA to finish the automatic analysis.
                idc.auto_wait()
            else:
                # create_insn returns 0 on failure. The item is left as data
                # and stepped over below, so the same address is never
                # retried forever.
                print(f'failed to create an instruction @ {ea:x}')
        # Step over the current item, whether it was already code, was just
        # converted, or could not be converted.
        ea = ea + idc.get_item_size(ea)
        

if __name__ == "__main__":
    # Repair the function that contains the address returned by idc.here().
    ea = idc.here()
    fix_data_to_code(ea)

Computing cyclomatic complexity

import ida_gdl
import ida_kernwin
import ida_funcs

# Function containing the address under the cursor.
ea = ida_kernwin.get_screen_ea()
ida_fn = ida_funcs.get_func(ea)

# Each basic block of the flow chart is a node; each successor link is an edge.
blocks = list(ida_gdl.FlowChart(ida_fn))
node_cnt = len(blocks)
edge_cnt = sum(len(list(bl.succs())) for bl in blocks)

# Complexity is computed as E - N + 2.
complexity = edge_cnt - node_cnt + 2

print(f'Number of Nodes = {node_cnt}')
print(f'Number of Edges = {edge_cnt}')
print(f'Cyclomatic Complexity = {complexity}')