r"""Run IDA auto-analysis and export the database for offline processing.

Depending on the chosen subcommand the script writes a BinDiff/BinExport
database, a pickle file with the relevant information from the IDB (for later
processing with plain Python), or both.

The script is meant to be run by IDA itself and relies on the names that
IDAPython provides to scripts (``idaapi``, ``idc`` and the ``idc``/``idautils``
helper functions such as ``Segments`` or ``GetManyBytes``).

Subcommands (see ``parse_args``)::

    pickle <pickle_output>
    binexport <bindiff_output>
    binexport_pickle <bindiff_output> <pickle_output>

Example: call me with
``idaw.exe -S"export_bindiff.py binexport test.BinDiff" -A -B .\sample``
to generate the BinDiff database _test.BinDiff_ from file _sample_.
"""

import sys
import os
import argparse
import pickle
import hashlib


def _is_library_or_thunk(flags):
    """Return True when *flags* has FUNC_LIB or FUNC_THUNK set."""
    return bool(flags & (idaapi.FUNC_LIB | idaapi.FUNC_THUNK))


def get_api_calls(func):
    """Yield the library/thunk call targets of the function at *func*.

    Nothing is yielded when *func* itself is flagged as a library function or
    a thunk.  Otherwise, for every call instruction of the function the first
    non-flow code reference is inspected and reported as
    ``{"address": ..., "name": ...}`` if its function flags contain FUNC_LIB
    or FUNC_THUNK.
    """
    if _is_library_or_thunk(GetFunctionFlags(func)):
        return
    for inst in FuncItems(func):
        if not idaapi.is_call_insn(inst):
            continue
        # Only the first code reference of the call is considered; calls
        # without any code reference are skipped.
        api_address = next(CodeRefsFrom(inst, 0), None)
        if api_address is None:
            continue
        # NOTE(review): GetFunctionFlags() is documented to return -1 for
        # addresses that are not inside a function, which makes the mask test
        # succeed; presumably that is how calls to import entries end up
        # here - confirm before tightening this check.
        if _is_library_or_thunk(GetFunctionFlags(api_address)):
            yield {"address": api_address, "name": NameEx(0, api_address)}


def get_many_bytes(start, length):
    """Return *length* bytes starting at address *start* as one string.

    The range is read in blocks of ``BLOCKSIZE`` bytes (the last block may be
    shorter).  When ``GetManyBytes`` cannot deliver a block (it returns
    ``None``), that block is read byte by byte with ``Byte`` instead, so the
    result always covers the complete range.
    """
    BLOCKSIZE = 512
    end = start + length
    data = []
    for block_start in range(start, end, BLOCKSIZE):
        block_end = min(block_start + BLOCKSIZE, end)
        chunk = GetManyBytes(block_start, block_end - block_start)
        if chunk is None:
            # Fall back to single-byte reads for blocks IDA refuses to read
            # in one go.
            chunk = "".join(chr(Byte(ea)) for ea in range(block_start, block_end))
        data.append(chunk)
    return "".join(data)


def _get_endianness(info):
    """Return "big", "little" or None depending on what the IDA API exposes."""
    if hasattr(info, "is_be"):
        return "big" if info.is_be() else "little"
    if hasattr(idaapi.cvar.inf, "mf"):
        return "big" if idaapi.cvar.inf.mf else "little"
    return None


def _get_bits(info):
    """Return the address size of the database (64, 32 or None).

    The 64-bit flag is tested first because IDA also sets the 32-bit flag on
    64-bit databases; testing ``is_32bit()`` first reports them as 32-bit.
    """
    if info.is_64bit():
        return 64
    if info.is_32bit():
        return 32
    return None


def _hash_input_file():
    """Return the sha512/sha256/sha1 hex digests of the analysed input file.

    The file is read once and closed again before hashing.
    """
    with open(GetInputFilePath(), "rb") as input_file:
        content = input_file.read()
    return {
        "sha512": hashlib.sha512(content).hexdigest(),
        "sha256": hashlib.sha256(content).hexdigest(),
        "sha1": hashlib.sha1(content).hexdigest(),
    }


def _code_heads(start, end):
    """Return the addresses of all code heads in ``[start, end)``."""
    return [h for h in Heads(start, end) if isCode(GetFlags(h))]


def _dump_operands(head):
    """Return the operand records of the instruction at *head*.

    At most the first five operand slots are inspected; the scan stops at the
    first slot whose textual representation is empty.
    """
    operands = []
    for index in range(5):
        text = GetOpnd(head, index)
        if text == "":
            break
        operands.append({
            "type": GetOpType(head, index),
            "opnd": text,
            "value": GetOperandValue(head, index),
        })
    return operands


def _dump_code_head(head, is_arm):
    """Return the record describing the instruction at *head*.

    ``flow_refs`` holds the references that only show up when ordinary flow
    is included, ``code_refs`` the ones reported without it.  ``name`` is only
    present for named addresses and ``thumb`` only when *is_arm* is true.
    """
    code_refs = list(CodeRefsFrom(head, False))
    record = {
        "type": "code",
        "size": ItemSize(head),
        "mnem": GetMnem(head),
        "disasm": GetDisasm(head),
        "operands": _dump_operands(head),
        "is_call": idaapi.is_call_insn(head),
        "data_refs": list(DataRefsFrom(head)),
        "flow_refs": list(set(CodeRefsFrom(head, True)) - set(code_refs)),
        "code_refs": code_refs,
    }
    name = NameEx(BADADDR, head)
    if name != "":
        record["name"] = name
    if is_arm:
        record["thumb"] = GetReg(head, 'T') != 0
    # Data heads referenced by the instruction are currently not exported;
    # the disabled code is kept for reference:
    #for dref in DataRefsFrom(head):
    #    dhead = {
    #        "type": "data",
    #        "size": ItemSize(dref)}
    #    if NameEx(BADADDR, dref) != "":
    #        dhead["name"] = NameEx(BADADDR, dref)
    #    heads.append((dref, dhead))
    return record


def _dump_segment(seg, is_arm):
    """Return the record for the segment *seg*: location, bytes and code heads."""
    seg_start = SegStart(seg)
    seg_end = SegEnd(seg)
    code_heads = {}
    for head in Heads(seg_start, seg_end):
        if isCode(GetFlags(head)):
            code_heads[head] = _dump_code_head(head, is_arm)
    return {
        "virtual_address": seg_start,
        "virtual_size": seg_end - seg_start,
        "file_offset": idaapi.get_fileregion_offset(seg_start),
        "data": get_many_bytes(seg_start, seg_end - seg_start),
        "code_heads": code_heads,
    }


def _dump_string(item):
    """Return the record for one entry of ``Strings()``."""
    if item.is_1_byte_encoding():
        encoding_size = 1
    elif item.is_2_byte_encoding():
        encoding_size = 2
    elif item.is_4_byte_encoding():
        encoding_size = 4
    else:
        encoding_size = None
    return {
        "address": item.ea,
        "data": str(item),
        "encoding_size": encoding_size,
        # The attribute is looked up under both names: "type" first, then
        # "strtype".
        "type": getattr(item, "type", getattr(item, "strtype", None)),
    }


def _dump_function(entry_point):
    """Return the record for the function starting at *entry_point*."""
    chunks = [
        {"start": start, "end": end, "code_heads": _code_heads(start, end)}
        for (start, end) in Chunks(entry_point)
    ]
    basic_blocks = [
        {
            "start": bb.startEA,
            "end": bb.endEA,
            "id": bb.id,
            "code_heads": _code_heads(bb.startEA, bb.endEA),
            "successors": [succ.id for succ in bb.succs()],
        }
        for bb in idaapi.FlowChart(idaapi.get_func(entry_point))
    ]
    return {
        "entry_point": entry_point,
        "name": GetFunctionName(entry_point),
        "chunks": chunks,
        "basic_blocks": basic_blocks,
        "called_from": list(CodeRefsTo(entry_point, False)),
        "api_calls": list(get_api_calls(entry_point)),
        "is_library_function": _is_library_or_thunk(GetFunctionFlags(entry_point)),
    }


def pickle_database(path):
    """Collect the relevant IDB contents and pickle them to *path*.

    The pickled dictionary contains the architecture description, entry
    points, input file name and hashes, all segments (with their bytes and
    code heads), the strings found by IDA and all functions.  It is written
    with pickle protocol 2.
    """
    info = idaapi.get_inf_structure()
    hashes = _hash_input_file()
    database = {
        "segments": [],
        "architecture": {
            "name": info.procName,
            "bits": _get_bits(info),
            "endian": _get_endianness(info),
        },
        "entry_points": [
            {"index": idx, "ordinal": ordnl, "address": ea, "name": name}
            for idx, ordnl, ea, name in Entries()
        ],
        "functions": [],
        "filename": GetInputFile(),
        "sha512": hashes["sha512"],
        "sha256": hashes["sha256"],
        "sha1": hashes["sha1"],
        "md5": GetInputFileMD5(),
    }

    is_arm = database["architecture"]["name"] == "arm"
    for seg in Segments():
        database["segments"].append(_dump_segment(seg, is_arm))

    database["strings"] = [_dump_string(x) for x in Strings()]
    database["functions"] = [_dump_function(x) for x in Functions()]

    with open(path, "wb") as file_:
        pickle.dump(database, file_, 2)


def binexport_database(path):
    """Write the BinDiff/BinExport database to *path* via the IDC interface."""
    # NOTE(review): path is interpolated unescaped into an IDC string
    # literal; backslashes or quotes in it may need escaping - confirm.
    idc.Eval("BinExport2Diff9(\"%s\")" % path)


def handle_binexport(args):
    """Handler of the "binexport" subcommand."""
    binexport_database(args.bindiff_output)


def handle_pickle(args):
    """Handler of the "pickle" subcommand."""
    pickle_database(args.pickle_output)


def handle_binexport_pickle(args):
    """Handler of the "binexport_pickle" subcommand (BinExport, then pickle)."""
    binexport_database(args.bindiff_output)
    pickle_database(args.pickle_output)


def main(args):
    """Run the handler selected on the command line and return exit code 0."""
    args.handler(args)
    return 0


def parse_args():
    """Parse the script arguments passed by IDA (``idc.ARGV[1:]``).

    Returns the argparse namespace; its ``handler`` attribute is the function
    implementing the selected subcommand.
    """
    parser = argparse.ArgumentParser(description = "IDA Pro script: Dump bindiff database file")
    subparsers = parser.add_subparsers(help = "subcommand")

    parser_pickle = subparsers.add_parser("pickle", help = "Dump pickled database")
    parser_pickle.add_argument("pickle_output", type = str, help = "Output pickle database file")
    parser_pickle.set_defaults(handler = handle_pickle)

    parser_bindiff = subparsers.add_parser("binexport", help = "Dump bindiff database")
    parser_bindiff.add_argument("bindiff_output", type = str, help = "Output BinExport database file")
    parser_bindiff.set_defaults(handler = handle_binexport)

    parser_bindiff_pickle = subparsers.add_parser("binexport_pickle", help = "Dump bindiff database and pickled database")
    parser_bindiff_pickle.add_argument("bindiff_output", type = str, help = "Output BinDiff database file")
    parser_bindiff_pickle.add_argument("pickle_output", type = str, help = "Output pickle database file")
    parser_bindiff_pickle.set_defaults(handler = handle_binexport_pickle)

    args = parser.parse_args(idc.ARGV[1:])
    return args


# Script driver: wait for IDA's auto-analysis to finish, run the selected
# export and close IDA with the resulting exit code.
Wait()
ret = main(parse_args())
Exit(ret)