""" Diaphora, a diffing plugin for IDA Copyright (c) 2015-2020, Joxean Koret This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>. """ import os import sys import imp import time import json import decimal import difflib import sqlite3 import traceback import threading from hashlib import md5 import diaphora from pygments import highlight from pygments.lexers import NasmLexer, CppLexer, DiffLexer from pygments.formatters import HtmlFormatter from others.tarjan_sort import strongly_connected_components, robust_topological_sort from jkutils.factor import primesbelow as primes from jkutils.graph_hashes import CKoretKaramitasHash from idc import * from idaapi import * from idautils import * from PyQt5 import QtCore, QtGui, QtWidgets #------------------------------------------------------------------------------- # Constants unexported in IDA Python PRTYPE_SEMI = 0x0008 # Messages MSG_RELAXED_RATIO_ENABLED = """AUTOHIDE DATABASE\n Relaxed ratio calculations can be enabled. It will ignore many small modifications to functions and will match more functions with higher ratios. Enable this option if you're only interested in the new functionality. Disable it for patch diffing if you're interested in small modifications (like buffer sizes). This is recommended for diffing big databases (more than 20,000 functions in the database). 
You can disable it by un-checking the 'Relaxed calculations of differences ratios' option.""" MSG_FUNCTION_SUMMARIES_ONLY = """AUTOHIDE DATABASE\n Do not export basic blocks or instructions will be enabled. It will not export the information relative to basic blocks or instructions and 'Diff assembly in a graph' will not be available. This is automatically done for exporting huge databases with more than 100,000 functions. You can disable it by un-checking the 'Do not export basic blocks or instructions' option.""" LITTLE_ORANGE = 0x026AFD #------------------------------------------------------------------------------- def log(message): msg("[%s] %s\n" % (time.asctime(), message)) #------------------------------------------------------------------------------- def log_refresh(msg, show=False, do_log=True): if show: show_wait_box(msg) else: replace_wait_box(msg) if do_log: log(msg) #------------------------------------------------------------------------------- # TODO: FIX hack diaphora.log = log diaphora.log_refresh = log_refresh #------------------------------------------------------------------------------- g_bindiff = None def show_choosers(): global g_bindiff if g_bindiff is not None: g_bindiff.show_choosers(False) #------------------------------------------------------------------------------- def save_results(): global g_bindiff if g_bindiff is not None: filename = ask_file(1, "*.diaphora", "Select the file to store diffing results") if filename is not None: g_bindiff.save_results(filename) #------------------------------------------------------------------------------- def load_results(): tmp_diff = CIDABinDiff(":memory:") filename = ask_file(0, "*.diaphora", "Select the file to load diffing results") if filename is not None: tmp_diff.load_results(filename) #------------------------------------------------------------------------------- def import_definitions(): tmp_diff = diaphora.CIDABinDiff(":memory:") filename = ask_file(0, "*.sqlite", "Select the file 
to import structures, unions and enumerations from") if filename is not None: if ask_yn(1, "HIDECANCEL\nDo you really want to import all structures, unions and enumerations?") == 1: tmp_diff.import_definitions_only(filename) #------------------------------------------------------------------------------- def diaphora_decode(ea): ins = idaapi.insn_t() decoded_size = idaapi.decode_insn(ins, ea) return decoded_size, ins #------------------------------------------------------------------------------- class CHtmlViewer(PluginForm): def OnCreate(self, form): self.parent = self.FormToPyQtWidget(form) self.PopulateForm() self.browser = None self.layout = None return 1 def PopulateForm(self): self.layout = QtWidgets.QVBoxLayout() self.browser = QtWidgets.QTextBrowser() self.browser.setLineWrapMode(QtWidgets.QTextEdit.FixedColumnWidth) self.browser.setLineWrapColumnOrWidth(150) self.browser.setHtml(self.text) self.browser.setReadOnly(True) self.browser.setFontWeight(12) self.layout.addWidget(self.browser) self.parent.setLayout(self.layout) def Show(self, text, title): self.text = text return PluginForm.Show(self, title) #------------------------------------------------------------------------------- class CBasicChooser(Choose): def __init__(self, title): Choose.__init__( self, title, [ ["Id", 10 | Choose.CHCOL_PLAIN] , ["Name", 30 | Choose.CHCOL_PLAIN] ]) self.items = [] def OnGetSize(self): return len(self.items) def OnGetLine(self, n): return self.items[n] #------------------------------------------------------------------------------- # Hex-Rays finally removed AddCommand(). Now, instead of a 1 line call, we need # 2 classes... 
class command_handler_t(ida_kernwin.action_handler_t): def __init__(self, obj, cmd_id, num_args = 2): self.obj = obj self.cmd_id = cmd_id self.num_args = num_args ida_kernwin.action_handler_t.__init__(self) def activate(self, ctx): if self.num_args == 1: return self.obj.OnCommand(self.cmd_id) if len(self.obj.selected_items) == 0: sel = 0 else: sel = self.obj.selected_items[0] return self.obj.OnCommand(sel, self.cmd_id) def update(self, ctx): return idaapi.AST_ENABLE_ALWAYS #------------------------------------------------------------------------------- # Support for the removed AddCommand() API class CDiaphoraChooser(diaphora.CChooser, Choose): def __init__(self, title, bindiff, show_commands = True): diaphora.CChooser.__init__(self, title, bindiff, show_commands) self.actions = [] def AddCommand(self, menu_name, shortcut=None): if menu_name is not None: action_name = "Diaphora:%s" % menu_name.replace(" ", "") else: action_name = None self.actions.append([len(self.actions), action_name, menu_name, shortcut]) return len(self.actions)-1 def OnPopup(self, form, popup_handle): for num, action_name, menu_name, shortcut in self.actions: if menu_name is None: ida_kernwin.attach_action_to_popup(form, popup_handle, None) else: handler = command_handler_t(self, num, 2) desc = ida_kernwin.action_desc_t(action_name, menu_name, handler, shortcut) ida_kernwin.attach_dynamic_action_to_popup(form, popup_handle, desc) #------------------------------------------------------------------------------- class CIDAChooser(CDiaphoraChooser): def __init__(self, title, bindiff, show_commands=True): CDiaphoraChooser .__init__(self, title, bindiff, show_commands) if title.startswith("Unmatched in"): Choose.__init__(self, title, [ ["Line", 8], ["Address", 8], ["Name", 20] ], Choose.CH_MULTI) else: Choose.__init__(self, title, [ ["Line", 8], ["Address", 8], ["Name", 20], ["Address 2", 8], ["Name 2", 20], ["Ratio", 5], ["BBlocks 1", 5], ["BBlocks 2", 5], ["Description", 30] ], Choose.CH_MULTI) 
def OnSelectLine(self, n): item = self.items[n[0]] if self.primary: try: jump_ea = int(item[1], 16) # Only jump for valid addresses if is_mapped(jump_ea): jumpto(jump_ea) except: print("OnSelectLine", sys.exc_info()[1]) else: self.bindiff.show_asm(self.items[n[0]], self.primary) def OnGetLine(self, n): try: return self.items[n] except: print("OnGetLine", sys.exc_info()[1]) def OnGetSize(self): return len(self.items) def OnDeleteLine(self, items): for n in items: if n >= 0: name1 = self.items[n][2] name2 = self.items[n][4] del self.items[n] if name1 in self.bindiff.matched1: self.bindiff.matched1.remove(name1) if name2 in self.bindiff.matched2: self.bindiff.matched2.remove(name2) return [Choose.ALL_CHANGED] + items def show(self, force=False): if self.show_commands: self.items = sorted(self.items, key=lambda x: decimal.Decimal(x[5]), reverse=True) t = self.Show() if t < 0: return False if self.show_commands and (self.cmd_diff_asm is None or force): # create aditional actions handlers self.cmd_rediff = self.AddCommand("Diff again") self.cmd_save_results = self.AddCommand("Save results") self.cmd_add_manual_match = self.AddCommand("Add manual match") self.AddCommand(None) self.cmd_diff_asm = self.AddCommand("Diff assembly") self.cmd_diff_c = self.AddCommand("Diff pseudo-code") self.cmd_diff_graph = self.AddCommand("Diff assembly in a graph") self.cmd_diff_c_patch = self.AddCommand("Show pseudo-code patch") self.AddCommand(None) self.cmd_import_selected = self.AddCommand("Import selected", "Ctrl+Alt+i") self.cmd_import_selected_auto = self.AddCommand("Import selected sub_*") self.cmd_import_all = self.AddCommand("Import *all* functions") self.cmd_import_all_funcs = self.AddCommand("Import *all* data for sub_* functions") self.AddCommand(None) self.cmd_highlight_functions = self.AddCommand("Highlight matches") self.cmd_unhighlight_functions = self.AddCommand("Unhighlight matches") elif not self.show_commands and (self.cmd_show_asm is None or force): self.cmd_show_asm = 
self.AddCommand("Show assembly") self.cmd_show_pseudo = self.AddCommand("Show pseudo-code") return True def OnCommand(self, n, cmd_id): # Aditional right-click-menu commands handles if cmd_id == self.cmd_show_asm: self.bindiff.show_asm(self.items[n], self.primary) elif cmd_id == self.cmd_show_pseudo: self.bindiff.show_pseudo(self.items[n], self.primary) elif cmd_id == self.cmd_import_all: if ask_yn(1, "HIDECANCEL\nDo you really want to import all matched functions, comments, prototypes and definitions?") == 1: self.bindiff.import_all(self.items) elif cmd_id == self.cmd_import_all_funcs: if ask_yn(1, "HIDECANCEL\nDo you really want to import all IDA named matched functions, comments, prototypes and definitions?") == 1: self.bindiff.import_all_auto(self.items) elif cmd_id == self.cmd_import_selected or cmd_id == self.cmd_import_selected_auto: if len(self.selected_items) <= 1: self.bindiff.import_one(self.items[n]) else: if ask_yn(1, "HIDECANCEL\nDo you really want to import all selected IDA named matched functions, comments, prototypes and definitions?") == 1: self.bindiff.import_selected(self.items, self.selected_items, cmd_id == self.cmd_import_selected_auto) elif cmd_id == self.cmd_diff_c: self.bindiff.show_pseudo_diff(self.items[n]) elif cmd_id == self.cmd_diff_c_patch: self.bindiff.show_pseudo_diff(self.items[n], html=False) elif cmd_id == self.cmd_diff_asm: self.bindiff.show_asm_diff(self.items[n]) elif cmd_id == self.cmd_highlight_functions: if ask_yn(1, "HIDECANCEL\nDo you want to change the background color of each matched function?") == 1: color = self.get_color() for item in self.items: ea = int(item[1], 16) if not set_color(ea, CIC_FUNC, color): print("Error setting color for %x" % ea) self.Refresh() elif cmd_id == self.cmd_unhighlight_functions: for item in self.items: ea = int(item[1], 16) if not set_color(ea, CIC_FUNC, 0xFFFFFF): print("Error setting color for %x" % ea) self.Refresh() elif cmd_id == self.cmd_diff_graph: item = self.items[n] ea1 = 
int(item[1], 16) name1 = item[2] ea2 = int(item[3], 16) name2 = item[4] log("Diff graph for 0x%x - 0x%x" % (ea1, ea2)) self.bindiff.graph_diff(ea1, name1, ea2, name2) elif cmd_id == self.cmd_save_results: filename = ask_file(1, "*.diaphora", "Select the file to store diffing results") if filename is not None: self.bindiff.save_results(filename) elif cmd_id == self.cmd_add_manual_match: self.add_manual_match() elif cmd_id == self.cmd_rediff: self.bindiff.db.execute("detach diff") timeraction_t(self.bindiff.re_diff, None, 1000) return True def get_diff_functions(self): cur = self.bindiff.db_cursor() cur.execute("select cast(id as text), name from diff.functions order by id") rows = list(cur.fetchall()) rows = list(map(list, rows)) cur.close() return rows def add_manual_match(self): f = choose_func("Select a function from the current database...", 0) if f is not None: diff_chooser = CBasicChooser("Select a function from the external database...") diff_funcs = self.get_diff_functions() diff_chooser.items = diff_funcs ret = diff_chooser.Show(modal=True) if ret > -1: name1 = get_func_name(f.start_ea) name2 = diff_funcs[ret][1] if name1 in self.bindiff.matched1 or name2 in self.bindiff.matched2: line = "Either the local function %s or the foreign function %s are already matched.\n" + \ "Please remove the previously assigned match before adding a manual match." 
warning(line % (repr(name1), repr(name2))) else: log("Adding manual match between %s and %s" % (name1, name2)) sql = """ select distinct f.address ea, f.name name1, df.address ea2, df.name name2, 'Manual Match' description, f.pseudocode pseudo1, df.pseudocode pseudo2, f.assembly asm1, df.assembly asm2, f.pseudocode_primes pseudo_primes1, df.pseudocode_primes pseudo_primes2, f.nodes bb1, df.nodes bb2, cast(f.md_index as real) md1, cast(df.md_index as real) md2 from functions f, diff.functions df where f.name = %s and df.name = %s""" % (repr(name1), repr(name2)) self.bindiff.add_matches_from_query_ratio(sql, self.bindiff.best_chooser, self.bindiff.partial_chooser) for chooser in [self.bindiff.best_chooser, self.bindiff.partial_chooser, self.bindiff.unreliable_chooser]: chooser.Refresh() def OnSelectionChange(self, sel_list): self.selected_items = sel_list def seems_false_positive(self, item): name1 = item[2] name2 = item[4] name1 = name1.rstrip("_0") name2 = name2.rstrip("_0") if not name1.startswith("sub_") and not name2.startswith("sub_"): if name1 != name2: if name2.find(name1) == -1 and not name1.find(name2) == -1: return True return False def OnGetLineAttr(self, n): if not self.title.startswith("Unmatched"): item = self.items[n] ratio = float(item[5]) if self.seems_false_positive(item): return [LITTLE_ORANGE, 0] else: red = int(164 * (1 - ratio)) green = int(128 * ratio) blue = int(255 * (1 - ratio)) color = int("0x%02x%02x%02x" % (blue, green, red), 16) return [color, 0] return [0xFFFFFF, 0] #------------------------------------------------------------------------------- class CBinDiffExporterSetup(Form): def __init__(self): s = r"""Diaphora Please select the path to the SQLite database to save the current IDA database and the path of the SQLite database to diff against. If no SQLite diff database is selected, it will just export the current IDA database to SQLite format. Leave the 2nd field empty if you are exporting the first database. 
SQLite databases: Export filter limits: <#Select a file to export the current IDA database to SQLite format#Export IDA database to SQLite :{iFileSave}> <#Minimum address to find functions to export#From address:{iMinEA}> <#Select the SQLite database to diff against #SQLite database to diff against:{iFileOpen}> <#Maximum address to find functions to export#To address :{iMaxEA}> <Use the decompiler if available:{rUseDecompiler}> <Do not export library and thunk functions:{rExcludeLibraryThunk}> <#Enable if you want neither sub_* functions nor library functions to be exported#Export only non-IDA generated functions:{rNonIdaSubs}> <#Export only function summaries, not all instructions. Showing differences in a graph between functions will not be available.#Do not export instructions and basic blocks:{rFuncSummariesOnly}> <Use probably unreliable methods:{rUnreliable}> <Recommended to disable with databases with more than 5.000 functions#Use slow heuristics:{rSlowHeuristics}> <#Enable this option if you aren't interested in small changes#Relaxed calculations of differences ratios:{rRelaxRatio}> <Use experimental heuristics:{rExperimental}> <#Enable this option to ignore sub_* names for the 'Same name' heuristic.#Ignore automatically generated names:{rIgnoreSubNames}> <#Enable this option to ignore all function names for the 'Same name' heuristic.#Ignore all function names:{rIgnoreAllNames}> <#Enable this option to ignore thunk functions, nullsubs, etc....#Ignore small functions:{rIgnoreSmallFunctions}>{cGroup1}> Project specific rules: <#Select the project specific Python script rules#Python script:{iProjectSpecificRules}> NOTE: Don't select IDA database files (.IDB, .I64) as only SQLite databases are considered. 
""" args = {'iFileSave': Form.FileInput(save=True, swidth=40), 'iFileOpen': Form.FileInput(open=True, swidth=40), 'iMinEA': Form.NumericInput(tp=Form.FT_HEX, swidth=22), 'iMaxEA': Form.NumericInput(tp=Form.FT_HEX, swidth=22), 'cGroup1' : Form.ChkGroupControl(("rUseDecompiler", "rExcludeLibraryThunk", "rUnreliable", "rNonIdaSubs", "rSlowHeuristics", "rRelaxRatio", "rExperimental", "rFuncSummariesOnly", "rIgnoreSubNames", "rIgnoreAllNames", "rIgnoreSmallFunctions")), 'iProjectSpecificRules' : Form.FileInput(open=True)} Form.__init__(self, s, args) def set_options(self, opts): if opts.file_out is not None: self.iFileSave.value = opts.file_out if opts.file_in is not None: self.iFileOpen.value = opts.file_in if opts.project_script is not None: self.iProjectSpecificRules.value = opts.project_script self.rUseDecompiler.checked = opts.use_decompiler self.rExcludeLibraryThunk.checked = opts.exclude_library_thunk self.rUnreliable.checked = opts.unreliable self.rSlowHeuristics.checked = opts.slow self.rRelaxRatio.checked = opts.relax self.rExperimental.checked = opts.experimental self.iMinEA.value = opts.min_ea self.iMaxEA.value = opts.max_ea self.rNonIdaSubs.checked = opts.ida_subs == False self.rIgnoreSubNames.checked = opts.ignore_sub_names self.rIgnoreAllNames.checked = opts.ignore_all_names self.rIgnoreSmallFunctions.checked = opts.ignore_small_functions self.rFuncSummariesOnly.checked = opts.func_summaries_only def get_options(self): opts = dict( file_out = self.iFileSave.value, file_in = self.iFileOpen.value, use_decompiler = self.rUseDecompiler.checked, exclude_library_thunk = self.rExcludeLibraryThunk.checked, unreliable = self.rUnreliable.checked, slow = self.rSlowHeuristics.checked, relax = self.rRelaxRatio.checked, experimental = self.rExperimental.checked, min_ea = self.iMinEA.value, max_ea = self.iMaxEA.value, ida_subs = self.rNonIdaSubs.checked == False, ignore_sub_names = self.rIgnoreSubNames.checked, ignore_all_names = self.rIgnoreAllNames.checked, 
ignore_small_functions = self.rIgnoreSmallFunctions.checked, func_summaries_only = self.rFuncSummariesOnly.checked, project_script = self.iProjectSpecificRules.value ) return BinDiffOptions(**opts) #------------------------------------------------------------------------------- class timeraction_t(object): def __init__(self, func, args, interval): self.func = func self.args = args self.interval = interval self.obj = idaapi.register_timer(self.interval, self) if self.obj is None: raise RuntimeError("Failed to register timer") def __call__(self): if self.args is not None: self.func(self.args) else: self.func() return -1 #------------------------------------------------------------------------------- class uitimercallback_t(object): def __init__(self, g, interval): self.interval = interval self.obj = idaapi.register_timer(self.interval, self) if self.obj is None: raise RuntimeError("Failed to register timer") self.g = g def __call__(self): f = find_widget(self.g._title) activate_widget(f, 1) process_ui_action("GraphZoomFit", 0) return -1 #------------------------------------------------------------------------------- class CDiffGraphViewer(GraphViewer): def __init__(self, title, g, colours): try: GraphViewer.__init__(self, title, False) self.graph = g[0] self.relations = g[1] self.nodes = {} self.colours = colours except: warning("CDiffGraphViewer: OnInit!!! 
" + str(sys.exc_info()[1])) def OnRefresh(self): try: self.Clear() self.nodes = {} for key in self.graph: self.nodes[key] = self.AddNode([key, self.graph[key]]) for key in self.relations: if not key in self.nodes: self.nodes[key] = self.AddNode([key, [[0, 0, ""]]]) parent_node = self.nodes[key] for child in self.relations[key]: if not child in self.nodes: self.nodes[child] = self.AddNode([child, [[0, 0, ""]]]) child_node = self.nodes[child] self.AddEdge(parent_node, child_node) return True except: print("GraphViewer Error:", sys.exc_info()[1]) return True def OnGetText(self, node_id): try: ea, rows = self[node_id] if ea in self.colours: colour = self.colours[ea] else: colour = 0xFFFFFF ret = [] for row in rows: ret.append(row[2]) label = "\n".join(ret) return (label, colour) except: print("GraphViewer.OnGetText:", sys.exc_info()[1]) return ("ERROR", 0x000000) def Show(self): return GraphViewer.Show(self) #------------------------------------------------------------------------------- class CIdaMenuHandlerShowChoosers(idaapi.action_handler_t): def __init__(self): idaapi.action_handler_t.__init__(self) def activate(self, ctx): show_choosers() return 1 def update(self, ctx): return idaapi.AST_ENABLE_ALWAYS #------------------------------------------------------------------------------- class CIdaMenuHandlerSaveResults(idaapi.action_handler_t): def __init__(self): idaapi.action_handler_t.__init__(self) def activate(self, ctx): save_results() return 1 def update(self, ctx): return idaapi.AST_ENABLE_ALWAYS #------------------------------------------------------------------------------- class CIdaMenuHandlerLoadResults(idaapi.action_handler_t): def __init__(self): idaapi.action_handler_t.__init__(self) def activate(self, ctx): load_results() return 1 def update(self, ctx): return idaapi.AST_ENABLE_ALWAYS #------------------------------------------------------------------------------- class CIDABinDiff(diaphora.CBinDiff): def __init__(self, db_name): 
diaphora.CBinDiff.__init__(self, db_name, chooser=CIDAChooser) self.decompiler_available = True self.names = dict(Names()) self.min_ea = get_inf_attr(INF_MIN_EA) self.max_ea = get_inf_attr(INF_MAX_EA) self.project_script = None self.hooks = None def load_hooks(self): if self.project_script is None or self.project_script == "": return True try: log("Loading project specific Python script %s" % self.project_script) module = imp.load_source("diaphora_hooks", self.project_script) except: print("Error loading project specific Python script: %s" % str(sys.exc_info()[1])) return False if module is None: # How can it be? return False keys = dir(module) if 'HOOKS' not in keys: log("Error: The project specific script doesn't export the HOOKS dictionary") return False hooks = module.HOOKS if 'DiaphoraHooks' not in hooks: log("Error: The project specific script exports the HOOK dictionary but it doesn't contain a 'DiaphoraHooks' entry.") return False hook_class = hooks["DiaphoraHooks"] self.hooks = hook_class(self) return True def refresh(self): idaapi.request_refresh(0xFFFFFFFF) def show_choosers(self, force=False): if len(self.best_chooser.items) > 0: self.best_chooser.show(force) if len(self.partial_chooser.items) > 0: self.partial_chooser.show(force) if self.unreliable_chooser is not None and len(self.unreliable_chooser.items) > 0: self.unreliable_chooser.show(force) if self.unmatched_primary is not None and len(self.unmatched_primary.items) > 0: self.unmatched_primary.show(force) if self.unmatched_second is not None and len(self.unmatched_second.items) > 0: self.unmatched_second.show(force) def diff(self, db): res = diaphora.CBinDiff.diff(self, db) if res: # And, finally, show the list of best and partial matches and # register the hotkey for re-opening results self.show_choosers() self.register_menu() hide_wait_box() return res def get_last_crash_func(self): sql = "select address from functions order by id desc limit 1" cur = self.db_cursor() cur.execute(sql) row = 
cur.fetchone() if not row: return None address = int(row[0]) cur.close() return address def recalculate_primes(self): sql = "select primes_value from functions" callgraph_primes = 1 callgraph_all_primes = {} cur = self.db_cursor() cur.execute(sql) for row in cur.fetchall(): ret = row[0] callgraph_primes *= decimal.Decimal(row[0]) try: callgraph_all_primes[ret] += 1 except KeyError: callgraph_all_primes[ret] = 1 cur.close() return callgraph_primes, callgraph_all_primes def do_export(self, crashed_before = False): callgraph_primes = 1 callgraph_all_primes = {} func_list = list(Functions(self.min_ea, self.max_ea)) total_funcs = len(func_list) t = time.time() if crashed_before: start_func = self.get_last_crash_func() if start_func is None: warning("Diaphora cannot resume the previous crashed session, the export process will start from scratch.") crashed_before = False else: callgraph_primes, callgraph_all_primes = self.recalculate_primes() self.db.commit() self.db.execute("PRAGMA synchronous = OFF") self.db.execute("PRAGMA journal_mode = MEMORY") self.db.execute("BEGIN transaction") i = 0 for func in func_list: if user_cancelled(): raise Exception("Canceled.") i += 1 if (total_funcs >= 100) and i % (int(total_funcs/100)) == 0 or i == 1: line = "Exported %d function(s) out of %d total.\nElapsed %d:%02d:%02d second(s), remaining time ~%d:%02d:%02d" elapsed = time.time() - t remaining = (elapsed / i) * (total_funcs - i) m, s = divmod(remaining, 60) h, m = divmod(m, 60) m_elapsed, s_elapsed = divmod(elapsed, 60) h_elapsed, m_elapsed = divmod(m_elapsed, 60) replace_wait_box(line % (i, total_funcs, h_elapsed, m_elapsed, s_elapsed, h, m, s)) if crashed_before: rva = func - self.get_base_address() if rva != start_func: continue # When we get to the last function that was previously exported, switch # off the 'crash' flag and continue with the next row. 
crashed_before = False continue props = self.read_function(func) if props == False: continue ret = props[11] callgraph_primes *= decimal.Decimal(ret) try: callgraph_all_primes[ret] += 1 except KeyError: callgraph_all_primes[ret] = 1 self.save_function(props) # Try to fix bug #30 and, also, try to speed up operations as # doing a commit every 10 functions, as before, is overkill. if total_funcs > 5000 and i % (total_funcs/10) == 0: self.db.commit() self.db.execute("PRAGMA synchronous = OFF") self.db.execute("PRAGMA journal_mode = MEMORY") self.db.execute("BEGIN transaction") md5sum = GetInputFileMD5() self.save_callgraph(str(callgraph_primes), json.dumps(callgraph_all_primes), md5sum) self.export_structures() self.export_til() replace_wait_box("Creating indexes...") self.create_indexes() def export(self): if self.project_script is not None: log("Loading project specific Python script...") if not self.load_hooks(): return False crashed_before = False crash_file = "%s-crash" % self.db_name if os.path.exists(crash_file): log("Resuming a previously crashed session...") crashed_before = True log("Creating crash file %s..." % crash_file) with open(crash_file, "wb") as f: f.close() try: show_wait_box("Exporting database") self.do_export(crashed_before) finally: hide_wait_box() self.db.commit() log("Removing crash file %s-crash..." 
% self.db_name) os.remove("%s-crash" % self.db_name) cur = self.db_cursor() cur.execute("analyze") cur.close() self.db_close() def import_til(self): log("Importing type libraries...") cur = self.db_cursor() sql = "select name from diff.program_data where type = 'til'" cur.execute(sql) for row in cur.fetchall(): til = row["name"] if type(til) is bytes: til = til.decode("utf-8") try: add_default_til(til) except: log("Error loading til %s: %s" % (row["name"], str(sys.exc_info()[1]))) cur.close() auto_wait() def import_definitions(self): cur = self.db_cursor() sql = "select type, name, value from diff.program_data where type in ('structure', 'struct', 'enum')" cur.execute(sql) rows = diaphora.result_iter(cur) new_rows = set() for row in rows: if row["name"] is None: continue the_name = row["name"].split(" ")[0] if get_struc_id(the_name) == BADADDR: type_name = "struct" if row["type"] == "enum": type_name = "enum" elif row["type"] == "union": type_name == "union" new_rows.add(row) line = "%s %s;" % (type_name, row["name"]) try: ret = idc.parse_decls(line) if ret != 0: pass except: log("Error importing type: %s" % str(sys.exc_info()[1])) for _ in range(10): for row in new_rows: if row["name"] is None: continue the_name = row["name"].split(" ")[0] if get_struc_id(the_name) == BADADDR and get_struc_id(row["name"]) == BADADDR: definition = self.get_valid_definition(row["value"]) ret = idc.parse_decls(definition) # Remove the "idc." to reproduce some strange behaviour if ret != 0: pass cur.close() auto_wait() def reinit(self, main_db, diff_db, create_choosers=True): log("Main database '%s'." % main_db) log("Diff database '%s'." 
% diff_db) self.__init__(main_db) self.attach_database(diff_db) if create_choosers: self.create_choosers() def import_definitions_only(self, filename): self.reinit(":memory:", filename) self.import_til() self.import_definitions() def show_asm_diff(self, item): cur = self.db_cursor() sql = """select * from ( select prototype, assembly, name, 1 from functions where address = ? and assembly is not null union select prototype, assembly, name, 2 from diff.functions where address = ? and assembly is not null) order by 4 asc""" ea1 = str(int(item[1], 16)) ea2 = str(int(item[3], 16)) cur.execute(sql, (ea1, ea2)) rows = cur.fetchall() if len(rows) != 2: warning("Sorry, there is no assembly available for either the first or the second database.") else: row1 = rows[0] row2 = rows[1] html_diff = CHtmlDiff() asm1 = self.prettify_asm(row1["assembly"]) asm2 = self.prettify_asm(row2["assembly"]) buf1 = "%s proc near\n%s\n%s endp" % (row1["name"], asm1, row1["name"]) buf2 = "%s proc near\n%s\n%s endp" % (row2["name"], asm2, row2["name"]) fmt = HtmlFormatter() fmt.noclasses = True fmt.linenos = False fmt.nobackground = True src = html_diff.make_file(buf1.split("\n"), buf2.split("\n"), fmt, NasmLexer()) title = "Diff assembler %s - %s" % (row1["name"], row2["name"]) cdiffer = CHtmlViewer() cdiffer.Show(src, title) cur.close() def import_one(self, item): ret = ask_yn(1, "AUTOHIDE DATABASE\nDo you want to import all the type libraries, structs and enumerations?") if ret == 1: # Import all the type libraries from the diff database self.import_til() # Import all the struct and enum definitions self.import_definitions() elif ret == -1: return # Import just the selected item ea1 = str(int(item[1], 16)) ea2 = str(int(item[3], 16)) self.do_import_one(ea1, ea2, True) new_func = self.read_function(str(ea1)) self.delete_function(ea1) self.save_function(new_func) self.db.commit() self.update_choosers() def show_asm(self, item, primary): cur = self.db_cursor() if primary: db = "main" else: db = 
"diff" ea = str(int(item[1], 16)) sql = "select prototype, assembly, name from %s.functions where address = ?" sql = sql % db cur.execute(sql, (ea, )) row = cur.fetchone() if row is None: warning("Sorry, there is no assembly available for the selected function.") else: fmt = HtmlFormatter() fmt.noclasses = True fmt.linenos = True asm = self.prettify_asm(row["assembly"]) final_asm = "; %s\n%s proc near\n%s\n%s endp\n" final_asm = final_asm % (row["prototype"], row["name"], asm, row["name"]) src = highlight(final_asm, NasmLexer(), fmt) title = "Assembly for %s" % row["name"] cdiffer = CHtmlViewer() cdiffer.Show(src, title) cur.close() def show_pseudo(self, item, primary): cur = self.db_cursor() if primary: db = "main" else: db = "diff" ea = str(int(item[1], 16)) sql = "select prototype, pseudocode, name from %s.functions where address = ?" sql = sql % db cur.execute(sql, (str(ea), )) row = cur.fetchone() if row is None or row["prototype"] is None or row["pseudocode"] is None: warning("Sorry, there is no pseudo-code available for the selected function.") else: fmt = HtmlFormatter() fmt.noclasses = True fmt.linenos = True func = "%s\n%s" % (row["prototype"], row["pseudocode"]) src = highlight(func, CppLexer(), fmt) title = "Pseudo-code for %s" % row["name"] cdiffer = CHtmlViewer() cdiffer.Show(src, title) cur.close() def show_pseudo_diff(self, item, html = True): cur = self.db_cursor() sql = """select * from ( select prototype, pseudocode, name, 1 from functions where address = ? and pseudocode is not null union select prototype, pseudocode, name, 2 from diff.functions where address = ? 
def graph_diff(self, ea1, name1, ea2, name2):
    """Open two side-by-side graph viewers for a pair of matched functions.

    The graph of each function is fetched with get_graph(); if either one
    comes back as a pair of empty dictionaries a warning is shown and False
    is returned. Otherwise both graphs are compared, shown and docked.
    """
    primary_graph = self.get_graph(str(ea1), True)
    secondary_graph = self.get_graph(str(ea2))

    no_graph = ({}, {})
    if no_graph in (primary_graph, secondary_graph):
        warning("Sorry, graph information is not available for one of the databases.")
        return False

    colours = self.compare_graphs(primary_graph, ea1, secondary_graph, ea2)

    title1 = "Graph for %s (primary)" % name1
    title2 = "Graph for %s (secondary)" % name2

    viewers = []
    for idx, (title, graph) in enumerate(((title1, primary_graph), (title2, secondary_graph))):
        viewers.append(CDiffGraphViewer(title, graph, colours[idx]))

    for viewer in viewers:
        viewer.Show()

    # Dock the secondary view to the right of the primary one
    set_dock_pos(title1, title2, DP_RIGHT)

    for viewer in viewers:
        uitimercallback_t(viewer, 100)
def row_is_importable(self, ea2, import_syms):
    """Tell whether an instruction of the secondary database has data to import.

    @param ea2: instruction address; it is converted with str() before the
                lookup.
    @param import_syms: dictionary mapping str(address) to the list
                        [ea, cmt1, cmt2, name, type, disasm, pseudo comment,
                        pseudo itp] built by import_instruction_level().
    @return: True when the row has a comment, a repeatable comment, a name
             or a pseudo-code comment; False otherwise or when the address
             is unknown.
    """
    ea = str(ea2)
    if ea not in import_syms:
        return False

    row = import_syms[ea]
    # 1: cmt1, 2: cmt2, 3: name, 6: pseudo-code comment. The name lives at
    # index 3; index 2 is the repeatable comment.
    return any(row[idx] is not None for idx in (1, 2, 3, 6))

def import_instruction_level(self, ea1, ea2, cur):
    """Import instruction level data (comments, names, types) for one match.

    Instructions of the secondary function ``ea2`` that carry importable
    data are paired with instructions of the primary function ``ea1`` by
    diffing the stored assembly of both functions line by line, and each
    pair is handed to import_instruction().

    @param ea1: address of the function in the primary database.
    @param ea2: address of the function in the secondary database.
    @param cur: unused; the method opens and closes its own cursor. The
                parameter is kept so existing callers keep working.
    """
    columns = ("ea", "cmt1", "cmt2", "name", "type", "dis", "cmt", "itp")

    cur = self.db_cursor()
    try:
        # Check first if we have any importable items
        sql = """ select ins.address ea, ins.disasm dis, ins.comment1 cmt1,
                         ins.comment2 cmt2, ins.name name, ins.type type,
                         ins.pseudocomment cmt, ins.pseudoitp itp
                    from diff.function_bblocks bb,
                         diff.functions f,
                         diff.bb_instructions bbi,
                         diff.instructions ins
                   where f.id = bb.function_id
                     and bbi.basic_block_id = bb.basic_block_id
                     and ins.id = bbi.instruction_id
                     and f.address = ?
                     and (ins.comment1 is not null
                       or ins.comment2 is not null
                       or ins.name is not null
                       or pseudocomment is not null) """
        cur.execute(sql, (str(ea2),))
        # Keys are normalised with str() because every later lookup uses
        # str(address).
        import_syms = {}
        for row in cur.fetchall():
            import_syms[str(row["ea"])] = [row[col] for col in columns]
        if not import_syms:
            return

        # Check in the current database
        sql = """ select distinct ins.address ea, ins.disasm dis, ins.comment1 cmt1,
                         ins.comment2 cmt2, ins.name name, ins.type type,
                         ins.pseudocomment cmt, ins.pseudoitp itp
                    from function_bblocks bb,
                         functions f,
                         bb_instructions bbi,
                         instructions ins
                   where f.id = bb.function_id
                     and bbi.basic_block_id = bb.basic_block_id
                     and ins.id = bbi.instruction_id
                     and f.address = ?"""
        cur.execute(sql, (str(ea1),))
        matched_syms = {}
        for row in cur.fetchall():
            matched_syms[str(row["ea"])] = [row[col] for col in columns]
        if not matched_syms:
            return

        # We have 'something' to import, let's diff the assembly...
        # Column 3 is the 1/2 marker: ordering by it guarantees that the
        # primary row comes first and the secondary row second.
        sql = """select *
                   from (
                 select assembly, assembly_addrs, 1
                   from functions
                  where address = ?
                    and assembly is not null
                  union
                 select assembly, assembly_addrs, 2
                   from diff.functions
                  where address = ?
                    and assembly is not null)
                  order by 3 asc"""
        cur.execute(sql, (str(ea1), str(ea2)))
        diff_rows = cur.fetchall()
        # Both listings are required; a single row cannot be diffed.
        if len(diff_rows) != 2:
            return

        lines1 = diff_rows[0]["assembly"]
        lines2 = diff_rows[1]["assembly"]
        address1 = json.loads(diff_rows[0]["assembly_addrs"])
        address2 = json.loads(diff_rows[1]["assembly_addrs"])

        diff_list = difflib._mdiff(lines1.splitlines(1), lines2.splitlines(1))
        for left, right, _ignore in diff_list:
            left_line = left[0]
            right_line = right[0]
            # Lines present in only one side have no counterpart
            if right_line == "" or left_line == "":
                continue

            # At this point, we know which line number matches with
            # which another line number in both databases.
            ins_ea1 = address1[int(left_line) - 1]
            ins_ea2 = address2[int(right_line) - 1]
            changed = left[1].startswith('\x00-') and right[1].startswith('\x00+')
            if changed or self.row_is_importable(ins_ea2, import_syms):
                key1 = str(ins_ea1)
                key2 = str(ins_ea2)
                # matched_syms is keyed by primary addresses and
                # import_syms by secondary addresses.
                if key1 in matched_syms and key2 in import_syms:
                    self.import_instruction(matched_syms[key1], import_syms[key2])
    finally:
        cur.close()
def import_selected(self, items, selected, only_auto):
    """Import the chooser rows picked by the user.

    Type libraries and struct/enum definitions are imported first. Then the
    rows of ``items`` at the indexes listed in ``selected`` are passed to
    import_items(); when ``only_auto`` is true only the rows whose element 2
    (the name) starts with "sub_" are kept.
    """
    # Type libraries first, then the struct and enum definitions
    self.import_til()
    self.import_definitions()

    candidates = [(items[idx], items[idx][2]) for idx in selected]
    wanted = [
        row for row, primary_name in candidates
        if not only_auto or primary_name.startswith("sub_")
    ]
    self.import_items(wanted)
def import_all_auto(self, items):
    """Run do_import_all_auto() and log any failure instead of raising.

    @param items: chooser rows handed over unchanged to
                  do_import_all_auto().
    """
    try:
        self.do_import_all_auto(items)
    except:
        # Deliberately broad: any failure is reported in the output window
        # together with its traceback instead of propagating to the caller.
        log("import_all_auto(): %s" % str(sys.exc_info()[1]))
        traceback.print_exc()
def constant_filter(self, value):
    """Decide whether an immediate value is worth recording as a constant.

    A value is rejected when it is below 0x1000, when all the bits selected
    by one of the "high bits set" masks are set, or when it is exactly one
    of the 64 single-bit values 1 << 0 .. 1 << 63.

    @param value: constant value
    @type value: int

    @return: C{True} if the value should be kept, C{False} otherwise
    """
    # Small values are discarded
    if value < 0x1000:
        return False

    high_bit_masks = (
        0xFFFFFF00,
        0xFFFF00,
        0xFFFFFFFFFFFFFF00,
        0xFFFFFFFFFFFF00,
    )
    if any((value & mask) == mask for mask in high_bit_masks):
        return False

    # Single bit values are mostly defines / flags
    is_single_bit = any(value == (1 << shift) for shift in range(64))
    return not is_single_bit
flags = get_func_attr(f, FUNCATTR_FLAGS) if flags & FUNC_LIB or flags == -1: return False if self.exclude_library_thunk: # Skip library and thunk functions flags = get_func_attr(f, FUNCATTR_FLAGS) if flags & FUNC_LIB or flags & FUNC_THUNK or flags == -1: return False image_base = self.get_base_address() nodes = 0 edges = 0 instructions = 0 mnems = [] dones = {} names = set() bytes_hash = [] bytes_sum = 0 function_hash = [] outdegree = 0 indegree = len(list(CodeRefsTo(f, 1))) assembly = {} basic_blocks_data = {} bb_relations = {} bb_topo_num = {} bb_topological = {} switches = [] bb_degree = {} bb_edges = [] constants = [] # The callees will be calculated later callees = list() # Calculate the callers callers = list() for caller in list(CodeRefsTo(f, 0)): caller_func = get_func(caller) if caller_func and caller_func.start_ea not in callers: callers.append(caller_func.start_ea) mnemonics_spp = 1 cpu_ins_list = GetInstructionList() cpu_ins_list.sort() for block in flow: if block.end_ea == 0 or block.end_ea == BADADDR: print(("0x%08x: Skipping bad basic block" % f)) continue nodes += 1 instructions_data = [] block_ea = block.start_ea - image_base idx = len(bb_topological) bb_topological[idx] = [] bb_topo_num[block_ea] = idx for x in list(Heads(block.start_ea, block.end_ea)): mnem = print_insn_mnem(x) disasm = GetDisasm(x) size += get_item_size(x) instructions += 1 if mnem in cpu_ins_list: mnemonics_spp *= self.primes[cpu_ins_list.index(mnem)] try: assembly[block_ea].append([x - image_base, disasm]) except KeyError: if nodes == 1: assembly[block_ea] = [[x - image_base, disasm]] else: assembly[block_ea] = [[x - image_base, "loc_%x:" % x], [x - image_base, disasm]] decoded_size, ins = diaphora_decode(x) if ins.ops[0].type in [o_mem, o_imm, o_far, o_near, o_displ]: decoded_size -= ins.ops[0].offb if ins.ops[1].type in [o_mem, o_imm, o_far, o_near, o_displ]: decoded_size -= ins.ops[1].offb if decoded_size <= 0: decoded_size = 1 for oper in ins.ops: if oper.type == o_imm: if 
self.is_constant(oper, x) and self.constant_filter(oper.value): constants.append(oper.value) drefs = list(DataRefsFrom(x)) if len(drefs) > 0: for dref in drefs: if get_func(dref) is None: str_constant = get_strlit_contents(dref, -1, -1) if str_constant is not None: str_constant = str_constant.decode("utf-8", "backslashreplace") if str_constant not in constants: constants.append(str_constant) curr_bytes = get_bytes(x, decoded_size, False) if curr_bytes is None or len(curr_bytes) != decoded_size: log("Failed to read %d bytes at [%08x]" % (decoded_size, x)) continue bytes_hash.append(curr_bytes) bytes_sum += sum(curr_bytes) function_hash.append(get_bytes(x, get_item_size(x), False)) outdegree += len(list(CodeRefsFrom(x, 0))) mnems.append(mnem) op_value = get_operand_value(x, 1) if op_value == -1: op_value = get_operand_value(x, 0) tmp_name = None if op_value != BADADDR and op_value in self.names: tmp_name = self.names[op_value] demangle_named_name = demangle_name(tmp_name, INF_SHORT_DN) if demangle_named_name is not None: tmp_name = demangle_named_name pos = tmp_name.find("(") if pos > -1: tmp_name = tmp_name[:pos] if not tmp_name.startswith("sub_") and not tmp_name.startswith("nullsub_"): names.add(tmp_name) # Calculate the callees l = list(CodeRefsFrom(x, 0)) for callee in l: callee_func = get_func(callee) if callee_func and callee_func.start_ea != func.start_ea: if callee_func.start_ea not in callees: callees.append(callee_func.start_ea) if len(l) == 0: l = DataRefsFrom(x) tmp_type = None for ref in l: if ref in self.names: tmp_name = self.names[ref] tmp_type = idc.get_type(ref) ins_cmt1 = GetCommentEx(x, 0) ins_cmt2 = GetCommentEx(x, 1) instructions_data.append([x - image_base, mnem, disasm, ins_cmt1, ins_cmt2, tmp_name, tmp_type]) switch = get_switch_info(x) if switch: switch_cases = switch.get_jtable_size() results = calc_switch_cases(x, switch) if results is not None: # It seems that IDAPython for idaq64 has some bug when reading # switch's cases. 
Do not attempt to read them if the 'cur_case' # returned object is not iterable. can_iter = False switch_cases_values = set() for idx in range(len(results.cases)): cur_case = results.cases[idx] if not '__iter__' in dir(cur_case): break can_iter |= True for cidx in range(len(cur_case)): case_id = cur_case[cidx] switch_cases_values.add(case_id) if can_iter: switches.append([switch_cases, list(switch_cases_values)]) basic_blocks_data[block_ea] = instructions_data bb_relations[block_ea] = [] if block_ea not in bb_degree: # bb in degree, out degree bb_degree[block_ea] = [0, 0] for succ_block in block.succs(): if succ_block.end_ea == 0: continue succ_base = succ_block.start_ea - image_base bb_relations[block_ea].append(succ_base) bb_degree[block_ea][1] += 1 bb_edges.append((block_ea, succ_base)) if succ_base not in bb_degree: bb_degree[succ_base] = [0, 0] bb_degree[succ_base][0] += 1 edges += 1 indegree += 1 if succ_block.id not in dones: dones[succ_block] = 1 for pred_block in block.preds(): if pred_block.end_ea == 0: continue try: bb_relations[pred_block.start_ea - image_base].append(block.start_ea - image_base) except KeyError: bb_relations[pred_block.start_ea - image_base] = [block.start_ea - image_base] edges += 1 outdegree += 1 if pred_block.id not in dones: dones[pred_block] = 1 for block in flow: if block.end_ea == 0: continue block_ea = block.start_ea - image_base for succ_block in block.succs(): if succ_block.end_ea == 0: continue succ_base = succ_block.start_ea - image_base bb_topological[bb_topo_num[block_ea]].append(bb_topo_num[succ_base]) strongly_connected_spp = 0 try: strongly_connected = strongly_connected_components(bb_relations) bb_topological_sorted = robust_topological_sort(bb_topological) bb_topological = json.dumps(bb_topological_sorted) strongly_connected_spp = 1 for item in strongly_connected: val = len(item) if val > 1: strongly_connected_spp *= self.primes[val] except: # XXX: FIXME: The original implementation that we're using is # recursive 
and can fail. We really need to create our own non # recursive version. strongly_connected = [] bb_topological = None loops = 0 for sc in strongly_connected: if len(sc) > 1: loops += 1 else: if sc[0] in bb_relations and sc[0] in bb_relations[sc[0]]: loops += 1 asm = [] keys = list(assembly.keys()) keys.sort() # Collect the ordered list of addresses, as shown in the assembly # viewer (when diffing). It will be extremely useful for importing # stuff later on. assembly_addrs = [] # After sorting our the addresses of basic blocks, be sure that the # very first address is always the entry point, no matter at what # address it is. keys.remove(f - image_base) keys.insert(0, f - image_base) for key in keys: for line in assembly[key]: assembly_addrs.append(line[0]) asm.append(line[1]) asm = "\n".join(asm) cc = edges - nodes + 2 proto = self.guess_type(f) proto2 = idc.get_type(f) try: prime = str(self.primes[cc]) except: log("Cyclomatic complexity too big: 0x%x -> %d" % (f, cc)) prime = 0 comment = idc.get_func_cmt(f, 1) bytes_hash = md5(b"".join(bytes_hash)).hexdigest() function_hash = md5(b"".join(function_hash)).hexdigest() function_flags = get_func_attr(f, FUNCATTR_FLAGS) pseudo = None pseudo_hash1 = None pseudo_hash2 = None pseudo_hash3 = None pseudo_lines = 0 pseudocode_primes = None if f in self.pseudo: pseudo = "\n".join(self.pseudo[f]) pseudo_lines = len(self.pseudo[f]) pseudo_hash1, pseudo_hash2, pseudo_hash3 = self.kfh.hash_bytes(pseudo).split(";") if pseudo_hash1 == "": pseudo_hash1 = None if pseudo_hash2 == "": pseudo_hash2 = None if pseudo_hash3 == "": pseudo_hash3 = None pseudocode_primes = str(self.pseudo_hash[f]) try: clean_assembly = self.get_cmp_asm_lines(asm) except: clean_assembly = "" print("Error getting assembly for 0x%x" % f) clean_pseudo = self.get_cmp_pseudo_lines(pseudo) md_index = 0 if bb_topological: bb_topo_order = {} for i, scc in enumerate(bb_topological_sorted): for bb in scc: bb_topo_order[bb] = i tuples = [] for src, dst in bb_edges: 
def get_function_from_dictionary(self, d):
    """Rebuild the positional function record from its dictionary form.

    This is the inverse of create_function_dictionary(): the values are
    emitted in exactly the order that method unpacks them, with "userdata"
    right after "kgh_hash".

    @param d: dictionary holding every field listed below.
    @return: tuple with one element per field.
    @raise KeyError: when a field is missing from ``d``.
    """
    fields = (
        "name", "nodes", "edges", "indegree", "outdegree", "size",
        "instructions", "mnems", "names", "proto", "cc", "prime", "f",
        "comment", "true_name", "bytes_hash", "pseudo", "pseudo_lines",
        "pseudo_hash1", "pseudocode_primes", "function_flags", "asm",
        "proto2", "pseudo_hash2", "pseudo_hash3", "strongly_connected_size",
        "loops", "rva", "bb_topological", "strongly_connected_spp",
        "clean_assembly", "clean_pseudo", "mnemonics_spp", "switches",
        "function_hash", "bytes_sum", "md_index", "constants",
        "constants_size", "seg_rva", "assembly_addrs", "kgh_hash",
        "userdata", "callers", "callees", "basic_blocks_data",
        "bb_relations",
    )
    return tuple(d[field] for field in fields)

def create_function_dictionary(self, l):
    """Turn the positional function record into a dictionary keyed by field.

    @param l: sequence with exactly one value per field, in the order
              listed below.
    @return: dictionary mapping each field name to its value.
    @raise ValueError: when ``l`` does not hold exactly one value per field.
    """
    fields = (
        "name", "nodes", "edges", "indegree", "outdegree", "size",
        "instructions", "mnems", "names", "proto", "cc", "prime", "f",
        "comment", "true_name", "bytes_hash", "pseudo", "pseudo_lines",
        "pseudo_hash1", "pseudocode_primes", "function_flags", "asm",
        "proto2", "pseudo_hash2", "pseudo_hash3", "strongly_connected_size",
        "loops", "rva", "bb_topological", "strongly_connected_spp",
        "clean_assembly", "clean_pseudo", "mnemonics_spp", "switches",
        "function_hash", "bytes_sum", "md_index", "constants",
        "constants_size", "seg_rva", "assembly_addrs", "kgh_hash",
        "userdata", "callers", "callees", "basic_blocks_data",
        "bb_relations",
    )
    values = tuple(l)
    # zip() would silently truncate, so reject a wrong size explicitly, as
    # a tuple unpacking would.
    if len(values) != len(fields):
        raise ValueError("expected %d values, got %d" % (len(fields), len(values)))
    return dict(zip(fields, values))
def GetLocalType(self, ordinal, flags):
    """Return the printed definition of the local type number ``ordinal``.

    An empty string is returned when get_local_tinfo() yields nothing, when
    the serialized type it returns is empty, or when printing the type
    fails (the error is logged in that case).
    """
    type_info = get_local_tinfo(ordinal)
    if type_info is None:
        return ""

    stype, fields = type_info
    if not stype:
        return ""

    type_name = idc.get_numbered_type_name(ordinal)
    try:
        printed = idc_print_type(stype, fields, type_name, flags)
    except:
        log("Error: %s" % str(sys.exc_info()[1]))
        printed = ""
    return printed
def load_results(self, filename):
    """Load saved diffing results from ``filename`` and show them.

    The ``config`` table provides the paths of the primary and secondary
    databases; when one of them does not exist on disk the user is asked
    to locate it. The object is then re-initialised with both databases and
    the ``results`` and ``unmatched`` tables are loaded into the choosers.

    @param filename: path of the SQLite file holding the saved results.
    @return: True when the results were loaded and shown, False when the
             file is malformed or the user cancelled a file prompt.
    """
    results_db = sqlite3.connect(filename, check_same_thread=False)
    results_db.text_factory = str
    results_db.row_factory = sqlite3.Row

    cur = results_db.cursor()
    try:
        sql = "select main_db, diff_db, version from config"
        cur.execute(sql)
        rows = cur.fetchall()
        if len(rows) != 1:
            warning("Malformed results database!")
            msg("Malformed results database!")
            return False

        row = rows[0]
        version = row["version"]
        if version != diaphora.VERSION_VALUE:
            line = "The version of the diff results is %s and current version is %s, there can be some incompatibilities."
            # Format once so the dialog and the output window show the
            # same, fully expanded text.
            text = line % (version, diaphora.VERSION_VALUE)
            warning(text)
            msg(text)

        main_db = row["main_db"]
        diff_db = row["diff_db"]
        if not os.path.exists(main_db):
            log("Primary database %s not found." % main_db)
            main_db = ask_file(0, main_db, "Select the primary database path")
            if main_db is None:
                return False

        if not os.path.exists(diff_db):
            log("Secondary database %s not found." % diff_db)
            # Seed the dialog with the missing secondary path, not with the
            # primary one.
            diff_db = ask_file(0, diff_db, "Select the secondary database path")
            if diff_db is None:
                return False

        self.reinit(main_db, diff_db)

        sql = "select * from results"
        cur.execute(sql)
        for row in diaphora.result_iter(cur):
            if row["type"] == "best":
                choose = self.best_chooser
            elif row["type"] == "partial":
                choose = self.partial_chooser
            else:
                choose = self.unreliable_chooser

            ea1 = int(row["address"], 16)
            name1 = row["name"]
            ea2 = int(row["address2"], 16)
            name2 = row["name2"]
            desc = row["description"]
            ratio = float(row["ratio"])
            bb1 = int(row["bb1"])
            bb2 = int(row["bb2"])

            choose.add_item(diaphora.CChooser.Item(ea1, name1, ea2, name2, desc, ratio, bb1, bb2))

        sql = "select * from unmatched"
        cur.execute(sql)
        for row in diaphora.result_iter(cur):
            if row["type"] == "primary":
                choose = self.unmatched_primary
            else:
                choose = self.unmatched_second
            choose.add_item(diaphora.CChooser.Item(int(row["address"], 16), row["name"]))

        log("Showing diff results.")
        self.show_choosers()
        return True
    finally:
        # Every path above returns, so this is the single clean-up point.
        cur.close()
        results_db.close()
#-------------------------------------------------------------------------------
def _profiled_call(label, func, *args):
    """Run ``func(*args)`` under cProfile and print the stats sorted by time."""
    log("*** Profiling %s ***" % label)
    import cProfile

    profiler = cProfile.Profile()
    profiler.runcall(func, *args)
    profiler.print_stats(sort="time")


def _configure_bindiff(bd, opts, total_functions):
    """Copy the user selected options from ``opts`` onto the differ ``bd``."""
    # Row and time limits grow with the database size, never below the
    # defaults defined in the diaphora module.
    scale = max(total_functions / 20000, 1)

    bd.use_decompiler_always = opts.use_decompiler
    bd.exclude_library_thunk = opts.exclude_library_thunk
    bd.unreliable = opts.unreliable
    bd.slow_heuristics = opts.slow
    bd.relaxed_ratio = opts.relax
    bd.experimental = opts.experimental
    bd.min_ea = opts.min_ea
    bd.max_ea = opts.max_ea
    bd.ida_subs = opts.ida_subs
    bd.ignore_sub_names = opts.ignore_sub_names
    bd.ignore_all_names = opts.ignore_all_names
    bd.ignore_small_functions = opts.ignore_small_functions
    bd.function_summaries_only = opts.func_summaries_only
    bd.max_processed_rows = diaphora.MAX_PROCESSED_ROWS * scale
    bd.timeout = diaphora.TIMEOUT_LIMIT * scale
    bd.project_script = opts.project_script


def _prepare_export_target(opts):
    """Decide what to do when the output database already exists.

    Returns True when an export must be performed, False when the existing
    database is to be reused as is, and None when the user cancelled.
    """
    global g_bindiff

    if not os.path.exists(opts.file_out):
        return True

    crash_file = "%s-crash" % opts.file_out
    resume_crashed = False
    if os.path.exists(crash_file):
        # A crash marker was left behind: offer to resume that session.
        ret = ask_yn(1, "The previous export session crashed. Do you want to resume the previous crashed session?")
        if ret == -1:
            log("Cancelled")
            return None
        resume_crashed = ret == 1
    else:
        ret = ask_yn(0, "Export database already exists. Do you want to overwrite it?")
        if ret == -1:
            log("Cancelled")
            return None
        if ret == 0:
            return False

    # Forget any previous differ object before touching its database file.
    g_bindiff = None

    if not resume_crashed:
        remove_file(opts.file_out)
        log("Database %s removed" % repr(opts.file_out))
        if os.path.exists(crash_file):
            os.remove(crash_file)

    return True


def _diff_or_export(use_ui, **options):
    """Export the current IDA database and, optionally, diff it.

    ``use_ui`` selects whether the CBinDiffExporterSetup form is shown to let
    the user edit the options; ``options`` are forwarded to BinDiffOptions.

    Returns the CIDABinDiff object that was used, or None when the operation
    was rejected, cancelled, or the object could not even be created.
    """
    total_functions = len(list(Functions()))
    if get_idb_path() == "" or total_functions == 0:
        warning("No IDA database opened or no function in the database.\nPlease open an IDA database and create some functions before running this script.")
        return None

    opts = BinDiffOptions(**options)

    if use_ui:
        form = CBinDiffExporterSetup()
        form.Compile()
        form.set_options(opts)
        if not form.Execute():
            return None
        opts = form.get_options()

    if opts.file_out == opts.file_in:
        warning("Both databases are the same file!")
        return None
    if opts.file_out == "" or len(opts.file_out) < 5:
        warning("No output database selected or invalid filename. Please select a database file.")
        return None
    if is_ida_file(opts.file_in) or is_ida_file(opts.file_out):
        warning("One of the selected databases is an IDA file. Please select only database files")
        return None

    export = _prepare_export_target(opts)
    if export is None:
        return None

    profiling = os.getenv("DIAPHORA_PROFILE") is not None
    t0 = time.time()

    # Bound before the try block: if creating the differ fails, the handler
    # below runs and the final return must still have a value to hand back.
    bd = None
    try:
        bd = CIDABinDiff(opts.file_out)
        _configure_bindiff(bd, opts, total_functions)

        if export:
            exported = False
            if profiling:
                _profiled_call("export", bd.export)
                exported = True
            else:
                try:
                    bd.export()
                    exported = True
                except KeyboardInterrupt:
                    log("Aborted by user, removing crash file %s-crash..." % opts.file_out)
                    os.remove("%s-crash" % opts.file_out)

            if exported:
                log("Database exported. Took {} seconds.".format(time.time() - t0))
                hide_wait_box()

        if opts.file_in != "":
            if profiling:
                _profiled_call("diff", bd.diff, opts.file_in)
            else:
                bd.diff(opts.file_in)
    except BaseException:
        # Deliberate catch-all (same reach as the former bare except): any
        # failure, including a user interrupt, is reported and not propagated.
        print(("Error: %s" % sys.exc_info()[1]))
        traceback.print_exc()

    return bd

#-------------------------------------------------------------------------------
class BinDiffOptions:
    """Plain container with every option of an export/diff session.

    Each option is read from the keyword arguments; the ones that are missing
    get a default, some of which are taken from the currently opened database.
    """

    def __init__(self, **kwargs):
        option = kwargs.get
        total_functions = len(list(Functions()))

        # Default output: a ".sqlite" file next to the IDA database.
        default_db = os.path.splitext(get_idb_path())[0] + ".sqlite"
        self.file_out = option('file_out', default_db)
        self.file_in = option('file_in', '')
        self.use_decompiler = option('use_decompiler', True)
        self.exclude_library_thunk = option('exclude_library_thunk', True)

        # No default here: when not given, relax is None.
        self.relax = option('relax')
        if self.relax:
            warning(MSG_RELAXED_RATIO_ENABLED)

        self.unreliable = option('unreliable', False)
        self.slow = option('slow', False)
        self.experimental = option('experimental', False)
        self.min_ea = option('min_ea', get_inf_attr(INF_MIN_EA))
        self.max_ea = option('max_ea', get_inf_attr(INF_MAX_EA))
        self.ida_subs = option('ida_subs', True)
        self.ignore_sub_names = option('ignore_sub_names', True)
        self.ignore_all_names = option('ignore_all_names', False)
        self.ignore_small_functions = option('ignore_small_functions', False)

        # Enable, by default, exporting only function summaries for huge dbs.
        self.func_summaries_only = option('func_summaries_only', total_functions > 100000)

        # Python script to run for both the export and diffing process
        self.project_script = option('project_script')

#-------------------------------------------------------------------------------
class CHtmlDiff:
    """A replacement for difflib.HtmlDiff that tries to enforce a max width

    The main challenge is to do this given QTextBrowser's limitations. In
    particular, QTextBrowser only implements a minimum of CSS.
    """

    _html_template = """
    <html>
    <head>
    <style>%(style)s</style>
    </head>
    <body>
    <table class="diff_tab" cellspacing=0>
    %(rows)s
    </table>
    </body>
    </html>
    """

    _style = """
    table.diff_tab {
      font-family: Courier, monospace;
      table-layout: fixed;
      width: 100%;
    }
    table td {
      white-space: nowrap;
      overflow: hidden;
    }

    .diff_add {
      background-color: #aaffaa;
    }
    .diff_chg {
      background-color: #ffff77;
    }
    .diff_sub {
      background-color: #ffaaaa;
    }
    .diff_lineno {
      text-align: right;
      background-color: #e0e0e0;
    }
    """

    _row_template = """
    <tr>
        <td class="diff_lineno" width="auto">%s</td>
        <td class="diff_play" nowrap width="45%%">%s</td>
        <td class="diff_lineno" width="auto">%s</td>
        <td class="diff_play" nowrap width="45%%">%s</td>
    </tr>
    """

    _rexp_too_much_space = re.compile("^\t[.\\w]+ {8}")

    # One-pass escaping table for changed lines. A single translate() call
    # cannot double-escape (the '&' of an emitted entity is never revisited)
    # and leaves difflib's \x00 / \x01 change markers untouched.
    _html_escapes = str.maketrans({
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        " ": "&nbsp;",
    })

    def make_file(self, lhs, rhs, fmt, lex):
        """Build the HTML page with the side by side diff of lhs and rhs.

        ``lhs`` and ``rhs`` are the sequences of lines to compare. Unchanged
        lines are rendered with ``highlight`` using the lexer ``lex`` and the
        formatter ``fmt``; changed lines are escaped and wrapped in spans of
        class diff_add, diff_sub or diff_chg. Returns the page as a string.
        """
        rows = []
        for (lno, ltxt), (rno, rtxt), changed in difflib._mdiff(lhs, rhs):
            if changed:
                ltxt = self._escape_changed(ltxt)
                rtxt = self._escape_changed(rtxt)
            else:
                ltxt = highlight(ltxt, lex, fmt)
                rtxt = highlight(rtxt, lex, fmt)
            rows.append(self._row_template % (str(lno), ltxt, str(rno), rtxt))

        all_the_rows = "\n".join(rows)

        # Turn difflib's in-band change markers into spans, then expand the
        # tabs into non-breaking spaces so that HTML does not collapse them.
        for marker, markup in (
            ("\x00+", '<span class="diff_add">'),
            ("\x00-", '<span class="diff_sub">'),
            ("\x00^", '<span class="diff_chg">'),
            ("\x01", '</span>'),
            ("\t", 4 * "&nbsp;"),
        ):
            all_the_rows = all_the_rows.replace(marker, markup)

        return self._html_template % {"style": self._style, "rows": all_the_rows}

    def _escape_changed(self, text):
        """Compact the padding of a changed line and make it HTML safe."""
        return self._stop_wasting_space(text).translate(self._html_escapes)

    def _stop_wasting_space(self, s):
        """I never understood why you'd want to have 13 spaces between instruction and args'

        When the line starts with a tab, a mnemonic and at least 8 spaces,
        4 of those spaces are dropped; any other line is returned unchanged.
        """
        m = self._rexp_too_much_space.search(s)
        if m is None:
            return s

        mlen = len(m.group(0))
        return s[:mlen - 4] + s[mlen:]
#-------------------------------------------------------------------------------
# Pick the base class of the AST visitor: ctree_visitor_t when that name is
# defined, an empty placeholder class otherwise so the module still loads.
try:
    class CAstVisitorInherits(ctree_visitor_t):
        pass
except Exception:
    class CAstVisitorInherits:
        pass

#-------------------------------------------------------------------------------
class CAstVisitor(CAstVisitorInherits):
    """Visitor that folds the ``op`` of every visited item into a product.

    Each expression or instruction contributes ``primes[item.op]`` as a
    factor of ``primes_hash``, which starts at 1.
    """

    def __init__(self, cfunc):
        self.primes = primes(4096)
        ctree_visitor_t.__init__(self, CV_FAST)
        self.cfunc = cfunc
        self.primes_hash = 1

    def _accumulate(self, item):
        """Multiply the hash by the prime selected by ``item.op``.

        Errors are printed and otherwise ignored, leaving the hash untouched.
        """
        try:
            self.primes_hash *= self.primes[item.op]
        except Exception:
            traceback.print_exc()
        return 0

    def visit_expr(self, expr):
        """Account for an expression; always returns 0."""
        return self._accumulate(expr)

    def visit_insn(self, ins):
        """Account for an instruction; always returns 0."""
        return self._accumulate(ins)

#-------------------------------------------------------------------------------
def is_ida_file(filename):
    """Return True if ``filename`` has one of the IDA file extensions.

    The check is case insensitive.
    """
    ida_extensions = (".idb", ".i64", ".til", ".id0", ".id1", ".nam")
    return filename.lower().endswith(ida_extensions)

#-------------------------------------------------------------------------------
def _drop_export_tables(filename):
    """Drop every table of the export database stored in ``filename``."""
    tables = ("functions", "program", "program_data", "version",
              "instructions", "basic_blocks", "bb_relations",
              "bb_instructions", "function_bblocks")

    db = sqlite3.connect(filename, check_same_thread=False)
    try:
        # Using the connection as a context manager only commits (or rolls
        # back) the transaction; it does not close the connection.
        with db:
            for table in tables:
                db.execute("drop table if exists %s" % table)
    finally:
        # Close explicitly so that no handle to the file is left open.
        db.close()


def remove_file(filename):
    """Remove the database ``filename`` or, if not possible, empty it.

    A file that does not exist is left alone (nothing is created). When the
    file exists but cannot be deleted, its tables are dropped instead.
    """
    try:
        os.remove(filename)
    except FileNotFoundError:
        # Nothing to remove. Falling through to SQLite here would create a
        # brand new empty database file.
        return
    except OSError:
        # Fix for Bug #5: https://github.com/joxeankoret/diaphora/issues/5
        #
        # For some reason, in Windows, the handle to the SQLite database is
        # not closed, and I really try to be sure that all the databases are
        # detached, no cursor is leaked, etc... So, in case we cannot remove
        # the database file because it's still being used by IDA in Windows
        # for some unknown reason, just drop the database's tables and after
        # that continue normally.
        _drop_export_tables(filename)

#-------------------------------------------------------------------------------
def main():
    """Entry point of the script.

    When the environment variable DIAPHORA_AUTO is set, the database is
    exported without user interaction to DIAPHORA_EXPORT_FILE and IDA is
    asked to exit. Otherwise, the interactive export/diff is started.

    Raises Exception if DIAPHORA_AUTO is set but DIAPHORA_EXPORT_FILE is not.
    """
    global g_bindiff

    if os.getenv("DIAPHORA_AUTO") is None:
        _diff_or_export(True)
        return

    file_out = os.getenv("DIAPHORA_EXPORT_FILE")
    if file_out is None:
        raise Exception("No export file specified!")

    # Any value found in the environment is passed through unmodified.
    use_decompiler = os.getenv("DIAPHORA_USE_DECOMPILER")
    if use_decompiler is None:
        use_decompiler = False

    auto_wait()

    if os.path.exists(file_out):
        g_bindiff = None
        remove_file(file_out)
        log("Database %s removed" % repr(file_out))

    bd = CIDABinDiff(file_out)
    project_script = os.getenv("DIAPHORA_PROJECT_SCRIPT")
    if project_script is not None:
        bd.project_script = project_script
    bd.use_decompiler_always = use_decompiler

    # Let get_value_for() replace the current value of each of these options.
    for option in ("exclude_library_thunk", "ida_subs", "ignore_sub_names",
                   "function_summaries_only"):
        setattr(bd, option, bd.get_value_for(option, getattr(bd, option)))

    try:
        bd.export()
    except KeyboardInterrupt:
        crash_file = "%s-crash" % file_out
        log("Aborted by user, removing crash file %s-crash..." % file_out)
        # A missing crash file must not prevent reaching qexit() below.
        if os.path.exists(crash_file):
            os.remove(crash_file)

    idaapi.qexit(0)

if __name__ == "__main__":
    main()