############################################ # Copyright (C) 2018 FireEye, Inc. # # Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or # http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-BSD-3-CLAUSE or # https://opensource.org/licenses/BSD-3-Clause>, at your option. This file may not be # copied, modified, or distributed except according to those terms. # # Author: James T. Bennett # # objc2_analyzer uses emulation to perform analysis of Objective-C code in a Mach-O # # Currently supports Objective-C 2.0 for x86_64, ARM, and ARM64 # selRefs are changed to point to implementation of method where applicable. # xrefs are added from msgSend calls to the implementation where applicable. # Helpful Objective-C syntax comments are added to each msgSend call # Does not track id/sel pointers across function boundaries. # Helps IDA where it failed to track msgSend function pointers across registers. # Tracks ivar types throughout a function # # Dependencies: # https://github.com/fireeye/flare-emu ############################################ from __future__ import print_function import idc import idaapi import idautils import unicorn import unicorn.x86_const import unicorn.arm_const import unicorn.arm64_const import logging import struct import flare_emu import re UNKNOWN = "??" MAX_STR_DISPLAY_LEN = 20 class Objc2Analyzer(): def __init__(self): self.magicMask64 = 0xabbadabbad000000 self.magicMask32 = 0xabba0000 self.magicMaskMask64 = 0xffffffffffff0000 self.magicMaskMask32 = 0xffff0000 self.callMnems = ["call", "jmp", "BL", "BLX", "BLEQ", "BLXEQ", "BLR", "BLREQ", "B"] self.objcData = None self.objcSelRefs = None self.objcMsgRefs = None self.objcConst = None self.objcClassRefs = None self.objcCatList = None self.fixedSelXRefs = [] self.ivarSetters = {} self.notIvarSetters = [] for segVA in idautils.Segments(): segName = idc.get_segm_name(segVA) endVA = idc.get_segm_end(segVA) if segName == "__objc_data": self.objcData = (segVA, endVA) elif segName == "__objc_selrefs": self.objcSelRefs = (segVA, endVA) elif segName == "__objc_msgrefs": self.objcMsgRefs = (segVA, endVA) elif segName == "__objc_const": self.objcConst = (segVA, endVA) elif segName == "__objc_classrefs": self.objcClassRefs = (segVA, endVA) elif segName == "__objc_catlist": self.objcCatList = (segVA, endVA) if self.objcSelRefs or self.objcMsgRefs: self.processObjc() else: logging.debug("this Mach-O does not implement any Objective-C classes") # it appears idc.get_name_ea_simple does not work for selector reference names that end in "_" def selRefLocByName(self, name): if name[:6] == "selRef": addr = self.objcSelRefs[0] endAddr = self.objcSelRefs[1] else: addr = self.objcMsgRefs[0] endAddr = self.objcMsgRefs[1] while addr < endAddr: if idc.get_name(addr, idc.ida_name.GN_VISIBLE) == name: return addr addr = idc.next_head(addr, idc.get_inf_attr(idc.INF_MAX_EA)) def objc2AnalyzeHookX64(self, uc, address, size, userData): try: eh = userData["EmuHelper"] # move ivar ptr into reg instead of value of ivar, so we can check for ivar refs coming up in the code if idc.print_insn_mnem(address) == "mov" and idc.get_operand_type(address, 1) == 2: opval = idc.get_operand_value(address, 1) srcOpName = idc.get_name(opval, idc.ida_name.GN_VISIBLE) if srcOpName[:13] == "_OBJC_IVAR_$_": logging.debug("IVAR reference found for %s, storing %s" % (srcOpName, eh.hexString(opval))) uc.reg_write(eh.regs[idc.print_operand(address, 0)], opval) eh.skipInstruction(userData) return # look for mov instruction with [reg + reg] and check each reg for pointing to ivar, store ivar info in dst # operand if (idc.print_insn_mnem(address) == "mov" and idc.get_operand_type(address, 0) == 1 and idc.get_operand_type(address, 1) in [3, 4]): dstopnd = idc.print_operand(address, 0) srcopnd = idc.print_operand(address, 1) regs = srcopnd[1:-1] if (dstopnd[0] == "r" and ((len(regs) == 7 and regs[3:5] == "+r") or (len(regs) == 9 and regs[3] == "+" and regs[7:] == "+0"))): regs = regs.split("+") reg1 = None reg2 = None if regs[0] in eh.regs: reg1 = eh.getRegVal(regs[0]) if regs[1] in eh.regs: reg2 = eh.getRegVal(regs[1]) logging.debug("possible IVAR reference found @%s, reg1: %s reg2: %s" % ( eh.hexString(address), eh.hexString(reg1), eh.hexString(reg2))) if type(reg1) is long and idc.get_name(reg1, idc.ida_name.GN_VISIBLE)[:13] == "_OBJC_IVAR_$_": uc.reg_write(eh.regs[dstopnd], self.getIvarInfo(eh, reg1, userData)) eh.skipInstruction(userData) return elif type(reg2) is long and idc.get_name(reg2, idc.ida_name.GN_VISIBLE)[:13] == "_OBJC_IVAR_$_": uc.reg_write(eh.regs[dstopnd], self.getIvarInfo(eh, reg2, userData)) eh.skipInstruction(userData) return # track selector xrefs srcOpName = idc.get_name(idc.get_operand_value(address, 1), idc.ida_name.GN_VISIBLE) sel = None if idc.print_insn_mnem(address) == "mov" and srcOpName[:7] == "selRef_": sel = eh.getEmuPtr(idc.get_operand_value(address, 1)) elif idc.print_insn_mnem(address) == "lea" and srcOpName[:7] == "msgRef_": sel = idc.get_operand_value(address, 1) if sel: userData["magicVals"].append((address, sel)) mv = self.magicMask64 | userData["magicValsCount"] userData["magicValsCount"] += 1 uc.reg_write(eh.regs[idc.print_operand(address, 0)], mv) logging.debug("writing magic value %s to %s for %s @%s" % (eh.hexString( mv), idc.print_operand(address, 0), srcOpName, eh.hexString(address))) eh.skipInstruction(userData) return except Exception as e: logging.debug("exception in objc2AnalyzeHookX64 @%s: (%s) %s" % (eh.hexString(address), type(e), e)) print("exception in objc2AnalyzeHookX64 @%s: (%s) %s" % (eh.hexString(address), type(e), e)) eh.stopEmulation(userData) def objc2AnalyzeHookARM(self, uc, address, size, userData): try: eh = userData["EmuHelper"] opCnt = 0 while idc.get_operand_type(address, opCnt) != 0: opCnt += 1 # LDR commands may be used for getting ptr to IVARs or selector references if idc.print_insn_mnem(address)[:3] == "LDR": opnd = idc.print_operand(address, 1) if len(opnd) < 6 and opnd[0] == "[": m = re.match(r"\[([^\,\]]+)\]", opnd) if m: opreg = m.group(1) opval = eh.getRegVal(opreg) if opval & self.magicMaskMask32 == self.magicMask32: logging.debug("magic value found in %s @%s, storing %s" % ( opreg, eh.hexString(address), eh.hexString(opval))) uc.reg_write( eh.regs[idc.print_operand(address, 0)], opval) eh.skipInstruction(userData, True) return # LDR to get ivar address, just store magic val if idc.get_operand_type(address, 1) == 3: m = re.match(r"\[([^\,\]]+),([^\,\]]+)\]", opnd) if m: dstopnd = idc.print_operand(address, 0) mv = None for i in range(1, 3): if m.group(i) in eh.regs: regVal = eh.getRegVal(m.group(i)) if type(regVal) is long and regVal & self.magicMaskMask32 == self.magicMask32: # if both regs contain magic val, choose the ivar val over the self val if mv is None or (len(userData["magicVals"][regVal & 0xFFFF]) == 2 and ")self" not in userData["magicVals"][regVal & 0xFFFF][0]): mv = regVal if mv: uc.reg_write(eh.regs[dstopnd], mv) eh.skipInstruction(userData, True) return # MOV instructions may be moving sel/msg refs or ivars elif idc.print_insn_mnem(address)[:3] == "MOV" and idc.print_operand(address, 1)[0] == "#": srcOpnd = idc.print_operand(address, 1) dstOpnd = idc.print_operand(address, 0) sel = None if srcOpnd[:9] == "#(selRef_": srcOpName = srcOpnd[2:srcOpnd.find(" ")] sel = eh.getEmuPtr(self.selRefLocByName(srcOpName)) elif srcOpnd[:9] == "#(msgRef_": srcOpName = srcOpnd[2:srcOpnd.find(" ")] sel = self.selRefLocByName(srcOpName) elif srcOpnd[:18] == "#:upper16:(selRef_": # just skip the upper MOVs, we handle the lower MOVs eh.skipInstruction(userData, True) return elif srcOpnd[:18] == "#:lower16:(selRef_": srcOpName = srcOpnd[11:srcOpnd.find(" ")] sel = eh.getEmuPtr(self.selRefLocByName(srcOpName)) elif srcOpnd[:18] == "#:upper16:(msgRef_": # just skip the upper MOVs, we handle the lower MOVs eh.skipInstruction(userData, True) return elif srcOpnd[:18] == "#:lower16:(msgRef_": srcOpName = srcOpnd[11:srcOpnd.find(" ")] sel = self.selRefLocByName(srcOpName) elif srcOpnd[:15] == "#(_OBJC_IVAR_$_": ivarVa = idc.get_name_ea_simple( srcOpnd[srcOpnd.find("_OBJC_IVAR_$_"):srcOpnd.find(" ")]) mv = self.getIvarInfo(eh, ivarVa, userData) logging.debug("IVAR reference found @%s, storing magic value %s in %s" % ( eh.hexString(address), eh.hexString(mv), dstOpnd)) uc.reg_write(eh.regs[dstOpnd], mv) eh.skipInstruction(userData, True) return elif srcOpnd[:24] == "#:lower16:(_OBJC_IVAR_$_": ivarVa = idc.get_name_ea_simple( srcOpnd[srcOpnd.find("_OBJC_IVAR_$_"):srcOpnd.find(" ")]) mv = self.getIvarInfo(eh, ivarVa, userData) logging.debug("IVAR reference found @%s, storing magic value %s in %s" % ( eh.hexString(address), eh.hexString(mv), dstOpnd)) uc.reg_write(eh.regs[dstOpnd], mv) eh.skipInstruction(userData, True) return elif srcOpnd[:24] == "#:upper16:(_OBJC_IVAR_$_": # just skip the upper MOVs, we handle the lower MOVs eh.skipInstruction(userData, True) return # track sel xrefs if sel: userData["magicVals"].append((address, sel)) mv = self.magicMask32 | userData["magicValsCount"] userData["magicValsCount"] += 1 uc.reg_write(eh.regs[dstOpnd], mv) logging.debug("writing magic value %s to %s for %s @%s" % ( eh.hexString(mv), dstOpnd, srcOpName, eh.hexString(address))) eh.skipInstruction(userData, True) return # skip the ADD instructions with two operands for selrefs and magic vals elif (idc.print_insn_mnem(address)[:3] == "ADD" and opCnt == 2 and (idc.get_name(eh.getRegVal(idc.print_operand(address, 0)), idc.ida_name.GN_VISIBLE)[:7] == "selRef_" or eh.getRegVal(idc.print_operand(address, 0)) & self.magicMaskMask32 == self.magicMask32)): # if the 2nd operand is an ivar magic val overwrite the 1st reg with it if idc.get_operand_type(address, 1) == 1: regVal = eh.getRegVal(idc.print_operand(address, 1)) if (type(regVal) is long and regVal & self.magicMaskMask32 == self.magicMask32 and (len(userData["magicVals"][regVal & 0xFFFF]) == 2 and type(userData["magicVals"][regVal & 0xFFFF][0]) is str and ")self" not in userData["magicVals"][regVal & 0xFFFF][0] and userData["magicVals"][regVal & 0xFFFF][0][0] != "[")): uc.reg_write(eh.regs[idc.print_operand(address, 0)], regVal) eh.skipInstruction(userData, True) return # look for ADD instructions that are adding registers and check each reg for pointing to magic val, store # magic val in dst operand elif (idc.print_insn_mnem(address)[:3] == "ADD" and idc.get_operand_type(address, 0) == 1 and idc.get_operand_type(address, 1) == 1): dstOpnd = idc.print_operand(address, 0) i = 1 mv = None while idc.get_operand_type(address, i) != 0: if idc.get_operand_type(address, i) == 1: reg = idc.print_operand(address, i) if reg in eh.regs: regVal = eh.getRegVal(reg) if type(regVal) is long and regVal & self.magicMaskMask32 == self.magicMask32: # favor the ivar over the returned id or self id if mv is None or (len(userData["magicVals"][regVal & 0xFFFF]) == 2 and type(userData["magicVals"][regVal & 0xFFFF][0]) is str and ")self" not in userData["magicVals"][regVal & 0xFFFF][0] and userData["magicVals"][regVal & 0xFFFF][0][0] != "["): mv = regVal i += 1 if mv: uc.reg_write(eh.regs[dstOpnd], mv) eh.skipInstruction(userData) return except Exception as e: logging.debug("exception in objc2AnalyzeHookARM @%s: (%s) %s" % (eh.hexString(address), type(e), e)) print("exception in objc2AnalyzeHookARM @%s: (%s) %s" % (eh.hexString(address), type(e), e)) eh.stopEmulation(userData) def objc2AnalyzeHookARM64(self, uc, address, size, userData): try: eh = userData["EmuHelper"] opCnt = 0 while idc.get_operand_type(address, opCnt) != 0: opCnt += 1 # LDR instructions may be used for getting ptr to IVARs or selector references in # Link Time Optimized Mach-Os # LDRSW X8, =8 ; NSString *_myVar; # # LDR X20, =sel_new if idc.print_insn_mnem(address)[:3] == "LDR": srcOpnd = idc.print_operand(address, 1) dstOpnd = idc.print_operand(address, 0) # dereferencing PC offset if srcOpnd[0] == "=": for x in idautils.XrefsFrom(address): if (idc.get_segm_name(x.to) == "__objc_ivar" and idc.get_name(x.to, idc.ida_name.GN_VISIBLE)[:13] == "_OBJC_IVAR_$_"): srcOpName = idc.get_name(x.to, idc.ida_name.GN_VISIBLE) mv = self.getIvarInfo(eh, x.to, userData) logging.debug("IVAR reference found @%s for %s, storing magic value %s in %s" % ( eh.hexString(address), srcOpName, eh.hexString(mv), dstOpnd)) uc.reg_write(eh.regs[dstOpnd], mv) eh.skipInstruction(userData) return # Link Time Optimized Mach-Os will use LDR and IDA will xref to sel itself elif (idc.get_segm_name(x.to) == "__objc_methname" and idc.get_name(x.to, idc.ida_name.GN_VISIBLE)[:4] == "sel_"): # track sel xrefs selName = idc.get_name(x.to, idc.ida_name.GN_VISIBLE) sel = x.to userData["magicVals"].append((address, sel)) mv = self.magicMask64 | userData["magicValsCount"] userData["magicValsCount"] += 1 uc.reg_write(eh.regs[dstOpnd], mv) logging.debug("writing magic value %s to %s for %s @%s" % ( eh.hexString(mv), dstOpnd, selName, eh.hexString(address))) eh.skipInstruction(userData) return # accessing Ivar offset in class object or classref/selref offset from ADRP page base # LDR instruction to get IVAR offset from id pointer # LDRSW X8, =8 ; NSString *_myVar; # LDR X0, [X0,X8] # # ADRP X25, #classRef_SimpleClass@PAGE # LDR X0, [X25,#classRef_SimpleClass@PAGEOFF] elif idc.get_operand_type(address, 1) == 3: m = re.match(r"\[([^\,\]]+)\,([^\,\]]+)\]", srcOpnd) if m: mv = None for i in range(1, 3): if m.group(i)[:8] == "#selRef_": selref = self.selRefLocByName( m.group(i)[1:m.group(i).find("@")]) sel = eh.getEmuPtr(selref) userData["magicVals"].append((address, sel)) mv = self.magicMask64 | userData["magicValsCount"] userData["magicValsCount"] += 1 uc.reg_write(eh.regs[dstOpnd], mv) logging.debug("writing magic value %s to %s for %s @%s" % ( eh.hexString(mv), dstOpnd, m.group(i)[1:], eh.hexString(address))) eh.skipInstruction(userData) return elif m.group(i)[:10] == "#classRef_": clsRef = m.group(i)[1:m.group(i).find("@")] id = eh.getEmuPtr(idc.get_name_ea_simple(clsRef)) userData["magicVals"].append((id, "")) mv = self.magicMask64 | userData["magicValsCount"] userData["magicValsCount"] += 1 uc.reg_write(eh.regs[dstOpnd], mv) logging.debug("writing magic value %s to %s for %s @%s" % ( eh.hexString(mv), dstOpnd, m.group(i)[1:], eh.hexString(address))) eh.skipInstruction(userData) return elif m.group(i) in eh.regs: regVal = eh.getRegVal(m.group(i)) if type(regVal) is long and regVal & self.magicMaskMask64 == self.magicMask64: if mv is None or (len(userData["magicVals"][regVal & 0xFFFF]) == 2 and type(userData["magicVals"][regVal & 0xFFFF][0]) is str and ")self" not in userData["magicVals"][regVal & 0xFFFF][0]): mv = regVal if mv: uc.reg_write(eh.regs[dstOpnd], mv) eh.skipInstruction(userData) return # LDR instruction to retrieve sel/ivar when not Link Time Optimized # LDR X1, [X1] ; "new" elif idc.get_operand_type(address, 1) == 4: if srcOpnd[1:-1] in eh.regs: regVal = eh.getRegVal(srcOpnd[1:-1]) if type(regVal) is long and regVal & self.magicMaskMask64 == self.magicMask64: uc.reg_write(eh.regs[dstOpnd], regVal) eh.skipInstruction(userData) return # Non Link Time Optimized Mach-O uses ADRP/ADD to retrieve sels/ivars # ADRP X1, #selRef_new@PAGE # ADD X1, X1, #selRef_new@PAGEOFF elif idc.print_insn_mnem(address) == "ADRP": srcOpnd = idc.print_operand(address, 1) dstOpnd = idc.print_operand(address, 0) if srcOpnd[0] == "#": for x in idautils.XrefsFrom(address): if (idc.get_segm_name(x.to) == "__objc_ivar" and idc.get_name(x.to, idc.ida_name.GN_VISIBLE)[:13] == "_OBJC_IVAR_$_"): srcOpName = idc.get_name(x.to, idc.ida_name.GN_VISIBLE) mv = self.getIvarInfo(eh, x.to, userData) logging.debug("IVAR reference found @%s for %s, storing magic value %s in %s" % ( eh.hexString(address), srcOpName, eh.hexString(mv), dstOpnd)) uc.reg_write(eh.regs[dstOpnd], mv) eh.skipInstruction(userData) return elif (idc.get_segm_name(x.to) == "__objc_selrefs" and idc.get_name(x.to, idc.ida_name.GN_VISIBLE)[:7] == "selRef_"): # track sel xrefs sel = eh.derefPtr(x.to) selName = idc.get_name(sel, idc.ida_name.GN_VISIBLE) userData["magicVals"].append((address, sel)) mv = self.magicMask64 | userData["magicValsCount"] userData["magicValsCount"] += 1 uc.reg_write(eh.regs[dstOpnd], mv) logging.debug("writing magic value %s to %s for %s @%s" % ( eh.hexString(mv), dstOpnd, selName, eh.hexString(address))) # skip two instructions to skip succeeding ADD uc.reg_write(eh.regs["pc"], userData["currAddr"] + 8) return return # skip the ADD instructions with two operands for our magic values elif (idc.print_insn_mnem(address)[:3] == "ADD" and opCnt == 2 and eh.getRegVal(idc.print_operand(address, 0)) & self.magicMaskMask64 == self.magicMask64): eh.skipInstruction(userData) return # look for ADD instructions that are adding registers and check each reg for pointing to magic val, store # magic val in dst operand elif (idc.print_insn_mnem(address)[:3] == "ADD" and idc.get_operand_type(address, 0) == 1 and idc.get_operand_type(address, 1) == 1): dstopnd = idc.print_operand(address, 0) i = 1 mv = None while idc.get_operand_type(address, i) != 0: if idc.get_operand_type(address, i) in [1, 8]: reg = idc.print_operand(address, i) if reg in eh.regs: regVal = eh.getRegVal(reg) if type(regVal) is long and regVal & self.magicMaskMask64 == self.magicMask64: # favor the ivar over the returned id or self id if mv is None or (len(userData["magicVals"][regVal & 0xFFFF]) == 2 and type(userData["magicVals"][regVal & 0xFFFF][0]) is str and ")self" not in userData["magicVals"][regVal & 0xFFFF][0] and userData["magicVals"][regVal & 0xFFFF][0][0] != "["): mv = regVal i += 1 if mv: uc.reg_write(eh.regs[dstopnd], mv) eh.skipInstruction(userData) return except Exception as e: logging.debug("exception in objc2AnalyzeHookARM64 @%s: (%s) %s" % (eh.hexString(address), type(e), e)) print("exception in objc2AnalyzeHookARM64 @%s: (%s) %s" % (eh.hexString(address), type(e), e)) eh.stopEmulation(userData) # used to work backwards and get selref from imp ptr in cases where patch has already been applied def getSelRefFromImpPtr(self, eh, imp): selref = None retClsName = "" if eh.arch == unicorn.UC_ARCH_ARM and eh.isThumbMode(imp): imp |= 1 logging.debug("checking xrefs for IMP %s" % eh.hexString(imp)) for x in idautils.XrefsTo(imp): if x.frm >= self.objcConst[0] and x.frm < self.objcConst[1]: # even though imp ptr is stored at offset 0x10 in struct, xref just goes to base of struct, we want the # first field for y in idautils.XrefsTo(eh.derefPtr(x.frm)): if y.frm >= self.objcSelRefs[0] and y.frm < self.objcSelRefs[1]: selref = y.frm break # determine return value's type # check type string to see if id is returned typeStr = eh.getIDBString(eh.derefPtr(x.frm + eh.size_pointer)) if len(typeStr) > 0 and typeStr[0] == "@": # scan imp for ivar reference, grab its type if eh.arch == unicorn.UC_ARCH_ARM and eh.isThumbMode(imp): imp = imp & ~1 retClsName = self.getIvarTypeFromFunc(eh, imp) return selref, retClsName def callHook(self, address, argv, funcName, userData): eh = userData["EmuHelper"] if eh.size_pointer == 4: magicMask = self.magicMask32 magicMaskMask = self.magicMaskMask32 else: magicMask = self.magicMask64 magicMaskMask = self.magicMaskMask64 if "retainAutorelease" in funcName or "_objc_retain" in funcName: eh.uc.reg_write(eh.regs["ret"], argv[0]) elif "_objc_store" in funcName: eh.uc.reg_write(eh.regs["ret"], argv[1]) if eh.isValidEmuPtr(argv[0]): eh.writeEmuPtr(argv[0], argv[1]) # ARM instruction patched to BLX to get xref, get selref elif ((eh.arch == unicorn.UC_ARCH_ARM or eh.arch == unicorn.UC_ARCH_ARM64) and address in userData["patchedSelRefs"]): selref, retClsName = self.getSelRefFromImpPtr( eh, idc.get_func_attr(idc.get_operand_value(address, 0), idc.FUNCATTR_START)) logging.debug("ARM: got selref %s from imp ptr %s @%s" % (eh.hexString(selref), eh.hexString( idc.get_func_attr(idc.get_operand_value(address, 0), idc.FUNCATTR_START)), eh.hexString(address))) # grab register name reg, skip = userData["patchedSelRefs"][address] if eh.arch == unicorn.UC_ARCH_ARM64: sel = eh.derefPtr(selref) eh.uc.reg_write(eh.regs[reg], sel) logging.debug("set %s to sel %s" % (reg, eh.hexString(sel))) else: eh.uc.reg_write(eh.regs[reg], selref) logging.debug("set %s to selref %s" % (reg, eh.hexString(selref))) # skip bytes to next instruction (emulator memory is not the same as IDB) logging.debug("skipping %d bytes" % skip) eh.changeProgramCounter(userData, address + skip) return elif "msgSend" in funcName: retClsName = "" selXref = None # get sel and id if "_stret" in funcName: sel = argv[2] else: sel = argv[1] if sel & magicMaskMask == magicMask: selXref, sel = userData["magicVals"][sel & 0xffff] logging.debug("found magic sel used @%s: %s" % (eh.hexString(address), eh.hexString(sel))) selName = idc.get_name(sel, idc.ida_name.GN_VISIBLE) # if dealing with a msgref, we dont need to get xref selref = None logging.debug("selName = %s" % selName) if selName[:7] == "msgRef_": selref = sel elif selName[:4] == "sel_": for x in idautils.XrefsTo(sel): if x.frm >= self.objcSelRefs[0] and x.frm < self.objcSelRefs[1]: selref = x.frm break else: # check if selref has already been converted to imp if sel in list(idautils.Functions()): selref, retClsName = self.getSelRefFromImpPtr(eh, sel) if selref: selName = self.formatName(idc.get_name(selref, idc.ida_name.GN_VISIBLE)) # get id info isInstance = True if "_stret" in funcName: id = argv[1] else: id = argv[0] if "Super" in funcName: id = eh.getEmuPtr(id) if idc.get_segm_name(id) == "__objc_methtype": id = clsName = eh.getIDBString(id)[2:-1] elif id & magicMaskMask == magicMask: logging.debug("magic val found for id: %s" % eh.hexString(id)) id, clsName = userData["magicVals"][id & 0xffff] else: idref = None nameId = idc.get_name(id, idc.ida_name.GN_VISIBLE) if nameId[:6] == "cfstr_": # use the first n chars of the string as the id nstr = idc.get_strlit_contents(eh.derefPtr( id + eh.size_pointer * 2), -1, idc.STRTYPE_C)[:MAX_STR_DISPLAY_LEN].replace("\r", "").replace("\n", "") id = "@\"" + nstr clsName = "NSString" if len(nstr) == MAX_STR_DISPLAY_LEN: id += "..\"" else: id += "\"" elif nameId[:14] == "_OBJC_CLASS_$_" or nameId[:15] == "_OBJC_CATEGORY_": id = clsName = self.formatName(nameId) isInstance = False elif nameId == "_NSApp": id = clsName = "NSApp" else: for x in idautils.XrefsTo(id): if idc.get_name(x.frm, idc.ida_name.GN_VISIBLE)[:9] == "classRef_": idref = x.frm break if idref is None: id = clsName = UNKNOWN else: id = clsName = self.formatName(idc.get_name(idref, idc.ida_name.GN_VISIBLE)) isInstance = False if selName == "init" or selName == "new" or selName == "sharedInstance": retClsName = clsName elif selName == "class": retClsName = clsName + "_&_class" elif selName == "alloc": retClsName = clsName + "_&_alloc" if clsName == "": clsName = UNKNOWN elif clsName[-8:] == "_&_alloc": clsName = clsName[:-8] retClsName = clsName elif clsName[-8:] == "_&_class": clsName = clsName[:-8] isInstance = False # determine return value type if retClsName == "" and clsName in userData["classes"]: if isInstance: type_ = "instance" else: type_ = "class" # get IMP for selref funcVA = None logging.debug("determining return value type") for x in userData["classes"][clsName][type_]: if x[0] == selref: funcVA = x[1] break # find method struct for IMP and get type info if funcVA: logging.debug("IMP ptr: %s" % eh.hexString(funcVA)) tgt = funcVA if eh.arch == unicorn.UC_ARCH_ARM: tgt = funcVA | 1 for x in idautils.XrefsTo(tgt): if x.frm >= self.objcConst[0] and x.frm < self.objcConst[1]: # check type string to see if id is returned typeStr = eh.getIDBString( eh.derefPtr(x.frm + eh.size_pointer)) logging.debug("type string: %s" % typeStr) if len(typeStr) > 0 and typeStr[0] == "@": # scan imp for ivar reference, grab its type retClsName = self.getIvarTypeFromFunc(eh, funcVA) logging.debug("ret cls name: %s" % retClsName) break # save objc syntax of call to reference later if used elsewhere userData["magicVals"].append( ("[%s %s]" % (id, selName), retClsName)) eh.uc.reg_write(eh.regs["ret"], magicMask | userData["magicValsCount"]) userData["magicValsCount"] += 1 # if IDA didn't know about this xref to msgSend and we didn't catch it with our wider net, process it now if (address not in userData["msgSendXrefs"] and address not in userData["possibleMsgSendXrefs"] and idc.get_operand_type(address, 0) == 1): self.processMsgSend(eh, address, id, selName, clsName, isInstance, selref, selXref, userData) idc.add_dref(address, eh.getRegVal(idc.print_operand(address, 0)), idc.dr_I | idc.XREF_USER) logging.debug("found undiscovered msgSend xref@%s" % eh.hexString(address)) userData["msgSendXrefs"].append(address) else: logging.debug("couldn't find sel (%s) for msgSend call @%s" % (eh.hexString(sel), eh.hexString(address))) if address in userData["targetInfo"] and address not in userData["visitedTargets"]: userData["visitedTargets"].append(address) idc.set_cmt(address, "objc2_analyzer failed to determine arguments", 0) return # this call is one of those "call reg" instructions we thought might be a msgSend call, but wasn't sure if (address in userData["possibleMsgSendXrefs"] and idc.get_operand_type(address, 0) == 1 and "msgSend" in idc.get_name(eh.getRegVal(idc.print_operand(address, 0)), idc.ida_name.GN_VISIBLE)): self.processMsgSend(eh, address, id, selName, clsName, isInstance, selref, selXref, userData) idc.add_dref(address, eh.getRegVal(idc.print_operand(address, 0)), idc.dr_I | idc.XREF_USER) logging.debug("found undiscovered msgSend xref@%s" % eh.hexString(address)) elif address in userData["msgSendXrefs"]: self.processMsgSend(eh, address, id, selName, clsName, isInstance, selref, selXref, userData) def targetCallback(self, eh, address, argv, userData): # we do everything we need to do here in the callHook instead pass # uses heuristic to determine if getter function, then returns the type of the ivar def getIvarTypeFromFunc(self, eh, va): if va in self.ivarSetters: return self.ivarSetters[va] elif va in self.notIvarSetters: return UNKNOWN addr = va endVa = idc.get_func_attr(va, idc.FUNCATTR_END) if endVa - va < 0x20: ivarVa = None while addr <= endVa: srcOpnd = idc.print_operand(addr, 1) # if ivar is the src op for an instruction, assume this function will return it if eh.arch == unicorn.UC_ARCH_ARM and "_OBJC_IVAR_$_" in srcOpnd: oploc = idc.get_name_ea_simple( srcOpnd[srcOpnd.find("_OBJC_IVAR_$_"):srcOpnd.find(" ")]) if oploc != idc.BADADDR: ivarVa = oploc break elif eh.arch == unicorn.UC_ARCH_ARM64: for x in idautils.XrefsFrom(addr): if (idc.get_segm_name(x.to) == "__objc_ivar" and idc.get_name(x.to, idc.ida_name.GN_VISIBLE)[:13] == "_OBJC_IVAR_$_"): ivarVa = x.to break elif eh.arch == unicorn.UC_ARCH_X86: if "_OBJC_IVAR_$_" in srcOpnd: ivarVa = idc.get_operand_value(addr, 1) break addr = idc.next_head(addr, idc.get_inf_attr(idc.INF_MAX_EA)) if ivarVa: for x in idautils.XrefsTo(ivarVa): if x.frm >= self.objcConst[0] and x.frm < self.objcConst[1]: typeStr = eh.getIDBString( eh.derefPtr(x.frm + eh.size_pointer * 2)) self.ivarSetters[va] = typeStr[2:-1] logging.debug("%s is an ivar getter function, returning type %s" % ( eh.hexString(va), typeStr[2:-1])) return typeStr[2:-1] else: logging.debug( "%s determined not to be an ivar getter function", eh.hexString(va)) self.notIvarSetters.append(va) else: logging.debug( "%s determined not to be an ivar getter function", eh.hexString(va)) self.notIvarSetters.append(va) return UNKNOWN # returns class or sel name from IDA name def formatName(self, name): if name[:9] == "classRef_": name = name[9:] elif name[:14] == "_OBJC_CLASS_$_": name = name[14:] elif name[:15] == "_OBJC_CATEGORY_": name = name[15:name.find("_$_")] + "_" + name[name.find("_$_") + 3:] elif name[:7] == "selRef_": name = name[7:] elif name[:7] == "msgRef_": name = name[7:-len("__objc_msgSend_fixup")] return name # checks that the sel ptr in the method struct has an xref to selrefs or msgrefs section, returns whether the selector # is ambiguous, whether it is a msgref, and the pointer to the ref def getRefPtr(self, eh, methodVa): isMsgRef, isAmbiguous, refPtr = (None, None, None) namePtr = eh.derefPtr(methodVa) cnt = 0 for x in idautils.XrefsTo(namePtr): if self.objcSelRefs and x.frm >= self.objcSelRefs[0] and x.frm < self.objcSelRefs[1]: refPtr = x.frm isMsgRef = False elif self.objcMsgRefs and x.frm >= self.objcMsgRefs[0] and x.frm < self.objcMsgRefs[1]: refPtr = x.frm isMsgRef = True elif self.objcConst and x.frm >= self.objcConst[0] and x.frm < self.objcConst[1]: cnt += 1 # ambiguous sel names isAmbiguous = False if cnt > 1: isAmbiguous = True return isAmbiguous, isMsgRef, refPtr # adds objc comment and calls fixXref to fix xrefs for objc_msgSend # address: address of msgSend call # id: class/instance name to show in comment # sel: selector name to show in comment # clsName: name of class to lookup for sel->imp mapping # selref: sel reference to lookup in sel->imp mapping def processMsgSend(self, eh, address, id, sel, clsName, isInstance, selref, selXref, userData): logging.debug("addr: %s id: %s sel: %s clsName: %s isInstance: %s selRef: %s selXref: %s" % (eh.hexString(0 if address == None else address), id, sel, clsName, isInstance, eh.hexString(0 if selref == None else selref), eh.hexString(0 if selXref == None else selXref))) if sel: idc.set_cmt(address, "[%s %s]" % (id, sel), 0) if sel and id != UNKNOWN: # as a convenience, if sel is "new", fix xref to "init" if sel == "new" and clsName in userData["classes"]: if (len(filter(lambda x: idc.get_name(x, idc.ida_name.GN_VISIBLE) == "selRef_init", map(lambda x: x[0], userData["classes"][clsName]["instance"]))) > 0): selref = filter(lambda x: idc.get_name(x, idc.ida_name.GN_VISIBLE) == "selRef_init", map( lambda x: x[0], userData["classes"][clsName]["instance"]))[0] isInstance = True if selXref and selXref not in self.fixedSelXRefs: self.fixXref(eh, userData["classes"], clsName, selref, isInstance, selXref, address, userData) def assembleThumbBLXIns(self, eh, address, target): # pipelining and alignment pc = address + 4 pc &= 0xfffffffc offset = (target - pc) & 0xffffffff logging.debug("assembling BLX instruction for offset: %08X" % offset) S = (offset & 0x1000000) >> 24 I1 = (offset & 0x800000) >> 23 I2 = (offset & 0x400000) >> 22 # shortcut for J1 = ~I1 ^ S J1 = I1 ^ 1 ^ S J2 = I2 ^ 1 ^ S H = (offset & 0x3ff000) >> 12 L = (offset & 0xffc) >> 2 encoded = (0xf0000000 | (S << 26) | (H << 16) | ( 3 << 14) | (J1 << 13) | (J2 << 11) | (L << 1)) encoded = struct.pack("<H", (encoded >> 16)) + \ struct.pack("<H", (encoded & 0xffff)) return struct.unpack("<I", encoded)[0] # patch the referencing instruction to point to imp instead of selref and create an xref from msgSend to imp # selXref: address of selref's xref, which will be patched to point to imp if it exists and can be found def fixXref(self, eh, classes, clsName, selRefVA, isInstance, selXref, msgSendXref, userData): if clsName not in classes: logging.debug("class %s not found in objc_data section" % clsName) return funcVA = None # search stored class data for selref->funcVA tuple if isInstance: type_ = "instance" else: type_ = "class" for c in classes[clsName][type_]: if c[0] == selRefVA: funcVA = c[1] isAmbiguous = c[2] break if funcVA is None: logging.debug("selref@%s not found for class %s" % (eh.hexString(selRefVA), clsName)) return # if isAmbiguous == False: # logging.debug("this selector is not ambiguous, we will fix it later!") try: if selXref not in userData["selXrefs"]: userData["selXrefs"][selXref] = [] userData["selXrefs"][selXref].append(msgSendXref) except Exception as e: logging.debug("exception fixing xref @%s: %s" % (eh.hexString(userData["currAddr"]), e)) print("exception fixing xref @%s: %s" % (eh.hexString(userData["currAddr"]), e)) return # add xref to func and change instruction to point to function instead of selref # xref comes from call to msgSend, not from the sel xref idc.add_dref(msgSendXref, funcVA, idc.dr_I | idc.XREF_USER) # for both ARM archs, we change the LDR instruction to a BL instruction as its the only way I know how to get IDA # to make a clickable link to an objc method considering the comment bug reg = idc.print_operand(selXref, 0) srcOpnd = idc.print_operand(selXref, 1) if eh.arch == unicorn.UC_ARCH_ARM: if eh.isThumbMode(selXref): # change instructions to a BLX # if target is not 4 byte aligned, bump it up so IDA gets the xref logging.debug("fixing xref for imp %s" % eh.hexString(funcVA)) target = funcVA if target % 4 != 0: logging.debug("target is not 4-byte aligned, adding 2") target += 2 if "er16:" != srcOpnd[5:10]: # skip 8 bytes to skip over upper MOV userData["patchedSelRefs"][selXref] = (reg, 8) else: userData["patchedSelRefs"][selXref] = (reg, 4) patchVal = self.assembleThumbBLXIns(eh, selXref, target) idc.patch_dword(selXref, patchVal) self.fixedSelXRefs.append(selXref) logging.debug("selector xref fixed!") else: # i don't have an example of ARM mode objective-c code to work with logging.debug( "ARMv7 ARM mode not currently supported for xref patching") self.fixedSelXRefs.append(selXref) pass elif eh.arch == unicorn.UC_ARCH_ARM64: userData["patchedSelRefs"][selXref] = (reg, 4) if funcVA - selXref < 0: patchVal = (((funcVA - selXref) / 4) & 0xffffff) | 0x97000000 else: patchVal = ((funcVA - selXref) / 4) | 0x94000000 idc.patch_dword(selXref, patchVal) self.fixedSelXRefs.append(selXref) logging.debug("selector xref fixed!") elif eh.arch == unicorn.UC_ARCH_X86: # 7 is size of instruction offs = funcVA - selXref - 7 # change RIP-relative address idc.patch_dword(selXref + 3, offs) # change from mov to lea idc.patch_byte(selXref + 1, 0x8D) self.fixedSelXRefs.append(selXref) logging.debug("selector xref fixed!") # store the sel->imp mapping for a given method in our classes dict def processMethod(self, eh, clsName, methodVa, classes, type_): objc2ClassMethImpOffs = 2 * eh.size_pointer isAmbiguous, isMsgRef, selRefVA = self.getRefPtr(eh, methodVa) if selRefVA is None: return funcVA = eh.derefPtr(methodVa + objc2ClassMethImpOffs) if eh.arch == unicorn.UC_ARCH_ARM: # remove last bit in case of thumb mode address funcVA = funcVA & ~1 # adjust pointer to beginning of message_ref struct to get xrefs if isMsgRef: selRefVA -= eh.size_pointer # this shouldn't happen now if selRefVA in map(lambda x: x[0], classes[clsName][type_]): logging.debug("class name: %s - method type: %s - duplicate selref VA: %s, ignoring.." % (clsName, type_, eh.hexString(selRefVA))) else: logging.debug("class name: %s - method type: %s - selref VA: %s - function VA: %s - ambiguous: %s" % (clsName, type_, eh.hexString(selRefVA), eh.hexString(funcVA), isAmbiguous)) classes[clsName][type_].append((selRefVA, funcVA, isAmbiguous)) # collect imp and sel/msg ref pointers def getClassData(self, eh): objc2ClassSize = 5 * eh.size_pointer objc2ClassInfoOffs = 3 * eh.size_pointer objc2MethSize = 3 * eh.size_pointer objc2ClassBaseMethsOffs = 4 * eh.size_pointer objc2CatInstMethsOffs = 2 * eh.size_pointer objc2CatClsMethsOffs = 3 * eh.size_pointer classes = {} if self.objcData is None: return classes for va in range(self.objcData[0], self.objcData[1], objc2ClassSize): if "_OBJC_METACLASS_$_" in idc.get_name(va, idc.ida_name.GN_VISIBLE): continue clsName = self.formatName(idc.get_name(va, idc.ida_name.GN_VISIBLE)) logging.debug("walking classes @%s: %s" % (eh.hexString(va), clsName)) classes[clsName] = {"class": [], "instance": []} # get instance methods first, if class method has same name as instance method, ignore it which is not great baseMethodsVA = idc.get_name_ea_simple("_OBJC_INSTANCE_METHODS_" + clsName) if baseMethodsVA != idc.BADADDR and baseMethodsVA != 0: count = idc.get_wide_dword(baseMethodsVA + eh.size_DWORD) baseMethodsVA += eh.size_DWORD * 2 # advance to start of class methods array for va2 in range(baseMethodsVA, baseMethodsVA + objc2MethSize * count, objc2MethSize): self.processMethod(eh, clsName, va2, classes, "instance") baseMethodsVA = idc.get_name_ea_simple("_OBJC_CLASS_METHODS_" + clsName) if baseMethodsVA != idc.BADADDR and baseMethodsVA != 0: count = idc.get_wide_dword(baseMethodsVA + eh.size_DWORD) baseMethodsVA += eh.size_DWORD * 2 # advance to start of class methods array for va2 in range(baseMethodsVA, baseMethodsVA + objc2MethSize * count, objc2MethSize): self.processMethod(eh, clsName, va2, classes, "class") # we don't use idc.get_name_ea_simple to find the methods for categories because IDA's naming is too difficult to # parse/format if self.objcCatList: for va in range(self.objcCatList[0], self.objcCatList[1], eh.size_pointer): clsName = self.formatName(idc.get_name(eh.derefPtr(va), idc.ida_name.GN_VISIBLE)) logging.debug("walking category classes @%s: %s" % (eh.hexString(va), clsName)) classes[clsName] = {"class": [], "instance": []} catVA = eh.derefPtr(va) # class methods catMethsVA = eh.derefPtr(catVA + objc2CatClsMethsOffs) if catMethsVA == 0: continue count = idc.get_wide_dword(catMethsVA + eh.size_DWORD) catMethsVA += eh.size_DWORD * 2 # advance to start of methods array for va2 in range(catMethsVA, catMethsVA + objc2MethSize * count, objc2MethSize): self.processMethod(eh, clsName, va2, classes, "class") # instance methods catMethsVA = eh.derefPtr(catVA + objc2CatInstMethsOffs) if catMethsVA == 0: continue count = idc.get_wide_dword(catMethsVA + eh.size_DWORD) catMethsVA += eh.size_DWORD * 2 # advance to start of methods array for va2 in range(catMethsVA, catMethsVA + objc2MethSize * count, objc2MethSize): self.processMethod(eh, clsName, va2, classes, "instance") return classes # uses iterate feature of flare_emu: for each xref to objc msgSend variants # patches program bytes to change sel ref pointers to implementation pointers for objc methods # adds objc-like syntax comments for each msgSend call def processObjc(self): userData = {} userData["selXrefs"] = {} eh = flare_emu.EmuHelper() classes = self.getClassData(eh) logging.debug("%d classes found" % len(classes.keys())) # get xrefs to objc_msgSend variants xrefs = list(idautils.XrefsTo(idc.get_name_ea_simple("_objc_msgSend"))) xrefs.extend(list(idautils.XrefsTo(idc.get_name_ea_simple("_objc_msgSend_fixup")))) xrefs.extend(list(idautils.XrefsTo(idc.get_name_ea_simple("_objc_msgSend_stret")))) xrefs.extend(list(idautils.XrefsTo(idc.get_name_ea_simple("_objc_msgSend_fpret")))) xrefs.extend(list(idautils.XrefsTo(idc.get_name_ea_simple("_objc_msgSendSuper2")))) xrefs.extend( list(idautils.XrefsTo(idc.get_name_ea_simple("_objc_msgSendSuper2_stret")))) xrefs.extend(list(idautils.XrefsTo(idc.get_name_ea_simple("_objc_msgSendSuper_stret")))) logging.debug("%d initial xrefs to objc_msgSend variants" % len(xrefs)) # build user data for emu callback userData["classes"] = classes userData["msgSendXrefs"] = [] emuFuncs = set([]) # get paths to initial msgSend xrefs targets = [] for x in xrefs: funcStart = idc.get_func_attr(x.frm, idc.FUNCATTR_START) funcEnd = idc.get_func_attr(x.frm, idc.FUNCATTR_END) if funcStart == idc.BADADDR: continue emuFuncs.add((funcStart, funcEnd)) if idc.print_insn_mnem(x.frm) not in self.callMnems: continue # get unique idautils.Functions from xrefs that we need to emulate userData["msgSendXrefs"].append(x.frm) targets.append(x.frm) # look for other possible msgSend calls that IDA missed userData["possibleMsgSendXrefs"] = [] for func in emuFuncs: addr = func[0] # scan each function with a known msgSend xref for more while addr <= func[1]: dis = idc.generate_disasm_line(addr, 0) # is this instruction a "call reg" and IDA hasn't already identified it as something? if ((dis[:4] == "call" or dis[:2] == "BL") and ";" not in dis and addr not in userData["msgSendXrefs"] and idc.get_operand_type(addr, 0) == 1): userData["possibleMsgSendXrefs"].append(addr) targets.append(addr) addr = idc.next_head(addr, idc.get_inf_attr(idc.INF_MAX_EA)) userData["selXrefs"] = {} userData["patchedSelRefs"] = {} if eh.arch == unicorn.UC_ARCH_ARM: emuHook = self.objc2AnalyzeHookARM elif eh.arch == unicorn.UC_ARCH_ARM64: emuHook = self.objc2AnalyzeHookARM64 elif eh.arch == unicorn.UC_ARCH_X86 and eh.mode == unicorn.UC_MODE_64: emuHook = self.objc2AnalyzeHookX64 else: logging.debug("unsupported architecture, quitting..") eh.iterate(targets, self.targetCallback, preEmuCallback=self.preEmuCallback, callHook=self.callHook, instructionHook=emuHook, hookData=userData, resetEmuMem=True) # reload with patches eh.initEmuHelper() eh.reloadBinary() # parses ivar type encoding and returns magicVal for objc syntax string representation of ivar def getIvarInfo(self, eh, ivarPtr, userData): objc2IvarTypeOffs = 2 * eh.size_pointer objc2IvarNameOffs = eh.size_pointer if eh.size_pointer == 4: magicMask = self.magicMask32 else: magicMask = self.magicMask64 for x in idautils.XrefsTo(ivarPtr): if idc.get_segm_name(x.frm) == "__objc_const": typeStrPtr = eh.derefPtr(x.frm + objc2IvarTypeOffs) namePtr = eh.derefPtr(x.frm + objc2IvarNameOffs) typeStr = eh.getIDBString(typeStrPtr) varName = eh.getIDBString(namePtr) ptr = "" cmtStr = "" clsName = "" while typeStr[0] == "^": ptr += "*" typeStr = typeStr[1:] if len(ptr) > 0: ptr = " " + ptr if typeStr[0] == "[": m = re.match(r"\[([\d]+)([\^]*)(.+)\]", typeStr) if m: if len(m.group(2)) > 0: ptr = " " + "*" * len(m.group(2)) cmtStr = "%s-width array of " typeStr = m.group(3) if typeStr[0] == "@" and len(typeStr) > 1: clsName = typeStr[2:-1] cmtStr += "(%s *)%s" % (clsName, varName) elif typeStr == "@": clsName = "" cmtStr += "(id)%s" % varName elif typeStr == "c": clsName = "" cmtStr += "(char)%s" % varName elif typeStr == "*": clsName = "" cmtStr += "(char *)%s" % varName elif typeStr == "i": clsName = "" cmtStr += "(int%s)%s" % (ptr, varName) elif typeStr == "s": clsName = "" cmtStr += "(short%s)%s" % (ptr, varName) elif typeStr == "l": clsName = "" cmtStr += "(long%s)%s" % (ptr, varName) elif typeStr == "q": clsName = "" cmtStr += "(long long%s)%s" % (ptr, varName) elif typeStr == "C": clsName = "" cmtStr += "(unsigned char%s)%s" % (ptr, varName) elif typeStr == "I": clsName = "" cmtStr += "(unsigned int%s)%s" % (ptr, varName) elif typeStr == "S": clsName = "" cmtStr += "(unsigned short%s)%s" % (ptr, varName) elif typeStr == "L": clsName = "" cmtStr += "(unsigned long%s)%s" % (ptr, varName) elif typeStr == "Q": clsName = "" cmtStr += "(unsigned long long%s)%s" % (ptr, varName) elif typeStr == "f": clsName = "" cmtStr += "(float%s)%s" % (ptr, varName) elif typeStr == "d": clsName = "" cmtStr += "(double%s)%s" % (ptr, varName) elif typeStr == "B": clsName = "" cmtStr += "(bool%s)%s" % (ptr, varName) elif typeStr == "v": clsName = "" cmtStr += "(void%s)%s" % (ptr, varName) elif typeStr == ":": clsName = "" cmtStr += "(SEL%s)%s" % (ptr, varName) elif typeStr[0] == "{": m = re.match(r"\{(.+)\=.*\}", typeStr) if m: cmtStr += "(struct %s%s)%s" % (m.group(1), ptr, varName) elif typeStr[0] == "(": m = re.match(r"\((.+)\=.*\)", typeStr) if m: cmtStr += "(union %s%s)%s" % (m.group(1), ptr, varName) elif typeStr == "?": clsName = "" cmtStr += "(unknown%s)%s" % (ptr, varName) if cmtStr == "": logging.debug("couldn't decode ivar type %s for ivar @%s" % ( typeStr, eh.hexString(ivarPtr))) cmtStr = "(??)%s" % varName userData["magicVals"].append((cmtStr, clsName)) ret = magicMask | userData["magicValsCount"] userData["magicValsCount"] += 1 logging.debug("returning ivar magicVal for %s for ivar @%s" % (cmtStr, eh.hexString(ivarPtr))) return ret def preEmuCallback(self, eh, userData, funcStart): userData["magicVals"] = [] userData["magicValsCount"] = 0 if eh.size_pointer == 4: magicMask = self.magicMask32 else: magicMask = self.magicMask64 # get "self" id if in objc function clsName = None funcName = idaapi.get_func_name(funcStart) if funcName[0] in ["-", "+"] and "[" in funcName and "]" in funcName and " " in funcName: shortClsName = clsName = funcName[2:funcName.find(" ")] if "(" in clsName: clsName = "_OBJC_CATEGORY_" + \ clsName[:clsName.find( "(")] + "_$_" + clsName[clsName.find("(") + 1:clsName.find(")")] shortClsName = shortClsName[:shortClsName.find( "(")] + "_" + shortClsName[shortClsName.find("(") + 1:shortClsName.find(")")] else: clsName = "_OBJC_CLASS_$_" + clsName if clsName: if funcName[0] == "+": # this is a class method, use classRef self_ = idc.get_name_ea_simple(clsName) # assume rdx will hold an instance of the class userData["magicVals"].append( ("(%s *)instance" % shortClsName, shortClsName)) inst = magicMask | userData["magicValsCount"] userData["magicValsCount"] += 1 eh.uc.reg_write(eh.regs["arg3"], inst) elif funcName[0] == "-": # this is an instance method, use magic value to store "self" userData["magicVals"].append( ("(%s *)self" % shortClsName, shortClsName)) self_ = magicMask | userData["magicValsCount"] userData["magicValsCount"] += 1 eh.uc.reg_write(eh.regs["arg1"], self_) if __name__ == '__main__': Objc2Analyzer()