# Volatility
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""
@author: Mariano `emdel` Graziano
@license: GNU General Public License 2.0 or later
@contact: magrazia@cisco.com
@organization: Cisco Systems, Inc.
"""

import volatility.commands as commands
import volatility.utils as utils
from collections import OrderedDict
import os, struct, json, gzip, subprocess, base64
from capstone import *
from capstone.x86 import *
import volatility.debug as debug

# Operand-size suffixes that sanitize_reg() strips from numbered registers
# (e.g. r8b, r8w, r8d). Only the x64 mode defines any.
REG_SUFFIXS = {}
REG_SUFFIXS['x64'] = ['b', 'w', 'd']

# Arithmetic operators recognised inside a memory operand ("[reg+0x10]").
MATH_OPS = ['+', '-', '*', '/']

# Register names (upper case) that is_reg() accepts, per mode.
GPRS = {}
GPRS['x64'] = ['RAX', 'RBX', 'RCX', 'RDX', 'RBP', 'RSP', 'RIP', 'RSI',
               'RDI', 'R8', 'R9', 'R10', 'R11', 'R12', 'R13', 'R14', 'R15']
GPRS['x86'] = ['EAX', 'EBX', 'ECX', 'EDX', 'EBP', 'ESP', 'EIP', 'ESI', 'EDI']


class unchain(commands.Command):
    '''unchain: Volatility Plugin in the ROPMEMU framework.
       It's a chain extractor and shaper. Run dust.sh on the output.
    '''

    # Scratch directory holding the generated .asm files and nasm outputs.
    TMP_DIR = "/tmp/ropmemu"

    def __init__(self, config, *args, **kwargs):
        """Register the plugin options and initialise the plugin state."""
        commands.Command.__init__(self, config, *args, **kwargs)
        self._config.add_option('BIN', short_option = 'B', default = None,
                                help = 'Filename for the dumped chain',
                                action = 'store', type = 'str')
        self._config.add_option('MODE', short_option = 'm', default = 'x64',
                                help = 'Modes: x86 and x64',
                                action = 'store', type = 'str')
        self._config.add_option('IJSON', short_option = 'i', default = None,
                                help = 'JSON Trace Input file',
                                action = 'store', type = 'str')
        self._config.add_option('GLIMIT', short_option = 'G', default = None,
                                help = 'Gadget Limit Number',
                                action = 'store', type = 'int')
        self._config.add_option('CLEAN', short_option = 'C', dest="clean",
                                default = False, action="store_true",
                                help="Clean /tmp files")
        self._config.add_option('DB', short_option = 'D', default = None,
                                action="store",
                                help="Filename for the opcode DB",
                                type = 'str')
        self._config.add_option('SGADGET', short_option = 'S', default = -1,
                                action="store",
                                help="Starting gadget for emulation",
                                type = 'int')
        self._config.add_option('IDB', short_option = 'I', default = None,
                                action="store", help="Input opcodes DB",
                                type = 'str')
        self.dump_fd = 0
        self.gid = 0
        self.md = None
        # Mnemonics that are reshaped/assembled, dropped, or (conditional
        # jumps) merely recorded, respectively.
        self.WHITELIST_INSTRUCTIONS = ['mov', 'pop', 'add', 'sub', 'xor',
                                       'pushf']
        self.BLACKLIST_INSTRUCTIONS = ['ret', 'call', 'leave']
        self.GREYLIST_INSTRUCTIONS = []
        self.trace = OrderedDict()
        # Maps an assembly string to its base64-encoded nasm output.
        self.opcodes_db = OrderedDict()
        self.NASM = '/usr/bin/nasm'
        # Capstone instruction groups treated as branches.
        self.branch = [X86_GRP_JUMP, X86_GRP_INT, X86_GRP_CALL,
                       X86_GRP_RET, X86_GRP_IRET, X86_GRP_VM]

    # ------------------------------------------------------------------
    # Mode-dependent parameters
    # ------------------------------------------------------------------

    def get_buf_size(self):
        """Return how many bytes are read when disassembling at an address."""
        if self._config.MODE == 'x64':
            return 64
        return 32

    def get_word_size(self):
        """Return the word size in bytes (8 for x64, 4 otherwise)."""
        if self._config.MODE == 'x64':
            return 0x08
        return 0x04

    def get_unpack_format(self):
        """Return the little-endian struct format for one word."""
        if self._config.MODE == 'x64':
            return '<Q'
        return '<I'

    def get_size(self):
        """Return the word size in bytes for the configured mode.

        Raises RuntimeError when MODE is neither 'x64' nor 'x86'.
        """
        mode = self._config.MODE
        if mode == 'x64':
            return 0x08
        if mode == 'x86':
            return 0x04
        raise RuntimeError("Mode not supported.")

    def get_bits_directive(self):
        """Return the nasm BITS directive for the configured mode."""
        if self._config.MODE == 'x64':
            return "[BITS 64]"
        return "[BITS 32]"

    def get_nasm_size_fmt(self):
        """Return the nasm operand-size keyword for the configured mode."""
        if self._config.MODE == 'x64':
            return "qword"
        return "dword"

    # ------------------------------------------------------------------
    # Operand classification
    # ------------------------------------------------------------------

    def is_reg(self, o):
        """Return True if `o` is a register name listed in GPRS for MODE."""
        return o in GPRS.get(self._config.MODE, ())

    # TODO: Pay attention. Not always 0x...
    # TODO: Abstract disass library
    def is_constant(self, o):
        """Return True if `o` starts with '0x'."""
        return o.startswith('0x')

    # TODO
    def is_mem(self, o):
        """Memory operands are not recognised yet; always returns False."""
        return False

    def check_arg(self, arg):
        ''' 1: reg, 2: const, 3: mem '''
        if self.is_reg(arg):
            return 1
        if self.is_constant(arg):
            return 2
        if self.is_mem(arg):
            return 3
        return None

    # ------------------------------------------------------------------
    # Capstone setup
    # ------------------------------------------------------------------

    # We support only x86 arch
    def get_cap_arch(self):
        """Return the Capstone architecture constant."""
        return CS_ARCH_X86

    # We support two x86 modes: x86 and x64
    def get_cap_mode(self):
        """Return the Capstone mode constant for the configured mode."""
        if self._config.MODE == 'x64':
            return CS_MODE_64
        return CS_MODE_32

    def init_capstone(self):
        """Build a Capstone disassembler for the configured mode."""
        return Cs(self.get_cap_arch(), self.get_cap_mode())

    # ------------------------------------------------------------------
    # Trace loading and lookups
    # ------------------------------------------------------------------

    def get_json_trace(self):
        """Load the IJSON trace into self.trace, preserving key order.

        Only '.json' and '.gz' extensions are accepted; for anything else
        self.trace is set to None.
        """
        SUPPORTED_EXT = ['json', 'gz']
        ext = self._config.IJSON.split('.')[-1].lower()
        if ext not in SUPPORTED_EXT:
            self.trace = None
            return
        print("[+] Getting %s" % self._config.IJSON)
        if ext == 'gz':
            with gzip.open(self._config.IJSON) as gf:
                self.trace = json.loads(gf.read(),
                                        object_pairs_hook = OrderedDict)
        else:
            with open(self._config.IJSON) as jf:
                self.trace = json.load(jf, object_pairs_hook = OrderedDict)

    def sanitize_capstone_mov(self, ins):
        """Drop the ' qword ptr' size annotation from an instruction."""
        if 'qword ptr' in ins:
            return ins.replace(' qword ptr', '')
        return ins

    def get_chain_instruction(self, chain_ins):
        """Return the instruction text (element 1) of a chain entry."""
        ins = chain_ins[1]
        if ins.startswith('mov'):
            ins = self.sanitize_capstone_mov(ins)
        return ins

    def is_in_trace(self, chain_ptr, chain_gnum, c_ins):
        """Return True if `c_ins` occurs in a trace gadget at `chain_ptr`."""
        for trace_key1, trace_c1 in self.trace.items():
            trace_ptr, trace_gnum = trace_key1.split('-')
            if trace_ptr != chain_ptr:
                continue
            for trace_c2 in trace_c1.values():
                if c_ins in [tk.lower() for tk in trace_c2.keys()]:
                    debug.debug("%s %s %s %s %s" % (chain_ptr, chain_gnum,
                                                    trace_ptr, trace_gnum,
                                                    c_ins))
                    return True
        return False

    def get_trace_asm(self):
        '''Debugging function'''
        debug.debug("Trace ASM")
        limit = self._config.GLIMIT
        for gnum, gadget in enumerate(self.trace.values()):
            # Gadgets 0..GLIMIT (inclusive) are printed when a limit is set.
            if limit and gnum > limit:
                break
            for instructions in gadget.values():
                for text in instructions:
                    print(text.lower())

    def is_trace_sync(self, ptr, num, instr):
        """Return True if `instr` occurs at `ptr` in gadget number `num`."""
        for trace_key1, trace_c1 in self.trace.items():
            trace_ptr, trace_gnum = trace_key1.split('-')
            if trace_ptr != ptr:
                continue
            for trace_c2 in trace_c1.values():
                if instr in [tk.lower() for tk in trace_c2.keys()]:
                    debug.debug("%s %s %s %s %s" % (ptr, num, trace_ptr,
                                                    trace_gnum, instr))
                    if num == trace_gnum:
                        return True
        return False

    def get_instruction_context(self, gadget, instruction):
        """Return the context stored for `instruction` in `gadget`.

        Keys are compared lower-cased; None is returned when absent.
        """
        for instructions in gadget.values():
            for text, context in instructions.items():
                if text.lower() == instruction:
                    return context
        return None

    def get_context_from_trace(self, gkey, instruction):
        """Return the context of `instruction` in gadget `gkey`, or None."""
        ptr, num = gkey.split('-')
        if self.is_in_trace(ptr, num, instruction):
            if self.is_trace_sync(ptr, num, instruction):
                return self.get_instruction_context(self.trace[gkey],
                                                    instruction)
        return None

    def _lookup_context(self, hw_context, instruction):
        """Return hw_context's entry for `instruction`, ignoring case.

        follow_trace() hands the instruction over lower-cased while the
        dictionary keeps the trace's original spelling, so an exact match is
        tried first and a case-insensitive scan second. Raises KeyError when
        no key matches.
        """
        if instruction in hw_context:
            return hw_context[instruction]
        wanted = instruction.lower()
        for key, context in hw_context.items():
            if key.lower() == wanted:
                return context
        raise KeyError(instruction)

    # TODO: Fix this stupid upper/lower issue due to distorm/capstone usage
    def get_reg_value(self, hw_context, pop_operand):
        """Return the value stored for the (upper-cased) register name."""
        return hw_context[pop_operand.upper()]

    def mov_from_pop(self, instruction, gkey):
        """Turn a traced 'pop reg' into 'mov reg, value'.

        Returns None when no context for the instruction is found.
        """
        hw_context = self.get_context_from_trace(gkey, instruction)
        if not hw_context:
            return None
        pop_operand = instruction.split(' ')[-1]
        value = self.get_reg_value(hw_context, pop_operand)
        return "mov %s, %s" % (pop_operand, value)

    # ------------------------------------------------------------------
    # nasm helpers
    # ------------------------------------------------------------------

    def _tmp_name(self, cnt, ext = ""):
        """Return the scratch file path used for instruction number `cnt`."""
        return "%s_%d%s" % (os.path.join(self.TMP_DIR, "ropmemu"), cnt, ext)

    def create_tmp_file(self, new_instr, cnt):
        """Write `new_instr` (preceded by the BITS directive) to an .asm file."""
        # http://www.nasm.us/doc/nasmdoc7.html
        if not os.path.exists(self.TMP_DIR):
            os.makedirs(self.TMP_DIR)
        with open(self._tmp_name(cnt, ".asm"), "w") as fd:
            fd.write("%s\n" % self.get_bits_directive())
            fd.write("%s" % new_instr)

    def get_nasm(self, cnt):
        """Return the raw bytes nasm produced for instruction `cnt`."""
        with open(self._tmp_name(cnt), "rb") as h:
            return h.read()

    def get_nasm_hex(self, buf):
        """Return `buf` as a hex string, two digits per byte.

        An empty or missing buffer yields an empty string.
        """
        if not buf:
            return ''
        # bytearray() yields integers on both Python 2 and Python 3.
        return "".join("%02x" % b for b in bytearray(buf))

    def rm_nasm_files(self):
        """Delete the scratch directory and everything inside it."""
        print("[+] Removing /tmp files")
        if not os.path.isdir(self.TMP_DIR):
            # Nothing was assembled (e.g. every opcode came from the DB).
            return
        for root, dirs, files in os.walk(self.TMP_DIR, topdown = False):
            # removing files
            for name in files:
                os.remove(os.path.join(root, name))
            for name in dirs:
                os.rmdir(os.path.join(root, name))
        # removing empty dir
        os.rmdir(self.TMP_DIR)

    def invoke_nasm(self, cnt):
        """Assemble the .asm file for `cnt` as a flat binary.

        Returns the assembled bytes, or None when nasm exits non-zero.
        """
        # http://stackoverflow.com/questions/26504930/recieving-32-bit-registers-from-64-bit-nasm-code
        filename = self._tmp_name(cnt, ".asm")
        progname = self._tmp_name(cnt)
        pargs = [self.NASM, '-O0', '-f', 'bin', filename, '-o', progname]
        if subprocess.call(pargs):
            return None
        return self.get_nasm(cnt)

    def get_opcodes(self, new_instr, cnt):
        """Assemble `new_instr`; returns the bytes or None on failure."""
        self.create_tmp_file(new_instr, cnt)
        return self.invoke_nasm(cnt)

    def is_nasm(self):
        """Return True if the nasm binary exists at self.NASM."""
        return os.path.exists(self.NASM)

    # ------------------------------------------------------------------
    # Instruction shaping
    # ------------------------------------------------------------------

    def is_capstone_branch(self, ins):
        """Return True if `ins` belongs to one of the groups in self.branch."""
        for group in ins.groups:
            if group in self.branch:
                return True
        return False

    # call reg -> jmp addr | jmp reg -> jmp val
    def shape_rop_jmpcall(self, instruction, hw_context):
        """Rewrite 'call/jmp reg' as 'jmp <traced value of reg>'."""
        reg = instruction.split()[1]
        val = self._lookup_context(hw_context, instruction)[reg.upper()]
        new_instruction = "%s %s" % ("jmp", val)
        debug.debug("From %s to %s" % (instruction, new_instruction))
        return new_instruction

    def check_branch_instruction(self, instruction, hw_context):
        """Shape call/jmp instructions; returns None for anything else."""
        if instruction.startswith('call') or instruction.startswith('jmp'):
            return self.shape_rop_jmpcall(instruction, hw_context)
        return None

    def build_mov_from_pop(self, instruction, reg, hw_context):
        """Return 'mov reg, value' using the value traced for `reg`."""
        val = self._lookup_context(hw_context, instruction)[reg.upper()]
        return "mov %s, %s" % (reg, val)

    # TODO: Think about a clever method
    def sanitize_reg(self, op):
        ''' Register sanitiziation, e.g. 'r8d' -> 'r8' and, in x64 mode,
        'eax' -> 'rax'. Anything else is returned unchanged. '''
        suffixes = REG_SUFFIXS.get(self._config.MODE, [])
        # Strip the size suffix only from numbered registers (r8b, r10w,
        # r15d, ...); a blind endswith() test would also truncate hex
        # immediates such as 0x1d.
        if (len(op) > 2 and op[-1] in suffixes
                and op[0] in ('r', 'R') and op[1:-1].isdigit()):
            return op[:-1]
        if self._config.MODE == 'x64' and op.startswith('e'):
            return "%s%s" % ('r', op[1:])
        return op

    def upper_capstone(self, instr):
        """Upper-case `instr` but keep the '0x' prefix in lower case."""
        return instr.upper().replace("0X", "0x")

    def read_value(self, addr):
        """Read one word at `addr` from the address space.

        Raises RuntimeError when MODE is neither 'x64' nor 'x86'.
        """
        print("[read_value] -  %s" % (addr,))
        if self._config.MODE not in ('x64', 'x86'):
            raise RuntimeError("Mode not supported.")
        raw = self._addrspace.read(addr, self.get_size())
        return struct.unpack(self.get_unpack_format(), raw)[0]

    def expand_mov(self, instruction, dst, src, hw_context):
        """Rewrite a mov so that its source becomes a concrete value.

        A '[...]' source containing an arithmetic operator is resolved to an
        address (registers replaced by their traced values) and the word at
        that address is read from memory. Other sources are looked up in the
        traced context. When the destination is a memory operand, two
        instructions separated by a newline are returned.
        """
        if src.startswith('['):
            # TODO: Have a real parser. /!\ eval() is dangerous
            expression = src[1:-1]
            math = None
            for op in MATH_OPS:
                if op in expression:
                    math = op
                    break
            if math is not None:
                parts = expression.split(math)
                context = self._lookup_context(hw_context, instruction)
                # Capstone prints "[rax + 0x10]": strip the padding so the
                # register names can be recognised.
                arg1 = parts[0].strip()
                arg2 = parts[1].strip()
                if self.check_arg(self.upper_capstone(arg2)) == 1:
                    arg2 = context[self.upper_capstone(arg2)]
                if self.check_arg(self.upper_capstone(arg1)) == 1:
                    arg1 = context[self.upper_capstone(arg1)]
                solve = "%s%s%s" % (arg1, math, arg2)
                # SECURITY: `solve` is built from the input trace and is
                # passed to eval(); only process traces you trust.
                mem_addr = eval(solve)
                val = hex(self.read_value(mem_addr)).strip('L')
            else:
                # It's from the trace, we have already read the value, so it's
                # the correct one.
                context = self._lookup_context(hw_context, instruction)
                val = context[self.upper_capstone(expression)]
        else:
            val = self._lookup_context(hw_context, instruction)[src.upper()]
        fmt = self.get_nasm_size_fmt()
        if dst.startswith('['):
            # we need to append a new instruction
            prev_instruction = "mov %s, %s" % (src, val)
            return "%s\nmov %s %s, %s" % (prev_instruction, fmt, dst, src)
        return "mov %s %s, %s" % (fmt, dst, val)

    def check_normal_instruction(self, instruction, hw_context):
        """Shape a whitelisted instruction.

        'pushf' is returned as is, 'pop' and 'mov' are rewritten with traced
        values, and None is returned for every other mnemonic.
        """
        if instruction == "pushf":
            return instruction
        mnemonic, operands = instruction.split(' ', 1)
        ops = operands.split(',')
        if mnemonic == 'pop':
            return self.build_mov_from_pop(instruction, ops[0], hw_context)
        if mnemonic == 'mov':
            src = self.sanitize_reg(instruction.split(',')[1].strip())
            dst = instruction.split(',')[0].split()[-1].strip()
            if not dst.startswith('['):
                dst = self.sanitize_reg(dst)
            return self.expand_mov(instruction, dst, src, hw_context)
        return None

    def add_get_opcodes(self, new_instr, instruction, cnt):
        """Return the opcodes for `new_instr`, using the opcode DB as cache.

        Returns None (and caches nothing) when nasm fails.
        """
        cached = self.opcodes_db.get(new_instr)
        if cached is not None:
            opcodes = base64.b64decode(cached)
        else:
            opcodes = self.get_opcodes(new_instr, cnt)
            if opcodes is None:
                debug.debug("nasm failed on %s (%s)" % (new_instr,
                                                        instruction))
                return None
            # For the DB project - b64 based
            self.opcodes_db[new_instr] = \
                base64.b64encode(opcodes).decode('ascii')
        debug.debug("%s (%s) -- %s" % (new_instr, instruction,
                                       self.get_nasm_hex(opcodes)))
        return opcodes

    def append_mnemonic_instruction_lists(self, instruction):
        """Record the mnemonic of an instruction that is not whitelisted.

        Conditional jumps (j* except jmp) go to the greylist and True is
        returned; everything else goes to the blacklist and False is
        returned.
        """
        mnemonic = instruction.split()[0]
        if mnemonic != "jmp" and mnemonic.startswith("j"):
            if mnemonic not in self.GREYLIST_INSTRUCTIONS:
                self.GREYLIST_INSTRUCTIONS.append(mnemonic)
            return True
        if mnemonic not in self.BLACKLIST_INSTRUCTIONS:
            self.BLACKLIST_INSTRUCTIONS.append(mnemonic)
        return False

    def check_trace_instruction(self, address, instruction, hw_context, cnt):
        """Return the opcodes to emit for one traced instruction, or None.

        `address` is a hexadecimal string, `instruction` the lower-cased
        instruction text, `hw_context` the mapping holding its traced
        context and `cnt` a running counter used for scratch file names.
        """
        print("[INPUT] %s) %s" % (str(cnt), instruction))
        mnemonic = instruction.split()[0]
        if mnemonic in self.BLACKLIST_INSTRUCTIONS:
            return None
        if mnemonic in self.WHITELIST_INSTRUCTIONS:
            new_instr = self.check_normal_instruction(instruction, hw_context)
            if not new_instr:
                new_instr = instruction
            print("[OUTPUT]  %s" % (new_instr,))
            return self.add_get_opcodes(new_instr, instruction, cnt)
        addr = int(address, 16)
        data = self._addrspace.read(addr, self.get_buf_size())
        print("---[NEW  %s" % (instruction,))
        if not data:
            print("[-] Something went wrong. Missing instruction: %s"
                  % instruction)
            return None
        for ins in self.md.disasm(data, addr):
            if not self.is_capstone_branch(ins):
                continue
            if not self.append_mnemonic_instruction_lists(instruction):
                return None
            new_instruction = self.check_branch_instruction(instruction,
                                                            hw_context)
            print("[OUTPUT] %s" % new_instruction)
            if new_instruction is None:
                # Conditional jumps are only recorded, never assembled.
                return None
            opcodes = self.get_opcodes(new_instruction, cnt)
            if opcodes is None:
                return None
            print(self.get_nasm_hex(opcodes))
            debug.debug("%s -- %s" % (new_instruction,
                                      self.get_nasm_hex(opcodes)))
            return opcodes
        return None

    # ------------------------------------------------------------------
    # Output handling
    # ------------------------------------------------------------------

    def init_chain_dump(self):
        """Open the BIN output file for binary writing."""
        name = "%s" % self._config.BIN
        print("[+] Creating %s" % name)
        self.dump_fd = open(name, 'wb')

    def stop_chain_dump(self):
        """Close the output file and, with CLEAN set, remove scratch files."""
        self.dump_fd.close()
        if self._config.CLEAN:
            self.rm_nasm_files()

    def append_opcodes_dump(self, opcodes):
        """Append `opcodes` to the output file."""
        self.dump_fd.write(opcodes)

    def init_opcodes_db(self):
        """Load the opcode DB from the IDB file, when one was given."""
        if self._config.IDB:
            with open(self._config.IDB) as idb_fd:
                self.opcodes_db = json.load(idb_fd,
                                            object_pairs_hook = OrderedDict)

    def follow_trace(self):
        """Walk the trace and dump the opcodes of every shaped instruction.

        Gadgets numbered below SGADGET are skipped and the walk stops at
        gadget number GLIMIT when a limit is set.
        """
        cnt = 0
        self.init_chain_dump()
        try:
            self.init_opcodes_db()
            for trace_key1, trace_c1 in self.trace.items():
                trace_gnum = int(trace_key1.split('-')[1])
                if self._config.GLIMIT and trace_gnum >= self._config.GLIMIT:
                    break
                if trace_gnum < self._config.SGADGET:
                    continue
                self.gid += 1
                for trace_key2, trace_c2 in trace_c1.items():
                    for tk in trace_c2.keys():
                        cnt += 1
                        opcodes = self.check_trace_instruction(trace_key2,
                                                               tk.lower(),
                                                               trace_c2, cnt)
                        if opcodes:
                            self.append_opcodes_dump(opcodes)
                        else:
                            debug.debug("[-] Skipping instructions... %s"
                                        % tk.lower())
        finally:
            # Close the dump even when shaping an instruction fails.
            self.stop_chain_dump()

    def serialize_opcodes(self):
        """Write the opcode DB as JSON.

        The file is IDB when given, otherwise '<DB>_dechain_<GLIMIT>.json'
        ('all' replaces the number when no limit is set). Nothing is written
        when neither option is set.
        """
        db_name = None
        if self._config.DB:
            glimit = self._config.GLIMIT
            if glimit is None:
                glimit = "all"
            db_name = "%s_%s_%s.json" % (self._config.DB, "dechain", glimit)
        if self._config.IDB:
            db_name = self._config.IDB
        if db_name is None:
            return
        with open(db_name, 'w') as fd:
            print("\n[+] Dumping %s" % db_name)
            json.dump(self.opcodes_db, fd, indent = 2)

    # ------------------------------------------------------------------
    # Volatility entry points
    # ------------------------------------------------------------------

    def calculate(self):
        """Plugin entry point: load the trace and produce the chain dump."""
        if not self.is_nasm():
            debug.error("Please install nasm")
        if not self._config.IJSON:
            debug.error("Please provide the input JSON trace")
        self._addrspace = utils.load_as(self._config)
        self.md = self.init_capstone()
        self.md.detail = True
        print("[+] From gadget: %s" % self._config.SGADGET)
        print("[+] To gadget: %s" % self._config.GLIMIT)
        self.get_json_trace()
        if self.trace is None:
            debug.error("Unsupported trace file (expected .json or .gz): %s"
                        % self._config.IJSON)
        self.follow_trace()
        if self._config.DEBUG:
            self.get_trace_asm()
        if self._config.DB or self._config.IDB:
            self.serialize_opcodes()

    def render_text(self, outfd, data):
        """Write a single newline; all output happens in calculate()."""
        outfd.write("\n")