#!/bin/env python # -*- coding: utf-8 -*- # Copyright (c) 2009-2016, Mario Vilas # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # * Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice,this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. """ Binary code disassembly. @group Disassembler loader: Disassembler, Engine @group Disassembler engines: BeaEngine, CapstoneEngine, DistormEngine, LibdisassembleEngine, PyDasmEngine """ from __future__ import with_statement __all__ = [ 'Disassembler', 'Engine', 'BeaEngine', 'CapstoneEngine', 'DistormEngine', 'LibdisassembleEngine', 'PyDasmEngine', ] from textio import HexDump import win32 import ctypes import warnings # lazy imports BeaEnginePython = None distorm3 = None pydasm = None libdisassemble = None capstone = None #============================================================================== class Engine (object): """ Base class for disassembly engine adaptors. @type name: str @cvar name: Engine name to use with the L{Disassembler} class. @type desc: str @cvar desc: User friendly name of the disassembler engine. @type url: str @cvar url: Download URL. @type supported: set(str) @cvar supported: Set of supported processor architectures. For more details see L{win32.version._get_arch}. @type arch: str @ivar arch: Name of the processor architecture. """ name = "<insert engine name here>" desc = "<insert engine description here>" url = "<insert download url here>" supported = set() def __init__(self, arch = None): """ @type arch: str @param arch: Name of the processor architecture. If not provided the current processor architecture is assumed. For more details see L{win32.version._get_arch}. @raise NotImplementedError: This disassembler doesn't support the requested processor architecture. """ self.arch = self._validate_arch(arch) try: self._import_dependencies() except ImportError: msg = "%s is not installed or can't be found. Download it from: %s" msg = msg % (self.name, self.url) raise NotImplementedError(msg) def _validate_arch(self, arch = None): """ @type arch: str @param arch: Name of the processor architecture. If not provided the current processor architecture is assumed. For more details see L{win32.version._get_arch}. @rtype: str @return: Name of the processor architecture. If not provided the current processor architecture is assumed. For more details see L{win32.version._get_arch}. @raise NotImplementedError: This disassembler doesn't support the requested processor architecture. """ # Use the default architecture if none specified. if not arch: arch = win32.arch # Validate the architecture. if arch not in self.supported: msg = "The %s engine cannot decode %s code." msg = msg % (self.name, arch) raise NotImplementedError(msg) # Return the architecture. return arch @classmethod def _import_dependencies(cls): """ Loads the dependencies for this disassembler. @raise ImportError: This disassembler cannot find or load the necessary dependencies to make it work. """ raise SyntaxError("Subclasses MUST implement this method!") def decode(self, address, code): """ @type address: int @param address: Memory address where the code was read from. @type code: str @param code: Machine code to disassemble. @rtype: list of tuple( long, int, str, str ) @return: List of tuples. Each tuple represents an assembly instruction and contains: - Memory address of instruction. - Size of instruction in bytes. - Disassembly line of instruction. - Hexadecimal dump of instruction. @raise NotImplementedError: This disassembler could not be loaded. This may be due to missing dependencies. """ raise NotImplementedError() #============================================================================== class BeaEngine (Engine): """ Integration with the BeaEngine disassembler by Beatrix. """ name = "BeaEngine" desc = "BeaEngine disassembler by Beatrix" url = "https://github.com/BeaEngine/beaengine" supported = set(( win32.ARCH_I386, win32.ARCH_AMD64, )) @classmethod def _import_dependencies(cls): # Load the BeaEngine ctypes wrapper. global BeaEnginePython if BeaEnginePython is None: import BeaEnginePython def decode(self, address, code): addressof = ctypes.addressof # Instance the code buffer. buffer = ctypes.create_string_buffer(code) buffer_ptr = addressof(buffer) # Instance the disassembler structure. Instruction = BeaEnginePython.DISASM() Instruction.VirtualAddr = address Instruction.EIP = buffer_ptr Instruction.SecurityBlock = buffer_ptr + len(code) if self.arch == win32.ARCH_I386: Instruction.Archi = 0 else: Instruction.Archi = 0x40 Instruction.Options = ( BeaEnginePython.Tabulation + BeaEnginePython.NasmSyntax + BeaEnginePython.SuffixedNumeral + BeaEnginePython.ShowSegmentRegs ) # Prepare for looping over each instruction. result = [] Disasm = BeaEnginePython.Disasm InstructionPtr = addressof(Instruction) hexdump = HexDump.hexadecimal append = result.append OUT_OF_BLOCK = BeaEnginePython.OUT_OF_BLOCK UNKNOWN_OPCODE = BeaEnginePython.UNKNOWN_OPCODE # For each decoded instruction... while True: # Calculate the current offset into the buffer. offset = Instruction.EIP - buffer_ptr # If we've gone past the buffer, break the loop. if offset >= len(code): break # Decode the current instruction. InstrLength = Disasm(InstructionPtr) # If BeaEngine detects we've gone past the buffer, break the loop. if InstrLength == OUT_OF_BLOCK: break # The instruction could not be decoded. if InstrLength == UNKNOWN_OPCODE: # Output a single byte as a "db" instruction. char = "%.2X" % ord(buffer[offset]) result.append(( Instruction.VirtualAddr, 1, "db %sh" % char, char, )) Instruction.VirtualAddr += 1 Instruction.EIP += 1 # The instruction was decoded but reading past the buffer's end. # This can happen when the last instruction is a prefix without an # opcode. For example: decode(0, '\x66') elif offset + InstrLength > len(code): # Output each byte as a "db" instruction. for char in buffer[ offset : offset + len(code) ]: char = "%.2X" % ord(char) result.append(( Instruction.VirtualAddr, 1, "db %sh" % char, char, )) Instruction.VirtualAddr += 1 Instruction.EIP += 1 # The instruction was decoded correctly. else: # Output the decoded instruction. append(( Instruction.VirtualAddr, InstrLength, Instruction.CompleteInstr.strip(), hexdump(buffer.raw[offset:offset+InstrLength]), )) Instruction.VirtualAddr += InstrLength Instruction.EIP += InstrLength # Return the list of decoded instructions. return result #============================================================================== class DistormEngine (Engine): """ Integration with the diStorm disassembler by Gil Dabah. """ name = "diStorm" desc = "diStorm disassembler by Gil Dabah" url = "https://github.com/gdabah/distorm" supported = set(( win32.ARCH_I386, win32.ARCH_AMD64, )) @classmethod def _import_dependencies(cls): # Load the distorm bindings. global distorm3 if distorm3 is None: try: import distorm3 except ImportError: import distorm as distorm3 def __init__(self, arch = None): super(DistormEngine, self).__init__(arch) # Load the decoder function. self.__decode = distorm3.Decode # Load the bits flag. self.__flag = { win32.ARCH_I386: distorm3.Decode32Bits, win32.ARCH_AMD64: distorm3.Decode64Bits, }[self.arch] def decode(self, address, code): return self.__decode(address, code, self.__flag) #============================================================================== class PyDasmEngine (Engine): """ Integration with PyDasm: Python bindings to libdasm. """ name = "PyDasm" desc = "PyDasm: Python bindings to libdasm" url = "https://github.com/alexeevdv/libdasm" supported = set(( win32.ARCH_I386, )) @classmethod def _import_dependencies(cls): # Load the libdasm bindings. global pydasm if pydasm is None: import pydasm def decode(self, address, code): # Decode each instruction in the buffer. result = [] offset = 0 while offset < len(code): # Try to decode the current instruction. instruction = pydasm.get_instruction(code[offset:offset+32], pydasm.MODE_32) # Get the memory address of the current instruction. current = address + offset # Illegal opcode or opcode longer than remaining buffer. if not instruction or instruction.length + offset > len(code): hexdump = '%.2X' % ord(code[offset]) disasm = 'db 0x%s' % hexdump ilen = 1 # Correctly decoded instruction. else: disasm = pydasm.get_instruction_string(instruction, pydasm.FORMAT_INTEL, current) ilen = instruction.length hexdump = HexDump.hexadecimal(code[offset:offset+ilen]) # Add the decoded instruction to the list. result.append(( current, ilen, disasm, hexdump, )) # Move to the next instruction. offset += ilen # Return the list of decoded instructions. return result #============================================================================== class LibdisassembleEngine (Engine): """ Integration with Immunity libdisassemble. """ name = "Libdisassemble" desc = "Immunity libdisassemble" url = "http://www.immunitysec.com/resources-freesoftware.shtml" supported = set(( win32.ARCH_I386, )) @classmethod def _import_dependencies(cls): # Load the libdisassemble module. # Since it doesn't come with an installer or an __init__.py file # users can only install it manually however they feel like it, # so we'll have to do a bit of guessing to find it. global libdisassemble if libdisassemble is None: try: # If installed properly with __init__.py import libdisassemble.disassemble as libdisassemble except ImportError: # If installed by just copying and pasting the files import disassemble as libdisassemble def decode(self, address, code): # Decode each instruction in the buffer. result = [] offset = 0 while offset < len(code): # Decode the current instruction. opcode = libdisassemble.Opcode( code[offset:offset+32] ) length = opcode.getSize() disasm = opcode.printOpcode('INTEL') hexdump = HexDump.hexadecimal( code[offset:offset+length] ) # Add the decoded instruction to the list. result.append(( address + offset, length, disasm, hexdump, )) # Move to the next instruction. offset += length # Return the list of decoded instructions. return result #============================================================================== class CapstoneEngine (Engine): """ Integration with the Capstone disassembler by Nguyen Anh Quynh. """ name = "Capstone" desc = "Capstone disassembler by Nguyen Anh Quynh" url = "http://www.capstone-engine.org/" supported = set(( win32.ARCH_I386, win32.ARCH_AMD64, win32.ARCH_THUMB, win32.ARCH_ARM, win32.ARCH_ARM64, )) @classmethod def _import_dependencies(cls): # Load the Capstone bindings. global capstone if capstone is None: import capstone def __init__(self, arch = None): super(CapstoneEngine, self).__init__(arch) # Load the constants for the requested architecture. self.__constants = { win32.ARCH_I386: (capstone.CS_ARCH_X86, capstone.CS_MODE_32), win32.ARCH_AMD64: (capstone.CS_ARCH_X86, capstone.CS_MODE_64), win32.ARCH_THUMB: (capstone.CS_ARCH_ARM, capstone.CS_MODE_THUMB), win32.ARCH_ARM: (capstone.CS_ARCH_ARM, capstone.CS_MODE_ARM), win32.ARCH_ARM64: (capstone.CS_ARCH_ARM64, capstone.CS_MODE_ARM), } # Test for the bug in early versions of Capstone. # If found, warn the user about it. try: self.__bug = not isinstance( list(capstone.cs_disasm_quick( capstone.CS_ARCH_X86, capstone.CS_MODE_32, "\x90", 1 ))[0], capstone.capstone.CsInsn ) except AttributeError: self.__bug = False if self.__bug: warnings.warn( "This version of the Capstone bindings is unstable," " please upgrade to a newer one!", RuntimeWarning, stacklevel=4) def decode(self, address, code): # Get the constants for the requested architecture. arch, mode = self.__constants[self.arch] # Get the decoder function outside the loop. decoder = capstone.cs_disasm_quick # If the buggy version of the bindings are being used, we need to catch # all exceptions broadly. If not, we only need to catch CsError. if self.__bug: CsError = Exception else: CsError = capstone.CsError # Create the variables for the instruction length, mnemonic and # operands. That way they won't be created within the loop, # minimizing the chances data might be overwritten. # This only makes sense for the buggy vesion of the bindings, normally # memory accesses are safe). length = mnemonic = op_str = None # For each instruction... result = [] offset = 0 while offset < len(code): # Disassemble a single instruction, because disassembling multiple # instructions may cause excessive memory usage (Capstone allocates # approximately 1K of metadata per each decoded instruction). instr = None try: instr = list(decoder( arch, mode, code[offset:offset+16], address+offset, 1 ))[0] except IndexError: pass # No instructions decoded. except CsError: pass # Any other error. # On success add the decoded instruction. if instr is not None: # Get the instruction length, mnemonic and operands. # Copy the values quickly before someone overwrites them, # if using the buggy version of the bindings (otherwise it's # irrelevant in which order we access the properties). length = instr.size mnemonic = instr.mnemonic op_str = instr.op_str # Concatenate the mnemonic and the operands. if op_str: disasm = "%s %s" % (mnemonic, op_str) else: disasm = mnemonic # Get the instruction bytes as a hexadecimal dump. hexdump = HexDump.hexadecimal( code[offset:offset+length] ) # On error add a "define constant" instruction. # The exact instruction depends on the architecture. else: # The number of bytes to skip depends on the architecture. # On Intel processors we'll skip one byte, since we can't # really know the instruction length. On the rest of the # architectures we always know the instruction length. if self.arch in (win32.ARCH_I386, win32.ARCH_AMD64): length = 1 else: length = 4 # Get the skipped bytes as a hexadecimal dump. skipped = code[offset:offset+length] hexdump = HexDump.hexadecimal(skipped) # Build the "define constant" instruction. # On Intel processors it's "db". # On ARM processors it's "dcb". if self.arch in (win32.ARCH_I386, win32.ARCH_AMD64): mnemonic = "db " else: mnemonic = "dcb " bytes = [] for b in skipped: if b.isalpha(): bytes.append("'%s'" % b) else: bytes.append("0x%x" % ord(b)) op_str = ", ".join(bytes) disasm = mnemonic + op_str # Add the decoded instruction to the list. result.append(( address + offset, length, disasm, hexdump, )) # Update the offset. offset += length # Return the list of decoded instructions. return result #============================================================================== # TODO: use a lock to access __decoder # TODO: look in sys.modules for whichever disassembler is already loaded class Disassembler (object): """ Generic disassembler. Uses a set of adapters to decide which library to load for which supported platform. @type engines: tuple( L{Engine} ) @cvar engines: Set of supported engines. If you implement your own adapter you can add its class here to make it available to L{Disassembler}. Supported disassemblers are: """ engines = ( DistormEngine, # diStorm engine goes first for backwards compatibility BeaEngine, CapstoneEngine, LibdisassembleEngine, PyDasmEngine, ) # Add the list of implemented disassembler adaptors to the docstring. __doc__ += "\n" for e in engines: __doc__ += " - %s - %s (U{%s})\n" % (e.name, e.desc, e.url) del e # Cache of already loaded disassemblers. __decoder = {} def __new__(cls, arch = None, engine = None): """ Factory class. You can't really instance a L{Disassembler} object, instead one of the adapter L{Engine} subclasses is returned. @type arch: str @param arch: (Optional) Name of the processor architecture. If not provided the current processor architecture is assumed. For more details see L{win32.version._get_arch}. @type engine: str @param engine: (Optional) Name of the disassembler engine. If not provided a compatible one is loaded automatically. See: L{Engine.name} @raise NotImplementedError: No compatible disassembler was found that could decode machine code for the requested architecture. This may be due to missing dependencies. @raise ValueError: An unknown engine name was supplied. """ # Use the default architecture if none specified. if not arch: arch = win32.arch # Return a compatible engine if none specified. if not engine: found = False for clazz in cls.engines: try: if arch in clazz.supported: selected = (clazz.name, arch) try: decoder = cls.__decoder[selected] except KeyError: decoder = clazz(arch) cls.__decoder[selected] = decoder return decoder except NotImplementedError, e: pass msg = "No disassembler engine available for %s code." % arch raise NotImplementedError(msg) # Return the specified engine. selected = (engine, arch) try: decoder = cls.__decoder[selected] except KeyError: found = False engineLower = engine.lower() for clazz in cls.engines: if clazz.name.lower() == engineLower: found = True break if not found: msg = "Unsupported disassembler engine: %s" % engine raise ValueError(msg) if arch not in clazz.supported: msg = "The %s engine cannot decode %s code." % selected raise NotImplementedError(msg) decoder = clazz(arch) cls.__decoder[selected] = decoder return decoder @classmethod def get_all_engines(cls): """ Get the full list of available disassembly engines for this version of WinAppDbg. To get the disassembly engines that can actually be used, call L{get_supported_engines} instead. @rtype: tuple( Engine ) @return: Tuple of Engine objects. """ return cls.engines @classmethod def get_available_engines(cls): """ Get the list of supported disassembly engines on this machine. To get the full list of disassembly engines supported by this version of WinAppDbg, call L{get_all_engines} instead. @warning: This call will internally load all the required dependencies for all disassembly engines! This is to ensure they are available. @rtype: tuple( Engine ) @return: Tuple of Engine objects. """ supported = [] for e in cls.engines: try: e._import_dependencies() supported.append(e) except Exception: pass return tuple(supported)