# IDA processor module for WebAssembly (Python source).

# TODO:
#  - name locations
#  - mark data xref to memory load/store
#  - mark xref to imports
#  - compute stack deltas
#  - add entry point for start function (need to see an example)

# stdlib
import logging
import functools

# from pip
import wasm
import wasm.wasmtypes
import netnode

# from IDA
import idc
import idaapi
import idautils

# from this project
import idawasm.const
import idawasm.common
import idawasm.analysis.llvm


# module-level logger, named after this module.
logger = logging.getLogger(__name__)


# these are wasm-specific operand types.
# each one aliases one of IDA's processor-specific operand type slots (`o_idpspec0` .. `o_idpspec5`).
# when rendering operands, WASM_BLOCK is compared against `op.type`,
#  while the others are compared against `op.specval`.
WASM_LOCAL = idaapi.o_idpspec0
WASM_GLOBAL = idaapi.o_idpspec1
WASM_FUNC_INDEX = idaapi.o_idpspec2
WASM_TYPE_INDEX = idaapi.o_idpspec3
WASM_BLOCK = idaapi.o_idpspec4
WASM_ALIGN = idaapi.o_idpspec5


def no_exceptions(f):
    '''
    wrap `f` so that it never raises.

    when `f` raises, the exception is logged (with its traceback) and swallowed,
     and the wrapper returns `0` in its place.
    when `f` completes normally, its return value is passed through untouched.

    IDA bails on exceptions, so routines that IDA invokes should be wrapped with this.

    Example::

        @no_exceptions
        def definitely_doesnt_work():
            raise ZeroDivisionError()

        assert definitely_doesnt_work() == 0
    '''
    @functools.wraps(f)
    def guarded(*args, **kwargs):
        try:
            result = f(*args, **kwargs)
        # deliberately catch everything, not only `Exception` subclasses:
        # IDA cannot handle anything escaping from here.
        except:  # NOQA: E722 do not use bare 'except'
            logger.error('exception in %s', f.__name__, exc_info=True)
            result = 0
        return result
    return guarded


# tags functions that are invoked from IDA-land.
# this is simply another name for `no_exceptions`,
#  so a tagged routine logs and swallows any exception, returning `0` instead.
ida_entry = no_exceptions


class wasm_processor_t(idaapi.processor_t):
    # processor module for WebAssembly, built on `idaapi.processor_t`.

    # processor ID for the wasm disassembler.
    # I made this number up.
    id = 0x8069
    # processor feature flags, combined from the PR_xxx/PRN_xxx constants exposed by idaapi.
    flag = idaapi.PR_USE32 | idaapi.PR_RNAMESOK | idaapi.PRN_HEX | idaapi.PR_NO_SEGMOVE
    # NOTE(review): presumably the number of bits per byte in code and data segments,
    #  respectively -- confirm against the IDA SDK.
    cnbits = 8
    dnbits = 8
    # processor names: `psnames` holds the short form, `plnames` the long form.
    psnames = ['wasm']
    plnames = ['WebAssembly']
    segreg_size = 0
    tbyte_size = 0
    # description of the assembler syntax used for output:
    #  directive names, the comment prefix, string delimiters, operators, and so on.
    assembler = {
        'flag': idaapi.ASH_HEXF3 | idaapi.AS_UNEQU | idaapi.AS_COLON | idaapi.ASB_BINF4 | idaapi.AS_N2CHR,
        'uflag': 0,
        'name': "WebAssembly assembler",
        'origin': "org",
        'end': "end",
        # comments are introduced with a double semicolon.
        'cmnt': ";;",
        'ascsep': "\"",
        'accsep': "'",
        'esccodes': "\"'",
        'a_ascii': "db",
        'a_byte': "db",
        'a_word': "dw",
        'a_dword': "dd",
        'a_qword': "dq",
        'a_oword': "xmmword",
        'a_float': "dd",
        'a_double': "dq",
        'a_tbyte': "dt",
        'a_dups': "#d dup(#v)",
        'a_bss': "%s dup ?",
        'a_seg': "seg",
        'a_curip': "$",
        'a_public': "public",
        'a_weak': "weak",
        'a_extrn': "extrn",
        'a_comdef': "",
        'a_align': "align",
        'lbrace': "(",
        'rbrace': ")",
        'a_mod': "%",
        'a_band': "&",
        'a_bor': "|",
        'a_xor': "^",
        'a_bnot': "~",
        'a_shl': "<<",
        'a_shr': ">>",
        'a_sizeof_fmt': "size %s",
    }

    def dt_to_width(self, dt):
        '''
        translate an operand data type (dt_xxx) into the matching output width flag (OOFW_xxx).

        Args:
          dt (int): one of `idaapi.dt_byte`, `dt_word`, `dt_dword`, or `dt_qword`.

        Returns:
          int: the corresponding `idaapi.OOFW_8`, `OOFW_16`, `OOFW_32`, or `OOFW_64` flag.

        Raises:
          KeyError: if `dt` is not one of the data types listed above.
        '''
        widths = dict((
            (idaapi.dt_byte, idaapi.OOFW_8),
            (idaapi.dt_word, idaapi.OOFW_16),
            (idaapi.dt_dword, idaapi.OOFW_32),
            (idaapi.dt_qword, idaapi.OOFW_64),
        ))
        return widths[dt]

    def _get_section(self, section_id):
        '''
        fetch the section with the given id.

        Args:
          section_id (int): the section id.

        Returns:
          wasm.Structure: the section.

        Raises:
          KeyError: if the section is not found.
        '''
        for i, section in enumerate(self.sections):
            if i == 0:
                continue

            if section.data.id != section_id:
                continue

            return section

        raise KeyError(section_id)

    def _get_section_offset(self, section_id):
        '''
        compute the file offset of the first section with the given id.

        the offset is the sum of the sizes of all the elements of `self.sections` that precede it.
        the first element is always counted, and is never a candidate match.

        Args:
          section_id (int): the section id.

        Returns:
          int: the offset of the section.

        Raises:
          KeyError: if the section is not found.
        '''
        offset = 0
        for index, section in enumerate(self.sections):
            if index != 0 and section.data.id == section_id:
                return offset

            # not the one we want, so step over it.
            offset += idawasm.common.size_of(section.data)

        raise KeyError(section_id)

    def _compute_function_branch_targets(self, offset, code):
        '''
        compute branch targets for the bytecode of a single function.

        this is done in one pass over the instructions:
        each `block`/`loop`/`if` opens a new block descriptor, which is pushed onto a stack of open blocks;
        each `end` closes the innermost open block (or the function itself, when no block is open);
        each `br`/`br_if` is resolved against the stack using its relative depth.

        a block descriptor is a dict with the keys `index`, `offset`, `depth`, `type`,
         and, once its `end` has been seen, `end_offset` (the offset just past the `end`).
        the descriptor for the function itself has `type` set to `'function'`,
         and its `end_offset` is the offset of the final `end` itself.
        descriptors are shared by reference, so an entry recorded for a branch
         gains its `end_offset` when the block is later closed.

        Args:
          offset (int): offset of the given code segment.
          code (bytes): raw bytecode.

        Returns:
          Dict[int, Dict[Union[str, int], Dict[str, Any]]]: map from instruction offset to a single-entry map.
            for block openers and `end`, the key is the string `'block'`;
            for `br`/`br_if`, the key is the relative depth.
            in both cases the value is the block descriptor.

        Raises:
          NotImplementedError: if an `else` or `br_table` instruction is encountered.
          IndexError: if a branch's relative depth exceeds the number of open blocks.
        '''
        opener_names = {
            wasm.opcodes.OP_BLOCK: 'block',
            wasm.opcodes.OP_LOOP: 'loop',
            wasm.opcodes.OP_IF: 'if',
        }

        # instruction offset -> {'block' or relative depth -> block descriptor}
        targets = {}
        # block index -> block descriptor
        known_blocks = {}
        # indexes of the currently open blocks, innermost first,
        # so that a relative depth can be used directly as a list index.
        open_blocks = []
        cursor = offset

        for insn in wasm.decode.decode_bytecode(code):
            opcode = insn.op.id

            if opcode in opener_names:
                # entering a new block: describe it and push it onto the stack.
                index = len(known_blocks)
                opened = {
                    'index': index,
                    'offset': cursor,
                    'depth': len(open_blocks),
                    'type': opener_names[opcode],
                }
                known_blocks[index] = opened
                open_blocks.insert(0, index)
                targets[cursor] = {'block': opened}

            elif opcode == wasm.opcodes.OP_END:
                if not open_blocks:
                    # nothing left open, so this `end` terminates the function.
                    targets[cursor] = {
                        'block': {
                            'type': 'function',
                            'offset': offset,
                            'end_offset': cursor,
                            'depth': 0,
                        }
                    }
                    break

                # leaving the innermost block.
                closed = known_blocks[open_blocks.pop(0)]
                closed['end_offset'] = cursor + insn.len
                targets[cursor] = {'block': closed}

            elif opcode in (wasm.opcodes.OP_BR, wasm.opcodes.OP_BR_IF):
                depth = insn.imm.relative_depth
                targets[cursor] = {depth: known_blocks[open_blocks[depth]]}

            elif opcode == wasm.opcodes.OP_ELSE:
                # TODO: not exactly sure of the semantics here
                raise NotImplementedError('else')

            elif opcode == wasm.opcodes.OP_BR_TABLE:
                # TODO: not exactly sure what one of these looks like yet.
                # probably will populate the result with multiple entries.
                raise NotImplementedError('br table')

            cursor += insn.len

        return targets

    def _compute_branch_targets(self):
        '''
        compute the branch targets for every function body in the code section.

        Returns:
          dict: the per-function results of `_compute_function_branch_targets`, merged into one map
            keyed by instruction offset.

        Raises:
          KeyError: if there is no code section.
        '''
        section = self._get_section(wasm.wasmtypes.SEC_CODE)
        section_offset = self._get_section_offset(wasm.wasmtypes.SEC_CODE)
        payload = section.data.payload

        # offset of the first function body; advanced as we walk the bodies.
        payload_offset = section_offset + idawasm.common.offset_of(section.data, 'payload')
        body_offset = payload_offset + idawasm.common.offset_of(payload, 'bodies')

        merged = {}
        for body in payload.bodies:
            code_offset = body_offset + idawasm.common.offset_of(body, 'code')
            merged.update(self._compute_function_branch_targets(code_offset, body.code))
            body_offset += idawasm.common.size_of(body)

        return merged

    def _parse_types(self):
        '''
        parse the entries of the type section.

        Returns:
          List[Dict[str, Any]]: list of type descriptors, each of which has:
            - form
            - param_count
            - param_types
            - return_count
            - return_type

        Raises:
          KeyError: if there is no type section.
        '''
        entries = self._get_section(wasm.wasmtypes.SEC_TYPE).data.payload.entries
        return idawasm.common.struc_to_dict(entries)

    def _parse_globals(self):
        '''
        parse the global entries.

        the global section is optional in a wasm module.
        when it is absent there is nothing to parse, so an empty dict is returned
         (rather than raising, which would abort loading the whole module).

        NOTE(review): indices are counted from zero over the entries of the global section only;
         imported globals are not taken into account here.

        Returns:
          Dict[int, Dict[str, any]]: from global index to dict with keys `index`, `offset`, and `type`.
            `offset` is the offset of the global's `init` field,
            and `type` is the name of the global's content type.
        '''
        try:
            global_section = self._get_section(wasm.wasmtypes.SEC_GLOBAL)
        except KeyError:
            # only the section lookup is guarded, so unrelated KeyErrors still surface.
            logger.debug('no global section')
            return {}

        pglobal_section = self._get_section_offset(wasm.wasmtypes.SEC_GLOBAL)

        ppayload = pglobal_section + idawasm.common.offset_of(global_section.data, 'payload')
        pglobals = ppayload + idawasm.common.offset_of(global_section.data.payload, 'globals')

        globals_ = {}
        # offset of the current global entry; advanced as we walk the entries.
        pcur = pglobals
        for i, body in enumerate(global_section.data.payload.globals):
            globals_[i] = {
                'index': i,
                'offset': pcur + idawasm.common.offset_of(body, 'init'),
                'type': idawasm.const.WASM_TYPE_NAMES[body.type.content_type],
            }
            pcur += idawasm.common.size_of(body)
        return globals_

    def _parse_imported_functions(self):
        '''
        parse the import entries for functions.
        useful for recovering function names.

        imported functions are numbered from zero, in the order that they appear in the import section.
        imports of other kinds are skipped, and do not consume a function index.

        the import section is optional in a wasm module.
        when it is absent there are no imported functions, so an empty dict is returned
         (rather than raising, which would abort loading the whole module).

        Returns:
          Dict[int, Dict[str, any]]: from function index to dict with keys
            `index`, `module`, `name`, `type`, `imported`, and `exported`.

        Raises:
          KeyError: if there is an import section, but no type section.
        '''
        try:
            import_section = self._get_section(wasm.wasmtypes.SEC_IMPORT)
        except KeyError:
            # only the section lookup is guarded, so unrelated KeyErrors still surface.
            logger.debug('no import section')
            return {}

        type_section = self._get_section(wasm.wasmtypes.SEC_TYPE)

        functions = {}
        function_index = 0
        for entry in import_section.data.payload.entries:
            if entry.kind != idawasm.const.WASM_EXTERNAL_KIND_FUNCTION:
                continue

            type_index = entry.type.type
            ftype = type_section.data.payload.entries[type_index]

            functions[function_index] = {
                'index': function_index,
                'module': entry.module_str.tobytes().decode('utf-8'),
                'name': entry.field_str.tobytes().decode('utf-8'),
                'type': idawasm.common.struc_to_dict(ftype),
                'imported': True,
                # TODO: not sure if an import can be exported.
                'exported': False,
            }

            function_index += 1

        return functions

    def _parse_exported_functions(self):
        '''
        parse the export entries for functions.
        useful for recovering function names.

        exports of other kinds are skipped.

        the export section is optional in a wasm module.
        when it is absent there are no exported functions, so an empty dict is returned
         (rather than raising, which would abort loading the whole module).

        Returns:
          Dict[int, Dict[str, any]]: from function index to dict with keys
            `index`, `name`, `exported`, and `imported`.
        '''
        try:
            export_section = self._get_section(wasm.wasmtypes.SEC_EXPORT)
        except KeyError:
            # only the section lookup is guarded, so unrelated KeyErrors still surface.
            logger.debug('no export section')
            return {}

        functions = {}
        for entry in export_section.data.payload.entries:
            if entry.kind != idawasm.const.WASM_EXTERNAL_KIND_FUNCTION:
                continue

            functions[entry.index] = {
                'index': entry.index,
                'name': entry.field_str.tobytes().decode('utf-8'),
                'exported': True,
                # TODO: not sure if an export can be imported.
                'imported': False,
            }

        return functions

    def _parse_functions(self):
        '''
        build the table of all functions: the imported ones, followed by those defined in the code section.

        functions defined in the module are indexed after the imported functions.
        a defined function takes its name from the matching export entry, if any,
         and otherwise is named `$func<index>`.

        Returns:
          Dict[int, Dict[str, any]]: from function index to function descriptor.
            imported functions are described as by `_parse_imported_functions`.
            defined functions have the keys
            `index`, `name`, `offset`, `type`, `exported`, `imported`, `local_types`, and `size`,
            where `offset` is the offset of the `code` field of the function body.

        Raises:
          KeyError: if the function, code, or type section is missing.
        '''
        imports = self._parse_imported_functions()
        exports = self._parse_exported_functions()

        functions = dict(imports)

        function_section = self._get_section(wasm.wasmtypes.SEC_FUNCTION)
        code_section = self._get_section(wasm.wasmtypes.SEC_CODE)
        code_section_offset = self._get_section_offset(wasm.wasmtypes.SEC_CODE)
        type_section = self._get_section(wasm.wasmtypes.SEC_TYPE)

        payload = code_section.data.payload
        payload_offset = code_section_offset + idawasm.common.offset_of(code_section.data, 'payload')
        # offset of the current function body; advanced as we walk the bodies.
        body_offset = payload_offset + idawasm.common.offset_of(payload, 'bodies')
        import_count = len(imports)

        for i in range(payload.count):
            function_index = import_count + i
            body = payload.bodies[i]
            type_index = function_section.data.payload.types[i]
            ftype = type_section.data.payload.entries[type_index]

            # locals are declared in groups of (count, type); flatten to one type per local.
            local_types = []
            for group in body.locals:
                local_types.extend([group.type] * group.count)

            export = exports.get(function_index)
            if export is None:
                name = '$func%d' % (function_index)
                is_exported = False
            else:
                name = export['name']
                is_exported = True

            functions[function_index] = {
                'index': function_index,
                'name': name,
                'offset': body_offset + idawasm.common.offset_of(body, 'code'),
                'type': idawasm.common.struc_to_dict(ftype),
                'exported': is_exported,
                'imported': False,
                'local_types': local_types,
                'size': idawasm.common.size_of(body, 'code'),
            }

            body_offset += idawasm.common.size_of(body)

        return functions

    def _render_type(self, type_, name=None):
        if name is None:
            name = ''
        else:
            name = ' ' + name

        params = []
        if type_['param_count'] > 0:
            for i, param in enumerate(type_['param_types']):
                params.append(' (param $param%d %s)' % (i, idawasm.const.WASM_TYPE_NAMES[param]))
        sparam = ''.join(params)

        if type_['return_count'] == 0:
            sresult = ''
        elif type_['return_count'] == 1:
            sresult = ' (result %s)' % (idawasm.const.WASM_TYPE_NAMES[type_['return_type']])
        else:
            raise NotImplementedError('multiple return values')

        return '(func%s%s%s)' % (name, sparam, sresult)

    def _render_function_prototype(self, function):
        if function.get('imported'):
            name = '$import%d' % (function['index'])
            signature = self._render_type(function['type'], name=name)
            return '(import "%s" "%s" %s)' % (function['module'],
                                              function['name'],
                                              signature)
        else:
            return self._render_type(function['type'], name=function['name'])

    def load(self):
        '''
        (re)build the processor and analysis state from the segments of the database.

        the processor object may not be re-created, so initialization happens here rather than in a constructor.
        the following fields are initialized:

          - self.buf
          - self.sections
          - self.types
          - self.globals
          - self.functions
          - self.function_offsets
          - self.function_ranges
          - self.branch_targets
          - self.deferred_noflows
          - self.deferred_flows

        afterwards, each function with a location is named, marked as code, and created as a function in IDA,
         and each exported function is registered as an entry point.
        '''
        logger.info('parsing sections')
        # assume all the segments are contiguous, which is what our loader does
        chunks = []
        for seg_ea in idautils.Segments():
            seg_start = idc.SegStart(seg_ea)
            chunks.append(idc.GetManyBytes(seg_start, idc.SegEnd(seg_ea) - seg_start))

        self.buf = b''.join(chunks)
        self.sections = list(wasm.decode.decode_module(self.buf))

        logger.info('parsing types')
        self.types = self._parse_types()

        logger.info('parsing globals')
        self.globals = self._parse_globals()

        logger.info('parsing functions')
        self.functions = self._parse_functions()

        # only functions with a location in the file (ie. not imports) can be indexed by address.
        located = [f for f in self.functions.values() if 'offset' in f]

        # map from function offset to function object
        self.function_offsets = dict((f['offset'], f) for f in located)

        # map from (function start, function end) to function object
        self.function_ranges = dict(((f['offset'], f['offset'] + f['size']), f) for f in located)

        logger.info('computing branch targets')
        self.branch_targets = self._compute_branch_targets()

        # these must exist before any code is created below, since that triggers `notify_emu`.
        self.deferred_noflows = {}
        self.deferred_flows = {}

        for function in self.functions.values():
            name = function['name'].encode('utf-8')

            if 'offset' in function:
                start = function['offset']
                idc.MakeName(start, name)
                # notify_emu will be invoked from here.
                idc.MakeCode(start)
                idc.MakeFunction(start, start + function['size'])

            if function.get('exported'):
                # TODO: this should really be done in the loader.
                # though, at the moment, we do a lot more analysis here in the processor.
                idc.add_entry(function['index'], function['offset'], name, True)

            # TODO: idc.add_entry for the start routine. need an example of this.

    @ida_entry
    def notify_newfile(self, filename):
        '''
        handle a file that is being analyzed for the first time.

        loads the processor state, records the offsets of the functions and globals
         in the `$ wasm.offsets` netnode, and then offers the module to each analyzer.

        Args:
          filename (str): name of the file; used only for logging.
        '''
        logger.info('new file: %s', filename)
        self.load()

        wasm_nn = netnode.Netnode('$ wasm.offsets')
        # index -> offset, for those entries that have a location.
        wasm_nn['functions'] = dict(
            (f['index'], f['offset']) for f in self.functions.values() if 'offset' in f)
        wasm_nn['globals'] = dict(
            (g['index'], g['offset']) for g in self.globals.values() if 'offset' in g)

        for Analyzer in (idawasm.analysis.llvm.LLVMAnalyzer, ):
            ana = Analyzer(self)

            # an analyzer only runs if it recognizes the module.
            if not ana.taste():
                logger.debug('%s declined analysis', Analyzer.__name__)
                continue

            logger.debug('%s analyzing', Analyzer.__name__)
            ana.analyze()


    @ida_entry
    def notify_oldfile(self, filename):
        '''
        handle a file that is loaded from an existing .idb database.

        the processor state is not persisted, so it is rebuilt from the segments via `load`.

        Args:
          filename (str): name of the file; used only for logging.
        '''
        logger.info('existing database: %s', filename)
        self.load()

    @ida_entry
    def notify_savebase(self):
        '''
        handle the database being saved.

        nothing is persisted here; the event is only logged.
        '''
        logger.info('saving wasm processor state.')

    @ida_entry
    def notify_endbinary(self, ok):
        '''
        handle the end of loading a binary file.

        the event is only logged; `ok` is not inspected.

        Args:
          ok (bool): was the file loaded successfully?
        '''
        logger.info('wasm module loaded.')

    @ida_entry
    def notify_get_autocmt(self, insn):
        '''
        fetch the auto-comment for the given instruction.

        the comment is taken from the `cmt` field of the instruction's entry in `self.instruc`.

        Returns:
          Union[str, None]: the comment string, or None if the instruction has no comment.
        '''
        descriptor = self.instruc[insn.itype]
        if 'cmt' not in descriptor:
            return None
        return descriptor['cmt']

    @ida_entry
    def notify_may_be_func(self, insn, state):
        '''
        decide whether a function can start at the given instruction.

        a function starts here exactly when the instruction address is a known function offset.

        Returns:
          int: 100 if a function starts here, zero otherwise.
        '''
        return 100 if insn.ea in self.function_offsets else 0

    def notify_emu_BR_END(self, insn, next):
        '''
        emulate an unconditional branch (BR) that is immediately followed by an END.

        the BR flows into the END, and the branch xref is deferred so that it is added from the END.

        Args:
          insn: the BR instruction.
          next: the END instruction that follows it.

        Returns:
          int: always 1.
        '''
        # keep the END in the same basic block as the BR.
        idaapi.add_cref(insn.ea, insn.ea + insn.size, idaapi.fl_F)

        # the branch is unconditional, so the END must not flow to the instruction after it.
        self.deferred_noflows[next.ea] = True

        # queue the branch xref, to be added once the END is emulated.
        targets = self.branch_targets.get(insn.ea)
        if targets is not None:
            target_va = targets[insn.Op1.value]['end_offset']
            self.deferred_flows[next.ea] = [(next.ea, target_va, idaapi.fl_JF)]

        return 1

    def notify_emu_BR_IF_END(self, insn, next):
        '''
        emulate a conditional branch (BR_IF) that is immediately followed by an END.

        the BR_IF flows into the END, and the branch xref is deferred so that it is added from the END.
        since the branch is conditional, the END keeps its default fallthrough behavior.

        Args:
          insn: the BR_IF instruction.
          next: the END instruction that follows it.

        Returns:
          int: always 1.
        '''
        # keep the END in the same basic block as the BR_IF.
        idaapi.add_cref(insn.ea, insn.ea + insn.size, idaapi.fl_F)

        # queue the branch xref, to be added once the END is emulated.
        targets = self.branch_targets.get(insn.ea)
        if targets is not None:
            target_va = targets[insn.Op1.value]['end_offset']
            self.deferred_flows[next.ea] = [(next.ea, target_va, idaapi.fl_JF)]

        return 1

    def notify_emu_RETURN_END(self, insn, next):
        '''
        emulate a RETURN that is immediately followed by an END.

        Args:
          insn: the RETURN instruction.
          next: the END instruction that follows it.

        Returns:
          int: always 1.
        '''
        # let the RETURN flow into the END, so that they share a basic block...
        idaapi.add_cref(insn.ea, insn.ea + insn.size, idaapi.fl_F)

        # ...but mark the END so that it does not flow any further.
        self.deferred_noflows[next.ea] = True

        return 1

    def notify_emu_BR(self, insn):
        # handle an unconditional branch not at the end of a black.

        # unconditional branch does not fallthrough flow.
        pass

        # branch target
        if insn.ea in self.branch_targets:
            targets = self.branch_targets[insn.ea]
            target_block = targets[insn.Op1.value]
            target_va = target_block['end_offset']
            idaapi.add_cref(insn.ea, target_va, idaapi.fl_JF)

        return 1

    def notify_emu_BR_IF(self, insn):
        '''
        emulate a conditional branch (BR_IF) that is not at the end of a block.

        a fallthrough flow is always added, since the branch may not be taken.
        if a branch target is known for this instruction, a jump xref to it is added as well.

        Returns:
          int: always 1.
        '''
        # branch not taken
        idaapi.add_cref(insn.ea, insn.ea + insn.size, idaapi.fl_F)

        # branch taken
        targets = self.branch_targets.get(insn.ea)
        if targets is not None:
            target_va = targets[insn.Op1.value]['end_offset']
            idaapi.add_cref(insn.ea, target_va, idaapi.fl_JF)

        return 1

    def notify_emu_END(self, insn):
        for flow in self.deferred_flows.get(insn.ea, []):
            idaapi.add_cref(*flow)

        if insn.ea in self.branch_targets:
            targets = self.branch_targets[insn.ea]
            block = targets['block']
            if block['type'] == 'loop':
                # end of loop

                # noflow

                # branch back to top of loop
                target_va = block['offset']
                idaapi.add_cref(insn.ea, target_va, idaapi.fl_JF)

            elif block['type'] == 'if':
                # end of if
                raise NotImplementedError('if')

            elif block['type'] == 'block':
                # end of block
                # fallthrough flow, unless a deferred noflow from earlier, such as the case:
                #
                #     return
                #     end
                #
                # the RETURN is the end of the function, so no flow after the END.
                if insn.ea not in self.deferred_noflows:
                    idaapi.add_cref(insn.ea, insn.ea + insn.size, idaapi.fl_F)

            elif block['type'] == 'function':
                # end of function
                # noflow
                pass

            else:
                raise RuntimeError('unexpected block type: ' + block['type'])

        return 1

    @ida_entry
    def notify_emu(self, insn):
        '''
        Emulate instruction, create cross-references, plan to analyze
        subsequent instructions, modify flags etc. Upon entrance to this function
        all information about the instruction is in 'insn' structure.
        If zero is returned, the kernel will delete the instruction.

        adding xrefs is fairly straightforward, except for one hiccup:
        we'd like xrefs to flow from trailing END instructions,
         rather than getting orphaned in their own basic block.

        for example, consider the following:

            br $block0
            end

        if we place the code flow xref on the BR,
         then there is no flow to the END instruction,
         and the graph will look like:

            +------------+     +-----+
            |     ...    |     | end |
            | br $block0 |     +-----+
            +------------+
                   |
                  ...

        instead, we want the code flow xref to flow from the END,
         deferred from the BR, so the graph looks like this:

            +------------+
            |     ...    |
            | br $block0 |
            | end        |
            +------------+
                   |
                  ...

        to do this, at branching instruction,
         we detect if the following instruction is an END.
        if so, we flow through to the END,
         and queue the xrefs to be added when the END is processed.

        this assumes that the branching instructions are always analyzed before the END instructions.

        unfortunately, adding xrefs on subsequent instructions doesn't work (the node doesn't exist, or something).
        so, we have to use this "deferred" approach.

        Returns:
          int: 1 when the instruction was emulated.
            (if an exception is raised, the `ida_entry` wrapper returns 0 instead.)

        Raises:
          NotImplementedError: if the instruction is a BR_TABLE.
          RuntimeError: if an instruction other than GET_GLOBAL/SET_GLOBAL references a global.
        '''
        # note: `next_insn` may be None if invalid.
        next_insn = idautils.DecodeInstruction(insn.ea + insn.size)
        followed_by_end = next_insn is not None and next_insn.itype == self.itype_END

        # add drefs to globals
        for op in insn.ops:
            if not (op.type == idaapi.o_imm and op.specval == WASM_GLOBAL):
                continue

            if op.value not in self.globals:
                logger.debug('missing global: %d', op.value)
                continue

            global_va = self.globals[op.value]['offset']
            if insn.itype == self.itype_SET_GLOBAL:
                idc.add_dref(insn.ea, global_va, idc.dr_W)
            elif insn.itype == self.itype_GET_GLOBAL:
                idc.add_dref(insn.ea, global_va, idc.dr_R)
            else:
                raise RuntimeError('unexpected instruction referencing global: ' + str(insn))

        # TODO: add drefs to memory, but need example of this first.

        if followed_by_end:
            # handle cases like:
            #
            #     block
            #     ...
            #     br $foo
            #     end
            #
            # we want the cref to flow from the instruction `end`, not `br $foo`.
            if insn.itype == self.itype_BR:
                return self.notify_emu_BR_END(insn, next_insn)

            if insn.itype == self.itype_BR_IF:
                return self.notify_emu_BR_IF_END(insn, next_insn)

            if insn.itype == self.itype_BR_TABLE:
                raise NotImplementedError('br table')

            # handle cases like:
            #
            #     ...
            #     return
            #     end
            #
            # we want the return to flow into the end, which should then not flow.
            if insn.itype == self.itype_RETURN:
                return self.notify_emu_RETURN_END(insn, next_insn)

        # handle other RETURN and UNREACHABLE instructions.
        # tbh, not sure how we'd encounter another RETURN, but we'll be safe.
        if insn.get_canon_feature() & wasm.opcodes.INSN_NO_FLOW:
            return 1

        # handle an unconditional branch not at the end of a block.
        if insn.itype == self.itype_BR:
            return self.notify_emu_BR(insn)

        if insn.itype == self.itype_BR_TABLE:
            # haven't seen one of these yet, so don't know to handle exactly.
            raise NotImplementedError('br table')

        # handle a conditional branch not at the end of a block.
        if insn.itype == self.itype_BR_IF:
            return self.notify_emu_BR_IF(insn)

        # add flows deferred from a prior branch, eg.
        #
        #     br $foo
        #     end
        #
        # flows deferred from the BR to the END insn.
        if insn.itype == self.itype_END:
            return self.notify_emu_END(insn)

        # default behavior: fallthrough
        idaapi.add_cref(insn.ea, insn.ea + insn.size, idaapi.fl_F)

        # report success explicitly, like every other path,
        # rather than implicitly returning None.
        return 1

    @ida_entry
    def out_mnem(self, ctx):
        '''
        output the mnemonic of the current instruction, with an empty postfix.

        NOTE(review): the first argument (20) is presumably the width of the mnemonic field -- confirm against the IDA SDK.
        '''
        ctx.out_mnem(20, '')

    def _get_function(self, ea):
        '''
        fetch the function object that contains the given address.
        '''
        # warning: O(#funcs) scan here, called in a tight loop (render operand).
        for (start, end), f in self.function_ranges.items():
            if start <= ea < end:
                return f
        raise KeyError(ea)

    @ida_entry
    def notify_out_operand(self, ctx, op):
        '''
        Generate text representation of an instructon operand.
        This function shouldn't change the database, flags or anything else.
        All these actions should be performed only by u_emu() function.
        The output text is placed in the output buffer initialized with init_output_buffer()
        This function uses out_...() functions from ua.hpp to generate the operand text
        Returns: 1-ok, 0-operand is hidden.
        '''
        if op.type == WASM_BLOCK:
            if op.value == 0xFFFFFFC0:  # VarInt7 for 0x40
                # block has empty type
                pass
            else:
                # ref: https://webassembly.github.io/spec/core/binary/types.html#binary-valtype
                # TODO(wb): untested!
                ctx.out_keyword({
                    # TODO(wb): I don't think these constants will line up in practice
                    0x7F: 'type:i32',
                    0x7E: 'type:i64',
                    0x7D: 'type:f32',
                    0x7C: 'type:f64',
                }[op.value])
            return True

        elif op.type == idaapi.o_reg:
            wtype = op.specval
            if wtype == WASM_LOCAL:
                # output a function-local "register".
                # these are nice because they can be re-named by the analyst.
                #
                # eg.
                #     code:0D57    get_local    $param0
                #     code:0D4B    set_local    $local9
                #                                 ^
                #                                these things
                f = self._get_function(ctx.insn.ea)
                if op.reg < f['type']['param_count']:
                    # the first `param_count` indices reference a parameter,
                    ctx.out_register('$param%d' % (op.reg))
                else:
                    # and the remaining indices are local variables.
                    ctx.out_register('$local%d' % (op.reg))
                return True

        elif op.type == idaapi.o_imm:
            wtype = op.specval
            if wtype == WASM_GLOBAL:
                # output a reference to a global variable.
                # note that we provide the address of the variable,
                #  and IDA will insert the correct name.
                # this is particularly nice when a user re-names the variable.
                #
                # eg.
                #
                #     code:0D38    set_global   global_0
                #                                 ^
                #                                this thing
                if op.value in self.globals:
                    g = self.globals[op.value]
                    ctx.out_name_expr(op, g['offset'])
                    return True
                else:
                    logger.info('missing global at index %d', op.value)
                    ctx.out_register('$global%d'% (op.value))
                    return True


            elif wtype == WASM_FUNC_INDEX:
                f = self.functions[op.value]
                if 'offset' in f:
                    # output a reference to an existing function.
                    # note that we provide the address of the function,
                    #  and IDA will insert the correct name.
                    #
                    # eg.
                    #
                    #     code:0D9E    call   $func9
                    #                           ^
                    #                          this thing
                    ctx.out_name_expr(op, f['offset'])
                else:
                    # output a reference to a function by name,
                    # such as an imported routine.
                    # since this won't have a location in the binary,
                    #  we output the raw name of the function.
                    #
                    # TODO: link this to the import entry
                    ctx.out_keyword(f['name'].encode('utf-8'))
                return True

            elif wtype == WASM_TYPE_INDEX:
                # resolve the type index into a type,
                # then human-render it.
                #
                # eg.
                #
                #     code:0B7F  call_indirect  (func (param $param0 i32) (param $param1 i32) (result i32)), 0
                #                  ^
                #                 this thing
                type_index = op.value
                type = self.types[type_index]
                signature = self._render_type(type)

                ctx.out_keyword(signature)
                return True

            elif wtype == WASM_ALIGN:
                # output an alignment directive.
                #
                # eg.
                #
                #     code:0B54   i32.load    0x30, align:2
                #                                     ^
                #                                    this thing
                ctx.out_keyword('align:')
                width = self.dt_to_width(op.dtype)
                ctx.out_value(op, idaapi.OOFW_IMM | width)
                return True

            else:
                width = self.dt_to_width(op.dtype)
                ctx.out_value(op, idaapi.OOFW_IMM | width)
                return True

        # error case
        return False

    @ida_entry
    def notify_out_insn(self, ctx):
        '''
        render one instruction as text: optional function prototype, mnemonic,
        operands, and (for block-like instructions) the block label.

        all output goes through `ctx`; this must not change the database.

        args:
          ctx (object): has a `.insn` field.
        '''
        insn = ctx.insn
        ea = insn.ea

        # when a function begins at this address, emit its prototype first.
        # like::
        #
        #     code:082E $func8:
        #     code:082E (func $func8 (param $param0 i32) (param $param1 i32) (result i32))
        if ea in self.function_offsets:
            # use idaapi.rename_regvar and idaapi.find_regvar to resolve $local/$param names
            # ref: https://reverseengineering.stackexchange.com/q/3038/17194
            prototype = self._render_function_prototype(self.function_offsets[ea])
            ctx.gen_printf(0, prototype + '\n')

        # layout of the rendered line: the mnemonic, then zero or more operands,
        # with ", " between consecutive operands.
        #
        # eg.
        #
        #     code:0E30    i32.store    0x1C,  align:2
        #                      ^         ^  ^ ^     ^
        #                  mnemonic      |  | |     |
        #                             op[0] | |     |
        #                               comma |     |
        #                                     space |
        #                                        op[1]
        ctx.out_mnemonic()
        ctx.out_one_operand(0)

        # only the second and third operand slots are considered here;
        # the first void slot terminates the list.
        for slot in (1, 2):
            if insn[slot].type == idaapi.o_void:
                break

            ctx.out_symbol(',')
            ctx.out_char(' ')
            ctx.out_one_operand(slot)

        # block, loop, and end instructions get the name of their block appended.
        #
        # eg.
        #
        #     code:0E84     block        $block2
        #     code:0E86     loop         $loop3
        #     code:0F3F     end          $loop3
        #                                   ^
        #                                 this name
        #
        # TODO: resolve block names on conditionals.
        # right now they look like:
        #
        #     code:0E77     br_if        1
        #
        # but we want something like this:
        #
        #     code:0E77     br_if        $block2
        #
        # TODO: even better, we should use the location name, rather than auto-generated $block name
        # from this:
        #
        #     code:0E77     br_if        $block2
        #
        # want:
        #
        #     code:0E77     br_if        loc_error
        block_itypes = (self.itype_BLOCK, self.itype_LOOP, self.itype_END)
        if insn.itype in block_itypes and ea in self.branch_targets:
            block = self.branch_targets[ea]['block']
            kind = block['type']
            if kind in ('block', 'loop'):
                # emit the label one character at a time, colored as a user-defined name.
                ctx.out_tagon(idaapi.COLOR_UNAME)
                label = "$" + kind + str(block['index'])
                for ch in label:
                    ctx.out_char(ch)
                ctx.out_tagoff(idaapi.COLOR_UNAME)

        ctx.set_gen_cmt()
        ctx.flush_outbuf()

    @ida_entry
    def notify_ana(self, insn):
        '''
        decodes an instruction and place it into the given insn.

        reads the opcode byte, decodes the full instruction via
        `wasm.decode.decode_bytecode`, consumes the remaining instruction bytes
        from `insn`, and then populates `insn.itype` and the operands.

        because this routine is tagged with `ida_entry`, an exception raised
        anywhere in here is logged and 0 is returned instead.

        Args:
          insn (idaapi.insn_t): the instruction to populate.

        Returns:
          int: size of insn on success, 0 on failure.
        '''

        # as of today (v1), each opcode is a single byte
        opb = insn.get_next_byte()

        # unknown opcode byte: report failure to decode.
        if opb not in wasm.opcodes.OPCODE_MAP:
            return 0

        # translate from opcode index to IDA-specific const.
        # as you can see elsewhere, IDA insn consts have to be contiguous,
        #  so we can't just re-use the opcode index.
        insn.itype = self.insns[opb]['id']

        # fetch entire instruction buffer to decode
        if wasm.opcodes.OPCODE_MAP.get(opb).imm_struct:
            # opcode has operands that we must decode

            # warning: py2.7-specific
            # can't usually just cast the bytearray to a string without explicit decode.
            # assumption: instruction will be less than 0x10 bytes.
            # NOTE(review): a `br_table` with many targets could be longer than 0x10 bytes;
            #  confirm how the decoder behaves when the buffer is too short.
            buf = str(bytearray(idc.GetManyBytes(insn.ea, 0x10)))
        else:
            # single byte instruction

            # warning: py2.7-specific
            buf = str(bytearray([opb]))

        # only the first decoded instruction in the buffer is of interest.
        bc = next(wasm.decode.decode_bytecode(buf))
        # the opcode byte was already consumed above, hence the range starts at 1.
        for _ in range(1, bc.len):
            # consume any additional bytes.
            # this is how IDA knows the size of the insn.
            insn.get_next_byte()

        # start with no operands; the branches below fill in those that exist.
        # NOTE(review): Op3 is not reset here, although the br_table case sets it;
        #  presumably IDA hands us a cleared insn - confirm.
        insn.Op1.type = idaapi.o_void
        insn.Op2.type = idaapi.o_void

        # decode instruction operand.
        # as of today (V1), there's at most a single operand.
        # (though there may also be alignment directive, etc. that we place into Op2+)
        #
        # place the operand value into `.value`, unless its a local, and then use `.reg`.
        # use `.specval` to indicate special handling of register, possible cases:
        #   WASM_LOCAL
        #   WASM_GLOBAL
        #   WASM_FUNC_INDEX
        #   WASM_TYPE_INDEX
        #   WASM_BLOCK
        #   WASM_ALIGN
        #
        if bc.imm is not None:
            # the immediate's structure class selects which operand layout to use.
            immtype = bc.imm.get_meta().structure

            SHOW_FLAGS = idaapi.OF_NO_BASE_DISP | idaapi.OF_NUMBER | idaapi.OF_SHOW

            # wasm is currently single-byte opcode only
            # therefore the first operand must be found at offset 0x1.
            insn.Op1.offb = 1
            insn.Op1.offo = 1

            # by default, display the operand, unless overridden below.
            insn.Op1.flags = SHOW_FLAGS

            if immtype == wasm.immtypes.BlockImm:
                # sig = BlockTypeField()
                # note: unlike the other cases, the operand *type* itself is the
                #  wasm-specific WASM_BLOCK, not a standard IDA operand type.
                insn.Op1.type = WASM_BLOCK
                insn.Op1.dtype = idaapi.dt_dword
                insn.Op1.value = bc.imm.sig
                insn.Op1.specval = WASM_BLOCK

            elif immtype == wasm.immtypes.BranchImm:
                # relative_depth = VarUInt32Field()
                insn.Op1.type = idaapi.o_imm
                insn.Op1.dtype = idaapi.dt_dword
                insn.Op1.value = bc.imm.relative_depth

            elif immtype == wasm.immtypes.BranchTableImm:
                # target_count = VarUInt32Field()
                # target_table = RepeatField(VarUInt32Field(), lambda x: x.target_count)
                # default_target = VarUInt32Field()
                insn.Op1.type = idaapi.o_imm
                insn.Op1.dtype = idaapi.dt_dword
                insn.Op1.value = bc.imm.target_count

                # NOTE(review): `target_table` is declared as a repeated field above,
                #  yet it is stored into a single operand value - confirm this is accepted.
                insn.Op2.type = idaapi.o_imm
                insn.Op2.offb = 1  # TODO(wb): fixup offset of Op2
                insn.Op2.offo = 1  # TODO(wb): fixup offset of Op2
                insn.Op2.flags = SHOW_FLAGS
                insn.Op2.dtype = idaapi.dt_dword
                insn.Op2.value = bc.imm.target_table

                insn.Op3.type = idaapi.o_imm
                insn.Op3.offb = 1  # TODO(wb): fixup offset of Op3
                insn.Op3.offo = 1  # TODO(wb): fixup offset of Op3
                insn.Op3.flags = SHOW_FLAGS
                insn.Op3.dtype = idaapi.dt_dword
                insn.Op3.value = bc.imm.default_target

            elif immtype == wasm.immtypes.CallImm:
                # function_index = VarUInt32Field()
                insn.Op1.type = idaapi.o_imm
                insn.Op1.dtype = idaapi.dt_dword
                insn.Op1.value = bc.imm.function_index
                insn.Op1.specval = WASM_FUNC_INDEX

            elif immtype == wasm.immtypes.CallIndirectImm:
                # type_index = VarUInt32Field()
                # reserved = VarUInt1Field()
                insn.Op1.type = idaapi.o_imm
                insn.Op1.dtype = idaapi.dt_dword
                insn.Op1.value = bc.imm.type_index
                insn.Op1.specval = WASM_TYPE_INDEX

                insn.Op2.type = idaapi.o_imm
                insn.Op2.offb = 1  # TODO(wb): fixup offset of Op2
                insn.Op2.offo = 1  # TODO(wb): fixup offset of Op2
                insn.Op2.flags = SHOW_FLAGS
                insn.Op2.dtype = idaapi.dt_dword
                insn.Op2.value = bc.imm.reserved

            elif immtype == wasm.immtypes.LocalVarXsImm:
                # local_index = VarUInt32Field()
                # locals are modeled as registers: the index goes into `.reg`, not `.value`.
                # (no dtype is assigned in this case.)
                insn.Op1.type = idaapi.o_reg
                insn.Op1.reg = bc.imm.local_index
                insn.Op1.specval = WASM_LOCAL

            elif immtype == wasm.immtypes.GlobalVarXsImm:
                # global_index = VarUInt32Field()
                insn.Op1.type = idaapi.o_imm
                insn.Op1.dtype = idaapi.dt_dword
                insn.Op1.value = bc.imm.global_index
                insn.Op1.specval = WASM_GLOBAL

            elif immtype == wasm.immtypes.MemoryImm:
                # flags = VarUInt32Field()
                # offset = VarUInt32Field()
                # the offset becomes the first operand, and the flags become
                # the second operand, tagged as an alignment directive.
                insn.Op1.type = idaapi.o_imm
                insn.Op1.dtype = idaapi.dt_dword
                insn.Op1.value = bc.imm.offset

                insn.Op2.type = idaapi.o_imm
                insn.Op2.offb = 1  # TODO(wb): fixup offset of Op2
                insn.Op2.offo = 1  # TODO(wb): fixup offset of Op2
                insn.Op2.flags = SHOW_FLAGS
                insn.Op2.dtype = idaapi.dt_dword
                insn.Op2.value = bc.imm.flags
                insn.Op2.specval = WASM_ALIGN

            elif immtype == wasm.immtypes.CurGrowMemImm:
                # reserved = VarUInt1Field()
                insn.Op1.type = idaapi.o_imm
                insn.Op1.dtype = idaapi.dt_dword
                insn.Op1.value = bc.imm.reserved

            elif immtype == wasm.immtypes.I32ConstImm:
                # value = VarInt32Field()
                insn.Op1.type = idaapi.o_imm
                insn.Op1.dtype = idaapi.dt_dword
                insn.Op1.value = bc.imm.value

            elif immtype == wasm.immtypes.I64ConstImm:
                # value = VarInt64Field()
                insn.Op1.type = idaapi.o_imm
                insn.Op1.dtype = idaapi.dt_qword
                insn.Op1.value = bc.imm.value

            elif immtype == wasm.immtypes.F32ConstImm:
                # value = UInt32Field()
                insn.Op1.type = idaapi.o_imm
                insn.Op1.dtype = idaapi.dt_float
                insn.Op1.value = bc.imm.value

            elif immtype == wasm.immtypes.F64ConstImm:
                # value = UInt64Field()
                insn.Op1.type = idaapi.o_imm
                insn.Op1.dtype = idaapi.dt_double
                insn.Op1.value = bc.imm.value

        # the size reflects every byte consumed via `get_next_byte` above.
        return insn.size

    def init_instructions(self):
        '''
        build the instruction tables that IDA requires from the wasm opcode list.

        populates:
          - `self.insns`: map from opcode byte to instruction descriptor.
          - `self.itype_*`: one constant per instruction, named after the mnemonic.
          - `self.instruc`: descriptors ordered by their IDA constant.
          - `self.instruc_start`, `self.instruc_end`, `self.icode_return`.
        '''
        self.insns = {}
        for itype, opcode in enumerate(wasm.opcodes.OPCODES):
            # danger: this must be an ASCII-encoded byte string, *not* unicode!
            mnemonic = opcode.mnemonic.encode('ascii')

            self.insns[opcode.id] = {
                # the opcode byte
                'opcode': opcode.id,
                # the IDA constant for this instruction.
                # these must be contiguous, so the position in the opcode list
                # is used rather than the opcode byte.
                'id': itype,
                'name': mnemonic,
                'feature': opcode.flags,
                'cmt': idawasm.const.WASM_OPCODE_DESCRIPTIONS.get(opcode.id),
            }

            # turn the mnemonic into an identifier, eg. `i32.load` -> `itype_I32_LOAD`.
            const_name = 'itype_' + mnemonic.replace('.', '_').replace('/', '_').upper()
            setattr(self, const_name, itype)

        # array of instructions.
        # the index into this array apparently must match the `self.itype_*`.
        self.instruc = sorted(self.insns.values(), key=lambda entry: entry['id'])

        self.instruc_start = 0
        self.instruc_end = len(self.instruc)
        self.icode_return = self.itype_RETURN

    def init_registers(self):
        """Build the register name table and the corresponding ireg_XXX constants."""

        # for wasm, "registers" are local variables and parameters.
        #
        # ideally we'd scan the module for the largest number of locals and
        # parameters, but that data isn't available yet,
        # so a scary large number is picked for each.
        #
        # note: IDA reg_t size is 16-bits
        MAX_LOCALS = 0x1000
        MAX_PARAMS = 0x1000

        names = ["$local%d" % (n) for n in range(MAX_LOCALS)]
        names.extend("$param%d" % (n) for n in range(MAX_PARAMS))

        # these are fake, "virtual" registers.
        # req'd for IDA, apparently.
        # (not actually used in wasm)
        names.extend(["SP", "CS", "DS"])

        self.reg_names = names

        # create the ireg_XXXX constants: the name with the `$` dropped,
        # mapped to the register's index in `reg_names`.
        # for wasm, will look like: ireg_local0, ireg_param0
        for index, name in enumerate(self.reg_names):
            setattr(self, 'ireg_' + name.replace('$', ''), index)

        # segment register information, using the virtual CS and DS registers
        # since wasm has no segment registers.
        # (not actually used in wasm)
        self.reg_first_sreg = self.ireg_CS
        self.reg_last_sreg = self.ireg_DS

        # number of the CS and DS registers, respectively.
        # (not actually used in wasm)
        self.reg_code_sreg = self.ireg_CS
        self.reg_data_sreg = self.ireg_DS

    def __init__(self):
        '''
        set up the instruction and register tables, and empty analysis state.

        this is called prior to loading a binary, so don't read from the database here.
        the per-module state initialized below is populated later by `notify_newfile`.
        '''
        idaapi.processor_t.__init__(self)

        # assume PTRSZ = 4 by default
        self.PTRSZ = 4

        self.init_instructions()
        self.init_registers()

        #
        # module state; empty until `notify_newfile` fills it in.
        #

        self.buf = b''

        # wasm section objects, in order.
        self.sections = []

        # function index -> function object
        self.functions = {}

        # virtual address -> function object
        self.function_offsets = {}

        # (va-start, va-end) -> function object
        self.function_ranges = {}

        # global index -> global object
        self.globals = {}

        # va -> (relative depth -> va)
        self.branch_targets = {}

        # type descriptors.
        self.types = []

        #
        # state used by `notify_emu`.
        #

        # address -> list of cref arguments.
        self.deferred_flows = {}

        # addresses which should not flow, stored as: address -> True.
        self.deferred_noflows = {}


def PROCESSOR_ENTRY():
    '''
    configure debug-level logging and return a new wasm processor module instance.
    '''
    logging.basicConfig(level=logging.DEBUG)
    processor = wasm_processor_t()
    return processor