#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#  syscall_extractor.py
#
#  Copyright 2015 Spencer McIntyre <zeroSteiner@gmail.com>
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions are
#  met:
#
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above
#    copyright notice, this list of conditions and the following disclaimer
#    in the documentation and/or other materials provided with the
#    distribution.
#  * Neither the name of the  nor the names of its
#    contributors may be used to endorse or promote products derived from
#    this software without specific prior written permission.
#
#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
#  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

import argparse
import collections
import copy
import json
import os
import struct
import sys
sys.path.insert(1, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

import pefile
import tabulate

__version__ = '1.0'

IMAGE_FILE_MACHINE_I386 = 0x014c
IMAGE_FILE_MACHINE_X86_64 = 0x8664

Syscall = collections.namedtuple('Syscall', ('number', 'rva', 'name', 'ordinal'))

def pe_format_version_str(pe):
	pe.parse_data_directories()
	if not hasattr(pe, 'VS_FIXEDFILEINFO'):
		return
	version_info = []
	version_info.append((pe.VS_FIXEDFILEINFO.FileVersionMS & 0xffff0000) >> 16)
	version_info.append(pe.VS_FIXEDFILEINFO.FileVersionMS & 0xffff)
	version_info.append((pe.VS_FIXEDFILEINFO.FileVersionLS & 0xffff0000) >> 16)
	version_info.append(pe.VS_FIXEDFILEINFO.FileVersionLS & 0xffff)
	version_info = map(str, version_info)
	return '.'.join(version_info)

def get_i386_syscall(stub):
	if len(stub) < 18:
		return None
	if not stub.startswith(b'\xb8'):
		return None
	if stub[5:12] == b'\xba\x00\x03\xfe\x7f\xff\x12' and stub[12] in (b'\xc2', b'\xc3'):
		return struct.unpack('I', stub[1:5])[0]
	if stub[5:18] == b'\xe8\x03\x00\x00\x00\xc2\x08\x00\x8b\xd4\x0f\x34\xc3':
		return struct.unpack('I', stub[1:5])[0]
	return None

def get_x86_64_syscall(stub):
	if len(stub) != 11:
		return None
	if not stub.startswith(b'\x4c\x8b\xd1\xb8'):
		return None
	if not stub.endswith(b'\x0f\x05\xc3'):
		return None
	return struct.unpack('I', stub[4:8])[0]

def extract_syscalls(file_name):
	pe = pefile.PE(file_name)
	file_version = pe_format_version_str(pe)
	print("[*] Scanning {0} ({1})".format(file_name, file_version))
	if not pe.is_dll:
		print('[-] File is not a DLL')
		return
	machine = pe.NT_HEADERS.FILE_HEADER.Machine
	if machine == IMAGE_FILE_MACHINE_I386:
		extractor = get_i386_syscall
		stub_length = 18
	elif machine == IMAGE_FILE_MACHINE_X86_64:
		extractor = get_x86_64_syscall
		stub_length = 11
	else:
		print("[-] Not a supported machine type (0x{0:02x})".format(machine))
		return
	arch_name = ('i386' if machine == IMAGE_FILE_MACHINE_I386 else 'x86-64')
	print('[*] Detected file as: ' + arch_name)
	syscalls = []
	pe.parse_data_directories()
	if not hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
		return
	for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols:
		stub = pe.get_data(exp.address, stub_length)
		syscall_number = extractor(stub)
		if syscall_number is None:
			continue
		syscalls.append(Syscall(syscall_number, pe.OPTIONAL_HEADER.ImageBase + exp.address, exp.name, exp.ordinal))
	metadata = dict(file_name=os.path.basename(file_name), version=file_version, architecture=arch_name)
	return dict(metadata=metadata, syscalls=syscalls)

def main():
	output_formats = copy.copy(tabulate.tabulate_formats)
	output_formats.append('json')
	parser = argparse.ArgumentParser(description='syscall_extractor: Extract syscalls from a Windows PE file', conflict_handler='resolve')
	parser.add_argument('-f', '--format', dest='output_format', default='simple', choices=output_formats, help='output format')
	parser.add_argument('pe_files', nargs='+', help='pe files to extract syscall numbers from')
	args = parser.parse_args()

	parsed_files = []
	for pe_file in args.pe_files:
		parsed_files.append(extract_syscalls(os.path.abspath(pe_file)))
	parsed_files = list(pe_file for pe_file in parsed_files if pe_file)
	print("[+] Found {0:,} syscalls".format(sum(len(pe_file['syscalls']) for pe_file in parsed_files)))

	if args.output_format == 'json':
		print(json.dumps(parsed_files, sort_keys=True, indent=2, separators=(',', ': ')))
	else:
		syscalls = []
		for pe_file in parsed_files:
			syscalls.extend(pe_file['syscalls'])
		syscalls = ((syscall[0], hex(syscall[1]), syscall[2], syscall[3]) for syscall in syscalls)
		print(tabulate.tabulate(syscalls, headers=('Number', 'RVA', 'Name', 'Ordinal'), tablefmt=args.output_format))
	return 0

if __name__ == '__main__':
	sys.exit(main())