#!/usr/bin/env python2.7 """Checks Hashes read from an input file on Virustotal""" __AUTHOR__ = 'Florian Roth' __VERSION__ = "0.10 September 2017" """ Modified by Hannah Ward: clean up, removal of simplejson, urllib2 with requests Install dependencies with: pip install requests bs4 colorama """ import requests import time import re import os import signal import sys import pickle from bs4 import BeautifulSoup import traceback import argparse from colorama import init, Fore, Back, Style URL = r'https://www.virustotal.com/vtapi/v2/file/report' VENDORS = ['Microsoft', 'Kaspersky', 'McAfee', 'CrowdStrike', 'TrendMicro', 'ESET-NOD32', 'Symantec', 'F-Secure', 'Sophos', 'GData'] API_KEY = '-' WAIT_TIME = 15 # Public API allows 4 request per minute, so we wait 15 secs by default def fetch_hash(line): pattern = r'(?<!FIRSTBYTES:\s)\b([0-9a-fA-F]{32}|[0-9a-fA-F]{40}|[0-9a-fA-F]{64})\b' hash_search = re.findall(pattern, line) if len(hash_search) > 0: hash = hash_search[-1] rest = ' '.join(re.sub('({0}|;|,|:)'.format(hash), ' ', line).strip().split()) return hash, rest return '', '' def print_highlighted(line, hl_color=Back.WHITE): """ Print a highlighted line """ # Tags colorer = re.compile('(HARMLESS|SIGNED|MS_SOFTWARE_CATALOGUE)', re.VERBOSE) line = colorer.sub(Fore.BLACK + Back.GREEN + r'\1' + Style.RESET_ALL + ' ', line) colorer = re.compile('(SIG_REVOKED)', re.VERBOSE) line = colorer.sub(Fore.BLACK + Back.RED + r'\1' + Style.RESET_ALL + ' ', line) colorer = re.compile('(SIG_EXPIRED)', re.VERBOSE) line = colorer.sub(Fore.BLACK + Back.YELLOW + r'\1' + Style.RESET_ALL + ' ', line) # Extras colorer = re.compile('(\[!\])', re.VERBOSE) line = colorer.sub(Fore.BLACK + Back.CYAN + r'\1' + Style.RESET_ALL + ' ', line) # Standard colorer = re.compile('([A-Z_]{2,}:)\s', re.VERBOSE) line = colorer.sub(Fore.BLACK + hl_color + r'\1' + Style.RESET_ALL + ' ', line) print line def process_permalink(url, debug=False): """ Requests the HTML page for the sample and extracts other useful data that is not included in the public API """ headers = {'User-Agent': 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)', 'Referrer': 'https://www.virustotal.com/en/'} info = {'filenames': ['-'], 'firstsubmission': '-', 'harmless': False, 'signed': False, 'revoked': False, 'expired': False, 'mssoft': False, 'imphash': '-', 'filetype': '-'} try: source_code = requests.get(url, headers=headers) # Extract info from source code soup = BeautifulSoup(source_code.text, 'html.parser') # Get file names elements = soup.find_all('td') for i, row in enumerate(elements): text = row.text.strip() if text == "File names": file_names = elements[i + 1].text.strip().split("\n") info['filenames'] = filter(None, map(lambda file: file.strip(), file_names)) # Get file names elements = soup.find_all('div') for i, row in enumerate(elements): text = row.text.strip() if text.startswith('File type'): info['filetype'] = elements[i].text[10:].strip() # Get additional information elements = soup.findAll("div", {"class": "enum"}) for i, row in enumerate(elements): text = row.text.strip() if 'First submission' in text: first_submission_raw = elements[i].text.strip().split("\n") info['firstsubmission'] = first_submission_raw[1].strip() if 'imphash' in text: info['imphash'] = elements[i].text.strip().split("\n")[-1].strip() # Harmless if "Probably harmless!" in source_code: info['harmless'] = True # Signed if "Signed file, verified signature" in source_code: info['signed'] = True # Revoked if "revoked by its issuer" in source_code: info['revoked'] = True # Expired if "Expired certificate" in source_code: info['expired'] = True # Microsoft Software if "This file belongs to the Microsoft Corporation software catalogue." in source_code: info['mssoft'] = True except Exception, e: if debug: traceback.print_exc() finally: # Return the info dictionary return info def saveCache(cache, fileName): """ Saves the cache database as pickle dump to a file :param cache: :param fileName: :return: """ with open(fileName, 'wb') as fh: pickle.dump(cache, fh, pickle.HIGHEST_PROTOCOL) def loadCache(fileName): """ Load cache database as pickle dump from file :param fileName: :return: """ try: with open(fileName, 'rb') as fh: return pickle.load(fh), True except Exception, e: # traceback.print_exc() return {}, False def removeNonAsciiDrop(string): nonascii = "error" # print "CON: ", string try: # Generate a new string without disturbing characters and allow new lines nonascii = "".join(i for i in string if (ord(i) < 127 and ord(i) > 31) or ord(i) == 10 or ord(i) == 13) except Exception, e: traceback.print_exc() pass return nonascii def signal_handler(signal, frame): print "\n[+] Saving {0} cache entries to file {1}".format(len(cache), args.c) saveCache(cache, args.c) sys.exit(0) def process_lines(lines, result_file, nocsv=False, dups=False, debug=False): """ Process the input file line by line """ # Some statistics that could help find similarities imphashes = {} for line in lines: # Skip comments if line.startswith("#"): continue # Remove line break line.rstrip("\n\r") # Get all hashes in line # ... and the rest of the line as comment hashVal, comment = fetch_hash(line) # If no hash found if hashVal == '': continue # Cache if hashVal in cache: if dups: # Colorized head of each hash check print_highlighted("\nHASH: {0} COMMENT: {1}".format(hashVal, comment)) print_highlighted("RESULT: %s (from cache)" % cache[hashVal]) continue else: # Colorized head of each hash check print_highlighted("\nHASH: {0} COMMENT: {1}".format(hashVal, comment)) # Prepare VT API request parameters = {"resource": hashVal, "apikey": API_KEY} success = False while not success: try: response_dict = requests.get(URL, params=parameters).json() success = True except Exception, e: if debug: traceback.print_exc() # print "Error requesting VT results" pass # Process results result = "- / -" virus = "-" last_submitted = "-" first_submitted = "-" filenames = "-" filetype = "-" rating = "unknown" positives = 0 res_color = Back.CYAN md5 = "-" sha1 = "-" sha256 = "-" imphash = "-" harmless = "" signed = "" revoked = "" expired = "" mssoft = "" vendor_result_string = "-" if response_dict.get("response_code") > 0: # Hashes md5 = response_dict.get("md5") sha1 = response_dict.get("sha1") sha256 = response_dict.get("sha256") # AV matches positives = response_dict.get("positives") total = response_dict.get("total") last_submitted = response_dict.get("scan_date") # Virus Name scans = response_dict.get("scans") virus_names = [] vendor_results = [] for vendor in VENDORS: if vendor in scans: if scans[vendor]["result"]: virus_names.append("{0}: {1}".format(vendor, scans[vendor]["result"])) vendor_results.append(scans[vendor]["result"]) else: vendor_results.append("-") else: vendor_results.append("-") vendor_result_string = ";".join(vendor_results) if len(virus_names) > 0: virus = " / ".join(virus_names) # Type rating = "clean" res_color = Back.GREEN if positives > 0: rating = "suspicious" res_color = Back.YELLOW if positives > 10: rating = "malicious" res_color = Back.RED # Get more information with permalink if debug: print "[D] Processing permalink {0}".format(response_dict.get("permalink")) info = process_permalink(response_dict.get("permalink"), debug) # File Names filenames = removeNonAsciiDrop(", ".join(info['filenames'][:5]).replace(';', '_')) first_submitted = info['firstsubmission'] # Other info filetype = info['filetype'] imphash = info['imphash'] if imphash != "-": if imphash in imphashes: print_highlighted("[!] Imphash seen in %d samples " "https://totalhash.cymru.com/search/?hash:%s" % (imphashes[imphash], imphash), hl_color=res_color) imphashes[imphash] += 1 else: imphashes[imphash] = 1 # Result result = "%s / %s" % (response_dict.get("positives"), response_dict.get("total")) print_highlighted("VIRUS: {0}".format(virus)) print_highlighted("FILENAMES: {0}".format(filenames)) print_highlighted("FILE_TYPE: {2} FIRST_SUBMITTED: {0} LAST_SUBMITTED: {1}".format( first_submitted, last_submitted, filetype)) # Permalink analysis results if info['harmless']: harmless = " HARMLESS" if info['signed']: signed = " SIGNED" if info['revoked']: revoked = " SIG_REVOKED" if info['expired']: expired = " SIG_EXPIRED" if info["mssoft"]: mssoft = "MS_SOFTWARE_CATALOGUE" # Print the highlighted result line print_highlighted("RESULT: %s %s%s%s%s%s" % (result, harmless, signed, revoked, expired, mssoft), hl_color=res_color) # Add to log file if not nocsv: result_line = "{0};{1};{2};{3};{4};{5};{6};{7};" \ "{8};{9};{10};{11};{12};{13};{14};{15};{16};{17}\n".format(hashVal, rating, comment, positives, virus, filenames, first_submitted, last_submitted, filetype, md5, sha1, sha256, imphash, harmless.lstrip(' '), signed.lstrip(' '), revoked.lstrip(' '), expired.lstrip(' '), vendor_result_string) with open(result_file, "a") as fh_results: fh_results.write(result_line) # Add to hash cache cache[hashVal] = result # Wait some time for the next request time.sleep(WAIT_TIME) if __name__ == '__main__': signal.signal(signal.SIGINT, signal_handler) init(autoreset=False) print Style.RESET_ALL print Fore.WHITE + Back.BLUE print " ".ljust(80) print " _ ________ _______ __ ".ljust(80) print " | | / /_ __/ / ___/ / ___ ____/ /_____ ____ ".ljust(80) print " | |/ / / / / /__/ _ \/ -_) __/ '_/ -_) __/ ".ljust(80) print " |___/ /_/ \___/_//_/\\__/\__/_/\_\\__/_/ ".ljust(80) print " ".ljust(80) print (" " + __AUTHOR__ + " - " + __VERSION__ + "").ljust(80) print " ".ljust(80) + Style.RESET_ALL print Style.RESET_ALL + " " parser = argparse.ArgumentParser(description='Virustotal Online Checker') parser.add_argument('-f', help='File to process (hash line by line OR csv with hash in each line - auto-detects ' 'position and comment)', metavar='path', default='') parser.add_argument('-c', help='Name of the cache database file (default: vt-hash-db.pkl)', metavar='cache-db', default='vt-hash-db.pkl') parser.add_argument('--nocache', action='store_true', help='Do not use cache database file', default=False) parser.add_argument('--nocsv', action='store_true', help='Do not write a CSV with the results', default=False) parser.add_argument('--dups', action='store_true', help='Do not skip duplicate hashes', default=False) parser.add_argument('--debug', action='store_true', default=False, help='Debug output') args = parser.parse_args() # Check API Key if API_KEY == '': print "[E] No API Key set" print " Include your API key in the header section of the script (API_KEY)\n" print " More info:" print " https://www.virustotal.com/en/faq/#virustotal-api\n" sys.exit(1) # Check input file if args.f == '': print "[E] Please provide an input file with '-f inputfile'\n" parser.print_help() sys.exit(1) if not os.path.exists(args.f): print "[E] Cannot find input file {0}".format(args.f) sys.exit(1) # Caches cache = {} # Trying to load cache from pickle dump if not args.nocache: cache, success = loadCache(args.c) if success: print "[+] {0} cache entries read from cache database: {1}".format(len(cache), args.c) else: print "[-] No cache database found" print "[+] Analyzed hashes will be written to cache database: {0}".format(args.c) print "[+] You can always interrupt the scan by pressing CTRL+C without losing the scan state" # Open input file try: with open(args.f, 'rU') as fh: lines = fh.readlines() except Exception, e: print "[E] Cannot read input file " sys.exit(1) # Result file # Result file if not args.nocsv: result_file = "check-results_{0}.csv".format(os.path.splitext(os.path.basename(args.f))[0]) if os.path.exists(result_file): print "[+] Found results CSV from previous run: {0}".format(result_file) print "[+] Appending results to file: {0}".format(result_file) else: print "[+] Writing results to new file: {0}".format(result_file) try: with open(result_file, 'w') as fh_results: fh_results.write("Lookup Hash;Rating;Comment;Positives;Virus;File Names;First Submitted;" "Last Submitted;MD5;SHA1;SHA256;ImpHash;Harmless;Signed;Revoked;Expired;" "{0}\n".format(";".join(VENDORS))) except Exception, e: print "[E] Cannot write export file {0}".format(result_file) # Process the input lines process_lines(lines, result_file, args.nocsv, args.dups, args.debug) # Write Cache print "\n[+] Saving {0} cache entries to file {1}".format(len(cache), args.c) saveCache(cache, args.c) print Style.RESET_ALL