/* 	CFGScanDroid - Control Flow Graph Scanning for Android
	Copyright (C) 2014  Douglas Gastonguay-Goddard

	This program is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License
	as published by the Free Software Foundation; either version 2
	of the License, or (at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details */

package org.talos.CFGScanDroid;

import java.util.ArrayList;
import java.util.List;
import java.util.HashSet;
import java.util.Set;
import java.util.Arrays;

import java.io.File;
import java.io.FileReader;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.FileNotFoundException;

import java.nio.file.Files;
import java.nio.file.Path;

import com.beust.jcommander.JCommander;
import com.beust.jcommander.ParameterException;

import org.jf.dexlib2.iface.ClassDef;
import org.jf.dexlib2.iface.instruction.Instruction;
import org.jf.dexlib2.iface.TryBlock;
import org.jf.dexlib2.iface.ExceptionHandler;
import org.jf.dexlib2.iface.Method;
import org.jf.dexlib2.iface.MethodImplementation;
import org.jf.dexlib2.dexbacked.DexBackedDexFile;
import org.jf.dexlib2.DexFileFactory;
import org.jf.util.ExceptionWithContext;

import com.google.common.collect.Ordering;

import cern.colt.matrix.impl.SparseDoubleMatrix2D;

import com.tinkerpop.blueprints.impls.tg.TinkerGraph;
import com.tinkerpop.blueprints.Graph;
import com.tinkerpop.blueprints.Vertex;
import com.tinkerpop.blueprints.Edge;

import java.util.Map;
import java.util.HashMap;

// define: incest
	// when a vertex has a sibling at the same depth
	// and that sibling is also its parent

public class CFGScanDroid {
	// number of files handed to scanDexFile() by main(); incremented once per file
	static long scannedSampleCount = 0;
	// number of files for which scanDexFile() returned true
	static long detectedSampleCount = 0;
	// number of methods visited by scanDexFile(), summed over all files
	static long scannedFunctionCount = 0;
	// command-line options; created and populated in main()
	static JCommanderArguments parsedArguments;
	// copied from parsedArguments.shortIdentifier() in validateArguments();
	// passed to cfg.getIdentifier() when scanDexFile() prints a match
	static boolean useShortIdentifier;
	// every match found so far; appended to by scanDexFile(), read by buildGraph()
	static List<Match> matches = new ArrayList<Match>();

	/**
	 * Builds a signature/file graph from every entry in the static {@code matches} list.
	 *
	 * <p>One vertex is created per distinct signature string (type "signature") and
	 * one per distinct file SHA-256 (type "file"). Every match then adds a "matches"
	 * edge from its signature vertex to its file vertex, carrying the identifier of
	 * the matched method in the "method" property.
	 *
	 * @return the populated graph, backed by "/tmp/cfggraph" in GraphML format;
	 *         main() calls shutdown() on it after building
	 */
	public static Graph buildGraph() {
		TinkerGraph graph = new TinkerGraph("/tmp/cfggraph", TinkerGraph.FileType.GRAPHML);
		graph.createIndex("signatureName", Vertex.class);
		graph.createIndex("sha256", Vertex.class);
		graph.createIndex("md5", Vertex.class);
		graph.createIndex("type", Vertex.class);

		// caches so each signature / file is represented by exactly one vertex
		Map<String, Vertex> sigLookup = new HashMap<String, Vertex>();
		Map<String, Vertex> fileLookup = new HashMap<String, Vertex>();

		for(Match match : matches) {
			// look up (or create) the signature vertex, keyed by the signature string
			CFGSig matchSig = match.getSignature();
			String sigString = matchSig.getStringSignature();
			Vertex sigVertex = sigLookup.get(sigString);

			if(sigVertex == null) {
				sigVertex = graph.addVertex(null);
				sigVertex.setProperty("type", "signature");
				sigVertex.setProperty("signature", sigString);
				sigVertex.setProperty("signatureName", matchSig.getName());
				sigLookup.put(sigString, sigVertex);
			}

			// look up (or create) the file vertex, keyed by the file's SHA-256
			String fileSHA256 = match.getFileSHA256();
			Vertex fileVertex = fileLookup.get(fileSHA256);

			if(fileVertex == null) {
				fileVertex = graph.addVertex(null);
				// the type belongs on the file vertex; setting it on sigVertex would
				// relabel the signature as a file and leave the file untyped
				fileVertex.setProperty("type", "file");
				fileVertex.setProperty("sha256", fileSHA256);
				fileVertex.setProperty("md5", match.getFileMD5());
				fileVertex.setProperty("fileNameList", new ArrayList<String>());
				fileLookup.put(fileSHA256, fileVertex);
			}

			// the same content may have been scanned under several names; record each once
			List<String> fileNames = fileVertex.getProperty("fileNameList");
			if(!fileNames.contains(match.getFileName())) {
				fileNames.add(match.getFileName());
			}

			// TODO: comment this out and see if it still works
			fileVertex.setProperty("fileNameList", fileNames);

			// create edge(sig, file), annotated with the method that matched
			Edge matchEdge = graph.addEdge(null, sigVertex, fileVertex, "matches");

			ControlFlowGraph cfg = match.getControlFlowGraph();
			matchEdge.setProperty("method", cfg.getIdentifier(false));
			// matchEdge.setProperty("fileBytes", cfg.getMethodBytesAsHexString());
		}

		return graph;
	}

	/**
	 * Command-line entry point.
	 *
	 * <p>Parses and validates the arguments, collects the input files in natural
	 * order, then either dumps a signature for every method of every file
	 * (dump mode) or loads the configured signatures and scans every file
	 * against them, optionally printing statistics and building the match graph.
	 *
	 * @param args raw command-line arguments, handed to JCommander
	 * @throws IOException propagated from dumpSigs() / scanDexFile()
	 */
	public static void main(String[] args) throws IOException {
		parsedArguments = new JCommanderArguments();
		JCommander commander = new JCommander(parsedArguments);

		// a malformed command line prints the usage text and terminates
		try {
			commander.parse(args);
		} catch(ParameterException badParameter) {
			System.err.println(badParameter);
			System.err.println("PARSE ERROR: Bad parameter");
			System.out.print(parsedArguments.getUsage());
			System.exit(1);
		}

		// reject useless option combinations and derive implied options
		validateArguments(commander);

		// files named directly plus the direct children of named directories, sorted
		List<File> targets = Ordering.natural().sortedCopy(getFileList());

		// dump mode: print signatures for every file and stop
		if(parsedArguments.dumpSignatures()) {
			for(File target : targets) {
				dumpSigs(target);
			}
			return;
		}

		// scan mode: gather signatures from signature files first, then raw ones
		List<CFGSig> signatures = new ArrayList<CFGSig>();
		for(String signaturePath : parsedArguments.getSignatureFiles()) {
			signatures.addAll(parseSignatures(signaturePath));
		}
		for(String rawSignature : parsedArguments.getRawSignatures()) {
			signatures.add(new CFGSig(rawSignature));
		}

		if(parsedArguments.normalize()) {
			for(CFGSig signature : signatures) {
				signature.normalize();
			}
		}

		// scan each file, keeping the global counters up to date
		for(File target : targets) {
			++scannedSampleCount;
			if(scanDexFile(target, signatures)) {
				++detectedSampleCount;
			}
		}

		if(parsedArguments.printStatistics()) {
			System.out.println();
			System.out.println("Samples Scanned:\t" + scannedSampleCount);
			System.out.println("Functions Scanned:\t" + scannedFunctionCount);
			System.out.println("Samples Detected:\t" + detectedSampleCount);
			for(CFGSig signature : signatures) {
				System.out.println(signature.getName() + ": " + signature.getDetectionCount());
			}
		}

		if(parsedArguments.outputGraph()) {
			Graph matchGraph = buildGraph();
			matchGraph.shutdown();
		}
	}

	/**
	 * Builds the list of files to process from the paths given on the command line.
	 *
	 * <p>A path that names a regular file is added as-is. A path that names a
	 * directory contributes its direct children that are regular files (one level
	 * deep, no recursion). Paths that do not exist are reported on stdout and
	 * skipped. Duplicates are dropped; first-seen order is preserved.
	 *
	 * @return the de-duplicated files, in the order they were encountered
	 */
	public static List<File> getFileList() {
		List<File> fileList = new ArrayList<File>();
		// mirrors fileList for constant-time duplicate detection
		Set<File> seen = new HashSet<File>();

		for(String fileName : parsedArguments.getDexFiles()) {
			File file = new File(fileName);
			// does file exist?
			if(!file.exists()) {
				System.out.println("404 - File not found! Discarding: " + fileName);
				continue;
			}

			// if directory, get files inside
			if(file.isDirectory()) {
				// list once and reuse the snapshot; listFiles() returns null when
				// the directory cannot be read
				File[] innerFileList = file.listFiles();
				if(innerFileList == null)
					continue;

				for(File entry : innerFileList) {
					// only regular files are taken; sub-directories are not descended into
					if(entry.isFile() && seen.add(entry)) {
						fileList.add(entry);
					}
				}
				continue;
			}

			// if it's a file, add it
			if(file.isFile() && seen.add(file)) {
				fileList.add(file);
			}
		}

		return fileList;
	}

	/**
	 * Checks the parsed options for a usable combination and applies the options
	 * that are implied by others.
	 *
	 * <p>Terminates the process with exit code 1 (after printing the usage text)
	 * when neither a signature source nor the dump flag was given, or when both
	 * simple match and subgraph isomorphism were requested. A missing file list
	 * only produces a warning.
	 *
	 * @param argParser the JCommander instance used for parsing (currently unused)
	 */
	public static void validateArguments(JCommander argParser) {
		// truncate long/class/path.fn to path.fn
		useShortIdentifier = parsedArguments.shortIdentifier();

		// at least one of: raw signature, signature file, dump flag
		boolean nothingRequested = parsedArguments.getRawSignatures().size() < 1
				&& parsedArguments.getSignatureFiles().size() < 1
				&& !parsedArguments.dumpSignatures();
		if(nothingRequested) {
			usageFailure("PARSE ERROR: Must have one of (-s|-d|-r)!");
		}

		// the two matching strategies are mutually exclusive
		if(parsedArguments.simpleMatch() && parsedArguments.subgraphIsomorphism()) {
			usageFailure("ERROR: Please specify only one of (-g|-i)");
		}

		// no input files is suspicious, but not fatal
		if(parsedArguments.getDexFiles().size() == 0) {
			System.err.println("YOU SHOULD PROBABLY INCLUDE SOME FILES TO SCAN! (-f)");
		}

		// simple match implies exact match
		if(parsedArguments.simpleMatch()) {
			parsedArguments.setExactMatch(true);
			parsedArguments.setPartialMatch(false);
		}

		// subgraph isomorphism implies partial matching on normalized graphs
		if(parsedArguments.subgraphIsomorphism()) {
			parsedArguments.setPartialMatch(true);
			parsedArguments.setNormalize(true);
		}

		// partial match unsets exact match
		if(parsedArguments.partialMatch()) {
			parsedArguments.setExactMatch(false);
		}
	}

	// Prints the message to stderr and the usage text to stdout, then exits with status 1.
	private static void usageFailure(String message) {
		System.err.println(message);
		System.out.print(parsedArguments.getUsage());
		System.exit(1);
	}

	/**
	 * Prints a signature for every method of the given dex file.
	 *
	 * <p>Writes a "#DUMPING: path" header line to stdout, then one signature
	 * string per method whose signature has more than one vertex. Graphs are
	 * normalized first when the normalize option is set. Missing, unloadable
	 * and odex files are reported on stderr and skipped.
	 *
	 * @param dexFileFile the dex file to dump
	 * @throws IOException declared for callers; load failures are handled here
	 */
	public static void dumpSigs(File dexFileFile) throws IOException {
		System.out.println("#DUMPING: " + dexFileFile.getPath());

		if(!dexFileFile.exists()) {
			System.err.println("Dexfile not found!");
			return;
		}

		// any load failure is reported and ends the dump for this file
		final DexBackedDexFile dex;
		try {
			dex = DexFileFactory.loadDexFile(dexFileFile, 15);
		} catch(ExceptionWithContext contextFailure) {
			System.err.println(contextFailure);
			return;
		} catch(FileNotFoundException notAFile) {
			System.err.println("Cannot scan a directory: " + dexFileFile.getPath());
			return;
		} catch(Exception loadFailure) {
			System.err.println("Error loading file: " + dexFileFile.getPath());
			return;
		}

		// skip odex, has instructions I don't support currently
		if(dex.isOdexFile()) {
			System.err.println("Odex not supported!");
			return;
		}

		// classes are visited in natural order
		List<? extends ClassDef> sortedClasses = Ordering.natural().sortedCopy(dex.getClasses());
		for(final ClassDef classDef : sortedClasses) {
			for(Method method : classDef.getMethods()) {
				ControlFlowGraph methodGraph = new ControlFlowGraph(method);
				if(parsedArguments.normalize()) {
					methodGraph.normalize();
				}

				CFGSig methodSignature = new CFGSig(methodGraph);
				// signatures with at most one vertex are not printed
				if(methodSignature.getVertexCount() <= 1) {
					continue;
				}
				System.out.println(methodSignature.getStringSignature());
			}
		}
	}

	/**
	 * Scans every method of a dex file against every signature.
	 *
	 * <p>For each method a control flow graph is built (and normalized when the
	 * normalize option is set). A signature is compared against it only if it
	 * passes the edge/vertex count pre-filter for the active match mode. Every
	 * hit is appended to the static {@code matches} list, counted on the
	 * signature, and printed according to the output options.
	 *
	 * @param dexFileFile the dex file to scan
	 * @param signatures  the signatures to look for
	 * @return true if at least one signature matched at least one method;
	 *         false also when the file is missing, unloadable or an odex file
	 * @throws IOException declared on the method; NOTE(review): presumably raised
	 *         while constructing a Match for the file — confirm against Match
	 */
	public static boolean scanDexFile(File dexFileFile, List<CFGSig> signatures) throws IOException {
		// this check is redundant now
		if(!dexFileFile.exists()) {
			System.err.println("Dexfile not found!");
			return false;
		}

		// any load failure is reported and counts as "not detected"
		final DexBackedDexFile dex;
		try {
			dex = DexFileFactory.loadDexFile(dexFileFile, 15);
		} catch(ExceptionWithContext contextFailure) {
			System.err.println(contextFailure);
			return false;
		} catch(FileNotFoundException notAFile) {
			System.err.println("Cannot scan a directory: " + dexFileFile.getPath());
			return false;
		} catch(Exception loadFailure) {
			System.err.println("Error loading file: " + dexFileFile.getPath());
			return false;
		}

		if(dex.isOdexFile()) {
			System.err.println("Odex not supported!");
			return false;
		}

		boolean fileDetected = false;
		List<? extends ClassDef> sortedClasses = Ordering.natural().sortedCopy(dex.getClasses());

		for(final ClassDef classDef : sortedClasses) {
			for(Method method : classDef.getMethods()) {
				ControlFlowGraph cfg = new ControlFlowGraph(method);

				// Normalizing every method is slow, but doing it only after the
				// count pre-filter would skew the edge and vertex counts.
				if(parsedArguments.normalize()) {
					cfg.normalize();
				}

				++scannedFunctionCount;

				for(CFGSig signature : signatures) {
					if(!sizeFilterPasses(signature, cfg)) {
						continue;
					}
					if(!structureMatches(signature, cfg)) {
						continue;
					}

					Match match;
					if(fileDetected) {
						// reuse the previous match of this file; this skips
						// rehashing the file (md5 / sha256)
						match = new Match(matches.get(matches.size() - 1), signature, cfg);
					} else {
						match = new Match(dexFileFile, signature, cfg);
						// the file header is printed once, on the first hit
						if(parsedArguments.printMatched() && !parsedArguments.printJSON()) {
							System.out.println("FILE: " + dexFileFile.getPath());
						}
					}
					fileDetected = true;
					matches.add(match);

					// alert unless suppressed
					if(parsedArguments.printMatched()) {
						if(parsedArguments.printJSON()) {
							System.out.println(match.toJSONString());
						} else {
							System.out.println("\t" + signature.getName() + " MATCH FOUND: ");
							System.out.println("\t\t" + cfg.getIdentifier(useShortIdentifier));
						}
					}

					signature.detected();

					// with oneMatch set, the remaining signatures are not tried on this method
					if(parsedArguments.oneMatch()) {
						break;
					}
				}
			}
		}

		// print unmatched unless suppressed
		if(!fileDetected && parsedArguments.printUnmatched() && !parsedArguments.printJSON()) {
			System.out.println("FILE: " + dexFileFile.getPath());
		}

		return fileDetected;
	}

	// Count pre-filter: exact mode needs equal edge and vertex counts,
	// partial mode needs the signature to be no larger than the method graph.
	private static boolean sizeFilterPasses(CFGSig signature, ControlFlowGraph cfg) {
		boolean sameSize = signature.getEdgeCount() == cfg.getEdgeCount();
		sameSize = sameSize && (signature.getVertexCount() == cfg.getVertexCount());

		boolean fitsInside = signature.getEdgeCount() <= cfg.getEdgeCount();
		fitsInside = fitsInside && (signature.getVertexCount() <= cfg.getVertexCount());

		return (parsedArguments.exactMatch() && sameSize)
				|| (parsedArguments.partialMatch() && fitsInside);
	}

	// Structural comparison: subgraph mode delegates to scanMethodSubgraph();
	// otherwise simple match accepts on counts alone, else scanMethod() decides.
	private static boolean structureMatches(CFGSig signature, ControlFlowGraph cfg) {
		if(parsedArguments.subgraphIsomorphism()) {
			return ScanningAlgorithm.scanMethodSubgraph(signature.getAdjacencyMatrix(), cfg.getAdjacencyMatrix());
		}
		return parsedArguments.simpleMatch()
				|| ScanningAlgorithm.scanMethod(signature.getAdjacencyMatrix(), cfg.getAdjacencyMatrix());
	}

	/**
	 * Loads signatures from a text file, one signature per line.
	 *
	 * <p>Blank lines and lines starting with '#' are ignored, so files that carry
	 * comment or header lines can be loaded. Any other line must contain a ';';
	 * a line without one is reported as broken and terminates the process with
	 * exit code 1. I/O errors are reported on stderr and whatever was read up to
	 * that point is returned.
	 *
	 * @param signatureFileName path of the signature file
	 * @return the parsed signatures, possibly empty; never null
	 */
	public static ArrayList<CFGSig> parseSignatures(String signatureFileName) {
		ArrayList<CFGSig> signatures = new ArrayList<CFGSig>();

		// try-with-resources closes the reader even if reading or parsing fails
		try(BufferedReader br = new BufferedReader(new FileReader(signatureFileName))) {
			String sig;
			while((sig = br.readLine()) != null) {
				// skip comments and blank lines BEFORE validating: they legitimately
				// contain no ';' and must not be mistaken for broken signatures
				if(sig.trim().isEmpty() || sig.startsWith("#"))
					continue;

				if(sig.indexOf(';') == -1) {
					System.err.println("Broken sig!");
					System.exit(1);
				}

				signatures.add(new CFGSig(sig));
			}
		} catch (IOException e) {
			System.err.println("Error reading signature file " + signatureFileName + ": " + e);
		}

		if(signatures.size() == 0) {
			System.err.println("No signatures loaded from file " + signatureFileName);
		}

		return signatures;
	}

	
	
}