#!/usr/bin/python

## Binary Analysis Tool
## Copyright 2012-2015 Armijn Hemel for Tjaldur Software Governance Solutions
## Licensed under Apache 2.0, see LICENSE file for details

'''
This is a plugin for the Binary Analysis Tool. Its purpose is to determine the
package a file belongs to based on the name of a package. This information is
mined from distributions like Fedora and Debian.
'''

import os, os.path, sys, subprocess, copy, Queue, cPickle
import multiprocessing
from multiprocessing import Process, Lock
from multiprocessing.sharedctypes import Value, Array

def grabpackage(scanqueue, reportqueue, cursor, query):
	'''
	Worker loop: look up the packages that ship a file with a given name.

	Runs forever: takes a file name from scanqueue, executes query with the
	base name of that file as its only parameter and, if rows come back, puts
	a dictionary {filename: [package descriptions]} on reportqueue.

	scanqueue   -- joinable queue with file names (get() and task_done() are used)
	reportqueue -- queue that receives one dictionary per file with hits
	cursor      -- database cursor used for the lookup
	query       -- SQL statement taking the base name as single parameter, each
	               result row must have four columns: package, package version,
	               distribution and distribution version

	The function never returns normally. It only stops when it is terminated
	from the outside or when an exception escapes, for example when get()
	times out or the database lookup fails.
	'''
	## select the packages that are available. It would be better to also have the directory
	## name available, so we should get rid of 'path' and use something else that is better
	## suited
	while True:
		## 2592000 seconds is 30 days, so in practice: block until there is work
		filename = scanqueue.get(timeout=2592000)
		try:
			cursor.execute(query, (os.path.basename(filename),))
			res = cursor.fetchall()
			## truthiness instead of a comparison with [], so empty results
			## that are not a list (like an empty tuple) are skipped as well
			if res:
				returnres = []
				## TODO: filter results, only return files that are not in tons of packages
				for (package, packageversion, distribution, distroversion) in res:
					returnres.append({
						'package': package,
						'packageversion': packageversion,
						'distribution': distribution,
						'distributionversion': distroversion,
					})
				reportqueue.put({filename: returnres})
		finally:
			## always mark the item as done, also when the lookup failed,
			## otherwise a join() on scanqueue can never return
			scanqueue.task_done()

def filename2package(unpackreports, scantempdir, topleveldir, processors, scanenv, batcursors, batcons, scandebug=False, unpacktempdir=None):
	'''
	Look up, for every file that has a checksum, which packages contain a
	file with the same base name and record the result.

	The lookups are done by grabpackage() in separate processes. For every
	file with at least one hit:

	* the per file pickle in <topleveldir>/filereports is rewritten with the
	  results stored under the key 'file2package' and with 'file2package'
	  appended to its tags
	* 'file2package' is appended to the tags of the file in unpackreports

	unpackreports -- dictionary: file name -> report, only entries with a
	                 'checksum' key are looked at, modified in place
	topleveldir   -- directory that contains the 'filereports' directory
	processors    -- maximum amount of worker processes, None means 1
	batcursors    -- database cursors, worker i uses batcursors[i]

	scantempdir, scanenv, batcons, scandebug and unpacktempdir are not used
	by this function (presumably they are part of a shared scan signature).

	Nothing is returned, all results are recorded as side effects.
	'''
	## only files with a checksum are looked up
	processtasks = [i for i in unpackreports if 'checksum' in unpackreports[i]]

	if processors is None:
		processamount = 1
	else:
		processamount = processors
	## create a queue for tasks, with a few processes reading from the queue
	## and looking up results and putting them in a result queue
	query = "select distinct package, packageversion, source, distroversion from file where filename = %s"
	scanmanager = multiprocessing.Manager()
	scanqueue = multiprocessing.JoinableQueue(maxsize=0)
	reportqueue = scanmanager.Queue(maxsize=0)
	processpool = []

	for task in processtasks:
		scanqueue.put(task)
	## never start more processes than there are tasks
	minprocessamount = min(len(processtasks), processamount)
	res = []

	try:
		for i in range(0, minprocessamount):
			p = multiprocessing.Process(target=grabpackage, args=(scanqueue, reportqueue, batcursors[i], query))
			p.start()
			processpool.append(p)

		## wait until every task has been marked as done by a worker
		scanqueue.join()

		## then drain the results
		while True:
			try:
				val = reportqueue.get_nowait()
			except Queue.Empty:
				## Queue is empty
				break
			res.append(val)
			reportqueue.task_done()
		reportqueue.join()
	finally:
		## the workers loop forever, so they have to be stopped explicitly,
		## also when something went wrong above. Join them afterwards so
		## no defunct processes are left behind.
		for p in processpool:
			p.terminate()
		for p in processpool:
			p.join()

	for r in res:
		## every result is a dictionary with a single key: the file name
		filename = next(iter(r))
		filehash = unpackreports[filename]['checksum']
		picklepath = os.path.join(topleveldir, "filereports", "%s-filereport.pickle" % filehash)

		## read pickle file
		## NOTE: unpickling is only safe for trusted data. The file is
		## presumably written by BAT itself -- TODO confirm
		with open(picklepath, 'rb') as leaf_file:
			leafreports = cPickle.load(leaf_file)

		leafreports['file2package'] = r[filename]
		leafreports['tags'].append('file2package')
		unpackreports[filename]['tags'].append('file2package')

		## write pickle file
		with open(picklepath, 'wb') as leaf_file:
			cPickle.dump(leafreports, leaf_file)

def file2packagesetup(scanenv, cursor, conn, debug=False):
	'''
	Setup hook: check whether the database has what the scan needs.

	Returns a tuple (usable, environment):

	* (False, {}) if there is no cursor, or if the 'public' schema has no
	  base table called 'file'
	* (True, scanenv) otherwise, with scanenv passed through untouched

	The table listing is followed by a commit on conn. The debug parameter
	is not used.
	'''
	disabled = (False, {})
	if cursor is None:
		return disabled

	## ask the database which base tables exist in the 'public' schema
	cursor.execute("select table_name from information_schema.tables where table_type='BASE TABLE' and table_schema='public'")
	tablenames = [row[0] for row in cursor.fetchall()]
	conn.commit()

	if 'file' in tablenames:
		return (True, scanenv)
	return disabled