package org.eclipse.scava.nlp.tools.preprocessor.fileparser;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.HashMap;
import java.util.regex.Pattern;

import org.apache.tika.detect.Detector;
import org.apache.tika.exception.TikaException;
import org.apache.tika.exception.ZeroByteFileException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ToXMLContentHandler;
import org.eclipse.scava.nlp.tools.preprocessor.markdown.MarkdownParser;
import org.eclipse.scava.platform.logging.OssmeterLogger;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/**
 * Static helper that detects the media type of a file with Apache Tika and,
 * when that type is registered as supported, extracts its textual content.
 * <p>
 * The parser, the table of supported media types and the table telling which
 * kind of content handler to use for each media type are all obtained once
 * from {@link FileParserSingleton} when this class is loaded.
 */
public class FileParser
{
	
	private static final OssmeterLogger logger;
	private static final AutoDetectParser parser;
	private static final Detector detector;
	/** Base media type (without parameters) -> format name handed to {@link FileContent}. */
	private static final HashMap<String,String> supportedFiles;
	/** Base media type (without parameters) -> handler kind; "HTML", "MARKDOWN" and "PLAIN" are handled here. */
	private static final HashMap<String,String> contentHandlerType;
	/** Matches everything from the first ';' to the end of a media type string, i.e. its parameters. */
	private static final Pattern mediaTypePattern;
	
	static
	{
		logger = (OssmeterLogger) OssmeterLogger.getLogger("nlp.tools.preprocessor.fileparser");
		FileParserSingleton singleton = FileParserSingleton.getInstance();
		parser = singleton.getParser();
		supportedFiles = singleton.getSupportedFiles();
		contentHandlerType = singleton.getContentHandlerType();
		detector = parser.getDetector();
		mediaTypePattern = Pattern.compile(";.+$");
	}
	
	/**
	 * Returns the table of supported media types.
	 * <p>
	 * The returned map is the internal instance used by this class, not a copy.
	 * 
	 * @return map whose keys are the supported base media types
	 */
	public static HashMap<String, String> getSupportedFiles()
	{
		return supportedFiles;
	}
	
	/**
	 * Tells whether the media type detected for the given file is registered
	 * as supported.
	 * 
	 * @param file file to inspect; its name is given to the detector as a hint
	 * @return true if the detected media type is a key of the supported files table
	 * @throws FileNotFoundException if the file cannot be opened
	 * @throws IOException if the content cannot be read during detection
	 */
	public static boolean isSupported(File file) throws FileNotFoundException, IOException
	{
		// try-with-resources guarantees both streams are closed even when detection fails
		try (FileInputStream fis = fileToInputStream(file);
				BufferedInputStream bif = new BufferedInputStream(fis))
		{
			Metadata metadata = new Metadata();
			metadata.add(Metadata.RESOURCE_NAME_KEY, file.getName());
			return isSupported(bif, metadata);
		}
	}
	
	/**
	 * Detects the media type of the stream and checks it against the supported files table.
	 */
	private static boolean isSupported(BufferedInputStream stream, Metadata metadata) throws IOException
	{
		return isSupported(detectMediaType(stream, metadata));
	}
	
	/**
	 * Checks whether the base form of the media type is a key of the supported files table.
	 * 
	 * @param mediaType detected media type, may be null
	 * @return false for null or for a media type that is not registered
	 */
	private static boolean isSupported(MediaType mediaType)
	{
		return mediaType!=null && supportedFiles.containsKey(mediaTypeString(mediaType));
	}
	
	/**
	 * Detects the media type of the stream, using the resource name stored in
	 * the metadata both as a hint for Tika and to correct Open Office
	 * documents that are reported as plain zip archives.
	 * 
	 * @param stream content to inspect
	 * @param metadata metadata carrying, when available, the resource name
	 * @return the detected (and possibly corrected) media type
	 * @throws IOException if the stream cannot be read; the error is logged before being rethrown
	 */
	private static MediaType detectMediaType(BufferedInputStream stream, Metadata metadata) throws IOException
	{
		try {
			MediaType mediaType = detector.detect(stream, metadata);
			//Tika seems to have problem detecting correctly Open Office files
			if(mediaTypeString(mediaType).equals("application/zip"))
			{
				String resourceName = metadata.get(Metadata.RESOURCE_NAME_KEY);
				// Without a resource name there is nothing to base the correction on
				if(resourceName!=null)
				{
					if(resourceName.endsWith("odt"))
						mediaType = new MediaType("application","vnd.oasis.opendocument.text");
					else if(resourceName.endsWith("ods"))
						mediaType = new MediaType("application","vnd.oasis.opendocument.spreadsheet");
					else if(resourceName.endsWith("odp"))
						mediaType = new MediaType("application","vnd.oasis.opendocument.presentation");
				}
			}
			return mediaType;
		}
		catch (IOException e) {
			logger.error("Error while detecting the media type:", e);
			throw e;
		}
	}
	
	/**
	 * Returns the textual form of the media type with any parameters removed,
	 * i.e. without the first ';' and everything that follows it.
	 */
	private static String mediaTypeString (MediaType mediaType)
	{
		return mediaTypePattern.matcher(mediaType.toString()).replaceAll("");
	}
	

	/**
	 * Opens the file for reading, logging the error if it cannot be opened.
	 * 
	 * @throws FileNotFoundException if the file cannot be opened
	 */
	private static FileInputStream fileToInputStream(File file) throws FileNotFoundException
	{
		try {
			return new FileInputStream(file);
		}
		catch (FileNotFoundException e) {
			logger.error("File not found:", e);
			throw e;
		}
	}
	
	/**
	 * Extracts the textual content of a file.
	 * 
	 * @param file file to parse; its name is given to the detector as a hint
	 * @return the extracted content together with its media type and format name
	 * @throws UnsupportedOperationException if the file is empty, its media type
	 *         is not supported, or no content handler is known for it
	 * @throws Exception if the file cannot be opened, read or parsed
	 */
	public static FileContent extractText(File file) throws Exception
	{
		// try-with-resources guarantees both streams are closed even when parsing fails
		try (FileInputStream fis = fileToInputStream(file);
				BufferedInputStream bif = new BufferedInputStream(fis))
		{
			Metadata metadata = new Metadata();
			metadata.add(Metadata.RESOURCE_NAME_KEY, file.getName());
			return extractText(bif, metadata);
		}
	}
	
	/**
	 * Detects the media type of the stream, parses it with the content handler
	 * registered for that type and wraps the result in a {@link FileContent}.
	 * <p>
	 * The messages of the thrown {@link UnsupportedOperationException}s end
	 * with ": " and are kept as they are; presumably callers append the file
	 * identifier to them.
	 * 
	 * @param bufferedStream content to parse
	 * @param metadata metadata carrying, when available, the resource name
	 * @return the extracted content together with its media type and format name
	 * @throws UnsupportedOperationException if the content is empty, its media
	 *         type is not supported, or no content handler is known for it
	 * @throws Exception if the content cannot be read or parsed; the error is
	 *         logged before being rethrown
	 */
	private static FileContent extractText(BufferedInputStream bufferedStream, Metadata metadata) throws Exception
	{
		try {
			MediaType mediaType = detectMediaType(bufferedStream, metadata);
			if(!isSupported(mediaType))
				throw new UnsupportedOperationException("File is not supported: ");
			
			String mediaTypeString = mediaTypeString(mediaType);
			String handlerType = contentHandlerType.get(mediaTypeString);
			ContentHandler handler = createHandler(handlerType);
			// Covers both a missing entry in the handler table and an unknown handler kind
			if(handler==null)
				throw new UnsupportedOperationException("Impossible to determine how to handle the file: ");
			
			parser.parse(bufferedStream, handler, metadata);
			String formatName = supportedFiles.get(mediaTypeString);
			switch (handlerType)
			{
				case "HTML":
					return new FileContent(handler.toString(),mediaTypeString,formatName, true);
				case "MARKDOWN":
					return new FileContent(MarkdownParser.parse(handler.toString()),mediaTypeString,formatName, true);
				case "PLAIN":
					return new FileContent(handler.toString(),mediaTypeString,formatName, false);
				default:
					// Not reachable: createHandler only returns a handler for the kinds above
					throw new UnsupportedOperationException("Impossible to determine how to handle the file: ");
			}
		}
		catch (ZeroByteFileException e)
		{
			// Must be caught before TikaException, of which it is a subclass
			throw new UnsupportedOperationException("File is empty: ", e);
		}
		catch (IOException | SAXException | TikaException e) {
			logger.error("Error while parsing the file into text: ", e);
			throw e;
		}
	}
	
	/**
	 * Creates the content handler matching a handler kind.
	 * 
	 * @param handlerType handler kind read from the handler table, may be null
	 * @return a new handler, or null if the kind is null or not one of
	 *         "HTML", "MARKDOWN" and "PLAIN"
	 */
	private static ContentHandler createHandler(String handlerType)
	{
		// A switch on a null String would throw a NullPointerException
		if(handlerType==null)
			return null;
		switch (handlerType)
		{
			case "HTML":
				return new ToXMLContentHandler();
			case "MARKDOWN":
			case "PLAIN":
				return new BodyContentHandler(-1);
			default:
				return null;
		}
	}
}