package org.aksw.simba.quetsal.util;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicInteger;

import org.eclipse.rdf4j.query.QueryLanguage;
import org.eclipse.rdf4j.query.TupleQuery;
import org.eclipse.rdf4j.query.TupleQueryResult;
import org.eclipse.rdf4j.repository.RepositoryConnection;
import org.eclipse.rdf4j.repository.sparql.SPARQLRepository;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Generates VoID-based data summaries (in Turtle syntax) for a list of SPARQL endpoints.
 * <p>
 * For every endpoint the generator queries the list of distinct predicates and, per predicate,
 * the number of triples, distinct subjects and distinct objects, followed by the dataset-wide
 * totals. The resulting description of each endpoint is appended to a single output file whose
 * prefix header is written by the constructor.
 * <p>
 * An instance is single-use: {@link #generateSummaries(List)} closes the output writer when it
 * finishes.
 */
public class SemagrowSummariesGenerator {
	static final Logger log = LoggerFactory.getLogger(SemagrowSummariesGenerator.class);

	/** Upper bound on the number of endpoints that are summarised in parallel. */
	private static final int MAX_WORKER_THREADS = 10;

	/** Writer of the summaries file; opened by the constructor, closed by {@link #generateSummaries(List)}. */
	public BufferedWriter bw ;
	/** Not read or written anywhere in this class; kept because it is part of the public surface. */
	public double distinctSbj;
	/** Not read or written anywhere in this class; kept because it is part of the public surface. */
	public long trplCount ;

	/**
	 * Opens the summaries file and writes the Turtle prefix header and the root dataset node.
	 * @param location Path of the summaries file to create (an existing file is overwritten)
	 * @throws IOException if the file cannot be opened or the header cannot be written
	 */
	public SemagrowSummariesGenerator(String location) throws IOException 
	{
		// Turtle documents must be UTF-8 encoded, so do not rely on the platform default charset.
		bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(location)), StandardCharsets.UTF_8));
		try {
			bw.append("@prefix void: <http://rdfs.org/ns/void#> ."); bw.newLine();
			bw.append("@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . "); bw.newLine();
			bw.append("@prefix dc: <http://purl.org/dc/elements/1.1/> ."); bw.newLine();
			bw.append("@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> ."); bw.newLine();
			bw.newLine();
			bw.append("_:DatasetRoot rdf:type void:Dataset .");
			bw.newLine();
		} catch (IOException | RuntimeException e) {
			// Do not leak the file handle when the header cannot be written.
			try {
				bw.close();
			} catch (IOException closeFailure) {
				e.addSuppressed(closeFailure);
			}
			throw e;
		}
	}

	/**
	 * Command line entry point: summarises a hard-coded list of endpoints into a hard-coded output file.
	 * @param args ignored
	 * @throws IOException if the output file cannot be created or closed
	 */
	public static void main(String[] args) throws IOException
	{
		// Alternative host used previously: "ws24348.avicomp.com"
		String host = "192.168.0.145";
		// Further endpoints of the same host (ports 8887-8899) can be appended to this list.
		List<String> endpoints = Arrays.asList(
				"http://" + host + ":8893/sparql"
		);

		String outputFile = "summaries/semagrow_8893.ttl";

		SemagrowSummariesGenerator generator = new SemagrowSummariesGenerator(outputFile);
		long startTime = System.currentTimeMillis();

		generator.generateSummaries(endpoints);
		log.info("Data Summaries Generation Time (min): "+ (double)(System.currentTimeMillis() - startTime) / (1000 * 60));
		log.info("Data Summaries are secessfully stored at "+ outputFile);
	}

	/** Thread pool used by the most recent call of {@link #generateSummaries(List)}. */
	ExecutorService executorService;

	/**
	 * Builds the data summaries for the given SPARQL endpoints and appends them to the output file.
	 * <p>
	 * Endpoints are processed in parallel. A failure for one endpoint is logged and does not abort
	 * the remaining endpoints. The output writer is always closed before this method returns, so it
	 * can be called only once per instance.
	 * @param endpoints List of SPARQL endpoint URLs
	 * @throws IOException if closing the output file fails
	 */
	public void generateSummaries(List<String> endpoints) throws IOException
	{
		try {
			int workers = Math.max(1, Math.min(MAX_WORKER_THREADS, endpoints.size()));
			executorService = Executors.newFixedThreadPool(workers);
			try {
				List<Future<?>> pending = new ArrayList<Future<?>>(endpoints.size());
				int submitted = 0;
				for (String endpoint : endpoints)
				{
					// Number datasets in list order so the labels do not depend on thread scheduling.
					int datasetNumber = ++submitted;
					pending.add(executorService.submit(() -> writeSummary(endpoint, datasetNumber)));
				}

				for (Future<?> f : pending) {
					try {
						f.get();
					} catch (ExecutionException e) {
						log.error("Summary generation failed for one of the endpoints", e.getCause());
					} catch (InterruptedException e) {
						// Restore the flag for the caller and stop the still running workers.
						Thread.currentThread().interrupt();
						log.error("Interrupted while waiting for summary generation", e);
						executorService.shutdownNow();
						break;
					}
				}
			} finally {
				executorService.shutdown();
			}
		} finally {
			bw.close();
		}
	}

	/** Generates the summary of one endpoint and appends it to the shared writer. */
	private void writeSummary(String endpoint, int datasetNumber)
	{
		String sum = generateSummary(endpoint, datasetNumber);
		// The writer is shared by all worker threads; write each summary as one unit.
		synchronized (bw) {
			try {
				bw.append(sum);
				bw.flush();
			} catch (IOException e) {
				log.error("Unable to write the summary of endpoint " + endpoint, e);
			}
		}
	}

	/**
	 * Builds the Turtle description of a single endpoint: one property partition per distinct
	 * predicate (triples, distinct subjects, distinct objects) followed by the dataset totals.
	 * <p>
	 * The method only uses local state and the static query helpers of this class.
	 * @param endpoint SPARQL endpoint URL
	 * @param dsnum Number used to label the dataset node ({@code _:Dataset<dsnum>})
	 * @return Turtle fragment describing the endpoint
	 */
	public String generateSummary(String endpoint, int dsnum)
	{
		StringBuilder sb = new StringBuilder();

		List<String> lstPred = getPredicates(endpoint, null);
		log.info("total distinct predicates: "+ lstPred.size() + " for endpoint: " + endpoint);

		sb.append("_:Dataset").append(dsnum).append("\n");
		sb.append("  rdf:type void:Dataset ;").append("\n");
		sb.append("  void:sparqlEndpoint <").append(endpoint).append("> ;").append("\n");
		sb.append("  void:properties ").append(lstPred.size()).append(" ;").append("\n");

		long totalTrpl = 0;
		int position = 0;
		for (String pred : lstPred)
		{
			position++;
			log.info(position+" in progress: " + pred + ", endpoint: " + endpoint);
			sb.append("  void:propertyPartition [ ").append("\n");
			sb.append("    void:property  <").append(pred).append("> ;").append("\n");

			long tripleCount = getTripleCount(pred, endpoint);
			sb.append("    void:triples ").append(tripleCount).append(" ;").append("\n");

			long subjectsOfPred = getDistinctSbj(pred, endpoint);
			sb.append("    void:distinctSubjects ").append(subjectsOfPred).append(" ;").append("\n");

			long objectsOfPred = getDistinctObj(pred, endpoint);
			sb.append("    void:distinctObjects ").append(objectsOfPred).append(" ] ;").append("\n");

			totalTrpl += tripleCount;
		}
		sb.append("  void:triples ").append(totalTrpl).append(" ;").append("\n");

		long totalSbj = getSubjectCount(endpoint);
		sb.append("  void:distinctSubjects ").append(totalSbj).append(" ;").append("\n");

		long totalObj = getObjectCount(endpoint);
		sb.append("  void:distinctObjects ").append(totalObj).append(" ;").append("\n");
		sb.append("  void:subset _:DatasetRoot .").append("\n").append("\n");

		return sb.toString();
	}

	/**
	 * Get total number of distinct objects for a predicate
	 * @param pred Predicate
	 * @param endpoint SPARQL endpoint URL
	 * @return number of distinct objects
	 */
	public static long getDistinctObj(String pred, String endpoint) {
		String strQuery = "SELECT  (COUNT(DISTINCT ?o) AS ?objs) " + // 
				"WHERE " +
				"{" +
	       		"?s <" + pred + "> ?o " +
	       		"} " ;
		return queryCount(endpoint, strQuery, "objs");
	}

	/**
	 * Get total number of distinct subjects for a predicate
	 * @param pred Predicate
	 * @param endpoint SPARQL endpoint URL
	 * @return number of distinct subjects
	 */
	public static long getDistinctSbj(String pred, String endpoint) {
		String strQuery = "SELECT  (COUNT(DISTINCT ?s) AS ?subjs) " + // 
				"WHERE " +
				"{" +
	       		"?s <" + pred + "> ?o " +
	       		"} " ;
		return queryCount(endpoint, strQuery, "subjs");
	}

	/**
	 * Get total number of distinct subjects of a dataset
	 * @param endpoint SPARQL endpoint URL
	 * @return count 
	 */
	public static long getSubjectCount(String endpoint) {
		String strQuery = "SELECT  (COUNT(DISTINCT ?s) AS ?sbjts) " + // 
				"WHERE " +
				"{" +
	       		"?s ?p ?o " +
	       		"} " ;
		return queryCount(endpoint, strQuery, "sbjts");
	}

	/**
	 * Get total number of distinct objects of a dataset
	 * @param endpoint SPARQL endpoint URL
	 * @return count
	 */
	public static long getObjectCount(String endpoint) {
		String strQuery = "SELECT  (COUNT(DISTINCT ?o) AS ?objts) " + // 
				"WHERE " +
				"{" +
	       		"?s ?p ?o " +
	       		"} " ;
		return queryCount(endpoint, strQuery, "objts");
	}

	/**
	 * Get total number of triples for a predicate
	 * @param pred Predicate
	 * @param endpoint SPARQL endpoint URL
	 * @return triples
	 */
	public static Long getTripleCount(String pred, String endpoint) {
		String strQuery = "SELECT  (COUNT(?s) AS ?triples) " + // 
				"WHERE " +
				"{" +
	       		"?s <"+pred+"> ?o " +
	       		"} " ;
		return queryCount(endpoint, strQuery, "triples");
	}

	/**
	 * Runs a single-row aggregate query against an endpoint and parses one binding as a long.
	 * The query result, the connection and the repository are released in all cases.
	 * @param endpoint SPARQL endpoint URL
	 * @param strQuery SPARQL SELECT query that binds {@code countVar} in its first row
	 * @param countVar Name of the variable holding the count
	 * @return value of {@code countVar} in the first result row
	 */
	private static long queryCount(String endpoint, String strQuery, String countVar) {
		SPARQLRepository repo = new SPARQLRepository(endpoint);
		repo.initialize();
		try {
			RepositoryConnection conn = repo.getConnection();
			try {
				TupleQuery query = conn.prepareTupleQuery(QueryLanguage.SPARQL, strQuery);
				TupleQueryResult rs = query.evaluate();
				try {
					return Long.parseLong(rs.next().getValue(countVar).stringValue());
				} finally {
					// An unclosed result keeps the underlying HTTP response open.
					rs.close();
				}
			} finally {
				conn.close();
			}
		} finally {
			repo.shutDown();
		}
	}

	/**
	 * Get a SPARQL query to retrieve all distinct IRI subjects of a predicate (the input for
	 * computing the subject authorities of that predicate).
	 * Note: You need to increase the result limit of the SPARQL endpoint if the number of distinct subjects for a predicate is greater than that limit
	 * @param predicate Predicate
	 * @param graph Named graph. Can be null, in which case no FROM clause is added
	 * @return query Required SPARQL query
	 */
	public String getSbjAuthorityQuery(String predicate, String graph) {
		StringBuilder sb = new StringBuilder();
		sb.append("SELECT DISTINCT ?s");
		if (null != graph) {
			sb.append(" FROM <");
			sb.append(graph);
			sb.append(">");
		}
		sb.append(" WHERE { ?s <");
		sb.append(predicate);
		sb.append("> ?o. FILTER (isURI(?s)) }");
		return sb.toString();
	}

	/**
	 * Get Predicate List
	 * @param endPointUrl SPARQL endPoint Url
	 * @param graph Named graph. Can be null, in which case no FROM clause is added
	 * @return  predLst Predicates List
	 */
	private static List<String> getPredicates(String endPointUrl, String graph)
	{
		List<String>  predLst = new ArrayList<String>();
		String strQuery = getPredQuery(graph);
		SPARQLRepository repo = new SPARQLRepository(endPointUrl);
		repo.initialize();
		try {
			RepositoryConnection conn = repo.getConnection();
			try {
				TupleQuery query = conn.prepareTupleQuery(QueryLanguage.SPARQL, strQuery);
				TupleQueryResult res = query.evaluate();
				try {
					while (res.hasNext())
					{
						predLst.add(res.next().getValue("p").stringValue());
					}
				} finally {
					res.close();
				}
			} finally {
				conn.close();
			}
		} finally {
			repo.shutDown();
		}
		return predLst;
	}

	//--------------------------------------------------------------------------
	/**
	 * Get SPARQL query to retrieve all predicates in a SPARQL endpoint
	 * @param graph Named Graph. Can be null, in which case no FROM clause is added
	 * @return query SPARQL query
	 */
	private static String getPredQuery(String graph) {
		StringBuilder sb = new StringBuilder();
		sb.append("SELECT DISTINCT ?p");
		if (null != graph) {
			sb.append(" FROM <");
			sb.append(graph);
			sb.append(">");
		}
		sb.append(" WHERE { ?s ?p ?o }");
		return sb.toString();
	}
}