package sequencemining.util;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.input.ReversedLinesFileReader;

import sequencemining.main.SequenceMining;
import sequencemining.main.SequenceMiningCore;
import sequencemining.sequence.Sequence;
import sequencemining.transaction.TransactionList;

/**
 * Recovers results from a partial sequence-mining log: reads the sequences
 * recorded by the last "Parameter Optimal Sequences:" line of the log, computes
 * their interestingness against the transaction database and appends the
 * sorted sequences (with probability and interestingness) to the end of the
 * same log file.
 */
public class PartialLogFixer {

	/** Marker identifying a log line that lists the sequences of an EM step. */
	private static final String EM_STEP_MARKER = "Parameter Optimal Sequences:";

	/**
	 * Matches one log entry of the form {@code [i, j, ...]=(v1,v2,...)}: group 1
	 * holds the bracketed item list, group 2 the parenthesised number list.
	 * Compiled once here rather than on every call.
	 */
	private static final Pattern SEQUENCE_ENTRY = Pattern.compile(
			"\\[((?:[0-9]|,| )+?)\\]=\\(((?:(?:[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)|,)+?)\\)");

	/**
	 * Command-line entry point.
	 *
	 * @param args
	 *            exactly two arguments: the transaction database file and the
	 *            log file (the log file is both read and appended to)
	 * @throws IOException
	 *             if reading the inputs or appending to the log fails
	 */
	public static void main(final String[] args) throws IOException {
		if (args.length != 2) {
			System.err.println("Usage <transactionDB> <logFile>");
			System.exit(-1);
		}

		final File transactionDB = new File(args[0]);
		final File logFile = new File(args[1]);

		System.out.println("Reading sequences from last parameter EM step for " + args[1] + "...");
		final HashMap<Sequence, Double> sequences = readLastEMStepSequences(logFile);
		System.out.println("done. Number of sequences: " + sequences.size());

		System.out.println("\nWriting sorted sequences to " + args[1] + "...");
		sortSequencesInterestingness(sequences, transactionDB, logFile);
		System.out.println("All done. Exiting.");

	}

	/**
	 * Scans the log file backwards for the first line containing
	 * "Parameter Optimal Sequences:" (i.e. the last such line in the file) and
	 * parses every {@code [items]=(numbers)} entry found on it.
	 *
	 * @param logFile
	 *            the partial log file to read
	 * @return map from each parsed sequence to {@code 1 - x}, where {@code x} is
	 *         the first number of the entry's parenthesised list; empty if no
	 *         matching line exists
	 * @throws IOException
	 *             if the log file cannot be read
	 */
	public static HashMap<Sequence, Double> readLastEMStepSequences(final File logFile) throws IOException {
		final HashMap<Sequence, Double> sequences = new HashMap<>();

		// try-with-resources: the reader is released even if reading or
		// number parsing throws part-way through
		try (final ReversedLinesFileReader reader = new ReversedLinesFileReader(logFile)) {
			String line = reader.readLine();
			while (line != null) {

				if (line.contains(EM_STEP_MARKER)) {
					final Matcher m = SEQUENCE_ENTRY.matcher(line);
					while (m.find()) {
						final Sequence sequence = new Sequence();
						for (final String item : m.group(1).split(", "))
							sequence.add(Integer.parseInt(item));
						// NOTE(review): presumably the first logged value is the
						// complement of the sequence probability - confirm
						// against the code that writes the log
						final double prob = 1 - Double.parseDouble(m.group(2).split(",")[0]);
						sequences.put(sequence, prob);
					}
					// Only the most recent EM step is of interest
					break;
				}
				line = reader.readLine();

			}
		}

		return sequences;
	}

	/**
	 * Computes the interestingness of the given sequences over the transaction
	 * database, sorts them via {@link SequenceMiningCore#sortSequences} and
	 * appends one line per sequence to the log file under an
	 * "INTERESTING SEQUENCES" header.
	 *
	 * @param sequences
	 *            sequences with their associated probabilities
	 * @param transactionDB
	 *            transaction database file to evaluate the sequences against
	 * @param logFile
	 *            log file to append the results to
	 * @throws IOException
	 *             if the database cannot be read or the log cannot be written
	 */
	public static void sortSequencesInterestingness(final HashMap<Sequence, Double> sequences, final File transactionDB,
			final File logFile) throws IOException {

		// Read in transaction database
		final TransactionList transactions = SequenceMining.readTransactions(transactionDB);

		// Sort sequences by interestingness
		System.out.println("Sorting sequences by interestingness...");
		final HashMap<Sequence, Double> intMap = SequenceMiningCore.calculateInterestingness(sequences, transactions);
		final Map<Sequence, Double> sortedSequences = SequenceMiningCore.sortSequences(sequences, intMap);

		System.out.println("Writing out to file...");
		// Append mode (second argument true) keeps the existing log content;
		// try-with-resources closes the writer even when a write fails
		try (final FileWriter out = new FileWriter(logFile, true)) {
			out.write("\n============= INTERESTING SEQUENCES =============\n");
			for (final Entry<Sequence, Double> entry : sortedSequences.entrySet()) {
				out.write(String.format("%s\tprob: %1.5f \tint: %1.5f %n", entry.getKey(), entry.getValue(),
						intMap.get(entry.getKey())));
			}
			out.write("\n");
		}
		System.out.println("done.");

	}

}