/*
 *  DocTimeReporter.java
 *
 *  Copyright (c) 2008-2009, Intelius, Inc.
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  Chirag Viradiya & Andrew Borthwick, 30/Sep/2009
 *
 *  $Id$
 */
package gate.util.reporting;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.StringTokenizer;
import java.util.Timer;
import java.util.TimerTask;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.IOUtils;

import gate.util.reporting.exceptions.BenchmarkReportExecutionException;
import gate.util.reporting.exceptions.BenchmarkReportFileAccessException;
import gate.util.reporting.exceptions.BenchmarkReportInputFileFormatException;
import gnu.getopt.Getopt;

/**
 * A reporter class to generate a report on time taken by each document within
 * given corpus.
 */
public class DocTimeReporter implements BenchmarkReportable {

  /** Handle to the input benchmark file; defaults to "benchmark.txt". */
  private File benchmarkFile = new File("benchmark.txt");
  /** Report media; one of {@link #MEDIA_TEXT} or {@link #MEDIA_HTML}. */
  private String printMedia = MEDIA_HTML;
  /** Number of documents to be displayed against matching PRs. */
  private int maxDocumentInReport = 10;
  /** Search string used to select PRs; could be a PR name. */
  private String PRMatchingRegex = MATCH_ALL_PR_REGEX;
  /** A marker indicating the start of the current logical run. */
  private String logicalStart = null;
  /** Path where the report file is saved. */
  private File reportFile;

  /**
   * A HashSet containing the names of the documents matching the given search
   * string.
   */
  private HashSet<String> allDocs = new HashSet<String>();
  /**
   * A HashSet containing PR names matching the search string. Used for display
   * in the report header.
   */
  private HashSet<String> matchingPRs = new HashSet<String>();
  /** Total time taken by the given pipeline for the current logical run. */
  private float globalTotal = 0;
  /** A LinkedHashMap containing the documents matching the given PRs. */
  private LinkedHashMap<String, String> docContainer = new LinkedHashMap<String, String>();
  /**
   * Folder where the benchmark.txt files are created for specific pipeline log
   * entries.
   */
  private File temporaryDirectory;
  /** Name of the given pipeline. */
  private String pipelineName = "";
  /** Status flag for normal exit. */
  private static final int STATUS_NORMAL = 0;
  /** Status flag for error exit. */
  private static final int STATUS_ERROR = 1;
  /** Chunk size in which the file will be read. */
  private static final int FILE_CHUNK_SIZE = 2000;
  /** An OS independent line separator. */
  private static final String NL = System.getProperty("line.separator");
  /**
   * Count of the valid log entries found in the input file provided.
   * Incremented each time a log entry passes validation.
   */
  public int validEntries = 0;

  /**
   * This string constant, when set as print media, indicates that the report
   * is printed in TEXT format.
   */
  public static final String MEDIA_TEXT = "text";
  /**
   * This string constant, when set as print media, indicates that the report
   * is printed in HTML format.
   */
  public static final String MEDIA_HTML = "html";

  /**
   * This integer constant, when set as the number of documents, indicates
   * that the report lists all the documents matching a given PR.
   */
  public static final int ALL_DOCS = -1;

  /**
   * The default value for the search string; it selects all PRs of the given
   * run.
   */
  public static final String MATCH_ALL_PR_REGEX = "all_prs";

  /**
   * Creates a reporter with the default settings and prepares the temporary
   * directory it works in.
   */
  public DocTimeReporter() {
    this.initTmpDir();
  }

  /**
   * Creates a reporter configured from command line arguments. The temporary
   * directory is prepared first, then the arguments are parsed.
   *
   * @param args array containing command line arguments.
   */
  DocTimeReporter(String[] args) {
    this();
    parseArguments(args);
  }

  /**
   * Prepares {@code temporaryDirectory}: a uniquely named temporary file is
   * created, deleted and re-created as a directory of the same name. Any
   * failure is only reported on the standard error stream.
   */
  private void initTmpDir() {
    try {
      temporaryDirectory = File.createTempFile("benchmark-reports", "", null);
      // The placeholder file must go away before a directory can take its name.
      boolean replacedByDirectory =
        temporaryDirectory.delete() && temporaryDirectory.mkdir();
      if (!replacedByDirectory) {
        throw new IOException("Unable to create temporary directory.\n"
          + temporaryDirectory.getCanonicalPath());
      }
    } catch (IOException e) {
      e.printStackTrace();
    }
  }


  /**
   * Totals the time recorded per leaf element of the given hierarchy and
   * returns the totals ordered by descending value.
   *
   * @param reportContainer
   *          An Object of type LinkedHashMap<String, Object> containing the
   *          processing elements (with time in milliseconds) in hierarchical
   *          structure.
   * @return An Object holding a LinkedHashMap of the totaled elements, sorted
   *         by value in descending order.
   */
  @SuppressWarnings("unchecked")
  @Override
  public Object calculate(Object reportContainer) {
    LinkedHashMap<String, Object> hierarchy =
      (LinkedHashMap<String, Object>) reportContainer;
    LinkedHashMap<String, String> totals = doTotal(hierarchy);
    return sortHashMapByValues(totals);
  }

  /**
   * Builds a copy of the given map ordered by value, largest first (as ranked
   * by {@code ValueComparator}). Entries sharing a value appear in ascending
   * key order. Every entry moved into the result is removed from
   * {@code passedMap}, so the passed map is drained by this call.
   *
   * @param passedMap
   *          An Object of type LinkedHashMap to be sorted by its values.
   * @return An Object containing the sorted LinkedHashMap.
   */
  private LinkedHashMap<?,?> sortHashMapByValues(LinkedHashMap<String,String> passedMap) {
    List<String> remainingKeys = new ArrayList<String>(passedMap.keySet());
    List<String> descendingValues = new ArrayList<String>(passedMap.values());

    Collections.sort(descendingValues, new ValueComparator());
    Collections.sort(remainingKeys);
    // The comparator yields ascending order; flip it to get largest first.
    Collections.reverse(descendingValues);

    LinkedHashMap<String,String> ordered = new LinkedHashMap<String,String>();
    for (String value : descendingValues) {
      // Claim the first not yet used key (alphabetically) that carries this
      // value.
      for (int idx = 0; idx < remainingKeys.size(); idx++) {
        String candidate = remainingKeys.get(idx);
        if (passedMap.get(candidate).equals(value)) {
          passedMap.remove(candidate);
          remainingKeys.remove(idx);
          ordered.put(candidate, value);
          break;
        }
      }
    }
    return ordered;
  }

  /**
   * Walks the given hierarchy and accumulates the leaf values per key into
   * {@code docContainer}. Nested maps are processed recursively; a leaf whose
   * key is already present is added to the stored value.
   *
   * @param reportContainer
   *          An Object of type LinkedHashMap<String, Object> containing the
   *          processing elements (with time in milliseconds) in hierarchical
   *          structure.
   * @return The {@code docContainer} map with the element values totaled.
   */
  @SuppressWarnings("unchecked")
  private LinkedHashMap<String, String> doTotal(
    LinkedHashMap<String, Object> reportContainer) {
    for (String name : reportContainer.keySet()) {
      Object entry = reportContainer.get(name);
      if (entry instanceof LinkedHashMap) {
        docContainer = doTotal((LinkedHashMap<String, Object>) entry);
        continue;
      }
      String time = (String) entry;
      String runningTotal = docContainer.get(name);
      if (runningTotal == null) {
        // First time this key is seen: keep the value as it is.
        docContainer.put(name, time);
      } else {
        int sum = Integer.parseInt(runningTotal) + Integer.parseInt(time);
        docContainer.put(name, Integer.toString(sum));
      }
    }
    return docContainer;
  }

  /**
   * Writes the report in the format selected by the print media option. A
   * print media value that is neither text nor HTML results in no output.
   *
   * @param reportSource
   *          An Object of type LinkedHashMap<String, Object> containing the
   *          processing elements (with time in milliseconds) in hierarchical
   *          structure.
   * @param outputFile
   *          Path where to save the report.
   */
  @SuppressWarnings("unchecked")
  @Override
  public void printReport(Object reportSource, File outputFile) {
    if (printMedia.equalsIgnoreCase(MEDIA_TEXT)) {
      printToText(reportSource, outputFile);
      return;
    }
    if (printMedia.equalsIgnoreCase(MEDIA_HTML)) {
      LinkedHashMap<String, Object> htmlSource =
        (LinkedHashMap<String, Object>) reportSource;
      printToHTML(htmlSource, outputFile);
    }
  }

  /**
   * Appends the benchmark report in plain text format to the output file: a
   * header naming the pipeline and the matched PRs, one tab separated line per
   * document (name, seconds, percentage of the pipeline total) and a closing
   * pipeline total line.
   *
   * @param reportContainer
   *          An Object of type LinkedHashMap<String, Object> containing the
   *          document names (with time in milliseconds) in hierarchical
   *          structure.
   * @param outputFile
   *          An object of type File representing the output report file.
   */
  private void printToText(Object reportContainer, File outputFile) {
    @SuppressWarnings("unchecked")
    LinkedHashMap<String, Object> timesByDocument =
      (LinkedHashMap<String, Object>) reportContainer;
    final String doubleRule =
      "=============================================================";
    final String singleRule =
      "-------------------------------------------------------------";
    final boolean hasDocuments = allDocs.size() > 0;

    // Number of documents announced in the header line.
    String docs;
    if (maxDocumentInReport == ALL_DOCS) {
      docs = "All";
    } else {
      docs = Integer.toString(Math.min(allDocs.size(), maxDocumentInReport));
    }

    StringBuilder text = new StringBuilder();
    text.append(doubleRule).append(NL);
    if (PRMatchingRegex.equals(MATCH_ALL_PR_REGEX)) {
      text.append("Top ").append(docs)
          .append(" expensive documents matching All PRs in ")
          .append(pipelineName).append(NL);
    } else if (matchingPRs.size() > 0) {
      text.append("Top ").append(docs)
          .append(" expensive documents matching following PRs in ")
          .append(pipelineName).append(NL);
      for (String pr : matchingPRs) {
        text.append("\t").append(pr).append(NL);
      }
    } else {
      text.append("No PRs matched to search string \"")
          .append(getPRMatchingRegex()).append("\"").append(" in ")
          .append(pipelineName);
      text.append(NL);
      text.append(doubleRule).append(NL);
    }

    if (hasDocuments) {
      text.append(doubleRule).append(NL);
      text.append("Document Name").append("\t").append("Time (in seconds)")
          .append("\t").append("%").append(NL);
      text.append(singleRule).append(NL);
    }

    // One line per document; the position counter advances for every key.
    int position = 0;
    for (String name : timesByDocument.keySet()) {
      if (!name.equals("total")) {
        int millis = Integer.parseInt((String) timesByDocument.get(name));
        boolean withinLimit = maxDocumentInReport == ALL_DOCS
            || position < maxDocumentInReport;
        if (withinLimit) {
          text.append(name).append("\t").append(millis / 1000.0).append("\t")
              .append(Math.round(((millis / globalTotal) * 100) * 10) / 10.0)
              .append(NL);
        }
      }
      position++;
    }

    if (hasDocuments) {
      text.append(singleRule).append(NL);
      text.append("Pipeline Total").append("\t").append(globalTotal / 1000.0)
          .append("\t").append(100).append(NL).append(NL).append(NL);
    }

    BufferedWriter writer = null;
    try {
      // Opened in append mode so earlier report content is kept.
      writer = new BufferedWriter(new FileWriter(outputFile, true));
      writer.write(text.toString());
    } catch (IOException e) {
      e.printStackTrace();
    } finally {
      try {
        if (writer != null) { writer.close(); }
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
  }

  /**
   * Stores GATE processing elements and the time taken by them in an in-memory
   * data structure for report generation.
   * <p>
   * The pipeline name is derived from the file name by dropping the
   * "_benchmark.txt" suffix. While reading, the time of every entry logged
   * directly against the start token is added to {@code globalTotal}, and the
   * entries selected by the PR search string are recorded per PR and document.
   *
   * @param inputFile
   *          A handle to the input benchmark file.
   *
   * @return An Object of type LinkedHashMap<String, Object> containing the
   *         processing elements (with time in milliseconds) in hierarchical
   *         structure. Null if there was an error reading the file.
   *
   * @throws BenchmarkReportInputFileFormatException
   *           if the input file provided is not a valid benchmark file: a
   *           START entry without a token, or a timing entry that appears
   *           before any START marker or document name has been read.
   */
  @Override
  public Object store(File inputFile)
      throws BenchmarkReportInputFileFormatException {
    pipelineName = inputFile.getName().replace("_benchmark.txt", "");
    LinkedHashMap<String, Object> globalStore =
      new LinkedHashMap<String, Object>();
    // Generic timing entry: "<timestamp> <duration> <id chain> <class> {features}"
    Pattern entryPattern =
      Pattern.compile("(\\d+) (\\d+) (.*) (.*) \\{(.*)\\}");
    // Pattern matching for extracting document name
    Pattern docNamePattern = Pattern.compile(".*documentName=(.*?)[,|}].*");
    Pattern startMarkerPattern = Pattern.compile(".*START.*");
    BufferedReader in = null;
    try {
      in = new BufferedReader(new FileReader(inputFile));
      String str;
      String docName = null;
      String startToken = null;
      // Matches entries logged directly against the current start token;
      // rebuilt only when a START line changes the token.
      Pattern docEndPattern = null;
      // Reading the benchmark.txt one line at a time
      while ((str = in.readLine()) != null) {
        if (startMarkerPattern.matcher(str).matches()) {
          String[] splittedStartEntry = str.split("\\s");
          if (splittedStartEntry.length <= 2) {
            throw new BenchmarkReportInputFileFormatException(
                getBenchmarkFile() + " is invalid.");
          }
          startToken = splittedStartEntry[2];
          docEndPattern = Pattern.compile("(\\d+) (\\d+) "
              + Pattern.quote(startToken) + " (.*) \\{(.*)\\}.*");
        }

        Matcher docNameMatcher = docNamePattern.matcher(str);
        if (docNameMatcher.matches()) {
          docName = docNameMatcher.group(1);
        }

        if (docEndPattern != null) {
          Matcher docEndMatcher = docEndPattern.matcher(str);
          if (docEndMatcher.matches()) {
            globalTotal = globalTotal
                + Integer.parseInt(docEndMatcher.group(2));
          }
        }

        Matcher entryMatcher = entryPattern.matcher(str);
        if (!entryMatcher.matches()) {
          continue;
        }
        // An entry cannot be attributed without a start token and a document
        // name; report the file as malformed instead of failing with a
        // NullPointerException.
        if (startToken == null || docName == null) {
          throw new BenchmarkReportInputFileFormatException(
              getBenchmarkFile() + " is invalid: a timing entry was found"
              + " before a START marker and a document name.");
        }

        String benchmarkIDs = entryMatcher.group(3)
            .replaceFirst(Pattern.quote(startToken) + ".", "")
            .replaceFirst("doc_" + Pattern.quote(docName) + ".", "");
        String[] splittedBenchmarkIDs = benchmarkIDs.split("\\.");
        if (splittedBenchmarkIDs.length == 0) {
          // The id chain holds no segment, so there is no PR to attribute the
          // entry to.
          continue;
        }
        // Getting the exact PR name and storing only entries matching PR name
        String matchedPR = splittedBenchmarkIDs[0];
        if (PRMatchingRegex.equals(MATCH_ALL_PR_REGEX)) {
          if (!matchedPR.equalsIgnoreCase(startToken)) {
            organizeEntries(globalStore, matchedPR, entryMatcher.group(2),
                docName);
          }
        } else if (isPRMatched(benchmarkIDs, PRMatchingRegex)) {
          matchingPRs.add(matchedPR);
          organizeEntries(globalStore, matchedPR, entryMatcher.group(2),
              docName);
        }
      }

    } catch (IOException e) {
      e.printStackTrace();
      globalStore = null;

    } finally {
      try {
        if (in != null) { in.close(); }
      } catch (IOException e) {
        e.printStackTrace();
        globalStore = null;
      }
    }
    return globalStore;
  }

  /**
   * Records the time of one log entry under its PR and document. The document
   * name is also remembered in {@code allDocs}. A time already stored for the
   * same PR and document is replaced.
   *
   * @param store
   *          A global LinkedHashMap containing the processing elements (with
   *          time in milliseconds) in hierarchical structure.
   * @param matchedPR
   *          A PR matching the given search string.
   * @param bTime
   *          Time taken by the specific processing element.
   * @param docName
   *          Name of the document being processed.
   */
  @SuppressWarnings("unchecked")
  private void organizeEntries(LinkedHashMap<String, Object> store,
                               String matchedPR, String bTime, String docName) {
    allDocs.add(docName);
    if (!store.containsKey(matchedPR)) {
      // First entry for this PR: start its per-document map.
      LinkedHashMap<String, Object> timesByDocument =
        new LinkedHashMap<String, Object>();
      timesByDocument.put(docName, bTime);
      store.put(matchedPR, timesByDocument);
      return;
    }
    LinkedHashMap<String, Object> timesByDocument =
      (LinkedHashMap<String, Object>) store.get(matchedPR);
    timesByDocument.put(docName, bTime);
  }

  /**
   * Prints the document level statistics report in HTML format.
   * <p>
   * When the output file does not exist yet (or is empty) a complete page
   * header is written first. When it already has content, only a continuation
   * fragment is appended, so that the reports of several pipelines accumulate
   * in one file, as they do for the text report. The "total" key, if present,
   * is removed from {@code reportSource}.
   *
   * @param reportSource
   *          An Object of type LinkedHashMap<String, Object> containing the
   *          document names (with time in milliseconds).
   * @param outputFile
   *          An object of type File representing the output report file to
   *          which the HTML report is to be written.
   */
  private void printToHTML(LinkedHashMap<String, Object> reportSource,
                           File outputFile) {
    String htmlHeader =
      "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"" + NL +
      "\"http://www.w3.org/TR/html4/loose.dtd\">" + NL +
      "<html><head><title>Benchmarking Report</title>" + NL +
      "<meta http-equiv=\"Content-Type\"" +
      " content=\"text/html; charset=utf-8\">" + NL +
      "<style type=\"text/css\">" + NL +
      "div { font-size:12px; margin-top: 4; }" + NL +
      "</style>" + NL +
      "</head>" + NL +
      "<body style=\"font-family:Verdana; color:navy;\">" + NL;
    String hTrace =
      "<div style=\"right: 0pt; border-top:1px solid #C9D7F1;" +
        " font-size:1px;\" ></div>" + NL;

    // Number of documents announced in the title.
    String docs;
    if (maxDocumentInReport == ALL_DOCS) {
      docs = "All";
    } else {
      docs = Integer.toString(Math.min(allDocs.size(), maxDocumentInReport));
    }

    StringBuilder reportTitle = new StringBuilder(hTrace);
    if (PRMatchingRegex.equals(MATCH_ALL_PR_REGEX)) {
      reportTitle
        .append("<div style=\"font-size:15px;font-family:Verdana; color:navy;\">Top ")
        .append(docs).append(" expensive documents matching All PRs in <b>")
        .append(pipelineName).append("</b></div>").append(NL);
    } else if (matchingPRs.size() > 0) {
      reportTitle
        .append("<div style=\"font-size:15px;font-family:Verdana; color:navy;\">Top ")
        .append(docs)
        .append(" expensive documents matching following PRs in <b>")
        .append(pipelineName).append("</b> <ul>").append(NL);
      for (String pr : matchingPRs) {
        reportTitle.append("<li>").append(pr).append("</li>");
      }
      reportTitle.append("</ul></div>");
    } else {
      reportTitle
        .append("<div style=\"font-size:15px;font-family:Verdana; color:navy;\">")
        .append("No PRs matched to search string \"")
        .append(getPRMatchingRegex()).append(" \" in ")
        .append(pipelineName).append("</div>");
    }
    reportTitle.append(hTrace);

    // Must be decided before the writer is opened, because opening it creates
    // the file. An existing report gets a fragment appended to it; a new (or
    // empty) file gets the full page header.
    boolean appending = outputFile.length() > 0L;

    StringBuilder htmlReport = new StringBuilder();
    htmlReport.append(appending ? "<br/><br/>" : htmlHeader);
    htmlReport.append(reportTitle);

    if (allDocs.size() > 0) {
      htmlReport.append("<table><tr bgcolor=\"#eeeeff\">")
        .append("<td><b>Document Name</b></td>")
        .append("<td><b>Time in seconds</b></td>")
        .append("<td><b>% Time taken</b></td>")
        .append("</tr><tr>").append(NL);

      // The three table cells are filled in parallel, one <div> per document.
      StringBuilder nameCell =
        new StringBuilder("<td rowspan = '112' width = '550'>");
      StringBuilder timeCell = new StringBuilder("<td width = '100'>");
      StringBuilder percentCell = new StringBuilder("<td width = '100'>");

      reportSource.remove("total");
      int count = 0;
      for (String key : reportSource.keySet()) {
        int value = Integer.parseInt((String) reportSource.get(key));
        if (maxDocumentInReport == ALL_DOCS || count < maxDocumentInReport) {
          nameCell.append("<div>").append(key).append("</div>");
          timeCell.append("<div>").append(value / 1000.0).append("</div>");
          percentCell.append("<div>")
            .append(Math.round(((value / globalTotal) * 100) * 10) / 10.0)
            .append("</div>").append(NL);
        }
        count++;
      }

      nameCell
        .append("<div bgcolor=\"#eeeeff\" style = \"font-size:15px;margin-left:400px;\">")
        .append("<b>Total</b></div></td>").append(NL);
      timeCell
        .append("<div bgcolor=\"#eeeeff\" style = \"font-size:15px;\"><b>")
        .append(globalTotal / 1000.0).append("</b></div></td>").append(NL);
      percentCell
        .append("<div bgcolor=\"#eeeeff\" style = \"font-size:15px;\">")
        .append("<b>100</b></div></td>").append(NL);

      htmlReport.append(nameCell).append(timeCell).append(percentCell)
        .append("</tr></table>");
      // The first report of a file is left open so that later reports can be
      // appended to it; appended fragments close the page.
      if (appending) {
        htmlReport.append("</body></html>");
      }
    } else {
      htmlReport.append("</body></html>");
    }

    BufferedWriter out = null;
    try {
      // Append mode: truncating here would discard the report that the
      // continuation fragment is meant to extend.
      out = new BufferedWriter(new FileWriter(outputFile, true));
      out.write(htmlReport.toString());

    } catch (IOException e) {
      e.printStackTrace();

    } finally {
      try {
        if (out != null) { out.close(); }
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
  }

  /**
   * Ignores the inconsistent log entries from the benchmark file. Entries from
   * modules like pronominal coreferencer which have not been converted to new
   * benchmarking conventions are ignored.
   * <p>
   * An entry is valid when its id chain is exactly one of the start tokens, or
   * when it has the form {@code <start token>.doc_<...>.pr_<...>}. Start
   * tokens are matched literally, so regular expression metacharacters in a
   * pipeline name cannot distort the match. Every valid entry increments
   * {@code validEntries}.
   *
   * @param benchmarkIDChain
   *          the chain of benchmark ids. This is the third token in the
   *          benchmark file.
   * @param startTokens
   *          an array of first tokens in the benchmark id chain.
   *
   * @return true if valid log entry; false otherwise.
   */
  private boolean validateLogEntry(String benchmarkIDChain,
                                   ArrayList<String> startTokens) {
    boolean singleSegment = benchmarkIDChain.split("\\.").length == 1;
    StringBuilder startTokenRegExp = new StringBuilder("(");
    for (int i = 0; i < startTokens.size(); i++) {
      String token = startTokens.get(i);
      if (singleSegment && benchmarkIDChain.equals(token)) {
        validEntries += 1;
        return true;
      }
      if (i > 0) {
        startTokenRegExp.append('|');
      }
      // Quote the token: it is a name taken from the log, not a pattern.
      startTokenRegExp.append(Pattern.quote(token));
    }
    startTokenRegExp.append(')');
    if (benchmarkIDChain.matches(startTokenRegExp + "\\.doc_.*?\\.pr_.*")) {
      validEntries += 1;
      return true;
    }
    return false;
  }

  /**
   * Parses the report command line arguments and applies them through the
   * corresponding setters. Recognised options are -i (input file), -m (print
   * media), -d (number of documents), -p (PR name), -o (report file),
   * -l (logical start) and -h (usage). Usage requests and unknown options
   * print the usage text and terminate the JVM.
   *
   * @param args array containing the command line arguments.
   */
  @Override
  public void parseArguments(String[] args) {
    Getopt options = new Getopt("gate.util.reporting.DocTimeReporter", args,
        "i:m:d:p:o:l:h");
    String requestedDocCount = null;
    for (int option = options.getopt(); option != -1;
         option = options.getopt()) {
      switch (option) {
      // -i inputFile
      case 'i': {
        String inputPath = options.getOptarg();
        if (inputPath != null) {
          setBenchmarkFile(new File(inputPath));
        }
        break;
      }
      // -m printMedia
      case 'm': {
        String media = options.getOptarg();
        if (media != null) {
          setPrintMedia(media);
        }
        break;
      }
      // -d noOfDocs; the number itself is parsed once all options are read
      case 'd': {
        requestedDocCount = options.getOptarg();
        if (requestedDocCount == null) {
          setMaxDocumentInReport(maxDocumentInReport);
        }
        break;
      }
      // -p prName
      case 'p': {
        String prName = options.getOptarg();
        setPRMatchingRegex(prName != null ? prName : PRMatchingRegex);
        break;
      }
      // -o Report File
      case 'o': {
        String outputPath = options.getOptarg();
        if (outputPath != null) {
          setReportFile(new File(outputPath));
        }
        break;
      }
      // -l logical start
      case 'l': {
        String start = options.getOptarg();
        if (start != null) {
          setLogicalStart(start);
        }
        break;
      }
      // -h usage information
      case 'h':
      case '?':
        usage();
        System.exit(STATUS_NORMAL);
        break;

      default:
        usage();
        System.exit(STATUS_ERROR);
        break;

      } // getopt switch
    }
    if (requestedDocCount == null) {
      return;
    }
    try {
      setMaxDocumentInReport(Integer.parseInt(requestedDocCount));
    } catch (NumberFormatException e) {
      e.printStackTrace();
      usage();
      System.exit(STATUS_ERROR);
    }
  }

  /**
   * Returns the media on which the report will be generated, e.g. text or
   * HTML.
   *
   * @return the name of the currently selected print media.
   */
  public String getPrintMedia() {
    return this.printMedia;
  }

  /**
   * Sets the media on which report will be generated.
   * <p>
   * Surrounding whitespace and letter case are ignored when matching the
   * value, and the stored value is always one of the two constants.
   *
   * @param printMedia Type of media on which the report will be generated.
   * Must be MEDIA_TEXT or  MEDIA_HTML.
   * The default is MEDIA_HTML.
   * @throws IllegalArgumentException if the value is null or names neither
   * media.
   */
  public void setPrintMedia(String printMedia) {
    // Trim before validating; trimming afterwards could never take effect.
    String requested = (printMedia == null) ? null : printMedia.trim();
    if (MEDIA_HTML.equalsIgnoreCase(requested)) {
      this.printMedia = MEDIA_HTML;
    } else if (MEDIA_TEXT.equalsIgnoreCase(requested)) {
      this.printMedia = MEDIA_TEXT;
    } else {
      throw new IllegalArgumentException("Illegal argument: " + printMedia);
    }
  }

  /**
   * Tells whether the user supplied search string matches the PR named by the
   * first segment of the given benchmark ids. Both sides are normalised
   * before comparison: a leading "pr"/"pr_" is dropped and underscores become
   * spaces. The search string is then looked for, case-insensitively and as
   * a regular expression, anywhere in the PR name. Only segments starting
   * with "pr_" can match.
   *
   * @param benchmarkIDs
   *          A string of benchmarkIDs containing the PR name at the start of
   *          string.
   * @param searchString
   *          The string to be matched for PR name.
   *
   * @return boolean true if search string matches PR name; false otherwise.
   */
  private boolean isPRMatched(String benchmarkIDs, String searchString) {
    String firstSegment = benchmarkIDs.split("\\.")[0];
    // Strip whitespace and the "pr"/"pr_" prefix, turn underscores into
    // spaces and collapse runs of whitespace.
    String normalizedSearch = searchString.trim()
        .replaceAll("^(pr|pr_)", "")
        .replaceAll("_", " ")
        .replaceAll("\\s+", " ")
        .trim();
    if (!firstSegment.startsWith("pr_")) {
      return false;
    }
    String readableName =
      firstSegment.replaceAll("^pr_", "").replaceAll("_", " ");
    return readableName.matches("(?i).*" + normalizedSearch + ".*");
  }

  /**
   * Deletes the given file if it is a normal file; anything else is left
   * untouched.
   *
   * @param fileToBeDeleted
   *          A handle of the file to be deleted.
   * @throws BenchmarkReportFileAccessException
   *           if a given file could not be deleted.
   */
  private void deleteFile(File fileToBeDeleted)
      throws BenchmarkReportFileAccessException {
    boolean deletionFailed =
      fileToBeDeleted.isFile() && !fileToBeDeleted.delete();
    if (deletionFailed) {
      throw new BenchmarkReportFileAccessException(
        "Could not delete " + fileToBeDeleted.getAbsolutePath());
    }
  }

  /**
   * Provides the functionality to separate out pipeline specific benchmark
   * entries in separate temporary benchmark files in the temporary folder.
   * <p>
   * The temporary folder is emptied (or created) first. Each new START token
   * opens a file named {@code <token>_benchmark.txt}; every valid entry is
   * then appended to the file of the pipeline named by the first segment of
   * its id chain. I/O errors while reading or writing are only reported on
   * the standard error stream.
   *
   * @param benchmarkFile
   *          An object of type File representing the input benchmark file.
   * @param report
   *          A file handle to the report file to be written.
   * @throws BenchmarkReportFileAccessException
   *           if the temporary folder cannot be emptied or created, or a
   *           pipeline specific file or stale report file cannot be created
   *           or deleted.
   * @throws BenchmarkReportInputFileFormatException
   *           if a START entry does not carry a start token.
   */
  // NOTE(review): the previous documentation listed
  // BenchmarkReportExecutionException; nothing in this method throws it
  // directly - confirm whether tail() can.
  private void splitBenchmarkFile(File benchmarkFile, File report)
      throws BenchmarkReportFileAccessException,
             BenchmarkReportInputFileFormatException {
    File dir = temporaryDirectory;
    // Folder already exists; then delete all files in the temporary folder
    if (dir.isDirectory()) {
      File files[] = dir.listFiles();
      // listFiles() yields null when the folder cannot be read.
      if (files == null) {
        throw new BenchmarkReportFileAccessException(
          "Could not list files in the folder \"" +
          temporaryDirectory + "\"");
      }
      for (int count = 0; count < files.length; count++) {
        if (!files[count].delete()) {
          throw new BenchmarkReportFileAccessException(
            "Could not delete files in the folder \"" +
            temporaryDirectory + "\"");
        }
      }
    } else if (!dir.mkdir()) {
      throw new BenchmarkReportFileAccessException(
        "Could not create  temporary folder \"" + temporaryDirectory + "\"");
    }

    // delete report2 from the filesystem
    if (getPrintMedia().equalsIgnoreCase(MEDIA_TEXT)) {
      deleteFile(new File(report.getAbsolutePath() + ".txt"));
    } else if (getPrintMedia().equalsIgnoreCase(MEDIA_HTML)) {
      deleteFile(new File(report.getAbsolutePath() + ".html"));
    }

    RandomAccessFile in = null;
    BufferedWriter out = null;
    try {
      String logEntry = "";
      long fromPos = 0;

      // With a logical start set, reading begins at the position found by
      // tail() instead of the top of the file.
      if (getLogicalStart() != null) {
        fromPos = tail(benchmarkFile, FILE_CHUNK_SIZE);
      }
      in = new RandomAccessFile(benchmarkFile, "r");

      if (getLogicalStart() != null) {
        in.seek(fromPos);
      }
      ArrayList<String> startTokens = new ArrayList<String>();
      // Pipeline whose file the writer "out" currently points to.
      String lastStart = "";
      Pattern pattern = Pattern.compile("(\\d+) (\\d+) (.*) (.*) \\{(.*)\\}");
      File benchmarkFileName = null;
      while ((logEntry = in.readLine()) != null) {
        Matcher matcher = pattern.matcher(logEntry);
        String startToken = "";
        if (logEntry.matches(".*START.*")) {
          String[] splittedStartEntry = logEntry.split("\\s");
          if (splittedStartEntry.length > 2) {
            startToken = splittedStartEntry[2];
          } else {
            throw new BenchmarkReportInputFileFormatException(
                getBenchmarkFile() + " is invalid.");
          }

          if (startToken.endsWith("Start")) {
            continue;
          }
          if (!startTokens.contains(startToken)) {
            // create a new file for the new pipeline
            startTokens.add(startToken);
            // Close the previous pipeline's writer first; otherwise its
            // buffered entries are lost and the file handle leaks.
            if (out != null) { out.close(); }
            benchmarkFileName = new File(
              temporaryDirectory, startToken + "_benchmark.txt");
            if (!benchmarkFileName.createNewFile()) {
              throw new BenchmarkReportFileAccessException(
                  "Could not create \"" + startToken + "_benchmark.txt"
                      + "\" in directory named \"" + temporaryDirectory + "\"");
            }
            out = new BufferedWriter(new FileWriter(benchmarkFileName));
            out.write(logEntry);
            out.newLine();
            // Keep the marker in step with the open writer so that an entry
            // of another pipeline is not written to this file.
            lastStart = startToken;
          }
        }
        // if a valid benchmark entry then write it to the pipeline specific
        // file
        if (matcher.matches()
            && (validateLogEntry(matcher.group(3), startTokens) || logEntry
                .matches(".*documentLoaded.*"))) {
          startToken = matcher.group(3).split("\\.")[0];
          if (!(lastStart.equals(startToken))) {
            if (out != null) { out.close(); }
            benchmarkFileName = new File(
              temporaryDirectory, startToken + "_benchmark.txt");
            out = new BufferedWriter(new FileWriter(benchmarkFileName, true));
          }
          if (out != null) {
            out.write(logEntry);
            out.newLine();
          }
          lastStart = startToken;
        }
      }

    } catch (IOException e) {
      e.printStackTrace();

    } finally {
      // Closed independently so a failure on one does not leak the other.
      try {
        if (in != null) { in.close(); }
      } catch (IOException e) {
        e.printStackTrace();
      }
      try {
        if (out != null) { out.close(); }
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
  }

  /**
   * Scans the file backwards from its end, one chunk at a time, looking for
   * the marker that indicates the logical start of a run.
   *
   * @param fileToBeRead
   *          An object of the file to be read.
   * @param chunkSize
   *          An integer specifying the size of the chunks in which file will be
   *          read.
   * @return A long value pointing to the start position of the given file
   *         chunk.
   */
  private long tail(File fileToBeRead, int chunkSize)
      throws BenchmarkReportInputFileFormatException {
    RandomAccessFile raf = null;
    try {
      raf = new RandomAccessFile(fileToBeRead, "r");
      // Lines collected so far; lines nearer the end of the file come first.
      Vector<String> lastNlines = new Vector<String>();
      // Exclusive end of the next chunk to scan. The final byte of the file is
      // left out of the scan; clamping at zero keeps an empty file from
      // producing a negative buffer size below.
      long curPos = Math.max(raf.length() - 1, 0L);
      long fromPos;
      // Tracked explicitly: a start offset of 0 alone does not tell whether
      // the marker was actually seen in the first chunk of the file.
      boolean markerFound = false;
      while (true) {
        fromPos = curPos - chunkSize;
        if (fromPos <= 0) {
          // What is left fits into a single chunk: scan from the very start
          // of the file and stop, whether or not the marker is there.
          fromPos = 0;
          byte[] bytearray = new byte[(int) curPos];
          raf.seek(0);
          raf.readFully(bytearray);
          markerFound = parseLinesFromLast(bytearray, lastNlines, fromPos);
          break;
        }
        raf.seek(fromPos);
        byte[] bytearray = new byte[chunkSize];
        raf.readFully(bytearray);
        if (parseLinesFromLast(bytearray, lastNlines, fromPos)) {
          markerFound = true;
          break;
        }
        // The line collected last is the first line of this chunk and may be
        // cut off at the chunk boundary. Drop it and let the next chunk end
        // right after it so that it is read again.
        int delta = 0;
        if (!lastNlines.isEmpty()) {
          delta = lastNlines.remove(lastNlines.size() - 1).length();
        }
        long nextPos = fromPos + delta;
        // A line at least as long as a chunk would leave the position
        // unchanged and loop forever; step back a whole chunk instead.
        curPos = (nextPos < curPos) ? nextPos : fromPos;
      }
      if (!markerFound) {
        throw new BenchmarkReportInputFileFormatException(getBenchmarkFile()
            + " does not contain a marker named "
            + getLogicalStart()
            + " indicating logical start of a run.");
      }
      // Start offset of the chunk in which the marker was found.
      return fromPos;

    } catch (IOException e) {
      // I/O failures are reported on stderr and signalled with -1.
      e.printStackTrace();
      return -1;
    }
    finally {
      IOUtils.closeQuietly(raf);
    }
  }

  /**
   * Splits the given file part into lines, last line first, and checks each
   * line for the marker indicating the logical start of a run.
   *
   * @param bytearray
   *          A part of a file being read upside down.
   * @param lastNlines
   *          A vector containing the lines extracted from file part.
   * @param fromPos
   *          A long value indicating the start of a file part.
   *
   * @return true if marker indicating the logical start of run is found; false
   *         otherwise.
   */
  private boolean parseLinesFromLast(byte[] bytearray,
                                     Vector<String> lastNlines, long fromPos) {
    // Reverse the whole part so that tokenising it yields the lines from the
    // last one to the first one.
    final String reversedPart =
        new StringBuilder(new String(bytearray)).reverse().toString();
    final StringTokenizer reversedLines = new StringTokenizer(reversedPart, NL);
    while (reversedLines.hasMoreTokens()) {
      // Each token is still reversed; flip it back into reading order.
      final String line =
          new StringBuilder(reversedLines.nextToken()).reverse().toString();
      lastNlines.add(line);
      // Stop at the first line (counted from the end) ending with the marker.
      if (line.trim().endsWith(getLogicalStart())) {
        return true;
      }
    }
    return false;
  }

  /**
   * Display a usage message
   */
  public static void usage() {
    // One entry per output line; every line is terminated with NL.
    final String[] helpLines = {
      "Usage: java gate.util.reporting.DocTimeReporter [Options]",
      "\t Options:",
      "\t -i input file path (default: benchmark.txt in the execution directory)",
      "\t -m print media - html/text (default: html)",
      "\t -d number of docs, use -1 for all docs (default: 10 docs)",
      "\t -p processing resource name to be matched (default: all_prs)",
      "\t -o output file path (default: report.html/txt in the system temporary directory)",
      "\t -l logical start (not set by default)",
      "\t -h show help"
    };
    final StringBuilder help = new StringBuilder();
    for (String helpLine : helpLines) {
      help.append(helpLine).append(NL);
    }
    System.out.println(help.toString());
  } // usage()

  /**
   * A main method which acts as an entry point when executing a report from
   * the command line.
   *
   * @param args
   *          A string array containing the command line arguments.
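   * @throws BenchmarkReportInputFileFormatException
   *           if the benchmark file is invalid, e.g. it contains no valid log
   *           entries.
   * @throws BenchmarkReportFileAccessException
   *           if a file needed for generating the report cannot be created.
   * @throws BenchmarkReportExecutionException
   *           if a given input file is modified while generating the report.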
   */
  public static void main(String[] args)
      throws BenchmarkReportInputFileFormatException,
             BenchmarkReportFileAccessException {
    // The constructor processes the command-line options; the report is then
    // generated straight away on the configured instance.
    new DocTimeReporter(args).generateReport();
  }

  /**
   * Calls store, calculate and printReport for generating the actual report.
   */
  private void generateReport() throws BenchmarkReportInputFileFormatException,
                                       BenchmarkReportFileAccessException {
    Timer timer = null;
    try {
      // Watch the benchmark file for modifications while the report is built.
      // NOTE(review): onChange is invoked from the Timer's thread, so the
      // exception thrown there does not appear to reach the caller of this
      // method - confirm whether that is intended.
      TimerTask task = new FileWatcher(getBenchmarkFile()) {
        @Override
        protected void onChange(File file) {
          throw new BenchmarkReportExecutionException(getBenchmarkFile()
              + " file has been modified while generating the report.");
        }
      };
      timer = new Timer();
      // repeat the check every second
      timer.schedule(task, new Date(), 1000);

      if (reportFile == null) {
        // Default location: report.html or report.txt in the system temporary
        // directory. The media is compared ignoring case, as it is elsewhere
        // in this class.
        reportFile = new File(System.getProperty("java.io.tmpdir"),
          "report." + (printMedia.equalsIgnoreCase(MEDIA_HTML) ? "html" : "txt"));
      }
      // Split the benchmark file into one temporary file per pipeline.
      splitBenchmarkFile(getBenchmarkFile(), reportFile);
      if (validEntries == 0) {
        if (logicalStart != null) {
          throw new BenchmarkReportInputFileFormatException(
            "No valid log entries present in " + getBenchmarkFile() +
            " does not contain a marker named " + logicalStart + ".");
        } else {
          throw new BenchmarkReportInputFileFormatException(
            "No valid log entries present in "
            + getBenchmarkFile().getAbsolutePath());
        }
      }
      File dir = temporaryDirectory;
      if (dir.isDirectory()) {
        // listFiles() returns null when the directory cannot be read.
        File[] files = dir.listFiles();
        if (files != null) {
          // Produce a report section for every file in the temporary folder.
          for (File inFile : files) {
            Object report2Container1 = store(inFile);
            Object report2Container2 = calculate(report2Container1);
            printReport(report2Container2, reportFile);
          }
          // Then delete all the processed files from the temporary folder,
          // not just the first one.
          for (File processed : files) {
            if (processed.exists() && !processed.delete()) {
              System.err.println(processed + " was not possible to delete.");
            }
          }
        }
      }
    } finally {
      // Always stop the watcher, whether the report succeeded or not.
      if (timer != null) { timer.cancel(); }
    }
  }

  /*
   * (non-Javadoc)
   *
   * @see gate.util.reporting.BenchmarkReportable#executeReport()
   */
  @Override
  public void executeReport() throws BenchmarkReportInputFileFormatException,
                                     BenchmarkReportFileAccessException {
    // Public entry point; all the work is done by the private generator.
    this.generateReport();
  }

  /**
   * Returns the marker indicating logical start of a run.
   *
   * @return logicalStart A String containing the marker indicating logical
   *         start of a run.
   */
  public String getLogicalStart() {
    // May be null when no logical start marker has been set.
    return this.logicalStart;
  }

  /**
   * Sets optionally a string indicating the logical start of a run.
   *
   * @param logicalStart A String indicating the logical start of a run.
   * Useful when you have marked different runs in
   * your benchmark file with this string at their start.
   * By default the value is null.
   */
  public void setLogicalStart(String logicalStart) {
    // No validation here: null is allowed and means that no logical start
    // marker is used.
    this.logicalStart = logicalStart;
  }

  /**
   * @return benchmarkFile path to input benchmark file.
   * @see #setBenchmarkFile(java.io.File)
   */
  public File getBenchmarkFile() {
    // Handle to the input benchmark file currently configured.
    return this.benchmarkFile;
  }

  /**
   * Sets the input benchmark file from which the report is generated.
   * By default use the file named "benchmark.txt" from the application
   * execution directory.
   *
   * @param benchmarkFile Input benchmark file.
   */
  public void setBenchmarkFile(File benchmarkFile) {
    // Stored as given; existence and format of the file are not checked here.
    this.benchmarkFile = benchmarkFile;
  }

  /**
   * @return reportFile file path where the report file is written.
   * @see #setReportFile(java.io.File)
   */
  public File getReportFile() {
    // Null until a report file has been set or a report has been generated.
    return this.reportFile;
  }

  /**
   * If not set, the default is the file name "report.txt/html"
   * in the system temporary directory.
   *
   * @param reportFile file path to the report file to write.
   */
  public void setReportFile(File reportFile) {
    // Stored as given; when left null, generateReport() falls back to
    // report.html or report.txt in the java.io.tmpdir directory.
    this.reportFile = reportFile;
  }

  /**
   * Returns the maximum no of documents to be shown in the report.
   *
   * @return maxDocumentInReport An integer specifying the maximum no of
   *         documents to be shown in the report.
   */
  public int getMaxDocumentInReport() {
    // Either a positive count or the ALL_DOCS constant (see the setter).
    return this.maxDocumentInReport;
  }

  /**
   * Maximum number of documents contained in the report.
   * @param maxDocumentInReport Maximum number of documents contained in
   * the report. Use the constant ALL_DOCS for reporting all documents.
   * The default is 10.
   */
  public void setMaxDocumentInReport(int maxDocumentInReport) {
    // Accept any positive count, or the ALL_DOCS constant; reject the rest.
    final boolean positiveCount = maxDocumentInReport > 0;
    final boolean allDocuments = maxDocumentInReport == ALL_DOCS;
    if (!positiveCount && !allDocuments) {
      throw new IllegalArgumentException(
        "Illegal argument: " + maxDocumentInReport);
    }
    this.maxDocumentInReport = maxDocumentInReport;
  }

  /**
   * Returns the search string to be matched to PR names present in the log
   * entries.
   *
   * @return PRMatchingRegex A String to be matched to PR names present in the
   *         log entries.
   */
  public String getPRMatchingRegex() {
    // Search string currently used for matching PR names.
    return this.PRMatchingRegex;
  }

  /**
   * Search string to match PR names present in the benchmark file.
   *
   * @param matchingRegex regular expression to match PR names
   * present in the benchmark file. The default is MATCH_ALL_PR_REGEX.
   */
  public void setPRMatchingRegex(String matchingRegex) {
    // Stored as given; the expression is not compiled or validated here.
    this.PRMatchingRegex = matchingRegex;
  }
}

/**
 * A FileWatcher class to check, at a specified interval, whether the file has
 * been modified.
 */
abstract class FileWatcher extends TimerTask {
  /** The file being watched. */
  private final File file;
  /** Last-modified time of the file, captured when the watcher is created. */
  private final long timeStamp;

  /**
   * Creates a FileWatcher on a given file and remembers the file's current
   * last-modified time as the reference value.
   *
   * @param file
   *          A handle of the file to be watched.
   */
  public FileWatcher(File file) {
    this.file = file;
    this.timeStamp = file.lastModified();
  }

  /**
   * Compares the file's present last-modified time with the one remembered at
   * construction; on a difference this task is cancelled and
   * {@link #onChange(File)} is called.
   *
   * @see java.util.TimerTask#run()
   */
  @Override
  public final void run() {
    final long currentTimeStamp = file.lastModified();
    if (currentTimeStamp == timeStamp) {
      // Unchanged since the watcher was created: nothing to do.
      return;
    }
    // Cancel this task first, then notify about the change.
    cancel();
    onChange(file);
  }

  /**
   * Specifies the actions to be taken when a file is modified.
   *
   * @param file
   *          A handle of the file to be watched.
   */
  protected abstract void onChange(File file)
    throws BenchmarkReportExecutionException;
}