/*************************************************************************
*                                                                        *
*  This file is part of the 20n/act project.                             *
*  20n/act enables DNA prediction for synthetic biology/bioengineering.  *
*  Copyright (C) 2017 20n Labs, Inc.                                     *
*                                                                        *
*  Please direct all queries to [email protected]                             *
*                                                                        *
*  This program is free software: you can redistribute it and/or modify  *
*  it under the terms of the GNU General Public License as published by  *
*  the Free Software Foundation, either version 3 of the License, or     *
*  (at your option) any later version.                                   *
*                                                                        *
*  This program is distributed in the hope that it will be useful,       *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*  GNU General Public License for more details.                          *
*                                                                        *
*  You should have received a copy of the GNU General Public License     *
*  along with this program.  If not, see <http://www.gnu.org/licenses/>. *
*                                                                        *
*************************************************************************/

package act.installer.bing;

import act.server.BingCacheMongoDB;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;
import org.apache.http.HttpEntity;
import org.apache.http.HttpStatus;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.entity.ContentType;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.BasicHttpClientConnectionManager;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;

/**
 * BingSearchResults provides methods for:
 * - querying the Bing Search API,
 * - caching its results in a Mongo database
 * - returning searched or cached results
 * - finding the best name for a molecule
 */

public class BingSearchResults {

  private static final Logger LOGGER = LogManager.getFormatterLogger(BingSearchResults.class);

  // Default path to the file holding the account key for the Bing Search API
  private static final String ACCOUNT_KEY_FILEPATH = "data/bing/bing_search_api_account_key.txt";
  // Maximum number of results possible per API call. This is the maximum value for URL parameter "count"
  private static final int MAX_RESULTS_PER_CALL = 50;
  // How many search results should be retrieved when getting topSearchResults
  private static final int TOP_N = 50;

  // The centralized location for caching Bing Search queries.
  // TODO: make this changeable without a code change (with CLI maybe?)
  private static final String BING_CACHE_HOST = "localhost";
  private static final int BING_CACHE_MONGO_PORT = 27777;
  private static final String BING_CACHE_MONGO_DATABASE = "bingsearch";

  private static final String BING_API_HOST = "api.cognitive.microsoft.com";
  private static final String BING_API_PATH = "/bing/v5.0/search";

  // Names of the fields read from the cached Mongo documents
  private static final String TOP_SEARCH_RESULTS_FIELD = "topSearchResults";
  private static final String TOTAL_COUNT_FIELD = "totalCountSearchResults";

  private static final ObjectMapper MAPPER = new ObjectMapper();

  private final BingCacheMongoDB bingCacheMongoDB;
  // Both fields below are null when the instance was built with the (boolean) constructor.
  private final BasicHttpClientConnectionManager basicConnManager;
  private final String accountKey;
  private final boolean cacheOnly;

  /** Creates a searcher that reads its account key from the default key file location. */
  public BingSearchResults() {
    this(ACCOUNT_KEY_FILEPATH);
  }

  /** Creates a searcher without an account key or HTTP connection manager.
   * Such an instance can read from the cache, but cannot query the Bing Search API.
   * @param cacheOnly when true, {@link #findBestMoleculeName} only reads counts from the cache
   */
  public BingSearchResults(boolean cacheOnly) {
    this.cacheOnly = cacheOnly;
    this.bingCacheMongoDB = new BingCacheMongoDB(BING_CACHE_HOST, BING_CACHE_MONGO_PORT, BING_CACHE_MONGO_DATABASE);
    this.basicConnManager = null;
    this.accountKey = null;
  }

  /** Creates a searcher able to query the Bing Search API.
   * @param accountKeyFilepath path to a file whose first line is the API account key
   * @throws RuntimeException if the key file cannot be read or is empty (the I/O failure is kept as cause)
   */
  public BingSearchResults(String accountKeyFilepath) {
    this.cacheOnly = false;
    this.bingCacheMongoDB = new BingCacheMongoDB(BING_CACHE_HOST, BING_CACHE_MONGO_PORT, BING_CACHE_MONGO_DATABASE);
    this.basicConnManager = new BasicHttpClientConnectionManager();
    try {
      this.accountKey = getAccountKey(accountKeyFilepath);
    } catch (IOException e) {
      LOGGER.error("Bing Searcher could not find account key at %s", accountKeyFilepath, e);
      // Keep the underlying exception as cause so the actual I/O failure is not lost.
      throw new RuntimeException(
          String.format("Bing Searcher could not find account key at %s", accountKeyFilepath), e);
    }
  }

  /** This function reads the account key from the first line of the given file.
   * @param accountKeyFilename path to the file containing the account key
   * @return the account key to be used for authentication purposes, with surrounding whitespace removed
   * @throws IOException if the file cannot be read, or if its first line is missing or blank
   */
  private static String getAccountKey(String accountKeyFilename) throws IOException {
    // try-with-resources closes the underlying file stream; the charset is explicit rather than platform-dependent.
    try (BufferedReader reader = new BufferedReader(
        new InputStreamReader(new FileInputStream(accountKeyFilename), StandardCharsets.UTF_8))) {
      String firstLine = reader.readLine();
      if (firstLine == null || firstLine.trim().isEmpty()) {
        throw new IOException(String.format("Account key file %s is empty", accountKeyFilename));
      }
      return firstLine.trim();
    }
  }

  /** Normalizes a name into the form used both as cache key and as search query.
   * Locale.ROOT keeps the lower-casing independent of the JVM default locale.
   */
  private static String formatName(String name) {
    return name.toLowerCase(Locale.ROOT);
  }

  /** This function fetches the total number of Bing search results and return the "totalCountSearchResult".
   * @param formattedName name that will be used as search query, lowercase formatted
   * @return the total count search results from Bing search
   * @throws IOException if the API call fails
   */
  private Long fetchTotalCountSearchResults(String formattedName) throws IOException {
    LOGGER.debug("Updating totalCountSearchResults for name: %s.", formattedName);
    // Set count to 1 and offset to 0 since we need only one search result to extract the estimated count.
    final int count = 1;
    final int offset = 0;
    JsonNode results = fetchBingSearchAPIResponse(formattedName, count, offset);
    return results.path("totalEstimatedMatches").asLong();
  }

  /** This function fetches the topN Bing search results for the given name.
   * @param formattedName name that will be used as search query, lowercase formatted
   * @param topN number of Web results to fetch from Bing Search API
   * @return returns a set of SearchResults containing the topN Bing search results
   * @throws IOException if any of the API calls fails
   */
  private Set<SearchResult> fetchTopSearchResults(String formattedName, int topN) throws IOException {
    LOGGER.debug("Updating topSearchResults for name: %s.", formattedName);
    Set<SearchResult> topSearchResults = new HashSet<>();
    // A single call cannot return more than MAX_RESULTS_PER_CALL results, but it is possible to iterate
    // through the results. For example, with MAX_RESULTS_PER_CALL = 50 and topN = 130 we issue
    // (count and offset are URL parameters)
    // QUERY 1: count = 50, offset = 0
    // QUERY 2: count = 50, offset = 50
    // QUERY 3: count = 30, offset = 100
    for (int offset = 0; offset < topN; offset += MAX_RESULTS_PER_CALL) {
      int count = Math.min(MAX_RESULTS_PER_CALL, topN - offset);
      topSearchResults.addAll(fetchSearchResults(formattedName, count, offset));
    }
    return topSearchResults;
  }

  /** This function issues a Bing Search API call and parses the response to extract a set of SearchResults.
   * @param query (String) the term to query for, not URL-encoded.
   * @param count (int) URL parameter indicating how many results to return. Values above
   *              MAX_RESULTS_PER_CALL are reduced to MAX_RESULTS_PER_CALL.
   * @param offset (int) URL parameter indicating the offset for results.
   * @return returns a set of SearchResults parsed from the "value" array of the response
   * @throws IOException if the API call fails
   */
  private Set<SearchResult> fetchSearchResults(String query, int count, int offset) throws IOException {
    int effectiveCount = count;
    if (count > MAX_RESULTS_PER_CALL) {
      LOGGER.warn("Number of results requested (%d) was too high. Will get only %d", count, MAX_RESULTS_PER_CALL);
      // Actually enforce the limit announced in the warning above.
      effectiveCount = MAX_RESULTS_PER_CALL;
    }
    Set<SearchResult> searchResults = new HashSet<>();
    JsonNode webResults = fetchBingSearchAPIResponse(query, effectiveCount, offset).path("value");
    for (JsonNode webResult : webResults) {
      SearchResult searchResult = new SearchResult();
      searchResult.populateFromJsonNode(webResult);
      searchResults.add(searchResult);
    }
    return searchResults;
  }

  /** This function issues a Bing search API call and gets the JSON node containing the relevant results
   * (the "webPages" node of the response, which holds the total count and the search results).
   * @param queryTerm (String) the term to query for. Must NOT be URL-encoded: the URI builder encodes it.
   * @param count (int) URL parameter indicating how many results to return. Must be at least 1.
   * @param offset (int) URL parameter indicating the offset for results.
   * @return the "webPages" node of the response (a missing node if the response has no such field); never null.
   * @throws IOException if the URI cannot be built, the call fails, the status code is not 200,
   *                     or the response has no body
   * @throws IllegalArgumentException if count is not strictly positive
   * @throws IllegalStateException if this instance has no account key
   */
  private JsonNode fetchBingSearchAPIResponse(String queryTerm, int count, int offset) throws IOException {

    if (count <= 0) {
      LOGGER.error("Bing Search API was called with \"count\" URL parameter = 0. Please request at least one result.");
      throw new IllegalArgumentException(
          "Bing Search API \"count\" URL parameter must be at least 1, but was " + count);
    }
    if (accountKey == null) {
      throw new IllegalStateException("Cannot query the Bing Search API: no account key was loaded.");
    }

    final URI uri;
    try {
      // Transaction cost is [count] bings, where [count] is the value of the URL parameter "count".
      // In other words, we can make 5M calls with [count]=1 per month.

      // Example: https://api.cognitive.microsoft.com/bing/v5.0/search?q=porsche&responseFilter=webpages
      uri = new URIBuilder()
          .setScheme("https")
          .setHost(BING_API_HOST)
          .setPath(BING_API_PATH)
          // The raw query term: URIBuilder takes care of escaping parameter values, so encoding the
          // term beforehand would send doubly-encoded text to the API.
          .setParameter("q", queryTerm)
          // Restrict response to Web Pages only
          .setParameter("responseFilter", "webpages")
          // "count" parameter.
          .setParameter("count", String.valueOf(count))
          // "offset" parameter.
          .setParameter("offset", String.valueOf(offset))
          .build();
    } catch (URISyntaxException e) {
      LOGGER.error("An error occurred when trying to build the Bing Search API URI", e);
      // Do not carry on with a null URI.
      throw new IOException("Could not build the Bing Search API URI", e);
    }

    HttpGet httpget = new HttpGet(uri);
    // Yay for un-encrypted account key!
    // TODO: actually is there a way to encrypt it?
    httpget.setHeader("Ocp-Apim-Subscription-Key", accountKey);

    CloseableHttpClient httpclient = HttpClients.custom().setConnectionManager(basicConnManager).build();

    try (CloseableHttpResponse response = httpclient.execute(httpget)) {
      int statusCode = response.getStatusLine().getStatusCode();

      // TODO: The Web Search API returns useful error messages, we could use them to have better insights on failures.
      // See: https://dev.cognitive.microsoft.com/docs/services/56b43eeccf5ff8098cef3807/operations/56b4447dcf5ff8098cef380d
      if (statusCode != HttpStatus.SC_OK) {
        LOGGER.error("Bing Search API call returned an unexpected status code (%d) for URI: %s", statusCode, uri);
        // Fail with the declared exception type, so that callers neither dereference a null response
        // nor store results of a failed call in the cache.
        throw new IOException(
            "Bing Search API call returned an unexpected status code (" + statusCode + ") for URI: " + uri);
      }

      HttpEntity entity = response.getEntity();
      if (entity == null) {
        throw new IOException("Bing Search API call returned no response body for URI: " + uri);
      }
      Charset charset = ContentType.getOrDefault(entity).getCharset();
      if (charset == null) {
        charset = StandardCharsets.UTF_8;
      }

      try (final BufferedReader in = new BufferedReader(new InputStreamReader(entity.getContent(), charset))) {
        final StringBuilder stringResponse = new StringBuilder();
        String inputLine;
        while ((inputLine = in.readLine()) != null) {
          stringResponse.append(inputLine);
        }
        JsonNode rootNode = MAPPER.readValue(stringResponse.toString(), JsonNode.class);
        return rootNode.path("webPages");
      }
    }
  }

  /** Converts the cached list of search results into SearchResult objects.
   * @param cachedResults list read from the "topSearchResults" field of a cached document
   * @return a set with one SearchResult per element of the list
   */
  private static Set<SearchResult> toSearchResults(BasicDBList cachedResults) {
    Set<SearchResult> searchResults = new HashSet<>();
    for (Object cachedResult : cachedResults) {
      SearchResult searchResult = new SearchResult();
      searchResult.populateFromBasicDBObject((BasicDBObject) cachedResult);
      searchResults.add(searchResult);
    }
    return searchResults;
  }

  /** Reads the cached total count from a cached document.
   * @param cachedEntry a non-null cached document
   * @return the cached count, or null if the field is absent or not numeric
   */
  private static Long readCachedTotalCount(BasicDBObject cachedEntry) {
    // Accept any numeric type instead of hard-casting to Long.
    Object rawCount = cachedEntry.get(TOTAL_COUNT_FIELD);
    return (rawCount instanceof Number) ? Long.valueOf(((Number) rawCount).longValue()) : null;
  }

  /** This key function caches in a MongoDB collection and returns a set of SearchResults.
   * If present, the results are returned from the cache. If not, the results are queried and returned after updating
   * the cache.
   * @param name (String) the name to return results for. Will be normalized to lower case.
   * @return a set of SearchResults
   * @throws IOException if the results have to be fetched and the API call fails (the cache is then left untouched)
   */
  public Set<SearchResult> getAndCacheTopSearchResults(String name) throws IOException {

    String formattedName = formatName(name);
    BasicDBObject cachedEntry = bingCacheMongoDB.getNameSearchResultDBObjectFromName(formattedName);

    // There are 3 cases:
    // 1) There is a corresponding entry in the cache AND the topSearchResults are populated.
    //    In this case, we read from the cache and return the results.
    // 2) There is a corresponding entry in the cache BUT the topSearchResults are not populated.
    //    This case occurs when only totalCountSearchResults is populated.
    //    In this case, perform the relevant query, update the cache and return the results
    // 3) There is no corresponding entry in the cache.
    //    In this case, perform the relevant query, create a new entry in the cache and return the results.

    BasicDBList cachedTopResults =
        (cachedEntry == null) ? null : (BasicDBList) cachedEntry.get(TOP_SEARCH_RESULTS_FIELD);

    if (cachedTopResults != null) {
      // Case 1)
      LOGGER.debug("Existing entry in the cache, with populated topSearchResults. Returning from the cache.");
      return toSearchResults(cachedTopResults);
    }

    if (cachedEntry == null) {
      // Case 3)
      LOGGER.debug("No corresponding entry in the cache. Fetching results and populating cache.");
    } else {
      // Case 2)
      LOGGER.debug("Existing entry in the cache, with empty topSearchResults. Fetching results and updating cache.");
    }

    // Query the results and wrap them in a new object
    Set<SearchResult> searchResults = fetchTopSearchResults(formattedName, TOP_N);
    NameSearchResults nameSearchResults = new NameSearchResults(formattedName);
    nameSearchResults.setTopSearchResults(searchResults);

    if (cachedEntry == null) {
      // Save new document in the cache
      bingCacheMongoDB.cacheNameSearchResult(nameSearchResults);
    } else {
      // Update the existing document
      bingCacheMongoDB.updateTopSearchResults(formattedName, nameSearchResults);
    }
    return searchResults;
  }

  /** Returns the top search results stored in the cache for a name, without querying the API.
   * @param name (String) the name to return results for. Will be normalized to lower case.
   * @return the cached SearchResults, or an empty set if the cache has no entry or no top results for the name
   */
  public Set<SearchResult> getTopSearchResultsFromCache(String name) {
    BasicDBObject cachedEntry = bingCacheMongoDB.getNameSearchResultDBObjectFromName(formatName(name));
    if (cachedEntry == null) {
      return new HashSet<>();
    }
    BasicDBList cachedTopResults = (BasicDBList) cachedEntry.get(TOP_SEARCH_RESULTS_FIELD);
    if (cachedTopResults == null) {
      return new HashSet<>();
    }
    return toSearchResults(cachedTopResults);
  }

  /** Returns the total count of search results stored in the cache for a name, without querying the API.
   * @param name (String) the name to return the count for. Will be normalized to lower case.
   * @return the cached total count, or -1 if the cache has no entry or no count for the name
   */
  public Long getTotalCountSearchResultsFromCache(String name) {
    BasicDBObject cachedEntry = bingCacheMongoDB.getNameSearchResultDBObjectFromName(formatName(name));
    if (cachedEntry == null) {
      return -1L;
    }
    Long cachedCount = readCachedTotalCount(cachedEntry);
    return (cachedCount == null) ? Long.valueOf(-1L) : cachedCount;
  }

  /** This key function caches in a MongoDB collection and returns the total count of Bing search results.
   * If present, the results are returned from the cache. If not, the results are queried and returned after updating
   * the cache.
   * @param name (String) the name to return results for. Will be normalized to lower case.
   * @return the total search result count
   * @throws IOException if the count has to be fetched and the API call fails (the cache is then left untouched)
   */
  public Long getAndCacheTotalCountSearchResults(String name) throws IOException {

    String formattedName = formatName(name);
    BasicDBObject cachedEntry = bingCacheMongoDB.getNameSearchResultDBObjectFromName(formattedName);

    // There are 3 cases:
    // 1) There is a corresponding entry in the cache AND the totalCountSearchResults are populated.
    //    In this case, we read from the cache and return the results.
    // 2) There is a corresponding entry in the cache BUT the totalCountSearchResults are not populated
    //    (absent or negative). This case occurs when only topSearchResults is populated.
    //    In this case, perform the relevant query, update the cache and return the total count
    // 3) There is no corresponding entry in the cache.
    //    In this case, perform the relevant query, create a new entry in the cache and return the total count.

    Long cachedCount = (cachedEntry == null) ? null : readCachedTotalCount(cachedEntry);

    if (cachedCount != null && cachedCount >= 0) {
      // Case 1)
      LOGGER.debug("Existing entry in the cache, with populated totalCountSearchResults. Returning from the cache.");
      return cachedCount;
    }

    if (cachedEntry == null) {
      // Case 3)
      LOGGER.debug("No corresponding entry in the cache. Fetching results and populating cache.");
    } else {
      // Case 2)
      LOGGER.debug("Existing entry in the cache, with empty totalCountSearchResults. " +
          "Fetching results and updating cache.");
    }

    // Query the count and wrap it in a new object
    Long totalCountSearchResults = fetchTotalCountSearchResults(formattedName);
    NameSearchResults nameSearchResults = new NameSearchResults(formattedName);
    nameSearchResults.setTotalCountSearchResults(totalCountSearchResults);

    if (cachedEntry == null) {
      // Save new document in the cache
      bingCacheMongoDB.cacheNameSearchResult(nameSearchResults);
    } else {
      // Update the existing document
      bingCacheMongoDB.updateTotalCountSearchResults(formattedName, nameSearchResults);
    }
    return totalCountSearchResults;
  }

  /** Heuristic to find the best name for a given InChI, based on the total number of search results.
   * If the molecule has a Wikipedia name, it is returned directly. Otherwise, among all names longer than
   * 4 characters (the formula extracted from the InChI excluded), the one with the highest total count wins.
   * @param namesOfMolecule (NamesOfMolecule) Java object containing the names known for a given InChI.
   * @return the name with the highest total number of search results, called Best Name, or an empty string
   *         if no candidate name was found
   * @throws IOException if a count has to be fetched and the API call fails
   */
  public String findBestMoleculeName(NamesOfMolecule namesOfMolecule) throws IOException {
    String inchi = namesOfMolecule.getInchi();
    // The formula is the second "/"-separated layer of the InChI, when there is one.
    String[] inchiLayers = inchi.split("/");
    String formulaFromInchi = (inchiLayers.length >= 2) ? inchiLayers[1] : null;

    LOGGER.debug("Formula %s extracted from %s", formulaFromInchi, inchi);

    String bestName = "";
    String wikipediaName = namesOfMolecule.getWikipediaName();
    if (wikipediaName != null) {
      bestName = wikipediaName;
    } else {
      long maxCount = -1L;
      Set<String> names = namesOfMolecule.getAllNames();
      names.remove(formulaFromInchi);
      for (String name : names) {
        // Ignore name if <= 4 characters
        if (name.length() <= 4) {
          continue;
        }
        LOGGER.debug("Getting search hits for %s", name);
        long count = cacheOnly ? getTotalCountSearchResultsFromCache(name) : getAndCacheTotalCountSearchResults(name);
        // Keep the name only if it beats the previous best candidate
        if (count > maxCount) {
          maxCount = count;
          bestName = name;
        }
      }
    }

    LOGGER.debug("Best name found for %s is %s", inchi, bestName);
    return bestName;
  }

  /** Runs an example query against the API and the cache. */
  public static void main(String[] args) {
    String apiKeyFilepath = "MNT_SHARED_DATA/Thomas/test-bing/microsoft-cognitive-service-api-key";
    BingSearchResults bingSearchResults = new BingSearchResults(apiKeyFilepath);
    try {
      Set<SearchResult> res = bingSearchResults.getAndCacheTopSearchResults("new query");
      Long count = bingSearchResults.getAndCacheTotalCountSearchResults("new query");
      LOGGER.info("Example query returned %d top results and a total count of %d", res.size(), count);
    } catch (IOException e) {
      throw new RuntimeException("Exception occurred when computing example query", e);
    }
  }
}