// NCA (c) Crown Copyright 2017
package uk.gov.dstl.baleen.collectionreaders;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringJoiner;

import org.apache.uima.UimaContext;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;

import uk.gov.dstl.baleen.collectionreaders.helpers.AbstractSqlReader;
import uk.gov.dstl.baleen.types.metadata.Metadata;

/**
 * Read each cell in an SQL table as a separate document. The appropriate JDBC driver for the
 * database you wish to connect to must be on the classpath.
 *
 * <p>Users can specify which field(s) contain text content to be processed, and all other fields
 * will be treated as metadata. If more than one text field is specified, they will be joined
 * together in the order they were specified, with content separated by two newlines.
 *
 * @baleen.javadoc
 */
public class SqlRowReader extends AbstractSqlReader {

  /**
   * Text column(s) in table - these columns will be used as the main content to process. If more
   * than one column is specified, then the content of the cells will be joined with two newlines.
   *
   * @baleen.config
   */
  public static final String PARAM_TEXT_COLUMNS = "text";

  @ConfigurationParameter(name = PARAM_TEXT_COLUMNS)
  private String[] textColumn;

  private List<Long> idsToProcess = new ArrayList<>();
  private Set<String> textCols = new HashSet<>();
  private Set<String> metaCols = new HashSet<>();
  private long currId;

  @Override
  protected void doInitialize(UimaContext context) throws ResourceInitializationException {
    super.doInitialize(context);

    for (String columnName : getColumnNames()) {
      if (inArray(columnName, textColumn)) {
        textCols.add(columnName);
      } else {
        metaCols.add(columnName);
      }
    }

    idsToProcess.addAll(getIds(null));
  }

  @Override
  public boolean doHasNext() throws IOException, CollectionException {
    if (!idsToProcess.isEmpty()) return true;

    idsToProcess.addAll(getIds(currId));
    return !idsToProcess.isEmpty();
  }

  @Override
  protected void doGetNext(JCas jCas) throws IOException, CollectionException {
    currId = idsToProcess.remove(0);

    String content;
    Map<String, String> metadata = new HashMap<>();
    try (ResultSet rs =
        conn.prepareStatement("SELECT * FROM `" + table + "` WHERE `" + idColumn + "` = " + currId)
            .executeQuery()) {
      if (rs.next()) {
        StringJoiner sj = new StringJoiner("\n\n");
        for (String col : textCols) {
          sj.add(rs.getObject(col).toString());
        }
        content = sj.toString();

        for (String col : metaCols) {
          metadata.put(col, rs.getObject(col).toString());
        }
      } else {
        throw new IOException("Unable to get row content - query returned no results");
      }
    } catch (SQLException e) {
      throw new IOException("Unable to get row content", e);
    }

    String sourceUrl = sqlConn.substring(5) + "." + table + "#" + currId;

    extractContent(
        new ByteArrayInputStream(content.getBytes(Charset.defaultCharset())), sourceUrl, jCas);

    // Need to do after we've set the JCas content
    for (Map.Entry<String, String> e : metadata.entrySet()) {
      Metadata md = new Metadata(jCas);
      md.setKey(e.getKey());
      md.setValue(e.getValue());
      md.addToIndexes();
    }
  }
}