package org.icij.extract.spewer;

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.tika.metadata.Metadata;
import org.icij.extract.document.TikaDocument;
import org.icij.extract.document.EmbeddedTikaDocument;
import org.icij.spewer.FieldNames;
import org.icij.task.Options;
import org.icij.task.annotation.Option;

import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * A {@link SolrSpewer} that merges the {@literal path}, {@literal parent path} and {@literal resource name} fields
 * from the given {@link TikaDocument} with those from any document with the same ID on the Solr server.
 *
 * This functionality allows documents with file-digest-type IDs to hold multiple paths, reflecting the multiple
 * duplicate copies that may exist on disk.
 *
 * Writes are guarded by the version field: the version of the existing document (or {@literal -1} when there is
 * none) is sent along with the update, and a conflict response from the index triggers a fresh merge and another
 * attempt, up to the configured number of retries.
 */
@Option(name = "retriesOnConflict", description = "The number of times to retry adding a document when a " +
		"conflict error is returned by the index, after merging in existing fields.", parameter = "number")
public class MergingSolrSpewer extends SolrSpewer {

	private static final long serialVersionUID = -7084864083664544361L;

	/** Status code carried by a {@link SolrException} when the index reports a conflict. */
	private static final int CONFLICT = 409;

	/** Number of additional attempts made after a conflict before the exception is propagated. */
	private int retries = 100;

	/**
	 * Create a new merging spewer.
	 *
	 * @param client the Solr client used both for fetching existing documents and for writing
	 * @param fields the field name mapping
	 */
	public MergingSolrSpewer(final SolrClient client, final FieldNames fields) {
		super(client, fields);
	}

	/**
	 * Configure the spewer from the given options. In addition to the options handled by the superclass, the
	 * {@literal retriesOnConflict} option is read and, if present, passed to {@link #setRetries(int)}.
	 *
	 * @param options the options to read from
	 * @return this spewer, for chaining
	 */
	public MergingSolrSpewer configure(final Options<String> options) {
		super.configure(options);
		options.get("retriesOnConflict").parse().asInteger().ifPresent(this::setRetries);

		return this;
	}

	/**
	 * Set the number of times a write is retried after the index returns a conflict error.
	 *
	 * @param retries the number of retries; a value of zero or less disables retrying
	 */
	public void setRetries(final int retries) {
		this.retries = retries;
	}

	/**
	 * Write the document to the index. Root documents are merged with any existing document with the same ID
	 * before being written; embedded documents are passed straight to the superclass.
	 *
	 * @param tikaDocument the document being written
	 * @param inputDocument the Solr representation of the document
	 * @return the response from the index
	 * @throws IOException if the existing document can't be fetched or the write fails
	 */
	@Override
	protected UpdateResponse write(final TikaDocument tikaDocument, final SolrInputDocument inputDocument) throws
			IOException {

		// Only root documents are merged.
		if (tikaDocument instanceof EmbeddedTikaDocument) {
			return super.write(tikaDocument, inputDocument);
		} else {
			return write(tikaDocument, inputDocument, retries);
		}
	}

	/**
	 * Merge and write the document, repeating both steps for as long as the index answers with a conflict and
	 * there are retries left.
	 *
	 * @param tikaDocument the document being written
	 * @param inputDocument the Solr representation of the document
	 * @param retries the number of additional attempts allowed after the first one
	 * @return the response from the index
	 * @throws IOException if fetching the existing document fails or the superclass write fails
	 * @throws SolrException if the index returns a non-conflict error, or a conflict once retries are exhausted
	 */
	private UpdateResponse write(final TikaDocument tikaDocument, final SolrInputDocument inputDocument, final int retries)
			throws IOException {
		int remaining = retries;

		// Iterate rather than recurse so that a large retry count doesn't grow the stack.
		while (true) {

			// The merge has to be redone before every attempt: a conflict means that the existing document changed
			// (or appeared) between the fetch and the write, so both the version and the merged values are stale.
			try {
				merge(tikaDocument, inputDocument);
			} catch (final SolrServerException e) {
				throw new IOException(e);
			}

			try {
				return super.write(tikaDocument, inputDocument);
			} catch (final SolrException e) {
				if (remaining <= 0 || e.code() != CONFLICT) {
					throw e;
				}

				remaining--;
			}
		}
	}

	/**
	 * Fetch the path, parent path, version and resource name fields of the document with the same ID from the
	 * index and merge them into the given input document.
	 *
	 * @param tikaDocument the document being written
	 * @param inputDocument the Solr representation of the document, modified in place
	 * @throws IOException if there's an error communicating with the server
	 * @throws SolrServerException if the server reports an error while fetching the existing document
	 */
	private void merge(final TikaDocument tikaDocument, final SolrInputDocument inputDocument) throws IOException,
			SolrServerException {
		final SolrDocument existingDocument;
		final SolrQuery params = new SolrQuery();
		final String resourceNameKey = fields.forMetadata(Metadata.RESOURCE_NAME_KEY);

		// The document must be retrieved from the real-time-get (RTG) handler, otherwise we'd have to commit every
		// time a document is added.
		params.setRequestHandler("/get");

		// Request only the fields which must be merged, not the entire document.
		params.setFields(fields.forPath(), fields.forParentPath(), fields.forVersion(), resourceNameKey);
		existingDocument = client.getById(tikaDocument.getId(), params);

		// Since we're updating the path and parent path values of an existing document, set the version field to
		// avoid conflicts. Note that child documents don't have a version field.
		if (null != existingDocument) {
			final Object version = existingDocument.getFieldValue(fields.forVersion());
			if (null != version) {
				inputDocument.setField(fields.forVersion(), version);
			}

		} else {

			// A negative version asks the index to reject the update if a document with this ID already exists,
			// which surfaces a concurrent add as a conflict instead of silently overwriting it.
			inputDocument.setField(fields.forVersion(), "-1");
		}

		// Set the path field.
		if (null != fields.forPath()) {
			mergeField(fields.forPath(), tikaDocument.getPath().toString(), existingDocument, inputDocument);
		}

		// Set the parent path field. A path with a single name element has no parent to record.
		if (null != fields.forParentPath() && tikaDocument.getPath().getNameCount() > 1) {
			mergeField(fields.forParentPath(), tikaDocument.getPath().getParent().toString(), existingDocument,
					inputDocument);
		}

		// Merge the resource name field.
		if (tikaDocument.getMetadata() != null) {
			mergeField(resourceNameKey, tikaDocument.getMetadata().get(Metadata.RESOURCE_NAME_KEY), existingDocument,
					inputDocument);
		}
	}

	/**
	 * Set the named field on the input document to the union of the values held by the existing document and the
	 * new value.
	 *
	 * @param name the name of the field to set
	 * @param newValue the value from the document being written; may be {@literal null} if it has no such value
	 * @param existingDocument the document currently in the index, or {@literal null} if there is none
	 * @param inputDocument the Solr representation of the document, modified in place
	 */
	private void mergeField(final String name, final String newValue, final SolrDocument existingDocument,
			final SolrInputDocument inputDocument) {
		final Collection<Object> existingValues = null == existingDocument ? null :
				existingDocument.getFieldValues(name);

		// Even though the superclass sets the path and parent path fields, we should set them again in case there's
		// a retry and they need to be overwritten.
		if (null == existingValues) {
			setFieldValue(inputDocument, name, newValue);
			return;
		}

		// Create a HashSet from existing values so that only non-existing (distinct) values are added.
		// A HashSet gives constant time performance, as opposed to a loop, which is important when dealing with
		// potentially thousands of values.
		final Set<String> values = existingValues.stream()
				.map(String::valueOf)
				.collect(Collectors.toCollection(HashSet::new));

		// The new value is absent when, for example, the metadata holds no resource name. Adding it regardless would
		// put a null element in the array sent to the index, so in that case only the existing values are kept.
		if (null != newValue) {
			values.add(newValue);
		}

		setFieldValue(inputDocument, name, values.toArray(new String[0]));
	}
}