/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.hadoop.dedup;

import java.util.Iterator;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.solr.common.SolrInputDocument;

/**
 * Interface that enables deduplication and ordering of a series of document
 * updates for the same unique document key.
 * 
 * For example, a MapReduce batch job might index multiple files in the same job
 * where some of the files contain old and new versions of the very same
 * document, using the same unique document key.
 * 
 * Typically, implementations of this interface forbid collisions by throwing an
 * exception, or ignore all but the most recent document version, or, in the
 * general case, order colliding updates ascending from least recent to most
 * recent (partial) update.
 * 
 * The caller of this interface (i.e. the Hadoop Reducer) will then apply the
 * updates to Solr in the order returned by the orderUpdates() method.
 * 
 * Configuration: If an UpdateConflictResolver implementation also implements
 * {@link Configurable} then the Hadoop Reducer will call
 * {@link Configurable#setConf(org.apache.hadoop.conf.Configuration)} on
 * instance construction and pass the standard Hadoop configuration information.
 */
public interface UpdateConflictResolver {
  
  /**
   * Given a list of all colliding document updates for the same unique document
   * key, this method returns zero or more documents in an application specific
   * order.
   * 
   * The caller will then apply the updates for this key to Solr in the order
   * returned by the orderUpdate() method.
   * 
   * @param uniqueKey
   *          the document key common to all collidingUpdates mentioned below
   * @param collidingUpdates
   *          all updates in the MapReduce job that have a key equal to
   *          {@code uniqueKey} mentioned above. The input order is unspecified.
   * @param context
   *          The <code>Context</code> passed from the {@link Reducer}
   *          implementations.
   * @return the order in which the updates shall be applied to Solr
   */
  Iterator<SolrInputDocument> orderUpdates(
      Text uniqueKey, Iterator<SolrInputDocument> collidingUpdates, Context context);
  
}