/*
 * Copyright 2013 Cloudera Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.kitesdk.morphline.solr;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.util.TreeMap;

import org.apache.solr.schema.IndexSchema;
import org.kitesdk.morphline.api.Command;
import org.kitesdk.morphline.api.CommandBuilder;
import org.kitesdk.morphline.api.MorphlineContext;
import org.kitesdk.morphline.api.Record;
import org.kitesdk.morphline.base.AbstractCommand;

import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.typesafe.config.Config;

 * Command that sanitizes record fields that are unknown to Solr schema.xml by either deleting them
 * (renameToPrefix is absent or a zero length string), or by moving them to a field prefixed with
 * the given renameToPrefix (e.g. renameToPrefix = "ignored_" to use typical dynamic Solr fields).
 * <p>
 * Recall that Solr throws an exception on any attempt to load a document that contains a field that
 * isn't specified in schema.xml.
public final class SanitizeUnknownSolrFieldsBuilder implements CommandBuilder {

  public Collection<String> getNames() {
    return Collections.singletonList("sanitizeUnknownSolrFields");

  public Command build(Config config, Command parent, Command child, MorphlineContext context) {
    return new SanitizeUnknownSolrFields(this, config, parent, child, context);
  // Nested classes:
  private static final class SanitizeUnknownSolrFields extends AbstractCommand {
    private final IndexSchema schema;
    private final String renameToPrefix;
    public SanitizeUnknownSolrFields(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
      super(builder, config, parent, child, context);      
      Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator");
      SolrLocator locator = new SolrLocator(solrLocatorConfig, context);
      LOG.debug("solrLocator: {}", locator);
      this.schema = locator.getIndexSchema();
      LOG.trace("Solr schema: \n{}", Joiner.on("\n").join(new TreeMap(schema.getFields()).values()));
      String str = getConfigs().getString(config, "renameToPrefix", "").trim();
      this.renameToPrefix = str.length() > 0 ? str : null;  
    protected boolean doProcess(Record record) {
      Collection<Map.Entry> entries = new ArrayList<Map.Entry>(record.getFields().asMap().entrySet());
      for (Map.Entry<String, Collection<Object>> entry : entries) {
        String key = entry.getKey();
        if (schema.getFieldOrNull(key) == null 
            && !LoadSolrBuilder.LOAD_SOLR_DELETE_BY_ID.equals(key)
            && !LoadSolrBuilder.LOAD_SOLR_DELETE_BY_QUERY.equals(key)
            && !LoadSolrBuilder.LOAD_SOLR_CHILD_DOCUMENTS.equals(key)) {
          LOG.debug("Sanitizing unknown Solr field: {}", key);
          Collection values = entry.getValue();
          if (renameToPrefix != null) {
            record.getFields().putAll(renameToPrefix + key, values);
          values.clear(); // implicitly removes key from record
      // pass record to next command in chain:
      return super.doProcess(record);