/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.calrissian.accumulorecipes.commons.iterators;

import static java.util.Arrays.copyOfRange;
import static org.apache.commons.lang.StringUtils.join;
import static org.apache.commons.lang.StringUtils.splitPreserveAllTokens;
import static org.calrissian.accumulorecipes.commons.iterators.support.EventFields.initializeKryo;
import static org.calrissian.accumulorecipes.commons.support.Constants.NULL_BYTE;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.ByteBufferOutput;
import com.google.common.collect.Sets;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.data.ByteSequence;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.PartialKey;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.IteratorEnvironment;
import org.apache.accumulo.core.iterators.OptionDescriber;
import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
import org.apache.commons.jexl2.parser.ParseException;
import org.apache.log4j.Logger;
import org.calrissian.accumulorecipes.commons.iterators.support.EventFields;
import org.calrissian.accumulorecipes.commons.iterators.support.QueryEvaluator;

/**
 * This iterator aggregates rows together using the specified key comparator. Subclasses will provide their own implementation of fillMap which will fill the
 * supplied EventFields object with field names (key) and field values (value). After all fields have been put into the aggregated object (by aggregating all
 * columns with the same key), the EventFields object will be compared against the supplied expression. If the expression returns true, then the return key and
 * return value can be retrieved via getTopKey() and getTopValue().
 * <p/>
 * Optionally, the caller can set an expression (field operator value) that should not be evaluated against the event. For example, if the criteria is
 * "A == 'foo' and B == 'bar'", but for some reason B may not be in the data, then setting the {@link #UNEVALUTED_EXPRESSIONS} option to "B == 'bar'" will
 * rewrite that clause to "B == null" so that the events are evaluated against the remainder of the expression and can still return as true.
 * <p/>
 * Optionally, the {@link #SELECT_FIELDS} option restricts the fields that are serialized into the returned value; the expression is still evaluated against
 * the complete event.
 * <p/>
 * This class defines no date-range options of its own. Any key-level filtering is delegated to {@link #isKeyAccepted(Key)}, which subclasses implement.
 * <p/>
 * This iterator will return up the stack an EventFields object serialized using Kryo in the cell Value.
 */
public abstract class AbstractEvaluatingIterator implements SortedKeyValueIterator<Key, Value>, OptionDescriber {

    /** Option holding the criteria expression to evaluate each aggregated event against. */
    public static final String QUERY_OPTION = "expr";
    /**
     * Option holding a comma separated list of expressions that must not be evaluated. The misspelled constant name is part of the public interface and
     * is kept as is.
     */
    public static final String UNEVALUTED_EXPRESSIONS = "unevaluated.expressions";
    protected static final byte[] EMPTY_BYTE = new byte[0];
    private static final Logger log = Logger.getLogger(AbstractEvaluatingIterator.class);
    // NOTE(review): a single Kryo instance is shared by every iterator instance in the JVM and is re-initialized on every init(). Confirm that this is
    // safe for the threading model the iterators run under.
    private static final Kryo kryo = new Kryo();
    protected SortedKeyValueIterator<Key,Value> iterator;
    private PartialKey comparator = null;
    private Key currentKey = new Key();
    private Key returnKey;
    private Value returnValue;
    private String expression;
    private QueryEvaluator evaluator;
    private EventFields event = null;
    private Range seekRange = null;
    private Set<String> skipExpressions = null;

    /** Option holding the NULL_BYTE separated names of the fields to keep in the returned value. */
    protected static final String SELECT_FIELDS = "selectFields";
    /** Fields to keep in the serialized event, or null to keep every field. */
    protected Set<String> selectFields;

    /**
     * Stores the set of fields to return on the given iterator setting, joined with NULL_BYTE.
     *
     * @param is           the iterator setting to configure
     * @param selectFields names of the fields that should be kept in the returned events
     */
    public static void setSelectFields(IteratorSetting is, Set<String> selectFields) {
        is.addOption(SELECT_FIELDS, join(selectFields, NULL_BYTE));
    }

    /**
     * Copy constructor intended for deepCopy implementations of subclasses. The source iterator is deep copied and the configuration established by
     * init() is carried over, so that the copy is usable without a second call to init().
     *
     * @param other the iterator to copy
     * @param env   the environment handed to the deep copy of the source iterator
     */
    protected AbstractEvaluatingIterator(AbstractEvaluatingIterator other, IteratorEnvironment env) {
        iterator = other.iterator.deepCopy(env);
        // Each copy needs its own aggregation buffer: findTop() clears and refills it for every candidate event.
        event = new EventFields();
        comparator = other.comparator;
        expression = other.expression;
        // NOTE(review): the evaluator instance is shared with the original; confirm QueryEvaluator keeps no per-evaluation state that would make
        // sharing unsafe.
        evaluator = other.evaluator;
        if (other.skipExpressions != null) {
            skipExpressions = new HashSet<String>(other.skipExpressions);
        }
        if (other.selectFields != null) {
            selectFields = new HashSet<String>(other.selectFields);
        }
    }

    public AbstractEvaluatingIterator() {
    }

    /**
     * Copy of IteratorUtil.maximizeStartKeyTimeStamp due to IllegalAccessError.
     *
     * @param range the requested range
     * @return the given range if it has no start key or its start key already carries Long.MAX_VALUE as timestamp; otherwise a range with the same end
     *         whose start key is an inclusive copy of the original start key with the timestamp set to Long.MAX_VALUE
     */
    static Range maximizeStartKeyTimeStamp(Range range) {
        Range seekRange = range;

        if (range.getStartKey() != null && range.getStartKey().getTimestamp() != Long.MAX_VALUE) {
            Key seekKey = new Key(seekRange.getStartKey());
            seekKey.setTimestamp(Long.MAX_VALUE);
            seekRange = new Range(seekKey, true, range.getEndKey(), range.isEndKeyInclusive());
        }

        return seekRange;
    }

    /**
     * Implementations will return the PartialKey value to use for comparing keys for aggregating events
     *
     * @return the type of comparator to use
     */
    public abstract PartialKey getKeyComparator();

    /**
     * When the criteria expression evaluates to true against the event, the event fields will be serialized into the Value and returned up the iterator stack.
     * Implementations will need to provide a key to be used with the event.
     *
     * @param k the first key of the aggregated event
     * @return the key that should be returned with the map of values.
     */
    public abstract Key getReturnKey(Key k) throws Exception;

    /**
     * Implementations will need to fill the map with field visibilities, names, and values. When all fields have been aggregated the event will be evaluated
     * against the criteria expression.
     *
     * @param event Multimap of event names and fields.
     * @param key   current Key
     * @param value current Value
     */
    public abstract void fillMap(EventFields event, Key key, Value value) throws Exception;

    /**
     * Provides the ability to skip this key and all of the following ones that match using the comparator.
     *
     * @param key the key at the top of the source iterator
     * @return true if the key should be acted upon, otherwise false.
     * @throws IOException
     */
    public abstract boolean isKeyAccepted(Key key) throws IOException;

    /**
     * Reset state.
     */
    public void reset() {
        event.clear();
    }

    /**
     * Feeds the current top entry of the source, and every following entry that equals it under the configured comparator, into the given event. The
     * source is left positioned on the first entry that does not belong to the event, and returnKey is set from the first key of the event.
     *
     * @param event the event to fill
     * @throws IOException wrapping any exception raised while reading, filling the event or building the return key
     */
    private void aggregateRowColumn(EventFields event) throws IOException {

        currentKey.set(iterator.getTopKey());

        try {
            fillMap(event, iterator.getTopKey(), iterator.getTopValue());
            iterator.next();

            while (iterator.hasTop() && iterator.getTopKey().equals(currentKey, this.comparator)) {
                fillMap(event, iterator.getTopKey(), iterator.getTopValue());
                iterator.next();
            }

            // Get the return key
            returnKey = getReturnKey(currentKey);
        } catch (Exception e) {
            throw new IOException("Error aggregating event", e);
        }
    }

    /**
     * Advances the source until an aggregated event satisfies the expression or the source is exhausted. On a match, returnKey and returnValue hold the
     * result; events that do not match leave both null.
     *
     * @throws IOException if reading or aggregating fails, or if exactly one of returnKey and returnValue ends up set
     */
    private void findTop() throws IOException {
        do {
            reset();
            // check if aggregation is needed
            if (iterator.hasTop()) {
                // Check to see if the current key is accepted. For example in the wiki
                // table there are field index rows. We don't want to process those in
                // some cases so return right away. Consume all of the non-accepted keys
                while (iterator.hasTop() && !isKeyAccepted(iterator.getTopKey())) {
                    iterator.next();
                }

                if (iterator.hasTop()) {
                    aggregateRowColumn(event);

                    // Evaluate the event against the expression
                    if (event.size() > 0 && this.evaluator.evaluate(returnKey, event)) {
                        if (log.isDebugEnabled()) {
                            log.debug("Event evaluated to true, key = " + returnKey);
                        }
                        // Create a byte array. It is sized from the complete event, before unselected fields are dropped below.
                        byte[] serializedMap = new byte[event.getByteSize() + (event.size() * 20)];
                        // Wrap in ByteBuffer to work with Kryo
                        ByteBuffer buf = ByteBuffer.wrap(serializedMap);

                        // Collect the unselected fields first so the event is not modified while its keys are being iterated.
                        Set<String> keysToRemove = new HashSet<String>();
                        if (selectFields != null) {
                            for (String field : event.keys()) {
                                if (!selectFields.contains(field)) {
                                    keysToRemove.add(field);
                                }
                            }
                        }

                        for (String field : keysToRemove) {
                            event.removeAll(field);
                        }

                        // Serialize the EventFields object
                        event.write(kryo, new ByteBufferOutput(buf), event);
                        // Truncate array to the used size.
                        returnValue = new Value(copyOfRange(serializedMap, 0, buf.position()));
                    } else {
                        returnKey = null;
                        returnValue = null;
                    }
                } else {
                    if (log.isDebugEnabled()) {
                        log.debug("Iterator no longer has top.");
                    }
                }
            } else {
                log.debug("Iterator.hasTop() == false");
            }
        } while (returnValue == null && iterator.hasTop());

        // Sanity check. Make sure both returnValue and returnKey are null or both are not null
        if (!((returnKey == null && returnValue == null) || (returnKey != null && returnValue != null))) {
            log.warn("Key: " + ((returnKey == null) ? "null" : returnKey.toString()));
            log.warn("Value: " + ((returnValue == null) ? "null" : returnValue.toString()));
            throw new IOException("Return values are inconsistent");
        }
    }

    /**
     * @return the key of the matching event if there is one, otherwise the top key of the source iterator
     */
    public Key getTopKey() {
        if (returnKey != null) {
            return returnKey;
        }
        return iterator.getTopKey();
    }

    /**
     * @return the serialized matching event if there is one, otherwise the top value of the source iterator
     */
    public Value getTopValue() {
        if (returnValue != null) {
            return returnValue;
        }
        return iterator.getTopValue();
    }

    /**
     * @return true if a matching event is available or the source iterator still has a top entry
     */
    public boolean hasTop() {
        return returnKey != null || iterator.hasTop();
    }

    /**
     * Discards the current result (or, when there is none, steps the source iterator) and looks for the next matching event.
     *
     * @throws IOException if the source iterator or the aggregation fails
     */
    public void next() throws IOException {
        if (returnKey != null) {
            returnKey = null;
            returnValue = null;
        } else if (iterator.hasTop()) {
            iterator.next();
        }

        findTop();
    }

    /**
     * Seeks the source iterator and positions this iterator on the first matching event inside the range.
     *
     * @param range          the range to scan
     * @param columnFamilies column families passed through to the source iterator
     * @param inclusive      passed through to the source iterator together with columnFamilies
     * @throws IOException if the source iterator or the aggregation fails
     */
    public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
        // do not want to seek to the middle of a value that should be
        // aggregated...

        seekRange = maximizeStartKeyTimeStamp(range);

        // Forget the result of any earlier seek. findTop() only replaces it when the source has data, so without this a re-seek onto an empty range
        // would keep reporting the previous event through hasTop()/getTopKey().
        returnKey = null;
        returnValue = null;

        iterator.seek(seekRange, columnFamilies, inclusive);
        findTop();

        if (range.getStartKey() != null) {
            while (hasTop() && getTopKey().equals(range.getStartKey(), this.comparator) && getTopKey().getTimestamp() > range.getStartKey().getTimestamp()) {
                // the value has a more recent time stamp, so
                // pass it up
                // log.debug("skipping "+getTopKey());
                next();
            }

            while (hasTop() && range.beforeStartKey(getTopKey())) {
                next();
            }
        }

    }

    /**
     * Reads the options, prepares the aggregation state and builds the query evaluator.
     *
     * @param source  the iterator to read from
     * @param options iterator options, see {@link #QUERY_OPTION}, {@link #UNEVALUTED_EXPRESSIONS} and {@link #SELECT_FIELDS}
     * @param env     the iterator environment (not used by this class)
     * @throws IOException              declared by the interface
     * @throws IllegalArgumentException if the criteria cannot be parsed or an unevaluated expression does not have the form "field operator literal"
     */
    public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
        // NOTE(review): the boolean result is ignored, so a missing QUERY_OPTION is not rejected here and the expression keeps its previous value.
        validateOptions(options);
        event = new EventFields();
        this.comparator = getKeyComparator();
        this.iterator = source;

        String eventFieldsOpt = options.get(SELECT_FIELDS);

        if (eventFieldsOpt != null) {
            selectFields = Sets.newHashSet(splitPreserveAllTokens(eventFieldsOpt, NULL_BYTE));
        }

        try {
            // Replace any expressions that we should not evaluate.
            if (null != this.skipExpressions && !this.skipExpressions.isEmpty()) {
                for (String skip : this.skipExpressions) {
                    // Expression should have form: field<sp>operator<sp>literal.
                    // We are going to replace the expression with field == null.
                    String clause = skip.trim();
                    int fieldEnd = clause.indexOf(' ');
                    if (fieldEnd <= 0) {
                        throw new IllegalArgumentException("Unevaluated expression must have the form 'field operator literal': " + skip);
                    }
                    String field = clause.substring(0, fieldEnd);
                    // Literal replacement: the clause is query text, not a regular expression.
                    this.expression = this.expression.replace(clause, field + " == null");
                }
            }
            this.evaluator = getQueryEvaluator(this.expression);
        } catch (ParseException e) {
            throw new IllegalArgumentException("Failed to parse criteria", e);
        }
        initializeKryo(kryo);
    }

    /**
     * @return the description of the options understood by this iterator
     */
    public IteratorOptions describeOptions() {
        Map<String,String> options = new HashMap<String,String>();
        options.put(QUERY_OPTION, "criteria expression");
        options.put(UNEVALUTED_EXPRESSIONS, "comma separated list of expressions to skip");
        return new IteratorOptions(getClass().getSimpleName(), "evaluates event objects against an expression", options, null);
    }

    /**
     * Reads the criteria expression and the optional list of unevaluated expressions from the options. Besides validating, this method stores both on
     * this instance.
     *
     * @param options the iterator options
     * @return false if {@link #QUERY_OPTION} is missing, otherwise true
     */
    public boolean validateOptions(Map<String,String> options) {
        if (!options.containsKey(QUERY_OPTION)) {
            return false;
        }
        this.expression = options.get(QUERY_OPTION);

        if (options.containsKey(UNEVALUTED_EXPRESSIONS)) {
            String expressionList = options.get(UNEVALUTED_EXPRESSIONS);
            if (expressionList != null && !expressionList.trim().equals("")) {
                this.skipExpressions = Sets.newHashSet();
                for (String e : expressionList.split(",")) {
                    // Whitespace around the separating commas is not part of the expression; empty entries carry no expression at all.
                    String clause = e.trim();
                    if (!clause.isEmpty()) {
                        this.skipExpressions.add(clause);
                    }
                }
            }
        }
        return true;
    }

    /**
     * @return the criteria expression, after any unevaluated expressions have been rewritten by init()
     */
    public String getQueryExpression() {
        return this.expression;
    }

    /**
     * Builds the evaluator used to test aggregated events.
     *
     * @param expression the criteria expression, with unevaluated expressions already rewritten
     * @return the evaluator for the expression
     * @throws ParseException if the expression cannot be parsed
     */
    public abstract QueryEvaluator getQueryEvaluator(String expression) throws ParseException;
}