/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.backend.hadoop.executionengine.mapReduceLayer;

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROpPlanVisitor;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POProject;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.PORelationToExprProject;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POUserFunc;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.JoinPackager;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.PODistinct;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POFilter;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POForEach;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLimit;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLocalRearrange;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POPackage;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POSort;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POSortedDistinct;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POUnion;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.io.PigNullableWritable;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.plan.DepthFirstWalker;
import org.apache.pig.impl.plan.NodeIdGenerator;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.PlanException;
import org.apache.pig.impl.plan.VisitorException;

class POToChange {
    POToChange(PhysicalOperator oper, PhysicalPlan plan, POForEach forEach) {
        this.oper = oper;
        this.plan = plan;
        this.forEach = forEach;
    }

    PhysicalOperator oper;

    PhysicalPlan plan;

    POForEach forEach;
}

public class SecondaryKeyOptimizer extends MROpPlanVisitor {
    private Log log = LogFactory.getLog(getClass());

    private int numMRUseSecondaryKey = 0;

    private int numDistinctChanged = 0;

    private int numSortRemoved = 0;

    /**
     * @param plan
     *            The MROperPlan to visit to discover keyType
     */
    public SecondaryKeyOptimizer(MROperPlan plan) {
        super(plan, new DepthFirstWalker<MapReduceOper, MROperPlan>(plan));
    }

    // Build sort key structure from POLocalRearrange
    SortKeyInfo getSortKeyInfo(POLocalRearrange rearrange) throws ExecException {
        SortKeyInfo result = new SortKeyInfo();
        List<PhysicalPlan> plans = rearrange.getPlans();
        nextPlan: for (int i = 0; i < plans.size(); i++) {
            PhysicalPlan plan = plans.get(i);
            ColumnChainInfo columnChainInfo = new ColumnChainInfo();
            if (plan.getRoots() == null) {
                log.debug("POLocalRearrange plan is null");
                return null;
            } else if (plan.getRoots().size() != 1) {
                // POLocalRearrange plan contains more than 1 root.
                // Probably there is an Expression operator in the local
                // rearrangement plan, skip this plan
                continue nextPlan;
            } else {
                List<Integer> columns = new ArrayList<Integer>();
                columns
                        .add(rearrange.getIndex()
                                & PigNullableWritable.idxSpace);
                
                // The first item inside columnChainInfo is set to type Tuple.
                // This value is not actually in use, but it intends to match
                // the type of POProject in reduce side
                columnChainInfo.insert(columns, DataType.TUPLE);

                PhysicalOperator node = plan.getRoots().get(0);
                while (node != null) {
                    if (node instanceof POProject) {
                        POProject project = (POProject) node;
                        if(project.isProjectToEnd()){
                            columnChainInfo.insert(project.getStartCol(), 
                                    project.getResultType());
                        }else {
                            columnChainInfo.insert(
                                    project.getColumns(), project.getResultType());
                        }
                        
                        if (plan.getSuccessors(node) == null)
                            node = null;
                        else if (plan.getSuccessors(node).size() != 1) {
                            log.debug(node + " have more than 1 successor");
                            node = null;
                        } else
                            node = plan.getSuccessors(node).get(0);
                    } else
                        // constant, UDF, we will pass
                        continue nextPlan;
                }
            }
            // Let's assume all main key is sorted ascendant, we can further
            // optimize it to match one of the nested sort/distinct key, because we do not
            // really care about how cogroup key are sorted; But it may not be the case
            // if sometime we switch all the comparator to byte comparator, so just
            // leave it as it is for now
            result.insertColumnChainInfo(i, columnChainInfo, true);
        }
        return result;
    }

    @Override
    public void visitMROp(MapReduceOper mr) throws VisitorException {
        log.trace("Entering SecondaryKeyOptimizer.visitMROp, skip optimizing");
        List<SortKeyInfo> sortKeyInfos = new ArrayList<SortKeyInfo>();
        SortKeyInfo secondarySortKeyInfo = null;
        List<POToChange> sortsToRemove = null;
        List<POToChange> distinctsToChange = null;

        // Only optimize for Cogroup case
        if (mr.isGlobalSort())
            return;

        // Don't optimize when we already have a custom partitioner
        if (mr.getCustomPartitioner()!=null)
            return;

        List<PhysicalOperator> mapLeaves = mr.mapPlan.getLeaves();
        if (mapLeaves == null || mapLeaves.size() != 1) {
            log
                    .debug("Expected map to have single leaf! Skip secondary key optimizing");
            return;
        }
        PhysicalOperator mapLeaf = mapLeaves.get(0);

        // Figure out the main key of the map-reduce job from POLocalRearrange
        try {
            if (mapLeaf instanceof POLocalRearrange) {
                SortKeyInfo sortKeyInfo = getSortKeyInfo((POLocalRearrange) mapLeaf);
                if (sortKeyInfo == null) {
                    log
                            .debug("Cannot get sortKeyInfo from POLocalRearrange, skip secondary key optimizing");
                    return;
                }
                sortKeyInfos.add(sortKeyInfo);
            } else if (mapLeaf instanceof POUnion) {
                List<PhysicalOperator> preds = mr.mapPlan
                        .getPredecessors(mapLeaf);
                for (PhysicalOperator pred : preds) {
                    if (pred instanceof POLocalRearrange) {
                        SortKeyInfo sortKeyInfo = getSortKeyInfo((POLocalRearrange) pred);
                        if (sortKeyInfo == null) {
                            log.debug("Cannot get sortKeyInfo from POLocalRearrange, skip secondary key optimizing");
                            return;
                        }
                        sortKeyInfos.add(sortKeyInfo);
                    }
                }
            } else {
                log.debug("Cannot find POLocalRearrange or POUnion in map leaf, skip secondary key optimizing");
                return;
            }
        } catch (ExecException e) {
            log
                    .debug("Cannot get sortKeyInfo from POLocalRearrange, skip secondary key optimizing");
            return;
        }

        if (mr.reducePlan.isEmpty()) {
            log.debug("Reduce plan is empty, skip secondary key optimizing");
            return;
        }

        List<PhysicalOperator> reduceRoots = mr.reducePlan.getRoots();
        if (reduceRoots.size() != 1) {
            log
                    .debug("Expected reduce to have single root, skip secondary key optimizing");
            return;
        }

        PhysicalOperator root = reduceRoots.get(0);
        if (!(root instanceof POPackage)) {
            log
                    .debug("Expected reduce root to be a POPackage, skip secondary key optimizing");
            return;
        }

        // visit the POForEach of the reduce plan. We can have Limit and Filter
        // in the middle
        PhysicalOperator currentNode = root;
        POForEach foreach = null;
        while (currentNode != null) {
            if (currentNode instanceof POPackage
                    && !(((POPackage) currentNode).getPkgr() instanceof JoinPackager)
                    || currentNode instanceof POFilter
                    || currentNode instanceof POLimit) {
                List<PhysicalOperator> succs = mr.reducePlan
                        .getSuccessors(currentNode);
                if (succs == null) // We didn't find POForEach
                    return;
                if (succs.size() != 1) {
                    log.debug("See multiple output for " + currentNode
                            + " in reduce plan, skip secondary key optimizing");
                    return;
                }
                currentNode = succs.get(0);
            } else if (currentNode instanceof POForEach) {
                foreach = (POForEach) currentNode;
                break;
            } else { // Skip optimization
                return;
            }
        }
        
        // We do not find a foreach (we shall not come here, a trick to fool findbugs)
        if (foreach==null)
            return;

        sortsToRemove = new ArrayList<POToChange>();
        distinctsToChange = new ArrayList<POToChange>();

        for (PhysicalPlan innerPlan : foreach.getInputPlans()) {
            // visit inner plans to figure out the sort order for distinct /
            // sort
            SecondaryKeyDiscover innerPlanDiscover = new SecondaryKeyDiscover(
                    innerPlan, sortKeyInfos, secondarySortKeyInfo);
            try {
                innerPlanDiscover.process();
            } catch (FrontendException e) {
                int errorCode = 2213;
                throw new VisitorException("Error visiting inner plan for ForEach", errorCode, e);
            }
            secondarySortKeyInfo = innerPlanDiscover.getSecondarySortKeyInfo();
            if (innerPlanDiscover.getSortsToRemove() != null) {
                for (POSort sort : innerPlanDiscover.getSortsToRemove()) {
                    sortsToRemove.add(new POToChange(sort, innerPlan, foreach));
                }
            }
            if (innerPlanDiscover.getDistinctsToChange() != null) {
                for (PODistinct distinct : innerPlanDiscover
                        .getDistinctsToChange()) {
                    distinctsToChange.add(new POToChange(distinct, innerPlan,
                            foreach));
                }
            }
        }

        try {
            // Change PODistinct to use POSortedDistinct, which assume the input
            // data is sorted
            for (POToChange distinctToChange : distinctsToChange) {
                numDistinctChanged++;
                PODistinct oldDistinct = (PODistinct) distinctToChange.oper;
                String scope = oldDistinct.getOperatorKey().scope;
                POSortedDistinct newDistinct = new POSortedDistinct(
                        new OperatorKey(scope, NodeIdGenerator.getGenerator()
                                .getNextNodeId(scope)), oldDistinct
                                .getRequestedParallelism(), oldDistinct
                                .getInputs());
                newDistinct.setInputs(oldDistinct.getInputs());
                newDistinct.setResultType(oldDistinct.getResultType());
                distinctToChange.plan.replace(oldDistinct, newDistinct);
                distinctToChange.forEach.getLeaves();
            }
            // Removed POSort, if the successor require a databag, we need to
            // add a PORelationToExprProject
            // to convert tuples into databag
            for (POToChange sortToRemove : sortsToRemove) {
                numSortRemoved++;
                POSort oldSort = (POSort) sortToRemove.oper;
                String scope = oldSort.getOperatorKey().scope;
                List<PhysicalOperator> preds = sortToRemove.plan
                        .getPredecessors(sortToRemove.oper);
                List<PhysicalOperator> succs = sortToRemove.plan
                .getSuccessors(sortToRemove.oper);
                POProject project = null;
                if ((preds == null
                        || preds.get(0).getResultType() != DataType.BAG
                        && oldSort.getResultType() == DataType.BAG) // sort to remove do change the result type
                        && (succs == null || !(succs.get(0) instanceof PORelationToExprProject))) // successor is not PORelationToExprProject
                {
                    project = new PORelationToExprProject(new OperatorKey(
                            scope, NodeIdGenerator.getGenerator()
                                    .getNextNodeId(scope)), oldSort
                            .getRequestedParallelism());
                    project.setInputs(oldSort.getInputs());
                    project.setResultType(DataType.BAG);
                    project.setStar(true);
                }
                if (project == null)
                    sortToRemove.plan.removeAndReconnect(sortToRemove.oper);
                else
                    sortToRemove.plan.replace(oldSort, project);
                sortToRemove.forEach.getLeaves();
            }
        } catch (PlanException e) {
            int errorCode = 2202;
            throw new VisitorException(
                    "Error change distinct/sort to use secondary key optimizer",
                    errorCode, e);
        }
        if (secondarySortKeyInfo != null) {
            // Adjust POLocalRearrange, POPackage, MapReduceOper to use the
            // secondary key
            numMRUseSecondaryKey++;
            mr.setUseSecondaryKey(true);
            mr.setSecondarySortOrder(secondarySortKeyInfo.getAscs());
            int indexOfRearrangeToChange = -1;
            for (ColumnChainInfo columnChainInfo : secondarySortKeyInfo
                    .getColumnChains()) {
                ColumnInfo currentColumn = columnChainInfo.getColumnInfos()
                        .get(0);
                int index = currentColumn.columns.get(0);
                if (indexOfRearrangeToChange == -1)
                    indexOfRearrangeToChange = index;
                else if (indexOfRearrangeToChange != index) {
                    int errorCode = 2203;
                    throw new VisitorException("Sort on columns from different inputs.", errorCode);
                }
            }
            if (mapLeaf instanceof POLocalRearrange) {
                ((POLocalRearrange) mapLeaf).setUseSecondaryKey(true);
                setSecondaryPlan(mr.mapPlan, (POLocalRearrange) mapLeaf,
                        secondarySortKeyInfo);
            } else if (mapLeaf instanceof POUnion) {
                List<PhysicalOperator> preds = mr.mapPlan
                        .getPredecessors(mapLeaf);
                boolean found = false;
                for (PhysicalOperator pred : preds) {
                    POLocalRearrange rearrange = (POLocalRearrange) pred;
                    rearrange.setUseSecondaryKey(true);
                    if (rearrange.getIndex() == indexOfRearrangeToChange) { 
                        // Try to find the POLocalRearrange for the secondary key
                        found = true;
                        setSecondaryPlan(mr.mapPlan, rearrange, secondarySortKeyInfo);
                    }
                }
                if (!found)
                {
                    int errorCode = 2214;
                    throw new VisitorException("Cannot find POLocalRearrange to set secondary plan", errorCode);
                }
            }
            POPackage pack = (POPackage) root;
            pack.getPkgr().setUseSecondaryKey(true);
        }
    }

    void setSecondaryPlan(PhysicalPlan plan, POLocalRearrange rearrange,
            SortKeyInfo secondarySortKeyInfo) throws VisitorException {
        // Put plan to project secondary key to the POLocalRearrange
        try {
            String scope = rearrange.getOperatorKey().scope;
            List<PhysicalPlan> secondaryPlanList = new ArrayList<PhysicalPlan>();
            for (ColumnChainInfo columnChainInfo : secondarySortKeyInfo
                    .getColumnChains()) {
                PhysicalPlan secondaryPlan = new PhysicalPlan();
                for (int i = 1; i < columnChainInfo.size(); i++) {
                    // The first item in columnChainInfo indicate the index of
                    // input, we have addressed
                    // already before we come here
                    ColumnInfo columnInfo = columnChainInfo.getColumnInfo(i);
                    POProject project = new POProject(
                            new OperatorKey(scope, NodeIdGenerator
                                    .getGenerator().getNextNodeId(scope)),
                            rearrange.getRequestedParallelism());
                    if(columnInfo.isRangeProject)
                        project.setProjectToEnd(columnInfo.startCol);
                    else
                        project
                                .setColumns((ArrayList<Integer>) columnInfo.columns);
                    project.setResultType(columnInfo.resultType);
                    secondaryPlan.addAsLeaf(project);
                }
                if (secondaryPlan.isEmpty()) { // If secondary key sort on the
                                               // input as a whole
                    POProject project = new POProject(
                            new OperatorKey(scope, NodeIdGenerator
                                    .getGenerator().getNextNodeId(scope)),
                            rearrange.getRequestedParallelism());
                    project.setStar(true);
                    secondaryPlan.addAsLeaf(project);
                }
                secondaryPlanList.add(secondaryPlan);
            }
            rearrange.setSecondaryPlans(secondaryPlanList);
        } catch (PlanException e) {
            int errorCode = 2204;
            throw new VisitorException("Error setting secondary key plan",
                    errorCode, e);
        }
    }

    public int getNumMRUseSecondaryKey() {
        return numMRUseSecondaryKey;
    }

    public int getNumSortRemoved() {
        return numSortRemoved;
    }

    public int getDistinctChanged() {
        return numDistinctChanged;
    }

    // Find eligible sort and distinct physical operators from the reduce plan.
    // SecondaryKeyChecker will check for sort/distinct keys (for distinct, it
    // is
    // always the entire bag), if it is the same with the main key, then we can
    // just
    // remove it. If it is not, then we can have 1 secondary sort key, put the
    // first
    // such sort/distinct key as the secondary sort key. For subsequent
    // sort/distinct,
    // we cannot do any secondary key optimization because we only have 1
    // secondary
    // sort key.
    private static class SecondaryKeyDiscover {
        PhysicalPlan mPlan;
        
        List<POSort> sortsToRemove = new ArrayList<POSort>();

        List<PODistinct> distinctsToChange = new ArrayList<PODistinct>();

        List<SortKeyInfo> sortKeyInfos;

        SortKeyInfo secondarySortKeyInfo;

        ColumnChainInfo columnChainInfo = null;

        // PhysicalPlan here is foreach inner plan
        SecondaryKeyDiscover(PhysicalPlan plan,
                List<SortKeyInfo> sortKeyInfos, SortKeyInfo secondarySortKeyInfo) {
            this.mPlan = plan;
            this.sortKeyInfos = sortKeyInfos;
            this.secondarySortKeyInfo = secondarySortKeyInfo;
        }
        
        public void process() throws FrontendException
        {
            List<PhysicalOperator> roots = mPlan.getRoots();
            for (PhysicalOperator root : roots) {
                columnChainInfo = new ColumnChainInfo();
                processRoot(root);
            }
        }
        
        public void processRoot(PhysicalOperator root) throws FrontendException {
            PhysicalOperator currentNode = root;
            while (currentNode!=null) {
                boolean sawInvalidPhysicalOper = false;
                if (currentNode instanceof PODistinct)
                    sawInvalidPhysicalOper = processDistinct((PODistinct)currentNode);
                else if (currentNode instanceof POSort)
                    sawInvalidPhysicalOper = processSort((POSort)currentNode);
                else if (currentNode instanceof POProject)
                    sawInvalidPhysicalOper = processProject((POProject)currentNode);
                else if (currentNode instanceof POUserFunc ||
                         currentNode instanceof POUnion ||
                         // We don't process foreach, since foreach is too complex to get right
                         currentNode instanceof POForEach)
                    break;
                
                if (sawInvalidPhysicalOper)
                    break;
                
                List<PhysicalOperator> succs = mPlan.getSuccessors(currentNode);
                if (succs==null)
                    currentNode = null;
                else {
                    if (succs.size()>1) {
                        int errorCode = 2215;
                        throw new FrontendException("See more than 1 successors in the nested plan for "+currentNode,
                                errorCode);
                    }
                    currentNode = succs.get(0);
                }
            }
        }

        // We see PODistinct, check which key it is using
        public boolean processDistinct(PODistinct distinct) throws FrontendException {
            SortKeyInfo keyInfos = new SortKeyInfo();
            try {
                keyInfos.insertColumnChainInfo(0,
                        (ColumnChainInfo) columnChainInfo.clone(), true);
            } catch (CloneNotSupportedException e) { // We implement Clonable,
                                                     // impossible to get here
            }

            // if it is part of main key
            for (SortKeyInfo sortKeyInfo : sortKeyInfos) {
                if (sortKeyInfo.moreSpecificThan(keyInfos)) {
                    distinctsToChange.add(distinct);
                    return false;
                }
            }

            // if it is part of secondary key
            if (secondarySortKeyInfo != null
                    && secondarySortKeyInfo.moreSpecificThan(keyInfos)) {
                distinctsToChange.add(distinct);
                return false;
            }

            // Now set the secondary key
            if (secondarySortKeyInfo == null) {
                distinctsToChange.add(distinct);
                secondarySortKeyInfo = keyInfos;
            }
            return false;
        }

        // Accumulate column info
        public boolean processProject(POProject project) throws FrontendException {
            columnChainInfo.insertInReduce(project);
            return false;
        }

        // We see POSort, check which key it is using
        public boolean processSort(POSort sort) throws FrontendException{
            SortKeyInfo keyInfo = new SortKeyInfo();
            for (int i = 0; i < sort.getSortPlans().size(); i++) {
                PhysicalPlan sortPlan = sort.getSortPlans().get(i);
                ColumnChainInfo sortChainInfo = null;
                try {
                    sortChainInfo = (ColumnChainInfo) columnChainInfo.clone();
                } catch (CloneNotSupportedException e) { // We implement
                                                         // Clonable, impossible
                                                         // to get here
                }
                boolean r = false;
                try {
                    r = collectColumnChain(sortPlan, sortChainInfo);
                } catch (PlanException e) {
                    int errorCode = 2206;
                    throw new FrontendException("Error visiting POSort inner plan",
                            errorCode, e);
                }
                if (r==true) // if we saw physical operator other than project in sort plan
                {
                    return true;
                }
                keyInfo.insertColumnChainInfo(i, sortChainInfo, sort
                        .getMAscCols().get(i));
            }
            // if it is part of main key
            for (SortKeyInfo sortKeyInfo : sortKeyInfos) {
                if (sortKeyInfo.moreSpecificThan(keyInfo)) {
                    sortsToRemove.add(sort);
                    return false;
                }
            }
            // if it is part of secondary key
            if (secondarySortKeyInfo != null
                    && secondarySortKeyInfo.moreSpecificThan(keyInfo)) {
                sortsToRemove.add(sort);
                return false;
            }

            // Now set the secondary key
            if (secondarySortKeyInfo == null) {
                sortsToRemove.add(sort);
                secondarySortKeyInfo = keyInfo;
            }
            return false;
        }

        public List<POSort> getSortsToRemove() {
            return sortsToRemove;
        }

        public List<PODistinct> getDistinctsToChange() {
            return distinctsToChange;
        }

        public SortKeyInfo getSecondarySortKeyInfo() {
            return secondarySortKeyInfo;
        }
    }

    // Return true if we saw physical operators other than project in the plan
    static private boolean collectColumnChain(PhysicalPlan plan,
            ColumnChainInfo columnChainInfo) throws PlanException {
        if (plan.getRoots().size() != 1) {
        	return true;
        }

        PhysicalOperator currentNode = plan.getRoots().get(0);

        while (currentNode != null) {
            if (currentNode instanceof POProject) {
                POProject project = (POProject) currentNode;
                columnChainInfo.insertInReduce(project);
            } else {
                return true;
            }
            List<PhysicalOperator> succs = plan.getSuccessors(currentNode);
            if (succs == null)
                break;
            if (succs.size() != 1) {
                int errorCode = 2208;
                throw new PlanException(
                        "Exception visiting foreach inner plan", errorCode);
            }
            currentNode = succs.get(0);
        }
        return false;
    }
}