/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.pinterest.secor.util.orc;

import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonPrimitive;
import com.pinterest.secor.util.BackOffUtil;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.TypeDescription;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.nio.charset.StandardCharsets;
import java.sql.Timestamp;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

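/**
 * Converts JSON values into ORC column vectors. Contains one {@code JsonConverter} implementation per supported
 * ORC type category, a factory that picks the converter for a schema, and a helper that fills one row of a
 * {@code VectorizedRowBatch} from a {@code JsonObject}.
 *
 * @author Ashish ([email protected])
 *
 */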
public class VectorColumnFiller {
    private static final Logger LOG = LoggerFactory.getLogger(VectorColumnFiller.class);

    /**
     * Writes a single JSON value into one slot of an ORC column vector.
     */
    public interface JsonConverter {
        /**
         * Stores {@code value} at position {@code row} of {@code vect}.
         *
         * @param value the JSON element to convert; the implementations in this file treat {@code null} and
         *              JSON null as a null cell
         * @param vect  the destination column vector; the implementations in this file cast it to the
         *              concrete vector type they expect
         * @param row   index of the slot in {@code vect} to populate
         */
        void convert(JsonElement value, ColumnVector vect, int row);
    }

    static class BooleanColumnConverter implements JsonConverter {
        public void convert(JsonElement value, ColumnVector vect, int row) {
            if (value == null || value.isJsonNull()) {
                vect.noNulls = false;
                vect.isNull[row] = true;
            } else {
                LongColumnVector vector = (LongColumnVector) vect;
                vector.vector[row] = value.getAsBoolean() ? 1 : 0;
            }
        }
    }

    static class LongColumnConverter implements JsonConverter {
        public void convert(JsonElement value, ColumnVector vect, int row) {
            if (value == null || value.isJsonNull()) {
                vect.noNulls = false;
                vect.isNull[row] = true;
            } else {
                LongColumnVector vector = (LongColumnVector) vect;
                vector.vector[row] = value.getAsLong();
            }
        }
    }

    static class DoubleColumnConverter implements JsonConverter {
        public void convert(JsonElement value, ColumnVector vect, int row) {
            if (value == null || value.isJsonNull()) {
                vect.noNulls = false;
                vect.isNull[row] = true;
            } else {
                DoubleColumnVector vector = (DoubleColumnVector) vect;
                vector.vector[row] = value.getAsDouble();
            }
        }
    }

    static class StringColumnConverter implements JsonConverter {
        public void convert(JsonElement value, ColumnVector vect, int row) {
            if (value == null || value.isJsonNull()) {
                vect.noNulls = false;
                vect.isNull[row] = true;
            } else {
                BytesColumnVector vector = (BytesColumnVector) vect;
                byte[] bytes = value.getAsString().getBytes(
                        StandardCharsets.UTF_8);
                vector.setRef(row, bytes, 0, bytes.length);
            }
        }
    }

    static class BinaryColumnConverter implements JsonConverter {
        public void convert(JsonElement value, ColumnVector vect, int row) {
            if (value == null || value.isJsonNull()) {
                vect.noNulls = false;
                vect.isNull[row] = true;
            } else {
                BytesColumnVector vector = (BytesColumnVector) vect;
                String binStr = value.getAsString();
                byte[] bytes = new byte[binStr.length() / 2];
                for (int i = 0; i < bytes.length; ++i) {
                    bytes[i] = (byte) Integer.parseInt(
                            binStr.substring(i * 2, i * 2 + 2), 16);
                }
                vector.setRef(row, bytes, 0, bytes.length);
            }
        }
    }

    static class TimestampColumnConverter implements JsonConverter {
        BackOffUtil back = new BackOffUtil(true);

        public void convert(JsonElement value, ColumnVector vect, int row) {
            if (value == null || value.isJsonNull()) {
                vect.noNulls = false;
                vect.isNull[row] = true;
            } else {
                if (value.getAsJsonPrimitive().isString()) {
                    TimestampColumnVector vector = (TimestampColumnVector) vect;
                    vector.set(
                            row,
                            Timestamp.valueOf(value.getAsString().replaceAll(
                                    "[TZ]", " ")));
                } else if (value.getAsJsonPrimitive().isNumber()) {
                    TimestampColumnVector vector = (TimestampColumnVector) vect;
                    vector.set(
                            row,
                            new Timestamp(value.getAsLong()));
                } else {
                    if (!back.isBackOff()) {
                        LOG.warn("Timestamp is neither string nor number: {}", value);
                    }
                    vect.noNulls = false;
                    vect.isNull[row] = true;
                }
            }
        }
    }

    static class DecimalColumnConverter implements JsonConverter {
        public void convert(JsonElement value, ColumnVector vect, int row) {
            if (value == null || value.isJsonNull()) {
                vect.noNulls = false;
                vect.isNull[row] = true;
            } else {
                DecimalColumnVector vector = (DecimalColumnVector) vect;
                vector.vector[row].set(HiveDecimal.create(value.getAsString()));
            }
        }
    }

    static class MapColumnConverter implements JsonConverter {
        private JsonConverter[] childConverters;

        public MapColumnConverter(TypeDescription schema) {
            assertKeyType(schema);

            List<TypeDescription> childTypes = schema.getChildren();
            childConverters = new JsonConverter[childTypes.size()];
            for (int c = 0; c < childConverters.length; ++c) {
                childConverters[c] = createConverter(childTypes.get(c));
            }
        }

        /**
         * Rejects non-string keys. This is a limitation imposed by JSON specifications that only allows strings
         * as keys.
         */
        private void assertKeyType(TypeDescription schema) {
            // NOTE: It may be tempting to ensure that schema.getChildren() returns at least one child here, but the
            // validity of an ORC schema is ensured by TypeDescription. Malformed ORC schema could be a concern.
            // For example, an ORC schema of `map<>` may produce a TypeDescription instance with no child. However,
            // TypeDescription.fromString() rejects any malformed ORC schema and therefore we may assume only valid
            // ORC schema will make to this point.
            TypeDescription keyType = schema.getChildren().get(0);
            String keyTypeName = keyType.getCategory().getName();
            if (!keyTypeName.equalsIgnoreCase("string")) {
                throw new IllegalArgumentException(
                        String.format("Unsupported key type: %s", keyTypeName));
            }
        }

        public void convert(JsonElement value, ColumnVector vect, int row) {
            if (value == null || value.isJsonNull()) {
                vect.noNulls = false;
                vect.isNull[row] = true;
            } else {
                MapColumnVector vector = (MapColumnVector) vect;
                JsonObject obj = value.getAsJsonObject();
                vector.lengths[row] = obj.size();
                vector.offsets[row] = row > 0 ? vector.offsets[row - 1] + vector.lengths[row - 1] : 0;

                // Ensure enough space is available to store the keys and the values
                vector.keys.ensureSize((int) vector.offsets[row] + obj.size(), true);
                vector.values.ensureSize((int) vector.offsets[row] + obj.size(), true);

                int i = 0;
                for (String key : obj.keySet()) {
                    childConverters[0].convert(new JsonPrimitive(key), vector.keys, (int) vector.offsets[row] + i);
                    childConverters[1].convert(obj.get(key), vector.values, (int) vector.offsets[row] + i);
                    i++;
                }
            }
        }
    }

    /**
     * The primary challenge here is that available type information at the time of class instantiation and at the
     * time of invocation of {@code convert()} is different. We have exact type information when
     * {@code UnionColumnConverter} is instantiated, as it is given as {@code TypeDescription} which represents an
     * ORC schema. Conversely, when {@code convert()} method is called, limited type information is available because
     * JSON supports three primitive types only: boolean, number, and string.
     *
     * The proposed solution for this issue is to register appropriate converters at the time of instantiation with
     * a matching {@code ColumnVector} index. Note that {@code UnionColumnVector} has child column vectors to support
     * each of its child type.
     */
    static class UnionColumnConverter implements JsonConverter {

        private enum JsonType {
            NULL, BOOLEAN, NUMBER, STRING, ARRAY, OBJECT
        }

        // TODO: Could we come up with a better name?
        private class ConverterInfo {
            private int vectorIndex;
            private JsonConverter converter;

            public ConverterInfo(int vectorIndex, JsonConverter converter) {
                this.vectorIndex = vectorIndex;
                this.converter = converter;
            }

            public int getVectorIndex() {
                return vectorIndex;
            }

            public JsonConverter getConverter() {
                return converter;
            }
        }

        /**
         * Union type in ORC is essentially a collection of two or more non-compatible types,
         * and it is represented by multiple child columns under UnionColumnVector.
         * Thus we need converters for each type.
         */
        private Map<JsonType, ConverterInfo> childConverters = new HashMap<>();

        public UnionColumnConverter(TypeDescription schema) {
            List<TypeDescription> children = schema.getChildren();
            int index = 0;
            for (TypeDescription childType : children) {
                JsonType jsonType = getJsonType(childType.getCategory());
                JsonConverter converter = createConverter(childType);
                // FIXME: Handle cases where childConverters is pre-occupied with the same mask
                childConverters.put(jsonType, new ConverterInfo(index++, converter));
            }
        }

        private JsonType getJsonType(TypeDescription.Category category) {
            switch (category) {
                case BOOLEAN:
                    return JsonType.BOOLEAN;
                case BYTE:
                case SHORT:
                case INT:
                case LONG:
                case FLOAT:
                case DOUBLE:
                case DECIMAL:
                    return JsonType.NUMBER;
                case CHAR:
                case VARCHAR:
                case STRING:
                    return JsonType.STRING;
                default:
                    throw new UnsupportedOperationException();
            }
        }

        private JsonType getJsonType(JsonPrimitive value) {
            if (value.isBoolean()) {
                return JsonType.BOOLEAN;
            } else if (value.isNumber()) {
                return JsonType.NUMBER;
            } else if (value.isString()) {
                return JsonType.STRING;
            } else {
                throw new UnsupportedOperationException();
            }
        }

        public void convert(JsonElement value, ColumnVector vect, int row) {
            if (value == null || value.isJsonNull()) {
                vect.noNulls = false;
                vect.isNull[row] = true;
            } else if (value.isJsonPrimitive()) {
                UnionColumnVector vector = (UnionColumnVector) vect;
                JsonPrimitive primitive = value.getAsJsonPrimitive();

                JsonType jsonType = getJsonType(primitive);
                ConverterInfo converterInfo = childConverters.get(jsonType);
                if (converterInfo == null) {
                    String message = String.format("Unable to infer type for '%s'", primitive);
                    throw new IllegalArgumentException(message);
                }

                int vectorIndex = converterInfo.getVectorIndex();
                JsonConverter converter = converterInfo.getConverter();
                vector.tags[row] = vectorIndex;
                converter.convert(value, vector.fields[vectorIndex], row);
            } else {
                // It would be great to support non-primitive types in union type.
                // Let's leave this for another PR in the future.
                throw new UnsupportedOperationException();
            }
        }
    }

    static class StructColumnConverter implements JsonConverter {
        private JsonConverter[] childrenConverters;
        private List<String> fieldNames;

        public StructColumnConverter(TypeDescription schema) {
            List<TypeDescription> kids = schema.getChildren();
            childrenConverters = new JsonConverter[kids.size()];
            for (int c = 0; c < childrenConverters.length; ++c) {
                childrenConverters[c] = createConverter(kids.get(c));
            }
            fieldNames = schema.getFieldNames();
        }

        public void convert(JsonElement value, ColumnVector vect, int row) {
            if (value == null || value.isJsonNull()) {
                vect.noNulls = false;
                vect.isNull[row] = true;
            } else {
                StructColumnVector vector = (StructColumnVector) vect;
                JsonObject obj = value.getAsJsonObject();
                for (int c = 0; c < childrenConverters.length; ++c) {
                    JsonElement elem = obj.get(fieldNames.get(c));
                    childrenConverters[c].convert(elem, vector.fields[c], row);
                }
            }
        }
    }

    static class ListColumnConverter implements JsonConverter {
        private JsonConverter childrenConverter;

        public ListColumnConverter(TypeDescription schema) {
            childrenConverter = createConverter(schema.getChildren().get(0));
        }

        public void convert(JsonElement value, ColumnVector vect, int row) {
            if (value == null || value.isJsonNull()) {
                vect.noNulls = false;
                vect.isNull[row] = true;
            } else {
                ListColumnVector vector = (ListColumnVector) vect;
                JsonArray obj = value.getAsJsonArray();
                vector.lengths[row] = obj.size();
                vector.offsets[row] = vector.childCount;
                vector.childCount += vector.lengths[row];
                vector.child.ensureSize(vector.childCount, true);
                for (int c = 0; c < obj.size(); ++c) {
                    childrenConverter.convert(obj.get(c), vector.child,
                            (int) vector.offsets[row] + c);
                }
            }
        }
    }

    /**
     * Creates the converter that matches the category of {@code schema}. Compound types (struct, list, map,
     * union) receive the schema so that they can build converters for their children.
     *
     * @param schema ORC type description to create a converter for
     * @return a converter for values of that type
     * @throws IllegalArgumentException if the category has no converter
     */
    public static JsonConverter createConverter(TypeDescription schema) {
        final TypeDescription.Category category = schema.getCategory();
        switch (category) {
            // Compound types
            case STRUCT:
                return new StructColumnConverter(schema);
            case LIST:
                return new ListColumnConverter(schema);
            case MAP:
                return new MapColumnConverter(schema);
            case UNION:
                return new UnionColumnConverter(schema);

            // Primitive types
            case BOOLEAN:
                return new BooleanColumnConverter();
            case BYTE:
            case SHORT:
            case INT:
            case LONG:
                return new LongColumnConverter();
            case FLOAT:
            case DOUBLE:
                return new DoubleColumnConverter();
            case DECIMAL:
                return new DecimalColumnConverter();
            case CHAR:
            case VARCHAR:
            case STRING:
                return new StringColumnConverter();
            case BINARY:
                return new BinaryColumnConverter();
            case TIMESTAMP:
                return new TimestampColumnConverter();

            default:
                throw new IllegalArgumentException("Unhandled type " + schema);
        }
    }

    /**
     * Populates one row of {@code batch} from a JSON object. Column {@code c} is filled from the member of
     * {@code data} named after the {@code c}-th field of {@code schema}, using {@code converters[c]}; a member
     * that is missing from {@code data} produces a null cell.
     *
     * @param rowIndex   row of the batch to populate
     * @param converters one converter per top-level column, in column order
     * @param schema     struct schema that supplies the column names
     * @param batch      batch whose columns are written
     * @param data       JSON object holding the row values
     */
    public static void fillRow(int rowIndex, JsonConverter[] converters,
            TypeDescription schema, VectorizedRowBatch batch, JsonObject data) {
        final List<String> columnNames = schema.getFieldNames();
        int col = 0;
        while (col < converters.length) {
            final JsonElement cell = data.get(columnNames.get(col));
            if (cell != null) {
                converters[col].convert(cell, batch.cols[col], rowIndex);
            } else {
                // Member missing from the JSON object: record a null cell.
                batch.cols[col].noNulls = false;
                batch.cols[col].isNull[rowIndex] = true;
            }
            ++col;
        }
    }
}