/*
 * Copyright 2013 Cloudera Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.kitesdk.data.spi;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.BinaryNode;
import com.fasterxml.jackson.databind.node.BooleanNode;
import com.fasterxml.jackson.databind.node.MissingNode;
import com.fasterxml.jackson.databind.node.NullNode;
import com.fasterxml.jackson.databind.node.NumericNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.fasterxml.jackson.databind.node.TextNode;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.avro.AvroRuntimeException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.kitesdk.data.DatasetIOException;
import org.kitesdk.data.DatasetRecordException;
import org.kitesdk.data.ValidationException;

public class JsonUtil {

  private static final JsonFactory FACTORY = new JsonFactory();

  public static Iterator<JsonNode> parser(final InputStream stream) {
    try {
      JsonParser parser = FACTORY.createParser(stream);
      parser.setCodec(new ObjectMapper());
      return parser.readValuesAs(JsonNode.class);
    } catch (IOException e) {
      throw new DatasetIOException("Cannot read from stream", e);
    }
  }

  public static JsonNode parse(String json) {
    return parse(json, JsonNode.class);
  }

  public static <T> T parse(String json, Class<T> returnType) {
    ObjectMapper mapper = new ObjectMapper();
    try {
      return mapper.readValue(json, returnType);
    } catch (JsonParseException e) {
      throw new ValidationException("Invalid JSON", e);
    } catch (JsonMappingException e) {
      throw new ValidationException("Invalid JSON", e);
    } catch (IOException e) {
      throw new DatasetIOException("Cannot initialize JSON parser", e);
    }
  }

  public static JsonNode parse(File file) {
    return parse(file, JsonNode.class);
  }

  public static <T> T parse(File file, Class<T> returnType) {
    ObjectMapper mapper = new ObjectMapper();
    try {
      return mapper.readValue(file, returnType);
    } catch (JsonParseException e) {
      throw new ValidationException("Invalid JSON", e);
    } catch (JsonMappingException e) {
      throw new ValidationException("Invalid JSON", e);
    } catch (IOException e) {
      throw new DatasetIOException("Cannot initialize JSON parser", e);
    }
  }

  public static JsonNode parse(InputStream in) {
    return parse(in, JsonNode.class);
  }

  public static <T> T parse(InputStream in, Class<T> returnType) {
    ObjectMapper mapper = new ObjectMapper();
    try {
      return mapper.readValue(in, returnType);
    } catch (JsonParseException e) {
      throw new ValidationException("Invalid JSON", e);
    } catch (JsonMappingException e) {
      throw new ValidationException("Invalid JSON", e);
    } catch (IOException e) {
      throw new DatasetIOException("Cannot initialize JSON parser", e);
    }
  }

  public abstract static class JsonTreeVisitor<T> {
    protected LinkedList<String> recordLevels = Lists.newLinkedList();

    public T object(ObjectNode object, Map<String, T> fields) {
      return null;
    }

    public T array(ArrayNode array, List<T> elements) {
      return null;
    }

    public T binary(BinaryNode binary) {
      return null;
    }

    public T text(TextNode text) {
      return null;
    }

    public T number(NumericNode number) {
      return null;
    }

    public T bool(BooleanNode bool) {
      return null;
    }

    public T missing(MissingNode missing) {
      return null;
    }

    public T nullNode(NullNode nullNode) {
      return null;
    }
  }

  @edu.umd.cs.findbugs.annotations.SuppressWarnings(
      value="BC_UNCONFIRMED_CAST",
      justification="Uses precondition to validate casts")
  public static <T> T visit(JsonNode node, JsonTreeVisitor<T> visitor) {
    switch (node.getNodeType()) {
      case OBJECT:
        Preconditions.checkArgument(node instanceof ObjectNode,
            "Expected instance of ObjectNode: " + node);

        // use LinkedHashMap to preserve field order
        Map<String, T> fields = Maps.newLinkedHashMap();

        Iterator<Map.Entry<String, JsonNode>> iter = node.fields();
        while (iter.hasNext()) {
          Map.Entry<String, JsonNode> entry = iter.next();

          visitor.recordLevels.push(entry.getKey());
          fields.put(entry.getKey(), visit(entry.getValue(), visitor));
          visitor.recordLevels.pop();
        }

        return visitor.object((ObjectNode) node, fields);

      case ARRAY:
        Preconditions.checkArgument(node instanceof ArrayNode,
            "Expected instance of ArrayNode: " + node);

        List<T> elements = Lists.newArrayListWithExpectedSize(node.size());

        for (JsonNode element : node) {
          elements.add(visit(element, visitor));
        }

        return visitor.array((ArrayNode) node, elements);

      case BINARY:
        Preconditions.checkArgument(node instanceof BinaryNode,
            "Expected instance of BinaryNode: " + node);
        return visitor.binary((BinaryNode) node);

      case STRING:
        Preconditions.checkArgument(node instanceof TextNode,
            "Expected instance of TextNode: " + node);

        return visitor.text((TextNode) node);

      case NUMBER:
        Preconditions.checkArgument(node instanceof NumericNode,
            "Expected instance of NumericNode: " + node);

        return visitor.number((NumericNode) node);

      case BOOLEAN:
        Preconditions.checkArgument(node instanceof BooleanNode,
            "Expected instance of BooleanNode: " + node);

        return visitor.bool((BooleanNode) node);

      case MISSING:
        Preconditions.checkArgument(node instanceof MissingNode,
            "Expected instance of MissingNode: " + node);

        return visitor.missing((MissingNode) node);

      case NULL:
        Preconditions.checkArgument(node instanceof NullNode,
            "Expected instance of NullNode: " + node);

        return visitor.nullNode((NullNode) node);

      default:
        throw new IllegalArgumentException(
            "Unknown node type: " + node.getNodeType() + ": " + node);
    }
  }

  public static Object convertToAvro(GenericData model, JsonNode datum,
                                     Schema schema) {
    if (datum == null) {
      return null;
    }
    switch (schema.getType()) {
      case RECORD:
        DatasetRecordException.check(datum.isObject(),
            "Cannot convert non-object to record: %s", datum);
        Object record = model.newRecord(null, schema);
        for (Schema.Field field : schema.getFields()) {
          model.setField(record, field.name(), field.pos(),
              convertField(model, datum.get(field.name()), field));
        }
        return record;

      case MAP:
        DatasetRecordException.check(datum.isObject(),
            "Cannot convert non-object to map: %s", datum);
        Map<String, Object> map = Maps.newLinkedHashMap();
        Iterator<Map.Entry<String, JsonNode>> iter = datum.fields();
        while (iter.hasNext()) {
          Map.Entry<String, JsonNode> entry = iter.next();
          map.put(entry.getKey(), convertToAvro(
              model, entry.getValue(), schema.getValueType()));
        }
        return map;

      case ARRAY:
        DatasetRecordException.check(datum.isArray(),
            "Cannot convert to array: %s", datum);
        List<Object> list = Lists.newArrayListWithExpectedSize(datum.size());
        for (JsonNode element : datum) {
          list.add(convertToAvro(model, element, schema.getElementType()));
        }
        return list;

      case UNION:
        return convertToAvro(model, datum,
            resolveUnion(datum, schema.getTypes()));

      case BOOLEAN:
        DatasetRecordException.check(datum.isBoolean(),
            "Cannot convert to boolean: %s", datum);
        return datum.booleanValue();

      case FLOAT:
        DatasetRecordException.check(datum.isFloat() || datum.isInt(),
            "Cannot convert to float: %s", datum);
        return datum.floatValue();

      case DOUBLE:
        DatasetRecordException.check(
            datum.isDouble() || datum.isFloat() ||
            datum.isLong() || datum.isInt(),
            "Cannot convert to double: %s", datum);
        return datum.doubleValue();

      case INT:
        DatasetRecordException.check(datum.isInt(),
            "Cannot convert to int: %s", datum);
        return datum.intValue();

      case LONG:
        DatasetRecordException.check(datum.isLong() || datum.isInt(),
            "Cannot convert to long: %s", datum);
        return datum.longValue();

      case STRING:
        DatasetRecordException.check(datum.isTextual(),
            "Cannot convert to string: %s", datum);
        return datum.textValue();

      case ENUM:
        DatasetRecordException.check(datum.isTextual(),
            "Cannot convert to string: %s", datum);
        return model.createEnum(datum.textValue(), schema);

      case BYTES:
        DatasetRecordException.check(datum.isBinary(),
            "Cannot convert to binary: %s", datum);
        try {
          return ByteBuffer.wrap(datum.binaryValue());
        } catch (IOException e) {
          throw new DatasetRecordException("Failed to read JSON binary", e);
        }

      case FIXED:
        DatasetRecordException.check(datum.isBinary(),
            "Cannot convert to fixed: %s", datum);
        byte[] bytes;
        try {
          bytes = datum.binaryValue();
        } catch (IOException e) {
          throw new DatasetRecordException("Failed to read JSON binary", e);
        }
        DatasetRecordException.check(bytes.length < schema.getFixedSize(),
            "Binary data is too short: %s bytes for %s", bytes.length, schema);
        return model.createFixed(null, bytes, schema);

      case NULL:
        return null;

      default:
        // don't use DatasetRecordException because this is a Schema problem
        throw new IllegalArgumentException("Unknown schema type: " + schema);
    }
  }

  private static Object convertField(GenericData model, JsonNode datum,
                                     Schema.Field field) {
    try {
      Object value = convertToAvro(model, datum, field.schema());
      if (value != null || SchemaUtil.nullOk(field.schema())) {
        return value;
      } else {
        return model.getDefaultValue(field);
      }
    } catch (DatasetRecordException e) {
      // add the field name to the error message
      throw new DatasetRecordException(String.format(
          "Cannot convert field %s", field.name()), e);
    } catch (AvroRuntimeException e) {
      throw new DatasetRecordException(String.format(
          "Field %s: cannot make %s value: '%s'",
          field.name(), field.schema(), String.valueOf(datum)), e);
    }
  }

  private static Schema resolveUnion(JsonNode datum, Collection<Schema> schemas) {
    Set<Schema.Type> primitives = Sets.newHashSet();
    List<Schema> others = Lists.newArrayList();
    for (Schema schema : schemas) {
      if (PRIMITIVES.containsKey(schema.getType())) {
        primitives.add(schema.getType());
      } else {
        others.add(schema);
      }
    }

    // Try to identify specific primitive types
    Schema primitiveSchema = null;
    if (datum == null || datum.isNull()) {
      primitiveSchema = closestPrimitive(primitives, Schema.Type.NULL);
    } else if (datum.isShort() || datum.isInt()) {
      primitiveSchema = closestPrimitive(primitives,
          Schema.Type.INT, Schema.Type.LONG,
          Schema.Type.FLOAT, Schema.Type.DOUBLE);
    } else if (datum.isLong()) {
      primitiveSchema = closestPrimitive(primitives,
          Schema.Type.LONG, Schema.Type.DOUBLE);
    } else if (datum.isFloat()) {
      primitiveSchema = closestPrimitive(primitives,
          Schema.Type.FLOAT, Schema.Type.DOUBLE);
    } else if (datum.isDouble()) {
      primitiveSchema = closestPrimitive(primitives, Schema.Type.DOUBLE);
    } else if (datum.isBoolean()) {
      primitiveSchema = closestPrimitive(primitives, Schema.Type.BOOLEAN);
    }

    if (primitiveSchema != null) {
      return primitiveSchema;
    }

    // otherwise, select the first schema that matches the datum
    for (Schema schema : others) {
      if (matches(datum, schema)) {
        return schema;
      }
    }

    throw new DatasetRecordException(String.format(
        "Cannot resolve union: %s not in %s", datum, schemas));
  }

  // this does not contain string, bytes, or fixed because the datum type
  // doesn't necessarily determine the schema.
  private static ImmutableMap<Schema.Type, Schema> PRIMITIVES = ImmutableMap
      .<Schema.Type, Schema>builder()
      .put(Schema.Type.NULL, Schema.create(Schema.Type.NULL))
      .put(Schema.Type.BOOLEAN, Schema.create(Schema.Type.BOOLEAN))
      .put(Schema.Type.INT, Schema.create(Schema.Type.INT))
      .put(Schema.Type.LONG, Schema.create(Schema.Type.LONG))
      .put(Schema.Type.FLOAT, Schema.create(Schema.Type.FLOAT))
      .put(Schema.Type.DOUBLE, Schema.create(Schema.Type.DOUBLE))
      .build();

  private static Schema closestPrimitive(Set<Schema.Type> possible, Schema.Type... types) {
    for (Schema.Type type : types) {
      if (possible.contains(type) && PRIMITIVES.containsKey(type)) {
        return PRIMITIVES.get(type);
      }
    }
    return null;
  }

  private static boolean matches(JsonNode datum, Schema schema) {
    switch (schema.getType()) {
      case RECORD:
        if (datum.isObject()) {
          // check that each field is present or has a default
          boolean missingField = false;
          for (Schema.Field field : schema.getFields()) {
            if (!datum.has(field.name()) && field.defaultValue() == null) {
              missingField = true;
              break;
            }
          }
          if (!missingField) {
            return true;
          }
        }
        break;
      case UNION:
        if (resolveUnion(datum, schema.getTypes()) != null) {
          return true;
        }
        break;
      case MAP:
        if (datum.isObject()) {
          return true;
        }
        break;
      case ARRAY:
        if (datum.isArray()) {
          return true;
        }
        break;
      case BOOLEAN:
        if (datum.isBoolean()) {
          return true;
        }
        break;
      case FLOAT:
        if (datum.isFloat() || datum.isInt()) {
          return true;
        }
        break;
      case DOUBLE:
        if (datum.isDouble() || datum.isFloat() ||
            datum.isLong() || datum.isInt()) {
          return true;
        }
        break;
      case INT:
        if (datum.isInt()) {
          return true;
        }
        break;
      case LONG:
        if (datum.isLong() || datum.isInt()) {
          return true;
        }
        break;
      case STRING:
        if (datum.isTextual()) {
          return true;
        }
        break;
      case ENUM:
        if (datum.isTextual() && schema.hasEnumSymbol(datum.textValue())) {
          return true;
        }
        break;
      case BYTES:
      case FIXED:
        if (datum.isBinary()) {
          return true;
        }
        break;
      case NULL:
        if (datum == null || datum.isNull()) {
          return true;
        }
        break;
      default: // UNION or unknown
        throw new IllegalArgumentException("Unsupported schema: " + schema);
    }
    return false;
  }

  public static Schema inferSchema(InputStream incoming, final String name,
                                   int numRecords) {
    Iterator<Schema> schemas = Iterators.transform(parser(incoming),
        new Function<JsonNode, Schema>() {
          @Override
          public Schema apply(JsonNode node) {
            return inferSchema(node, name);
          }
        });

    if (!schemas.hasNext()) {
      return null;
    }

    Schema result = schemas.next();
    for (int i = 1; schemas.hasNext() && i < numRecords; i += 1) {
      result = SchemaUtil.merge(result, schemas.next());
    }

    return result;
  }

  public static Schema inferSchema(JsonNode node, String name) {
    return visit(node, new JsonSchemaVisitor(name));
  }

  public static Schema inferSchemaWithMaps(JsonNode node, String name) {
    return visit(node, new JsonSchemaVisitor(name).useMaps());
  }

  private static class JsonSchemaVisitor extends JsonTreeVisitor<Schema> {

    private static final Joiner DOT = Joiner.on('.');
    private final String name;
    private boolean objectsToRecords = true;

    public JsonSchemaVisitor(String name) {
      this.name = name;
    }

    public JsonSchemaVisitor useMaps() {
      this.objectsToRecords = false;
      return this;
    }

    @Override
    public Schema object(ObjectNode object, Map<String, Schema> fields) {
      if (objectsToRecords || recordLevels.size() < 1) {
        List<Schema.Field> recordFields = Lists.newArrayListWithExpectedSize(
            fields.size());

        for (Map.Entry<String, Schema> entry : fields.entrySet()) {
          recordFields.add(new Schema.Field(
              entry.getKey(), entry.getValue(),
              "Type inferred from '" + object.get(entry.getKey()) + "'",
              null));
        }

        Schema recordSchema;
        if (recordLevels.size() < 1) {
          recordSchema = Schema.createRecord(name, null, null, false);
        } else {
          recordSchema = Schema.createRecord(
              DOT.join(recordLevels), null, null, false);
        }

        recordSchema.setFields(recordFields);

        return recordSchema;

      } else {
        // translate to a map; use LinkedHashSet to preserve schema order
        switch (fields.size()) {
          case 0:
            return Schema.createMap(Schema.create(Schema.Type.NULL));
          case 1:
            return Schema.createMap(Iterables.getOnlyElement(fields.values()));
          default:
            return Schema.createMap(SchemaUtil.mergeOrUnion(fields.values()));
        }
      }
    }

    @Override
    public Schema array(ArrayNode ignored, List<Schema> elementSchemas) {
      // use LinkedHashSet to preserve schema order
      switch (elementSchemas.size()) {
        case 0:
          return Schema.createArray(Schema.create(Schema.Type.NULL));
        case 1:
          return Schema.createArray(Iterables.getOnlyElement(elementSchemas));
        default:
          return Schema.createArray(SchemaUtil.mergeOrUnion(elementSchemas));
      }
    }

    @Override
    public Schema binary(BinaryNode ignored) {
      return Schema.create(Schema.Type.BYTES);
    }

    @Override
    public Schema text(TextNode ignored) {
      return Schema.create(Schema.Type.STRING);
    }

    @Override
    public Schema number(NumericNode number) {
      if (number.isInt()) {
        return Schema.create(Schema.Type.INT);
      } else if (number.isLong()) {
        return Schema.create(Schema.Type.LONG);
      } else if (number.isFloat()) {
        return Schema.create(Schema.Type.FLOAT);
      } else if (number.isDouble()) {
        return Schema.create(Schema.Type.DOUBLE);
      } else {
        throw new UnsupportedOperationException(
            number.getClass().getName() + " is not supported");
      }
    }

    @Override
    public Schema bool(BooleanNode ignored) {
      return Schema.create(Schema.Type.BOOLEAN);
    }

    @Override
    public Schema nullNode(NullNode ignored) {
      return Schema.create(Schema.Type.NULL);
    }

    @Override
    public Schema missing(MissingNode ignored) {
      throw new UnsupportedOperationException("MissingNode is not supported.");
    }
  }
}