/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.micmiu.hive.serde;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.codehaus.jackson.map.ObjectMapper;

/**
 *
 * https://github.com/cloudera/cdh-twitter-example/tree/master/hive-serdes
 *
 * This SerDe can be used for processing JSON data in Hive. It supports
 * arbitrary JSON data and can handle all Hive types except UNION. The JSON
 * data is expected to be a series of discrete records (with the default text
 * input format, one JSON object per line), rather than a single JSON array
 * of objects.
 * <p/>
 * The Hive table is expected to contain columns with names corresponding to
 * fields in the JSON data, but it is not necessary for every JSON field to
 * have a corresponding Hive column; unmatched JSON fields are simply ignored
 * during queries.
 * <p/>
 * Example:
 * <p/>
 * { "a": 1, "b": [ "str1", "str2" ], "c": { "field1": "val1" } }
 * <p/>
 * Could correspond to a table:
 * <p/>
 * CREATE TABLE foo (a INT, b ARRAY<STRING>, c STRUCT<field1:STRING>);
 * <p/>
 * JSON objects can also be interpreted as a Hive MAP type, so long as the keys
 * and values in the JSON object are all of the appropriate types. For example,
 * in the JSON above, another valid table declaration would be:
 * <p/>
 * CREATE TABLE foo (a INT, b ARRAY<STRING>, c MAP<STRING,STRING>);
 * <p/>
 * Only STRING keys are supported for Hive MAPs.
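 * <p/>
 * A typical usage sketch (the jar path below is illustrative, not part of
 * this project):
 * <p/>
 * ADD JAR /path/to/hive-serdes.jar;
 * CREATE TABLE foo (a INT, b ARRAY<STRING>, c STRUCT<field1:STRING>)
 * ROW FORMAT SERDE 'com.micmiu.hive.serde.JSONCDHSerDe'
 * STORED AS TEXTFILE;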
 */
public class JSONCDHSerDe implements SerDe {

	private StructTypeInfo rowTypeInfo;
	private ObjectInspector rowOI;
	private List<String> colNames;
	private List<Object> row = new ArrayList<Object>();

	// Jackson's ObjectMapper is thread-safe once configured and relatively
	// expensive to construct, so one shared instance is reused for every
	// record instead of creating a new mapper per call.
	private final ObjectMapper mapper = new ObjectMapper();

	/**
	 * An initialization function used to gather information about the table.
	 * Typically, a SerDe implementation will be interested in the list of
	 * column names and their types. That information will be used to help perform
	 * actual serialization and deserialization of data.
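	 * <p/>
	 * For the example table in the class comment, Hive passes properties along
	 * the lines of (the exact encoding may vary between Hive versions):
	 * columns = a,b,c
	 * columns.types = int:array<string>:struct<field1:string>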
	 */
	@Override
	public void initialize(Configuration conf, Properties tbl)
			throws SerDeException {
		// Get a list of the table's column names.
		String colNamesStr = tbl.getProperty(serdeConstants.LIST_COLUMNS);
		colNames = Arrays.asList(colNamesStr.split(","));

		// Get a list of TypeInfos for the columns. This list lines up with
		// the list of column names.
		String colTypesStr = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
		List<TypeInfo> colTypes =
				TypeInfoUtils.getTypeInfosFromTypeString(colTypesStr);

		rowTypeInfo =
				(StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes);
		rowOI =
				TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
	}

	/**
	 * This method does the work of deserializing a record into Java objects that
	 * Hive can work with via the ObjectInspector interface. For this SerDe, the
	 * blob that is passed in is a JSON string, and the Jackson JSON parser is
	 * being used to translate the string into Java objects.
	 * <p/>
	 * The JSON deserialization works by taking the column names in the Hive
	 * table, and looking up those fields in the parsed JSON object. If the value
	 * of the field is not a primitive, the object is parsed further.
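	 * <p/>
	 * For example, with the table from the class comment, the input record
	 * { "a": 1, "b": [ "str1", "str2" ], "c": { "field1": "val1" } }
	 * deserializes to a three-element row list; the struct column is itself
	 * represented as a list of its field values: [1, [str1, str2], [val1]].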
	 */
	@Override
	public Object deserialize(Writable blob) throws SerDeException {
		Map<?, ?> root = null;
		row.clear();
		try {
			// This is really a Map<String, Object>. For more information about how
			// Jackson parses JSON in this example, see
			// http://wiki.fasterxml.com/JacksonDataBinding
			root = mapper.readValue(blob.toString(), Map.class);
		} catch (Exception e) {
			throw new SerDeException(e);
		}

		// Lower-case the keys, since Hive column names are case-insensitive
		// and are reported to the SerDe in lower case
		Map<String, Object> lowerRoot = new HashMap<String, Object>();
		for (Map.Entry<?, ?> entry : root.entrySet()) {
			lowerRoot.put(((String) entry.getKey()).toLowerCase(), entry.getValue());
		}
		root = lowerRoot;

		Object value = null;
		for (String fieldName : rowTypeInfo.getAllStructFieldNames()) {
			try {
				TypeInfo fieldTypeInfo = rowTypeInfo.getStructFieldTypeInfo(fieldName);
				value = parseField(root.get(fieldName), fieldTypeInfo);
			} catch (Exception e) {
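				// A missing or malformed field simply becomes a NULL column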
				value = null;
			}
			row.add(value);
		}
		return row;
	}

	/**
	 * Parses a JSON object according to the Hive column's type.
	 *
	 * @param field         - The JSON object to parse
	 * @param fieldTypeInfo - Metadata about the Hive column
	 * @return - The parsed value of the field
	 */
	private Object parseField(Object field, TypeInfo fieldTypeInfo) {
		switch (fieldTypeInfo.getCategory()) {
			case PRIMITIVE:
				// Jackson already maps JSON primitives onto the matching Java
				// wrapper types. The only adjustment needed is to escape embedded
				// newlines in strings, since Hive's text format treats a newline
				// as the record delimiter.
				if (field instanceof String) {
					field = ((String) field).replace("\n", "\\n");
				}
				return field;
			case LIST:
				return parseList(field, (ListTypeInfo) fieldTypeInfo);
			case MAP:
				return parseMap(field, (MapTypeInfo) fieldTypeInfo);
			case STRUCT:
				return parseStruct(field, (StructTypeInfo) fieldTypeInfo);
			case UNION:
				// UNION is not representable in JSON; fall through and return null
			default:
				return null;
		}
	}

	/**
	 * Parses a JSON object and its fields. The Hive metadata is used to
	 * determine how to parse the object fields.
	 *
	 * @param field         - The JSON object to parse
	 * @param fieldTypeInfo - Metadata about the Hive column
	 * @return - A list of the parsed values of the struct's fields, in field order
	 */
	private Object parseStruct(Object field, StructTypeInfo fieldTypeInfo) {
		Map<Object, Object> map = (Map<Object, Object>) field;
		ArrayList<TypeInfo> structTypes = fieldTypeInfo.getAllStructFieldTypeInfos();
		ArrayList<String> structNames = fieldTypeInfo.getAllStructFieldNames();

		List<Object> structRow = new ArrayList<Object>(structTypes.size());
		if (map != null) {
			for (int i = 0; i < structNames.size(); i++) {
				structRow.add(parseField(map.get(structNames.get(i)), structTypes.get(i)));
			}
		}
		return structRow;
	}

	/**
	 * Parse a JSON list and its elements. This uses the Hive metadata for the
	 * list elements to determine how to parse the elements.
	 *
	 * @param field         - The JSON list to parse
	 * @param fieldTypeInfo - Metadata about the Hive column
	 * @return - A list of the parsed elements
	 */
	private Object parseList(Object field, ListTypeInfo fieldTypeInfo) {
		ArrayList<Object> list = (ArrayList<Object>) field;
		if (list == null) {
			return null;
		}
		TypeInfo elemTypeInfo = fieldTypeInfo.getListElementTypeInfo();
		for (int i = 0; i < list.size(); i++) {
			list.set(i, parseField(list.get(i), elemTypeInfo));
		}
		return list.toArray();
	}

	/**
	 * Parse a JSON object as a map. This uses the Hive metadata for the map
	 * values to determine how to parse the values. The map is assumed to have
	 * a string for a key.
	 *
	 * @param field         - The JSON object to parse
	 * @param fieldTypeInfo - Metadata about the Hive column
	 * @return - A map of the parsed values, keyed by field name
	 */
	private Object parseMap(Object field, MapTypeInfo fieldTypeInfo) {
		Map<Object, Object> map = (Map<Object, Object>) field;
		TypeInfo valueTypeInfo = fieldTypeInfo.getMapValueTypeInfo();
		if (map != null) {
			for (Map.Entry<Object, Object> entry : map.entrySet()) {
				map.put(entry.getKey(), parseField(entry.getValue(), valueTypeInfo));
			}
		}
		return map;
	}

	/**
	 * Return an ObjectInspector for the row of data
	 */
	@Override
	public ObjectInspector getObjectInspector() throws SerDeException {
		return rowOI;
	}

	/**
	 * Unimplemented
	 */
	@Override
	public SerDeStats getSerDeStats() {
		return null;
	}

	/**
	 * JSON is just a textual representation, so our serialized class
	 * is just Text.
	 */
	@Override
	public Class<? extends Writable> getSerializedClass() {
		return Text.class;
	}

	/**
	 * This method takes an object representing a row of data from Hive, and uses
	 * the ObjectInspector to get the data for each column and serialize it. This
	 * implementation deparses the row into an object that Jackson can easily
	 * serialize into a JSON blob.
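	 * <p/>
	 * For example, the example row from the class comment is deparsed into a
	 * Map and written out as a JSON object such as
	 * {"a":1,"b":["str1","str2"],"c":{"field1":"val1"}}
	 * (key order comes from the intermediate HashMap and is not guaranteed).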
	 */
	@Override
	public Writable serialize(Object obj, ObjectInspector oi)
			throws SerDeException {
		Object deparsedObj = deparseRow(obj, oi);
		try {
			// Let Jackson do the work of serializing the object
			return new Text(mapper.writeValueAsString(deparsedObj));
		} catch (Exception e) {
			throw new SerDeException(e);
		}
	}

	/**
	 * Deparse a Hive object into a Jackson-serializable object. This uses
	 * the ObjectInspector to extract the column data.
	 *
	 * @param obj - Hive object to deparse
	 * @param oi  - ObjectInspector for the object
	 * @return - A deparsed object
	 */
	private Object deparseObject(Object obj, ObjectInspector oi) {
		switch (oi.getCategory()) {
			case LIST:
				return deparseList(obj, (ListObjectInspector) oi);
			case MAP:
				return deparseMap(obj, (MapObjectInspector) oi);
			case PRIMITIVE:
				return deparsePrimitive(obj, (PrimitiveObjectInspector) oi);
			case STRUCT:
				return deparseStruct(obj, (StructObjectInspector) oi, false);
			case UNION:
				// UNION is not representable in JSON; fall through and return null
			default:
				return null;
		}
	}

	/**
	 * Deparses a row of data. We have to treat this one differently from
	 * other structs, because the field names for the root object do not match
	 * the column names for the Hive table.
	 *
	 * @param obj      - Object representing the top-level row
	 * @param structOI - ObjectInspector for the row
	 * @return - A deparsed row of data
	 */
	private Object deparseRow(Object obj, ObjectInspector structOI) {
		return deparseStruct(obj, (StructObjectInspector) structOI, true);
	}

	/**
	 * Deparses struct data into a serializable JSON object.
	 *
	 * @param obj      - Hive struct data
	 * @param structOI - ObjectInspector for the struct
	 * @param isRow    - Whether or not this struct represents a top-level row
	 * @return - A deparsed struct
	 */
	private Object deparseStruct(Object obj,
								 StructObjectInspector structOI,
								 boolean isRow) {
		Map<Object, Object> struct = new HashMap<Object, Object>();
		List<? extends StructField> fields = structOI.getAllStructFieldRefs();
		for (int i = 0; i < fields.size(); i++) {
			StructField field = fields.get(i);
			// The top-level row object is treated slightly differently from other
			// structs, because the field names for the row do not correctly reflect
			// the Hive column names. For lower-level structs, we can get the field
			// name from the associated StructField object.
			String fieldName = isRow ? colNames.get(i) : field.getFieldName();
			ObjectInspector fieldOI = field.getFieldObjectInspector();
			Object fieldObj = structOI.getStructFieldData(obj, field);
			struct.put(fieldName, deparseObject(fieldObj, fieldOI));
		}
		return struct;
	}

	/**
	 * Deparses a primitive type.
	 *
	 * @param obj - Hive object to deparse
	 * @param primOI  - ObjectInspector for the object
	 * @return - A deparsed object
	 */
	private Object deparsePrimitive(Object obj, PrimitiveObjectInspector primOI) {
		return primOI.getPrimitiveJavaObject(obj);
	}

	/**
	 * Deparses a map and its values. Keys are passed through unchanged; per
	 * the class documentation, only STRING keys are expected.
	 *
	 * @param obj   - Hive map data to deparse
	 * @param mapOI - ObjectInspector for the map
	 * @return - A deparsed map
	 */
	private Object deparseMap(Object obj, MapObjectInspector mapOI) {
		Map<?, ?> fields = mapOI.getMap(obj);
		if (fields == null) {
			return null;
		}
		Map<Object, Object> map = new HashMap<Object, Object>();
		ObjectInspector mapValOI = mapOI.getMapValueObjectInspector();
		for (Map.Entry<?, ?> field : fields.entrySet()) {
			Object fieldName = field.getKey();
			Object fieldObj = field.getValue();
			map.put(fieldName, deparseObject(fieldObj, mapValOI));
		}
		return map;
	}

	/**
	 * Deparses a list and its elements.
	 *
	 * @param obj - Hive object to deparse
	 * @param listOI  - ObjectInspector for the object
	 * @return - A deparsed object
	 */
	private Object deparseList(Object obj, ListObjectInspector listOI) {
		List<?> field = listOI.getList(obj);
		if (field == null) {
			return null;
		}
		List<Object> list = new ArrayList<Object>();
		ObjectInspector elemOI = listOI.getListElementObjectInspector();
		for (Object elem : field) {
			list.add(deparseObject(elem, elemOI));
		}
		return list;
	}
}