package org.jai.hive.serde;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.codehaus.jackson.map.ObjectMapper;

/**
 * This SerDe can be used for processing JSON data in Hive. It supports
 * arbitrary JSON data and can handle all Hive types except UNION. However, the
 * JSON data is expected to be a series of discrete records (typically one
 * object per line), rather than a single JSON array of objects.
 *
 * The Hive table is expected to contain columns with names corresponding to
 * fields in the JSON data, but not every JSON field needs a corresponding Hive
 * column; unmatched JSON fields are simply ignored during queries.
 *
 * Example:
 *
 * { "a": 1, "b": [ "str1", "str2" ], "c": { "field1": "val1" } }
 *
 * Could correspond to a table:
 *
 * CREATE TABLE foo (a INT, b ARRAY<STRING>, c STRUCT<field1:STRING>);
 *
 * JSON objects can also be interpreted as a Hive MAP type, as long as the keys
 * and values in the JSON object are all of the appropriate types. For example,
 * in the JSON above, another valid table declaration would be:
 *
 * CREATE TABLE foo (a INT, b ARRAY<STRING>, c MAP<STRING,STRING>);
 *
 * Only STRING keys are supported for Hive MAPs. TODO: add case-insensitive
 * handling for use in Hive.
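 *
 * To use this SerDe, register the jar and name this class in the table DDL. A
 * minimal sketch (the jar path below is hypothetical):
 *
 * ADD JAR /path/to/hive-serdes.jar;
 * CREATE TABLE foo (a INT, b ARRAY<STRING>, c STRUCT<field1:STRING>)
 * ROW FORMAT SERDE 'org.jai.hive.serde.JSONSerDe';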
 * 
 * Note: Adapted from the original code at
 * https://github.com/cloudera/cdh-twitter-example
 * /blob/master/hive-serdes/src/main/java/com/cloudera/hive/serde/JSONSerDe.java
 * Changes were made to handle BIGINT fields and to treat Hive table column
 * names case-insensitively.
 */
@SuppressWarnings("deprecation")
public class JSONSerDe extends AbstractSerDe {
	private StructTypeInfo rowTypeInfo;

	private ObjectInspector rowOI;

	private List<String> colNames;

	private final List<Object> row = new ArrayList<Object>();

	// Jackson's ObjectMapper is thread-safe once configured, so one instance
	// is reused rather than allocating a new mapper for every record.
	private static final ObjectMapper MAPPER = new ObjectMapper();

	/**
	 * An initialization function used to gather information about the table.
	 * Typically, a SerDe implementation will be interested in the list of
	 * column names and their types. That information will be used to help
	 * perform actual serialization and deserialization of data.
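	 *
	 * For the example table in the class comment, Hive would pass table
	 * properties along these lines (a sketch; the exact values are produced
	 * internally by Hive):
	 *
	 * columns=a,b,c
	 * columns.types=int:array<string>:struct<field1:string>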
	 */
	@Override
	public void initialize(final Configuration conf, final Properties tbl)
			throws SerDeException {
		// Get a list of the table's column names.
		final String colNamesStr = tbl.getProperty(serdeConstants.LIST_COLUMNS);
		// Normalize the column names to lower case so that lookups against
		// the lowercased JSON keys are case-insensitive.
		colNames = Arrays.asList(colNamesStr.toLowerCase().split(","));
		// Get a list of TypeInfos for the columns. This list lines up with
		// the list of column names.
		final String colTypesStr = tbl
				.getProperty(serdeConstants.LIST_COLUMN_TYPES);
		final List<TypeInfo> colTypes = TypeInfoUtils
				.getTypeInfosFromTypeString(colTypesStr);
		rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(
				colNames, colTypes);
		rowOI = TypeInfoUtils
				.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
	}

	/**
	 * This method does the work of deserializing a record into Java objects
	 * that Hive can work with via the ObjectInspector interface. For this
	 * SerDe, the blob that is passed in is a JSON string, and the Jackson JSON
	 * parser is being used to translate the string into Java objects.
	 *
	 * The JSON deserialization works by taking the column names in the Hive
	 * table, and looking up those fields in the parsed JSON object. If the
	 * value of the field is not a primitive, the object is parsed further.
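	 *
	 * For example, given the table from the class comment, an input record of
	 * {"a":1,"b":["str1"],"c":{"field1":"val1"}} deserializes (roughly) to the
	 * row [1, Object[]{"str1"}, List("val1")]: structs become Lists of field
	 * values and JSON arrays become Object arrays.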
	 */
	@SuppressWarnings({ "unchecked", "rawtypes" })
	@Override
	public Object deserialize(final Writable blob) throws SerDeException {
		Map<?, ?> root = null;
		row.clear();
		try {
			// The parsed value is really a Map<String, Object>. For more
			// information about how Jackson parses JSON in this example, see
			// http://wiki.fasterxml.com/JacksonDataBinding
			root = MAPPER.readValue(blob.toString(), Map.class);
		} catch (final Exception e) {
			throw new SerDeException(e);
		}
		// Lowercase the keys, as expected by Hive
		final Map<String, Object> lowerRoot = new HashMap<String, Object>();
		for (final Map.Entry<?, ?> entry : root.entrySet()) {
			lowerRoot.put(((String) entry.getKey()).toLowerCase(),
					entry.getValue());
		}
		root = lowerRoot;
		Object value = null;
		for (final String fieldName : rowTypeInfo.getAllStructFieldNames()) {
			try {
				final TypeInfo fieldTypeInfo = rowTypeInfo
						.getStructFieldTypeInfo(fieldName);
				value = parseField(root.get(fieldName), fieldTypeInfo);
			} catch (final Exception e) {
				value = null;
			}
			row.add(value);
		}
		return row;
	}

	/**
	 * Parses a JSON value according to the Hive column's type.
	 *
	 * @param field
	 *            - The JSON object to parse
	 * @param fieldTypeInfo
	 *            - Metadata about the Hive column
	 * @return - The parsed value of the field
	 */
	private Object parseField(Object field, final TypeInfo fieldTypeInfo) {
		switch (fieldTypeInfo.getCategory()) {
		case PRIMITIVE:
			// Jackson maps JSON primitives onto the matching Java types, so
			// the value can mostly be returned as-is.
			if (field instanceof String) {
				// Escape embedded newlines so that multi-line string values
				// do not break Hive's line-based text record format.
				field = ((String) field).replace("\n", "\\n");
			}
			if (field != null
					&& fieldTypeInfo.getTypeName().equalsIgnoreCase(
							serdeConstants.BIGINT_TYPE_NAME)) {
				// Jackson returns an Integer for small whole numbers, but
				// BIGINT columns expect a Long.
				field = Long.valueOf(String.valueOf(field));
			}
			return field;
		case LIST:
			return parseList(field, (ListTypeInfo) fieldTypeInfo);
		case MAP:
			return parseMap(field, (MapTypeInfo) fieldTypeInfo);
		case STRUCT:
			return parseStruct(field, (StructTypeInfo) fieldTypeInfo);
		case UNION:
			// Unsupported by JSON
		default:
			return null;
		}
	}

	/**
	 * Parses a JSON object and its fields. The Hive metadata is used to
	 * determine how to parse the object fields.
	 *
	 * @param field
	 *            - The JSON object to parse
	 * @param fieldTypeInfo
	 *            - Metadata about the Hive column
	 * @return - A List of the parsed field values, in declared field order
	 */
	@SuppressWarnings("unchecked")
	private Object parseStruct(final Object field,
			final StructTypeInfo fieldTypeInfo) {
		final Map<Object, Object> map = (Map<Object, Object>) field;
		// Guard against a missing or null JSON object: return a null struct
		// instead of throwing an NPE below.
		if (map == null) {
			return null;
		}
		final ArrayList<TypeInfo> structTypes = fieldTypeInfo
				.getAllStructFieldTypeInfos();
		final ArrayList<String> structNames = fieldTypeInfo
				.getAllStructFieldNames();
		final List<Object> structRow = new ArrayList<Object>(structTypes.size());
		for (int i = 0; i < structNames.size(); i++) {
			structRow.add(parseField(map.get(structNames.get(i)),
					structTypes.get(i)));
		}
		return structRow;
	}

	/**
	 * Parse a JSON list and its elements. This uses the Hive metadata for the
	 * list elements to determine how to parse the elements.
	 *
	 * @param field
	 *            - The JSON list to parse
	 * @param fieldTypeInfo
	 *            - Metadata about the Hive column
	 * @return - A list of the parsed elements
	 */
	@SuppressWarnings("unchecked")
	private Object parseList(final Object field,
			final ListTypeInfo fieldTypeInfo) {
		final ArrayList<Object> list = (ArrayList<Object>) field;
		// Guard against a missing or null JSON array.
		if (list == null) {
			return null;
		}
		final TypeInfo elemTypeInfo = fieldTypeInfo.getListElementTypeInfo();
		for (int i = 0; i < list.size(); i++) {
			list.set(i, parseField(list.get(i), elemTypeInfo));
		}
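		// Return an Object[]: Hive's standard list ObjectInspector accepts
		// both java.util.List and Object[] as LIST data.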
		return list.toArray();
	}

	/**
	 * Parses a JSON object as a map. This uses the Hive metadata for the map
	 * values to determine how to parse the values. The map is assumed to have
	 * string keys.
	 *
	 * @param field
	 *            - The JSON object to parse
	 * @param fieldTypeInfo
	 *            - Metadata about the Hive column
	 * @return - A map of the parsed values
	 */
	@SuppressWarnings("unchecked")
	private Object parseMap(final Object field, final MapTypeInfo fieldTypeInfo) {
		final Map<Object, Object> map = (Map<Object, Object>) field;
		// Guard against a missing or null JSON object.
		if (map == null) {
			return null;
		}
		final TypeInfo valueTypeInfo = fieldTypeInfo.getMapValueTypeInfo();
		for (final Map.Entry<Object, Object> entry : map.entrySet()) {
			map.put(entry.getKey(), parseField(entry.getValue(), valueTypeInfo));
		}
		return map;
	}

	/**
	 * Return an ObjectInspector for the row of data
	 */
	@Override
	public ObjectInspector getObjectInspector() throws SerDeException {
		return rowOI;
	}

	/**
	 * Unimplemented
	 */
	@Override
	public SerDeStats getSerDeStats() {
		return null;
	}

	/**
	 * JSON is just a textual representation, so our serialized class is just
	 * Text.
	 */
	@Override
	public Class<? extends Writable> getSerializedClass() {
		return Text.class;
	}

	/**
	 * This method takes an object representing a row of data from Hive, and
	 * uses the ObjectInspector to get the data for each column and serialize
	 * it. This implementation deparses the row into an object that Jackson can
	 * easily serialize into a JSON blob.
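	 *
	 * For example, a row deparsed to the map {a=1, b=[str1, str2]} is written
	 * out as the JSON text {"a":1,"b":["str1","str2"]} (a sketch; the actual
	 * key order depends on the HashMap built during deparsing).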
	 */
	@Override
	public Writable serialize(final Object obj, final ObjectInspector oi)
			throws SerDeException {
		final Object deparsedObj = deparseRow(obj, oi);
		try {
			// Let Jackson do the work of serializing the object
			return new Text(MAPPER.writeValueAsString(deparsedObj));
		} catch (final Exception e) {
			throw new SerDeException(e);
		}
	}

	/**
	 * Deparse a Hive object into a Jackson-serializable object. This uses the
	 * ObjectInspector to extract the column data.
	 *
	 * @param obj
	 *            - Hive object to deparse
	 * @param oi
	 *            - ObjectInspector for the object
	 * @return - A deparsed object
	 */
	private Object deparseObject(final Object obj, final ObjectInspector oi) {
		switch (oi.getCategory()) {
		case PRIMITIVE:
			return deparsePrimitive(obj, (PrimitiveObjectInspector) oi);
		case LIST:
			return deparseList(obj, (ListObjectInspector) oi);
		case MAP:
			return deparseMap(obj, (MapObjectInspector) oi);
		case STRUCT:
			return deparseStruct(obj, (StructObjectInspector) oi, false);
		case UNION:
			// Unsupported by JSON
		default:
			return null;
		}
	}

	/**
	 * Deparses a row of data. We have to treat this one differently from other
	 * structs, because the field names for the root object do not match the
	 * column names for the Hive table.
	 *
	 * @param obj
	 *            - Object representing the top-level row
	 * @param structOI
	 *            - ObjectInspector for the row
	 * @return - A deparsed row of data
	 */
	private Object deparseRow(final Object obj, final ObjectInspector structOI) {
		return deparseStruct(obj, (StructObjectInspector) structOI, true);
	}

	/**
	 * Deparses struct data into a serializable JSON object.
	 *
	 * @param obj
	 *            - Hive struct data
	 * @param structOI
	 *            - ObjectInspector for the struct
	 * @param isRow
	 *            - Whether or not this struct represents a top-level row
	 * @return - A deparsed struct
	 */
	private Object deparseStruct(final Object obj,
			final StructObjectInspector structOI, final boolean isRow) {
		final Map<Object, Object> struct = new HashMap<Object, Object>();
		final List<? extends StructField> fields = structOI
				.getAllStructFieldRefs();
		for (int i = 0; i < fields.size(); i++) {
			final StructField field = fields.get(i);
			// The top-level row object is treated slightly differently from
			// other structs, because the field names for the row do not
			// correctly reflect the Hive column names. For lower-level
			// structs, we can get the field name from the associated
			// StructField object.
			final String fieldName = isRow ? colNames.get(i) : field
					.getFieldName();
			final ObjectInspector fieldOI = field.getFieldObjectInspector();
			final Object fieldObj = structOI.getStructFieldData(obj, field);
			struct.put(fieldName, deparseObject(fieldObj, fieldOI));
		}
		return struct;
	}

	/**
	 * Deparses a primitive type.
	 *
	 * @param obj
	 *            - Hive object to deparse
	 * @param primOI
	 *            - PrimitiveObjectInspector for the object
	 * @return - A deparsed object
	 */
	private Object deparsePrimitive(final Object obj,
			final PrimitiveObjectInspector primOI) {
		return primOI.getPrimitiveJavaObject(obj);
	}

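	/**
	 * Deparses a map and its values into a Jackson-serializable map. Keys are
	 * passed through unchanged; only the values are deparsed.
	 *
	 * @param obj
	 *            - Hive map object to deparse
	 * @param mapOI
	 *            - ObjectInspector for the map
	 * @return - A deparsed map
	 */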
	private Object deparseMap(final Object obj, final MapObjectInspector mapOI) {
		final Map<Object, Object> map = new HashMap<Object, Object>();
		final ObjectInspector mapValOI = mapOI.getMapValueObjectInspector();
		final Map<?, ?> fields = mapOI.getMap(obj);
		for (final Map.Entry<?, ?> field : fields.entrySet()) {
			final Object fieldName = field.getKey();
			final Object fieldObj = field.getValue();
			map.put(fieldName, deparseObject(fieldObj, mapValOI));
		}
		return map;
	}

	/**
	 * Deparses a list and its elements.
	 *
	 * @param obj
	 *            - Hive object to deparse
	 * @param listOI
	 *            - ObjectInspector for the list
	 * @return - A deparsed object
	 */
	private Object deparseList(final Object obj,
			final ListObjectInspector listOI) {
		final List<Object> list = new ArrayList<Object>();
		final List<?> field = listOI.getList(obj);
		final ObjectInspector elemOI = listOI.getListElementObjectInspector();
		for (final Object elem : field) {
			list.add(deparseObject(elem, elemOI));
		}
		return list;
	}
}