package org.apache.hadoop.hive.cassandra.serde; import java.io.IOException; import java.nio.ByteBuffer; import java.util.List; import java.util.Map; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.BytesType; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.hadoop.hive.serde2.ByteStream; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.lazy.LazyCassandraUtils; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; import org.apache.hadoop.hive.serde2.lazy.LazyUtils; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; public abstract class TableMapping { /* names of columns from SerdeParameters */ protected final List<String> cassandraColumnNames; /* index of key column in results */ protected final int iKey; protected final String cassandraColumnFamily; private boolean useJSONSerialize; protected final ByteStream.Output serializeStream = new ByteStream.Output(); private final byte[] separators; // the separators array private final boolean escaped; // whether we need to escape the data when writing out private final byte escapeChar; // which char to use as the escape char, e.g. '\\' private final boolean[] needsEscape; // which chars need to be escaped. This array should have size TableMapping(String colFamily, List<String> columnNames, SerDeParameters serdeParams) { this.cassandraColumnFamily = colFamily; this.cassandraColumnNames = columnNames; this.iKey = cassandraColumnNames.indexOf(AbstractColumnSerDe.CASSANDRA_KEY_COLUMN); separators = serdeParams.getSeparators(); escaped = serdeParams.isEscaped(); escapeChar = serdeParams.getEscapeChar(); needsEscape = serdeParams.getNeedsEscape(); } public Writable getWritable( List<? extends StructField> fields, List<Object> list, List<? extends StructField> declaredFields) throws IOException { assert iKey >= 0; //First get the cassandra row key byte[] keyBytes = serializeToBytes(iKey, fields, list, declaredFields); return write(keyBytes, fields, list, declaredFields); } public abstract Writable write( byte[] keyBytes, List<? extends StructField> fields, List<Object> list, List<? extends StructField> declaredFields) throws IOException; /** * Serialize the index-th object into bytes array. * * @param index the index of the object to be seralized. * @param fields a list of fields * @param list a list of objects * @param declaredFields a list of declared fields * @return object serialized into bytes * @throws IOException */ protected byte[] serializeToBytes(int index, List<? extends StructField> fields, List<Object> list, List<? extends StructField> declaredFields) throws IOException { return serializeToBytes( fields.get(index).getFieldObjectInspector(), declaredFields.get(index).getFieldObjectInspector(), list.get(index), useJsonSerialize(index, declaredFields)); } /** * Return true if using json serialization. Otherwise, false; * * @param index the index of the field to be deserialized. * @param declaredFields a list of declared fields * @return true if using json serialization */ protected boolean useJsonSerialize(int index, List<? extends StructField> declaredFields) { return (declaredFields == null || declaredFields.get(index).getFieldObjectInspector().getCategory() .equals(Category.PRIMITIVE) || useJSONSerialize); } /** * Serialize a object into bytes. * @param foi object inspector * @param decalred output object inspector * @param obj object to be serialized * @param useJsonSerialize true to use json serialization * @return object in serialized bytes * @throws IOException when error happens */ protected byte[] serializeToBytes(ObjectInspector foi, ObjectInspector doi, Object obj, boolean useJsonSerialize) throws IOException { serializeStream.reset(); boolean isNotNull; if (!foi.getCategory().equals(Category.PRIMITIVE) && useJsonSerialize) { isNotNull = serialize(SerDeUtils.getJSONString(obj, foi), PrimitiveObjectInspectorFactory.javaStringObjectInspector, doi, 1); } else { isNotNull = serialize(obj, foi, doi, 1); } if (!isNotNull) { return null; } byte[] key = new byte[serializeStream.getCount()]; System.arraycopy(serializeStream.getData(), 0, key, 0, serializeStream.getCount()); return key; } protected boolean serialize(Object obj, ObjectInspector objInspector, ObjectInspector declaredObjInspector, int level) throws IOException { switch (objInspector.getCategory()) { case PRIMITIVE: { //Marshal to expected cassandra format AbstractType validator = LazyCassandraUtils.getCassandraType((PrimitiveObjectInspector)declaredObjInspector); if (validator instanceof BytesType) { BytesWritable bw = ((BinaryObjectInspector) objInspector).getPrimitiveWritableObject(obj); serializeStream.write(bw.getBytes(),0,bw.getLength()); } else { LazyUtils.writePrimitiveUTF8( serializeStream, obj, (PrimitiveObjectInspector) objInspector, escaped, escapeChar, needsEscape); //convert from string to cassandra type if (!declaredObjInspector.getTypeName().equals(PrimitiveObjectInspectorUtils.stringTypeEntry.typeName)) { ByteBuffer bb = validator.fromString(serializeStream.toString()); serializeStream.reset(); serializeStream.write(ByteBufferUtil.getArray(bb)); } } return true; } case LIST: { char separator = (char) separators[level]; ListObjectInspector loi = (ListObjectInspector) objInspector; List<?> list = loi.getList(obj); ObjectInspector eoi = loi.getListElementObjectInspector(); if (list == null) { return false; } else { for (int i = 0; i < list.size(); i++) { if (i > 0) { serializeStream.write(separator); } serialize(list.get(i), eoi, PrimitiveObjectInspectorFactory.javaStringObjectInspector, level + 1); } } return true; } case MAP: { char separator = (char) separators[level]; char keyValueSeparator = (char) separators[level + 1]; MapObjectInspector moi = (MapObjectInspector) objInspector; ObjectInspector koi = moi.getMapKeyObjectInspector(); ObjectInspector voi = moi.getMapValueObjectInspector(); Map<?, ?> map = moi.getMap(obj); if (map == null) { return false; } else { boolean first = true; for (Map.Entry<?, ?> entry : map.entrySet()) { if (first) { first = false; } else { serializeStream.write(separator); } serialize(entry.getKey(), koi, PrimitiveObjectInspectorFactory.javaStringObjectInspector, level + 2); serializeStream.write(keyValueSeparator); serialize(entry.getValue(), voi, PrimitiveObjectInspectorFactory.javaStringObjectInspector, level + 2); } } return true; } case STRUCT: { char separator = (char) separators[level]; StructObjectInspector soi = (StructObjectInspector) objInspector; List<? extends StructField> fields = soi.getAllStructFieldRefs(); List<Object> list = soi.getStructFieldsDataAsList(obj); if (list == null) { return false; } else { for (int i = 0; i < list.size(); i++) { if (i > 0) { serializeStream.write(separator); } serialize(list.get(i), fields.get(i).getFieldObjectInspector(), PrimitiveObjectInspectorFactory.javaStringObjectInspector, level + 1); } } return true; } } throw new RuntimeException("Unknown category type: " + objInspector.getCategory()); } }