package org.apache.hadoop.hive.cassandra.serde;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;

import org.apache.cassandra.db.marshal.AbstractType;
import org.apache.cassandra.db.marshal.BytesType;
import org.apache.cassandra.db.marshal.TypeParser;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.BytesWritable;

/**
 * SerDe for Cassandra column families that additionally reads a list of Cassandra
 * validator types from the table properties and hands it to the lazy struct inspector.
 */
public class CassandraColumnSerDe extends AbstractColumnSerDe {

  /** Table property holding the comma-separated list of Cassandra validator types. */
  public static final String CASSANDRA_VALIDATOR_TYPE = "cassandra.cf.validatorType";

  /** Validator used for any entry of the validator list that is blank. */
  public static final AbstractType DEFAULT_VALIDATOR_TYPE = BytesType.instance;

  /** Validators parsed from {@link #CASSANDRA_VALIDATOR_TYPE}; empty when the property is absent. */
  private List<AbstractType> validatorType;

  /**
   * Initialize the cassandra serialization and deserialization parameters from table
   * properties and configuration.
   *
   * @param job configuration forwarded to {@link LazySimpleSerDe#initSerdeParams}
   * @param tbl table properties from which the column family, column mapping and
   *            validator types are read
   * @param serdeName name of this SerDe, used as a prefix in error messages
   * @throws SerDeException when the number of Hive columns differs from the number of
   *         mapped Cassandra columns, when a mapping ending in ":" is not bound to a
   *         {@code map<string,?>} column, or when the validator types cannot be parsed
   */
  @Override
  protected void initCassandraSerDeParameters(Configuration job, Properties tbl, String serdeName)
      throws SerDeException {
    cassandraColumnFamily = getCassandraColumnFamily(tbl);
    cassandraColumnNames = parseOrCreateColumnMapping(tbl);

    cassandraColumnNamesBytes = new ArrayList<BytesWritable>();
    for (String columnName : cassandraColumnNames) {
      cassandraColumnNamesBytes.add(new BytesWritable(columnName.getBytes()));
    }

    iKey = cassandraColumnNames.indexOf(AbstractColumnSerDe.CASSANDRA_KEY_COLUMN);

    serdeParams = LazySimpleSerDe.initSerdeParams(job, tbl, serdeName);

    // Must run after cassandraColumnNames is set: the validator count is checked against it.
    validatorType = parseOrCreateValidatorType(tbl);

    setTableMapping();

    if (cassandraColumnNames.size() != serdeParams.getColumnNames().size()) {
      throw new SerDeException(serdeName + ": columns has "
          + serdeParams.getColumnNames().size()
          + " elements while cassandra.columns.mapping has "
          + cassandraColumnNames.size() + " elements"
          + " (counting the key if implicit)");
    }

    // we just can make sure that "StandardColumn:" is mapped to MAP<String,?>
    for (int i = 0; i < cassandraColumnNames.size(); i++) {
      String cassandraColName = cassandraColumnNames.get(i);
      if (!cassandraColName.endsWith(":")) {
        continue;
      }

      TypeInfo typeInfo = serdeParams.getColumnTypes().get(i);
      // Compare the key type name by value; reference comparison on strings can reject
      // a perfectly valid map<string,?> column when the name is not the same instance.
      boolean isStringKeyedMap = typeInfo.getCategory() == Category.MAP
          && Constants.STRING_TYPE_NAME.equals(
              ((MapTypeInfo) typeInfo).getMapKeyTypeInfo().getTypeName());
      if (!isStringKeyedMap) {
        throw new SerDeException(
            serdeName + ": Cassandra column family '"
                + cassandraColName
                + "' should be mapped to map<string,?> but is mapped to "
                + typeInfo.getTypeName());
      }
    }
  }

  /**
   * Builds the lazy struct object inspector from the parsed SerDe parameters and the
   * validator types read during initialization.
   *
   * @return the object inspector produced by {@link CassandraLazyFactory}
   */
  @Override
  protected ObjectInspector createObjectInspector() {
    return CassandraLazyFactory.createLazyStructInspector(
        serdeParams.getColumnNames(),
        serdeParams.getColumnTypes(),
        validatorType,
        serdeParams.getSeparators(),
        serdeParams.getNullSequence(),
        serdeParams.isLastColumnTakesRest(),
        serdeParams.isEscaped(),
        serdeParams.getEscapeChar());
  }

  /**
   * Parse or create the validator types. If <code>CASSANDRA_VALIDATOR_TYPE</code> is defined
   * in the property, it will be used for parsing; otherwise an empty list will be returned.
   *
   * @param tbl property list
   * @return a list of validator types, or an empty list if no property is defined
   * @throws SerDeException when the number of validator types is fewer than the number of
   *         columns or when no matching validator type is found in Cassandra
   */
  private List<AbstractType> parseOrCreateValidatorType(Properties tbl)
      throws SerDeException {
    String prop = tbl.getProperty(CASSANDRA_VALIDATOR_TYPE);
    List<AbstractType> result = new ArrayList<AbstractType>();

    if (prop != null) {
      assert StringUtils.isNotBlank(prop);
      // NOTE(review): the property is split on every ',' so a validator whose own textual
      // form contains a comma would be broken into pieces - confirm whether such types
      // need to be supported here.
      String[] validators = prop.split(",");
      String[] trimmedValidators = trim(validators);

      List<String> columnList = Arrays.asList(trimmedValidators);
      result = parseValidatorType(columnList);

      if (result.size() < cassandraColumnNames.size()) {
        throw new SerDeException("There are fewer validator types defined than the column names. "
            + "ColumnaName size: " + cassandraColumnNames.size()
            + " ValidatorType size: " + result.size());
      }
    }

    return result;
  }

  /**
   * Converts validator type names into Cassandra validator instances. A blank entry is
   * replaced by {@link #DEFAULT_VALIDATOR_TYPE}; every other entry is handed to
   * {@link TypeParser#parse}.
   *
   * @param columnList a list of column validator types in String format
   * @return a list of cassandra validator types, one per input entry and in the same order
   * @throws SerDeException when an entry cannot be parsed; the parser failure is attached
   *         as the cause
   */
  private List<AbstractType> parseValidatorType(List<String> columnList)
      throws SerDeException {
    List<AbstractType> types = new ArrayList<AbstractType>();

    for (String str : columnList) {
      if (StringUtils.isBlank(str)) {
        types.add(DEFAULT_VALIDATOR_TYPE);
      } else {
        try {
          types.add(TypeParser.parse(str));
        } catch (Exception e) {
          // Keep the original failure so the reason the type was rejected is not lost.
          throw new SerDeException("Invalid Cassandra validator type ' " + str + "'", e);
        }
      }
    }

    return types;
  }
}