org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo Java Examples

The following examples show how to use org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HiveTypeConverter.java    From metacat with Apache License 2.0 6 votes vote down vote up
@Override
public Type toMetacatType(final String type) {
    // Hack to fix presto "varchar" type coming in with no length which is required by Hive.
    final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(
        "varchar".equals(type.toLowerCase()) ? serdeConstants.STRING_TYPE_NAME : type);
    ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
    // The standard struct object inspector forces field names to lower case, however in Metacat we need to preserve
    // the original case of the struct fields so we wrap it with our wrapper to force the fieldNames to keep
    // their original case
    if (typeInfo.getCategory().equals(ObjectInspector.Category.STRUCT)) {
        final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
        final StandardStructObjectInspector objectInspector = (StandardStructObjectInspector) oi;
        oi = new HiveTypeConverter.SameCaseStandardStructObjectInspector(
            structTypeInfo.getAllStructFieldNames(), objectInspector);
    }
    return getCanonicalType(oi);
}
 
Example #2
Source File: HiveSchemaConverter.java    From kite with Apache License 2.0 6 votes vote down vote up
public static Schema convertTable(String table, Collection<FieldSchema> columns,
                                  @Nullable PartitionStrategy strategy) {
  ArrayList<String> fieldNames = Lists.newArrayList();
  ArrayList<TypeInfo> fieldTypes = Lists.newArrayList();
  LinkedList<String> start = Lists.newLinkedList();
  Collection<String[]> requiredFields = requiredFields(strategy);

  List<Schema.Field> fields = Lists.newArrayList();
  for (FieldSchema column : columns) {
    // pass null for the initial path to exclude the table name
    TypeInfo type = parseTypeInfo(column.getType());
    fieldNames.add(column.getName());
    fieldTypes.add(type);
    fields.add(convertField(start, column.getName(), type, requiredFields));
  }

  StructTypeInfo struct = new StructTypeInfo();
  struct.setAllStructFieldNames(fieldNames);
  struct.setAllStructFieldTypeInfos(fieldTypes);

  Schema recordSchema = Schema.createRecord(table, doc(struct), null, false);
  recordSchema.setFields(fields);

  return recordSchema;
}
 
Example #3
Source File: HiveSchemaConverter.java    From kite with Apache License 2.0 6 votes vote down vote up
private static Schema convert(LinkedList<String> path, String name,
                              StructTypeInfo type,
                              Collection<String[]> required) {
  List<String> names = type.getAllStructFieldNames();
  List<TypeInfo> types = type.getAllStructFieldTypeInfos();
  Preconditions.checkArgument(names.size() == types.size(),
      "Cannot convert struct: %s names != %s types",
      names.size(), types.size());

  List<Schema.Field> fields = Lists.newArrayList();
  for (int i = 0; i < names.size(); i += 1) {
    path.addLast(name);
    fields.add(convertField(path, names.get(i), types.get(i), required));
    path.removeLast();
  }

  Schema recordSchema = Schema.createRecord(name, doc(type), null, false);
  recordSchema.setFields(fields);

  return recordSchema;
}
 
Example #4
Source File: HiveCoercionPolicy.java    From presto with Apache License 2.0 6 votes vote down vote up
private boolean canCoerceForStruct(HiveType fromHiveType, HiveType toHiveType)
{
    if (fromHiveType.getCategory() != Category.STRUCT || toHiveType.getCategory() != Category.STRUCT) {
        return false;
    }
    List<String> fromFieldNames = ((StructTypeInfo) fromHiveType.getTypeInfo()).getAllStructFieldNames();
    List<String> toFieldNames = ((StructTypeInfo) toHiveType.getTypeInfo()).getAllStructFieldNames();
    List<HiveType> fromFieldTypes = extractStructFieldTypes(fromHiveType);
    List<HiveType> toFieldTypes = extractStructFieldTypes(toHiveType);
    // Rule:
    // * Fields may be added or dropped from the end.
    // * For all other field indices, the corresponding fields must have
    //   the same name, and the type must be coercible.
    for (int i = 0; i < min(fromFieldTypes.size(), toFieldTypes.size()); i++) {
        if (!fromFieldNames.get(i).equals(toFieldNames.get(i))) {
            return false;
        }
        if (!fromFieldTypes.get(i).equals(toFieldTypes.get(i)) && !canCoerce(fromFieldTypes.get(i), toFieldTypes.get(i))) {
            return false;
        }
    }
    return true;
}
 
Example #5
Source File: HiveWriteUtils.java    From presto with Apache License 2.0 6 votes vote down vote up
private static boolean isWritableType(TypeInfo typeInfo)
{
    switch (typeInfo.getCategory()) {
        case PRIMITIVE:
            PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
            return isWritablePrimitiveType(primitiveCategory);
        case MAP:
            MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
            return isWritableType(mapTypeInfo.getMapKeyTypeInfo()) && isWritableType(mapTypeInfo.getMapValueTypeInfo());
        case LIST:
            ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
            return isWritableType(listTypeInfo.getListElementTypeInfo());
        case STRUCT:
            StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
            return structTypeInfo.getAllStructFieldTypeInfos().stream().allMatch(HiveWriteUtils::isWritableType);
    }
    return false;
}
 
Example #6
Source File: HiveType.java    From presto with Apache License 2.0 6 votes vote down vote up
public List<String> getHiveDereferenceNames(List<Integer> dereferences)
{
    ImmutableList.Builder<String> dereferenceNames = ImmutableList.builder();
    TypeInfo typeInfo = getTypeInfo();
    for (int fieldIndex : dereferences) {
        checkArgument(typeInfo instanceof StructTypeInfo, "typeInfo should be struct type", typeInfo);
        StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;

        checkArgument(fieldIndex >= 0, "fieldIndex cannot be negative");
        checkArgument(fieldIndex < structTypeInfo.getAllStructFieldNames().size(),
                "fieldIndex should be less than the number of fields in the struct");
        String fieldName = structTypeInfo.getAllStructFieldNames().get(fieldIndex);
        dereferenceNames.add(fieldName);
        typeInfo = structTypeInfo.getAllStructFieldTypeInfos().get(fieldIndex);
    }

    return dereferenceNames.build();
}
 
Example #7
Source File: MDSSerde.java    From multiple-dimension-spread with Apache License 2.0 6 votes vote down vote up
private StructTypeInfo getAllReadTypeInfo( final String columnNameProperty , final String columnTypeProperty ){
  ArrayList<TypeInfo> fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString( columnTypeProperty );
  ArrayList<String> columnNames = new ArrayList<String>();
  if ( columnNameProperty != null && 0 < columnNameProperty.length() ) {
    String[] columnNameArray = columnNameProperty.split(",");
    for( int i = 0 ; i < columnNameArray.length ; i++ ){
      columnNames.add( columnNameArray[i] );
      filedIndexMap.put( columnNameArray[i] , i );
    }
  }
  StructTypeInfo rootType = new StructTypeInfo();

  rootType.setAllStructFieldNames( columnNames );
  rootType.setAllStructFieldTypeInfos( fieldTypes );

  return rootType;
}
 
Example #8
Source File: NiFiRecordSerDe.java    From nifi with Apache License 2.0 6 votes vote down vote up
private List<Object> deserialize(Record record, StructTypeInfo schema) throws SerDeException {
    List<Object> result = new ArrayList<>(Collections.nCopies(schema.getAllStructFieldNames().size(), null));

    try {
        RecordSchema recordSchema = record.getSchema();
        for (RecordField field : recordSchema.getFields()) {
            populateRecord(result, record.getValue(field), field, schema);
        }
    } catch(SerDeException se) {
        log.error("Error [{}] parsing Record [{}].", new Object[]{se.toString(), record}, se);
        throw se;
    } catch (Exception e) {
        log.error("Error [{}] parsing Record [{}].", new Object[]{e.toString(), record}, e);
        throw new SerDeException(e);
    }

    return result;
}
 
Example #9
Source File: MDSSerde.java    From multiple-dimension-spread with Apache License 2.0 6 votes vote down vote up
private StructTypeInfo getColumnProjectionTypeInfo( final String columnNameProperty , final String columnTypeProperty , final String projectionColumnNames ){
  Set<String> columnNameSet = new HashSet<String>();
  for( String columnName : projectionColumnNames.split(",") ){
    columnNameSet.add( columnName );
  }

  ArrayList<TypeInfo> fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString( columnTypeProperty );
  String[] splitNames = columnNameProperty.split(",");

  ArrayList<String> projectionColumnNameList = new ArrayList<String>();
  ArrayList<TypeInfo> projectionFieldTypeList = new ArrayList<TypeInfo>();
  for( int i = 0 ; i < fieldTypes.size() ; i++ ){
    if( columnNameSet.contains( splitNames[i] ) ){
      projectionColumnNameList.add( splitNames[i] );
      projectionFieldTypeList.add( fieldTypes.get(i) );
    }
    filedIndexMap.put( splitNames[i] , i );
  }
  StructTypeInfo rootType = new StructTypeInfo();

  rootType.setAllStructFieldNames( projectionColumnNameList );
  rootType.setAllStructFieldTypeInfos( projectionFieldTypeList );

  return rootType;
}
 
Example #10
Source File: MDSSerde.java    From multiple-dimension-spread with Apache License 2.0 6 votes vote down vote up
@Override
public void initialize( final Configuration conf, final Properties table , final Properties part ) throws SerDeException{
  LOG.info( table.toString() );
  if( part != null ){
    LOG.info( part.toString() );
  }
  String columnNameProperty = table.getProperty(serdeConstants.LIST_COLUMNS);
  String columnTypeProperty = table.getProperty(serdeConstants.LIST_COLUMN_TYPES);

  String projectionColumnNames = conf.get( ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR , "" );

  StructTypeInfo rootType;
  if( projectionColumnNames.isEmpty() ){
    rootType = getAllReadTypeInfo( columnNameProperty , columnTypeProperty );
  }
  else{
    rootType = getColumnProjectionTypeInfo( columnNameProperty , columnTypeProperty , projectionColumnNames );
  }

  inspector = MDSObjectInspectorFactory.craeteObjectInspectorFromTypeInfo( rootType );
}
 
Example #11
Source File: OrcStorage.java    From spork with Apache License 2.0 6 votes vote down vote up
@Override
public RequiredFieldResponse pushProjection(
        RequiredFieldList requiredFieldList) throws FrontendException {
    if (requiredFieldList == null)
        return null;
    if (requiredFieldList.getFields() != null)
    {
        int schemaSize = ((StructTypeInfo)typeInfo).getAllStructFieldTypeInfos().size();
        mRequiredColumns = new boolean[schemaSize];
        for (RequiredField rf: requiredFieldList.getFields())
        {
            if (rf.getIndex()!=-1)
                mRequiredColumns[rf.getIndex()] = true;
        }
        Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
        try {
            p.setProperty(signature + RequiredColumnsSuffix, ObjectSerializer.serialize(mRequiredColumns));
        } catch (Exception e) {
            throw new RuntimeException("Cannot serialize mRequiredColumns");
        }
    }
    return new RequiredFieldResponse(true);
}
 
Example #12
Source File: JSONSerDe.java    From searchanalytics-bigdata with MIT License 6 votes vote down vote up
/**
 * Parses a JSON object and its fields. The Hive metadata is used to
 * determine how to parse the object fields.
 *
 * @param field
 *            - The JSON object to parse
 * @param fieldTypeInfo
 *            - Metadata about the Hive column
 * @return - A map representing the object and its fields
 */
@SuppressWarnings("unchecked")
private Object parseStruct(final Object field,
		final StructTypeInfo fieldTypeInfo) {
	final Map<Object, Object> map = (Map<Object, Object>) field;
	final ArrayList<TypeInfo> structTypes = fieldTypeInfo
			.getAllStructFieldTypeInfos();
	final ArrayList<String> structNames = fieldTypeInfo
			.getAllStructFieldNames();
	final List<Object> structRow = new ArrayList<Object>(structTypes.size());
	for (int i = 0; i < structNames.size(); i++) {
		structRow.add(parseField(map.get(structNames.get(i)),
				structTypes.get(i)));
	}
	return structRow;
}
 
Example #13
Source File: EmoSerDe.java    From emodb with Apache License 2.0 6 votes vote down vote up
private Object deserializeStruct(StructTypeInfo type, Object data)
        throws SerDeException {
    if (!(data instanceof Map)) {
        throw new SerDeException("Value not of type map");
    }
    //noinspection unchecked
    Map<String, Object> map = (Map<String, Object>) data;

    List<String> fieldNames = type.getAllStructFieldNames();
    List<TypeInfo> fieldTypes = type.getAllStructFieldTypeInfos();

    // When deserializing a struct the returned value is a list of values in the same order as the field names.

    List<Object> values = Lists.newArrayListWithCapacity(fieldNames.size());
    for (int i=0; i < fieldNames.size(); i++) {
        Object rawValue = getRawValueOrNullIfAbsent(fieldNames.get(i), map);
        Object value = deserialize(fieldTypes.get(i), rawValue);
        values.add(value);
    }

    return values;
}
 
Example #14
Source File: JSONSerDe.java    From searchanalytics-bigdata with MIT License 6 votes vote down vote up
/**
 * An initialization function used to gather information about the table.
 * Typically, a SerDe implementation will be interested in the list of
 * column names and their types. That information will be used to help
 * perform actual serialization and deserialization of data.
 */
@Override
public void initialize(final Configuration conf, final Properties tbl)
		throws SerDeException {
	// Get a list of the table's column names.
	final String colNamesStr = tbl.getProperty(serdeConstants.LIST_COLUMNS);
	// Jai...change column names to lower case.
	colNames = Arrays.asList(colNamesStr.toLowerCase().split(","));
	// Get a list of TypeInfos for the columns. This list lines up with
	// the list of column names.
	final String colTypesStr = tbl
			.getProperty(serdeConstants.LIST_COLUMN_TYPES);
	final List<TypeInfo> colTypes = TypeInfoUtils
			.getTypeInfosFromTypeString(colTypesStr);
	rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(
			colNames, colTypes);
	rowOI = TypeInfoUtils
			.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
}
 
Example #15
Source File: BlurObjectInspectorGenerator.java    From incubator-retired-blur with Apache License 2.0 6 votes vote down vote up
private ObjectInspector createObjectInspectorWorker(TypeInfo ti) throws SerDeException {
  switch (ti.getCategory()) {
  case PRIMITIVE:
    PrimitiveTypeInfo pti = (PrimitiveTypeInfo) ti;
    return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(pti);
  case STRUCT:
    StructTypeInfo sti = (StructTypeInfo) ti;
    List<ObjectInspector> ois = new ArrayList<ObjectInspector>(sti.getAllStructFieldTypeInfos().size());
    for (TypeInfo typeInfo : sti.getAllStructFieldTypeInfos()) {
      ois.add(createObjectInspectorWorker(typeInfo));
    }
    return ObjectInspectorFactory.getStandardStructObjectInspector(sti.getAllStructFieldNames(), ois);
  case LIST:
    ListTypeInfo lti = (ListTypeInfo) ti;
    TypeInfo listElementTypeInfo = lti.getListElementTypeInfo();
    return ObjectInspectorFactory.getStandardListObjectInspector(createObjectInspectorWorker(listElementTypeInfo));
  default:
    throw new SerDeException("No Hive categories matched for [" + ti + "]");
  }
}
 
Example #16
Source File: ArrayWritableObjectInspector.java    From indexr with Apache License 2.0 6 votes vote down vote up
public ArrayWritableObjectInspector(final StructTypeInfo rowTypeInfo) {

        typeInfo = rowTypeInfo;
        fieldNames = rowTypeInfo.getAllStructFieldNames();
        fieldInfos = rowTypeInfo.getAllStructFieldTypeInfos();
        fields = new ArrayList<StructField>(fieldNames.size());
        fieldsByName = new HashMap<String, StructFieldImpl>();

        for (int i = 0; i < fieldNames.size(); ++i) {
            final String name = fieldNames.get(i);
            final TypeInfo fieldInfo = fieldInfos.get(i);

            final StructFieldImpl field = new StructFieldImpl(name, getObjectInspector(fieldInfo), i);
            fields.add(field);
            fieldsByName.put(name, field);
        }
    }
 
Example #17
Source File: ArrayWritableObjectInspector.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
public ArrayWritableObjectInspector(final StructTypeInfo rowTypeInfo) {

    typeInfo = rowTypeInfo;
    fieldNames = rowTypeInfo.getAllStructFieldNames();
    fieldInfos = rowTypeInfo.getAllStructFieldTypeInfos();
    fields = new ArrayList<StructField>(fieldNames.size());
    fieldsByName = new HashMap<String, StructFieldImpl>();

    for (int i = 0; i < fieldNames.size(); ++i) {
      final String name = fieldNames.get(i);
      final TypeInfo fieldInfo = fieldInfos.get(i);

      final StructFieldImpl field = new StructFieldImpl(name, getObjectInspector(fieldInfo), i);
      fields.add(field);
      fieldsByName.put(name, field);
    }
  }
 
Example #18
Source File: CobolDeserializer.java    From Cobol-to-Hive with Apache License 2.0 6 votes vote down vote up
private Object worker(String columnName, TypeInfo columnType){
	
	switch(columnType.getCategory()) {
		
		case STRUCT:
			return deserializeStruct(columnName, (StructTypeInfo) columnType);
		case UNION:
			return deserializeUnion(columnName,(UnionTypeInfo) columnType);
		case LIST:
		return deserializeList(columnName, (ListTypeInfo) columnType);
		case MAP:
		throw new RuntimeException("map type is not possible for cobol layout" + columnType.getCategory());
		case PRIMITIVE:
		return deserializePrimitive(columnName, (PrimitiveTypeInfo) columnType);
		default:
		throw new RuntimeException("Unknown TypeInfo: " + columnType.getCategory());
	}
}
 
Example #19
Source File: JSONCDHSerDe.java    From bigdata-tutorial with Apache License 2.0 6 votes vote down vote up
/**
 * An initialization function used to gather information about the table.
 * Typically, a SerDe implementation will be interested in the list of
 * column names and their types. That information will be used to help perform
 * actual serialization and deserialization of data.
 */
@Override
public void initialize(Configuration conf, Properties tbl)
		throws SerDeException {
	// Get a list of the table's column names.
	String colNamesStr = tbl.getProperty(serdeConstants.LIST_COLUMNS);
	colNames = Arrays.asList(colNamesStr.split(","));

	// Get a list of TypeInfos for the columns. This list lines up with
	// the list of column names.
	String colTypesStr = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
	List<TypeInfo> colTypes =
			TypeInfoUtils.getTypeInfosFromTypeString(colTypesStr);

	rowTypeInfo =
			(StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes);
	rowOI =
			TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
}
 
Example #20
Source File: JSONCDHSerDe.java    From bigdata-tutorial with Apache License 2.0 6 votes vote down vote up
/**
 * Parses a JSON object according to the Hive column's type.
 *
 * @param field         - The JSON object to parse
 * @param fieldTypeInfo - Metadata about the Hive column
 * @return - The parsed value of the field
 */
private Object parseField(Object field, TypeInfo fieldTypeInfo) {
	switch (fieldTypeInfo.getCategory()) {
		case PRIMITIVE:
			// Jackson will return the right thing in this case, so just return
			// the object
			if (field instanceof String) {
				field = field.toString().replaceAll("\n", "\\\\n");
			}
			return field;
		case LIST:
			return parseList(field, (ListTypeInfo) fieldTypeInfo);
		case MAP:
			return parseMap(field, (MapTypeInfo) fieldTypeInfo);
		case STRUCT:
			return parseStruct(field, (StructTypeInfo) fieldTypeInfo);
		case UNION:
			// Unsupported by JSON
		default:
			return null;
	}
}
 
Example #21
Source File: HiveOrcSerDeManager.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Extensible if there's other source-of-truth for fetching schema instead of interacting with HDFS.
 *
 * For purpose of initializing {@link org.apache.hadoop.hive.ql.io.orc.OrcSerde} object, it will require:
 * org.apache.hadoop.hive.serde.serdeConstants#LIST_COLUMNS and
 * org.apache.hadoop.hive.serde.serdeConstants#LIST_COLUMN_TYPES
 *
 */
protected void addSchemaPropertiesHelper(Path path, HiveRegistrationUnit hiveUnit) throws IOException {
  TypeInfo schema = getSchemaFromLatestFile(path, this.fs);
  if (schema instanceof StructTypeInfo) {
    StructTypeInfo structTypeInfo = (StructTypeInfo) schema;
    hiveUnit.setSerDeProp(serdeConstants.LIST_COLUMNS,
        Joiner.on(",").join(structTypeInfo.getAllStructFieldNames()));
    hiveUnit.setSerDeProp(serdeConstants.LIST_COLUMN_TYPES,
        Joiner.on(",").join(
            structTypeInfo.getAllStructFieldTypeInfos().stream().map(x -> x.getTypeName())
                .collect(Collectors.toList())));
  } else {
    // Hive always uses a struct with a field for each of the top-level columns as the root object type.
    // So for here we assume to-be-registered ORC files follow this pattern.
    throw new IllegalStateException("A valid ORC schema should be an instance of struct");
  }
}
 
Example #22
Source File: OrcLazyRowObjectInspector.java    From hive-dwrf with Apache License 2.0 5 votes vote down vote up
public OrcLazyRowObjectInspector(StructTypeInfo info) {
  super(info.getAllStructFieldNames().size());
  ArrayList<String> fieldNames = info.getAllStructFieldNames();
  ArrayList<TypeInfo> fieldTypes = info.getAllStructFieldTypeInfos();
  for(int i=0; i < fieldNames.size(); ++i) {
    fields.add(new Field(fieldNames.get(i),
        OrcLazyObjectInspectorUtils.createLazyObjectInspector(fieldTypes.get(i)), i));
  }
}
 
Example #23
Source File: OrcUtils.java    From spork with Apache License 2.0 5 votes vote down vote up
PigStructInspector(StructTypeInfo info) {
    ArrayList<String> fieldNames = info.getAllStructFieldNames();
    ArrayList<TypeInfo> fieldTypes = info.getAllStructFieldTypeInfos();
    fields = new ArrayList<StructField>(fieldNames.size());
    for (int i = 0; i < fieldNames.size(); ++i) {
        fields.add(new Field(fieldNames.get(i),
                createObjectInspector(fieldTypes.get(i)), i));
    }
}
 
Example #24
Source File: OrcLazyStructObjectInspector.java    From hive-dwrf with Apache License 2.0 5 votes vote down vote up
public OrcLazyStructObjectInspector(StructTypeInfo info) {
  ArrayList<String> fieldNames = info.getAllStructFieldNames();
  ArrayList<TypeInfo> fieldTypes = info.getAllStructFieldTypeInfos();
  fields = new ArrayList<StructField>(fieldNames.size());
  for(int i=0; i < fieldNames.size(); ++i) {
    fields.add(new Field(fieldNames.get(i),
        OrcLazyObjectInspectorUtils.createWritableObjectInspector(fieldTypes.get(i)), i));
  }
}
 
Example #25
Source File: TestSchemaConversion.java    From kite with Apache License 2.0 5 votes vote down vote up
@Test
public void testSimpleRecord() {
  TypeInfo type = HiveSchemaConverter.convert(SIMPLE_RECORD);

  Assert.assertTrue("Record should be converted to struct",
      type instanceof StructTypeInfo);
  Assert.assertEquals("Field names should match",
      Lists.newArrayList("id", "name"),
      ((StructTypeInfo) type).getAllStructFieldNames());
  Assert.assertEquals("Field types should match",
      Lists.newArrayList(
          INT_TYPE_INFO,
          STRING_TYPE_INFO),
      ((StructTypeInfo) type).getAllStructFieldTypeInfos());
}
 
Example #26
Source File: ArrayWritableObjectInspector.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
private ObjectInspector getObjectInspector(final TypeInfo typeInfo) {
  if (typeInfo.equals(TypeInfoFactory.doubleTypeInfo)) {
    return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
  } else if (typeInfo.equals(TypeInfoFactory.booleanTypeInfo)) {
    return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
  } else if (typeInfo.equals(TypeInfoFactory.floatTypeInfo)) {
    return PrimitiveObjectInspectorFactory.writableFloatObjectInspector;
  } else if (typeInfo.equals(TypeInfoFactory.intTypeInfo)) {
    return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
  } else if (typeInfo.equals(TypeInfoFactory.longTypeInfo)) {
    return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
  } else if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) {
    return ParquetPrimitiveInspectorFactory.parquetStringInspector;
  } else if (typeInfo.getCategory().equals(Category.STRUCT)) {
    return new ArrayWritableObjectInspector((StructTypeInfo) typeInfo);
  } else if (typeInfo.getCategory().equals(Category.LIST)) {
    final TypeInfo subTypeInfo = ((ListTypeInfo) typeInfo).getListElementTypeInfo();
    return new ParquetHiveArrayInspector(getObjectInspector(subTypeInfo));
  } else if (typeInfo.getCategory().equals(Category.MAP)) {
    final TypeInfo keyTypeInfo = ((MapTypeInfo) typeInfo).getMapKeyTypeInfo();
    final TypeInfo valueTypeInfo = ((MapTypeInfo) typeInfo).getMapValueTypeInfo();
    if (keyTypeInfo.equals(TypeInfoFactory.stringTypeInfo) || keyTypeInfo.equals(TypeInfoFactory.byteTypeInfo)
            || keyTypeInfo.equals(TypeInfoFactory.shortTypeInfo)) {
      return new DeepParquetHiveMapInspector(getObjectInspector(keyTypeInfo), getObjectInspector(valueTypeInfo));
    } else {
      return new StandardParquetHiveMapInspector(getObjectInspector(keyTypeInfo), getObjectInspector(valueTypeInfo));
    }
  } else if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) {
    throw new UnsupportedOperationException("timestamp not implemented yet");
  } else if (typeInfo.equals(TypeInfoFactory.byteTypeInfo)) {
    return ParquetPrimitiveInspectorFactory.parquetByteInspector;
  } else if (typeInfo.equals(TypeInfoFactory.shortTypeInfo)) {
    return ParquetPrimitiveInspectorFactory.parquetShortInspector;
  } else {
    throw new IllegalArgumentException("Unknown field info: " + typeInfo);
  }

}
 
Example #27
Source File: ParquetHiveSerDe.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
@Override
public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException {

  final TypeInfo rowTypeInfo;
  final List<String> columnNames;
  final List<TypeInfo> columnTypes;
  // Get column names and sort order
  final String columnNameProperty = tbl.getProperty(IOConstants.COLUMNS);
  final String columnTypeProperty = tbl.getProperty(IOConstants.COLUMNS_TYPES);

  if (columnNameProperty.length() == 0) {
    columnNames = new ArrayList<String>();
  } else {
    columnNames = Arrays.asList(columnNameProperty.split(","));
  }
  if (columnTypeProperty.length() == 0) {
    columnTypes = new ArrayList<TypeInfo>();
  } else {
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  }
  if (columnNames.size() != columnTypes.size()) {
    throw new IllegalArgumentException("ParquetHiveSerde initialization failed. Number of column " +
      "name and column type differs. columnNames = " + columnNames + ", columnTypes = " +
      columnTypes);
  }
  // Create row related objects
  rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
  this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);

  // Stats part
  stats = new SerDeStats();
  serializedSize = 0;
  deserializedSize = 0;
  status = LAST_OPERATION.UNKNOWN;
}
 
Example #28
Source File: HiveSchemaConverter.java    From kite with Apache License 2.0 5 votes vote down vote up
private static String doc(TypeInfo type) {
  if (type instanceof StructTypeInfo) {
    // don't add struct<a:t1,b:t2> when fields a and b will have doc strings
    return null;
  }
  return "Converted from '" + String.valueOf(type) + "'";
}
 
Example #29
Source File: HiveSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
private static Type convertType(final String name, final TypeInfo typeInfo, final Repetition repetition) {
  if (typeInfo.getCategory().equals(Category.PRIMITIVE)) {
    if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) {
      return new PrimitiveType(repetition, PrimitiveTypeName.BINARY, name);
    } else if (typeInfo.equals(TypeInfoFactory.intTypeInfo) ||
        typeInfo.equals(TypeInfoFactory.shortTypeInfo) ||
        typeInfo.equals(TypeInfoFactory.byteTypeInfo)) {
      return new PrimitiveType(repetition, PrimitiveTypeName.INT32, name);
    } else if (typeInfo.equals(TypeInfoFactory.longTypeInfo)) {
      return new PrimitiveType(repetition, PrimitiveTypeName.INT64, name);
    } else if (typeInfo.equals(TypeInfoFactory.doubleTypeInfo)) {
      return new PrimitiveType(repetition, PrimitiveTypeName.DOUBLE, name);
    } else if (typeInfo.equals(TypeInfoFactory.floatTypeInfo)) {
      return new PrimitiveType(repetition, PrimitiveTypeName.FLOAT, name);
    } else if (typeInfo.equals(TypeInfoFactory.booleanTypeInfo)) {
      return new PrimitiveType(repetition, PrimitiveTypeName.BOOLEAN, name);
    } else if (typeInfo.equals(TypeInfoFactory.binaryTypeInfo)) {
      // TODO : binaryTypeInfo is a byte array. Need to map it
      throw new UnsupportedOperationException("Binary type not implemented");
    } else if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) {
      throw new UnsupportedOperationException("Timestamp type not implemented");
    } else if (typeInfo.equals(TypeInfoFactory.voidTypeInfo)) {
      throw new UnsupportedOperationException("Void type not implemented");
    } else if (typeInfo.equals(TypeInfoFactory.unknownTypeInfo)) {
      throw new UnsupportedOperationException("Unknown type not implemented");
    } else {
      throw new IllegalArgumentException("Unknown type: " + typeInfo);
    }
  } else if (typeInfo.getCategory().equals(Category.LIST)) {
    return convertArrayType(name, (ListTypeInfo) typeInfo);
  } else if (typeInfo.getCategory().equals(Category.STRUCT)) {
    return convertStructType(name, (StructTypeInfo) typeInfo);
  } else if (typeInfo.getCategory().equals(Category.MAP)) {
    return convertMapType(name, (MapTypeInfo) typeInfo);
  } else if (typeInfo.getCategory().equals(Category.UNION)) {
    throw new UnsupportedOperationException("Union type not implemented");
  } else {
    throw new IllegalArgumentException("Unknown type: " + typeInfo);
  }
}
 
Example #30
Source File: HiveType.java    From presto with Apache License 2.0 5 votes vote down vote up
public static boolean isSupportedType(TypeInfo typeInfo, StorageFormat storageFormat)
{
    switch (typeInfo.getCategory()) {
        case PRIMITIVE:
            return getPrimitiveType((PrimitiveTypeInfo) typeInfo) != null;
        case MAP:
            MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
            return isSupportedType(mapTypeInfo.getMapKeyTypeInfo(), storageFormat) && isSupportedType(mapTypeInfo.getMapValueTypeInfo(), storageFormat);
        case LIST:
            ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
            return isSupportedType(listTypeInfo.getListElementTypeInfo(), storageFormat);
        case STRUCT:
            StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
            return structTypeInfo.getAllStructFieldTypeInfos().stream()
                    .allMatch(fieldTypeInfo -> isSupportedType(fieldTypeInfo, storageFormat));
        case UNION:
            // This feature (reading uniontypes as structs) has only been verified against Avro and ORC tables. Here's a discussion:
            //   1. Avro tables are supported and verified.
            //   2. ORC tables are supported and verified.
            //   3. The Parquet format doesn't support uniontypes itself so there's no need to add support for it in Presto.
            //   4. TODO: RCFile tables are not supported yet.
            //   5. TODO: The support for Avro is done in SerDeUtils so it's possible that formats other than Avro are also supported. But verification is needed.
            if (storageFormat.getSerDe().equalsIgnoreCase(AVRO.getSerDe()) || storageFormat.getSerDe().equalsIgnoreCase(ORC.getSerDe())) {
                UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
                return unionTypeInfo.getAllUnionObjectTypeInfos().stream()
                        .allMatch(fieldTypeInfo -> isSupportedType(fieldTypeInfo, storageFormat));
            }
    }
    return false;
}