Java Code Examples for org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils

The following examples show how to use org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils. These examples are extracted from open source projects; where known, the source project, source file, and license are noted above each example.
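Most of the examples below combine a few core TypeInfoUtils entry points: parsing Hive type strings into TypeInfo objects and deriving ObjectInspectors from them. As orientation, here is a minimal, self-contained sketch of those calls; the class name and the type strings are illustrative only and are not taken from any of the projects below.

import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeInfoUtilsSketch {
    public static void main(String[] args) {
        // Parse a single Hive type string into a TypeInfo.
        TypeInfo mapType = TypeInfoUtils.getTypeInfoFromTypeString("map<string,int>");

        // Parse a comma-separated list of type strings, as typically stored in table properties.
        List<TypeInfo> columnTypes =
                TypeInfoUtils.getTypeInfosFromTypeString("bigint,array<string>,map<string,int>");

        // Derive inspectors for the standard Java and Writable object representations.
        ObjectInspector javaInspector =
                TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(mapType);
        ObjectInspector writableInspector =
                TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(mapType);

        System.out.println(mapType.getTypeName());           // map<string,int>
        System.out.println(columnTypes.size());              // 3
        System.out.println(javaInspector.getCategory());     // MAP
        System.out.println(writableInspector.getCategory()); // MAP
    }
}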
Example 1
Source Project: flink   Source File: HiveTableUtil.java    License: Apache License 2.0
/**
 * Creates a Flink TableSchema from a Hive table's columns and partition keys.
 */
public static TableSchema createTableSchema(List<FieldSchema> cols, List<FieldSchema> partitionKeys) {
	List<FieldSchema> allCols = new ArrayList<>(cols);
	allCols.addAll(partitionKeys);

	String[] colNames = new String[allCols.size()];
	DataType[] colTypes = new DataType[allCols.size()];

	for (int i = 0; i < allCols.size(); i++) {
		FieldSchema fs = allCols.get(i);

		colNames[i] = fs.getName();
		colTypes[i] = HiveTypeUtil.toFlinkType(TypeInfoUtils.getTypeInfoFromTypeString(fs.getType()));
	}

	return TableSchema.builder()
			.fields(colNames, colTypes)
			.build();
}
 
Example 2
Source Project: flink   Source File: HiveStatsUtil.java    License: Apache License 2.0
private static ColumnStatistics createHiveColumnStatistics(
		Map<String, CatalogColumnStatisticsDataBase> colStats,
		StorageDescriptor sd,
		ColumnStatisticsDesc desc) {
	List<ColumnStatisticsObj> colStatsList = new ArrayList<>();

	for (FieldSchema field : sd.getCols()) {
		String hiveColName = field.getName();
		String hiveColType = field.getType();
		CatalogColumnStatisticsDataBase flinkColStat = colStats.get(field.getName());
		if (null != flinkColStat) {
			ColumnStatisticsData statsData =
					getColumnStatisticsData(HiveTypeUtil.toFlinkType(TypeInfoUtils.getTypeInfoFromTypeString(hiveColType)), flinkColStat);
			ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj(hiveColName, hiveColType, statsData);
			colStatsList.add(columnStatisticsObj);
		}
	}

	return new ColumnStatistics(desc, colStatsList);
}
 
Example 3
Source Project: flink   Source File: HiveGenericUDF.java    License: Apache License 2.0
@Override
public void openInternal() {

	LOG.info("Open HiveGenericUDF as {}", hiveFunctionWrapper.getClassName());

	function = hiveFunctionWrapper.createFunction();

	try {
		returnInspector = function.initializeAndFoldConstants(
			HiveInspectors.toInspectors(constantArguments, argTypes));
	} catch (UDFArgumentException e) {
		throw new FlinkHiveUDFException(e);
	}

	deferredObjects = new GenericUDF.DeferredObject[argTypes.length];

	for (int i = 0; i < deferredObjects.length; i++) {
		deferredObjects[i] = new DeferredObjectAdapter(
			TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(
				HiveTypeUtil.toHiveTypeInfo(argTypes[i])),
			argTypes[i].getLogicalType()
		);
	}
}
 
Example 4
Source Project: flink   Source File: HiveGenericUDF.java    License: Apache License 2.0
@Override
public DataType getHiveResultType(Object[] constantArguments, DataType[] argTypes) {
	LOG.info("Getting result type of HiveGenericUDF from {}", hiveFunctionWrapper.getClassName());

	try {
		ObjectInspector[] argumentInspectors = HiveInspectors.toInspectors(constantArguments, argTypes);

		ObjectInspector resultObjectInspector =
			hiveFunctionWrapper.createFunction().initializeAndFoldConstants(argumentInspectors);

		return HiveTypeUtil.toFlinkType(
			TypeInfoUtils.getTypeInfoFromObjectInspector(resultObjectInspector));
	} catch (UDFArgumentException e) {
		throw new FlinkHiveUDFException(e);
	}
}
 
Example 5
@Test
public void generateEventHiveRecordLimited() throws Exception {
    Map<Writable, Writable> map = new MapWritable();
    map.put(new Text("one"), new IntWritable(1));
    map.put(new Text("two"), new IntWritable(2));
    map.put(new Text("three"), new IntWritable(3));

    HiveType tuple = new HiveType(map, TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
            TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo)));

    SerializationEventConverter eventConverter = new SerializationEventConverter();

    SerializationFailure iaeFailure = new SerializationFailure(new IllegalArgumentException("garbage"), tuple, new ArrayList<String>());

    String rawEvent = eventConverter.getRawEvent(iaeFailure);
    assertThat(rawEvent, startsWith("HiveType{[email protected]"));
    String timestamp = eventConverter.getTimestamp(iaeFailure);
    assertTrue(StringUtils.hasText(timestamp));
    assertTrue(DateUtils.parseDate(timestamp).getTime().getTime() > 1L);
    String exceptionType = eventConverter.renderExceptionType(iaeFailure);
    assertEquals("illegal_argument_exception", exceptionType);
    String exceptionMessage = eventConverter.renderExceptionMessage(iaeFailure);
    assertEquals("garbage", exceptionMessage);
    String eventMessage = eventConverter.renderEventMessage(iaeFailure);
    assertEquals("Could not construct bulk entry from record", eventMessage);
}
 
Example 6
Source Project: localization_nifi   Source File: TestNiFiOrcUtils.java    License: Apache License 2.0
@Test
public void test_getWritable() throws Exception {
    assertTrue(NiFiOrcUtils.convertToORCObject(null, 1) instanceof IntWritable);
    assertTrue(NiFiOrcUtils.convertToORCObject(null, 1L) instanceof LongWritable);
    assertTrue(NiFiOrcUtils.convertToORCObject(null, 1.0f) instanceof FloatWritable);
    assertTrue(NiFiOrcUtils.convertToORCObject(null, 1.0) instanceof DoubleWritable);
    assertTrue(NiFiOrcUtils.convertToORCObject(null, new int[]{1, 2, 3}) instanceof List);
    assertTrue(NiFiOrcUtils.convertToORCObject(null, Arrays.asList(1, 2, 3)) instanceof List);
    Map<String, Float> map = new HashMap<>();
    map.put("Hello", 1.0f);
    map.put("World", 2.0f);

    Object writable = NiFiOrcUtils.convertToORCObject(TypeInfoUtils.getTypeInfoFromTypeString("map<string,float>"), map);
    assertTrue(writable instanceof MapWritable);
    MapWritable mapWritable = (MapWritable) writable;
    mapWritable.forEach((key, value) -> {
        assertTrue(key instanceof Text);
        assertTrue(value instanceof FloatWritable);
    });
}
 
Example 7
@Override
public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
    // Check the number of arguments.
    if (parameters.length != 2) {
        throw new UDFArgumentTypeException(parameters.length - 1, "Exactly two arguments are expected.");
    }

    ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[0]);
//    ObjectInspector oi1 = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[1]);
    if (oi.getCategory() != ObjectInspector.Category.PRIMITIVE) {
        throw new UDFArgumentTypeException(0, "Argument must be PRIMITIVE, but "
            + oi.getCategory().name()
            + " was passed.");
    }

//    PrimitiveObjectInspector inputOI = (PrimitiveObjectInspector) oi;
//    if (inputOI.getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
//        throw new UDFArgumentTypeException(0, "Argument must be String, but "
//                + inputOI.getPrimitiveCategory().name()
//                + " was passed.");
//    }

    return new AllActionsOfThisPeople30MinBefore();
}
 
Example 8
Source Project: multiple-dimension-spread   Source File: MDSSerde.java    License: Apache License 2.0
private StructTypeInfo getColumnProjectionTypeInfo( final String columnNameProperty , final String columnTypeProperty , final String projectionColumnNames ){
  Set<String> columnNameSet = new HashSet<String>();
  for( String columnName : projectionColumnNames.split(",") ){
    columnNameSet.add( columnName );
  }

  ArrayList<TypeInfo> fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString( columnTypeProperty );
  String[] splitNames = columnNameProperty.split(",");

  ArrayList<String> projectionColumnNameList = new ArrayList<String>();
  ArrayList<TypeInfo> projectionFieldTypeList = new ArrayList<TypeInfo>();
  for( int i = 0 ; i < fieldTypes.size() ; i++ ){
    if( columnNameSet.contains( splitNames[i] ) ){
      projectionColumnNameList.add( splitNames[i] );
      projectionFieldTypeList.add( fieldTypes.get(i) );
    }
    filedIndexMap.put( splitNames[i] , i );
  }
  StructTypeInfo rootType = new StructTypeInfo();

  rootType.setAllStructFieldNames( projectionColumnNameList );
  rootType.setAllStructFieldTypeInfos( projectionFieldTypeList );

  return rootType;
}
 
Example 9
Source Project: metacat   Source File: HiveTypeConverter.java    License: Apache License 2.0
@Override
public Type toMetacatType(final String type) {
    // Hack to fix the Presto "varchar" type coming in with no length, which Hive requires.
    final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(
        "varchar".equals(type.toLowerCase()) ? serdeConstants.STRING_TYPE_NAME : type);
    ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
    // The standard struct object inspector forces field names to lower case. In Metacat we need to preserve
    // the original case of the struct fields, so we wrap the inspector with our own to force the field names
    // to keep their original case.
    if (typeInfo.getCategory().equals(ObjectInspector.Category.STRUCT)) {
        final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
        final StandardStructObjectInspector objectInspector = (StandardStructObjectInspector) oi;
        oi = new HiveTypeConverter.SameCaseStandardStructObjectInspector(
            structTypeInfo.getAllStructFieldNames(), objectInspector);
    }
    return getCanonicalType(oi);
}
 
Example 10
Source Project: incubator-hivemall   Source File: HiveUtils.java    License: Apache License 2.0
@Nullable
public static String[] getConstStringArray(@Nonnull final ObjectInspector oi)
        throws UDFArgumentException {
    if (!ObjectInspectorUtils.isConstantObjectInspector(oi)) {
        throw new UDFArgumentException("argument must be a constant value: "
                + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
    }
    ConstantObjectInspector constOI = (ConstantObjectInspector) oi;
    if (constOI.getCategory() != Category.LIST) {
        throw new UDFArgumentException(
            "argument must be an array: " + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
    }
    final List<?> lst = (List<?>) constOI.getWritableConstantValue();
    if (lst == null) {
        return null;
    }
    final int size = lst.size();
    final String[] ary = new String[size];
    for (int i = 0; i < size; i++) {
        Object o = lst.get(i);
        if (o != null) {
            ary[i] = o.toString();
        }
    }
    return ary;
}
 
Example 11
Source Project: elasticsearch-hadoop   Source File: HiveUtils.java    License: Apache License 2.0
static StandardStructObjectInspector structObjectInspector(Properties tableProperties) {
    // extract column info - don't use Hive constants as they were renamed in 0.9, breaking compatibility
    // the column names are saved since the inspector given to #serialize doesn't preserve them (maybe because it's an external table)
    // use the class since StructType requires it ...
    List<String> columnNames = StringUtils.tokenize(tableProperties.getProperty(HiveConstants.COLUMNS), ",");
    List<TypeInfo> colTypes = TypeInfoUtils.getTypeInfosFromTypeString(tableProperties.getProperty(HiveConstants.COLUMNS_TYPES));

    // create a standard writable Object Inspector - used later on by serialization/deserialization
    List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>();

    for (TypeInfo typeInfo : colTypes) {
        inspectors.add(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo));
    }

    return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
}
 
Example 12
Source Project: incubator-hivemall   Source File: JsonSerdeUtilsTest.java    License: Apache License 2.0
@Test
public void testMapValues() throws SerDeException {
    List<String> columnNames = Arrays.asList("a,b".split(","));
    List<TypeInfo> columnTypes =
            TypeInfoUtils.getTypeInfosFromTypeString("array<string>,map<string,int>");

    Text text1 = new Text("{ \"a\":[\"aaa\"],\"b\":{\"bbb\":1}} ");
    Text text2 = new Text("{\"a\":[\"yyy\"],\"b\":{\"zzz\":123}}");
    Text text3 = new Text("{\"a\":[\"a\"],\"b\":{\"x\":11, \"y\": 22, \"z\": null}}");

    List<Object> expected1 = Arrays.<Object>asList(Arrays.<String>asList("aaa"),
        createHashMapStringInteger("bbb", 1));
    List<Object> expected2 = Arrays.<Object>asList(Arrays.<String>asList("yyy"),
        createHashMapStringInteger("zzz", 123));
    List<Object> expected3 = Arrays.<Object>asList(Arrays.<String>asList("a"),
        createHashMapStringInteger("x", 11, "y", 22, "z", null));

    List<Object> result1 = JsonSerdeUtils.deserialize(text1, columnNames, columnTypes);
    List<Object> result2 = JsonSerdeUtils.deserialize(text2, columnNames, columnTypes);
    List<Object> result3 = JsonSerdeUtils.deserialize(text3, columnNames, columnTypes);

    Assert.assertEquals(expected1, result1);
    Assert.assertEquals(expected2, result2);
    Assert.assertEquals(expected3, result3);
}
 
Example 13
Source Project: incubator-hivemall   Source File: JsonSerdeUtilsTest.java    License: Apache License 2.0
@Test
public void testTopLevelArray() throws Exception {
    List<String> expected1 = Arrays.asList("Taro", "Tanaka");
    Text json1 = new Text("[\"Taro\",\"Tanaka\"]");
    TypeInfo type1 = TypeInfoUtils.getTypeInfoFromTypeString("array<string>");

    List<Object> deserialized1 = JsonSerdeUtils.deserialize(json1, type1);
    assertRecordEquals(expected1, deserialized1);
    Text serialized1 = JsonSerdeUtils.serialize(deserialized1,
        HCatRecordObjectInspectorFactory.getStandardObjectInspectorFromTypeInfo(type1));
    Assert.assertEquals(json1, serialized1);

    List<Double> expected2 = Arrays.asList(1.1d, 2.2d, 3.3d);
    Text json2 = new Text("[1.1,2.2,3.3]");
    TypeInfo type2 = TypeInfoUtils.getTypeInfoFromTypeString("array<double>");

    List<Object> deserialized2 = JsonSerdeUtils.deserialize(json2, type2);
    assertRecordEquals(expected2, deserialized2);
    Text serialized2 = JsonSerdeUtils.serialize(deserialized2,
        HCatRecordObjectInspectorFactory.getStandardObjectInspectorFromTypeInfo(type2));
    Assert.assertEquals(json2, serialized2);
}
 
Example 14
Source Project: incubator-hivemall   Source File: JsonSerdeUtilsTest.java    License: Apache License 2.0
@Test
public void testTopLevelPrimitive() throws Exception {
    Double expected1 = Double.valueOf(3.3);
    Text json1 = new Text("3.3");
    TypeInfo type1 = TypeInfoUtils.getTypeInfoFromTypeString("double");

    Object deserialized1 = JsonSerdeUtils.deserialize(json1, type1);
    Assert.assertEquals(expected1, deserialized1);
    Text serialized1 = JsonSerdeUtils.serialize(deserialized1,
        HCatRecordObjectInspectorFactory.getStandardObjectInspectorFromTypeInfo(type1));
    Assert.assertEquals(json1, serialized1);

    Boolean expected2 = Boolean.FALSE;
    Text json2 = new Text("false");

    Boolean deserialized2 = JsonSerdeUtils.deserialize(json2);
    Assert.assertEquals(expected2, deserialized2);
    Text serialized2 = JsonSerdeUtils.serialize(deserialized2,
        PrimitiveObjectInspectorFactory.javaBooleanObjectInspector);
    Assert.assertEquals(json2, serialized2);
}
 
Example 15
Source Project: bigdata-tutorial   Source File: JSONCDHSerDe.java    License: Apache License 2.0
/**
 * An initialization function used to gather information about the table.
 * Typically, a SerDe implementation will be interested in the list of
 * column names and their types. That information will be used to help perform
 * actual serialization and deserialization of data.
 */
@Override
public void initialize(Configuration conf, Properties tbl)
		throws SerDeException {
	// Get a list of the table's column names.
	String colNamesStr = tbl.getProperty(serdeConstants.LIST_COLUMNS);
	colNames = Arrays.asList(colNamesStr.split(","));

	// Get a list of TypeInfos for the columns. This list lines up with
	// the list of column names.
	String colTypesStr = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
	List<TypeInfo> colTypes =
			TypeInfoUtils.getTypeInfosFromTypeString(colTypesStr);

	rowTypeInfo =
			(StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes);
	rowOI =
			TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
}
 
Example 16
Source Project: flink   Source File: HiveTableUtil.java    License: Apache License 2.0
/**
 * Creates a Flink TableSchema from a Hive table's columns and partition keys.
 */
public static TableSchema createTableSchema(List<FieldSchema> cols, List<FieldSchema> partitionKeys,
		Set<String> notNullColumns, UniqueConstraint primaryKey) {
	List<FieldSchema> allCols = new ArrayList<>(cols);
	allCols.addAll(partitionKeys);

	String[] colNames = new String[allCols.size()];
	DataType[] colTypes = new DataType[allCols.size()];

	for (int i = 0; i < allCols.size(); i++) {
		FieldSchema fs = allCols.get(i);

		colNames[i] = fs.getName();
		colTypes[i] = HiveTypeUtil.toFlinkType(TypeInfoUtils.getTypeInfoFromTypeString(fs.getType()));
		if (notNullColumns.contains(colNames[i])) {
			colTypes[i] = colTypes[i].notNull();
		}
	}

	TableSchema.Builder builder = TableSchema.builder().fields(colNames, colTypes);
	if (primaryKey != null) {
		builder.primaryKey(primaryKey.getName(), primaryKey.getColumns().toArray(new String[0]));
	}
	return builder.build();
}
 
Example 17
Source Project: flink   Source File: HiveStatsUtil.java    License: Apache License 2.0
private static ColumnStatistics createHiveColumnStatistics(
		Map<String, CatalogColumnStatisticsDataBase> colStats,
		StorageDescriptor sd,
		ColumnStatisticsDesc desc,
		String hiveVersion) {
	List<ColumnStatisticsObj> colStatsList = new ArrayList<>();

	for (FieldSchema field : sd.getCols()) {
		String hiveColName = field.getName();
		String hiveColType = field.getType();
		CatalogColumnStatisticsDataBase flinkColStat = colStats.get(field.getName());
		if (null != flinkColStat) {
			ColumnStatisticsData statsData = getColumnStatisticsData(
					HiveTypeUtil.toFlinkType(TypeInfoUtils.getTypeInfoFromTypeString(hiveColType)),
					flinkColStat,
					hiveVersion);
			ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj(hiveColName, hiveColType, statsData);
			colStatsList.add(columnStatisticsObj);
		}
	}

	return new ColumnStatistics(desc, colStatsList);
}
 
Example 18
Source Project: incubator-gobblin   Source File: OrcTestTools.java    License: Apache License 2.0
/**
 * AvroRow version of writeAsOrcBinary
 */
private void writeAsOrcBinary(OrcRowIterator input, TypeInfo schema, Path outputPath) throws IOException {
  Configuration configuration = new Configuration();

  // Note that this doesn't support schema evolution at all.
  // If the schema in realRow is inconsistent with the given schema,
  // writing to disk will fail.
  ObjectInspector oi = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(schema);
  OrcFile.WriterOptions options = OrcFile.writerOptions(configuration).inspector(oi);
  Writer writer = null;

  while (input.hasNext()) {
    AvroRow avroRow = (AvroRow) input.next();
    if (writer == null) {
      options.inspector(avroRow.getInspector());
      writer = OrcFile.createWriter(outputPath, options);
    }
    writer.addRow(avroRow.realRow);
  }
  if (writer != null) {
    writer.close();
  }
}
 
Example 19
Source Project: presto   Source File: TestHiveBucketing.java    License: Apache License 2.0
private static int computeHive(BucketingVersion bucketingVersion, List<String> hiveTypeStrings, List<Object> hiveValues, List<TypeInfo> hiveTypeInfos)
{
    ImmutableList.Builder<Entry<ObjectInspector, Object>> columnBindingsBuilder = ImmutableList.builder();
    for (int i = 0; i < hiveTypeStrings.size(); i++) {
        Object javaValue = hiveValues.get(i);

        columnBindingsBuilder.add(Maps.immutableEntry(
                TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(hiveTypeInfos.get(i)),
                javaValue));
    }
    return getHiveBucketHashCode(bucketingVersion, columnBindingsBuilder.build());
}
 
Example 20
Source Project: spork   Source File: OrcStorage.java    License: Apache License 2.0
private TypeInfo getTypeInfoFromLocation(String location, Job job) throws IOException {
    FileSystem fs = FileSystem.get(job.getConfiguration());
    Path path = getFirstFile(location, fs);
    if (path == null) {
        log.info("Cannot find any ORC files from " + location +
                ". Probably multiple load store in script.");
        return null;
    }
    Reader reader = OrcFile.createReader(fs, path);
    ObjectInspector oip = (ObjectInspector)reader.getObjectInspector();
    return TypeInfoUtils.getTypeInfoFromObjectInspector(oip);
}
 
Example 21
Source Project: flink   Source File: HiveStatsUtil.java    License: Apache License 2.0
/**
 * Create a map of Flink column stats from the given Hive column stats.
 */
public static Map<String, CatalogColumnStatisticsDataBase> createCatalogColumnStats(@Nonnull List<ColumnStatisticsObj> hiveColStats) {
	checkNotNull(hiveColStats, "hiveColStats can not be null");
	Map<String, CatalogColumnStatisticsDataBase> colStats = new HashMap<>();
	for (ColumnStatisticsObj colStatsObj : hiveColStats) {
		CatalogColumnStatisticsDataBase columnStats = createTableColumnStats(
				HiveTypeUtil.toFlinkType(TypeInfoUtils.getTypeInfoFromTypeString(colStatsObj.getColType())),
				colStatsObj.getStatsData());
		colStats.put(colStatsObj.getColName(), columnStats);
	}

	return colStats;
}
 
Example 22
Source Project: incubator-iotdb   Source File: TsFileSerDe.java    License: Apache License 2.0
@Override
public void initialize(@Nullable Configuration conf, Properties tbl) throws SerDeException {

  final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
  final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
  final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? tbl
          .getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);

  deviceId = tbl.getProperty(DEVICE_ID);

  if (columnNameProperty == null || columnNameProperty.isEmpty()
      || columnTypeProperty == null || columnTypeProperty.isEmpty()) {
    columnNames = Collections.emptyList();
    columnTypes = Collections.emptyList();
  } else {
    columnNames = StringInternUtils.internStringsInList(
            Arrays.asList(columnNameProperty.split(columnNameDelimiter)));
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  }

  // Check that the number of column names matches the number of column types
  if (columnTypes.size() != columnNames.size()) {
    throw new TsFileSerDeException("len(columnNames) != len(columnTypes)");
  }

  oi = createObjectInspector();
}
 
Example 23
Source Project: parquet-mr   Source File: TestHiveSchemaConverter.java    License: Apache License 2.0
private List<TypeInfo> createHiveTypeInfoFrom(final String columnsTypeStr) {
  List<TypeInfo> columnTypes;

  if (columnsTypeStr.length() == 0) {
    columnTypes = new ArrayList<TypeInfo>();
  } else {
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnsTypeStr);
  }

  return columnTypes;
}
 
Example 24
Source Project: dremio-oss   Source File: HiveSchemaConverter.java    License: Apache License 2.0
/**
 * Iterates over all fields of a table and checks whether any field exceeds the
 * maximum allowed nesting level.
 * @param table the Hive table whose fields are checked
 * @param maxNestedLevels the maximum allowed nesting depth
 */
public static void checkFieldNestedLevels(final Table table, int maxNestedLevels) {
  for (FieldSchema hiveField : table.getSd().getCols()) {
    final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(hiveField.getType());
    int depth = findFieldDepth(typeInfo);
    if (depth > maxNestedLevels) {
      throw new ColumnNestedTooDeepException(hiveField.getName(), maxNestedLevels);
    }
  }
}
 
Example 25
Source Project: dremio-oss   Source File: ManagedHiveSchema.java    License: Apache License 2.0
public ManagedHiveSchema(final JobConf jobConf, final HiveReaderProto.HiveTableXattr tableXattr) {
  final java.util.Properties tableProperties = new java.util.Properties();
  HiveUtilities.addProperties(jobConf, tableProperties, HiveReaderProtoUtil.getTableProperties(tableXattr));
  final String fieldNameProp = Optional.ofNullable(tableProperties.getProperty("columns")).orElse("");
  final String fieldTypeProp = Optional.ofNullable(tableProperties.getProperty("columns.types")).orElse("");
  varcharTruncationEnabled = HiveDatasetOptions
      .enforceVarcharWidth(HiveReaderProtoUtil.convertValuesToNonProtoAttributeValues(tableXattr.getDatasetOptionMap()));

  final Iterator<String> fieldNames = Splitter.on(",").trimResults().split(fieldNameProp).iterator();
  final Iterator<TypeInfo> fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString(fieldTypeProp).iterator();

  final Map<String, ManagedSchemaField> schemaFieldMap = new HashMap<>();
  final Map<String, TypeInfo> typeInfoMap = new HashMap<>();
  while (fieldNames.hasNext() && fieldTypes.hasNext()) {
    final String fieldName = fieldNames.next();
    final TypeInfo fieldType = fieldTypes.next();
    ManagedSchemaField field;
    if (fieldType instanceof DecimalTypeInfo) {
      field = ManagedSchemaField.newFixedLenField(fieldName, fieldType.getTypeName(),
        ((DecimalTypeInfo) fieldType).getPrecision(), ((DecimalTypeInfo) fieldType).getScale());
      typeInfoMap.put(fieldName, fieldType);
    } else if (fieldType instanceof BaseCharTypeInfo) {
      if (varcharTruncationEnabled) {
        field = ManagedSchemaField.newFixedLenField(fieldName, fieldType.getTypeName(),
            ((BaseCharTypeInfo) fieldType).getLength(), 0);
        typeInfoMap.put(fieldName, fieldType);
      } else {
        field = ManagedSchemaField.newUnboundedLenField(fieldName, fieldType.getTypeName());
      }
    } else {
      // Extend ManagedSchemaField.java if more granular information has to be stored.
      // No length or scale means the field is unbounded, so we store max values.
      field = ManagedSchemaField.newUnboundedLenField(fieldName, fieldType.getTypeName());
      typeInfoMap.put(fieldName, fieldType);
    }
    schemaFieldMap.put(fieldName, field);
  }
  fieldInfo = CaseInsensitiveMap.newImmutableMap(schemaFieldMap);
  typeInfo = CaseInsensitiveMap.newImmutableMap(typeInfoMap);
}
 
Example 26
Source Project: dremio-oss   Source File: HiveMetadataUtils.java    License: Apache License 2.0
private static boolean isFieldTypeVarchar(FieldSchema hiveField) {
  final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(hiveField.getType());
  if (typeInfo.getCategory() == Category.PRIMITIVE) {
    PrimitiveTypeInfo pTypeInfo = (PrimitiveTypeInfo) typeInfo;
    if (pTypeInfo.getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.VARCHAR ||
      pTypeInfo.getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.CHAR) {
      return true;
    }
  }
  return false;
}
 
Example 27
Source Project: dremio-oss   Source File: HiveMetadataUtils.java    License: Apache License 2.0
private static List<ColumnInfo> buildColumnInfo(final Table table, final InputFormat<?, ?> format, final boolean includeComplexParquetCols) {
  final List<ColumnInfo> columnInfos = new ArrayList<>();
  for (FieldSchema hiveField : table.getSd().getCols()) {
    final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(hiveField.getType());
    Field f = HiveSchemaConverter.getArrowFieldFromHiveType(hiveField.getName(), typeInfo, format, includeComplexParquetCols);
    if (f != null) {
      columnInfos.add(getColumnInfo(typeInfo));
    }
  }
  return columnInfos;
}
 
Example 28
@Test
public void generateEventHiveRecord() throws Exception {
    Map<Writable, Writable> map = new LinkedMapWritable();
    map.put(new Text("one"), new IntWritable(1));
    map.put(new Text("two"), new IntWritable(2));
    map.put(new Text("three"), new IntWritable(3));

    HiveType tuple = new HiveType(map, TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
            TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo)));

    SerializationEventConverter eventConverter = new SerializationEventConverter();

    SerializationFailure iaeFailure = new SerializationFailure(new IllegalArgumentException("garbage"), tuple, new ArrayList<String>());

    String rawEvent = eventConverter.getRawEvent(iaeFailure);
    assertThat(rawEvent, startsWith("HiveType{object={one=1, two=2, three=3}, " +
            "inspector=org.apach[email protected]"));
    String timestamp = eventConverter.getTimestamp(iaeFailure);
    assertTrue(StringUtils.hasText(timestamp));
    assertTrue(DateUtils.parseDate(timestamp).getTime().getTime() > 1L);
    String exceptionType = eventConverter.renderExceptionType(iaeFailure);
    assertEquals("illegal_argument_exception", exceptionType);
    String exceptionMessage = eventConverter.renderExceptionMessage(iaeFailure);
    assertEquals("garbage", exceptionMessage);
    String eventMessage = eventConverter.renderEventMessage(iaeFailure);
    assertEquals("Could not construct bulk entry from record", eventMessage);
}
 
Example 29
Source Project: nifi   Source File: TestNiFiOrcUtils.java    License: Apache License 2.0
@Test
public void test_convertToORCObject() {
    Schema schema = SchemaBuilder.enumeration("myEnum").symbols("x", "y", "z");
    List<Object> objects = Arrays.asList(new Utf8("Hello"), new GenericData.EnumSymbol(schema, "x"));
    objects.forEach((avroObject) -> {
        Object o = NiFiOrcUtils.convertToORCObject(TypeInfoUtils.getTypeInfoFromTypeString("uniontype<bigint,string>"), avroObject);
        assertTrue(o instanceof UnionObject);
        UnionObject uo = (UnionObject) o;
        assertTrue(uo.getObject() instanceof Text);
    });
}