org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils Java Examples
The following examples show how to use org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.
Each example is taken from an open-source project; the source project, file, and license are noted above the code.
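Before the project examples, here is a minimal, self-contained sketch of the TypeInfoUtils calls that recur throughout: parsing Hive type strings into TypeInfo objects and deriving standard object inspectors from them. This snippet is not taken from any of the projects below; the class and variable names are illustrative only.

import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeInfoUtilsSketch {
    public static void main(String[] args) {
        // Parse a single Hive type string into a TypeInfo.
        TypeInfo mapType = TypeInfoUtils.getTypeInfoFromTypeString("map<string,int>");

        // Parse a comma-separated type list, e.g. the value of a table's LIST_COLUMN_TYPES property.
        List<TypeInfo> colTypes = TypeInfoUtils.getTypeInfosFromTypeString("string,int,array<double>");

        // Derive object inspectors (Java and Writable flavors) used for (de)serialization.
        ObjectInspector javaOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(mapType);
        ObjectInspector writableOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(mapType);

        System.out.println(mapType.getTypeName() + " -> " + javaOI.getTypeName());
        System.out.println(colTypes.size() + " column types, writable OI: " + writableOI.getTypeName());
    }
}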
Example #1
Source Project: flink Author: flink-tpc-ds File: HiveTableUtil.java License: Apache License 2.0

/**
 * Create a Flink's TableSchema from Hive table's columns and partition keys.
 */
public static TableSchema createTableSchema(List<FieldSchema> cols, List<FieldSchema> partitionKeys) {
    List<FieldSchema> allCols = new ArrayList<>(cols);
    allCols.addAll(partitionKeys);

    String[] colNames = new String[allCols.size()];
    DataType[] colTypes = new DataType[allCols.size()];

    for (int i = 0; i < allCols.size(); i++) {
        FieldSchema fs = allCols.get(i);

        colNames[i] = fs.getName();
        colTypes[i] = HiveTypeUtil.toFlinkType(TypeInfoUtils.getTypeInfoFromTypeString(fs.getType()));
    }

    return TableSchema.builder()
            .fields(colNames, colTypes)
            .build();
}
Example #2
Source Project: flink Author: flink-tpc-ds File: HiveStatsUtil.java License: Apache License 2.0

private static ColumnStatistics createHiveColumnStatistics(
        Map<String, CatalogColumnStatisticsDataBase> colStats,
        StorageDescriptor sd,
        ColumnStatisticsDesc desc) {
    List<ColumnStatisticsObj> colStatsList = new ArrayList<>();

    for (FieldSchema field : sd.getCols()) {
        String hiveColName = field.getName();
        String hiveColType = field.getType();
        CatalogColumnStatisticsDataBase flinkColStat = colStats.get(field.getName());
        if (null != flinkColStat) {
            ColumnStatisticsData statsData = getColumnStatisticsData(
                    HiveTypeUtil.toFlinkType(TypeInfoUtils.getTypeInfoFromTypeString(hiveColType)), flinkColStat);
            ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj(hiveColName, hiveColType, statsData);
            colStatsList.add(columnStatisticsObj);
        }
    }
    return new ColumnStatistics(desc, colStatsList);
}
Example #3
Source Project: flink Author: flink-tpc-ds File: HiveGenericUDF.java License: Apache License 2.0

@Override
public void openInternal() {
    LOG.info("Open HiveGenericUDF as {}", hiveFunctionWrapper.getClassName());

    function = hiveFunctionWrapper.createFunction();

    try {
        returnInspector = function.initializeAndFoldConstants(
                HiveInspectors.toInspectors(constantArguments, argTypes));
    } catch (UDFArgumentException e) {
        throw new FlinkHiveUDFException(e);
    }

    deferredObjects = new GenericUDF.DeferredObject[argTypes.length];

    for (int i = 0; i < deferredObjects.length; i++) {
        deferredObjects[i] = new DeferredObjectAdapter(
                TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(
                        HiveTypeUtil.toHiveTypeInfo(argTypes[i])),
                argTypes[i].getLogicalType()
        );
    }
}
Example #4
Source Project: flink Author: flink-tpc-ds File: HiveGenericUDF.java License: Apache License 2.0

@Override
public DataType getHiveResultType(Object[] constantArguments, DataType[] argTypes) {
    LOG.info("Getting result type of HiveGenericUDF from {}", hiveFunctionWrapper.getClassName());

    try {
        ObjectInspector[] argumentInspectors = HiveInspectors.toInspectors(constantArguments, argTypes);
        ObjectInspector resultObjectInspector =
                hiveFunctionWrapper.createFunction().initializeAndFoldConstants(argumentInspectors);

        return HiveTypeUtil.toFlinkType(
                TypeInfoUtils.getTypeInfoFromObjectInspector(resultObjectInspector));
    } catch (UDFArgumentException e) {
        throw new FlinkHiveUDFException(e);
    }
}
Example #5
Source Project: elasticsearch-hadoop Author: elastic File: HiveSerializationEventConverterTest.java License: Apache License 2.0

@Test
public void generateEventHiveRecordLimited() throws Exception {
    Map<Writable, Writable> map = new MapWritable();
    map.put(new Text("one"), new IntWritable(1));
    map.put(new Text("two"), new IntWritable(2));
    map.put(new Text("three"), new IntWritable(3));

    HiveType tuple = new HiveType(map, TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
            TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo)));

    SerializationEventConverter eventConverter = new SerializationEventConverter();

    SerializationFailure iaeFailure = new SerializationFailure(new IllegalArgumentException("garbage"), tuple, new ArrayList<String>());

    String rawEvent = eventConverter.getRawEvent(iaeFailure);
    assertThat(rawEvent, startsWith("HiveType{[email protected]"));

    String timestamp = eventConverter.getTimestamp(iaeFailure);
    assertTrue(StringUtils.hasText(timestamp));
    assertTrue(DateUtils.parseDate(timestamp).getTime().getTime() > 1L);

    String exceptionType = eventConverter.renderExceptionType(iaeFailure);
    assertEquals("illegal_argument_exception", exceptionType);

    String exceptionMessage = eventConverter.renderExceptionMessage(iaeFailure);
    assertEquals("garbage", exceptionMessage);

    String eventMessage = eventConverter.renderEventMessage(iaeFailure);
    assertEquals("Could not construct bulk entry from record", eventMessage);
}
Example #6
Source Project: localization_nifi Author: wangrenlei File: TestNiFiOrcUtils.java License: Apache License 2.0

@Test
public void test_getWritable() throws Exception {
    assertTrue(NiFiOrcUtils.convertToORCObject(null, 1) instanceof IntWritable);
    assertTrue(NiFiOrcUtils.convertToORCObject(null, 1L) instanceof LongWritable);
    assertTrue(NiFiOrcUtils.convertToORCObject(null, 1.0f) instanceof FloatWritable);
    assertTrue(NiFiOrcUtils.convertToORCObject(null, 1.0) instanceof DoubleWritable);
    assertTrue(NiFiOrcUtils.convertToORCObject(null, new int[]{1, 2, 3}) instanceof List);
    assertTrue(NiFiOrcUtils.convertToORCObject(null, Arrays.asList(1, 2, 3)) instanceof List);

    Map<String, Float> map = new HashMap<>();
    map.put("Hello", 1.0f);
    map.put("World", 2.0f);

    Object writable = NiFiOrcUtils.convertToORCObject(TypeInfoUtils.getTypeInfoFromTypeString("map<string,float>"), map);
    assertTrue(writable instanceof MapWritable);
    MapWritable mapWritable = (MapWritable) writable;
    mapWritable.forEach((key, value) -> {
        assertTrue(key instanceof Text);
        assertTrue(value instanceof FloatWritable);
    });
}
Example #7
Source Project: 163-bigdate-note Author: jiaoqiyuan File: UDAFCollectAction.java License: GNU General Public License v3.0

@Override
public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
    // Check the number of arguments
    if (parameters.length != 2) {
        throw new UDFArgumentTypeException(parameters.length - 1, "Two argument is excepted.");
    }

    ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[0]);
    // ObjectInspector oi1 = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[1]);
    if (oi.getCategory() != ObjectInspector.Category.PRIMITIVE) {
        throw new UDFArgumentTypeException(0, "Argument must be PRIMITIVE, but"
                + oi.getCategory().name()
                + " was passed.");
    }

    // PrimitiveObjectInspector inputOI = (PrimitiveObjectInspector) oi;
    // if (inputOI.getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
    //     throw new UDFArgumentTypeException(0, "Argument must be String, but"
    //             + inputOI.getPrimitiveCategory().name()
    //             + " was passed.");
    // }

    return new AllActionsOfThisPeople30MinBefore();
}
Example #8
Source Project: multiple-dimension-spread Author: yahoojapan File: MDSSerde.java License: Apache License 2.0

private StructTypeInfo getColumnProjectionTypeInfo( final String columnNameProperty , final String columnTypeProperty , final String projectionColumnNames ){
    Set<String> columnNameSet = new HashSet<String>();
    for( String columnName : projectionColumnNames.split(",") ){
        columnNameSet.add( columnName );
    }

    ArrayList<TypeInfo> fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString( columnTypeProperty );
    String[] splitNames = columnNameProperty.split(",");

    ArrayList<String> projectionColumnNameList = new ArrayList<String>();
    ArrayList<TypeInfo> projectionFieldTypeList = new ArrayList<TypeInfo>();
    for( int i = 0 ; i < fieldTypes.size() ; i++ ){
        if( columnNameSet.contains( splitNames[i] ) ){
            projectionColumnNameList.add( splitNames[i] );
            projectionFieldTypeList.add( fieldTypes.get(i) );
        }
        filedIndexMap.put( splitNames[i] , i );
    }

    StructTypeInfo rootType = new StructTypeInfo();
    rootType.setAllStructFieldNames( projectionColumnNameList );
    rootType.setAllStructFieldTypeInfos( projectionFieldTypeList );

    return rootType;
}
Example #9
Source Project: metacat Author: Netflix File: HiveTypeConverter.java License: Apache License 2.0

@Override
public Type toMetacatType(final String type) {
    // Hack to fix presto "varchar" type coming in with no length which is required by Hive.
    final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(
            "varchar".equals(type.toLowerCase()) ? serdeConstants.STRING_TYPE_NAME : type);
    ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
    // The standard struct object inspector forces field names to lower case, however in Metacat we need to preserve
    // the original case of the struct fields so we wrap it with our wrapper to force the fieldNames to keep
    // their original case
    if (typeInfo.getCategory().equals(ObjectInspector.Category.STRUCT)) {
        final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
        final StandardStructObjectInspector objectInspector = (StandardStructObjectInspector) oi;
        oi = new HiveTypeConverter.SameCaseStandardStructObjectInspector(
                structTypeInfo.getAllStructFieldNames(), objectInspector);
    }
    return getCanonicalType(oi);
}
Example #10
Source Project: incubator-hivemall Author: apache File: HiveUtils.java License: Apache License 2.0

@Nullable
public static String[] getConstStringArray(@Nonnull final ObjectInspector oi)
        throws UDFArgumentException {
    if (!ObjectInspectorUtils.isConstantObjectInspector(oi)) {
        throw new UDFArgumentException("argument must be a constant value: "
                + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
    }
    ConstantObjectInspector constOI = (ConstantObjectInspector) oi;
    if (constOI.getCategory() != Category.LIST) {
        throw new UDFArgumentException(
                "argument must be an array: " + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
    }
    final List<?> lst = (List<?>) constOI.getWritableConstantValue();
    if (lst == null) {
        return null;
    }
    final int size = lst.size();
    final String[] ary = new String[size];
    for (int i = 0; i < size; i++) {
        Object o = lst.get(i);
        if (o != null) {
            ary[i] = o.toString();
        }
    }
    return ary;
}
Example #11
Source Project: elasticsearch-hadoop Author: elastic File: HiveUtils.java License: Apache License 2.0

static StandardStructObjectInspector structObjectInspector(Properties tableProperties) {
    // extract column info - don't use Hive constants as they were renamed in 0.9 breaking compatibility
    // the column names are saved as the given inspector to #serialize doesn't preserves them
    // (maybe because it's an external table)
    // use the class since StructType requires it ...
    List<String> columnNames = StringUtils.tokenize(tableProperties.getProperty(HiveConstants.COLUMNS), ",");
    List<TypeInfo> colTypes = TypeInfoUtils.getTypeInfosFromTypeString(tableProperties.getProperty(HiveConstants.COLUMNS_TYPES));

    // create a standard writable Object Inspector - used later on by serialization/deserialization
    List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>();

    for (TypeInfo typeInfo : colTypes) {
        inspectors.add(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo));
    }

    return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
}
Example #12
Source Project: incubator-hivemall Author: apache File: JsonSerdeUtilsTest.java License: Apache License 2.0

@Test
public void testMapValues() throws SerDeException {
    List<String> columnNames = Arrays.asList("a,b".split(","));
    List<TypeInfo> columnTypes =
            TypeInfoUtils.getTypeInfosFromTypeString("array<string>,map<string,int>");

    Text text1 = new Text("{ \"a\":[\"aaa\"],\"b\":{\"bbb\":1}} ");
    Text text2 = new Text("{\"a\":[\"yyy\"],\"b\":{\"zzz\":123}}");
    Text text3 = new Text("{\"a\":[\"a\"],\"b\":{\"x\":11, \"y\": 22, \"z\": null}}");

    List<Object> expected1 = Arrays.<Object>asList(Arrays.<String>asList("aaa"),
            createHashMapStringInteger("bbb", 1));
    List<Object> expected2 = Arrays.<Object>asList(Arrays.<String>asList("yyy"),
            createHashMapStringInteger("zzz", 123));
    List<Object> expected3 = Arrays.<Object>asList(Arrays.<String>asList("a"),
            createHashMapStringInteger("x", 11, "y", 22, "z", null));

    List<Object> result1 = JsonSerdeUtils.deserialize(text1, columnNames, columnTypes);
    List<Object> result2 = JsonSerdeUtils.deserialize(text2, columnNames, columnTypes);
    List<Object> result3 = JsonSerdeUtils.deserialize(text3, columnNames, columnTypes);

    Assert.assertEquals(expected1, result1);
    Assert.assertEquals(expected2, result2);
    Assert.assertEquals(expected3, result3);
}
Example #13
Source Project: incubator-hivemall Author: apache File: JsonSerdeUtilsTest.java License: Apache License 2.0

@Test
public void testTopLevelArray() throws Exception {
    List<String> expected1 = Arrays.asList("Taro", "Tanaka");
    Text json1 = new Text("[\"Taro\",\"Tanaka\"]");
    TypeInfo type1 = TypeInfoUtils.getTypeInfoFromTypeString("array<string>");

    List<Object> deserialized1 = JsonSerdeUtils.deserialize(json1, type1);
    assertRecordEquals(expected1, deserialized1);
    Text serialized1 = JsonSerdeUtils.serialize(deserialized1,
            HCatRecordObjectInspectorFactory.getStandardObjectInspectorFromTypeInfo(type1));
    Assert.assertEquals(json1, serialized1);

    List<Double> expected2 = Arrays.asList(1.1d, 2.2d, 3.3d);
    Text json2 = new Text("[1.1,2.2,3.3]");
    TypeInfo type2 = TypeInfoUtils.getTypeInfoFromTypeString("array<double>");

    List<Object> deserialized2 = JsonSerdeUtils.deserialize(json2, type2);
    assertRecordEquals(expected2, deserialized2);
    Text serialized2 = JsonSerdeUtils.serialize(deserialized2,
            HCatRecordObjectInspectorFactory.getStandardObjectInspectorFromTypeInfo(type2));
    Assert.assertEquals(json2, serialized2);
}
Example #14
Source Project: incubator-hivemall Author: apache File: JsonSerdeUtilsTest.java License: Apache License 2.0

@Test
public void testTopLevelPrimitive() throws Exception {
    Double expected1 = Double.valueOf(3.3);
    Text json1 = new Text("3.3");
    TypeInfo type1 = TypeInfoUtils.getTypeInfoFromTypeString("double");

    Object deserialized1 = JsonSerdeUtils.deserialize(json1, type1);
    Assert.assertEquals(expected1, deserialized1);
    Text serialized1 = JsonSerdeUtils.serialize(deserialized1,
            HCatRecordObjectInspectorFactory.getStandardObjectInspectorFromTypeInfo(type1));
    Assert.assertEquals(json1, serialized1);

    Boolean expected2 = Boolean.FALSE;
    Text json2 = new Text("false");

    Boolean deserialized2 = JsonSerdeUtils.deserialize(json2);
    Assert.assertEquals(expected2, deserialized2);
    Text serialized2 = JsonSerdeUtils.serialize(deserialized2,
            PrimitiveObjectInspectorFactory.javaBooleanObjectInspector);
    Assert.assertEquals(json2, serialized2);
}
Example #15
Source Project: bigdata-tutorial Author: micmiu File: JSONCDHSerDe.java License: Apache License 2.0

/**
 * An initialization function used to gather information about the table.
 * Typically, a SerDe implementation will be interested in the list of
 * column names and their types. That information will be used to help perform
 * actual serialization and deserialization of data.
 */
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    // Get a list of the table's column names.
    String colNamesStr = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    colNames = Arrays.asList(colNamesStr.split(","));

    // Get a list of TypeInfos for the columns. This list lines up with
    // the list of column names.
    String colTypesStr = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    List<TypeInfo> colTypes = TypeInfoUtils.getTypeInfosFromTypeString(colTypesStr);

    rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes);
    rowOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
}
Example #16
Source Project: flink Author: apache File: HiveTableUtil.java License: Apache License 2.0

/**
 * Create a Flink's TableSchema from Hive table's columns and partition keys.
 */
public static TableSchema createTableSchema(List<FieldSchema> cols, List<FieldSchema> partitionKeys,
        Set<String> notNullColumns, UniqueConstraint primaryKey) {
    List<FieldSchema> allCols = new ArrayList<>(cols);
    allCols.addAll(partitionKeys);

    String[] colNames = new String[allCols.size()];
    DataType[] colTypes = new DataType[allCols.size()];

    for (int i = 0; i < allCols.size(); i++) {
        FieldSchema fs = allCols.get(i);

        colNames[i] = fs.getName();
        colTypes[i] = HiveTypeUtil.toFlinkType(TypeInfoUtils.getTypeInfoFromTypeString(fs.getType()));
        if (notNullColumns.contains(colNames[i])) {
            colTypes[i] = colTypes[i].notNull();
        }
    }

    TableSchema.Builder builder = TableSchema.builder().fields(colNames, colTypes);
    if (primaryKey != null) {
        builder.primaryKey(primaryKey.getName(), primaryKey.getColumns().toArray(new String[0]));
    }
    return builder.build();
}
Example #17
Source Project: flink Author: apache File: HiveStatsUtil.java License: Apache License 2.0

private static ColumnStatistics createHiveColumnStatistics(
        Map<String, CatalogColumnStatisticsDataBase> colStats,
        StorageDescriptor sd,
        ColumnStatisticsDesc desc,
        String hiveVersion) {
    List<ColumnStatisticsObj> colStatsList = new ArrayList<>();

    for (FieldSchema field : sd.getCols()) {
        String hiveColName = field.getName();
        String hiveColType = field.getType();
        CatalogColumnStatisticsDataBase flinkColStat = colStats.get(field.getName());
        if (null != flinkColStat) {
            ColumnStatisticsData statsData = getColumnStatisticsData(
                    HiveTypeUtil.toFlinkType(TypeInfoUtils.getTypeInfoFromTypeString(hiveColType)),
                    flinkColStat,
                    hiveVersion);
            ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj(hiveColName, hiveColType, statsData);
            colStatsList.add(columnStatisticsObj);
        }
    }
    return new ColumnStatistics(desc, colStatsList);
}
Example #18
Source Project: incubator-gobblin Author: apache File: OrcTestTools.java License: Apache License 2.0

/**
 * AvroRow version of writeAsOrcBinary
 */
private void writeAsOrcBinary(OrcRowIterator input, TypeInfo schema, Path outputPath) throws IOException {
    Configuration configuration = new Configuration();

    // Note that it doesn't support schema evolution at all.
    // If the schema in realRow is inconsistent with given schema, writing into disk
    // would run into failure.
    ObjectInspector oi = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(schema);
    OrcFile.WriterOptions options = OrcFile.writerOptions(configuration).inspector(oi);

    Writer writer = null;
    while (input.hasNext()) {
        AvroRow avroRow = (AvroRow) input.next();
        if (writer == null) {
            options.inspector(avroRow.getInspector());
            writer = OrcFile.createWriter(outputPath, options);
        }
        writer.addRow(avroRow.realRow);
    }
    if (writer != null) {
        writer.close();
    }
}
Example #19
Source Project: presto Author: prestosql File: TestHiveBucketing.java License: Apache License 2.0

private static int computeHive(BucketingVersion bucketingVersion, List<String> hiveTypeStrings, List<Object> hiveValues, List<TypeInfo> hiveTypeInfos) {
    ImmutableList.Builder<Entry<ObjectInspector, Object>> columnBindingsBuilder = ImmutableList.builder();
    for (int i = 0; i < hiveTypeStrings.size(); i++) {
        Object javaValue = hiveValues.get(i);

        columnBindingsBuilder.add(Maps.immutableEntry(
                TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(hiveTypeInfos.get(i)),
                javaValue));
    }
    return getHiveBucketHashCode(bucketingVersion, columnBindingsBuilder.build());
}
Example #20
Source Project: spork Author: sigmoidanalytics File: OrcStorage.java License: Apache License 2.0

private TypeInfo getTypeInfoFromLocation(String location, Job job) throws IOException {
    FileSystem fs = FileSystem.get(job.getConfiguration());
    Path path = getFirstFile(location, fs);
    if (path == null) {
        log.info("Cannot find any ORC files from " + location +
                ". Probably multiple load store in script.");
        return null;
    }
    Reader reader = OrcFile.createReader(fs, path);
    ObjectInspector oip = (ObjectInspector) reader.getObjectInspector();
    return TypeInfoUtils.getTypeInfoFromObjectInspector(oip);
}
Example #21
Source Project: flink Author: flink-tpc-ds File: HiveStatsUtil.java License: Apache License 2.0

/**
 * Create a map of Flink column stats from the given Hive column stats.
 */
public static Map<String, CatalogColumnStatisticsDataBase> createCatalogColumnStats(@Nonnull List<ColumnStatisticsObj> hiveColStats) {
    checkNotNull(hiveColStats, "hiveColStats can not be null");
    Map<String, CatalogColumnStatisticsDataBase> colStats = new HashMap<>();
    for (ColumnStatisticsObj colStatsObj : hiveColStats) {
        CatalogColumnStatisticsDataBase columnStats = createTableColumnStats(
                HiveTypeUtil.toFlinkType(TypeInfoUtils.getTypeInfoFromTypeString(colStatsObj.getColType())),
                colStatsObj.getStatsData());
        colStats.put(colStatsObj.getColName(), columnStats);
    }
    return colStats;
}
Example #22
Source Project: incubator-iotdb Author: apache File: TsFileSerDe.java License: Apache License 2.0

@Override
public void initialize(@Nullable Configuration conf, Properties tbl) throws SerDeException {
    final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER)
            ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER)
            : String.valueOf(SerDeUtils.COMMA);

    deviceId = tbl.getProperty(DEVICE_ID);

    if (columnNameProperty == null || columnNameProperty.isEmpty()
            || columnTypeProperty == null || columnTypeProperty.isEmpty()) {
        columnNames = Collections.emptyList();
        columnTypes = Collections.emptyList();
    } else {
        columnNames = StringInternUtils.internStringsInList(
                Arrays.asList(columnNameProperty.split(columnNameDelimiter)));
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }

    // Check column and types equals
    if (columnTypes.size() != columnNames.size()) {
        throw new TsFileSerDeException("len(columnNames) != len(columnTypes)");
    }

    oi = createObjectInspector();
}
Example #23
Source Project: parquet-mr Author: apache File: TestHiveSchemaConverter.java License: Apache License 2.0

private List<TypeInfo> createHiveTypeInfoFrom(final String columnsTypeStr) {
    List<TypeInfo> columnTypes;

    if (columnsTypeStr.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnsTypeStr);
    }

    return columnTypes;
}
Example #24
Source Project: dremio-oss Author: dremio File: HiveSchemaConverter.java License: Apache License 2.0

/**
 * iterates over all fields of a table and checks if any field exceeds
 * maximum allowed nested level
 * @param table
 * @param maxNestedLevels
 */
public static void checkFieldNestedLevels(final Table table, int maxNestedLevels) {
    for (FieldSchema hiveField : table.getSd().getCols()) {
        final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(hiveField.getType());
        int depth = findFieldDepth(typeInfo);
        if (depth > maxNestedLevels) {
            throw new ColumnNestedTooDeepException(hiveField.getName(), maxNestedLevels);
        }
    }
}
Example #25
Source Project: dremio-oss Author: dremio File: ManagedHiveSchema.java License: Apache License 2.0

public ManagedHiveSchema(final JobConf jobConf, final HiveReaderProto.HiveTableXattr tableXattr) {
    final java.util.Properties tableProperties = new java.util.Properties();
    HiveUtilities.addProperties(jobConf, tableProperties, HiveReaderProtoUtil.getTableProperties(tableXattr));
    final String fieldNameProp = Optional.ofNullable(tableProperties.getProperty("columns")).orElse("");
    final String fieldTypeProp = Optional.ofNullable(tableProperties.getProperty("columns.types")).orElse("");
    varcharTruncationEnabled = HiveDatasetOptions
            .enforceVarcharWidth(HiveReaderProtoUtil.convertValuesToNonProtoAttributeValues(tableXattr.getDatasetOptionMap()));

    final Iterator<String> fieldNames = Splitter.on(",").trimResults().split(fieldNameProp).iterator();
    final Iterator<TypeInfo> fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString(fieldTypeProp).iterator();
    final Map<String, ManagedSchemaField> schemaFieldMap = new HashMap<>();
    final Map<String, TypeInfo> typeInfoMap = new HashMap<>();
    while (fieldNames.hasNext() && fieldTypes.hasNext()) {
        final String fieldName = fieldNames.next();
        final TypeInfo fieldType = fieldTypes.next();
        ManagedSchemaField field;
        if (fieldType instanceof DecimalTypeInfo) {
            field = ManagedSchemaField.newFixedLenField(fieldName, fieldType.getTypeName(),
                    ((DecimalTypeInfo) fieldType).getPrecision(), ((DecimalTypeInfo) fieldType).getScale());
            typeInfoMap.put(fieldName, fieldType);
        } else if (fieldType instanceof BaseCharTypeInfo) {
            if (varcharTruncationEnabled) {
                field = ManagedSchemaField.newFixedLenField(fieldName, fieldType.getTypeName(),
                        ((BaseCharTypeInfo) fieldType).getLength(), 0);
                typeInfoMap.put(fieldName, fieldType);
            } else {
                field = ManagedSchemaField.newUnboundedLenField(fieldName, fieldType.getTypeName());
            }
        } else {
            // Extend ManagedSchemaField.java in case granular information has to be stored.
            // No mention of len and scale means it is unbounded. So, we store max values.
            field = ManagedSchemaField.newUnboundedLenField(fieldName, fieldType.getTypeName());
            typeInfoMap.put(fieldName, fieldType);
        }
        schemaFieldMap.put(fieldName, field);
    }
    fieldInfo = CaseInsensitiveMap.newImmutableMap(schemaFieldMap);
    typeInfo = CaseInsensitiveMap.newImmutableMap(typeInfoMap);
}
Example #26
Source Project: dremio-oss Author: dremio File: HiveMetadataUtils.java License: Apache License 2.0

private static boolean isFieldTypeVarchar(FieldSchema hiveField) {
    final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(hiveField.getType());
    if (typeInfo.getCategory() == Category.PRIMITIVE) {
        PrimitiveTypeInfo pTypeInfo = (PrimitiveTypeInfo) typeInfo;
        if (pTypeInfo.getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.VARCHAR
                || pTypeInfo.getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.CHAR) {
            return true;
        }
    }
    return false;
}
Example #27
Source Project: dremio-oss Author: dremio File: HiveMetadataUtils.java License: Apache License 2.0

private static List<ColumnInfo> buildColumnInfo(final Table table, final InputFormat<?, ?> format, final boolean includeComplexParquetCols) {
    final List<ColumnInfo> columnInfos = new ArrayList<>();
    for (FieldSchema hiveField : table.getSd().getCols()) {
        final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(hiveField.getType());
        Field f = HiveSchemaConverter.getArrowFieldFromHiveType(hiveField.getName(), typeInfo, format, includeComplexParquetCols);
        if (f != null) {
            columnInfos.add(getColumnInfo(typeInfo));
        }
    }
    return columnInfos;
}
Example #28
Source Project: dremio-oss Author: dremio File: HiveMetadataUtils.java License: Apache License 2.0

private static boolean isFieldTypeVarchar(FieldSchema hiveField) {
    final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(hiveField.getType());
    if (typeInfo.getCategory() == Category.PRIMITIVE) {
        PrimitiveTypeInfo pTypeInfo = (PrimitiveTypeInfo) typeInfo;
        if (pTypeInfo.getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.VARCHAR
                || pTypeInfo.getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.CHAR) {
            return true;
        }
    }
    return false;
}
Example #29
Source Project: elasticsearch-hadoop Author: elastic File: HiveSerializationEventConverterTest.java License: Apache License 2.0

@Test
public void generateEventHiveRecord() throws Exception {
    Map<Writable, Writable> map = new LinkedMapWritable();
    map.put(new Text("one"), new IntWritable(1));
    map.put(new Text("two"), new IntWritable(2));
    map.put(new Text("three"), new IntWritable(3));

    HiveType tuple = new HiveType(map, TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
            TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo)));

    SerializationEventConverter eventConverter = new SerializationEventConverter();

    SerializationFailure iaeFailure = new SerializationFailure(new IllegalArgumentException("garbage"), tuple, new ArrayList<String>());

    String rawEvent = eventConverter.getRawEvent(iaeFailure);
    assertThat(rawEvent, startsWith("HiveType{object={one=1, two=2, three=3}, " +
            "inspector=org.apach[email protected]"));

    String timestamp = eventConverter.getTimestamp(iaeFailure);
    assertTrue(StringUtils.hasText(timestamp));
    assertTrue(DateUtils.parseDate(timestamp).getTime().getTime() > 1L);

    String exceptionType = eventConverter.renderExceptionType(iaeFailure);
    assertEquals("illegal_argument_exception", exceptionType);

    String exceptionMessage = eventConverter.renderExceptionMessage(iaeFailure);
    assertEquals("garbage", exceptionMessage);

    String eventMessage = eventConverter.renderEventMessage(iaeFailure);
    assertEquals("Could not construct bulk entry from record", eventMessage);
}
Example #30
Source Project: nifi Author: apache File: TestNiFiOrcUtils.java License: Apache License 2.0

@Test
public void test_convertToORCObject() {
    Schema schema = SchemaBuilder.enumeration("myEnum").symbols("x", "y", "z");
    List<Object> objects = Arrays.asList(new Utf8("Hello"), new GenericData.EnumSymbol(schema, "x"));
    objects.forEach((avroObject) -> {
        Object o = NiFiOrcUtils.convertToORCObject(TypeInfoUtils.getTypeInfoFromTypeString("uniontype<bigint,string>"), avroObject);
        assertTrue(o instanceof UnionObject);
        UnionObject uo = (UnionObject) o;
        assertTrue(uo.getObject() instanceof Text);
    });
}