Java Code Examples for org.apache.hadoop.hive.serde2.typeinfo.TypeInfo

The following examples show how to use org.apache.hadoop.hive.serde2.typeinfo.TypeInfo. They are extracted from open source projects; the originating project, source file, and license are noted above each example.
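
Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) showing the two usual ways to obtain a TypeInfo: through the factory methods and constants on TypeInfoFactory, or by parsing a Hive type string with TypeInfoUtils.

import java.util.Arrays;

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeInfoSketch {
    public static void main(String[] args) {
        // Primitive types come from TypeInfoFactory, either as constants or by Hive type name.
        TypeInfo intType = TypeInfoFactory.intTypeInfo;
        TypeInfo stringType = TypeInfoFactory.getPrimitiveTypeInfo("string");

        // Complex types are composed from other TypeInfos.
        TypeInfo tagsType = TypeInfoFactory.getListTypeInfo(stringType);
        TypeInfo rowType = TypeInfoFactory.getStructTypeInfo(
                Arrays.asList("id", "name", "tags"),
                Arrays.asList(intType, stringType, tagsType));

        // A TypeInfo can also be parsed from a Hive type string.
        TypeInfo parsed = TypeInfoUtils.getTypeInfoFromTypeString("map<string,array<double>>");

        System.out.println(rowType.getTypeName()); // struct<id:int,name:string,tags:array<string>>
        System.out.println(parsed.getTypeName());  // map<string,array<double>>
    }
}
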
Example 1
Source Project: incubator-gobblin   Source File: HiveOrcSerDeManager.java    License: Apache License 2.0
/**
 * Extensible in case there is another source of truth for fetching the schema instead of interacting with HDFS.
 *
 * To initialize an {@link org.apache.hadoop.hive.ql.io.orc.OrcSerde} object, the following serde properties are required:
 * org.apache.hadoop.hive.serde.serdeConstants#LIST_COLUMNS and
 * org.apache.hadoop.hive.serde.serdeConstants#LIST_COLUMN_TYPES
 *
 */
protected void addSchemaPropertiesHelper(Path path, HiveRegistrationUnit hiveUnit) throws IOException {
  TypeInfo schema = getSchemaFromLatestFile(path, this.fs);
  if (schema instanceof StructTypeInfo) {
    StructTypeInfo structTypeInfo = (StructTypeInfo) schema;
    hiveUnit.setSerDeProp(serdeConstants.LIST_COLUMNS,
        Joiner.on(",").join(structTypeInfo.getAllStructFieldNames()));
    hiveUnit.setSerDeProp(serdeConstants.LIST_COLUMN_TYPES,
        Joiner.on(",").join(
            structTypeInfo.getAllStructFieldTypeInfos().stream().map(x -> x.getTypeName())
                .collect(Collectors.toList())));
  } else {
    // Hive always uses a struct with a field for each of the top-level columns as the root object type.
    // So here we assume that to-be-registered ORC files follow this pattern.
    throw new IllegalStateException("A valid ORC schema should be an instance of struct");
  }
}
 
Example 2
Source Project: incubator-hivemall   Source File: AUCUDAF.java    License: Apache License 2.0
@Override
public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo)
        throws SemanticException {
    if (typeInfo.length != 2 && typeInfo.length != 3) {
        throw new UDFArgumentTypeException(typeInfo.length - 1,
            "_FUNC_ takes two or three arguments");
    }

    if (HiveUtils.isNumberTypeInfo(typeInfo[0]) && HiveUtils.isIntegerTypeInfo(typeInfo[1])) {
        return new ClassificationEvaluator();
    } else {
        ListTypeInfo arg1type = HiveUtils.asListTypeInfo(typeInfo[0]);
        if (!HiveUtils.isPrimitiveTypeInfo(arg1type.getListElementTypeInfo())) {
            throw new UDFArgumentTypeException(0,
                "The first argument `array rankItems` is invalid form: " + typeInfo[0]);
        }

        ListTypeInfo arg2type = HiveUtils.asListTypeInfo(typeInfo[1]);
        if (!HiveUtils.isPrimitiveTypeInfo(arg2type.getListElementTypeInfo())) {
            throw new UDFArgumentTypeException(1,
                "The second argument `array correctItems` is invalid form: " + typeInfo[1]);
        }

        return new RankingEvaluator();
    }
}
 
Example 3
Source Project: nifi   Source File: PutORC.java    License: Apache License 2.0
@Override
public HDFSRecordWriter createHDFSRecordWriter(final ProcessContext context, final FlowFile flowFile, final Configuration conf, final Path path, final RecordSchema schema)
        throws IOException, SchemaNotFoundException {

    final long stripeSize = context.getProperty(STRIPE_SIZE).asDataSize(DataUnit.B).longValue();
    final int bufferSize = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B).intValue();
    final CompressionKind compressionType = CompressionKind.valueOf(context.getProperty(COMPRESSION_TYPE).getValue());
    final boolean normalizeForHive = context.getProperty(HIVE_FIELD_NAMES).asBoolean();
    TypeInfo orcSchema = NiFiOrcUtils.getOrcSchema(schema, normalizeForHive);
    final Writer orcWriter = NiFiOrcUtils.createWriter(path, conf, orcSchema, stripeSize, compressionType, bufferSize);
    final String hiveTableName = context.getProperty(HIVE_TABLE_NAME).isSet()
            ? context.getProperty(HIVE_TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue()
            : NiFiOrcUtils.normalizeHiveTableName(schema.getIdentifier().getName().orElse("unknown"));
    final boolean hiveFieldNames = context.getProperty(HIVE_FIELD_NAMES).asBoolean();

    return new ORCHDFSRecordWriter(orcWriter, schema, hiveTableName, hiveFieldNames);
}
 
Example 4
Source Project: Cobol-to-Hive   Source File: CobolDeserializer.java    License: Apache License 2.0
private Object deserializeList(String columnName, ListTypeInfo columnType)
		throws RuntimeException {
	int size = Integer.parseInt(rowElements.get(propertiesList.get(fieldNo).get("dcol")));

	List<Object> listContents = new ArrayList<Object>();
	TypeInfo ti = columnType.getListElementTypeInfo();
	String tn = columnType.getTypeName();
	rowElements.add("");
	// Remember the field position before the first element; worker() advances fieldNo,
	// so it is reset before each element and the position reached by the last element is kept.
	int tempFieldNo = fieldNo, fieldNoList = fieldNo;
	for (int j = 0; j < size; j++) {
		listContents.add(worker(tn, ti));
		fieldNoList = fieldNo;
		fieldNo = tempFieldNo;
	}
	fieldNo = fieldNoList;
	return listContents;
}
 
Example 5
Source Project: incubator-hivemall   Source File: OnehotEncodingUDAF.java    License: Apache License 2.0
@Override
public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] argTypes)
        throws SemanticException {
    final int numFeatures = argTypes.length;
    if (numFeatures == 0) {
        throw new UDFArgumentException("_FUNC_ requires at least 1 argument");
    }
    for (int i = 0; i < numFeatures; i++) {
        if (argTypes[i] == null) {
            throw new UDFArgumentTypeException(i,
                "Null type is found. Only primitive type arguments are accepted.");
        }
        if (argTypes[i].getCategory() != ObjectInspector.Category.PRIMITIVE) {
            throw new UDFArgumentTypeException(i,
                "Only primitive type arguments are accepted but " + argTypes[i].getTypeName()
                        + " was passed as parameter 1.");
        }
    }

    return new GenericUDAFOnehotEncodingEvaluator();
}
 
Example 6
Source Project: nifi   Source File: TestNiFiOrcUtils.java    License: Apache License 2.0
@Test
public void test_getPrimitiveOrcTypeFromPrimitiveAvroType() throws Exception {
    // Expected ORC types
    TypeInfo[] expectedTypes = {
            TypeInfoCreator.createInt(),
            TypeInfoCreator.createLong(),
            TypeInfoCreator.createBoolean(),
            TypeInfoCreator.createFloat(),
            TypeInfoCreator.createDouble(),
            TypeInfoCreator.createBinary(),
            TypeInfoCreator.createString(),
    };

    Schema testSchema = buildPrimitiveAvroSchema();
    List<Schema.Field> fields = testSchema.getFields();
    for (int i = 0; i < fields.size(); i++) {
        assertEquals(expectedTypes[i], NiFiOrcUtils.getPrimitiveOrcTypeFromPrimitiveAvroType(fields.get(i).schema().getType()));
    }
}
 
Example 7
Source Project: hadoop-etl-udfs   Source File: ExaParquetWriterImpl.java    License: MIT License
public ExaParquetWriterImpl(final List<String> colNames,
                            final List<TypeInfo> colTypes,
                            final Configuration conf,
                            final Path path,
                            final String compressionType,
                            final ExaIterator exa,
                            final int firstColumnIndex,
                            final List<Integer> dynamicPartitionExaColNums) throws Exception {
    this(HiveSchemaConverter.convert(colNames, colTypes),
            colNames.size(),
            conf,
            path,
            compressionType,
            exa,
            firstColumnIndex,
            dynamicPartitionExaColNums);
}
 
Example 8
Source Project: nifi   Source File: TestNiFiOrcUtils.java    License: Apache License 2.0
@Test
public void test_getOrcField_record() throws Exception {
    final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
    builder.name("int").type().intType().noDefault();
    builder.name("long").type().longType().longDefault(1L);
    builder.name("array").type().array().items().stringType().noDefault();
    Schema testSchema = builder.endRecord();
    TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema);
    assertEquals(
            TypeInfoFactory.getStructTypeInfo(
                    Arrays.asList("int", "long", "array"),
                    Arrays.asList(
                            TypeInfoCreator.createInt(),
                            TypeInfoCreator.createLong(),
                            TypeInfoFactory.getListTypeInfo(TypeInfoCreator.createString()))),
            orcType);
}
 
Example 9
public HiveVectorizedReaderSetting( final FileSplit split , final JobConf job , final HiveReaderSetting hiveReaderConfig ) throws IOException{
  this.hiveReaderConfig = hiveReaderConfig;

  rbCtx = Utilities.getVectorizedRowBatchCtx( job );
  partitionValues = new Object[rbCtx.getPartitionColumnCount()];
  if( 0 < partitionValues.length ){
    rbCtx.getPartitionValues( rbCtx, job, split, partitionValues );
  }

  TypeInfo[] typeInfos = rbCtx.getRowColumnTypeInfos();
  columnNames = rbCtx.getRowColumnNames();
  needColumnIds = createNeedColumnId( ColumnProjectionUtils.getReadColumnIDs( job ) );

  projectionColumn = new boolean[columnNames.length];
  assignors = new IColumnVectorAssignor[columnNames.length];
  for( int id : needColumnIds ){
    projectionColumn[id] = true;
    assignors[id] = ColumnVectorAssignorFactory.create( typeInfos[id] );
  }
}
 
Example 10
Source Project: nifi   Source File: NiFiOrcUtils.java    License: Apache License 2.0
public static TypeInfo getPrimitiveOrcTypeFromPrimitiveAvroType(Schema.Type avroType) throws IllegalArgumentException {
    if (avroType == null) {
        throw new IllegalArgumentException("Avro type is null");
    }
    switch (avroType) {
        case INT:
            return TypeInfoFactory.getPrimitiveTypeInfo("int");
        case LONG:
            return TypeInfoFactory.getPrimitiveTypeInfo("bigint");
        case BOOLEAN:
        case NULL: // ORC has no null type, so just pick the smallest. All values are necessarily null.
            return TypeInfoFactory.getPrimitiveTypeInfo("boolean");
        case BYTES:
            return TypeInfoFactory.getPrimitiveTypeInfo("binary");
        case DOUBLE:
            return TypeInfoFactory.getPrimitiveTypeInfo("double");
        case FLOAT:
            return TypeInfoFactory.getPrimitiveTypeInfo("float");
        case STRING:
            return TypeInfoFactory.getPrimitiveTypeInfo("string");
        default:
            throw new IllegalArgumentException("Avro type " + avroType.getName() + " is not a primitive type");
    }
}
 
Example 11
Source Project: flink   Source File: HiveTypeUtil.java    License: Apache License 2.0
@Override
public TypeInfo visit(CharType charType) {
	// Flink and Hive have different length limit for CHAR. Promote it to STRING if it exceeds the limits of
	// Hive and we're told not to check precision. This can be useful when calling Hive UDF to process data.
	if (charType.getLength() > HiveChar.MAX_CHAR_LENGTH || charType.getLength() < 1) {
		if (checkPrecision) {
			throw new CatalogException(
					String.format("HiveCatalog doesn't support char type with length of '%d'. " +
									"The supported length is [%d, %d]",
							charType.getLength(), 1, HiveChar.MAX_CHAR_LENGTH));
		} else {
			return TypeInfoFactory.stringTypeInfo;
		}
	}
	return TypeInfoFactory.getCharTypeInfo(charType.getLength());
}
 
Example 12
Source Project: Cobol-to-Hive   Source File: CobolGroupField.java    License: Apache License 2.0
public List<TypeInfo> getHiveColumnTypes() {
	List<TypeInfo> hiveColumnTypes = new ArrayList<TypeInfo>();
	int count = occurs;
	while (count > 0) {
		for (CobolField cf : subfields) {
			if (cf.getType().isInGroup(CobolFieldType.Group.ELEMENTARY)) {
				hiveColumnTypes.add(cf.getTypeInfo());
			} else {
				hiveColumnTypes.addAll(((CobolGroupField) cf)
						.getHiveColumnTypes());
			}
		}
		count--;
	}
	return hiveColumnTypes;
}
 
Example 13
Source Project: multiple-dimension-spread   Source File: MDSSerde.java    License: Apache License 2.0
private StructTypeInfo getAllReadTypeInfo( final String columnNameProperty , final String columnTypeProperty ){
  ArrayList<TypeInfo> fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString( columnTypeProperty );
  ArrayList<String> columnNames = new ArrayList<String>();
  if ( columnNameProperty != null && 0 < columnNameProperty.length() ) {
    String[] columnNameArray = columnNameProperty.split(",");
    for( int i = 0 ; i < columnNameArray.length ; i++ ){
      columnNames.add( columnNameArray[i] );
      filedIndexMap.put( columnNameArray[i] , i );
    }
  }
  StructTypeInfo rootType = new StructTypeInfo();

  rootType.setAllStructFieldNames( columnNames );
  rootType.setAllStructFieldTypeInfos( fieldTypes );

  return rootType;
}
 
Example 14
Source Project: incubator-hivemall   Source File: MAPUDAF.java    License: Apache License 2.0
@Override
public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo)
        throws SemanticException {
    if (typeInfo.length != 2 && typeInfo.length != 3) {
        throw new UDFArgumentTypeException(typeInfo.length - 1,
            "_FUNC_ takes two or three arguments");
    }

    ListTypeInfo arg1type = HiveUtils.asListTypeInfo(typeInfo[0]);
    if (!HiveUtils.isPrimitiveTypeInfo(arg1type.getListElementTypeInfo())) {
        throw new UDFArgumentTypeException(0,
            "The first argument `array rankItems` is invalid form: " + typeInfo[0]);
    }
    ListTypeInfo arg2type = HiveUtils.asListTypeInfo(typeInfo[1]);
    if (!HiveUtils.isPrimitiveTypeInfo(arg2type.getListElementTypeInfo())) {
        throw new UDFArgumentTypeException(1,
            "The second argument `array correctItems` is invalid form: " + typeInfo[1]);
    }

    return new Evaluator();
}
 
Example 15
@Override
public GenericUDAFEvaluator getEvaluator(@Nonnull final TypeInfo[] typeInfo)
        throws SemanticException {
    switch (typeInfo.length) {
        case 1: {
            if (!HiveUtils.isIntegerTypeInfo(typeInfo[0])) {
                throw new UDFArgumentTypeException(0, "Expected INT for yhat: " + typeInfo[0]);
            }
            return new RfEvaluatorV1();
        }
        case 3:
            if (!HiveUtils.isFloatingPointTypeInfo(typeInfo[2])) {
                throw new UDFArgumentTypeException(2,
                    "Expected DOUBLE or FLOAT for model_weight: " + typeInfo[2]);
            }
            /* fall through */
        case 2: {// typeInfo.length == 2 || typeInfo.length == 3
            if (!HiveUtils.isIntegerTypeInfo(typeInfo[0])) {
                throw new UDFArgumentTypeException(0, "Expected INT for yhat: " + typeInfo[0]);
            }
            if (!HiveUtils.isFloatingPointListTypeInfo(typeInfo[1])) {
                throw new UDFArgumentTypeException(1,
                    "ARRAY<double> is expected for a posteriori: " + typeInfo[1]);
            }
            return new RfEvaluatorV2();
        }
        default:
            throw new UDFArgumentLengthException(
                "Expected 1~3 arguments but got " + typeInfo.length);
    }
}
 
Example 16
Source Project: elasticsearch-hadoop   Source File: HiveTypeToJsonTest.java    License: Apache License 2.0
@Test
public void testStruct() {
    List<String> names = Arrays.asList(new String[] { "one", "two" });
    List<TypeInfo> types = Arrays.asList(new TypeInfo[] { stringTypeInfo, intTypeInfo });
    assertEquals("{\"one\":\"first\",\"two\":2}",
            hiveTypeToJson(new MyHiveType(Arrays.asList(new Object[] { new Text("first"), new IntWritable(2) }),
                    getStructTypeInfo(names, types))));
}
 
Example 17
Source Project: flink   Source File: HiveTypeUtil.java    License: Apache License 2.0
@Override
public TypeInfo visit(VarBinaryType varBinaryType) {
	// Flink's BytesType is defined as VARBINARY(Integer.MAX_VALUE)
	// We don't have more information in LogicalTypeRoot to distinguish BytesType and a VARBINARY(Integer.MAX_VALUE) instance
	// Thus always treat VARBINARY(Integer.MAX_VALUE) as BytesType
	if (varBinaryType.getLength() == VarBinaryType.MAX_LENGTH) {
		return TypeInfoFactory.binaryTypeInfo;
	}
	return defaultMethod(varBinaryType);
}
 
Example 18
Source Project: parquet-mr   Source File: TestHiveSchemaConverter.java    License: Apache License 2.0
private void testConversion(final String columnNamesStr, final String columnsTypeStr, final String expectedSchema) throws Exception {
  final List<String> columnNames = createHiveColumnsFrom(columnNamesStr);
  final List<TypeInfo> columnTypes = createHiveTypeInfoFrom(columnsTypeStr);
  final MessageType messageTypeFound = HiveSchemaConverter.convert(columnNames, columnTypes);
  final MessageType expectedMT = MessageTypeParser.parseMessageType(expectedSchema);
  assertEquals("converting " + columnNamesStr + ": " + columnsTypeStr + " to " + expectedSchema, expectedMT, messageTypeFound);
}
 
Example 19
Source Project: localization_nifi   Source File: TestNiFiOrcUtils.java    License: Apache License 2.0
public static TypeInfo buildPrimitiveOrcSchema() {
    return TypeInfoFactory.getStructTypeInfo(Arrays.asList("int", "long", "boolean", "float", "double", "bytes", "string"),
            Arrays.asList(
                    TypeInfoCreator.createInt(),
                    TypeInfoCreator.createLong(),
                    TypeInfoCreator.createBoolean(),
                    TypeInfoCreator.createFloat(),
                    TypeInfoCreator.createDouble(),
                    TypeInfoCreator.createBinary(),
                    TypeInfoCreator.createString()));
}
 
Example 20
@Override
public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo)
        throws SemanticException {
    if (typeInfo.length != 1 && typeInfo.length != 2) {
        throw new UDFArgumentTypeException(typeInfo.length - 1,
            "_FUNC_ takes one or two arguments");
    }
    if (typeInfo.length == 2 && !HiveUtils.isStringTypeInfo(typeInfo[1])) {
        throw new UDFArgumentTypeException(1,
            "The second argument type expected to be const string: " + typeInfo[1]);
    }

    return new HLLEvaluator();
}
 
Example 21
Source Project: streamx   Source File: HiveSchemaConverter.java    License: Apache License 2.0
public static TypeInfo convertStruct(Schema schema) {
  final List<Field> fields = schema.fields();
  final List<String> names = new ArrayList<>(fields.size());
  final List<TypeInfo> types = new ArrayList<>(fields.size());
  for (Field field : fields) {
    names.add(field.name());
    types.add(convert(field.schema()));
  }
  return TypeInfoFactory.getStructTypeInfo(names, types);
}
 
Example 22
Source Project: incubator-hivemall   Source File: JsonSerdeUtils.java    License: Apache License 2.0
@Nonnull
private static Object parseObject(@Nonnull final JsonParser p,
        @CheckForNull final List<String> columnNames,
        @CheckForNull final List<TypeInfo> columnTypes)
        throws JsonParseException, IOException, SerDeException {
    Preconditions.checkNotNull(columnNames, "columnNames MUST NOT be null in parseObject",
        SerDeException.class);
    Preconditions.checkNotNull(columnTypes, "columnTypes MUST NOT be null in parseObject",
        SerDeException.class);
    if (columnNames.size() != columnTypes.size()) {
        throw new SerDeException(
            "Size of columnNames and columnTypes does not match. #columnNames="
                    + columnNames.size() + ", #columnTypes=" + columnTypes.size());
    }

    TypeInfo rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    final HCatSchema schema;
    try {
        schema = HCatSchemaUtils.getHCatSchema(rowTypeInfo).get(0).getStructSubSchema();
    } catch (HCatException e) {
        throw new SerDeException(e);
    }

    final List<Object> r = new ArrayList<Object>(Collections.nCopies(columnNames.size(), null));
    JsonToken token;
    while (((token = p.nextToken()) != JsonToken.END_OBJECT) && (token != null)) {
        // iterate through each token, and create appropriate object here.
        populateRecord(r, token, p, schema);
    }

    if (columnTypes.size() == 1) {
        return r.get(0);
    }
    return r;
}
 
Example 23
Source Project: presto   Source File: HiveBucketing.java    License: Apache License 2.0
@VisibleForTesting
static Optional<Set<Integer>> getHiveBuckets(BucketingVersion bucketingVersion, int bucketCount, List<TypeInfo> types, List<List<NullableValue>> values)
{
    long explorationCount;
    try {
        // explorationCount is the number of combinations of discrete values allowed for bucketing columns.
        // After computing the bucket for every combination, we get a complete set of buckets that need to be read.
        explorationCount = values.stream()
                .mapToLong(List::size)
                .reduce(1, Math::multiplyExact);
    }
    catch (ArithmeticException e) {
        return Optional.empty();
    }
    // explorationLimit is the maximum number of combinations for which the bucket numbers will be computed.
    // If the number of combinations highly exceeds the bucket count, then probably all buckets would be hit.
    // In such case, the bucket filter isn't created and all buckets will be read.
    // The threshold is set to bucketCount * BUCKETS_EXPLORATION_LIMIT_FACTOR.
    // The threshold doesn't apply if the number of combinations is low, that is
    // within BUCKETS_EXPLORATION_GUARANTEED_LIMIT.
    long explorationLimit = Math.max(bucketCount * BUCKETS_EXPLORATION_LIMIT_FACTOR, BUCKETS_EXPLORATION_GUARANTEED_LIMIT);
    if (explorationCount > explorationLimit) {
        return Optional.empty();
    }

    Set<Integer> buckets = new HashSet<>();
    for (List<NullableValue> combination : cartesianProduct(values)) {
        buckets.add(getBucketNumber(bucketingVersion.getBucketHashCode(types, combination.stream().map(NullableValue::getValue).toArray()), bucketCount));
        if (buckets.size() >= bucketCount) {
            return Optional.empty();
        }
    }

    return Optional.of(ImmutableSet.copyOf(buckets));
}
 
Example 24
Source Project: nifi   Source File: TestNiFiOrcUtils.java    License: Apache License 2.0
@Test
public void test_getOrcField_nested_map() {
    final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
    builder.name("map").type().map().values().map().values().doubleType().noDefault();
    RecordSchema testSchema = AvroTypeUtil.createSchema(builder.endRecord());
    TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("map").get().getDataType(), false);
    assertEquals(
            TypeInfoFactory.getMapTypeInfo(TypeInfoCreator.createString(),
                    TypeInfoFactory.getMapTypeInfo(TypeInfoCreator.createString(), TypeInfoCreator.createDouble())),
            orcType);
}
 
Example 25
Source Project: presto   Source File: HiveBucketingV2.java    License: Apache License 2.0
static int getBucketHashCode(List<TypeInfo> types, Page page, int position)
{
    checkArgument(types.size() == page.getChannelCount());
    int result = 0;
    for (int i = 0; i < page.getChannelCount(); i++) {
        int fieldHash = hash(types.get(i), page.getBlock(i), position);
        result = result * 31 + fieldHash;
    }
    return result;
}
 
Example 26
Source Project: parquet-mr   Source File: ParquetHiveSerDe.java    License: Apache License 2.0
@Override
public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException {

  final TypeInfo rowTypeInfo;
  final List<String> columnNames;
  final List<TypeInfo> columnTypes;
  // Get column names and sort order
  final String columnNameProperty = tbl.getProperty(IOConstants.COLUMNS);
  final String columnTypeProperty = tbl.getProperty(IOConstants.COLUMNS_TYPES);

  if (columnNameProperty.length() == 0) {
    columnNames = new ArrayList<String>();
  } else {
    columnNames = Arrays.asList(columnNameProperty.split(","));
  }
  if (columnTypeProperty.length() == 0) {
    columnTypes = new ArrayList<TypeInfo>();
  } else {
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  }
  if (columnNames.size() != columnTypes.size()) {
    throw new IllegalArgumentException("ParquetHiveSerde initialization failed. Number of column " +
      "name and column type differs. columnNames = " + columnNames + ", columnTypes = " +
      columnTypes);
  }
  // Create row related objects
  rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
  this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);

  // Stats part
  stats = new SerDeStats();
  serializedSize = 0;
  deserializedSize = 0;
  status = LAST_OPERATION.UNKNOWN;
}
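
Examples 13 and 26 above both rebuild the row's StructTypeInfo from the column metadata that Hive hands a SerDe as table properties. The sketch below (property values are illustrative, not taken from either project) shows that round trip using the usual comma-separated column list and colon-separated type list.

import java.util.Arrays;
import java.util.List;
import java.util.Properties;

import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class SerDePropertiesSketch {
    public static void main(String[] args) {
        // Table properties as a SerDe would receive them in initialize(conf, tbl).
        Properties tbl = new Properties();
        tbl.setProperty(serdeConstants.LIST_COLUMNS, "id,name,scores");
        tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int:string:array<double>");

        // Parse the property values back into column names and TypeInfos.
        List<String> columnNames =
                Arrays.asList(tbl.getProperty(serdeConstants.LIST_COLUMNS).split(","));
        List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(
                tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES));

        // Combine them into the row type used to build an ObjectInspector.
        StructTypeInfo rowType =
                (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
        System.out.println(rowType.getTypeName()); // struct<id:int,name:string,scores:array<double>>
    }
}
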
 
Example 27
Source Project: presto   Source File: HiveBucketingV1.java    License: Apache License 2.0
static int getBucketHashCode(List<TypeInfo> types, Page page, int position)
{
    checkArgument(types.size() == page.getChannelCount());
    int result = 0;
    for (int i = 0; i < page.getChannelCount(); i++) {
        int fieldHash = hash(types.get(i), page.getBlock(i), position);
        result = result * 31 + fieldHash;
    }
    return result;
}
 
Example 28
Source Project: kite   Source File: TestSchemaConversion.java    License: Apache License 2.0
@Test
public void testSimpleRecord() {
  TypeInfo type = HiveSchemaConverter.convert(SIMPLE_RECORD);

  Assert.assertTrue("Record should be converted to struct",
      type instanceof StructTypeInfo);
  Assert.assertEquals("Field names should match",
      Lists.newArrayList("id", "name"),
      ((StructTypeInfo) type).getAllStructFieldNames());
  Assert.assertEquals("Field types should match",
      Lists.newArrayList(
          INT_TYPE_INFO,
          STRING_TYPE_INFO),
      ((StructTypeInfo) type).getAllStructFieldTypeInfos());
}
 
Example 29
Source Project: presto   Source File: HiveBucketingV1.java    License: Apache License 2.0
private static int hashOfMap(MapTypeInfo type, Block singleMapBlock)
{
    TypeInfo keyTypeInfo = type.getMapKeyTypeInfo();
    TypeInfo valueTypeInfo = type.getMapValueTypeInfo();
    int result = 0;
    for (int i = 0; i < singleMapBlock.getPositionCount(); i += 2) {
        result += hash(keyTypeInfo, singleMapBlock, i) ^ hash(valueTypeInfo, singleMapBlock, i + 1);
    }
    return result;
}
 
Example 30
Source Project: incubator-hivemall   Source File: MinByUDAF.java    License: Apache License 2.0
@Override
public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] argTypes)
        throws SemanticException {
    if (argTypes.length != 2) {
        throw new UDFArgumentLengthException(
            "Exactly two arguments are expected: " + argTypes.length);
    }
    ObjectInspector yOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(argTypes[1]);
    if (!ObjectInspectorUtils.compareSupported(yOI)) {
        throw new UDFArgumentTypeException(1,
            "Cannot support comparison of map<> type or complex type containing map<>.");
    }
    return new Evaluator();
}