org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory Java Examples

The following examples show how to use org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory, the Hive SerDe factory class for obtaining TypeInfo instances that describe primitive and complex column types. Each snippet is taken from an open-source project; the source file, project, and license are noted above each example.
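
Before the project examples, here is a minimal, self-contained sketch of the factory API itself. It only uses calls that also appear in the examples below (getPrimitiveTypeInfo, getListTypeInfo, getMapTypeInfo, getStructTypeInfo, and the cached stringTypeInfo constant); the class name and column names in the sketch are illustrative and not taken from any of the projects.

import java.util.Arrays;

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class TypeInfoFactorySketch {
    public static void main(String[] args) {
        // Primitive types are looked up by their Hive type name.
        TypeInfo intType = TypeInfoFactory.getPrimitiveTypeInfo("int");
        TypeInfo stringType = TypeInfoFactory.stringTypeInfo; // cached constant for "string"

        // Complex types are composed from other TypeInfo instances.
        TypeInfo listOfStrings = TypeInfoFactory.getListTypeInfo(stringType);
        TypeInfo mapStringToInt = TypeInfoFactory.getMapTypeInfo(stringType, intType);
        TypeInfo rowType = TypeInfoFactory.getStructTypeInfo(
                Arrays.asList("id", "name", "tags"),
                Arrays.asList(intType, stringType, listOfStrings));

        // getTypeName() renders the Hive DDL form,
        // e.g. struct<id:int,name:string,tags:array<string>>
        System.out.println(rowType.getTypeName());
        System.out.println(mapStringToInt.getTypeName()); // map<string,int>
    }
}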
Example #1
Source File: TestNiFiOrcUtils.java    From nifi with Apache License 2.0
@Test
public void test_getOrcField_primitive() {
    // Expected ORC types
    TypeInfo[] expectedTypes = {
            TypeInfoFactory.getPrimitiveTypeInfo("int"),
            TypeInfoFactory.getPrimitiveTypeInfo("bigint"),
            TypeInfoFactory.getPrimitiveTypeInfo("boolean"),
            TypeInfoFactory.getPrimitiveTypeInfo("float"),
            TypeInfoFactory.getPrimitiveTypeInfo("double"),
            TypeInfoFactory.getPrimitiveTypeInfo("binary"),
            TypeInfoFactory.getPrimitiveTypeInfo("string")
    };

    // Build a fake Avro record with all types
    RecordSchema testSchema = buildPrimitiveRecordSchema();
    List<RecordField> fields = testSchema.getFields();
    for (int i = 0; i < fields.size(); i++) {
        assertEquals(expectedTypes[i], NiFiOrcUtils.getOrcField(fields.get(i).getDataType(), false));
    }
}
 
Example #2
Source File: NiFiOrcUtils.java    From localization_nifi with Apache License 2.0
public static TypeInfo getPrimitiveOrcTypeFromPrimitiveAvroType(Schema.Type avroType) throws IllegalArgumentException {
    if (avroType == null) {
        throw new IllegalArgumentException("Avro type is null");
    }
    switch (avroType) {
        case INT:
            return TypeInfoFactory.getPrimitiveTypeInfo("int");
        case LONG:
            return TypeInfoFactory.getPrimitiveTypeInfo("bigint");
        case BOOLEAN:
            return TypeInfoFactory.getPrimitiveTypeInfo("boolean");
        case BYTES:
            return TypeInfoFactory.getPrimitiveTypeInfo("binary");
        case DOUBLE:
            return TypeInfoFactory.getPrimitiveTypeInfo("double");
        case FLOAT:
            return TypeInfoFactory.getPrimitiveTypeInfo("float");
        case STRING:
            return TypeInfoFactory.getPrimitiveTypeInfo("string");
        default:
            throw new IllegalArgumentException("Avro type " + avroType.getName() + " is not a primitive type");
    }
}
 
Example #3
Source File: TestNiFiOrcUtils.java    From nifi with Apache License 2.0
@Test
public void test_getOrcField_primitive() throws Exception {
    // Expected ORC types
    TypeInfo[] expectedTypes = {
            TypeInfoFactory.getPrimitiveTypeInfo("int"),
            TypeInfoFactory.getPrimitiveTypeInfo("bigint"),
            TypeInfoFactory.getPrimitiveTypeInfo("boolean"),
            TypeInfoFactory.getPrimitiveTypeInfo("float"),
            TypeInfoFactory.getPrimitiveTypeInfo("double"),
            TypeInfoFactory.getPrimitiveTypeInfo("binary"),
            TypeInfoFactory.getPrimitiveTypeInfo("string")
    };

    // Build a fake Avro record with all types
    Schema testSchema = buildPrimitiveAvroSchema();
    List<Schema.Field> fields = testSchema.getFields();
    for (int i = 0; i < fields.size(); i++) {
        assertEquals(expectedTypes[i], NiFiOrcUtils.getOrcField(fields.get(i).schema()));
    }

}
 
Example #4
Source File: TestSchemaConversion.java    From kite with Apache License 2.0
@Test
public void testUnion() {
  TypeInfo type = HiveSchemaConverter.convert(SchemaBuilder.builder().unionOf()
      .bytesType().and()
      .fixed("fixed").size(12).and()
      .doubleType().and()
      .longType()
      .endUnion());

  Assert.assertEquals("Union should be converted to union",
      TypeInfoFactory.getUnionTypeInfo(Lists.newArrayList(
          BINARY_TYPE_INFO,
          BINARY_TYPE_INFO,
          DOUBLE_TYPE_INFO,
          LONG_TYPE_INFO)),
      type);
}
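
As a companion to the Kite converter test above, the following is a minimal sketch that builds a similar union type directly with TypeInfoFactory. It assumes the BINARY_TYPE_INFO, DOUBLE_TYPE_INFO and LONG_TYPE_INFO constants in the test correspond to the factory's binaryTypeInfo, doubleTypeInfo and longTypeInfo singletons; that mapping is an assumption, not something stated in the test.

import java.util.Arrays;

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class UnionTypeSketch {
    public static void main(String[] args) {
        // A simplified three-member union; the test above expects two binary
        // members because both Avro bytes and fixed map to Hive binary.
        TypeInfo union = TypeInfoFactory.getUnionTypeInfo(Arrays.asList(
                TypeInfoFactory.binaryTypeInfo,   // assumed equivalent of BINARY_TYPE_INFO
                TypeInfoFactory.doubleTypeInfo,   // assumed equivalent of DOUBLE_TYPE_INFO
                TypeInfoFactory.longTypeInfo));   // assumed equivalent of LONG_TYPE_INFO

        // Should render as uniontype<binary,double,bigint>
        System.out.println(union.getTypeName());
    }
}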
 
Example #5
Source File: NiFiOrcUtils.java    From nifi with Apache License 2.0
public static TypeInfo getPrimitiveOrcTypeFromPrimitiveAvroType(Schema.Type avroType) throws IllegalArgumentException {
    if (avroType == null) {
        throw new IllegalArgumentException("Avro type is null");
    }
    switch (avroType) {
        case INT:
            return TypeInfoFactory.getPrimitiveTypeInfo("int");
        case LONG:
            return TypeInfoFactory.getPrimitiveTypeInfo("bigint");
        case BOOLEAN:
        case NULL: // ORC has no null type, so just pick the smallest. All values are necessarily null.
            return TypeInfoFactory.getPrimitiveTypeInfo("boolean");
        case BYTES:
            return TypeInfoFactory.getPrimitiveTypeInfo("binary");
        case DOUBLE:
            return TypeInfoFactory.getPrimitiveTypeInfo("double");
        case FLOAT:
            return TypeInfoFactory.getPrimitiveTypeInfo("float");
        case STRING:
            return TypeInfoFactory.getPrimitiveTypeInfo("string");
        default:
            throw new IllegalArgumentException("Avro type " + avroType.getName() + " is not a primitive type");
    }
}
 
Example #6
Source File: NiFiOrcUtils.java    From nifi with Apache License 2.0
public static TypeInfo getPrimitiveOrcTypeFromPrimitiveFieldType(DataType rawDataType) throws IllegalArgumentException {
    if (rawDataType == null) {
        throw new IllegalArgumentException("Avro type is null");
    }
    RecordFieldType fieldType = rawDataType.getFieldType();
    if (RecordFieldType.INT.equals(fieldType)) {
        return TypeInfoFactory.getPrimitiveTypeInfo("int");
    }
    if (RecordFieldType.LONG.equals(fieldType)) {
        return TypeInfoFactory.getPrimitiveTypeInfo("bigint");
    }
    if (RecordFieldType.BOOLEAN.equals(fieldType)) {
        return TypeInfoFactory.getPrimitiveTypeInfo("boolean");
    }
    if (RecordFieldType.DOUBLE.equals(fieldType)) {
        return TypeInfoFactory.getPrimitiveTypeInfo("double");
    }
    if (RecordFieldType.FLOAT.equals(fieldType)) {
        return TypeInfoFactory.getPrimitiveTypeInfo("float");
    }
    if (RecordFieldType.STRING.equals(fieldType)) {
        return TypeInfoFactory.getPrimitiveTypeInfo("string");
    }

    throw new IllegalArgumentException("Field type " + fieldType.name() + " is not a primitive type");
}
 
Example #7
Source File: PredicateHandlerTest.java    From accumulo-hive-storage-manager with Apache License 2.0
@Test
public void pushdownTuple() {
    setup();
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "field1", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 5);
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqual(), children);
    assertNotNull(node);
    String filterExpr = Utilities.serializeExpression(node);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    try {
        List<IndexSearchCondition> sConditions = handler.getSearchConditions(conf);
        assertEquals(sConditions.size(), 1);
        AccumuloPredicateHandler.PushdownTuple tuple = new AccumuloPredicateHandler.PushdownTuple(sConditions.get(0));
        byte [] expectedVal = new byte[4];
        ByteBuffer.wrap(expectedVal).putInt(5);
        assertEquals(tuple.getConstVal(), expectedVal);
        assertEquals(tuple.getcOpt().getClass(), Equal.class);
        assertEquals(tuple.getpCompare().getClass(), IntCompare.class);
    } catch (Exception e) {
        fail(StringUtils.stringifyException(e));
    }
}
 
Example #8
Source File: TestSchemaConversion.java    From kite with Apache License 2.0
@Test
public void testConvertSchemaWithComplexRecord() {
  // convertSchema returns a list of FieldSchema objects rather than TypeInfo
  List<FieldSchema> fields = HiveSchemaConverter.convertSchema(COMPLEX_RECORD);

  Assert.assertEquals("Field names should match",
      Lists.newArrayList("groupName", "simpleRecords"),
      Lists.transform(fields, GET_NAMES));
  Assert.assertEquals("Field types should match",
      Lists.newArrayList(
          STRING_TYPE_INFO.toString(),
          TypeInfoFactory.getListTypeInfo(
              TypeInfoFactory.getStructTypeInfo(
                  Lists.newArrayList("id", "name"),
                  Lists.newArrayList(
                      INT_TYPE_INFO,
                      STRING_TYPE_INFO))).toString()),
      Lists.transform(fields, GET_TYPE_STRINGS));
}
 
Example #9
Source File: TryCastUDFTest.java    From incubator-hivemall with Apache License 2.0
@Test
public void testList() throws IOException, HiveException {
    // try_cast(array(1.0,2.0,3.0), 'array<string>');
    TryCastUDF udf = new TryCastUDF();

    udf.initialize(new ObjectInspector[] {
            ObjectInspectorFactory.getStandardListObjectInspector(
                PrimitiveObjectInspectorFactory.writableDoubleObjectInspector),
            PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
                TypeInfoFactory.stringTypeInfo, new Text("array<string>"))});

    DeferredObject[] args = new DeferredObject[] {new GenericUDF.DeferredJavaObject(
        WritableUtils.toWritableList(new double[] {0.1, 1.1, 2.1}))};

    Object result = udf.evaluate(args);

    Assert.assertEquals(WritableUtils.val("0.1", "1.1", "2.1"), result);

    udf.close();
}
 
Example #10
Source File: HiveTypeUtil.java    From flink with Apache License 2.0
@Override
public TypeInfo visit(VarCharType varCharType) {
	// Flink's StringType is defined as VARCHAR(Integer.MAX_VALUE)
	// We don't have more information in LogicalTypeRoot to distinguish StringType and a VARCHAR(Integer.MAX_VALUE) instance
	// Thus always treat VARCHAR(Integer.MAX_VALUE) as StringType
	if (varCharType.getLength() == Integer.MAX_VALUE) {
		return TypeInfoFactory.stringTypeInfo;
	}
	if (varCharType.getLength() > HiveVarchar.MAX_VARCHAR_LENGTH) {
		throw new CatalogException(
				String.format("HiveCatalog doesn't support varchar type with length of '%d'. " +
							"The maximum length is %d",
							varCharType.getLength(), HiveVarchar.MAX_VARCHAR_LENGTH));
	}
	return TypeInfoFactory.getVarcharTypeInfo(varCharType.getLength());
}
 
Example #11
Source File: TestNiFiOrcUtils.java    From nifi with Apache License 2.0
@Test
public void test_getOrcField_record() {
    final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
    builder.name("Int").type().intType().noDefault();
    builder.name("Long").type().longType().longDefault(1L);
    builder.name("Array").type().array().items().stringType().noDefault();
    RecordSchema testSchema = AvroTypeUtil.createSchema(builder.endRecord());
    // Normalize field names for Hive, assert that their names are now lowercase
    TypeInfo orcType = NiFiOrcUtils.getOrcSchema(testSchema, true);
    assertEquals(
            TypeInfoFactory.getStructTypeInfo(
                    Arrays.asList("int", "long", "array"),
                    Arrays.asList(
                            TypeInfoCreator.createInt(),
                            TypeInfoCreator.createLong(),
                            TypeInfoFactory.getListTypeInfo(TypeInfoCreator.createString()))),
            orcType);
}
 
Example #12
Source File: BlurObjectInspectorGenerator.java    From incubator-retired-blur with Apache License 2.0
public BlurObjectInspectorGenerator(Collection<ColumnDefinition> colDefs, BlurColumnNameResolver columnNameResolver)
    throws SerDeException {
  _columnNameResolver = columnNameResolver;
  List<ColumnDefinition> colDefList = new ArrayList<ColumnDefinition>(colDefs);
  Collections.sort(colDefList, COMPARATOR);

  _columnNames.add(ROWID);
  _columnTypes.add(TypeInfoFactory.stringTypeInfo);

  _columnNames.add(RECORDID);
  _columnTypes.add(TypeInfoFactory.stringTypeInfo);

  for (ColumnDefinition columnDefinition : colDefList) {
    String hiveColumnName = _columnNameResolver.fromBlurToHive(columnDefinition.getColumnName());
    _columnNames.add(hiveColumnName);
    _columnTypes.add(getTypeInfo(columnDefinition));
  }
  _objectInspector = createObjectInspector();
}
 
Example #13
Source File: PredicateHandlerTest.java    From accumulo-hive-storage-manager with Apache License 2.0
@Test()
public void rangeEqual() {
    setup();
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqual(), children);
    assertNotNull(node);
    String filterExpr = Utilities.serializeExpression(node);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    try {
        Collection<Range> ranges = handler.getRanges(conf);
        assertEquals(ranges.size(), 1);
        Range range = ranges.iterator().next();
        assertTrue(range.isStartKeyInclusive());
        assertFalse(range.isEndKeyInclusive());
        assertTrue(range.contains(new Key(new Text("aaa"))));
        assertTrue(range.afterEndKey(new Key(new Text("aab"))));
        assertTrue(range.beforeStartKey(new Key(new Text("aa"))));
    } catch (Exception e) {
        fail("Error getting search conditions");
    }
}
 
Example #14
Source File: JSONCDHSerDe.java    From bigdata-tutorial with Apache License 2.0
/**
 * An initialization function used to gather information about the table.
 * Typically, a SerDe implementation will be interested in the list of
 * column names and their types. That information will be used to help perform
 * actual serialization and deserialization of data.
 */
@Override
public void initialize(Configuration conf, Properties tbl)
		throws SerDeException {
	// Get a list of the table's column names.
	String colNamesStr = tbl.getProperty(serdeConstants.LIST_COLUMNS);
	colNames = Arrays.asList(colNamesStr.split(","));

	// Get a list of TypeInfos for the columns. This list lines up with
	// the list of column names.
	String colTypesStr = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
	List<TypeInfo> colTypes =
			TypeInfoUtils.getTypeInfosFromTypeString(colTypesStr);

	rowTypeInfo =
			(StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes);
	rowOI =
			TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
}
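
The same two steps can be exercised outside a SerDe. The sketch below is a standalone reduction of the initialize() logic above: it parses a Hive column-type string with TypeInfoUtils, builds the row StructTypeInfo with TypeInfoFactory, and derives a standard Java ObjectInspector. The column names and type string are made up for illustration rather than taken from a real table.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class RowTypeInfoSketch {
    public static void main(String[] args) {
        // Column metadata as a SerDe would receive it from table properties.
        List<String> colNames = Arrays.asList("id", "payload", "scores");
        List<TypeInfo> colTypes =
                TypeInfoUtils.getTypeInfosFromTypeString("bigint:string:array<double>");

        // One StructTypeInfo describes the whole row.
        StructTypeInfo rowTypeInfo =
                (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(colNames, colTypes);

        // A standard Java ObjectInspector can be derived directly from the TypeInfo.
        ObjectInspector rowOI =
                TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);

        System.out.println(rowTypeInfo.getTypeName()); // struct<id:bigint,payload:string,scores:array<double>>
        System.out.println(rowOI.getCategory());       // STRUCT
    }
}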
 
Example #15
Source File: HiveTypeUtil.java    From flink with Apache License 2.0
@Override
public TypeInfo visit(CharType charType) {
	// Flink and Hive have different length limit for CHAR. Promote it to STRING if it exceeds the limits of
	// Hive and we're told not to check precision. This can be useful when calling Hive UDF to process data.
	if (charType.getLength() > HiveChar.MAX_CHAR_LENGTH || charType.getLength() < 1) {
		if (checkPrecision) {
			throw new CatalogException(
					String.format("HiveCatalog doesn't support char type with length of '%d'. " +
									"The supported length is [%d, %d]",
							charType.getLength(), 1, HiveChar.MAX_CHAR_LENGTH));
		} else {
			return TypeInfoFactory.stringTypeInfo;
		}
	}
	return TypeInfoFactory.getCharTypeInfo(charType.getLength());
}
 
Example #16
Source File: HiveTypeUtil.java    From flink with Apache License 2.0
@Override
public TypeInfo visit(VarCharType varCharType) {
	// Flink's StringType is defined as VARCHAR(Integer.MAX_VALUE)
	// We don't have more information in LogicalTypeRoot to distinguish StringType and a VARCHAR(Integer.MAX_VALUE) instance
	// Thus always treat VARCHAR(Integer.MAX_VALUE) as StringType
	if (varCharType.getLength() == Integer.MAX_VALUE) {
		return TypeInfoFactory.stringTypeInfo;
	}
	// Flink and Hive have different length limit for VARCHAR. Promote it to STRING if it exceeds the limits of
	// Hive and we're told not to check precision. This can be useful when calling Hive UDF to process data.
	if (varCharType.getLength() > HiveVarchar.MAX_VARCHAR_LENGTH || varCharType.getLength() < 1) {
		if (checkPrecision) {
			throw new CatalogException(
					String.format("HiveCatalog doesn't support varchar type with length of '%d'. " +
									"The supported length is [%d, %d]",
							varCharType.getLength(), 1, HiveVarchar.MAX_VARCHAR_LENGTH));
		} else {
			return TypeInfoFactory.stringTypeInfo;
		}
	}
	return TypeInfoFactory.getVarcharTypeInfo(varCharType.getLength());
}
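
The length limits referenced in the two visitors above come from Hive's own HiveChar and HiveVarchar classes. Below is a minimal sketch showing how the qualified character types are created and where those bounds live; the commented output values are assumptions about how getTypeName() renders the length, not output verified against a specific Hive version.

import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class QualifiedTypeSketch {
    public static void main(String[] args) {
        // Bounded character types carry their length as a type parameter.
        TypeInfo char10 = TypeInfoFactory.getCharTypeInfo(10);         // char(10)
        TypeInfo varchar100 = TypeInfoFactory.getVarcharTypeInfo(100); // varchar(100)

        System.out.println(char10.getTypeName());
        System.out.println(varchar100.getTypeName());

        // The upper bounds the Flink converter checks against come from Hive itself.
        System.out.println(HiveChar.MAX_CHAR_LENGTH);       // 255
        System.out.println(HiveVarchar.MAX_VARCHAR_LENGTH); // 65535

        // Anything longer than the varchar limit has no qualified Hive type,
        // which is why the converter above falls back to the unbounded string type.
        System.out.println(TypeInfoFactory.stringTypeInfo.getTypeName()); // string
    }
}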
 
Example #17
Source File: TestNiFiOrcUtils.java    From nifi with Apache License 2.0
@Test
public void test_getOrcField_record() throws Exception {
    final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
    builder.name("int").type().intType().noDefault();
    builder.name("long").type().longType().longDefault(1L);
    builder.name("array").type().array().items().stringType().noDefault();
    Schema testSchema = builder.endRecord();
    TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema);
    assertEquals(
            TypeInfoFactory.getStructTypeInfo(
                    Arrays.asList("int", "long", "array"),
                    Arrays.asList(
                            TypeInfoCreator.createInt(),
                            TypeInfoCreator.createLong(),
                            TypeInfoFactory.getListTypeInfo(TypeInfoCreator.createString()))),
            orcType);
}
 
Example #18
Source File: IndexRSerde.java    From indexr with Apache License 2.0
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    String columnNameProperty = tbl.getProperty(IOConstants.COLUMNS);
    String columnTypeProperty = tbl.getProperty(IOConstants.COLUMNS_TYPES);

    if (Strings.isEmpty(columnNameProperty)) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (Strings.isEmpty(columnTypeProperty)) {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(StringUtils.repeat("string", ":", columnNames.size()));
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    if (columnNames.size() != columnTypes.size()) {
        throw new IllegalArgumentException("IndexRHiveSerde initialization failed. Number of column " +
                "name and column type differs. columnNames = " + columnNames + ", columnTypes = " +
                columnTypes);
    }

    TypeInfo rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);

    stats = new SerDeStats();
    serdeSize = 0;
}
 
Example #19
Source File: TestNiFiOrcUtils.java    From nifi with Apache License 2.0
@Test
public void test_getOrcField_nested_map() {
    final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
    builder.name("map").type().map().values().map().values().doubleType().noDefault();
    RecordSchema testSchema = AvroTypeUtil.createSchema(builder.endRecord());
    TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("map").get().getDataType(), false);
    assertEquals(
            TypeInfoFactory.getMapTypeInfo(TypeInfoCreator.createString(),
                    TypeInfoFactory.getMapTypeInfo(TypeInfoCreator.createString(), TypeInfoCreator.createDouble())),
            orcType);
}
 
Example #20
Source File: PredicateHandlerTest.java    From accumulo-hive-storage-manager with Apache License 2.0
@Test()
public void rangeGreaterThan() {
    setup();
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPGreaterThan(), children);
    assertNotNull(node);
    String filterExpr = Utilities.serializeExpression(node);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    try {
        Collection<Range> ranges = handler.getRanges(conf);
        assertEquals(ranges.size(), 1);
        Range range = ranges.iterator().next();
        assertTrue(range.isStartKeyInclusive());
        assertFalse(range.isEndKeyInclusive());
        assertFalse(range.contains(new Key(new Text("aaa"))));
        assertFalse(range.afterEndKey(new Key(new Text("ccccc"))));
        assertTrue(range.contains(new Key(new Text("aab"))));
        assertTrue(range.beforeStartKey(new Key(new Text("aa"))));
        assertTrue(range.beforeStartKey(new Key(new Text("aaa"))));
    } catch (Exception e) {
        fail("Error getting search conditions");
    }
}
 
Example #21
Source File: TestNiFiOrcUtils.java    From nifi with Apache License 2.0
public static TypeInfo buildPrimitiveOrcSchema() {
    return TypeInfoFactory.getStructTypeInfo(Arrays.asList("int", "long", "boolean", "float", "double", "bytes", "string"),
            Arrays.asList(
                    TypeInfoCreator.createInt(),
                    TypeInfoCreator.createLong(),
                    TypeInfoCreator.createBoolean(),
                    TypeInfoCreator.createFloat(),
                    TypeInfoCreator.createDouble(),
                    TypeInfoCreator.createBinary(),
                    TypeInfoCreator.createString()));
}
 
Example #22
Source File: PredicateHandlerTest.java    From accumulo-hive-storage-manager with Apache License 2.0
@Test
public void multiRange() {
    setup();
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children);
    assertNotNull(node);

    ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "bbb");
    List<ExprNodeDesc> children2 = Lists.newArrayList();
    children2.add(column2);
    children2.add(constant2);
    ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPGreaterThan(), children2);
    assertNotNull(node2);


    List<ExprNodeDesc> bothFilters = Lists.newArrayList();
    bothFilters.add(node);
    bothFilters.add(node2);
    ExprNodeDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters);

    String filterExpr = Utilities.serializeExpression(both);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    try {
        Collection<Range> ranges = handler.getRanges(conf);
        assertEquals(ranges.size(), 2);
        Iterator<Range> itr = ranges.iterator();
        Range range1 = itr.next();
        Range range2 = itr.next();
        assertNull(range1.clip(range2, true));
    } catch (Exception e) {
        fail("Error getting search conditions");
    }
}
 
Example #23
Source File: TimeBasedPartitioner.java    From streamx with Apache License 2.0
private void addToPartitionFields(String pathFormat, boolean hiveIntegration) {
  if (hiveIntegration && !verifyDateTimeFormat(pathFormat)) {
    throw new ConfigException(HdfsSinkConnectorConfig.PATH_FORMAT_CONFIG, pathFormat,
                              "Path format doesn't meet the requirements for Hive integration, "
                              + "which require prefixing each DateTime component with its name.");
  }
  for (String field: pathFormat.split("/")) {
    String[] parts = field.split("=");
    FieldSchema fieldSchema = new FieldSchema(parts[0].replace("'", ""), TypeInfoFactory.stringTypeInfo.toString(), "");
    partitionFields.add(fieldSchema);
  }
}
 
Example #24
Source File: TestSchemaConversion.java    From kite with Apache License 2.0
@Test
public void testArray() {
  TypeInfo type = HiveSchemaConverter.convert(SchemaBuilder.array()
      .items().floatType());

  Assert.assertEquals("Array should be converted to list",
      TypeInfoFactory.getListTypeInfo(FLOAT_TYPE_INFO),
      type);
}
 
Example #25
Source File: HiveSchemaConverter.java    From streamx with Apache License 2.0
public static TypeInfo convertStruct(Schema schema) {
  final List<Field> fields = schema.fields();
  final List<String> names = new ArrayList<>(fields.size());
  final List<TypeInfo> types = new ArrayList<>(fields.size());
  for (Field field : fields) {
    names.add(field.name());
    types.add(convert(field.schema()));
  }
  return TypeInfoFactory.getStructTypeInfo(names, types);
}
 
Example #26
Source File: CatalogThriftHiveMetastore.java    From metacat with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public PartitionsByExprResult get_partitions_by_expr(final PartitionsByExprRequest req) throws TException {
    return requestWrapper("get_partitions_by_expr", new Object[]{req},
        () -> {
            try {
                String filter = null;
                if (req.getExpr() != null) {
                    filter = Utilities.deserializeExpressionFromKryo(req.getExpr()).getExprString();
                    if (filter == null) {
                        throw new MetaException("Failed to deserialize expression - ExprNodeDesc not present");
                    }
                }
                //TODO: We need to handle the case for 'hasUnknownPartitions'
                return new PartitionsByExprResult(
                    getPartitionsByFilter(req.getDbName(), req.getTblName(), filter, req.getMaxParts()),
                    false);
            } catch (Exception e) {
                //
                // If there is an exception with filtering, fallback to getting all partition names and then
                // apply the filter.
                //
                final List<String> partitionNames = Lists.newArrayList(
                    get_partition_names(req.getDbName(), req.getTblName(), (short) -1));
                final Table table = get_table(req.getDbName(), req.getTblName());
                final List<String> columnNames = new ArrayList<>();
                final List<PrimitiveTypeInfo> typeInfos = new ArrayList<>();
                for (FieldSchema fs : table.getPartitionKeys()) {
                    columnNames.add(fs.getName());
                    typeInfos.add(TypeInfoFactory.getPrimitiveTypeInfo(fs.getType()));
                }
                final boolean hasUnknownPartitions = new PartitionExpressionForMetastore().filterPartitionsByExpr(
                    columnNames, typeInfos, req.getExpr(), req.getDefaultPartitionName(), partitionNames);

                return new PartitionsByExprResult(get_partitions_by_names(
                    req.getDbName(), req.getTblName(), partitionNames), hasUnknownPartitions);
            }
        });
}
 
Example #27
Source File: TestObjectInspector.java    From hive-dwrf with Apache License 2.0
/**
 * Tests that after copying a lazy double object, calling materialize on the original and the
 * copy doesn't advance the tree reader twice
 * @throws Exception
 */
@Test
public void TestCopyDouble() throws Exception {
  ReaderWriterProfiler.setProfilerOptions(null);
  OrcLazyDouble lazyDouble = new OrcLazyDouble(new LazyDoubleTreeReader(0, 0) {
    int nextCalls = 0;

    @Override
    public Object next(Object previous) throws IOException {
      if (nextCalls == 0) {
        return new DoubleWritable(1.0);
      }

      throw new IOException("next should only be called once");
    }

    @Override
    protected boolean seekToRow(long currentRow) throws IOException {
      return true;
    }
  });

  DoubleObjectInspector doubleOI = (DoubleObjectInspector)
      OrcLazyObjectInspectorUtils.createLazyObjectInspector(TypeInfoFactory.doubleTypeInfo);

  OrcLazyDouble lazyDouble2 = (OrcLazyDouble) doubleOI.copyObject(lazyDouble);

  Assert.assertEquals(1.0, ((DoubleWritable) lazyDouble.materialize()).get());
  Assert.assertEquals(1.0, ((DoubleWritable) lazyDouble2.materialize()).get());
}
 
Example #28
Source File: TestObjectInspector.java    From hive-dwrf with Apache License 2.0 5 votes vote down vote up
/**
 * Tests that after copying a lazy binary object, calling materialize on the original and the
 * copy doesn't advance the tree reader twice
 * @throws Exception
 */
@Test
public void TestCopyBinary() throws Exception {
  ReaderWriterProfiler.setProfilerOptions(null);
  OrcLazyBinary lazyBinary = new OrcLazyBinary(new LazyBinaryTreeReader(0, 0) {
    int nextCalls = 0;

    @Override
    public Object next(Object previous) throws IOException {
      if (nextCalls == 0) {
        nextCalls++;
        return new BytesWritable("a".getBytes());
      }

      throw new IOException("next should only be called once");
    }

    @Override
    protected boolean seekToRow(long currentRow) throws IOException {
      return true;
    }
  });

  BinaryObjectInspector binaryOI = (BinaryObjectInspector)
      OrcLazyObjectInspectorUtils.createLazyObjectInspector(TypeInfoFactory.binaryTypeInfo);

  OrcLazyBinary lazyBinary2 = (OrcLazyBinary) binaryOI.copyObject(lazyBinary);

  Assert.assertEquals("a", new String(((BytesWritable) lazyBinary.materialize()).getBytes()));
  Assert.assertEquals("a", new String(((BytesWritable) lazyBinary2.materialize()).getBytes()));
}
 
Example #29
Source File: NiFiOrcUtils.java    From nifi with Apache License 2.0
public static TypeInfo getOrcSchema(RecordSchema recordSchema, boolean hiveFieldNames) throws IllegalArgumentException {
    List<RecordField> recordFields = recordSchema.getFields();
    if (recordFields != null) {
        List<String> orcFieldNames = new ArrayList<>(recordFields.size());
        List<TypeInfo> orcFields = new ArrayList<>(recordFields.size());
        recordFields.forEach(recordField -> {
            String fieldName = hiveFieldNames ? recordField.getFieldName().toLowerCase() : recordField.getFieldName();
            orcFieldNames.add(fieldName);
            orcFields.add(getOrcField(recordField.getDataType(), hiveFieldNames));
        });
        return TypeInfoFactory.getStructTypeInfo(orcFieldNames, orcFields);
    }
    return null;
}
 
Example #30
Source File: TestObjectInspector.java    From hive-dwrf with Apache License 2.0 5 votes vote down vote up
/**
 * Tests that after copying a lazy boolean object, calling materialize on the original and the
 * copy doesn't advance the tree reader twice
 * @throws Exception
 */
@Test
public void TestCopyBoolean() throws Exception {
  ReaderWriterProfiler.setProfilerOptions(null);
  OrcLazyBoolean lazyBoolean = new OrcLazyBoolean(new LazyBooleanTreeReader(0, 0) {
    int nextCalls = 0;

    @Override
    public Object next(Object previous) throws IOException {
      if (nextCalls == 0) {
        return new BooleanWritable(true);
      }

      throw new IOException("next should only be called once");
    }

    @Override
    protected boolean seekToRow(long currentRow) throws IOException {
      return true;
    }
  });

  BooleanObjectInspector booleanOI = (BooleanObjectInspector)
      OrcLazyObjectInspectorUtils.createLazyObjectInspector(TypeInfoFactory.booleanTypeInfo);

  OrcLazyBoolean lazyBoolean2 = (OrcLazyBoolean) booleanOI.copyObject(lazyBoolean);

  Assert.assertEquals(true, ((BooleanWritable) lazyBoolean.materialize()).get());
  Assert.assertEquals(true, ((BooleanWritable) lazyBoolean2.materialize()).get());
}