Java Code Examples for org.apache.orc.TypeDescription#createList()

The following examples show how to use org.apache.orc.TypeDescription#createList(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: ORCSchemaUtil.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Converts an Iceberg type into the matching ORC {@link TypeDescription}, recursing into struct
 * fields, list elements, and map keys and values.
 *
 * <p>Every produced type is tagged with {@code ICEBERG_ID_ATTRIBUTE} and
 * {@code ICEBERG_REQUIRED_ATTRIBUTE}. Types that are stored as ORC long or ORC binary additionally
 * carry an attribute naming the Iceberg type they were created from.
 *
 * @param fieldId Iceberg field id written (via {@code String.valueOf}) to the id attribute
 * @param type Iceberg type to convert
 * @param isRequired value written to the required attribute
 * @return the ORC type with the Iceberg attributes set
 * @throws IllegalArgumentException if the type id is not handled by this method
 */
private static TypeDescription convert(Integer fieldId, Type type, boolean isRequired) {
  TypeDescription converted;

  switch (type.typeId()) {
    case BOOLEAN:
      converted = TypeDescription.createBoolean();
      break;
    case INTEGER:
      converted = TypeDescription.createInt();
      break;
    case TIME:
      // TIME and LONG share the ORC long type; the attribute tells them apart.
      converted = TypeDescription.createLong();
      converted.setAttribute(ICEBERG_LONG_TYPE_ATTRIBUTE, LongType.TIME.toString());
      break;
    case LONG:
      converted = TypeDescription.createLong();
      converted.setAttribute(ICEBERG_LONG_TYPE_ATTRIBUTE, LongType.LONG.toString());
      break;
    case FLOAT:
      converted = TypeDescription.createFloat();
      break;
    case DOUBLE:
      converted = TypeDescription.createDouble();
      break;
    case DATE:
      converted = TypeDescription.createDate();
      break;
    case TIMESTAMP: {
      Types.TimestampType timestamp = (Types.TimestampType) type;
      converted = timestamp.shouldAdjustToUTC()
          ? TypeDescription.createTimestampInstant()
          : TypeDescription.createTimestamp();
      break;
    }
    case STRING:
      converted = TypeDescription.createString();
      break;
    case UUID:
      // UUID, FIXED and BINARY share the ORC binary type; the attribute tells them apart.
      converted = TypeDescription.createBinary();
      converted.setAttribute(ICEBERG_BINARY_TYPE_ATTRIBUTE, BinaryType.UUID.toString());
      break;
    case FIXED: {
      converted = TypeDescription.createBinary();
      converted.setAttribute(ICEBERG_BINARY_TYPE_ATTRIBUTE, BinaryType.FIXED.toString());
      Types.FixedType fixed = (Types.FixedType) type;
      converted.setAttribute(ICEBERG_FIELD_LENGTH, Integer.toString(fixed.length()));
      break;
    }
    case BINARY:
      converted = TypeDescription.createBinary();
      converted.setAttribute(ICEBERG_BINARY_TYPE_ATTRIBUTE, BinaryType.BINARY.toString());
      break;
    case DECIMAL: {
      Types.DecimalType decimalType = (Types.DecimalType) type;
      // Scale is applied before precision, exactly as before.
      // NOTE(review): ORC appears to validate scale against precision on each call - confirm
      // before reordering these two.
      TypeDescription decimal = TypeDescription.createDecimal();
      converted = decimal.withScale(decimalType.scale()).withPrecision(decimalType.precision());
      break;
    }
    case STRUCT: {
      converted = TypeDescription.createStruct();
      for (Types.NestedField child : type.asStructType().fields()) {
        converted.addField(
            child.name(),
            convert(child.fieldId(), child.type(), child.isRequired()));
      }
      break;
    }
    case LIST: {
      Types.ListType listType = (Types.ListType) type;
      converted = TypeDescription.createList(
          convert(listType.elementId(), listType.elementType(), listType.isElementRequired()));
      break;
    }
    case MAP: {
      Types.MapType mapType = (Types.MapType) type;
      // Map keys are always converted as required.
      TypeDescription orcKey = convert(mapType.keyId(), mapType.keyType(), true);
      TypeDescription orcValue =
          convert(mapType.valueId(), mapType.valueType(), mapType.isValueRequired());
      converted = TypeDescription.createMap(orcKey, orcValue);
      break;
    }
    default:
      throw new IllegalArgumentException("Unhandled type " + type.typeId());
  }

  // Attach the Iceberg id and required flag so the ORC type can be mapped back to its column.
  converted.setAttribute(ICEBERG_ID_ATTRIBUTE, String.valueOf(fieldId));
  converted.setAttribute(ICEBERG_REQUIRED_ATTRIBUTE, Boolean.toString(isRequired));
  return converted;
}
 
Example 2
Source File: TypeConversion.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the ORC {@link TypeDescription} for an Iceberg type and records Iceberg field ids in
 * {@code columnIds}.
 *
 * <p>Nested types are converted recursively, and each child is registered in {@code columnIds}
 * before the type that contains it.
 *
 * @param fieldId Iceberg id to associate with the produced ORC type; nothing is recorded when null
 * @param type Iceberg type to convert
 * @param columnIds receives an (ORC type, field id) entry for every converted type that has an id
 * @return the ORC type
 * @throws IllegalArgumentException if the type id is not handled by this method
 */
static TypeDescription toOrc(Integer fieldId,
                             Type type,
                             ColumnIdMap columnIds) {
  TypeDescription orc;
  switch (type.typeId()) {
    case BOOLEAN:
      orc = TypeDescription.createBoolean();
      break;
    case INTEGER:
    case TIME:
      // NOTE(review): TIME is written as an ORC int here - confirm the value range fits.
      orc = TypeDescription.createInt();
      break;
    case LONG:
      orc = TypeDescription.createLong();
      break;
    case FLOAT:
      orc = TypeDescription.createFloat();
      break;
    case DOUBLE:
      orc = TypeDescription.createDouble();
      break;
    case DATE:
      orc = TypeDescription.createDate();
      break;
    case TIMESTAMP:
      orc = TypeDescription.createTimestamp();
      break;
    case STRING:
      orc = TypeDescription.createString();
      break;
    case UUID:
    case FIXED:
    case BINARY:
      // All three byte-oriented Iceberg types are written as ORC binary.
      orc = TypeDescription.createBinary();
      break;
    case DECIMAL: {
      Types.DecimalType decimalType = (Types.DecimalType) type;
      // Scale is applied before precision, exactly as before.
      TypeDescription decimal = TypeDescription.createDecimal();
      orc = decimal.withScale(decimalType.scale()).withPrecision(decimalType.precision());
      break;
    }
    case STRUCT: {
      orc = TypeDescription.createStruct();
      for (Types.NestedField child : type.asStructType().fields()) {
        TypeDescription orcChild = toOrc(child.fieldId(), child.type(), columnIds);
        orc.addField(child.name(), orcChild);
      }
      break;
    }
    case LIST: {
      Types.ListType listType = (Types.ListType) type;
      TypeDescription orcElement =
          toOrc(listType.elementId(), listType.elementType(), columnIds);
      orc = TypeDescription.createList(orcElement);
      break;
    }
    case MAP: {
      Types.MapType mapType = (Types.MapType) type;
      // Key first, then value, so ids are registered in the same order as before.
      TypeDescription orcKey = toOrc(mapType.keyId(), mapType.keyType(), columnIds);
      TypeDescription orcValue = toOrc(mapType.valueId(), mapType.valueType(), columnIds);
      orc = TypeDescription.createMap(orcKey, orcValue);
      break;
    }
    default:
      throw new IllegalArgumentException("Unhandled type " + type.typeId());
  }

  if (fieldId != null) {
    columnIds.put(orc, fieldId);
  }
  return orc;
}
 
Example 3
Source File: OrcSplitReaderUtil.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Maps a Flink {@link LogicalType} to the corresponding ORC {@link TypeDescription}.
 * Array, map and row types are converted recursively.
 *
 * <p>See {@code org.apache.flink.table.catalog.hive.util.HiveTypeUtil}.
 *
 * @param type the logical type to convert; the conversion works on {@code type.copy(true)}
 * @return the ORC type description
 * @throws UnsupportedOperationException for a VARBINARY type other than
 *     {@code DataTypes.BYTES()} and for any type root without a case here
 */
public static TypeDescription logicalTypeToOrcType(LogicalType type) {
	// presumably the nullable variant of the type - confirm against LogicalType#copy(boolean)
	LogicalType copied = type.copy(true);
	switch (copied.getTypeRoot()) {
		case CHAR: {
			int charLength = ((CharType) copied).getLength();
			return TypeDescription.createChar().withMaxLength(charLength);
		}
		case VARCHAR: {
			int varcharLength = ((VarCharType) copied).getLength();
			// A VARCHAR of maximum length is written as an ORC string.
			return varcharLength == VarCharType.MAX_LENGTH
					? TypeDescription.createString()
					: TypeDescription.createVarchar().withMaxLength(varcharLength);
		}
		case BOOLEAN:
			return TypeDescription.createBoolean();
		case VARBINARY:
			// Only the type equal to DataTypes.BYTES() is accepted.
			if (!copied.equals(DataTypes.BYTES().getLogicalType())) {
				throw new UnsupportedOperationException(
						"Not support other binary type: " + copied);
			}
			return TypeDescription.createBinary();
		case DECIMAL: {
			DecimalType decimal = (DecimalType) copied;
			// Scale is applied before precision, exactly as before.
			TypeDescription orcDecimal = TypeDescription.createDecimal();
			return orcDecimal
					.withScale(decimal.getScale())
					.withPrecision(decimal.getPrecision());
		}
		case TINYINT:
			return TypeDescription.createByte();
		case SMALLINT:
			return TypeDescription.createShort();
		case INTEGER:
			return TypeDescription.createInt();
		case BIGINT:
			return TypeDescription.createLong();
		case FLOAT:
			return TypeDescription.createFloat();
		case DOUBLE:
			return TypeDescription.createDouble();
		case DATE:
			return TypeDescription.createDate();
		case TIMESTAMP_WITHOUT_TIME_ZONE:
			return TypeDescription.createTimestamp();
		case ARRAY: {
			LogicalType elementType = ((ArrayType) copied).getElementType();
			return TypeDescription.createList(logicalTypeToOrcType(elementType));
		}
		case MAP: {
			MapType map = (MapType) copied;
			TypeDescription orcKey = logicalTypeToOrcType(map.getKeyType());
			TypeDescription orcValue = logicalTypeToOrcType(map.getValueType());
			return TypeDescription.createMap(orcKey, orcValue);
		}
		case ROW: {
			RowType row = (RowType) copied;
			TypeDescription orcStruct = TypeDescription.createStruct();
			int fieldCount = row.getFieldCount();
			for (int pos = 0; pos < fieldCount; pos++) {
				TypeDescription orcField = logicalTypeToOrcType(row.getChildren().get(pos));
				orcStruct.addField(row.getFieldNames().get(pos), orcField);
			}
			return orcStruct;
		}
		default:
			throw new UnsupportedOperationException("Unsupported type: " + copied);
	}
}
 
Example 4
Source File: OrcKeyComparatorTest.java    From incubator-gobblin with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that {@code OrcKeyComparator} orders struct keys whose schema is
 * {@code struct<a:int, b:list<string>>}: records with the same content compare as equal, and a
 * record compares as smaller than ones differing in the int field or in the list field.
 */
@Test
public void testComplexRecordArray() throws Exception {
  OrcKeyComparator comparator = new OrcKeyComparator();
  Configuration conf = new Configuration();

  TypeDescription listSchema = TypeDescription.createList(TypeDescription.createString());
  TypeDescription schema = TypeDescription.createStruct()
      .addField("a", TypeDescription.createInt())
      .addField("b", listSchema);

  // Hand the key schema to the comparator through the configuration, verifying it round-trips.
  String schemaProperty = OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.getAttribute();
  conf.set(schemaProperty, schema.toString());
  Assert.assertEquals(conf.get(schemaProperty), schema.toString());
  comparator.setConf(conf);

  // Base record.
  OrcStruct base = (OrcStruct) OrcStruct.createValue(schema);
  base.setFieldValue("a", new IntWritable(1));
  base.setFieldValue("b", createOrcList(3, listSchema, 3));
  OrcKey baseKey = new OrcKey();
  baseKey.key = base;

  // Same content as the base record but a distinct object; expected to compare as equal.
  OrcStruct sameAsBase = (OrcStruct) OrcStruct.createValue(schema);
  sameAsBase.setFieldValue("a", new IntWritable(1));
  sameAsBase.setFieldValue("b", createOrcList(3, listSchema, 3));
  OrcKey sameAsBaseKey = new OrcKey();
  sameAsBaseKey.key = sameAsBase;

  // Differs from the base record in the int field.
  OrcStruct largerInt = (OrcStruct) OrcStruct.createValue(schema);
  largerInt.setFieldValue("a", new IntWritable(2));
  largerInt.setFieldValue("b", createOrcList(3, listSchema, 3));
  OrcKey largerIntKey = new OrcKey();
  largerIntKey.key = largerInt;

  // Differs in the array field, variant 1: last createOrcList argument changed.
  OrcStruct listVariantOne = (OrcStruct) OrcStruct.createValue(schema);
  listVariantOne.setFieldValue("a", new IntWritable(1));
  listVariantOne.setFieldValue("b", createOrcList(3, listSchema, 5));
  OrcKey listVariantOneKey = new OrcKey();
  listVariantOneKey.key = listVariantOne;

  // Differs in the array field, variant 2: first createOrcList argument changed.
  OrcStruct listVariantTwo = (OrcStruct) OrcStruct.createValue(schema);
  listVariantTwo.setFieldValue("a", new IntWritable(1));
  listVariantTwo.setFieldValue("b", createOrcList(4, listSchema, 3));
  OrcKey listVariantTwoKey = new OrcKey();
  listVariantTwoKey.key = listVariantTwo;

  Assert.assertTrue(comparator.compare(baseKey, sameAsBaseKey) == 0);
  Assert.assertTrue(comparator.compare(sameAsBaseKey, largerIntKey) < 0);
  Assert.assertTrue(comparator.compare(sameAsBaseKey, listVariantOneKey) < 0);
  Assert.assertTrue(comparator.compare(sameAsBaseKey, listVariantTwoKey) < 0);
}
 
Example 5
Source File: OrcKeyComparatorTest.java    From incubator-gobblin with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that {@code OrcKeyComparator} orders struct keys whose second field is a union of
 * int, {@code list<string>} and a nested {@code struct<x:int, y:int>}, covering unions that
 * hold a nested struct and unions that hold a list.
 */
@Test
public void testComplexRecordUnion() throws Exception {
  OrcKeyComparator comparator = new OrcKeyComparator();
  Configuration conf = new Configuration();

  TypeDescription listSchema = TypeDescription.createList(TypeDescription.createString());

  TypeDescription nestedRecordSchema = TypeDescription.createStruct();
  nestedRecordSchema.addField("x", TypeDescription.createInt());
  nestedRecordSchema.addField("y", TypeDescription.createInt());

  TypeDescription unionSchema = TypeDescription.createUnion();
  unionSchema.addUnionChild(TypeDescription.createInt());
  unionSchema.addUnionChild(listSchema);
  unionSchema.addUnionChild(nestedRecordSchema);

  TypeDescription schema = TypeDescription.createStruct();
  schema.addField("a", TypeDescription.createInt());
  schema.addField("b", unionSchema);

  // Hand the key schema to the comparator through the configuration, verifying it round-trips.
  String schemaProperty = OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.getAttribute();
  conf.set(schemaProperty, schema.toString());
  Assert.assertEquals(conf.get(schemaProperty), schema.toString());
  comparator.setConf(conf);

  // Base record: the union holds a nested struct built from (1, 2).
  OrcStruct base = (OrcStruct) OrcStruct.createValue(schema);
  base.setFieldValue("a", new IntWritable(1));
  OrcUnion baseUnion =
      createOrcUnion(unionSchema, createSimpleOrcStruct(nestedRecordSchema, 1, 2));
  base.setFieldValue("b", baseUnion);
  OrcKey baseKey = new OrcKey();
  baseKey.key = base;

  // Same content as the base record, held in different objects.
  OrcStruct sameAsBase = (OrcStruct) OrcStruct.createValue(schema);
  sameAsBase.setFieldValue("a", new IntWritable(1));
  OrcUnion sameAsBaseUnion =
      createOrcUnion(unionSchema, createSimpleOrcStruct(nestedRecordSchema, 1, 2));
  sameAsBase.setFieldValue("b", sameAsBaseUnion);
  OrcKey sameAsBaseKey = new OrcKey();
  sameAsBaseKey.key = sameAsBase;

  // Different nested struct inside the union: base == sameAsBase < largerNested.
  OrcStruct largerNested = (OrcStruct) OrcStruct.createValue(schema);
  largerNested.setFieldValue("a", new IntWritable(1));
  OrcUnion largerNestedUnion =
      createOrcUnion(unionSchema, createSimpleOrcStruct(nestedRecordSchema, 2, 2));
  largerNested.setFieldValue("b", largerNestedUnion);
  OrcKey largerNestedKey = new OrcKey();
  largerNestedKey.key = largerNested;

  // Lists inside the union: smallerList < largerList == largerListCopy.
  OrcStruct smallerList = (OrcStruct) OrcStruct.createValue(schema);
  smallerList.setFieldValue("a", new IntWritable(1));
  OrcUnion smallerListUnion = createOrcUnion(unionSchema, createOrcList(5, listSchema, 2));
  smallerList.setFieldValue("b", smallerListUnion);
  OrcKey smallerListKey = new OrcKey();
  smallerListKey.key = smallerList;

  OrcStruct largerList = (OrcStruct) OrcStruct.createValue(schema);
  largerList.setFieldValue("a", new IntWritable(1));
  OrcUnion largerListUnion = createOrcUnion(unionSchema, createOrcList(6, listSchema, 2));
  largerList.setFieldValue("b", largerListUnion);
  OrcKey largerListKey = new OrcKey();
  largerListKey.key = largerList;

  OrcStruct largerListCopy = (OrcStruct) OrcStruct.createValue(schema);
  largerListCopy.setFieldValue("a", new IntWritable(1));
  OrcUnion largerListCopyUnion = createOrcUnion(unionSchema, createOrcList(6, listSchema, 2));
  largerListCopy.setFieldValue("b", largerListCopyUnion);
  OrcKey largerListCopyKey = new OrcKey();
  largerListCopyKey.key = largerListCopy;

  Assert.assertEquals(baseUnion, sameAsBaseUnion);
  // Int value in the nested struct of largerNestedKey is larger.
  Assert.assertTrue(comparator.compare(baseKey, largerNestedKey) < 0);
  Assert.assertTrue(comparator.compare(smallerListKey, largerListKey) < 0);
  Assert.assertTrue(comparator.compare(smallerListKey, largerListCopyKey) < 0);
  Assert.assertTrue(comparator.compare(largerListKey, largerListCopyKey) == 0);
}