Java Code Examples for org.apache.orc.TypeDescription#getCategory()

The following examples show how to use org.apache.orc.TypeDescription#getCategory(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: OrcSchemaWithTypeVisitor.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Walks an ORC schema together with the Iceberg type that corresponds to it and dispatches
 * every node to the matching callback of {@code visitor}.
 *
 * <p>{@code iType} may be {@code null}. In that case {@code null} is handed to the visitor and
 * propagated to every nested visit, so the traversal is still driven by the ORC schema alone.
 *
 * @param iType   the Iceberg type matching {@code schema}, or {@code null} if there is none
 * @param schema  the ORC type to traverse
 * @param visitor callbacks invoked for structs, lists, maps and primitives
 * @param <T>     result type produced by the visitor
 * @return the value produced by the visitor for {@code schema}
 * @throws UnsupportedOperationException if {@code schema} is a UNION
 */
public static <T> T visit(Type iType, TypeDescription schema, OrcSchemaWithTypeVisitor<T> visitor) {
  switch (schema.getCategory()) {
    case STRUCT:
      return visitRecord(iType != null ? iType.asStructType() : null, schema, visitor);

    case UNION:
      throw new UnsupportedOperationException("Cannot handle " + schema);

    case LIST:
      Types.ListType list = iType != null ? iType.asListType() : null;
      // list is null whenever iType is null, so the element type must be guarded just like
      // the key/value types in the MAP branch; dereferencing it unconditionally would NPE.
      return visitor.list(
          list, schema,
          visit(list != null ? list.elementType() : null, schema.getChildren().get(0), visitor));

    case MAP:
      Types.MapType map = iType != null ? iType.asMapType() : null;
      return visitor.map(
          map, schema,
          visit(map != null ? map.keyType() : null, schema.getChildren().get(0), visitor),
          visit(map != null ? map.valueType() : null, schema.getChildren().get(1), visitor));

    default:
      // Every remaining category is treated as a primitive.
      return visitor.primitive(iType != null ? iType.asPrimitiveType() : null, schema);
  }
}
 
Example 2
Source File: VectorColumnFiller.java    From secor with Apache License 2.0 5 votes vote down vote up
/**
 * Chooses the {@code JsonConverter} implementation for the category of the given ORC type.
 *
 * @param schema the ORC type a converter is needed for
 * @return a new converter; compound categories receive {@code schema} in their constructor
 * @throws IllegalArgumentException if the category has no converter
 */
public static JsonConverter createConverter(TypeDescription schema) {
    final JsonConverter converter;
    switch (schema.getCategory()) {
    // All integral widths share one converter.
    case BYTE:
    case SHORT:
    case INT:
    case LONG:
        converter = new LongColumnConverter();
        break;
    // Both floating-point widths share one converter.
    case FLOAT:
    case DOUBLE:
        converter = new DoubleColumnConverter();
        break;
    // Every textual category shares one converter.
    case CHAR:
    case VARCHAR:
    case STRING:
        converter = new StringColumnConverter();
        break;
    case DECIMAL:
        converter = new DecimalColumnConverter();
        break;
    case TIMESTAMP:
        converter = new TimestampColumnConverter();
        break;
    case BINARY:
        converter = new BinaryColumnConverter();
        break;
    case BOOLEAN:
        converter = new BooleanColumnConverter();
        break;
    // Compound categories need the schema to build converters for their children.
    case STRUCT:
        converter = new StructColumnConverter(schema);
        break;
    case LIST:
        converter = new ListColumnConverter(schema);
        break;
    case MAP:
        converter = new MapColumnConverter(schema);
        break;
    case UNION:
        converter = new UnionColumnConverter(schema);
        break;
    default:
        throw new IllegalArgumentException("Unhandled type " + schema);
    }
    return converter;
}
 
Example 3
Source File: JsonFieldFiller.java    From secor with Apache License 2.0 5 votes vote down vote up
/**
 * Writes one row of a batch to {@code writer}.
 *
 * <p>A STRUCT schema is emitted as an object with one key per batch column; any other schema
 * is emitted as the single value found in the first column.
 *
 * @param writer destination of the JSON output
 * @param batch  batch holding the column vectors
 * @param schema ORC type describing the batch
 * @param row    index of the row inside the batch
 * @throws JSONException if the writer rejects the output
 */
public static void processRow(JSONWriter writer, VectorizedRowBatch batch,
        TypeDescription schema, int row) throws JSONException {
    // Anything that is not a struct lives entirely in column 0.
    if (schema.getCategory() != TypeDescription.Category.STRUCT) {
        setValue(writer, batch.cols[0], schema, row);
        return;
    }

    final List<TypeDescription> childTypes = schema.getChildren();
    final List<String> childNames = schema.getFieldNames();

    writer.object();
    int column = 0;
    while (column < batch.cols.length) {
        writer.key(childNames.get(column));
        setValue(writer, batch.cols[column], childTypes.get(column), row);
        column++;
    }
    writer.endObject();
}
 
Example 4
Source File: SparkOrcWriter.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the converter responsible for values of the given ORC type.
 *
 * @param schema the ORC type to build a converter for
 * @return a new converter for the type's category
 * @throws IllegalArgumentException if the category is not handled
 */
private static Converter buildConverter(TypeDescription schema) {
  final TypeDescription.Category category = schema.getCategory();
  switch (category) {
    case BOOLEAN:
      return new BooleanConverter();
    case BYTE:
      return new ByteConverter();
    case SHORT:
      return new ShortConverter();
    // DATE shares the converter used for INT.
    case DATE:
    case INT:
      return new IntConverter();
    case LONG:
      return new LongConverter();
    case FLOAT:
      return new FloatConverter();
    case DOUBLE:
      return new DoubleConverter();
    case BINARY:
      return new BytesConverter();
    case STRING:
    case CHAR:
    case VARCHAR:
      return new StringConverter();
    case DECIMAL:
      // Precision decides between the 18-digit and the 38-digit converter.
      if (schema.getPrecision() <= 18) {
        return new Decimal18Converter(schema);
      }
      return new Decimal38Converter(schema);
    case TIMESTAMP:
      return new TimestampConverter();
    case STRUCT:
      return new StructConverter(schema);
    case LIST:
      return new ListConverter(schema);
    case MAP:
      return new MapConverter(schema);
    default:
      throw new IllegalArgumentException("Unhandled type " + schema);
  }
}
 
Example 5
Source File: ORCSchemaUtil.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Collects, for an ORC type and all of its descendants, a mapping from Iceberg field ID to the
 * ORC field (name plus type) carrying that ID.
 *
 * @param name    the name under which {@code orcType} is registered
 * @param orcType the ORC type to scan
 * @return a new map holding an entry for every non-root type that has an Iceberg ID
 */
private static Map<Integer, OrcField> icebergToOrcMapping(String name, TypeDescription orcType) {
  final Map<Integer, OrcField> idToField = Maps.newHashMap();

  // Descend into compound types first; other categories have no children to scan.
  switch (orcType.getCategory()) {
    case STRUCT: {
      final List<String> fieldNames = orcType.getFieldNames();
      final List<TypeDescription> fieldTypes = orcType.getChildren();
      int index = 0;
      while (index < fieldTypes.size()) {
        idToField.putAll(icebergToOrcMapping(fieldNames.get(index), fieldTypes.get(index)));
        index++;
      }
      break;
    }
    case LIST: {
      final TypeDescription element = orcType.getChildren().get(0);
      idToField.putAll(icebergToOrcMapping("element", element));
      break;
    }
    case MAP: {
      idToField.putAll(icebergToOrcMapping("key", orcType.getChildren().get(0)));
      idToField.putAll(icebergToOrcMapping("value", orcType.getChildren().get(1)));
      break;
    }
    default:
      break;
  }

  // Only add to non-root types.
  if (orcType.getId() > 0) {
    icebergID(orcType)
        .ifPresent(id -> idToField.put(id, new OrcField(name, orcType)));
  }

  return idToField;
}
 
Example 6
Source File: SparkOrcReader.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the element size used for arrays of the given ORC type.
 *
 * @param type the element type
 * @return 1 for BOOLEAN/BYTE, 2 for SHORT, 4 for INT/FLOAT and 8 for every other category
 */
static int getArrayElementSize(TypeDescription type) {
  final int size;
  switch (type.getCategory()) {
    case BOOLEAN:
    case BYTE:
      size = 1;
      break;
    case SHORT:
      size = 2;
      break;
    case INT:
    case FLOAT:
      size = 4;
      break;
    default:
      size = 8;
      break;
  }
  return size;
}
 
Example 7
Source File: GenericOrcWriter.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Builds one converter per top-level field of a struct schema.
 *
 * @param schema the top-level ORC schema; must be a STRUCT
 * @return converters in the same order as the struct's children
 * @throws IllegalArgumentException if {@code schema} is not a STRUCT
 */
private static Converter[] buildConverters(TypeDescription schema) {
  if (TypeDescription.Category.STRUCT != schema.getCategory()) {
    throw new IllegalArgumentException("Top level must be a struct " + schema);
  }

  final List<TypeDescription> fields = schema.getChildren();
  final Converter[] converters = new Converter[fields.size()];
  int slot = 0;
  for (TypeDescription field : fields) {
    converters[slot++] = buildConverter(field);
  }
  return converters;
}
 
Example 8
Source File: SparkOrcWriter.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the converter responsible for values of the given ORC type.
 *
 * @param schema the ORC type to build a converter for
 * @return a new converter for the type's category
 * @throws IllegalArgumentException if the category is not handled
 */
private static Converter buildConverter(TypeDescription schema) {
  final TypeDescription.Category category = schema.getCategory();
  switch (category) {
    case BOOLEAN:
      return new BooleanConverter();
    case BYTE:
      return new ByteConverter();
    case SHORT:
      return new ShortConverter();
    // DATE shares the converter used for INT.
    case DATE:
    case INT:
      return new IntConverter();
    case LONG:
      return new LongConverter();
    case FLOAT:
      return new FloatConverter();
    case DOUBLE:
      return new DoubleConverter();
    case BINARY:
      return new BytesConverter();
    case STRING:
    case CHAR:
    case VARCHAR:
      return new StringConverter();
    case DECIMAL:
      // Precision decides between the 18-digit and the 38-digit converter.
      if (schema.getPrecision() <= 18) {
        return new Decimal18Converter(schema);
      }
      return new Decimal38Converter(schema);
    case TIMESTAMP_INSTANT:
      return new TimestampTzConverter();
    case STRUCT:
      return new StructConverter(schema);
    case LIST:
      return new ListConverter(schema);
    case MAP:
      return new MapConverter(schema);
    default:
      throw new IllegalArgumentException("Unhandled type " + schema);
  }
}
 
Example 9
Source File: SparkOrcWriter.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Builds one converter per top-level field of a struct schema.
 *
 * @param schema the top-level ORC schema; must be a STRUCT
 * @return converters in the same order as the struct's children
 * @throws IllegalArgumentException if {@code schema} is not a STRUCT
 */
private static Converter[] buildConverters(TypeDescription schema) {
  if (TypeDescription.Category.STRUCT != schema.getCategory()) {
    throw new IllegalArgumentException("Top level must be a struct " + schema);
  }
  final List<TypeDescription> fields = schema.getChildren();
  final Converter[] converters = new Converter[fields.size()];
  int slot = 0;
  for (TypeDescription field : fields) {
    converters[slot++] = buildConverter(field);
  }
  return converters;
}
 
Example 10
Source File: OrcBatchReader.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Translates an ORC schema into the Flink TypeInformation describing the same data.
 *
 * <p>Compound schemas (STRUCT, LIST, MAP) are translated recursively.
 *
 * @param schema The ORC schema.
 * @return The TypeInformation that corresponds to the ORC schema.
 * @throws UnsupportedOperationException if the schema is a UNION.
 * @throws IllegalArgumentException if the category is not recognized.
 */
static TypeInformation schemaToTypeInfo(TypeDescription schema) {
	switch (schema.getCategory()) {
		case BOOLEAN:
			return BasicTypeInfo.BOOLEAN_TYPE_INFO;
		case BYTE:
			return BasicTypeInfo.BYTE_TYPE_INFO;
		case SHORT:
			return BasicTypeInfo.SHORT_TYPE_INFO;
		case INT:
			return BasicTypeInfo.INT_TYPE_INFO;
		case LONG:
			return BasicTypeInfo.LONG_TYPE_INFO;
		case FLOAT:
			return BasicTypeInfo.FLOAT_TYPE_INFO;
		case DOUBLE:
			return BasicTypeInfo.DOUBLE_TYPE_INFO;
		case DECIMAL:
			return BasicTypeInfo.BIG_DEC_TYPE_INFO;
		// every textual category is represented as a String
		case STRING:
		case CHAR:
		case VARCHAR:
			return BasicTypeInfo.STRING_TYPE_INFO;
		case DATE:
			return SqlTimeTypeInfo.DATE;
		case TIMESTAMP:
			return SqlTimeTypeInfo.TIMESTAMP;
		case BINARY:
			return PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO;
		case STRUCT: {
			final List<TypeDescription> children = schema.getChildren();
			final TypeInformation[] childInfos = new TypeInformation[children.size()];
			int pos = 0;
			while (pos < children.size()) {
				childInfos[pos] = schemaToTypeInfo(children.get(pos));
				pos++;
			}
			final String[] childNames = schema.getFieldNames().toArray(new String[0]);
			return new RowTypeInfo(childInfos, childNames);
		}
		case LIST: {
			final TypeInformation<?> elementInfo = schemaToTypeInfo(schema.getChildren().get(0));
			// arrays of primitive types are handled as object arrays to support null values
			return ObjectArrayTypeInfo.getInfoFor(elementInfo);
		}
		case MAP: {
			final TypeDescription orcKey = schema.getChildren().get(0);
			final TypeDescription orcValue = schema.getChildren().get(1);
			final TypeInformation<?> keyInfo = schemaToTypeInfo(orcKey);
			final TypeInformation<?> valueInfo = schemaToTypeInfo(orcValue);
			return new MapTypeInfo<>(keyInfo, valueInfo);
		}
		case UNION:
			throw new UnsupportedOperationException("UNION type is not supported yet.");
		default:
			throw new IllegalArgumentException("Unknown type " + schema);
	}
}
 
Example 11
Source File: OrcBatchReader.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Selects the function used to copy a value of the given ORC type.
 *
 * <p>Compound types get a copier assembled recursively from the copiers of their children.
 *
 * @param schema the ORC type of the values to copy
 * @return the copy function for that type
 * @throws UnsupportedOperationException if the schema is a UNION
 * @throws IllegalArgumentException if the category is not recognized
 */
@SuppressWarnings("unchecked")
private static Function<Object, Object> getCopyFunction(TypeDescription schema) {
	// the category of the type decides which copier applies
	switch (schema.getCategory()) {
		// these categories are all handed to returnImmutable
		case BOOLEAN:
		case BYTE:
		case SHORT:
		case INT:
		case LONG:
		case FLOAT:
		case DOUBLE:
		case CHAR:
		case VARCHAR:
		case STRING:
		case DECIMAL:
			return OrcBatchReader::returnImmutable;
		case DATE:
			return OrcBatchReader::copyDate;
		case TIMESTAMP:
			return OrcBatchReader::copyTimestamp;
		case BINARY:
			return OrcBatchReader::copyBinary;
		case STRUCT: {
			final List<TypeDescription> children = schema.getChildren();
			final Function<Object, Object>[] childCopiers = new Function[children.size()];
			int pos = 0;
			while (pos < children.size()) {
				childCopiers[pos] = getCopyFunction(children.get(pos));
				pos++;
			}
			return new CopyStruct(childCopiers);
		}
		case LIST: {
			final TypeDescription elementSchema = schema.getChildren().get(0);
			final Function<Object, Object> elementCopier = getCopyFunction(elementSchema);
			final Class elementClass = getClassForType(elementSchema);
			return new CopyList(elementCopier, elementClass);
		}
		case MAP: {
			final TypeDescription keySchema = schema.getChildren().get(0);
			final TypeDescription valueSchema = schema.getChildren().get(1);
			final Function<Object, Object> keyCopier = getCopyFunction(keySchema);
			final Function<Object, Object> valueCopier = getCopyFunction(valueSchema);
			return new CopyMap(keyCopier, valueCopier);
		}
		case UNION:
			throw new UnsupportedOperationException("UNION type not supported yet");
		default:
			throw new IllegalArgumentException("Unknown type " + schema);
	}
}
 
Example 12
Source File: OrcBatchReader.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the Java class associated with the given ORC type.
 *
 * @param schema the ORC type
 * @return the class for the type's category; for LIST, the array class whose component type is
 *         the class of the list's element type
 * @throws UnsupportedOperationException if the schema is a UNION
 * @throws IllegalArgumentException if the category is not recognized
 */
private static Class<?> getClassForType(TypeDescription schema) {
	final Class<?> javaClass;
	switch (schema.getCategory()) {
		case BOOLEAN:
			javaClass = Boolean.class;
			break;
		case BYTE:
			javaClass = Byte.class;
			break;
		case SHORT:
			javaClass = Short.class;
			break;
		case INT:
			javaClass = Integer.class;
			break;
		case LONG:
			javaClass = Long.class;
			break;
		case FLOAT:
			javaClass = Float.class;
			break;
		case DOUBLE:
			javaClass = Double.class;
			break;
		case CHAR:
		case VARCHAR:
		case STRING:
			javaClass = String.class;
			break;
		case DATE:
			javaClass = Date.class;
			break;
		case TIMESTAMP:
			javaClass = Timestamp.class;
			break;
		case BINARY:
			javaClass = byte[].class;
			break;
		case DECIMAL:
			javaClass = BigDecimal.class;
			break;
		case STRUCT:
			javaClass = Row.class;
			break;
		case LIST: {
			// instantiate an empty array of the element class just to obtain its array class
			final Class<?> elementClass = getClassForType(schema.getChildren().get(0));
			javaClass = Array.newInstance(elementClass, 0).getClass();
			break;
		}
		case MAP:
			javaClass = HashMap.class;
			break;
		case UNION:
			throw new UnsupportedOperationException("UNION type not supported yet");
		default:
			throw new IllegalArgumentException("Unknown type " + schema);
	}
	return javaClass;
}
 
Example 13
Source File: SparkOrcReader.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Prints the value stored at ordinal {@code ord} of {@code row} to {@code System.out}, choosing
 * the getter and the formatting from the category of {@code schema}.
 *
 * <p>Lists and maps are printed element by element through recursive calls; structs are
 * delegated to the two-argument {@code printRow} overload.
 *
 * @param row    the container the value is read from
 * @param ord    ordinal of the value inside {@code row}
 * @param schema ORC type describing the value
 * @throws IllegalArgumentException if the category is not handled
 */
private static void printRow(SpecializedGetters row, int ord, TypeDescription schema) {
  switch (schema.getCategory()) {
    case BOOLEAN:
      System.out.print(row.getBoolean(ord));
      break;
    case BYTE:
      System.out.print(row.getByte(ord));
      break;
    case SHORT:
      System.out.print(row.getShort(ord));
      break;
    case INT:
      System.out.print(row.getInt(ord));
      break;
    case LONG:
      System.out.print(row.getLong(ord));
      break;
    case FLOAT:
      System.out.print(row.getFloat(ord));
      break;
    case DOUBLE:
      System.out.print(row.getDouble(ord));
      break;
    case CHAR:
    case VARCHAR:
    case STRING:
      // Textual values are wrapped in double quotes.
      System.out.print("\"" + row.getUTF8String(ord) + "\"");
      break;
    case BINARY: {
      // Binary values are printed as a bracketed, comma-separated list of two-digit hex bytes.
      byte[] bin = row.getBinary(ord);
      if (bin == null) {
        System.out.print("null");
      } else {
        System.out.print("[");
        for (int i = 0; i < bin.length; ++i) {
          if (i != 0) {
            System.out.print(", ");
          }
          // Mask to get the unsigned byte value, then left-pad single hex digits with a zero.
          int v = bin[i] & 0xff;
          if (v < 16) {
            System.out.print("0" + Integer.toHexString(v));
          } else {
            System.out.print(Integer.toHexString(v));
          }
        }
        System.out.print("]");
      }
      break;
    }
    case DECIMAL:
      System.out.print(row.getDecimal(ord, schema.getPrecision(), schema.getScale()));
      break;
    case DATE:
      System.out.print("\"" + new DateWritable(row.getInt(ord)) + "\"");
      break;
    case TIMESTAMP:
      // NOTE(review): if Timestamp is java.sql.Timestamp, its constructor expects epoch
      // milliseconds -- confirm that row.getLong(ord) is stored in that unit.
      System.out.print("\"" + new Timestamp(row.getLong(ord)) + "\"");
      break;
    case STRUCT:
      printRow(row.getStruct(ord, schema.getChildren().size()), schema);
      break;
    case LIST: {
      // Lists are printed as "[e0, e1, ...]", each element formatted by the child type.
      TypeDescription child = schema.getChildren().get(0);
      System.out.print("[");
      ArrayData list = row.getArray(ord);
      for(int e=0; e < list.numElements(); ++e) {
        if (e != 0) {
          System.out.print(", ");
        }
        printRow(list, e, child);
      }
      System.out.print("]");
      break;
    }
    case MAP: {
      // Maps are printed as "[k0: v0, k1: v1, ...]" from the parallel key and value arrays.
      TypeDescription keyType = schema.getChildren().get(0);
      TypeDescription valueType = schema.getChildren().get(1);
      MapData map = row.getMap(ord);
      ArrayData keys = map.keyArray();
      ArrayData values = map.valueArray();
      System.out.print("[");
      for(int e=0; e < map.numElements(); ++e) {
        if (e != 0) {
          System.out.print(", ");
        }
        printRow(keys, e, keyType);
        System.out.print(": ");
        printRow(values, e, valueType);
      }
      System.out.print("]");
      break;
    }
    default:
      throw new IllegalArgumentException("Unhandled type " + schema);
  }
}
 
Example 14
Source File: TypeConversion.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Converts an ORC type into the corresponding Iceberg type, looking up the field IDs of nested
 * types in {@code columnIds}.
 *
 * <p>Struct fields, list elements and map keys/values are all created as optional.
 *
 * @param schema    the ORC type to convert
 * @param columnIds lookup from ORC type to the ID to assign to it
 * @return the Iceberg type equivalent to {@code schema}
 * @throws IllegalArgumentException if the category cannot be converted (for example unions)
 */
Type convertOrcToType(TypeDescription schema, ColumnIdMap columnIds) {
  final TypeDescription.Category category = schema.getCategory();
  switch (category) {
    case BOOLEAN:
      return Types.BooleanType.get();
    // Every integral width up to INT becomes an Iceberg integer.
    case BYTE:
    case SHORT:
    case INT:
      return Types.IntegerType.get();
    case LONG:
      return Types.LongType.get();
    case FLOAT:
      return Types.FloatType.get();
    case DOUBLE:
      return Types.DoubleType.get();
    case STRING:
    case CHAR:
    case VARCHAR:
      return Types.StringType.get();
    case BINARY:
      return Types.BinaryType.get();
    case DATE:
      return Types.DateType.get();
    case TIMESTAMP:
      return Types.TimestampType.withoutZone();
    case DECIMAL:
      return Types.DecimalType.of(schema.getPrecision(), schema.getScale());
    case STRUCT: {
      final List<String> names = schema.getFieldNames();
      final List<TypeDescription> children = schema.getChildren();
      final List<Types.NestedField> converted = new ArrayList<>(names.size());
      int index = 0;
      while (index < names.size()) {
        final String fieldName = names.get(index);
        final TypeDescription fieldType = children.get(index);
        converted.add(Types.NestedField.optional(
            columnIds.get(fieldType), fieldName, convertOrcToType(fieldType, columnIds)));
        index++;
      }
      return Types.StructType.of(converted);
    }
    case LIST: {
      final TypeDescription element = schema.getChildren().get(0);
      return Types.ListType.ofOptional(
          columnIds.get(element), convertOrcToType(element, columnIds));
    }
    case MAP: {
      final TypeDescription keySchema = schema.getChildren().get(0);
      final TypeDescription valueSchema = schema.getChildren().get(1);
      return Types.MapType.ofOptional(
          columnIds.get(keySchema), columnIds.get(valueSchema),
          convertOrcToType(keySchema, columnIds), convertOrcToType(valueSchema, columnIds));
    }
    default:
      // We don't have an answer for union types.
      throw new IllegalArgumentException("Can't handle " + schema);
  }
}
 
Example 15
Source File: GenericOrcWriter.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the converter responsible for values of the given ORC type.
 *
 * <p>LONG and BINARY columns are refined further through the Iceberg type attribute stored on
 * the ORC type; when the attribute is absent they are treated as plain LONG / BINARY.
 *
 * @param schema the ORC type to build a converter for
 * @return a new converter for the type
 * @throws IllegalArgumentException if the category is not handled
 * @throws IllegalStateException if the LONG/BINARY attribute names a kind without a converter
 */
private static Converter buildConverter(TypeDescription schema) {
  switch (schema.getCategory()) {
    case BOOLEAN:
      return new BooleanConverter();
    case BYTE:
      return new ByteConverter();
    case SHORT:
      return new ShortConverter();
    case DATE:
      return new DateConverter();
    case INT:
      return new IntConverter();
    case LONG: {
      final String longAttr = schema.getAttributeValue(ORCSchemaUtil.ICEBERG_LONG_TYPE_ATTRIBUTE);
      final ORCSchemaUtil.LongType longKind;
      if (longAttr == null) {
        longKind = ORCSchemaUtil.LongType.LONG;
      } else {
        longKind = ORCSchemaUtil.LongType.valueOf(longAttr);
      }
      switch (longKind) {
        case TIME:
          return new TimeConverter();
        case LONG:
          return new LongConverter();
        default:
          throw new IllegalStateException("Unhandled Long type found in ORC type attribute: " + longKind);
      }
    }
    case FLOAT:
      return new FloatConverter();
    case DOUBLE:
      return new DoubleConverter();
    case BINARY: {
      final String binaryAttr = schema.getAttributeValue(ORCSchemaUtil.ICEBERG_BINARY_TYPE_ATTRIBUTE);
      final ORCSchemaUtil.BinaryType binaryKind;
      if (binaryAttr == null) {
        binaryKind = ORCSchemaUtil.BinaryType.BINARY;
      } else {
        binaryKind = ORCSchemaUtil.BinaryType.valueOf(binaryAttr);
      }
      switch (binaryKind) {
        case UUID:
          return new UUIDConverter();
        case FIXED:
          return new FixedConverter();
        case BINARY:
          return new BytesConverter();
        default:
          throw new IllegalStateException("Unhandled Binary type found in ORC type attribute: " + binaryKind);
      }
    }
    case STRING:
    case CHAR:
    case VARCHAR:
      return new StringConverter();
    case DECIMAL:
      // Precision decides between the 18-digit and the 38-digit converter.
      if (schema.getPrecision() <= 18) {
        return new Decimal18Converter(schema);
      }
      return new Decimal38Converter(schema);
    case TIMESTAMP:
      return new TimestampConverter();
    case TIMESTAMP_INSTANT:
      return new TimestampTzConverter();
    case STRUCT:
      return new StructConverter(schema);
    case LIST:
      return new ListConverter(schema);
    case MAP:
      return new MapConverter(schema);
    default:
      throw new IllegalArgumentException("Unhandled type " + schema);
  }
}
 
Example 16
Source File: ORCSchemaUtil.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Converts an ORC type into an Iceberg nested field with the given name.
 *
 * <p>The field ID is the one returned by {@code icebergID(orcType)} when present, otherwise a
 * fresh one drawn from {@code nextID}. The ID of a compound type is resolved before its
 * children are converted, so fresh IDs are handed out parent-first.
 *
 * @param orcType the ORC type to convert
 * @param name    the name given to the resulting field
 * @param nextID  source of fresh IDs for types that carry none
 * @return the Iceberg field equivalent to {@code orcType}
 * @throws IllegalArgumentException if the category cannot be converted (for example unions)
 * @throws IllegalStateException if the LONG/BINARY attribute resolves to a kind that is not
 *         handled here
 */
private static Types.NestedField convertOrcToIceberg(TypeDescription orcType, String name,
                                                     TypeUtil.NextID nextID) {

  // Resolve this node's ID and requiredness up front, before any recursion into children.
  final int icebergID = icebergID(orcType).orElseGet(nextID::get);
  final boolean isRequired = isRequired(orcType);

  switch (orcType.getCategory()) {
    case BOOLEAN:
      return getIcebergType(icebergID, name, Types.BooleanType.get(), isRequired);
    case BYTE:
    case SHORT:
    case INT:
      // All integral widths up to INT become an Iceberg integer.
      return getIcebergType(icebergID, name, Types.IntegerType.get(), isRequired);
    case LONG:
      // The ORC attribute tells a time stored as a long apart from a plain long;
      // a missing attribute means plain long.
      String longAttributeValue = orcType.getAttributeValue(ICEBERG_LONG_TYPE_ATTRIBUTE);
      LongType longType = longAttributeValue == null ? LongType.LONG : LongType.valueOf(longAttributeValue);
      switch (longType) {
        case TIME:
          return getIcebergType(icebergID, name, Types.TimeType.get(), isRequired);
        case LONG:
          return getIcebergType(icebergID, name, Types.LongType.get(), isRequired);
        default:
          throw new IllegalStateException("Invalid Long type found in ORC type attribute");
      }
    case FLOAT:
      return getIcebergType(icebergID, name, Types.FloatType.get(), isRequired);
    case DOUBLE:
      return getIcebergType(icebergID, name, Types.DoubleType.get(), isRequired);
    case STRING:
    case CHAR:
    case VARCHAR:
      return getIcebergType(icebergID, name, Types.StringType.get(), isRequired);
    case BINARY:
      // The ORC attribute tells UUID and fixed-length values apart from plain binary;
      // a missing attribute means plain binary.
      String binaryAttributeValue = orcType.getAttributeValue(ICEBERG_BINARY_TYPE_ATTRIBUTE);
      BinaryType binaryType = binaryAttributeValue == null ? BinaryType.BINARY :
          BinaryType.valueOf(binaryAttributeValue);
      switch (binaryType) {
        case UUID:
          return getIcebergType(icebergID, name, Types.UUIDType.get(), isRequired);
        case FIXED:
          // The fixed length is read from a second attribute on the ORC type.
          // NOTE(review): assumes that attribute is always present for FIXED -- confirm.
          int fixedLength = Integer.parseInt(orcType.getAttributeValue(ICEBERG_FIELD_LENGTH));
          return getIcebergType(icebergID, name, Types.FixedType.ofLength(fixedLength), isRequired);
        case BINARY:
          return getIcebergType(icebergID, name, Types.BinaryType.get(), isRequired);
        default:
          throw new IllegalStateException("Invalid Binary type found in ORC type attribute");
      }
    case DATE:
      return getIcebergType(icebergID, name, Types.DateType.get(), isRequired);
    case TIMESTAMP:
      return getIcebergType(icebergID, name, Types.TimestampType.withoutZone(), isRequired);
    case TIMESTAMP_INSTANT:
      return getIcebergType(icebergID, name, Types.TimestampType.withZone(), isRequired);
    case DECIMAL:
      return getIcebergType(icebergID, name,
          Types.DecimalType.of(orcType.getPrecision(), orcType.getScale()),
          isRequired);
    case STRUCT: {
      // Convert every child field in declaration order.
      List<String> fieldNames = orcType.getFieldNames();
      List<TypeDescription> fieldTypes = orcType.getChildren();
      List<Types.NestedField> fields = new ArrayList<>(fieldNames.size());
      for (int c = 0; c < fieldNames.size(); ++c) {
        String childName = fieldNames.get(c);
        TypeDescription type = fieldTypes.get(c);
        Types.NestedField field = convertOrcToIceberg(type, childName, nextID);
        fields.add(field);
      }

      return getIcebergType(icebergID, name, Types.StructType.of(fields), isRequired);
    }
    case LIST: {
      TypeDescription elementType = orcType.getChildren().get(0);
      Types.NestedField element = convertOrcToIceberg(elementType, "element", nextID);

      // Element requiredness comes from the ORC element type, list requiredness from orcType.
      Types.ListType listTypeWithElem = isRequired(elementType) ?
          Types.ListType.ofRequired(element.fieldId(), element.type()) :
          Types.ListType.ofOptional(element.fieldId(), element.type());
      return isRequired ?
          Types.NestedField.required(icebergID, name, listTypeWithElem) :
          Types.NestedField.optional(icebergID, name, listTypeWithElem);
    }
    case MAP: {
      // The key is converted before the value, which fixes the order fresh IDs are drawn in.
      TypeDescription keyType = orcType.getChildren().get(0);
      Types.NestedField key = convertOrcToIceberg(keyType, "key", nextID);
      TypeDescription valueType = orcType.getChildren().get(1);
      Types.NestedField value = convertOrcToIceberg(valueType, "value", nextID);

      // Only the value type's requiredness selects between the required and optional map.
      Types.MapType mapTypeWithKV = isRequired(valueType) ?
          Types.MapType.ofRequired(key.fieldId(), value.fieldId(), key.type(), value.type()) :
          Types.MapType.ofOptional(key.fieldId(), value.fieldId(), key.type(), value.type());

      return getIcebergType(icebergID, name, mapTypeWithKV, isRequired);
    }
    default:
      // We don't have an answer for union types.
      throw new IllegalArgumentException("Can't handle " + orcType);
  }
}
 
Example 17
Source File: OrcBatchReader.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Selects the function used to copy a value of the given ORC type.
 *
 * <p>Compound types get a copier assembled recursively from the copiers of their children.
 *
 * @param schema the ORC type of the values to copy
 * @return the copy function for that type
 * @throws UnsupportedOperationException if the schema is a UNION
 * @throws IllegalArgumentException if the category is not recognized
 */
@SuppressWarnings("unchecked")
private static Function<Object, Object> getCopyFunction(TypeDescription schema) {
	// the category of the type decides which copier applies
	switch (schema.getCategory()) {
		// these categories are all handed to returnImmutable
		case BOOLEAN:
		case BYTE:
		case SHORT:
		case INT:
		case LONG:
		case FLOAT:
		case DOUBLE:
		case CHAR:
		case VARCHAR:
		case STRING:
		case DECIMAL:
			return OrcBatchReader::returnImmutable;
		case DATE:
			return OrcBatchReader::copyDate;
		case TIMESTAMP:
			return OrcBatchReader::copyTimestamp;
		case BINARY:
			return OrcBatchReader::copyBinary;
		case STRUCT: {
			final List<TypeDescription> children = schema.getChildren();
			final Function<Object, Object>[] childCopiers = new Function[children.size()];
			int pos = 0;
			while (pos < children.size()) {
				childCopiers[pos] = getCopyFunction(children.get(pos));
				pos++;
			}
			return new CopyStruct(childCopiers);
		}
		case LIST: {
			final TypeDescription elementSchema = schema.getChildren().get(0);
			final Function<Object, Object> elementCopier = getCopyFunction(elementSchema);
			final Class elementClass = getClassForType(elementSchema);
			return new CopyList(elementCopier, elementClass);
		}
		case MAP: {
			final TypeDescription keySchema = schema.getChildren().get(0);
			final TypeDescription valueSchema = schema.getChildren().get(1);
			final Function<Object, Object> keyCopier = getCopyFunction(keySchema);
			final Function<Object, Object> valueCopier = getCopyFunction(valueSchema);
			return new CopyMap(keyCopier, valueCopier);
		}
		case UNION:
			throw new UnsupportedOperationException("UNION type not supported yet");
		default:
			throw new IllegalArgumentException("Unknown type " + schema);
	}
}
 
Example 18
Source File: OrcBatchReader.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the Java class associated with the given ORC type.
 *
 * @param schema the ORC type
 * @return the class for the type's category; for LIST, the array class whose component type is
 *         the class of the list's element type
 * @throws UnsupportedOperationException if the schema is a UNION
 * @throws IllegalArgumentException if the category is not recognized
 */
private static Class<?> getClassForType(TypeDescription schema) {
	final Class<?> javaClass;
	switch (schema.getCategory()) {
		case BOOLEAN:
			javaClass = Boolean.class;
			break;
		case BYTE:
			javaClass = Byte.class;
			break;
		case SHORT:
			javaClass = Short.class;
			break;
		case INT:
			javaClass = Integer.class;
			break;
		case LONG:
			javaClass = Long.class;
			break;
		case FLOAT:
			javaClass = Float.class;
			break;
		case DOUBLE:
			javaClass = Double.class;
			break;
		case CHAR:
		case VARCHAR:
		case STRING:
			javaClass = String.class;
			break;
		case DATE:
			javaClass = Date.class;
			break;
		case TIMESTAMP:
			javaClass = Timestamp.class;
			break;
		case BINARY:
			javaClass = byte[].class;
			break;
		case DECIMAL:
			javaClass = BigDecimal.class;
			break;
		case STRUCT:
			javaClass = Row.class;
			break;
		case LIST: {
			// instantiate an empty array of the element class just to obtain its array class
			final Class<?> elementClass = getClassForType(schema.getChildren().get(0));
			javaClass = Array.newInstance(elementClass, 0).getClass();
			break;
		}
		case MAP:
			javaClass = HashMap.class;
			break;
		case UNION:
			throw new UnsupportedOperationException("UNION type not supported yet");
		default:
			throw new IllegalArgumentException("Unknown type " + schema);
	}
	return javaClass;
}
 
Example 19
Source File: OrcBatchReader.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Translates an ORC schema into the Flink TypeInformation describing the same data.
 *
 * <p>Compound schemas (STRUCT, LIST, MAP) are translated recursively.
 *
 * @param schema The ORC schema.
 * @return The TypeInformation that corresponds to the ORC schema.
 * @throws UnsupportedOperationException if the schema is a UNION.
 * @throws IllegalArgumentException if the category is not recognized.
 */
static TypeInformation schemaToTypeInfo(TypeDescription schema) {
	switch (schema.getCategory()) {
		case BOOLEAN:
			return BasicTypeInfo.BOOLEAN_TYPE_INFO;
		case BYTE:
			return BasicTypeInfo.BYTE_TYPE_INFO;
		case SHORT:
			return BasicTypeInfo.SHORT_TYPE_INFO;
		case INT:
			return BasicTypeInfo.INT_TYPE_INFO;
		case LONG:
			return BasicTypeInfo.LONG_TYPE_INFO;
		case FLOAT:
			return BasicTypeInfo.FLOAT_TYPE_INFO;
		case DOUBLE:
			return BasicTypeInfo.DOUBLE_TYPE_INFO;
		case DECIMAL:
			return BasicTypeInfo.BIG_DEC_TYPE_INFO;
		// every textual category is represented as a String
		case STRING:
		case CHAR:
		case VARCHAR:
			return BasicTypeInfo.STRING_TYPE_INFO;
		case DATE:
			return SqlTimeTypeInfo.DATE;
		case TIMESTAMP:
			return SqlTimeTypeInfo.TIMESTAMP;
		case BINARY:
			return PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO;
		case STRUCT: {
			final List<TypeDescription> children = schema.getChildren();
			final TypeInformation[] childInfos = new TypeInformation[children.size()];
			int pos = 0;
			while (pos < children.size()) {
				childInfos[pos] = schemaToTypeInfo(children.get(pos));
				pos++;
			}
			final String[] childNames = schema.getFieldNames().toArray(new String[0]);
			return new RowTypeInfo(childInfos, childNames);
		}
		case LIST: {
			final TypeInformation<?> elementInfo = schemaToTypeInfo(schema.getChildren().get(0));
			// arrays of primitive types are handled as object arrays to support null values
			return ObjectArrayTypeInfo.getInfoFor(elementInfo);
		}
		case MAP: {
			final TypeDescription orcKey = schema.getChildren().get(0);
			final TypeDescription orcValue = schema.getChildren().get(1);
			final TypeInformation<?> keyInfo = schemaToTypeInfo(orcKey);
			final TypeInformation<?> valueInfo = schemaToTypeInfo(orcValue);
			return new MapTypeInfo<>(keyInfo, valueInfo);
		}
		case UNION:
			throw new UnsupportedOperationException("UNION type is not supported yet.");
		default:
			throw new IllegalArgumentException("Unknown type " + schema);
	}
}
 
Example 20
Source File: OrcBatchReader.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the Java class associated with the given ORC type.
 *
 * @param schema the ORC type
 * @return the class for the type's category; for LIST, the array class whose component type is
 *         the class of the list's element type
 * @throws UnsupportedOperationException if the schema is a UNION
 * @throws IllegalArgumentException if the category is not recognized
 */
private static Class<?> getClassForType(TypeDescription schema) {
	final Class<?> javaClass;
	switch (schema.getCategory()) {
		case BOOLEAN:
			javaClass = Boolean.class;
			break;
		case BYTE:
			javaClass = Byte.class;
			break;
		case SHORT:
			javaClass = Short.class;
			break;
		case INT:
			javaClass = Integer.class;
			break;
		case LONG:
			javaClass = Long.class;
			break;
		case FLOAT:
			javaClass = Float.class;
			break;
		case DOUBLE:
			javaClass = Double.class;
			break;
		case CHAR:
		case VARCHAR:
		case STRING:
			javaClass = String.class;
			break;
		case DATE:
			javaClass = Date.class;
			break;
		case TIMESTAMP:
			javaClass = Timestamp.class;
			break;
		case BINARY:
			javaClass = byte[].class;
			break;
		case DECIMAL:
			javaClass = BigDecimal.class;
			break;
		case STRUCT:
			javaClass = Row.class;
			break;
		case LIST: {
			// instantiate an empty array of the element class just to obtain its array class
			final Class<?> elementClass = getClassForType(schema.getChildren().get(0));
			javaClass = Array.newInstance(elementClass, 0).getClass();
			break;
		}
		case MAP:
			javaClass = HashMap.class;
			break;
		case UNION:
			throw new UnsupportedOperationException("UNION type not supported yet");
		default:
			throw new IllegalArgumentException("Unknown type " + schema);
	}
	return javaClass;
}