Java Code Examples for org.apache.avro.Schema#getElementType()

The following examples show how to use org.apache.avro.Schema#getElementType(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: ConverterTest.java    From xml-avro with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a root element declared as an unbounded {@code xs:choice} converts to an
 * Avro ARRAY schema whose element type is a RECORD.
 */
@Test
public void arrayOfUnboundedChoiceElements() {
  final String xsd = String.join("",
      "<xs:schema xmlns:xs='http://www.w3.org/2001/XMLSchema'>",
      "  <xs:element name='root'>",
      "    <xs:complexType>",
      "      <xs:choice maxOccurs='unbounded'>",
      "        <xs:element name='s' type='xs:string'/>",
      "        <xs:element name='i' type='xs:int'/>",
      "      </xs:choice>",
      "    </xs:complexType>",
      "  </xs:element>",
      "</xs:schema>");

  final Schema arraySchema = Converter.createSchema(xsd);

  // The choice as a whole becomes an array ...
  assertEquals(Schema.Type.ARRAY, arraySchema.getType());
  // ... and every item of that array is a record.
  assertEquals(Schema.Type.RECORD, arraySchema.getElementType().getType());
}
 
Example 2
Source File: DatumBuilder.java    From xml-avro with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an Avro array datum from the child elements of {@code el}.
 *
 * <p>Only child nodes that are DOM {@link Element}s are converted. Other node kinds returned
 * by {@code getChildNodes()} (for example whitespace text or comments between the child
 * elements) are skipped rather than failing with a {@link ClassCastException}.
 *
 * @param schema the array schema; its element type is used to convert every child
 * @param el     the XML element whose child elements become the array items
 * @return the populated {@code GenericData.Array}
 */
private Object createArray(Schema schema, Element el) {
    NodeList childNodes = el.getChildNodes();
    Schema elementType = schema.getElementType();
    // Upper bound for the number of items: some nodes may turn out not to be elements.
    int numNodes = childNodes.getLength();
    GenericData.Array array = new GenericData.Array(numNodes, schema);

    for (int i = 0; i < numNodes; i++) {
        // getChildNodes() also yields text/comment nodes; those carry no array item.
        if (!(childNodes.item(i) instanceof Element)) {
            continue;
        }
        Element child = (Element) childNodes.item(i);
        //noinspection unchecked
        array.add(createNodeDatum(elementType, child, true));
    }
    return array;
}
 
Example 3
Source File: PruneColumns.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Returns {@code map} unchanged when both the "key" and "value" fields of its element
 * schema already have field ids; otherwise rebuilds the map via
 * {@code AvroSchemaUtil.createMap} using the supplied ids and the existing key/value schemas.
 */
private Schema complexMapWithIds(Schema map, Integer keyId, Integer valueId) {
  Schema entrySchema = map.getElementType();
  Schema.Field keyField = entrySchema.getField("key");
  Schema.Field valueField = entrySchema.getField("value");

  boolean bothHaveIds =
      AvroSchemaUtil.hasFieldId(keyField) && AvroSchemaUtil.hasFieldId(valueField);
  if (bothHaveIds) {
    return map;
  }

  return AvroSchemaUtil.createMap(
      keyId, keyField.schema(),
      valueId, valueField.schema());
}
 
Example 4
Source File: ConverterTest.java    From xml-avro with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a bounded {@code xs:choice} (maxOccurs='3') converts to an array of
 * two-field records, and that each XML child ends up in its own record of the datum.
 */
@Test
public void arrayOfChoiceElements() {
  final String xsd = String.join("",
      "<xs:schema xmlns:xs='http://www.w3.org/2001/XMLSchema'>",
      "  <xs:element name='root'>",
      "    <xs:complexType>",
      "      <xs:choice maxOccurs='3'>",
      "        <xs:element name='s' type='xs:string'/>",
      "        <xs:element name='i' type='xs:int'/>",
      "      </xs:choice>",
      "    </xs:complexType>",
      "  </xs:element>",
      "</xs:schema>");

  // Schema shape: array -> record with one field per choice alternative.
  final Schema arraySchema = Converter.createSchema(xsd);
  assertEquals(Schema.Type.ARRAY, arraySchema.getType());

  final Schema itemSchema = arraySchema.getElementType();
  assertEquals(Schema.Type.RECORD, itemSchema.getType());
  assertEquals(2, itemSchema.getFields().size());

  // Datum shape: one record per child element, in document order.
  final GenericData.Array items =
      Converter.createDatum(arraySchema, "<root><s>s</s><i>1</i><i>2</i></root>");

  final Object first = items.get(0);
  assertTrue(first instanceof GenericData.Record);
  assertEquals("s", ((GenericData.Record) first).get("s"));

  final Object second = items.get(1);
  assertTrue(second instanceof GenericData.Record);
  assertEquals(1, ((GenericData.Record) second).get("i"));

  final Object third = items.get(2);
  assertTrue(third instanceof GenericData.Record);
  assertEquals(2, ((GenericData.Record) third).get("i"));
}
 
Example 5
Source File: AvroResolver.java    From pxf with Apache License 2.0 5 votes vote down vote up
/**
 * Handles an Avro field whose value is an array: the element schema is taken
 * from the array schema and {@code populateRecord} is invoked once per array
 * element, in order, to fill the {@code List<OneField>} record.
 *
 * @param record      list of fields to be populated
 * @param fieldValue  field value; cast to {@code GenericData.Array}
 * @param arraySchema array schema
 * @return the size of the array
 */
int setArrayField(List<OneField> record, Object fieldValue,
                  Schema arraySchema) {
    final Schema elementSchema = arraySchema.getElementType();
    final GenericData.Array<?> elements = (GenericData.Array<?>) fieldValue;
    final int count = elements.size();
    for (int i = 0; i < count; i++) {
        populateRecord(record, elements.get(i), elementSchema);
    }
    return count;
}
 
Example 6
Source File: AvroUtils.java    From envelope with Apache License 2.0 4 votes vote down vote up
/**
 * Maps an Avro schema to its associated DataType.
 *
 * <p>A nullable union with exactly two members is first unwrapped to its non-null member.
 * The logical types "date", "timestamp-millis" and "decimal" are converted directly; any
 * other logical type is logged and the underlying base type is converted instead.
 *
 * @param schemaType Avro schema to convert
 * @return DataType representation
 * @throws RuntimeException if the Avro type has no conversion
 */
public static DataType dataTypeFor(Schema schemaType) {
  LOG.trace("Converting Schema[{}] to DataType", schemaType);

  // Only a two-member nullable union is an "optional"; larger unions are rendered as unions below.
  if (isNullable(schemaType) && schemaType.getTypes().size() == 2) {
    LOG.trace("Unwrapping simple 'optional' union for {}", schemaType);
    for (Schema member : schemaType.getTypes()) {
      if (!member.getType().equals(NULL)) {
        schemaType = member;
        break;
      }
    }
  }

  // Supported logical types short-circuit the base-type conversion.
  LogicalType logicalType = schemaType.getLogicalType();
  if (logicalType != null) {
    switch (logicalType.getName()) {
      case "date":
        return DataTypes.DateType;
      case "timestamp-millis":
        return DataTypes.TimestampType;
      case "decimal": {
        LogicalTypes.Decimal decimalType = (LogicalTypes.Decimal) logicalType;
        return DataTypes.createDecimalType(decimalType.getPrecision(), decimalType.getScale());
      }
      default:
        // Fall through to the base-type conversion
        LOG.warn("Unsupported LogicalType[{}], continuing with underlying base type", logicalType.getName());
    }
  }

  switch (schemaType.getType()) {
    case RECORD: {
      // One struct field per record field
      List<Field> recordFields = schemaType.getFields();
      List<StructField> structFields = Lists.newArrayListWithCapacity(recordFields.size());
      for (Field recordField : recordFields) {
        Schema fieldSchema = recordField.schema();
        structFields.add(
            DataTypes.createStructField(recordField.name(), dataTypeFor(fieldSchema), isNullable(fieldSchema)));
      }
      return DataTypes.createStructType(structFields);
    }
    case ARRAY: {
      Schema itemSchema = schemaType.getElementType();
      return DataTypes.createArrayType(dataTypeFor(itemSchema), isNullable(itemSchema));
    }
    case MAP: {
      Schema mapValueSchema = schemaType.getValueType();
      return DataTypes.createMapType(DataTypes.StringType, dataTypeFor(mapValueSchema), isNullable(mapValueSchema));
    }
    case UNION: {
      // Struct with one "memberN" field per union member, numbered in declaration order
      List<Schema> members = schemaType.getTypes();
      List<StructField> memberFields = Lists.newArrayListWithCapacity(members.size());
      for (int index = 0; index < members.size(); index++) {
        Schema member = members.get(index);
        memberFields.add(DataTypes.createStructField("member" + index, dataTypeFor(member), isNullable(member)));
      }
      return DataTypes.createStructType(memberFields);
    }
    case FIXED:
    case BYTES:
      return DataTypes.BinaryType;
    case ENUM:
    case STRING:
      return DataTypes.StringType;
    case INT:
      return DataTypes.IntegerType;
    case LONG:
      return DataTypes.LongType;
    case FLOAT:
      return DataTypes.FloatType;
    case DOUBLE:
      return DataTypes.DoubleType;
    case BOOLEAN:
      return DataTypes.BooleanType;
    case NULL:
      return DataTypes.NullType;
    default:
      throw new RuntimeException(String.format("Unrecognized or unsupported Avro Type conversion: %s", schemaType));
  }
}
 
Example 7
Source File: AvroRowSerializationSchema.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Converts {@code object} into the representation used for the given Avro {@code schema},
 * recursing into records, arrays, maps and simple unions.
 *
 * @param schema Avro schema describing the target type
 * @param object value to convert; {@code null} is returned as {@code null}
 * @return the converted value
 * @throws IllegalStateException if the schema is a RECORD but {@code object} is not a {@code Row}
 * @throws RuntimeException if the schema type is not handled by any case below
 */
private Object convertFlinkType(Schema schema, Object object) {
	if (object == null) {
		return null;
	}
	switch (schema.getType()) {
		case RECORD:
			if (object instanceof Row) {
				return convertRowToAvroRecord(schema, (Row) object);
			}
			throw new IllegalStateException("Row expected but was: " + object.getClass());
		case ENUM:
			// the symbol is taken from the value's string form
			return new GenericData.EnumSymbol(schema, object.toString());
		case ARRAY:
			// the value is cast to Object[]; each element is converted with the element schema
			final Schema elementSchema = schema.getElementType();
			final Object[] array = (Object[]) object;
			final GenericData.Array<Object> convertedArray = new GenericData.Array<>(array.length, schema);
			for (Object element : array) {
				convertedArray.add(convertFlinkType(elementSchema, element));
			}
			return convertedArray;
		case MAP:
			// keys become Utf8 via toString(); values are converted with the map's value schema
			final Map<?, ?> map = (Map<?, ?>) object;
			final Map<Utf8, Object> convertedMap = new HashMap<>();
			for (Map.Entry<?, ?> entry : map.entrySet()) {
				convertedMap.put(
					new Utf8(entry.getKey().toString()),
					convertFlinkType(schema.getValueType(), entry.getValue()));
			}
			return convertedMap;
		case UNION:
			// a two-member union containing NULL, or a single-member union, is unwrapped
			// to its one non-null/only member; any other union is passed through unchanged
			final List<Schema> types = schema.getTypes();
			final int size = types.size();
			final Schema actualSchema;
			if (size == 2 && types.get(0).getType() == Schema.Type.NULL) {
				actualSchema = types.get(1);
			} else if (size == 2 && types.get(1).getType() == Schema.Type.NULL) {
				actualSchema = types.get(0);
			} else if (size == 1) {
				actualSchema = types.get(0);
			} else {
				// generic type
				return object;
			}
			return convertFlinkType(actualSchema, object);
		case FIXED:
			// check for logical type: a BigDecimal is first turned into bytes by convertFromDecimal
			if (object instanceof BigDecimal) {
				return new GenericData.Fixed(
					schema,
					convertFromDecimal(schema, (BigDecimal) object));
			}
			// otherwise the value is cast to byte[]
			return new GenericData.Fixed(schema, (byte[]) object);
		case STRING:
			return new Utf8(object.toString());
		case BYTES:
			// check for logical type: a BigDecimal is first turned into bytes by convertFromDecimal
			if (object instanceof BigDecimal) {
				return ByteBuffer.wrap(convertFromDecimal(schema, (BigDecimal) object));
			}
			// otherwise the value is cast to byte[]
			return ByteBuffer.wrap((byte[]) object);
		case INT:
			// check for logical types: Date/Time values (and their LocalDate/LocalTime
			// counterparts, via valueOf) go through convertFromDate/convertFromTime
			if (object instanceof Date) {
				return convertFromDate(schema, (Date) object);
			} else if (object instanceof LocalDate) {
				return convertFromDate(schema, Date.valueOf((LocalDate) object));
			} else if (object instanceof Time) {
				return convertFromTime(schema, (Time) object);
			} else if (object instanceof LocalTime) {
				return convertFromTime(schema, Time.valueOf((LocalTime) object));
			}
			// any other value is returned as is
			return object;
		case LONG:
			// check for logical type: Timestamp values (and LocalDateTime, via valueOf)
			// go through convertFromTimestamp
			if (object instanceof Timestamp) {
				return convertFromTimestamp(schema, (Timestamp) object);
			} else if (object instanceof LocalDateTime) {
				return convertFromTimestamp(schema, Timestamp.valueOf((LocalDateTime) object));
			}
			// any other value is returned as is
			return object;
		case FLOAT:
		case DOUBLE:
		case BOOLEAN:
			// returned without conversion
			return object;
	}
	// reached only for schema types without a case above
	throw new RuntimeException("Unsupported Avro type:" + schema);
}
 
Example 8
Source File: AvroWriteSupportInt96Avro18.java    From datacollector with Apache License 2.0 4 votes vote down vote up
/**
 * Writes a Java array value. Arrays whose component type is not primitive are handed to
 * {@code writeObjectArray}; primitive arrays are dispatched on the Avro element type to the
 * matching primitive-array writer.
 *
 * @param schema     group type forwarded to {@code writeObjectArray}
 * @param avroSchema Avro array schema; its element type selects the writer
 * @param arrayClass runtime class of the array
 * @param value      the array instance
 * @throws IllegalArgumentException if the component type does not fit the Avro element type
 */
public void writeJavaArray(GroupType schema, Schema avroSchema,
    Class<?> arrayClass, Object value) {
  final Class<?> componentType = arrayClass.getComponentType();

  if (!componentType.isPrimitive()) {
    writeObjectArray(schema, avroSchema, (Object[]) value);
    return;
  }

  final Schema avroElementSchema = avroSchema.getElementType();
  final String arrayClassName = arrayClass.getName();

  switch (avroElementSchema.getType()) {
    case BOOLEAN:
      Preconditions.checkArgument(componentType == boolean.class,
          "Cannot write as boolean array: " + arrayClassName);
      writeBooleanArray((boolean[]) value);
      return;
    case INT:
      // Several primitive component types are written under an Avro int element type.
      if (componentType == byte.class) {
        writeByteArray((byte[]) value);
        return;
      }
      if (componentType == char.class) {
        writeCharArray((char[]) value);
        return;
      }
      if (componentType == short.class) {
        writeShortArray((short[]) value);
        return;
      }
      if (componentType == int.class) {
        writeIntArray((int[]) value);
        return;
      }
      throw new IllegalArgumentException(
          "Cannot write as an int array: " + arrayClassName);
    case LONG:
      Preconditions.checkArgument(componentType == long.class,
          "Cannot write as long array: " + arrayClassName);
      writeLongArray((long[]) value);
      return;
    case FLOAT:
      Preconditions.checkArgument(componentType == float.class,
          "Cannot write as float array: " + arrayClassName);
      writeFloatArray((float[]) value);
      return;
    case DOUBLE:
      Preconditions.checkArgument(componentType == double.class,
          "Cannot write as double array: " + arrayClassName);
      writeDoubleArray((double[]) value);
      return;
    default:
      throw new IllegalArgumentException("Cannot write " +
          avroElementSchema + " array: " + arrayClassName);
  }
}
 
Example 9
Source File: AvroSchemaManager.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Initialize given a schema: registers the schema under its type name (when it is a
 * named schema), registers record fields under their dotted names unless
 * {@code ignoreNameMap} is set, and recurses into nested schemas.
 */
protected void init(String namespace, Schema schema,
                                boolean ignoreNameMap) {

    /* map[type name] => schema; the first definition is kept */
    if (isNamedSchema(schema)) {
        final String typeName = schema.getName();
        if (!typeName2Schema.containsKey(typeName)) {
            AvroStorageLog.details("add " + typeName + "=" + schema
                    + " to type2Schema");
            typeName2Schema.put(typeName, schema);
        } else {
            AvroStorageLog.warn("Duplicate schemas defined for type:"
                    + typeName
                    + ". will ignore the second one:"
                    + schema);
        }
    }

    switch (schema.getType()) {
        case RECORD:
            /* map[field name] => field schema; the first definition is kept */
            for (Field field : schema.getFields()) {
                final Schema fieldSchema = field.schema();
                final String qualifiedName =
                        (namespace == null) ? field.name() : namespace + "." + field.name();

                if (!ignoreNameMap) {
                    if (!name2Schema.containsKey(qualifiedName)) {
                        AvroStorageLog.details("add " + qualifiedName + "=" + fieldSchema + " to name2Schema");
                        name2Schema.put(qualifiedName, fieldSchema);
                    } else {
                        AvroStorageLog.warn("Duplicate schemas defined for alias:" + qualifiedName
                                          + ". Will ignore the second one:" + fieldSchema);
                    }
                }

                init(qualifiedName, fieldSchema, ignoreNameMap);
            }
            break;

        case UNION:
            if (AvroStorageUtils.isAcceptableUnion(schema)) {
                /* an acceptable union is handled as its accepted member type */
                init(namespace, AvroStorageUtils.getAcceptedType(schema), ignoreNameMap);
            } else {
                /* otherwise visit every member, without registering field names */
                for (Schema member : schema.getTypes()) {
                    init(namespace, member, true);
                }
            }
            break;

        case ARRAY:
            init(namespace, schema.getElementType(), true);
            break;

        case MAP:
            init(namespace, schema.getValueType(), true);
            break;

        default:
            /* other types have nothing nested to visit */
            break;
    }
}
 
Example 10
Source File: AvroWriteSupportInt96Avro17.java    From datacollector with Apache License 2.0 4 votes vote down vote up
/**
 * Writes a Java array value. Arrays whose component type is not primitive are handed to
 * {@code writeObjectArray}; primitive arrays are dispatched on the Avro element type to the
 * matching primitive-array writer.
 *
 * @param schema     group type forwarded to {@code writeObjectArray}
 * @param avroSchema Avro array schema; its element type selects the writer
 * @param arrayClass runtime class of the array
 * @param value      the array instance
 * @throws IllegalArgumentException if the component type does not fit the Avro element type
 */
public void writeJavaArray(GroupType schema, Schema avroSchema,
    Class<?> arrayClass, Object value) {
  final Class<?> componentType = arrayClass.getComponentType();

  if (!componentType.isPrimitive()) {
    writeObjectArray(schema, avroSchema, (Object[]) value);
    return;
  }

  final Schema avroElementSchema = avroSchema.getElementType();
  final String arrayClassName = arrayClass.getName();

  switch (avroElementSchema.getType()) {
    case BOOLEAN:
      Preconditions.checkArgument(componentType == boolean.class,
          "Cannot write as boolean array: " + arrayClassName);
      writeBooleanArray((boolean[]) value);
      return;
    case INT:
      // Several primitive component types are written under an Avro int element type.
      if (componentType == byte.class) {
        writeByteArray((byte[]) value);
        return;
      }
      if (componentType == char.class) {
        writeCharArray((char[]) value);
        return;
      }
      if (componentType == short.class) {
        writeShortArray((short[]) value);
        return;
      }
      if (componentType == int.class) {
        writeIntArray((int[]) value);
        return;
      }
      throw new IllegalArgumentException(
          "Cannot write as an int array: " + arrayClassName);
    case LONG:
      Preconditions.checkArgument(componentType == long.class,
          "Cannot write as long array: " + arrayClassName);
      writeLongArray((long[]) value);
      return;
    case FLOAT:
      Preconditions.checkArgument(componentType == float.class,
          "Cannot write as float array: " + arrayClassName);
      writeFloatArray((float[]) value);
      return;
    case DOUBLE:
      Preconditions.checkArgument(componentType == double.class,
          "Cannot write as double array: " + arrayClassName);
      writeDoubleArray((double[]) value);
      return;
    default:
      throw new IllegalArgumentException("Cannot write " +
          avroElementSchema + " array: " + arrayClassName);
  }
}
 
Example 11
Source File: Array_of_record_GenericDeserializer_1629046702287533603_1629046702287533603.java    From avro-util with BSD 2-Clause "Simplified" License 4 votes vote down vote up
public Array_of_record_GenericDeserializer_1629046702287533603_1629046702287533603(Schema readerSchema) {
    this.readerSchema = readerSchema;
    this.arrayArrayElemSchema0 = readerSchema.getElementType();
    this.field0 = arrayArrayElemSchema0 .getField("field").schema();
}
 
Example 12
Source File: AvroSchema2Pig.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Convert a schema with field name to a pig schema.
 *
 * @param in             avro schema to convert
 * @param fieldName      name given to the resulting pig field
 * @param visitedRecords records currently being converted further up the recursion; a
 *                       record found in this set is mapped to BYTEARRAY instead of being
 *                       expanded again
 * @return the pig field schema
 * @throws IOException for a union that is not acceptable or an unsupported avro type
 */
private static ResourceFieldSchema inconvert(Schema in, String fieldName, Set<Schema> visitedRecords)
        throws IOException {

    AvroStorageLog.details("InConvert avro schema with field name " + fieldName);

    final Schema.Type avroType = in.getType();
    final ResourceFieldSchema result = new ResourceFieldSchema();
    result.setName(fieldName);

    switch (avroType) {
        case RECORD: {
            AvroStorageLog.details("convert to a pig tuple");

            if (visitedRecords.contains(in)) {
                result.setType(DataType.BYTEARRAY);
                break;
            }

            // Mark the record as in progress while its fields are converted.
            visitedRecords.add(in);
            result.setType(DataType.TUPLE);

            final List<Schema.Field> avroFields = in.getFields();
            final ResourceFieldSchema[] pigFields = new ResourceFieldSchema[avroFields.size()];
            for (int i = 0; i < pigFields.length; i++) {
                final Schema.Field avroField = avroFields.get(i);
                pigFields[i] = inconvert(avroField.schema(), avroField.name(), visitedRecords);
            }

            final ResourceSchema tupleSchema = new ResourceSchema();
            tupleSchema.setFields(pigFields);
            result.setSchema(tupleSchema);
            visitedRecords.remove(in);
            break;
        }

        case ARRAY: {
            AvroStorageLog.details("convert array to a pig bag");
            result.setType(DataType.BAG);
            final ResourceFieldSchema elementField =
                    inconvert(in.getElementType(), ARRAY_FIELD, visitedRecords);
            add2BagSchema(result, elementField);
            break;
        }

        case MAP:
            AvroStorageLog.details("convert map to a pig map");
            result.setType(DataType.MAP);
            break;

        case UNION: {
            if (!AvroStorageUtils.isAcceptableUnion(in)) {
                throw new IOException("Do not support generic union:" + in);
            }
            // An acceptable union takes the type and schema of its accepted member.
            final Schema accepted = AvroStorageUtils.getAcceptedType(in);
            final ResourceFieldSchema acceptedField = inconvert(accepted, null, visitedRecords);
            result.setType(acceptedField.getType());
            result.setSchema(acceptedField.getSchema());
            break;
        }

        case FIXED:
        case BYTES:
            result.setType(DataType.BYTEARRAY);
            break;

        case BOOLEAN:
            result.setType(DataType.BOOLEAN);
            break;

        case DOUBLE:
            result.setType(DataType.DOUBLE);
            break;

        case ENUM:
        case STRING:
            result.setType(DataType.CHARARRAY);
            break;

        case FLOAT:
            result.setType(DataType.FLOAT);
            break;

        case LONG:
            result.setType(DataType.LONG);
            break;

        case INT:
        case NULL:
            // value of NULL is always NULL, so NULL shares the integer type
            result.setType(DataType.INTEGER);
            break;

        default:
            throw new IOException("Unsupported avro type:" + avroType);
    }
    return result;
}
 
Example 13
Source File: Array_of_record_GenericDeserializer_1629046702287533603_1629046702287533603.java    From avro-util with BSD 2-Clause "Simplified" License 4 votes vote down vote up
public Array_of_record_GenericDeserializer_1629046702287533603_1629046702287533603(Schema readerSchema) {
    this.readerSchema = readerSchema;
    this.arrayArrayElemSchema0 = readerSchema.getElementType();
    this.field0 = arrayArrayElemSchema0 .getField("field").schema();
}
 
Example 14
Source File: Array_of_UNION_GenericDeserializer_585074122056792963_585074122056792963.java    From avro-util with BSD 2-Clause "Simplified" License 4 votes vote down vote up
public Array_of_UNION_GenericDeserializer_585074122056792963_585074122056792963(Schema readerSchema) {
    this.readerSchema = readerSchema;
    this.arrayArrayElemSchema0 = readerSchema.getElementType();
    this.arrayElemOptionSchema0 = arrayArrayElemSchema0 .getTypes().get(1);
    this.field0 = arrayElemOptionSchema0 .getField("field").schema();
}
 
Example 15
Source File: AvroRowSerializationSchema.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Converts {@code object} into the representation used for the given Avro {@code schema},
 * recursing into records, arrays, maps and simple unions.
 *
 * @param schema Avro schema describing the target type
 * @param object value to convert; {@code null} is returned as {@code null}
 * @return the converted value
 * @throws IllegalStateException if the schema is a RECORD but {@code object} is not a {@code Row}
 * @throws RuntimeException if the schema type is not handled by any case below
 */
private Object convertFlinkType(Schema schema, Object object) {
	if (object == null) {
		return null;
	}
	switch (schema.getType()) {
		case RECORD:
			if (object instanceof Row) {
				return convertRowToAvroRecord(schema, (Row) object);
			}
			throw new IllegalStateException("Row expected but was: " + object.getClass());
		case ENUM:
			// the symbol is taken from the value's string form
			return new GenericData.EnumSymbol(schema, object.toString());
		case ARRAY:
			// the value is cast to Object[]; each element is converted with the element schema
			final Schema elementSchema = schema.getElementType();
			final Object[] array = (Object[]) object;
			final GenericData.Array<Object> convertedArray = new GenericData.Array<>(array.length, schema);
			for (Object element : array) {
				convertedArray.add(convertFlinkType(elementSchema, element));
			}
			return convertedArray;
		case MAP:
			// keys become Utf8 via toString(); values are converted with the map's value schema
			final Map<?, ?> map = (Map<?, ?>) object;
			final Map<Utf8, Object> convertedMap = new HashMap<>();
			for (Map.Entry<?, ?> entry : map.entrySet()) {
				convertedMap.put(
					new Utf8(entry.getKey().toString()),
					convertFlinkType(schema.getValueType(), entry.getValue()));
			}
			return convertedMap;
		case UNION:
			// a two-member union containing NULL, or a single-member union, is unwrapped
			// to its one non-null/only member; any other union is passed through unchanged
			final List<Schema> types = schema.getTypes();
			final int size = types.size();
			final Schema actualSchema;
			if (size == 2 && types.get(0).getType() == Schema.Type.NULL) {
				actualSchema = types.get(1);
			} else if (size == 2 && types.get(1).getType() == Schema.Type.NULL) {
				actualSchema = types.get(0);
			} else if (size == 1) {
				actualSchema = types.get(0);
			} else {
				// generic type
				return object;
			}
			return convertFlinkType(actualSchema, object);
		case FIXED:
			// check for logical type: a BigDecimal is first turned into bytes by convertFromDecimal
			if (object instanceof BigDecimal) {
				return new GenericData.Fixed(
					schema,
					convertFromDecimal(schema, (BigDecimal) object));
			}
			// otherwise the value is cast to byte[]
			return new GenericData.Fixed(schema, (byte[]) object);
		case STRING:
			return new Utf8(object.toString());
		case BYTES:
			// check for logical type: a BigDecimal is first turned into bytes by convertFromDecimal
			if (object instanceof BigDecimal) {
				return ByteBuffer.wrap(convertFromDecimal(schema, (BigDecimal) object));
			}
			// otherwise the value is cast to byte[]
			return ByteBuffer.wrap((byte[]) object);
		case INT:
			// check for logical types: Date and Time values go through
			// convertFromDate/convertFromTime
			if (object instanceof Date) {
				return convertFromDate(schema, (Date) object);
			} else if (object instanceof Time) {
				return convertFromTime(schema, (Time) object);
			}
			// any other value is returned as is
			return object;
		case LONG:
			// check for logical type: Timestamp values go through convertFromTimestamp
			if (object instanceof Timestamp) {
				return convertFromTimestamp(schema, (Timestamp) object);
			}
			// any other value is returned as is
			return object;
		case FLOAT:
		case DOUBLE:
		case BOOLEAN:
			// returned without conversion
			return object;
	}
	// reached only for schema types without a case above
	throw new RuntimeException("Unsupported Avro type:" + schema);
}
 
Example 16
Source File: AvroNestedFieldGetter.java    From pentaho-hadoop-shims with Apache License 2.0 4 votes vote down vote up
/**
 * Collects field objects whose paths lead to the leaf primitives of an Avro schema.
 * Leading array/map wrappers are stripped first and contribute a path prefix; afterwards
 * each field name that also occurs later in the list gets a numeric "-N" suffix.
 *
 * @param s the schema to process
 * @return a List of field objects, or {@code null} when {@code s} is {@code null}
 * @throws KettleException if a problem occurs
 */
public static List<? extends IAvroInputField> getLeafFields( Schema s ) throws KettleException {
  if ( s == null ) {
    return null;
  }

  List<AvroInputField> leaves = new ArrayList<>();

  // Descend through top-level arrays/maps, recording one path step per level.
  StringBuilder prefix = new StringBuilder();
  Schema.Type kind = s.getType();
  while ( kind == Schema.Type.ARRAY || kind == Schema.Type.MAP ) {
    if ( kind == Schema.Type.ARRAY ) {
      prefix.append( "[0]" );
      s = s.getElementType();
    } else {
      prefix.append( KEY );
      s = s.getValueType();
    }
    kind = s.getType();
  }
  String root = prefix.toString();

  if ( kind == Schema.Type.RECORD ) {
    processRecord( root, s, leaves );
  } else if ( kind == Schema.Type.UNION ) {
    processUnion( root, s, leaves );
  } else {
    // the top-level array/map structure bottoms out with a primitive type:
    // a single zero-indexed path down to that primitive is created, and the
    // user can copy and adapt the path to extract other indexes into
    // separate Kettle fields
    AvroInputField leaf = createAvroField( root, s );
    if ( leaf != null ) {
      leaves.add( leaf );
    }
  }

  // Make names unique: try the plain name, then name-1, name-2, ... until no
  // later field carries the candidate name.
  for ( int i = 0; i < leaves.size() - 1; i++ ) {
    AvroInputField current = leaves.get( i );
    String baseName = current.getPentahoFieldName();
    String candidate = baseName;
    int suffix = 0;
    boolean clash;
    do {
      clash = false;
      for ( int j = i + 1; j < leaves.size(); j++ ) {
        if ( candidate.equals( leaves.get( j ).getPentahoFieldName() ) ) {
          clash = true;
          break;
        }
      }
      if ( clash ) {
        suffix++;
        candidate = baseName + "-" + suffix;
      }
    } while ( clash );

    current.setPentahoFieldName( candidate );
  }
  return leaves;
}
 
Example 17
Source File: AvroArray.java    From transport with BSD 2-Clause "Simplified" License 4 votes vote down vote up
/**
 * Creates an AvroArray backed by a new {@code GenericData.Array} constructed from
 * {@code size} and {@code arraySchema}, and remembers the array's element schema.
 *
 * @param arraySchema the Avro array schema
 * @param size        value passed as the first argument of the {@code GenericData.Array} constructor
 */
public AvroArray(Schema arraySchema, int size) {
  // The element schema is read first, so a schema without an element type fails here.
  _elementSchema = arraySchema.getElementType();
  _genericArray = new GenericData.Array(size, arraySchema);
}
 
Example 18
Source File: AvroArray.java    From transport with BSD 2-Clause "Simplified" License 4 votes vote down vote up
public AvroArray(GenericArray<Object> genericArray, Schema arraySchema) {
  _genericArray = genericArray;
  _elementSchema = arraySchema.getElementType();
}
 
Example 19
Source File: ParquetRecordReaderTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Writes one record containing a single-item nested array to a temporary Parquet file and
 * checks that {@code ParquetRecordReader} returns it as a row whose array field holds one
 * nested row.
 */
@Test
public void testNestedArrayGroup() throws IOException {
	final Schema arraySchema = unWrapSchema(NESTED_SCHEMA.getField("nestedArray").schema());
	Preconditions.checkState(arraySchema.getType().equals(Schema.Type.ARRAY));

	// Build the input: one nested item inside one top-level record.
	final GenericRecord nestedItem = new GenericRecordBuilder(arraySchema.getElementType())
		.set("type", "nested")
		.set("value", 1L)
		.build();

	final GenericRecord record = new GenericRecordBuilder(NESTED_SCHEMA)
		.set("nestedArray", ImmutableList.of(nestedItem))
		.set("foo", 34L)
		.build();

	// Write it out and open a reader on the resulting file.
	final Path parquetFile =
		createTempParquetFile(tempRoot.getRoot(), NESTED_SCHEMA, Collections.singletonList(record));
	final MessageType readSchema = new AvroSchemaConverter().convert(NESTED_SCHEMA);
	final ParquetRecordReader<Row> reader = new ParquetRecordReader<>(new RowReadSupport(), readSchema);

	final InputFile inputFile =
		HadoopInputFile.fromPath(new org.apache.hadoop.fs.Path(parquetFile.toUri()), testConfig);
	final ParquetFileReader fileReader =
		new ParquetFileReader(inputFile, ParquetReadOptions.builder().build());

	reader.initialize(fileReader, testConfig);
	assertFalse(reader.reachEnd());

	// Verify the row that comes back.
	final Row row = reader.nextRecord();
	assertEquals(7, row.getArity());
	assertEquals(34L, row.getField(0));

	final Object[] nestedRows = (Object[]) row.getField(6);
	assertEquals(1, nestedRows.length);

	final Row nestedRow = (Row) nestedRows[0];
	assertEquals("nested", nestedRow.getField(0));
	assertEquals(1L, nestedRow.getField(1));
}
 
Example 20
Source File: ParquetRecordReaderTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Writes one record containing a single-item nested array to a temporary Parquet file and
 * checks that {@code ParquetRecordReader} returns it as a row whose array field holds one
 * nested row.
 */
@Test
public void testNestedArrayGroup() throws IOException {
	final Schema arraySchema = unWrapSchema(NESTED_SCHEMA.getField("nestedArray").schema());
	Preconditions.checkState(arraySchema.getType().equals(Schema.Type.ARRAY));

	// Build the input: one nested item inside one top-level record.
	final GenericRecord nestedItem = new GenericRecordBuilder(arraySchema.getElementType())
		.set("type", "nested")
		.set("value", 1L)
		.build();

	final GenericRecord record = new GenericRecordBuilder(NESTED_SCHEMA)
		.set("nestedArray", ImmutableList.of(nestedItem))
		.set("foo", 34L)
		.build();

	// Write it out and open a reader on the resulting file.
	final Path parquetFile =
		createTempParquetFile(tempRoot.getRoot(), NESTED_SCHEMA, Collections.singletonList(record));
	final MessageType readSchema = new AvroSchemaConverter().convert(NESTED_SCHEMA);
	final ParquetRecordReader<Row> reader = new ParquetRecordReader<>(new RowReadSupport(), readSchema);

	final InputFile inputFile =
		HadoopInputFile.fromPath(new org.apache.hadoop.fs.Path(parquetFile.toUri()), testConfig);
	final ParquetFileReader fileReader =
		new ParquetFileReader(inputFile, ParquetReadOptions.builder().build());

	reader.initialize(fileReader, testConfig);
	assertFalse(reader.reachEnd());

	// Verify the row that comes back.
	final Row row = reader.nextRecord();
	assertEquals(7, row.getArity());
	assertEquals(34L, row.getField(0));

	final Object[] nestedRows = (Object[]) row.getField(6);
	assertEquals(1, nestedRows.length);

	final Row nestedRow = (Row) nestedRows[0];
	assertEquals("nested", nestedRow.getField(0));
	assertEquals(1L, nestedRow.getField(1));
}