Java Code Examples for org.apache.avro.generic.GenericData#Array

The following examples show how to use org.apache.avro.generic.GenericData#Array. Each example is taken from an open source project; the source file, project, and license are listed above the code.
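Before diving into the project examples, a minimal, self-contained sketch may help (the class and variable names here are illustrative and not taken from any of the projects below): GenericData.Array is Avro's java.util.List implementation for array-typed data, and it can be constructed either from the array schema plus an existing collection, or from an initial capacity and the schema, after which elements are added one by one. Both constructors appear throughout the examples that follow.

import java.util.Arrays;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;

public class GenericArrayExample {
    public static void main(String[] args) {
        Schema longArraySchema = Schema.createArray(Schema.create(Schema.Type.LONG));

        // Construct directly from an existing collection and the array schema...
        GenericData.Array<Long> fromCollection =
                new GenericData.Array<>(longArraySchema, Arrays.asList(1L, 2L, 3L));

        // ...or start from an initial capacity and add elements one by one.
        GenericData.Array<Long> byCapacity = new GenericData.Array<>(3, longArraySchema);
        byCapacity.add(1L);
        byCapacity.add(2L);
        byCapacity.add(3L);

        System.out.println(fromCollection); // [1, 2, 3]
        System.out.println(byCapacity);     // [1, 2, 3]
    }
}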
Example 1
Source File: TestAvroDecoder.java    From presto with Apache License 2.0
@Test
public void testNestedLongArrayWithNulls()
{
    DecoderTestColumnHandle row = new DecoderTestColumnHandle(0, "row", new ArrayType(new ArrayType(BIGINT)), "array_field", null, null, false, false, false);
    Schema schema = SchemaBuilder.array().items().nullable().array().items().nullable().longType();
    List<List<Long>> data = Arrays.asList(
            ImmutableList.of(12L, 15L, 17L),
            ImmutableList.of(22L, 25L, 27L, 29L),
            null,
            Arrays.asList(3L, 5L, null, 6L));

    GenericArray<List<Long>> list = new GenericData.Array<>(schema, data);
    Map<DecoderColumnHandle, FieldValueProvider> decodedRow = buildAndDecodeColumn(row, "array_field", schema.toString(), list);

    checkArrayValue(decodedRow, row, list);
}
 
Example 2
Source File: TestAvroDecoder.java    From presto with Apache License 2.0
@Test
public void testNestedStringArrayWithNulls()
{
    DecoderTestColumnHandle row = new DecoderTestColumnHandle(0, "row", new ArrayType(new ArrayType(VARCHAR)), "array_field", null, null, false, false, false);
    Schema schema = SchemaBuilder.array().items().nullable().array().items().nullable().stringType();
    List<List<String>> data = Arrays.asList(
            ImmutableList.of("a", "bb", "ccc"),
            ImmutableList.of("foo", "bar", "baz", "car"),
            null,
            Arrays.asList("boo", "hoo", null, "hoo"));

    GenericArray<List<String>> list = new GenericData.Array<>(schema, data);
    Map<DecoderColumnHandle, FieldValueProvider> decodedRow = buildAndDecodeColumn(row, "array_field", schema.toString(), list);

    checkArrayValue(decodedRow, row, list);
}
 
Example 3
Source File: TestAvroDecoder.java    From presto with Apache License 2.0
@Test
public void testDeeplyNestedLongArray()
{
    Schema schema = SchemaBuilder.array()
            .items()
            .array()
            .items()
            .array()
            .items()
            .longType();

    List<List<List<Long>>> data = ImmutableList.<List<List<Long>>>builder()
            .add(ImmutableList.<List<Long>>builder()
                    .add(ImmutableList.of(12L, 15L, 17L))
                    .add(ImmutableList.of(22L, 25L, 27L, 29L))
                    .build())
            .build();

    GenericArray<List<List<Long>>> list = new GenericData.Array<>(schema, data);
    DecoderTestColumnHandle row = new DecoderTestColumnHandle(0, "row", new ArrayType(new ArrayType(new ArrayType(BIGINT))), "array_field", null, null, false, false, false);
    Map<DecoderColumnHandle, FieldValueProvider> decodedRow = buildAndDecodeColumn(row, "array_field", schema.toString(), list);

    checkArrayValue(decodedRow, row, list);
}
 
Example 4
Source File: TestAvroTypeUtil.java    From nifi with Apache License 2.0
/**
 * The issue concerns an Avro schema that declares a default value for an
 * array field. See
 * <a href="https://issues.apache.org/jira/browse/NIFI-4893">NIFI-4893</a>.
 * @throws IOException
 *             if the schema resource cannot be read.
 */
@Test
public void testDefaultArrayValuesInRecordsCase2() throws IOException {
    Schema avroSchema = new Schema.Parser().parse(getClass().getResourceAsStream("defaultArrayInRecords2.json"));
    GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
    Record field1Record = new GenericRecordBuilder(avroSchema.getField("field1").schema()).build();
    builder.set("field1", field1Record);
    Record r = builder.build();

    @SuppressWarnings("unchecked")
    GenericData.Array<Integer> values = (GenericData.Array<Integer>) ((GenericRecord) r.get("field1"))
            .get("listOfInt");
    assertArrayEquals(new Object[] {1,2,3}, values.toArray());
    RecordSchema record = AvroTypeUtil.createSchema(avroSchema);
    RecordField field = record.getField("field1").get();
    assertEquals(RecordFieldType.RECORD, field.getDataType().getFieldType());
    RecordDataType data = (RecordDataType) field.getDataType();
    RecordSchema childSchema = data.getChildSchema();
    RecordField childField = childSchema.getField("listOfInt").get();
    assertEquals(RecordFieldType.ARRAY, childField.getDataType().getFieldType());
    assertTrue(childField.getDefaultValue() instanceof Object[]);
    assertArrayEquals(new Object[] {1,2,3}, ((Object[]) childField.getDefaultValue()));
}
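The resource defaultArrayInRecords2.json referenced above is not reproduced on this page. As a rough, hypothetical reconstruction inferred only from the assertions in the test, a schema shaped like the one below would exercise the same default-array handling (everything other than the field names "field1" and "listOfInt" is an assumption, not the actual NiFi test resource):

// Hypothetical sketch only: an inner record whose "listOfInt" array field
// declares the default [1, 2, 3], nested under the outer field "field1".
String json = "{\"type\":\"record\",\"name\":\"Outer\",\"fields\":[{"
        + "\"name\":\"field1\",\"type\":{\"type\":\"record\",\"name\":\"Inner\",\"fields\":[{"
        + "\"name\":\"listOfInt\",\"type\":{\"type\":\"array\",\"items\":\"int\"},"
        + "\"default\":[1,2,3]}]}}]}";
Schema avroSchema = new Schema.Parser().parse(json);
// GenericRecordBuilder then populates "listOfInt" from the declared default,
// which is what the assertions on {1, 2, 3} in the test above rely on.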
 
Example 5
Source File: AvroGenericRecordAccessorTest.java    From incubator-gobblin with Apache License 2.0
@Test
public void testGetStringArrayUtf8() throws IOException {
  // Expectation: Even though we read an Avro object with UTF8 underneath, the accessor converts it into a
  // Java String
  List<String> expectedQuotes = ImmutableList.of("abc", "defg");

  GenericData.Array<Utf8> strings = new GenericData.Array<Utf8>(2, Schema.createArray(Schema.create(Schema.Type.STRING)));
  expectedQuotes.forEach(s -> strings.add(new Utf8(s)));
  record.put("favorite_quotes", strings);

  Assert.assertEquals(accessor.getGeneric("favorite_quotes"), expectedQuotes);
}
 
Example 6
Source File: FastGenericSerializerGeneratorTest.java    From avro-util with BSD 2-Clause "Simplified" License
private <E> void shouldWriteArrayOfPrimitives(Schema.Type elementType, List<E> data) {
  // given
  Schema elementSchema = Schema.create(elementType);
  Schema arraySchema = Schema.createArray(elementSchema);

  // Serialization should work on various types of lists
  GenericData.Array<E> vanillaAvroList = new GenericData.Array<>(0, arraySchema);
  ArrayList<E> javaList = new ArrayList<>(0);
  for (E element: data) {
    vanillaAvroList.add(element);
    javaList.add(element);
  }

  // when
  List<E> resultFromAvroList = decodeRecord(arraySchema, dataAsBinaryDecoder(vanillaAvroList));
  List<E> resultFromJavaList = decodeRecord(arraySchema, dataAsBinaryDecoder(javaList, arraySchema));
  List<E> resultFromPrimitiveList = decodeRecord(arraySchema, dataAsBinaryDecoder(data, arraySchema));

  // then
  Assert.assertEquals(resultFromAvroList.size(), data.size());
  Assert.assertEquals(resultFromJavaList.size(), data.size());
  Assert.assertEquals(resultFromPrimitiveList.size(), data.size());
  for (int i = 0; i < data.size(); i++) {
    Assert.assertEquals(resultFromAvroList.get(i), data.get(i));
    Assert.assertEquals(resultFromJavaList.get(i), data.get(i));
    Assert.assertEquals(resultFromPrimitiveList.get(i), data.get(i));
  }
}
 
Example 7
Source File: FastGenericDeserializerGeneratorTest.java    From avro-util with BSD 2-Clause "Simplified" License
private <E, L> void shouldReadArrayOfPrimitives(Implementation implementation, Schema.Type elementType, Class<L> expectedListClass, List<E> data) {
  // given
  Schema elementSchema = Schema.create(elementType);
  Schema arraySchema = Schema.createArray(elementSchema);

  GenericData.Array<E> avroArray = new GenericData.Array<>(0, arraySchema);
  for (E element: data) {
    avroArray.add(element);
  }

  // when
  List<E> array = implementation.decode(arraySchema, arraySchema, genericDataAsDecoder(avroArray));

  // then
  Assert.assertEquals(array.size(), data.size());
  for (int i = 0; i < data.size(); i++) {
    Assert.assertEquals(array.get(i), data.get(i));
  }

  if (implementation.isFast) {
    // The extended API should always be available, regardless of whether warm or cold
    Assert.assertTrue(Arrays.stream(array.getClass().getInterfaces()).anyMatch(c -> c.equals(expectedListClass)),
        "The returned type should implement " + expectedListClass.getSimpleName());

    try {
      Method getPrimitiveMethod = expectedListClass.getMethod("getPrimitive", int.class);
      for (int i = 0; i < data.size(); i++) {
        Assert.assertEquals(getPrimitiveMethod.invoke(array, i), data.get(i));
      }
    } catch (Exception e) {
      Assert.fail("Failed to access the getPrimitive function!");
    }
  }
}
 
Example 8
Source File: TestAvroTypeUtil.java    From nifi with Apache License 2.0
/**
 * The issue concerns an Avro schema that declares a default value for an
 * array field. See
 * <a href="https://issues.apache.org/jira/browse/NIFI-4893">NIFI-4893</a>.
 * @throws IOException
 *             if the schema resource cannot be read.
 */
@Test
public void testDefaultArrayValue1() throws IOException {
    Schema avroSchema = new Schema.Parser().parse(getClass().getResourceAsStream("defaultArrayValue1.json"));
    GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
    Record r = builder.build();
    @SuppressWarnings("unchecked")
    GenericData.Array<Integer> values = (GenericData.Array<Integer>) r.get("listOfInt");
    assertEquals(values.size(), 0);
    RecordSchema record = AvroTypeUtil.createSchema(avroSchema);
    RecordField field = record.getField("listOfInt").get();
    assertEquals(RecordFieldType.ARRAY, field.getDataType().getFieldType());
    assertTrue(field.getDefaultValue() instanceof Object[]);
    assertEquals(0, ((Object[]) field.getDefaultValue()).length);
}
 
Example 9
Source File: KsqlGenericRowAvroSerializerTest.java    From ksql-fork-with-deep-learning-function with Apache License 2.0
@Test
public void shouldSerializeRowWithNullCorrectly() {
  SchemaRegistryClient schemaRegistryClient = new MockSchemaRegistryClient();
  KsqlGenericRowAvroSerializer ksqlGenericRowAvroSerializer = new KsqlGenericRowAvroSerializer
      (schema, schemaRegistryClient, new KsqlConfig(new HashMap<>()));

  List columns = Arrays.asList(1511897796092L, 1L, null, 10.0, new Double[]{100.0},
                               Collections.singletonMap("key1", 100.0));

  GenericRow genericRow = new GenericRow(columns);
  byte[] serializedRow = ksqlGenericRowAvroSerializer.serialize("t1", genericRow);
  KafkaAvroDeserializer kafkaAvroDeserializer = new KafkaAvroDeserializer(schemaRegistryClient);
  GenericRecord genericRecord = (GenericRecord) kafkaAvroDeserializer.deserialize("t1", serializedRow);
  Assert.assertNotNull(genericRecord);
  assertThat("Incorrect serialization.", genericRecord.get("ordertime".toUpperCase()), equalTo
      (1511897796092L));
  assertThat("Incorrect serialization.", genericRecord.get("orderid".toUpperCase()), equalTo
      (1L));
  assertThat("Incorrect serialization.", genericRecord.get("itemid".toUpperCase()), equalTo
      (null));
  assertThat("Incorrect serialization.", genericRecord.get("orderunits".toUpperCase()), equalTo
      (10.0));

  GenericData.Array array = (GenericData.Array) genericRecord.get("arraycol".toUpperCase());
  Map map = (Map) genericRecord.get("mapcol".toUpperCase());

  assertThat("Incorrect serialization.", array.size(), equalTo(1));
  assertThat("Incorrect serialization.", array.get(0), equalTo(100.0));
  assertThat("Incorrect serialization.", map,
             equalTo(Collections.singletonMap(new Utf8("key1"), 100.0)));

}
 
Example 10
Source File: KsqlGenericRowAvroDeserializer.java    From ksql-fork-with-deep-learning-function with Apache License 2.0
private Object handleArray(Schema fieldSchema, GenericData.Array genericArray) {
  Class elementClass = SchemaUtil.getJavaType(fieldSchema.valueSchema());
  Object[] arrayField =
      (Object[]) java.lang.reflect.Array.newInstance(elementClass, genericArray.size());
  for (int i = 0; i < genericArray.size(); i++) {
    Object obj = enforceFieldType(fieldSchema.valueSchema(), genericArray.get(i));
    arrayField[i] = obj;
  }
  return arrayField;
}
 
Example 11
Source File: FastGenericDeserializerGeneratorTest.java    From avro-util with BSD 2-Clause "Simplified" License
@Test(groups = {"deserializationTest"}, dataProvider = "Implementation")
public void shouldReadArrayOfRecords(Implementation implementation) {
  // given
  Schema recordSchema = createRecord("record", createPrimitiveUnionFieldSchema("field", Schema.Type.STRING));

  Schema arrayRecordSchema = Schema.createArray(recordSchema);

  GenericData.Record subRecordBuilder = new GenericData.Record(recordSchema);
  subRecordBuilder.put("field", "abc");

  GenericData.Array<GenericData.Record> recordsArray = new GenericData.Array<>(0, arrayRecordSchema);
  recordsArray.add(subRecordBuilder);
  recordsArray.add(subRecordBuilder);

  // when
  GenericData.Array<GenericRecord> array = implementation.decode(arrayRecordSchema, arrayRecordSchema, genericDataAsDecoder(recordsArray));

  // then
  Assert.assertEquals(2, array.size());
  Assert.assertEquals(new Utf8("abc"), array.get(0).get("field"));
  Assert.assertEquals(new Utf8("abc"), array.get(1).get("field"));

  // given

  arrayRecordSchema = Schema.createArray(createUnionSchema(recordSchema));

  subRecordBuilder = new GenericData.Record(recordSchema);
  subRecordBuilder.put("field", "abc");

  recordsArray = new GenericData.Array<>(0, arrayRecordSchema);
  recordsArray.add(subRecordBuilder);
  recordsArray.add(subRecordBuilder);

  // when
  array = implementation.decode(arrayRecordSchema, arrayRecordSchema, genericDataAsDecoder(recordsArray));

  // then
  Assert.assertEquals(2, array.size());
  Assert.assertEquals(new Utf8("abc"), array.get(0).get("field"));
  Assert.assertEquals(new Utf8("abc"), array.get(1).get("field"));
}
 
Example 12
Source File: AvroParquetMorphlineTest.java    From kite with Apache License 2.0
@Test
public void testAll() throws Exception {
  Schema schema = new Schema.Parser().parse(new File("src/test/resources/test-avro-schemas/all.avsc"));

  File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
  tmp.deleteOnExit();
  tmp.delete();
  Path file = new Path(tmp.getPath());
  
  AvroParquetWriter<GenericRecord> writer = new
      AvroParquetWriter<GenericRecord>(file, schema);

  GenericData.Record nestedRecord = new GenericRecordBuilder(
      schema.getField("mynestedrecord").schema())
          .set("mynestedint", 1).build();

  List<Integer> integerArray = Arrays.asList(1, 2, 3);
  GenericData.Array<Integer> genericIntegerArray = new GenericData.Array<Integer>(
      Schema.createArray(Schema.create(Schema.Type.INT)), integerArray);

  GenericFixed genericFixed = new GenericData.Fixed(
      Schema.createFixed("fixed", null, null, 1), new byte[] { (byte) 65 });

  List<Integer> emptyArray = new ArrayList<Integer>();
  ImmutableMap emptyMap = new ImmutableMap.Builder<String, Integer>().build();

  GenericData.Record record = new GenericRecordBuilder(schema)
      .set("mynull", null)
      .set("myboolean", true)
      .set("myint", 1)
      .set("mylong", 2L)
      .set("myfloat", 3.1f)
      .set("mydouble", 4.1)
      .set("mybytes", ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)))
      .set("mystring", "hello")
      .set("mynestedrecord", nestedRecord)
      .set("myenum", "a")
      .set("myarray", genericIntegerArray)
      .set("myemptyarray", emptyArray)
      .set("myoptionalarray", genericIntegerArray)
      .set("mymap", ImmutableMap.of("a", 1, "b", 2))
      .set("myemptymap", emptyMap)
      .set("myfixed", genericFixed)
      .build();

  writer.write(record);
  writer.close();

  morphline = createMorphline("test-morphlines/readAvroParquetFileWithProjectionSubSchema");
  
  Record morphlineRecord = new Record();
  morphlineRecord.put(ReadAvroParquetFileBuilder.FILE_UPLOAD_URL, file.toString());
  collector.reset();
  
  assertTrue(morphline.process(morphlineRecord));

  assertEquals(1, collector.getRecords().size());
  GenericData.Record actualRecord = (GenericData.Record) collector.getFirstRecord().getFirstValue(Fields.ATTACHMENT_BODY);
  assertNotNull(actualRecord);
  assertEquals(null, actualRecord.get("mynull"));
  assertEquals(true, actualRecord.get("myboolean"));
  assertEquals(1, actualRecord.get("myint"));
  assertEquals(2L, actualRecord.get("mylong"));
  assertEquals(null, actualRecord.get("myfloat"));
  assertEquals(4.1, actualRecord.get("mydouble"));
  assertEquals(ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)), actualRecord.get("mybytes"));
  assertEquals("hello", actualRecord.get("mystring"));
  assertEquals("a", actualRecord.get("myenum"));
  assertEquals(nestedRecord, actualRecord.get("mynestedrecord"));
  assertEquals(integerArray, actualRecord.get("myarray"));
  assertEquals(emptyArray, actualRecord.get("myemptyarray"));
  assertEquals(integerArray, actualRecord.get("myoptionalarray"));
  assertEquals(ImmutableMap.of("a", 1, "b", 2), actualRecord.get("mymap"));
  assertEquals(emptyMap, actualRecord.get("myemptymap"));
  assertEquals(genericFixed, actualRecord.get("myfixed"));
}
 
Example 13
Source File: TestReadWrite.java    From parquet-mr with Apache License 2.0
@Test
public void testAll() throws Exception {
  Schema schema = new Schema.Parser().parse(
      Resources.getResource("all.avsc").openStream());

  Path file = new Path(createTempFile().getPath());
  List<Integer> integerArray = Arrays.asList(1, 2, 3);
  GenericData.Record nestedRecord = new GenericRecordBuilder(
    schema.getField("mynestedrecord").schema())
    .set("mynestedint", 1).build();
  List<Integer> emptyArray = new ArrayList<Integer>();
  Schema arrayOfOptionalIntegers = Schema.createArray(
    optional(Schema.create(Schema.Type.INT)));
  GenericData.Array<Integer> genericIntegerArrayWithNulls =
    new GenericData.Array<Integer>(
      arrayOfOptionalIntegers,
      Arrays.asList(1, null, 2, null, 3));
  GenericFixed genericFixed = new GenericData.Fixed(
    Schema.createFixed("fixed", null, null, 1), new byte[]{(byte) 65});
  ImmutableMap<String, Integer> emptyMap = new ImmutableMap.Builder<String, Integer>().build();

  try(ParquetWriter<GenericRecord> writer = AvroParquetWriter
      .<GenericRecord>builder(file)
      .withSchema(schema)
      .withConf(testConf)
      .build()) {

    GenericData.Array<Integer> genericIntegerArray = new GenericData.Array<Integer>(
      Schema.createArray(Schema.create(Schema.Type.INT)), integerArray);

    GenericData.Record record = new GenericRecordBuilder(schema)
      .set("mynull", null)
      .set("myboolean", true)
      .set("myint", 1)
      .set("mylong", 2L)
      .set("myfloat", 3.1f)
      .set("mydouble", 4.1)
      .set("mybytes", ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)))
      .set("mystring", "hello")
      .set("mynestedrecord", nestedRecord)
      .set("myenum", "a")
      .set("myarray", genericIntegerArray)
      .set("myemptyarray", emptyArray)
      .set("myoptionalarray", genericIntegerArray)
      .set("myarrayofoptional", genericIntegerArrayWithNulls)
      .set("mymap", ImmutableMap.of("a", 1, "b", 2))
      .set("myemptymap", emptyMap)
      .set("myfixed", genericFixed)
      .build();

    writer.write(record);
  }

  final GenericRecord nextRecord;
  try(AvroParquetReader<GenericRecord> reader = new AvroParquetReader<GenericRecord>(testConf, file)) {
    nextRecord = reader.read();
  }

  Object expectedEnumSymbol = compat ? "a" :
      new GenericData.EnumSymbol(schema.getField("myenum").schema(), "a");

  assertNotNull(nextRecord);
  assertEquals(null, nextRecord.get("mynull"));
  assertEquals(true, nextRecord.get("myboolean"));
  assertEquals(1, nextRecord.get("myint"));
  assertEquals(2L, nextRecord.get("mylong"));
  assertEquals(3.1f, nextRecord.get("myfloat"));
  assertEquals(4.1, nextRecord.get("mydouble"));
  assertEquals(ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)), nextRecord.get("mybytes"));
  assertEquals(str("hello"), nextRecord.get("mystring"));
  assertEquals(expectedEnumSymbol, nextRecord.get("myenum"));
  assertEquals(nestedRecord, nextRecord.get("mynestedrecord"));
  assertEquals(integerArray, nextRecord.get("myarray"));
  assertEquals(emptyArray, nextRecord.get("myemptyarray"));
  assertEquals(integerArray, nextRecord.get("myoptionalarray"));
  assertEquals(genericIntegerArrayWithNulls, nextRecord.get("myarrayofoptional"));
  assertEquals(ImmutableMap.of(str("a"), 1, str("b"), 2), nextRecord.get("mymap"));
  assertEquals(emptyMap, nextRecord.get("myemptymap"));
  assertEquals(genericFixed, nextRecord.get("myfixed"));
}
 
Example 14
Source File: AvroRowSerializationSchema.java    From flink with Apache License 2.0
private Object convertFlinkType(Schema schema, Object object) {
	if (object == null) {
		return null;
	}
	switch (schema.getType()) {
		case RECORD:
			if (object instanceof Row) {
				return convertRowToAvroRecord(schema, (Row) object);
			}
			throw new IllegalStateException("Row expected but was: " + object.getClass());
		case ENUM:
			return new GenericData.EnumSymbol(schema, object.toString());
		case ARRAY:
			final Schema elementSchema = schema.getElementType();
			final Object[] array = (Object[]) object;
			final GenericData.Array<Object> convertedArray = new GenericData.Array<>(array.length, schema);
			for (Object element : array) {
				convertedArray.add(convertFlinkType(elementSchema, element));
			}
			return convertedArray;
		case MAP:
			final Map<?, ?> map = (Map<?, ?>) object;
			final Map<Utf8, Object> convertedMap = new HashMap<>();
			for (Map.Entry<?, ?> entry : map.entrySet()) {
				convertedMap.put(
					new Utf8(entry.getKey().toString()),
					convertFlinkType(schema.getValueType(), entry.getValue()));
			}
			return convertedMap;
		case UNION:
			final List<Schema> types = schema.getTypes();
			final int size = types.size();
			final Schema actualSchema;
			if (size == 2 && types.get(0).getType() == Schema.Type.NULL) {
				actualSchema = types.get(1);
			} else if (size == 2 && types.get(1).getType() == Schema.Type.NULL) {
				actualSchema = types.get(0);
			} else if (size == 1) {
				actualSchema = types.get(0);
			} else {
				// generic type
				return object;
			}
			return convertFlinkType(actualSchema, object);
		case FIXED:
			// check for logical type
			if (object instanceof BigDecimal) {
				return new GenericData.Fixed(
					schema,
					convertFromDecimal(schema, (BigDecimal) object));
			}
			return new GenericData.Fixed(schema, (byte[]) object);
		case STRING:
			return new Utf8(object.toString());
		case BYTES:
			// check for logical type
			if (object instanceof BigDecimal) {
				return ByteBuffer.wrap(convertFromDecimal(schema, (BigDecimal) object));
			}
			return ByteBuffer.wrap((byte[]) object);
		case INT:
			// check for logical types
			if (object instanceof Date) {
				return convertFromDate(schema, (Date) object);
			} else if (object instanceof LocalDate) {
				return convertFromDate(schema, Date.valueOf((LocalDate) object));
			} else if (object instanceof Time) {
				return convertFromTime(schema, (Time) object);
			} else if (object instanceof LocalTime) {
				return convertFromTime(schema, Time.valueOf((LocalTime) object));
			}
			return object;
		case LONG:
			// check for logical type
			if (object instanceof Timestamp) {
				return convertFromTimestamp(schema, (Timestamp) object);
			} else if (object instanceof LocalDateTime) {
				return convertFromTimestamp(schema, Timestamp.valueOf((LocalDateTime) object));
			}
			return object;
		case FLOAT:
		case DOUBLE:
		case BOOLEAN:
			return object;
	}
	throw new RuntimeException("Unsupported Avro type:" + schema);
}
 
Example 15
Source File: TestConvertAvroToORC.java    From nifi with Apache License 2.0
@Test
public void test_onTrigger_array_of_records() throws Exception {
    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/array_of_records.avsc"));
    List<GenericRecord> innerRecords = new LinkedList<>();

    final GenericRecord outerRecord = new GenericData.Record(schema);

    Schema arraySchema = schema.getField("records").schema();
    Schema innerRecordSchema = arraySchema.getElementType();
    final GenericRecord innerRecord1 = new GenericData.Record(innerRecordSchema);
    innerRecord1.put("name", "Joe");
    innerRecord1.put("age", 42);

    innerRecords.add(innerRecord1);

    final GenericRecord innerRecord2 = new GenericData.Record(innerRecordSchema);
    innerRecord2.put("name", "Mary");
    innerRecord2.put("age", 28);

    innerRecords.add(innerRecord2);

    GenericData.Array<GenericRecord> array = new GenericData.Array<>(arraySchema, innerRecords);
    outerRecord.put("records", array);

    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
        dataFileWriter.create(schema, out);
        dataFileWriter.append(outerRecord);
    }
    out.close();

    // Build a flow file from the Avro record
    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

    // Write the flow file out to disk, since the ORC Reader needs a path
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS org_apache_nifi_outer_record " +
            "(records ARRAY<STRUCT<name:STRING, age:INT>>)"
            + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
    assertEquals("1", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
    FileOutputStream fos = new FileOutputStream("target/test1.orc");
    fos.write(resultContents);
    fos.flush();
    fos.close();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    Object o = rows.next(null);
    assertNotNull(o);
    assertTrue(o instanceof OrcStruct);
    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(schema));

    // Verify the record contains an array
    Object arrayFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("records"));
    assertTrue(arrayFieldObject instanceof ArrayList);
    ArrayList<?> arrayField = (ArrayList<?>) arrayFieldObject;
    assertEquals(2, arrayField.size());

    // Verify the first element. Should be a record with two fields "name" and "age"
    Object element = arrayField.get(0);
    assertTrue(element instanceof OrcStruct);
    StructObjectInspector elementInspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(innerRecordSchema));
    Object nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name"));
    assertTrue(nameObject instanceof Text);
    assertEquals("Joe", nameObject.toString());
    Object ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age"));
    assertTrue(ageObject instanceof IntWritable);
    assertEquals(42, ((IntWritable) ageObject).get());

    // Verify the second element. Should be a record with two fields "name" and "age"
    element = arrayField.get(1);
    assertTrue(element instanceof OrcStruct);
    nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name"));
    assertTrue(nameObject instanceof Text);
    assertEquals("Mary", nameObject.toString());
    ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age"));
    assertTrue(ageObject instanceof IntWritable);
    assertEquals(28, ((IntWritable) ageObject).get());
}
 
Example 16
Source File: AvroGenericRecordAccessor.java    From incubator-gobblin with Apache License 2.0
@Override
public void setStringArray(String fieldName, List<String> value) {
  GenericData.Array<String> avroArray = new GenericData.Array<>(
      Schema.createArray(Schema.create(Schema.Type.STRING)), value);
  set(fieldName, avroArray);
}
 
Example 17
Source File: CustomAvroRecordPreparer.java    From pxf with Apache License 2.0
public void deserialize(GenericRecord record) throws IOException {

        // 1. integers
        @SuppressWarnings("unchecked")
        GenericData.Array<Integer> intArray = (GenericData.Array<Integer>) record.get("num");
        for (int i = 0; i < intArray.size(); i++) {
            num[i] = intArray.get(i).intValue();
        }

        int1 = ((Integer) record.get("int1")).intValue();
        int2 = ((Integer) record.get("int2")).intValue();

        // 2. strings
        @SuppressWarnings("unchecked")
        GenericData.Array<Utf8> stringArray = (GenericData.Array<Utf8>) record.get("strings");
        for (int i = 0; i < stringArray.size(); i++) {
            strings[i] = stringArray.get(i).toString();
        }

        st1 = record.get("st1").toString();

        // 3. doubles
        @SuppressWarnings("unchecked")
        GenericData.Array<Double> doubleArray = (GenericData.Array<Double>) record.get("dubs");
        for (int i = 0; i < doubleArray.size(); i++) {
            dubs[i] = doubleArray.get(i).doubleValue();
        }

        db = ((Double) record.get("db")).doubleValue();

        // 4. floats
        @SuppressWarnings("unchecked")
        GenericData.Array<Float> floatArray = (GenericData.Array<Float>) record.get("fts");
        for (int i = 0; i < floatArray.size(); i++) {
            fts[i] = floatArray.get(i).floatValue();
        }

        ft = ((Float) record.get("ft")).floatValue();

        // 5. longs
        @SuppressWarnings("unchecked")
        GenericData.Array<Long> longArray = (GenericData.Array<Long>) record.get("lngs");
        for (int i = 0; i < longArray.size(); i++) {
            lngs[i] = longArray.get(i).longValue();
        }

        lng = ((Long) record.get("lng")).longValue();

        // 6. booleans
        @SuppressWarnings("unchecked")
        GenericData.Array<Boolean> booleanArray = (GenericData.Array<Boolean>) record.get("bls");
        for (int i = 0; i < booleanArray.size(); i++) {
            bls[i] = booleanArray.get(i);
        }
        bl = (Boolean) record.get("bl");

        // 7. bytes
        ByteBuffer bytesBuffer = (ByteBuffer) record.get("bts");
        bts = bytesBuffer.array();
    }
 
Example 18
Source File: HoodieTestDataGenerator.java    From hudi with Apache License 2.0
public static GenericRecord generateGenericRecord(String rowKey, String riderName, String driverName,
                                                  double timestamp, boolean isDeleteRecord,
                                                  boolean isFlattened) {
  GenericRecord rec = new GenericData.Record(isFlattened ? FLATTENED_AVRO_SCHEMA : AVRO_SCHEMA);
  rec.put("_row_key", rowKey);
  rec.put("timestamp", timestamp);
  rec.put("rider", riderName);
  rec.put("driver", driverName);
  rec.put("begin_lat", RAND.nextDouble());
  rec.put("begin_lon", RAND.nextDouble());
  rec.put("end_lat", RAND.nextDouble());
  rec.put("end_lon", RAND.nextDouble());

  if (isFlattened) {
    rec.put("fare", RAND.nextDouble() * 100);
    rec.put("currency", "USD");
  } else {
    rec.put("distance_in_meters", RAND.nextInt());
    rec.put("seconds_since_epoch", RAND.nextLong());
    rec.put("weight", RAND.nextFloat());
    byte[] bytes = "Canada".getBytes();
    rec.put("nation", ByteBuffer.wrap(bytes));
    long currentTimeMillis = System.currentTimeMillis();
    Date date = new Date(currentTimeMillis);
    rec.put("current_date", (int) date.toLocalDate().toEpochDay());
    rec.put("current_ts", currentTimeMillis);

    BigDecimal bigDecimal = new BigDecimal(String.format("%5f", RAND.nextFloat()));
    Schema decimalSchema = AVRO_SCHEMA.getField("height").schema();
    Conversions.DecimalConversion decimalConversions = new Conversions.DecimalConversion();
    GenericFixed genericFixed = decimalConversions.toFixed(bigDecimal, decimalSchema, LogicalTypes.decimal(10, 6));
    rec.put("height", genericFixed);

    rec.put("city_to_state", Collections.singletonMap("LA", "CA"));

    GenericRecord fareRecord = new GenericData.Record(AVRO_SCHEMA.getField("fare").schema());
    fareRecord.put("amount", RAND.nextDouble() * 100);
    fareRecord.put("currency", "USD");
    rec.put("fare", fareRecord);

    GenericArray<GenericRecord> tipHistoryArray = new GenericData.Array<>(1, AVRO_SCHEMA.getField("tip_history").schema());
    Schema tipSchema = new Schema.Parser().parse(AVRO_SCHEMA.getField("tip_history").schema().toString()).getElementType();
    GenericRecord tipRecord = new GenericData.Record(tipSchema);
    tipRecord.put("amount", RAND.nextDouble() * 100);
    tipRecord.put("currency", "USD");
    tipHistoryArray.add(tipRecord);
    rec.put("tip_history", tipHistoryArray);
  }

  if (isDeleteRecord) {
    rec.put("_hoodie_is_deleted", true);
  } else {
    rec.put("_hoodie_is_deleted", false);
  }
  return rec;
}
 
Example 19
Source File: TestConvertAvroToORC.java    From localization_nifi with Apache License 2.0
@Test
public void test_onTrigger_array_of_records() throws Exception {
    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/array_of_records.avsc"));
    List<GenericRecord> innerRecords = new LinkedList<>();

    final GenericRecord outerRecord = new GenericData.Record(schema);

    Schema arraySchema = schema.getField("records").schema();
    Schema innerRecordSchema = arraySchema.getElementType();
    final GenericRecord innerRecord1 = new GenericData.Record(innerRecordSchema);
    innerRecord1.put("name", "Joe");
    innerRecord1.put("age", 42);

    innerRecords.add(innerRecord1);

    final GenericRecord innerRecord2 = new GenericData.Record(innerRecordSchema);
    innerRecord2.put("name", "Mary");
    innerRecord2.put("age", 28);

    innerRecords.add(innerRecord2);

    GenericData.Array<GenericRecord> array = new GenericData.Array<>(arraySchema, innerRecords);
    outerRecord.put("records", array);

    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
        dataFileWriter.create(schema, out);
        dataFileWriter.append(outerRecord);
    }
    out.close();

    // Build a flow file from the Avro record
    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

    // Write the flow file out to disk, since the ORC Reader needs a path
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS org_apache_nifi_outer_record " +
            "(records ARRAY<STRUCT<name:STRING, age:INT>>)"
            + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
    assertEquals("1", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
    FileOutputStream fos = new FileOutputStream("target/test1.orc");
    fos.write(resultContents);
    fos.flush();
    fos.close();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    Object o = rows.next(null);
    assertNotNull(o);
    assertTrue(o instanceof OrcStruct);
    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(schema));

    // Verify the record contains an array
    Object arrayFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("records"));
    assertTrue(arrayFieldObject instanceof ArrayList);
    ArrayList<?> arrayField = (ArrayList<?>) arrayFieldObject;
    assertEquals(2, arrayField.size());

    // Verify the first element. Should be a record with two fields "name" and "age"
    Object element = arrayField.get(0);
    assertTrue(element instanceof OrcStruct);
    StructObjectInspector elementInspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(innerRecordSchema));
    Object nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name"));
    assertTrue(nameObject instanceof Text);
    assertEquals("Joe", nameObject.toString());
    Object ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age"));
    assertTrue(ageObject instanceof IntWritable);
    assertEquals(42, ((IntWritable) ageObject).get());

    // Verify the second element. Should be a record with two fields "name" and "age"
    element = arrayField.get(1);
    assertTrue(element instanceof OrcStruct);
    nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name"));
    assertTrue(nameObject instanceof Text);
    assertEquals("Mary", nameObject.toString());
    ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age"));
    assertTrue(ageObject instanceof IntWritable);
    assertEquals(28, ((IntWritable) ageObject).get());
}
 
Example 20
Source File: CustomAvroRecordPreparer.java    From pxf with Apache License 2.0
@Override
public GenericRecord serialize() throws IOException {

    // 1. num, int1, int2
    Schema.Field field = schema.getField("num");
    Schema fieldSchema = field.schema();
    GenericData.Array<Integer> intArray = new GenericData.Array<Integer>(
            num.length, fieldSchema);
    for (int i = 0; i < num.length; i++) {
        intArray.add(new Integer(num[i]));
    }
    datum.put("num", intArray);

    datum.put("int1", int1);
    datum.put("int2", int2);

    // 2. strings, st1
    field = schema.getField("strings");
    fieldSchema = field.schema();
    GenericData.Array<Utf8> stringArray = new GenericData.Array<Utf8>(
            strings.length, fieldSchema);
    for (int i = 0; i < strings.length; i++) {
        stringArray.add(new Utf8(strings[i]));
    }
    datum.put("strings", stringArray);

    datum.put("st1", st1);

    // 3. doubles
    field = schema.getField("dubs");
    fieldSchema = field.schema();
    GenericData.Array<Double> doubleArray = new GenericData.Array<Double>(
            dubs.length, fieldSchema);
    for (int i = 0; i < dubs.length; i++) {
        doubleArray.add(new Double(dubs[i]));
    }
    datum.put("dubs", doubleArray);
    datum.put("db", db);

    // 4. floats
    field = schema.getField("fts");
    fieldSchema = field.schema();
    GenericData.Array<Float> floatArray = new GenericData.Array<Float>(
            fts.length, fieldSchema);
    for (int i = 0; i < fts.length; i++) {
        floatArray.add(new Float(fts[i]));
    }
    datum.put("fts", floatArray);
    datum.put("ft", ft);

    // 5. longs
    field = schema.getField("lngs");
    fieldSchema = field.schema();
    GenericData.Array<Long> longArray = new GenericData.Array<Long>(
            lngs.length, fieldSchema);
    for (int i = 0; i < lngs.length; i++) {
        longArray.add(lngs[i]);
    }
    datum.put("lngs", longArray);
    datum.put("lng", lng);

    // 6. booleans
    field = schema.getField("bls");
    fieldSchema = field.schema();
    GenericData.Array<Boolean> booleanArray = new GenericData.Array<Boolean>(
            bls.length, fieldSchema);
    for (int i = 0; i < bls.length; i++) {
        booleanArray.add(bls[i]);
    }
    datum.put("bls", booleanArray);
    datum.put("bl", bl);

    // 7. bytes
    ByteBuffer byteBuffer = ByteBuffer.wrap(bts);
    datum.put("bts", byteBuffer);

    return datum;
}