Java Code Examples for org.apache.avro.generic.GenericRecordBuilder#set()

The following examples show how to use org.apache.avro.generic.GenericRecordBuilder#set(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source File: FastGenericDeserializerGeneratorTest.java    From avro-fastserde with Apache License 2.0 6 votes vote down vote up
/**
 * Serializes a record carrying enum values with {@code serializeGeneric}, reads it back with
 * {@code deserializeGenericFast}, and checks that every decoded symbol renders as "A".
 */
@Test
@SuppressWarnings("unchecked")
public void shouldReadEnum() {
    // given
    Schema enumSchema = createEnumSchema("testEnum", new String[]{"A", "B"});
    Schema recordSchema = createRecord(
            "testRecord",
            createField("testEnum", enumSchema),
            createUnionField("testEnumUnion", enumSchema),
            createArrayFieldSchema("testEnumArray", enumSchema),
            createArrayFieldSchema("testEnumUnionArray", createUnionSchema(enumSchema)));

    GenericRecordBuilder builder = new GenericRecordBuilder(recordSchema)
            .set("testEnum", new GenericData.EnumSymbol(enumSchema, "A"))
            .set("testEnumUnion", new GenericData.EnumSymbol(enumSchema, "A"))
            .set("testEnumArray", Arrays.asList(new GenericData.EnumSymbol(enumSchema, "A")))
            .set("testEnumUnionArray", Arrays.asList(new GenericData.EnumSymbol(enumSchema, "A")));

    // when
    GenericRecord decoded = deserializeGenericFast(recordSchema, recordSchema, serializeGeneric(builder.build()));

    // then
    Assert.assertEquals("A", decoded.get("testEnum").toString());
    Assert.assertEquals("A", decoded.get("testEnumUnion").toString());

    List<GenericData.EnumSymbol> enumArray = (List<GenericData.EnumSymbol>) decoded.get("testEnumArray");
    Assert.assertEquals("A", enumArray.get(0).toString());

    List<GenericData.EnumSymbol> enumUnionArray =
            (List<GenericData.EnumSymbol>) decoded.get("testEnumUnionArray");
    Assert.assertEquals("A", enumUnionArray.get(0).toString());
}
 
Example 2
Source File: FastGenericSerializerGeneratorTest.java    From avro-fastserde with Apache License 2.0 6 votes vote down vote up
/**
 * Serializes a record carrying enum values with {@code serializeGenericFast}, reads it back with
 * {@code deserializeGeneric}, and checks that every decoded symbol renders as "A".
 */
@Test
@SuppressWarnings("unchecked")
public void shouldWriteEnum() {
    // given
    Schema enumSchema = createEnumSchema("testEnum", new String[]{"A", "B"});
    Schema recordSchema = createRecord(
            "testRecord",
            createField("testEnum", enumSchema),
            createUnionField("testEnumUnion", enumSchema),
            createArrayFieldSchema("testEnumArray", enumSchema),
            createArrayFieldSchema("testEnumUnionArray", createUnionSchema(enumSchema)));

    GenericRecordBuilder builder = new GenericRecordBuilder(recordSchema)
            .set("testEnum", new GenericData.EnumSymbol(enumSchema, "A"))
            .set("testEnumUnion", new GenericData.EnumSymbol(enumSchema, "A"))
            .set("testEnumArray", Arrays.asList(new GenericData.EnumSymbol(enumSchema, "A")))
            .set("testEnumUnionArray", Arrays.asList(new GenericData.EnumSymbol(enumSchema, "A")));

    // when
    GenericRecord decoded = deserializeGeneric(recordSchema, serializeGenericFast(builder.build()));

    // then
    Assert.assertEquals("A", decoded.get("testEnum").toString());
    Assert.assertEquals("A", decoded.get("testEnumUnion").toString());

    List<GenericData.EnumSymbol> enumArray = (List<GenericData.EnumSymbol>) decoded.get("testEnumArray");
    Assert.assertEquals("A", enumArray.get(0).toString());

    List<GenericData.EnumSymbol> enumUnionArray =
            (List<GenericData.EnumSymbol>) decoded.get("testEnumUnionArray");
    Assert.assertEquals("A", enumUnionArray.get(0).toString());
}
 
Example 3
Source File: FastDatumWriterTest.java    From avro-fastserde with Apache License 2.0 6 votes vote down vote up
/**
 * Writes one record through a {@code FastGenericDatumWriter} backed by {@code cache}, then checks
 * that the cache hands out a non-null serializer for the same schema whose class does not declare
 * exactly two methods.
 */
@Test
@SuppressWarnings("unchecked")
public void shouldCreateGenericDatumReader() throws IOException {
    // given
    Schema recordSchema = createRecord("TestSchema",
            createPrimitiveUnionFieldSchema("test", Schema.Type.STRING));
    FastGenericDatumWriter<GenericRecord> datumWriter = new FastGenericDatumWriter<>(recordSchema, cache);

    GenericRecordBuilder inputBuilder = new GenericRecordBuilder(recordSchema).set("test", "test");

    // when
    datumWriter.write(
            inputBuilder.build(),
            EncoderFactory.get().directBinaryEncoder(new ByteArrayOutputStream(), null));

    // then
    // The lookup is issued twice, exactly as the test has always done.
    // NOTE(review): presumably the second call is meant to observe the cached serializer - confirm.
    FastSerializer<GenericRecord> cachedSerializer =
            (FastSerializer<GenericRecord>) cache.getFastGenericSerializer(recordSchema);
    cachedSerializer = (FastSerializer<GenericRecord>) cache.getFastGenericSerializer(recordSchema);

    Assert.assertNotNull(cachedSerializer);
    Assert.assertNotEquals(2, cachedSerializer.getClass().getDeclaredMethods().length);
}
 
Example 4
Source File: FastGenericDeserializerGeneratorTest.java    From avro-fastserde with Apache License 2.0 5 votes vote down vote up
/**
 * Round-trips a two-entry map of records through {@code serializeGeneric} and
 * {@code deserializeGenericFast}, first with a map-of-record schema and then with a map whose
 * value schema comes from {@code createUnionSchema(recordSchema)}.
 */
@Test
public void shouldReadMapOfRecords() {
    // given
    Schema recordSchema = createRecord("record",
            createPrimitiveUnionFieldSchema("field", Schema.Type.STRING));
    Schema plainMapSchema = Schema.createMap(recordSchema);

    GenericRecordBuilder valueBuilder = new GenericRecordBuilder(recordSchema).set("field", "abc");

    Map<String, GenericData.Record> input = new HashMap<>();
    input.put("1", valueBuilder.build());
    input.put("2", valueBuilder.build());

    // when
    Map<Utf8, GenericRecord> decoded = deserializeGenericFast(plainMapSchema, plainMapSchema,
            serializeGeneric(input, plainMapSchema));

    // then
    Assert.assertEquals(2, decoded.size());
    Assert.assertEquals("abc", decoded.get(new Utf8("1")).get("field").toString());
    Assert.assertEquals("abc", decoded.get(new Utf8("2")).get("field").toString());

    // given: the same data, with the map values declared through createUnionSchema
    Schema unionMapSchema = Schema.createMap(createUnionSchema(recordSchema));

    // when
    Map<Utf8, GenericRecord> decodedUnion = deserializeGenericFast(unionMapSchema, unionMapSchema,
            serializeGeneric(input, unionMapSchema));

    // then
    Assert.assertEquals(2, decodedUnion.size());
    Assert.assertEquals("abc", decodedUnion.get(new Utf8("1")).get("field").toString());
    Assert.assertEquals("abc", decodedUnion.get(new Utf8("2")).get("field").toString());
}
 
Example 5
Source File: FastGenericDeserializerGeneratorTest.java    From avro-fastserde with Apache License 2.0 5 votes vote down vote up
/**
 * Round-trips a record holding two nested sub-records (fields "record" and "record1") plus a
 * string field, and checks both the nested values and that each decoded sub-record's schema has
 * the same hash code as {@code subRecordSchema}.
 */
@Test
public void shouldReadSubRecordField() {
    // given
    Schema subRecordSchema = createRecord("subRecord",
            createPrimitiveUnionFieldSchema("subField", Schema.Type.STRING));
    Schema recordSchema = createRecord(
            "test",
            createUnionField("record", subRecordSchema),
            createField("record1", subRecordSchema),
            createPrimitiveUnionFieldSchema("field", Schema.Type.STRING));

    GenericRecordBuilder nestedBuilder = new GenericRecordBuilder(subRecordSchema).set("subField", "abc");

    GenericRecordBuilder outerBuilder = new GenericRecordBuilder(recordSchema)
            .set("record", nestedBuilder.build())
            .set("record1", nestedBuilder.build())
            .set("field", "abc");

    // when
    GenericRecord decoded = deserializeGenericFast(recordSchema, recordSchema,
            serializeGeneric(outerBuilder.build()));

    // then
    GenericRecord first = (GenericRecord) decoded.get("record");
    Assert.assertEquals("abc", first.get("subField").toString());
    Assert.assertEquals(subRecordSchema.hashCode(), first.getSchema().hashCode());

    GenericRecord second = (GenericRecord) decoded.get("record1");
    Assert.assertEquals("abc", second.get("subField").toString());
    Assert.assertEquals(subRecordSchema.hashCode(), second.getSchema().hashCode());

    Assert.assertEquals("abc", decoded.get("field").toString());
}
 
Example 6
Source File: TestMetricsRowGroupFilter.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Writes 50 records to {@code PARQUET_FILE}, then reopens the file to capture its single row
 * group's metadata and the Parquet schema for use by the tests.
 */
@BeforeClass
public static void createInputFile() throws IOException {
  // remove any file left over from an earlier run
  if (PARQUET_FILE.exists()) {
    Assert.assertTrue(PARQUET_FILE.delete());
  }

  OutputFile target = Files.localOutput(PARQUET_FILE);
  try (FileAppender<Record> appender = Parquet.write(target).schema(FILE_SCHEMA).build()) {
    GenericRecordBuilder rowBuilder = new GenericRecordBuilder(convert(FILE_SCHEMA, "table"));
    // create 50 records
    for (int i = 0; i < 50; i++) {
      appender.add(
          rowBuilder
              .set("_id", 30 + i) // min=30, max=79, num-nulls=0
              .set("_no_stats", TOO_LONG_FOR_STATS) // value longer than 4k will produce no stats
              .set("_required", "req") // required, always non-null
              .set("_all_nulls", null) // never non-null
              .set("_some_nulls", (i % 10 == 0) ? null : "some") // includes some null values
              .set("_no_nulls", "") // optional, but always non-null
              .build());
    }
  }

  InputFile written = Files.localInput(PARQUET_FILE);
  try (ParquetFileReader reader = ParquetFileReader.open(ParquetIO.file(written))) {
    Assert.assertEquals("Should create only one row group", 1, reader.getRowGroups().size());
    ROW_GROUP_METADATA = reader.getRowGroups().get(0);
    PARQUET_SCHEMA = reader.getFileMetaData().getSchema();
  }

  PARQUET_FILE.deleteOnExit();
}
 
Example 7
Source File: FieldSelectorUtil.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an indexed record for the given {@code Schema}, copying every entry of the supplied map
 * into the record under the entry's key.
 *
 * @param fields field name to value pairs to set on the generated record
 * @param schema the schema of the indexed record
 * @return the record produced by the builder once all entries have been set
 */
public static IndexedRecord generateIndexedRecord(Map<String, Object> fields, Schema schema) {
    GenericRecordBuilder builder = new GenericRecordBuilder(schema);
    for (Entry<String, Object> entry : fields.entrySet()) {
        String fieldName = entry.getKey();
        Object fieldValue = entry.getValue();
        builder.set(fieldName, fieldValue);
    }
    return builder.build();
}
 
Example 8
Source File: BigQueryConvertersTest.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity with a default namespace
 * when the namespace is not specified: the converter is built without a namespace and the
 * resulting key's partition namespace id is expected to be the empty string.
 */
@Test
public void testAvroToEntityDefaultNamespace() throws Exception {
  // Create test data: BigQuery table schema
  List<TableFieldSchema> bqFields = new ArrayList<>();
  bqFields.add(new TableFieldSchema().setName(idField).setType("STRING"));
  bqFields.add(new TableFieldSchema().setName(shortStringField).setType("STRING"));
  TableSchema bqSchema = new TableSchema().setFields(bqFields);

  // Create test data: Avro schema with the id field followed by the short string field
  String idFieldJson = String.format(avroFieldTemplate, idField, "int", idFieldDesc);
  String avroFieldsJson =
      new StringBuilder()
          .append(idFieldJson)
          .append(",")
          .append(generateShortStringField())
          .toString();
  Schema avroSchema = new Schema.Parser().parse(String.format(avroSchemaTemplate, avroFieldsJson));

  Record record =
      new GenericRecordBuilder(avroSchema)
          .set(idField, 1)
          .set(shortStringField, shortStringFieldValue)
          .build();
  SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema);

  // Run the test with a converter that has no namespace configured
  AvroToEntity noNamespaceConverter =
      AvroToEntity.newBuilder()
          .setEntityKind(entityKind)
          .setUniqueNameColumn(uniqueNameColumn)
          .build();
  Entity outputEntity = noNamespaceConverter.apply(inputBqData);

  // Assess results
  assertTrue(outputEntity.hasKey());
  assertEquals("", outputEntity.getKey().getPartitionId().getNamespaceId());
}
 
Example 9
Source File: Generator.java    From avro-random-generator with Do What The F*ck You Want To Public License 5 votes vote down vote up
/**
 * Builds a record for {@code schema} by generating a value for each of its fields with
 * {@code generateObject} and setting it on the builder.
 */
private GenericRecord generateRecord(Schema schema) {
  GenericRecordBuilder recordBuilder = new GenericRecordBuilder(schema);
  for (Schema.Field fieldDef : schema.getFields()) {
    Object generated = generateObject(fieldDef.schema());
    recordBuilder.set(fieldDef, generated);
  }
  return recordBuilder.build();
}
 
Example 10
Source File: BigQueryConvertersTest.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity with a valid key when the
 * unique name column is integer: the id is set from {@code idFieldValueInt} and the key name is
 * expected to equal {@code idFieldValueStr}.
 */
@Test
public void testAvroToEntityIntegerIdColumn() throws Exception {
  // Create test data: BigQuery table schema
  List<TableFieldSchema> bqFields = new ArrayList<>();
  bqFields.add(new TableFieldSchema().setName(idField).setType("INTEGER"));
  bqFields.add(new TableFieldSchema().setName(shortStringField).setType("STRING"));
  TableSchema bqSchema = new TableSchema().setFields(bqFields);

  // Create test data: Avro schema with the id field followed by the short string field
  String idFieldJson = String.format(avroFieldTemplate, idField, "int", idFieldDesc);
  String avroFieldsJson =
      new StringBuilder()
          .append(idFieldJson)
          .append(",")
          .append(generateShortStringField())
          .toString();
  Schema avroSchema = new Schema.Parser().parse(String.format(avroSchemaTemplate, avroFieldsJson));

  Record record =
      new GenericRecordBuilder(avroSchema)
          .set(idField, idFieldValueInt)
          .set(shortStringField, shortStringFieldValue)
          .build();
  SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema);

  // Run the test
  Entity outputEntity = converter.apply(inputBqData);

  // Assess results
  assertTrue(outputEntity.hasKey());
  assertEquals(idFieldValueStr, outputEntity.getKey().getPath(0).getName());
  validateMetadata(outputEntity);
}
 
Example 11
Source File: AvroData.java    From apicurio-registry with Apache License 2.0 5 votes vote down vote up
/**
 * Returns {@code value} untouched when a schema is supplied. Without a schema, returns a record
 * built against {@code ANYTHING_SCHEMA}, with {@code value} stored under {@code typeField} when
 * the value is non-null.
 */
private static Object maybeWrapSchemaless(Schema schema, Object value, String typeField) {
    if (schema == null) {
        GenericRecordBuilder wrapper = new GenericRecordBuilder(ANYTHING_SCHEMA);
        if (value != null) {
            wrapper.set(typeField, value);
        }
        return wrapper.build();
    }
    return value;
}
 
Example 12
Source File: Generator.java    From avro-random-generator with Do What The F*ck You Want To Public License 5 votes vote down vote up
/**
 * Converts {@code option} into the representation matching {@code schema}'s type when the value
 * arrives in one of the recognized source forms (for example a String for BYTES, ENUM or FIXED,
 * a Map for RECORD). Any other schema-type and value combination is returned unchanged.
 */
@SuppressWarnings("unchecked")
private Object wrapOption(Schema schema, Object option) {
  switch (schema.getType()) {
    case BYTES:
      if (option instanceof String) {
        return ByteBuffer.wrap(((String) option).getBytes(Charset.defaultCharset()));
      }
      break;
    case FLOAT:
      if (option instanceof Double) {
        return ((Double) option).floatValue();
      }
      break;
    case LONG:
      if (option instanceof Integer) {
        return ((Integer) option).longValue();
      }
      break;
    case ARRAY:
      if (option instanceof Collection) {
        return new GenericData.Array(schema, (Collection) option);
      }
      break;
    case ENUM:
      if (option instanceof String) {
        return new GenericData.EnumSymbol(schema, (String) option);
      }
      break;
    case FIXED:
      if (option instanceof String) {
        return new GenericData.Fixed(
            schema, ((String) option).getBytes(Charset.defaultCharset()));
      }
      break;
    case RECORD:
      if (option instanceof Map) {
        Map source = (Map) option;
        GenericRecordBuilder recordBuilder = new GenericRecordBuilder(schema);
        // only fields present in the map are set; the rest are left to the builder
        for (Schema.Field fieldDef : schema.getFields()) {
          String fieldName = fieldDef.name();
          if (source.containsKey(fieldName)) {
            recordBuilder.set(fieldDef, source.get(fieldName));
          }
        }
        return recordBuilder.build();
      }
      break;
    default:
      break;
  }
  return option;
}
 
Example 13
Source File: BigQueryConvertersTest.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Generates an Avro record with a single field.
 *
 * <p>The field definition is rendered from {@code avroFieldTemplate}, embedded in
 * {@code AVRO_SCHEMA_TEMPLATE}, and parsed into the schema the record is built against.
 */
private Record generateSingleFieldAvroRecord(
    String name, String type, String description, Object value) {
  String fieldJson = String.format(avroFieldTemplate, name, type, description);
  String schemaJson = String.format(AVRO_SCHEMA_TEMPLATE, fieldJson);
  Schema avroSchema = new Schema.Parser().parse(schemaJson);
  return new GenericRecordBuilder(avroSchema).set(name, value).build();
}
 
Example 14
Source File: Generator.java    From ksql-fork-with-deep-learning-function with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a record for {@code schema} by generating a value for each of its fields with
 * {@code generateObject}, which receives the field's schema and name.
 */
private GenericRecord generateRecord(Schema schema) {
  GenericRecordBuilder recordBuilder = new GenericRecordBuilder(schema);
  for (Schema.Field fieldDef : schema.getFields()) {
    Object generated = generateObject(fieldDef.schema(), fieldDef.name());
    recordBuilder.set(fieldDef, generated);
  }
  return recordBuilder.build();
}
 
Example 15
Source File: AvroUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Convert from a Beam Row to an AVRO GenericRecord. If a Schema is not provided, one is inferred
 * from the Beam schema on the row.
 *
 * @param row the Beam row to convert
 * @param avroSchema the target AVRO schema, or {@code null} to infer one from the row's schema
 * @return a GenericRecord with one value set per field of the row's schema, keyed by field name
 * @throws IllegalArgumentException if a non-null {@code avroSchema} does not have the same number
 *     of fields as the row's Beam schema
 */
public static GenericRecord toGenericRecord(
    Row row, @Nullable org.apache.avro.Schema avroSchema) {
  Schema beamSchema = row.getSchema();
  if (avroSchema == null) {
    // No AVRO schema provided: infer one from the row schema.
    avroSchema = toAvroSchema(beamSchema);
  } else if (avroSchema.getFields().size() != beamSchema.getFieldCount()) {
    // A provided AVRO schema must at least agree with the row on the number of fields.
    throw new IllegalArgumentException(
        "AVRO schema doesn't match row schema. Row schema "
            + beamSchema
            + ". AVRO schema "
            + avroSchema);
  }

  GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
  for (int i = 0; i < beamSchema.getFieldCount(); ++i) {
    Schema.Field field = beamSchema.getField(i);
    // The AVRO field is looked up by the Beam field's name; the value is taken by position.
    builder.set(
        field.getName(),
        genericFromBeamField(
            field.getType(), avroSchema.getField(field.getName()).schema(), row.getValue(i)));
  }
  return builder.build();
}
 
Example 16
Source File: FastGenericDeserializerGeneratorTest.java    From avro-fastserde with Apache License 2.0 4 votes vote down vote up
/**
 * Writes a record with four sub-record fields using {@code record1Schema} and reads it back with
 * {@code record2Schema}, which only declares "subRecord1" and "subRecord4"; the two remaining
 * fields must still decode to the values that were written.
 */
@Test
public void shouldSkipRemovedRecord() {
    // given
    Schema subRecord1Schema = createRecord("subRecord",
            createPrimitiveFieldSchema("test1", Schema.Type.STRING),
            createPrimitiveFieldSchema("test2", Schema.Type.STRING));
    Schema subRecord2Schema = createRecord("subRecord2",
            createPrimitiveFieldSchema("test1", Schema.Type.STRING),
            createPrimitiveFieldSchema("test2", Schema.Type.STRING));

    Schema writerSchema = createRecord("test",
            createField("subRecord1", subRecord1Schema),
            createField("subRecord2", subRecord2Schema),
            createUnionField("subRecord3", subRecord2Schema),
            createField("subRecord4", subRecord1Schema));

    Schema readerSchema = createRecord("test",
            createField("subRecord1", subRecord1Schema),
            createField("subRecord4", subRecord1Schema));

    GenericRecordBuilder keptBuilder = new GenericRecordBuilder(subRecord1Schema)
            .set("test1", "abc")
            .set("test2", "def");

    GenericRecordBuilder removedBuilder = new GenericRecordBuilder(subRecord2Schema)
            .set("test1", "ghi")
            .set("test2", "jkl");

    GenericRecordBuilder outerBuilder = new GenericRecordBuilder(writerSchema)
            .set("subRecord1", keptBuilder.build())
            .set("subRecord2", removedBuilder.build())
            .set("subRecord3", removedBuilder.build())
            .set("subRecord4", keptBuilder.build());

    // when
    GenericRecord decoded = deserializeGenericFast(writerSchema, readerSchema,
            serializeGeneric(outerBuilder.build()));

    // then
    GenericRecord first = (GenericRecord) decoded.get("subRecord1");
    Assert.assertEquals("abc", first.get("test1").toString());
    Assert.assertEquals("def", first.get("test2").toString());

    GenericRecord fourth = (GenericRecord) decoded.get("subRecord4");
    Assert.assertEquals("abc", fourth.get("test1").toString());
    Assert.assertEquals("def", fourth.get("test2").toString());
}
 
Example 17
Source File: TestDictionaryRowGroupFilter.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Writes 20 copies of one record per id in [INT_MIN_VALUE, INT_MAX_VALUE] to
 * {@code PARQUET_FILE}, then reopens the file to capture its single row group's metadata, the
 * Parquet schema and the next dictionary reader for use by the tests.
 */
@BeforeClass
public static void createInputFile() throws IOException {
  // remove any file left over from an earlier run
  if (PARQUET_FILE.exists()) {
    Assert.assertTrue(PARQUET_FILE.delete());
  }

  // build struct field schema
  org.apache.avro.Schema structSchema = AvroSchemaUtil.convert(_structFieldType);

  OutputFile target = Files.localOutput(PARQUET_FILE);
  try (FileAppender<Record> appender = Parquet.write(target).schema(FILE_SCHEMA).build()) {
    GenericRecordBuilder rowBuilder = new GenericRecordBuilder(convert(FILE_SCHEMA, "table"));
    // create 20 copies of each record to ensure dictionary-encoding
    for (int copy = 0; copy < 20; copy++) {
      // create 50 records
      for (int i = 0; i < INT_MAX_VALUE - INT_MIN_VALUE + 1; i++) {
        Record structNotNull = new Record(structSchema);
        structNotNull.put("_int_field", INT_MIN_VALUE + i);

        appender.add(
            rowBuilder
                .set("_id", INT_MIN_VALUE + i) // min=30, max=79, num-nulls=0
                .set("_no_stats", TOO_LONG_FOR_STATS) // value longer than 4k will produce no stats
                .set("_required", "req") // required, always non-null
                .set("_all_nulls", null) // never non-null
                .set("_some_nulls", (i % 10 == 0) ? null : "some") // includes some null values
                .set("_no_nulls", "") // optional, but always non-null
                .set("_non_dict", UUID.randomUUID().toString()) // not dictionary-encoded
                .set("_struct_not_null", structNotNull) // struct with int
                .build());
      }
    }
  }

  InputFile written = Files.localInput(PARQUET_FILE);

  // NOTE(review): the reader is not closed here, unlike a try-with-resources read; presumably
  // dictionaryStore still reads through it after this method returns - confirm.
  ParquetFileReader reader = ParquetFileReader.open(ParquetIO.file(written));

  Assert.assertEquals("Should create only one row group", 1, reader.getRowGroups().size());
  rowGroupMetadata = reader.getRowGroups().get(0);
  parquetSchema = reader.getFileMetaData().getSchema();
  dictionaryStore = reader.getNextDictionaryReader();

  PARQUET_FILE.deleteOnExit();
}
 
Example 18
Source File: FastGenericDeserializerGeneratorTest.java    From avro-fastserde with Apache License 2.0 4 votes vote down vote up
/**
 * Round-trips a record holding each primitive type twice (once as a plain field, once as a
 * "...Union" field) through {@code serializeGeneric} and {@code deserializeGenericFast}, and
 * checks every decoded value. The "testJavaString" fields use a string schema configured with
 * {@code GenericData.StringType.String} and are compared without calling {@code toString()}.
 */
@Test
public void shouldReadPrimitives() {
    // given
    Schema javaStringSchema = Schema.create(Schema.Type.STRING);
    GenericData.setStringType(javaStringSchema, GenericData.StringType.String);

    Schema recordSchema = createRecord(
            "testRecord",
            createField("testInt", Schema.create(Schema.Type.INT)),
            createPrimitiveUnionFieldSchema("testIntUnion", Schema.Type.INT),
            createField("testString", Schema.create(Schema.Type.STRING)),
            createPrimitiveUnionFieldSchema("testStringUnion", Schema.Type.STRING),
            createField("testJavaString", javaStringSchema),
            createUnionField("testJavaStringUnion", javaStringSchema),
            createField("testLong", Schema.create(Schema.Type.LONG)),
            createPrimitiveUnionFieldSchema("testLongUnion", Schema.Type.LONG),
            createField("testDouble", Schema.create(Schema.Type.DOUBLE)),
            createPrimitiveUnionFieldSchema("testDoubleUnion", Schema.Type.DOUBLE),
            createField("testFloat", Schema.create(Schema.Type.FLOAT)),
            createPrimitiveUnionFieldSchema("testFloatUnion", Schema.Type.FLOAT),
            createField("testBoolean", Schema.create(Schema.Type.BOOLEAN)),
            createPrimitiveUnionFieldSchema("testBooleanUnion", Schema.Type.BOOLEAN),
            createField("testBytes", Schema.create(Schema.Type.BYTES)),
            createPrimitiveUnionFieldSchema("testBytesUnion", Schema.Type.BYTES));

    GenericRecordBuilder inputBuilder = new GenericRecordBuilder(recordSchema)
            .set("testInt", 1)
            .set("testIntUnion", 1)
            .set("testString", "aaa")
            .set("testStringUnion", "aaa")
            .set("testJavaString", "aaa")
            .set("testJavaStringUnion", "aaa")
            .set("testLong", 1L)
            .set("testLongUnion", 1L)
            .set("testDouble", 1.0)
            .set("testDoubleUnion", 1.0)
            .set("testFloat", 1.0f)
            .set("testFloatUnion", 1.0f)
            .set("testBoolean", true)
            .set("testBooleanUnion", true)
            .set("testBytes", ByteBuffer.wrap(new byte[]{0x01, 0x02}))
            .set("testBytesUnion", ByteBuffer.wrap(new byte[]{0x01, 0x02}));

    // when
    GenericRecord decoded = deserializeGenericFast(recordSchema, recordSchema,
            serializeGeneric(inputBuilder.build()));

    // then
    Assert.assertEquals(1, decoded.get("testInt"));
    Assert.assertEquals(1, decoded.get("testIntUnion"));
    Assert.assertEquals("aaa", decoded.get("testString").toString());
    Assert.assertEquals("aaa", decoded.get("testStringUnion").toString());
    Assert.assertEquals("aaa", decoded.get("testJavaString"));
    Assert.assertEquals("aaa", decoded.get("testJavaStringUnion"));
    Assert.assertEquals(1L, decoded.get("testLong"));
    Assert.assertEquals(1L, decoded.get("testLongUnion"));
    Assert.assertEquals(1.0, decoded.get("testDouble"));
    Assert.assertEquals(1.0, decoded.get("testDoubleUnion"));
    Assert.assertEquals(1.0f, decoded.get("testFloat"));
    Assert.assertEquals(1.0f, decoded.get("testFloatUnion"));
    Assert.assertEquals(true, decoded.get("testBoolean"));
    Assert.assertEquals(true, decoded.get("testBooleanUnion"));
    Assert.assertEquals(ByteBuffer.wrap(new byte[]{0x01, 0x02}), decoded.get("testBytes"));
    Assert.assertEquals(ByteBuffer.wrap(new byte[]{0x01, 0x02}), decoded.get("testBytesUnion"));
}
 
Example 19
Source File: TestMetricsRowGroupFilter.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Writes one record per id in [INT_MIN_VALUE, INT_MAX_VALUE] to {@code parquetFile}, then
 * reopens the file to capture its single row group's metadata and the Parquet schema.
 */
private void createParquetInputFile() throws IOException {
  // remove any file left over from an earlier run
  if (parquetFile.exists()) {
    Assert.assertTrue(parquetFile.delete());
  }

  // build struct field schema
  org.apache.avro.Schema structSchema = AvroSchemaUtil.convert(_structFieldType);

  OutputFile target = Files.localOutput(parquetFile);
  try (FileAppender<Record> appender = Parquet.write(target).schema(FILE_SCHEMA).build()) {
    GenericRecordBuilder rowBuilder = new GenericRecordBuilder(convert(FILE_SCHEMA, "table"));
    // create 50 records
    for (int i = 0; i < INT_MAX_VALUE - INT_MIN_VALUE + 1; i++) {
      Record structNotNull = new Record(structSchema);
      structNotNull.put("_int_field", INT_MIN_VALUE + i);

      appender.add(
          rowBuilder
              .set("_id", INT_MIN_VALUE + i) // min=30, max=79, num-nulls=0
              // value longer than 4k will produce no stats in Parquet
              .set("_no_stats_parquet", TOO_LONG_FOR_STATS_PARQUET)
              .set("_required", "req") // required, always non-null
              .set("_all_nulls", null) // never non-null
              .set("_some_nulls", (i % 10 == 0) ? null : "some") // includes some null values
              .set("_no_nulls", "") // optional, but always non-null
              .set("_str", i + "str" + i)
              .set("_struct_not_null", structNotNull) // struct with int
              .build());
    }
  }

  InputFile written = Files.localInput(parquetFile);
  try (ParquetFileReader reader = ParquetFileReader.open(parquetInputFile(written))) {
    Assert.assertEquals("Should create only one row group", 1, reader.getRowGroups().size());
    rowGroupMetadata = reader.getRowGroups().get(0);
    parquetSchema = reader.getFileMetaData().getSchema();
  }

  parquetFile.deleteOnExit();
}
 
Example 20
Source File: FastGenericSerializerGeneratorTest.java    From avro-fastserde with Apache License 2.0 4 votes vote down vote up
/**
 * Serializes a record whose fields nest sub-records inside an array of maps and a map of arrays
 * (each also present as a "...Union" field) with {@code serializeGenericFast}, reads it back with
 * {@code deserializeGeneric}, and checks the sub-record's "subField" in all four fields.
 */
@Test
@SuppressWarnings("unchecked")
public void shouldWriteSubRecordComplexCollectionsField() {
    // given
    Schema subRecordSchema = createRecord("subRecord",
            createPrimitiveUnionFieldSchema("subField", Schema.Type.STRING));
    Schema recordSchema = createRecord(
            "test",
            createArrayFieldSchema("recordsArrayMap", Schema.createMap(createUnionSchema(subRecordSchema))),
            createMapFieldSchema("recordsMapArray", Schema.createArray(createUnionSchema(subRecordSchema))),
            createUnionField("recordsArrayMapUnion",
                    Schema.createArray(Schema.createMap(createUnionSchema(subRecordSchema)))),
            createUnionField("recordsMapArrayUnion",
                    Schema.createMap(Schema.createArray(createUnionSchema(subRecordSchema)))));

    GenericRecordBuilder nestedBuilder = new GenericRecordBuilder(subRecordSchema).set("subField", "abc");

    // array holding a single map with one sub-record under key "1"
    Map<String, GenericRecord> innerMap = new HashMap<>();
    innerMap.put("1", nestedBuilder.build());
    List<Map<String, GenericRecord>> arrayOfMaps = new ArrayList<>();
    arrayOfMaps.add(innerMap);

    // map holding a single one-element list under key "1"
    List<GenericRecord> innerList = new ArrayList<>();
    innerList.add(nestedBuilder.build());
    Map<String, List<GenericRecord>> mapOfArrays = new HashMap<>();
    mapOfArrays.put("1", innerList);

    GenericRecordBuilder outerBuilder = new GenericRecordBuilder(recordSchema)
            .set("recordsArrayMap", arrayOfMaps)
            .set("recordsArrayMapUnion", arrayOfMaps)
            .set("recordsMapArray", mapOfArrays)
            .set("recordsMapArrayUnion", mapOfArrays);

    // when
    GenericRecord decoded = deserializeGeneric(recordSchema, serializeGenericFast(outerBuilder.build()));

    // then
    Utf8 key = new Utf8("1");

    List<Map<Utf8, GenericRecord>> decodedArrayMap =
            (List<Map<Utf8, GenericRecord>>) decoded.get("recordsArrayMap");
    Assert.assertEquals("abc", decodedArrayMap.get(0).get(key).get("subField").toString());

    Map<Utf8, List<GenericRecord>> decodedMapArray =
            (Map<Utf8, List<GenericRecord>>) decoded.get("recordsMapArray");
    Assert.assertEquals("abc", decodedMapArray.get(key).get(0).get("subField").toString());

    List<Map<Utf8, GenericRecord>> decodedArrayMapUnion =
            (List<Map<Utf8, GenericRecord>>) decoded.get("recordsArrayMapUnion");
    Assert.assertEquals("abc", decodedArrayMapUnion.get(0).get(key).get("subField").toString());

    Map<Utf8, List<GenericRecord>> decodedMapArrayUnion =
            (Map<Utf8, List<GenericRecord>>) decoded.get("recordsMapArrayUnion");
    Assert.assertEquals("abc", decodedMapArrayUnion.get(key).get(0).get("subField").toString());
}