Java Code Examples for org.apache.avro.generic.GenericRecordBuilder#build()

The following examples show how to use org.apache.avro.generic.GenericRecordBuilder#build(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: TypeConverterUtilsTest.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code TypeConverterUtils.convertValue} on a single value and hands back the result.
 *
 * <p>The value is wrapped in a throwaway one-field record (field {@code "instance"}), converted in
 * place through the record builder, and then read back from position 0 of the built record.
 *
 * @param inputSchema The Avro schema describing {@code input}.
 * @param input The value to convert.
 * @param outputType The requested output type.
 * @param inputFormat The format to use in the conversion, or null if none.
 * @param outputClass The class the converted value is expected to be an instance of (null values are
 *        also accepted by the assertion).
 * @return The converted value, cast unchecked to {@code T}.
 */
public <T> T testConvertValue(Schema inputSchema, Object input, TypeConverterProperties.TypeConverterOutputTypes outputType,
        String inputFormat, Class<T> outputClass) {
    final String fieldName = "instance";

    // Wrapper schema holding just the value under test.
    Schema wrapperSchema = SchemaBuilder.record("TestConvertValueRecord") //
            .fields() //
            .name(fieldName).type(inputSchema).noDefault() //
            .endRecord();

    // Builder pre-populated with the value; the conversion mutates it in place.
    GenericRecordBuilder recordBuilder = new GenericRecordBuilder(wrapperSchema).set(fieldName, input);

    // Path to the field to convert, expressed as a stack of field names.
    Stack<String> pathToField = new Stack<String>();
    pathToField.push(fieldName);

    TypeConverterUtils.convertValue(wrapperSchema, recordBuilder, pathToField, outputType, inputFormat);

    // Read the converted value once and verify its type before returning it.
    Object converted = recordBuilder.build().get(0);
    assertThat(converted, anyOf(nullValue(), instanceOf(outputClass)));
    return (T) converted;
}
 
Example 2
Source File: KeyValueUtils.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a nested record for one field of {@code schema} in which every leaf is null.
 *
 * <p>The method recurses into each sub-field of the named field; sub-fields that are not records
 * themselves yield {@code null}.
 *
 * @param schema the parent schema that owns the field
 * @param fieldName the name of the field to generate an empty value for
 * @return a record built from the field's unwrapped schema when both {@code schema} and that
 *         unwrapped schema are of type RECORD; {@code null} otherwise
 */
public static IndexedRecord generateEmptyRecord(Schema schema, String fieldName) {
    // Only a record schema has named fields to look into.
    if (!schema.getType().equals(Type.RECORD)) {
        return null;
    }

    Schema fieldSchema = getUnwrappedSchema(schema.getField(fieldName));
    // A non-record field is a leaf: its empty value is simply null.
    if (!fieldSchema.getType().equals(Type.RECORD)) {
        return null;
    }

    GenericRecordBuilder emptyRecord = new GenericRecordBuilder(fieldSchema);
    for (Field child : fieldSchema.getFields()) {
        emptyRecord.set(child.name(), generateEmptyRecord(fieldSchema, child.name()));
    }
    return emptyRecord.build();
}
 
Example 3
Source File: BigQueryConvertersTest.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that {@link BigQueryConverters.AvroToEntity} produces an Entity whose key has an empty
 * namespace id when the converter is built without a namespace.
 */
@Test
public void testAvroToEntityDefaultNamespace() throws Exception {
  // BigQuery table schema: id column plus one short string column.
  List<TableFieldSchema> bqFields = new ArrayList<>();
  bqFields.add(new TableFieldSchema().setName(idField).setType("STRING"));
  bqFields.add(new TableFieldSchema().setName(shortStringField).setType("STRING"));
  TableSchema bqSchema = new TableSchema().setFields(bqFields);

  // Matching Avro schema, assembled from the shared field templates.
  String avroFieldsJson =
      String.format(avroFieldTemplate, idField, "int", idFieldDesc)
          + ","
          + generateShortStringField();
  Schema avroSchema = new Schema.Parser().parse(String.format(avroSchemaTemplate, avroFieldsJson));

  // Input record wrapped together with its BigQuery schema.
  GenericRecordBuilder recordBuilder = new GenericRecordBuilder(avroSchema);
  recordBuilder.set(idField, 1);
  recordBuilder.set(shortStringField, shortStringFieldValue);
  Record avroRecord = recordBuilder.build();
  SchemaAndRecord inputBqData = new SchemaAndRecord(avroRecord, bqSchema);

  // Converter deliberately built without a namespace.
  AvroToEntity noNamespaceConverter =
      AvroToEntity.newBuilder()
          .setEntityKind(entityKind)
          .setUniqueNameColumn(uniqueNameColumn)
          .build();
  Entity outputEntity = noNamespaceConverter.apply(inputBqData);

  // The key must exist and carry the empty (default) namespace.
  assertTrue(outputEntity.hasKey());
  assertEquals("", outputEntity.getKey().getPartitionId().getNamespaceId());
}
 
Example 4
Source File: KeyValueUtils.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Reshapes an indexed record into a two-field key/value record.
 *
 * <p>The key and value parts are each extracted from {@code record} using the sub-schemas found
 * under {@code RECORD_KEY_PREFIX} and {@code RECORD_VALUE_PREFIX} in {@code kvSchema}.
 *
 * @param record the indexed record to split
 * @param kvSchema the key-value schema describing the output
 * @return a record conforming to {@code kvSchema}
 */
public static IndexedRecord transformToKV(IndexedRecord record, Schema kvSchema) {
    // Key part.
    Schema keySchema = kvSchema.getField(RECORD_KEY_PREFIX).schema();
    IndexedRecord keyPart = extractIndexedRecord(record, keySchema);

    // Value part.
    Schema valueSchema = kvSchema.getField(RECORD_VALUE_PREFIX).schema();
    IndexedRecord valuePart = extractIndexedRecord(record, valueSchema);

    return new GenericRecordBuilder(kvSchema) //
            .set(RECORD_KEY_PREFIX, keyPart) //
            .set(RECORD_VALUE_PREFIX, valuePart) //
            .build();
}
 
Example 5
Source File: ICMPParquetPacketWriterImpl.java    From entrada with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Converts one row into an Avro record for {@code ICMP_AVRO_SCHEMA} and passes it to the writer.
 *
 * <p>The partition's year/month/day are taken from the row timestamp through
 * {@link Calendar#getInstance()}, i.e. in the JVM's default time zone and locale.
 *
 * @param row the row to write
 * @param server the server name stored on the partition
 * @return the partition the record was written under
 */
@Override
public Partition write(Row row, String server) {
  rowCounter++;

  Calendar timestamp = Calendar.getInstance();
  timestamp.setTimeInMillis(row.getTs().getTime());

  // Copy every column that has a counterpart field into the Avro record builder.
  GenericRecordBuilder avroBuilder = recordBuilder(ICMP_AVRO_SCHEMA);
  row
      .getColumns()
      .stream()
      .filter(column -> hasField(column.getName()))
      .forEach(column -> avroBuilder.set(column.getName(), column.getValue()));
  GenericRecord avroRecord = avroBuilder.build();

  // Calendar months are zero-based, hence the + 1.
  int year = timestamp.get(Calendar.YEAR);
  int month = timestamp.get(Calendar.MONTH) + 1;
  int day = timestamp.get(Calendar.DAY_OF_MONTH);
  Partition target =
      Partition.builder().year(year).month(month).day(day).dns(false).server(server).build();

  writer.write(avroRecord, schema(ICMP_AVRO_SCHEMA), target);
  return target;
}
 
Example 6
Source File: AvroUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Convert from a Beam Row to an AVRO GenericRecord. If a Schema is not provided, one is inferred
 * from the Beam schema on the row.
 *
 * @param row the Beam row to convert
 * @param avroSchema the AVRO schema to build the record against, or {@code null} to infer one from
 *     the row's Beam schema
 * @return a record holding one converted value per Beam field
 * @throws IllegalArgumentException if the provided AVRO schema has a different number of fields
 *     than the row schema, or lacks a field with the name of one of the row's fields
 */
public static GenericRecord toGenericRecord(
    Row row, @Nullable org.apache.avro.Schema avroSchema) {
  Schema beamSchema = row.getSchema();
  if (avroSchema != null && avroSchema.getFields().size() != beamSchema.getFieldCount()) {
    throw new IllegalArgumentException(
        "AVRO schema doesn't match row schema. Row schema "
            + beamSchema
            + ". AVRO schema "
            + avroSchema);
  }
  // Use the provided AVRO schema if present, otherwise infer one from the row schema. A separate
  // local keeps the parameter untouched.
  org.apache.avro.Schema targetSchema =
      (avroSchema != null) ? avroSchema : toAvroSchema(beamSchema);

  GenericRecordBuilder builder = new GenericRecordBuilder(targetSchema);
  for (int i = 0; i < beamSchema.getFieldCount(); ++i) {
    Schema.Field field = beamSchema.getField(i);
    // Fields are matched by name; equal field counts alone do not guarantee a match.
    org.apache.avro.Schema.Field avroField = targetSchema.getField(field.getName());
    if (avroField == null) {
      throw new IllegalArgumentException(
          "AVRO schema doesn't match row schema. Field '"
              + field.getName()
              + "' is missing from AVRO schema "
              + targetSchema);
    }
    builder.set(
        field.getName(),
        genericFromBeamField(field.getType(), avroField.schema(), row.getValue(i)));
  }
  return builder.build();
}
 
Example 7
Source File: Generator.java    From avro-random-generator with Do What The F*ck You Want To Public License 5 votes vote down vote up
/**
 * Builds a record for {@code schema}, filling each field with a value produced by
 * {@code generateObject} from that field's schema.
 *
 * @param schema the record schema to populate
 * @return the populated record
 */
private GenericRecord generateRecord(Schema schema) {
  final GenericRecordBuilder recordBuilder = new GenericRecordBuilder(schema);
  for (final Schema.Field recordField : schema.getFields()) {
    final Object generated = generateObject(recordField.schema());
    recordBuilder.set(recordField, generated);
  }
  return recordBuilder.build();
}
 
Example 8
Source File: Generator.java    From avro-random-generator with Do What The F*ck You Want To Public License 5 votes vote down vote up
/**
 * Adapts a plain option value to the representation matching {@code schema}'s type.
 *
 * <p>Conversions performed (all others return {@code option} unchanged):
 * BYTES + String to ByteBuffer, FLOAT + Double to Float, LONG + Integer to Long,
 * ARRAY + Collection to {@code GenericData.Array}, ENUM + String to {@code GenericData.EnumSymbol},
 * FIXED + String to {@code GenericData.Fixed}, RECORD + Map to a built record containing only the
 * schema fields present as keys in the map.
 *
 * <p>String-to-bytes conversions use {@link Charset#defaultCharset()}.
 *
 * @param schema the schema the option must conform to
 * @param option the raw option value
 * @return the wrapped value, or {@code option} itself when no conversion applies
 */
@SuppressWarnings("unchecked")
private Object wrapOption(Schema schema, Object option) {
  final Schema.Type type = schema.getType();

  if (type == Schema.Type.BYTES && option instanceof String) {
    return ByteBuffer.wrap(((String) option).getBytes(Charset.defaultCharset()));
  }
  if (type == Schema.Type.FLOAT && option instanceof Double) {
    return ((Double) option).floatValue();
  }
  if (type == Schema.Type.LONG && option instanceof Integer) {
    return ((Integer) option).longValue();
  }
  if (type == Schema.Type.ARRAY && option instanceof Collection) {
    return new GenericData.Array(schema, (Collection) option);
  }
  if (type == Schema.Type.ENUM && option instanceof String) {
    return new GenericData.EnumSymbol(schema, (String) option);
  }
  if (type == Schema.Type.FIXED && option instanceof String) {
    return new GenericData.Fixed(schema, ((String) option).getBytes(Charset.defaultCharset()));
  }
  if (type == Schema.Type.RECORD && option instanceof Map) {
    Map entries = (Map) option;
    GenericRecordBuilder recordBuilder = new GenericRecordBuilder(schema);
    // Only fields explicitly present in the map are set; the rest are left to the builder.
    for (Schema.Field recordField : schema.getFields()) {
      if (entries.containsKey(recordField.name())) {
        recordBuilder.set(recordField, entries.get(recordField.name()));
      }
    }
    return recordBuilder.build();
  }

  // No conversion applies.
  return option;
}
 
Example 9
Source File: KeyValueUtils.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Produces a new record shaped by {@code outputSchema}, taking values from {@code inputRecord}.
 *
 * <p>For each output field: if the input has no field of that name, the value comes from
 * {@code KeyValueUtils.generateEmptyRecord}; if the input value is a {@code Record}, it is
 * converted recursively (only when both child schemas are records); any other value is copied
 * as is. Columns can therefore be removed, added empty, or reordered within one level.
 *
 * @param inputRecord the record to read values from
 * @param outputSchema the schema of the record to produce
 * @return the new record
 */
public static IndexedRecord extractIndexedRecord(IndexedRecord inputRecord, Schema outputSchema) {
    GenericRecordBuilder result = new GenericRecordBuilder(outputSchema);
    Schema inputSchema = getUnwrappedSchema(inputRecord);

    for (Field outputField : outputSchema.getFields()) {
        String name = outputField.name();
        Field inputField = inputSchema.getField(name);

        // Column absent from the input: fill it (and any hierarchy below it) with nulls.
        if (inputField == null) {
            result.set(name, KeyValueUtils.generateEmptyRecord(outputSchema, name));
            continue;
        }

        Object inputValue = inputRecord.get(inputField.pos());

        // Plain value: forward it unchanged.
        if (!(inputValue instanceof Record)) {
            result.set(name, inputValue);
            continue;
        }

        // Hierarchical value: the field schema may be a union around the record,
        // so unwrap both sides before recursing.
        Schema inputChildSchema = getUnwrappedSchema(inputField);
        Schema outputChildSchema = getUnwrappedSchema(outputSchema.getField(name));
        boolean bothRecords = inputChildSchema.getType().equals(Type.RECORD)
                && outputChildSchema.getType().equals(Type.RECORD);
        if (bothRecords) {
            result.set(name, extractIndexedRecord((IndexedRecord) inputValue, outputChildSchema));
        }
        // NOTE(review): when the schemas are not both records the field is left unset.
    }
    return result.build();
}
 
Example 10
Source File: BigQueryConvertersTest.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Generates an Avro record with a record field type.
 *
 * <p>The outer record has a single {@code address} field whose value is a nested record with
 * {@code street_number} (int) and {@code street_name} (string).
 *
 * @return the outer record with the nested address record set
 */
private Record generateNestedAvroRecord() {
  String avroRecordFieldSchema =
      new StringBuilder()
          .append("{")
          .append("  \"name\" : \"address\",")
          .append("  \"type\" :")
          .append("  {")
          .append("    \"type\" : \"record\",")
          .append("    \"name\" : \"address\",")
          .append("    \"namespace\"  : \"nothing\",")
          .append("    \"fields\" : ")
          .append("    [")
          .append("      {\"name\" : \"street_number\", \"type\" : \"int\"},")
          .append("      {\"name\" : \"street_name\", \"type\" : \"string\"}")
          .append("    ]")
          .append("  }")
          .append("}")
          .toString();
  Schema avroSchema =
      new Schema.Parser().parse(String.format(avroSchemaTemplate, avroRecordFieldSchema));
  GenericRecordBuilder addressBuilder =
      new GenericRecordBuilder(avroSchema.getField("address").schema());
  addressBuilder.set("street_number", 12);
  addressBuilder.set("street_name", "Magnolia street");
  GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
  // Store the built nested record, not the builder object itself.
  builder.set("address", addressBuilder.build());
  return builder.build();
}
 
Example 11
Source File: BigQueryConvertersTest.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Generates an Avro record with a single field.
 *
 * @param name the field name
 * @param type the Avro type inserted into the field template
 * @param description the description inserted into the field template
 * @param value the value to store in the field
 * @return a record with {@code name} set to {@code value}
 */
private Record generateSingleFieldAvroRecord(
    String name, String type, String description, Object value) {
  // Field JSON first, then the enclosing record schema around it.
  String fieldJson = String.format(avroFieldTemplate, name, type, description);
  Schema singleFieldSchema = new Schema.Parser().parse(String.format(avroSchemaTemplate, fieldJson));

  GenericRecordBuilder recordBuilder = new GenericRecordBuilder(singleFieldSchema);
  recordBuilder.set(name, value);
  return recordBuilder.build();
}
 
Example 12
Source File: BigQueryConvertersTest.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that {@link BigQueryConverters.AvroToEntity} yields an Entity without a key when a
 * timestamp field is invalid, and that the Entity carries the failure cause and the original
 * row.
 */
@Test
public void testAvroToEntityInvalidTimestampField() throws Exception {
  // BigQuery table schema: id column plus a TIMESTAMP column.
  List<TableFieldSchema> bqFields = new ArrayList<>();
  bqFields.add(new TableFieldSchema().setName(idField).setType("STRING"));
  bqFields.add(new TableFieldSchema().setName(invalidTimestampField).setType("TIMESTAMP"));
  TableSchema bqSchema = new TableSchema().setFields(bqFields);

  // Avro schema: string id plus a long holding the timestamp.
  String idFieldJson = String.format(avroFieldTemplate, idField, "string", idFieldDesc);
  String timestampFieldJson =
      String.format(
          avroFieldTemplate, invalidTimestampField, "long", invalidTimestampFieldDesc);
  Schema avroSchema =
      new Schema.Parser()
          .parse(String.format(avroSchemaTemplate, idFieldJson + "," + timestampFieldJson));

  GenericRecordBuilder recordBuilder = new GenericRecordBuilder(avroSchema);
  recordBuilder.set(idField, idFieldValueStr);
  recordBuilder.set(invalidTimestampField, invalidTimestampFieldValueNanos);
  Record record = recordBuilder.build();
  SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema);

  // Run the conversion.
  Entity outputEntity = converter.apply(inputBqData);

  // No key, a "cause" property describing the failure, and the original row echoed back.
  assertTrue(!outputEntity.hasKey());
  String cause = outputEntity.getPropertiesMap().get("cause").getStringValue();
  assertTrue(cause.startsWith("Timestamp is not valid"));
  assertEquals(record.toString(), outputEntity.getPropertiesMap().get("row").getStringValue());
}
 
Example 13
Source File: TypeConverterUtilsTest.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Copies the fields of an int-typed record into a builder for a string-typed record and checks
 * that field {@code a} of the built record equals the source value.
 */
@Test
public void testCopyFieldsValues() {
    // Source: record with a single int field "a" = 1.
    Schema intSchema = SchemaBuilder.record("intSchema") //
            .fields() //
            .name("a").type().intType().noDefault() //
            .endRecord();
    GenericRecordBuilder intRecordBuilder = new GenericRecordBuilder(intSchema);
    intRecordBuilder.set("a", 1);
    GenericRecord intRecord = intRecordBuilder.build();

    // Target: record with a single string field "a", initially "s".
    // NOTE(review): the record name is "intSchema" here as well - presumably copy/paste; confirm.
    Schema stringSchema = SchemaBuilder.record("intSchema") //
            .fields() //
            .name("a").type().stringType().noDefault() //
            .endRecord();
    GenericRecordBuilder stringRecordBuilder = new GenericRecordBuilder(stringSchema);
    stringRecordBuilder.set("a", "s");

    TypeConverterUtils.copyFieldsValues(intRecord, stringRecordBuilder);

    GenericRecord stringRecord = stringRecordBuilder.build();
    Assert.assertEquals(intRecord.get("a"), stringRecord.get("a"));
}
 
Example 14
Source File: BigQueryConvertersTest.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that {@link BigQueryConverters.AvroToEntity} produces an Entity with a key named after
 * the id value when the unique name column is a string.
 */
@Test
public void testAvroToEntityStringIdColumn() throws Exception {
  // BigQuery table schema: string id plus one short string column.
  List<TableFieldSchema> bqFields = new ArrayList<>();
  bqFields.add(new TableFieldSchema().setName(idField).setType("STRING"));
  bqFields.add(new TableFieldSchema().setName(shortStringField).setType("STRING"));
  TableSchema bqSchema = new TableSchema().setFields(bqFields);

  // Matching Avro schema assembled from the shared templates.
  String avroFieldsJson =
      String.format(avroFieldTemplate, idField, "string", idFieldDesc)
          + ","
          + generateShortStringField();
  Schema avroSchema = new Schema.Parser().parse(String.format(avroSchemaTemplate, avroFieldsJson));

  GenericRecordBuilder recordBuilder = new GenericRecordBuilder(avroSchema);
  recordBuilder.set(idField, idFieldValueStr);
  recordBuilder.set(shortStringField, shortStringFieldValue);
  Record avroRecord = recordBuilder.build();
  SchemaAndRecord inputBqData = new SchemaAndRecord(avroRecord, bqSchema);

  // Run the conversion.
  Entity outputEntity = converter.apply(inputBqData);

  // The key exists and its first path element is named after the id value.
  assertTrue(outputEntity.hasKey());
  assertEquals(idFieldValueStr, outputEntity.getKey().getPath(0).getName());
  validateMetadata(outputEntity);
}
 
Example 15
Source File: BigQueryConvertersTest.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that {@link BigQueryConverters.AvroToEntity} produces an Entity with a valid key when
 * the unique name column is an integer.
 */
@Test
public void testAvroToEntityIntegerIdColumn() throws Exception {
  // BigQuery table schema: integer id plus one short string column.
  List<TableFieldSchema> bqFields = new ArrayList<>();
  bqFields.add(new TableFieldSchema().setName(idField).setType("INTEGER"));
  bqFields.add(new TableFieldSchema().setName(shortStringField).setType("STRING"));
  TableSchema bqSchema = new TableSchema().setFields(bqFields);

  // Matching Avro schema assembled from the shared templates.
  String avroFieldsJson =
      String.format(avroFieldTemplate, idField, "int", idFieldDesc)
          + ","
          + generateShortStringField();
  Schema avroSchema = new Schema.Parser().parse(String.format(avroSchemaTemplate, avroFieldsJson));

  GenericRecordBuilder recordBuilder = new GenericRecordBuilder(avroSchema);
  recordBuilder.set(idField, idFieldValueInt);
  recordBuilder.set(shortStringField, shortStringFieldValue);
  Record avroRecord = recordBuilder.build();
  SchemaAndRecord inputBqData = new SchemaAndRecord(avroRecord, bqSchema);

  // Run the conversion.
  Entity outputEntity = converter.apply(inputBqData);

  // The key name is compared against the string form of the id.
  // NOTE(review): presumably idFieldValueStr is the string rendering of idFieldValueInt - confirm.
  assertTrue(outputEntity.hasKey());
  assertEquals(idFieldValueStr, outputEntity.getKey().getPath(0).getName());
  validateMetadata(outputEntity);
}
 
Example 16
Source File: BigQueryConvertersTest.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that {@link BigQueryConverters.AvroToEntity} produces an Entity without a key when the
 * unique name column exceeds the maximum allowed size of 1500 bytes.
 */
@Test
public void testAvroToEntityTooLongIdColumn() throws Exception {
  // BigQuery table schema: string id plus one short string column.
  List<TableFieldSchema> bqFields = new ArrayList<>();
  bqFields.add(new TableFieldSchema().setName(idField).setType("STRING"));
  bqFields.add(new TableFieldSchema().setName(shortStringField).setType("STRING"));
  TableSchema bqSchema = new TableSchema().setFields(bqFields);

  // Matching Avro schema assembled from the shared templates.
  String avroFieldsJson =
      String.format(avroFieldTemplate, idField, "string", idFieldDesc)
          + ","
          + generateShortStringField();
  Schema avroSchema = new Schema.Parser().parse(String.format(avroSchemaTemplate, avroFieldsJson));

  // The id field receives the long string value.
  GenericRecordBuilder recordBuilder = new GenericRecordBuilder(avroSchema);
  recordBuilder.set(idField, longStringFieldValue);
  recordBuilder.set(shortStringField, shortStringFieldValue);
  Record avroRecord = recordBuilder.build();
  SchemaAndRecord inputBqData = new SchemaAndRecord(avroRecord, bqSchema);

  // Run the conversion; no key is expected.
  Entity outputEntity = converter.apply(inputBqData);
  assertTrue(!outputEntity.hasKey());
}
 
Example 17
Source File: BigQueryConvertersTest.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Generates an Avro record with a record field type.
 *
 * <p>The outer record has a single {@code address} field holding a nested record with
 * {@code street_number} = 12 and {@code street_name} = "Magnolia street".
 *
 * @return the outer record with the built address record set
 */
static Record generateNestedAvroRecord() {
  // JSON definition of the nested "address" field.
  String addressFieldJson =
      "{"
          + "  \"name\" : \"address\","
          + "  \"type\" :"
          + "  {"
          + "    \"type\" : \"record\","
          + "    \"name\" : \"address\","
          + "    \"namespace\"  : \"nothing\","
          + "    \"fields\" : "
          + "    ["
          + "      {\"name\" : \"street_number\", \"type\" : \"int\"},"
          + "      {\"name\" : \"street_name\", \"type\" : \"string\"}"
          + "    ]"
          + "  }"
          + "}";
  Schema outerSchema =
      new Schema.Parser().parse(String.format(AVRO_SCHEMA_TEMPLATE, addressFieldJson));

  // Inner record first.
  Schema addressSchema = outerSchema.getField("address").schema();
  GenericRecordBuilder addressBuilder = new GenericRecordBuilder(addressSchema);
  addressBuilder.set("street_number", 12);
  addressBuilder.set("street_name", "Magnolia street");

  // Then the outer record wrapping the built inner record.
  GenericRecordBuilder outerBuilder = new GenericRecordBuilder(outerSchema);
  outerBuilder.set("address", addressBuilder.build());
  return outerBuilder.build();
}
 
Example 18
Source File: FieldSelectorUtil.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an indexed record for {@code schema} from a map of field names to values.
 *
 * @param fields field name to value; every entry is set on the record builder
 * @param schema the schema of the indexed record
 * @return the built indexed record
 */
public static IndexedRecord generateIndexedRecord(Map<String, Object> fields, Schema schema) {
    GenericRecordBuilder builder = new GenericRecordBuilder(schema);
    fields.forEach((fieldName, fieldValue) -> builder.set(fieldName, fieldValue));
    return builder.build();
}
 
Example 19
Source File: Generator.java    From ksql-fork-with-deep-learning-function with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a record for {@code schema}, filling each field with a value produced by
 * {@code generateObject} from the field's schema and name.
 *
 * @param schema the record schema to populate
 * @return the populated record
 */
private GenericRecord generateRecord(Schema schema) {
  final GenericRecordBuilder recordBuilder = new GenericRecordBuilder(schema);
  for (final Schema.Field recordField : schema.getFields()) {
    final Object generated = generateObject(recordField.schema(), recordField.name());
    recordBuilder.set(recordField, generated);
  }
  return recordBuilder.build();
}
 
Example 20
Source File: KafkaKeySerializer.java    From kareldb with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a record for {@code avroSchema} from an array of components, pairing the i-th schema
 * field with the i-th array element after converting it with {@code AvroSchema.toAvroValue}.
 *
 * @param object the components, indexed in schema field order
 * @return the built record
 */
private GenericRecord toRecord(Comparable[] object) {
    GenericRecordBuilder recordBuilder = new GenericRecordBuilder(avroSchema);
    for (Ord<Field> indexedField : Ord.zip(avroSchema.getFields())) {
        Comparable component = object[indexedField.i];
        Field schemaField = indexedField.e;
        recordBuilder.set(schemaField, AvroSchema.toAvroValue(schemaField.schema(), component));
    }
    return recordBuilder.build();
}