Java Code Examples for org.apache.avro.SchemaBuilder#FieldAssembler

The following examples show how to use org.apache.avro.SchemaBuilder#FieldAssembler. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SnowflakeRuntimeIT.java    From components with Apache License 2.0 7 votes vote down vote up
/**
 * Builds the Avro record schema named "MakeRowRecord".
 *
 * <p>Each field carries its own name under the
 * {@code SchemaConstants.TALEND_COLUMN_DB_COLUMN_NAME} property. Fields, in order: ID (decimal),
 * C1 (nullable string), C2 (nullable boolean), C3 (nullable double), C4 (logical date),
 * C5 (logical time), C6 (logical timestamp), C7 (nullable string, used for variant values).
 *
 * @return the completed record schema
 */
public Schema getMakeRowSchema() {
    final String dbColumnKey = SchemaConstants.TALEND_COLUMN_DB_COLUMN_NAME;

    SchemaBuilder.FieldAssembler<Schema> assembler = SchemaBuilder.builder().record("MakeRowRecord").fields();
    assembler = assembler.name("ID").prop(dbColumnKey, "ID").type(AvroUtils._decimal()).noDefault();
    assembler = assembler.name("C1").prop(dbColumnKey, "C1").type().nullable().stringType().noDefault();
    assembler = assembler.name("C2").prop(dbColumnKey, "C2").type().nullable().booleanType().noDefault();
    assembler = assembler.name("C3").prop(dbColumnKey, "C3").type().nullable().doubleType().noDefault();
    // date
    assembler = assembler.name("C4").prop(dbColumnKey, "C4").type(AvroUtils._logicalDate()).noDefault();
    // time
    assembler = assembler.name("C5").prop(dbColumnKey, "C5").type(AvroUtils._logicalTime()).noDefault();
    // timestamp
    assembler = assembler.name("C6").prop(dbColumnKey, "C6").type(AvroUtils._logicalTimestamp()).noDefault();
    // variant
    assembler = assembler.name("C7").prop(dbColumnKey, "C7").type().nullable().stringType().noDefault();

    return assembler.endRecord();
}
 
Example 2
Source File: DayPartitioner.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the partitioner settings from {@code state} and builds the partition schema.
 *
 * <p>The partition schema is a record of string fields: an optional prefix column (only when a
 * non-blank prefix is configured) followed by year, month and day columns.
 *
 * @param state job state holding the partitioner configuration
 * @param numBranches not used by this constructor
 * @param branchId not used by this constructor
 */
public DayPartitioner(State state, int numBranches, int branchId) {
  _withColumnNames = state.getPropAsBoolean(GoggleIngestionConfigurationKeys.KEY_INCLUDE_COLUMN_NAMES, false);
  _prefix = state.getProp(GoggleIngestionConfigurationKeys.KEY_PARTITIONER_PREFIX);
  _withPrefix = StringUtils.isNotBlank(_prefix);

  _dateColumn = state.getProp(GoggleIngestionConfigurationKeys.KEY_DATE_COLUMN_NAME, DEFAULT_DATE_COLUMN);
  String datePattern = state.getProp(GoggleIngestionConfigurationKeys.KEY_DATE_FORMAT, DEFAULT_DATE_FORMAT);
  _dateFormatter = DateTimeFormat.forPattern(datePattern);

  Schema stringSchema = Schema.create(Schema.Type.STRING);
  SchemaBuilder.FieldAssembler<Schema> fields = SchemaBuilder.record(NAME).namespace(NAME_SPACE).fields();

  // The prefix column, when present, must come first in the record.
  if (_withPrefix) {
    fields = fields.name(PARTITION_COLUMN_PREFIX).type(stringSchema).noDefault();
  }
  fields = fields.name(PARTITION_COLUMN_YEAR).type(stringSchema).noDefault();
  fields = fields.name(PARTITION_COLUMN_MONTH).type(stringSchema).noDefault();
  fields = fields.name(PARTITION_COLUMN_DAY).type(stringSchema).noDefault();
  _partitionSchema = fields.endRecord();
}
 
Example 3
Source File: TestNiFiOrcUtils.java    From nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a map-of-map-of-double Avro field is converted to a map type whose values are
 * themselves a map of string to double.
 */
@Test
public void test_getOrcField_nested_map() throws Exception {
    final Schema testSchema = SchemaBuilder.record("testRecord").namespace("any.data").fields()
            .name("map").type().map().values().map().values().doubleType().noDefault()
            .endRecord();

    final TypeInfo actual = NiFiOrcUtils.getOrcField(testSchema.getField("map").schema());

    final TypeInfo expected = TypeInfoFactory.getMapTypeInfo(
            TypeInfoCreator.createString(),
            TypeInfoFactory.getMapTypeInfo(TypeInfoCreator.createString(), TypeInfoCreator.createDouble()));
    assertEquals(expected, actual);
}
 
Example 4
Source File: FixedDatasetRuntime.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Infers an Avro schema from the given CSV record. The result is always a
 * {@link Schema.Type#RECORD} with one required string field per value in the record; each value
 * is used as the name of its field, in record order.
 *
 * @param in the CSV record whose values supply the field names.
 * @return the record schema with one string field per value of {@code in}.
 */
static Schema inferSchema(CSVRecord in) {
    SchemaBuilder.FieldAssembler<Schema> fa = SchemaBuilder.record(RECORD_NAME).fields();
    for (int i = 0; i < in.size(); i++) {
        fa = fa.name(in.get(i)).type(Schema.create(Schema.Type.STRING)).noDefault();
    }
    return fa.endRecord();
}
 
Example 5
Source File: TestNiFiOrcUtils.java    From localization_nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a fake Avro record containing one field for each primitive type exercised by the tests:
 * int, long, boolean, float, double, bytes and string.
 *
 * @return the completed record schema
 */
public static Schema buildPrimitiveAvroSchema() {
    return SchemaBuilder.record("test.record").namespace("any.data").fields()
            .name("int").type().intType().noDefault()
            .name("long").type().longType().longDefault(1L)
            .name("boolean").type().booleanType().booleanDefault(true)
            .name("float").type().floatType().floatDefault(0.0f)
            .name("double").type().doubleType().doubleDefault(0.0)
            .name("bytes").type().bytesType().noDefault()
            .name("string").type().stringType().stringDefault("default")
            .endRecord();
}
 
Example 6
Source File: TestNiFiOrcUtils.java    From localization_nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that an Avro enum field is converted to the string type.
 */
@Test
public void test_getOrcField_enum() throws Exception {
    final Schema testSchema = SchemaBuilder.record("testRecord").namespace("any.data").fields()
            .name("enumField").type().enumeration("enum").symbols("a", "b", "c").enumDefault("a")
            .endRecord();

    final TypeInfo actual = NiFiOrcUtils.getOrcField(testSchema.getField("enumField").schema());
    final TypeInfo expected = TypeInfoCreator.createString();
    assertEquals(expected, actual);
}
 
Example 7
Source File: EventDataPresenterAvro.java    From replicator with Apache License 2.0 5 votes vote down vote up
/**
 * Adds a float field called {@code name} to {@code builder}.
 *
 * <p>When {@code isNullValue(defaultVal)} is false, the field is added with
 * {@code nullableFloat} using {@code defaultVal} parsed as a float; otherwise it is added with
 * {@code optionalFloat}.
 *
 * @param name field name
 * @param defaultVal textual default value
 * @param builder assembler the field is appended to
 */
private static void addFloatField(String name, String defaultVal, SchemaBuilder.FieldAssembler<Schema> builder) {
    if (!isNullValue(defaultVal)) {
        builder.nullableFloat(name, Float.valueOf(defaultVal));
        return;
    }
    builder.optionalFloat(name);
}
 
Example 8
Source File: TestNiFiOrcUtils.java    From nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a fake Avro record with nested complex types: a map whose values are arrays of double,
 * and an array whose items are maps of string.
 *
 * @return the completed record schema
 */
public static Schema buildNestedComplexAvroSchema() {
    return SchemaBuilder.record("nested.complex.record").namespace("any.data").fields()
            .name("myMapOfArray").type().map().values().array().items().doubleType().noDefault()
            .name("myArrayOfMap").type().array().items().map().values().stringType().noDefault()
            .endRecord();
}
 
Example 9
Source File: EventDataPresenterAvro.java    From replicator with Apache License 2.0 5 votes vote down vote up
/**
 * Adds a long field called {@code name} to {@code builder}.
 *
 * <p>When {@code isNullValue(defaultVal)} is false, the field is added with
 * {@code nullableLong} using {@code defaultVal} parsed as a long; otherwise it is added with
 * {@code optionalLong}.
 *
 * @param name field name
 * @param defaultVal textual default value
 * @param builder assembler the field is appended to
 */
private static void addLongField(String name, String defaultVal, SchemaBuilder.FieldAssembler<Schema> builder) {
    if (!isNullValue(defaultVal)) {
        builder.nullableLong(name, Long.valueOf(defaultVal));
        return;
    }
    builder.optionalLong(name);
}
 
Example 10
Source File: TestNiFiOrcUtils.java    From nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that an Avro union of int and boolean is converted to a union type with the same two
 * members.
 */
@Test
public void test_getOrcField_union() throws Exception {
    final Schema testSchema = SchemaBuilder.record("testRecord").namespace("any.data").fields()
            .name("union").type().unionOf().intType().and().booleanType().endUnion().noDefault()
            .endRecord();

    final TypeInfo actual = NiFiOrcUtils.getOrcField(testSchema.getField("union").schema());

    final TypeInfo expected = TypeInfoFactory.getUnionTypeInfo(
            Arrays.asList(TypeInfoCreator.createInt(), TypeInfoCreator.createBoolean()));
    assertEquals(expected, actual);
}
 
Example 11
Source File: FixedDatasetRuntime.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a record schema with {@code maxSize} required string fields. Field {@code i} is named
 * {@code FIELD_PREFIX + i}, with {@code i} counting from 0.
 *
 * @param maxSize number of string fields to generate; a value of zero or less yields a record
 *     with no fields.
 * @return the generated record schema.
 */
public static Schema inferSchema(int maxSize) {
    SchemaBuilder.FieldAssembler<Schema> fa = SchemaBuilder.record(RECORD_NAME).fields();
    for (int i = 0; i < maxSize; i++) {
        fa = fa.name(FIELD_PREFIX + i).type(Schema.create(Schema.Type.STRING)).noDefault();
    }
    return fa.endRecord();
}
 
Example 12
Source File: TestNiFiOrcUtils.java    From localization_nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that an Avro union of int and boolean is converted to a union type listing int and
 * boolean, in that order.
 */
@Test
public void test_getOrcField_union() throws Exception {
    final Schema unionRecord = SchemaBuilder.record("testRecord").namespace("any.data").fields()
            .name("union").type().unionOf().intType().and().booleanType().endUnion().noDefault()
            .endRecord();
    final Schema unionFieldSchema = unionRecord.getField("union").schema();

    final TypeInfo converted = NiFiOrcUtils.getOrcField(unionFieldSchema);

    assertEquals(
            TypeInfoFactory.getUnionTypeInfo(
                    Arrays.asList(TypeInfoCreator.createInt(), TypeInfoCreator.createBoolean())),
            converted);
}
 
Example 13
Source File: TestNiFiOrcUtils.java    From localization_nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a union of null and boolean is converted to the plain boolean type.
 */
@Test
public void test_getOrcField_union_optional_type() throws Exception {
    final Schema testSchema = SchemaBuilder.record("testRecord").namespace("any.data").fields()
            .name("union").type().unionOf().nullBuilder().endNull().and().booleanType().endUnion().noDefault()
            .endRecord();

    final TypeInfo actual = NiFiOrcUtils.getOrcField(testSchema.getField("union").schema());
    final TypeInfo expected = TypeInfoCreator.createBoolean();
    assertEquals(expected, actual);
}
 
Example 14
Source File: TestNiFiOrcUtils.java    From nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a fake Avro record with complex field types: a null/int union defaulting to null, a map
 * of double, an enum, a long/float union and an array of int.
 *
 * @return the completed record schema
 */
public static Schema buildComplexAvroSchema() {
    return SchemaBuilder.record("complex.record").namespace("any.data").fields()
            .name("myInt").type().unionOf().nullType().and().intType().endUnion().nullDefault()
            .name("myMap").type().map().values().doubleType().noDefault()
            .name("myEnum").type().enumeration("myEnum").symbols("ABC", "DEF", "XYZ").enumDefault("ABC")
            .name("myLongOrFloat").type().unionOf().longType().and().floatType().endUnion().noDefault()
            .name("myIntList").type().array().items().intType().noDefault()
            .endRecord();
}
 
Example 15
Source File: JdbcAvroSchema.java    From dbeam with Apache License 2.0 4 votes vote down vote up
/**
 * Completes {@code fieldBuilder} as a union of null and the Avro type chosen for the given column
 * type, with a null default.
 *
 * <p>Mapping used:
 * <ul>
 *   <li>BIGINT: long when {@code 0 < precision <= JdbcAvroRecord.MAX_DIGITS_BIGINT}, else string</li>
 *   <li>INTEGER, SMALLINT, TINYINT: int</li>
 *   <li>TIMESTAMP, DATE, TIME, TIME_WITH_TIMEZONE: long, tagged with the
 *       {@code timestamp-millis} logical type when {@code useLogicalTypes} is set</li>
 *   <li>BOOLEAN: boolean; BIT: boolean when {@code precision <= 1}, else bytes</li>
 *   <li>BINARY, VARBINARY, LONGVARBINARY, ARRAY, BLOB: bytes</li>
 *   <li>DOUBLE: double; FLOAT, REAL: float</li>
 *   <li>character types and anything else: string</li>
 * </ul>
 *
 * @param columnType column type code, compared against the constants used in the switch
 * @param precision column precision, consulted for BIGINT and BIT only
 * @param fieldBuilder builder of the field whose type is being set
 * @param useLogicalTypes whether date/time columns get the {@code timestamp-millis} logical type
 * @return the assembler returned once the field has been completed
 */
private static SchemaBuilder.FieldAssembler<Schema> fieldAvroType(
    final int columnType,
    final int precision,
    final SchemaBuilder.FieldBuilder<Schema> fieldBuilder,
    boolean useLogicalTypes) {

  // Every branch below adds exactly one more member to this [null, ...] union.
  final SchemaBuilder.BaseTypeBuilder<
          SchemaBuilder.UnionAccumulator<SchemaBuilder.NullDefault<Schema>>>
      nullableUnion = fieldBuilder.type().unionOf().nullBuilder().endNull().and();

  switch (columnType) {
    case BIGINT:
      return (precision > 0 && precision <= JdbcAvroRecord.MAX_DIGITS_BIGINT)
          ? nullableUnion.longType().endUnion().nullDefault()
          : nullableUnion.stringType().endUnion().nullDefault();

    case INTEGER:
    case SMALLINT:
    case TINYINT:
      return nullableUnion.intType().endUnion().nullDefault();

    case TIMESTAMP:
    case DATE:
    case TIME:
    case TIME_WITH_TIMEZONE:
      if (!useLogicalTypes) {
        return nullableUnion.longType().endUnion().nullDefault();
      }
      return nullableUnion
          .longBuilder()
          .prop("logicalType", "timestamp-millis")
          .endLong()
          .endUnion()
          .nullDefault();

    case BOOLEAN:
      return nullableUnion.booleanType().endUnion().nullDefault();

    case BIT:
      return (precision <= 1)
          ? nullableUnion.booleanType().endUnion().nullDefault()
          : nullableUnion.bytesType().endUnion().nullDefault();

    case BINARY:
    case VARBINARY:
    case LONGVARBINARY:
    case ARRAY:
    case BLOB:
      return nullableUnion.bytesType().endUnion().nullDefault();

    case DOUBLE:
      return nullableUnion.doubleType().endUnion().nullDefault();

    case FLOAT:
    case REAL:
      return nullableUnion.floatType().endUnion().nullDefault();

    // Character types share the fallback: they are all represented as strings.
    case VARCHAR:
    case CHAR:
    case CLOB:
    case LONGNVARCHAR:
    case LONGVARCHAR:
    case NCHAR:
    default:
      return nullableUnion.stringType().endUnion().nullDefault();
  }
}
 
Example 16
Source File: QueryCassandra.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Derives an Avro record schema from the column definitions of a result set.
 *
 * <p>The record is named after the table of the first column when that name is not blank, and
 * "NiFi_Cassandra_Query_Record" otherwise. Set and list columns become a union of null and an
 * array; map columns become a union of null and a map; every other column uses the type returned
 * by {@code getUnionFieldType} for its primitive Avro type.
 *
 * @param rs The result set from which an Avro schema will be created
 * @return An Avro schema corresponding to the given result set's metadata
 * @throws IOException If an error occurs during schema discovery/building
 * @throws IllegalArgumentException if a column has no data type, or a collection column has no
 *     type arguments
 */
public static Schema createSchema(final ResultSet rs) throws IOException {
    final ColumnDefinitions columnDefinitions = rs.getColumnDefinitions();
    final int nrOfColumns = (columnDefinitions == null) ? 0 : columnDefinitions.size();

    String tableName = "NiFi_Cassandra_Query_Record";
    if (nrOfColumns > 0) {
        final String tableNameFromMeta = columnDefinitions.getTable(0);
        if (!StringUtils.isBlank(tableNameFromMeta)) {
            tableName = tableNameFromMeta;
        }
    }

    final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record(tableName).namespace("any.data").fields();

    // nrOfColumns is 0 when there are no column definitions, so the loop body never touches a null reference.
    for (int i = 0; i < nrOfColumns; i++) {
        final DataType dataType = columnDefinitions.getType(i);
        if (dataType == null) {
            throw new IllegalArgumentException("No data type for column[" + i + "] with name " + columnDefinitions.getName(i));
        }

        // Map types from Cassandra to Avro where possible
        if (!dataType.isCollection()) {
            builder.name(columnDefinitions.getName(i))
                    .type(getUnionFieldType(getPrimitiveAvroTypeFromCassandraType(dataType)))
                    .noDefault();
            continue;
        }

        final List<DataType> typeArguments = dataType.getTypeArguments();
        if (typeArguments == null || typeArguments.isEmpty()) {
            throw new IllegalArgumentException("Column[" + i + "] " + dataType.getName()
                    + " is a collection but no type arguments were specified!");
        }

        // Get the first type argument, to be used for lists and sets
        final DataType firstArg = typeArguments.get(0);
        final boolean isSetOrList = dataType.equals(DataType.set(firstArg))
                || dataType.equals(DataType.list(firstArg));
        if (isSetOrList) {
            builder.name(columnDefinitions.getName(i))
                    .type().unionOf().nullBuilder().endNull().and()
                    .array().items(getUnionFieldType(getPrimitiveAvroTypeFromCassandraType(firstArg)))
                    .endUnion().noDefault();
            continue;
        }

        // Must be an n-arg collection like map
        final DataType secondArg = typeArguments.get(1);
        if (dataType.equals(DataType.map(firstArg, secondArg))) {
            builder.name(columnDefinitions.getName(i))
                    .type().unionOf().nullBuilder().endNull().and()
                    .map().values(getUnionFieldType(getPrimitiveAvroTypeFromCassandraType(secondArg)))
                    .endUnion().noDefault();
        }
    }
    return builder.endRecord();
}
 
Example 17
Source File: ImportFromAvroTest.java    From DataflowTemplates with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the import test with an Avro schema of int, long, float, double and string fields against
 * a table definition whose non-key columns are all FLOAT64.
 */
@Test
public void floats() throws Exception {
  Schema schema =
      SchemaBuilder.record("floats")
          .fields()
          // Primary key.
          .requiredLong("id")
          // Integer columns.
          .optionalInt("optional_int")
          .requiredInt("required_int")
          .requiredLong("required_long")
          .optionalLong("optional_long")
          // Floating columns
          .optionalFloat("optional_float")
          .requiredFloat("required_float")
          .requiredDouble("required_double")
          .optionalDouble("optional_double")
          .optionalString("optional_string_double")
          .requiredString("required_string_double")
          .endRecord();

  String ddl =
      "CREATE TABLE `AvroTable` ("
          + "`id`                                    INT64 NOT NULL,"
          + "`optional_int`                          FLOAT64,"
          + "`required_int`                          FLOAT64 NOT NULL,"
          + "`optional_long`                         FLOAT64,"
          + "`required_long`                         FLOAT64 NOT NULL,"
          + "`optional_float`                        FLOAT64,"
          + "`required_float`                        FLOAT64 NOT NULL,"
          + "`optional_double`                       FLOAT64,"
          + "`required_double`                       FLOAT64 NOT NULL,"
          + "`optional_string_double`                FLOAT64,"
          + "`required_string_double`                FLOAT64 NOT NULL,"
          + ") PRIMARY KEY (`id`)";

  runTest(
      schema,
      ddl,
      Arrays.asList(
          new GenericRecordBuilder(schema)
              .set("id", 1L)
              .set("optional_int", 1)
              .set("required_int", 4)
              .set("optional_long", 2L)
              .set("required_long", 3L)
              .set("optional_float", 2.3f)
              .set("required_float", 3.4f)
              .set("optional_double", 2.5)
              .set("required_double", 3.6)
              .set("optional_string_double", "100.30")
              .set("required_string_double", "0.1e-3")
              .build(),
          new GenericRecordBuilder(schema)
              .set("id", 2L)
              .set("optional_int", 10)
              .set("required_int", 40)
              .set("optional_long", 20L)
              .set("required_long", 30L)
              .set("optional_float", 2.03f)
              .set("required_float", 3.14f)
              .set("optional_double", 2.05)
              .set("required_double", 3.16)
              .set("optional_string_double", "100.301")
              .set("required_string_double", "1.1e-3")
              .build()));
}
 
Example 18
Source File: ImportFromAvroTest.java    From DataflowTemplates with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the import test with an Avro schema of int, long, float, double and string fields against
 * a table definition whose non-key columns are all STRING columns.
 */
@Test
public void strings() throws Exception {
  Schema schema =
      SchemaBuilder.record("strings")
          .fields()
          // Primary key.
          .requiredLong("id")
          // Integer columns.
          .optionalInt("optional_int")
          .requiredInt("required_int")
          .requiredLong("required_long")
          .optionalLong("optional_long")
          // Floating columns
          .optionalFloat("optional_float")
          .requiredFloat("required_float")
          .requiredDouble("required_double")
          .optionalDouble("optional_double")
          // String columns
          .optionalString("optional_string")
          .requiredString("required_string")
          .endRecord();

  String ddl =
      "CREATE TABLE `AvroTable` ("
          + "`id`                                    INT64 NOT NULL,"
          + "`optional_int`                          STRING(10),"
          + "`required_int`                          STRING(MAX) NOT NULL,"
          + "`optional_long`                         STRING(MAX),"
          + "`required_long`                         STRING(MAX) NOT NULL,"
          + "`optional_float`                        STRING(MAX),"
          + "`required_float`                        STRING(MAX) NOT NULL,"
          + "`optional_double`                       STRING(MAX),"
          + "`required_double`                       STRING(MAX) NOT NULL,"
          + "`optional_string`                       STRING(MAX),"
          + "`required_string`                       STRING(30) NOT NULL,"
          + ") PRIMARY KEY (`id`)";

  runTest(
      schema,
      ddl,
      Arrays.asList(
          new GenericRecordBuilder(schema)
              .set("id", 1L)
              .set("optional_int", 1)
              .set("required_int", 4)
              .set("optional_long", 2L)
              .set("required_long", 3L)
              .set("optional_float", 2.3f)
              .set("required_float", 3.4f)
              .set("optional_double", 2.5)
              .set("required_double", 3.6)
              .set("optional_string", "ONE STRING")
              .set("required_string", "TWO STRING")
              .build(),
          new GenericRecordBuilder(schema)
              .set("id", 2L)
              .set("optional_int", 10)
              .set("required_int", 40)
              .set("optional_long", 20L)
              .set("required_long", 30L)
              .set("optional_float", 2.03f)
              .set("required_float", 3.14f)
              .set("optional_double", 2.05)
              .set("required_double", 3.16)
              .set("optional_string", null)
              .set("required_string", "THE STRING")
              .build()));
}
 
Example 19
Source File: ImportFromAvroTest.java    From DataflowTemplates with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the import test with an Avro schema of long and string fields against a table definition
 * whose non-key columns are all TIMESTAMP columns.
 */
@Test
public void timestamps() throws Exception {
  Schema schema =
      SchemaBuilder.record("timestamps")
          .fields()
          // Primary key.
          .requiredLong("id")
          // Long columns.
          .requiredLong("required_long")
          .optionalLong("optional_long")
          // String columns
          .optionalString("optional_string")
          .requiredString("required_string")
          .endRecord();

  StringBuilder ddl = new StringBuilder();
  ddl.append("CREATE TABLE `AvroTable` (");
  ddl.append("`id`                                    INT64 NOT NULL,");
  ddl.append("`optional_long`                         TIMESTAMP,");
  ddl.append("`required_long`                         TIMESTAMP NOT NULL,");
  ddl.append("`optional_string`                       TIMESTAMP,");
  ddl.append("`required_string`                       TIMESTAMP NOT NULL,");
  ddl.append(") PRIMARY KEY (`id`)");
  String spannerSchema = ddl.toString();

  runTest(
      schema,
      spannerSchema,
      Arrays.asList(
          new GenericRecordBuilder(schema)
              .set("id", 1L)
              .set("optional_long", 5000000L)
              .set("required_long", 6000000L)
              .set("optional_string", "2018-06-06T21:00:35.312000000Z")
              .set("required_string", "2018-06-06T21:00:35.312000000Z")
              .build(),
          new GenericRecordBuilder(schema)
              .set("id", 2L)
              .set("optional_long", 500000330L)
              .set("required_long", 6000020000L)
              .set("optional_string", "2017-06-06T21:00:35.312000000Z")
              .set("required_string", "2017-06-06T21:00:35.312000000Z")
              .build(),
          new GenericRecordBuilder(schema)
              .set("id", 3L)
              .set("optional_long", null)
              .set("required_long", 6000020000L)
              .set("optional_string", null)
              .set("required_string", "0001-01-01T00:00:00Z")
              .build()));
}
 
Example 20
Source File: EventDataPresenterAvro.java    From replicator with Apache License 2.0 4 votes vote down vote up
/**
 * Appends the three metadata fields to {@code builder}: {@code __timestamp} (long),
 * {@code __is_deleted} (int) and {@code __binlog_position} (long). Each is added with the literal
 * default value "NULL".
 *
 * @param builder assembler the metadata fields are appended to
 */
private static void addMetaFields(SchemaBuilder.FieldAssembler<Schema> builder) {
    // Presumably treated as "no default" by isNullValue in the add*Field helpers; verify there.
    final String nullDefault = "NULL";

    addLongField("__timestamp", nullDefault, builder);
    addIntField("__is_deleted", nullDefault, builder);
    addLongField("__binlog_position", nullDefault, builder);
}