Java Code Examples for org.apache.beam.sdk.schemas.Schema#Builder

The following examples show how to use org.apache.beam.sdk.schemas.Schema#Builder. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: BigQueryUtils.java    From beam with Apache License 2.0 7 votes vote down vote up
/**
 * Builds a Beam {@link Schema} from a list of BigQuery table field schemas.
 *
 * <p>For every input field: the type is converted with {@code fromTableFieldSchemaType}; a field
 * whose mode is {@code REPEATED} is wrapped in an array type; the field is nullable when its mode
 * is unset or {@code NULLABLE}; a non-empty description is carried over to the Beam field.
 *
 * @param tableFieldSchemas the BigQuery field definitions to convert, in output order
 * @return a schema with one Beam field per input field
 */
private static Schema fromTableFieldSchema(List<TableFieldSchema> tableFieldSchemas) {
  Schema.Builder result = Schema.builder();
  for (TableFieldSchema source : tableFieldSchemas) {
    FieldType type = fromTableFieldSchemaType(source.getType(), source.getFields());

    // A missing mode stays null; any other value must name a Mode constant.
    String rawMode = source.getMode();
    Mode mode = (rawMode == null) ? null : Mode.valueOf(rawMode);
    if (mode == Mode.REPEATED) {
      type = FieldType.array(type);
    }

    // An undefined mode is treated the same as NULLABLE.
    boolean nullable = (mode == null) || (mode == Mode.NULLABLE);
    Field beamField = Field.of(source.getName(), type).withNullable(nullable);

    String description = source.getDescription();
    if (description != null && !description.isEmpty()) {
      beamField = beamField.withDescription(description);
    }
    result.addField(beamField);
  }
  return result.build();
}
 
Example 2
Source File: Select.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a schema with the same number of fields as {@code schema}, where every field is renamed
 * to a freshly generated random UUID string.
 *
 * <p>Each field type is passed through the {@code uniquifyNames} overload that accepts a field
 * type (defined outside this snippet).
 */
private static Schema uniquifyNames(Schema schema) {
  Schema.Builder renamed = new Schema.Builder();
  schema
      .getFields()
      .forEach(f -> renamed.addField(UUID.randomUUID().toString(), uniquifyNames(f.getType())));
  return renamed.build();
}
 
Example 3
Source File: SchemaAggregateFn.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Assembles the output schema from the {@code outputField} of each aggregation, in list order. */
private Schema getOutputSchema(List<FieldAggregation> fieldAggregations) {
  Schema.Builder builder = Schema.builder();
  fieldAggregations.forEach(agg -> builder.addField(agg.outputField));
  return builder.build();
}
 
Example 4
Source File: CoGroup.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the unexpanded join output schema: a row field named {@code keyFieldName} holding the
 * key schema, followed by one iterable-of-rows field per component schema, named after the
 * component's map key.
 */
static Schema getUnexandedOutputSchema(String keyFieldName, JoinInformation joinInformation) {
  Schema.Builder unexpanded =
      Schema.builder().addRowField(keyFieldName, joinInformation.keySchema);
  joinInformation.componentSchemas.forEach(
      (tag, componentSchema) ->
          unexpanded.addIterableField(tag, FieldType.row(componentSchema)));
  return unexpanded.build();
}
 
Example 5
Source File: CoGroup.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the expanded join output schema.
 *
 * <p>The schema has one ROW-typed field per component schema, named after the component's map
 * key. A field is made nullable when {@code joinArgs} reports optional participation for that
 * key.
 */
static Schema getExpandedOutputSchema(JoinInformation joinInformation, JoinArguments joinArgs) {
  Schema.Builder expanded = Schema.builder();
  joinInformation.componentSchemas.forEach(
      (tag, componentSchema) -> {
        FieldType rowType = FieldType.row(componentSchema);
        boolean optional = joinArgs.getOptionalParticipation(tag);
        expanded.addField(tag, optional ? rowType.withNullable(true) : rowType);
      });
  return expanded.build();
}
 
Example 6
Source File: AvroUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an AVRO schema to a Beam row schema.
 *
 * <p>Each AVRO field is converted with {@code toBeamField}; when the AVRO field has a doc string,
 * it becomes the description of the Beam field.
 *
 * @param schema schema of type RECORD
 */
public static Schema toBeamSchema(org.apache.avro.Schema schema) {
  Schema.Builder beamSchema = Schema.builder();
  for (org.apache.avro.Schema.Field avroField : schema.getFields()) {
    Field converted = toBeamField(avroField);
    String doc = avroField.doc();
    beamSchema.addField(doc == null ? converted : converted.withDescription(doc));
  }
  return beamSchema.build();
}
 
Example 7
Source File: SelectHelpers.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Concatenates the fields of all given schemas, in iteration order, into a single schema. */
private static Schema union(Iterable<Schema> schemas) {
  Schema.Builder combined = Schema.builder();
  schemas.forEach(part -> combined.addFields(part.getFields()));
  return combined.build();
}
 
Example 8
Source File: StaticSchemaInference.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Infer a schema from a Java class.
 *
 * <p>The supplied {@code fieldValueTypeSupplier} decides which members of the class become
 * fields (callers may, for example, look at public member variables, public getters, or
 * annotations). Each reported member is converted with {@code fieldFromType} and added as a
 * nullable or non-nullable field according to its type information.
 */
public static Schema schemaFromClass(
    Class<?> clazz, FieldValueTypeSupplier fieldValueTypeSupplier) {
  Schema.Builder inferred = Schema.builder();
  for (FieldValueTypeInformation info : fieldValueTypeSupplier.get(clazz)) {
    Schema.FieldType converted = fieldFromType(info.getType(), fieldValueTypeSupplier);
    if (!info.isNullable()) {
      inferred.addField(info.getName(), converted);
      continue;
    }
    inferred.addNullableField(info.getName(), converted);
  }
  return inferred.build();
}
 
Example 9
Source File: SchemaUtil.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Infers the Beam {@link Schema} from {@link ResultSetMetaData}.
 *
 * <p>Columns are visited by their 1-based JDBC index; each column's JDBC type selects the
 * converter that creates the corresponding Beam field.
 */
static Schema toBeamSchema(ResultSetMetaData md) throws SQLException {
  Schema.Builder beamSchema = Schema.builder();

  int column = 1;
  while (column <= md.getColumnCount()) {
    BeamFieldConverter converter =
        jdbcTypeToBeamFieldConverter(valueOf(md.getColumnType(column)));
    beamSchema.addField(converter.create(column, md));
    column++;
  }

  return beamSchema.build();
}
 
Example 10
Source File: JdbcIOTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a schema-based write without a prepared-statement setter fails when the row schema
 * does not line up with the target table.
 *
 * <p>The row schema declares {@code column_boolean} and {@code column_string}, while the table
 * is created with {@code column_boolean} and a {@code NOT NULL} {@code column_int}. Running the
 * pipeline is therefore expected to raise a {@link RuntimeException}. The table is dropped
 * afterwards regardless of the outcome.
 */
@Test
public void testWriteWithoutPsWithNonNullableTableField() throws Exception {
  final int rowsToAdd = 10;

  Schema.Builder schemaBuilder = Schema.builder();
  schemaBuilder.addField(Schema.Field.of("column_boolean", Schema.FieldType.BOOLEAN));
  schemaBuilder.addField(Schema.Field.of("column_string", Schema.FieldType.STRING));
  Schema schema = schemaBuilder.build();

  String tableName = DatabaseTestHelper.getTestTableName("UT_WRITE");
  StringBuilder stmt = new StringBuilder("CREATE TABLE ");
  stmt.append(tableName);
  stmt.append(" (");
  stmt.append("column_boolean       BOOLEAN,");
  stmt.append("column_int           INTEGER NOT NULL");
  stmt.append(" )");
  DatabaseTestHelper.createTableWithStatement(dataSource, stmt.toString());

  // Register the expectation before the code that should throw. Previously this was done in the
  // finally block after cleanup, which only worked because finally runs before the exception
  // leaves the method, and was skipped entirely if the cleanup itself failed.
  thrown.expect(RuntimeException.class);
  try {
    ArrayList<Row> data = getRowsToWrite(rowsToAdd, schema);
    pipeline
        .apply(Create.of(data))
        .setRowSchema(schema)
        .apply(
            JdbcIO.<Row>write()
                .withDataSourceConfiguration(
                    JdbcIO.DataSourceConfiguration.create(
                        "org.apache.derby.jdbc.ClientDriver",
                        "jdbc:derby://localhost:" + port + "/target/beam"))
                .withBatchSize(10L)
                .withTable(tableName));
    pipeline.run();
  } finally {
    // Always drop the test table, whether or not the pipeline failed as expected.
    DatabaseTestHelper.deleteTable(dataSource, tableName);
  }
}
 
Example 11
Source File: AddFields.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Computes the {@link AddFieldsInformation} that describes adding {@code fieldsToAdd} to {@code
 * inputSchema}.
 *
 * <p>The result is built from three pieces: the output row type (existing fields first, then new
 * fields at this level, then brand-new nested roots), the default values of the fields added at
 * this level, and a list with one slot per output field that holds the recursively computed
 * information for fields that received nested additions ({@code null} in all other slots).
 *
 * <p>Nested additions are resolved by calling a {@code getAddFieldsInformation} overload that
 * takes a field type; that overload is not visible in this snippet.
 *
 * @param inputSchema the schema being extended
 * @param fieldsToAdd the new fields, which may address this level or nested levels
 * @return the output type plus the default values and nested information for the added fields
 */
private static AddFieldsInformation getAddFieldsInformation(
    Schema inputSchema, Collection<NewField> fieldsToAdd) {
  // New fields whose descriptor lists at least one directly accessed field belong to this level.
  List<NewField> newTopLevelFields =
      fieldsToAdd.stream()
          .filter(n -> !n.getDescriptor().getFieldsAccessed().isEmpty())
          .collect(Collectors.toList());
  // New fields whose descriptor lists nested accesses are handled by recursion further below.
  List<NewField> newNestedFields =
      fieldsToAdd.stream()
          .filter(n -> !n.getDescriptor().getNestedFieldsAccessed().isEmpty())
          .collect(Collectors.toList());
  // Group all nested fields together by the field at the current level. For example, if adding
  // a.b, a.c, a.d this map will contain a -> {a.b, a.c, a.d}.
  Multimap<String, NewField> newNestedFieldsMap =
      Multimaps.index(newNestedFields, NewField::getName);

  // Maps an output field index to the nested information computed for that field.
  Map<Integer, AddFieldsInformation> resolvedNestedNewValues = Maps.newHashMap();
  Schema.Builder builder = Schema.builder();
  for (int i = 0; i < inputSchema.getFieldCount(); ++i) {
    Schema.Field field = inputSchema.getField(i);
    Collection<NewField> nestedFields = newNestedFieldsMap.get(field.getName());

    // If this field is a nested field and new subfields are added further down the tree, add
    // those subfields before adding to the current schema. Otherwise we just add this field as
    // is to the new schema.
    if (!nestedFields.isEmpty()) {
      nestedFields = nestedFields.stream().map(NewField::descend).collect(Collectors.toList());

      AddFieldsInformation nestedInformation =
          getAddFieldsInformation(field.getType(), nestedFields);
      field = field.withType(nestedInformation.getOutputFieldType());
      resolvedNestedNewValues.put(i, nestedInformation);
    }
    builder.addField(field);
  }

  // Add any new fields at this level.
  List<Object> newValuesThisLevel = new ArrayList<>(newTopLevelFields.size());
  for (NewField newField : newTopLevelFields) {
    builder.addField(newField.getName(), newField.getFieldType());
    newValuesThisLevel.add(newField.getDefaultValue());
  }

  // If there are any nested field additions left that are not already processed, that means
  // that the root of the nested field doesn't exist in the schema. In this case we'll walk down
  // the new nested fields and recursively create each nested level as necessary.
  for (Map.Entry<String, Collection<NewField>> newNested :
      newNestedFieldsMap.asMap().entrySet()) {
    String fieldName = newNested.getKey();

    // If the user specifies the same nested field twice in different ways (e.g. a[].x, a{}.x),
    // more than one distinct descriptor remains here and Iterables.getOnlyElement rejects it;
    // exactly one distinct descriptor per root field is required.
    FieldAccessDescriptor.FieldDescriptor fieldDescriptor =
        Iterables.getOnlyElement(
            newNested.getValue().stream()
                .map(NewField::getFieldDescriptor)
                .distinct()
                .collect(Collectors.toList()));
    // Start from a nullable empty row and wrap it in one nullable array per qualifier.
    // NOTE(review): this type is built (and map qualifiers are rejected) before the check below
    // for whether the field already exists in inputSchema - confirm that ordering is intended.
    FieldType fieldType = Schema.FieldType.row(Schema.of()).withNullable(true);
    for (Qualifier qualifier : fieldDescriptor.getQualifiers()) {
      // The problem with adding recursive map fields is that we don't know what the map key
      // type should be. In a field descriptor of the form mapField{}.subField, the subField is
      // assumed to be in the map value. Since in this code path the mapField field does not
      // already exist this means we need to create the new map field, and we have no way of
      // knowing what type the key should be. Alternatives would be to always create a default
      // key type (e.g. FieldType.STRING) or extend our selector syntax to allow specifying key
      // types.
      checkArgument(
          !qualifier.getKind().equals(Qualifier.Kind.MAP), "Map qualifiers not supported here");
      fieldType = FieldType.array(fieldType).withNullable(true);
    }
    if (!inputSchema.hasField(fieldName)) {
      // This is a brand-new nested field with no matching field in the input schema. We will
      // recursively create a nested schema to match it.
      Collection<NewField> nestedNewFields =
          newNested.getValue().stream().map(NewField::descend).collect(Collectors.toList());
      AddFieldsInformation addFieldsInformation =
          getAddFieldsInformation(fieldType, nestedNewFields);
      builder.addField(fieldName, addFieldsInformation.getOutputFieldType());
      // presumably getLastFieldId() is the index of the field just added - verify against
      // Schema.Builder.
      resolvedNestedNewValues.put(builder.getLastFieldId(), addFieldsInformation);
    }
  }
  Schema schema = builder.build();

  // Expand the sparse index -> information map into a list with one slot per output field;
  // slots without nested information stay null.
  List<AddFieldsInformation> nestedNewValueList =
      new ArrayList<>(Collections.nCopies(schema.getFieldCount(), null));
  for (Map.Entry<Integer, AddFieldsInformation> entry : resolvedNestedNewValues.entrySet()) {
    nestedNewValueList.set(entry.getKey(), entry.getValue());
  }
  return AddFieldsInformation.of(
      Schema.FieldType.row(schema), newValuesThisLevel, nestedNewValueList);
}
 
Example 12
Source File: JdbcIOTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Writes rows through {@code JdbcIO.write()} using only a row schema and a table name (no
 * prepared-statement setter) and checks that the expected number of rows lands in the table.
 *
 * <p>The schema and the table declare the same twelve columns. The table is dropped afterwards
 * regardless of the outcome.
 */
@Test
public void testWriteWithoutPreparedStatement() throws Exception {
  final int rowsToAdd = 10;

  Schema schema =
      Schema.builder()
          .addField(Schema.Field.of("column_boolean", Schema.FieldType.BOOLEAN))
          .addField(Schema.Field.of("column_string", Schema.FieldType.STRING))
          .addField(Schema.Field.of("column_int", Schema.FieldType.INT32))
          .addField(Schema.Field.of("column_long", Schema.FieldType.INT64))
          .addField(Schema.Field.of("column_float", Schema.FieldType.FLOAT))
          .addField(Schema.Field.of("column_double", Schema.FieldType.DOUBLE))
          .addField(Schema.Field.of("column_bigdecimal", Schema.FieldType.DECIMAL))
          .addField(Schema.Field.of("column_date", LogicalTypes.JDBC_DATE_TYPE))
          .addField(Schema.Field.of("column_time", LogicalTypes.JDBC_TIME_TYPE))
          .addField(
              Schema.Field.of(
                  "column_timestamptz", LogicalTypes.JDBC_TIMESTAMP_WITH_TIMEZONE_TYPE))
          .addField(Schema.Field.of("column_timestamp", Schema.FieldType.DATETIME))
          .addField(Schema.Field.of("column_short", Schema.FieldType.INT16))
          .build();

  String tableName = DatabaseTestHelper.getTestTableName("UT_WRITE_PS");
  // One column per schema field; the trailing comment names the matching Java type.
  String createTable =
      "CREATE TABLE "
          + tableName
          + " ("
          + "column_boolean       BOOLEAN," // boolean
          + "column_string        VARCHAR(254)," // String
          + "column_int           INTEGER," // int
          + "column_long          BIGINT," // long
          + "column_float         REAL," // float
          + "column_double        DOUBLE PRECISION," // double
          + "column_bigdecimal    DECIMAL(13,0)," // BigDecimal
          + "column_date          DATE," // Date
          + "column_time          TIME," // Time
          + "column_timestamptz   TIMESTAMP," // Timestamp
          + "column_timestamp     TIMESTAMP," // Timestamp
          + "column_short         SMALLINT" // short
          + " )";
  DatabaseTestHelper.createTableWithStatement(dataSource, createTable);
  try {
    ArrayList<Row> rows = getRowsToWrite(rowsToAdd, schema);
    pipeline
        .apply(Create.of(rows))
        .setRowSchema(schema)
        .apply(
            JdbcIO.<Row>write()
                .withDataSourceConfiguration(
                    JdbcIO.DataSourceConfiguration.create(
                        "org.apache.derby.jdbc.ClientDriver",
                        "jdbc:derby://localhost:" + port + "/target/beam"))
                .withBatchSize(10L)
                .withTable(tableName));
    pipeline.run();
    assertRowCount(tableName, rowsToAdd);
  } finally {
    // Always drop the test table so later tests start clean.
    DatabaseTestHelper.deleteTable(dataSource, tableName);
  }
}