Java Code Examples for org.apache.beam.sdk.annotations.Experimental.Kind#SCHEMAS

The following examples show how to use org.apache.beam.sdk.annotations.Experimental.Kind#SCHEMAS. You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: ProtoByteBuddyUtils.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the {@link FieldValueGetter}s for the given class and schema.
 *
 * <p>One getter is created for every {@link FieldValueTypeInformation} that {@code
 * fieldValueTypeSupplier} reports for the class, in that order. The result is memoized in {@code
 * CACHED_GETTERS} under the (class, schema) pair, so the getters are only built the first time a
 * given pair is requested.
 */
@Experimental(Kind.SCHEMAS)
public static List<FieldValueGetter> getGetters(
    Class<?> clazz,
    Schema schema,
    FieldValueTypeSupplier fieldValueTypeSupplier,
    TypeConversionsFactory typeConversionsFactory) {
  // Looked up before consulting the cache, exactly once per call.
  Multimap<String, Method> methodsByName = ReflectUtils.getMethodsMap(clazz);
  ClassWithSchema cacheKey = ClassWithSchema.create(clazz, schema);
  return CACHED_GETTERS.computeIfAbsent(
      cacheKey,
      ignoredKey ->
          fieldValueTypeSupplier.get(clazz, schema).stream()
              .map(
                  fieldType ->
                      createGetter(
                          fieldType,
                          typeConversionsFactory,
                          clazz,
                          methodsByName,
                          schema.getField(fieldType.getName()),
                          fieldValueTypeSupplier))
              .collect(Collectors.toList()));
}
 
Example 2
Source File: BigQueryUtils.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Tries to convert a JSON {@link TableRow} returned by BigQuery into a Beam {@link Row}.
 *
 * <p>Cells are matched to {@code rowSchema} fields by name, using the column positions declared
 * in {@code bqSchema}. Only supports basic types and arrays. Doesn't support date types.
 */
@Experimental(Kind.SCHEMAS)
public static Row toBeamRow(Schema rowSchema, TableSchema bqSchema, TableRow jsonBqRow) {
  List<TableFieldSchema> bqColumns = bqSchema.getFields();

  // BigQuery column name -> position of that column in the BigQuery schema.
  Map<String, Integer> positionByName =
      IntStream.range(0, bqColumns.size())
          .boxed()
          .collect(toMap(pos -> bqColumns.get(pos).getName(), pos -> pos));

  // Raw cell values, reordered to follow the field order of the Beam schema.
  List<Object> rawCells =
      rowSchema.getFields().stream()
          .map(
              beamField -> {
                Integer pos = positionByName.get(beamField.getName());
                return jsonBqRow.getF().get(pos).getV();
              })
          .collect(toList());

  // Convert each raw cell according to the type of the Beam field at the same position.
  return IntStream.range(0, rowSchema.getFieldCount())
      .mapToObj(pos -> toBeamValue(rowSchema.getField(pos).getType(), rawCells.get(pos)))
      .collect(toRow(rowSchema));
}
 
Example 3
Source File: BigQueryQuerySourceDef.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * {@inheritDoc}
 *
 * <p>The schema is taken from the job statistics returned by {@link
 * BigQueryQueryHelper#dryRunQueryIfNeeded} and converted with {@link
 * BigQueryUtils#fromTableSchema}.
 *
 * @throws BigQuerySchemaRetrievalException if the statistics cannot be obtained, the calling
 *     thread is interrupted, or any part of the statistics needed to reach the schema is null
 */
@Experimental(Kind.SCHEMAS)
@Override
public Schema getBeamSchema(BigQueryOptions bqOptions) {
  try {
    JobStatistics stats =
        BigQueryQueryHelper.dryRunQueryIfNeeded(
            bqServices,
            bqOptions,
            dryRunJobStats,
            query.get(),
            flattenResults,
            useLegacySql,
            location);
    TableSchema tableSchema = stats.getQuery().getSchema();
    return BigQueryUtils.fromTableSchema(tableSchema);
  } catch (InterruptedException e) {
    // Restore the interrupt flag so code further up the stack can still observe the
    // interruption after it has been translated into an unchecked exception.
    Thread.currentThread().interrupt();
    throw new BigQuerySchemaRetrievalException(
        "Exception while trying to retrieve schema of query", e);
  } catch (IOException | NullPointerException e) {
    // NullPointerException is caught on purpose: it covers missing pieces in the
    // stats -> query -> schema chain and is reported as a schema retrieval failure.
    throw new BigQuerySchemaRetrievalException(
        "Exception while trying to retrieve schema of query", e);
  }
}
 
Example 4
Source File: ProtoByteBuddyUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link SchemaUserTypeCreator} backed by the generated builder of a proto class.
 *
 * <p>One {@link FieldValueSetter} is created per schema field, in schema order, and handed to
 * {@code createBuilderCreator} together with the builder class. The {@code
 * fieldValueTypeSupplier} argument is not read by this method.
 *
 * @return the creator, or {@code null} when {@code getProtoGeneratedBuilder} finds no builder
 *     class for {@code protoClass}
 */
@Experimental(Kind.SCHEMAS)
@Nullable
public static <ProtoBuilderT extends MessageLite.Builder> SchemaUserTypeCreator getBuilderCreator(
    Class<?> protoClass, Schema schema, FieldValueTypeSupplier fieldValueTypeSupplier) {
  Class<ProtoBuilderT> generatedBuilder = getProtoGeneratedBuilder(protoClass);
  if (generatedBuilder != null) {
    Multimap<String, Method> builderMethods = ReflectUtils.getMethodsMap(generatedBuilder);
    List<FieldValueSetter<ProtoBuilderT, Object>> fieldSetters =
        schema.getFields().stream()
            .map(field -> getProtoFieldValueSetter(field, builderMethods, generatedBuilder))
            .collect(Collectors.toList());
    return createBuilderCreator(protoClass, generatedBuilder, fieldSetters, schema);
  }
  return null;
}
 
Example 5
Source File: AvroIO.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Attaches a Beam schema derived from the Avro class/schema to the given collection.
 *
 * <p>If {@code AvroUtils.getSchema} yields no Beam schema the collection is returned untouched;
 * otherwise the schema is set along with the Avro to-row and from-row conversion functions.
 *
 * @return the same {@code pc} instance that was passed in
 */
@Experimental(Kind.SCHEMAS)
private static <T> PCollection<T> setBeamSchema(
    PCollection<T> pc, Class<T> clazz, @Nullable Schema schema) {
  org.apache.beam.sdk.schemas.Schema inferredSchema =
      org.apache.beam.sdk.schemas.utils.AvroUtils.getSchema(clazz, schema);
  if (inferredSchema == null) {
    return pc;
  }
  pc.setSchema(
      inferredSchema,
      TypeDescriptor.of(clazz),
      org.apache.beam.sdk.schemas.utils.AvroUtils.getToRowFunction(clazz, schema),
      org.apache.beam.sdk.schemas.utils.AvroUtils.getFromRowFunction(clazz));
  return pc;
}
 
Example 6
Source File: Pipeline.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the {@link SchemaRegistry} held by this object, creating it with {@link
 * SchemaRegistry#createDefault()} the first time it is requested.
 */
@Experimental(Kind.SCHEMAS)
public SchemaRegistry getSchemaRegistry() {
  if (schemaRegistry != null) {
    return schemaRegistry;
  }
  schemaRegistry = SchemaRegistry.createDefault();
  return schemaRegistry;
}
 
Example 7
Source File: PCollection.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the to-row function of the attached schema.
 *
 * @throws IllegalStateException if {@link #hasSchema()} is false
 */
@Experimental(Kind.SCHEMAS)
public SerializableFunction<T, Row> getToRowFunction() {
  if (hasSchema()) {
    SchemaCoder<T> schemaCoder = (SchemaCoder<T>) getCoder();
    return schemaCoder.getToRowFunction();
  }
  throw new IllegalStateException("Cannot call getToRowFunction when there is no schema");
}
 
Example 8
Source File: PCollection.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the attached schema.
 *
 * @return the {@link Schema} carried by this collection's {@link SchemaCoder}
 * @throws IllegalStateException if {@link #hasSchema()} is false
 */
@Experimental(Kind.SCHEMAS)
public Schema getSchema() {
  if (!hasSchema()) {
    throw new IllegalStateException("Cannot call getSchema when there is no schema");
  }
  // Parameterized cast (rather than the raw SchemaCoder type) to match getToRowFunction().
  return ((SchemaCoder<T>) getCoder()).getSchema();
}
 
Example 9
Source File: PubsubIO.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a {@link PTransform} that continuously reads binary encoded Avro messages into the Avro
 * {@link GenericRecord} type.
 *
 * <p>A Beam schema is inferred from the supplied Avro schema and attached through a {@link
 * SchemaCoder}, which allows the output to be used by SQL and by the schema-transform library.
 */
@Experimental(Kind.SCHEMAS)
public static Read<GenericRecord> readAvroGenericRecords(org.apache.avro.Schema avroSchema) {
  Schema beamSchema = AvroUtils.getSchema(GenericRecord.class, avroSchema);
  // Payloads are decoded with an Avro coder; the resulting elements carry a schema coder.
  AvroCoder<GenericRecord> payloadCoder = AvroCoder.of(GenericRecord.class, avroSchema);
  return Read.newBuilder(parsePayloadUsingCoder(payloadCoder))
      .setCoder(
          SchemaCoder.of(
              beamSchema,
              TypeDescriptor.of(GenericRecord.class),
              AvroUtils.getToRowFunction(GenericRecord.class, avroSchema),
              AvroUtils.getFromRowFunction(GenericRecord.class)))
      .build();
}
 
Example 10
Source File: PCollection.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Sets a {@link Schema} on this {@link PCollection}.
 *
 * <p>The schema, type descriptor and both conversion functions are bundled into a {@link
 * SchemaCoder}, which is then installed through {@code setCoder}.
 *
 * @return whatever {@code setCoder} returns for the new coder
 */
@Experimental(Kind.SCHEMAS)
public PCollection<T> setSchema(
    Schema schema,
    TypeDescriptor<T> typeDescriptor,
    SerializableFunction<T, Row> toRowFunction,
    SerializableFunction<Row, T> fromRowFunction) {
  SchemaCoder<T> schemaCoder =
      SchemaCoder.of(schema, typeDescriptor, toRowFunction, fromRowFunction);
  return setCoder(schemaCoder);
}
 
Example 11
Source File: BigQueryTableSourceDef.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * {@inheritDoc}
 *
 * <p>The table named by {@code getTableReference(bqOptions)} is fetched through the dataset
 * service and its {@link TableSchema} is converted with {@link BigQueryUtils#fromTableSchema}.
 *
 * @throws BigQuerySchemaRetrievalException if the table cannot be fetched, the calling thread is
 *     interrupted, or any part of the lookup needed to reach the schema is null
 */
@Experimental(Kind.SCHEMAS)
@Override
public Schema getBeamSchema(BigQueryOptions bqOptions) {
  try {
    TableReference tableRef = getTableReference(bqOptions);
    TableSchema tableSchema =
        bqServices.getDatasetService(bqOptions).getTable(tableRef).getSchema();
    return BigQueryUtils.fromTableSchema(tableSchema);
  } catch (InterruptedException e) {
    // Restore the interrupt flag so code further up the stack can still observe the
    // interruption after it has been translated into an unchecked exception.
    Thread.currentThread().interrupt();
    throw new BigQuerySchemaRetrievalException("Exception while trying to retrieve schema", e);
  } catch (IOException | NullPointerException e) {
    // NullPointerException is caught on purpose: it covers a missing table or schema in the
    // lookup chain and is reported as a schema retrieval failure.
    throw new BigQuerySchemaRetrievalException("Exception while trying to retrieve schema", e);
  }
}
 
Example 12
Source File: BigQueryUtils.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Converts a BigQuery {@link TableSchema} to a Beam {@link Schema} by delegating its field list
 * to {@code fromTableFieldSchema}.
 */
@Experimental(Kind.SCHEMAS)
public static Schema fromTableSchema(TableSchema tableSchema) {
  List<TableFieldSchema> tableFields = tableSchema.getFields();
  return fromTableFieldSchema(tableFields);
}
 
Example 13
Source File: Create.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns a {@link Create.Values} PTransform like this one that uses a {@link SchemaCoder} built
 * from the given {@code Schema} to represent objects.
 */
@Experimental(Kind.SCHEMAS)
public Values<T> withRowSchema(Schema schema) {
  // Unchecked: SchemaCoder.of(schema) is cast to a coder for T.
  SchemaCoder<T> rowCoder = (SchemaCoder<T>) SchemaCoder.of(schema);
  return withCoder(rowCoder);
}
 
Example 14
Source File: BigQueryIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the configured {@link FromBeamRowFunction}; may be {@code null}.
 *
 * <p>NOTE(review): presumably this converts Beam rows back into {@code T} - confirm against the
 * code that sets and reads it.
 */
@Nullable
@Experimental(Kind.SCHEMAS)
abstract FromBeamRowFunction<T> getFromBeamRowFn();
 
Example 15
Source File: BigQueryIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the configured {@link TypeDescriptor} for {@code T}; may be {@code null}.
 *
 * <p>NOTE(review): presumably populated through the builder's {@code setTypeDescriptor} -
 * confirm.
 */
@Nullable
@Experimental(Kind.SCHEMAS)
abstract TypeDescriptor<T> getTypeDescriptor();
 
Example 16
Source File: BigQueryIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Builder setter for the {@link TypeDescriptor} of {@code T}.
 *
 * <p>NOTE(review): presumably returns this builder so calls can be chained - confirm against the
 * generated implementation.
 */
@Experimental(Kind.SCHEMAS)
abstract Builder<T> setTypeDescriptor(TypeDescriptor<T> typeDescriptor);
 
Example 17
Source File: TestStream.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@link Row} builder whose coder is the {@link SchemaCoder} for the given schema, by
 * delegating to the coder-based {@code create} overload.
 */
@Experimental(Kind.SCHEMAS)
public static Builder<Row> create(Schema schema) {
  SchemaCoder<Row> rowCoder = SchemaCoder.of(schema);
  return create(rowCoder);
}
 
Example 18
Source File: BigQueryIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the "use Beam schema" setting as a boxed {@link Boolean}.
 *
 * <p>NOTE(review): presumably indicates whether the Beam schema of the input should be used -
 * confirm where this flag is consumed.
 */
@Experimental(Kind.SCHEMAS)
abstract Boolean getUseBeamSchema();
 
Example 19
Source File: TestBigQuery.java    From beam with Apache License 2.0 4 votes vote down vote up
/** Returns a new {@link RowsAssertion} constructed with the given row schema. */
@Experimental(Kind.SCHEMAS)
public RowsAssertion assertThatAllRows(Schema rowSchema) {
  RowsAssertion assertion = new RowsAssertion(rowSchema);
  return assertion;
}
 
Example 20
Source File: JsonToRow.java    From beam with Apache License 2.0 2 votes vote down vote up
/**
 * Enables exception reporting support. When this is used, errors in the parsing layer are
 * returned as Row objects within a {@link ParseResult}.
 *
 * <p>Obtain a {@link ParseResult} by applying the transform returned from {@link
 * JsonToRow#withExceptionReporting(Schema)}:
 *
 * <p>{@code ParseResult results =
 * jsonPersons.apply(JsonToRow.withExceptionReporting(PERSON_SCHEMA));}
 *
 * <p>Then access the parsed results via {@link ParseResult#getResults()}:
 *
 * <p>{@code PCollection<Row> personRows = results.getResults()}
 *
 * <p>And access the lines that failed to parse via {@link ParseResult#getFailedToParseLines()}:
 *
 * <p>{@code PCollection<Row> errorsLines = results.getFailedToParseLines()}
 *
 * <p>This will produce a Row with Schema {@link JsonToRowWithErrFn#ERROR_ROW_SCHEMA}.
 *
 * <p>To access the reason for the failure you will need to first enable extended error reporting
 * with {@link JsonToRowWithErrFn#withExtendedErrorInfo()}:
 *
 * <p>{@code ParseResult results =
 * jsonPersons.apply(JsonToRow.withExceptionReporting(PERSON_SCHEMA).withExtendedErrorInfo());}
 *
 * <p>This will provide access to the reason for the parse failure. The call to {@link
 * ParseResult#getFailedToParseLines()} will then produce a Row with Schema {@link
 * JsonToRowWithErrFn#ERROR_ROW_WITH_ERR_MSG_SCHEMA}.
 *
 * @param rowSchema the schema passed to {@link JsonToRowWithErrFn#forSchema}
 * @return {@link JsonToRowWithErrFn}
 */
@Experimental(Kind.SCHEMAS)
public static JsonToRowWithErrFn withExceptionReporting(Schema rowSchema) {
  return JsonToRowWithErrFn.forSchema(rowSchema);
}