org.apache.beam.sdk.annotations.Experimental Java Examples

The following examples show how to use org.apache.beam.sdk.annotations.Experimental. Each example names the source file and project it was taken from.
Example #1
Source File: BigQueryQuerySourceDef.java    From beam with Apache License 2.0
/** {@inheritDoc} */
@Experimental(Kind.SCHEMAS)
@Override
public Schema getBeamSchema(BigQueryOptions bqOptions) {
  try {
    JobStatistics stats =
        BigQueryQueryHelper.dryRunQueryIfNeeded(
            bqServices,
            bqOptions,
            dryRunJobStats,
            query.get(),
            flattenResults,
            useLegacySql,
            location);
    TableSchema tableSchema = stats.getQuery().getSchema();
    return BigQueryUtils.fromTableSchema(tableSchema);
  } catch (IOException | InterruptedException | NullPointerException e) {
    throw new BigQuerySchemaRetrievalException(
        "Exception while trying to retrieve schema of query", e);
  }
}
 
Example #2
Source File: TestBigQuery.java    From beam with Apache License 2.0
@Experimental(Kind.SCHEMAS)
public TableDataInsertAllResponse insertRows(Schema rowSchema, Row... rows) throws IOException {
  List<Rows> bqRows =
      Arrays.stream(rows)
          .map(row -> new Rows().setJson(BigQueryUtils.toTableRow(row)))
          .collect(ImmutableList.toImmutableList());
  Bigquery bq = newBigQueryClient(pipelineOptions);

  return bq.tabledata()
      .insertAll(
          pipelineOptions.getProject(),
          pipelineOptions.getTargetDataset(),
          table.getTableReference().getTableId(),
          new TableDataInsertAllRequest().setRows(bqRows))
      .execute();
}
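
A minimal test-side sketch of how this helper might be used; the TestBigQuery rule setup, schema, and row values below are illustrative assumptions, not part of the snippet above.

private static final Schema NAME_SCHEMA = Schema.builder().addStringField("name").build();

@Rule public transient TestBigQuery bigQuery = TestBigQuery.create(NAME_SCHEMA);

@Test
public void insertsOneRow() throws IOException {
  // Insert a single Row into the temporary table managed by the TestBigQuery rule.
  bigQuery.insertRows(
      NAME_SCHEMA, Row.withSchema(NAME_SCHEMA).addValue("alice").build());
}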
 
Example #3
Source File: BigQueryUtils.java    From beam with Apache License 2.0
@Experimental(Kind.SCHEMAS)
public static Row toBeamRow(GenericRecord record, Schema schema, ConversionOptions options) {
  List<Object> valuesInOrder =
      schema.getFields().stream()
          .map(
              field -> {
                try {
                  return convertAvroFormat(field.getType(), record.get(field.getName()), options);
                } catch (Exception cause) {
                  throw new IllegalArgumentException(
                      "Error converting field " + field + ": " + cause.getMessage(), cause);
                }
              })
          .collect(toList());

  return Row.withSchema(schema).addValues(valuesInOrder).build();
}
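
A hedged usage sketch: the Avro schema, record contents, and matching Beam schema below are invented for illustration.

org.apache.avro.Schema avroSchema =
    new org.apache.avro.Schema.Parser()
        .parse(
            "{\"type\":\"record\",\"name\":\"User\",\"fields\":["
                + "{\"name\":\"name\",\"type\":\"string\"},"
                + "{\"name\":\"age\",\"type\":\"long\"}]}");
GenericRecord record =
    new GenericRecordBuilder(avroSchema).set("name", "alice").set("age", 42L).build();
Schema beamSchema = Schema.builder().addStringField("name").addInt64Field("age").build();
// Convert the Avro record to a Beam Row using default conversion options.
Row row =
    BigQueryUtils.toBeamRow(
        record, beamSchema, BigQueryUtils.ConversionOptions.builder().build());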
 
Example #4
Source File: BigQueryUtils.java    From beam with Apache License 2.0
/**
 * Tries to parse the JSON {@link TableRow} from BigQuery.
 *
 * <p>Only supports basic types and arrays. Doesn't support date types.
 */
@Experimental(Kind.SCHEMAS)
public static Row toBeamRow(Schema rowSchema, TableSchema bqSchema, TableRow jsonBqRow) {
  List<TableFieldSchema> bqFields = bqSchema.getFields();

  Map<String, Integer> bqFieldIndices =
      IntStream.range(0, bqFields.size())
          .boxed()
          .collect(toMap(i -> bqFields.get(i).getName(), i -> i));

  List<Object> rawJsonValues =
      rowSchema.getFields().stream()
          .map(field -> bqFieldIndices.get(field.getName()))
          .map(index -> jsonBqRow.getF().get(index).getV())
          .collect(toList());

  return IntStream.range(0, rowSchema.getFieldCount())
      .boxed()
      .map(index -> toBeamValue(rowSchema.getField(index).getType(), rawJsonValues.get(index)))
      .collect(toRow(rowSchema));
}
 
Example #5
Source File: BigQueryTableSourceDef.java    From beam with Apache License 2.0
/** {@inheritDoc} */
@Experimental(Kind.SCHEMAS)
@Override
public Schema getBeamSchema(BigQueryOptions bqOptions) {
  try {
    TableReference tableRef = getTableReference(bqOptions);
    TableSchema tableSchema =
        bqServices.getDatasetService(bqOptions).getTable(tableRef).getSchema();
    return BigQueryUtils.fromTableSchema(tableSchema);
  } catch (IOException | InterruptedException | NullPointerException e) {
    throw new BigQuerySchemaRetrievalException("Exception while trying to retrieve schema", e);
  }
}
 
Example #6
Source File: PCollection.java    From beam with Apache License 2.0
/** Returns the attached schema's toRowFunction. */
@Experimental(Kind.SCHEMAS)
public SerializableFunction<T, Row> getToRowFunction() {
  if (!hasSchema()) {
    throw new IllegalStateException("Cannot call getToRowFunction when there is no schema");
  }
  return ((SchemaCoder<T>) getCoder()).getToRowFunction();
}
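
A sketch of the intended call pattern, assuming a PCollection that already carries a schema; the element type and upstream step are hypothetical.

PCollection<Purchase> purchases = readPurchases(pipeline); // hypothetical schema-aware source
if (purchases.hasSchema()) {
  // Safe to call only after the hasSchema() check; otherwise it throws IllegalStateException.
  SerializableFunction<Purchase, Row> toRow = purchases.getToRowFunction();
}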
 
Example #7
Source File: BigQueryIO.java    From beam with Apache License 2.0
/**
 * Sets the functions to convert elements to/from {@link Row} objects.
 *
 * <p>Setting these conversion functions is necessary to enable {@link Schema} support.
 */
@Experimental(Kind.SCHEMAS)
public TypedRead<T> withBeamRowConverters(
    TypeDescriptor<T> typeDescriptor,
    ToBeamRowFunction<T> toRowFn,
    FromBeamRowFunction<T> fromRowFn) {
  return toBuilder()
      .setTypeDescriptor(typeDescriptor)
      .setToBeamRowFn(toRowFn)
      .setFromBeamRowFn(fromRowFn)
      .build();
}
 
Example #8
Source File: SpannerIO.java    From beam with Apache License 2.0
/**
 * Creates an uninitialized instance of {@link Write}. Before use, the {@link Write} must be
 * configured with a {@link Write#withInstanceId} and {@link Write#withDatabaseId} that identify
 * the Cloud Spanner database being written.
 */
@Experimental
public static Write write() {
  return new AutoValue_SpannerIO_Write.Builder()
      .setSpannerConfig(SpannerConfig.create())
      .setBatchSizeBytes(DEFAULT_BATCH_SIZE_BYTES)
      .setMaxNumMutations(DEFAULT_MAX_NUM_MUTATIONS)
      .setMaxNumRows(DEFAULT_MAX_NUM_ROWS)
      .setFailureMode(FailureMode.FAIL_FAST)
      .build();
}
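
A hedged sketch of configuring the returned Write before applying it; the project, instance, and database IDs are placeholders, and the upstream step is hypothetical.

PCollection<Mutation> mutations = buildMutations(pipeline); // hypothetical upstream step
mutations.apply(
    "WriteToSpanner",
    SpannerIO.write()
        .withProjectId("my-project")
        .withInstanceId("my-instance")   // required before use, per the javadoc
        .withDatabaseId("my-database")); // required before use, per the javadoc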
 
Example #9
Source File: DataflowPipelineWorkerPoolOptions.java    From beam with Apache License 2.0
/**
 * Specifies whether worker pools should be started with public IP addresses.
 *
 * <p>WARNING: This feature is experimental. You must be allowlisted to use it.
 */
@Description(
    "Specifies whether worker pools should be started with public IP addresses. WARNING: "
        + "This feature is experimental. You must be allowlisted to use it.")
@Experimental
@JsonIgnore
@Nullable
Boolean getUsePublicIps();
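
Since this is a pipeline option, it is typically set from the command line; a minimal sketch (the flag value is illustrative):

DataflowPipelineWorkerPoolOptions options =
    PipelineOptionsFactory.fromArgs("--usePublicIps=false")
        .as(DataflowPipelineWorkerPoolOptions.class);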
 
Example #10
Source File: PubsubIO.java    From beam with Apache License 2.0
/**
 * Returns a {@link PTransform} that continuously reads binary encoded Avro messages into the Avro
 * {@link GenericRecord} type.
 *
 * <p>Beam will infer a Beam schema from the Avro schema. This allows the output to be used by
 * SQL and by the schema-transform library.
 */
@Experimental(Kind.SCHEMAS)
public static Read<GenericRecord> readAvroGenericRecords(org.apache.avro.Schema avroSchema) {
  Schema schema = AvroUtils.getSchema(GenericRecord.class, avroSchema);
  AvroCoder<GenericRecord> coder = AvroCoder.of(GenericRecord.class, avroSchema);
  return Read.newBuilder(parsePayloadUsingCoder(coder))
      .setCoder(
          SchemaCoder.of(
              schema,
              TypeDescriptor.of(GenericRecord.class),
              AvroUtils.getToRowFunction(GenericRecord.class, avroSchema),
              AvroUtils.getFromRowFunction(GenericRecord.class)))
      .build();
}
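
A usage sketch; the Avro schema loader and subscription path are placeholders, not part of the Beam source.

org.apache.avro.Schema avroSchema = loadAvroSchema(); // hypothetical helper
PCollection<GenericRecord> records =
    pipeline.apply(
        "ReadAvroFromPubsub",
        PubsubIO.readAvroGenericRecords(avroSchema)
            .fromSubscription("projects/my-project/subscriptions/my-subscription"));
// The resulting PCollection carries a Beam schema, so it can feed SQL transforms directly.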
 
Example #11
Source File: Create.java    From beam with Apache License 2.0
/**
 * Returns a {@link Create.TimestampedValues} PTransform like this one that uses the given
 * {@code Schema} to represent objects.
 */
@Experimental(Kind.SCHEMAS)
public TimestampedValues<T> withSchema(
    Schema schema,
    TypeDescriptor<T> typeDescriptor,
    SerializableFunction<T, Row> toRowFunction,
    SerializableFunction<Row, T> fromRowFunction) {
  return withCoder(SchemaCoder.of(schema, typeDescriptor, toRowFunction, fromRowFunction));
}
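
A sketch of attaching a schema to timestamped elements; the Event class and the converter lambdas are hypothetical stand-ins.

Schema eventSchema = Schema.builder().addStringField("id").build();
PCollection<Event> events =
    pipeline.apply(
        Create.timestamped(TimestampedValue.of(new Event("a"), Instant.now()))
            .withSchema(
                eventSchema,
                TypeDescriptor.of(Event.class),
                // Hypothetical to/from Row conversions for the Event class.
                event -> Row.withSchema(eventSchema).addValue(event.id).build(),
                row -> new Event(row.getString("id"))));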
 
Example #12
Source File: JdbcIO.java    From beam with Apache License 2.0
/** Read Beam {@link Row}s from a JDBC data source. */
@Experimental(Kind.SCHEMAS)
public static ReadRows readRows() {
  return new AutoValue_JdbcIO_ReadRows.Builder()
      .setFetchSize(DEFAULT_FETCH_SIZE)
      .setOutputParallelization(true)
      .setStatementPreparator(ignored -> {})
      .build();
}
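
A hedged configuration sketch; the driver, connection URL, and query are placeholders.

PCollection<Row> rows =
    pipeline.apply(
        JdbcIO.readRows()
            .withDataSourceConfiguration(
                JdbcIO.DataSourceConfiguration.create(
                    "org.postgresql.Driver", "jdbc:postgresql://localhost:5432/mydb"))
            .withQuery("SELECT id, name FROM users"));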
 
Example #13
Source File: GcpOptions.java    From beam with Apache License 2.0
/**
 * GCP <a href="https://cloud.google.com/kms/">Cloud KMS</a> key for Dataflow pipelines and
 * buckets created by GcpTempLocationFactory.
 */
@Description(
    "GCP Cloud KMS key for Dataflow pipelines. Also used by gcpTempLocation as the default key "
        + "for new buckets. Key format is: "
        + "projects/<project>/locations/<location>/keyRings/<keyring>/cryptoKeys/<key>")
@Experimental
@Nullable
String getDataflowKmsKey();
 
Example #14
Source File: TextIO.java    From beam with Apache License 2.0
/** Same as {@link Read#watchForNewFiles(Duration, TerminationCondition)}. */
@Experimental(Kind.SPLITTABLE_DO_FN)
public ReadAll watchForNewFiles(
    Duration pollInterval, TerminationCondition<String, ?> terminationCondition) {
  return withMatchConfiguration(
      getMatchConfiguration().continuously(pollInterval, terminationCondition));
}
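
A sketch of continuous file watching via the analogous method on TextIO.read(); the bucket path and durations are illustrative.

PCollection<String> lines =
    pipeline.apply(
        TextIO.read()
            .from("gs://my-bucket/logs/*.txt")
            .watchForNewFiles(
                // Poll for new files every 30 seconds...
                Duration.standardSeconds(30),
                // ...and stop if no new files appear for an hour.
                Watch.Growth.afterTimeSinceNewOutput(Duration.standardHours(1))));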
 
Example #15
Source File: ProtoByteBuddyUtils.java    From beam with Apache License 2.0
@Experimental(Kind.SCHEMAS)
static <ProtoBuilderT extends MessageLite.Builder> SchemaUserTypeCreator createBuilderCreator(
    Class<?> protoClass,
    Class<?> builderClass,
    List<FieldValueSetter<ProtoBuilderT, Object>> setters,
    Schema schema) {
  try {
    DynamicType.Builder<Supplier> builder =
        BYTE_BUDDY
            .with(new InjectPackageStrategy(builderClass))
            .subclass(Supplier.class)
            .method(ElementMatchers.named("get"))
            .intercept(new BuilderSupplier(protoClass));
    Supplier supplier =
        builder
            .visit(
                new AsmVisitorWrapper.ForDeclaredMethods()
                    .writerFlags(ClassWriter.COMPUTE_FRAMES))
            .make()
            .load(ReflectHelpers.findClassLoader(), ClassLoadingStrategy.Default.INJECTION)
            .getLoaded()
            .getDeclaredConstructor()
            .newInstance();
    return new ProtoCreatorFactory<>(supplier, setters);
  } catch (InstantiationException
      | IllegalAccessException
      | NoSuchMethodException
      | InvocationTargetException e) {
    throw new RuntimeException(
        "Unable to generate a creator for class " + builderClass + " with schema " + schema, e);
  }
}
 
Example #16
Source File: TextIO.java    From beam with Apache License 2.0
/**
 * See {@link TypedWrite#to(SerializableFunction, Params)}.
 *
 * @deprecated Use {@link FileIO#write()} or {@link FileIO#writeDynamic()} with {@link #sink()}
 *     instead.
 */
@Experimental(Kind.FILESYSTEM)
@Deprecated
public Write to(
    SerializableFunction<String, Params> destinationFunction, Params emptyDestination) {
  return new Write(
      inner
          .to(destinationFunction, emptyDestination)
          .withFormatFunction(SerializableFunctions.identity()));
}
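
A sketch of the replacement the deprecation note points to, using FileIO with TextIO.sink(); the upstream PCollection and output path are placeholders.

PCollection<String> lines = readLines(pipeline); // hypothetical upstream PCollection<String>
lines.apply(
    "WriteViaFileIO",
    FileIO.<String>write().via(TextIO.sink()).to("gs://my-bucket/output/"));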
 
Example #17
Source File: LocalSpannerIO.java    From DataflowTemplates with Apache License 2.0
/**
 * Returns a transform that creates a batch transaction. By default, a {@link
 * TimestampBound#strong()} transaction is created; to override this, use {@link
 * CreateTransaction#withTimestampBound(TimestampBound)}.
 */
@Experimental
public static CreateTransaction createTransaction() {
  return new AutoValue_LocalSpannerIO_CreateTransaction.Builder()
      .setSpannerConfig(SpannerConfig.create())
      .setTimestampBound(TimestampBound.strong())
      .build();
}
 
Example #18
Source File: PCollection.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Returns the attached schema. */
@Experimental(Kind.SCHEMAS)
public Schema getSchema() {
  if (!hasSchema()) {
    throw new IllegalStateException("Cannot call getSchema when there is no schema");
  }
  return ((SchemaCoder) getCoder()).getSchema();
}
 
Example #19
Source File: SpannerIO.java    From beam with Apache License 2.0
/**
 * Returns a transform that creates a batch transaction. By default, a {@link
 * TimestampBound#strong()} transaction is created; to override this, use {@link
 * CreateTransaction#withTimestampBound(TimestampBound)}.
 */
@Experimental
public static CreateTransaction createTransaction() {
  return new AutoValue_SpannerIO_CreateTransaction.Builder()
      .setSpannerConfig(SpannerConfig.create())
      .setTimestampBound(TimestampBound.strong())
      .build();
}
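
A hedged sketch of creating the transaction view at pipeline start; the instance and database IDs are placeholders.

PCollectionView<Transaction> tx =
    pipeline.apply(
        SpannerIO.createTransaction()
            .withInstanceId("my-instance")
            .withDatabaseId("my-database")
            // Explicitly request the default strong read timestamp bound.
            .withTimestampBound(TimestampBound.strong()));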
 
Example #20
Source File: PCollection.java    From beam with Apache License 2.0
/** Returns the attached schema's fromRowFunction. */
@Experimental(Kind.SCHEMAS)
public SerializableFunction<Row, T> getFromRowFunction() {
  if (!hasSchema()) {
    throw new IllegalStateException("Cannot call getFromRowFunction when there is no schema");
  }
  return ((SchemaCoder<T>) getCoder()).getFromRowFunction();
}
 
Example #21
Source File: Pipeline.java    From beam with Apache License 2.0
@Experimental(Kind.SCHEMAS)
public SchemaRegistry getSchemaRegistry() {
  if (schemaRegistry == null) {
    schemaRegistry = SchemaRegistry.createDefault();
  }
  return schemaRegistry;
}
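
A sketch of a typical lookup against the registry; the Purchase class is hypothetical, and the lookup throws NoSuchSchemaException when no schema is registered for the type.

SchemaRegistry registry = pipeline.getSchemaRegistry();
try {
  Schema purchaseSchema = registry.getSchema(Purchase.class);
} catch (NoSuchSchemaException e) {
  // No SchemaProvider knows how to build a schema for Purchase.
}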
 
Example #22
Source File: TestStream.java    From beam with Apache License 2.0
@Experimental(Kind.SCHEMAS)
public static <T> Builder<T> create(
    Schema schema,
    TypeDescriptor<T> typeDescriptor,
    SerializableFunction<T, Row> toRowFunction,
    SerializableFunction<Row, T> fromRowFunction) {
  return create(SchemaCoder.of(schema, typeDescriptor, toRowFunction, fromRowFunction));
}
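
A sketch wiring hypothetical converters into a schema-aware TestStream; the Event class and the lambdas are invented for illustration.

Schema schema = Schema.builder().addStringField("id").build();
TestStream<Event> stream =
    TestStream.create(
            schema,
            TypeDescriptor.of(Event.class),
            // Hypothetical to/from Row conversions for the Event class.
            event -> Row.withSchema(schema).addValue(event.id).build(),
            row -> new Event(row.getString("id")))
        .addElements(new Event("a"))
        .advanceWatermarkToInfinity();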
 
Example #23
Source File: FileBasedSink.java    From beam with Apache License 2.0
/**
 * Construct a {@link FileBasedSink} with the given temp directory, producing uncompressed files.
 */
@Experimental(Kind.FILESYSTEM)
public FileBasedSink(
    ValueProvider<ResourceId> tempDirectoryProvider,
    DynamicDestinations<?, DestinationT, OutputT> dynamicDestinations) {
  this(tempDirectoryProvider, dynamicDestinations, Compression.UNCOMPRESSED);
}
 
Example #24
Source File: FileBasedSink.java    From beam with Apache License 2.0
/** Construct a {@link FileBasedSink} with the given temp directory and output channel type. */
@Experimental(Kind.FILESYSTEM)
public FileBasedSink(
    ValueProvider<ResourceId> tempDirectoryProvider,
    DynamicDestinations<?, DestinationT, OutputT> dynamicDestinations,
    WritableByteChannelFactory writableByteChannelFactory) {
  this.tempDirectoryProvider =
      NestedValueProvider.of(tempDirectoryProvider, new ExtractDirectory());
  this.dynamicDestinations = checkNotNull(dynamicDestinations);
  this.writableByteChannelFactory = writableByteChannelFactory;
}
 
Example #25
Source File: BigQueryUtils.java    From beam with Apache License 2.0
/**
 * Tries to convert a JSON {@link TableRow} from BigQuery into a Beam {@link Row}.
 *
 * <p>Only supports basic types and arrays. Doesn't support date types or structs.
 */
@Experimental(Kind.SCHEMAS)
public static Row toBeamRow(Schema rowSchema, TableRow jsonBqRow) {
  // TODO deprecate toBeamRow(Schema, TableSchema, TableRow) function in favour of this function.
  // This function attempts to convert TableRows without having access to the
  // corresponding TableSchema because:
  // 1. TableSchema contains redundant information already available in the Schema object.
  // 2. TableSchema objects are not serializable and are therefore harder to propagate through a
  // pipeline.
  return rowSchema.getFields().stream()
      .map(field -> toBeamRowFieldValue(field, jsonBqRow.get(field.getName())))
      .collect(toRow(rowSchema));
}
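
A minimal sketch; the schema and TableRow contents are invented. Note that BigQuery JSON values typically arrive as strings.

Schema schema = Schema.builder().addStringField("name").addInt64Field("age").build();
TableRow tableRow = new TableRow().set("name", "alice").set("age", "42");
Row row = BigQueryUtils.toBeamRow(schema, tableRow);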
 
Example #26
Source File: FileBasedSink.java    From beam with Apache License 2.0
@Experimental(Kind.FILESYSTEM)
public FileResult(
    ResourceId tempFilename,
    int shard,
    BoundedWindow window,
    PaneInfo paneInfo,
    DestinationT destination) {
  checkArgument(window != null, "window can not be null");
  checkArgument(paneInfo != null, "paneInfo can not be null");
  this.tempFilename = tempFilename;
  this.shard = shard;
  this.window = window;
  this.paneInfo = paneInfo;
  this.destination = destination;
}
 
Example #27
Source File: AvroIO.java    From beam with Apache License 2.0
@Experimental(Kind.SCHEMAS)
private static <T> PCollection<T> setBeamSchema(
    PCollection<T> pc, Class<T> clazz, @Nullable Schema schema) {
  org.apache.beam.sdk.schemas.Schema beamSchema =
      org.apache.beam.sdk.schemas.utils.AvroUtils.getSchema(clazz, schema);
  if (beamSchema != null) {
    pc.setSchema(
        beamSchema,
        TypeDescriptor.of(clazz),
        org.apache.beam.sdk.schemas.utils.AvroUtils.getToRowFunction(clazz, schema),
        org.apache.beam.sdk.schemas.utils.AvroUtils.getFromRowFunction(clazz));
  }
  return pc;
}
 
Example #28
Source File: AvroIO.java    From beam with Apache License 2.0
/** Like {@link Read#watchForNewFiles}. */
@Experimental(Kind.SPLITTABLE_DO_FN)
public ReadAll<T> watchForNewFiles(
    Duration pollInterval, TerminationCondition<String, ?> terminationCondition) {
  return withMatchConfiguration(
      getMatchConfiguration().continuously(pollInterval, terminationCondition));
}
 
Example #29
Source File: TextIO.java    From beam with Apache License 2.0
/**
 * See {@link MatchConfiguration#continuously}.
 *
 * <p>This works only in runners supporting {@link Kind#SPLITTABLE_DO_FN}.
 */
@Experimental(Kind.SPLITTABLE_DO_FN)
public Read watchForNewFiles(
    Duration pollInterval, TerminationCondition<String, ?> terminationCondition) {
  return withMatchConfiguration(
      getMatchConfiguration().continuously(pollInterval, terminationCondition));
}
 
Example #30
Source File: DataflowPipelineOptions.java    From beam with Apache License 2.0
/**
 * The Google Compute Engine <a
 * href="https://cloud.google.com/compute/docs/regions-zones/regions-zones">region</a> for
 * creating Dataflow jobs.
 */
@Hidden
@Experimental
@Description(
    "The Google Compute Engine region for creating Dataflow jobs. See "
        + "https://cloud.google.com/compute/docs/regions-zones/regions-zones for a list of valid "
        + "options. Currently defaults to us-central1, but future releases of Beam will "
        + "require the user to set the region explicitly.")
@Default.InstanceFactory(DefaultGcpRegionFactory.class)
String getRegion();
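
Like the other experimental options above, the region is normally supplied as a flag; a minimal sketch (the region value is illustrative):

DataflowPipelineOptions options =
    PipelineOptionsFactory.fromArgs("--region=europe-west1", "--runner=DataflowRunner")
        .as(DataflowPipelineOptions.class);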