org.apache.beam.sdk.annotations.Experimental Java Examples
The following examples show how to use the org.apache.beam.sdk.annotations.Experimental annotation. They are drawn from the Apache Beam project (and one from Google's DataflowTemplates); the source file and license are noted above each example.
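For orientation, a minimal sketch of how the annotation is applied; the class name and method body here are placeholders, not Beam code. The Kind argument tags the experimental feature group the API belongs to, and annotated APIs may change incompatibly or be removed in future releases.

import org.apache.beam.sdk.annotations.Experimental;
import org.apache.beam.sdk.annotations.Experimental.Kind;
import org.apache.beam.sdk.schemas.Schema;

public class ExperimentalSketch {
  /** Marks this method as part of Beam's experimental schema support. */
  @Experimental(Kind.SCHEMAS)
  public Schema getBeamSchema() {
    return Schema.builder().build(); // placeholder body: an empty schema
  }
}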
Example #1
Source File: BigQueryQuerySourceDef.java From beam with Apache License 2.0

/** {@inheritDoc} */
@Experimental(Kind.SCHEMAS)
@Override
public Schema getBeamSchema(BigQueryOptions bqOptions) {
  try {
    JobStatistics stats =
        BigQueryQueryHelper.dryRunQueryIfNeeded(
            bqServices,
            bqOptions,
            dryRunJobStats,
            query.get(),
            flattenResults,
            useLegacySql,
            location);
    TableSchema tableSchema = stats.getQuery().getSchema();
    return BigQueryUtils.fromTableSchema(tableSchema);
  } catch (IOException | InterruptedException | NullPointerException e) {
    throw new BigQuerySchemaRetrievalException(
        "Exception while trying to retrieve schema of query", e);
  }
}
Example #2
Source File: TestBigQuery.java From beam with Apache License 2.0

@Experimental(Kind.SCHEMAS)
public TableDataInsertAllResponse insertRows(Schema rowSchema, Row... rows) throws IOException {
  List<Rows> bqRows =
      Arrays.stream(rows)
          .map(row -> new Rows().setJson(BigQueryUtils.toTableRow(row)))
          .collect(ImmutableList.toImmutableList());
  Bigquery bq = newBigQueryClient(pipelineOptions);
  return bq.tabledata()
      .insertAll(
          pipelineOptions.getProject(),
          pipelineOptions.getTargetDataset(),
          table.getTableReference().getTableId(),
          new TableDataInsertAllRequest().setRows(bqRows))
      .execute();
}
Example #3
Source File: BigQueryUtils.java From beam with Apache License 2.0

@Experimental(Kind.SCHEMAS)
public static Row toBeamRow(GenericRecord record, Schema schema, ConversionOptions options) {
  List<Object> valuesInOrder =
      schema.getFields().stream()
          .map(
              field -> {
                try {
                  return convertAvroFormat(field.getType(), record.get(field.getName()), options);
                } catch (Exception cause) {
                  throw new IllegalArgumentException(
                      "Error converting field " + field + ": " + cause.getMessage(), cause);
                }
              })
          .collect(toList());
  return Row.withSchema(schema).addValues(valuesInOrder).build();
}
Example #4
Source File: BigQueryUtils.java From beam with Apache License 2.0

/**
 * Tries to parse the JSON {@link TableRow} from BigQuery.
 *
 * <p>Only supports basic types and arrays. Doesn't support date types.
 */
@Experimental(Kind.SCHEMAS)
public static Row toBeamRow(Schema rowSchema, TableSchema bqSchema, TableRow jsonBqRow) {
  List<TableFieldSchema> bqFields = bqSchema.getFields();
  Map<String, Integer> bqFieldIndices =
      IntStream.range(0, bqFields.size())
          .boxed()
          .collect(toMap(i -> bqFields.get(i).getName(), i -> i));
  List<Object> rawJsonValues =
      rowSchema.getFields().stream()
          .map(field -> bqFieldIndices.get(field.getName()))
          .map(index -> jsonBqRow.getF().get(index).getV())
          .collect(toList());
  return IntStream.range(0, rowSchema.getFieldCount())
      .boxed()
      .map(index -> toBeamValue(rowSchema.getField(index).getType(), rawJsonValues.get(index)))
      .collect(toRow(rowSchema));
}
Example #5
Source File: BigQueryTableSourceDef.java From beam with Apache License 2.0

/** {@inheritDoc} */
@Experimental(Kind.SCHEMAS)
@Override
public Schema getBeamSchema(BigQueryOptions bqOptions) {
  try {
    TableReference tableRef = getTableReference(bqOptions);
    TableSchema tableSchema =
        bqServices.getDatasetService(bqOptions).getTable(tableRef).getSchema();
    return BigQueryUtils.fromTableSchema(tableSchema);
  } catch (IOException | InterruptedException | NullPointerException e) {
    throw new BigQuerySchemaRetrievalException("Exception while trying to retrieve schema", e);
  }
}
Example #6
Source File: PCollection.java From beam with Apache License 2.0

/** Returns the attached schema's toRowFunction. */
@Experimental(Kind.SCHEMAS)
public SerializableFunction<T, Row> getToRowFunction() {
  if (!hasSchema()) {
    throw new IllegalStateException("Cannot call getToRowFunction when there is no schema");
  }
  return ((SchemaCoder<T>) getCoder()).getToRowFunction();
}
Example #7
Source File: BigQueryIO.java From beam with Apache License 2.0

/**
 * Sets the functions to convert elements to/from {@link Row} objects.
 *
 * <p>Setting these conversion functions is necessary to enable {@link Schema} support.
 */
@Experimental(Kind.SCHEMAS)
public TypedRead<T> withBeamRowConverters(
    TypeDescriptor<T> typeDescriptor,
    ToBeamRowFunction<T> toRowFn,
    FromBeamRowFunction<T> fromRowFn) {
  return toBuilder()
      .setTypeDescriptor(typeDescriptor)
      .setToBeamRowFn(toRowFn)
      .setFromBeamRowFn(fromRowFn)
      .build();
}
Example #8
Source File: SpannerIO.java From beam with Apache License 2.0

/**
 * Creates an uninitialized instance of {@link Write}. Before use, the {@link Write} must be
 * configured with {@link Write#withInstanceId} and {@link Write#withDatabaseId}, which identify
 * the Cloud Spanner database being written to.
 */
@Experimental
public static Write write() {
  return new AutoValue_SpannerIO_Write.Builder()
      .setSpannerConfig(SpannerConfig.create())
      .setBatchSizeBytes(DEFAULT_BATCH_SIZE_BYTES)
      .setMaxNumMutations(DEFAULT_MAX_NUM_MUTATIONS)
      .setMaxNumRows(DEFAULT_MAX_NUM_ROWS)
      .setFailureMode(FailureMode.FAIL_FAST)
      .build();
}
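A hedged usage sketch for write(): per the Javadoc above, the returned Write must be configured with withInstanceId and withDatabaseId before use. The IDs and the surrounding class are hypothetical.

import com.google.cloud.spanner.Mutation;
import org.apache.beam.sdk.io.gcp.spanner.SpannerIO;
import org.apache.beam.sdk.values.PCollection;

class SpannerWriteSketch {
  /** Writes a PCollection of Mutations to a hypothetical Spanner database. */
  static void writeMutations(PCollection<Mutation> mutations) {
    mutations.apply(
        SpannerIO.write()
            .withInstanceId("my-instance")    // hypothetical instance ID
            .withDatabaseId("my-database"));  // hypothetical database ID
  }
}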
Example #9
Source File: DataflowPipelineWorkerPoolOptions.java From beam with Apache License 2.0

/**
 * Specifies whether worker pools should be started with public IP addresses.
 *
 * <p>WARNING: This feature is experimental. You must be allowlisted to use it.
 */
@Description(
    "Specifies whether worker pools should be started with public IP addresses. WARNING: "
        + "This feature is experimental. You must be allowlisted to use it.")
@Experimental
@JsonIgnore
@Nullable
Boolean getUsePublicIps();
Example #10
Source File: PubsubIO.java From beam with Apache License 2.0

/**
 * Returns a {@link PTransform} that continuously reads binary encoded Avro messages into the
 * Avro {@link GenericRecord} type.
 *
 * <p>Beam will infer a Beam schema from the Avro schema. This allows the output to be used by
 * SQL and by the schema-transform library.
 */
@Experimental(Kind.SCHEMAS)
public static Read<GenericRecord> readAvroGenericRecords(org.apache.avro.Schema avroSchema) {
  Schema schema = AvroUtils.getSchema(GenericRecord.class, avroSchema);
  AvroCoder<GenericRecord> coder = AvroCoder.of(GenericRecord.class, avroSchema);
  return Read.newBuilder(parsePayloadUsingCoder(coder))
      .setCoder(
          SchemaCoder.of(
              schema,
              TypeDescriptor.of(GenericRecord.class),
              AvroUtils.getToRowFunction(GenericRecord.class, avroSchema),
              AvroUtils.getFromRowFunction(GenericRecord.class)))
      .build();
}
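A hedged usage sketch: reading the schema-aware GenericRecords from a subscription. The subscription path is hypothetical; fromSubscription is PubsubIO.Read's standard configuration method.

import org.apache.avro.generic.GenericRecord;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubIO;
import org.apache.beam.sdk.values.PCollection;

class PubsubAvroSketch {
  /** Reads GenericRecords (with an inferred Beam schema) from a hypothetical subscription. */
  static PCollection<GenericRecord> read(Pipeline pipeline, org.apache.avro.Schema avroSchema) {
    return pipeline.apply(
        PubsubIO.readAvroGenericRecords(avroSchema)
            .fromSubscription("projects/my-project/subscriptions/my-sub")); // hypothetical
  }
}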
Example #11
Source File: Create.java From beam with Apache License 2.0

/**
 * Returns a {@link Create.TimestampedValues} PTransform like this one that uses the given
 * {@code Schema} to represent objects.
 */
@Experimental(Kind.SCHEMAS)
public TimestampedValues<T> withSchema(
    Schema schema,
    TypeDescriptor<T> typeDescriptor,
    SerializableFunction<T, Row> toRowFunction,
    SerializableFunction<Row, T> fromRowFunction) {
  return withCoder(SchemaCoder.of(schema, typeDescriptor, toRowFunction, fromRowFunction));
}
Example #12
Source File: JdbcIO.java From beam with Apache License 2.0

/** Read Beam {@link Row}s from a JDBC data source. */
@Experimental(Kind.SCHEMAS)
public static ReadRows readRows() {
  return new AutoValue_JdbcIO_ReadRows.Builder()
      .setFetchSize(DEFAULT_FETCH_SIZE)
      .setOutputParallelization(true)
      .setStatementPreparator(ignored -> {})
      .build();
}
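A hedged usage sketch of readRows(); the driver class, JDBC URL, and query are hypothetical, and withDataSourceConfiguration/withQuery are the usual ReadRows configuration methods.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.jdbc.JdbcIO;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.Row;

class JdbcReadRowsSketch {
  /** Reads Beam Rows from a hypothetical JDBC table. */
  static PCollection<Row> readTable(Pipeline pipeline) {
    return pipeline.apply(
        JdbcIO.readRows()
            .withDataSourceConfiguration(
                JdbcIO.DataSourceConfiguration.create(
                    "org.postgresql.Driver",                  // hypothetical driver
                    "jdbc:postgresql://localhost:5432/mydb")) // hypothetical URL
            .withQuery("SELECT id, name FROM my_table"));     // hypothetical query
  }
}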
Example #13
Source File: GcpOptions.java From beam with Apache License 2.0

/**
 * GCP <a href="https://cloud.google.com/kms/">Cloud KMS</a> key for Dataflow pipelines and
 * buckets created by GcpTempLocationFactory.
 */
@Description(
    "GCP Cloud KMS key for Dataflow pipelines. Also used by gcpTempLocation as the default key "
        + "for new buckets. Key format is: "
        + "projects/<project>/locations/<location>/keyRings/<keyring>/cryptoKeys/<key>")
@Experimental
@Nullable
String getDataflowKmsKey();
Example #14
Source File: TextIO.java From beam with Apache License 2.0

/** Same as {@link Read#watchForNewFiles(Duration, TerminationCondition)}. */
@Experimental(Kind.SPLITTABLE_DO_FN)
public ReadAll watchForNewFiles(
    Duration pollInterval, TerminationCondition<String, ?> terminationCondition) {
  return withMatchConfiguration(
      getMatchConfiguration().continuously(pollInterval, terminationCondition));
}
Example #15
Source File: ProtoByteBuddyUtils.java From beam with Apache License 2.0

@Experimental(Kind.SCHEMAS)
static <ProtoBuilderT extends MessageLite.Builder> SchemaUserTypeCreator createBuilderCreator(
    Class<?> protoClass,
    Class<?> builderClass,
    List<FieldValueSetter<ProtoBuilderT, Object>> setters,
    Schema schema) {
  try {
    DynamicType.Builder<Supplier> builder =
        BYTE_BUDDY
            .with(new InjectPackageStrategy(builderClass))
            .subclass(Supplier.class)
            .method(ElementMatchers.named("get"))
            .intercept(new BuilderSupplier(protoClass));
    Supplier supplier =
        builder
            .visit(
                new AsmVisitorWrapper.ForDeclaredMethods()
                    .writerFlags(ClassWriter.COMPUTE_FRAMES))
            .make()
            .load(ReflectHelpers.findClassLoader(), ClassLoadingStrategy.Default.INJECTION)
            .getLoaded()
            .getDeclaredConstructor()
            .newInstance();
    return new ProtoCreatorFactory<>(supplier, setters);
  } catch (InstantiationException
      | IllegalAccessException
      | NoSuchMethodException
      | InvocationTargetException e) {
    throw new RuntimeException(
        "Unable to generate a creator for class " + builderClass + " with schema " + schema);
  }
}
Example #16
Source File: TextIO.java From beam with Apache License 2.0

/**
 * See {@link TypedWrite#to(SerializableFunction, Params)}.
 *
 * @deprecated Use {@link FileIO#write()} or {@link FileIO#writeDynamic()} with {@link #sink()}
 *     instead.
 */
@Experimental(Kind.FILESYSTEM)
@Deprecated
public Write to(
    SerializableFunction<String, Params> destinationFunction, Params emptyDestination) {
  return new Write(
      inner
          .to(destinationFunction, emptyDestination)
          .withFormatFunction(SerializableFunctions.identity()));
}
Example #17
Source File: LocalSpannerIO.java From DataflowTemplates with Apache License 2.0

/**
 * Returns a transform that creates a batch transaction. By default, a {@link
 * TimestampBound#strong()} transaction is created; to override this, use {@link
 * CreateTransaction#withTimestampBound(TimestampBound)}.
 */
@Experimental
public static CreateTransaction createTransaction() {
  return new AutoValue_LocalSpannerIO_CreateTransaction.Builder()
      .setSpannerConfig(SpannerConfig.create())
      .setTimestampBound(TimestampBound.strong())
      .build();
}
Example #18
Source File: PCollection.java From beam with Apache License 2.0

/** Returns the attached schema. */
@Experimental(Kind.SCHEMAS)
public Schema getSchema() {
  if (!hasSchema()) {
    throw new IllegalStateException("Cannot call getSchema when there is no schema");
  }
  return ((SchemaCoder) getCoder()).getSchema();
}
Example #19
Source File: SpannerIO.java From beam with Apache License 2.0

/**
 * Returns a transform that creates a batch transaction. By default, a {@link
 * TimestampBound#strong()} transaction is created; to override this, use {@link
 * CreateTransaction#withTimestampBound(TimestampBound)}.
 */
@Experimental
public static CreateTransaction createTransaction() {
  return new AutoValue_SpannerIO_CreateTransaction.Builder()
      .setSpannerConfig(SpannerConfig.create())
      .setTimestampBound(TimestampBound.strong())
      .build();
}
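A hedged usage sketch, assuming (as in Beam's SpannerIO) that the transform is applied at pipeline start and yields a PCollectionView of the transaction; the instance and database IDs are hypothetical.

import com.google.cloud.spanner.TimestampBound;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.gcp.spanner.SpannerIO;
import org.apache.beam.sdk.io.gcp.spanner.Transaction;
import org.apache.beam.sdk.values.PCollectionView;

class SpannerTransactionSketch {
  /** Creates a batch-transaction view that subsequent reads can share. */
  static PCollectionView<Transaction> createTx(Pipeline pipeline) {
    return pipeline.apply(
        SpannerIO.createTransaction()
            .withInstanceId("my-instance")  // hypothetical
            .withDatabaseId("my-database")  // hypothetical
            .withTimestampBound(TimestampBound.strong()));
  }
}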
Example #20
Source File: PCollection.java From beam with Apache License 2.0

/** Returns the attached schema's fromRowFunction. */
@Experimental(Kind.SCHEMAS)
public SerializableFunction<Row, T> getFromRowFunction() {
  if (!hasSchema()) {
    throw new IllegalStateException("Cannot call getFromRowFunction when there is no schema");
  }
  return ((SchemaCoder<T>) getCoder()).getFromRowFunction();
}
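Examples #6, #18, and #20 follow one pattern: each PCollection accessor throws IllegalStateException unless a schema is attached, so calls should be guarded with hasSchema(). A small sketch combining the three:

import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.Row;

class SchemaAccessorsSketch {
  /** Reads the schema accessors only after confirming a schema is attached. */
  static <T> void inspect(PCollection<T> pc) {
    if (pc.hasSchema()) {
      Schema schema = pc.getSchema();
      SerializableFunction<T, Row> toRow = pc.getToRowFunction();
      SerializableFunction<Row, T> fromRow = pc.getFromRowFunction();
      System.out.println("Attached schema: " + schema);
    }
  }
}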
Example #21
Source File: Pipeline.java From beam with Apache License 2.0

@Experimental(Kind.SCHEMAS)
public SchemaRegistry getSchemaRegistry() {
  if (schemaRegistry == null) {
    schemaRegistry = SchemaRegistry.createDefault();
  }
  return schemaRegistry;
}
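A usage sketch for the registry returned above; MyPojo is a hypothetical element type registered via Beam's standard @DefaultSchema annotation.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.schemas.JavaFieldSchema;
import org.apache.beam.sdk.schemas.NoSuchSchemaException;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.schemas.SchemaRegistry;
import org.apache.beam.sdk.schemas.annotations.DefaultSchema;

class SchemaRegistrySketch {
  @DefaultSchema(JavaFieldSchema.class)
  public static class MyPojo { // hypothetical element type
    public String name;
    public int age;
  }

  /** Looks up the schema the default registry infers for MyPojo. */
  static Schema lookup(Pipeline pipeline) throws NoSuchSchemaException {
    SchemaRegistry registry = pipeline.getSchemaRegistry();
    return registry.getSchema(MyPojo.class);
  }
}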
Example #22
Source File: TestStream.java From beam with Apache License 2.0

@Experimental(Kind.SCHEMAS)
public static <T> Builder<T> create(
    Schema schema,
    TypeDescriptor<T> typeDescriptor,
    SerializableFunction<T, Row> toRowFunction,
    SerializableFunction<Row, T> fromRowFunction) {
  return create(SchemaCoder.of(schema, typeDescriptor, toRowFunction, fromRowFunction));
}
Example #23
Source File: FileBasedSink.java From beam with Apache License 2.0

/**
 * Construct a {@link FileBasedSink} with the given temp directory, producing uncompressed files.
 */
@Experimental(Kind.FILESYSTEM)
public FileBasedSink(
    ValueProvider<ResourceId> tempDirectoryProvider,
    DynamicDestinations<?, DestinationT, OutputT> dynamicDestinations) {
  this(tempDirectoryProvider, dynamicDestinations, Compression.UNCOMPRESSED);
}
Example #24
Source File: FileBasedSink.java From beam with Apache License 2.0

/** Construct a {@link FileBasedSink} with the given temp directory and output channel type. */
@Experimental(Kind.FILESYSTEM)
public FileBasedSink(
    ValueProvider<ResourceId> tempDirectoryProvider,
    DynamicDestinations<?, DestinationT, OutputT> dynamicDestinations,
    WritableByteChannelFactory writableByteChannelFactory) {
  this.tempDirectoryProvider =
      NestedValueProvider.of(tempDirectoryProvider, new ExtractDirectory());
  this.dynamicDestinations = checkNotNull(dynamicDestinations);
  this.writableByteChannelFactory = writableByteChannelFactory;
}
Example #25
Source File: BigQueryUtils.java From beam with Apache License 2.0

/**
 * Tries to convert a JSON {@link TableRow} from BigQuery into a Beam {@link Row}.
 *
 * <p>Only supports basic types and arrays. Doesn't support date types or structs.
 */
@Experimental(Kind.SCHEMAS)
public static Row toBeamRow(Schema rowSchema, TableRow jsonBqRow) {
  // TODO deprecate toBeamRow(Schema, TableSchema, TableRow) function in favour of this function.
  // This function attempts to convert TableRows without having access to the
  // corresponding TableSchema because:
  // 1. TableSchema contains redundant information already available in the Schema object.
  // 2. TableSchema objects are not serializable and are therefore harder to propagate through a
  //    pipeline.
  return rowSchema.getFields().stream()
      .map(field -> toBeamRowFieldValue(field, jsonBqRow.get(field.getName())))
      .collect(toRow(rowSchema));
}
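A hedged usage sketch of this converter; the schema fields and TableRow values are made up (BigQuery's JSON representation typically delivers numeric values as strings).

import com.google.api.services.bigquery.model.TableRow;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryUtils;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.values.Row;

class ToBeamRowSketch {
  /** Converts a hand-built TableRow into a Beam Row. */
  static Row convert() {
    Schema schema =
        Schema.builder().addStringField("name").addInt64Field("count").build();
    TableRow tableRow = new TableRow().set("name", "beam").set("count", "42");
    return BigQueryUtils.toBeamRow(schema, tableRow);
  }
}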
Example #26
Source File: FileBasedSink.java From beam with Apache License 2.0

@Experimental(Kind.FILESYSTEM)
public FileResult(
    ResourceId tempFilename,
    int shard,
    BoundedWindow window,
    PaneInfo paneInfo,
    DestinationT destination) {
  checkArgument(window != null, "window can not be null");
  checkArgument(paneInfo != null, "paneInfo can not be null");
  this.tempFilename = tempFilename;
  this.shard = shard;
  this.window = window;
  this.paneInfo = paneInfo;
  this.destination = destination;
}
Example #27
Source File: AvroIO.java From beam with Apache License 2.0

@Experimental(Kind.SCHEMAS)
private static <T> PCollection<T> setBeamSchema(
    PCollection<T> pc, Class<T> clazz, @Nullable Schema schema) {
  org.apache.beam.sdk.schemas.Schema beamSchema =
      org.apache.beam.sdk.schemas.utils.AvroUtils.getSchema(clazz, schema);
  if (beamSchema != null) {
    pc.setSchema(
        beamSchema,
        TypeDescriptor.of(clazz),
        org.apache.beam.sdk.schemas.utils.AvroUtils.getToRowFunction(clazz, schema),
        org.apache.beam.sdk.schemas.utils.AvroUtils.getFromRowFunction(clazz));
  }
  return pc;
}
Example #28
Source File: AvroIO.java From beam with Apache License 2.0

/** Like {@link Read#watchForNewFiles}. */
@Experimental(Kind.SPLITTABLE_DO_FN)
public ReadAll<T> watchForNewFiles(
    Duration pollInterval, TerminationCondition<String, ?> terminationCondition) {
  return withMatchConfiguration(
      getMatchConfiguration().continuously(pollInterval, terminationCondition));
}
Example #29
Source File: TextIO.java From beam with Apache License 2.0

/**
 * See {@link MatchConfiguration#continuously}.
 *
 * <p>This works only in runners supporting {@link Kind#SPLITTABLE_DO_FN}.
 */
@Experimental(Kind.SPLITTABLE_DO_FN)
public Read watchForNewFiles(
    Duration pollInterval, TerminationCondition<String, ?> terminationCondition) {
  return withMatchConfiguration(
      getMatchConfiguration().continuously(pollInterval, terminationCondition));
}
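A hedged usage sketch of watchForNewFiles, following the pattern shown in Beam's TextIO documentation; the glob and the termination window are hypothetical.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.transforms.Watch;
import org.apache.beam.sdk.values.PCollection;
import org.joda.time.Duration;

class WatchForNewFilesSketch {
  /** Continuously matches a glob, stopping after an hour with no new files. */
  static PCollection<String> watch(Pipeline pipeline) {
    return pipeline.apply(
        TextIO.read()
            .from("gs://my-bucket/logs/*.txt") // hypothetical glob
            .watchForNewFiles(
                Duration.standardMinutes(1),
                Watch.Growth.afterTimeSinceNewOutput(Duration.standardHours(1))));
  }
}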
Example #30
Source File: DataflowPipelineOptions.java From beam with Apache License 2.0

/**
 * The Google Compute Engine <a
 * href="https://cloud.google.com/compute/docs/regions-zones/regions-zones">region</a> for
 * creating Dataflow jobs.
 */
@Hidden
@Experimental
@Description(
    "The Google Compute Engine region for creating Dataflow jobs. See "
        + "https://cloud.google.com/compute/docs/regions-zones/regions-zones for a list of valid "
        + "options. Currently defaults to us-central1, but future releases of Beam will "
        + "require the user to set the region explicitly.")
@Default.InstanceFactory(DefaultGcpRegionFactory.class)
String getRegion();