org.apache.beam.sdk.annotations.Experimental.Kind Java Examples

The following examples show how to use org.apache.beam.sdk.annotations.Experimental.Kind. All of them are taken from the Apache Beam project; the source file is noted above each example.
Example #1
Source File: FileBasedSink.java    From beam with Apache License 2.0
@Experimental(Kind.FILESYSTEM)
public ResourceId getDestinationFile(
    boolean windowedWrites,
    DynamicDestinations<?, DestinationT, ?> dynamicDestinations,
    int numShards,
    OutputFileHints outputFileHints) {
  checkArgument(getShard() != UNKNOWN_SHARDNUM);
  checkArgument(numShards > 0);
  FilenamePolicy policy = dynamicDestinations.getFilenamePolicy(destination);
  if (windowedWrites) {
    return policy.windowedFilename(
        getShard(), numShards, getWindow(), getPaneInfo(), outputFileHints);
  } else {
    return policy.unwindowedFilename(getShard(), numShards, outputFileHints);
  }
}
 
Example #2
Source File: TestBigQuery.java    From beam with Apache License 2.0
@Experimental(Kind.SCHEMAS)
public TableDataInsertAllResponse insertRows(Schema rowSchema, Row... rows) throws IOException {
  List<Rows> bqRows =
      Arrays.stream(rows)
          .map(row -> new Rows().setJson(BigQueryUtils.toTableRow(row)))
          .collect(ImmutableList.toImmutableList());
  Bigquery bq = newBigQueryClient(pipelineOptions);

  return bq.tabledata()
      .insertAll(
          pipelineOptions.getProject(),
          pipelineOptions.getTargetDataset(),
          table.getTableReference().getTableId(),
          new TableDataInsertAllRequest().setRows(bqRows))
      .execute();
}
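
A rough sketch of calling insertRows from a test, assuming a JUnit rule field and a hypothetical two-column schema (all names besides the API calls are assumptions):

private static final Schema SCHEMA =
    Schema.builder().addStringField("name").addInt64Field("count").build();

@Rule public transient TestBigQuery bigQuery = TestBigQuery.create(SCHEMA);

@Test
public void insertsRows() throws IOException {
  Row row = Row.withSchema(SCHEMA).addValues("alice", 3L).build();
  bigQuery.insertRows(SCHEMA, row);
}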
 
Example #3
Source File: AvroIO.java    From beam with Apache License 2.0
/**
 * A {@link Sink} for use with {@link FileIO#write} and {@link FileIO#writeDynamic}, writing
 * elements with a given (common) schema, like {@link #writeGenericRecords(String)}.
 */
@Experimental(Kind.SOURCE_SINK)
public static <ElementT extends IndexedRecord> Sink<ElementT> sink(String jsonSchema) {
  return new AutoValue_AvroIO_Sink.Builder<ElementT>()
      .setJsonSchema(jsonSchema)
      .setMetadata(ImmutableMap.of())
      .setCodec(TypedWrite.DEFAULT_SERIALIZABLE_CODEC)
      .build();
}
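
A rough usage sketch pairing this sink with FileIO.write (records and schemaJson are assumed names):

// records is a PCollection<GenericRecord>; schemaJson holds the Avro schema as a JSON string.
records.apply(
    FileIO.<GenericRecord>write()
        .via(AvroIO.sink(schemaJson))
        .to("/path/to/output")
        .withSuffix(".avro"));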
 
Example #4
Source File: Create.java    From beam with Apache License 2.0
/**
 * Returns a {@link Create.Values} PTransform like this one that uses the given {@code Schema}
 * to represent objects.
 */
@Experimental(Kind.SCHEMAS)
public Values<T> withSchema(
    Schema schema,
    TypeDescriptor<T> typeDescriptor,
    SerializableFunction<T, Row> toRowFunction,
    SerializableFunction<Row, T> fromRowFunction) {
  return withCoder(SchemaCoder.of(schema, typeDescriptor, toRowFunction, fromRowFunction));
}
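
A minimal sketch of calling this overload with a hypothetical Person POJO (all names here are assumptions):

Schema personSchema =
    Schema.builder().addStringField("name").addInt32Field("age").build();

PCollection<Person> people =
    pipeline.apply(
        Create.of(new Person("alice", 40), new Person("bob", 25))
            .withSchema(
                personSchema,
                TypeDescriptor.of(Person.class),
                person -> Row.withSchema(personSchema).addValues(person.name, person.age).build(),
                row -> new Person(row.getString("name"), row.getInt32("age"))));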
 
Example #5
Source File: Create.java    From beam with Apache License 2.0
/**
 * Returns a {@link Create.TimestampedValues} PTransform like this one that uses the given
 * {@code Schema} to represent objects.
 */
@Experimental(Kind.SCHEMAS)
public TimestampedValues<T> withSchema(
    Schema schema,
    TypeDescriptor<T> typeDescriptor,
    SerializableFunction<T, Row> toRowFunction,
    SerializableFunction<Row, T> fromRowFunction) {
  return withCoder(SchemaCoder.of(schema, typeDescriptor, toRowFunction, fromRowFunction));
}
 
Example #6
Source File: TextIO.java    From beam with Apache License 2.0
/**
 * See {@link MatchConfiguration#continuously}.
 *
 * <p>This works only in runners supporting {@link Kind#SPLITTABLE_DO_FN}.
 */
@Experimental(Kind.SPLITTABLE_DO_FN)
public Read watchForNewFiles(
    Duration pollInterval, TerminationCondition<String, ?> terminationCondition) {
  return withMatchConfiguration(
      getMatchConfiguration().continuously(pollInterval, terminationCondition));
}
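
A sketch of a streaming read built on this method, polling a glob every minute and stopping after an hour with no new output (the path is an assumption):

PCollection<String> lines =
    pipeline.apply(
        TextIO.read()
            .from("gs://my-bucket/logs/*.txt")
            .watchForNewFiles(
                Duration.standardMinutes(1),
                Watch.Growth.afterTimeSinceNewOutput(Duration.standardHours(1))));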
 
Example #7
Source File: FlatMapElements.java    From beam with Apache License 2.0
/** Like {@link #via(ProcessFunction)}, but allows access to additional context. */
@Experimental(Kind.CONTEXTFUL)
public <NewInputT> FlatMapElements<NewInputT, OutputT> via(
    Contextful<Fn<NewInputT, Iterable<OutputT>>> fn) {
  return new FlatMapElements<>(
      fn, fn.getClosure(), TypeDescriptors.inputOf(fn.getClosure()), outputType);
}
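
A sketch of the contextful form, following the pattern in the Beam javadoc: split lines into words, but only for lines no longer than a side-input limit (lines and maxLength are assumed names):

PCollection<String> words =
    lines.apply(
        FlatMapElements.into(TypeDescriptors.strings())
            .via(
                Contextful.fn(
                    (String line, Contextful.Fn.Context c) ->
                        line.length() <= c.sideInput(maxLength)
                            ? Arrays.asList(line.split(" "))
                            : Collections.<String>emptyList(),
                    Requirements.requiresSideInputs(maxLength))));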
 
Example #8
Source File: ProtoByteBuddyUtils.java    From beam with Apache License 2.0
@Experimental(Kind.SCHEMAS)
static <ProtoBuilderT extends MessageLite.Builder> SchemaUserTypeCreator createBuilderCreator(
    Class<?> protoClass,
    Class<?> builderClass,
    List<FieldValueSetter<ProtoBuilderT, Object>> setters,
    Schema schema) {
  try {
    DynamicType.Builder<Supplier> builder =
        BYTE_BUDDY
            .with(new InjectPackageStrategy(builderClass))
            .subclass(Supplier.class)
            .method(ElementMatchers.named("get"))
            .intercept(new BuilderSupplier(protoClass));
    Supplier supplier =
        builder
            .visit(
                new AsmVisitorWrapper.ForDeclaredMethods()
                    .writerFlags(ClassWriter.COMPUTE_FRAMES))
            .make()
            .load(ReflectHelpers.findClassLoader(), ClassLoadingStrategy.Default.INJECTION)
            .getLoaded()
            .getDeclaredConstructor()
            .newInstance();
    return new ProtoCreatorFactory<>(supplier, setters);
  } catch (InstantiationException
      | IllegalAccessException
      | NoSuchMethodException
      | InvocationTargetException e) {
    throw new RuntimeException(
        "Unable to generate a creator for class " + builderClass + " with schema " + schema, e);
  }
}
 
Example #9
Source File: TextIO.java    From beam with Apache License 2.0
/**
 * See {@link TypedWrite#to(SerializableFunction, Params)}.
 *
 * @deprecated Use {@link FileIO#write()} or {@link FileIO#writeDynamic()} with {@link
 *     #sink()} instead.
 */
@Experimental(Kind.FILESYSTEM)
@Deprecated
public Write to(
    SerializableFunction<String, Params> destinationFunction, Params emptyDestination) {
  return new Write(
      inner
          .to(destinationFunction, emptyDestination)
          .withFormatFunction(SerializableFunctions.identity()));
}
 
Example #10
Source File: SlidingWindows.java    From beam with Apache License 2.0
/**
 * Ensures that later sliding windows have an output time that is past the end of earlier windows.
 *
 * <p>If this is the earliest sliding window containing {@code inputTimestamp}, that's fine.
 * Otherwise, we pick the earliest time that doesn't overlap with earlier windows.
 */
@Experimental(Kind.OUTPUT_TIME)
@Override
public Instant getOutputTime(Instant inputTimestamp, IntervalWindow window) {
  Instant startOfLastSegment = window.maxTimestamp().minus(period);
  return startOfLastSegment.isBefore(inputTimestamp)
      ? inputTimestamp
      : startOfLastSegment.plus(1);
}
 
Example #11
Source File: TextIO.java    From beam with Apache License 2.0
/**
 * See {@link TypedWrite#to(DynamicDestinations)}.
 *
 * @deprecated Use {@link FileIO#write()} or {@link FileIO#writeDynamic()} with {@link
 *     #sink()} instead.
 */
@Experimental(Kind.FILESYSTEM)
@Deprecated
public Write to(DynamicDestinations<String, ?, String> dynamicDestinations) {
  return new Write(
      inner.to((DynamicDestinations) dynamicDestinations).withFormatFunction(null));
}
 
Example #12
Source File: PCollection.java    From beam with Apache License 2.0
/** Returns the attached schema. */
@Experimental(Kind.SCHEMAS)
public Schema getSchema() {
  if (!hasSchema()) {
    throw new IllegalStateException("Cannot call getSchema when there is no schema");
  }
  return ((SchemaCoder) getCoder()).getSchema();
}
 
Example #13
Source File: PCollection.java    From beam with Apache License 2.0
/** Returns the attached schema's fromRowFunction. */
@Experimental(Kind.SCHEMAS)
public SerializableFunction<Row, T> getFromRowFunction() {
  if (!hasSchema()) {
    throw new IllegalStateException("Cannot call getFromRowFunction when there is no schema");
  }
  return ((SchemaCoder<T>) getCoder()).getFromRowFunction();
}
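
Both accessors throw an IllegalStateException unless a schema is attached, so callers typically guard with hasSchema() first. A minimal sketch (pc and MyPojo are hypothetical names):

if (pc.hasSchema()) {
  Schema schema = pc.getSchema();  // safe: a schema coder is attached
  SerializableFunction<Row, MyPojo> fromRow = pc.getFromRowFunction();
  // fromRow converts Rows conforming to this schema back into MyPojo instances.
}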
 
Example #14
Source File: FileBasedSink.java    From beam with Apache License 2.0
/**
 * Construct a {@link FileBasedSink} with the given temp directory, producing uncompressed files.
 */
@Experimental(Kind.FILESYSTEM)
public FileBasedSink(
    ValueProvider<ResourceId> tempDirectoryProvider,
    DynamicDestinations<?, DestinationT, OutputT> dynamicDestinations) {
  this(tempDirectoryProvider, dynamicDestinations, Compression.UNCOMPRESSED);
}
 
Example #15
Source File: ProtoByteBuddyUtils.java    From beam with Apache License 2.0
@Experimental(Kind.SCHEMAS)
@Nullable
public static <ProtoBuilderT extends MessageLite.Builder> SchemaUserTypeCreator getBuilderCreator(
    Class<?> protoClass, Schema schema, FieldValueTypeSupplier fieldValueTypeSupplier) {
  Class<ProtoBuilderT> builderClass = getProtoGeneratedBuilder(protoClass);
  if (builderClass == null) {
    return null;
  }
  Multimap<String, Method> methods = ReflectUtils.getMethodsMap(builderClass);
  List<FieldValueSetter<ProtoBuilderT, Object>> setters =
      schema.getFields().stream()
          .map(f -> getProtoFieldValueSetter(f, methods, builderClass))
          .collect(Collectors.toList());
  return createBuilderCreator(protoClass, builderClass, setters, schema);
}
 
Example #16
Source File: AvroIO.java    From beam with Apache License 2.0
/**
 * Use a {@link DynamicAvroDestinations} object to vend {@link FilenamePolicy} objects. These
 * objects can examine the input record when creating a {@link FilenamePolicy}. A directory for
 * temporary files must be specified using {@link #withTempDirectory}.
 *
 * @deprecated Use {@link FileIO#write()} or {@link FileIO#writeDynamic()} instead.
 */
@Experimental(Kind.FILESYSTEM)
@Deprecated
public <NewDestinationT> TypedWrite<UserT, NewDestinationT, OutputT> to(
    DynamicAvroDestinations<UserT, NewDestinationT, OutputT> dynamicDestinations) {
  return toBuilder()
      .setDynamicDestinations((DynamicAvroDestinations) dynamicDestinations)
      .build();
}
 
Example #17
Source File: JdbcIO.java    From beam with Apache License 2.0
/** Read Beam {@link Row}s from a JDBC data source. */
@Experimental(Kind.SCHEMAS)
public static ReadRows readRows() {
  return new AutoValue_JdbcIO_ReadRows.Builder()
      .setFetchSize(DEFAULT_FETCH_SIZE)
      .setOutputParallelization(true)
      .setStatementPreparator(ignored -> {})
      .build();
}
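
A sketch of wiring readRows into a pipeline (the driver, URL, and query are assumptions):

PCollection<Row> rows =
    pipeline.apply(
        JdbcIO.readRows()
            .withDataSourceConfiguration(
                JdbcIO.DataSourceConfiguration.create(
                    "org.postgresql.Driver", "jdbc:postgresql://localhost:5432/mydb"))
            .withQuery("SELECT id, name FROM users"));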
 
Example #18
Source File: AvroIO.java    From beam with Apache License 2.0
/** Like {@link Read#watchForNewFiles}. */
@Experimental(Kind.SPLITTABLE_DO_FN)
public Parse<T> watchForNewFiles(
    Duration pollInterval, TerminationCondition<String, ?> terminationCondition) {
  return withMatchConfiguration(
      getMatchConfiguration().continuously(pollInterval, terminationCondition));
}
 
Example #19
Source File: AvroIO.java    From beam with Apache License 2.0
/** Like {@link Read#watchForNewFiles}. */
@Experimental(Kind.SPLITTABLE_DO_FN)
public ReadAll<T> watchForNewFiles(
    Duration pollInterval, TerminationCondition<String, ?> terminationCondition) {
  return withMatchConfiguration(
      getMatchConfiguration().continuously(pollInterval, terminationCondition));
}
 
Example #20
Source File: AvroIO.java    From beam with Apache License 2.0
@Experimental(Kind.SCHEMAS)
private static <T> PCollection<T> setBeamSchema(
    PCollection<T> pc, Class<T> clazz, @Nullable Schema schema) {
  org.apache.beam.sdk.schemas.Schema beamSchema =
      org.apache.beam.sdk.schemas.utils.AvroUtils.getSchema(clazz, schema);
  if (beamSchema != null) {
    pc.setSchema(
        beamSchema,
        TypeDescriptor.of(clazz),
        org.apache.beam.sdk.schemas.utils.AvroUtils.getToRowFunction(clazz, schema),
        org.apache.beam.sdk.schemas.utils.AvroUtils.getFromRowFunction(clazz));
  }
  return pc;
}
 
Example #21
Source File: FileBasedSink.java    From beam with Apache License 2.0
@Experimental(Kind.FILESYSTEM)
public FileResult(
    ResourceId tempFilename,
    int shard,
    BoundedWindow window,
    PaneInfo paneInfo,
    DestinationT destination) {
  checkArgument(window != null, "window can not be null");
  checkArgument(paneInfo != null, "paneInfo can not be null");
  this.tempFilename = tempFilename;
  this.shard = shard;
  this.window = window;
  this.paneInfo = paneInfo;
  this.destination = destination;
}
 
Example #22
Source File: FileBasedSink.java    From beam with Apache License 2.0
/** Construct a {@link FileBasedSink} with the given temp directory and output channel type. */
@Experimental(Kind.FILESYSTEM)
public FileBasedSink(
    ValueProvider<ResourceId> tempDirectoryProvider,
    DynamicDestinations<?, DestinationT, OutputT> dynamicDestinations,
    Compression compression) {
  this(tempDirectoryProvider, dynamicDestinations, CompressionType.fromCanonical(compression));
}
 
Example #23
Source File: FileBasedSink.java    From beam with Apache License 2.0
/** Construct a {@link FileBasedSink} with the given temp directory and output channel type. */
@Experimental(Kind.FILESYSTEM)
public FileBasedSink(
    ValueProvider<ResourceId> tempDirectoryProvider,
    DynamicDestinations<?, DestinationT, OutputT> dynamicDestinations,
    WritableByteChannelFactory writableByteChannelFactory) {
  this.tempDirectoryProvider =
      NestedValueProvider.of(tempDirectoryProvider, new ExtractDirectory());
  this.dynamicDestinations = checkNotNull(dynamicDestinations);
  this.writableByteChannelFactory = writableByteChannelFactory;
}
 
Example #24
Source File: TestStream.java    From beam with Apache License 2.0
@Experimental(Kind.SCHEMAS)
public static <T> Builder<T> create(
    Schema schema,
    TypeDescriptor<T> typeDescriptor,
    SerializableFunction<T, Row> toRowFunction,
    SerializableFunction<Row, T> fromRowFunction) {
  return create(SchemaCoder.of(schema, typeDescriptor, toRowFunction, fromRowFunction));
}
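
A sketch of building a schema-aware TestStream with this overload, reusing the hypothetical Person type and personSchema from the Create example above:

TestStream<Person> stream =
    TestStream.create(
            personSchema,
            TypeDescriptor.of(Person.class),
            person -> Row.withSchema(personSchema).addValues(person.name, person.age).build(),
            row -> new Person(row.getString("name"), row.getInt32("age")))
        .addElements(new Person("alice", 40))
        .advanceWatermarkToInfinity();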
 
Example #25
Source File: BigQueryIO.java    From beam with Apache License 2.0
@Nullable
@Experimental(Kind.SCHEMAS)
abstract ToBeamRowFunction<T> getToBeamRowFn();
 
Example #26
Source File: AvroIO.java    From beam with Apache License 2.0
/**
 * Writes to files named according to the given {@link FileBasedSink.FilenamePolicy}. A
 * directory for temporary files must be specified using {@link #withTempDirectory}.
 */
@Experimental(Kind.FILESYSTEM)
public TypedWrite<UserT, DestinationT, OutputT> to(FilenamePolicy filenamePolicy) {
  return toBuilder().setFilenamePolicy(filenamePolicy).build();
}
 
Example #27
Source File: TextIO.java    From beam with Apache License 2.0
/** See {@link TypedWrite#to(ResourceId)}. */
@Experimental(Kind.FILESYSTEM)
public Write to(ResourceId filenamePrefix) {
  return new Write(
      inner.to(filenamePrefix).withFormatFunction(SerializableFunctions.identity()));
}
 
Example #28
Source File: TFRecordIO.java    From beam with Apache License 2.0
/** Like {@link #to(ResourceId)}. */
@Experimental(Kind.FILESYSTEM)
public Write toResource(ValueProvider<ResourceId> outputResource) {
  return toBuilder().setOutputPrefix(outputResource).build();
}
 
Example #29
Source File: AddFields.java    From beam with Apache License 2.0
private static AddFieldsInformation getAddFieldsInformation(
    Schema inputSchema, Collection<NewField> fieldsToAdd) {
  List<NewField> newTopLevelFields =
      fieldsToAdd.stream()
          .filter(n -> !n.getDescriptor().getFieldsAccessed().isEmpty())
          .collect(Collectors.toList());
  List<NewField> newNestedFields =
      fieldsToAdd.stream()
          .filter(n -> !n.getDescriptor().getNestedFieldsAccessed().isEmpty())
          .collect(Collectors.toList());
  // Group all nested fields together by the field at the current level. For example, if
  // adding a.b, a.c, and a.d, this map will contain a -> {a.b, a.c, a.d}.
  Multimap<String, NewField> newNestedFieldsMap =
      Multimaps.index(newNestedFields, NewField::getName);

  Map<Integer, AddFieldsInformation> resolvedNestedNewValues = Maps.newHashMap();
  Schema.Builder builder = Schema.builder();
  for (int i = 0; i < inputSchema.getFieldCount(); ++i) {
    Schema.Field field = inputSchema.getField(i);
    Collection<NewField> nestedFields = newNestedFieldsMap.get(field.getName());

    // If this field is a nested field and new subfields are added further down the tree,
    // add those subfields before adding to the current schema. Otherwise we just add this
    // field as is to the new schema.
    if (!nestedFields.isEmpty()) {
      nestedFields = nestedFields.stream().map(NewField::descend).collect(Collectors.toList());

      AddFieldsInformation nestedInformation =
          getAddFieldsInformation(field.getType(), nestedFields);
      field = field.withType(nestedInformation.getOutputFieldType());
      resolvedNestedNewValues.put(i, nestedInformation);
    }
    builder.addField(field);
  }

  // Add any new fields at this level.
  List<Object> newValuesThisLevel = new ArrayList<>(newTopLevelFields.size());
  for (NewField newField : newTopLevelFields) {
    builder.addField(newField.getName(), newField.getFieldType());
    newValuesThisLevel.add(newField.getDefaultValue());
  }

  // If there are any nested field additions left that are not already processed, the root of
  // the nested field doesn't exist in the schema. In this case we'll walk down the new nested
  // fields and recursively create each nested level as necessary.
  for (Map.Entry<String, Collection<NewField>> newNested :
      newNestedFieldsMap.asMap().entrySet()) {
    String fieldName = newNested.getKey();

    // Fail if the user specifies the same nested field twice in different ways
    // (e.g. a[].x and a{}.x): getOnlyElement throws unless every occurrence resolves to the
    // same descriptor.
    FieldAccessDescriptor.FieldDescriptor fieldDescriptor =
        Iterables.getOnlyElement(
            newNested.getValue().stream()
                .map(NewField::getFieldDescriptor)
                .distinct()
                .collect(Collectors.toList()));
    FieldType fieldType = Schema.FieldType.row(Schema.of()).withNullable(true);
    for (Qualifier qualifier : fieldDescriptor.getQualifiers()) {
      // The problem with adding recursive map fields is that we don't know what the map key
      // type should be. In a field descriptor of the form mapField{}.subField, the subField is
      // assumed to be in the map value. Since on this code path the mapField field does not
      // already exist, we would need to create the new map field without any way of knowing
      // what type its key should be. Alternatives would be to always create a default key type
      // (e.g. FieldType.STRING) or to extend our selector syntax to allow specifying key types.
      checkArgument(
          !qualifier.getKind().equals(Qualifier.Kind.MAP), "Map qualifiers not supported here");
      fieldType = FieldType.array(fieldType).withNullable(true);
    }
    if (!inputSchema.hasField(fieldName)) {
      // This is a brand-new nested field with no matching field in the input schema. We will
      // recursively create a nested schema to match it.
      Collection<NewField> nestedNewFields =
          newNested.getValue().stream().map(NewField::descend).collect(Collectors.toList());
      AddFieldsInformation addFieldsInformation =
          getAddFieldsInformation(fieldType, nestedNewFields);
      builder.addField(fieldName, addFieldsInformation.getOutputFieldType());
      resolvedNestedNewValues.put(builder.getLastFieldId(), addFieldsInformation);
    }
  }
  Schema schema = builder.build();

  List<AddFieldsInformation> nestedNewValueList =
      new ArrayList<>(Collections.nCopies(schema.getFieldCount(), null));
  for (Map.Entry<Integer, AddFieldsInformation> entry : resolvedNestedNewValues.entrySet()) {
    nestedNewValueList.set(entry.getKey(), entry.getValue());
  }
  return AddFieldsInformation.of(
      Schema.FieldType.row(schema), newValuesThisLevel, nestedNewValueList);
}
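
getAddFieldsInformation is the internal planning step; user code reaches it through the public builder, roughly as in this sketch (the field names are assumptions):

PCollection<Row> extended =
    input.apply(
        AddFields.<Row>create()
            .field("timestamp", Schema.FieldType.DATETIME)
            .field("location.latitude", Schema.FieldType.DOUBLE)
            .field("location.longitude", Schema.FieldType.DOUBLE));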
 
Example #30
Source File: Coder.java    From beam with Apache License 2.0
/** Returns the {@link TypeDescriptor} for the type encoded. */
@Experimental(Kind.CODER_TYPE_ENCODING)
public TypeDescriptor<T> getEncodedTypeDescriptor() {
  return (TypeDescriptor<T>)
      TypeDescriptor.of(getClass()).resolveType(new TypeDescriptor<T>() {}.getType());
}