org.apache.beam.sdk.transforms.Contextful Java Examples

The following examples show how to use org.apache.beam.sdk.transforms.Contextful. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FileIO.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Expands filepatterns into match metadata and redistributes the result.
 *
 * <p>When a watch interval is configured, the filepatterns are polled through {@code
 * Watch.growthOf} using the configured poll interval and termination condition, and {@code
 * Values.create()} is applied to the watch output. Otherwise {@code MatchFn} is applied through a
 * plain {@code ParDo}. Either way the output is passed through {@code Reshuffle.viaRandomKey()}.
 *
 * @param input the filepatterns to match
 * @return the reshuffled match metadata
 */
@Override
public PCollection<MatchResult.Metadata> expand(PCollection<String> input) {
  final PCollection<MatchResult.Metadata> matches;
  if (getConfiguration().getWatchInterval() != null) {
    // Continuous mode: keep polling each filepattern for new matches.
    matches =
        input
            .apply(
                "Continuously match filepatterns",
                Watch.growthOf(
                        Contextful.of(new MatchPollFn(), Requirements.empty()),
                        new ExtractFilenameFn())
                    .withPollInterval(getConfiguration().getWatchInterval())
                    .withTerminationPerInput(getConfiguration().getWatchTerminationCondition()))
            .apply(Values.create());
  } else {
    // No watch interval configured: run the matching DoFn directly.
    matches =
        input.apply(
            "Match filepatterns",
            ParDo.of(new MatchFn(getConfiguration().getEmptyMatchTreatment())));
  }
  return matches.apply(Reshuffle.viaRandomKey());
}
 
Example #2
Source File: Write.java    From gcp-ingestion with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Windows the incoming messages and writes them to dynamically named files.
 *
 * <p>The destination of each message is the ordered list of placeholder values extracted from
 * its attributes by the path template; the same list is later handed to the naming function to
 * build the output location.
 *
 * @param input the messages to write
 * @return a result holding {@code PDone} and an empty error collection
 */
@Override
public WithFailures.Result<PDone, PubsubMessage> expand(PCollection<PubsubMessage> input) {
  ValueProvider<DynamicPathTemplate> pathTemplate =
      NestedValueProvider.of(outputPrefix, DynamicPathTemplate::new);
  ValueProvider<String> staticPrefix =
      NestedValueProvider.of(pathTemplate, template -> template.staticPrefix);

  // The attribute map can't be the destination itself because MapCoder isn't deterministic,
  // so by() extracts an ordered list of the needed placeholder values instead. withNaming()
  // receives that list to determine the output location.
  FileIO.Write<List<String>, PubsubMessage> write =
      FileIO.<List<String>, PubsubMessage>writeDynamic()
          .by(
              msg ->
                  pathTemplate
                      .get()
                      .extractValuesFrom(DerivedAttributesMap.of(msg.getAttributeMap())))
          .withDestinationCoder(ListCoder.of(StringUtf8Coder.of()))
          .withCompression(compression)
          .via(Contextful.fn(format::encodeSingleMessage), TextIO.sink())
          .to(staticPrefix)
          .withNaming(
              values ->
                  NoColonFileNaming.defaultNaming(
                      pathTemplate.get().replaceDynamicPart(values), format.suffix()));

  // Passing a ValueProvider to withNumShards disables runner-determined sharding, so it is
  // only set for streaming input (where runner-determined sharding is not an option).
  if (inputType == InputType.pubsub) {
    write = write.withNumShards(numShards);
  }

  // Allowed lateness matches the maximum Cloud Pub/Sub retention of 7 days documented in
  // https://cloud.google.com/pubsub/docs/subscriber
  Window<PubsubMessage> window =
      Window.<PubsubMessage>into(FixedWindows.of(windowDuration))
          .withAllowedLateness(Duration.standardDays(7))
          .discardingFiredPanes();

  input.apply(window).apply(write);

  return WithFailures.Result.of(
      PDone.in(input.getPipeline()), EmptyErrors.in(input.getPipeline()));
}
 
Example #3
Source File: ParseJsons.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Maps each input string through {@code readValue}, routing thrown exceptions to the configured
 * failure type via the configured exception handler.
 *
 * @param input the strings to parse
 * @return the parsed elements together with the collected failures
 */
@Override
public WithFailures.Result<PCollection<OutputT>, FailureT> expand(PCollection<String> input) {
  // The context argument is ignored; the function declares no requirements.
  Contextful.Fn<String, OutputT> parseJson = (json, context) -> readValue(json);
  return input.apply(
      MapElements.into(new TypeDescriptor<OutputT>() {})
          .via(Contextful.fn(parseJson, Requirements.empty()))
          .exceptionsInto(failureType)
          .exceptionsVia(exceptionHandler));
}
 
Example #4
Source File: AsJsons.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Maps each input element through {@code writeValue}, routing thrown exceptions to the
 * configured failure type via the configured exception handler.
 *
 * @param input the elements to serialize
 * @return the produced strings together with the collected failures
 */
@Override
public WithFailures.Result<PCollection<String>, FailureT> expand(PCollection<InputT> input) {
  // The context argument is ignored; the function declares no requirements.
  Contextful.Fn<InputT, String> toJson = (element, context) -> writeValue(element);
  return input.apply(
      MapElements.into(TypeDescriptors.strings())
          .via(Contextful.fn(toJson, Requirements.empty()))
          .exceptionsInto(failureType)
          .exceptionsVia(exceptionHandler));
}
 
Example #5
Source File: TypeDescriptors.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Variant of {@link #inputOf(ProcessFunction)} that accepts a {@link Contextful.Fn}.
 *
 * @param fn the function whose input type should be extracted
 * @return a descriptor for the {@code InputT} type parameter of {@code fn}
 */
public static <InputT, OutputT> TypeDescriptor<InputT> inputOf(
    Contextful.Fn<InputT, OutputT> fn) {
  // Anonymous subclass so the extractor carries its generic type arguments.
  TypeDescriptors.TypeVariableExtractor<Contextful.Fn<InputT, OutputT>, InputT> inputExtractor =
      new TypeDescriptors.TypeVariableExtractor<Contextful.Fn<InputT, OutputT>, InputT>() {};
  return TypeDescriptors.extractFromTypeParameters(fn, Contextful.Fn.class, inputExtractor);
}
 
Example #6
Source File: TypeDescriptors.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Variant of {@link #outputOf(ProcessFunction)} that accepts a {@link Contextful.Fn}.
 *
 * @param fn the function whose output type should be extracted
 * @return a descriptor for the {@code OutputT} type parameter of {@code fn}
 */
public static <InputT, OutputT> TypeDescriptor<OutputT> outputOf(
    Contextful.Fn<InputT, OutputT> fn) {
  // Anonymous subclass so the extractor carries its generic type arguments.
  TypeDescriptors.TypeVariableExtractor<Contextful.Fn<InputT, OutputT>, OutputT> outputExtractor =
      new TypeDescriptors.TypeVariableExtractor<Contextful.Fn<InputT, OutputT>, OutputT>() {};
  return TypeDescriptors.extractFromTypeParameters(fn, Contextful.Fn.class, outputExtractor);
}
 
Example #7
Source File: FileIOTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a dynamic {@code FileIO} write can name its output from a side input (test for
 * BEAM-6407): after the pipeline finishes, the file for shard 0 must exist.
 */
@Test
@Category(NeedsRunner.class)
public void testFileIoDynamicNaming() throws IOException {
  // The base file name is delivered to the naming function as a singleton side input.
  String baseName = tmpFolder.newFile().getAbsolutePath();
  PCollectionView<String> baseNameView =
      p.apply("outputFileName", Create.of(baseName)).apply(View.asSingleton());

  Contextful.Fn<String, FileIO.Write.FileNaming> namingFn =
      (destination, context) ->
          (window, pane, numShards, shardIndex, compression) ->
              context.sideInput(baseNameView) + "-" + shardIndex;

  FileIO.Write<String, String> write =
      FileIO.<String, String>writeDynamic()
          .by(SerializableFunctions.constant(""))
          .withDestinationCoder(StringUtf8Coder.of())
          .via(TextIO.sink())
          .withTempDirectory(tmpFolder.newFolder().getAbsolutePath())
          .withNaming(
              Contextful.of(namingFn, Requirements.requiresSideInputs(baseNameView)));

  p.apply(Create.of("")).apply("WriteDynamicFilename", write);

  // The TestPipeline has to be run with the default options.
  p.run(PipelineOptionsFactory.create()).waitUntilFinish();

  File firstShard = new File(baseName + "-0");
  assertTrue("Output file shard 0 exists after pipeline completes", firstShard.exists());
}
 
Example #8
Source File: FileIO.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Variant of {@link #via(Contextful, Contextful)} for sinks whose element type equals the type
 * of the input collection, so elements are passed to the sink unchanged. The sink function must
 * create a new {@link Sink} instance every time it is called.
 *
 * @param sinkFn produces the sink for a destination; must not be null
 * @return a copy of this transform with the sink function and an identity output function set
 */
public Write<DestinationT, UserT> via(Contextful<Fn<DestinationT, Sink<UserT>>> sinkFn) {
  checkArgument(sinkFn != null, "sinkFn can not be null");
  Contextful<Fn<UserT, UserT>> passThrough = fn(SerializableFunctions.<UserT>identity());
  // Keep setSinkFn first: the raw cast is what lets the remaining setter calls type-check.
  return toBuilder().setSinkFn((Contextful) sinkFn).setOutputFn(passThrough).build();
}
 
Example #9
Source File: FileIO.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Variant of {@link #via(Contextful, Contextful)} that uses the same sink for all destinations.
 *
 * @param outputFn maps each element to the sink's element type; must not be null
 * @param sink the sink shared by every destination; must not be null
 * @return a copy of this transform configured with both functions
 */
public <OutputT> Write<DestinationT, UserT> via(
    Contextful<Fn<UserT, OutputT>> outputFn, final Sink<OutputT> sink) {
  checkArgument(sink != null, "sink can not be null");
  checkArgument(outputFn != null, "outputFn can not be null");
  Contextful<Fn<DestinationT, Sink<OutputT>>> sharedSinkFn =
      fn(SerializableFunctions.clonesOf(sink));
  return via(outputFn, sharedSinkFn);
}
 
Example #10
Source File: FileIO.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Configures both how a {@link Sink} is created for a given destination and how elements are
 * converted to the sink's element type. The sink function must create a new {@link Sink}
 * instance every time it is called.
 *
 * @param outputFn maps each element to the sink's element type; must not be null
 * @param sinkFn produces the sink for a destination; must not be null
 * @return a copy of this transform configured with both functions
 */
public <OutputT> Write<DestinationT, UserT> via(
    Contextful<Fn<UserT, OutputT>> outputFn,
    Contextful<Fn<DestinationT, Sink<OutputT>>> sinkFn) {
  checkArgument(sinkFn != null, "sinkFn can not be null");
  checkArgument(outputFn != null, "outputFn can not be null");
  // Keep setSinkFn first: the raw cast is what lets the remaining setter calls type-check.
  return toBuilder()
      .setSinkFn((Contextful) sinkFn)
      .setOutputFn(outputFn)
      .build();
}
 
Example #11
Source File: Write.java    From gcp-ingestion with Mozilla Public License 2.0 4 votes vote down vote up
/**
 * Encodes message payloads as Avro, windows the successfully encoded messages, and writes them
 * to dynamically named {@code .avro} files.
 *
 * @param input the messages to encode and write
 * @return a result holding {@code PDone} and the messages emitted on the error tag
 * @throws RuntimeException if the path template lacks the document namespace, type, or version
 *     placeholder
 */
@Override
public WithFailures.Result<PDone, PubsubMessage> expand(PCollection<PubsubMessage> input) {
  ValueProvider<String> staticPrefix =
      NestedValueProvider.of(pathTemplate, template -> template.staticPrefix);

  List<String> placeholders = pathTemplate.get().getPlaceholderNames();
  List<String> requiredPlaceholders =
      Arrays.asList("document_namespace", "document_type", "document_version");
  if (!placeholders.containsAll(requiredPlaceholders)) {
    throw new RuntimeException(
        "Path template must contain document namespace, type, and version");
  }

  AvroEncoder encoder = new AvroEncoder();

  // A ParDo is used rather than a PTransform extending MapElementsWithErrors. Error handling
  // becomes manual (output tags), but this allows for side-input of the singleton SchemaStore
  // PCollection.
  ParDo.MultiOutput<PubsubMessage, PubsubMessage> avroEncoding =
      ParDo.of(encoder).withOutputTags(successTag, TupleTagList.of(errorTag));

  FileIO.Write<List<String>, PubsubMessage> write =
      FileIO.<List<String>, PubsubMessage>writeDynamic()
          .by(msg -> pathTemplate.get().extractValuesFrom(msg.getAttributeMap()))
          .withDestinationCoder(ListCoder.of(StringUtf8Coder.of()))
          .withCompression(compression)
          .via(Contextful.fn(encoder::getSink))
          .to(staticPrefix)
          .withNaming(
              values ->
                  NoColonFileNaming.defaultNaming(
                      pathTemplate.get().replaceDynamicPart(values), ".avro"));

  // Passing a ValueProvider to withNumShards disables runner-determined sharding, so it is
  // only set for streaming input (where runner-determined sharding is not an option).
  if (inputType == InputType.pubsub) {
    write = write.withNumShards(numShards);
  }

  // Without explicit windowing we may run into
  // `Inputs to Flatten had incompatible window windowFns`.
  // Allowed lateness matches the maximum Cloud Pub/Sub retention of 7 days documented in
  // https://cloud.google.com/pubsub/docs/subscriber
  Window<PubsubMessage> window =
      Window.<PubsubMessage>into(FixedWindows.of(windowDuration))
          .withAllowedLateness(Duration.standardDays(7))
          .discardingFiredPanes();

  PCollectionTuple encoded = input.apply("encodePayloadAsAvro", avroEncoding);
  encoded.get(successTag).apply(window).apply(write);

  return WithFailures.Result.of(PDone.in(input.getPipeline()), encoded.get(errorTag));
}
 
Example #12
Source File: FileIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Validates the naming-related settings and resolves them into a single function from
 * destination to {@link FileNaming}.
 *
 * <p>In dynamic mode the user-supplied naming function is required, and the constant naming,
 * prefix, and suffix must all be unset. In non-dynamic mode the per-destination naming function
 * must be unset; the constant naming is used when present (and is then incompatible with a
 * prefix or suffix), otherwise a default naming is built from the prefix (default {@code
 * "output"}) and suffix (default empty). In both modes, a configured output directory causes the
 * naming to be wrapped by {@code relativeFileNaming}.
 *
 * @return the resolved naming function
 * @throws IllegalArgumentException if the combination of settings is invalid
 */
@VisibleForTesting
Contextful<Fn<DestinationT, FileNaming>> resolveFileNamingFn() {
  if (getDynamic()) {
    checkArgument(
        getConstantFileNaming() == null,
        "when using writeDynamic(), must use versions of .withNaming() "
            + "that take functions from DestinationT");
    checkArgument(getFilenamePrefix() == null, ".withPrefix() requires write()");
    checkArgument(getFilenameSuffix() == null, ".withSuffix() requires write()");
    checkArgument(
        getFileNamingFn() != null,
        "when using writeDynamic(), must specify "
            + ".withNaming() taking a function from DestinationT");
    // Delegate to the user's function, keeping its requirements, and make the resulting
    // naming relative to the output directory when one is configured.
    return fn(
        (element, c) -> {
          FileNaming naming = getFileNamingFn().getClosure().apply(element, c);
          return getOutputDirectory() == null
              ? naming
              : relativeFileNaming(getOutputDirectory(), naming);
        },
        getFileNamingFn().getRequirements());
  } else {
    checkArgument(
        getFileNamingFn() == null,
        ".withNaming() taking a function from DestinationT requires writeDynamic()");
    FileNaming constantFileNaming;
    if (getConstantFileNaming() == null) {
      constantFileNaming =
          defaultNaming(
              MoreObjects.firstNonNull(getFilenamePrefix(), StaticValueProvider.of("output")),
              MoreObjects.firstNonNull(getFilenameSuffix(), StaticValueProvider.of("")));
    } else {
      // Each message names the setting that the adjacent check actually inspects.
      checkArgument(
          getFilenamePrefix() == null, ".to(FileNaming) is incompatible with .withPrefix()");
      checkArgument(
          getFilenameSuffix() == null, ".to(FileNaming) is incompatible with .withSuffix()");
      constantFileNaming = getConstantFileNaming();
    }
    if (getOutputDirectory() != null) {
      constantFileNaming = relativeFileNaming(getOutputDirectory(), constantFileNaming);
    }
    return fn(SerializableFunctions.<DestinationT, FileNaming>constant(constantFileNaming));
  }
}
 
Example #13
Source File: FileIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Variant of {@link #withNaming(SerializableFunction)} whose function can also read context,
 * such as side inputs.
 *
 * @param namingFn maps a destination to its {@link FileNaming}; must not be null
 * @return a copy of this transform with the naming function set
 */
public Write<DestinationT, UserT> withNaming(
    Contextful<Fn<DestinationT, FileNaming>> namingFn) {
  checkArgument(namingFn != null, "namingFn can not be null");
  return toBuilder()
      .setFileNamingFn(namingFn)
      .build();
}
 
Example #14
Source File: FileIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Variant of {@link #by} whose function can also read context, such as side inputs.
 *
 * @param destinationFn maps an element to its destination; must not be null
 * @return a copy of this transform with the destination function set
 */
public Write<DestinationT, UserT> by(Contextful<Fn<UserT, DestinationT>> destinationFn) {
  checkArgument(destinationFn != null, "destinationFn can not be null");
  return toBuilder()
      .setDestinationFn(destinationFn)
      .build();
}
 
Example #15
Source File: FileIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Builder setter for the contextful function that maps a destination to a {@code FileNaming}.
 *
 * @param namingFn the naming function to store
 * @return a builder, allowing calls to be chained
 */
abstract Builder<DestinationT, UserT> setFileNamingFn(
Contextful<Fn<DestinationT, FileNaming>> namingFn);
 
Example #16
Source File: FileIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Builder setter for the contextful function that maps an element to its destination.
 *
 * @param destinationFn the destination function to store
 * @return a builder, allowing calls to be chained
 */
abstract Builder<DestinationT, UserT> setDestinationFn(
Contextful<Fn<UserT, DestinationT>> destinationFn);
 
Example #17
Source File: FileIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the contextful function that maps a destination to a {@code FileNaming}; may be
 * {@code null}.
 */
@Nullable
abstract Contextful<Fn<DestinationT, FileNaming>> getFileNamingFn();
 
Example #18
Source File: FileIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the contextful function that maps an element to its destination; may be {@code null}.
 */
@Nullable
abstract Contextful<Fn<UserT, DestinationT>> getDestinationFn();
 
Example #19
Source File: FileIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the contextful function applied to {@code UserT} elements; its result type is left
 * as a wildcard. May be {@code null}.
 */
@Nullable
abstract Contextful<Fn<UserT, ?>> getOutputFn();
 
Example #20
Source File: FileIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the contextful function that maps a destination to a {@code Sink}; the sink's element
 * type is left as a wildcard. May be {@code null}.
 */
@Nullable
abstract Contextful<Fn<DestinationT, Sink<?>>> getSinkFn();
 
Example #21
Source File: FileIO.java    From beam with Apache License 2.0 votes vote down vote up
/**
 * Builder setter for the contextful function applied to {@code UserT} elements; the function's
 * result type is left as a wildcard.
 *
 * @param outputFn the output function to store
 * @return a builder, allowing calls to be chained
 */
abstract Builder<DestinationT, UserT> setOutputFn(Contextful<Fn<UserT, ?>> outputFn); 
Example #22
Source File: FileIO.java    From beam with Apache License 2.0 votes vote down vote up
/**
 * Builder setter for the contextful function that maps a destination to a {@code Sink}; the
 * sink's element type is left as a wildcard.
 *
 * @param sink the sink function to store
 * @return a builder, allowing calls to be chained
 */
abstract Builder<DestinationT, UserT> setSinkFn(Contextful<Fn<DestinationT, Sink<?>>> sink);