Java Code Examples for org.apache.beam.sdk.Pipeline#applyTransform()

The following examples show how to use org.apache.beam.sdk.Pipeline#applyTransform(). Each example is taken from the Apache Beam source tree; the source file and license are noted above each snippet.
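
All of the instance-level apply overloads shown below delegate to the static Pipeline.applyTransform hook, so input.apply(transform) and Pipeline.applyTransform(input, transform) build the same node in the job graph. A minimal sketch of the two equivalent call styles (the step names and data are illustrative, and the direct runner is assumed to be on the classpath):

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.transforms.Count;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.PCollection;

public class ApplyTransformSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create();
    PCollection<String> words = p.apply(Create.of("a", "b", "a"));

    // The idiomatic instance-level call...
    PCollection<Long> viaApply = words.apply("CountViaApply", Count.globally());

    // ...and the static entry point it delegates to.
    PCollection<Long> viaStatic =
        Pipeline.applyTransform("CountViaStatic", words, Count.<String>globally());

    p.run().waitUntilFinish();
  }
}
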
Example 1
Source File: BeamSqlRelUtils.java    From beam with Apache License 2.0
/**
 * A {@link BeamRelNode} is a recursive structure, the {@code BeamQueryPlanner} visits it with a
 * DFS(Depth-First-Search) algorithm.
 */
static PCollection<Row> toPCollection(
    Pipeline pipeline, BeamRelNode node, Map<Integer, PCollection<Row>> cache) {
  PCollection<Row> output = cache.get(node.getId());
  if (output != null) {
    return output;
  }

  String name = node.getClass().getSimpleName() + "_" + node.getId();
  PCollectionList<Row> input = buildPCollectionList(node.getPCollectionInputs(), pipeline, cache);
  PTransform<PCollectionList<Row>, PCollection<Row>> transform = node.buildPTransform();
  output = Pipeline.applyTransform(name, input, transform);

  cache.put(node.getId(), output);
  return output;
}
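
Outside the SQL planner, the same name/input/transform triple shows up as an ordinary named Flatten over a PCollectionList. A hedged sketch of the equivalent call (the input collections and the "BeamUnionRel_3" name are made up for illustration):

static PCollection<Row> unionRows(PCollection<Row> leftRows, PCollection<Row> rightRows) {
  PCollectionList<Row> inputs = PCollectionList.of(leftRows).and(rightRows);
  // Same shape as toPCollection above: a generated node name, the collected
  // inputs, and the node's PTransform, handed to the static entry point.
  return Pipeline.applyTransform("BeamUnionRel_3", inputs, Flatten.pCollections());
}
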
 
Example 2
Source File: KeyedPCollectionTuple.java    From beam with Apache License 2.0
/**
 * Like {@link #apply(String, PTransform)} but defaulting to the name provided by the {@link
 * PTransform}.
 */
public <OutputT extends POutput> OutputT apply(
    PTransform<KeyedPCollectionTuple<K>, OutputT> transform) {
  return Pipeline.applyTransform(this, transform);
}
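
In user code this overload is usually reached through a CoGroupByKey join. A minimal sketch, with illustrative tags and inputs:

static PCollection<KV<String, CoGbkResult>> joinClicksAndViews(
    PCollection<KV<String, Integer>> clicks, PCollection<KV<String, String>> views) {
  TupleTag<Integer> clicksTag = new TupleTag<>();
  TupleTag<String> viewsTag = new TupleTag<>();
  // The unnamed apply forwards to Pipeline.applyTransform(this, transform),
  // letting CoGroupByKey supply its default name.
  return KeyedPCollectionTuple.of(clicksTag, clicks)
      .and(viewsTag, views)
      .apply(CoGroupByKey.create());
}
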
 
Example 3
Source File: PBegin.java    From beam with Apache License 2.0
/**
 * Like {@link #apply(String, PTransform)} but defaulting to the name of the {@link PTransform}.
 */
public <OutputT extends POutput> OutputT apply(PTransform<? super PBegin, OutputT> t) {
  return Pipeline.applyTransform(this, t);
}
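
PBegin is the input to root transforms, so this is the overload that runs when a source is applied directly to a pipeline. A minimal sketch:

Pipeline p = Pipeline.create();
// Pipeline.apply(t) forwards to PBegin.apply(t), which in turn calls
// Pipeline.applyTransform(this, t) as shown above.
PCollection<String> words = p.apply(Create.of("alpha", "beta", "gamma"));
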
 
Example 4
Source File: PCollectionList.java    From beam with Apache License 2.0
/**
 * Like {@link #apply(String, PTransform)} but defaulting to the name of the {@code PTransform}.
 */
public <OutputT extends POutput> OutputT apply(PTransform<PCollectionList<T>, OutputT> t) {
  return Pipeline.applyTransform(this, t);
}
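
A PCollectionList typically comes from Partition or from collecting related outputs by hand. A hedged sketch that splits a collection and recombines it through this overload (the three-way split is arbitrary):

static PCollection<Integer> splitAndMerge(PCollection<Integer> nums) {
  // Partition yields a PCollectionList; applying Flatten to it goes through
  // PCollectionList.apply, i.e. Pipeline.applyTransform(this, t) above.
  PCollectionList<Integer> parts =
      nums.apply(Partition.of(3, (Integer n, int numPartitions) -> n % numPartitions));
  return parts.apply(Flatten.pCollections());
}
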
 
Example 5
Source File: BatchViewOverrides.java    From beam with Apache License 2.0
private static <K, V, W extends BoundedWindow, ViewT> PCollection<?> applyForMapLike(
    DataflowRunner runner,
    PCollection<KV<K, V>> input,
    PCollectionView<ViewT> view,
    boolean uniqueKeysExpected)
    throws NonDeterministicException {

  @SuppressWarnings("unchecked")
  Coder<W> windowCoder = (Coder<W>) input.getWindowingStrategy().getWindowFn().windowCoder();

  @SuppressWarnings({"rawtypes", "unchecked"})
  KvCoder<K, V> inputCoder = (KvCoder) input.getCoder();

  // If our key coder is deterministic, we can use the key portion of each KV as
  // part of a composite key containing the window, key, and index.
  inputCoder.getKeyCoder().verifyDeterministic();

  IsmRecordCoder<WindowedValue<V>> ismCoder =
      coderForMapLike(windowCoder, inputCoder.getKeyCoder(), inputCoder.getValueCoder());

  // Create the various output tags representing the main output containing the data stream
  // and the additional outputs containing the metadata about the size and entry set.
  TupleTag<IsmRecord<WindowedValue<V>>> mainOutputTag = new TupleTag<>();
  TupleTag<KV<Integer, KV<W, Long>>> outputForSizeTag = new TupleTag<>();
  TupleTag<KV<Integer, KV<W, K>>> outputForEntrySetTag = new TupleTag<>();

  // Process all the elements grouped by key hash, and sorted by key and then window,
  // outputting to all the outputs defined above.
  PCollectionTuple outputTuple =
      input
          .apply("GBKaSVForData", new GroupByKeyHashAndSortByKeyAndWindow<K, V, W>(ismCoder))
          .apply(
              ParDo.of(
                      new ToIsmRecordForMapLikeDoFn<>(
                          outputForSizeTag,
                          outputForEntrySetTag,
                          windowCoder,
                          inputCoder.getKeyCoder(),
                          ismCoder,
                          uniqueKeysExpected))
                  .withOutputTags(
                      mainOutputTag,
                      TupleTagList.of(
                          ImmutableList.of(outputForSizeTag, outputForEntrySetTag))));

  // Set the coder on the main data output.
  PCollection<IsmRecord<WindowedValue<V>>> perHashWithReifiedWindows =
      outputTuple.get(mainOutputTag);
  perHashWithReifiedWindows.setCoder(ismCoder);

  // Set the coder on the metadata output for size and process the entries
  // producing a [META, Window, 0L] record per window storing the number of unique keys
  // for each window.
  PCollection<KV<Integer, KV<W, Long>>> outputForSize = outputTuple.get(outputForSizeTag);
  outputForSize.setCoder(
      KvCoder.of(VarIntCoder.of(), KvCoder.of(windowCoder, VarLongCoder.of())));
  PCollection<IsmRecord<WindowedValue<V>>> windowMapSizeMetadata =
      outputForSize
          .apply("GBKaSVForSize", new GroupByKeyAndSortValuesOnly<>())
          .apply(ParDo.of(new ToIsmMetadataRecordForSizeDoFn<K, V, W>(windowCoder)));
  windowMapSizeMetadata.setCoder(ismCoder);

  // Set the coder on the metadata output destined to build the entry set and process the
  // entries, producing a [META, Window, Index] record per (window, key) pair storing the key.
  PCollection<KV<Integer, KV<W, K>>> outputForEntrySet = outputTuple.get(outputForEntrySetTag);
  outputForEntrySet.setCoder(
      KvCoder.of(VarIntCoder.of(), KvCoder.of(windowCoder, inputCoder.getKeyCoder())));
  PCollection<IsmRecord<WindowedValue<V>>> windowMapKeysMetadata =
      outputForEntrySet
          .apply("GBKaSVForKeys", new GroupByKeyAndSortValuesOnly<>())
          .apply(
              ParDo.of(
                  new ToIsmMetadataRecordForKeyDoFn<K, V, W>(
                      inputCoder.getKeyCoder(), windowCoder)));
  windowMapKeysMetadata.setCoder(ismCoder);

  // Set that all these outputs should be materialized using an indexed format.
  runner.addPCollectionRequiringIndexedFormat(perHashWithReifiedWindows);
  runner.addPCollectionRequiringIndexedFormat(windowMapSizeMetadata);
  runner.addPCollectionRequiringIndexedFormat(windowMapKeysMetadata);

  PCollectionList<IsmRecord<WindowedValue<V>>> outputs =
      PCollectionList.of(
          ImmutableList.of(
              perHashWithReifiedWindows, windowMapSizeMetadata, windowMapKeysMetadata));

  PCollection<IsmRecord<WindowedValue<V>>> flattenedOutputs =
      Pipeline.applyTransform(outputs, Flatten.pCollections());
  flattenedOutputs.apply(CreateDataflowView.forBatch(view));
  return flattenedOutputs;
}
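
The core pattern above (one ParDo with a main output plus tagged additional outputs, collected into a PCollectionTuple) is not Dataflow-specific. A stripped-down sketch with made-up element types:

static void mainAndLengths(PCollection<String> input) {
  // Anonymous subclasses so the tags capture their type arguments.
  final TupleTag<String> mainTag = new TupleTag<String>() {};
  final TupleTag<Integer> lengthTag = new TupleTag<Integer>() {};

  PCollectionTuple outputs =
      input.apply(
          ParDo.of(new DoFn<String, String>() {
                @ProcessElement
                public void process(ProcessContext c) {
                  c.output(c.element());                     // main output
                  c.output(lengthTag, c.element().length()); // tagged output
                }
              })
              .withOutputTags(mainTag, TupleTagList.of(lengthTag)));

  PCollection<String> main = outputs.get(mainTag);
  PCollection<Integer> lengths = outputs.get(lengthTag);
}
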
 
Example 6
Source File: KeyedPCollectionTuple.java    From beam with Apache License 2.0
/**
 * Applies the given {@link PTransform} to this input {@code KeyedPCollectionTuple} and returns
 * its {@code OutputT}. This uses {@code name} to identify the specific application of the
 * transform. This name is used in various places, including the monitoring UI, logging, and to
 * stably identify this application node in the job graph.
 */
public <OutputT extends POutput> OutputT apply(
    String name, PTransform<KeyedPCollectionTuple<K>, OutputT> transform) {
  return Pipeline.applyTransform(name, this, transform);
}
 
Example 7
Source File: PBegin.java    From beam with Apache License 2.0
/**
 * Applies the given {@link PTransform} to this {@link PBegin}, using {@code name} to identify
 * this specific application of the transform.
 *
 * <p>This name is used in various places, including the monitoring UI, logging, and to stably
 * identify this application node in the job graph.
 */
public <OutputT extends POutput> OutputT apply(
    String name, PTransform<? super PBegin, OutputT> t) {
  return Pipeline.applyTransform(name, this, t);
}
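
The named variant matters for sources as well, since the name becomes the step's identity in the monitoring UI and across pipeline updates. A short sketch (the input path is a placeholder):

Pipeline p = Pipeline.create();
// The explicit "ReadInput" name flows into
// Pipeline.applyTransform(name, this, t) via PBegin.apply above.
PCollection<String> lines = p.apply("ReadInput", TextIO.read().from("/path/to/input.txt"));
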
 
Example 8
Source File: PCollection.java    From beam with Apache License 2.0
/**
 * Like {@link #apply(String, PTransform)} but defaulting to the name of the {@link PTransform}.
 *
 * @return the output of the applied {@link PTransform}
 */
public <OutputT extends POutput> OutputT apply(PTransform<? super PCollection<T>, OutputT> t) {
  return Pipeline.applyTransform(this, t);
}
 
Example 9
Source File: PCollection.java    From beam with Apache License 2.0
/**
 * Applies the given {@link PTransform} to this input {@link PCollection}, using {@code name} to
 * identify this specific application of the transform. This name is used in various places,
 * including the monitoring UI, logging, and to stably identify this application node in the job
 * graph.
 *
 * @return the output of the applied {@link PTransform}
 */
public <OutputT extends POutput> OutputT apply(
    String name, PTransform<? super PCollection<T>, OutputT> t) {
  return Pipeline.applyTransform(name, this, t);
}
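
Explicit names become necessary once the same transform is applied more than once: with default names, Beam warns that the application name is not stable and unique (and fails when the stableUniqueNames pipeline option is set to ERROR). A minimal sketch:

static void countBeforeAndAfter(PCollection<String> words) {
  // The same Count transform applied twice, distinguished by explicit names
  // so every step keeps a stable, unique identity in the job graph.
  PCollection<Long> before = words.apply("CountBeforeFilter", Count.globally());
  PCollection<String> nonEmpty =
      words.apply("DropEmpty", Filter.by((String w) -> !w.isEmpty()));
  PCollection<Long> after = nonEmpty.apply("CountAfterFilter", Count.globally());
}
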
 
Example 10
Source File: PCollectionList.java    From beam with Apache License 2.0
/**
 * Applies the given {@link PTransform} to this input {@link PCollectionList}, using {@code name}
 * to identify this specific application of the transform. This name is used in various places,
 * including the monitoring UI, logging, and to stably identify this application node in the job
 * graph.
 *
 * @return the output of the applied {@link PTransform}
 */
public <OutputT extends POutput> OutputT apply(
    String name, PTransform<PCollectionList<T>, OutputT> t) {
  return Pipeline.applyTransform(name, this, t);
}
 
Example 11
Source File: PCollectionTuple.java    From beam with Apache License 2.0
/**
 * Like {@link #apply(String, PTransform)} but defaulting to the name of the {@link PTransform}.
 *
 * @return the output of the applied {@link PTransform}
 */
public <OutputT extends POutput> OutputT apply(PTransform<? super PCollectionTuple, OutputT> t) {
  return Pipeline.applyTransform(this, t);
}
 
Example 12
Source File: PCollectionTuple.java    From beam with Apache License 2.0
/**
 * Applies the given {@link PTransform} to this input {@link PCollectionTuple}, using {@code name}
 * to identify this specific application of the transform. This name is used in various places,
 * including the monitoring UI, logging, and to stably identify this application node in the job
 * graph.
 *
 * @return the output of the applied {@link PTransform}
 */
public <OutputT extends POutput> OutputT apply(
    String name, PTransform<? super PCollectionTuple, OutputT> t) {
  return Pipeline.applyTransform(name, this, t);
}