Java Code Examples for org.apache.beam.sdk.values.PCollectionList#of()

The following examples show how to use org.apache.beam.sdk.values.PCollectionList#of(). Each example is taken from an open-source project; the source file and license appear above the snippet.
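Before the project examples, here is a minimal, self-contained sketch of the two of() overloads. It is not taken from any of the projects below; the pipeline, step names, and element values are illustrative only.

import java.util.Arrays;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.Flatten;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionList;

Pipeline p = Pipeline.create();
PCollection<String> first = p.apply("First", Create.of("a", "b"));
PCollection<String> second = p.apply("Second", Create.of("c"));

// of(PCollection) builds a singleton list; and() appends, returning a new list.
PCollectionList<String> viaAnd = PCollectionList.of(first).and(second);

// of(Iterable) builds the same list in one call.
PCollectionList<String> viaList = PCollectionList.of(Arrays.asList(first, second));

// A PCollectionList is the usual input to Flatten.pCollections().
PCollection<String> flattened = viaAnd.apply(Flatten.pCollections());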
Example 1
Source File: PartitionTest.java    From beam with Apache License 2.0
@Test
@Category(NeedsRunner.class)
public void testDroppedPartition() {

  // Compute the set of integers either 1 or 2 mod 3, the hard way.
  PCollectionList<Integer> outputs =
      pipeline
          .apply(Create.of(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12))
          .apply(Partition.of(3, new ModFn()));

  List<PCollection<Integer>> outputsList = new ArrayList<>(outputs.getAll());
  outputsList.remove(0);
  outputs = PCollectionList.of(outputsList);
  assertTrue(outputs.size() == 2);

  PCollection<Integer> output = outputs.apply(Flatten.pCollections());
  PAssert.that(output).containsInAnyOrder(2, 4, 5, 7, 8, 10, 11);
  pipeline.run();
}
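Note that PCollectionList is immutable and getAll() returns an immutable view of its contents, which is why the test copies the PCollections into a fresh ArrayList before removing the first partition and rebuilding the list with PCollectionList.of().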
 
Example 2
Source File: TransformTreeTest.java    From beam with Apache License 2.0
@Override
public PCollectionList<String> expand(PBegin b) {
  // Composite transform: apply delegates to other transformations,
  // here a Create transform.
  PCollection<String> result = b.apply(Create.of("hello", "world"));

  // Issue below: PCollection.createPrimitiveOutputInternal should not be used
  // from within a composite transform.
  return PCollectionList.of(
      Arrays.asList(
          result,
          PCollection.createPrimitiveOutputInternal(
              b.getPipeline(),
              WindowingStrategy.globalDefault(),
              result.isBounded(),
              StringUtf8Coder.of())));
}
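The comment marks this as deliberate misuse: createPrimitiveOutputInternal is intended for primitive transforms that produce their own outputs, while a composite's expand() should produce its outputs by applying other transforms. The surrounding test presumably relies on this to exercise the SDK's validation of composite outputs.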
 
Example 3
Source File: WriteFeatureSetSpecAckTest.java    From feast with Apache License 2.0
@Test
public void shouldSendAckWhenAllSinksReady() {
  TestStream<FeatureSetReference> sink1 =
      TestStream.create(AvroCoder.of(FeatureSetReference.class))
          .addElements(FeatureSetReference.of("project", "fs", 1))
          .addElements(FeatureSetReference.of("project", "fs", 2))
          .addElements(FeatureSetReference.of("project", "fs", 3))
          .advanceWatermarkToInfinity();

  TestStream<FeatureSetReference> sink2 =
      TestStream.create(AvroCoder.of(FeatureSetReference.class))
          .addElements(FeatureSetReference.of("project", "fs_2", 1))
          .addElements(FeatureSetReference.of("project", "fs", 3))
          .advanceWatermarkToInfinity();

  TestStream<FeatureSetReference> sink3 =
      TestStream.create(AvroCoder.of(FeatureSetReference.class))
          .advanceProcessingTime(Duration.standardSeconds(10))
          .addElements(FeatureSetReference.of("project", "fs", 3))
          .advanceWatermarkToInfinity();

  PCollectionList<FeatureSetReference> sinks =
      PCollectionList.of(
          ImmutableList.of(
              p.apply("sink1", sink1), p.apply("sink2", sink2), p.apply("sink3", sink3)));

  PCollection<FeatureSetReference> grouped =
      sinks.apply(Flatten.pCollections()).apply(new WriteFeatureSetSpecAck.PrepareWrite(3));

  PAssert.that(grouped)
      .inOnTimePane(GlobalWindow.INSTANCE)
      .containsInAnyOrder(FeatureSetReference.of("project", "fs", 3));

  p.run();
}
 
Example 4
Source File: Join.java    From beam with Apache License 2.0
@Override
public PCollection<KV<KeyT, OutputT>> output(OutputHint... outputHints) {
  @SuppressWarnings("unchecked")
  final PCollectionList<Object> inputs =
      PCollectionList.of(Arrays.asList((PCollection) left, (PCollection) right));
  return OperatorTransform.apply(createOperator(), inputs);
}
 
Example 5
Source File: Join.java    From beam with Apache License 2.0
@Override
public PCollection<OutputT> outputValues(OutputHint... outputHints) {
  @SuppressWarnings("unchecked")
  final PCollectionList<Object> inputs =
      PCollectionList.of(Arrays.asList((PCollection) left, (PCollection) right));
  return OperatorTransform.apply(
      new OutputValues<>(name, outputType, createOperator()), inputs);
}
 
Example 6
Source File: BeamSqlRelUtils.java    From beam with Apache License 2.0
/** Transforms the inputs into a PInput. */
private static PCollectionList<Row> buildPCollectionList(
    List<RelNode> inputRels, Pipeline pipeline, Map<Integer, PCollection<Row>> cache) {
  if (inputRels.isEmpty()) {
    return PCollectionList.empty(pipeline);
  } else {
    return PCollectionList.of(
        inputRels.stream()
            .map(input -> BeamSqlRelUtils.toPCollection(pipeline, (BeamRelNode) input, cache))
            .collect(Collectors.toList()));
  }
}
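The empty case matters here: PCollectionList.of(Iterable) infers the owning Pipeline from the PCollections it is given, so an empty list must instead be built with PCollectionList.empty(pipeline).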
 
Example 7
Source File: FlattenTest.java    From beam with Apache License 2.0
private <T> PCollectionList<T> makePCollectionList(
    Pipeline p, Coder<T> coder, List<List<T>> lists) {
  List<PCollection<T>> pcs = new ArrayList<>();
  int index = 0;
  for (List<T> list : lists) {
    PCollection<T> pc = p.apply("Create" + (index++), Create.of(list).withCoder(coder));
    pcs.add(pc);
  }
  return PCollectionList.of(pcs);
}
 
Example 8
Source File: TransformHierarchyTest.java    From beam with Apache License 2.0
@Test
public void emptyCompositeSucceeds() {
  PCollection<Long> created =
      PCollection.createPrimitiveOutputInternal(
          pipeline, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, VarLongCoder.of());
  TransformHierarchy.Node node = hierarchy.pushNode("Create", PBegin.in(pipeline), Create.of(1));
  hierarchy.setOutput(created);
  hierarchy.popNode();
  PCollectionList<Long> pcList = PCollectionList.of(created);

  TransformHierarchy.Node emptyTransform =
      hierarchy.pushNode(
          "Extract",
          pcList,
          new PTransform<PCollectionList<Long>, PCollection<Long>>() {
            @Override
            public PCollection<Long> expand(PCollectionList<Long> input) {
              return input.get(0);
            }
          });
  hierarchy.setOutput(created);
  hierarchy.popNode();
  assertThat(hierarchy.getProducer(created), equalTo(node));
  assertThat(
      "A Transform that produces non-primitive output should be composite",
      emptyTransform.isCompositeNode(),
      is(true));
}
 
Example 9
Source File: TransformHierarchyTest.java    From beam with Apache License 2.0
@Test
public void producingOwnAndOthersOutputsFails() {
  PCollection<Long> created =
      PCollection.createPrimitiveOutputInternal(
          pipeline, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, VarLongCoder.of());
  hierarchy.pushNode("Create", PBegin.in(pipeline), Create.of(1));
  hierarchy.setOutput(created);
  hierarchy.popNode();
  PCollectionList<Long> pcList = PCollectionList.of(created);

  final PCollectionList<Long> appended =
      pcList.and(
          PCollection.createPrimitiveOutputInternal(
                  pipeline,
                  WindowingStrategy.globalDefault(),
                  IsBounded.BOUNDED,
                  VarLongCoder.of())
              .setName("prim"));
  hierarchy.pushNode(
      "AddPc",
      pcList,
      new PTransform<PCollectionList<Long>, PCollectionList<Long>>() {
        @Override
        public PCollectionList<Long> expand(PCollectionList<Long> input) {
          return appended;
        }
      });
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("contains a primitive POutput produced by it");
  thrown.expectMessage("AddPc");
  thrown.expectMessage("Create");
  thrown.expectMessage(appended.expand().toString());
  hierarchy.setOutput(appended);
}
 
Example 10
Source File: FixedInputRuntime.java    From components with Apache License 2.0
@Override
public PCollection<IndexedRecord> expand(PBegin begin) {
    FixedDatasetRuntime runtime = new FixedDatasetRuntime();
    runtime.initialize(null, properties.getDatasetProperties());

    // The values to include in the PCollection
    List<IndexedRecord> values = new LinkedList<>();

    if (properties.overrideValuesAction.getValue() == FixedInputProperties.OverrideValuesAction.NONE
            || properties.overrideValuesAction.getValue() == FixedInputProperties.OverrideValuesAction.APPEND) {
        if (!properties.getDatasetProperties().values.getValue().trim().isEmpty()) {
            values.addAll(runtime.getValues(Integer.MAX_VALUE));
        }
    }

    if (properties.overrideValuesAction.getValue() == FixedInputProperties.OverrideValuesAction.APPEND
            || properties.overrideValuesAction.getValue() == FixedInputProperties.OverrideValuesAction.REPLACE) {
        properties.getDatasetProperties().values.setValue(properties.overrideValues.getValue());
        if (!properties.getDatasetProperties().values.getValue().trim().isEmpty()) {
            values.addAll(runtime.getValues(Integer.MAX_VALUE));
        }
    }

    if (values.size() != 0) {
        PCollection<IndexedRecord> out = (PCollection<IndexedRecord>) begin
                .apply(Create.of(values).withCoder((AvroCoder) AvroCoder.of(runtime.getSchema())));
        if (properties.repeat.getValue() > 1) {
            // Flatten "repeat" copies of the PCollection into one. The list already
            // holds one copy, so append until it contains repeat copies in total.
            PCollectionList<IndexedRecord> merged = PCollectionList.of(out);
            for (int i = 2; i <= properties.repeat.getValue(); i++) {
                merged = merged.and(out);
            }
            out = merged.apply(Flatten.<IndexedRecord> pCollections());
        }
        return out;
    } else {
        return begin.apply(RowGeneratorIO.read().withSchema(runtime.getSchema()) //
                .withSeed(0L) //
                .withPartitions(1) //
                .withRows(properties.repeat.getValue()));
    }
}
 
Example 11
Source File: BatchViewOverrides.java    From beam with Apache License 2.0
private static <K, V, W extends BoundedWindow, ViewT> PCollection<?> applyForMapLike(
    DataflowRunner runner,
    PCollection<KV<K, V>> input,
    PCollectionView<ViewT> view,
    boolean uniqueKeysExpected)
    throws NonDeterministicException {

  @SuppressWarnings("unchecked")
  Coder<W> windowCoder = (Coder<W>) input.getWindowingStrategy().getWindowFn().windowCoder();

  @SuppressWarnings({"rawtypes", "unchecked"})
  KvCoder<K, V> inputCoder = (KvCoder) input.getCoder();

  // If our key coder is deterministic, we can use the key portion of each KV
  // as part of a composite key containing the window, key, and index.
  inputCoder.getKeyCoder().verifyDeterministic();

  IsmRecordCoder<WindowedValue<V>> ismCoder =
      coderForMapLike(windowCoder, inputCoder.getKeyCoder(), inputCoder.getValueCoder());

  // Create the various output tags representing the main output containing the data stream
  // and the additional outputs containing the metadata about the size and entry set.
  TupleTag<IsmRecord<WindowedValue<V>>> mainOutputTag = new TupleTag<>();
  TupleTag<KV<Integer, KV<W, Long>>> outputForSizeTag = new TupleTag<>();
  TupleTag<KV<Integer, KV<W, K>>> outputForEntrySetTag = new TupleTag<>();

  // Process all the elements, grouped by key hash and sorted by key and then window,
  // outputting to all the outputs defined above.
  PCollectionTuple outputTuple =
      input
          .apply("GBKaSVForData", new GroupByKeyHashAndSortByKeyAndWindow<K, V, W>(ismCoder))
          .apply(
              ParDo.of(
                      new ToIsmRecordForMapLikeDoFn<>(
                          outputForSizeTag,
                          outputForEntrySetTag,
                          windowCoder,
                          inputCoder.getKeyCoder(),
                          ismCoder,
                          uniqueKeysExpected))
                  .withOutputTags(
                      mainOutputTag,
                      TupleTagList.of(
                          ImmutableList.of(outputForSizeTag, outputForEntrySetTag))));

  // Set the coder on the main data output.
  PCollection<IsmRecord<WindowedValue<V>>> perHashWithReifiedWindows =
      outputTuple.get(mainOutputTag);
  perHashWithReifiedWindows.setCoder(ismCoder);

  // Set the coder on the metadata output for size and process the entries
  // producing a [META, Window, 0L] record per window storing the number of unique keys
  // for each window.
  PCollection<KV<Integer, KV<W, Long>>> outputForSize = outputTuple.get(outputForSizeTag);
  outputForSize.setCoder(
      KvCoder.of(VarIntCoder.of(), KvCoder.of(windowCoder, VarLongCoder.of())));
  PCollection<IsmRecord<WindowedValue<V>>> windowMapSizeMetadata =
      outputForSize
          .apply("GBKaSVForSize", new GroupByKeyAndSortValuesOnly<>())
          .apply(ParDo.of(new ToIsmMetadataRecordForSizeDoFn<K, V, W>(windowCoder)));
  windowMapSizeMetadata.setCoder(ismCoder);

  // Set the coder on the metadata output destined to build the entry set and process the
  // entries producing a [META, Window, Index] record per window key pair storing the key.
  PCollection<KV<Integer, KV<W, K>>> outputForEntrySet = outputTuple.get(outputForEntrySetTag);
  outputForEntrySet.setCoder(
      KvCoder.of(VarIntCoder.of(), KvCoder.of(windowCoder, inputCoder.getKeyCoder())));
  PCollection<IsmRecord<WindowedValue<V>>> windowMapKeysMetadata =
      outputForEntrySet
          .apply("GBKaSVForKeys", new GroupByKeyAndSortValuesOnly<>())
          .apply(
              ParDo.of(
                  new ToIsmMetadataRecordForKeyDoFn<K, V, W>(
                      inputCoder.getKeyCoder(), windowCoder)));
  windowMapKeysMetadata.setCoder(ismCoder);

  // Set that all these outputs should be materialized using an indexed format.
  runner.addPCollectionRequiringIndexedFormat(perHashWithReifiedWindows);
  runner.addPCollectionRequiringIndexedFormat(windowMapSizeMetadata);
  runner.addPCollectionRequiringIndexedFormat(windowMapKeysMetadata);

  PCollectionList<IsmRecord<WindowedValue<V>>> outputs =
      PCollectionList.of(
          ImmutableList.of(
              perHashWithReifiedWindows, windowMapSizeMetadata, windowMapKeysMetadata));

  PCollection<IsmRecord<WindowedValue<V>>> flattenedOutputs =
      Pipeline.applyTransform(outputs, Flatten.pCollections());
  flattenedOutputs.apply(CreateDataflowView.forBatch(view));
  return flattenedOutputs;
}