Java Code Examples for org.apache.beam.sdk.values.PCollectionList#of()

The following examples show how to use org.apache.beam.sdk.values.PCollectionList#of(). Each example is taken from an open-source project; the source file and license appear above the snippet.
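Before the project examples, here is a minimal, self-contained sketch of the two of() overloads. It is not taken from any of the projects below; the pipeline, step names, and element values are illustrative only.

import java.util.Arrays;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.Flatten;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionList;

Pipeline p = Pipeline.create();
PCollection<String> first = p.apply("First", Create.of("a", "b"));
PCollection<String> second = p.apply("Second", Create.of("c"));

// of(PCollection) builds a singleton list; and() appends, returning a new list.
PCollectionList<String> viaAnd = PCollectionList.of(first).and(second);

// of(Iterable) builds the same list in one call.
PCollectionList<String> viaList = PCollectionList.of(Arrays.asList(first, second));

// A PCollectionList is the usual input to Flatten.pCollections().
PCollection<String> flattened = viaAnd.apply(Flatten.pCollections());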
Example 1
Source File: PartitionTest.java    From beam with Apache License 2.0
@Test
@Category(NeedsRunner.class)
public void testDroppedPartition() {

  // Compute the set of integers either 1 or 2 mod 3, the hard way.
  PCollectionList<Integer> outputs =
      pipeline
          .apply(Create.of(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12))
          .apply(Partition.of(3, new ModFn()));

  List<PCollection<Integer>> outputsList = new ArrayList<>(outputs.getAll());
  outputsList.remove(0);
  outputs = PCollectionList.of(outputsList);
  assertTrue(outputs.size() == 2);

  PCollection<Integer> output = outputs.apply(Flatten.pCollections());
  PAssert.that(output).containsInAnyOrder(2, 4, 5, 7, 8, 10, 11);
  pipeline.run();
}
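Note that PCollectionList is immutable and getAll() returns an immutable view of its contents, which is why the test copies the PCollections into a fresh ArrayList before removing the first partition and rebuilding the list with PCollectionList.of().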
 
Example 2
Source File: TransformTreeTest.java    From beam with Apache License 2.0
@Override
public PCollectionList<String> expand(PBegin b) {
  // Composite transform: apply delegates to other transformations,
  // here a Create transform.
  PCollection<String> result = b.apply(Create.of("hello", "world"));

  // Issue below: PCollection.createPrimitiveOutputInternal should not be used
  // from within a composite transform.
  return PCollectionList.of(
      Arrays.asList(
          result,
          PCollection.createPrimitiveOutputInternal(
              b.getPipeline(),
              WindowingStrategy.globalDefault(),
              result.isBounded(),
              StringUtf8Coder.of())));
}
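The comment marks this as deliberate misuse: createPrimitiveOutputInternal is intended for primitive transforms that produce their own outputs, while a composite's expand() should produce its outputs by applying other transforms. The surrounding test presumably relies on this to exercise the SDK's validation of composite outputs.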
 
Example 3
Source File: WriteFeatureSetSpecAckTest.java    From feast with Apache License 2.0
@Test
public void shouldSendAckWhenAllSinksReady() {
  TestStream<FeatureSetReference> sink1 =
      TestStream.create(AvroCoder.of(FeatureSetReference.class))
          .addElements(FeatureSetReference.of("project", "fs", 1))
          .addElements(FeatureSetReference.of("project", "fs", 2))
          .addElements(FeatureSetReference.of("project", "fs", 3))
          .advanceWatermarkToInfinity();

  TestStream<FeatureSetReference> sink2 =
      TestStream.create(AvroCoder.of(FeatureSetReference.class))
          .addElements(FeatureSetReference.of("project", "fs_2", 1))
          .addElements(FeatureSetReference.of("project", "fs", 3))
          .advanceWatermarkToInfinity();

  TestStream<FeatureSetReference> sink3 =
      TestStream.create(AvroCoder.of(FeatureSetReference.class))
          .advanceProcessingTime(Duration.standardSeconds(10))
          .addElements(FeatureSetReference.of("project", "fs", 3))
          .advanceWatermarkToInfinity();

  PCollectionList<FeatureSetReference> sinks =
      PCollectionList.of(
          ImmutableList.of(
              p.apply("sink1", sink1), p.apply("sink2", sink2), p.apply("sink3", sink3)));

  PCollection<FeatureSetReference> grouped =
      sinks.apply(Flatten.pCollections()).apply(new WriteFeatureSetSpecAck.PrepareWrite(3));

  PAssert.that(grouped)
      .inOnTimePane(GlobalWindow.INSTANCE)
      .containsInAnyOrder(FeatureSetReference.of("project", "fs", 3));

  p.run();
}
 
Example 4
Source File: Join.java    From beam with Apache License 2.0
@Override
public PCollection<KV<KeyT, OutputT>> output(OutputHint... outputHints) {
  @SuppressWarnings("unchecked")
  final PCollectionList<Object> inputs =
      PCollectionList.of(Arrays.asList((PCollection) left, (PCollection) right));
  return OperatorTransform.apply(createOperator(), inputs);
}
 
Example 5
Source File: Join.java    From beam with Apache License 2.0
@Override
public PCollection<OutputT> outputValues(OutputHint... outputHints) {
  @SuppressWarnings("unchecked")
  final PCollectionList<Object> inputs =
      PCollectionList.of(Arrays.asList((PCollection) left, (PCollection) right));
  return OperatorTransform.apply(
      new OutputValues<>(name, outputType, createOperator()), inputs);
}
 
Example 6
Source File: BeamSqlRelUtils.java    From beam with Apache License 2.0
/** Transforms the inputs into a PInput. */
private static PCollectionList<Row> buildPCollectionList(
    List<RelNode> inputRels, Pipeline pipeline, Map<Integer, PCollection<Row>> cache) {
  if (inputRels.isEmpty()) {
    return PCollectionList.empty(pipeline);
  } else {
    return PCollectionList.of(
        inputRels.stream()
            .map(input -> BeamSqlRelUtils.toPCollection(pipeline, (BeamRelNode) input, cache))
            .collect(Collectors.toList()));
  }
}
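The empty case matters here: PCollectionList.of(Iterable) infers the owning Pipeline from the PCollections it is given, so an empty list must instead be built with PCollectionList.empty(pipeline).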
 
Example 7
Source File: FlattenTest.java    From beam with Apache License 2.0
private <T> PCollectionList<T> makePCollectionList(
    Pipeline p, Coder<T> coder, List<List<T>> lists) {
  List<PCollection<T>> pcs = new ArrayList<>();
  int index = 0;
  for (List<T> list : lists) {
    PCollection<T> pc = p.apply("Create" + (index++), Create.of(list).withCoder(coder));
    pcs.add(pc);
  }
  return PCollectionList.of(pcs);
}
 
Example 8
Source File: TransformHierarchyTest.java    From beam with Apache License 2.0
@Test
public void emptyCompositeSucceeds() {
  PCollection<Long> created =
      PCollection.createPrimitiveOutputInternal(
          pipeline, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, VarLongCoder.of());
  TransformHierarchy.Node node = hierarchy.pushNode("Create", PBegin.in(pipeline), Create.of(1));
  hierarchy.setOutput(created);
  hierarchy.popNode();
  PCollectionList<Long> pcList = PCollectionList.of(created);

  TransformHierarchy.Node emptyTransform =
      hierarchy.pushNode(
          "Extract",
          pcList,
          new PTransform<PCollectionList<Long>, PCollection<Long>>() {
            @Override
            public PCollection<Long> expand(PCollectionList<Long> input) {
              return input.get(0);
            }
          });
  hierarchy.setOutput(created);
  hierarchy.popNode();
  assertThat(hierarchy.getProducer(created), equalTo(node));
  assertThat(
      "A Transform that produces non-primitive output should be composite",
      emptyTransform.isCompositeNode(),
      is(true));
}
 
Example 9
Source File: TransformHierarchyTest.java    From beam with Apache License 2.0
@Test
public void producingOwnAndOthersOutputsFails() {
  PCollection<Long> created =
      PCollection.createPrimitiveOutputInternal(
          pipeline, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, VarLongCoder.of());
  hierarchy.pushNode("Create", PBegin.in(pipeline), Create.of(1));
  hierarchy.setOutput(created);
  hierarchy.popNode();
  PCollectionList<Long> pcList = PCollectionList.of(created);

  final PCollectionList<Long> appended =
      pcList.and(
          PCollection.createPrimitiveOutputInternal(
                  pipeline,
                  WindowingStrategy.globalDefault(),
                  IsBounded.BOUNDED,
                  VarLongCoder.of())
              .setName("prim"));
  hierarchy.pushNode(
      "AddPc",
      pcList,
      new PTransform<PCollectionList<Long>, PCollectionList<Long>>() {
        @Override
        public PCollectionList<Long> expand(PCollectionList<Long> input) {
          return appended;
        }
      });
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("contains a primitive POutput produced by it");
  thrown.expectMessage("AddPc");
  thrown.expectMessage("Create");
  thrown.expectMessage(appended.expand().toString());
  hierarchy.setOutput(appended);
}
 
Example 10
Source File: FixedInputRuntime.java    From components with Apache License 2.0
@Override
public PCollection<IndexedRecord> expand(PBegin begin) {
    FixedDatasetRuntime runtime = new FixedDatasetRuntime();
    runtime.initialize(null, properties.getDatasetProperties());

    // The values to include in the PCollection
    List<IndexedRecord> values = new LinkedList<>();

    if (properties.overrideValuesAction.getValue() == FixedInputProperties.OverrideValuesAction.NONE
            || properties.overrideValuesAction.getValue() == FixedInputProperties.OverrideValuesAction.APPEND) {
        if (!properties.getDatasetProperties().values.getValue().trim().isEmpty()) {
            values.addAll(runtime.getValues(Integer.MAX_VALUE));
        }
    }

    if (properties.overrideValuesAction.getValue() == FixedInputProperties.OverrideValuesAction.APPEND
            || properties.overrideValuesAction.getValue() == FixedInputProperties.OverrideValuesAction.REPLACE) {
        properties.getDatasetProperties().values.setValue(properties.overrideValues.getValue());
        if (!properties.getDatasetProperties().values.getValue().trim().isEmpty()) {
            values.addAll(runtime.getValues(Integer.MAX_VALUE));
        }
    }

    if (values.size() != 0) {
        PCollection<IndexedRecord> out = (PCollection<IndexedRecord>) begin
                .apply(Create.of(values).withCoder((AvroCoder) AvroCoder.of(runtime.getSchema())));
        if (properties.repeat.getValue() > 1) {
            // Flatten "repeat" copies of the PCollection into one. The list already
            // holds one copy, so append until it contains repeat copies in total.
            PCollectionList<IndexedRecord> merged = PCollectionList.of(out);
            for (int i = 2; i <= properties.repeat.getValue(); i++) {
                merged = merged.and(out);
            }
            out = merged.apply(Flatten.<IndexedRecord> pCollections());
        }
        return out;
    } else {
        return begin.apply(RowGeneratorIO.read().withSchema(runtime.getSchema()) //
                .withSeed(0L) //
                .withPartitions(1) //
                .withRows(properties.repeat.getValue()));
    }
}
 
Example 11
Source File: BatchViewOverrides.java    From beam with Apache License 2.0
private static <K, V, W extends BoundedWindow, ViewT> PCollection<?> applyForMapLike(
    DataflowRunner runner,
    PCollection<KV<K, V>> input,
    PCollectionView<ViewT> view,
    boolean uniqueKeysExpected)
    throws NonDeterministicException {

  @SuppressWarnings("unchecked")
  Coder<W> windowCoder = (Coder<W>) input.getWindowingStrategy().getWindowFn().windowCoder();

  @SuppressWarnings({"rawtypes", "unchecked"})
  KvCoder<K, V> inputCoder = (KvCoder) input.getCoder();

  // If our key coder is deterministic, we can use the key portion of each KV
  // as part of a composite key containing the window, key, and index.
  inputCoder.getKeyCoder().verifyDeterministic();

  IsmRecordCoder<WindowedValue<V>> ismCoder =
      coderForMapLike(windowCoder, inputCoder.getKeyCoder(), inputCoder.getValueCoder());

  // Create the various output tags representing the main output containing the data stream
  // and the additional outputs containing the metadata about the size and entry set.
  TupleTag<IsmRecord<WindowedValue<V>>> mainOutputTag = new TupleTag<>();
  TupleTag<KV<Integer, KV<W, Long>>> outputForSizeTag = new TupleTag<>();
  TupleTag<KV<Integer, KV<W, K>>> outputForEntrySetTag = new TupleTag<>();

  // Process all the elements, grouped by key hash and sorted by key and then window,
  // outputting to all the outputs defined above.
  PCollectionTuple outputTuple =
      input
          .apply("GBKaSVForData", new GroupByKeyHashAndSortByKeyAndWindow<K, V, W>(ismCoder))
          .apply(
              ParDo.of(
                      new ToIsmRecordForMapLikeDoFn<>(
                          outputForSizeTag,
                          outputForEntrySetTag,
                          windowCoder,
                          inputCoder.getKeyCoder(),
                          ismCoder,
                          uniqueKeysExpected))
                  .withOutputTags(
                      mainOutputTag,
                      TupleTagList.of(
                          ImmutableList.of(outputForSizeTag, outputForEntrySetTag))));

  // Set the coder on the main data output.
  PCollection<IsmRecord<WindowedValue<V>>> perHashWithReifiedWindows =
      outputTuple.get(mainOutputTag);
  perHashWithReifiedWindows.setCoder(ismCoder);

  // Set the coder on the metadata output for size and process the entries
  // producing a [META, Window, 0L] record per window storing the number of unique keys
  // for each window.
  PCollection<KV<Integer, KV<W, Long>>> outputForSize = outputTuple.get(outputForSizeTag);
  outputForSize.setCoder(
      KvCoder.of(VarIntCoder.of(), KvCoder.of(windowCoder, VarLongCoder.of())));
  PCollection<IsmRecord<WindowedValue<V>>> windowMapSizeMetadata =
      outputForSize
          .apply("GBKaSVForSize", new GroupByKeyAndSortValuesOnly<>())
          .apply(ParDo.of(new ToIsmMetadataRecordForSizeDoFn<K, V, W>(windowCoder)));
  windowMapSizeMetadata.setCoder(ismCoder);

  // Set the coder on the metadata output destined to build the entry set and process the
  // entries producing a [META, Window, Index] record per window key pair storing the key.
  PCollection<KV<Integer, KV<W, K>>> outputForEntrySet = outputTuple.get(outputForEntrySetTag);
  outputForEntrySet.setCoder(
      KvCoder.of(VarIntCoder.of(), KvCoder.of(windowCoder, inputCoder.getKeyCoder())));
  PCollection<IsmRecord<WindowedValue<V>>> windowMapKeysMetadata =
      outputForEntrySet
          .apply("GBKaSVForKeys", new GroupByKeyAndSortValuesOnly<>())
          .apply(
              ParDo.of(
                  new ToIsmMetadataRecordForKeyDoFn<K, V, W>(
                      inputCoder.getKeyCoder(), windowCoder)));
  windowMapKeysMetadata.setCoder(ismCoder);

  // Set that all these outputs should be materialized using an indexed format.
  runner.addPCollectionRequiringIndexedFormat(perHashWithReifiedWindows);
  runner.addPCollectionRequiringIndexedFormat(windowMapSizeMetadata);
  runner.addPCollectionRequiringIndexedFormat(windowMapKeysMetadata);

  PCollectionList<IsmRecord<WindowedValue<V>>> outputs =
      PCollectionList.of(
          ImmutableList.of(
              perHashWithReifiedWindows, windowMapSizeMetadata, windowMapKeysMetadata));

  PCollection<IsmRecord<WindowedValue<V>>> flattenedOutputs =
      Pipeline.applyTransform(outputs, Flatten.pCollections());
  flattenedOutputs.apply(CreateDataflowView.forBatch(view));
  return flattenedOutputs;
}