Java Code Examples for org.apache.beam.sdk.values.TupleTag

The following examples show how to use org.apache.beam.sdk.values.TupleTag. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: beam   Source File: DoFnOperator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a buffered output manager from the supplied collaborators.
 *
 * <p>In addition to storing its arguments, the constructor derives {@code idsToTags}, the inverse
 * of {@code tagsToIds}, so that an id can be mapped back to its {@link TupleTag}.
 *
 * @param output the output that records are emitted to
 * @param mainTag tag of the main output
 * @param tagsToOutputTags mapping from each {@link TupleTag} to its {@code OutputTag}
 * @param tagsToIds mapping from each {@link TupleTag} to its integer id
 * @param bufferLock lock stored for use by this manager
 * @param pushedBackElementsHandler handler for pushed-back {@code (id, value)} pairs
 */
BufferedOutputManager(
    Output<StreamRecord<WindowedValue<OutputT>>> output,
    TupleTag<OutputT> mainTag,
    Map<TupleTag<?>, OutputTag<WindowedValue<?>>> tagsToOutputTags,
    Map<TupleTag<?>, Integer> tagsToIds,
    Lock bufferLock,
    PushedBackElementsHandler<KV<Integer, WindowedValue<?>>> pushedBackElementsHandler) {
  this.output = output;
  this.mainTag = mainTag;
  this.bufferLock = bufferLock;
  this.pushedBackElementsHandler = pushedBackElementsHandler;
  this.tagsToOutputTags = tagsToOutputTags;
  this.tagsToIds = tagsToIds;

  // Build the reverse lookup (id -> tag); a later entry with the same id replaces an earlier one.
  this.idsToTags = new HashMap<>();
  tagsToIds.forEach((tag, id) -> idsToTags.put(id, tag));
}
 
Example 2
Source Project: beam   Source File: PTransformMatchersTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code PTransformMatchers.flattenWithDuplicateInputs()} does not match a
 * {@code Flatten} application that has a single input entry.
 */
@Test
public void flattenWithDuplicateInputsWithoutDuplicates() {
  // One freshly created primitive PCollection as input, another one as output.
  AppliedPTransform flattenApplication =
      AppliedPTransform.of(
          "Flatten",
          Collections.singletonMap(
              new TupleTag<Integer>(),
              PCollection.createPrimitiveOutputInternal(
                  p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, VarIntCoder.of())),
          Collections.singletonMap(
              new TupleTag<Integer>(),
              PCollection.createPrimitiveOutputInternal(
                  p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, VarIntCoder.of())),
          Flatten.pCollections(),
          p);

  boolean matched = PTransformMatchers.flattenWithDuplicateInputs().matches(flattenApplication);
  assertThat(matched, is(false));
}
 
Example 3
Source Project: beam   Source File: ExecutableStageDoFnOperator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a runner by storing every supplied collaborator and allocating a fresh, empty
 * {@link LinkedBlockingQueue} as the output queue.
 *
 * @param doFn the {@code DoFn} associated with this runner
 * @param stageBundleFactory factory stored for creating stage bundles
 * @param stateRequestHandler handler for state requests
 * @param progressHandler handler for bundle progress
 * @param outputManager manager that receives outputs
 * @param outputMap mapping from output name to {@link TupleTag}
 * @param windowCoder coder for windows
 * @param timerRegistration callback stored for registering timers
 * @param keyForTimer supplier of the key to use for timers
 */
public SdkHarnessDoFnRunner(
    DoFn<InputT, OutputT> doFn,
    StageBundleFactory stageBundleFactory,
    StateRequestHandler stateRequestHandler,
    BundleProgressHandler progressHandler,
    BufferedOutputManager<OutputT> outputManager,
    Map<String, TupleTag<?>> outputMap,
    Coder<BoundedWindow> windowCoder,
    BiConsumer<Timer<?>, TimerInternals.TimerData> timerRegistration,
    Supplier<Object> keyForTimer) {
  // Execution collaborators.
  this.doFn = doFn;
  this.stageBundleFactory = stageBundleFactory;
  this.stateRequestHandler = stateRequestHandler;
  this.progressHandler = progressHandler;

  // Output handling.
  this.outputManager = outputManager;
  this.outputMap = outputMap;
  this.outputQueue = new LinkedBlockingQueue<>();

  // Windowing and timers.
  this.windowCoder = windowCoder;
  this.timerRegistration = timerRegistration;
  this.keyForTimer = keyForTimer;
}
 
Example 4
Source Project: beam   Source File: ToIsmRecordForMultimapDoFnFactory.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@code ToIsmRecordForMultimapParDoFn} from the coder encoded in {@code cloudUserFn}.
 *
 * <p>The coder is read from the {@code PropertyNames.ENCODING} entry of {@code cloudUserFn} and
 * must be an {@code IsmRecordCoder}; its first two coder arguments are combined into a
 * {@code KvCoder}. Only {@code cloudUserFn} is read by this method; the remaining parameters are
 * not used here.
 *
 * @throws IllegalStateException (via {@code checkState}) if the decoded coder is not an
 *     {@code IsmRecordCoder}
 */
@Override
public ParDoFn create(
    PipelineOptions options,
    CloudObject cloudUserFn,
    List<SideInputInfo> sideInputInfos,
    TupleTag<?> mainOutputTag,
    Map<TupleTag<?>, Integer> outputTupleTagsToReceiverIndices,
    DataflowExecutionContext<?> executionContext,
    DataflowOperationContext operationContext)
    throws Exception {
  CloudObject encodingSpec =
      CloudObject.fromSpec(Structs.getObject(cloudUserFn, PropertyNames.ENCODING));
  Coder<?> decodedCoder = CloudObjects.coderFromCloudObject(encodingSpec);

  checkState(
      decodedCoder instanceof IsmRecordCoder,
      "Expected to received an instanceof an %s but got %s",
      IsmRecordCoder.class.getSimpleName(),
      decodedCoder);

  IsmRecordCoder<?> ismCoder = (IsmRecordCoder<?>) decodedCoder;
  return new ToIsmRecordForMultimapParDoFn(
      KvCoder.of(ismCoder.getCoderArguments().get(0), ismCoder.getCoderArguments().get(1)));
}
 
Example 5
Source Project: beam   Source File: ParDoTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Applies a multi-output {@code ParDo} whose additional output uses a non-anonymous
 * {@code TupleTag<TestDummy>} and expects {@code pipeline.run()} to fail with an
 * {@link IllegalStateException} whose message contains "Unable to return a default Coder".
 */
@Test
@Category(NeedsRunner.class)
public void testTaggedOutputUnknownCoder() throws Exception {
  PCollection<Integer> numbers = pipeline.apply(Create.of(Arrays.asList(1, 2, 3)));

  final TupleTag<Integer> mainTag = new TupleTag<>("main");
  final TupleTag<TestDummy> unknownCoderTag = new TupleTag<>("unknownSide");
  TupleTagList additionalTags = TupleTagList.of(unknownCoderTag);

  numbers.apply(
      ParDo.of(new TaggedOutputDummyFn(mainTag, unknownCoderTag))
          .withOutputTags(mainTag, additionalTags));

  // The expectation is registered only after the transform has been applied.
  thrown.expect(IllegalStateException.class);
  thrown.expectMessage("Unable to return a default Coder");
  pipeline.run();
}
 
Example 6
Source Project: beam   Source File: Partition.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Constructs a PartitionDoFn and prepares one fresh output {@link TupleTag} per partition.
 *
 * @param numPartitions number of partitions; must be positive
 * @param ctxFn the contextful partitioning function
 * @param originalFnClassForDisplayData object stored for use in display data
 * @throws IllegalArgumentException if {@code numPartitions <= 0}
 */
private PartitionDoFn(
    int numPartitions,
    Contextful<Contextful.Fn<X, Integer>> ctxFn,
    Object originalFnClassForDisplayData) {
  // Reject invalid partition counts up front.
  if (numPartitions <= 0) {
    throw new IllegalArgumentException("numPartitions must be > 0");
  }

  this.ctxFn = ctxFn;
  this.originalFnClassForDisplayData = originalFnClassForDisplayData;
  this.numPartitions = numPartitions;

  // TupleTagList.and returns a new list, so the result is reassigned on every step.
  TupleTagList tags = TupleTagList.empty();
  for (int remaining = numPartitions; remaining > 0; remaining--) {
    tags = tags.and(new TupleTag<X>());
  }
  outputTags = tags;
}
 
Example 7
Source Project: beam   Source File: ParDoTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Applies {@code TestNoOutputDoFn} with one main and two additional output tags to an empty
 * input and asserts that all three tagged outputs are empty.
 */
@Test
@Category(ValidatesRunner.class)
public void testParDoWithEmptyTaggedOutput() {
  TupleTag<String> mainTag = new TupleTag<String>("main") {};
  TupleTag<String> firstAdditionalTag = new TupleTag<String>("additional1") {};
  TupleTag<String> secondAdditionalTag = new TupleTag<String>("additional2") {};
  TupleTagList additionalTags = TupleTagList.of(firstAdditionalTag).and(secondAdditionalTag);

  PCollectionTuple tagged =
      pipeline
          .apply(Create.empty(VarIntCoder.of()))
          .apply(ParDo.of(new TestNoOutputDoFn()).withOutputTags(mainTag, additionalTags));

  PAssert.that(tagged.get(mainTag)).empty();
  PAssert.that(tagged.get(firstAdditionalTag)).empty();
  PAssert.that(tagged.get(secondAdditionalTag)).empty();

  pipeline.run();
}
 
Example 8
Source Project: beam   Source File: TransformTranslator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Wraps {@code doFnFunction} so it can be applied to an iterator of sorted, encoded records.
 *
 * <p>The returned function splits its input via {@code splitBySameKey}, calls
 * {@code doFnFunction} once per resulting group, and flattens the per-group results into a
 * single iterator. Any {@link Exception} thrown by {@code doFnFunction.call} is rethrown wrapped
 * in a {@link RuntimeException}.
 *
 * @param doFnFunction function invoked for each group
 * @param keyCoder coder passed to {@code splitBySameKey} for keys
 * @param wvCoder coder passed to {@code splitBySameKey} for windowed values
 */
private static <K, V, OutputT>
    PairFlatMapFunction<Iterator<Tuple2<ByteArray, byte[]>>, TupleTag<?>, WindowedValue<?>>
        wrapDoFnFromSortedRDD(
            MultiDoFnFunction<KV<K, V>, OutputT> doFnFunction,
            Coder<K> keyCoder,
            Coder<WindowedValue<V>> wvCoder) {

  return (Iterator<Tuple2<ByteArray, byte[]>> sortedRecords) -> {
    Iterator<Iterator<Tuple2<TupleTag<?>, WindowedValue<?>>>> outputsPerGroup =
        Iterators.transform(
            splitBySameKey(sortedRecords, keyCoder, wvCoder),
            keyGroup -> {
              try {
                return doFnFunction.call(keyGroup);
              } catch (Exception e) {
                // The transform function cannot throw checked exceptions.
                throw new RuntimeException(e);
              }
            });
    return flatten(outputsPerGroup);
  };
}
 
Example 9
Source Project: beam   Source File: ParDoTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Applies a multi-output {@code ParDo} whose main output is tagged with a non-anonymous
 * {@code TupleTag<TestDummy>}, sets a {@code TestDummyCoder} on that output explicitly, and runs
 * the pipeline.
 */
@Test
@Category(NeedsRunner.class)
public void testMainOutputUnregisteredExplicitCoder() {
  PCollection<Integer> numbers = pipeline.apply(Create.of(Arrays.asList(1, 2, 3)));

  final TupleTag<TestDummy> unregisteredMainTag = new TupleTag<>("unregisteredMain");
  final TupleTag<Integer> additionalTag = new TupleTag<Integer>("additionalOutput") {};
  TupleTagList additionalTags = TupleTagList.of(additionalTag);

  PCollectionTuple tagged =
      numbers.apply(
          ParDo.of(new MainOutputDummyFn(unregisteredMainTag, additionalTag))
              .withOutputTags(unregisteredMainTag, additionalTags));

  // Supply the coder for the main output by hand.
  tagged.get(unregisteredMainTag).setCoder(new TestDummyCoder());

  pipeline.run();
}
 
Example 10
Source Project: beam   Source File: WritePartition.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a {@code WritePartition} by storing all supplied settings.
 *
 * @param singletonTable flag stored as {@code singletonTable}
 * @param dynamicDestinations the dynamic destinations
 * @param tempFilePrefix view holding the temp file prefix
 * @param maxNumFiles limit stored as {@code maxNumFiles}
 * @param maxSizeBytes limit stored as {@code maxSizeBytes}
 * @param multiPartitionsTag tag stored as {@code multiPartitionsTag}
 * @param singlePartitionTag tag stored as {@code singlePartitionTag}
 * @param rowWriterFactory factory stored as {@code rowWriterFactory}
 */
WritePartition(
    boolean singletonTable,
    DynamicDestinations<?, DestinationT> dynamicDestinations,
    PCollectionView<String> tempFilePrefix,
    int maxNumFiles,
    long maxSizeBytes,
    TupleTag<KV<ShardedKey<DestinationT>, List<String>>> multiPartitionsTag,
    TupleTag<KV<ShardedKey<DestinationT>, List<String>>> singlePartitionTag,
    RowWriterFactory<?, DestinationT> rowWriterFactory) {
  // Destination configuration.
  this.singletonTable = singletonTable;
  this.dynamicDestinations = dynamicDestinations;
  this.rowWriterFactory = rowWriterFactory;
  this.tempFilePrefix = tempFilePrefix;

  // Partition limits.
  this.maxNumFiles = maxNumFiles;
  this.maxSizeBytes = maxSizeBytes;

  // Output tags.
  this.multiPartitionsTag = multiPartitionsTag;
  this.singlePartitionTag = singlePartitionTag;
}
 
Example 11
Source Project: beam   Source File: ParDoTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the display data of a {@code ParDo.MultiOutput} includes the display data of the
 * wrapped {@code DoFn} under "fn" and has an "fn" item whose value is the {@code DoFn}'s class.
 */
@Test
public void testWithOutputTagsDisplayData() {
  // A no-op DoFn that contributes a single display data item.
  DoFn<String, String> fn =
      new DoFn<String, String>() {
        @ProcessElement
        public void proccessElement(ProcessContext c) {}

        @Override
        public void populateDisplayData(Builder builder) {
          builder.add(DisplayData.item("fnMetadata", "foobar"));
        }
      };

  ParDo.MultiOutput<String, String> multiOutput =
      ParDo.of(fn).withOutputTags(new TupleTag<>(), TupleTagList.empty());

  DisplayData rendered = DisplayData.from(multiOutput);
  assertThat(rendered, includesDisplayDataFor("fn", fn));
  assertThat(rendered, hasDisplayItem("fn", fn.getClass()));
}
 
Example 12
Source Project: beam   Source File: TransformInputsTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds an applied transform whose complete input map is also passed to {@code TestTransform}
 * as its additional inputs, and expects {@code TransformInputs.nonAdditionalInputs} to throw an
 * {@link IllegalArgumentException} whose message contains "at least one".
 */
@Test
public void nonAdditionalInputsWithOnlyAdditionalInputsThrows() {
  Map<TupleTag<?>, PValue> onlyAdditional = new HashMap<>();
  onlyAdditional.put(new TupleTag<String>() {}, pipeline.apply(Create.of("1, 2", "3")));
  onlyAdditional.put(new TupleTag<Long>() {}, pipeline.apply(GenerateSequence.from(3L)));

  // The same map serves as the node's inputs and as the transform's additional inputs.
  AppliedPTransform<PInput, POutput, TestTransform> applied =
      AppliedPTransform.of(
          "additional-only",
          onlyAdditional,
          Collections.emptyMap(),
          new TestTransform(onlyAdditional),
          pipeline);

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("at least one");
  TransformInputs.nonAdditionalInputs(applied);
}
 
Example 13
Source Project: beam   Source File: PTransformMatchersTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code PTransformMatchers.flattenWithDuplicateInputs()} matches a {@code Flatten}
 * application in which the same {@code PCollection} is supplied under two different tags.
 */
@Test
public void flattenWithDuplicateInputsWithDuplicates() {
  PCollection<Integer> sharedInput =
      PCollection.createPrimitiveOutputInternal(
          p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, VarIntCoder.of());

  // Two distinct tags, both pointing at sharedInput.
  AppliedPTransform flattenApplication =
      AppliedPTransform.of(
          "Flatten",
          ImmutableMap.<TupleTag<?>, PValue>builder()
              .put(new TupleTag<Integer>(), sharedInput)
              .put(new TupleTag<Integer>(), sharedInput)
              .build(),
          Collections.singletonMap(
              new TupleTag<Integer>(),
              PCollection.createPrimitiveOutputInternal(
                  p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, VarIntCoder.of())),
          Flatten.pCollections(),
          p);

  boolean matched = PTransformMatchers.flattenWithDuplicateInputs().matches(flattenApplication);
  assertThat(matched, is(true));
}
 
Example 14
Source Project: beam   Source File: CoGroupByKeyTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the co-group-by-key result via {@code buildGetOnlyGbk} and asserts, through
 * {@code CoGbkResult.getOnly}, the single value stored for selected key/tag combinations.
 */
@Test
@Category({ValidatesRunner.class, UsesSideInputs.class})
public void testCoGroupByKeyGetOnly() {
  final TupleTag<String> firstTag = new TupleTag<>();
  final TupleTag<String> secondTag = new TupleTag<>();

  PCollection<KV<Integer, CoGbkResult>> grouped = buildGetOnlyGbk(p, firstTag, secondTag);

  PAssert.thatMap(grouped)
      .satisfies(
          byKey -> {
            assertEquals("collection1-1", byKey.get(1).getOnly(firstTag));
            assertEquals("collection1-2", byKey.get(2).getOnly(firstTag));
            assertEquals("collection2-2", byKey.get(2).getOnly(secondTag));
            assertEquals("collection2-3", byKey.get(3).getOnly(secondTag));
            return null;
          });

  p.run();
}
 
Example 15
Source Project: beam   Source File: TransformHierarchy.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Adds a node under {@code current} that is already marked as finished specifying.
 *
 * <p>The new node is registered in {@code producers} as the producer of every value in
 * {@code outputs} and is then added to {@code current} as a composite.
 *
 * @param name name of the node; must not be null
 * @param inputs inputs of the node; must not be null
 * @param transform the transform of the node; must not be null
 * @param outputs outputs of the node; must not be null
 * @return the newly created node
 * @throws NullPointerException (via {@code checkNotNull}) if any argument is null
 */
@Internal
public Node addFinalizedPrimitiveNode(
    String name,
    Map<TupleTag<?>, PValue> inputs,
    PTransform<?, ?> transform,
    Map<TupleTag<?>, PValue> outputs) {
  String transformTypeName = PTransform.class.getSimpleName();
  checkNotNull(transform, "A %s must be provided for all Nodes", transformTypeName);
  checkNotNull(name, "A name must be provided for all %s Nodes", transformTypeName);
  checkNotNull(inputs, "Inputs must be provided for all %s Nodes", transformTypeName);
  checkNotNull(outputs, "Outputs must be provided for all %s Nodes", transformTypeName);

  Node primitive = new Node(current, transform, name, inputs, outputs);
  primitive.finishedSpecifying = true;
  outputs.values().forEach(produced -> producers.put(produced, primitive));
  current.addComposite(primitive);
  return primitive;
}
 
Example 16
Source Project: beam   Source File: SplittableParDo.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the primitive output tuple for {@code fn} applied to {@code input}.
 *
 * <p>The tuple contains the main output tag followed by all additional output tags. Its
 * boundedness is the input's boundedness combined (via {@code and}) with the per-element
 * boundedness from the {@code DoFn} signature. The main output's type descriptor is set from
 * {@code fn.getOutputTypeDescriptor()}.
 *
 * @param input the input collection; supplies the pipeline and boundedness
 * @param fn the {@code DoFn} whose signature and output type descriptor are used
 * @param mainOutputTag tag of the main output
 * @param additionalOutputTags tags of the additional outputs
 * @param outputTagsToCoders coders passed through for the output tags
 * @param windowingStrategy windowing strategy of the outputs
 * @return the created output tuple
 */
public static <OutputT> PCollectionTuple createPrimitiveOutputFor(
    PCollection<?> input,
    DoFn<?, OutputT> fn,
    TupleTag<OutputT> mainOutputTag,
    TupleTagList additionalOutputTags,
    Map<TupleTag<?>, Coder<?>> outputTagsToCoders,
    WindowingStrategy<?, ?> windowingStrategy) {
  DoFnSignature fnSignature = DoFnSignatures.getSignature(fn.getClass());
  TupleTagList allOutputTags = TupleTagList.of(mainOutputTag).and(additionalOutputTags.getAll());

  PCollectionTuple result =
      PCollectionTuple.ofPrimitiveOutputsInternal(
          input.getPipeline(),
          allOutputTags,
          outputTagsToCoders,
          windowingStrategy,
          input.isBounded().and(fnSignature.isBoundedPerElement()));

  // Set output type descriptor similarly to how ParDo.MultiOutput does it.
  result.get(mainOutputTag).setTypeDescriptor(fn.getOutputTypeDescriptor());
  return result;
}
 
Example 17
Source Project: beam   Source File: IntrinsicMapTaskExecutorFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the {@code OperationNode} for a ParDo instruction node.
 *
 * <p>The main output tag is taken from the first multi-output info of the instruction. Each
 * successor of {@code node} in {@code network} is given an index in iteration order, and every
 * edge connecting {@code node} to that successor maps its output tag to that index. The
 * resulting map is handed to {@code parDoFnFactory} together with the user fn and side inputs.
 *
 * @param network the graph containing {@code node}
 * @param node the ParDo instruction node
 * @param options pipeline options passed to the factory
 * @param executionContext execution context passed to the factory
 * @param operationContext operation context for the factory and the created operation
 * @return a node wrapping the new {@code ParDoOperation}
 * @throws Exception declared by this method; the visible body does not catch anything
 */
private OperationNode createParDoOperation(
    Network<Node, Edge> network,
    ParallelInstructionNode node,
    PipelineOptions options,
    DataflowExecutionContext<?> executionContext,
    DataflowOperationContext operationContext)
    throws Exception {

  ParallelInstruction parallelInstruction = node.getParallelInstruction();
  ParDoInstruction parDo = parallelInstruction.getParDo();

  TupleTag<?> mainOutputTag = tupleTag(parDo.getMultiOutputInfos().get(0));

  ImmutableMap.Builder<TupleTag<?>, Integer> receiverIndexByTag = ImmutableMap.builder();
  int receiverIndex = 0;
  for (Node successor : network.successors(node)) {
    // All edges to the same successor share that successor's index.
    for (Edge edge : network.edgesConnecting(node, successor)) {
      MultiOutputInfoEdge outputEdge = (MultiOutputInfoEdge) edge;
      receiverIndexByTag.put(tupleTag(outputEdge.getMultiOutputInfo()), receiverIndex);
    }
    receiverIndex++;
  }

  CloudObject userFn = CloudObject.fromSpec(parDo.getUserFn());
  ParDoFn parDoFn =
      parDoFnFactory.create(
          options,
          userFn,
          parDo.getSideInputs(),
          mainOutputTag,
          receiverIndexByTag.build(),
          executionContext,
          operationContext);

  OutputReceiver[] receivers = getOutputReceivers(network, node);
  return OperationNode.create(new ParDoOperation(parDoFn, receivers, operationContext));
}
 
Example 18
/**
 * Outputs {@code value} to {@code tag} with the given timestamp, using the windows and pane of
 * the current {@code element}.
 *
 * <p>Before emitting, this calls {@code noteOutput()} and, if the watermark estimator is a
 * {@code TimestampObservingWatermarkEstimator}, reports the timestamp to it.
 */
@Override
public <T> void outputWithTimestamp(TupleTag<T> tag, T value, Instant timestamp) {
  noteOutput();

  if (watermarkEstimator instanceof TimestampObservingWatermarkEstimator) {
    TimestampObservingWatermarkEstimator observingEstimator =
        (TimestampObservingWatermarkEstimator) watermarkEstimator;
    observingEstimator.observeTimestamp(timestamp);
  }

  output.outputWindowedValue(tag, value, timestamp, element.getWindows(), element.getPane());
}
 
Example 19
Source Project: beam   Source File: PipelineTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Maps {@code newOutput} to a {@code ReplacementOutput} that pairs the only entry of
 * {@code outputs} with the only entry of {@code newOutput.expand()}.
 *
 * <p>{@code Iterables.getOnlyElement} is used for both lookups, so each map is expected to
 * contain exactly one entry.
 */
@Override
public Map<PValue, ReplacementOutput> mapOutputs(
    Map<TupleTag<?>, PValue> outputs, PCollection<Long> newOutput) {
  Map.Entry<TupleTag<?>, PValue> originalEntry = Iterables.getOnlyElement(outputs.entrySet());
  Map.Entry<TupleTag<?>, PValue> replacementEntry =
      Iterables.getOnlyElement(newOutput.expand().entrySet());

  TaggedPValue taggedOriginal =
      TaggedPValue.of(originalEntry.getKey(), originalEntry.getValue());
  TaggedPValue taggedReplacement =
      TaggedPValue.of(replacementEntry.getKey(), replacementEntry.getValue());

  return Collections.singletonMap(
      newOutput, ReplacementOutput.of(taggedOriginal, taggedReplacement));
}
 
Example 20
Source Project: beam   Source File: DoFnTester.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the values of the elements currently recorded for {@code tag}, without their window
 * information, collected into a new list.
 *
 * @deprecated Use {@link TestPipeline} with the {@code DirectRunner}.
 */
@Deprecated
public <T> List<T> peekOutputElements(TupleTag<T> tag) {
  // TODO: Should we return an unmodifiable list?
  List<T> values =
      getImmutableOutput(tag).stream()
          .map(windowed -> windowed.getValue())
          .collect(Collectors.toList());
  return values;
}
 
Example 21
Source Project: beam   Source File: KeyedPCollectionTuple.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Expands the component {@link PCollection PCollections}, stripping off any tag-specific
 * information.
 *
 * @return an immutable map from each keyed collection's tuple tag to its collection, in the
 *     iteration order of {@code keyedCollections}
 */
@Override
public Map<TupleTag<?>, PValue> expand() {
  ImmutableMap.Builder<TupleTag<?>, PValue> expanded = ImmutableMap.builder();
  keyedCollections.forEach(tagged -> expanded.put(tagged.tupleTag, tagged.pCollection));
  return expanded.build();
}
 
Example 22
Source Project: beam   Source File: TranslationUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns a pair function that decodes the value of a tagged pair with the coder registered for
 * its tag.
 *
 * <p>For each input pair, the coder is looked up in {@code coderMap} by the pair's
 * {@link TupleTag} and passed to {@code getOrDecode}; the tag itself is passed through
 * unchanged.
 *
 * @param coderMap - mapping between TupleTag and a coder
 * @return a pair function to convert bytes to value via coder
 */
public static PairFunction<
        Tuple2<TupleTag<?>, ValueAndCoderLazySerializable<WindowedValue<?>>>,
        TupleTag<?>,
        WindowedValue<?>>
    getTupleTagDecodeFunction(final Map<TupleTag<?>, Coder<WindowedValue<?>>> coderMap) {
  return taggedValue -> {
    TupleTag<?> tag = taggedValue._1();
    ValueAndCoderLazySerializable<WindowedValue<?>> lazyValue = taggedValue._2();
    Coder<WindowedValue<?>> coderForTag = coderMap.get(tag);
    return new Tuple2<>(tag, lazyValue.getOrDecode(coderForTag));
  };
}
 
Example 23
Source Project: beam   Source File: PCollectionViewTranslation.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Create a {@link PCollectionView} from a side input spec and an already-deserialized {@link
 * PCollection} that should be wired up.
 *
 * <p>The view's tag is a new {@link TupleTag} whose id is {@code localName}. The window mapping
 * function and view function are decoded from {@code sideInput}; the windowing strategy and
 * coder are taken from {@code pCollection}.
 *
 * @param sideInput side input spec supplying the window mapping function and the view function
 * @param localName id for the view's tag; must not be null
 * @param pCollection the collection backing the view
 * @param parDoTransform not read by this method
 * @param components not read by this method
 * @return the reconstructed view
 * @throws IllegalArgumentException (via {@code checkArgument}) if {@code localName} is null
 * @throws IOException declared by this method; NOTE(review): presumably raised by the proto
 *     decoding helpers — confirm against their declarations
 */
public static PCollectionView<?> viewFromProto(
    RunnerApi.SideInput sideInput,
    String localName,
    PCollection<?> pCollection,
    RunnerApi.PTransform parDoTransform,
    RehydratedComponents components)
    throws IOException {
  // Name the class that actually declares this method, so a failure points at the right place.
  checkArgument(
      localName != null,
      "%s.viewFromProto: localName must not be null",
      PCollectionViewTranslation.class.getSimpleName());
  TupleTag<?> tag = new TupleTag<>(localName);
  WindowMappingFn<?> windowMappingFn = windowMappingFnFromProto(sideInput.getWindowMappingFn());
  ViewFn<?, ?> viewFn = viewFnFromProto(sideInput.getViewFn());
  WindowingStrategy<?, ?> windowingStrategy = pCollection.getWindowingStrategy().fixDefaults();

  // The raw casts drop the wildcard type arguments of the tag, view fn and coder.
  return new RunnerPCollectionView<>(
      pCollection,
      (TupleTag) tag,
      (ViewFn) viewFn,
      windowMappingFn,
      windowingStrategy,
      (Coder) pCollection.getCoder());
}
 
Example 24
Source Project: beam   Source File: KeyedPCollectionTuple.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns a new {@code KeyedPCollectionTuple<K>} that is the same as this, appended with the
 * given PCollection.
 *
 * <p>If this tuple has no key coder yet, the key coder is obtained from {@code pc} via
 * {@code getKeyCoder}; otherwise the existing one is kept.
 *
 * @param tag tag identifying the appended collection
 * @param pc the collection to append; must belong to the same pipeline as this tuple
 * @throws IllegalArgumentException if {@code pc} comes from a different pipeline
 */
public <V> KeyedPCollectionTuple<K> and(TupleTag<V> tag, PCollection<KV<K, V>> pc) {
  if (pc.getPipeline() != getPipeline()) {
    throw new IllegalArgumentException("PCollections come from different Pipelines");
  }

  TaggedKeyedPCollection<K, ?> appended = new TaggedKeyedPCollection<>(tag, pc);

  Coder<K> effectiveKeyCoder;
  if (keyCoder != null) {
    effectiveKeyCoder = keyCoder;
  } else {
    effectiveKeyCoder = getKeyCoder(pc);
  }

  List<TaggedKeyedPCollection<K, ?>> extendedCollections =
      copyAddLast(keyedCollections, appended);
  return new KeyedPCollectionTuple<>(
      getPipeline(), extendedCollections, schema.getTupleTagList().and(tag), effectiveKeyCoder);
}
 
Example 25
Source Project: beam   Source File: AbstractParDoP.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a supplier by storing the supplied configuration.
 *
 * <p>{@code outputCollToOrdinals} is initialised with one empty, mutable list per tag in
 * {@code allOutputTags}. {@code allOutputTags} itself is not stored by this constructor.
 *
 * @param stepId value stored as {@code stepId}
 * @param ownerId value stored as {@code ownerId}
 * @param doFn the {@code DoFn}
 * @param windowingStrategy the windowing strategy
 * @param doFnSchemaInformation schema information for the {@code DoFn}
 * @param pipelineOptions serializable pipeline options
 * @param mainOutputTag tag of the main output
 * @param allOutputTags all output tags; used as the keys of {@code outputCollToOrdinals}
 * @param inputCoder value stored as {@code inputCoder}
 * @param sideInputCoders coders keyed by side input view
 * @param outputCoders value stored as {@code outputCoders}
 * @param inputValueCoder value stored as {@code inputValueCoder}
 * @param outputValueCoders value stored as {@code outputValueCoders}
 * @param sideInputs the side input views
 */
AbstractSupplier(
    String stepId,
    String ownerId,
    DoFn<InputT, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    DoFnSchemaInformation doFnSchemaInformation,
    SerializablePipelineOptions pipelineOptions,
    TupleTag<OutputT> mainOutputTag,
    Set<TupleTag<OutputT>> allOutputTags,
    Coder<InputT> inputCoder,
    Map<PCollectionView<?>, Coder<?>> sideInputCoders,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    Coder<InputT> inputValueCoder,
    Map<TupleTag<?>, Coder<?>> outputValueCoders,
    List<PCollectionView<?>> sideInputs) {
  // Identity and options.
  this.stepId = stepId;
  this.ownerId = ownerId;
  this.pipelineOptions = pipelineOptions;

  // The function and how it is windowed.
  this.doFn = doFn;
  this.windowingStrategy = windowingStrategy;
  this.doFnSchemaInformation = doFnSchemaInformation;

  // Inputs.
  this.inputCoder = inputCoder;
  this.inputValueCoder = inputValueCoder;
  this.sideInputs = sideInputs;
  this.sideInputCoders = sideInputCoders;

  // Outputs: every known tag starts with its own empty list.
  this.mainOutputTag = mainOutputTag;
  this.outputCoders = outputCoders;
  this.outputValueCoders = outputValueCoders;
  this.outputCollToOrdinals =
      allOutputTags.stream()
          .collect(Collectors.toMap(Function.identity(), unusedTag -> new ArrayList<>()));
}
 
Example 26
Source Project: beam   Source File: CoGbkResultTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@code CoGbkResultSchema} with {@code size} tags whose ids are {@code "tag0"},
 * {@code "tag1"}, and so on, in that order.
 *
 * @param size number of tags to create; no tags are created when it is zero or negative
 */
private CoGbkResultSchema createSchema(int size) {
  List<TupleTag<?>> schemaTags = new ArrayList<>();
  int index = 0;
  while (index < size) {
    schemaTags.add(new TupleTag<Integer>("tag" + index));
    index++;
  }
  return new CoGbkResultSchema(TupleTagList.of(schemaTags));
}
 
Example 27
Source Project: beam   Source File: CoGbkResult.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns a new {@link CoGbkResult} based on this, with the given tag and given data added to it.
 *
 * <p>This result is left untouched: the values are copied into a new list before {@code data}
 * is appended, and the returned result gets a schema extended by {@code tag} and a union id one
 * larger than this one's.
 *
 * @throws IllegalArgumentException if {@code nextTestUnionId} differs from the schema size
 */
public <V> CoGbkResult and(TupleTag<V> tag, List<V> data) {
  if (nextTestUnionId != schema.size()) {
    throw new IllegalArgumentException(
        "Attempting to call and() on a CoGbkResult apparently not created by of().");
  }

  // Copy first so the list held by this instance is not modified.
  List<Iterable<?>> extendedValues = new ArrayList<>(this.valueMap);
  extendedValues.add(data);

  CoGbkResultSchema extendedSchema = new CoGbkResultSchema(schema.getTupleTagList().and(tag));
  return new CoGbkResult(extendedSchema, extendedValues, nextTestUnionId + 1);
}
 
Example 28
Source Project: beam   Source File: ReplacementOutputsTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Expects {@code ReplacementOutputs.singleton} to throw an {@link IllegalArgumentException} when
 * the original outputs are the combined expansions of {@code ints} and {@code moreInts}.
 */
@Test
public void singletonMultipleOriginalsThrows() {
  thrown.expect(IllegalArgumentException.class);

  ImmutableMap<TupleTag<?>, PValue> combinedOriginals =
      ImmutableMap.<TupleTag<?>, PValue>builder()
          .putAll(ints.expand())
          .putAll(moreInts.expand())
          .build();
  ReplacementOutputs.singleton(combinedOriginals, replacementInts);
}
 
Example 29
Source Project: dataflow-java   Source File: VerifyBamIdTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Co-groups read base qualities with reference allele frequencies by position, applies
 * {@code PileupAndJoinReads}, and asserts the three expected {@code ReadCounts} results.
 */
@Test
public void testPileupAndJoinReads() throws Exception {
  // Input 1: a single read base quality at position1.
  final ReadBaseQuality baseQuality = new ReadBaseQuality("A", 10);
  PCollection<KV<Position, ReadBaseQuality>> readCounts =
      p.apply("createInput", Create.of(KV.of(position1, baseQuality)));
  PAssert.that(readCounts).containsInAnyOrder(KV.of(position1, baseQuality));

  // Input 2: the reference allele frequencies.
  PCollection<KV<Position, AlleleFreq>> refFreq = p.apply(Create.of(refCountList));
  PAssert.that(refFreq).containsInAnyOrder(refCountList);

  final TupleTag<ReadBaseQuality> readCountsTag = new TupleTag<>();
  TupleTag<AlleleFreq> refFreqTag = new TupleTag<>();
  PCollection<KV<Position, CoGbkResult>> coGrouped =
      KeyedPCollectionTuple.of(readCountsTag, readCounts)
          .and(refFreqTag, refFreq)
          .apply(CoGroupByKey.<Position>create());

  PCollection<KV<Position, ReadCounts>> pileup =
      coGrouped.apply(ParDo.of(new PileupAndJoinReads(readCountsTag, refFreqTag)));

  KV<Position, ReadCounts> expectedAtPosition1 = KV.of(position1, rc1);
  KV<Position, ReadCounts> expectedAtPosition2 = KV.of(position2, rc2);
  KV<Position, ReadCounts> expectedAtPosition3 = KV.of(position3, rc3);

  PAssert.that(pileup)
      .containsInAnyOrder(expectedAtPosition1, expectedAtPosition2, expectedAtPosition3);
  p.run();
}
 
Example 30
Source Project: beam   Source File: ExecutableStageDoFnOperatorTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Makes the mocked bundle's {@code close()} throw and expects an exception whose cause is that
 * same exception while an element is processed and the test harness is closed.
 */
@Test
public void sdkErrorsSurfaceOnClose() throws Exception {
  TupleTag<Integer> mainTag = new TupleTag<>("main-output");
  DoFnOperator.MultiOutputOutputManagerFactory<Integer> managerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory(mainTag, VoidCoder.of());
  ExecutableStageDoFnOperator<Integer, Integer> operator =
      getOperator(mainTag, Collections.emptyList(), managerFactory);

  OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<Integer>> harness =
      new OneInputStreamOperatorTestHarness<>(operator);

  harness.open();

  // Every bundle request returns the same mocked bundle.
  @SuppressWarnings("unchecked")
  RemoteBundle mockBundle = Mockito.mock(RemoteBundle.class);
  when(stageBundleFactory.getBundle(any(), any(), any(), any())).thenReturn(mockBundle);

  @SuppressWarnings("unchecked")
  FnDataReceiver<WindowedValue<?>> inputReceiver = Mockito.mock(FnDataReceiver.class);
  when(mockBundle.getInputReceivers()).thenReturn(ImmutableMap.of("input", inputReceiver));

  // Closing the bundle fails; the failure is expected as the cause of the thrown exception.
  Exception closeFailure = new RuntimeException(new Exception());
  doThrow(closeFailure).when(mockBundle).close();
  thrown.expectCause(is(closeFailure));

  operator.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow(0)));
  harness.close();
}