org.apache.beam.sdk.values.TupleTag Java Examples

The following examples show how to use org.apache.beam.sdk.values.TupleTag. You can vote each example up or down, and you can reach the original project or source file through the links above each example. Related API usage is listed in the sidebar.
Example #1
Source File: ExecutableStageDoFnOperator.java    From beam with Apache License 2.0 7 votes vote down vote up
/**
 * Creates a runner that simply remembers the collaborators it is given.
 *
 * <p>Each argument is stored in the field of the same name without validation or copying, and a
 * new, empty {@link LinkedBlockingQueue} is installed as the output queue.
 */
public SdkHarnessDoFnRunner(
    DoFn<InputT, OutputT> doFn,
    StageBundleFactory stageBundleFactory,
    StateRequestHandler stateRequestHandler,
    BundleProgressHandler progressHandler,
    BufferedOutputManager<OutputT> outputManager,
    Map<String, TupleTag<?>> outputMap,
    Coder<BoundedWindow> windowCoder,
    BiConsumer<Timer<?>, TimerInternals.TimerData> timerRegistration,
    Supplier<Object> keyForTimer) {

  // Function and bundle-related handlers.
  this.doFn = doFn;
  this.stageBundleFactory = stageBundleFactory;
  this.stateRequestHandler = stateRequestHandler;
  this.progressHandler = progressHandler;

  // Output plumbing.
  this.outputManager = outputManager;
  this.outputMap = outputMap;
  this.outputQueue = new LinkedBlockingQueue<>();

  // Window coder and timer hooks.
  this.windowCoder = windowCoder;
  this.timerRegistration = timerRegistration;
  this.keyForTimer = keyForTimer;
}
 
Example #2
Source File: DoFnOperator.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Stores the supplied collaborators and builds the reverse lookup from ids to tags.
 *
 * <p>{@code idsToTags} is a new {@link HashMap} filled by inverting {@code tagsToIds}; should two
 * tags share an id, the entry visited last overwrites the earlier one.
 */
BufferedOutputManager(
    Output<StreamRecord<WindowedValue<OutputT>>> output,
    TupleTag<OutputT> mainTag,
    Map<TupleTag<?>, OutputTag<WindowedValue<?>>> tagsToOutputTags,
    Map<TupleTag<?>, Integer> tagsToIds,
    Lock bufferLock,
    PushedBackElementsHandler<KV<Integer, WindowedValue<?>>> pushedBackElementsHandler) {
  this.output = output;
  this.mainTag = mainTag;
  this.bufferLock = bufferLock;
  this.pushedBackElementsHandler = pushedBackElementsHandler;
  this.tagsToOutputTags = tagsToOutputTags;
  this.tagsToIds = tagsToIds;

  // Invert tag -> id into id -> tag.
  this.idsToTags = new HashMap<>();
  tagsToIds.forEach((tag, id) -> this.idsToTags.put(id, tag));
}
 
Example #3
Source File: PTransformMatchersTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * A Flatten application with a single input entry must not be matched by {@code
 * PTransformMatchers.flattenWithDuplicateInputs()}.
 */
@Test
public void flattenWithDuplicateInputsWithoutDuplicates() {
  PCollection<Integer> flattenInput =
      PCollection.createPrimitiveOutputInternal(
          p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, VarIntCoder.of());
  PCollection<Integer> flattenOutput =
      PCollection.createPrimitiveOutputInternal(
          p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, VarIntCoder.of());

  AppliedPTransform application =
      AppliedPTransform.of(
          "Flatten",
          Collections.singletonMap(new TupleTag<Integer>(), flattenInput),
          Collections.singletonMap(new TupleTag<Integer>(), flattenOutput),
          Flatten.pCollections(),
          p);

  boolean matched = PTransformMatchers.flattenWithDuplicateInputs().matches(application);
  assertThat(matched, is(false));
}
 
Example #4
Source File: ToIsmRecordForMultimapDoFnFactory.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the {@link ParDoFn} from the coder described in {@code cloudUserFn}.
 *
 * <p>The coder is decoded from the {@code PropertyNames.ENCODING} entry of {@code cloudUserFn}
 * and is required to be an {@link IsmRecordCoder}. Its first two coder arguments are paired into
 * a {@link KvCoder}, which is passed to the returned function. The other parameters are not read
 * by this method.
 */
@Override
public ParDoFn create(
    PipelineOptions options,
    CloudObject cloudUserFn,
    List<SideInputInfo> sideInputInfos,
    TupleTag<?> mainOutputTag,
    Map<TupleTag<?>, Integer> outputTupleTagsToReceiverIndices,
    DataflowExecutionContext<?> executionContext,
    DataflowOperationContext operationContext)
    throws Exception {
  CloudObject encodingSpec =
      CloudObject.fromSpec(Structs.getObject(cloudUserFn, PropertyNames.ENCODING));
  Coder<?> coder = CloudObjects.coderFromCloudObject(encodingSpec);

  // Anything other than an IsmRecordCoder is rejected before the cast below.
  checkState(
      coder instanceof IsmRecordCoder,
      "Expected to received an instanceof an %s but got %s",
      IsmRecordCoder.class.getSimpleName(),
      coder);
  IsmRecordCoder<?> ismRecordCoder = (IsmRecordCoder<?>) coder;

  return new ToIsmRecordForMultimapParDoFn(
      KvCoder.of(
          ismRecordCoder.getCoderArguments().get(0), ismRecordCoder.getCoderArguments().get(1)));
}
 
Example #5
Source File: ParDoTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Applies a multi-output ParDo with a {@code TestDummy} additional output and expects {@code
 * pipeline.run()} to raise an {@link IllegalStateException} mentioning "Unable to return a default
 * Coder".
 */
@Test
@Category(NeedsRunner.class)
public void testTaggedOutputUnknownCoder() throws Exception {

  PCollection<Integer> numbers = pipeline.apply(Create.of(Arrays.asList(1, 2, 3)));

  final TupleTag<Integer> mainTag = new TupleTag<>("main");
  final TupleTag<TestDummy> dummyTag = new TupleTag<>("unknownSide");
  numbers.apply(
      ParDo.of(new TaggedOutputDummyFn(mainTag, dummyTag))
          .withOutputTags(mainTag, TupleTagList.of(dummyTag)));

  // The expectation is registered only after the transform has been applied.
  thrown.expect(IllegalStateException.class);
  thrown.expectMessage("Unable to return a default Coder");
  pipeline.run();
}
 
Example #6
Source File: Partition.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Constructs a PartitionDoFn with one freshly created output tag per partition.
 *
 * @param numPartitions how many output tags to create; must be positive
 * @param ctxFn stored as-is in the field of the same name
 * @param originalFnClassForDisplayData stored as-is in the field of the same name
 * @throws IllegalArgumentException if {@code numPartitions <= 0}
 */
private PartitionDoFn(
    int numPartitions,
    Contextful<Contextful.Fn<X, Integer>> ctxFn,
    Object originalFnClassForDisplayData) {
  if (numPartitions <= 0) {
    throw new IllegalArgumentException("numPartitions must be > 0");
  }

  this.numPartitions = numPartitions;
  this.ctxFn = ctxFn;
  this.originalFnClassForDisplayData = originalFnClassForDisplayData;

  // Append exactly numPartitions new tags to an initially empty list.
  TupleTagList tags = TupleTagList.empty();
  int remaining = numPartitions;
  while (remaining > 0) {
    tags = tags.and(new TupleTag<X>());
    remaining--;
  }
  outputTags = tags;
}
 
Example #7
Source File: ParDoTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code TestNoOutputDoFn} over an empty input with one main and two additional output tags
 * and asserts that all three resulting collections are empty.
 */
@Test
@Category(ValidatesRunner.class)
public void testParDoWithEmptyTaggedOutput() {
  TupleTag<String> mainTag = new TupleTag<String>("main") {};
  TupleTag<String> firstExtraTag = new TupleTag<String>("additional1") {};
  TupleTag<String> secondExtraTag = new TupleTag<String>("additional2") {};
  TupleTagList extraTags = TupleTagList.of(firstExtraTag).and(secondExtraTag);

  PCollectionTuple results =
      pipeline
          .apply(Create.empty(VarIntCoder.of()))
          .apply(ParDo.of(new TestNoOutputDoFn()).withOutputTags(mainTag, extraTags));

  PAssert.that(results.get(mainTag)).empty();
  PAssert.that(results.get(firstExtraTag)).empty();
  PAssert.that(results.get(secondExtraTag)).empty();

  pipeline.run();
}
 
Example #8
Source File: TransformTranslator.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Wraps {@code doFnFunction} so it can be applied to an iterator of encoded key/value pairs.
 *
 * <p>The returned function splits its input with {@code splitBySameKey}, calls {@code
 * doFnFunction} once per resulting group, and flattens the per-group results into one iterator.
 * Any exception thrown by {@code doFnFunction.call} is rethrown wrapped in a {@link
 * RuntimeException}.
 */
private static <K, V, OutputT>
    PairFlatMapFunction<Iterator<Tuple2<ByteArray, byte[]>>, TupleTag<?>, WindowedValue<?>>
        wrapDoFnFromSortedRDD(
            MultiDoFnFunction<KV<K, V>, OutputT> doFnFunction,
            Coder<K> keyCoder,
            Coder<WindowedValue<V>> wvCoder) {

  return (Iterator<Tuple2<ByteArray, byte[]>> encodedRecords) -> {
    Iterator<Iterator<Tuple2<TupleTag<?>, WindowedValue<?>>>> outputsPerGroup =
        Iterators.transform(
            splitBySameKey(encodedRecords, keyCoder, wvCoder),
            keyGroup -> {
              try {
                return doFnFunction.call(keyGroup);
              } catch (Exception e) {
                throw new RuntimeException(e);
              }
            });
    return flatten(outputsPerGroup);
  };
}
 
Example #9
Source File: ParDoTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Applies a multi-output ParDo whose main output carries {@code TestDummy} elements, sets a
 * {@code TestDummyCoder} on that output explicitly, and runs the pipeline.
 */
@Test
@Category(NeedsRunner.class)
public void testMainOutputUnregisteredExplicitCoder() {

  PCollection<Integer> numbers = pipeline.apply(Create.of(Arrays.asList(1, 2, 3)));

  final TupleTag<TestDummy> dummyMainTag = new TupleTag<>("unregisteredMain");
  final TupleTag<Integer> intExtraTag = new TupleTag<Integer>("additionalOutput") {};
  PCollectionTuple results =
      numbers.apply(
          ParDo.of(new MainOutputDummyFn(dummyMainTag, intExtraTag))
              .withOutputTags(dummyMainTag, TupleTagList.of(intExtraTag)));

  // Supply the coder by hand for the main output.
  results.get(dummyMainTag).setCoder(new TestDummyCoder());

  pipeline.run();
}
 
Example #10
Source File: WritePartition.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a WritePartition by storing each argument in the field of the same name; no argument is
 * validated or copied.
 */
WritePartition(
    boolean singletonTable,
    DynamicDestinations<?, DestinationT> dynamicDestinations,
    PCollectionView<String> tempFilePrefix,
    int maxNumFiles,
    long maxSizeBytes,
    TupleTag<KV<ShardedKey<DestinationT>, List<String>>> multiPartitionsTag,
    TupleTag<KV<ShardedKey<DestinationT>, List<String>>> singlePartitionTag,
    RowWriterFactory<?, DestinationT> rowWriterFactory) {
  // Destination-related settings.
  this.singletonTable = singletonTable;
  this.dynamicDestinations = dynamicDestinations;
  this.rowWriterFactory = rowWriterFactory;
  this.tempFilePrefix = tempFilePrefix;

  // Limits.
  this.maxNumFiles = maxNumFiles;
  this.maxSizeBytes = maxSizeBytes;

  // Output tags.
  this.multiPartitionsTag = multiPartitionsTag;
  this.singlePartitionTag = singlePartitionTag;
}
 
Example #11
Source File: ParDoTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the display data of a multi-output ParDo includes the display data of its DoFn and
 * an "fn" item holding the DoFn's class.
 */
@Test
public void testWithOutputTagsDisplayData() {
  DoFn<String, String> fn =
      new DoFn<String, String>() {
        @ProcessElement
        public void proccessElement(ProcessContext c) {}

        @Override
        public void populateDisplayData(Builder builder) {
          builder.add(DisplayData.item("fnMetadata", "foobar"));
        }
      };

  ParDo.MultiOutput<String, String> multiOutputParDo =
      ParDo.of(fn).withOutputTags(new TupleTag<>(), TupleTagList.empty());
  DisplayData parDoDisplayData = DisplayData.from(multiOutputParDo);

  assertThat(parDoDisplayData, includesDisplayDataFor("fn", fn));
  assertThat(parDoDisplayData, hasDisplayItem("fn", fn.getClass()));
}
 
Example #12
Source File: TransformInputsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a transform whose inputs are all passed to {@code TestTransform} as well and expects
 * {@code TransformInputs.nonAdditionalInputs} to throw an {@link IllegalArgumentException} whose
 * message mentions "at least one".
 */
@Test
public void nonAdditionalInputsWithOnlyAdditionalInputsThrows() {
  Map<TupleTag<?>, PValue> onlyAdditional = new HashMap<>();
  onlyAdditional.put(new TupleTag<String>() {}, pipeline.apply(Create.of("1, 2", "3")));
  onlyAdditional.put(new TupleTag<Long>() {}, pipeline.apply(GenerateSequence.from(3L)));

  // The same map serves as the transform's inputs and as the argument to TestTransform.
  AppliedPTransform<PInput, POutput, TestTransform> applied =
      AppliedPTransform.of(
          "additional-only",
          onlyAdditional,
          Collections.emptyMap(),
          new TestTransform(onlyAdditional),
          pipeline);

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("at least one");
  TransformInputs.nonAdditionalInputs(applied);
}
 
Example #13
Source File: PTransformMatchersTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * A Flatten application that receives the same PCollection under two different tags must be
 * matched by {@code PTransformMatchers.flattenWithDuplicateInputs()}.
 */
@Test
public void flattenWithDuplicateInputsWithDuplicates() {
  PCollection<Integer> duplicate =
      PCollection.createPrimitiveOutputInternal(
          p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, VarIntCoder.of());

  // Two distinct tags pointing at the very same collection.
  ImmutableMap<TupleTag<?>, PValue> duplicatedInputs =
      ImmutableMap.<TupleTag<?>, PValue>builder()
          .put(new TupleTag<Integer>(), duplicate)
          .put(new TupleTag<Integer>(), duplicate)
          .build();
  PCollection<Integer> flattenOutput =
      PCollection.createPrimitiveOutputInternal(
          p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, VarIntCoder.of());

  AppliedPTransform application =
      AppliedPTransform.of(
          "Flatten",
          duplicatedInputs,
          Collections.singletonMap(new TupleTag<Integer>(), flattenOutput),
          Flatten.pCollections(),
          p);

  boolean matched = PTransformMatchers.flattenWithDuplicateInputs().matches(application);
  assertThat(matched, is(true));
}
 
Example #14
Source File: CoGroupByKeyTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies {@code getOnly} on the results produced by {@code buildGetOnlyGbk} for keys 1, 2
 * and 3.
 */
@Test
@Category({ValidatesRunner.class, UsesSideInputs.class})
public void testCoGroupByKeyGetOnly() {
  final TupleTag<String> firstTag = new TupleTag<>();
  final TupleTag<String> secondTag = new TupleTag<>();

  PCollection<KV<Integer, CoGbkResult>> grouped = buildGetOnlyGbk(p, firstTag, secondTag);

  PAssert.thatMap(grouped)
      .satisfies(
          byKey -> {
            assertEquals("collection1-1", byKey.get(1).getOnly(firstTag));
            assertEquals("collection1-2", byKey.get(2).getOnly(firstTag));
            assertEquals("collection2-2", byKey.get(2).getOnly(secondTag));
            assertEquals("collection2-3", byKey.get(3).getOnly(secondTag));
            return null;
          });

  p.run();
}
 
Example #15
Source File: TransformHierarchy.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Adds an already-finalized node below the current node and records it as the producer of every
 * value in {@code outputs}.
 *
 * <p>All four arguments are null-checked (transform, name, inputs, outputs, in that order) before
 * the node is created. The new node has {@code finishedSpecifying} set to {@code true}.
 *
 * @return the newly created node
 */
@Internal
public Node addFinalizedPrimitiveNode(
    String name,
    Map<TupleTag<?>, PValue> inputs,
    PTransform<?, ?> transform,
    Map<TupleTag<?>, PValue> outputs) {
  checkNotNull(
      transform, "A %s must be provided for all Nodes", PTransform.class.getSimpleName());
  checkNotNull(
      name, "A name must be provided for all %s Nodes", PTransform.class.getSimpleName());
  checkNotNull(
      inputs, "Inputs must be provided for all %s Nodes", PTransform.class.getSimpleName());
  checkNotNull(
      outputs, "Outputs must be provided for all %s Nodes", PTransform.class.getSimpleName());

  Node finalized = new Node(current, transform, name, inputs, outputs);
  finalized.finishedSpecifying = true;
  outputs.values().forEach(produced -> producers.put(produced, finalized));
  current.addComposite(finalized);
  return finalized;
}
 
Example #16
Source File: SplittableParDo.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the primitive output tuple for {@code fn} applied to {@code input}.
 *
 * <p>The tuple covers the main output tag followed by all additional output tags. Its boundedness
 * combines {@code input.isBounded()} with the per-element boundedness from the DoFn signature.
 * The main output's type descriptor is set from {@code fn.getOutputTypeDescriptor()}.
 *
 * @return the created output tuple
 */
public static <OutputT> PCollectionTuple createPrimitiveOutputFor(
    PCollection<?> input,
    DoFn<?, OutputT> fn,
    TupleTag<OutputT> mainOutputTag,
    TupleTagList additionalOutputTags,
    Map<TupleTag<?>, Coder<?>> outputTagsToCoders,
    WindowingStrategy<?, ?> windowingStrategy) {
  DoFnSignature fnSignature = DoFnSignatures.getSignature(fn.getClass());
  TupleTagList allOutputTags = TupleTagList.of(mainOutputTag).and(additionalOutputTags.getAll());

  PCollectionTuple result =
      PCollectionTuple.ofPrimitiveOutputsInternal(
          input.getPipeline(),
          allOutputTags,
          outputTagsToCoders,
          windowingStrategy,
          input.isBounded().and(fnSignature.isBoundedPerElement()));

  // Set output type descriptor similarly to how ParDo.MultiOutput does it.
  result.get(mainOutputTag).setTypeDescriptor(fn.getOutputTypeDescriptor());

  return result;
}
 
Example #17
Source File: IntrinsicMapTaskExecutorFactory.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the {@link OperationNode} for a ParDo instruction node.
 *
 * <p>The main output tag comes from the first multi-output info of the instruction. Every edge
 * from {@code node} to a successor contributes a tag-to-index entry, where the index is the
 * position of that successor in {@code network.successors(node)}. The resulting map, together
 * with the user function and side inputs, is passed to {@code parDoFnFactory}.
 */
private OperationNode createParDoOperation(
    Network<Node, Edge> network,
    ParallelInstructionNode node,
    PipelineOptions options,
    DataflowExecutionContext<?> executionContext,
    DataflowOperationContext operationContext)
    throws Exception {

  ParDoInstruction parDo = node.getParallelInstruction().getParDo();
  TupleTag<?> mainOutputTag = tupleTag(parDo.getMultiOutputInfos().get(0));

  ImmutableMap.Builder<TupleTag<?>, Integer> receiverIndexByTag = ImmutableMap.builder();
  int receiverIndex = 0;
  for (Node successor : network.successors(node)) {
    // All edges leading to the same successor share that successor's index.
    for (Edge edge : network.edgesConnecting(node, successor)) {
      MultiOutputInfoEdge outputEdge = (MultiOutputInfoEdge) edge;
      receiverIndexByTag.put(tupleTag(outputEdge.getMultiOutputInfo()), receiverIndex);
    }
    receiverIndex++;
  }

  ParDoFn fn =
      parDoFnFactory.create(
          options,
          CloudObject.fromSpec(parDo.getUserFn()),
          parDo.getSideInputs(),
          mainOutputTag,
          receiverIndexByTag.build(),
          executionContext,
          operationContext);

  OutputReceiver[] receivers = getOutputReceivers(network, node);
  return OperationNode.create(new ParDoOperation(fn, receivers, operationContext));
}
 
Example #18
Source File: OutputAndTimeBoundedSplittableProcessElementInvoker.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Emits {@code value} to {@code tag} with the given timestamp, using the windows and pane of the
 * current element.
 *
 * <p>The output is noted first; if the watermark estimator is a {@code
 * TimestampObservingWatermarkEstimator}, it is shown the timestamp before the value is emitted.
 */
@Override
public <T> void outputWithTimestamp(TupleTag<T> tag, T value, Instant timestamp) {
  noteOutput();
  if (watermarkEstimator instanceof TimestampObservingWatermarkEstimator) {
    TimestampObservingWatermarkEstimator observer =
        (TimestampObservingWatermarkEstimator) watermarkEstimator;
    observer.observeTimestamp(timestamp);
  }
  output.outputWindowedValue(tag, value, timestamp, element.getWindows(), element.getPane());
}
 
Example #19
Source File: PipelineTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Maps {@code newOutput} to a {@link ReplacementOutput} pairing the single original output with
 * the single expanded entry of {@code newOutput}.
 *
 * <p>Both {@code outputs} and {@code newOutput.expand()} must contain exactly one entry, as each
 * is read with {@code Iterables.getOnlyElement}.
 */
@Override
public Map<PValue, ReplacementOutput> mapOutputs(
    Map<TupleTag<?>, PValue> outputs, PCollection<Long> newOutput) {
  Map.Entry<TupleTag<?>, PValue> originalEntry = Iterables.getOnlyElement(outputs.entrySet());
  Map.Entry<TupleTag<?>, PValue> replacementEntry =
      Iterables.getOnlyElement(newOutput.expand().entrySet());

  TaggedPValue taggedOriginal =
      TaggedPValue.of(originalEntry.getKey(), originalEntry.getValue());
  TaggedPValue taggedReplacement =
      TaggedPValue.of(replacementEntry.getKey(), replacementEntry.getValue());

  return Collections.singletonMap(
      newOutput, ReplacementOutput.of(taggedOriginal, taggedReplacement));
}
 
Example #20
Source File: DoFnTester.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the values of the elements currently recorded for {@code tag}, collected into a new
 * list.
 *
 * @deprecated Use {@link TestPipeline} with the {@code DirectRunner}.
 */
@Deprecated
public <T> List<T> peekOutputElements(TupleTag<T> tag) {
  // TODO: Should we return an unmodifiable list?
  return getImmutableOutput(tag).stream()
      .map(windowed -> windowed.getValue())
      .collect(Collectors.toList());
}
 
Example #21
Source File: KeyedPCollectionTuple.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Expands the component {@link PCollection PCollections} into an immutable map from each tuple
 * tag to its collection, stripping off any tag-specific information.
 */
@Override
public Map<TupleTag<?>, PValue> expand() {
  ImmutableMap.Builder<TupleTag<?>, PValue> expanded = ImmutableMap.builder();
  keyedCollections.forEach(tagged -> expanded.put(tagged.tupleTag, tagged.pCollection));
  return expanded.build();
}
 
Example #22
Source File: TranslationUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a pair function that keeps the tag of each input pair and replaces its lazily encoded
 * value with the result of {@code getOrDecode}, using the coder registered for that tag.
 *
 * @param coderMap - mapping between TupleTag and a coder
 * @return a pair function to convert bytes to value via coder
 */
public static PairFunction<
        Tuple2<TupleTag<?>, ValueAndCoderLazySerializable<WindowedValue<?>>>,
        TupleTag<?>,
        WindowedValue<?>>
    getTupleTagDecodeFunction(final Map<TupleTag<?>, Coder<WindowedValue<?>>> coderMap) {
  return taggedValue ->
      new Tuple2<>(
          taggedValue._1, taggedValue._2.getOrDecode(coderMap.get(taggedValue._1)));
}
 
Example #23
Source File: PCollectionViewTranslation.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Create a {@link PCollectionView} from a side input spec and an already-deserialized {@link
 * PCollection} that should be wired up.
 *
 * <p>The view's tag is a new {@link TupleTag} named {@code localName}. The window mapping function
 * and view function are rehydrated from {@code sideInput}. The windowing strategy and coder are
 * taken from {@code pCollection}. {@code parDoTransform} and {@code components} are accepted but
 * not read by this method.
 *
 * @param sideInput spec supplying the window mapping function and view function
 * @param localName id for the view's tag; must not be null
 * @param pCollection collection backing the view
 * @return the rehydrated view
 * @throws IllegalArgumentException if {@code localName} is null
 * @throws IOException declared by the signature; presumably raised by the proto helpers called
 *     here — their bodies are not visible in this section
 */
public static PCollectionView<?> viewFromProto(
    RunnerApi.SideInput sideInput,
    String localName,
    PCollection<?> pCollection,
    RunnerApi.PTransform parDoTransform,
    RehydratedComponents components)
    throws IOException {
  // Report the class that actually declares this method; the message previously named
  // ParDoTranslation, which sent readers of the error to the wrong file.
  checkArgument(
      localName != null,
      "%s.viewFromProto: localName must not be null",
      PCollectionViewTranslation.class.getSimpleName());
  TupleTag<?> tag = new TupleTag<>(localName);
  WindowMappingFn<?> windowMappingFn = windowMappingFnFromProto(sideInput.getWindowMappingFn());
  ViewFn<?, ?> viewFn = viewFnFromProto(sideInput.getViewFn());
  WindowingStrategy<?, ?> windowingStrategy = pCollection.getWindowingStrategy().fixDefaults();

  // Raw casts are needed because the wildcard-typed pieces cannot be related to each other
  // statically.
  PCollectionView<?> view =
      new RunnerPCollectionView<>(
          pCollection,
          (TupleTag) tag,
          (ViewFn) viewFn,
          windowMappingFn,
          windowingStrategy,
          (Coder) pCollection.getCoder());
  return view;
}
 
Example #24
Source File: KeyedPCollectionTuple.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a new {@code KeyedPCollectionTuple<K>} that is the same as this, appended with the
 * given PCollection.
 *
 * <p>If this tuple has no key coder yet, one is obtained from {@code pc} via {@code getKeyCoder}.
 *
 * @throws IllegalArgumentException if {@code pc} belongs to a different pipeline instance
 */
public <V> KeyedPCollectionTuple<K> and(TupleTag<V> tag, PCollection<KV<K, V>> pc) {
  if (pc.getPipeline() != getPipeline()) {
    throw new IllegalArgumentException("PCollections come from different Pipelines");
  }

  TaggedKeyedPCollection<K, ?> tagged = new TaggedKeyedPCollection<>(tag, pc);

  Coder<K> myKeyCoder;
  if (keyCoder != null) {
    myKeyCoder = keyCoder;
  } else {
    myKeyCoder = getKeyCoder(pc);
  }

  List<TaggedKeyedPCollection<K, ?>> extended = copyAddLast(keyedCollections, tagged);
  return new KeyedPCollectionTuple<>(
      getPipeline(), extended, schema.getTupleTagList().and(tag), myKeyCoder);
}
 
Example #25
Source File: AbstractParDoP.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the supplied configuration and prepares an empty ordinal list for every output tag.
 *
 * <p>{@code outputCollToOrdinals} gets one entry per element of {@code allOutputTags}, each
 * mapped to a new empty {@link ArrayList}. Every other argument is assigned to the field of the
 * same name unchanged.
 */
AbstractSupplier(
    String stepId,
    String ownerId,
    DoFn<InputT, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    DoFnSchemaInformation doFnSchemaInformation,
    SerializablePipelineOptions pipelineOptions,
    TupleTag<OutputT> mainOutputTag,
    Set<TupleTag<OutputT>> allOutputTags,
    Coder<InputT> inputCoder,
    Map<PCollectionView<?>, Coder<?>> sideInputCoders,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    Coder<InputT> inputValueCoder,
    Map<TupleTag<?>, Coder<?>> outputValueCoders,
    List<PCollectionView<?>> sideInputs) {
  // Identity and options.
  this.stepId = stepId;
  this.ownerId = ownerId;
  this.pipelineOptions = pipelineOptions;

  // The function and its execution settings.
  this.doFn = doFn;
  this.doFnSchemaInformation = doFnSchemaInformation;
  this.windowingStrategy = windowingStrategy;

  // Inputs.
  this.inputCoder = inputCoder;
  this.inputValueCoder = inputValueCoder;
  this.sideInputs = sideInputs;
  this.sideInputCoders = sideInputCoders;

  // Outputs.
  this.mainOutputTag = mainOutputTag;
  this.outputCoders = outputCoders;
  this.outputValueCoders = outputValueCoders;
  this.outputCollToOrdinals =
      allOutputTags.stream()
          .collect(Collectors.toMap(Function.identity(), t -> new ArrayList<>()));
}
 
Example #26
Source File: CoGbkResultTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a schema with {@code size} integer tags whose ids are {@code "tag0"}, {@code "tag1"},
 * and so on.
 */
private CoGbkResultSchema createSchema(int size) {
  List<TupleTag<?>> schemaTags = new ArrayList<>();
  int index = 0;
  while (index < size) {
    schemaTags.add(new TupleTag<Integer>("tag" + index));
    index++;
  }
  return new CoGbkResultSchema(TupleTagList.of(schemaTags));
}
 
Example #27
Source File: CoGbkResult.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a new {@link CoGbkResult} based on this, with the given tag and given data added to it.
 *
 * <p>This result is left untouched: its value list is copied before {@code data} is appended, and
 * the new result's schema is this schema's tag list extended with {@code tag}.
 *
 * @throws IllegalArgumentException if {@code nextTestUnionId} differs from the schema size
 */
public <V> CoGbkResult and(TupleTag<V> tag, List<V> data) {
  boolean builtIncrementally = nextTestUnionId == schema.size();
  if (!builtIncrementally) {
    throw new IllegalArgumentException(
        "Attempting to call and() on a CoGbkResult apparently not created by of().");
  }

  List<Iterable<?>> extendedValues = new ArrayList<>(this.valueMap);
  extendedValues.add(data);

  CoGbkResultSchema extendedSchema = new CoGbkResultSchema(schema.getTupleTagList().and(tag));
  return new CoGbkResult(extendedSchema, extendedValues, nextTestUnionId + 1);
}
 
Example #28
Source File: ReplacementOutputsTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * {@code ReplacementOutputs.singleton} must reject an original-outputs map that combines the
 * expansions of two collections.
 */
@Test
public void singletonMultipleOriginalsThrows() {
  thrown.expect(IllegalArgumentException.class);

  ImmutableMap<TupleTag<?>, PValue> twoOriginals =
      ImmutableMap.<TupleTag<?>, PValue>builder()
          .putAll(ints.expand())
          .putAll(moreInts.expand())
          .build();
  ReplacementOutputs.singleton(twoOriginals, replacementInts);
}
 
Example #29
Source File: VerifyBamIdTest.java    From dataflow-java with Apache License 2.0 5 votes vote down vote up
/**
 * Co-groups read qualities and reference frequencies by position, applies {@code
 * PileupAndJoinReads} to the joined result, and asserts on the output of each stage.
 */
@Test
public void testPileupAndJoinReads() throws Exception {
  final ReadBaseQuality quality = new ReadBaseQuality("A", 10);
  PCollection<KV<Position, ReadBaseQuality>> qualitiesByPosition = p.apply(
      "createInput", Create.of(KV.of(position1, quality)));
  PAssert.that(qualitiesByPosition).containsInAnyOrder(KV.of(position1, quality));

  PCollection<KV<Position, AlleleFreq>> freqsByPosition = p.apply(Create.of(refCountList));
  PAssert.that(freqsByPosition).containsInAnyOrder(refCountList);

  // One tag per side of the join.
  final TupleTag<ReadBaseQuality> qualityTag = new TupleTag<>();
  TupleTag<AlleleFreq> freqTag = new TupleTag<>();
  PCollection<KV<Position, CoGbkResult>> coGrouped = KeyedPCollectionTuple
      .of(qualityTag, qualitiesByPosition)
      .and(freqTag, freqsByPosition)
      .apply(CoGroupByKey.<Position>create());

  PCollection<KV<Position, ReadCounts>> countsByPosition = coGrouped.apply(
      ParDo.of(new PileupAndJoinReads(qualityTag, freqTag)));

  KV<Position, ReadCounts> firstExpected = KV.of(position1, rc1);
  KV<Position, ReadCounts> secondExpected = KV.of(position2, rc2);
  KV<Position, ReadCounts> thirdExpected = KV.of(position3, rc3);

  PAssert.that(countsByPosition)
      .containsInAnyOrder(firstExpected, secondExpected, thirdExpected);
  p.run();
}
 
Example #30
Source File: ExecutableStageDoFnOperatorTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Makes the mocked bundle throw when it is closed and expects that exception to surface as the
 * cause of the failure seen by the test.
 */
@Test
public void sdkErrorsSurfaceOnClose() throws Exception {
  TupleTag<Integer> mainTag = new TupleTag<>("main-output");
  DoFnOperator.MultiOutputOutputManagerFactory<Integer> managerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory(mainTag, VoidCoder.of());
  ExecutableStageDoFnOperator<Integer, Integer> operator =
      getOperator(mainTag, Collections.emptyList(), managerFactory);

  OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<Integer>> harness =
      new OneInputStreamOperatorTestHarness<>(operator);

  harness.open();

  @SuppressWarnings("unchecked")
  RemoteBundle remoteBundle = Mockito.mock(RemoteBundle.class);
  when(stageBundleFactory.getBundle(any(), any(), any(), any())).thenReturn(remoteBundle);

  @SuppressWarnings("unchecked")
  FnDataReceiver<WindowedValue<?>> inputReceiver = Mockito.mock(FnDataReceiver.class);
  when(remoteBundle.getInputReceivers()).thenReturn(ImmutableMap.of("input", inputReceiver));

  // Closing the bundle is where the simulated failure is raised.
  Exception closeFailure = new RuntimeException(new Exception());
  doThrow(closeFailure).when(remoteBundle).close();
  thrown.expectCause(is(closeFailure));

  operator.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow(0)));
  harness.close();
}