org.apache.beam.sdk.values.TupleTagList Java Examples

The following examples show how to use org.apache.beam.sdk.values.TupleTagList. The examples are taken from open-source projects; each header notes the source file, project, and license.
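Before the project examples, a minimal sketch of the TupleTagList API itself may help; the tag names below are illustrative and not taken from any project:

import java.util.List;
import org.apache.beam.sdk.values.TupleTag;
import org.apache.beam.sdk.values.TupleTagList;

// TupleTagList is an immutable, ordered list of TupleTags. Its most common use is
// naming the additional outputs of a multi-output ParDo via withOutputTags(main, additional).
TupleTag<String> errorTag = new TupleTag<String>("errors") {};  // anonymous subclass preserves type info
TupleTag<Integer> countTag = new TupleTag<Integer>("counts") {};

TupleTagList additional = TupleTagList.of(errorTag).and(countTag);  // of() and and() return new lists
TupleTagList none = TupleTagList.empty();  // for a multi-output ParDo with no extra outputs

int n = additional.size();                    // 2
TupleTag<?> first = additional.get(0);        // errorTag
List<TupleTag<?>> all = additional.getAll();  // the tags, in insertion order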
Example #1
Source File: PTransformMatchersTest.java    From beam with Apache License 2.0
@Test
public void parDoRequiresStableInput() {
  DoFn<Object, Object> doFnRSI =
      new DoFn<Object, Object>() {
        @RequiresStableInput
        @ProcessElement
        public void process(ProcessContext ctxt) {}
      };

  AppliedPTransform<?, ?, ?> single = getAppliedTransform(ParDo.of(doFn));
  AppliedPTransform<?, ?, ?> singleRSI = getAppliedTransform(ParDo.of(doFnRSI));
  AppliedPTransform<?, ?, ?> multi =
      getAppliedTransform(ParDo.of(doFn).withOutputTags(new TupleTag<>(), TupleTagList.empty()));
  AppliedPTransform<?, ?, ?> multiRSI =
      getAppliedTransform(
          ParDo.of(doFnRSI).withOutputTags(new TupleTag<>(), TupleTagList.empty()));

  assertThat(PTransformMatchers.requiresStableInputParDoSingle().matches(single), is(false));
  assertThat(PTransformMatchers.requiresStableInputParDoSingle().matches(singleRSI), is(true));
  assertThat(PTransformMatchers.requiresStableInputParDoSingle().matches(multi), is(false));
  assertThat(PTransformMatchers.requiresStableInputParDoSingle().matches(multiRSI), is(false));
  assertThat(PTransformMatchers.requiresStableInputParDoMulti().matches(single), is(false));
  assertThat(PTransformMatchers.requiresStableInputParDoMulti().matches(singleRSI), is(false));
  assertThat(PTransformMatchers.requiresStableInputParDoMulti().matches(multi), is(false));
  assertThat(PTransformMatchers.requiresStableInputParDoMulti().matches(multiRSI), is(true));
}
 
Example #2
Source File: SplittableDoFnTest.java    From beam with Apache License 2.0
private void testAdditionalOutput(IsBounded bounded) {
  TupleTag<String> mainOutputTag = new TupleTag<String>("main") {};
  TupleTag<String> additionalOutputTag = new TupleTag<String>("additional") {};

  PCollectionTuple res =
      p.apply("input", Create.of(0, 1, 2))
          .apply(
              ParDo.of(sdfWithAdditionalOutput(bounded, additionalOutputTag))
                  .withOutputTags(mainOutputTag, TupleTagList.of(additionalOutputTag)));

  PAssert.that(res.get(mainOutputTag))
      .containsInAnyOrder(Arrays.asList("main:0", "main:1", "main:2"));
  PAssert.that(res.get(additionalOutputTag))
      .containsInAnyOrder(Arrays.asList("additional:0", "additional:1", "additional:2"));

  p.run();
}
 
Example #3
Source File: PubSubToElasticsearch.java    From DataflowTemplates with Apache License 2.0
@Override
public PCollectionTuple expand(PCollection<PubsubMessage> input) {

  // Map the incoming messages into FailsafeElements so we can recover from failures
  // across multiple transforms.
  PCollection<FailsafeElement<PubsubMessage, String>> failsafeElements =
      input.apply("MapToRecord", ParDo.of(new PubsubMessageToFailsafeElementFn()));

  // If a Udf is supplied then use it to parse the PubSubMessages.
  if (javascriptTextTransformGcsPath() != null) {
    return failsafeElements.apply(
        "InvokeUDF",
        FailsafeJavascriptUdf.<PubsubMessage>newBuilder()
            .setFileSystemPath(javascriptTextTransformGcsPath())
            .setFunctionName(javascriptTextTransformFunctionName())
            .setSuccessTag(TRANSFORM_OUT)
            .setFailureTag(TRANSFORM_DEADLETTER_OUT)
            .build());
  } else {
    return failsafeElements.apply(
        "ProcessPubSubMessages",
        ParDo.of(new ProcessFailsafePubSubFn())
            .withOutputTags(TRANSFORM_OUT, TupleTagList.of(TRANSFORM_DEADLETTER_OUT)));
  }
}
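TRANSFORM_OUT and TRANSFORM_DEADLETTER_OUT are TupleTag constants declared elsewhere in the template class. Their declarations follow the usual anonymous-subclass pattern, roughly like this (a sketch of the pattern, not the template's exact code):

static final TupleTag<FailsafeElement<PubsubMessage, String>> TRANSFORM_OUT =
    new TupleTag<FailsafeElement<PubsubMessage, String>>() {};
static final TupleTag<FailsafeElement<PubsubMessage, String>> TRANSFORM_DEADLETTER_OUT =
    new TupleTag<FailsafeElement<PubsubMessage, String>>() {};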
 
Example #4
Source File: PubSubToMongoDB.java    From DataflowTemplates with Apache License 2.0
@Override
public PCollectionTuple expand(PCollection<PubsubMessage> input) {

  // Map the incoming messages into FailsafeElements so we can recover from failures
  // across multiple transforms.
  PCollection<FailsafeElement<PubsubMessage, String>> failsafeElements =
          input.apply("MapToRecord", ParDo.of(new PubsubMessageToFailsafeElementFn()));

  // If a Udf is supplied then use it to parse the PubSubMessages.
  if (javascriptTextTransformGcsPath() != null) {
    return failsafeElements.apply(
            "InvokeUDF",
            JavascriptTextTransformer.FailsafeJavascriptUdf.<PubsubMessage>newBuilder()
                    .setFileSystemPath(javascriptTextTransformGcsPath())
                    .setFunctionName(javascriptTextTransformFunctionName())
                    .setSuccessTag(TRANSFORM_OUT)
                    .setFailureTag(TRANSFORM_DEADLETTER_OUT)
                    .build());
  } else {
    return failsafeElements.apply(
            "ProcessPubSubMessages",
            ParDo.of(new ProcessFailsafePubSubFn())
                    .withOutputTags(TRANSFORM_OUT, TupleTagList.of(TRANSFORM_DEADLETTER_OUT)));
  }
}
 
Example #5
Source File: BigQueryConverters.java    From DataflowTemplates with Apache License 2.0
@Override
public PCollectionTuple expand(PCollection<FailsafeElement<T, String>> failsafeElements) {
  return failsafeElements.apply(
      "JsonToTableRow",
      ParDo.of(
              new DoFn<FailsafeElement<T, String>, TableRow>() {
                @ProcessElement
                public void processElement(ProcessContext context) {
                  FailsafeElement<T, String> element = context.element();
                  String json = element.getPayload();

                  try {
                    TableRow row = convertJsonToTableRow(json);
                    context.output(row);
                  } catch (Exception e) {
                    context.output(
                        failureTag(),
                        FailsafeElement.of(element)
                            .setErrorMessage(e.getMessage())
                            .setStacktrace(Throwables.getStackTraceAsString(e)));
                  }
                }
              })
          .withOutputTags(successTag(), TupleTagList.of(failureTag())));
}
 
Example #6
Source File: BatchStatefulParDoOverridesTest.java    From beam with Apache License 2.0
@Test
public void testMultiOutputOverrideNonCrashing() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setRunner(DataflowRunner.class);
  Pipeline pipeline = Pipeline.create(options);

  TupleTag<Integer> mainOutputTag = new TupleTag<Integer>() {};
  TupleTag<Integer> sideOutputTag = new TupleTag<Integer>() {};

  DummyStatefulDoFn fn = new DummyStatefulDoFn();
  pipeline
      .apply(Create.of(KV.of(1, 2)))
      .apply(ParDo.of(fn).withOutputTags(mainOutputTag, TupleTagList.of(sideOutputTag)));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  assertThat(findBatchStatefulDoFn(pipeline), equalTo((DoFn) fn));
}
 
Example #7
Source File: ParDoTranslationTest.java    From beam with Apache License 2.0
@Parameters(name = "{index}: {0}")
public static Iterable<ParDo.MultiOutput<?, ?>> data() {
  return ImmutableList.of(
      ParDo.of(new DropElementsFn()).withOutputTags(new TupleTag<>(), TupleTagList.empty()),
      ParDo.of(new DropElementsFn())
          .withOutputTags(new TupleTag<>(), TupleTagList.empty())
          .withSideInputs(singletonSideInput, multimapSideInput),
      ParDo.of(new DropElementsFn())
          .withOutputTags(
              new TupleTag<>(),
              TupleTagList.of(new TupleTag<byte[]>() {}).and(new TupleTag<Integer>() {}))
          .withSideInputs(singletonSideInput, multimapSideInput),
      ParDo.of(new DropElementsFn())
          .withOutputTags(
              new TupleTag<>(),
              TupleTagList.of(new TupleTag<byte[]>() {}).and(new TupleTag<Integer>() {})),
      ParDo.of(new SplittableDropElementsFn())
          .withOutputTags(new TupleTag<>(), TupleTagList.empty()),
      ParDo.of(new StateTimerDropElementsFn())
          .withOutputTags(new TupleTag<>(), TupleTagList.empty()));
}
 
Example #8
Source File: CsvConverters.java    From DataflowTemplates with Apache License 2.0
@Override
public PCollectionTuple expand(PBegin input) {

  if (hasHeaders()) {
    return input
        .apply("MatchFilePattern", FileIO.match().filepattern(inputFileSpec()))
        .apply("ReadMatches", FileIO.readMatches())
        .apply(
            "ReadCsvWithHeaders",
            ParDo.of(new GetCsvHeadersFn(headerTag(), lineTag(), csvFormat(), delimiter()))
                .withOutputTags(headerTag(), TupleTagList.of(lineTag())));
  }

  return PCollectionTuple.of(
      lineTag(), input.apply("ReadCsvWithoutHeaders", TextIO.read().from(inputFileSpec())));
}
 
Example #9
Source File: ParDoTest.java    From beam with Apache License 2.0
@Test
public void testTaggedOutputUnregisteredExplicitCoder() throws Exception {
  pipeline.enableAbandonedNodeEnforcement(false);

  PCollection<Integer> input = pipeline.apply(Create.of(Arrays.asList(1, 2, 3)));

  final TupleTag<Integer> mainOutputTag = new TupleTag<>("main");
  final TupleTag<TestDummy> additionalOutputTag = new TupleTag<>("unregisteredSide");
  ParDo.MultiOutput<Integer, Integer> pardo =
      ParDo.of(new TaggedOutputDummyFn(mainOutputTag, additionalOutputTag))
          .withOutputTags(mainOutputTag, TupleTagList.of(additionalOutputTag));
  PCollectionTuple outputTuple = input.apply(pardo);

  outputTuple.get(additionalOutputTag).setCoder(new TestDummyCoder());

  outputTuple.get(additionalOutputTag).apply(View.asSingleton());

  assertEquals(new TestDummyCoder(), outputTuple.get(additionalOutputTag).getCoder());
  outputTuple
      .get(additionalOutputTag)
      .finishSpecifyingOutput("ParDo", input, pardo); // Check for crashes
  assertEquals(
      new TestDummyCoder(),
      outputTuple.get(additionalOutputTag).getCoder()); // Check for corruption
}
 
Example #10
Source File: IndexerPipeline.java    From dataflow-opinion-analysis with Apache License 2.0
/**
 * Enriches a sampled subset of the given index summaries with CNLP entities.
 *
 * @param filteredIndexes the index summaries to split and selectively enrich
 * @param ratio the sampling ratio used by SplitAB to divide elements between the
 *     pass-through branch and the CNLP enrichment branch
 * @return the merged collection of enriched and pass-through summaries
 */
private static PCollection<ContentIndexSummary> enrichWithCNLP(
		PCollection<ContentIndexSummary> filteredIndexes, Float ratio) {
private static PCollection<ContentIndexSummary> enrichWithCNLP(
		PCollection<ContentIndexSummary> filteredIndexes, Float ratio) {
	
	PCollectionTuple splitAB = filteredIndexes
		.apply(ParDo.of(new SplitAB(ratio))
			.withOutputTags(PipelineTags.BranchA,  
				TupleTagList.of(PipelineTags.BranchB))); 
	
	PCollection<ContentIndexSummary> branchACol = splitAB.get(PipelineTags.BranchA);
	PCollection<ContentIndexSummary> branchBCol = splitAB.get(PipelineTags.BranchB);
	
	PCollection<ContentIndexSummary> enrichedBCol = branchBCol.apply(
		ParDo.of(new EnrichWithCNLPEntities()));
	
	// Merge the pass-through branch with the CNLP-enriched branch
	PCollectionList<ContentIndexSummary> contentIndexSummariesList = 
		PCollectionList.of(branchACol).and(enrichedBCol);
	PCollection<ContentIndexSummary> allIndexSummaries = 
		contentIndexSummariesList.apply(Flatten.<ContentIndexSummary>pCollections());

	return allIndexSummaries;
}
 
Example #11
Source File: ParDoTranslationTest.java    From beam with Apache License 2.0
@Test
public void testFinishBundle() throws Exception {
  Pipeline p = Pipeline.create();
  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));
  ParDoPayload payload =
      ParDoTranslation.translateParDo(
          ParDo.of(new FinishBundleDoFn())
              .withOutputTags(new TupleTag<>(), TupleTagList.empty()),
          PCollection.createPrimitiveOutputInternal(
              p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, StringUtf8Coder.of()),
          DoFnSchemaInformation.create(),
          TestPipeline.create(),
          sdkComponents);

  assertTrue(payload.getRequestsFinalization());
}
 
Example #12
Source File: Task.java    From beam with Apache License 2.0
static PCollectionTuple applyTransform(
    PCollection<Integer> numbers, TupleTag<Integer> numBelow100Tag,
    TupleTag<Integer> numAbove100Tag) {

  return numbers.apply(ParDo.of(new DoFn<Integer, Integer>() {

    @ProcessElement
    public void processElement(@Element Integer number, MultiOutputReceiver out) {
      if (number <= 100) {
        out.get(numBelow100Tag).output(number);
      } else {
        out.get(numAbove100Tag).output(number);
      }
    }

  }).withOutputTags(numBelow100Tag, TupleTagList.of(numAbove100Tag)));
}
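A caller retrieves both branches from the returned PCollectionTuple; a hypothetical usage, assuming numbers is a PCollection<Integer>:

TupleTag<Integer> numBelow100Tag = new TupleTag<Integer>() {};
TupleTag<Integer> numAbove100Tag = new TupleTag<Integer>() {};

PCollectionTuple outputs = applyTransform(numbers, numBelow100Tag, numAbove100Tag);
PCollection<Integer> below = outputs.get(numBelow100Tag);  // elements <= 100
PCollection<Integer> above = outputs.get(numAbove100Tag);  // elements > 100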
 
Example #13
Source File: JsonToRow.java    From beam with Apache License 2.0
@Override
public ParseResult expand(PCollection<String> jsonStrings) {

  PCollectionTuple result =
      jsonStrings.apply(
          ParDo.of(ParseWithError.create(this))
              .withOutputTags(PARSED_LINE, TupleTagList.of(PARSE_ERROR)));

  PCollection<Row> failures;

  if (getExtendedErrorInfo()) {
    failures =
        result.get(PARSE_ERROR).setRowSchema(JsonToRowWithErrFn.ERROR_ROW_WITH_ERR_MSG_SCHEMA);
  } else {
    failures = result.get(PARSE_ERROR).setRowSchema(JsonToRowWithErrFn.ERROR_ROW_SCHEMA);
  }

  return ParseResult.resultBuilder()
      .setCallingPipeline(jsonStrings.getPipeline())
      .setJsonToRowWithErrFn(this)
      .setParsedLine(result.get(PARSED_LINE).setRowSchema(this.getSchema()))
      .setFailedParse(failures)
      .build();
}
 
Example #14
Source File: PTransformTranslationTest.java    From beam with Apache License 2.0
private static AppliedPTransform<?, ?, ?> multiMultiParDo(Pipeline pipeline) {
  PCollectionView<String> view = pipeline.apply(Create.of("foo")).apply(View.asSingleton());
  PCollection<Long> input = pipeline.apply(GenerateSequence.from(0));
  ParDo.MultiOutput<Long, KV<Long, String>> parDo =
      ParDo.of(new TestDoFn())
          .withSideInputs(view)
          .withOutputTags(
              new TupleTag<KV<Long, String>>() {},
              TupleTagList.of(new TupleTag<KV<String, Long>>() {}));
  PCollectionTuple output = input.apply(parDo);

  Map<TupleTag<?>, PValue> inputs = new HashMap<>();
  inputs.putAll(parDo.getAdditionalInputs());
  inputs.putAll(input.expand());

  return AppliedPTransform
      .<PCollection<Long>, PCollectionTuple, ParDo.MultiOutput<Long, KV<Long, String>>>of(
          "MultiParDoInAndOut", inputs, output.expand(), parDo, pipeline);
}
 
Example #15
Source File: BatchStatefulParDoOverridesTest.java    From beam with Apache License 2.0
@Test
@Ignore(
    "TODO: BEAM-2902 Add support for user state in a ParDo.Multi once PTransformMatcher "
        + "exposes a way to know when the replacement is not required by checking that the "
        + "preceding ParDos to a GBK are key preserving.")
public void testFnApiMultiOutputOverrideNonCrashing() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions("--experiments=beam_fn_api");
  options.setRunner(DataflowRunner.class);
  Pipeline pipeline = Pipeline.create(options);

  TupleTag<Integer> mainOutputTag = new TupleTag<Integer>() {};
  TupleTag<Integer> sideOutputTag = new TupleTag<Integer>() {};

  DummyStatefulDoFn fn = new DummyStatefulDoFn();
  pipeline
      .apply(Create.of(KV.of(1, 2)))
      .apply(ParDo.of(fn).withOutputTags(mainOutputTag, TupleTagList.of(sideOutputTag)));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  assertThat(findBatchStatefulDoFn(pipeline), equalTo((DoFn) fn));
}
 
Example #16
Source File: Partition.java    From beam with Apache License 2.0
@Override
public PCollectionList<T> expand(PCollection<T> in) {
  final TupleTagList outputTags = partitionDoFn.getOutputTags();

  PCollectionTuple outputs =
      in.apply(
          ParDo.of(partitionDoFn)
              .withOutputTags(new TupleTag<Void>() {}, outputTags)
              .withSideInputs(partitionDoFn.getSideInputs()));

  PCollectionList<T> pcs = PCollectionList.empty(in.getPipeline());
  Coder<T> coder = in.getCoder();

  for (TupleTag<?> outputTag : outputTags.getAll()) {
    // All the tuple tags are actually TupleTag<T>
    // And all the collections are actually PCollection<T>
    @SuppressWarnings("unchecked")
    TupleTag<T> typedOutputTag = (TupleTag<T>) outputTag;
    pcs = pcs.and(outputs.get(typedOutputTag).setCoder(coder));
  }
  return pcs;
}
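For context, the Partition.of entry point whose expansion is shown above is typically applied like this (an illustrative sketch; numbers is an assumed PCollection<Integer>):

PCollectionList<Integer> parts =
    numbers.apply(Partition.of(3, (Integer n, int numPartitions) -> n % numPartitions));
PCollection<Integer> firstPartition = parts.get(0);  // elements where n % 3 == 0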
 
Example #17
Source File: Partition.java    From beam with Apache License 2.0
/**
 * Constructs a PartitionDoFn.
 *
 * @throws IllegalArgumentException if {@code numPartitions <= 0}
 */
private PartitionDoFn(
    int numPartitions,
    Contextful<Contextful.Fn<X, Integer>> ctxFn,
    Object originalFnClassForDisplayData) {
  this.ctxFn = ctxFn;
  this.originalFnClassForDisplayData = originalFnClassForDisplayData;
  if (numPartitions <= 0) {
    throw new IllegalArgumentException("numPartitions must be > 0");
  }

  this.numPartitions = numPartitions;

  TupleTagList buildOutputTags = TupleTagList.empty();
  for (int partition = 0; partition < numPartitions; partition++) {
    buildOutputTags = buildOutputTags.and(new TupleTag<X>());
  }
  outputTags = buildOutputTags;
}
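TupleTagList is immutable, so and() returns a new list rather than mutating the receiver; that is why the loop above reassigns buildOutputTags on every iteration. For a fixed count the same list could be built in one expression (illustrative):

TupleTagList threeTags = TupleTagList.empty()
    .and(new TupleTag<X>())
    .and(new TupleTag<X>())
    .and(new TupleTag<X>());  // one tag per partition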
 
Example #18
Source File: PassThroughThenCleanup.java    From beam with Apache License 2.0
@Override
public PCollection<T> expand(PCollection<T> input) {
  TupleTag<T> mainOutput = new TupleTag<>();
  TupleTag<Void> cleanupSignal = new TupleTag<>();
  PCollectionTuple outputs =
      input.apply(
          ParDo.of(new IdentityFn<T>())
              .withOutputTags(mainOutput, TupleTagList.of(cleanupSignal)));

  PCollectionView<Iterable<Void>> cleanupSignalView =
      outputs.get(cleanupSignal).setCoder(VoidCoder.of()).apply(View.asIterable());

  input
      .getPipeline()
      .apply("Create(CleanupOperation)", Create.of(cleanupOperation))
      .apply(
          "Cleanup",
          ParDo.of(
                  new DoFn<CleanupOperation, Void>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) throws Exception {
                      c.element().cleanup(new ContextContainer(c, jobIdSideInput));
                    }
                  })
              .withSideInputs(jobIdSideInput, cleanupSignalView));

  return outputs.get(mainOutput).setCoder(input.getCoder());
}
 
Example #19
Source File: PTransformMatchersTest.java    From beam with Apache License 2.0
@Test
public void parDoSplittable() {
  AppliedPTransform<?, ?, ?> parDoApplication =
      getAppliedTransform(
          ParDo.of(splittableDoFn).withOutputTags(new TupleTag<>(), TupleTagList.empty()));
  assertThat(PTransformMatchers.splittableParDo().matches(parDoApplication), is(true));

  assertThat(PTransformMatchers.stateOrTimerParDoMulti().matches(parDoApplication), is(false));
  assertThat(PTransformMatchers.splittableParDoSingle().matches(parDoApplication), is(false));
  assertThat(PTransformMatchers.stateOrTimerParDoSingle().matches(parDoApplication), is(false));
}
 
Example #20
Source File: CoGbkResultTest.java    From beam with Apache License 2.0
private CoGbkResultSchema createSchema(int size) {
  List<TupleTag<?>> tags = new ArrayList<>();
  for (int i = 0; i < size; i++) {
    tags.add(new TupleTag<Integer>("tag" + i));
  }
  return new CoGbkResultSchema(TupleTagList.of(tags));
}
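A quick check of what the helper builds, assuming CoGbkResultSchema's getTupleTagList() accessor (a sketch for orientation):

CoGbkResultSchema schema = createSchema(3);
TupleTagList tags = schema.getTupleTagList();  // tags named "tag0", "tag1", "tag2"
int size = tags.size();                        // 3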
 
Example #21
Source File: KeyedPCollectionTuple.java    From beam with Apache License 2.0
KeyedPCollectionTuple(
    Pipeline pipeline,
    List<TaggedKeyedPCollection<K, ?>> keyedCollections,
    TupleTagList tupleTagList,
    @Nullable Coder<K> keyCoder) {
  this.pipeline = pipeline;
  this.keyedCollections = keyedCollections;
  this.schema = new CoGbkResultSchema(tupleTagList);
  this.keyCoder = keyCoder;
}
 
Example #22
Source File: FlattenTest.java    From beam with Apache License 2.0
@Test
@Category(ValidatesRunner.class)
public void testFlattenMultiplePCollectionsHavingMultipleConsumers() {
  PCollection<String> input = p.apply(Create.of("AA", "BBB", "CC"));
  final TupleTag<String> outputEvenLengthTag = new TupleTag<String>() {};
  final TupleTag<String> outputOddLengthTag = new TupleTag<String>() {};

  PCollectionTuple tuple =
      input.apply(
          ParDo.of(
                  new DoFn<String, String>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) {
                      if (c.element().length() % 2 == 0) {
                        c.output(c.element());
                      } else {
                        c.output(outputOddLengthTag, c.element());
                      }
                    }
                  })
              .withOutputTags(outputEvenLengthTag, TupleTagList.of(outputOddLengthTag)));

  PCollection<String> outputEvenLength = tuple.get(outputEvenLengthTag);
  PCollection<String> outputOddLength = tuple.get(outputOddLengthTag);

  PCollection<String> outputMerged =
      PCollectionList.of(outputEvenLength).and(outputOddLength).apply(Flatten.pCollections());

  PAssert.that(outputMerged).containsInAnyOrder("AA", "BBB", "CC");
  PAssert.that(outputEvenLength).containsInAnyOrder("AA", "CC");
  PAssert.that(outputOddLength).containsInAnyOrder("BBB");

  p.run();
}
 
Example #23
Source File: FlinkRequiresStableInputTest.java    From beam with Apache License 2.0
private static Pipeline createPipeline(
    PipelineOptions options, String singleOutputPrefix, String multiOutputPrefix) {
  Pipeline p = Pipeline.create(options);

  SerializableFunction<Void, Void> firstTime =
      (SerializableFunction<Void, Void>)
          value -> {
            latch.countDown();
            return null;
          };

  PCollection<String> impulse = p.apply("CreatePCollectionOfOneValue", Create.of(VALUE));
  impulse
      .apply(
          "Single-PairWithRandomKey",
          MapElements.via(new RequiresStableInputIT.PairWithRandomKeyFn()))
      .apply(
          "Single-MakeSideEffectAndThenFail",
          ParDo.of(
              new RequiresStableInputIT.MakeSideEffectAndThenFailFn(
                  singleOutputPrefix, firstTime)));
  impulse
      .apply(
          "Multi-PairWithRandomKey",
          MapElements.via(new RequiresStableInputIT.PairWithRandomKeyFn()))
      .apply(
          "Multi-MakeSideEffectAndThenFail",
          ParDo.of(
                  new RequiresStableInputIT.MakeSideEffectAndThenFailFn(
                      multiOutputPrefix, firstTime))
              .withOutputTags(new TupleTag<>(), TupleTagList.empty()));

  return p;
}
 
Example #24
Source File: TextTableProvider.java    From beam with Apache License 2.0
@Override
public PCollection<Row> expand(PCollection<String> input) {
  PCollectionTuple rows =
      input.apply(
          ParDo.of(
                  new DoFn<String, Row>() {
                    @ProcessElement
                    public void processElement(ProcessContext context) {
                      try {
                        context.output(jsonToRow(getObjectMapper(), context.element()));
                      } catch (UnsupportedRowJsonException jsonException) {
                        if (deadLetterFile() != null) {
                          context.output(DLF_TAG, context.element());
                        } else {
                          throw new RuntimeException("Error parsing JSON", jsonException);
                        }
                      }
                    }
                  })
              .withOutputTags(
                  MAIN_TAG,
                  deadLetterFile() != null ? TupleTagList.of(DLF_TAG) : TupleTagList.empty()));

  if (deadLetterFile() != null) {
    rows.get(DLF_TAG).setCoder(StringUtf8Coder.of()).apply(writeJsonToDlf());
  }
  return rows.get(MAIN_TAG).setRowSchema(schema());
}
 
Example #25
Source File: PubsubMessageToRow.java    From beam with Apache License 2.0
@Override
public PCollectionTuple expand(PCollection<PubsubMessage> input) {
  PCollectionTuple rows =
      input.apply(
          ParDo.of(
                  useFlatSchema()
                      ? new FlatSchemaPubsubMessageToRoW(messageSchema(), useDlq())
                      : new NestedSchemaPubsubMessageToRow(messageSchema(), useDlq()))
              .withOutputTags(
                  MAIN_TAG, useDlq() ? TupleTagList.of(DLQ_TAG) : TupleTagList.empty()));
  rows.get(MAIN_TAG).setRowSchema(messageSchema());
  return rows;
}
 
Example #26
Source File: PTransformMatchersTest.java    From beam with Apache License 2.0
@Test
public void parDoMulti() {
  AppliedPTransform<?, ?, ?> parDoApplication =
      getAppliedTransform(ParDo.of(doFn).withOutputTags(new TupleTag<>(), TupleTagList.empty()));

  assertThat(PTransformMatchers.splittableParDoMulti().matches(parDoApplication), is(false));
  assertThat(PTransformMatchers.stateOrTimerParDoMulti().matches(parDoApplication), is(false));
  assertThat(PTransformMatchers.splittableParDoSingle().matches(parDoApplication), is(false));
  assertThat(PTransformMatchers.stateOrTimerParDoSingle().matches(parDoApplication), is(false));
}
 
Example #27
Source File: ParDoLifecycleTest.java    From beam with Apache License 2.0
@Test
@Category({ValidatesRunner.class, UsesStatefulParDo.class, UsesParDoLifecycle.class})
public void testFnCallSequenceStateful() {
  PCollectionList.of(p.apply("Impolite", Create.of(KV.of("a", 1), KV.of("b", 2), KV.of("a", 4))))
      .and(
          p.apply(
              "Polite", Create.of(KV.of("b", 3), KV.of("a", 5), KV.of("c", 6), KV.of("c", 7))))
      .apply(Flatten.pCollections())
      .apply(
          ParDo.of(new CallSequenceEnforcingStatefulFn<String, Integer>())
              .withOutputTags(new TupleTag<KV<String, Integer>>() {}, TupleTagList.empty()));

  p.run();
}
 
Example #28
Source File: BigQueryConverters.java    From DataflowTemplates with Apache License 2.0
@Override
public PCollectionTuple expand(PCollection<TableRow> input) {

  PCollectionTuple udfOut;

  PCollectionTuple failsafeTableRows =
      input.apply(
          "TableRowToFailsafeElement",
          ParDo.of(new TableRowToFailsafeElementFn(transformDeadletterOutTag()))
              .withOutputTags(transformOutTag(), TupleTagList.of(transformDeadletterOutTag())));

  // Use Udf to parse table rows if supplied.
  if (options().getJavascriptTextTransformGcsPath() != null) {
    udfOut =
        failsafeTableRows
            .get(transformOutTag())
            .apply(
                "ProcessFailsafeRowsUdf",
                JavascriptTextTransformer.FailsafeJavascriptUdf.<TableRow>newBuilder()
                    .setFileSystemPath(options().getJavascriptTextTransformGcsPath())
                    .setFunctionName(options().getJavascriptTextTransformFunctionName())
                    .setSuccessTag(udfOutTag())
                    .setFailureTag(udfDeadletterOutTag())
                    .build());

    PCollection<FailsafeElement<TableRow, String>> failedOut =
        PCollectionList.of(udfOut.get(udfDeadletterOutTag()))
            .and(failsafeTableRows.get(transformDeadletterOutTag()))
            .apply("FlattenFailedOut", Flatten.pCollections());

    return PCollectionTuple.of(transformOutTag(), udfOut.get(udfOutTag()))
        .and(transformDeadletterOutTag(), failedOut);
  } else {
    return failsafeTableRows;
  }
}
 
Example #29
Source File: QueryablePipelineTest.java    From beam with Apache License 2.0
@Test
public void retainOnlyPrimitivesWithOnlyPrimitivesUnchanged() {
  Pipeline p = Pipeline.create();
  p.apply("Read", Read.from(CountingSource.unbounded()))
      .apply(
          "multi-do",
          ParDo.of(new TestFn()).withOutputTags(new TupleTag<>(), TupleTagList.empty()));

  Components originalComponents = PipelineTranslation.toProto(p).getComponents();
  Collection<String> primitiveComponents =
      QueryablePipeline.getPrimitiveTransformIds(originalComponents);

  assertThat(primitiveComponents, equalTo(originalComponents.getTransformsMap().keySet()));
}