org.apache.beam.sdk.transforms.DoFnSchemaInformation Java Examples

The following examples show how to use org.apache.beam.sdk.transforms.DoFnSchemaInformation. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DoFnInfo.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link DoFnInfo} for the given {@link DoFn}.
 *
 * <p>Every argument, including the explicit output coders, is handed to the {@link DoFnInfo}
 * constructor unchanged and in the same order.
 */
public static <InputT, OutputT> DoFnInfo<InputT, OutputT> forFn(
    DoFn<InputT, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    Iterable<PCollectionView<?>> sideInputViews,
    Coder<InputT> inputCoder,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    TupleTag<OutputT> mainOutput,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {
  DoFnInfo<InputT, OutputT> info =
      new DoFnInfo<>(
          doFn,
          windowingStrategy,
          sideInputViews,
          inputCoder,
          outputCoders,
          mainOutput,
          doFnSchemaInformation,
          sideInputMapping);
  return info;
}
 
Example #2
Source File: StreamingSideInputDoFnRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the runner under test: a simple {@link DoFnRunner} around a {@code SideInputFn} for the
 * given views, wrapped in a {@link StreamingSideInputDoFnRunner} that uses the given fetcher.
 */
@SuppressWarnings("unchecked")
private <ReceiverT> StreamingSideInputDoFnRunner<String, String, IntervalWindow> createRunner(
    WindowFn<?, ?> windowFn,
    DoFnRunners.OutputManager outputManager,
    List<PCollectionView<String>> views,
    StreamingSideInputFetcher<String, IntervalWindow> sideInputFetcher)
    throws Exception {
  // The wrapped runner is created with an empty list of additional output tags.
  List<TupleTag<?>> additionalOutputTags = Arrays.<TupleTag<?>>asList();
  DoFnRunner<String, String> delegate =
      DoFnRunners.simpleRunner(
          PipelineOptionsFactory.create(),
          new SideInputFn(views),
          mockSideInputReader,
          outputManager,
          mainOutputTag,
          additionalOutputTags,
          stepContext,
          null,
          Collections.emptyMap(),
          WindowingStrategy.of(windowFn),
          DoFnSchemaInformation.create(),
          Collections.emptyMap());
  return new StreamingSideInputDoFnRunner<>(delegate, sideInputFetcher);
}
 
Example #3
Source File: ParDoTranslationTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Translates the {@code parDo} fixture to a {@link ParDoPayload} and checks that the DoFn, the main
 * output tag and every side input can be read back from it, that finalization is not requested,
 * and that the stateful-processing requirement is registered exactly when the DoFn is a {@code
 * StateTimerDropElementsFn}.
 */
@Test
public void testToProto() throws Exception {
  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));
  ParDoPayload translated =
      ParDoTranslation.translateParDo(
          parDo,
          PCollection.createPrimitiveOutputInternal(
              p,
              WindowingStrategy.globalDefault(),
              IsBounded.BOUNDED,
              KvCoder.of(VarLongCoder.of(), StringUtf8Coder.of())),
          DoFnSchemaInformation.create(),
          p,
          sdkComponents);

  assertThat(ParDoTranslation.getDoFn(translated), equalTo(parDo.getFn()));
  assertThat(ParDoTranslation.getMainOutputTag(translated), equalTo(parDo.getMainOutputTag()));
  // The lookup itself is the check: getSideInputsOrThrow fails for a missing side input id.
  for (PCollectionView<?> sideInput : parDo.getSideInputs().values()) {
    translated.getSideInputsOrThrow(sideInput.getTagInternal().getId());
  }
  assertFalse(translated.getRequestsFinalization());

  boolean expectsStatefulRequirement = parDo.getFn() instanceof StateTimerDropElementsFn;
  assertEquals(
      expectsStatefulRequirement,
      sdkComponents.requirements().contains(ParDoTranslation.REQUIRES_STATEFUL_PROCESSING_URN));
}
 
Example #4
Source File: FlinkStatefulDoFnFunctionTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Test constructor that forwards every argument to the superclass constructor, supplying a fresh
 * {@code IdentityFn} as the DoFn in the first position.
 *
 * <p>NOTE(review): the parameters use raw types ({@code Map}, {@code TupleTag}, {@code Coder},
 * {@code WindowingStrategy}); presumably this keeps the test helper short. Confirm before adding
 * type arguments.
 */
public TestDoFnFunction(
    String stepName,
    WindowingStrategy windowingStrategy,
    Map sideInputs,
    PipelineOptions options,
    Map outputMap,
    TupleTag mainOutputTag,
    Coder inputCoder,
    Map outputCoderMap,
    DoFnSchemaInformation doFnSchemaInformation,
    Map sideInputMapping) {
  // The DoFn is fixed; everything else is passed through in the order received.
  super(
      new IdentityFn(),
      stepName,
      windowingStrategy,
      sideInputs,
      options,
      outputMap,
      mainOutputTag,
      inputCoder,
      outputCoderMap,
      doFnSchemaInformation,
      sideInputMapping);
}
 
Example #5
Source File: DoFnInstanceManagersTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a single-instance manager keeps handing out the very same {@link DoFnInfo} (and
 * DoFn) even after it has been aborted twice.
 */
@Test
public void testInstanceIgnoresAbort() throws Exception {
  DoFnInfo<?, ?> initialInfo =
      DoFnInfo.forFn(
          initialFn,
          WindowingStrategy.globalDefault(),
          null /* side input views */,
          null /* input coder */,
          new TupleTag<>(PropertyNames.OUTPUT) /* main output id */,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  DoFnInstanceManager manager = DoFnInstanceManagers.singleInstance(initialInfo);
  manager.abort(manager.get());
  // A second abort must be harmless: TestFn#teardown would fail the test after multiple calls.
  manager.abort(manager.get());
  // What the manager returns is still the initial info, wrapping the initial fn.
  assertThat(manager.get(), Matchers.<DoFnInfo<?, ?>>theInstance(initialInfo));
  assertThat(manager.get().getDoFn(), theInstance(initialFn));
}
 
Example #6
Source File: ParDoTranslation.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Translates an applied multi-output {@link ParDo} into a {@link ParDoPayload}.
 *
 * <p>The main input is the single input whose tag id is not the tag id of one of the ParDo's side
 * inputs. Schema information is derived from the {@link DoFn} and that main input, and the work is
 * then delegated to the five-argument overload.
 *
 * @throws IOException declared here; presumably propagated from the delegated translation
 */
public static ParDoPayload translateParDo(
    AppliedPTransform<?, ?, ParDo.MultiOutput<?, ?>> appliedPTransform, SdkComponents components)
    throws IOException {
  final ParDo.MultiOutput<?, ?> parDo = appliedPTransform.getTransform();

  // Tag ids of every input of the applied transform, main and side inputs alike.
  Set<String> inputTagIds =
      appliedPTransform.getInputs().keySet().stream()
          .map(TupleTag::getId)
          .collect(Collectors.toSet());
  // Tag ids that belong to the ParDo's side inputs.
  Set<String> sideInputTagIds =
      parDo.getSideInputs().values().stream()
          .map(view -> view.getTagInternal().getId())
          .collect(Collectors.toSet());

  // Exactly one id must remain once the side inputs are removed: that is the main input.
  String mainInputTagId = Iterables.getOnlyElement(Sets.difference(inputTagIds, sideInputTagIds));
  PCollection<?> mainInput =
      (PCollection<?>) appliedPTransform.getInputs().get(new TupleTag<>(mainInputTagId));

  final DoFn<?, ?> doFn = parDo.getFn();
  final DoFnSchemaInformation schemaInformation = ParDo.getDoFnSchemaInformation(doFn, mainInput);
  final Pipeline pipeline = appliedPTransform.getPipeline();
  return translateParDo(
      (ParDo.MultiOutput) parDo, mainInput, schemaInformation, pipeline, components);
}
 
Example #7
Source File: DoFnInstanceManagersTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a cloning pool hands out a clone rather than the original info, and that an info
 * returned via {@code complete} is the one handed out again by the next {@code get}.
 */
@Test
public void testCloningPoolReusesAfterComplete() throws Exception {
  DoFnInfo<?, ?> originalInfo =
      DoFnInfo.forFn(
          initialFn,
          WindowingStrategy.globalDefault(),
          null /* side input views */,
          null /* input coder */,
          new TupleTag<>(PropertyNames.OUTPUT) /* main output id */,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  DoFnInstanceManager pool = DoFnInstanceManagers.cloningPool(originalInfo);

  // First retrieval: neither the info nor its DoFn is the original instance.
  DoFnInfo<?, ?> firstInfo = pool.get();
  assertThat(firstInfo, not(Matchers.<DoFnInfo<?, ?>>theInstance(originalInfo)));
  assertThat(firstInfo.getDoFn(), not(theInstance(originalInfo.getDoFn())));

  // After completing, the same instances come back.
  pool.complete(firstInfo);
  DoFnInfo<?, ?> reusedInfo = pool.get();
  assertThat(reusedInfo, Matchers.<DoFnInfo<?, ?>>theInstance(firstInfo));
  assertThat(reusedInfo.getDoFn(), theInstance(firstInfo.getDoFn()));
}
 
Example #8
Source File: SimplePushbackSideInputDoFnRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Wraps the given {@link DoFn} in a {@link SimpleDoFnRunner} configured with an empty side input
 * reader, the mock step context, the shared windowing strategy and otherwise null or empty
 * collaborators.
 */
private DoFnRunner<KV<String, Integer>, Integer> getDoFnRunner(
    DoFn<KV<String, Integer>, Integer> fn) {
  DoFnRunner<KV<String, Integer>, Integer> runner =
      new SimpleDoFnRunner<>(
          null,
          fn,
          NullSideInputReader.empty(),
          null,
          null,
          Collections.emptyList(),
          mockStepContext,
          null,
          Collections.emptyMap(),
          WINDOWING_STRATEGY,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());
  return runner;
}
 
Example #9
Source File: FlinkDoFnFunctionTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Test constructor that forwards every argument to the superclass constructor, supplying a fresh
 * {@code IdentityFn} as the DoFn in the first position.
 *
 * <p>NOTE(review): the parameters use raw types ({@code Map}, {@code TupleTag}, {@code Coder},
 * {@code WindowingStrategy}); presumably this keeps the test helper short. Confirm before adding
 * type arguments.
 */
public TestDoFnFunction(
    String stepName,
    WindowingStrategy windowingStrategy,
    Map sideInputs,
    PipelineOptions options,
    Map outputMap,
    TupleTag mainOutputTag,
    Coder inputCoder,
    Map outputCoderMap,
    DoFnSchemaInformation doFnSchemaInformation,
    Map sideInputMapping) {
  // The DoFn is fixed; everything else is passed through in the order received.
  super(
      new IdentityFn(),
      stepName,
      windowingStrategy,
      sideInputs,
      options,
      outputMap,
      mainOutputTag,
      inputCoder,
      outputCoderMap,
      doFnSchemaInformation,
      sideInputMapping);
}
 
Example #10
Source File: StatefulDoFnRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Wraps the given {@link DoFn} in a {@link SimpleDoFnRunner}.
 *
 * @param fn the DoFn to run
 * @param outputManager where output goes; when {@code null}, the manager returned by {@code
 *     discardingOutputManager()} is used instead
 */
private DoFnRunner<KV<String, Integer>, Integer> getDoFnRunner(
    DoFn<KV<String, Integer>, Integer> fn, @Nullable OutputManager outputManager) {
  OutputManager effectiveOutputManager =
      MoreObjects.firstNonNull(outputManager, discardingOutputManager());
  return new SimpleDoFnRunner<>(
      null,
      fn,
      NullSideInputReader.empty(),
      effectiveOutputManager,
      outputTag,
      Collections.emptyList(),
      mockStepContext,
      null,
      Collections.emptyMap(),
      WINDOWING_STRATEGY,
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
 
Example #11
Source File: FlinkStreamingTransformTranslators.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Bodiless factory declaration: implementations return the {@link DoFnOperator} for a {@link
 * DoFn}, given its step name, side inputs, output tags and coders, windowing strategy, optional
 * key coder/selector, schema information and side input mapping.
 *
 * <p>NOTE(review): the enclosing type is not visible here; which arguments may be {@code null}
 * (for example {@code keyCoder} and {@code keySelector} for non-keyed input) must be confirmed
 * against the implementations.
 */
DoFnOperator<InputT, OutputT> createDoFnOperator(
DoFn<InputT, OutputT> doFn,
String stepName,
List<PCollectionView<?>> sideInputs,
TupleTag<OutputT> mainOutputTag,
List<TupleTag<?>> additionalOutputTags,
FlinkStreamingTranslationContext context,
WindowingStrategy<?, ?> windowingStrategy,
Map<TupleTag<?>, OutputTag<WindowedValue<?>>> tagsToOutputTags,
Map<TupleTag<?>, Coder<WindowedValue<?>>> tagsToCoders,
Map<TupleTag<?>, Integer> tagsToIds,
Coder<WindowedValue<InputT>> windowedInputCoder,
Map<TupleTag<?>, Coder<?>> outputCoders,
Coder keyCoder,
KeySelector<WindowedValue<InputT>, ?> keySelector,
Map<Integer, PCollectionView<?>> transformedSideInputs,
DoFnSchemaInformation doFnSchemaInformation,
Map<String, PCollectionView<?>> sideInputMapping);
 
Example #12
Source File: ParDoTranslationTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Translates a ParDo over {@code StartBundleDoFn} and asserts that the resulting payload requests
 * finalization.
 */
@Test
public void testStartBundle() throws Exception {
  Pipeline pipeline = Pipeline.create();
  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("java"));

  PCollection<String> mainInput =
      PCollection.createPrimitiveOutputInternal(
          pipeline, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, StringUtf8Coder.of());
  ParDoPayload translated =
      ParDoTranslation.translateParDo(
          ParDo.of(new StartBundleDoFn())
              .withOutputTags(new TupleTag<>(), TupleTagList.empty()),
          mainInput,
          DoFnSchemaInformation.create(),
          TestPipeline.create(),
          components);

  assertTrue(translated.getRequestsFinalization());
}
 
Example #13
Source File: DoFnInfo.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link DoFnInfo} for the given {@link DoFn} without explicit output coders; an empty
 * map is passed to the constructor in their place.
 *
 * <p>Kept for backwards compatibility with the Dataflow runner. Remove this overload once the
 * Dataflow runner has been updated to use the new constructor.
 */
public static <InputT, OutputT> DoFnInfo<InputT, OutputT> forFn(
    DoFn<InputT, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    Iterable<PCollectionView<?>> sideInputViews,
    Coder<InputT> inputCoder,
    TupleTag<OutputT> mainOutput,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {
  DoFnInfo<InputT, OutputT> info =
      new DoFnInfo<>(
          doFn,
          windowingStrategy,
          sideInputViews,
          inputCoder,
          Collections.emptyMap(),
          mainOutput,
          doFnSchemaInformation,
          sideInputMapping);
  return info;
}
 
Example #14
Source File: CombineValuesFnFactory.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the {@link DoFnInfo} for an {@code ExtractOutputDoFn} that runs the given combine fn.
 *
 * <p>The input coder pairs the combine fn's key coder with its accumulator coder; it stays {@code
 * null} when the combine fn has no KV coder.
 */
private static <K, InputT, AccumT, OutputT> DoFnInfo<?, ?> createDoFnInfo(
    AppliedCombineFn<K, InputT, AccumT, OutputT> combineFn, SideInputReader sideInputReader) {
  GlobalCombineFnRunner<InputT, AccumT, OutputT> runner =
      GlobalCombineFnRunners.create(combineFn.getFn());
  DoFn<KV<K, AccumT>, KV<K, OutputT>> extractOutputFn =
      new ExtractOutputDoFn<>(runner, sideInputReader);

  KvCoder<K, AccumT> accumulatorKvCoder =
      combineFn.getKvCoder() == null
          ? null
          : KvCoder.of(combineFn.getKvCoder().getKeyCoder(), combineFn.getAccumulatorCoder());

  return DoFnInfo.forFn(
      extractOutputFn,
      combineFn.getWindowingStrategy(),
      combineFn.getSideInputViews(),
      accumulatorKvCoder,
      Collections.emptyMap(), // Not needed here.
      new TupleTag<>(PropertyNames.OUTPUT),
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
 
Example #15
Source File: ParDoTranslationTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Translates a ParDo over {@code ProcessContextDoFn} and asserts that the resulting payload
 * requests finalization.
 */
@Test
public void testProcessContext() throws Exception {
  Pipeline pipeline = Pipeline.create();
  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("java"));

  PCollection<String> mainInput =
      PCollection.createPrimitiveOutputInternal(
          pipeline, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, StringUtf8Coder.of());
  ParDoPayload translated =
      ParDoTranslation.translateParDo(
          ParDo.of(new ProcessContextDoFn())
              .withOutputTags(new TupleTag<>(), TupleTagList.empty()),
          mainInput,
          DoFnSchemaInformation.create(),
          TestPipeline.create(),
          components);

  assertTrue(translated.getRequestsFinalization());
}
 
Example #16
Source File: ParDoTranslationTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Translates a ParDo over {@code FinishBundleDoFn} and asserts that the resulting payload requests
 * finalization.
 */
@Test
public void testFinishBundle() throws Exception {
  Pipeline pipeline = Pipeline.create();
  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("java"));

  PCollection<String> mainInput =
      PCollection.createPrimitiveOutputInternal(
          pipeline, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, StringUtf8Coder.of());
  ParDoPayload translated =
      ParDoTranslation.translateParDo(
          ParDo.of(new FinishBundleDoFn())
              .withOutputTags(new TupleTag<>(), TupleTagList.empty()),
          mainInput,
          DoFnSchemaInformation.create(),
          TestPipeline.create(),
          components);

  assertTrue(translated.getRequestsFinalization());
}
 
Example #17
Source File: FlinkStatefulDoFnFunction.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Stores the DoFn together with everything needed to run it later. Every argument is kept as
 * given, except the pipeline options, which are wrapped in a {@link SerializablePipelineOptions}.
 */
public FlinkStatefulDoFnFunction(
    DoFn<KV<K, V>, OutputT> dofn,
    String stepName,
    WindowingStrategy<?, ?> windowingStrategy,
    Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
    PipelineOptions pipelineOptions,
    Map<TupleTag<?>, Integer> outputMap,
    TupleTag<OutputT> mainOutputTag,
    Coder<KV<K, V>> inputCoder,
    Map<TupleTag<?>, Coder<?>> outputCoderMap,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {

  // The user function and its identity.
  this.dofn = dofn;
  this.stepName = stepName;
  this.doFnSchemaInformation = doFnSchemaInformation;
  this.serializedOptions = new SerializablePipelineOptions(pipelineOptions);

  // Input side: windowing, coder and side inputs.
  this.windowingStrategy = windowingStrategy;
  this.inputCoder = inputCoder;
  this.sideInputs = sideInputs;
  this.sideInputMapping = sideInputMapping;

  // Output side: tags and coders.
  this.mainOutputTag = mainOutputTag;
  this.outputMap = outputMap;
  this.outputCoderMap = outputCoderMap;
}
 
Example #18
Source File: DoFnInstanceManagersTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that aborting an info obtained from a cloning pool tears its DoFn down, and that the next
 * {@code get} returns a different info whose DoFn has not been torn down.
 */
@Test
public void testCloningPoolTearsDownAfterAbort() throws Exception {
  DoFnInfo<?, ?> originalInfo =
      DoFnInfo.forFn(
          initialFn,
          WindowingStrategy.globalDefault(),
          null /* side input views */,
          null /* input coder */,
          new TupleTag<>(PropertyNames.OUTPUT) /* main output id */,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  DoFnInstanceManager pool = DoFnInstanceManagers.cloningPool(originalInfo);
  DoFnInfo<?, ?> abortedInfo = pool.get();

  // Aborting must tear the retrieved DoFn down.
  pool.abort(abortedInfo);
  TestFn abortedFn = (TestFn) abortedInfo.getDoFn();
  assertThat(abortedFn.tornDown, is(true));

  // The aborted instance is not handed out again; the replacement is still usable.
  DoFnInfo<?, ?> replacementInfo = pool.get();
  assertThat(replacementInfo, not(Matchers.<DoFnInfo<?, ?>>theInstance(abortedInfo)));
  assertThat(replacementInfo.getDoFn(), not(theInstance(abortedInfo.getDoFn())));
  assertThat(((TestFn) replacementInfo.getDoFn()).tornDown, is(false));
}
 
Example #19
Source File: FlinkPipelineOptionsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Expects constructing a {@link DoFnOperator} to throw when {@code null} is passed in the position
 * just before the key coder.
 *
 * <p>NOTE(review): judging by the test name, that position is the pipeline options argument;
 * confirm against the {@code DoFnOperator} constructor.
 */
@Test(expected = Exception.class)
public void parDoBaseClassPipelineOptionsNullTest() {
  TupleTag<String> mainOutput = new TupleTag<>("main-output");
  Coder<WindowedValue<String>> windowedCoder =
      WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());
  new DoFnOperator<>(
      new TestDoFn(),
      "stepName",
      windowedCoder,
      Collections.emptyMap(),
      mainOutput,
      Collections.emptyList(),
      new DoFnOperator.MultiOutputOutputManagerFactory<>(mainOutput, windowedCoder),
      WindowingStrategy.globalDefault(),
      new HashMap<>(),
      Collections.emptyList(),
      null,
      null /* key coder */,
      null /* key selector */,
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
 
Example #20
Source File: SimpleDoFnRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the exception thrown by {@code ThrowingDoFn} during {@code finishBundle} surfaces as
 * a {@link UserCodeException} whose cause is the original exception.
 */
@Test
public void testFinishBundleExceptionsWrappedAsUserCodeException() {
  ThrowingDoFn throwingFn = new ThrowingDoFn();
  DoFnRunner<String, String> runnerUnderTest =
      new SimpleDoFnRunner<>(
          null,
          throwingFn,
          NullSideInputReader.empty(),
          null,
          null,
          Collections.emptyList(),
          mockStepContext,
          null,
          Collections.emptyMap(),
          WindowingStrategy.of(new GlobalWindows()),
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  // Expectations must be registered before the call that throws.
  thrown.expect(UserCodeException.class);
  thrown.expectCause(is(throwingFn.exceptionToThrow));

  runnerUnderTest.finishBundle();
}
 
Example #21
Source File: CombineValuesFnFactory.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the {@link DoFnInfo} for a {@code CombineValuesDoFn} that runs the given combine fn.
 *
 * <p>The input coder pairs the combine fn's key coder with an iterable coder over its value coder;
 * it stays {@code null} when the combine fn has no KV coder.
 */
private static <K, InputT, AccumT, OutputT> DoFnInfo<?, ?> createDoFnInfo(
    AppliedCombineFn<K, InputT, AccumT, OutputT> combineFn, SideInputReader sideInputReader) {
  GlobalCombineFnRunner<InputT, AccumT, OutputT> runner =
      GlobalCombineFnRunners.create(combineFn.getFn());
  DoFn<KV<K, Iterable<InputT>>, KV<K, OutputT>> combineValuesFn =
      new CombineValuesDoFn<>(runner, sideInputReader);

  Coder<KV<K, Iterable<InputT>>> groupedInputCoder =
      combineFn.getKvCoder() == null
          ? null
          : KvCoder.of(
              combineFn.getKvCoder().getKeyCoder(),
              IterableCoder.of(combineFn.getKvCoder().getValueCoder()));

  return DoFnInfo.forFn(
      combineValuesFn,
      combineFn.getWindowingStrategy(),
      combineFn.getSideInputViews(),
      groupedInputCoder,
      Collections.emptyMap(), // Not needed here.
      new TupleTag<>(PropertyNames.OUTPUT),
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
 
Example #22
Source File: DoFnOperatorTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a {@link DoFnOperator} around an {@code IdentityDoFn} in a one-input test harness and
 * checks that the single element fed in ("Hello" in the global window) is the only output.
 */
@Test
@SuppressWarnings("unchecked")
public void testSingleOutput() throws Exception {

  TupleTag<String> mainOutput = new TupleTag<>("main-output");
  Coder<WindowedValue<String>> windowedCoder =
      WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());

  DoFnOperator<String, String> operator =
      new DoFnOperator<>(
          new IdentityDoFn<>(),
          "stepName",
          windowedCoder,
          Collections.emptyMap(),
          mainOutput,
          Collections.emptyList(),
          new DoFnOperator.MultiOutputOutputManagerFactory<>(mainOutput, windowedCoder),
          WindowingStrategy.globalDefault(),
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          PipelineOptionsFactory.as(FlinkPipelineOptions.class),
          null,
          null,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> harness =
      new OneInputStreamOperatorTestHarness<>(operator);

  harness.open();

  harness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("Hello")));

  // The identity fn must emit exactly what it received.
  assertThat(
      stripStreamRecordFromWindowedValue(harness.getOutput()),
      contains(WindowedValue.valueInGlobalWindow("Hello")));

  harness.close();
}
 
Example #23
Source File: ParDoMultiOverrideFactory.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the DoFn, its main and additional output tags, its side inputs, the schema information
 * and the side input mapping exactly as given.
 */
public GbkThenStatefulParDo(
    DoFn<KV<K, InputT>, OutputT> doFn,
    TupleTag<OutputT> mainOutputTag,
    TupleTagList additionalOutputTags,
    List<PCollectionView<?>> sideInputs,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {
  this.doFn = doFn;
  this.doFnSchemaInformation = doFnSchemaInformation;

  // Output tags.
  this.mainOutputTag = mainOutputTag;
  this.additionalOutputTags = additionalOutputTags;

  // Side inputs and their mapping.
  this.sideInputs = sideInputs;
  this.sideInputMapping = sideInputMapping;
}
 
Example #24
Source File: StatefulParDoP.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a supplier for a ParDo whose {@link DoFn} consumes {@code KV} elements.
 *
 * <p>All arguments are forwarded, unchanged and in the same order, to the superclass constructor;
 * this constructor adds no state of its own.
 */
public Supplier(
    String stepId,
    String ownerId,
    DoFn<KV<?, ?>, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    DoFnSchemaInformation doFnSchemaInformation,
    SerializablePipelineOptions pipelineOptions,
    TupleTag<OutputT> mainOutputTag,
    Set<TupleTag<OutputT>> allOutputTags,
    Coder<KV<?, ?>> inputCoder,
    Map<PCollectionView<?>, Coder<?>> sideInputCoders,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    Coder<KV<?, ?>> inputValueCoder,
    Map<TupleTag<?>, Coder<?>> outputValueCoders,
    List<PCollectionView<?>> sideInputs) {
  super(
      stepId,
      ownerId,
      doFn,
      windowingStrategy,
      doFnSchemaInformation,
      pipelineOptions,
      mainOutputTag,
      allOutputTags,
      inputCoder,
      sideInputCoders,
      outputCoders,
      inputValueCoder,
      outputValueCoders,
      sideInputs);
}
 
Example #25
Source File: ParDoP.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a supplier for a ParDo over elements of type {@code InputT}.
 *
 * <p>All arguments are forwarded, unchanged and in the same order, to the superclass constructor;
 * this constructor adds no state of its own.
 */
public Supplier(
    String stepId,
    String ownerId,
    DoFn<InputT, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    DoFnSchemaInformation doFnSchemaInformation,
    SerializablePipelineOptions pipelineOptions,
    TupleTag<OutputT> mainOutputTag,
    Set<TupleTag<OutputT>> allOutputTags,
    Coder<InputT> inputCoder,
    Map<PCollectionView<?>, Coder<?>> sideInputCoders,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    Coder<InputT> inputValueCoder,
    Map<TupleTag<?>, Coder<?>> outputValueCoders,
    List<PCollectionView<?>> sideInputs) {
  super(
      stepId,
      ownerId,
      doFn,
      windowingStrategy,
      doFnSchemaInformation,
      pipelineOptions,
      mainOutputTag,
      allOutputTags,
      inputCoder,
      sideInputCoders,
      outputCoders,
      inputValueCoder,
      outputValueCoders,
      sideInputs);
}
 
Example #26
Source File: AbstractParDoP.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the supplied configuration as given and prepares {@code outputCollToOrdinals} with one
 * empty, mutable list per output tag in {@code allOutputTags}.
 */
AbstractSupplier(
    String stepId,
    String ownerId,
    DoFn<InputT, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    DoFnSchemaInformation doFnSchemaInformation,
    SerializablePipelineOptions pipelineOptions,
    TupleTag<OutputT> mainOutputTag,
    Set<TupleTag<OutputT>> allOutputTags,
    Coder<InputT> inputCoder,
    Map<PCollectionView<?>, Coder<?>> sideInputCoders,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    Coder<InputT> inputValueCoder,
    Map<TupleTag<?>, Coder<?>> outputValueCoders,
    List<PCollectionView<?>> sideInputs) {
  // Identity and options.
  this.stepId = stepId;
  this.ownerId = ownerId;
  this.pipelineOptions = pipelineOptions;

  // The user function and how it is applied.
  this.doFn = doFn;
  this.windowingStrategy = windowingStrategy;
  this.doFnSchemaInformation = doFnSchemaInformation;

  // Input side.
  this.inputCoder = inputCoder;
  this.inputValueCoder = inputValueCoder;
  this.sideInputs = sideInputs;
  this.sideInputCoders = sideInputCoders;

  // Output side: each tag starts out with an empty list of ordinals.
  this.mainOutputTag = mainOutputTag;
  this.outputCoders = outputCoders;
  this.outputValueCoders = outputValueCoders;
  this.outputCollToOrdinals =
      allOutputTags.stream()
          .collect(Collectors.toMap(Function.identity(), tag -> new ArrayList<>()));
}
 
Example #27
Source File: AbstractParDoP.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Hook for subclasses: returns the {@link DoFnRunner} that will execute {@code doFn}, given the
 * pipeline options, side input reader, output manager, output tags, value coders, windowing
 * strategy, schema information and side input mapping.
 *
 * <p>NOTE(review): when and how often this is invoked is not visible from this declaration;
 * confirm against the enclosing class.
 */
protected abstract DoFnRunner<InputT, OutputT> getDoFnRunner(
PipelineOptions pipelineOptions,
DoFn<InputT, OutputT> doFn,
SideInputReader sideInputReader,
JetOutputManager outputManager,
TupleTag<OutputT> mainOutputTag,
List<TupleTag<?>> additionalOutputTags,
Coder<InputT> inputValueCoder,
Map<TupleTag<?>, Coder<?>> outputValueCoders,
WindowingStrategy<?, ?> windowingStrategy,
DoFnSchemaInformation doFnSchemaInformation,
Map<String, PCollectionView<?>> sideInputMapping);
 
Example #28
Source File: AbstractParDoP.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the processor configuration.
 *
 * <p>Three values are derived rather than stored as given: the DoFn is passed through {@code
 * Utils.serde}, each side input coder is replaced by the coder that {@code
 * Utils.deriveIterableValueCoder} derives from it, and {@code cooperative} is computed from the
 * pipeline options and {@code hasOutput()}.
 */
AbstractParDoP(
    DoFn<InputT, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<TupleTag<?>, int[]> outputCollToOrdinals,
    SerializablePipelineOptions pipelineOptions,
    TupleTag<OutputT> mainOutputTag,
    Coder<InputT> inputCoder,
    Map<PCollectionView<?>, Coder<?>> sideInputCoders,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    Coder<InputT> inputValueCoder,
    Map<TupleTag<?>, Coder<?>> outputValueCoders,
    Map<Integer, PCollectionView<?>> ordinalToSideInput,
    String ownerId,
    String stepId) {
  // Identity and options.
  this.ownerId = ownerId;
  this.stepId = stepId;
  this.pipelineOptions = pipelineOptions;

  // The user function and how it is applied.
  this.doFn = Utils.serde(doFn);
  this.windowingStrategy = windowingStrategy;
  this.doFnSchemaInformation = doFnSchemaInformation;

  // Input side. Side input coders are expected to be full windowed-value coders (see the cast).
  this.inputCoder = inputCoder;
  this.inputValueCoder = inputValueCoder;
  this.ordinalToSideInput = ordinalToSideInput;
  this.sideInputCoders =
      sideInputCoders.entrySet().stream()
          .collect(
              Collectors.toMap(
                  Map.Entry::getKey,
                  entry ->
                      Utils.deriveIterableValueCoder(
                          (WindowedValue.FullWindowedValueCoder) entry.getValue())));

  // Output side.
  this.mainOutputTag = mainOutputTag;
  this.outputCollToOrdinals = outputCollToOrdinals;
  this.outputCoders = outputCoders;
  this.outputValueCoders = outputValueCoders;

  // Kept last: hasOutput() may read fields assigned above.
  this.cooperative = isCooperativenessAllowed(pipelineOptions) && hasOutput();
}
 
Example #29
Source File: SimpleDoFnRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Shows that outputting an element with a timestamp earlier than the current one always succeeds
 * when {@link DoFn#getAllowedTimestampSkew()} equals {@link Long#MAX_VALUE} milliseconds.
 */
@Test
public void testInfiniteSkew() {
  SkewingDoFn skewingFn = new SkewingDoFn(Duration.millis(Long.MAX_VALUE));
  DoFnRunner<Duration, Duration> skewRunner =
      new SimpleDoFnRunner<>(
          null,
          skewingFn,
          NullSideInputReader.empty(),
          new ListOutputManager(),
          new TupleTag<>(),
          Collections.emptyList(),
          mockStepContext,
          null,
          Collections.emptyMap(),
          WindowingStrategy.of(new GlobalWindows()),
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  skewRunner.startBundle();
  skewRunner.processElement(
      WindowedValue.timestampedValueInGlobalWindow(Duration.millis(1L), new Instant(0)));
  skewRunner.processElement(
      WindowedValue.timestampedValueInGlobalWindow(
          Duration.millis(1L), BoundedWindow.TIMESTAMP_MIN_VALUE.plus(Duration.millis(1))));

  // This is the maximum amount a timestamp in beam can move (from the maximum timestamp
  // to the minimum timestamp).
  Duration maximumShift =
      Duration.millis(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis())
          .minus(Duration.millis(BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis()));
  skewRunner.processElement(
      WindowedValue.timestampedValueInGlobalWindow(
          maximumShift, BoundedWindow.TIMESTAMP_MAX_VALUE));
}
 
Example #30
Source File: SimpleDoFnRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Shows that outputting an element earlier than the current element's timestamp by more than
 * {@link DoFn#getAllowedTimestampSkew()} throws, while a shift within the allowed skew succeeds.
 */
@Test
public void testSkew() {
  Duration allowedSkew = Duration.standardMinutes(10L);
  Instant inputTimestamp = new Instant(0);

  SkewingDoFn skewingFn = new SkewingDoFn(allowedSkew);
  DoFnRunner<Duration, Duration> skewRunner =
      new SimpleDoFnRunner<>(
          null,
          skewingFn,
          NullSideInputReader.empty(),
          new ListOutputManager(),
          new TupleTag<>(),
          Collections.emptyList(),
          mockStepContext,
          null,
          Collections.emptyMap(),
          WindowingStrategy.of(new GlobalWindows()),
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  skewRunner.startBundle();
  // Outputting between "now" and "now - allowed skew" succeeds.
  skewRunner.processElement(
      WindowedValue.timestampedValueInGlobalWindow(Duration.standardMinutes(5L), inputTimestamp));

  // Register the expected failure before triggering it.
  thrown.expect(UserCodeException.class);
  thrown.expectCause(isA(IllegalArgumentException.class));
  thrown.expectMessage("must be no earlier");
  thrown.expectMessage(
      String.format("timestamp of the current input (%s)", inputTimestamp.toString()));
  thrown.expectMessage(
      String.format(
          "the allowed skew (%s)", PeriodFormat.getDefault().print(allowedSkew.toPeriod())));

  // Outputting before "now - allowed skew" fails.
  skewRunner.processElement(
      WindowedValue.timestampedValueInGlobalWindow(Duration.standardHours(1L), inputTimestamp));
}