org.apache.beam.sdk.transforms.DoFnSchemaInformation Java Examples
The following examples show how to use
org.apache.beam.sdk.transforms.DoFnSchemaInformation.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DoFnInfo.java From beam with Apache License 2.0 | 6 votes |
/**
 * Builds a {@link DoFnInfo} describing the given {@link DoFn}.
 *
 * <p>Every argument is handed to the {@link DoFnInfo} constructor unchanged and in the same order.
 */
public static <InputT, OutputT> DoFnInfo<InputT, OutputT> forFn(
    DoFn<InputT, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    Iterable<PCollectionView<?>> sideInputViews,
    Coder<InputT> inputCoder,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    TupleTag<OutputT> mainOutput,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {
  DoFnInfo<InputT, OutputT> info =
      new DoFnInfo<>(
          doFn,
          windowingStrategy,
          sideInputViews,
          inputCoder,
          outputCoders,
          mainOutput,
          doFnSchemaInformation,
          sideInputMapping);
  return info;
}
Example #2
Source File: StreamingSideInputDoFnRunnerTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Creates a {@link StreamingSideInputDoFnRunner} that wraps a simple runner built around a new
 * {@code SideInputFn} over {@code views}, using the supplied window function, output manager and
 * side-input fetcher together with the test fixture's reader, main output tag and step context.
 */
@SuppressWarnings("unchecked")
private <ReceiverT> StreamingSideInputDoFnRunner<String, String, IntervalWindow> createRunner(
    WindowFn<?, ?> windowFn,
    DoFnRunners.OutputManager outputManager,
    List<PCollectionView<String>> views,
    StreamingSideInputFetcher<String, IntervalWindow> sideInputFetcher)
    throws Exception {
  DoFnRunner<String, String> delegate =
      DoFnRunners.simpleRunner(
          PipelineOptionsFactory.create(),
          new SideInputFn(views),
          mockSideInputReader,
          outputManager,
          mainOutputTag,
          Arrays.<TupleTag<?>>asList(),
          stepContext,
          null,
          Collections.emptyMap(),
          WindowingStrategy.of(windowFn),
          DoFnSchemaInformation.create(),
          Collections.emptyMap());
  return new StreamingSideInputDoFnRunner<>(delegate, sideInputFetcher);
}
Example #3
Source File: ParDoTranslationTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Translates the fixture {@code parDo} to a {@link ParDoPayload} and checks that the DoFn, main
 * output tag, side inputs, finalization flag and stateful-processing requirement are reflected
 * in the payload and the registered components.
 */
@Test
public void testToProto() throws Exception {
  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));

  ParDoPayload translated =
      ParDoTranslation.translateParDo(
          parDo,
          PCollection.createPrimitiveOutputInternal(
              p,
              WindowingStrategy.globalDefault(),
              IsBounded.BOUNDED,
              KvCoder.of(VarLongCoder.of(), StringUtf8Coder.of())),
          DoFnSchemaInformation.create(),
          p,
          sdkComponents);

  assertThat(ParDoTranslation.getDoFn(translated), equalTo(parDo.getFn()));
  assertThat(ParDoTranslation.getMainOutputTag(translated), equalTo(parDo.getMainOutputTag()));

  // Each side input of the transform must be retrievable from the payload by its tag id.
  for (PCollectionView<?> sideInput : parDo.getSideInputs().values()) {
    String sideInputId = sideInput.getTagInternal().getId();
    translated.getSideInputsOrThrow(sideInputId);
  }

  assertFalse(translated.getRequestsFinalization());

  // The stateful-processing requirement is expected exactly when the fn is the stateful fixture.
  boolean fnIsStateful = parDo.getFn() instanceof StateTimerDropElementsFn;
  assertEquals(
      fnIsStateful,
      sdkComponents.requirements().contains(ParDoTranslation.REQUIRES_STATEFUL_PROCESSING_URN));
}
Example #4
Source File: FlinkStatefulDoFnFunctionTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Test constructor that forwards every argument to the superclass constructor, placing a new
 * {@code IdentityFn} in front of them as the first superclass argument.
 */
public TestDoFnFunction(
    String stepName,
    WindowingStrategy windowingStrategy,
    Map sideInputs,
    PipelineOptions options,
    Map outputMap,
    TupleTag mainOutputTag,
    Coder inputCoder,
    Map outputCoderMap,
    DoFnSchemaInformation doFnSchemaInformation,
    Map sideInputMapping) {
  super(
      new IdentityFn(),
      stepName,
      windowingStrategy,
      sideInputs,
      options,
      outputMap,
      mainOutputTag,
      inputCoder,
      outputCoderMap,
      doFnSchemaInformation,
      sideInputMapping);
}
Example #5
Source File: DoFnInstanceManagersTest.java From beam with Apache License 2.0 | 6 votes |
@Test public void testInstanceIgnoresAbort() throws Exception { DoFnInfo<?, ?> info = DoFnInfo.forFn( initialFn, WindowingStrategy.globalDefault(), null /* side input views */, null /* input coder */, new TupleTag<>(PropertyNames.OUTPUT) /* main output id */, DoFnSchemaInformation.create(), Collections.emptyMap()); DoFnInstanceManager mgr = DoFnInstanceManagers.singleInstance(info); mgr.abort(mgr.get()); // TestFn#teardown would fail the test after multiple calls mgr.abort(mgr.get()); // The returned info is still the initial info assertThat(mgr.get(), Matchers.<DoFnInfo<?, ?>>theInstance(info)); assertThat(mgr.get().getDoFn(), theInstance(initialFn)); }
Example #6
Source File: ParDoTranslation.java From beam with Apache License 2.0 | 6 votes |
public static ParDoPayload translateParDo( AppliedPTransform<?, ?, ParDo.MultiOutput<?, ?>> appliedPTransform, SdkComponents components) throws IOException { final ParDo.MultiOutput<?, ?> parDo = appliedPTransform.getTransform(); final Pipeline pipeline = appliedPTransform.getPipeline(); final DoFn<?, ?> doFn = parDo.getFn(); // Get main input. Set<String> allInputs = appliedPTransform.getInputs().keySet().stream() .map(TupleTag::getId) .collect(Collectors.toSet()); Set<String> sideInputs = parDo.getSideInputs().values().stream() .map(s -> s.getTagInternal().getId()) .collect(Collectors.toSet()); String mainInputName = Iterables.getOnlyElement(Sets.difference(allInputs, sideInputs)); PCollection<?> mainInput = (PCollection<?>) appliedPTransform.getInputs().get(new TupleTag<>(mainInputName)); final DoFnSchemaInformation doFnSchemaInformation = ParDo.getDoFnSchemaInformation(doFn, mainInput); return translateParDo( (ParDo.MultiOutput) parDo, mainInput, doFnSchemaInformation, pipeline, components); }
Example #7
Source File: DoFnInstanceManagersTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Checks that a cloning-pool manager hands out an info distinct from the original, and that
 * after {@code complete} the very same info (and DoFn) instance is handed out again.
 */
@Test
public void testCloningPoolReusesAfterComplete() throws Exception {
  DoFnInfo<?, ?> originalInfo =
      DoFnInfo.forFn(
          initialFn,
          WindowingStrategy.globalDefault(),
          null /* side input views */,
          null /* input coder */,
          new TupleTag<>(PropertyNames.OUTPUT) /* main output id */,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  DoFnInstanceManager manager = DoFnInstanceManagers.cloningPool(originalInfo);

  // The first info obtained from the pool is not the original instance.
  DoFnInfo<?, ?> firstInfo = manager.get();
  assertThat(firstInfo, not(Matchers.<DoFnInfo<?, ?>>theInstance(originalInfo)));
  assertThat(firstInfo.getDoFn(), not(theInstance(originalInfo.getDoFn())));

  manager.complete(firstInfo);

  // After completion the same instance comes back out of the pool.
  DoFnInfo<?, ?> secondInfo = manager.get();
  assertThat(secondInfo, Matchers.<DoFnInfo<?, ?>>theInstance(firstInfo));
  assertThat(secondInfo.getDoFn(), theInstance(firstInfo.getDoFn()));
}
Example #8
Source File: SimplePushbackSideInputDoFnRunnerTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Builds a {@link SimpleDoFnRunner} around {@code fn} using an empty side-input reader, the
 * fixture's step context and windowing strategy, empty collections, and {@code null} for the
 * remaining constructor arguments.
 */
private DoFnRunner<KV<String, Integer>, Integer> getDoFnRunner(
    DoFn<KV<String, Integer>, Integer> fn) {
  DoFnRunner<KV<String, Integer>, Integer> runner =
      new SimpleDoFnRunner<>(
          null,
          fn,
          NullSideInputReader.empty(),
          null,
          null,
          Collections.emptyList(),
          mockStepContext,
          null,
          Collections.emptyMap(),
          WINDOWING_STRATEGY,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());
  return runner;
}
Example #9
Source File: FlinkDoFnFunctionTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Test constructor that passes all of its arguments through to the superclass constructor,
 * prepending a new {@code IdentityFn} as the first superclass argument.
 */
public TestDoFnFunction(
    String stepName,
    WindowingStrategy windowingStrategy,
    Map sideInputs,
    PipelineOptions options,
    Map outputMap,
    TupleTag mainOutputTag,
    Coder inputCoder,
    Map outputCoderMap,
    DoFnSchemaInformation doFnSchemaInformation,
    Map sideInputMapping) {
  super(
      new IdentityFn(),
      stepName,
      windowingStrategy,
      sideInputs,
      options,
      outputMap,
      mainOutputTag,
      inputCoder,
      outputCoderMap,
      doFnSchemaInformation,
      sideInputMapping);
}
Example #10
Source File: StatefulDoFnRunnerTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Builds a {@link SimpleDoFnRunner} around {@code fn}.
 *
 * @param fn the DoFn to run
 * @param outputManager the output manager to use; when {@code null}, the result of {@code
 *     discardingOutputManager()} is used instead
 */
private DoFnRunner<KV<String, Integer>, Integer> getDoFnRunner(
    DoFn<KV<String, Integer>, Integer> fn, @Nullable OutputManager outputManager) {
  DoFnRunner<KV<String, Integer>, Integer> runner =
      new SimpleDoFnRunner<>(
          null,
          fn,
          NullSideInputReader.empty(),
          // Fall back to the discarding manager when the caller supplied none.
          MoreObjects.firstNonNull(outputManager, discardingOutputManager()),
          outputTag,
          Collections.emptyList(),
          mockStepContext,
          null,
          Collections.emptyMap(),
          WINDOWING_STRATEGY,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());
  return runner;
}
Example #11
Source File: FlinkStreamingTransformTranslators.java From beam with Apache License 2.0 | 6 votes |
/**
 * Creates the {@link DoFnOperator} for a {@link DoFn}.
 *
 * <p>This is a bodiless declaration; the behavior is defined by whichever implementation is
 * supplied. NOTE(review): the parameter roles below are read off the parameter names and types
 * only — confirm against the implementations.
 *
 * @param doFn the user function the operator is created for
 * @param stepName name of the step
 * @param sideInputs side-input views
 * @param mainOutputTag tag of the main output
 * @param additionalOutputTags tags of the additional outputs
 * @param context the streaming translation context
 * @param windowingStrategy the windowing strategy
 * @param tagsToOutputTags mapping from tuple tags to output tags
 * @param tagsToCoders mapping from tuple tags to windowed-value coders
 * @param tagsToIds mapping from tuple tags to integer ids
 * @param windowedInputCoder coder for the windowed input elements
 * @param outputCoders mapping from tuple tags to output coders
 * @param keyCoder coder for the key (raw type)
 * @param keySelector key selector over the windowed input
 * @param transformedSideInputs mapping from integer ids to side-input views
 * @param doFnSchemaInformation schema information for the DoFn
 * @param sideInputMapping mapping from string ids to side-input views
 * @return the created operator
 */
DoFnOperator<InputT, OutputT> createDoFnOperator( DoFn<InputT, OutputT> doFn, String stepName, List<PCollectionView<?>> sideInputs, TupleTag<OutputT> mainOutputTag, List<TupleTag<?>> additionalOutputTags, FlinkStreamingTranslationContext context, WindowingStrategy<?, ?> windowingStrategy, Map<TupleTag<?>, OutputTag<WindowedValue<?>>> tagsToOutputTags, Map<TupleTag<?>, Coder<WindowedValue<?>>> tagsToCoders, Map<TupleTag<?>, Integer> tagsToIds, Coder<WindowedValue<InputT>> windowedInputCoder, Map<TupleTag<?>, Coder<?>> outputCoders, Coder keyCoder, KeySelector<WindowedValue<InputT>, ?> keySelector, Map<Integer, PCollectionView<?>> transformedSideInputs, DoFnSchemaInformation doFnSchemaInformation, Map<String, PCollectionView<?>> sideInputMapping);
Example #12
Source File: ParDoTranslationTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Translates a {@link ParDo} of {@code StartBundleDoFn} and asserts that the resulting payload
 * requests finalization.
 */
@Test
public void testStartBundle() throws Exception {
  Pipeline pipeline = Pipeline.create();
  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("java"));

  ParDoPayload translated =
      ParDoTranslation.translateParDo(
          ParDo.of(new StartBundleDoFn())
              .withOutputTags(new TupleTag<>(), TupleTagList.empty()),
          PCollection.createPrimitiveOutputInternal(
              pipeline,
              WindowingStrategy.globalDefault(),
              IsBounded.BOUNDED,
              StringUtf8Coder.of()),
          DoFnSchemaInformation.create(),
          TestPipeline.create(),
          components);

  boolean requestsFinalization = translated.getRequestsFinalization();
  assertTrue(requestsFinalization);
}
Example #13
Source File: DoFnInfo.java From beam with Apache License 2.0 | 6 votes |
/**
 * Builds a {@link DoFnInfo} describing the given {@link DoFn}, using an empty map for the
 * output coders.
 *
 * <p>This overload is kept for backwards compatibility with the Dataflow runner and should be
 * removed once that runner has been updated to use the new constructor.
 */
public static <InputT, OutputT> DoFnInfo<InputT, OutputT> forFn(
    DoFn<InputT, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    Iterable<PCollectionView<?>> sideInputViews,
    Coder<InputT> inputCoder,
    TupleTag<OutputT> mainOutput,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {
  DoFnInfo<InputT, OutputT> info =
      new DoFnInfo<>(
          doFn,
          windowingStrategy,
          sideInputViews,
          inputCoder,
          Collections.emptyMap(), // No output coders are supplied through this overload.
          mainOutput,
          doFnSchemaInformation,
          sideInputMapping);
  return info;
}
Example #14
Source File: CombineValuesFnFactory.java From beam with Apache License 2.0 | 6 votes |
private static <K, InputT, AccumT, OutputT> DoFnInfo<?, ?> createDoFnInfo( AppliedCombineFn<K, InputT, AccumT, OutputT> combineFn, SideInputReader sideInputReader) { GlobalCombineFnRunner<InputT, AccumT, OutputT> combineFnRunner = GlobalCombineFnRunners.create(combineFn.getFn()); DoFn<KV<K, AccumT>, KV<K, OutputT>> doFn = new ExtractOutputDoFn<>(combineFnRunner, sideInputReader); KvCoder<K, AccumT> inputCoder = null; if (combineFn.getKvCoder() != null) { inputCoder = KvCoder.of(combineFn.getKvCoder().getKeyCoder(), combineFn.getAccumulatorCoder()); } return DoFnInfo.forFn( doFn, combineFn.getWindowingStrategy(), combineFn.getSideInputViews(), inputCoder, Collections.emptyMap(), // Not needed here. new TupleTag<>(PropertyNames.OUTPUT), DoFnSchemaInformation.create(), Collections.emptyMap()); }
Example #15
Source File: ParDoTranslationTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Translates a {@link ParDo} of {@code ProcessContextDoFn} and asserts that the resulting
 * payload requests finalization.
 */
@Test
public void testProcessContext() throws Exception {
  Pipeline pipeline = Pipeline.create();
  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("java"));

  ParDoPayload translated =
      ParDoTranslation.translateParDo(
          ParDo.of(new ProcessContextDoFn())
              .withOutputTags(new TupleTag<>(), TupleTagList.empty()),
          PCollection.createPrimitiveOutputInternal(
              pipeline,
              WindowingStrategy.globalDefault(),
              IsBounded.BOUNDED,
              StringUtf8Coder.of()),
          DoFnSchemaInformation.create(),
          TestPipeline.create(),
          components);

  boolean requestsFinalization = translated.getRequestsFinalization();
  assertTrue(requestsFinalization);
}
Example #16
Source File: ParDoTranslationTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Translates a {@link ParDo} of {@code FinishBundleDoFn} and asserts that the resulting payload
 * requests finalization.
 */
@Test
public void testFinishBundle() throws Exception {
  Pipeline pipeline = Pipeline.create();
  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("java"));

  ParDoPayload translated =
      ParDoTranslation.translateParDo(
          ParDo.of(new FinishBundleDoFn())
              .withOutputTags(new TupleTag<>(), TupleTagList.empty()),
          PCollection.createPrimitiveOutputInternal(
              pipeline,
              WindowingStrategy.globalDefault(),
              IsBounded.BOUNDED,
              StringUtf8Coder.of()),
          DoFnSchemaInformation.create(),
          TestPipeline.create(),
          components);

  boolean requestsFinalization = translated.getRequestsFinalization();
  assertTrue(requestsFinalization);
}
Example #17
Source File: FlinkStatefulDoFnFunction.java From beam with Apache License 2.0 | 6 votes |
/**
 * Stores the given arguments in the corresponding fields. The pipeline options are not stored
 * directly but wrapped in a {@link SerializablePipelineOptions}.
 */
public FlinkStatefulDoFnFunction(
    DoFn<KV<K, V>, OutputT> dofn,
    String stepName,
    WindowingStrategy<?, ?> windowingStrategy,
    Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
    PipelineOptions pipelineOptions,
    Map<TupleTag<?>, Integer> outputMap,
    TupleTag<OutputT> mainOutputTag,
    Coder<KV<K, V>> inputCoder,
    Map<TupleTag<?>, Coder<?>> outputCoderMap,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {
  // The function itself and how it is identified.
  this.dofn = dofn;
  this.stepName = stepName;
  this.doFnSchemaInformation = doFnSchemaInformation;

  // Windowing and side inputs.
  this.windowingStrategy = windowingStrategy;
  this.sideInputs = sideInputs;
  this.sideInputMapping = sideInputMapping;

  // Inputs and outputs.
  this.inputCoder = inputCoder;
  this.mainOutputTag = mainOutputTag;
  this.outputMap = outputMap;
  this.outputCoderMap = outputCoderMap;

  // Options are kept in their serializable wrapper.
  this.serializedOptions = new SerializablePipelineOptions(pipelineOptions);
}
Example #18
Source File: DoFnInstanceManagersTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Checks that aborting an info obtained from a cloning pool marks its {@code TestFn} as torn
 * down, and that the next info handed out is a different instance whose fn is not torn down.
 */
@Test
public void testCloningPoolTearsDownAfterAbort() throws Exception {
  DoFnInfo<?, ?> originalInfo =
      DoFnInfo.forFn(
          initialFn,
          WindowingStrategy.globalDefault(),
          null /* side input views */,
          null /* input coder */,
          new TupleTag<>(PropertyNames.OUTPUT) /* main output id */,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  DoFnInstanceManager manager = DoFnInstanceManagers.cloningPool(originalInfo);

  DoFnInfo<?, ?> abortedInfo = manager.get();
  manager.abort(abortedInfo);
  TestFn abortedFn = (TestFn) abortedInfo.getDoFn();
  assertThat(abortedFn.tornDown, is(true));

  // The pool must not hand the aborted instance out again.
  DoFnInfo<?, ?> replacementInfo = manager.get();
  assertThat(replacementInfo, not(Matchers.<DoFnInfo<?, ?>>theInstance(abortedInfo)));
  assertThat(replacementInfo.getDoFn(), not(theInstance(abortedInfo.getDoFn())));
  TestFn replacementFn = (TestFn) replacementInfo.getDoFn();
  assertThat(replacementFn.tornDown, is(false));
}
Example #19
Source File: FlinkPipelineOptionsTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Constructs a {@link DoFnOperator} with {@code null} in several argument positions and expects
 * an {@link Exception} to be thrown.
 *
 * <p>NOTE(review): judging by the test name, the first {@code null} is the pipeline options —
 * confirm against the {@link DoFnOperator} constructor.
 */
@Test(expected = Exception.class)
public void parDoBaseClassPipelineOptionsNullTest() {
  TupleTag<String> mainOutput = new TupleTag<>("main-output");
  Coder<WindowedValue<String>> valueOnlyCoder =
      WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());

  new DoFnOperator<>(
      new TestDoFn(),
      "stepName",
      valueOnlyCoder,
      Collections.emptyMap(),
      mainOutput,
      Collections.emptyList(),
      new DoFnOperator.MultiOutputOutputManagerFactory<>(mainOutput, valueOnlyCoder),
      WindowingStrategy.globalDefault(),
      new HashMap<>(),
      Collections.emptyList(),
      null,
      null, /* key coder */
      null /* key selector */,
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
Example #20
Source File: SimpleDoFnRunnerTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Checks that {@code finishBundle} on a runner around a {@code ThrowingDoFn} surfaces a {@link
 * UserCodeException} whose cause is the fn's {@code exceptionToThrow}.
 */
@Test
public void testFinishBundleExceptionsWrappedAsUserCodeException() {
  ThrowingDoFn throwingFn = new ThrowingDoFn();
  DoFnRunner<String, String> underTest =
      new SimpleDoFnRunner<>(
          null,
          throwingFn,
          NullSideInputReader.empty(),
          null,
          null,
          Collections.emptyList(),
          mockStepContext,
          null,
          Collections.emptyMap(),
          WindowingStrategy.of(new GlobalWindows()),
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  // Register the expectations before triggering the failing call.
  thrown.expect(UserCodeException.class);
  thrown.expectCause(is(throwingFn.exceptionToThrow));

  underTest.finishBundle();
}
Example #21
Source File: CombineValuesFnFactory.java From beam with Apache License 2.0 | 6 votes |
private static <K, InputT, AccumT, OutputT> DoFnInfo<?, ?> createDoFnInfo( AppliedCombineFn<K, InputT, AccumT, OutputT> combineFn, SideInputReader sideInputReader) { GlobalCombineFnRunner<InputT, AccumT, OutputT> combineFnRunner = GlobalCombineFnRunners.create(combineFn.getFn()); DoFn<KV<K, Iterable<InputT>>, KV<K, OutputT>> doFn = new CombineValuesDoFn<>(combineFnRunner, sideInputReader); Coder<KV<K, Iterable<InputT>>> inputCoder = null; if (combineFn.getKvCoder() != null) { inputCoder = KvCoder.of( combineFn.getKvCoder().getKeyCoder(), IterableCoder.of(combineFn.getKvCoder().getValueCoder())); } return DoFnInfo.forFn( doFn, combineFn.getWindowingStrategy(), combineFn.getSideInputViews(), inputCoder, Collections.emptyMap(), // Not needed here. new TupleTag<>(PropertyNames.OUTPUT), DoFnSchemaInformation.create(), Collections.emptyMap()); }
Example #22
Source File: DoFnOperatorTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Runs a {@link DoFnOperator} around an {@code IdentityDoFn} in a test harness and checks that
 * a single global-window element {@code "Hello"} comes out unchanged.
 */
@Test
@SuppressWarnings("unchecked")
public void testSingleOutput() throws Exception {
  Coder<WindowedValue<String>> valueOnlyCoder =
      WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());
  TupleTag<String> mainOutput = new TupleTag<>("main-output");

  DoFnOperator<String, String> operator =
      new DoFnOperator<>(
          new IdentityDoFn<>(),
          "stepName",
          valueOnlyCoder,
          Collections.emptyMap(),
          mainOutput,
          Collections.emptyList(),
          new DoFnOperator.MultiOutputOutputManagerFactory<>(mainOutput, valueOnlyCoder),
          WindowingStrategy.globalDefault(),
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          PipelineOptionsFactory.as(FlinkPipelineOptions.class),
          null,
          null,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> harness =
      new OneInputStreamOperatorTestHarness<>(operator);
  harness.open();

  harness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("Hello")));

  assertThat(
      stripStreamRecordFromWindowedValue(harness.getOutput()),
      contains(WindowedValue.valueInGlobalWindow("Hello")));

  harness.close();
}
Example #23
Source File: ParDoMultiOverrideFactory.java From beam with Apache License 2.0 | 5 votes |
/** Stores each constructor argument in the field of the same name. */
public GbkThenStatefulParDo(
    DoFn<KV<K, InputT>, OutputT> doFn,
    TupleTag<OutputT> mainOutputTag,
    TupleTagList additionalOutputTags,
    List<PCollectionView<?>> sideInputs,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {
  // The function and its schema information.
  this.doFn = doFn;
  this.doFnSchemaInformation = doFnSchemaInformation;

  // Output tags.
  this.mainOutputTag = mainOutputTag;
  this.additionalOutputTags = additionalOutputTags;

  // Side inputs.
  this.sideInputs = sideInputs;
  this.sideInputMapping = sideInputMapping;
}
Example #24
Source File: StatefulParDoP.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates the supplier by forwarding every argument, unchanged and in declaration order, to the
 * superclass constructor.
 */
public Supplier(
    String stepId,
    String ownerId,
    DoFn<KV<?, ?>, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    DoFnSchemaInformation doFnSchemaInformation,
    SerializablePipelineOptions pipelineOptions,
    TupleTag<OutputT> mainOutputTag,
    Set<TupleTag<OutputT>> allOutputTags,
    Coder<KV<?, ?>> inputCoder,
    Map<PCollectionView<?>, Coder<?>> sideInputCoders,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    Coder<KV<?, ?>> inputValueCoder,
    Map<TupleTag<?>, Coder<?>> outputValueCoders,
    List<PCollectionView<?>> sideInputs) {
  super(
      stepId,
      ownerId,
      doFn,
      windowingStrategy,
      doFnSchemaInformation,
      pipelineOptions,
      mainOutputTag,
      allOutputTags,
      inputCoder,
      sideInputCoders,
      outputCoders,
      inputValueCoder,
      outputValueCoders,
      sideInputs);
}
Example #25
Source File: ParDoP.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates the supplier; all arguments are passed straight through to the superclass
 * constructor in the order they are declared.
 */
public Supplier(
    String stepId,
    String ownerId,
    DoFn<InputT, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    DoFnSchemaInformation doFnSchemaInformation,
    SerializablePipelineOptions pipelineOptions,
    TupleTag<OutputT> mainOutputTag,
    Set<TupleTag<OutputT>> allOutputTags,
    Coder<InputT> inputCoder,
    Map<PCollectionView<?>, Coder<?>> sideInputCoders,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    Coder<InputT> inputValueCoder,
    Map<TupleTag<?>, Coder<?>> outputValueCoders,
    List<PCollectionView<?>> sideInputs) {
  super(
      stepId,
      ownerId,
      doFn,
      windowingStrategy,
      doFnSchemaInformation,
      pipelineOptions,
      mainOutputTag,
      allOutputTags,
      inputCoder,
      sideInputCoders,
      outputCoders,
      inputValueCoder,
      outputValueCoders,
      sideInputs);
}
Example #26
Source File: AbstractParDoP.java From beam with Apache License 2.0 | 5 votes |
/**
 * Stores the constructor arguments in the corresponding fields.
 *
 * <p>{@code allOutputTags} is not stored as given: it is turned into a map that associates each
 * tag with a new, empty {@link ArrayList}, kept in {@code outputCollToOrdinals}.
 */
AbstractSupplier(
    String stepId,
    String ownerId,
    DoFn<InputT, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    DoFnSchemaInformation doFnSchemaInformation,
    SerializablePipelineOptions pipelineOptions,
    TupleTag<OutputT> mainOutputTag,
    Set<TupleTag<OutputT>> allOutputTags,
    Coder<InputT> inputCoder,
    Map<PCollectionView<?>, Coder<?>> sideInputCoders,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    Coder<InputT> inputValueCoder,
    Map<TupleTag<?>, Coder<?>> outputValueCoders,
    List<PCollectionView<?>> sideInputs) {
  // Identification and options.
  this.stepId = stepId;
  this.ownerId = ownerId;
  this.pipelineOptions = pipelineOptions;

  // The function and how it is applied.
  this.doFn = doFn;
  this.windowingStrategy = windowingStrategy;
  this.doFnSchemaInformation = doFnSchemaInformation;

  // Outputs: every known tag starts out with an empty list.
  this.outputCollToOrdinals =
      allOutputTags.stream()
          .collect(Collectors.toMap(Function.identity(), tag -> new ArrayList<>()));
  this.mainOutputTag = mainOutputTag;

  // Coders and side inputs.
  this.inputCoder = inputCoder;
  this.sideInputCoders = sideInputCoders;
  this.outputCoders = outputCoders;
  this.inputValueCoder = inputValueCoder;
  this.outputValueCoders = outputValueCoders;
  this.sideInputs = sideInputs;
}
Example #27
Source File: AbstractParDoP.java From beam with Apache License 2.0 | 5 votes |
/**
 * Supplies the {@link DoFnRunner} for the given {@link DoFn}.
 *
 * <p>Abstract: each subclass decides which runner to build. NOTE(review): the parameter roles
 * below are read off the parameter names and types only — confirm against the subclasses.
 *
 * @param pipelineOptions the pipeline options
 * @param doFn the user function to run
 * @param sideInputReader reader for side inputs
 * @param outputManager manager receiving the outputs
 * @param mainOutputTag tag of the main output
 * @param additionalOutputTags tags of the additional outputs
 * @param inputValueCoder coder for input values
 * @param outputValueCoders mapping from output tags to value coders
 * @param windowingStrategy the windowing strategy
 * @param doFnSchemaInformation schema information for the DoFn
 * @param sideInputMapping mapping from string ids to side-input views
 * @return the runner to use
 */
protected abstract DoFnRunner<InputT, OutputT> getDoFnRunner( PipelineOptions pipelineOptions, DoFn<InputT, OutputT> doFn, SideInputReader sideInputReader, JetOutputManager outputManager, TupleTag<OutputT> mainOutputTag, List<TupleTag<?>> additionalOutputTags, Coder<InputT> inputValueCoder, Map<TupleTag<?>, Coder<?>> outputValueCoders, WindowingStrategy<?, ?> windowingStrategy, DoFnSchemaInformation doFnSchemaInformation, Map<String, PCollectionView<?>> sideInputMapping);
Example #28
Source File: AbstractParDoP.java From beam with Apache License 2.0 | 5 votes |
/**
 * Stores the constructor arguments in the corresponding fields, with three exceptions:
 *
 * <ul>
 *   <li>{@code doFn} is first passed through {@code Utils.serde} (presumably a
 *       serialize/deserialize round trip — confirm against {@code Utils});
 *   <li>each side-input coder is cast to {@code WindowedValue.FullWindowedValueCoder} and
 *       replaced by the result of {@code Utils.deriveIterableValueCoder};
 *   <li>{@code cooperative} is computed last, as {@code isCooperativenessAllowed(pipelineOptions)
 *       && hasOutput()}.
 * </ul>
 */
AbstractParDoP(
    DoFn<InputT, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<TupleTag<?>, int[]> outputCollToOrdinals,
    SerializablePipelineOptions pipelineOptions,
    TupleTag<OutputT> mainOutputTag,
    Coder<InputT> inputCoder,
    Map<PCollectionView<?>, Coder<?>> sideInputCoders,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    Coder<InputT> inputValueCoder,
    Map<TupleTag<?>, Coder<?>> outputValueCoders,
    Map<Integer, PCollectionView<?>> ordinalToSideInput,
    String ownerId,
    String stepId) {
  this.pipelineOptions = pipelineOptions;
  this.doFn = Utils.serde(doFn);
  this.windowingStrategy = windowingStrategy;
  this.doFnSchemaInformation = doFnSchemaInformation;
  this.outputCollToOrdinals = outputCollToOrdinals;
  this.mainOutputTag = mainOutputTag;
  this.inputCoder = inputCoder;

  // Re-derive every side-input coder from its full windowed-value coder.
  this.sideInputCoders =
      sideInputCoders.entrySet().stream()
          .collect(
              Collectors.toMap(
                  entry -> entry.getKey(),
                  entry ->
                      Utils.deriveIterableValueCoder(
                          (WindowedValue.FullWindowedValueCoder) entry.getValue())));

  this.outputCoders = outputCoders;
  this.inputValueCoder = inputValueCoder;
  this.outputValueCoders = outputValueCoders;
  this.ordinalToSideInput = ordinalToSideInput;
  this.ownerId = ownerId;
  this.stepId = stepId;

  // Evaluated last, after all other fields have been assigned.
  this.cooperative = isCooperativenessAllowed(pipelineOptions) && hasOutput();
}
Example #29
Source File: SimpleDoFnRunnerTest.java From beam with Apache License 2.0 | 5 votes |
/** * Demonstrates that attempting to output an element with a timestamp before the current one * always succeeds when {@link DoFn#getAllowedTimestampSkew()} is equal to {@link Long#MAX_VALUE} * milliseconds. */ @Test public void testInfiniteSkew() { SkewingDoFn fn = new SkewingDoFn(Duration.millis(Long.MAX_VALUE)); DoFnRunner<Duration, Duration> runner = new SimpleDoFnRunner<>( null, fn, NullSideInputReader.empty(), new ListOutputManager(), new TupleTag<>(), Collections.emptyList(), mockStepContext, null, Collections.emptyMap(), WindowingStrategy.of(new GlobalWindows()), DoFnSchemaInformation.create(), Collections.emptyMap()); runner.startBundle(); runner.processElement( WindowedValue.timestampedValueInGlobalWindow(Duration.millis(1L), new Instant(0))); runner.processElement( WindowedValue.timestampedValueInGlobalWindow( Duration.millis(1L), BoundedWindow.TIMESTAMP_MIN_VALUE.plus(Duration.millis(1)))); runner.processElement( WindowedValue.timestampedValueInGlobalWindow( // This is the maximum amount a timestamp in beam can move (from the maximum timestamp // to the minimum timestamp). Duration.millis(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis()) .minus(Duration.millis(BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis())), BoundedWindow.TIMESTAMP_MAX_VALUE)); }
Example #30
Source File: SimpleDoFnRunnerTest.java From beam with Apache License 2.0 | 5 votes |
/** * Demonstrates that attempting to output an element before the timestamp of the current element * plus the value of {@link DoFn#getAllowedTimestampSkew()} throws, but between that value and the * current timestamp succeeds. */ @Test public void testSkew() { SkewingDoFn fn = new SkewingDoFn(Duration.standardMinutes(10L)); DoFnRunner<Duration, Duration> runner = new SimpleDoFnRunner<>( null, fn, NullSideInputReader.empty(), new ListOutputManager(), new TupleTag<>(), Collections.emptyList(), mockStepContext, null, Collections.emptyMap(), WindowingStrategy.of(new GlobalWindows()), DoFnSchemaInformation.create(), Collections.emptyMap()); runner.startBundle(); // Outputting between "now" and "now - allowed skew" succeeds. runner.processElement( WindowedValue.timestampedValueInGlobalWindow(Duration.standardMinutes(5L), new Instant(0))); thrown.expect(UserCodeException.class); thrown.expectCause(isA(IllegalArgumentException.class)); thrown.expectMessage("must be no earlier"); thrown.expectMessage( String.format("timestamp of the current input (%s)", new Instant(0).toString())); thrown.expectMessage( String.format( "the allowed skew (%s)", PeriodFormat.getDefault().print(Duration.standardMinutes(10L).toPeriod()))); // Outputting before "now - allowed skew" fails. runner.processElement( WindowedValue.timestampedValueInGlobalWindow(Duration.standardHours(1L), new Instant(0))); }