Java Code Examples for org.apache.beam.sdk.transforms.reflect.DoFnInvokers#invokerFor()
The following examples show how to use
org.apache.beam.sdk.transforms.reflect.DoFnInvokers#invokerFor().
Each example notes the project and source file it was taken from.
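All of the examples share the same basic pattern: obtain a DoFnInvoker for a DoFn via DoFnInvokers.invokerFor(), call invokeSetup() once, drive the bundle lifecycle with invokeStartBundle(), invokeProcessElement(), and invokeFinishBundle(), and finally call invokeTeardown(). The following minimal sketch illustrates that pattern in isolation; the class name InvokerLifecycleSketch and the ArgumentProvider parameters are hypothetical placeholders for the runner-specific context objects seen in the examples (for instance the StartBundleContext and ProcessContext classes in Example 1).

import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.reflect.DoFnInvoker;
import org.apache.beam.sdk.transforms.reflect.DoFnInvokers;

// Hypothetical sketch of the invoker lifecycle shared by the examples below.
class InvokerLifecycleSketch<InputT, OutputT> {

  void runBundle(
      DoFn<InputT, OutputT> doFn,
      DoFnInvoker.ArgumentProvider<InputT, OutputT> startBundleArgs,
      DoFnInvoker.ArgumentProvider<InputT, OutputT> processElementArgs,
      DoFnInvoker.ArgumentProvider<InputT, OutputT> finishBundleArgs) {
    // Generate (or reuse) a bytecode-generated invoker for the DoFn's annotated methods.
    DoFnInvoker<InputT, OutputT> invoker = DoFnInvokers.invokerFor(doFn);

    invoker.invokeSetup();                             // @Setup, once per DoFn instance
    invoker.invokeStartBundle(startBundleArgs);        // @StartBundle, once per bundle
    invoker.invokeProcessElement(processElementArgs);  // @ProcessElement, once per element
    invoker.invokeFinishBundle(finishBundleArgs);      // @FinishBundle, once per bundle
    invoker.invokeTeardown();                          // @Teardown, before discarding the DoFn
  }
}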
Example 1
Source File: DoTransform.java From nemo with Apache License 2.0
@Override
public void onData(final Iterator<I> elements, final String srcVertexId) {
  final StartBundleContext startBundleContext = new StartBundleContext(doFn, serializedOptions);
  final FinishBundleContext finishBundleContext =
      new FinishBundleContext(doFn, outputCollector, serializedOptions);
  final ProcessContext processContext =
      new ProcessContext(doFn, outputCollector, sideInputs, serializedOptions);
  final DoFnInvoker invoker = DoFnInvokers.invokerFor(doFn);
  invoker.invokeSetup();
  invoker.invokeStartBundle(startBundleContext);
  elements.forEachRemaining(element -> {
    // No need to check for input index, since it is always 0 for DoTransform
    processContext.setElement(element);
    invoker.invokeProcessElement(processContext);
  });
  invoker.invokeFinishBundle(finishBundleContext);
  invoker.invokeTeardown();
}
Example 2
Source File: DoFnTester.java From beam with Apache License 2.0
@SuppressWarnings("unchecked")
private void initializeState() throws Exception {
  checkState(state == State.UNINITIALIZED, "Already initialized");
  checkState(fn == null, "Uninitialized but fn != null");
  if (cloningBehavior.equals(CloningBehavior.DO_NOT_CLONE)) {
    fn = origFn;
  } else {
    fn =
        (DoFn<InputT, OutputT>)
            SerializableUtils.deserializeFromByteArray(
                SerializableUtils.serializeToByteArray(origFn), origFn.toString());
  }
  fnInvoker = DoFnInvokers.invokerFor(fn);
  fnInvoker.invokeSetup();
}
Example 3
Source File: AbstractParDoP.java From beam with Apache License 2.0
@Override
public void init(@Nonnull Outbox outbox, @Nonnull Context context) {
  this.outbox = outbox;
  this.metricsContainer = new JetMetricsContainer(stepId, ownerId, context);

  doFnInvoker = DoFnInvokers.invokerFor(doFn);
  doFnInvoker.invokeSetup();

  if (ordinalToSideInput.isEmpty()) {
    sideInputReader = NullSideInputReader.of(Collections.emptyList());
  } else {
    bufferedItems = new SimpleInbox();
    sideInputHandler =
        new SideInputHandler(ordinalToSideInput.values(), InMemoryStateInternals.forKey(null));
    sideInputReader = sideInputHandler;
  }

  outputManager = new JetOutputManager(outbox, outputCoders, outputCollToOrdinals);

  doFnRunner =
      getDoFnRunner(
          pipelineOptions.get(),
          doFn,
          sideInputReader,
          outputManager,
          mainOutputTag,
          Lists.newArrayList(outputCollToOrdinals.keySet()),
          inputValueCoder,
          outputValueCoders,
          windowingStrategy,
          doFnSchemaInformation,
          sideInputMapping);
}
Example 4
Source File: AbstractDoFnTransform.java From incubator-nemo with Apache License 2.0
@Override
public final void prepare(final Context context, final OutputCollector<WindowedValue<OutputT>> oc) {
  // deserialize pipeline option
  final NemoPipelineOptions options = serializedOptions.get().as(NemoPipelineOptions.class);
  this.outputCollector = wrapOutputCollector(oc);

  this.bundleMillis = options.getMaxBundleTimeMills();
  this.bundleSize = options.getMaxBundleSize();

  // create output manager
  outputManager = new DefaultOutputManager<>(outputCollector, mainOutputTag);

  // create side input reader
  sideInputReader = new InMemorySideInputReader(new ArrayList<>(sideInputs.values()));

  // this transform does not support state and timer.
  final StepContext stepContext = new StepContext() {
    @Override
    public StateInternals stateInternals() {
      throw new UnsupportedOperationException("Not support stateInternals in DoFnTransform");
    }

    @Override
    public TimerInternals timerInternals() {
      throw new UnsupportedOperationException("Not support timerInternals in DoFnTransform");
    }
  };

  final DoFn wrappedDoFn = wrapDoFn(doFn);

  // invoker
  doFnInvoker = DoFnInvokers.invokerFor(wrappedDoFn);
  doFnInvoker.invokeSetup();

  // DoFnRunners.simpleRunner takes care of all the hard stuff of running the DoFn
  // and that this approach is the standard used by most of the Beam runners
  doFnRunner =
      DoFnRunners.simpleRunner(
          options,
          wrappedDoFn,
          sideInputReader,
          outputManager,
          mainOutputTag,
          additionalOutputTags,
          stepContext,
          inputCoder,
          outputCoders,
          windowingStrategy,
          doFnSchemaInformation,
          sideInputMapping);

  pushBackRunner =
      sideInputs.isEmpty()
          ? null
          : SimplePushbackSideInputDoFnRunner.<InterT, OutputT>create(
              doFnRunner, sideInputs.values(), sideInputReader);
}
Example 5
Source File: ReadTest.java From beam with Apache License 2.0
@Test
public void testInstantiationOfBoundedSourceAsSDFWrapper() {
  DoFn dofn = new Read.BoundedSourceAsSDFWrapperFn<>();
  DoFnInvokers.invokerFor(dofn);
}
Example 6
Source File: SplittableParDo.java From beam with Apache License 2.0
@Setup
public void setup() {
  invoker = DoFnInvokers.invokerFor(fn);
  invoker.invokeSetup();
}
Example 7
Source File: SplittableParDo.java From beam with Apache License 2.0
@Setup
public void setup() {
  invoker = DoFnInvokers.invokerFor(splittableFn);
  invoker.invokeSetup();
}
Example 8
Source File: SplittableParDoNaiveBounded.java From beam with Apache License 2.0
@Setup
public void setup() {
  this.invoker = DoFnInvokers.invokerFor(fn);
  invoker.invokeSetup();
}
Example 9
Source File: SplittableParDoViaKeyedWorkItems.java From beam with Apache License 2.0
@Setup
public void setup() throws Exception {
  invoker = DoFnInvokers.invokerFor(fn);
  invoker.invokeSetup();
}
Example 10
Source File: SimpleDoFnRunner.java From beam with Apache License 2.0
/** Constructor. */
public SimpleDoFnRunner(
    PipelineOptions options,
    DoFn<InputT, OutputT> fn,
    SideInputReader sideInputReader,
    OutputManager outputManager,
    TupleTag<OutputT> mainOutputTag,
    List<TupleTag<?>> additionalOutputTags,
    StepContext stepContext,
    @Nullable Coder<InputT> inputCoder,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    WindowingStrategy<?, ?> windowingStrategy,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {
  this.options = options;
  this.fn = fn;
  this.signature = DoFnSignatures.getSignature(fn.getClass());
  this.observesWindow = signature.processElement().observesWindow() || !sideInputReader.isEmpty();
  this.invoker = DoFnInvokers.invokerFor(fn);
  this.sideInputReader = sideInputReader;
  this.schemaCoder = (inputCoder instanceof SchemaCoder) ? (SchemaCoder<InputT>) inputCoder : null;
  this.outputCoders = outputCoders;
  if (outputCoders != null && !outputCoders.isEmpty()) {
    Coder<OutputT> outputCoder = (Coder<OutputT>) outputCoders.get(mainOutputTag);
    mainOutputSchemaCoder =
        (outputCoder instanceof SchemaCoder) ? (SchemaCoder<OutputT>) outputCoder : null;
  } else {
    mainOutputSchemaCoder = null;
  }
  this.outputManager = outputManager;
  this.mainOutputTag = mainOutputTag;
  this.outputTags =
      Sets.newHashSet(FluentIterable.<TupleTag<?>>of(mainOutputTag).append(additionalOutputTags));
  this.stepContext = stepContext;

  // This is a cast of an _invariant_ coder. But we are assured by pipeline validation
  // that it really is the coder for whatever BoundedWindow subclass is provided
  @SuppressWarnings("unchecked")
  Coder<BoundedWindow> untypedCoder =
      (Coder<BoundedWindow>) windowingStrategy.getWindowFn().windowCoder();
  this.windowCoder = untypedCoder;
  this.allowedLateness = windowingStrategy.getAllowedLateness();

  this.doFnSchemaInformation = doFnSchemaInformation;
  this.sideInputMapping = sideInputMapping;
}
Example 11
Source File: DoFnOperator.java From beam with Apache License 2.0
@Override
public void open() throws Exception {
  // WindowDoFnOperator need use state and timer to get DoFn.
  // So must wait StateInternals and TimerInternals ready.
  // This will be called after initializeState()
  this.doFn = getDoFn();
  doFnInvoker = DoFnInvokers.invokerFor(doFn);
  doFnInvoker.invokeSetup();

  FlinkPipelineOptions options = serializedOptions.get().as(FlinkPipelineOptions.class);
  StepContext stepContext = new FlinkStepContext();
  doFnRunner =
      DoFnRunners.simpleRunner(
          options,
          doFn,
          sideInputReader,
          outputManager,
          mainOutputTag,
          additionalOutputTags,
          stepContext,
          getInputCoder(),
          outputCoders,
          windowingStrategy,
          doFnSchemaInformation,
          sideInputMapping);

  if (requiresStableInput) {
    // put this in front of the root FnRunner before any additional wrappers
    doFnRunner =
        bufferingDoFnRunner =
            BufferingDoFnRunner.create(
                doFnRunner,
                "stable-input-buffer",
                windowedInputCoder,
                windowingStrategy.getWindowFn().windowCoder(),
                getOperatorStateBackend(),
                getKeyedStateBackend(),
                options.getNumConcurrentCheckpoints());
  }
  doFnRunner = createWrappingDoFnRunner(doFnRunner, stepContext);
  earlyBindStateIfNeeded();

  if (!options.getDisableMetrics()) {
    flinkMetricContainer = new FlinkMetricContainer(getRuntimeContext());
    doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, flinkMetricContainer);
    String checkpointMetricNamespace = options.getReportCheckpointDuration();
    if (checkpointMetricNamespace != null) {
      MetricName checkpointMetric =
          MetricName.named(checkpointMetricNamespace, "checkpoint_duration");
      checkpointStats =
          new CheckpointStats(
              () ->
                  flinkMetricContainer
                      .getMetricsContainer(stepName)
                      .getDistribution(checkpointMetric));
    }
  }

  elementCount = 0L;
  lastFinishBundleTime = getProcessingTimeService().getCurrentProcessingTime();

  // Schedule timer to check timeout of finish bundle.
  long bundleCheckPeriod = Math.max(maxBundleTimeMills / 2, 1);
  checkFinishBundleTimer =
      getProcessingTimeService()
          .scheduleAtFixedRate(
              timestamp -> checkInvokeFinishBundleByTime(), bundleCheckPeriod, bundleCheckPeriod);

  if (doFn instanceof SplittableParDoViaKeyedWorkItems.ProcessFn) {
    pushbackDoFnRunner =
        new ProcessFnRunner<>((DoFnRunner) doFnRunner, sideInputs, sideInputHandler);
  } else {
    pushbackDoFnRunner =
        SimplePushbackSideInputDoFnRunner.create(doFnRunner, sideInputs, sideInputHandler);
  }
}
Example 12
Source File: DoFnOp.java From beam with Apache License 2.0
@Override
public void open(
    Config config,
    Context context,
    Scheduler<KeyedTimerData<Void>> timerRegistry,
    OpEmitter<OutT> emitter) {
  this.inputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE;
  this.sideInputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE;
  this.pushbackWatermarkHold = BoundedWindow.TIMESTAMP_MAX_VALUE;
  this.currentBundleElementCount = new AtomicLong(0L);
  this.bundleStartTime = new AtomicLong(Long.MAX_VALUE);
  this.isBundleStarted = new AtomicBoolean(false);
  this.bundleWatermarkHold = null;

  final DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());
  final SamzaExecutionContext samzaExecutionContext =
      (SamzaExecutionContext) context.getApplicationContainerContext();
  this.samzaPipelineOptions = samzaExecutionContext.getPipelineOptions();
  this.maxBundleSize = samzaPipelineOptions.getMaxBundleSize();
  this.maxBundleTimeMs = samzaPipelineOptions.getMaxBundleTimeMs();
  this.bundleTimerScheduler = timerRegistry;

  if (this.maxBundleSize > 1) {
    scheduleNextBundleCheck();
  }

  final SamzaStoreStateInternals.Factory<?> nonKeyedStateInternalsFactory =
      SamzaStoreStateInternals.createStateInternalFactory(
          transformId, null, context.getTaskContext(), samzaPipelineOptions, signature);

  this.timerInternalsFactory =
      SamzaTimerInternalsFactory.createTimerInternalFactory(
          keyCoder,
          (Scheduler) timerRegistry,
          getTimerStateId(signature),
          nonKeyedStateInternalsFactory,
          windowingStrategy,
          isBounded,
          samzaPipelineOptions);

  this.sideInputHandler =
      new SideInputHandler(sideInputs, nonKeyedStateInternalsFactory.stateInternalsForKey(null));

  if (isPortable) {
    // storing events within a bundle in states
    final BagState<WindowedValue<InT>> bundledEventsBagState =
        nonKeyedStateInternalsFactory
            .stateInternalsForKey(null)
            .state(StateNamespaces.global(), StateTags.bag(bundleStateId, windowedValueCoder));
    final ExecutableStage executableStage = ExecutableStage.fromPayload(stagePayload);
    stageBundleFactory = samzaExecutionContext.getJobBundleFactory().forStage(executableStage);
    this.fnRunner =
        SamzaDoFnRunners.createPortable(
            samzaPipelineOptions,
            bundledEventsBagState,
            outputManagerFactory.create(emitter),
            stageBundleFactory,
            mainOutputTag,
            idToTupleTagMap,
            context,
            transformFullName);
  } else {
    this.fnRunner =
        SamzaDoFnRunners.create(
            samzaPipelineOptions,
            doFn,
            windowingStrategy,
            transformFullName,
            transformId,
            context,
            mainOutputTag,
            sideInputHandler,
            timerInternalsFactory,
            keyCoder,
            outputManagerFactory.create(emitter),
            inputCoder,
            sideOutputTags,
            outputCoders,
            doFnSchemaInformation,
            sideInputMapping);
  }

  this.pushbackFnRunner =
      SimplePushbackSideInputDoFnRunner.create(fnRunner, sideInputs, sideInputHandler);
  this.pushbackValues = new ArrayList<>();

  final Iterator<SamzaDoFnInvokerRegistrar> invokerReg =
      ServiceLoader.load(SamzaDoFnInvokerRegistrar.class).iterator();
  if (!invokerReg.hasNext()) {
    // use the default invoker here
    doFnInvoker = DoFnInvokers.invokerFor(doFn);
  } else {
    doFnInvoker = Iterators.getOnlyElement(invokerReg).invokerFor(doFn, context);
  }

  doFnInvoker.invokeSetup();
}
Example 13
Source File: DataflowPipelineTranslator.java From beam with Apache License 2.0
private <InputT, OutputT> void translateMultiHelper(
    ParDo.MultiOutput<InputT, OutputT> transform, TranslationContext context) {
  StepTranslationContext stepContext = context.addStep(transform, "ParallelDo");

  DoFnSchemaInformation doFnSchemaInformation;
  doFnSchemaInformation = ParDoTranslation.getSchemaInformation(context.getCurrentTransform());
  Map<String, PCollectionView<?>> sideInputMapping =
      ParDoTranslation.getSideInputMapping(context.getCurrentTransform());
  Map<TupleTag<?>, Coder<?>> outputCoders =
      context.getOutputs(transform).entrySet().stream()
          .collect(
              Collectors.toMap(
                  Map.Entry::getKey, e -> ((PCollection) e.getValue()).getCoder()));

  translateInputs(
      stepContext, context.getInput(transform), transform.getSideInputs().values(), context);
  translateOutputs(context.getOutputs(transform), stepContext);
  String ptransformId =
      context.getSdkComponents().getPTransformIdOrThrow(context.getCurrentTransform());
  translateFn(
      stepContext,
      ptransformId,
      transform.getFn(),
      context.getInput(transform).getWindowingStrategy(),
      transform.getSideInputs().values(),
      context.getInput(transform).getCoder(),
      context,
      transform.getMainOutputTag(),
      outputCoders,
      doFnSchemaInformation,
      sideInputMapping);

  // TODO: Move this logic into translateFn once the legacy ProcessKeyedElements is removed.
  if (context.isFnApi()) {
    DoFnSignature signature = DoFnSignatures.signatureForDoFn(transform.getFn());
    if (signature.processElement().isSplittable()) {
      DoFnInvoker<?, ?> doFnInvoker = DoFnInvokers.invokerFor(transform.getFn());
      Coder<?> restrictionAndWatermarkStateCoder =
          KvCoder.of(
              doFnInvoker.invokeGetRestrictionCoder(
                  context.getInput(transform).getPipeline().getCoderRegistry()),
              doFnInvoker.invokeGetWatermarkEstimatorStateCoder(
                  context.getInput(transform).getPipeline().getCoderRegistry()));
      stepContext.addInput(
          PropertyNames.RESTRICTION_ENCODING,
          translateCoder(restrictionAndWatermarkStateCoder, context));
    }
  }
}
Example 14
Source File: DataflowPipelineTranslator.java From beam with Apache License 2.0
private <InputT, OutputT> void translateSingleHelper(
    ParDoSingle<InputT, OutputT> transform, TranslationContext context) {
  DoFnSchemaInformation doFnSchemaInformation;
  doFnSchemaInformation = ParDoTranslation.getSchemaInformation(context.getCurrentTransform());
  Map<String, PCollectionView<?>> sideInputMapping =
      ParDoTranslation.getSideInputMapping(context.getCurrentTransform());
  StepTranslationContext stepContext = context.addStep(transform, "ParallelDo");
  Map<TupleTag<?>, Coder<?>> outputCoders =
      context.getOutputs(transform).entrySet().stream()
          .collect(
              Collectors.toMap(
                  Map.Entry::getKey, e -> ((PCollection) e.getValue()).getCoder()));

  translateInputs(
      stepContext, context.getInput(transform), transform.getSideInputs().values(), context);
  stepContext.addOutput(transform.getMainOutputTag().getId(), context.getOutput(transform));
  String ptransformId =
      context.getSdkComponents().getPTransformIdOrThrow(context.getCurrentTransform());
  translateFn(
      stepContext,
      ptransformId,
      transform.getFn(),
      context.getInput(transform).getWindowingStrategy(),
      transform.getSideInputs().values(),
      context.getInput(transform).getCoder(),
      context,
      transform.getMainOutputTag(),
      outputCoders,
      doFnSchemaInformation,
      sideInputMapping);

  // TODO: Move this logic into translateFn once the legacy ProcessKeyedElements is removed.
  if (context.isFnApi()) {
    DoFnSignature signature = DoFnSignatures.signatureForDoFn(transform.getFn());
    if (signature.processElement().isSplittable()) {
      DoFnInvoker<?, ?> doFnInvoker = DoFnInvokers.invokerFor(transform.getFn());
      Coder<?> restrictionAndWatermarkStateCoder =
          KvCoder.of(
              doFnInvoker.invokeGetRestrictionCoder(
                  context.getInput(transform).getPipeline().getCoderRegistry()),
              doFnInvoker.invokeGetWatermarkEstimatorStateCoder(
                  context.getInput(transform).getPipeline().getCoderRegistry()));
      stepContext.addInput(
          PropertyNames.RESTRICTION_ENCODING,
          translateCoder(restrictionAndWatermarkStateCoder, context));
    }
  }
}