Java Code Examples for org.apache.beam.sdk.transforms.reflect.DoFnInvokers#invokerFor()

The following examples show how to use org.apache.beam.sdk.transforms.reflect.DoFnInvokers#invokerFor(). The originating project, source file, and license are noted above each example.
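Before the individual examples, here is a minimal, self-contained sketch of the lifecycle most of them follow: generate an invoker for a DoFn with DoFnInvokers.invokerFor(), call invokeSetup() before processing any bundles, and call invokeTeardown() when the DoFn instance is discarded. The MyDoFn class is a hypothetical placeholder, and the no-argument invokeSetup() matches the Beam version used in the examples below; actually invoking processElement additionally requires a DoFnInvoker.ArgumentProvider (normally supplied by a DoFnRunner), as the fuller examples show.

import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.reflect.DoFnInvoker;
import org.apache.beam.sdk.transforms.reflect.DoFnInvokers;

public class InvokerLifecycleSketch {

  // Hypothetical DoFn used only to illustrate the invoker lifecycle.
  static class MyDoFn extends DoFn<String, String> {
    @ProcessElement
    public void processElement(ProcessContext c) {
      c.output(c.element().toUpperCase());
    }
  }

  public static void main(String[] args) {
    // Generates (or reuses a cached) byte-code invoker for this DoFn class.
    DoFnInvoker<String, String> invoker = DoFnInvokers.invokerFor(new MyDoFn());

    // @Setup is invoked once before any bundle is processed.
    invoker.invokeSetup();

    // invokeProcessElement(...) needs a DoFnInvoker.ArgumentProvider
    // (usually provided by a DoFnRunner), so it is omitted from this sketch.

    // @Teardown is invoked once when the DoFn instance is discarded.
    invoker.invokeTeardown();
  }
}
 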
Example 1
Source File: DoTransform.java    From nemo with Apache License 2.0
@Override
public void onData(final Iterator<I> elements, final String srcVertexId) {
  final StartBundleContext startBundleContext = new StartBundleContext(doFn, serializedOptions);
  final FinishBundleContext finishBundleContext = new FinishBundleContext(doFn, outputCollector, serializedOptions);
  final ProcessContext processContext = new ProcessContext(doFn, outputCollector, sideInputs, serializedOptions);
  final DoFnInvoker invoker = DoFnInvokers.invokerFor(doFn);
  invoker.invokeSetup();
  invoker.invokeStartBundle(startBundleContext);
  elements.forEachRemaining(element -> { // No need to check for input index, since it is always 0 for DoTransform
    processContext.setElement(element);
    invoker.invokeProcessElement(processContext);
  });
  invoker.invokeFinishBundle(finishBundleContext);
  invoker.invokeTeardown();
}
 
Example 2
Source File: DoFnTester.java    From beam with Apache License 2.0
@SuppressWarnings("unchecked")
private void initializeState() throws Exception {
  checkState(state == State.UNINITIALIZED, "Already initialized");
  checkState(fn == null, "Uninitialized but fn != null");
  if (cloningBehavior.equals(CloningBehavior.DO_NOT_CLONE)) {
    fn = origFn;
  } else {
    fn =
        (DoFn<InputT, OutputT>)
            SerializableUtils.deserializeFromByteArray(
                SerializableUtils.serializeToByteArray(origFn), origFn.toString());
  }
  fnInvoker = DoFnInvokers.invokerFor(fn);
  fnInvoker.invokeSetup();
}
 
Example 3
Source File: AbstractParDoP.java    From beam with Apache License 2.0
@Override
public void init(@Nonnull Outbox outbox, @Nonnull Context context) {
  this.outbox = outbox;
  this.metricsContainer = new JetMetricsContainer(stepId, ownerId, context);

  doFnInvoker = DoFnInvokers.invokerFor(doFn);
  doFnInvoker.invokeSetup();

  if (ordinalToSideInput.isEmpty()) {
    sideInputReader = NullSideInputReader.of(Collections.emptyList());
  } else {
    bufferedItems = new SimpleInbox();
    sideInputHandler =
        new SideInputHandler(ordinalToSideInput.values(), InMemoryStateInternals.forKey(null));
    sideInputReader = sideInputHandler;
  }

  outputManager = new JetOutputManager(outbox, outputCoders, outputCollToOrdinals);

  doFnRunner =
      getDoFnRunner(
          pipelineOptions.get(),
          doFn,
          sideInputReader,
          outputManager,
          mainOutputTag,
          Lists.newArrayList(outputCollToOrdinals.keySet()),
          inputValueCoder,
          outputValueCoders,
          windowingStrategy,
          doFnSchemaInformation,
          sideInputMapping);
}
 
Example 4
Source File: AbstractDoFnTransform.java    From incubator-nemo with Apache License 2.0
@Override
public final void prepare(final Context context, final OutputCollector<WindowedValue<OutputT>> oc) {
  // deserialize pipeline option
  final NemoPipelineOptions options = serializedOptions.get().as(NemoPipelineOptions.class);
  this.outputCollector = wrapOutputCollector(oc);

  this.bundleMillis = options.getMaxBundleTimeMills();
  this.bundleSize = options.getMaxBundleSize();

  // create output manager
  outputManager = new DefaultOutputManager<>(outputCollector, mainOutputTag);

  // create side input reader
  sideInputReader = new InMemorySideInputReader(new ArrayList<>(sideInputs.values()));

  // this transform does not support state and timer.
  final StepContext stepContext = new StepContext() {
    @Override
    public StateInternals stateInternals() {
      throw new UnsupportedOperationException("Not support stateInternals in DoFnTransform");
    }

    @Override
    public TimerInternals timerInternals() {
      throw new UnsupportedOperationException("Not support timerInternals in DoFnTransform");
    }
  };

  final DoFn wrappedDoFn = wrapDoFn(doFn);

  // invoker
  doFnInvoker = DoFnInvokers.invokerFor(wrappedDoFn);
  doFnInvoker.invokeSetup();

  // DoFnRunners.simpleRunner takes care of all the hard stuff of running the DoFn;
  // this is the standard approach used by most of the Beam runners.
  doFnRunner = DoFnRunners.simpleRunner(
    options,
    wrappedDoFn,
    sideInputReader,
    outputManager,
    mainOutputTag,
    additionalOutputTags,
    stepContext,
    inputCoder,
    outputCoders,
    windowingStrategy,
    doFnSchemaInformation,
    sideInputMapping);

  pushBackRunner = sideInputs.isEmpty()
    ? null
    : SimplePushbackSideInputDoFnRunner.<InterT, OutputT>create(doFnRunner, sideInputs.values(), sideInputReader);
}
 
Example 5
Source File: ReadTest.java    From beam with Apache License 2.0
@Test
public void testInstantiationOfBoundedSourceAsSDFWrapper() {
  DoFn dofn = new Read.BoundedSourceAsSDFWrapperFn<>();
  DoFnInvokers.invokerFor(dofn);
}
 
Example 6
Source File: SplittableParDo.java    From beam with Apache License 2.0
@Setup
public void setup() {
  invoker = DoFnInvokers.invokerFor(fn);
  invoker.invokeSetup();
}
 
Example 7
Source File: SplittableParDo.java    From beam with Apache License 2.0
@Setup
public void setup() {
  invoker = DoFnInvokers.invokerFor(splittableFn);
  invoker.invokeSetup();
}
 
Example 8
Source File: SplittableParDoNaiveBounded.java    From beam with Apache License 2.0
@Setup
public void setup() {
  this.invoker = DoFnInvokers.invokerFor(fn);
  invoker.invokeSetup();
}
 
Example 9
Source File: SplittableParDoViaKeyedWorkItems.java    From beam with Apache License 2.0
@Setup
public void setup() throws Exception {
  invoker = DoFnInvokers.invokerFor(fn);
  invoker.invokeSetup();
}
 
Example 10
Source File: SimpleDoFnRunner.java    From beam with Apache License 2.0
/** Constructor. */
public SimpleDoFnRunner(
    PipelineOptions options,
    DoFn<InputT, OutputT> fn,
    SideInputReader sideInputReader,
    OutputManager outputManager,
    TupleTag<OutputT> mainOutputTag,
    List<TupleTag<?>> additionalOutputTags,
    StepContext stepContext,
    @Nullable Coder<InputT> inputCoder,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    WindowingStrategy<?, ?> windowingStrategy,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {
  this.options = options;
  this.fn = fn;
  this.signature = DoFnSignatures.getSignature(fn.getClass());
  this.observesWindow = signature.processElement().observesWindow() || !sideInputReader.isEmpty();
  this.invoker = DoFnInvokers.invokerFor(fn);
  this.sideInputReader = sideInputReader;
  this.schemaCoder =
      (inputCoder instanceof SchemaCoder) ? (SchemaCoder<InputT>) inputCoder : null;
  this.outputCoders = outputCoders;
  if (outputCoders != null && !outputCoders.isEmpty()) {
    Coder<OutputT> outputCoder = (Coder<OutputT>) outputCoders.get(mainOutputTag);
    mainOutputSchemaCoder =
        (outputCoder instanceof SchemaCoder) ? (SchemaCoder<OutputT>) outputCoder : null;
  } else {
    mainOutputSchemaCoder = null;
  }
  this.outputManager = outputManager;
  this.mainOutputTag = mainOutputTag;
  this.outputTags =
      Sets.newHashSet(FluentIterable.<TupleTag<?>>of(mainOutputTag).append(additionalOutputTags));
  this.stepContext = stepContext;

  // This is a cast of an _invariant_ coder. But we are assured by pipeline validation
  // that it really is the coder for whatever BoundedWindow subclass is provided
  @SuppressWarnings("unchecked")
  Coder<BoundedWindow> untypedCoder =
      (Coder<BoundedWindow>) windowingStrategy.getWindowFn().windowCoder();
  this.windowCoder = untypedCoder;
  this.allowedLateness = windowingStrategy.getAllowedLateness();
  this.doFnSchemaInformation = doFnSchemaInformation;
  this.sideInputMapping = sideInputMapping;
}
 
Example 11
Source File: DoFnOperator.java    From beam with Apache License 2.0
@Override
public void open() throws Exception {
  // WindowDoFnOperator needs state and timers to get its DoFn,
  // so it must wait until StateInternals and TimerInternals are ready.
  // This is called after initializeState().
  this.doFn = getDoFn();
  doFnInvoker = DoFnInvokers.invokerFor(doFn);
  doFnInvoker.invokeSetup();

  FlinkPipelineOptions options = serializedOptions.get().as(FlinkPipelineOptions.class);
  StepContext stepContext = new FlinkStepContext();
  doFnRunner =
      DoFnRunners.simpleRunner(
          options,
          doFn,
          sideInputReader,
          outputManager,
          mainOutputTag,
          additionalOutputTags,
          stepContext,
          getInputCoder(),
          outputCoders,
          windowingStrategy,
          doFnSchemaInformation,
          sideInputMapping);

  if (requiresStableInput) {
    // put this in front of the root FnRunner before any additional wrappers
    doFnRunner =
        bufferingDoFnRunner =
            BufferingDoFnRunner.create(
                doFnRunner,
                "stable-input-buffer",
                windowedInputCoder,
                windowingStrategy.getWindowFn().windowCoder(),
                getOperatorStateBackend(),
                getKeyedStateBackend(),
                options.getNumConcurrentCheckpoints());
  }
  doFnRunner = createWrappingDoFnRunner(doFnRunner, stepContext);
  earlyBindStateIfNeeded();

  if (!options.getDisableMetrics()) {
    flinkMetricContainer = new FlinkMetricContainer(getRuntimeContext());
    doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, flinkMetricContainer);
    String checkpointMetricNamespace = options.getReportCheckpointDuration();
    if (checkpointMetricNamespace != null) {
      MetricName checkpointMetric =
          MetricName.named(checkpointMetricNamespace, "checkpoint_duration");
      checkpointStats =
          new CheckpointStats(
              () ->
                  flinkMetricContainer
                      .getMetricsContainer(stepName)
                      .getDistribution(checkpointMetric));
    }
  }

  elementCount = 0L;
  lastFinishBundleTime = getProcessingTimeService().getCurrentProcessingTime();

  // Schedule timer to check timeout of finish bundle.
  long bundleCheckPeriod = Math.max(maxBundleTimeMills / 2, 1);
  checkFinishBundleTimer =
      getProcessingTimeService()
          .scheduleAtFixedRate(
              timestamp -> checkInvokeFinishBundleByTime(), bundleCheckPeriod, bundleCheckPeriod);

  if (doFn instanceof SplittableParDoViaKeyedWorkItems.ProcessFn) {
    pushbackDoFnRunner =
        new ProcessFnRunner<>((DoFnRunner) doFnRunner, sideInputs, sideInputHandler);
  } else {
    pushbackDoFnRunner =
        SimplePushbackSideInputDoFnRunner.create(doFnRunner, sideInputs, sideInputHandler);
  }
}
 
Example 12
Source File: DoFnOp.java    From beam with Apache License 2.0
@Override
public void open(
    Config config,
    Context context,
    Scheduler<KeyedTimerData<Void>> timerRegistry,
    OpEmitter<OutT> emitter) {
  this.inputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE;
  this.sideInputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE;
  this.pushbackWatermarkHold = BoundedWindow.TIMESTAMP_MAX_VALUE;
  this.currentBundleElementCount = new AtomicLong(0L);
  this.bundleStartTime = new AtomicLong(Long.MAX_VALUE);
  this.isBundleStarted = new AtomicBoolean(false);
  this.bundleWatermarkHold = null;

  final DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());
  final SamzaExecutionContext samzaExecutionContext =
      (SamzaExecutionContext) context.getApplicationContainerContext();
  this.samzaPipelineOptions = samzaExecutionContext.getPipelineOptions();
  this.maxBundleSize = samzaPipelineOptions.getMaxBundleSize();
  this.maxBundleTimeMs = samzaPipelineOptions.getMaxBundleTimeMs();
  this.bundleTimerScheduler = timerRegistry;

  if (this.maxBundleSize > 1) {
    scheduleNextBundleCheck();
  }

  final SamzaStoreStateInternals.Factory<?> nonKeyedStateInternalsFactory =
      SamzaStoreStateInternals.createStateInternalFactory(
          transformId, null, context.getTaskContext(), samzaPipelineOptions, signature);

  this.timerInternalsFactory =
      SamzaTimerInternalsFactory.createTimerInternalFactory(
          keyCoder,
          (Scheduler) timerRegistry,
          getTimerStateId(signature),
          nonKeyedStateInternalsFactory,
          windowingStrategy,
          isBounded,
          samzaPipelineOptions);

  this.sideInputHandler =
      new SideInputHandler(sideInputs, nonKeyedStateInternalsFactory.stateInternalsForKey(null));

  if (isPortable) {
    // storing events within a bundle in states
    final BagState<WindowedValue<InT>> bundledEventsBagState =
        nonKeyedStateInternalsFactory
            .stateInternalsForKey(null)
            .state(StateNamespaces.global(), StateTags.bag(bundleStateId, windowedValueCoder));
    final ExecutableStage executableStage = ExecutableStage.fromPayload(stagePayload);
    stageBundleFactory = samzaExecutionContext.getJobBundleFactory().forStage(executableStage);
    this.fnRunner =
        SamzaDoFnRunners.createPortable(
            samzaPipelineOptions,
            bundledEventsBagState,
            outputManagerFactory.create(emitter),
            stageBundleFactory,
            mainOutputTag,
            idToTupleTagMap,
            context,
            transformFullName);
  } else {
    this.fnRunner =
        SamzaDoFnRunners.create(
            samzaPipelineOptions,
            doFn,
            windowingStrategy,
            transformFullName,
            transformId,
            context,
            mainOutputTag,
            sideInputHandler,
            timerInternalsFactory,
            keyCoder,
            outputManagerFactory.create(emitter),
            inputCoder,
            sideOutputTags,
            outputCoders,
            doFnSchemaInformation,
            sideInputMapping);
  }

  this.pushbackFnRunner =
      SimplePushbackSideInputDoFnRunner.create(fnRunner, sideInputs, sideInputHandler);
  this.pushbackValues = new ArrayList<>();

  final Iterator<SamzaDoFnInvokerRegistrar> invokerReg =
      ServiceLoader.load(SamzaDoFnInvokerRegistrar.class).iterator();
  if (!invokerReg.hasNext()) {
    // use the default invoker here
    doFnInvoker = DoFnInvokers.invokerFor(doFn);
  } else {
    doFnInvoker = Iterators.getOnlyElement(invokerReg).invokerFor(doFn, context);
  }

  doFnInvoker.invokeSetup();
}
 
Example 13
Source File: DataflowPipelineTranslator.java    From beam with Apache License 2.0
private <InputT, OutputT> void translateMultiHelper(
    ParDo.MultiOutput<InputT, OutputT> transform, TranslationContext context) {
  StepTranslationContext stepContext = context.addStep(transform, "ParallelDo");
  DoFnSchemaInformation doFnSchemaInformation;
  doFnSchemaInformation =
      ParDoTranslation.getSchemaInformation(context.getCurrentTransform());
  Map<String, PCollectionView<?>> sideInputMapping =
      ParDoTranslation.getSideInputMapping(context.getCurrentTransform());
  Map<TupleTag<?>, Coder<?>> outputCoders =
      context.getOutputs(transform).entrySet().stream()
          .collect(
              Collectors.toMap(
                  Map.Entry::getKey, e -> ((PCollection) e.getValue()).getCoder()));
  translateInputs(
      stepContext,
      context.getInput(transform),
      transform.getSideInputs().values(),
      context);
  translateOutputs(context.getOutputs(transform), stepContext);
  String ptransformId =
      context.getSdkComponents().getPTransformIdOrThrow(context.getCurrentTransform());
  translateFn(
      stepContext,
      ptransformId,
      transform.getFn(),
      context.getInput(transform).getWindowingStrategy(),
      transform.getSideInputs().values(),
      context.getInput(transform).getCoder(),
      context,
      transform.getMainOutputTag(),
      outputCoders,
      doFnSchemaInformation,
      sideInputMapping);

  // TODO: Move this logic into translateFn once the legacy ProcessKeyedElements is
  // removed.
  if (context.isFnApi()) {
    DoFnSignature signature = DoFnSignatures.signatureForDoFn(transform.getFn());
    if (signature.processElement().isSplittable()) {
      DoFnInvoker<?, ?> doFnInvoker = DoFnInvokers.invokerFor(transform.getFn());
      Coder<?> restrictionAndWatermarkStateCoder =
          KvCoder.of(
              doFnInvoker.invokeGetRestrictionCoder(
                  context.getInput(transform).getPipeline().getCoderRegistry()),
              doFnInvoker.invokeGetWatermarkEstimatorStateCoder(
                  context.getInput(transform).getPipeline().getCoderRegistry()));
      stepContext.addInput(
          PropertyNames.RESTRICTION_ENCODING,
          translateCoder(restrictionAndWatermarkStateCoder, context));
    }
  }
}
 
Example 14
Source File: DataflowPipelineTranslator.java    From beam with Apache License 2.0
private <InputT, OutputT> void translateSingleHelper(
    ParDoSingle<InputT, OutputT> transform, TranslationContext context) {

  DoFnSchemaInformation doFnSchemaInformation;
  doFnSchemaInformation =
      ParDoTranslation.getSchemaInformation(context.getCurrentTransform());
  Map<String, PCollectionView<?>> sideInputMapping =
      ParDoTranslation.getSideInputMapping(context.getCurrentTransform());
  StepTranslationContext stepContext = context.addStep(transform, "ParallelDo");
  Map<TupleTag<?>, Coder<?>> outputCoders =
      context.getOutputs(transform).entrySet().stream()
          .collect(
              Collectors.toMap(
                  Map.Entry::getKey, e -> ((PCollection) e.getValue()).getCoder()));

  translateInputs(
      stepContext,
      context.getInput(transform),
      transform.getSideInputs().values(),
      context);
  stepContext.addOutput(
      transform.getMainOutputTag().getId(), context.getOutput(transform));
  String ptransformId =
      context.getSdkComponents().getPTransformIdOrThrow(context.getCurrentTransform());
  translateFn(
      stepContext,
      ptransformId,
      transform.getFn(),
      context.getInput(transform).getWindowingStrategy(),
      transform.getSideInputs().values(),
      context.getInput(transform).getCoder(),
      context,
      transform.getMainOutputTag(),
      outputCoders,
      doFnSchemaInformation,
      sideInputMapping);

  // TODO: Move this logic into translateFn once the legacy ProcessKeyedElements is
  // removed.
  if (context.isFnApi()) {
    DoFnSignature signature = DoFnSignatures.signatureForDoFn(transform.getFn());
    if (signature.processElement().isSplittable()) {
      DoFnInvoker<?, ?> doFnInvoker = DoFnInvokers.invokerFor(transform.getFn());
      Coder<?> restrictionAndWatermarkStateCoder =
          KvCoder.of(
              doFnInvoker.invokeGetRestrictionCoder(
                  context.getInput(transform).getPipeline().getCoderRegistry()),
              doFnInvoker.invokeGetWatermarkEstimatorStateCoder(
                  context.getInput(transform).getPipeline().getCoderRegistry()));
      stepContext.addInput(
          PropertyNames.RESTRICTION_ENCODING,
          translateCoder(restrictionAndWatermarkStateCoder, context));
    }
  }
}