org.apache.beam.sdk.transforms.reflect.DoFnInvokers Java Examples

The following examples show how to use org.apache.beam.sdk.transforms.reflect.DoFnInvokers. Each example is taken from an open-source project; the source file and originating project are noted above the code.
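As a quick orientation before the examples, the sketch below shows the lifecycle pattern most of them share: obtain a DoFnInvoker for a DoFn via DoFnInvokers.invokerFor, call invokeSetup before any bundle is processed, and call invokeTeardown when the instance is discarded. The DoFn and main method are hypothetical and for illustration only, assuming a Beam SDK version matching these examples, in which invokeSetup and invokeTeardown take no arguments.

import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.reflect.DoFnInvoker;
import org.apache.beam.sdk.transforms.reflect.DoFnInvokers;

public class DoFnInvokerLifecycleSketch {

  // Hypothetical DoFn, used only to have something to invoke.
  static class UpperCaseFn extends DoFn<String, String> {
    @ProcessElement
    public void processElement(@Element String element, OutputReceiver<String> out) {
      out.output(element.toUpperCase());
    }
  }

  public static void main(String[] args) {
    DoFn<String, String> fn = new UpperCaseFn();

    // Generates (and caches) a byte-code invoker for the DoFn's annotated methods.
    DoFnInvoker<String, String> invoker = DoFnInvokers.invokerFor(fn);

    // Runners call invokeSetup once before processing any bundle ...
    invoker.invokeSetup();
    try {
      // ... startBundle / processElement / finishBundle invocations would go here ...
    } finally {
      // ... and invokeTeardown when the DoFn instance is no longer needed.
      invoker.invokeTeardown();
    }
  }
}
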
Example #1
Source File: DoFnFunction.java    From beam with Apache License 2.0
@Override
public void prepare(TSetContext context) {
  initTransient();
  sideInputReader = new Twister2SideInputReader(sideInputs, context);
  outputManager.setup(mainOutput, sideOutputs);
  doFnInvoker = DoFnInvokers.tryInvokeSetupFor(doFn);

  doFnRunner =
      DoFnRunners.simpleRunner(
          pipelineOptions,
          doFn,
          sideInputReader,
          outputManager,
          mainOutput,
          sideOutputs,
          stepcontext,
          inputCoder,
          outputCoders,
          windowingStrategy,
          doFnSchemaInformation,
          sideInputMapping);
}
 
Example #2
Source File: DoFnFunction.java    From beam with Apache License 2.0
@Override
public void compute(
    Iterator<WindowedValue<InputT>> input, RecordCollector<RawUnionValue> output) {
  try {
    outputManager.clear();
    doFnRunner.startBundle();
    while (input.hasNext()) {
      doFnRunner.processElement(input.next());
    }

    doFnRunner.finishBundle();
    Iterator<RawUnionValue> outputs = outputManager.getOutputs();
    while (outputs.hasNext()) {
      output.collect(outputs.next());
    }
  } catch (final RuntimeException re) {
    DoFnInvokers.invokerFor(doFn).invokeTeardown();
    throw re;
  }
}
 
Example #3
Source File: DoFnFunction.java    From beam with Apache License 2.0
@Override
public Iterator<Tuple2<TupleTag<?>, WindowedValue<?>>> call(Iterator<WindowedValue<InputT>> iter)
    throws Exception {
  if (!wasSetupCalled && iter.hasNext()) {
    DoFnInvokers.tryInvokeSetupFor(doFn);
    wasSetupCalled = true;
  }

  DoFnOutputManager outputManager = new DoFnOutputManager();

  DoFnRunner<InputT, OutputT> doFnRunner =
      DoFnRunners.simpleRunner(
          serializableOptions.get(),
          doFn,
          CachedSideInputReader.of(new SparkSideInputReader(sideInputs, broadcastStateData)),
          outputManager,
          mainOutputTag,
          additionalOutputTags,
          new NoOpStepContext(),
          inputCoder,
          outputCoderMap,
          windowingStrategy,
          doFnSchemaInformation,
          sideInputMapping);

  DoFnRunnerWithMetrics<InputT, OutputT> doFnRunnerWithMetrics =
      new DoFnRunnerWithMetrics<>(stepName, doFnRunner, metricsAccum);

  return new ProcessContext<>(
          doFn, doFnRunnerWithMetrics, outputManager, Collections.emptyIterator())
      .processPartition(iter)
      .iterator();
}
 
Example #4
Source File: DoFnLifecycleManager.java    From beam with Apache License 2.0
@Override
public void onRemoval(RemovalNotification<Thread, DoFn<?, ?>> notification) {
  try {
    DoFnInvokers.invokerFor(notification.getValue()).invokeTeardown();
  } catch (Exception e) {
    thrownOnTeardown.put(notification.getKey(), e);
  }
}
 
Example #5
Source File: DoFnLifecycleManager.java    From beam with Apache License 2.0
@Override
public DoFn<?, ?> load(Thread key) throws Exception {
  DoFn<?, ?> fn =
      (DoFn<?, ?>)
          SerializableUtils.deserializeFromByteArray(
              original, "DoFn Copy in thread " + key.getName());
  DoFnInvokers.tryInvokeSetupFor(fn);
  return fn;
}
 
Example #6
Source File: FlinkStatefulDoFnFunction.java    From beam with Apache License 2.0
@Override
public void open(Configuration parameters) {
  // Note that the SerializablePipelineOptions already initialize FileSystems in the readObject()
  // deserialization method. However, this is a hack, and we want to properly initialize the
  // options where they are needed.
  FileSystems.setDefaultPipelineOptions(serializedOptions.get());
  metricContainer = new FlinkMetricContainer(getRuntimeContext());
  doFnInvoker = DoFnInvokers.tryInvokeSetupFor(dofn);
}
 
Example #7
Source File: AbstractParDoP.java    From beam with Apache License 2.0
@Override
public void init(@Nonnull Outbox outbox, @Nonnull Context context) {
  this.outbox = outbox;
  this.metricsContainer = new JetMetricsContainer(stepId, ownerId, context);

  doFnInvoker = DoFnInvokers.invokerFor(doFn);
  doFnInvoker.invokeSetup();

  if (ordinalToSideInput.isEmpty()) {
    sideInputReader = NullSideInputReader.of(Collections.emptyList());
  } else {
    bufferedItems = new SimpleInbox();
    sideInputHandler =
        new SideInputHandler(ordinalToSideInput.values(), InMemoryStateInternals.forKey(null));
    sideInputReader = sideInputHandler;
  }

  outputManager = new JetOutputManager(outbox, outputCoders, outputCollToOrdinals);

  doFnRunner =
      getDoFnRunner(
          pipelineOptions.get(),
          doFn,
          sideInputReader,
          outputManager,
          mainOutputTag,
          Lists.newArrayList(outputCollToOrdinals.keySet()),
          inputValueCoder,
          outputValueCoders,
          windowingStrategy,
          doFnSchemaInformation,
          sideInputMapping);
}
 
Example #8
Source File: DoTransform.java    From nemo with Apache License 2.0
@Override
public void onData(final Iterator<I> elements, final String srcVertexId) {
  final StartBundleContext startBundleContext = new StartBundleContext(doFn, serializedOptions);
  final FinishBundleContext finishBundleContext = new FinishBundleContext(doFn, outputCollector, serializedOptions);
  final ProcessContext processContext = new ProcessContext(doFn, outputCollector, sideInputs, serializedOptions);
  final DoFnInvoker invoker = DoFnInvokers.invokerFor(doFn);
  invoker.invokeSetup();
  invoker.invokeStartBundle(startBundleContext);
  elements.forEachRemaining(element -> { // No need to check for input index, since it is always 0 for DoTransform
    processContext.setElement(element);
    invoker.invokeProcessElement(processContext);
  });
  invoker.invokeFinishBundle(finishBundleContext);
  invoker.invokeTeardown();
}
 
Example #9
Source File: DoFnTester.java    From beam with Apache License 2.0
@SuppressWarnings("unchecked")
private void initializeState() throws Exception {
  checkState(state == State.UNINITIALIZED, "Already initialized");
  checkState(fn == null, "Uninitialized but fn != null");
  if (cloningBehavior.equals(CloningBehavior.DO_NOT_CLONE)) {
    fn = origFn;
  } else {
    fn =
        (DoFn<InputT, OutputT>)
            SerializableUtils.deserializeFromByteArray(
                SerializableUtils.serializeToByteArray(origFn), origFn.toString());
  }
  fnInvoker = DoFnInvokers.invokerFor(fn);
  fnInvoker.invokeSetup();
}
 
Example #10
Source File: SplittableParDo.java    From beam with Apache License 2.0
@Override
public PCollectionTuple expand(PCollection<InputT> input) {
  Coder<RestrictionT> restrictionCoder =
      DoFnInvokers.invokerFor(doFn)
          .invokeGetRestrictionCoder(input.getPipeline().getCoderRegistry());
  Coder<WatermarkEstimatorStateT> watermarkEstimatorStateCoder =
      DoFnInvokers.invokerFor(doFn)
          .invokeGetWatermarkEstimatorStateCoder(input.getPipeline().getCoderRegistry());
  Coder<KV<InputT, RestrictionT>> splitCoder = KvCoder.of(input.getCoder(), restrictionCoder);

  PCollection<KV<byte[], KV<InputT, RestrictionT>>> keyedRestrictions =
      input
          .apply(
              "Pair with initial restriction",
              ParDo.of(new PairWithRestrictionFn<InputT, OutputT, RestrictionT>(doFn)))
          .setCoder(splitCoder)
          .apply("Split restriction", ParDo.of(new SplitRestrictionFn<>(doFn)))
          .setCoder(splitCoder)
          // ProcessFn requires all input elements to be in a single window and have a single
          // element per work item. This must precede the unique keying so each key has a single
          // associated element.
          .apply("Explode windows", ParDo.of(new ExplodeWindowsFn<>()))
          .apply("Assign unique key", WithKeys.of(new RandomUniqueKeyFn<>()));

  return keyedRestrictions.apply(
      "ProcessKeyedElements",
      new ProcessKeyedElements<>(
          doFn,
          input.getCoder(),
          restrictionCoder,
          watermarkEstimatorStateCoder,
          (WindowingStrategy<InputT, ?>) input.getWindowingStrategy(),
          sideInputs,
          mainOutputTag,
          additionalOutputTags,
          outputTagsToCoders));
}
 
Example #11
Source File: ReadTest.java    From beam with Apache License 2.0
@Test
public void testInstantiationOfBoundedSourceAsSDFWrapper() {
  DoFn dofn = new Read.BoundedSourceAsSDFWrapperFn<>();
  DoFnInvokers.invokerFor(dofn);
}
 
Example #12
Source File: DoFnInstanceManagers.java    From beam with Apache License 2.0
@Override
public void abort(DoFnInfo<?, ?> fnInfo) throws Exception {
  if (fnInfo != null && fnInfo.getDoFn() != null) {
    DoFnInvokers.invokerFor(fnInfo.getDoFn()).invokeTeardown();
  }
}
 
Example #13
Source File: DoFnInstanceManagers.java    From beam with Apache License 2.0
private DoFnInfo<?, ?> deserializeCopy() throws Exception {
  DoFnInfo<?, ?> fn;
  fn = (DoFnInfo<?, ?>) SerializableUtils.deserializeFromByteArray(serializedFnInfo, null);
  DoFnInvokers.invokerFor(fn.getDoFn()).invokeSetup();
  return fn;
}
 
Example #14
Source File: BatchStatefulParDoOverrides.java    From beam with Apache License 2.0
@Teardown
public void teardown() {
  DoFnInvokers.invokerFor(underlyingDoFn).invokeTeardown();
}
 
Example #15
Source File: BatchStatefulParDoOverrides.java    From beam with Apache License 2.0
@Setup
public void setup() {
  DoFnInvokers.invokerFor(underlyingDoFn).invokeSetup();
}
 
Example #16
Source File: DataflowPipelineTranslator.java    From beam with Apache License 2.0
private <InputT, OutputT> void translateSingleHelper(
    ParDoSingle<InputT, OutputT> transform, TranslationContext context) {

  DoFnSchemaInformation doFnSchemaInformation;
  doFnSchemaInformation =
      ParDoTranslation.getSchemaInformation(context.getCurrentTransform());
  Map<String, PCollectionView<?>> sideInputMapping =
      ParDoTranslation.getSideInputMapping(context.getCurrentTransform());
  StepTranslationContext stepContext = context.addStep(transform, "ParallelDo");
  Map<TupleTag<?>, Coder<?>> outputCoders =
      context.getOutputs(transform).entrySet().stream()
          .collect(
              Collectors.toMap(
                  Map.Entry::getKey, e -> ((PCollection) e.getValue()).getCoder()));

  translateInputs(
      stepContext,
      context.getInput(transform),
      transform.getSideInputs().values(),
      context);
  stepContext.addOutput(
      transform.getMainOutputTag().getId(), context.getOutput(transform));
  String ptransformId =
      context.getSdkComponents().getPTransformIdOrThrow(context.getCurrentTransform());
  translateFn(
      stepContext,
      ptransformId,
      transform.getFn(),
      context.getInput(transform).getWindowingStrategy(),
      transform.getSideInputs().values(),
      context.getInput(transform).getCoder(),
      context,
      transform.getMainOutputTag(),
      outputCoders,
      doFnSchemaInformation,
      sideInputMapping);

  // TODO: Move this logic into translateFn once the legacy ProcessKeyedElements is
  // removed.
  if (context.isFnApi()) {
    DoFnSignature signature = DoFnSignatures.signatureForDoFn(transform.getFn());
    if (signature.processElement().isSplittable()) {
      DoFnInvoker<?, ?> doFnInvoker = DoFnInvokers.invokerFor(transform.getFn());
      Coder<?> restrictionAndWatermarkStateCoder =
          KvCoder.of(
              doFnInvoker.invokeGetRestrictionCoder(
                  context.getInput(transform).getPipeline().getCoderRegistry()),
              doFnInvoker.invokeGetWatermarkEstimatorStateCoder(
                  context.getInput(transform).getPipeline().getCoderRegistry()));
      stepContext.addInput(
          PropertyNames.RESTRICTION_ENCODING,
          translateCoder(restrictionAndWatermarkStateCoder, context));
    }
  }
}
 
Example #17
Source File: DataflowPipelineTranslator.java    From beam with Apache License 2.0
private <InputT, OutputT> void translateMultiHelper(
    ParDo.MultiOutput<InputT, OutputT> transform, TranslationContext context) {
  StepTranslationContext stepContext = context.addStep(transform, "ParallelDo");
  DoFnSchemaInformation doFnSchemaInformation;
  doFnSchemaInformation =
      ParDoTranslation.getSchemaInformation(context.getCurrentTransform());
  Map<String, PCollectionView<?>> sideInputMapping =
      ParDoTranslation.getSideInputMapping(context.getCurrentTransform());
  Map<TupleTag<?>, Coder<?>> outputCoders =
      context.getOutputs(transform).entrySet().stream()
          .collect(
              Collectors.toMap(
                  Map.Entry::getKey, e -> ((PCollection) e.getValue()).getCoder()));
  translateInputs(
      stepContext,
      context.getInput(transform),
      transform.getSideInputs().values(),
      context);
  translateOutputs(context.getOutputs(transform), stepContext);
  String ptransformId =
      context.getSdkComponents().getPTransformIdOrThrow(context.getCurrentTransform());
  translateFn(
      stepContext,
      ptransformId,
      transform.getFn(),
      context.getInput(transform).getWindowingStrategy(),
      transform.getSideInputs().values(),
      context.getInput(transform).getCoder(),
      context,
      transform.getMainOutputTag(),
      outputCoders,
      doFnSchemaInformation,
      sideInputMapping);

  // TODO: Move this logic into translateFn once the legacy ProcessKeyedElements is
  // removed.
  if (context.isFnApi()) {
    DoFnSignature signature = DoFnSignatures.signatureForDoFn(transform.getFn());
    if (signature.processElement().isSplittable()) {
      DoFnInvoker<?, ?> doFnInvoker = DoFnInvokers.invokerFor(transform.getFn());
      Coder<?> restrictionAndWatermarkStateCoder =
          KvCoder.of(
              doFnInvoker.invokeGetRestrictionCoder(
                  context.getInput(transform).getPipeline().getCoderRegistry()),
              doFnInvoker.invokeGetWatermarkEstimatorStateCoder(
                  context.getInput(transform).getPipeline().getCoderRegistry()));
      stepContext.addInput(
          PropertyNames.RESTRICTION_ENCODING,
          translateCoder(restrictionAndWatermarkStateCoder, context));
    }
  }
}
 
Example #18
Source File: DoFnOp.java    From beam with Apache License 2.0
@Override
public void open(
    Config config,
    Context context,
    Scheduler<KeyedTimerData<Void>> timerRegistry,
    OpEmitter<OutT> emitter) {
  this.inputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE;
  this.sideInputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE;
  this.pushbackWatermarkHold = BoundedWindow.TIMESTAMP_MAX_VALUE;
  this.currentBundleElementCount = new AtomicLong(0L);
  this.bundleStartTime = new AtomicLong(Long.MAX_VALUE);
  this.isBundleStarted = new AtomicBoolean(false);
  this.bundleWatermarkHold = null;

  final DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());
  final SamzaExecutionContext samzaExecutionContext =
      (SamzaExecutionContext) context.getApplicationContainerContext();
  this.samzaPipelineOptions = samzaExecutionContext.getPipelineOptions();
  this.maxBundleSize = samzaPipelineOptions.getMaxBundleSize();
  this.maxBundleTimeMs = samzaPipelineOptions.getMaxBundleTimeMs();
  this.bundleTimerScheduler = timerRegistry;

  if (this.maxBundleSize > 1) {
    scheduleNextBundleCheck();
  }

  final SamzaStoreStateInternals.Factory<?> nonKeyedStateInternalsFactory =
      SamzaStoreStateInternals.createStateInternalFactory(
          transformId, null, context.getTaskContext(), samzaPipelineOptions, signature);

  this.timerInternalsFactory =
      SamzaTimerInternalsFactory.createTimerInternalFactory(
          keyCoder,
          (Scheduler) timerRegistry,
          getTimerStateId(signature),
          nonKeyedStateInternalsFactory,
          windowingStrategy,
          isBounded,
          samzaPipelineOptions);

  this.sideInputHandler =
      new SideInputHandler(sideInputs, nonKeyedStateInternalsFactory.stateInternalsForKey(null));

  if (isPortable) {
    // storing events within a bundle in states
    final BagState<WindowedValue<InT>> bundledEventsBagState =
        nonKeyedStateInternalsFactory
            .stateInternalsForKey(null)
            .state(StateNamespaces.global(), StateTags.bag(bundleStateId, windowedValueCoder));
    final ExecutableStage executableStage = ExecutableStage.fromPayload(stagePayload);
    stageBundleFactory = samzaExecutionContext.getJobBundleFactory().forStage(executableStage);
    this.fnRunner =
        SamzaDoFnRunners.createPortable(
            samzaPipelineOptions,
            bundledEventsBagState,
            outputManagerFactory.create(emitter),
            stageBundleFactory,
            mainOutputTag,
            idToTupleTagMap,
            context,
            transformFullName);
  } else {
    this.fnRunner =
        SamzaDoFnRunners.create(
            samzaPipelineOptions,
            doFn,
            windowingStrategy,
            transformFullName,
            transformId,
            context,
            mainOutputTag,
            sideInputHandler,
            timerInternalsFactory,
            keyCoder,
            outputManagerFactory.create(emitter),
            inputCoder,
            sideOutputTags,
            outputCoders,
            doFnSchemaInformation,
            sideInputMapping);
  }

  this.pushbackFnRunner =
      SimplePushbackSideInputDoFnRunner.create(fnRunner, sideInputs, sideInputHandler);
  this.pushbackValues = new ArrayList<>();

  final Iterator<SamzaDoFnInvokerRegistrar> invokerReg =
      ServiceLoader.load(SamzaDoFnInvokerRegistrar.class).iterator();
  if (!invokerReg.hasNext()) {
    // use the default invoker here
    doFnInvoker = DoFnInvokers.invokerFor(doFn);
  } else {
    doFnInvoker = Iterators.getOnlyElement(invokerReg).invokerFor(doFn, context);
  }

  doFnInvoker.invokeSetup();
}
 
Example #19
Source File: FlinkDoFnFunction.java    From beam with Apache License 2.0
@Override
public void open(Configuration parameters) {
  // Note that the SerializablePipelineOptions already initialize FileSystems in the readObject()
  // deserialization method. However, this is a hack, and we want to properly initialize the
  // options where they are needed.
  FileSystems.setDefaultPipelineOptions(serializedOptions.get());
  doFnInvoker = DoFnInvokers.tryInvokeSetupFor(doFn);
  metricContainer = new FlinkMetricContainer(getRuntimeContext());

  // setup DoFnRunner
  final RuntimeContext runtimeContext = getRuntimeContext();
  final DoFnRunners.OutputManager outputManager;
  if (outputMap.size() == 1) {
    outputManager = new DoFnOutputManager();
  } else {
    // it has some additional outputs
    outputManager = new MultiDoFnOutputManager(outputMap);
  }

  final List<TupleTag<?>> additionalOutputTags = Lists.newArrayList(outputMap.keySet());

  DoFnRunner<InputT, OutputT> doFnRunner =
      DoFnRunners.simpleRunner(
          serializedOptions.get(),
          doFn,
          new FlinkSideInputReader(sideInputs, runtimeContext),
          outputManager,
          mainOutputTag,
          additionalOutputTags,
          new FlinkNoOpStepContext(),
          inputCoder,
          outputCoderMap,
          windowingStrategy,
          doFnSchemaInformation,
          sideInputMapping);

  if (!serializedOptions.get().as(FlinkPipelineOptions.class).getDisableMetrics()) {
    doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, metricContainer);
  }

  this.collectorAware = (CollectorAware) outputManager;
  this.doFnRunner = doFnRunner;
}
 
Example #20
Source File: SplittableParDo.java    From beam with Apache License 2.0
@Setup
public void setup() {
  invoker = DoFnInvokers.invokerFor(fn);
  invoker.invokeSetup();
}
 
Example #21
Source File: DoFnOperator.java    From beam with Apache License 2.0
@Override
public void open() throws Exception {
  // WindowDoFnOperator need use state and timer to get DoFn.
  // So must wait StateInternals and TimerInternals ready.
  // This will be called after initializeState()
  this.doFn = getDoFn();
  doFnInvoker = DoFnInvokers.invokerFor(doFn);
  doFnInvoker.invokeSetup();

  FlinkPipelineOptions options = serializedOptions.get().as(FlinkPipelineOptions.class);
  StepContext stepContext = new FlinkStepContext();
  doFnRunner =
      DoFnRunners.simpleRunner(
          options,
          doFn,
          sideInputReader,
          outputManager,
          mainOutputTag,
          additionalOutputTags,
          stepContext,
          getInputCoder(),
          outputCoders,
          windowingStrategy,
          doFnSchemaInformation,
          sideInputMapping);

  if (requiresStableInput) {
    // put this in front of the root FnRunner before any additional wrappers
    doFnRunner =
        bufferingDoFnRunner =
            BufferingDoFnRunner.create(
                doFnRunner,
                "stable-input-buffer",
                windowedInputCoder,
                windowingStrategy.getWindowFn().windowCoder(),
                getOperatorStateBackend(),
                getKeyedStateBackend(),
                options.getNumConcurrentCheckpoints());
  }
  doFnRunner = createWrappingDoFnRunner(doFnRunner, stepContext);
  earlyBindStateIfNeeded();

  if (!options.getDisableMetrics()) {
    flinkMetricContainer = new FlinkMetricContainer(getRuntimeContext());
    doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, flinkMetricContainer);
    String checkpointMetricNamespace = options.getReportCheckpointDuration();
    if (checkpointMetricNamespace != null) {
      MetricName checkpointMetric =
          MetricName.named(checkpointMetricNamespace, "checkpoint_duration");
      checkpointStats =
          new CheckpointStats(
              () ->
                  flinkMetricContainer
                      .getMetricsContainer(stepName)
                      .getDistribution(checkpointMetric));
    }
  }

  elementCount = 0L;
  lastFinishBundleTime = getProcessingTimeService().getCurrentProcessingTime();

  // Schedule timer to check timeout of finish bundle.
  long bundleCheckPeriod = Math.max(maxBundleTimeMills / 2, 1);
  checkFinishBundleTimer =
      getProcessingTimeService()
          .scheduleAtFixedRate(
              timestamp -> checkInvokeFinishBundleByTime(), bundleCheckPeriod, bundleCheckPeriod);

  if (doFn instanceof SplittableParDoViaKeyedWorkItems.ProcessFn) {
    pushbackDoFnRunner =
        new ProcessFnRunner<>((DoFnRunner) doFnRunner, sideInputs, sideInputHandler);
  } else {
    pushbackDoFnRunner =
        SimplePushbackSideInputDoFnRunner.create(doFnRunner, sideInputs, sideInputHandler);
  }
}
 
Example #22
Source File: SparkProcessContext.java    From beam with Apache License 2.0
@Override
protected OutputT computeNext() {
  try {
    // Process each element from the (input) iterator, which produces, zero, one or more
    // output elements (of type V) in the output iterator. Note that the output
    // collection (and iterator) is reset between each call to processElement, so the
    // collection only holds the output values for each call to processElement, rather
    // than for the whole partition (which would use too much memory).
    if (!isBundleStarted) {
      isBundleStarted = true;
      // call startBundle() before beginning to process the partition.
      doFnRunner.startBundle();
    }

    while (true) {
      if (outputIterator.hasNext()) {
        return outputIterator.next();
      }

      clearOutput();
      if (inputIterator.hasNext()) {
        // grab the next element and process it.
        doFnRunner.processElement(inputIterator.next());
        outputIterator = getOutputIterator();
      } else if (timerDataIterator.hasNext()) {
        fireTimer(timerDataIterator.next());
        outputIterator = getOutputIterator();
      } else {
        // no more input to consume, but finishBundle can produce more output
        if (!isBundleFinished) {
          isBundleFinished = true;
          doFnRunner.finishBundle();
          outputIterator = getOutputIterator();
          continue; // try to consume outputIterator from start of loop
        }
        DoFnInvokers.invokerFor(doFn).invokeTeardown();
        return endOfData();
      }
    }
  } catch (final RuntimeException re) {
    DoFnInvokers.invokerFor(doFn).invokeTeardown();
    throw re;
  }
}
 
Example #23
Source File: MultiDoFnFunction.java    From beam with Apache License 2.0
@Override
public Iterator<Tuple2<TupleTag<?>, WindowedValue<?>>> call(Iterator<WindowedValue<InputT>> iter)
    throws Exception {
  if (!wasSetupCalled && iter.hasNext()) {
    DoFnInvokers.tryInvokeSetupFor(doFn);
    wasSetupCalled = true;
  }

  DoFnOutputManager outputManager = new DoFnOutputManager();

  final InMemoryTimerInternals timerInternals;
  final StepContext context;
  // Now only implements the StatefulParDo in Batch mode.
  if (stateful) {
    Object key = null;
    if (iter.hasNext()) {
      WindowedValue<InputT> currentValue = iter.next();
      key = ((KV) currentValue.getValue()).getKey();
      iter = Iterators.concat(Iterators.singletonIterator(currentValue), iter);
    }
    final InMemoryStateInternals<?> stateInternals = InMemoryStateInternals.forKey(key);
    timerInternals = new InMemoryTimerInternals();
    context =
        new StepContext() {
          @Override
          public StateInternals stateInternals() {
            return stateInternals;
          }

          @Override
          public TimerInternals timerInternals() {
            return timerInternals;
          }
        };
  } else {
    timerInternals = null;
    context = new SparkProcessContext.NoOpStepContext();
  }

  final DoFnRunner<InputT, OutputT> doFnRunner =
      DoFnRunners.simpleRunner(
          options.get(),
          doFn,
          CachedSideInputReader.of(new SparkSideInputReader(sideInputs)),
          outputManager,
          mainOutputTag,
          additionalOutputTags,
          context,
          inputCoder,
          outputCoders,
          windowingStrategy,
          doFnSchemaInformation,
          sideInputMapping);

  DoFnRunnerWithMetrics<InputT, OutputT> doFnRunnerWithMetrics =
      new DoFnRunnerWithMetrics<>(stepName, doFnRunner, metricsAccum);

  return new SparkProcessContext<>(
          doFn,
          doFnRunnerWithMetrics,
          outputManager,
          stateful ? new TimerDataIterator(timerInternals) : Collections.emptyIterator())
      .processPartition(iter)
      .iterator();
}
 
Example #24
Source File: AbstractDoFnTransform.java    From incubator-nemo with Apache License 2.0
@Override
public final void prepare(final Context context, final OutputCollector<WindowedValue<OutputT>> oc) {
  // deserialize pipeline option
  final NemoPipelineOptions options = serializedOptions.get().as(NemoPipelineOptions.class);
  this.outputCollector = wrapOutputCollector(oc);

  this.bundleMillis = options.getMaxBundleTimeMills();
  this.bundleSize = options.getMaxBundleSize();

  // create output manager
  outputManager = new DefaultOutputManager<>(outputCollector, mainOutputTag);

  // create side input reader
  sideInputReader = new InMemorySideInputReader(new ArrayList<>(sideInputs.values()));

  // this transform does not support state and timer.
  final StepContext stepContext = new StepContext() {
    @Override
    public StateInternals stateInternals() {
      throw new UnsupportedOperationException("Not support stateInternals in DoFnTransform");
    }

    @Override
    public TimerInternals timerInternals() {
      throw new UnsupportedOperationException("Not support timerInternals in DoFnTransform");
    }
  };

  final DoFn wrappedDoFn = wrapDoFn(doFn);

  // invoker
  doFnInvoker = DoFnInvokers.invokerFor(wrappedDoFn);
  doFnInvoker.invokeSetup();

  // DoFnRunners.simpleRunner takes care of all the hard stuff of running the DoFn
  // and that this approach is the standard used by most of the Beam runners
  doFnRunner = DoFnRunners.simpleRunner(
    options,
    wrappedDoFn,
    sideInputReader,
    outputManager,
    mainOutputTag,
    additionalOutputTags,
    stepContext,
    inputCoder,
    outputCoders,
    windowingStrategy,
    doFnSchemaInformation,
    sideInputMapping);

  pushBackRunner = sideInputs.isEmpty()
    ? null
    : SimplePushbackSideInputDoFnRunner.<InterT, OutputT>create(doFnRunner, sideInputs.values(), sideInputReader);
}
 
Example #25
Source File: ProcessContext.java    From beam with Apache License 2.0
@Override
protected OutputT computeNext() {
  try {
    // Process each element from the (input) iterator, which produces, zero, one or more
    // output elements (of type V) in the output iterator. Note that the output
    // collection (and iterator) is reset between each call to processElement, so the
    // collection only holds the output values for each call to processElement, rather
    // than for the whole partition (which would use too much memory).
    if (!isBundleStarted) {
      isBundleStarted = true;
      // call startBundle() before beginning to process the partition.
      doFnRunner.startBundle();
    }

    while (true) {
      if (outputIterator.hasNext()) {
        return outputIterator.next();
      }

      clearOutput();
      if (inputIterator.hasNext()) {
        // grab the next element and process it.
        doFnRunner.processElement(inputIterator.next());
        outputIterator = getOutputIterator();
      } else if (timerDataIterator.hasNext()) {
        outputIterator = getOutputIterator();
      } else {
        // no more input to consume, but finishBundle can produce more output
        if (!isBundleFinished) {
          isBundleFinished = true;
          doFnRunner.finishBundle();
          outputIterator = getOutputIterator();
          continue; // try to consume outputIterator from start of loop
        }
        DoFnInvokers.invokerFor(doFn).invokeTeardown();
        return endOfData();
      }
    }
  } catch (final RuntimeException re) {
    DoFnInvokers.invokerFor(doFn).invokeTeardown();
    throw re;
  }
}
 
Example #26
Source File: SimpleDoFnRunner.java    From beam with Apache License 2.0
/** Constructor. */
public SimpleDoFnRunner(
    PipelineOptions options,
    DoFn<InputT, OutputT> fn,
    SideInputReader sideInputReader,
    OutputManager outputManager,
    TupleTag<OutputT> mainOutputTag,
    List<TupleTag<?>> additionalOutputTags,
    StepContext stepContext,
    @Nullable Coder<InputT> inputCoder,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    WindowingStrategy<?, ?> windowingStrategy,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {
  this.options = options;
  this.fn = fn;
  this.signature = DoFnSignatures.getSignature(fn.getClass());
  this.observesWindow = signature.processElement().observesWindow() || !sideInputReader.isEmpty();
  this.invoker = DoFnInvokers.invokerFor(fn);
  this.sideInputReader = sideInputReader;
  this.schemaCoder =
      (inputCoder instanceof SchemaCoder) ? (SchemaCoder<InputT>) inputCoder : null;
  this.outputCoders = outputCoders;
  if (outputCoders != null && !outputCoders.isEmpty()) {
    Coder<OutputT> outputCoder = (Coder<OutputT>) outputCoders.get(mainOutputTag);
    mainOutputSchemaCoder =
        (outputCoder instanceof SchemaCoder) ? (SchemaCoder<OutputT>) outputCoder : null;
  } else {
    mainOutputSchemaCoder = null;
  }
  this.outputManager = outputManager;
  this.mainOutputTag = mainOutputTag;
  this.outputTags =
      Sets.newHashSet(FluentIterable.<TupleTag<?>>of(mainOutputTag).append(additionalOutputTags));
  this.stepContext = stepContext;

  // This is a cast of an _invariant_ coder. But we are assured by pipeline validation
  // that it really is the coder for whatever BoundedWindow subclass is provided
  @SuppressWarnings("unchecked")
  Coder<BoundedWindow> untypedCoder =
      (Coder<BoundedWindow>) windowingStrategy.getWindowFn().windowCoder();
  this.windowCoder = untypedCoder;
  this.allowedLateness = windowingStrategy.getAllowedLateness();
  this.doFnSchemaInformation = doFnSchemaInformation;
  this.sideInputMapping = sideInputMapping;
}
 
Example #27
Source File: SplittableParDoViaKeyedWorkItems.java    From beam with Apache License 2.0
@Setup
public void setup() throws Exception {
  invoker = DoFnInvokers.invokerFor(fn);
  invoker.invokeSetup();
}
 
Example #28
Source File: SplittableParDoNaiveBounded.java    From beam with Apache License 2.0
@Setup
public void setup() {
  this.invoker = DoFnInvokers.invokerFor(fn);
  invoker.invokeSetup();
}
 
Example #29
Source File: SplittableParDo.java    From beam with Apache License 2.0
@Setup
public void setup() {
  invoker = DoFnInvokers.invokerFor(splittableFn);
  invoker.invokeSetup();
}