Java Code Examples for org.apache.beam.model.pipeline.v1.RunnerApi#ExecutableStagePayload

The following examples show how to use org.apache.beam.model.pipeline.v1.RunnerApi#ExecutableStagePayload. You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to go to the original project or source file. You can also check out related API usage in the sidebar.
Example 1
Source File: SparkExecutableStageFunction.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the function by storing each supplied collaborator in the instance field of the same
 * name. No argument is validated, copied, or otherwise transformed here.
 *
 * @param stagePayload proto payload of the executable stage
 * @param jobInfo job information object kept for later use
 * @param outputMap String-to-Integer map kept as given
 * @param contextFactory factory kept for later use
 * @param sideInputs map from String key to a (broadcast of encoded bytes, coder) pair, kept as
 *     given
 * @param metricsAccumulator metrics accumulator kept as given
 * @param windowCoder window coder kept as given
 */
SparkExecutableStageFunction(
    RunnerApi.ExecutableStagePayload stagePayload,
    JobInfo jobInfo,
    Map<String, Integer> outputMap,
    SparkExecutableStageContextFactory contextFactory,
    Map<String, Tuple2<Broadcast<List<byte[]>>, WindowedValueCoder<SideInputT>>> sideInputs,
    MetricsContainerStepMapAccumulator metricsAccumulator,
    Coder windowCoder) {
  // Stage description and job-level information.
  this.jobInfo = jobInfo;
  this.stagePayload = stagePayload;
  this.contextFactory = contextFactory;

  // Input/output wiring.
  this.sideInputs = sideInputs;
  this.outputMap = outputMap;
  this.windowCoder = windowCoder;

  // Metrics reporting.
  this.metricsAccumulator = metricsAccumulator;
}
 
Example 2
Source File: SparkBatchPortablePipelineTranslator.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Broadcast the side inputs of an executable stage, broadcasting each distinct PCollection only
 * once even if several side inputs refer to it. *This can be expensive.*
 *
 * @param stagePayload stage whose side input list and components are read
 * @param context translation context handed through to {@code broadcastSideInput}
 * @return Immutable map from PCollection ID to Spark broadcast variable and coder to decode its
 *     contents.
 */
private static <SideInputT>
    ImmutableMap<String, Tuple2<Broadcast<List<byte[]>>, WindowedValueCoder<SideInputT>>>
        broadcastSideInputs(
            RunnerApi.ExecutableStagePayload stagePayload, SparkTranslationContext context) {
  // The components are the same for every side input, so look them up a single time.
  RunnerApi.Components components = stagePayload.getComponents();
  Map<String, Tuple2<Broadcast<List<byte[]>>, WindowedValueCoder<SideInputT>>> broadcastsById =
      new HashMap<>();

  for (SideInputId id : stagePayload.getSideInputsList()) {
    String pCollectionId =
        components.getTransformsOrThrow(id.getTransformId()).getInputsOrThrow(id.getLocalName());
    if (!broadcastsById.containsKey(pCollectionId)) {
      // Only broadcast a PCollection the first time it is encountered.
      Tuple2<Broadcast<List<byte[]>>, WindowedValueCoder<SideInputT>> broadcastAndCoder =
          broadcastSideInput(pCollectionId, components, context);
      broadcastsById.put(pCollectionId, broadcastAndCoder);
    }
  }
  return ImmutableMap.copyOf(broadcastsById);
}
 
Example 3
Source File: FlinkExecutableStageFunction.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the function by storing the supplied collaborators in the matching instance fields.
 * The pipeline options are wrapped in a {@code SerializablePipelineOptions} before being stored;
 * every other argument is stored as given.
 *
 * @param stepName name of the step
 * @param pipelineOptions pipeline options, wrapped in {@code SerializablePipelineOptions}
 * @param stagePayload proto payload of the executable stage
 * @param jobInfo job information object kept for later use
 * @param outputMap String-to-Integer map kept as given
 * @param contextFactory factory kept for later use
 * @param windowCoder window coder kept as given
 */
public FlinkExecutableStageFunction(
    String stepName,
    PipelineOptions pipelineOptions,
    RunnerApi.ExecutableStagePayload stagePayload,
    JobInfo jobInfo,
    Map<String, Integer> outputMap,
    FlinkExecutableStageContextFactory contextFactory,
    Coder windowCoder) {
  // Stage description and job-level information.
  this.stepName = stepName;
  this.stagePayload = stagePayload;
  this.jobInfo = jobInfo;
  this.contextFactory = contextFactory;

  // Output wiring and window encoding.
  this.outputMap = outputMap;
  this.windowCoder = windowCoder;

  // The options are stored in their serializable wrapper rather than directly.
  this.pipelineOptions = new SerializablePipelineOptions(pipelineOptions);
}
 
Example 4
Source File: FlinkStreamingPortablePipelineTranslator.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Builds an insertion-ordered map from each side input ID of the stage to a {@code
 * RunnerPCollectionView} describing that side input.
 *
 * <p>For every side input, the PCollection ID is resolved through the owning transform's inputs
 * in {@code components}, the PCollection's windowing strategy is rehydrated from its proto, and
 * the instantiated coder is re-wrapped so that its value coder is an {@code IterableCoder} of the
 * original value coder.
 *
 * @param stagePayload stage whose side input list is iterated
 * @param components components used to resolve transforms, PCollections, windowing strategies
 *     and coders
 * @return map from side input ID to its view, in the iteration order of the side input list
 * @throws IllegalStateException if a windowing strategy proto cannot be rehydrated
 */
private static LinkedHashMap<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>>
    getSideInputIdToPCollectionViewMap(
        RunnerApi.ExecutableStagePayload stagePayload, RunnerApi.Components components) {

  RehydratedComponents rehydrated = RehydratedComponents.forComponents(components);

  LinkedHashMap<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>> viewsById =
      new LinkedHashMap<>();
  // for PCollectionView compatibility, not used to transform materialization
  ViewFn<Iterable<WindowedValue<?>>, ?> placeholderViewFn =
      (ViewFn)
          new PCollectionViews.MultimapViewFn<>(
              (PCollectionViews.TypeDescriptorSupplier<Iterable<WindowedValue<Void>>>)
                  () -> TypeDescriptors.iterables(new TypeDescriptor<WindowedValue<Void>>() {}),
              (PCollectionViews.TypeDescriptorSupplier<Void>) TypeDescriptors::voids);

  for (RunnerApi.ExecutableStagePayload.SideInputId sideInputId :
      stagePayload.getSideInputsList()) {

    // TODO: local name is unique as long as only one transform with side input can be within a
    // stage
    String localName = sideInputId.getLocalName();
    String pCollectionId =
        components.getTransformsOrThrow(sideInputId.getTransformId()).getInputsOrThrow(localName);
    String windowingStrategyId =
        components.getPcollectionsOrThrow(pCollectionId).getWindowingStrategyId();
    RunnerApi.WindowingStrategy strategyProto =
        components.getWindowingStrategiesOrThrow(windowingStrategyId);

    final WindowingStrategy<?, ?> hydratedStrategy;
    try {
      hydratedStrategy = WindowingStrategyTranslation.fromProto(strategyProto, rehydrated);
    } catch (InvalidProtocolBufferException e) {
      String message =
          String.format("Unable to hydrate side input windowing strategy %s.", strategyProto);
      throw new IllegalStateException(message, e);
    }

    Coder<WindowedValue<Object>> viewCoder = instantiateCoder(pCollectionId, components);
    // side input materialization via GBK (T -> Iterable<T>)
    WindowedValueCoder windowedCoder = (WindowedValueCoder) viewCoder;
    viewCoder = windowedCoder.withValueCoder(IterableCoder.of(windowedCoder.getValueCoder()));

    viewsById.put(
        sideInputId,
        new RunnerPCollectionView<>(
            null,
            new TupleTag<>(localName),
            placeholderViewFn,
            // TODO: support custom mapping fn
            hydratedStrategy.getWindowFn().getDefaultWindowMappingFn(),
            hydratedStrategy,
            viewCoder));
  }
  return viewsById;
}
 
Example 5
Source File: ExecutableStageDoFnOperator.java    From beam with Apache License 2.0 4 votes vote down vote up
/** Constructor. */
public ExecutableStageDoFnOperator(
    String stepName,
    Coder<WindowedValue<InputT>> windowedInputCoder,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    TupleTag<OutputT> mainOutputTag,
    List<TupleTag<?>> additionalOutputTags,
    OutputManagerFactory<OutputT> outputManagerFactory,
    Map<Integer, PCollectionView<?>> sideInputTagMapping,
    Collection<PCollectionView<?>> sideInputs,
    Map<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>> sideInputIds,
    PipelineOptions options,
    RunnerApi.ExecutableStagePayload payload,
    JobInfo jobInfo,
    FlinkExecutableStageContextFactory contextFactory,
    Map<String, TupleTag<?>> outputMap,
    WindowingStrategy windowingStrategy,
    Coder keyCoder,
    KeySelector<WindowedValue<InputT>, ?> keySelector) {
  super(
      new NoOpDoFn(),
      stepName,
      windowedInputCoder,
      outputCoders,
      mainOutputTag,
      additionalOutputTags,
      outputManagerFactory,
      windowingStrategy,
      sideInputTagMapping,
      sideInputs,
      options,
      keyCoder,
      keySelector,
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
  this.isStateful = payload.getUserStatesCount() > 0 || payload.getTimersCount() > 0;
  this.payload = payload;
  this.jobInfo = jobInfo;
  this.contextFactory = contextFactory;
  this.outputMap = outputMap;
  this.sideInputIds = sideInputIds;
  this.stateBackendLock = new ReentrantLock();
}
 
Example 6
Source File: ExecutableStageDoFnOperator.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns a runner that also handles state cleanup when this operator is stateful.
 *
 * <p>When {@code isStateful} is false the given {@code sdkHarnessRunner} is returned unchanged.
 * Otherwise it is wrapped in an anonymous {@code StatefulDoFnRunner} subclass that sets the
 * current key on the keyed state backend before processing each element and runs pending state
 * cleanup at the end of each bundle, both while holding {@code stateBackendLock}.
 *
 * @param sdkHarnessRunner runner to return directly or to wrap
 * @param payload stage payload, passed to {@code requiresTimeSortedInput}
 * @param stepContext step context handed to the {@code StatefulDoFnRunner}
 * @return {@code sdkHarnessRunner} itself, or a stateful wrapper around it
 */
private DoFnRunner<InputT, OutputT> ensureStateDoFnRunner(
    SdkHarnessDoFnRunner<InputT, OutputT> sdkHarnessRunner,
    RunnerApi.ExecutableStagePayload payload,
    StepContext stepContext) {

  // Nothing to wrap for stages without state.
  if (!isStateful) {
    return sdkHarnessRunner;
  }
  // Takes care of state cleanup via StatefulDoFnRunner
  Coder windowCoder = windowingStrategy.getWindowFn().windowCoder();
  CleanupTimer<InputT> cleanupTimer =
      new CleanupTimer<>(
          timerInternals,
          stateBackendLock,
          windowingStrategy,
          keyCoder,
          windowCoder,
          getKeyedStateBackend());

  // Local names of the user states, read from the executableStage field (not from payload).
  List<String> userStates =
      executableStage.getUserStates().stream()
          .map(UserStateReference::localName)
          .collect(Collectors.toList());

  KeyedStateBackend<ByteBuffer> stateBackend = getKeyedStateBackend();

  StateCleaner stateCleaner =
      new StateCleaner(
          userStates,
          windowCoder,
          stateBackend::getCurrentKey,
          timerInternals::hasPendingEventTimeTimers,
          cleanupTimer);

  // NOTE(review): requiresTimeSortedInput is defined elsewhere; the meaning of its boolean
  // argument is not visible here - confirm against its declaration.
  return new StatefulDoFnRunner<InputT, OutputT, BoundedWindow>(
      sdkHarnessRunner,
      getInputCoder(),
      stepContext,
      windowingStrategy,
      cleanupTimer,
      stateCleaner,
      requiresTimeSortedInput(payload, true)) {

    @Override
    public void processElement(WindowedValue<InputT> input) {
      // Hold the state backend lock while the current key is switched and the element is
      // processed.
      try (Locker locker = Locker.locked(stateBackendLock)) {
        // The element value is cast to KV and its key is encoded with keyCoder.
        @SuppressWarnings({"unchecked", "rawtypes"})
        final ByteBuffer key =
            FlinkKeyUtils.encodeKey(((KV) input.getValue()).getKey(), (Coder) keyCoder);
        getKeyedStateBackend().setCurrentKey(key);
        super.processElement(input);
      }
    }

    @Override
    public void finishBundle() {
      // Before cleaning up state, first finish bundle for all underlying DoFnRunners
      super.finishBundle();
      // execute cleanup after the bundle is complete
      if (!stateCleaner.cleanupQueue.isEmpty()) {
        try (Locker locker = Locker.locked(stateBackendLock)) {
          stateCleaner.cleanupState(keyedStateInternals, stateBackend::setCurrentKey);
        } catch (Exception e) {
          // Any failure during cleanup is rethrown unchecked with the original cause attached.
          throw new RuntimeException("Failed to cleanup state.", e);
        }
      }
    }
  };
}
 
Example 7
Source File: DoFnOp.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the operator by storing its configuration in instance fields.
 *
 * <p>{@code outputCoders}, {@code idToViewMap} and {@code idToTupleTagMap} are copied into new
 * {@code HashMap}s; every other argument is stored as given. The bundle check timer ID and the
 * bundle state ID are derived from {@code transformId} by prefixing it with a fixed string.
 */
public DoFnOp(
    TupleTag<FnOutT> mainOutputTag,
    DoFn<InT, FnOutT> doFn,
    Coder<?> keyCoder,
    Coder<InT> inputCoder,
    Coder<WindowedValue<InT>> windowedValueCoder,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    Collection<PCollectionView<?>> sideInputs,
    List<TupleTag<?>> sideOutputTags,
    WindowingStrategy windowingStrategy,
    Map<String, PCollectionView<?>> idToViewMap,
    OutputManagerFactory<OutT> outputManagerFactory,
    String transformFullName,
    String transformId,
    PCollection.IsBounded isBounded,
    boolean isPortable,
    RunnerApi.ExecutableStagePayload stagePayload,
    Map<String, TupleTag<?>> idToTupleTagMap,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {
  // The user function and its schema information.
  this.doFn = doFn;
  this.doFnSchemaInformation = doFnSchemaInformation;

  // Coders.
  this.keyCoder = keyCoder;
  this.inputCoder = inputCoder;
  this.windowedValueCoder = windowedValueCoder;

  // Outputs.
  this.mainOutputTag = mainOutputTag;
  this.sideOutputTags = sideOutputTags;
  this.outputManagerFactory = outputManagerFactory;

  // Side inputs.
  this.sideInputs = sideInputs;
  this.sideInputMapping = sideInputMapping;

  // Windowing and boundedness.
  this.windowingStrategy = windowingStrategy;
  this.isBounded = isBounded;

  // Transform identity and the IDs derived from it.
  this.transformFullName = transformFullName;
  this.transformId = transformId;
  this.bundleCheckTimerId = "_samza_bundle_check_" + transformId;
  this.bundleStateId = "_samza_bundle_" + transformId;

  // Portable execution.
  this.isPortable = isPortable;
  this.stagePayload = stagePayload;

  // Maps that are copied rather than stored by reference.
  this.outputCoders = new HashMap<>(outputCoders);
  this.idToViewMap = new HashMap<>(idToViewMap);
  this.idToTupleTagMap = new HashMap<>(idToTupleTagMap);
}
 
Example 8
Source File: ParDoBoundMultiTranslator.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Translates a portable executable-stage transform into message-stream operations.
 *
 * <p>The stage payload is parsed from the transform's spec, a {@code DoFnOp} wrapping a {@code
 * NoOpDoFn} is built for it, the op is applied to the stage's input stream, and one filtered
 * output stream per output index is registered on the translation context. Side inputs are not
 * supported yet, so the side input stream list is always empty.
 *
 * @param transform transform node whose spec payload holds the executable stage
 * @param pipeline pipeline used to look up components
 * @param ctx translation context used to look up and register message streams
 * @throws RuntimeException wrapping the {@code IOException} if the payload cannot be parsed
 */
private static <InT, OutT> void doTranslatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  Map<String, String> outputs = transform.getTransform().getOutputsMap();

  final RunnerApi.ExecutableStagePayload stagePayload;
  try {
    stagePayload =
        RunnerApi.ExecutableStagePayload.parseFrom(
            transform.getTransform().getSpec().getPayload());
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  String inputId = stagePayload.getInput();
  final MessageStream<OpMessage<InT>> mainInput = ctx.getMessageStreamById(inputId);
  // TODO: support side input
  final List<MessageStream<OpMessage<InT>>> sideInputStreams = Collections.emptyList();

  final Map<TupleTag<?>, Integer> tagToIndexMap = new HashMap<>();
  final Map<String, TupleTag<?>> idToTupleTagMap = new HashMap<>();

  // first output as the main output
  final TupleTag<OutT> mainOutputTag =
      outputs.isEmpty() ? null : new TupleTag(outputs.keySet().iterator().next());

  // Number the outputs in iteration order and remember which tag belongs to which PCollection.
  int nextIndex = 0;
  for (Map.Entry<String, String> output : outputs.entrySet()) {
    TupleTag<?> tag = new TupleTag<>(output.getKey());
    tagToIndexMap.put(tag, nextIndex);
    nextIndex++;
    idToTupleTagMap.put(output.getValue(), tag);
  }

  WindowedValue.WindowedValueCoder<InT> windowedInputCoder =
      ctx.instantiateCoder(inputId, pipeline.getComponents());

  final DoFnSchemaInformation doFnSchemaInformation =
      ParDoTranslation.getSchemaInformation(transform.getTransform());

  Map<String, PCollectionView<?>> sideInputMapping =
      ParDoTranslation.getSideInputMapping(transform.getTransform());

  final RunnerApi.PCollection inputPCollection =
      pipeline.getComponents().getPcollectionsOrThrow(inputId);
  final PCollection.IsBounded isBounded = SamzaPipelineTranslatorUtils.isBounded(inputPCollection);

  final DoFnOp<InT, OutT, RawUnionValue> op =
      new DoFnOp<>(
          mainOutputTag,
          new NoOpDoFn<>(),
          null, // key coder not in use
          windowedInputCoder.getValueCoder(), // input coder not in use
          windowedInputCoder,
          Collections.emptyMap(), // output coders not in use
          Collections.emptyList(), // sideInputs not in use until side input support
          new ArrayList<>(idToTupleTagMap.values()), // used by java runner only
          SamzaPipelineTranslatorUtils.getPortableWindowStrategy(transform, pipeline),
          Collections.emptyMap(), // idToViewMap not in use until side input support
          new DoFnOp.MultiOutputManagerFactory(tagToIndexMap),
          ctx.getTransformFullName(),
          ctx.getTransformId(),
          isBounded,
          true,
          stagePayload,
          idToTupleTagMap,
          doFnSchemaInformation,
          sideInputMapping);

  // Merge side input streams into the main input only when there are any.
  final MessageStream<OpMessage<InT>> opInput;
  if (!sideInputStreams.isEmpty()) {
    MessageStream<OpMessage<InT>> mergedSideInputStreams =
        MessageStream.mergeAll(sideInputStreams).flatMap(new SideInputWatermarkFn());
    opInput = mainInput.merge(Collections.singletonList(mergedSideInputStreams));
  } else {
    opInput = mainInput;
  }

  final MessageStream<OpMessage<RawUnionValue>> taggedOutputStream =
      opInput.flatMap(OpAdapter.adapt(op));

  for (int unionTag : tagToIndexMap.values()) {
    // Non-element messages pass through to every output; elements only to the output whose
    // index equals their union tag.
    final MessageStream<OpMessage<OutT>> perOutputStream =
        taggedOutputStream
            .filter(
                message ->
                    message.getType() != OpMessage.Type.ELEMENT
                        || message.getElement().getValue().getUnionTag() == unionTag)
            .flatMap(OpAdapter.adapt(new RawUnionValueToValue()));

    // NOTE(review): every per-index stream is registered under the same
    // ctx.getOutputId(transform) - confirm this is intended for stages with several outputs.
    ctx.registerMessageStream(ctx.getOutputId(transform), perOutputStream);
  }
}