org.apache.beam.runners.core.construction.SerializablePipelineOptions Java Examples

The following examples show how to use org.apache.beam.runners.core.construction.SerializablePipelineOptions. The source file and originating project are noted above each example.
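SerializablePipelineOptions exists because PipelineOptions itself is not Java-serializable; runners wrap their options in it so they can travel to workers inside serialized functions, which is the pattern every example below follows. As a minimal round-trip sketch (the class name and job name here are illustrative, not taken from any example below):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import org.apache.beam.runners.core.construction.SerializablePipelineOptions;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class OptionsRoundTrip {
  public static void main(String[] args) throws Exception {
    PipelineOptions options = PipelineOptionsFactory.create();
    options.setJobName("demo-job");

    // Wrap the non-serializable PipelineOptions so it can travel with a function.
    SerializablePipelineOptions wrapped = new SerializablePipelineOptions(options);

    // Simulate what a runner does: Java-serialize, then deserialize on a worker.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    try (ObjectOutputStream out = new ObjectOutputStream(bytes)) {
      out.writeObject(wrapped);
    }
    SerializablePipelineOptions revived;
    try (ObjectInputStream in =
        new ObjectInputStream(new ByteArrayInputStream(bytes.toByteArray()))) {
      revived = (SerializablePipelineOptions) in.readObject();
    }

    // get() recovers a usable PipelineOptions on the receiving side.
    System.out.println(revived.get().getJobName()); // prints "demo-job"
  }
}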
Example #1
Source File: FlinkDoFnFunction.java    From beam with Apache License 2.0
public FlinkDoFnFunction(
    DoFn<InputT, OutputT> doFn,
    String stepName,
    WindowingStrategy<?, ?> windowingStrategy,
    Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
    PipelineOptions options,
    Map<TupleTag<?>, Integer> outputMap,
    TupleTag<OutputT> mainOutputTag,
    Coder<InputT> inputCoder,
    Map<TupleTag<?>, Coder<?>> outputCoderMap,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {

  this.doFn = doFn;
  this.stepName = stepName;
  this.sideInputs = sideInputs;
  this.serializedOptions = new SerializablePipelineOptions(options);
  this.windowingStrategy = windowingStrategy;
  this.outputMap = outputMap;
  this.mainOutputTag = mainOutputTag;
  this.inputCoder = inputCoder;
  this.outputCoderMap = outputCoderMap;
  this.doFnSchemaInformation = doFnSchemaInformation;
  this.sideInputMapping = sideInputMapping;
}
 
Example #2
Source File: GroupByWindowFunction.java    From beam with Apache License 2.0
/**
 * Method used to initialize the transient variables that were sent over as byte arrays or proto
 * buffers.
 */
private void initTransient() {
  if (isInitialized) {
    return;
  }

  SdkComponents components = SdkComponents.create();
  options = new SerializablePipelineOptions(serializedOptions).get();

  try {
    windowStrategyProto = RunnerApi.MessageWithComponents.parseFrom(windowBytes);
    windowingStrategy =
        (WindowingStrategy<?, W>)
            WindowingStrategyTranslation.fromProto(
                windowStrategyProto.getWindowingStrategy(),
                RehydratedComponents.forComponents(components.toComponents()));
  } catch (InvalidProtocolBufferException e) {
    LOG.info(e.getMessage());
  }
  this.isInitialized = true;
}
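The serializedOptions string and windowBytes array consumed here are produced by the GroupByWindowFunction constructor shown as Example #4 below.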
 
Example #3
Source File: AssignWindowsFunction.java    From beam with Apache License 2.0
/**
 * Method used to initialize the transient variables that were sent over as byte arrays or proto
 * buffers.
 */
private void initTransient() {
  if (isInitialized) {
    return;
  }
  options = new SerializablePipelineOptions(serializedOptions).get();

  try {
    RunnerApi.FunctionSpec windowFnProto = RunnerApi.FunctionSpec.parseFrom(windowFnBytes);

    windowFn =
        (WindowFn<T, BoundedWindow>)
            WindowingStrategyTranslation.windowFnFromProto(windowFnProto);
  } catch (InvalidProtocolBufferException e) {
    LOG.info(e.getMessage());
  }
  this.isInitialized = true;
}
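This is the deserialization half of AssignWindowsFunction; the constructor that writes serializedOptions and windowFnBytes appears as Example #24 below.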
 
Example #4
Source File: GroupByWindowFunction.java    From beam with Apache License 2.0
public GroupByWindowFunction(
    WindowingStrategy<?, W> windowingStrategy,
    SystemReduceFn<K, V, Iterable<V>, Iterable<V>, W> reduceFn,
    PipelineOptions options) {
  this.windowingStrategy = windowingStrategy;
  this.options = options;
  this.serializedOptions = new SerializablePipelineOptions(options).toString();
  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(
      Environments.createOrGetDefaultEnvironment(options.as(PortablePipelineOptions.class)));

  try {
    windowStrategyProto =
        WindowingStrategyTranslation.toMessageProto(windowingStrategy, components);
    windowBytes = windowStrategyProto.toByteArray();
  } catch (IOException e) {
    LOG.info(e.getMessage());
  }
  this.reduceFn = reduceFn;
}
 
Example #5
Source File: ConfigBuilder.java    From beam with Apache License 2.0
public Config build() {
  try {
    // apply framework configs
    config.putAll(createSystemConfig(options));

    // apply user configs
    config.putAll(createUserConfig(options));

    config.put(ApplicationConfig.APP_NAME, options.getJobName());
    config.put(ApplicationConfig.APP_ID, options.getJobInstance());
    config.put(JOB_NAME, options.getJobName());
    config.put(JOB_ID, options.getJobInstance());

    config.put(
        "beamPipelineOptions",
        Base64Serializer.serializeUnchecked(new SerializablePipelineOptions(options)));

    validateConfigs(options, config);

    return new MapConfig(config);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
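A worker-side consumer would reverse this encoding with the same Base64Serializer utility. A sketch, assuming a cfg handle for the Samza Config built above (the variable name is illustrative, not part of ConfigBuilder):

String encoded = cfg.get("beamPipelineOptions");
SerializablePipelineOptions wrapped =
    Base64Serializer.deserializeUnchecked(encoded, SerializablePipelineOptions.class);
PipelineOptions options = wrapped.get();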
 
Example #6
Source File: FlinkStatefulDoFnFunction.java    From beam with Apache License 2.0
public FlinkStatefulDoFnFunction(
    DoFn<KV<K, V>, OutputT> dofn,
    String stepName,
    WindowingStrategy<?, ?> windowingStrategy,
    Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
    PipelineOptions pipelineOptions,
    Map<TupleTag<?>, Integer> outputMap,
    TupleTag<OutputT> mainOutputTag,
    Coder<KV<K, V>> inputCoder,
    Map<TupleTag<?>, Coder<?>> outputCoderMap,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {

  this.dofn = dofn;
  this.stepName = stepName;
  this.windowingStrategy = windowingStrategy;
  this.sideInputs = sideInputs;
  this.serializedOptions = new SerializablePipelineOptions(pipelineOptions);
  this.outputMap = outputMap;
  this.mainOutputTag = mainOutputTag;
  this.inputCoder = inputCoder;
  this.outputCoderMap = outputCoderMap;
  this.doFnSchemaInformation = doFnSchemaInformation;
  this.sideInputMapping = sideInputMapping;
}
 
Example #7
Source File: DatasetSourceBatch.java    From beam with Apache License 2.0
@SuppressWarnings("unchecked")
private DatasetReader(DataSourceOptions options) {
  if (!options.get(BEAM_SOURCE_OPTION).isPresent()) {
    throw new RuntimeException("Beam source was not set in DataSource options");
  }
  this.source =
      Base64Serializer.deserializeUnchecked(
          options.get(BEAM_SOURCE_OPTION).get(), BoundedSource.class);

  if (!options.get(DEFAULT_PARALLELISM).isPresent()) {
    throw new RuntimeException("Spark default parallelism was not set in DataSource options");
  }
  this.numPartitions = Integer.parseInt(options.get(DEFAULT_PARALLELISM).get());
  checkArgument(numPartitions > 0, "Number of partitions must be greater than zero.");

  if (!options.get(PIPELINE_OPTIONS).isPresent()) {
    throw new RuntimeException("Beam pipelineOptions were not set in DataSource options");
  }
  this.serializablePipelineOptions =
      new SerializablePipelineOptions(options.get(PIPELINE_OPTIONS).get());
}
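Note that the wrapper is rebuilt here directly from its serialized string form: SerializablePipelineOptions accepts both a live PipelineOptions (as in Example #1) and its own serialized representation.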
 
Example #8
Source File: DatasetSourceStreaming.java    From beam with Apache License 2.0
@SuppressWarnings("unchecked")
private DatasetMicroBatchReader(String checkpointLocation, DataSourceOptions options) {
  if (!options.get(BEAM_SOURCE_OPTION).isPresent()) {
    throw new RuntimeException("Beam source was not set in DataSource options");
  }
  this.source =
      Base64Serializer.deserializeUnchecked(
          options.get(BEAM_SOURCE_OPTION).get(), UnboundedSource.class);

  if (!options.get(DEFAULT_PARALLELISM).isPresent()) {
    throw new RuntimeException("Spark default parallelism was not set in DataSource options");
  }
  this.numPartitions = Integer.parseInt(options.get(DEFAULT_PARALLELISM).get());
  checkArgument(numPartitions > 0, "Number of partitions must be greater than zero.");

  if (!options.get(PIPELINE_OPTIONS).isPresent()) {
    throw new RuntimeException("Beam pipelineOptions were not set in DataSource options");
  }
  this.serializablePipelineOptions =
      new SerializablePipelineOptions(options.get(PIPELINE_OPTIONS).get());
}
 
Example #9
Source File: DatasetSourceStreaming.java    From beam with Apache License 2.0
DatasetPartitionReader(
    UnboundedSource<T, CheckpointMarkT> source,
    SerializablePipelineOptions serializablePipelineOptions) {
  this.started = false;
  this.closed = false;
  this.source = source;
  // the reader is not serializable, so it is initialized lazily
  try {
    reader =
        // In
        // https://blog.yuvalitzchakov.com/exploring-stateful-streaming-with-spark-structured-streaming/
        // "Structured Streaming stores and retrieves the offsets on our behalf when re-running
        // the application meaning we no longer have to store them externally."
        source.createReader(serializablePipelineOptions.get(), null);
  } catch (IOException e) {
    throw new RuntimeException("Error creating UnboundedReader ", e);
  }
}
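Here serializablePipelineOptions.get() is invoked on the worker to materialize concrete PipelineOptions for UnboundedSource.createReader; the null second argument means the reader starts without a checkpoint mark.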
 
Example #10
Source File: SparkGroupAlsoByWindowViaWindowSet.java    From beam with Apache License 2.0
UpdateStateByKeyFunction(
    final List<Integer> sourceIds,
    final WindowingStrategy<?, W> windowingStrategy,
    final FullWindowedValueCoder<InputT> wvCoder,
    final Coder<K> keyCoder,
    final SerializablePipelineOptions options,
    final String logPrefix) {
  this.wvCoder = wvCoder;
  this.keyCoder = keyCoder;
  this.sourceIds = sourceIds;
  this.timerDataCoder = timerDataCoderOf(windowingStrategy);
  this.windowingStrategy = windowingStrategy;
  this.options = options;
  this.itrWvCoder = IterableCoder.of(wvCoder);
  this.logPrefix = logPrefix;
  this.wvKvIterCoder =
      windowedValueKeyValueCoderOf(
          keyCoder,
          wvCoder.getValueCoder(),
          ((FullWindowedValueCoder<InputT>) wvCoder).getWindowCoder());
}
 
Example #11
Source File: AbstractDoFnTransform.java    From incubator-nemo with Apache License 2.0
/**
 * AbstractDoFnTransform constructor.
 *
 * @param doFn                   doFn
 * @param inputCoder             input coder
 * @param outputCoders           output coders
 * @param mainOutputTag          main output tag
 * @param additionalOutputTags   additional output tags
 * @param windowingStrategy      windowing strategy
 * @param sideInputs             side inputs
 * @param options                pipeline options
 * @param displayData            display data
 * @param doFnSchemaInformation  DoFn schema information
 * @param sideInputMapping       side input mapping
 */
public AbstractDoFnTransform(final DoFn<InterT, OutputT> doFn,
                             final Coder<InputT> inputCoder,
                             final Map<TupleTag<?>, Coder<?>> outputCoders,
                             final TupleTag<OutputT> mainOutputTag,
                             final List<TupleTag<?>> additionalOutputTags,
                             final WindowingStrategy<?, ?> windowingStrategy,
                             final Map<Integer, PCollectionView<?>> sideInputs,
                             final PipelineOptions options,
                             final DisplayData displayData,
                             final DoFnSchemaInformation doFnSchemaInformation,
                             final Map<String, PCollectionView<?>> sideInputMapping) {
  this.doFn = doFn;
  this.inputCoder = inputCoder;
  this.outputCoders = outputCoders;
  this.mainOutputTag = mainOutputTag;
  this.additionalOutputTags = additionalOutputTags;
  this.sideInputs = sideInputs;
  this.serializedOptions = new SerializablePipelineOptions(options);
  this.windowingStrategy = windowingStrategy;
  this.displayData = displayData;
  this.doFnSchemaInformation = doFnSchemaInformation;
  this.sideInputMapping = sideInputMapping;
}
 
Example #12
Source File: FlinkExecutableStageFunction.java    From beam with Apache License 2.0
public FlinkExecutableStageFunction(
    String stepName,
    PipelineOptions pipelineOptions,
    RunnerApi.ExecutableStagePayload stagePayload,
    JobInfo jobInfo,
    Map<String, Integer> outputMap,
    FlinkExecutableStageContextFactory contextFactory,
    Coder windowCoder) {
  this.stepName = stepName;
  this.pipelineOptions = new SerializablePipelineOptions(pipelineOptions);
  this.stagePayload = stagePayload;
  this.jobInfo = jobInfo;
  this.outputMap = outputMap;
  this.contextFactory = contextFactory;
  this.windowCoder = windowCoder;
}
 
Example #13
Source File: SparkCombineFn.java    From beam with Apache License 2.0
SparkCombineFn(
    boolean global,
    Function<InputT, ValueT> toValue,
    CombineWithContext.CombineFnWithContext<ValueT, AccumT, OutputT> combineFn,
    SerializablePipelineOptions options,
    Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> sideInputs,
    WindowingStrategy<?, ?> windowingStrategy) {
  this(
      global,
      toValue,
      combineFn,
      options,
      sideInputs,
      windowingStrategy,
      WindowedAccumulator.Type.EXPLODE_WINDOWS);
}
 
Example #14
Source File: SparkCombineFn.java    From beam with Apache License 2.0
@VisibleForTesting
SparkCombineFn(
    boolean global,
    Function<InputT, ValueT> toValue,
    CombineWithContext.CombineFnWithContext<ValueT, AccumT, OutputT> combineFn,
    SerializablePipelineOptions options,
    Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> sideInputs,
    WindowingStrategy<?, ?> windowingStrategy,
    WindowedAccumulator.Type defaultNonMergingCombineStrategy) {

  this.globalCombine = global;
  this.options = options;
  this.sideInputs = sideInputs;
  @SuppressWarnings("unchecked")
  WindowingStrategy<?, BoundedWindow> castStrategy = (WindowingStrategy) windowingStrategy;
  this.windowingStrategy = castStrategy;
  this.toValue = toValue;
  this.defaultNonMergingCombineStrategy = defaultNonMergingCombineStrategy;
  this.combineFn = combineFn;
  @SuppressWarnings("unchecked")
  TypeDescriptor<BoundedWindow> untyped =
      (TypeDescriptor<BoundedWindow>) windowingStrategy.getWindowFn().getWindowTypeDescriptor();
  this.windowComparator = asWindowComparator(untyped);
}
 
Example #15
Source File: SourceRDD.java    From beam with Apache License 2.0
public Bounded(
    SparkContext sc,
    BoundedSource<T> source,
    SerializablePipelineOptions options,
    String stepName) {
  super(sc, NIL, JavaSparkContext$.MODULE$.fakeClassTag());
  this.source = source;
  this.options = options;
  // the input parallelism is determined by Spark's scheduler backend.
  // when running on YARN/SparkDeploy it's the result of max(totalCores, 2).
  // when running on Mesos it's 8.
  // when running local it's the total number of cores (local = 1, local[N] = N,
  // local[*] = estimation of the machine's cores).
  // ** the configuration "spark.default.parallelism" takes precedence over all of the above **
  this.numPartitions = sc.defaultParallelism();
  checkArgument(this.numPartitions > 0, "Number of partitions must be greater than zero.");
  this.bundleSize = options.get().as(SparkPipelineOptions.class).getBundleSize();
  this.stepName = stepName;
  this.metricsAccum = MetricsAccumulator.getInstance();
}
 
Example #16
Source File: FlinkPartialReduceFunction.java    From beam with Apache License 2.0
public FlinkPartialReduceFunction(
    CombineFnBase.GlobalCombineFn<InputT, AccumT, ?> combineFn,
    WindowingStrategy<Object, W> windowingStrategy,
    Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
    PipelineOptions pipelineOptions,
    boolean groupedByWindow) {
  this.combineFn = combineFn;
  this.windowingStrategy = windowingStrategy;
  this.sideInputs = sideInputs;
  this.serializedOptions = new SerializablePipelineOptions(pipelineOptions);
  this.groupedByWindow = groupedByWindow;
}
 
Example #17
Source File: SparkCombineFn.java    From beam with Apache License 2.0
public static <InputT, AccumT, OutputT> SparkCombineFn<InputT, InputT, AccumT, OutputT> globally(
    CombineWithContext.CombineFnWithContext<InputT, AccumT, OutputT> combineFn,
    SerializablePipelineOptions options,
    Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> sideInputs,
    WindowingStrategy<?, ?> windowingStrategy) {
  return new SparkCombineFn<>(true, e -> e, combineFn, options, sideInputs, windowingStrategy);
}
 
Example #18
Source File: SourceDStream.java    From beam with Apache License 2.0
SourceDStream(
    StreamingContext ssc,
    UnboundedSource<T, CheckpointMarkT> unboundedSource,
    SerializablePipelineOptions options,
    Long boundMaxRecords) {
  super(ssc, JavaSparkContext$.MODULE$.fakeClassTag());
  this.unboundedSource = unboundedSource;
  this.options = options;

  SparkPipelineOptions sparkOptions = options.get().as(SparkPipelineOptions.class);

  // Reader cache expiration interval. 50% of batch interval is added to accommodate latency.
  this.readerCacheInterval = 1.5 * sparkOptions.getBatchIntervalMillis();

  this.boundReadDuration =
      boundReadDuration(
          sparkOptions.getReadTimePercentage(), sparkOptions.getMinReadTimeMillis());
  // set initial parallelism once.
  this.initialParallelism = ssc().sparkContext().defaultParallelism();
  checkArgument(this.initialParallelism > 0, "Number of partitions must be greater than zero.");

  this.boundMaxRecords = boundMaxRecords;

  try {
    this.numPartitions = createMicrobatchSource().split(sparkOptions).size();
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
 
Example #19
Source File: SourceRDD.java    From beam with Apache License 2.0
public Unbounded(
    SparkContext sc,
    SerializablePipelineOptions options,
    MicrobatchSource<T, CheckpointMarkT> microbatchSource,
    int initialNumPartitions) {
  super(sc, NIL, JavaSparkContext$.MODULE$.fakeClassTag());
  this.options = options;
  this.microbatchSource = microbatchSource;
  this.partitioner = new HashPartitioner(initialNumPartitions);
}
 
Example #20
Source File: FlinkMergingNonShuffleReduceFunction.java    From beam with Apache License 2.0
public FlinkMergingNonShuffleReduceFunction(
    CombineFnBase.GlobalCombineFn<InputT, AccumT, OutputT> combineFn,
    WindowingStrategy<Object, W> windowingStrategy,
    Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
    PipelineOptions pipelineOptions) {

  this.combineFn = combineFn;

  this.windowingStrategy = windowingStrategy;
  this.sideInputs = sideInputs;

  this.serializedOptions = new SerializablePipelineOptions(pipelineOptions);
}
 
Example #21
Source File: SparkCombineFn.java    From beam with Apache License 2.0
public static <K, V, AccumT, OutputT> SparkCombineFn<KV<K, V>, V, AccumT, OutputT> keyed(
    CombineWithContext.CombineFnWithContext<V, AccumT, OutputT> combineFn,
    SerializablePipelineOptions options,
    Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> sideInputs,
    WindowingStrategy<?, ?> windowingStrategy) {
  return new SparkCombineFn<>(
      false, KV::getValue, combineFn, options, sideInputs, windowingStrategy);
}
 
Example #22
Source File: MultiDoFnFunction.java    From beam with Apache License 2.0
/**
 * @param metricsAccum The Spark {@link AccumulatorV2} that backs the Beam metrics.
 * @param stepName The name of this step.
 * @param doFn The {@link DoFn} to be wrapped.
 * @param options The {@link SerializablePipelineOptions}.
 * @param mainOutputTag The main output {@link TupleTag}.
 * @param additionalOutputTags Additional {@link TupleTag output tags}.
 * @param inputCoder The coder for the input.
 * @param outputCoders A map of all output coders.
 * @param sideInputs Side inputs used in this {@link DoFn}.
 * @param windowingStrategy Input {@link WindowingStrategy}.
 * @param stateful Whether this {@link DoFn} is stateful.
 * @param doFnSchemaInformation The {@link DoFnSchemaInformation} for the wrapped {@link DoFn}.
 * @param sideInputMapping A map from side input tag to {@link PCollectionView}.
 */
public MultiDoFnFunction(
    MetricsContainerStepMapAccumulator metricsAccum,
    String stepName,
    DoFn<InputT, OutputT> doFn,
    SerializablePipelineOptions options,
    TupleTag<OutputT> mainOutputTag,
    List<TupleTag<?>> additionalOutputTags,
    Coder<InputT> inputCoder,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> sideInputs,
    WindowingStrategy<?, ?> windowingStrategy,
    boolean stateful,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {
  this.metricsAccum = metricsAccum;
  this.stepName = stepName;
  this.doFn = SerializableUtils.clone(doFn);
  this.options = options;
  this.mainOutputTag = mainOutputTag;
  this.additionalOutputTags = additionalOutputTags;
  this.inputCoder = inputCoder;
  this.outputCoders = outputCoders;
  this.sideInputs = sideInputs;
  this.windowingStrategy = windowingStrategy;
  this.stateful = stateful;
  this.doFnSchemaInformation = doFnSchemaInformation;
  this.sideInputMapping = sideInputMapping;
}
 
Example #23
Source File: SparkGroupAlsoByWindowViaWindowSet.java    From beam with Apache License 2.0
public static <K, InputT, W extends BoundedWindow>
    JavaDStream<WindowedValue<KV<K, Iterable<InputT>>>> groupByKeyAndWindow(
        final JavaDStream<WindowedValue<KV<K, InputT>>> inputDStream,
        final Coder<K> keyCoder,
        final Coder<WindowedValue<InputT>> wvCoder,
        final WindowingStrategy<?, W> windowingStrategy,
        final SerializablePipelineOptions options,
        final List<Integer> sourceIds,
        final String transformFullName) {

  final PairDStreamFunctions<ByteArray, byte[]> pairDStream =
      buildPairDStream(inputDStream, keyCoder, wvCoder);

  // use updateStateByKey to scan through the state and update elements and timers.
  final UpdateStateByKeyFunction<K, InputT, W> updateFunc =
      new UpdateStateByKeyFunction<>(
          sourceIds,
          windowingStrategy,
          (FullWindowedValueCoder<InputT>) wvCoder,
          keyCoder,
          options,
          transformFullName);

  final DStream<
          Tuple2</*K*/ ByteArray, Tuple2<StateAndTimers, /*WV<KV<K, Itr<I>>>*/ List<byte[]>>>>
      firedStream =
          pairDStream.updateStateByKey(
              updateFunc,
              pairDStream.defaultPartitioner(pairDStream.defaultPartitioner$default$1()),
              true,
              JavaSparkContext$.MODULE$.fakeClassTag());

  checkpointIfNeeded(firedStream, options);

  // filter state-only output (nothing to fire) and remove the state from the output.
  return stripStateValues(firedStream, keyCoder, (FullWindowedValueCoder<InputT>) wvCoder);
}
 
Example #24
Source File: AssignWindowsFunction.java    From beam with Apache License 2.0
public AssignWindowsFunction(WindowFn<T, BoundedWindow> windowFn, PipelineOptions options) {
  this.windowFn = windowFn;
  SdkComponents components = SdkComponents.create();
  this.options = options;
  this.serializedOptions = new SerializablePipelineOptions(options).toString();
  components.registerEnvironment(
      Environments.createOrGetDefaultEnvironment(options.as(PortablePipelineOptions.class)));
  RunnerApi.FunctionSpec windowFnProto =
      WindowingStrategyTranslation.toProto(windowFn, components);
  windowFnBytes = windowFnProto.toByteArray();
}
 
Example #25
Source File: SparkGroupAlsoByWindowViaWindowSet.java    From beam with Apache License 2.0
private static void checkpointIfNeeded(
    final DStream<Tuple2<ByteArray, Tuple2<StateAndTimers, List<byte[]>>>> firedStream,
    final SerializablePipelineOptions options) {

  final Long checkpointDurationMillis = getBatchDuration(options);

  if (checkpointDurationMillis > 0) {
    firedStream.checkpoint(new Duration(checkpointDurationMillis));
  }
}
 
Example #26
Source File: DoFnFunction.java    From beam with Apache License 2.0
public DoFnFunction(
    Twister2TranslationContext context,
    DoFn<InputT, OutputT> doFn,
    Coder<InputT> inputCoder,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    List<TupleTag<?>> sideOutputs,
    WindowingStrategy<?, ?> windowingStrategy,
    Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
    TupleTag<OutputT> mainOutput,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<TupleTag<?>, Integer> outputMap,
    Map<String, PCollectionView<?>> sideInputMapping) {
  this.doFn = doFn;
  this.pipelineOptions = context.getOptions();
  this.serializedOptions = new SerializablePipelineOptions(pipelineOptions).toString();
  this.inputCoder = inputCoder;
  this.outputCoders = outputCoders;
  this.windowingStrategy = windowingStrategy;
  this.sideInputs = convertToTuples(sideInputs);
  this.mainOutput = mainOutput;
  this.doFnSchemaInformation = doFnSchemaInformation;
  this.sideOutputs = sideOutputs;
  this.stepcontext = new NoOpStepContext();
  this.outputMap = outputMap;
  this.sideInputMapping = sideInputMapping;
  outputManager = new DoFnOutputManager(this.outputMap);
  prepareSerialization();
}
 
Example #27
Source File: Twister2BoundedSource.java    From beam with Apache License 2.0
public Twister2BoundedSource(
    BoundedSource<T> boundedSource, Twister2TranslationContext context, PipelineOptions options) {
  source = boundedSource;
  this.options = options;
  this.serializedOptions = new SerializablePipelineOptions(options).toString();
  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(
      Environments.createOrGetDefaultEnvironment(options.as(PortablePipelineOptions.class)));
  RunnerApi.FunctionSpec sourceProto = ReadTranslation.toProto(source, components);
  sourceBytes = sourceProto.getPayload().toByteArray();
}
 
Example #28
Source File: GroupAlsoByWindowViaOutputBufferFn.java    From beam with Apache License 2.0
public GroupAlsoByWindowViaOutputBufferFn(
    WindowingStrategy<?, W> windowingStrategy,
    StateInternalsFactory<K> stateInternalsFactory,
    SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, W> reduceFn,
    SerializablePipelineOptions options) {
  this.windowingStrategy = windowingStrategy;
  this.stateInternalsFactory = stateInternalsFactory;
  this.reduceFn = reduceFn;
  this.options = options;
}
 
Example #29
Source File: AbstractParDoP.java    From beam with Apache License 2.0
AbstractParDoP(
    DoFn<InputT, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<TupleTag<?>, int[]> outputCollToOrdinals,
    SerializablePipelineOptions pipelineOptions,
    TupleTag<OutputT> mainOutputTag,
    Coder<InputT> inputCoder,
    Map<PCollectionView<?>, Coder<?>> sideInputCoders,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    Coder<InputT> inputValueCoder,
    Map<TupleTag<?>, Coder<?>> outputValueCoders,
    Map<Integer, PCollectionView<?>> ordinalToSideInput,
    String ownerId,
    String stepId) {
  this.pipelineOptions = pipelineOptions;
  this.doFn = Utils.serde(doFn);
  this.windowingStrategy = windowingStrategy;
  this.doFnSchemaInformation = doFnSchemaInformation;
  this.outputCollToOrdinals = outputCollToOrdinals;
  this.mainOutputTag = mainOutputTag;
  this.inputCoder = inputCoder;
  this.sideInputCoders =
      sideInputCoders.entrySet().stream()
          .collect(
              Collectors.toMap(
                  Map.Entry::getKey,
                  e ->
                      Utils.deriveIterableValueCoder(
                          (WindowedValue.FullWindowedValueCoder) e.getValue())));
  this.outputCoders = outputCoders;
  this.inputValueCoder = inputValueCoder;
  this.outputValueCoders = outputValueCoders;
  this.ordinalToSideInput = ordinalToSideInput;
  this.ownerId = ownerId;
  this.stepId = stepId;
  this.cooperative = isCooperativenessAllowed(pipelineOptions) && hasOutput();
}
 
Example #30
Source File: FlinkReduceFunction.java    From beam with Apache License 2.0
public FlinkReduceFunction(
    CombineFnBase.GlobalCombineFn<?, AccumT, OutputT> combineFn,
    WindowingStrategy<Object, W> windowingStrategy,
    Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
    PipelineOptions pipelineOptions,
    boolean groupedByWindow) {
  this.combineFn = combineFn;
  this.windowingStrategy = windowingStrategy;
  this.sideInputs = sideInputs;
  this.serializedOptions = new SerializablePipelineOptions(pipelineOptions);
  this.groupedByWindow = groupedByWindow;
}