org.apache.beam.runners.core.SystemReduceFn Java Examples

The following examples show how to use org.apache.beam.runners.core.SystemReduceFn. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: PipelineTranslator.java    From incubator-nemo with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the transform that executes a Beam group-by-key node.
 *
 * <p>When {@code isGlobalWindow(beamNode, pipeline)} holds, a plain {@code GroupByKeyTransform}
 * is returned. Otherwise a {@code GroupByKeyAndWindowDoFnTransform} is created from the main
 * input's windowing strategy and a buffering {@code SystemReduceFn} over the main input's coder.
 *
 * @param ctx      translation context
 * @param beamNode the beam node to be translated
 * @return group by key transform
 */
private static Transform createGBKTransform(
  final PipelineTranslationContext ctx,
  final TransformHierarchy.Node beamNode) {
  final AppliedPTransform appliedTransform = beamNode.toAppliedPTransform(ctx.getPipeline());
  // A group-by-key has exactly one non-additional input; getOnlyElement enforces that.
  final PCollection<?> soleInput = (PCollection<?>)
    Iterables.getOnlyElement(TransformInputs.nonAdditionalInputs(appliedTransform));
  final TupleTag outputTag = new TupleTag<>();

  if (isGlobalWindow(beamNode, ctx.getPipeline())) {
    return new GroupByKeyTransform();
  }

  // Window-aware path: grouping is delegated to a buffering reduce function.
  return new GroupByKeyAndWindowDoFnTransform(
    getOutputCoders(appliedTransform),
    outputTag,
    soleInput.getWindowingStrategy(),
    ctx.getPipelineOptions(),
    SystemReduceFn.buffering(soleInput.getCoder()),
    DisplayData.from(beamNode.getTransform()));
}
 
Example #2
Source File: GroupByWindowFunction.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the function and eagerly derives the serialized forms of its inputs.
 *
 * <p>The pipeline options are stored both as given and as the string form of a
 * {@code SerializablePipelineOptions}; the windowing strategy is additionally translated to its
 * proto message and that message's byte array.
 *
 * @param windowingStrategy windowing strategy to store and translate to proto
 * @param reduceFn reduce function to store
 * @param options pipeline options, also used to obtain the default environment
 */
public GroupByWindowFunction(
    WindowingStrategy<?, W> windowingStrategy,
    SystemReduceFn<K, V, Iterable<V>, Iterable<V>, W> reduceFn,
    PipelineOptions options) {
  this.windowingStrategy = windowingStrategy;
  this.reduceFn = reduceFn;
  this.options = options;
  this.serializedOptions = new SerializablePipelineOptions(options).toString();

  // The proto translation needs components with an environment registered.
  SdkComponents sdkComponents = SdkComponents.create();
  PortablePipelineOptions portableOptions = options.as(PortablePipelineOptions.class);
  sdkComponents.registerEnvironment(Environments.createOrGetDefaultEnvironment(portableOptions));

  try {
    windowStrategyProto =
        WindowingStrategyTranslation.toMessageProto(windowingStrategy, sdkComponents);
    windowBytes = windowStrategyProto.toByteArray();
  } catch (IOException ioe) {
    // NOTE(review): a translation failure is only logged here, so the proto and its bytes
    // stay unset afterwards; confirm that downstream code tolerates this.
    LOG.info(ioe.getMessage());
  }
}
 
Example #3
Source File: GroupByKeyTranslator.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Selects the {@code SystemReduceFn} matching the given transform.
 *
 * <p>A {@code GroupByKey} gets a buffering reduce function over the value coder; a
 * {@code Combine.PerKey} gets a combining reduce function built from its combine fn. Any other
 * transform is rejected with a {@link RuntimeException}.
 *
 * @param transform the transform being translated
 * @param pipeline pipeline whose coder registry is used for the combining case
 * @param kvInputCoder coder of the keyed input elements
 * @return the reduce function for {@code transform}
 */
@SuppressWarnings("unchecked")
private static <K, InputT, OutputT>
    SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> getSystemReduceFn(
        PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, OutputT>>> transform,
        Pipeline pipeline,
        KvCoder<K, InputT> kvInputCoder) {
  if (transform instanceof GroupByKey) {
    return (SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow>)
        SystemReduceFn.buffering(kvInputCoder.getValueCoder());
  }

  if (transform instanceof Combine.PerKey) {
    final CombineFnBase.GlobalCombineFn<? super InputT, ?, OutputT> perKeyFn =
        ((Combine.PerKey) transform).getFn();
    return SystemReduceFn.combining(
        kvInputCoder.getKeyCoder(),
        AppliedCombineFn.withInputCoder(perKeyFn, pipeline.getCoderRegistry(), kvInputCoder));
  }

  throw new RuntimeException("Transform " + transform + " cannot be translated as GroupByKey.");
}
 
Example #4
Source File: GroupByKeyOp.java    From beam with Apache License 2.0 6 votes vote down vote up
public GroupByKeyOp(
    TupleTag<KV<K, OutputT>> mainOutputTag,
    Coder<KeyedWorkItem<K, InputT>> inputCoder,
    SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> reduceFn,
    WindowingStrategy<?, BoundedWindow> windowingStrategy,
    OutputManagerFactory<KV<K, OutputT>> outputManagerFactory,
    String transformFullName,
    String transformId,
    IsBounded isBounded) {
  this.mainOutputTag = mainOutputTag;
  this.windowingStrategy = windowingStrategy;
  this.outputManagerFactory = outputManagerFactory;
  this.transformFullName = transformFullName;
  this.transformId = transformId;
  this.isBounded = isBounded;

  if (!(inputCoder instanceof KeyedWorkItemCoder)) {
    throw new IllegalArgumentException(
        String.format(
            "GroupByKeyOp requires input to use KeyedWorkItemCoder. Got: %s",
            inputCoder.getClass()));
  }
  this.inputCoder = (KeyedWorkItemCoder<K, InputT>) inputCoder;
  this.keyCoder = this.inputCoder.getKeyCoder();
  this.reduceFn = reduceFn;
}
 
Example #5
Source File: WindowDoFnOperator.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the group-also-by-window {@code DoFn} from this operator's windowing strategy, state
 * and timer internals, side input reader, reduce function, output manager and main output tag.
 */
@Override
protected DoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> getDoFn() {
  // Both factories ignore the requested key: state and timers are implicitly keyed by the
  // key of the incoming element or of the firing timer.
  StateInternalsFactory<K> stateFactory = ignoredKey -> (StateInternals) keyedStateInternals;
  TimerInternalsFactory<K> timerFactory = ignoredKey -> timerInternals;

  // GroupAlsoByWindowViaWindowSetNewDoFn.create is generic in the window type, whereas the
  // WindowingStrategy held here is almost always untyped; hence the raw, unchecked call.
  @SuppressWarnings("unchecked")
  DoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> groupAlsoByWindowFn =
      GroupAlsoByWindowViaWindowSetNewDoFn.create(
          windowingStrategy,
          stateFactory,
          timerFactory,
          sideInputReader,
          (SystemReduceFn) systemReduceFn,
          outputManager,
          mainOutputTag);
  return groupAlsoByWindowFn;
}
 
Example #6
Source File: GroupAlsoByWindowEvaluatorFactory.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an evaluator for one input bundle of keyed work items.
 *
 * <p>Besides storing its arguments, the constructor looks up the step context for the bundle's
 * key, reads the input windowing strategy from the transform, creates a buffering reduce
 * function over the value coder and registers the dropped-due-to-lateness counter.
 *
 * @param evaluationContext context used to look up the execution and step context
 * @param options pipeline options to store
 * @param inputBundle bundle supplying the key and the input collection's coder
 * @param application the applied group-also-by-window transform
 */
public GroupAlsoByWindowEvaluator(
    final EvaluationContext evaluationContext,
    PipelineOptions options,
    CommittedBundle<KeyedWorkItem<K, V>> inputBundle,
    final AppliedPTransform<
            PCollection<KeyedWorkItem<K, V>>,
            PCollection<KV<K, Iterable<V>>>,
            DirectGroupAlsoByWindow<K, V>>
        application) {
  this.evaluationContext = evaluationContext;
  this.options = options;
  this.application = application;

  // Result accumulators start out empty.
  outputBundles = new ArrayList<>();
  unprocessedElements = ImmutableList.builder();

  structuralKey = inputBundle.getKey();
  stepContext =
      evaluationContext
          .getExecutionContext(application, inputBundle.getKey())
          .getStepContext(evaluationContext.getStepName(application));
  windowingStrategy =
      (WindowingStrategy<?, BoundedWindow>)
          application.getTransform().getInputWindowingStrategy();

  // The transform extracts the value coder from the input collection's coder.
  Coder<V> elementValueCoder =
      application.getTransform().getValueCoder(inputBundle.getPCollection().getCoder());
  reduceFn = SystemReduceFn.buffering(elementValueCoder);

  droppedDueToLateness =
      Metrics.counter(
          GroupAlsoByWindowEvaluator.class,
          GroupAlsoByWindowsAggregators.DROPPED_DUE_TO_LATENESS_COUNTER);
}
 
Example #7
Source File: CombiningGroupAlsoByWindowsViaOutputBufferDoFnTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a batch group-also-by-window function that combines values with this factory's
 * {@code combineFn}, keyed using {@code keyCoder}.
 */
@Override
public <W extends BoundedWindow> BatchGroupAlsoByWindowFn<K, InputT, OutputT> forStrategy(
    WindowingStrategy<?, W> windowingStrategy, StateInternalsFactory<K> stateInternalsFactory) {
  final SystemReduceFn<K, InputT, AccumT, OutputT, W> combiningFn =
      SystemReduceFn.<K, InputT, AccumT, OutputT, W>combining(keyCoder, combineFn);
  return new BatchGroupAlsoByWindowViaOutputBufferFn<>(
      windowingStrategy, stateInternalsFactory, combiningFn);
}
 
Example #8
Source File: GroupAlsoByWindowViaOutputBufferDoFnTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a batch group-also-by-window function that buffers the values of each key, using
 * this factory's {@code inputCoder} for the buffered elements.
 */
@Override
public <W extends BoundedWindow>
    BatchGroupAlsoByWindowFn<K, InputT, Iterable<InputT>> forStrategy(
        WindowingStrategy<?, W> windowingStrategy,
        StateInternalsFactory<K> stateInternalsFactory) {
  final SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, W> bufferingFn =
      SystemReduceFn.<K, InputT, W>buffering(inputCoder);
  return new BatchGroupAlsoByWindowViaOutputBufferFn<K, InputT, Iterable<InputT>, W>(
      windowingStrategy, stateInternalsFactory, bufferingFn);
}
 
Example #9
Source File: StreamingGroupAlsoByWindowViaWindowSetFn.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the collaborators of this function.
 *
 * @param windowingStrategy windowing strategy; its wildcard element type is cast to Object
 * @param stateInternalsFactory factory for per-key state internals
 * @param reduceFn reduce function to store
 */
private StreamingGroupAlsoByWindowViaWindowSetFn(
    WindowingStrategy<?, W> windowingStrategy,
    StateInternalsFactory<K> stateInternalsFactory,
    SystemReduceFn<K, InputT, ?, OutputT, W> reduceFn) {
  this.stateInternalsFactory = stateInternalsFactory;
  this.reduceFn = reduceFn;

  // Swap the wildcard element type for Object through an unchecked cast before storing.
  @SuppressWarnings("unchecked")
  WindowingStrategy<Object, W> objectTypedStrategy =
      (WindowingStrategy<Object, W>) windowingStrategy;
  this.windowingStrategy = objectTypedStrategy;
}
 
Example #10
Source File: StreamingGroupAlsoByWindowViaWindowSetFn.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Static factory wrapping the private constructor.
 *
 * @param strategy windowing strategy to group by
 * @param stateInternalsFactory factory for per-key state internals
 * @param reduceFn reduce function applied to the grouped values
 * @return a new {@code StreamingGroupAlsoByWindowViaWindowSetFn}
 */
public static <K, InputT, OutputT, W extends BoundedWindow>
    GroupAlsoByWindowFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> create(
        WindowingStrategy<?, W> strategy,
        StateInternalsFactory<K> stateInternalsFactory,
        SystemReduceFn<K, InputT, ?, OutputT, W> reduceFn) {
  final GroupAlsoByWindowFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> windowSetFn =
      new StreamingGroupAlsoByWindowViaWindowSetFn<>(strategy, stateInternalsFactory, reduceFn);
  return windowSetFn;
}
 
Example #11
Source File: BatchGroupAlsoByWindowViaOutputBufferFn.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the collaborators of this function; no other work is done here.
 *
 * @param windowingStrategy windowing strategy to group by
 * @param stateInternalsFactory factory for per-key state internals
 * @param reduceFn reduce function applied to the grouped values
 */
public BatchGroupAlsoByWindowViaOutputBufferFn(
    WindowingStrategy<?, W> windowingStrategy,
    StateInternalsFactory<K> stateInternalsFactory,
    SystemReduceFn<K, InputT, ?, OutputT, W> reduceFn) {
  this.stateInternalsFactory = stateInternalsFactory;
  this.reduceFn = reduceFn;
  this.strategy = windowingStrategy;
}
 
Example #12
Source File: BatchGroupAlsoByWindowsDoFns.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Create a {@link BatchGroupAlsoByWindowFn} without a combine function.
 *
 * <p>The implementation is chosen from the windowing strategy, in this order: the reshuffle
 * special path, the iterator-based implementation when it supports the strategy, and otherwise
 * the output-buffer implementation with a buffering {@code SystemReduceFn}.
 *
 * @param windowingStrategy The window function and trigger to use for grouping
 * @param stateInternalsFactory factory for state internals, used only by the output-buffer path
 * @param inputCoder the input coder to use
 */
public static <K, V, W extends BoundedWindow>
    BatchGroupAlsoByWindowFn<K, V, Iterable<V>> createForIterable(
        WindowingStrategy<?, W> windowingStrategy,
        StateInternalsFactory<K> stateInternalsFactory,
        Coder<V> inputCoder) {
  // A reshuffle takes priority over every other implementation.
  if (BatchGroupAlsoByWindowReshuffleFn.isReshuffle(windowingStrategy)) {
    return new BatchGroupAlsoByWindowReshuffleFn<>();
  }

  if (BatchGroupAlsoByWindowViaIteratorsFn.isSupported(windowingStrategy)) {
    return new BatchGroupAlsoByWindowViaIteratorsFn<K, V, W>(windowingStrategy);
  }

  // Fallback: buffer the values through a SystemReduceFn.
  return new BatchGroupAlsoByWindowViaOutputBufferFn<>(
      windowingStrategy, stateInternalsFactory, SystemReduceFn.buffering(inputCoder));
}
 
Example #13
Source File: StreamingGroupAlsoByWindowsDoFns.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a streaming group-also-by-window function without a combine function.
 *
 * @param windowingStrategy windowing strategy to group by
 * @param stateInternalsFactory factory for state internals, used by the window-set path
 * @param inputCoder coder of the values to buffer
 * @return the reshuffle function for a reshuffle strategy, otherwise a window-set function
 *     with a buffering {@code SystemReduceFn}
 */
public static <K, V, W extends BoundedWindow>
    GroupAlsoByWindowFn<KeyedWorkItem<K, V>, KV<K, Iterable<V>>> createForIterable(
        final WindowingStrategy<?, W> windowingStrategy,
        StateInternalsFactory<K> stateInternalsFactory,
        Coder<V> inputCoder) {
  // A reshuffle strategy is served by the dedicated special-path function.
  if (StreamingGroupAlsoByWindowReshuffleFn.isReshuffle(windowingStrategy)) {
    return new StreamingGroupAlsoByWindowReshuffleFn<>();
  }

  return StreamingGroupAlsoByWindowViaWindowSetFn.create(
      windowingStrategy, stateInternalsFactory, SystemReduceFn.buffering(inputCoder));
}
 
Example #14
Source File: StreamingGroupAlsoByWindowsDoFns.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a streaming group-also-by-window function that combines values per key.
 *
 * @param windowingStrategy windowing strategy to group by
 * @param stateInternalsFactory factory for per-key state internals
 * @param combineFn combine function to apply; checked with {@code Preconditions.checkNotNull}
 * @param keyCoder coder of the keys
 * @return a window-set function backed by a combining {@code SystemReduceFn}
 */
public static <K, InputT, AccumT, OutputT, W extends BoundedWindow>
    GroupAlsoByWindowFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> create(
        final WindowingStrategy<?, W> windowingStrategy,
        StateInternalsFactory<K> stateInternalsFactory,
        final AppliedCombineFn<K, InputT, AccumT, OutputT> combineFn,
        final Coder<K> keyCoder) {
  Preconditions.checkNotNull(combineFn);

  final SystemReduceFn<K, InputT, AccumT, OutputT, W> combiningFn =
      SystemReduceFn.combining(keyCoder, combineFn);
  return StreamingGroupAlsoByWindowViaWindowSetFn.create(
      windowingStrategy, stateInternalsFactory, combiningFn);
}
 
Example #15
Source File: GroupByKeyTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a grouping transform: looks up its input stream, coders, windowing strategy and
 * reduce function, hands them to {@code doTranslateGBK}, and registers the resulting stream
 * for the transform's output.
 *
 * @param transform the transform being translated
 * @param node hierarchy node, used to decide whether a repartition is needed
 * @param ctx translation context
 */
private static <K, InputT, OutputT> void doTranslate(
    PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, OutputT>>> transform,
    TransformHierarchy.Node node,
    TranslationContext ctx) {
  final PCollection<KV<K, InputT>> inputCollection = ctx.getInput(transform);
  final PCollection<KV<K, OutputT>> outputCollection = ctx.getOutput(transform);
  final TupleTag<KV<K, OutputT>> mainOutputTag = ctx.getOutputTag(transform);

  @SuppressWarnings("unchecked")
  final WindowingStrategy<?, BoundedWindow> strategy =
      (WindowingStrategy<?, BoundedWindow>) inputCollection.getWindowingStrategy();

  final MessageStream<OpMessage<KV<K, InputT>>> upstream =
      ctx.getMessageStream(inputCollection);

  // The input of a grouping transform is keyed, so its coder is cast to a KvCoder.
  final KvCoder<K, InputT> kvCoder = (KvCoder<K, InputT>) inputCollection.getCoder();
  final Coder<WindowedValue<KV<K, InputT>>> windowedElementCoder =
      SamzaCoders.of(inputCollection);

  final SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> groupingFn =
      getSystemReduceFn(transform, inputCollection.getPipeline(), kvCoder);

  final MessageStream<OpMessage<KV<K, OutputT>>> downstream =
      doTranslateGBK(
          upstream,
          needRepartition(node, ctx),
          groupingFn,
          strategy,
          kvCoder,
          windowedElementCoder,
          ctx.getTransformFullName(),
          ctx.getTransformId(),
          mainOutputTag,
          inputCollection.isBounded());

  ctx.registerMessageStream(outputCollection, downstream);
}
 
Example #16
Source File: WindowDoFnOperatorTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@code WindowDoFnOperator} over {@code Long} keys and values that sums the values
 * per key in one-minute fixed windows, with var-long coders throughout.
 */
private WindowDoFnOperator<Long, Long, Long> getWindowDoFnOperator() {
  WindowingStrategy<Object, IntervalWindow> oneMinuteWindows =
      WindowingStrategy.of(FixedWindows.of(standardMinutes(1)));

  TupleTag<KV<Long, Long>> mainTag = new TupleTag<>("main-output");

  // Combining reduce function: Sum.ofLongs() applied over KV<Long, Long> input.
  SystemReduceFn<Long, Long, long[], Long, BoundedWindow> summingFn =
      SystemReduceFn.combining(
          VarLongCoder.of(),
          AppliedCombineFn.withInputCoder(
              Sum.ofLongs(),
              CoderRegistry.createDefault(),
              KvCoder.of(VarLongCoder.of(), VarLongCoder.of())));

  // Input side: windowed singleton keyed work items. Output side: windowed key/value pairs.
  Coder<IntervalWindow> intervalWindowCoder = oneMinuteWindows.getWindowFn().windowCoder();
  SingletonKeyedWorkItemCoder<Long, Long> itemCoder =
      SingletonKeyedWorkItemCoder.of(VarLongCoder.of(), VarLongCoder.of(), intervalWindowCoder);
  FullWindowedValueCoder<SingletonKeyedWorkItem<Long, Long>> windowedItemCoder =
      WindowedValue.getFullCoder(itemCoder, intervalWindowCoder);
  FullWindowedValueCoder<KV<Long, Long>> windowedKvCoder =
      WindowedValue.getFullCoder(
          KvCoder.of(VarLongCoder.of(), VarLongCoder.of()), intervalWindowCoder);

  return new WindowDoFnOperator<Long, Long, Long>(
      summingFn,
      "stepName",
      (Coder) windowedItemCoder,
      mainTag,
      emptyList(),
      new MultiOutputOutputManagerFactory<>(mainTag, windowedKvCoder),
      oneMinuteWindows,
      emptyMap(),
      emptyList(),
      PipelineOptionsFactory.as(FlinkPipelineOptions.class),
      VarLongCoder.of(),
      new WorkItemKeySelector(VarLongCoder.of()));
}
 
Example #17
Source File: WindowDoFnOperator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the operator by forwarding its configuration to the superclass constructor and
 * storing the given reduce function.
 *
 * <p>Apart from the forwarded parameters, the superclass receives {@code null} as its first
 * argument, an empty map as its fourth and last arguments, and a fresh
 * {@code DoFnSchemaInformation.create()}.
 *
 * @param systemReduceFn reduce function stored on this operator
 * @param stepName forwarded to the superclass
 * @param windowedInputCoder coder of the windowed keyed work items; forwarded to the superclass
 * @param mainOutputTag tag of the main output; forwarded to the superclass
 * @param additionalOutputTags tags of additional outputs; forwarded to the superclass
 * @param outputManagerFactory factory for the output manager; forwarded to the superclass
 * @param windowingStrategy windowing strategy; forwarded to the superclass
 * @param sideInputTagMapping side-input mapping; forwarded to the superclass
 * @param sideInputs side inputs; forwarded to the superclass
 * @param options pipeline options; forwarded to the superclass
 * @param keyCoder coder of the keys; forwarded to the superclass
 * @param keySelector key selector; forwarded to the superclass
 */
public WindowDoFnOperator(
    SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> systemReduceFn,
    String stepName,
    Coder<WindowedValue<KeyedWorkItem<K, InputT>>> windowedInputCoder,
    TupleTag<KV<K, OutputT>> mainOutputTag,
    List<TupleTag<?>> additionalOutputTags,
    OutputManagerFactory<KV<K, OutputT>> outputManagerFactory,
    WindowingStrategy<?, ?> windowingStrategy,
    Map<Integer, PCollectionView<?>> sideInputTagMapping,
    Collection<PCollectionView<?>> sideInputs,
    PipelineOptions options,
    Coder<K> keyCoder,
    KeySelector<WindowedValue<KeyedWorkItem<K, InputT>>, ?> keySelector) {
  super(
      // NOTE(review): presumably the DoFn slot, left null because the DoFn is built later
      // from systemReduceFn; confirm against the superclass constructor.
      null,
      stepName,
      windowedInputCoder,
      Collections.emptyMap(),
      mainOutputTag,
      additionalOutputTags,
      outputManagerFactory,
      windowingStrategy,
      sideInputTagMapping,
      sideInputs,
      options,
      keyCoder,
      keySelector,
      DoFnSchemaInformation.create(),
      Collections.emptyMap());

  // Assigned after super(...) because Java requires the super call to come first.
  this.systemReduceFn = systemReduceFn;
}
 
Example #18
Source File: SparkBatchPortablePipelineTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a group-by-key node: pops the input dataset, groups its RDD by key and window,
 * and pushes the grouped result as the node's output dataset.
 *
 * @param transformNode the node being translated
 * @param pipeline pipeline proto supplying the components
 * @param context translation context holding the datasets
 */
private static <K, V> void translateGroupByKey(
    PTransformNode transformNode, RunnerApi.Pipeline pipeline, SparkTranslationContext context) {

  RunnerApi.Components pipelineComponents = pipeline.getComponents();
  String inputCollectionId = getInputId(transformNode);
  Dataset poppedDataset = context.popDataset(inputCollectionId);
  JavaRDD<WindowedValue<KV<K, V>>> sourceRdd =
      ((BoundedDataset<KV<K, V>>) poppedDataset).getRDD();

  // Split the windowed KV coder into its key and value parts.
  WindowedValueCoder<KV<K, V>> windowedKvCoder =
      getWindowedValueCoder(inputCollectionId, pipelineComponents);
  KvCoder<K, V> kvCoder = (KvCoder<K, V>) windowedKvCoder.getValueCoder();
  Coder<K> keyCoder = kvCoder.getKeyCoder();
  Coder<V> valueCoder = kvCoder.getValueCoder();

  WindowingStrategy windowingStrategy = getWindowingStrategy(inputCollectionId, pipelineComponents);
  WindowFn<Object, BoundedWindow> windowFn = windowingStrategy.getWindowFn();
  WindowedValue.WindowedValueCoder<V> windowedValueCoder =
      WindowedValue.FullWindowedValueCoder.of(valueCoder, windowFn.windowCoder());

  JavaRDD<WindowedValue<KV<K, Iterable<V>>>> grouped;
  Partitioner partitioner = getPartitioner(context);
  if (GroupNonMergingWindowsFunctions.isEligibleForGroupByWindow(windowingStrategy)) {
    // we can have a memory sensitive translation for non-merging windows
    grouped =
        GroupNonMergingWindowsFunctions.groupByKeyAndWindow(
            sourceRdd, keyCoder, valueCoder, windowingStrategy, partitioner);
  } else {
    JavaRDD<KV<K, Iterable<WindowedValue<V>>>> keyOnlyGroups =
        GroupCombineFunctions.groupByKeyOnly(
            sourceRdd, keyCoder, windowedValueCoder, partitioner);
    // for batch, GroupAlsoByWindow uses an in-memory StateInternals.
    grouped =
        keyOnlyGroups.flatMap(
            new SparkGroupAlsoByWindowViaOutputBufferFn<>(
                windowingStrategy,
                new TranslationUtils.InMemoryStateInternalsFactory<>(),
                SystemReduceFn.buffering(valueCoder),
                context.serializablePipelineOptions));
  }

  context.pushDataset(getOutputId(transformNode), new BoundedDataset<>(grouped));
}
 
Example #19
Source File: SparkGroupAlsoByWindowViaOutputBufferFn.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the collaborators of this function; no other work is done here.
 *
 * @param windowingStrategy windowing strategy to group by
 * @param stateInternalsFactory factory for per-key state internals
 * @param reduceFn buffering reduce function
 * @param options serializable pipeline options
 */
public SparkGroupAlsoByWindowViaOutputBufferFn(
    WindowingStrategy<?, W> windowingStrategy,
    StateInternalsFactory<K> stateInternalsFactory,
    SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, W> reduceFn,
    SerializablePipelineOptions options) {
  this.options = options;
  this.reduceFn = reduceFn;
  this.stateInternalsFactory = stateInternalsFactory;
  this.windowingStrategy = windowingStrategy;
}
 
Example #20
Source File: SparkGroupAlsoByWindowViaWindowSet.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the collaborators of this iterator; no other work is done here.
 *
 * @param input iterator over the (key, values, optional previous state) tuples
 * @param reduceFn buffering reduce function
 * @param droppedDueToLateness counter cell to store
 */
UpdateStateByKeyOutputIterator(
    final Iterator<
            Tuple3<ByteArray, Seq<byte[]>, Option<Tuple2<StateAndTimers, List<byte[]>>>>>
        input,
    final SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, W> reduceFn,
    final CounterCell droppedDueToLateness) {
  this.droppedDueToLateness = droppedDueToLateness;
  this.reduceFn = reduceFn;
  this.input = input;
}
 
Example #21
Source File: GroupAlsoByWindowViaOutputBufferFn.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the collaborators of this function; no other work is done here.
 *
 * @param windowingStrategy windowing strategy to group by
 * @param stateInternalsFactory factory for per-key state internals
 * @param reduceFn buffering reduce function
 * @param options serializable pipeline options
 */
public GroupAlsoByWindowViaOutputBufferFn(
    WindowingStrategy<?, W> windowingStrategy,
    StateInternalsFactory<K> stateInternalsFactory,
    SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, W> reduceFn,
    SerializablePipelineOptions options) {
  this.options = options;
  this.reduceFn = reduceFn;
  this.stateInternalsFactory = stateInternalsFactory;
  this.windowingStrategy = windowingStrategy;
}
 
Example #22
Source File: GroupByKeyTranslatorBatch.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a group-by-key: groups the input dataset by key, then groups each key's values by
 * window through a {@code GroupAlsoByWindowViaOutputBufferFn}, and registers the result as the
 * output dataset.
 *
 * @param transform the transform being translated
 * @param context translation context supplying input, output and options
 */
@Override
public void translateTransform(
    PTransform<PCollection<KV<K, V>>, PCollection<KV<K, Iterable<V>>>> transform,
    TranslationContext context) {

  @SuppressWarnings("unchecked")
  final PCollection<KV<K, V>> sourceCollection = (PCollection<KV<K, V>>) context.getInput();
  Dataset<WindowedValue<KV<K, V>>> sourceDataset = context.getDataset(sourceCollection);
  WindowingStrategy<?, ?> strategy = sourceCollection.getWindowingStrategy();

  // The input is keyed, so its coder is cast to a KvCoder and split into its parts.
  KvCoder<K, V> inputKvCoder = (KvCoder<K, V>) sourceCollection.getCoder();
  Coder<K> keyCoder = inputKvCoder.getKeyCoder();
  Coder<V> valueCoder = inputKvCoder.getValueCoder();

  // Step 1: group by key only.
  KeyValueGroupedDataset<K, WindowedValue<KV<K, V>>> keyedGroups =
      sourceDataset.groupByKey(KVHelpers.extractKey(), EncoderHelpers.fromBeamCoder(keyCoder));

  // Step 2: group also by windows.
  WindowedValue.FullWindowedValueCoder<KV<K, Iterable<V>>> groupedCoder =
      WindowedValue.FullWindowedValueCoder.of(
          KvCoder.of(keyCoder, IterableCoder.of(valueCoder)),
          strategy.getWindowFn().windowCoder());
  Dataset<WindowedValue<KV<K, Iterable<V>>>> groupedDataset =
      keyedGroups.flatMapGroups(
          new GroupAlsoByWindowViaOutputBufferFn<>(
              strategy,
              new InMemoryStateInternalsFactory<>(),
              SystemReduceFn.buffering(valueCoder),
              context.getSerializableOptions()),
          EncoderHelpers.fromBeamCoder(groupedCoder));

  context.putDataset(context.getOutput(), groupedDataset);
}
 
Example #23
Source File: GroupByKeyTranslatorBatch.java    From twister2 with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a {@code GroupByKey}: maps the input to byte tuples, gathers them by key, decodes
 * them back into windowed values, groups them by window, and registers the result as the
 * transform's output data set.
 *
 * @param transform the group-by-key being translated
 * @param context translation context supplying the input and output data sets
 */
@Override
public void translateNode(GroupByKey<K, V> transform, Twister2BatchTranslationContext context) {
  PCollection<KV<K, V>> inputCollection = context.getInput(transform);
  BatchTSetImpl<WindowedValue<KV<K, V>>> inputTSet = context.getInputDataSet(inputCollection);
  final KvCoder<K, V> inputKvCoder = (KvCoder<K, V>) context.getInput(transform).getCoder();
  Coder<K> keyCoder = ((KvCoder<K, V>) inputCollection.getCoder()).getKeyCoder();
  WindowingStrategy windowingStrategy = inputCollection.getWindowingStrategy();
  WindowFn<KV<K, V>, BoundedWindow> windowFn =
      (WindowFn<KV<K, V>, BoundedWindow>) windowingStrategy.getWindowFn();
  final WindowedValue.WindowedValueCoder<V> windowedValueCoder =
      WindowedValue.FullWindowedValueCoder.of(
          inputKvCoder.getValueCoder(), windowFn.windowCoder());

  // Keys and windowed values travel as byte arrays through the gather.
  KeyedTSet<byte[], byte[]> encodedPairs =
      inputTSet.mapToTuple(new MapToTupleFunction<K, V>(keyCoder, windowedValueCoder));

  // todo add support for a partition function to be specified, this would use
  // todo keyedPartition function instead of KeyedGather
  ComputeTSet<KV<K, Iterable<WindowedValue<V>>>, Iterator<Tuple<byte[], Iterator<byte[]>>>>
      groupedByKey =
      encodedPairs.keyedGather().map(new ByteToWindowFunction(keyCoder, windowedValueCoder));

  // --- now group also by window.
  ComputeTSet<WindowedValue<KV<K, Iterable<V>>>, Iterable<KV<K, Iterator<WindowedValue<V>>>>>
      groupedByKeyAndWindow =
      groupedByKey
          .direct()
          .<WindowedValue<KV<K, Iterable<V>>>>flatmap(
              new GroupByWindowFunction(
                  windowingStrategy,
                  SystemReduceFn.buffering(inputKvCoder.getValueCoder())));

  PCollection outputCollection = context.getOutput(transform);
  context.setOutputDataSet(outputCollection, groupedByKeyAndWindow);
}
 
Example #24
Source File: FlinkStreamingPortablePipelineTranslator.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Appends a group-by-key to the given stream: converts the elements to keyed work items, keys
 * the stream with a {@code WorkItemKeySelector}, and applies a {@code WindowDoFnOperator}
 * driven by a buffering {@code SystemReduceFn}.
 *
 * @param inputDataStream stream of windowed key/value pairs
 * @param windowingStrategy windowing strategy supplying the window coder
 * @param windowedInputCoder coder of the input elements; its value coder must be a KvCoder
 * @param operatorName name given to the grouping operator
 * @param context translation context supplying the pipeline options
 * @return the stream of grouped values
 */
private <K, V> SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<V>>>> addGBK(
    DataStream<WindowedValue<KV<K, V>>> inputDataStream,
    WindowingStrategy<?, ?> windowingStrategy,
    WindowedValueCoder<KV<K, V>> windowedInputCoder,
    String operatorName,
    StreamingTranslationContext context) {
  KvCoder<K, V> kvCoder = (KvCoder<K, V>) windowedInputCoder.getValueCoder();

  // Coders and type information for the intermediate keyed work items.
  SingletonKeyedWorkItemCoder<K, V> itemCoder =
      SingletonKeyedWorkItemCoder.of(
          kvCoder.getKeyCoder(),
          kvCoder.getValueCoder(),
          windowingStrategy.getWindowFn().windowCoder());

  WindowedValue.FullWindowedValueCoder<SingletonKeyedWorkItem<K, V>> windowedItemCoder =
      WindowedValue.getFullCoder(itemCoder, windowingStrategy.getWindowFn().windowCoder());

  CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, V>>> itemTypeInfo =
      new CoderTypeInformation<>(windowedItemCoder);

  DataStream<WindowedValue<SingletonKeyedWorkItem<K, V>>> itemStream =
      inputDataStream
          .flatMap(
              new FlinkStreamingTransformTranslators.ToKeyedWorkItem<>(
                  context.getPipelineOptions()))
          .returns(itemTypeInfo)
          .name("ToKeyedWorkItem");

  WorkItemKeySelector<K, V> workItemKeySelector =
      new WorkItemKeySelector<>(kvCoder.getKeyCoder());

  KeyedStream<WindowedValue<SingletonKeyedWorkItem<K, V>>, ByteBuffer> keyedItemStream =
      itemStream.keyBy(workItemKeySelector);

  SystemReduceFn<K, V, Iterable<V>, Iterable<V>, BoundedWindow> bufferingFn =
      SystemReduceFn.buffering(kvCoder.getValueCoder());

  // Coders and type information for the grouped output.
  Coder<Iterable<V>> groupedValuesCoder = IterableCoder.of(kvCoder.getValueCoder());

  Coder<WindowedValue<KV<K, Iterable<V>>>> groupedCoder =
      WindowedValue.getFullCoder(
          KvCoder.of(kvCoder.getKeyCoder(), groupedValuesCoder),
          windowingStrategy.getWindowFn().windowCoder());

  TypeInformation<WindowedValue<KV<K, Iterable<V>>>> groupedTypeInfo =
      new CoderTypeInformation<>(groupedCoder);

  TupleTag<KV<K, Iterable<V>>> mainOutputTag = new TupleTag<>("main output");

  WindowDoFnOperator<K, V, Iterable<V>> groupingOperator =
      new WindowDoFnOperator<>(
          bufferingFn,
          operatorName,
          (Coder) windowedItemCoder,
          mainOutputTag,
          Collections.emptyList(),
          new DoFnOperator.MultiOutputOutputManagerFactory(mainOutputTag, groupedCoder),
          windowingStrategy,
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          context.getPipelineOptions(),
          kvCoder.getKeyCoder(),
          (KeySelector) workItemKeySelector /* key selector */);

  return keyedItemStream.transform(
      operatorName, groupedTypeInfo, (OneInputStreamOperator) groupingOperator);
}
 
Example #25
Source File: FlinkStreamingTransformTranslators.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Translates a group-by-key: converts the input stream to keyed work items, keys it with a
 * {@code WorkItemKeySelector}, applies a {@code WindowDoFnOperator} driven by a buffering
 * {@code SystemReduceFn}, and registers the result as the transform's output stream.
 *
 * @param transform the transform being translated
 * @param context translation context supplying streams, coders and options
 */
@Override
public void translateNode(
    PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, Iterable<InputT>>>> transform,
    FlinkStreamingTranslationContext context) {

  PCollection<KV<K, InputT>> sourceCollection = context.getInput(transform);

  @SuppressWarnings("unchecked")
  WindowingStrategy<?, BoundedWindow> strategy =
      (WindowingStrategy<?, BoundedWindow>) sourceCollection.getWindowingStrategy();

  KvCoder<K, InputT> kvCoder = (KvCoder<K, InputT>) sourceCollection.getCoder();

  SingletonKeyedWorkItemCoder<K, InputT> itemCoder =
      SingletonKeyedWorkItemCoder.of(
          kvCoder.getKeyCoder(),
          kvCoder.getValueCoder(),
          sourceCollection.getWindowingStrategy().getWindowFn().windowCoder());

  DataStream<WindowedValue<KV<K, InputT>>> sourceStream =
      context.getInputDataStream(sourceCollection);

  WindowedValue.FullWindowedValueCoder<SingletonKeyedWorkItem<K, InputT>>
      windowedItemCoder =
          WindowedValue.getFullCoder(
              itemCoder, sourceCollection.getWindowingStrategy().getWindowFn().windowCoder());

  CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> itemTypeInfo =
      new CoderTypeInformation<>(windowedItemCoder);

  DataStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> itemStream =
      sourceStream
          .flatMap(new ToKeyedWorkItem<>(context.getPipelineOptions()))
          .returns(itemTypeInfo)
          .name("ToKeyedWorkItem");

  // One selector is handed to the operator; keyBy below gets its own separate instance.
  WorkItemKeySelector operatorKeySelector = new WorkItemKeySelector<>(kvCoder.getKeyCoder());

  KeyedStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>, ByteBuffer>
      keyedItemStream =
          itemStream.keyBy(new WorkItemKeySelector<>(kvCoder.getKeyCoder()));

  SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, BoundedWindow> bufferingFn =
      SystemReduceFn.buffering(kvCoder.getValueCoder());

  Coder<WindowedValue<KV<K, Iterable<InputT>>>> groupedCoder =
      context.getWindowedInputCoder(context.getOutput(transform));
  TypeInformation<WindowedValue<KV<K, Iterable<InputT>>>> groupedTypeInfo =
      context.getTypeInfo(context.getOutput(transform));

  TupleTag<KV<K, Iterable<InputT>>> mainOutputTag = new TupleTag<>("main output");

  String transformName = getCurrentTransformName(context);
  WindowDoFnOperator<K, InputT, Iterable<InputT>> groupingOperator =
      new WindowDoFnOperator<>(
          bufferingFn,
          transformName,
          (Coder) windowedItemCoder,
          mainOutputTag,
          Collections.emptyList(),
          new DoFnOperator.MultiOutputOutputManagerFactory<>(mainOutputTag, groupedCoder),
          strategy,
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          context.getPipelineOptions(),
          kvCoder.getKeyCoder(),
          operatorKeySelector);

  // The operator is typed on WindowedValue<KeyedWorkItem> while the stream carries
  // WindowedValue<SingletonKeyedWorkItem>; the raw cast bridges the two.
  @SuppressWarnings("unchecked")
  SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<InputT>>>> groupedStream =
      keyedItemStream
          .transform(transformName, groupedTypeInfo, (OneInputStreamOperator) groupingOperator)
          .uid(transformName);

  context.setOutputDataStream(context.getOutput(transform), groupedStream);
}
 
Example #26
Source File: GroupByKeyTranslator.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Translates a portable GroupByKey node: resolves the input stream, windowing strategy and
 * coders from the translation context, delegates to {@code doTranslateGBK}, and registers the
 * resulting stream under the transform's output id.
 *
 * @param transform the portable transform node being translated
 * @param pipeline the pipeline whose components are used to resolve coders and PCollections
 * @param ctx the portable translation context supplying streams, options and ids
 */
private static <K, InputT, OutputT> void doTranslatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  // --- input stream and windowing.
  final MessageStream<OpMessage<KV<K, InputT>>> source = ctx.getOneInputMessageStream(transform);
  // Repartitioning is requested only when more than one source partition may exist.
  final boolean repartition = ctx.getSamzaPipelineOptions().getMaxSourceParallelism() > 1;
  final WindowingStrategy<?, BoundedWindow> strategy =
      ctx.getPortableWindowStrategy(transform, pipeline);
  final Coder<BoundedWindow> boundedWindowCoder = strategy.getWindowFn().windowCoder();

  // --- coders.
  final String inputPCollectionId = ctx.getInputId(transform);
  final WindowedValue.WindowedValueCoder<KV<K, InputT>> windowedCoder =
      ctx.instantiateCoder(inputPCollectionId, pipeline.getComponents());
  final KvCoder<K, InputT> kvCoder = (KvCoder<K, InputT>) windowedCoder.getValueCoder();
  final Coder<WindowedValue<KV<K, InputT>>> windowedKvCoder =
      WindowedValue.FullWindowedValueCoder.of(kvCoder, boundedWindowCoder);

  // --- output tag, named after the transform's single output.
  final TupleTag<KV<K, OutputT>> mainOutputTag =
      new TupleTag<>(Iterables.getOnlyElement(transform.getTransform().getOutputsMap().keySet()));

  // --- buffering reduce function over the input value coder.
  @SuppressWarnings("unchecked")
  final SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> bufferingFn =
      (SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow>)
          SystemReduceFn.buffering(kvCoder.getValueCoder());

  // --- boundedness of the input PCollection.
  final RunnerApi.PCollection inputPCollection =
      pipeline.getComponents().getPcollectionsOrThrow(inputPCollectionId);
  final PCollection.IsBounded boundedness = SamzaPipelineTranslatorUtils.isBounded(inputPCollection);

  final MessageStream<OpMessage<KV<K, OutputT>>> grouped =
      doTranslateGBK(
          source,
          repartition,
          bufferingFn,
          strategy,
          kvCoder,
          windowedKvCoder,
          ctx.getTransformFullName(),
          ctx.getTransformId(),
          mainOutputTag,
          boundedness);
  ctx.registerMessageStream(ctx.getOutputId(transform), grouped);
}
 
Example #27
Source File: GroupByKeyTranslator.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the group-by-key stream: keeps only element messages, optionally repartitions them by
 * key, converts each element to a keyed work item and feeds it through a {@code GroupByKeyOp}.
 *
 * @param inputStream the incoming stream of key/value messages
 * @param needRepartition whether the stream is repartitioned by key before grouping
 * @param reduceFn the reduce function handed to the group-by-key operator
 * @param windowingStrategy the windowing strategy of the input
 * @param kvInputCoder coder for the input key/value pairs
 * @param elementCoder coder for the windowed input elements, used when repartitioning
 * @param transformFullName full name of the transform, passed to the operator
 * @param transformId id of the transform, used for the partition id and passed to the operator
 * @param outputTag tag of the grouped output
 * @param isBounded boundedness of the input, passed to the operator
 * @return the stream of grouped output messages
 */
private static <K, InputT, OutputT> MessageStream<OpMessage<KV<K, OutputT>>> doTranslateGBK(
    MessageStream<OpMessage<KV<K, InputT>>> inputStream,
    boolean needRepartition,
    SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> reduceFn,
    WindowingStrategy<?, BoundedWindow> windowingStrategy,
    KvCoder<K, InputT> kvInputCoder,
    Coder<WindowedValue<KV<K, InputT>>> elementCoder,
    String transformFullName,
    String transformId,
    TupleTag<KV<K, OutputT>> outputTag,
    PCollection.IsBounded isBounded) {
  // Only ELEMENT messages take part in the grouping; every other message type is dropped here.
  final MessageStream<OpMessage<KV<K, InputT>>> elementsOnly =
      inputStream.filter(message -> message.getType() == OpMessage.Type.ELEMENT);

  final MessageStream<OpMessage<KV<K, InputT>>> keyedStream;
  if (needRepartition) {
    // Shuffle by key, then unwrap the partitioned KV back into an element message.
    keyedStream =
        elementsOnly
            .partitionBy(
                message -> message.getElement().getValue().getKey(),
                message -> message.getElement(),
                KVSerde.of(
                    SamzaCoders.toSerde(kvInputCoder.getKeyCoder()),
                    SamzaCoders.toSerde(elementCoder)),
                "gbk-" + escape(transformId))
            .map(entry -> OpMessage.ofElement(entry.getValue()));
  } else {
    keyedStream = elementsOnly;
  }

  final Coder<KeyedWorkItem<K, InputT>> workItemCoder =
      KeyedWorkItemCoder.of(
          kvInputCoder.getKeyCoder(),
          kvInputCoder.getValueCoder(),
          windowingStrategy.getWindowFn().windowCoder());

  return keyedStream
      .flatMap(OpAdapter.adapt(new KvToKeyedWorkItemOp<>()))
      .flatMap(
          OpAdapter.adapt(
              new GroupByKeyOp<>(
                  outputTag,
                  workItemCoder,
                  reduceFn,
                  windowingStrategy,
                  new DoFnOp.SingleOutputManagerFactory<>(),
                  transformFullName,
                  transformId,
                  isBounded)));
}
 
Example #28
Source File: TransformTranslator.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the evaluator for {@code GroupByKey}. Windowing strategies eligible for
 * {@code GroupNonMergingWindowsFunctions} are grouped by key and window in one step; all others
 * are grouped by key first and then by window through
 * {@code SparkGroupAlsoByWindowViaOutputBufferFn}.
 *
 * @return a transform evaluator that stores the grouped result as a bounded dataset
 */
private static <K, V, W extends BoundedWindow> TransformEvaluator<GroupByKey<K, V>> groupByKey() {
  return new TransformEvaluator<GroupByKey<K, V>>() {
    @Override
    public void evaluate(GroupByKey<K, V> transform, EvaluationContext context) {
      @SuppressWarnings("unchecked")
      JavaRDD<WindowedValue<KV<K, V>>> inputRdd =
          ((BoundedDataset<KV<K, V>>) context.borrowDataset(transform)).getRDD();
      final KvCoder<K, V> kvCoder = (KvCoder<K, V>) context.getInput(transform).getCoder();
      @SuppressWarnings("unchecked")
      final WindowingStrategy<?, W> strategy =
          (WindowingStrategy<?, W>) context.getInput(transform).getWindowingStrategy();
      @SuppressWarnings("unchecked")
      final WindowFn<Object, W> windowFn = (WindowFn<Object, W>) strategy.getWindowFn();

      // --- coders.
      final Coder<K> keyCoder = kvCoder.getKeyCoder();
      final Coder<V> valueCoder = kvCoder.getValueCoder();
      final WindowedValue.WindowedValueCoder<V> windowedValueCoder =
          WindowedValue.FullWindowedValueCoder.of(valueCoder, windowFn.windowCoder());

      final Partitioner partitioner = getPartitioner(context);
      final JavaRDD<WindowedValue<KV<K, Iterable<V>>>> grouped;
      if (!GroupNonMergingWindowsFunctions.isEligibleForGroupByWindow(strategy)) {
        // --- group by key only.
        JavaRDD<KV<K, Iterable<WindowedValue<V>>>> byKeyOnly =
            GroupCombineFunctions.groupByKeyOnly(
                inputRdd, keyCoder, windowedValueCoder, partitioner);

        // --- now group also by window.
        // for batch, GroupAlsoByWindow uses an in-memory StateInternals.
        grouped =
            byKeyOnly.flatMap(
                new SparkGroupAlsoByWindowViaOutputBufferFn<>(
                    strategy,
                    new TranslationUtils.InMemoryStateInternalsFactory<>(),
                    SystemReduceFn.buffering(valueCoder),
                    context.getSerializableOptions()));
      } else {
        // we can have a memory sensitive translation for non-merging windows
        grouped =
            GroupNonMergingWindowsFunctions.groupByKeyAndWindow(
                inputRdd, keyCoder, valueCoder, strategy, partitioner);
      }
      context.putDataset(transform, new BoundedDataset<>(grouped));
    }

    @Override
    public String toNativeString() {
      return "groupByKey()";
    }
  };
}
 
Example #29
Source File: SparkGroupAlsoByWindowViaWindowSet.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Performs the stateful group-also-by-window step for one partition.
 *
 * <p>The input iterator is the partition (~bundle) of a co-grouping of the new input with the
 * previous state, if any. The returned iterator yields the output key together with the updated
 * state.
 *
 * <p>Possible input scenarios for {@code (K, Seq, Option<S>)}:
 *
 * <ol>
 *   <li>{@code Option<S>.isEmpty}: new data with no previous state.
 *   <li>{@code Seq.isEmpty}: no new data, but evaluating previous state (timer-like behaviour).
 *   <li>{@code Seq.nonEmpty && Option<S>.isDefined}: new data with previous state.
 * </ol>
 *
 * @param input the co-grouped partition of encoded input values and optional previous state
 * @return an iterator over keys paired with their updated state and encoded output
 */
@Override
public Iterator<
        Tuple2</*K*/ ByteArray, Tuple2<StateAndTimers, /*WV<KV<K, Itr<I>>>*/ List<byte[]>>>>
    apply(
        final Iterator<
                Tuple3<
                    /*K*/ ByteArray,
                    Seq</*WV<I>*/ byte[]>,
                    Option<Tuple2<StateAndTimers, /*WV<KV<K, Itr<I>>>*/ List<byte[]>>>>>
            input) {
  // Counters for dropped elements live in a metrics container created for this call.
  final MetricsContainerImpl metricsContainer = new MetricsContainerImpl("cellProvider");

  final CounterCell closedWindowDrops =
      metricsContainer.getCounter(
          MetricName.named(
              SparkGroupAlsoByWindowViaWindowSet.class,
              GroupAlsoByWindowsAggregators.DROPPED_DUE_TO_CLOSED_WINDOW_COUNTER));

  final CounterCell latenessDrops =
      metricsContainer.getCounter(
          MetricName.named(
              SparkGroupAlsoByWindowViaWindowSet.class,
              GroupAlsoByWindowsAggregators.DROPPED_DUE_TO_LATENESS_COUNTER));

  // log if there's something to log.
  // NOTE(review): both counters were obtained from a container created just above and nothing
  // in this method increments them before these reads, so the branches below look unreachable
  // here -- confirm whether the logging was meant to run after the output iterator is consumed.
  final long droppedForLateness = latenessDrops.getCumulative();
  if (droppedForLateness > 0) {
    LOG.info(String.format("Dropped %d elements due to lateness.", droppedForLateness));
    latenessDrops.inc(-latenessDrops.getCumulative());
  }
  final long droppedForClosedWindow = closedWindowDrops.getCumulative();
  if (droppedForClosedWindow > 0) {
    LOG.info(String.format("Dropped %d elements due to closed window.", droppedForClosedWindow));
    closedWindowDrops.inc(-closedWindowDrops.getCumulative());
  }

  // Buffering reduce function over the value coder of the windowed input.
  final SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, W> bufferingFn =
      SystemReduceFn.buffering(wvCoder.getValueCoder());

  return scala.collection.JavaConversions.asScalaIterator(
      new UpdateStateByKeyOutputIterator(input, bufferingFn, latenessDrops));
}
 
Example #30
Source File: WindowGroupP.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the per-key manager: sets up in-memory timer and state internals, wires a
 * {@code ReduceFnRunner} whose main output is encoded and appended to the traverser, and then
 * advances the watermark to the latest known value.
 *
 * @param key the key this manager is responsible for
 */
KeyManager(K key) {
  this.timerInternals = new InMemoryTimerInternals();
  this.stateInternals = new InMemoryStateInternalsImpl(key);

  // Trigger state machine built from the proto form of the windowing strategy's trigger.
  final ExecutableTriggerStateMachine triggerStateMachine =
      ExecutableTriggerStateMachine.create(
          TriggerStateMachines.stateMachineForTrigger(
              TriggerTranslation.toProto(windowingStrategy.getTrigger())));

  // Receiver for grouped output: encodes each windowed value and appends the bytes downstream.
  final OutputWindowedValue<KV<K, Iterable<V>>> encodedOutputSink =
      new OutputWindowedValue<KV<K, Iterable<V>>>() {
        @Override
        public void outputWindowedValue(
            KV<K, Iterable<V>> output,
            Instant timestamp,
            Collection<? extends BoundedWindow> windows,
            PaneInfo pane) {
          byte[] payload =
              Utils.encode(WindowedValue.of(output, timestamp, windows, pane), outputCoder);
          //noinspection ResultOfMethodCallIgnored
          appendableTraverser.append(payload);
        }

        @Override
        public <AdditionalOutputT> void outputWindowedValue(
            TupleTag<AdditionalOutputT> tag,
            AdditionalOutputT output,
            Instant timestamp,
            Collection<? extends BoundedWindow> windows,
            PaneInfo pane) {
          throw new UnsupportedOperationException("Grouping should not use side outputs");
        }
      };

  this.reduceFnRunner =
      new ReduceFnRunner<>(
          key,
          windowingStrategy,
          triggerStateMachine,
          stateInternals,
          timerInternals,
          encodedOutputSink,
          NullSideInputReader.empty(),
          SystemReduceFn.buffering(inputValueValueCoder),
          pipelineOptions.get());
  advanceWatermark(latestWatermark, Instant.now());
}