Java Code Examples for org.apache.beam.sdk.values.WindowingStrategy#getWindowFn()

The following examples show how to use org.apache.beam.sdk.values.WindowingStrategy#getWindowFn(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ReduceFnTester.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a tester from a windowing strategy, a trigger state machine and a reduce function.
 *
 * <p>The wildcard-typed strategy is re-viewed as {@code WindowingStrategy<Object, W>}; both the
 * strategy and its window function are kept on the instance.
 *
 * @param wildcardStrategy windowing strategy supplying the window function
 * @param triggerStateMachine state machine passed to {@code ExecutableTriggerStateMachine.create}
 * @param reduceFn reduce function stored on the tester
 * @param outputCoder coder stored for the output type
 * @param options pipeline options stored on the tester
 * @param sideInputReader side input reader stored on the tester
 * @throws Exception declared by the constructor signature
 */
private ReduceFnTester(
    WindowingStrategy<?, W> wildcardStrategy,
    TriggerStateMachine triggerStateMachine,
    ReduceFn<String, InputT, OutputT, W> reduceFn,
    Coder<OutputT> outputCoder,
    PipelineOptions options,
    SideInputReader sideInputReader)
    throws Exception {
  // Collaborators supplied directly by the caller.
  this.reduceFn = reduceFn;
  this.outputCoder = outputCoder;
  this.options = options;
  this.sideInputReader = sideInputReader;

  // Windowing: narrow the element type to Object and remember the window function.
  @SuppressWarnings("unchecked")
  WindowingStrategy<Object, W> typedStrategy = (WindowingStrategy<Object, W>) wildcardStrategy;
  this.objectStrategy = typedStrategy;
  this.windowFn = typedStrategy.getWindowFn();

  // Objects created by the tester itself.
  this.executableTriggerStateMachine = ExecutableTriggerStateMachine.create(triggerStateMachine);
  this.testOutputter = new TestOutputWindowedValue();
}
 
Example 2
Source File: StatefulDoFnRunner.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a runner that wraps {@code doFnRunner} and records the collaborators it was given.
 *
 * <p>The window coder is taken from the strategy's window function and is also used to build the
 * state tag for the sort buffer. The window function is finally passed to
 * {@code rejectMergingWindowFn}.
 *
 * @param doFnRunner the wrapped runner
 * @param inputCoder element coder used for the sort buffer's full windowed-value coder
 * @param stepContext step context stored on the runner
 * @param windowingStrategy strategy whose window function provides the window coder
 * @param cleanupTimer cleanup timer stored on the runner
 * @param stateCleaner state cleaner stored on the runner
 * @param requiresTimeSortedInput flag stored on the runner
 */
public StatefulDoFnRunner(
    DoFnRunner<InputT, OutputT> doFnRunner,
    Coder<InputT> inputCoder,
    StepContext stepContext,
    WindowingStrategy<?, ?> windowingStrategy,
    CleanupTimer<InputT> cleanupTimer,
    StateCleaner<W> stateCleaner,
    boolean requiresTimeSortedInput) {
  this.doFnRunner = doFnRunner;
  this.stepContext = stepContext;
  this.windowingStrategy = windowingStrategy;
  this.cleanupTimer = cleanupTimer;
  this.stateCleaner = stateCleaner;
  this.requiresTimeSortedInput = requiresTimeSortedInput;

  WindowFn<?, ?> strategyWindowFn = windowingStrategy.getWindowFn();
  // The concrete window type is erased here, so the coder is viewed as a BoundedWindow coder.
  @SuppressWarnings("unchecked")
  Coder<BoundedWindow> boundedWindowCoder =
      (Coder<BoundedWindow>) strategyWindowFn.windowCoder();
  this.windowCoder = boundedWindowCoder;

  // System bag tag holding buffered elements encoded together with their window information.
  this.sortBufferTag =
      StateTags.makeSystemTagInternal(
          StateTags.bag(SORT_BUFFER_STATE, WindowedValue.getFullCoder(inputCoder, windowCoder)));

  rejectMergingWindowFn(strategyWindowFn);
}
 
Example 3
Source File: Window.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Validates the windowing strategy that results from applying this transform to {@code input}.
 *
 * @param input the collection whose windowing strategy is the starting point
 * @throws IllegalArgumentException if a non-default trigger is specified outside
 *     {@code GlobalWindows} without an allowed lateness, or if the accumulation mode is
 *     unspecified while {@code canProduceMultiplePanes} reports true
 */
private void applicableTo(PCollection<?> input) {
  WindowingStrategy<?, ?> resolved = getOutputStrategyInternal(input.getWindowingStrategy());

  // An explicit, non-default trigger outside GlobalWindows needs an explicit allowed lateness.
  boolean triggerLacksAllowedLateness =
      resolved.isTriggerSpecified()
          && !(resolved.getTrigger() instanceof DefaultTrigger)
          && !(resolved.getWindowFn() instanceof GlobalWindows)
          && !resolved.isAllowedLatenessSpecified();
  if (triggerLacksAllowedLateness) {
    throw new IllegalArgumentException(
        "Except when using GlobalWindows,"
            + " calling .triggering() to specify a trigger requires that the allowed lateness"
            + " be specified using .withAllowedLateness() to set the upper bound on how late"
            + " data can arrive before being dropped. See Javadoc for more details.");
  }

  // Multiple panes per window require the accumulation mode to be chosen explicitly.
  boolean accumulationModeMissing =
      !resolved.isModeSpecified() && canProduceMultiplePanes(resolved);
  if (accumulationModeMissing) {
    throw new IllegalArgumentException(
        "Calling .triggering() to specify a trigger or calling .withAllowedLateness() to"
            + " specify an allowed lateness greater than zero requires that the accumulation"
            + " mode be specified using .discardingFiredPanes() or .accumulatingFiredPanes()."
            + " See Javadoc for more details.");
  }
}
 
Example 4
Source File: GroupByKey.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Derives the output windowing strategy from {@code inputStrategy}.
 *
 * <p>A window function that is not non-merging is wrapped in {@code InvalidWindows}; the trigger
 * is replaced by the continuation trigger of the input strategy's trigger.
 *
 * @param inputStrategy the windowing strategy of the input
 * @return the input strategy with the adjusted window function and trigger
 */
public WindowingStrategy<?, ?> updateWindowingStrategy(WindowingStrategy<?, ?> inputStrategy) {
  WindowFn<?, ?> outputWindowFn = inputStrategy.getWindowFn();
  boolean isMergingFn = !outputWindowFn.isNonMerging();
  if (isMergingFn) {
    // Windows must not be merged a second time unless the user asks for it explicitly,
    // e.g. through Window.into() or Window.remerge().
    outputWindowFn =
        new InvalidWindows<>(
            "WindowFn has already been consumed by previous GroupByKey", outputWindowFn);
  }

  WindowingStrategy<?, ?> withUpdatedWindowFn = inputStrategy.withWindowFn(outputWindowFn);
  // Downstream of this transform the continuation of the current trigger applies.
  return withUpdatedWindowFn.withTrigger(inputStrategy.getTrigger().getContinuationTrigger());
}
 
Example 5
Source File: AggregatorCombiner.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Asks the strategy's window function to merge {@code windows}.
 *
 * @param windowingStrategy strategy providing the window function
 * @param windows the windows offered for merging
 * @return an empty map when the window function is non-merging; otherwise the map that was
 *     handed to {@code MergeContextImpl} for the merge call
 * @throws Exception if {@code WindowFn.mergeWindows} throws
 */
private Map<W, W> mergeWindows(WindowingStrategy<InputT, W> windowingStrategy, Set<W> windows)
    throws Exception {
  WindowFn<InputT, W> windowFn = windowingStrategy.getWindowFn();

  // Reuse the local instead of looking the window function up a second time.
  if (windowFn.isNonMerging()) {
    // Return an empty map, indicating that no window is merged.
    return Collections.emptyMap();
  }

  Map<W, W> windowToMergeResult = new HashMap<>();
  windowFn.mergeWindows(new MergeContextImpl(windowFn, windows, windowToMergeResult));
  return windowToMergeResult;
}
 
Example 6
Source File: TransformTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the evaluator for {@code Reshuffle}.
 *
 * <p>The evaluator borrows the input RDD, builds a full windowed-value coder from the input's
 * KV coder and the window coder of its window function, and stores the result of
 * {@code GroupCombineFunctions.reshuffle} as a bounded dataset.
 */
private static <K, V, W extends BoundedWindow> TransformEvaluator<Reshuffle<K, V>> reshuffle() {
  return new TransformEvaluator<Reshuffle<K, V>>() {
    @Override
    public String toNativeString() {
      return "repartition(...)";
    }

    @Override
    public void evaluate(Reshuffle<K, V> transform, EvaluationContext context) {
      @SuppressWarnings("unchecked")
      JavaRDD<WindowedValue<KV<K, V>>> sourceRdd =
          ((BoundedDataset<KV<K, V>>) context.borrowDataset(transform)).getRDD();

      // Window function of the input; its window coder goes into the windowed-value coder.
      @SuppressWarnings("unchecked")
      final WindowingStrategy<?, W> inputStrategy =
          (WindowingStrategy<?, W>) context.getInput(transform).getWindowingStrategy();
      final KvCoder<K, V> kvCoder = (KvCoder<K, V>) context.getInput(transform).getCoder();
      @SuppressWarnings("unchecked")
      final WindowFn<Object, W> inputWindowFn =
          (WindowFn<Object, W>) inputStrategy.getWindowFn();

      final WindowedValue.WindowedValueCoder<KV<K, V>> windowedKvCoder =
          WindowedValue.FullWindowedValueCoder.of(kvCoder, inputWindowFn.windowCoder());

      JavaRDD<WindowedValue<KV<K, V>>> shuffledRdd =
          GroupCombineFunctions.reshuffle(sourceRdd, windowedKvCoder);

      context.putDataset(transform, new BoundedDataset<>(shuffledRdd));
    }
  };
}
 
Example 7
Source File: StatefulDoFnRunner.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a cleanup timer backed by {@code timerInternals}.
 *
 * <p>The window coder is obtained from the window function of {@code windowingStrategy}.
 *
 * @param timerInternals timer internals stored on the instance
 * @param windowingStrategy strategy whose window function supplies the window coder
 */
public TimeInternalsCleanupTimer(
    TimerInternals timerInternals, WindowingStrategy<?, ?> windowingStrategy) {
  this.windowingStrategy = windowingStrategy;
  this.timerInternals = timerInternals;

  WindowFn<?, ?> windowFn = windowingStrategy.getWindowFn();
  // The window type is a wildcard here, so the cast is unchecked; the suppression is scoped to
  // this single local, as the StatefulDoFnRunner constructor in the same file does.
  @SuppressWarnings("unchecked")
  Coder<BoundedWindow> untypedCoder = (Coder<BoundedWindow>) windowFn.windowCoder();
  this.windowCoder = untypedCoder;
}
 
Example 8
Source File: StreamingTransformTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the evaluator for {@code GroupByKey} on an unbounded dataset.
 *
 * <p>The evaluator borrows the input stream, derives key and windowed-value coders from the
 * input's KV coder and window function, delegates to
 * {@code SparkGroupAlsoByWindowViaWindowSet.groupByKeyAndWindow}, and stores the resulting
 * stream together with the input's stream sources.
 */
private static <K, V, W extends BoundedWindow> TransformEvaluator<GroupByKey<K, V>> groupByKey() {
  return new TransformEvaluator<GroupByKey<K, V>>() {
    @Override
    public String toNativeString() {
      return "groupByKey()";
    }

    @Override
    public void evaluate(GroupByKey<K, V> transform, EvaluationContext context) {
      @SuppressWarnings("unchecked")
      UnboundedDataset<KV<K, V>> sourceDataset =
          (UnboundedDataset<KV<K, V>>) context.borrowDataset(transform);
      List<Integer> sourceIds = sourceDataset.getStreamSources();
      JavaDStream<WindowedValue<KV<K, V>>> sourceStream = sourceDataset.getDStream();

      final KvCoder<K, V> kvCoder = (KvCoder<K, V>) context.getInput(transform).getCoder();
      @SuppressWarnings("unchecked")
      final WindowingStrategy<?, W> inputStrategy =
          (WindowingStrategy<?, W>) context.getInput(transform).getWindowingStrategy();
      @SuppressWarnings("unchecked")
      final WindowFn<Object, W> inputWindowFn =
          (WindowFn<Object, W>) inputStrategy.getWindowFn();

      // Coder for the values, carrying their window information.
      final WindowedValue.WindowedValueCoder<V> windowedValueCoder =
          WindowedValue.FullWindowedValueCoder.of(
              kvCoder.getValueCoder(), inputWindowFn.windowCoder());

      JavaDStream<WindowedValue<KV<K, Iterable<V>>>> groupedStream =
          SparkGroupAlsoByWindowViaWindowSet.groupByKeyAndWindow(
              sourceStream,
              kvCoder.getKeyCoder(),
              windowedValueCoder,
              inputStrategy,
              context.getSerializableOptions(),
              sourceIds,
              context.getCurrentTransform().getFullName());

      context.putDataset(transform, new UnboundedDataset<>(groupedStream, sourceIds));
    }
  };
}
 
Example 9
Source File: PCollectionViewTranslatorBatch.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Translates {@code View.CreatePCollectionView} by gathering the input data set and registering
 * it as a side input under the view's tag id.
 *
 * @param transform the view-creating transform being translated
 * @param context translation context providing inputs, the current transform and the side
 *     input registry
 * @throws RuntimeException wrapping the {@code IOException} raised while resolving the view
 */
@Override
public void translateNode(
    View.CreatePCollectionView<ElemT, ViewT> transform, Twister2BatchTranslationContext context) {
  BatchTSet<WindowedValue<ElemT>> sourceTSet =
      context.getInputDataSet(context.getInput(transform));
  @SuppressWarnings("unchecked")
  AppliedPTransform<
          PCollection<ElemT>,
          PCollection<ElemT>,
          PTransform<PCollection<ElemT>, PCollection<ElemT>>>
      appliedTransform =
          (AppliedPTransform<
                  PCollection<ElemT>,
                  PCollection<ElemT>,
                  PTransform<PCollection<ElemT>, PCollection<ElemT>>>)
              context.getCurrentTransform();

  // Coders are derived from the input collection's KV coder and window function.
  PCollection<ElemT> inputPCol = context.getInput(transform);
  final KvCoder kvCoder = (KvCoder) inputPCol.getCoder();
  Coder keyCoder = kvCoder.getKeyCoder();
  WindowingStrategy inputStrategy = inputPCol.getWindowingStrategy();
  WindowFn inputWindowFn = inputStrategy.getWindowFn();
  final WindowedValue.WindowedValueCoder windowedValueCoder =
      WindowedValue.FullWindowedValueCoder.of(
          kvCoder.getValueCoder(), inputWindowFn.windowCoder());

  // Map to tuples, gather everything, then map back to windowed values.
  BatchTSet<WindowedValue<ElemT>> gatheredTSet =
      sourceTSet
          .direct()
          .map(new MapToTupleFunction<>(keyCoder, windowedValueCoder))
          .allGather()
          .map(new ByteToWindowFunctionPrimitive(keyCoder, windowedValueCoder));

  org.apache.beam.sdk.values.PCollectionView<ViewT> view;
  try {
    view = CreatePCollectionViewTranslation.getView(appliedTransform);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  context.setSideInputDataSet(view.getTagInternal().getId(), gatheredTSet);
}
 
Example 10
Source File: StreamingSideInputFetcher.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the global data request for {@code view} as seen from {@code mainWindow}.
 *
 * <p>The side input window is obtained from the view's window mapping function and encoded with
 * the side input's window coder; the encoded bytes become the version of the data id. The
 * existence watermark deadline is derived from the side input trigger's
 * {@code getWatermarkThatGuaranteesFiring} for that window.
 *
 * @param view the side input view
 * @param mainWindow the main-input window to map to a side input window
 * @return the request identifying the side input data for the mapped window
 * @throws RuntimeException wrapping the {@code IOException} raised while encoding the window
 */
private <SideWindowT extends BoundedWindow> Windmill.GlobalDataRequest buildGlobalDataRequest(
    PCollectionView<?> view, BoundedWindow mainWindow) {
  @SuppressWarnings("unchecked")
  WindowingStrategy<?, SideWindowT> sideStrategy =
      (WindowingStrategy<?, SideWindowT>) view.getWindowingStrategyInternal();

  Coder<SideWindowT> sideWindowCoder = sideStrategy.getWindowFn().windowCoder();

  SideWindowT mappedWindow =
      (SideWindowT) view.getWindowMappingFn().getSideInputWindow(mainWindow);

  // Serialize the mapped window; the bytes serve as the version of the data id.
  ByteString.Output encodedWindow = ByteString.newOutput();
  try {
    sideWindowCoder.encode(mappedWindow, encodedWindow, Coder.Context.OUTER);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }

  Windmill.GlobalDataId dataId =
      Windmill.GlobalDataId.newBuilder()
          .setTag(view.getTagInternal().getId())
          .setVersion(encodedWindow.toByteString())
          .build();

  return Windmill.GlobalDataRequest.newBuilder()
      .setDataId(dataId)
      .setExistenceWatermarkDeadline(
          WindmillTimeUtils.harnessToWindmillTimestamp(
              sideStrategy.getTrigger().getWatermarkThatGuaranteesFiring(mappedWindow)))
      .build();
}
 
Example 11
Source File: AssignWindowTranslatorBatch.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Translates {@code Window.Assign} by running {@code AssignWindowsFunction} over the input data
 * set, using the window function of the transform's output windowing strategy.
 *
 * @param transform the window-assigning transform being translated
 * @param context translation context providing the input/output collections, data sets and
 *     options
 */
@Override
public void translateNode(Window.Assign<T> transform, Twister2BatchTranslationContext context) {
  BatchTSetImpl<WindowedValue<T>> sourceTSet =
      context.getInputDataSet(context.getInput(transform));

  final WindowingStrategy<T, BoundedWindow> outputStrategy =
      (WindowingStrategy<T, BoundedWindow>) context.getOutput(transform).getWindowingStrategy();
  WindowFn<T, BoundedWindow> assigningFn = outputStrategy.getWindowFn();

  ComputeTSet<WindowedValue<T>, Iterator<WindowedValue<T>>> windowedTSet =
      sourceTSet.direct().compute(new AssignWindowsFunction(assigningFn, context.getOptions()));

  // Register the result as the data set of the transform's output.
  context.setOutputDataSet(context.getOutput(transform), windowedTSet);
}
 
Example 12
Source File: AssignWindowTranslatorBatch.java    From twister2 with Apache License 2.0 5 votes vote down vote up
/**
 * Translates {@code Window.Assign} by running {@code AssignWindowsFunction} over the input data
 * set, using the window function of the transform's output windowing strategy.
 *
 * @param transform the window-assigning transform being translated
 * @param context translation context providing the input/output collections and data sets
 */
@Override
public void translateNode(Window.Assign<T> transform, Twister2BatchTranslationContext context) {
  BatchTSetImpl<WindowedValue<T>> sourceTSet =
      context.getInputDataSet(context.getInput(transform));

  final WindowingStrategy<T, BoundedWindow> outputStrategy =
      (WindowingStrategy<T, BoundedWindow>) context.getOutput(transform).getWindowingStrategy();
  WindowFn<T, BoundedWindow> assigningFn = outputStrategy.getWindowFn();

  ComputeTSet<WindowedValue<T>, Iterator<WindowedValue<T>>> windowedTSet =
      sourceTSet.direct().compute(new AssignWindowsFunction(assigningFn));

  // Register the result as the data set of the transform's output.
  context.setOutputDataSet(context.getOutput(transform), windowedTSet);
}
 
Example 13
Source File: HashingFlinkCombineRunner.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Asks the strategy's window function to merge {@code windows}.
 *
 * @param windowingStrategy strategy providing the window function
 * @param windows the windows offered for merging
 * @return an empty map when the window function is non-merging; otherwise the map that was
 *     handed to {@code MergeContextImpl} for the merge call
 * @throws Exception if {@code WindowFn.mergeWindows} throws
 */
private Map<W, W> mergeWindows(WindowingStrategy<Object, W> windowingStrategy, Set<W> windows)
    throws Exception {
  WindowFn<Object, W> windowFn = windowingStrategy.getWindowFn();

  // Reuse the local instead of looking the window function up a second time.
  if (windowFn.isNonMerging()) {
    // Return an empty map, indicating that no window is merged.
    return Collections.emptyMap();
  }

  Map<W, W> windowToMergeResult = new HashMap<>();
  windowFn.mergeWindows(new MergeContextImpl(windowFn, windows, windowToMergeResult));
  return windowToMergeResult;
}
 
Example 14
Source File: Window.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the windowing strategy to use for output.
 *
 * <p>If the input's window function is an {@code InvalidWindows} wrapper, the strategy is
 * returned with the wrapper's original window function; otherwise the input strategy is
 * returned as is.
 *
 * @param inputStrategy the windowing strategy of the input
 * @return the strategy with any {@code InvalidWindows} wrapper removed
 */
private <W extends BoundedWindow> WindowingStrategy<?, W> getOutputWindowing(
    WindowingStrategy<?, W> inputStrategy) {
  if (!(inputStrategy.getWindowFn() instanceof InvalidWindows)) {
    return inputStrategy;
  }

  @SuppressWarnings("unchecked")
  InvalidWindows<W> wrapper = (InvalidWindows<W>) inputStrategy.getWindowFn();
  return inputStrategy.withWindowFn(wrapper.getOriginalWindowFn());
}
 
Example 15
Source File: Window.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether {@code strategy} can emit more than one pane per window.
 *
 * <p>This is the case when the window function is not {@code GlobalWindows} and the allowed
 * lateness is greater than zero milliseconds, or when the trigger is not a
 * {@code DefaultTrigger}.
 *
 * @param strategy the windowing strategy to inspect
 * @return {@code true} if late data can arrive or a custom trigger is set
 */
private boolean canProduceMultiplePanes(WindowingStrategy<?, ?> strategy) {
  // The default trigger is Repeatedly.forever(AfterWatermark.pastEndOfWindow()). With a nonzero
  // allowed lateness it fires for every late-arriving element, so an accumulation mode must be
  // specified in that case.
  boolean outsideGlobalWindows = !(strategy.getWindowFn() instanceof GlobalWindows);
  boolean lateDataPossible =
      outsideGlobalWindows && strategy.getAllowedLateness().getMillis() > 0;

  boolean customTrigger = !(strategy.getTrigger() instanceof DefaultTrigger);

  return lateDataPossible || customTrigger;
}
 
Example 16
Source File: Flatten.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the flattened output collection for {@code inputs}.
 *
 * <p>With no inputs the global default windowing strategy is used and the coder is left
 * {@code null}. Otherwise the first input's windowing strategy and coder are used, after every
 * input's window function and trigger have been checked for compatibility with the first
 * input's. Boundedness starts as {@code BOUNDED} and is combined with each input's boundedness
 * through {@code IsBounded.and}.
 *
 * @param inputs the collections to flatten
 * @return the primitive output collection
 * @throws IllegalStateException if an input's window function or trigger is incompatible
 */
@Override
public PCollection<T> expand(PCollectionList<T> inputs) {
  WindowingStrategy<?, ?> windowingStrategy;
  IsBounded isBounded = IsBounded.BOUNDED;

  if (inputs.getAll().isEmpty()) {
    windowingStrategy = WindowingStrategy.globalDefault();
  } else {
    // The first input is the reference every input is compared against.
    windowingStrategy = inputs.get(0).getWindowingStrategy();
    for (PCollection<?> candidate : inputs.getAll()) {
      WindowingStrategy<?, ?> candidateStrategy = candidate.getWindowingStrategy();

      boolean windowFnsCompatible =
          windowingStrategy.getWindowFn().isCompatible(candidateStrategy.getWindowFn());
      if (!windowFnsCompatible) {
        throw new IllegalStateException(
            "Inputs to Flatten had incompatible window windowFns: "
                + windowingStrategy.getWindowFn()
                + ", "
                + candidateStrategy.getWindowFn());
      }

      boolean triggersCompatible =
          windowingStrategy.getTrigger().isCompatible(candidateStrategy.getTrigger());
      if (!triggersCompatible) {
        throw new IllegalStateException(
            "Inputs to Flatten had incompatible triggers: "
                + windowingStrategy.getTrigger()
                + ", "
                + candidateStrategy.getTrigger());
      }

      isBounded = isBounded.and(candidate.isBounded());
    }
  }

  return PCollection.createPrimitiveOutputInternal(
      inputs.getPipeline(),
      windowingStrategy,
      isBounded,
      // The coder comes from the first collection; with no inputs it stays unspecified.
      inputs.getAll().isEmpty() ? null : inputs.get(0).getCoder());
}
 
Example 17
Source File: BeamAggregationRel.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Performs the same check as {@link GroupByKey}, but with more context in the exception.
 *
 * <p>Rejects an input that is not bounded while it still uses global windowing with the default
 * trigger. Without windowing or triggering, the watermark (at end of global window) will never
 * be reached.
 *
 * @param upstream the input collection to validate
 * @throws UnsupportedOperationException if validation fails
 */
private void validateWindowIsSupported(PCollection<Row> upstream) {
  WindowingStrategy<?, ?> strategy = upstream.getWindowingStrategy();

  boolean unboundedWithGlobalDefaults =
      strategy.getWindowFn() instanceof GlobalWindows
          && strategy.getTrigger() instanceof DefaultTrigger
          && upstream.isBounded() != BOUNDED;
  if (!unboundedWithGlobalDefaults) {
    return;
  }

  throw new UnsupportedOperationException(
      "Please explicitly specify windowing in SQL query using HOP/TUMBLE/SESSION functions "
          + "(default trigger will be used in this case). "
          + "Unbounded input with global windowing and default trigger is not supported "
          + "in Beam SQL aggregations. "
          + "See GroupByKey section in Beam Programming Guide");
}
 
Example 18
Source File: BeamCoGBKJoinRel.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether the given strategy matches the "once per window" shape checked here.
 *
 * <p>The parameter is wildcard-typed rather than raw: only type-independent accessors are used,
 * and raw or parameterized arguments are still accepted by callers.
 *
 * @param windowingStrategy the windowing strategy to inspect
 * @return {@code true} when the window function is not {@code GlobalWindows}, the trigger is a
 *     {@code DefaultTrigger}, and the allowed lateness equals {@code ZERO}
 */
private boolean triggersOncePerWindow(WindowingStrategy<?, ?> windowingStrategy) {
  Trigger trigger = windowingStrategy.getTrigger();

  return !(windowingStrategy.getWindowFn() instanceof GlobalWindows)
      && trigger instanceof DefaultTrigger
      && ZERO.equals(windowingStrategy.getAllowedLateness());
}
 
Example 19
Source File: GroupByKeyTranslatorBatch.java    From twister2 with Apache License 2.0 5 votes vote down vote up
/**
 * Translates {@code GroupByKey}: encodes the input into key/value byte tuples, gathers them by
 * key, decodes them again and finally groups by window.
 *
 * @param transform the GroupByKey transform being translated
 * @param context translation context providing the input/output collections and data sets
 */
@Override
public void translateNode(GroupByKey<K, V> transform, Twister2BatchTranslationContext context) {
  PCollection<KV<K, V>> input = context.getInput(transform);
  BatchTSetImpl<WindowedValue<KV<K, V>>> inputTTSet = context.getInputDataSet(input);
  // Look the input and its KvCoder up once and reuse them for the key and value coders.
  final KvCoder<K, V> coder = (KvCoder<K, V>) input.getCoder();
  Coder<K> inputKeyCoder = coder.getKeyCoder();
  WindowingStrategy windowingStrategy = input.getWindowingStrategy();
  WindowFn<KV<K, V>, BoundedWindow> windowFn =
      (WindowFn<KV<K, V>, BoundedWindow>) windowingStrategy.getWindowFn();
  final WindowedValue.WindowedValueCoder<V> wvCoder =
      WindowedValue.FullWindowedValueCoder.of(coder.getValueCoder(), windowFn.windowCoder());
  KeyedTSet<byte[], byte[]> keyedTSet =
      inputTTSet.mapToTuple(new MapToTupleFunction<K, V>(inputKeyCoder, wvCoder));

  // TODO: add support for specifying a partition function; this would use the
  // keyedPartition function instead of keyedGather.
  ComputeTSet<KV<K, Iterable<WindowedValue<V>>>, Iterator<Tuple<byte[], Iterator<byte[]>>>>
      groupedbyKeyTset =
      keyedTSet.keyedGather().map(new ByteToWindowFunction(inputKeyCoder, wvCoder));

  // --- now group also by window.
  ComputeTSet<WindowedValue<KV<K, Iterable<V>>>, Iterable<KV<K, Iterator<WindowedValue<V>>>>>
      outputTset =
      groupedbyKeyTset
          .direct()
          .<WindowedValue<KV<K, Iterable<V>>>>flatmap(
              new GroupByWindowFunction(
                  windowingStrategy,
                  SystemReduceFn.buffering(coder.getValueCoder())));
  PCollection output = context.getOutput(transform);
  context.setOutputDataSet(output, outputTset);
}
 
Example 20
Source File: BeamSortRel.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Expands the sort/limit relation over its single input.
 *
 * <p>When {@code fieldIndices} is empty the input is re-windowed into {@code GlobalWindows} and
 * passed through {@code LimitTransform}. Otherwise the input must already use
 * {@code GlobalWindows}; the top {@code startIndex + count} rows are selected with the reversed
 * row comparator, the leading {@code startIndex} rows are stripped when {@code startIndex > 0},
 * and the resulting list is flattened back into rows.
 *
 * @param pinput the input list, which must contain exactly one collection
 * @return the limited and/or sorted rows, with the row schema derived from {@code getRowType()}
 * @throws IllegalArgumentException if {@code pinput} does not contain exactly one collection
 * @throws UnsupportedOperationException if ordering is requested on an input that is not
 *     globally windowed
 */
@Override
public PCollection<Row> expand(PCollectionList<Row> pinput) {
  // Name this relation in the message; it previously named BeamIOSinkRel by mistake.
  checkArgument(
      pinput.size() == 1,
      "Wrong number of inputs for %s: %s",
      BeamSortRel.class.getSimpleName(),
      pinput);
  PCollection<Row> upstream = pinput.get(0);

  // There is a need to separate ORDER BY ... LIMIT and LIMIT:
  //  - GroupByKey (used in Top) is not allowed on unbounded data in global window, so
  //    ORDER BY ... LIMIT works only on bounded data.
  //  - Just LIMIT operates on unbounded data, but across windows.
  if (fieldIndices.isEmpty()) {
    // TODO(https://issues.apache.org/jira/projects/BEAM/issues/BEAM-4702)
    // Figure out which operations are per-window and which are not.

    return upstream
        .apply(Window.into(new GlobalWindows()))
        .apply(new LimitTransform<>(startIndex))
        .setRowSchema(CalciteUtils.toSchema(getRowType()));
  } else {

    WindowingStrategy<?, ?> windowingStrategy = upstream.getWindowingStrategy();
    if (!(windowingStrategy.getWindowFn() instanceof GlobalWindows)) {
      throw new UnsupportedOperationException(
          String.format(
              "`ORDER BY` is only supported for %s, actual windowing strategy: %s",
              GlobalWindows.class.getSimpleName(), windowingStrategy));
    }

    ReversedBeamSqlRowComparator comparator =
        new ReversedBeamSqlRowComparator(fieldIndices, orientation, nullsFirst);

    // first find the top (offset + count)
    PCollection<List<Row>> rawStream =
        upstream
            .apply(
                "extractTopOffsetAndFetch",
                Top.of(startIndex + count, comparator).withoutDefaults())
            .setCoder(ListCoder.of(upstream.getCoder()));

    // strip the `leading offset`
    if (startIndex > 0) {
      rawStream =
          rawStream
              .apply(
                  "stripLeadingOffset",
                  ParDo.of(new SubListFn<>(startIndex, startIndex + count)))
              .setCoder(ListCoder.of(upstream.getCoder()));
    }

    return rawStream
        .apply("flatten", Flatten.iterables())
        .setRowSchema(CalciteUtils.toSchema(getRowType()));
  }
}