Java Code Examples for org.apache.beam.sdk.util.WindowedValue#getWindows()

The following examples show how to use org.apache.beam.sdk.util.WindowedValue#getWindows(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: SideInputHandler.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Stores {@code value} as the contents of {@code sideInput} for every window the value belongs
 * to, and adds each of those windows to the side input's available-windows state. This might
 * change the result of {@link #isReady(PCollectionView, BoundedWindow)} for that side input.
 *
 * @param sideInput the view whose contents are being supplied
 * @param value the windowed contents; its value is written once per window it is in
 */
public void addSideInputValue(PCollectionView<?> sideInput, WindowedValue<Iterable<?>> value) {
  @SuppressWarnings("unchecked")
  Coder<BoundedWindow> coder =
      (Coder<BoundedWindow>) sideInput.getWindowingStrategyInternal().getWindowFn().windowCoder();

  StateTag<ValueState<Iterable<?>>> contentsTag = sideInputContentsTags.get(sideInput);

  for (BoundedWindow current : value.getWindows()) {
    // The contents are kept in a namespace derived from the window itself.
    ValueState<Iterable<?>> contents =
        stateInternals.state(StateNamespaces.window(coder, current), contentsTag);
    contents.write(value.getValue());

    // The windows that have contents are tracked in the global namespace.
    stateInternals
        .state(StateNamespaces.global(), availableWindowsTags.get(sideInput))
        .add(current);
  }
}
 
Example 2
Source File: ProcessFnRunner.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that the windowed {@code KeyedWorkItem} is either in no window at all, or in exactly
 * one window that is a {@code GlobalWindow}. Any other situation fails a {@code checkArgument}
 * precondition.
 */
private static <T> void checkTrivialOuterWindows(
    WindowedValue<KeyedWorkItem<byte[], T>> windowedKWI) {
  Collection<? extends BoundedWindow> windows = windowedKWI.getWindows();

  // In practice it will be in 0 or 1 windows (ValueInEmptyWindows or ValueInGlobalWindow);
  // with no windows there is nothing further to verify.
  if (windows.isEmpty()) {
    return;
  }

  checkArgument(
      windows.size() == 1,
      "The KeyedWorkItem itself must not be in multiple windows, but was in: %s",
      windows);
  BoundedWindow soleWindow = Iterables.getOnlyElement(windows);
  checkArgument(
      soleWindow instanceof GlobalWindow,
      "KeyedWorkItem must be in the Global window, but was in: %s",
      soleWindow);
}
 
Example 3
Source File: DataflowProcessFnRunner.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the outer windows of a windowed {@code KeyedWorkItem}: an empty set of windows is
 * accepted as-is; otherwise there must be exactly one window and it must be a {@code
 * GlobalWindow}, or a {@code checkArgument} precondition fails.
 */
private static <T> void checkTrivialOuterWindows(
    WindowedValue<KeyedWorkItem<byte[], T>> windowedKWI) {
  // Expected to hold 0 or 1 windows in practice (ValueInEmptyWindows or ValueInGlobalWindow).
  Collection<? extends BoundedWindow> enclosingWindows = windowedKWI.getWindows();
  if (enclosingWindows.isEmpty()) {
    return;
  }

  boolean inSingleWindow = enclosingWindows.size() == 1;
  checkArgument(
      inSingleWindow,
      "The KeyedWorkItem itself must not be in multiple windows, but was in: %s",
      enclosingWindows);

  BoundedWindow theWindow = Iterables.getOnlyElement(enclosingWindows);
  boolean inGlobalWindow = theWindow instanceof GlobalWindow;
  checkArgument(
      inGlobalWindow, "KeyedWorkItem must be in the Global window, but was in: %s", theWindow);
}
 
Example 4
Source File: CreateIsmShardKeyAndSortKeyDoFnFactory.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Emits one output per window of the incoming windowed key/value pair. Each output keeps the
 * element's windowing metadata (via {@code withValue}) and carries {@code KV<hash, KV<KV<key,
 * window>, value>>}, where the hash is computed by {@code coder} over the user key.
 */
@Override
public void processElement(Object untypedElem) throws Exception {
  @SuppressWarnings("unchecked")
  WindowedValue<KV<K, V>> windowedKv = (WindowedValue) untypedElem;

  KV<K, V> keyAndValue = windowedKv.getValue();
  K key = keyAndValue.getKey();
  V value = keyAndValue.getValue();
  int shardHash = coder.hash(ImmutableList.of(key));

  // Explode all the windows the users values are in.
  for (BoundedWindow window : windowedKv.getWindows()) {
    // The sort key pairs the user key with the window being exploded.
    KV<KV<K, BoundedWindow>, V> sortKeyAndValue = KV.of(KV.of(key, window), value);
    // Note that the shuffle writer expects a KV<PrimaryKey, KV<SortKey, Value>> when sorting.
    receiver.process(windowedKv.withValue(KV.of(shardHash, sortKeyAndValue)));
  }
}
 
Example 5
Source File: CreateViewTransform.java    From incubator-nemo with Apache License 2.0 5 votes vote down vote up
/**
 * Appends the element's value to the list kept for each window the element belongs to, creating
 * the list the first time a window is seen.
 *
 * @param element the windowed key/value pair; only the value part of the pair is stored
 */
@Override
public void onData(final WindowedValue<KV<?, I>> element) {
  // The key of element is always null (beam's semantic)
  // because view is a globally materialized data regardless of key
  for (final BoundedWindow window : element.getWindows()) {
    // computeIfAbsent only allocates a list when the window has none yet and returns the
    // mapped list directly, so no throwaway ArrayList and no second lookup are needed.
    windowListMap
        .computeIfAbsent(window, unused -> new ArrayList<>())
        .add(element.getValue().getValue());
  }
}
 
Example 6
Source File: InMemorySideInputReader.java    From incubator-nemo with Apache License 2.0 5 votes vote down vote up
/**
 * Records a side input element in memory, once for every window the element belongs to, keyed
 * by the (view, window) pair.
 *
 * @param view             the side input view the element belongs to.
 * @param sideInputElement the windowed element whose side input value is stored.
 */
public void addSideInputElement(final PCollectionView<?> view,
                                final WindowedValue<SideInputElement<?>> sideInputElement) {
  for (final BoundedWindow window : sideInputElement.getWindows()) {
    final Object sideInputValue = sideInputElement.getValue().getSideInputValue();
    inMemorySideInputs.put(Pair.of(view, window), sideInputValue);
  }
}
 
Example 7
Source File: ReduceFnRunner.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Gathers into one set every window that at least one of the given values belongs to. */
private Set<W> collectWindows(Iterable<WindowedValue<InputT>> values) throws Exception {
  Set<W> distinctWindows = new HashSet<>();
  for (WindowedValue<?> windowedValue : values) {
    for (BoundedWindow rawWindow : windowedValue.getWindows()) {
      // getWindows() only exposes BoundedWindow, so each window is cast to W.
      @SuppressWarnings("unchecked")
      W typedWindow = (W) rawWindow;
      distinctWindows.add(typedWindow);
    }
  }
  return distinctWindows;
}
 
Example 8
Source File: ViewP.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes the incoming item into a windowed value and merges it into the per-window entry of
 * {@code values} for every window the value belongs to. Always returns {@code true}.
 */
@Override
protected boolean tryProcess(int ordinal, @Nonnull Object item) {
  WindowedValue<?> decoded = Utils.decodeWindowedValue((byte[]) item, inputCoder);
  for (BoundedWindow window : decoded.getWindows()) {
    // A separate holder is created for each window, exactly as many as there are windows.
    TimestampAndValues incoming =
        new TimestampAndValues(decoded.getPane(), decoded.getTimestamp(), decoded.getValue());
    values.merge(
        window, incoming, (existing, added) -> existing.merge(timestampCombiner, added));
  }

  return true;
}
 
Example 9
Source File: AggregatorCombiner.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Returns the set of all windows that any of the given windowed accumulators belongs to. */
private Set<W> collectAccumulatorsWindows(Iterable<WindowedValue<AccumT>> accumulators) {
  Set<W> seenWindows = new HashSet<>();
  for (WindowedValue<?> windowedAccumulator : accumulators) {
    for (BoundedWindow rawWindow : windowedAccumulator.getWindows()) {
      // getWindows() is typed as BoundedWindow; narrow each window to W before storing it.
      @SuppressWarnings("unchecked")
      W narrowed = (W) rawWindow;
      seenWindows.add(narrowed);
    }
  }
  return seenWindows;
}
 
Example 10
Source File: HashingFlinkCombineRunner.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Builds the set of every window in which at least one of the given values appears. */
private Set<W> collectWindows(Iterable<WindowedValue<KV<K, InputT>>> values) {
  Set<W> result = new HashSet<>();
  for (WindowedValue<?> element : values) {
    for (BoundedWindow elementWindow : element.getWindows()) {
      // Windows come back as BoundedWindow and are cast to W for the result set.
      @SuppressWarnings("unchecked")
      W asW = (W) elementWindow;
      result.add(asW);
    }
  }
  return result;
}
 
Example 11
Source File: PartialGroupByKeyParDoFns.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Splits the incoming windowed key/value pair into one single-window copy per window it is in
 * (same value, timestamp and pane) and puts each copy into the grouping table.
 */
@Override
public void processElement(Object elem) throws Exception {
  @SuppressWarnings({"unchecked"})
  WindowedValue<KV<K, InputT>> windowed = (WindowedValue<KV<K, InputT>>) elem;
  for (BoundedWindow window : windowed.getWindows()) {
    WindowedValue<KV<K, InputT>> singleWindowCopy =
        WindowedValue.of(
            windowed.getValue(), windowed.getTimestamp(), window, windowed.getPane());
    groupingTable.put(singleWindowCopy, receiver);
  }
}
 
Example 12
Source File: PartialGroupByKeyParDoFns.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Splits the incoming windowed key/value pair into one single-window copy per window it is in.
 * Each copy is first offered to {@code sideInputFetcher.storeIfBlocked}; only when that returns
 * {@code false} is the copy put into the grouping table.
 */
@Override
public void processElement(Object elem) throws Exception {
  @SuppressWarnings({"unchecked"})
  WindowedValue<KV<K, InputT>> windowed = (WindowedValue<KV<K, InputT>>) elem;
  for (BoundedWindow window : windowed.getWindows()) {
    WindowedValue<KV<K, InputT>> singleWindowCopy =
        WindowedValue.of(
            windowed.getValue(), windowed.getTimestamp(), window, windowed.getPane());

    if (sideInputFetcher.storeIfBlocked(singleWindowCopy)) {
      // The fetcher kept the element; do not group it now.
      continue;
    }
    groupingTable.put(singleWindowCopy, receiver);
  }
}
 
Example 13
Source File: SideInputContainer.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a map from each {@link BoundedWindow window} to the values that appear in it. A value
 * that is in several windows is listed under each of them.
 */
private Map<BoundedWindow, Collection<WindowedValue<?>>> indexValuesByWindow(
    Iterable<? extends WindowedValue<?>> values) {
  Map<BoundedWindow, Collection<WindowedValue<?>>> index = new HashMap<>();
  for (WindowedValue<?> windowedValue : values) {
    for (BoundedWindow window : windowedValue.getWindows()) {
      index.computeIfAbsent(window, unused -> new ArrayList<>()).add(windowedValue);
    }
  }
  return index;
}
 
Example 14
Source File: AggregatorCombiner.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Merges two collections of windowed accumulators into one windowed accumulator per merged
 * window.
 *
 * <p>The windows of all input accumulators are merged with {@code mergeWindows}. Each accumulator
 * is then grouped under the merge result of each of its windows (or under the window itself when
 * the merge result has no entry for it), and every group is combined with {@code
 * combineFn.mergeAccumulators}. Each output carries the timestamp obtained by combining the
 * group's per-accumulator timestamps with {@code timestampCombiner}, and {@link
 * PaneInfo#NO_FIRING}.
 *
 * @param accumulators1 the first collection of windowed accumulators
 * @param accumulators2 the second collection of windowed accumulators
 * @return one windowed accumulator for each merged window
 * @throws RuntimeException if merging the accumulators' windows throws; the cause is preserved
 */
@Override
public Iterable<WindowedValue<AccumT>> merge(
    Iterable<WindowedValue<AccumT>> accumulators1,
    Iterable<WindowedValue<AccumT>> accumulators2) {

  // merge the windows of all the accumulators
  Iterable<WindowedValue<AccumT>> accumulators = Iterables.concat(accumulators1, accumulators2);
  Set<W> accumulatorsWindows = collectAccumulatorsWindows(accumulators);
  Map<W, W> windowToMergeResult;
  try {
    windowToMergeResult = mergeWindows(windowingStrategy, accumulatorsWindows);
  } catch (Exception e) {
    throw new RuntimeException("Unable to merge accumulators windows", e);
  }

  // group accumulators by their merged window
  Map<W, List<Tuple2<AccumT, Instant>>> mergedWindowToAccumulators = new HashMap<>();
  for (WindowedValue<AccumT> accumulatorWv : accumulators) {
    for (BoundedWindow accumulatorWindow : accumulatorWv.getWindows()) {
      // a window without an entry in the merge result stands for itself
      W mergedWindowForAccumulator = windowToMergeResult.get(accumulatorWindow);
      if (mergedWindowForAccumulator == null) {
        mergedWindowForAccumulator = (W) accumulatorWindow;
      }

      // we need only the timestamp and the AccumT, we create a tuple
      Tuple2<AccumT, Instant> accumAndInstant =
          new Tuple2<>(
              accumulatorWv.getValue(),
              timestampCombiner.assign(
                  mergedWindowForAccumulator,
                  windowingStrategy
                      .getWindowFn()
                      .getOutputTime(accumulatorWv.getTimestamp(), mergedWindowForAccumulator)));
      // single lookup: create the group's list on first use, then append to it
      mergedWindowToAccumulators
          .computeIfAbsent(mergedWindowForAccumulator, unused -> new ArrayList<>())
          .add(accumAndInstant);
    }
  }
  // merge the accumulators for each mergedWindow
  List<WindowedValue<AccumT>> result = new ArrayList<>();
  for (Map.Entry<W, List<Tuple2<AccumT, Instant>>> entry :
      mergedWindowToAccumulators.entrySet()) {
    W mergedWindow = entry.getKey();
    List<Tuple2<AccumT, Instant>> accumsAndInstantsForMergedWindow = entry.getValue();

    // we need to create the first accumulator because combineFn.mergerAccumulators can modify the
    // first accumulator
    AccumT first = combineFn.createAccumulator();
    Iterable<AccumT> accumulatorsToMerge =
        Iterables.concat(
            Collections.singleton(first),
            accumsAndInstantsForMergedWindow.stream()
                .map(x -> x._1())
                .collect(Collectors.toList()));
    result.add(
        WindowedValue.of(
            combineFn.mergeAccumulators(accumulatorsToMerge),
            timestampCombiner.combine(
                accumsAndInstantsForMergedWindow.stream()
                    .map(x -> x._2())
                    .collect(Collectors.toList())),
            mergedWindow,
            PaneInfo.NO_FIRING));
  }
  return result;
}
 
Example 15
Source File: BatchGroupAlsoByWindowViaIteratorsFn.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Walks the windowed values of one key and, the first time a window is seen among the currently
 * active windows, emits {@code KV(key, WindowReiterable)} for that window.
 *
 * <p>Each output is emitted in that single window with pane {@link
 * PaneInfo#ON_TIME_AND_ONLY_FIRING}, and with the timestamp produced by the strategy's timestamp
 * combiner from the window function's output time for the current element.
 *
 * <p>The {@code options}, {@code stepContext} and {@code sideInputReader} parameters are not
 * referenced in this method body.
 *
 * @param element the key together with its windowed values; the values must be a {@link
 *     Collection} or a {@code Reiterable}
 * @param output receiver of the per-window outputs
 * @throws IllegalArgumentException if the values are neither a {@link Collection} nor a {@code
 *     Reiterable}
 */
@Override
@SuppressWarnings("ReferenceEquality")
public void processElement(
    KV<K, Iterable<WindowedValue<V>>> element,
    PipelineOptions options,
    StepContext stepContext,
    SideInputReader sideInputReader,
    OutputWindowedValue<KV<K, Iterable<V>>> output)
    throws Exception {
  K key = element.getKey();
  // This iterable is required to be in order of increasing timestamps
  Iterable<WindowedValue<V>> value = element.getValue();
  PeekingReiterator<WindowedValue<V>> iterator;

  if (value instanceof Collection) {
    // A Collection is copied into an ArrayList and wrapped in a ListReiterator
    // (the trailing 0 is presumably the starting index — confirm against ListReiterator).
    iterator =
        new PeekingReiterator<>(
            new ListReiterator<WindowedValue<V>>(
                new ArrayList<WindowedValue<V>>((Collection<WindowedValue<V>>) value), 0));
  } else if (value instanceof Reiterable) {
    iterator = new PeekingReiterator<>(((Reiterable<WindowedValue<V>>) value).iterator());
  } else {
    throw new IllegalArgumentException(
        "Input to GroupAlsoByWindowsDoFn must be a Collection or Reiterable");
  }

  // This ListMultimap is a map of window maxTimestamps to the list of active
  // windows with that maxTimestamp.
  ListMultimap<Instant, BoundedWindow> windows = ArrayListMultimap.create();

  while (iterator.hasNext()) {
    // Peek rather than consume: the element is only stepped over further below, and only if
    // the iterator is still positioned on it.
    WindowedValue<V> e = iterator.peek();
    for (BoundedWindow window : e.getWindows()) {
      // If this window is not already in the active set, emit a new WindowReiterable
      // corresponding to this window, starting at this element in the input Reiterable.
      if (!windows.containsEntry(window.maxTimestamp(), window)) {
        // This window was produced by strategy.getWindowFn()
        @SuppressWarnings("unchecked")
        W typedWindow = (W) window;
        // Iterating through the WindowReiterable may advance iterator as an optimization
        // for as long as it detects that there are no new windows.
        windows.put(window.maxTimestamp(), window);
        output.outputWindowedValue(
            KV.of(key, (Iterable<V>) new WindowReiterable<V>(iterator, window)),
            strategy
                .getTimestampCombiner()
                .assign(
                    typedWindow,
                    strategy.getWindowFn().getOutputTime(e.getTimestamp(), typedWindow)),
            Arrays.asList(window),
            PaneInfo.ON_TIME_AND_ONLY_FIRING);
      }
    }
    // Copy the iterator in case the next DoFn cached its version of the iterator instead
    // of immediately iterating through it.
    // And, only advance the iterator if the consuming operation hasn't done so.
    iterator = iterator.copy();
    // Identity (==) comparison is intentional here: it checks that the iterator is still
    // positioned on the very same element object. This is what the ReferenceEquality
    // suppression on the method is for.
    if (iterator.hasNext() && iterator.peek() == e) {
      iterator.next();
    }

    // Remove all windows with maxTimestamp behind the current timestamp.
    // NOTE(review): this loop stops at the first key that is not before the current timestamp,
    // so it only removes everything stale if keys() iterates in increasing timestamp order.
    // That ordering is not established in this method — confirm.
    Iterator<Instant> windowIterator = windows.keys().iterator();
    while (windowIterator.hasNext() && windowIterator.next().isBefore(e.getTimestamp())) {
      windowIterator.remove();
    }
  }
}
 
Example 16
Source File: StatefulParDoEvaluatorFactory.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the evaluator for a stateful ParDo application over the given input bundle.
 *
 * <p>When the DoFn declares state, {@code cleanupRegistry} is looked up once for every (element,
 * window) combination in the bundle. The actual evaluation is delegated to an evaluator obtained
 * from {@code delegateFactory}; the step context's state internals are committed before the
 * wrapping evaluator is returned.
 *
 * @param application the applied stateful ParDo transform
 * @param inputBundle the committed bundle of keyed work items to evaluate
 * @return an evaluator wrapping the delegate evaluator and the step context
 */
@SuppressWarnings({"unchecked", "rawtypes"})
private TransformEvaluator<KeyedWorkItem<K, KV<K, InputT>>> createEvaluator(
    AppliedPTransform<
            PCollection<? extends KeyedWorkItem<K, KV<K, InputT>>>,
            PCollectionTuple,
            StatefulParDo<K, InputT, OutputT>>
        application,
    CommittedBundle<KeyedWorkItem<K, KV<K, InputT>>> inputBundle)
    throws Exception {

  final StatefulParDo<K, InputT, OutputT> transform = application.getTransform();
  final DoFn<KV<K, InputT>, OutputT> fn = transform.getDoFn();
  final DoFnSignature fnSignature = DoFnSignatures.getSignature(fn.getClass());

  // If the DoFn is stateful, schedule state clearing.
  // It is semantically correct to schedule any number of redundant clear tasks; the
  // cache is used to limit the number of tasks to avoid performance degradation.
  final boolean declaresState = fnSignature.stateDeclarations().size() > 0;
  if (declaresState) {
    for (final WindowedValue<?> windowedElement : inputBundle.getElements()) {
      for (final BoundedWindow elementWindow : windowedElement.getWindows()) {
        cleanupRegistry.get(
            AppliedPTransformOutputKeyAndWindow.create(
                application, (StructuralKey<K>) inputBundle.getKey(), elementWindow));
      }
    }
  }

  DoFnLifecycleManagerRemovingTransformEvaluator<KV<K, InputT>> delegate =
      delegateFactory.createEvaluator(
          (AppliedPTransform) application,
          (PCollection) inputBundle.getPCollection(),
          inputBundle.getKey(),
          transform.getSideInputs(),
          transform.getMainOutputTag(),
          transform.getAdditionalOutputTags().getAll(),
          transform.getSchemaInformation(),
          transform.getSideInputMapping());

  DirectStepContext context =
      evaluationContext
          .getExecutionContext(application, inputBundle.getKey())
          .getStepContext(evaluationContext.getStepName(application));

  context.stateInternals().commit();
  return new StatefulParDoEvaluator<>(delegate, context);
}