org.apache.beam.sdk.values.WindowingStrategy Java Examples

The following examples show how to use org.apache.beam.sdk.values.WindowingStrategy. They are drawn from open source projects; each example notes its source file, the project it comes from, and its license.
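Before the project examples, the short sketch below (not taken from any of them; the class name and all parameter values are placeholders) shows the typical way a WindowingStrategy is built directly and how its parts are read back with the getters that the examples rely on.

// A minimal sketch, not taken from any project below: build a WindowingStrategy
// directly and read back its components. The window size, trigger, accumulation
// mode, and allowed lateness are arbitrary placeholder values.
import org.apache.beam.sdk.transforms.windowing.AfterPane;
import org.apache.beam.sdk.transforms.windowing.FixedWindows;
import org.apache.beam.sdk.transforms.windowing.Repeatedly;
import org.apache.beam.sdk.values.WindowingStrategy;
import org.apache.beam.sdk.values.WindowingStrategy.AccumulationMode;
import org.joda.time.Duration;

public class WindowingStrategySketch {
  public static void main(String[] args) {
    WindowingStrategy<?, ?> strategy =
        WindowingStrategy.of(FixedWindows.of(Duration.standardMinutes(10)))
            .withTrigger(Repeatedly.forever(AfterPane.elementCountAtLeast(100)))
            .withMode(AccumulationMode.DISCARDING_FIRED_PANES)
            .withAllowedLateness(Duration.standardHours(1));

    // The same getters appear throughout the examples below.
    System.out.println(strategy.getWindowFn());        // the FixedWindows fn
    System.out.println(strategy.getTrigger());         // the repeated count trigger
    System.out.println(strategy.getMode());            // DISCARDING_FIRED_PANES
    System.out.println(strategy.getAllowedLateness()); // one hour
  }
}
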
Example #1
Source File: WindowTest.java    From beam with Apache License 2.0
@Test
public void testWindowIntoPropagatesLateness() {

  FixedWindows fixed10 = FixedWindows.of(Duration.standardMinutes(10));
  FixedWindows fixed25 = FixedWindows.of(Duration.standardMinutes(25));
  WindowingStrategy<?, ?> strategy =
      pipeline
          .apply(Create.of("hello", "world").withCoder(StringUtf8Coder.of()))
          .apply(
              "WindowInto10",
              Window.<String>into(fixed10)
                  .withAllowedLateness(Duration.standardDays(1))
                  .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(5)))
                  .accumulatingFiredPanes())
          .apply("WindowInto25", Window.into(fixed25))
          .getWindowingStrategy();

  assertEquals(Duration.standardDays(1), strategy.getAllowedLateness());
  assertEquals(fixed25, strategy.getWindowFn());
}
 
Example #2
Source File: GroupAlsoByWindowViaWindowSetNewDoFn.java    From beam with Apache License 2.0
public GroupAlsoByWindowViaWindowSetNewDoFn(
    WindowingStrategy<?, W> windowingStrategy,
    StateInternalsFactory<K> stateInternalsFactory,
    TimerInternalsFactory<K> timerInternalsFactory,
    SideInputReader sideInputReader,
    SystemReduceFn<K, InputT, ?, OutputT, W> reduceFn,
    DoFnRunners.OutputManager outputManager,
    TupleTag<KV<K, OutputT>> mainTag) {
  this.timerInternalsFactory = timerInternalsFactory;
  this.sideInputReader = sideInputReader;
  this.outputManager = outputManager;
  this.mainTag = mainTag;
  @SuppressWarnings("unchecked")
  WindowingStrategy<Object, W> noWildcard = (WindowingStrategy<Object, W>) windowingStrategy;
  this.windowingStrategy = noWildcard;
  this.reduceFn = reduceFn;
  this.stateInternalsFactory = stateInternalsFactory;
}
 
Example #3
Source File: SplittableParDo.java    From beam with Apache License 2.0
public static <OutputT> PCollectionTuple createPrimitiveOutputFor(
    PCollection<?> input,
    DoFn<?, OutputT> fn,
    TupleTag<OutputT> mainOutputTag,
    TupleTagList additionalOutputTags,
    Map<TupleTag<?>, Coder<?>> outputTagsToCoders,
    WindowingStrategy<?, ?> windowingStrategy) {
  DoFnSignature signature = DoFnSignatures.getSignature(fn.getClass());
  PCollectionTuple outputs =
      PCollectionTuple.ofPrimitiveOutputsInternal(
          input.getPipeline(),
          TupleTagList.of(mainOutputTag).and(additionalOutputTags.getAll()),
          outputTagsToCoders,
          windowingStrategy,
          input.isBounded().and(signature.isBoundedPerElement()));

  // Set output type descriptor similarly to how ParDo.MultiOutput does it.
  outputs.get(mainOutputTag).setTypeDescriptor(fn.getOutputTypeDescriptor());

  return outputs;
}
 
Example #4
Source File: SparkCombineFnTest.java    From beam with Apache License 2.0
@Test
public void testGlobalCombineFn() throws Exception {
  SparkCombineFn<Integer, Integer, Long, Long> sparkCombineFn =
      SparkCombineFn.globally(
          combineFn, opts, Collections.emptyMap(), WindowingStrategy.globalDefault());

  WindowedValue<Integer> first = inputValue(1, Instant.now());
  WindowedValue<Integer> second = inputValue(2, Instant.now());
  WindowedValue<Integer> third = inputValue(3, Instant.now());
  SparkCombineFn.WindowedAccumulator<Integer, Integer, Long, ?> c1 =
      sparkCombineFn.createCombiner(first);
  SparkCombineFn.WindowedAccumulator<Integer, Integer, Long, ?> c2 =
      sparkCombineFn.createCombiner(third);
  sparkCombineFn.mergeValue(c1, second);
  SparkCombineFn.WindowedAccumulator<Integer, Integer, Long, ?> c3 =
      sparkCombineFn.mergeCombiners(c1, c2);
  assertEquals(6, (long) Iterables.getOnlyElement(sparkCombineFn.extractOutput(c3)).getValue());
}
 
Example #5
Source File: PTransformMatchersTest.java    From beam with Apache License 2.0
@Test
public void emptyFlattenWithNonFlatten() {
  AppliedPTransform application =
      AppliedPTransform
          .<PCollection<Iterable<Integer>>, PCollection<Integer>, Flatten.Iterables<Integer>>of(
              "EmptyFlatten",
              Collections.emptyMap(),
              Collections.singletonMap(
                  new TupleTag<Integer>(),
                  PCollection.createPrimitiveOutputInternal(
                      p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, VarIntCoder.of())),
              /* This isn't actually possible to construct, but for the sake of example */
              Flatten.iterables(),
              p);

  assertThat(PTransformMatchers.emptyFlatten().matches(application), is(false));
}
 
Example #6
Source File: FlinkDoFnFunction.java    From beam with Apache License 2.0
public FlinkDoFnFunction(
    DoFn<InputT, OutputT> doFn,
    String stepName,
    WindowingStrategy<?, ?> windowingStrategy,
    Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
    PipelineOptions options,
    Map<TupleTag<?>, Integer> outputMap,
    TupleTag<OutputT> mainOutputTag,
    Coder<InputT> inputCoder,
    Map<TupleTag<?>, Coder<?>> outputCoderMap,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {

  this.doFn = doFn;
  this.stepName = stepName;
  this.sideInputs = sideInputs;
  this.serializedOptions = new SerializablePipelineOptions(options);
  this.windowingStrategy = windowingStrategy;
  this.outputMap = outputMap;
  this.mainOutputTag = mainOutputTag;
  this.inputCoder = inputCoder;
  this.outputCoderMap = outputCoderMap;
  this.doFnSchemaInformation = doFnSchemaInformation;
  this.sideInputMapping = sideInputMapping;
}
 
Example #7
Source File: Twister2SideInputReader.java    From twister2 with Apache License 2.0
public Twister2SideInputReader(Map<PCollectionView<?>, WindowingStrategy<?, ?>> indexByView,
                               TSetContext context) {
  this.sideInputs = new HashMap<>();

  for (PCollectionView<?> view : indexByView.keySet()) {
    checkArgument(
        Materializations.MULTIMAP_MATERIALIZATION_URN.equals(
            view.getViewFn().getMaterialization().getUrn()),
        "This handler is only capable of dealing with %s materializations "
            + "but was asked to handle %s for PCollectionView with tag %s.",
        Materializations.MULTIMAP_MATERIALIZATION_URN,
        view.getViewFn().getMaterialization().getUrn(),
        view.getTagInternal().getId());
  }
  for (Map.Entry<PCollectionView<?>, WindowingStrategy<?, ?>> entry : indexByView.entrySet()) {
    sideInputs.put(entry.getKey().getTagInternal(), entry.getValue());
  }
  this.runtimeContext = context;
}
 
Example #8
Source File: GroupAlsoByWindowsParDoFn.java    From beam with Apache License 2.0
/**
 * Creates a {@link GroupAlsoByWindowsParDoFn} using basic information about the {@link
 * GroupAlsoByWindowFn} and the step being executed.
 */
GroupAlsoByWindowsParDoFn(
    PipelineOptions options,
    GroupAlsoByWindowFn<InputT, KV<K, Iterable<V>>> doFn,
    WindowingStrategy<?, W> windowingStrategy,
    Iterable<PCollectionView<?>> sideInputViews,
    Coder<InputT> inputCoder,
    SideInputReader sideInputReader,
    TupleTag<KV<K, Iterable<V>>> mainOutputTag,
    DataflowExecutionContext.DataflowStepContext stepContext) {
  this.options = options;

  this.sideInputReader = sideInputReader;
  this.mainOutputTag = mainOutputTag;
  this.stepContext = stepContext;
  this.doFn = doFn;
  this.windowingStrategy = windowingStrategy;
  this.sideInputViews = sideInputViews;
  this.inputCoder = inputCoder;

  this.acceptsKeyedWorkItems = inputCoder instanceof WindmillKeyedWorkItem.FakeKeyedWorkItemCoder;
}
 
Example #9
Source File: WindowingStrategyTranslation.java    From beam with Apache License 2.0
/**
 * Converts a {@link WindowingStrategy} into a {@link RunnerApi.WindowingStrategy}, registering
 * any components in the provided {@link SdkComponents}.
 */
public static RunnerApi.WindowingStrategy toProto(
    WindowingStrategy<?, ?> windowingStrategy, SdkComponents components) throws IOException {
  FunctionSpec windowFnSpec = toProto(windowingStrategy.getWindowFn(), components);

  RunnerApi.WindowingStrategy.Builder windowingStrategyProto =
      RunnerApi.WindowingStrategy.newBuilder()
          .setOutputTime(toProto(windowingStrategy.getTimestampCombiner()))
          .setAccumulationMode(toProto(windowingStrategy.getMode()))
          .setClosingBehavior(toProto(windowingStrategy.getClosingBehavior()))
          .setAllowedLateness(windowingStrategy.getAllowedLateness().getMillis())
          .setTrigger(TriggerTranslation.toProto(windowingStrategy.getTrigger()))
          .setWindowFn(windowFnSpec)
          .setAssignsToOneWindow(windowingStrategy.getWindowFn().assignsToOneWindow())
          .setOnTimeBehavior(toProto(windowingStrategy.getOnTimeBehavior()))
          .setWindowCoderId(
              components.registerCoder(windowingStrategy.getWindowFn().windowCoder()))
          .setEnvironmentId(components.getOnlyEnvironmentId());

  return windowingStrategyProto.build();
}
 
Example #10
Source File: WindowTest.java    From beam with Apache License 2.0
@Test
public void testWindowIntoTriggersAndAccumulating() {
  FixedWindows fixed10 = FixedWindows.of(Duration.standardMinutes(10));
  Repeatedly trigger = Repeatedly.forever(AfterPane.elementCountAtLeast(5));
  WindowingStrategy<?, ?> strategy =
      pipeline
          .apply(Create.of("hello", "world").withCoder(StringUtf8Coder.of()))
          .apply(
              Window.<String>into(fixed10)
                  .triggering(trigger)
                  .accumulatingFiredPanes()
                  .withAllowedLateness(Duration.ZERO))
          .getWindowingStrategy();

  assertEquals(fixed10, strategy.getWindowFn());
  assertEquals(trigger, strategy.getTrigger());
  assertEquals(AccumulationMode.ACCUMULATING_FIRED_PANES, strategy.getMode());
}
 
Example #11
Source File: StatefulParDoP.java    From beam with Apache License 2.0
@Override
protected DoFnRunner<KV<?, ?>, OutputT> getDoFnRunner(
    PipelineOptions pipelineOptions,
    DoFn<KV<?, ?>, OutputT> doFn,
    SideInputReader sideInputReader,
    JetOutputManager outputManager,
    TupleTag<OutputT> mainOutputTag,
    List<TupleTag<?>> additionalOutputTags,
    Coder<KV<?, ?>> inputValueCoder,
    Map<TupleTag<?>, Coder<?>> outputValueCoders,
    WindowingStrategy<?, ?> windowingStrategy,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {
  timerInternals = new InMemoryTimerInternals();
  keyedStepContext = new KeyedStepContext(timerInternals);
  return DoFnRunners.simpleRunner(
      pipelineOptions,
      doFn,
      sideInputReader,
      outputManager,
      mainOutputTag,
      additionalOutputTags,
      keyedStepContext,
      inputValueCoder,
      outputValueCoders,
      windowingStrategy,
      doFnSchemaInformation,
      sideInputMapping);
}
 
Example #12
Source File: BatchGroupAlsoByWindowsDoFns.java    From beam with Apache License 2.0
/** Construct a {@link BatchGroupAlsoByWindowFn} using the {@code combineFn} if available. */
public static <K, InputT, AccumT, OutputT, W extends BoundedWindow>
    BatchGroupAlsoByWindowFn<K, InputT, OutputT> create(
        final WindowingStrategy<?, W> windowingStrategy,
        final AppliedCombineFn<K, InputT, AccumT, OutputT> combineFn) {
  checkNotNull(combineFn);
  return new BatchGroupAlsoByWindowAndCombineFn<>(windowingStrategy, combineFn.getFn());
}
 
Example #13
Source File: ParDoP.java    From beam with Apache License 2.0
private ParDoP(
    DoFn<InputT, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<TupleTag<?>, int[]> outputCollToOrdinals,
    SerializablePipelineOptions pipelineOptions,
    TupleTag<OutputT> mainOutputTag,
    Coder<InputT> inputCoder,
    Map<PCollectionView<?>, Coder<?>> sideInputCoders,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    Coder<InputT> inputValueCoder,
    Map<TupleTag<?>, Coder<?>> outputValueCoders,
    Map<Integer, PCollectionView<?>> ordinalToSideInput,
    String ownerId,
    String stepId) {
  super(
      doFn,
      windowingStrategy,
      doFnSchemaInformation,
      outputCollToOrdinals,
      pipelineOptions,
      mainOutputTag,
      inputCoder,
      sideInputCoders,
      outputCoders,
      inputValueCoder,
      outputValueCoders,
      ordinalToSideInput,
      ownerId,
      stepId);
}
 
Example #14
Source File: LateDataDroppingDoFnRunner.java    From beam with Apache License 2.0
public LateDataFilter(
    WindowingStrategy<?, ?> windowingStrategy, TimerInternals timerInternals) {
  this.windowingStrategy = windowingStrategy;
  this.timerInternals = timerInternals;
  this.droppedDueToLateness =
      Metrics.counter(LateDataDroppingDoFnRunner.class, DROPPED_DUE_TO_LATENESS);
}
 
Example #15
Source File: ReduceFnRunnerTest.java    From beam with Apache License 2.0
/**
 * We should fire a non-empty ON_TIME pane in the GlobalWindow when the watermark moves to
 * end-of-time.
 */
@Test
public void fireNonEmptyOnDrainInGlobalWindow() throws Exception {
  ReduceFnTester<Integer, Iterable<Integer>, GlobalWindow> tester =
      ReduceFnTester.nonCombining(
          WindowingStrategy.of(new GlobalWindows())
              .withTrigger(Repeatedly.forever(AfterPane.elementCountAtLeast(3)))
              .withMode(AccumulationMode.DISCARDING_FIRED_PANES));

  tester.advanceInputWatermark(new Instant(0));

  final int n = 20;
  for (int i = 0; i < n; i++) {
    tester.injectElements(TimestampedValue.of(i, new Instant(i)));
  }

  List<WindowedValue<Iterable<Integer>>> output = tester.extractOutput();
  assertEquals(n / 3, output.size());
  for (int i = 0; i < output.size(); i++) {
    assertEquals(Timing.EARLY, output.get(i).getPane().getTiming());
    assertEquals(i, output.get(i).getPane().getIndex());
    assertEquals(3, Iterables.size(output.get(i).getValue()));
  }

  tester.advanceInputWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);

  output = tester.extractOutput();
  assertEquals(1, output.size());
  assertEquals(Timing.ON_TIME, output.get(0).getPane().getTiming());
  assertEquals(n / 3, output.get(0).getPane().getIndex());
  assertEquals(n - ((n / 3) * 3), Iterables.size(output.get(0).getValue()));
}
 
Example #16
Source File: AbstractFlinkCombineRunner.java    From beam with Apache License 2.0
/**
 * Consumes {@link WindowedValue WindowedValues} and emits the combined output to the given {@link Collector}.
 */
public abstract void combine(
    FlinkCombiner<K, InputT, AccumT, OutputT> flinkCombiner,
    WindowingStrategy<Object, W> windowingStrategy,
    SideInputReader sideInputReader,
    PipelineOptions options,
    Iterable<WindowedValue<KV<K, InputT>>> elements,
    Collector<WindowedValue<KV<K, OutputT>>> out)
    throws Exception;
 
Example #17
Source File: SplittableParDo.java    From beam with Apache License 2.0
@Override
public PCollectionTuple expand(PCollection<InputT> input) {
  Coder<RestrictionT> restrictionCoder =
      DoFnInvokers.invokerFor(doFn)
          .invokeGetRestrictionCoder(input.getPipeline().getCoderRegistry());
  Coder<WatermarkEstimatorStateT> watermarkEstimatorStateCoder =
      DoFnInvokers.invokerFor(doFn)
          .invokeGetWatermarkEstimatorStateCoder(input.getPipeline().getCoderRegistry());
  Coder<KV<InputT, RestrictionT>> splitCoder = KvCoder.of(input.getCoder(), restrictionCoder);

  PCollection<KV<byte[], KV<InputT, RestrictionT>>> keyedRestrictions =
      input
          .apply(
              "Pair with initial restriction",
              ParDo.of(new PairWithRestrictionFn<InputT, OutputT, RestrictionT>(doFn)))
          .setCoder(splitCoder)
          .apply("Split restriction", ParDo.of(new SplitRestrictionFn<>(doFn)))
          .setCoder(splitCoder)
          // ProcessFn requires all input elements to be in a single window and have a single
          // element per work item. This must precede the unique keying so each key has a single
          // associated element.
          .apply("Explode windows", ParDo.of(new ExplodeWindowsFn<>()))
          .apply("Assign unique key", WithKeys.of(new RandomUniqueKeyFn<>()));

  return keyedRestrictions.apply(
      "ProcessKeyedElements",
      new ProcessKeyedElements<>(
          doFn,
          input.getCoder(),
          restrictionCoder,
          watermarkEstimatorStateCoder,
          (WindowingStrategy<InputT, ?>) input.getWindowingStrategy(),
          sideInputs,
          mainOutputTag,
          additionalOutputTags,
          outputTagsToCoders));
}
 
Example #18
Source File: StreamingGroupAlsoByWindowViaWindowSetFn.java    From beam with Apache License 2.0
public static <K, InputT, OutputT, W extends BoundedWindow>
    GroupAlsoByWindowFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> create(
        WindowingStrategy<?, W> strategy,
        StateInternalsFactory<K> stateInternalsFactory,
        SystemReduceFn<K, InputT, ?, OutputT, W> reduceFn) {
  return new StreamingGroupAlsoByWindowViaWindowSetFn<>(
      strategy, stateInternalsFactory, reduceFn);
}
 
Example #19
Source File: CombineValuesFnFactoryTest.java    From beam with Apache License 2.0
@Test
public void testCombineValuesFnMerge() throws Exception {
  TestReceiver receiver = new TestReceiver();
  MeanInts mean = new MeanInts();

  Combine.CombineFn<Integer, CountSum, String> combiner = mean;

  ParDoFn combineParDoFn =
      createCombineValuesFn(
          CombinePhase.MERGE,
          combiner,
          StringUtf8Coder.of(),
          BigEndianIntegerCoder.of(),
          new CountSumCoder(),
          WindowingStrategy.globalDefault());

  combineParDoFn.startBundle(receiver);
  combineParDoFn.processElement(
      WindowedValue.valueInGlobalWindow(
          KV.of(
              "a", Arrays.asList(new CountSum(3, 6), new CountSum(2, 9), new CountSum(1, 12)))));
  combineParDoFn.processElement(
      WindowedValue.valueInGlobalWindow(
          KV.of("b", Arrays.asList(new CountSum(2, 20), new CountSum(1, 1)))));
  combineParDoFn.finishBundle();

  Object[] expectedReceivedElems = {
    WindowedValue.valueInGlobalWindow(KV.of("a", new CountSum(6, 27))),
    WindowedValue.valueInGlobalWindow(KV.of("b", new CountSum(3, 21))),
  };
  assertArrayEquals(expectedReceivedElems, receiver.receivedElems.toArray());
}
 
Example #20
Source File: TransformHierarchyTest.java    From beam with Apache License 2.0
@Test
public void emptyCompositeSucceeds() {
  PCollection<Long> created =
      PCollection.createPrimitiveOutputInternal(
          pipeline, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, VarLongCoder.of());
  TransformHierarchy.Node node = hierarchy.pushNode("Create", PBegin.in(pipeline), Create.of(1));
  hierarchy.setOutput(created);
  hierarchy.popNode();
  PCollectionList<Long> pcList = PCollectionList.of(created);

  TransformHierarchy.Node emptyTransform =
      hierarchy.pushNode(
          "Extract",
          pcList,
          new PTransform<PCollectionList<Long>, PCollection<Long>>() {
            @Override
            public PCollection<Long> expand(PCollectionList<Long> input) {
              return input.get(0);
            }
          });
  hierarchy.setOutput(created);
  hierarchy.popNode();
  assertThat(hierarchy.getProducer(created), equalTo(node));
  assertThat(
      "A Transform that produces non-primitive output should be composite",
      emptyTransform.isCompositeNode(),
      is(true));
}
 
Example #21
Source File: AggregatorCombiner.java    From beam with Apache License 2.0
private Map<W, W> mergeWindows(WindowingStrategy<InputT, W> windowingStrategy, Set<W> windows)
    throws Exception {
  WindowFn<InputT, W> windowFn = windowingStrategy.getWindowFn();

  if (windowingStrategy.getWindowFn().isNonMerging()) {
    // Return an empty map, indicating that no windows were merged.
    return Collections.emptyMap();
  }

  Map<W, W> windowToMergeResult = new HashMap<>();
  windowFn.mergeWindows(new MergeContextImpl(windowFn, windows, windowToMergeResult));
  return windowToMergeResult;
}
 
Example #22
Source File: RehydratedComponents.java    From beam with Apache License 2.0
@Override
public WindowingStrategy<?, ?> load(String id) throws Exception {
  @Nullable
  RunnerApi.WindowingStrategy windowingStrategyProto =
      components.getWindowingStrategiesOrDefault(id, null);
  checkState(
      windowingStrategyProto != null,
      "No WindowingStrategy with id '%s' in serialized components",
      id);
  return WindowingStrategyTranslation.fromProto(
      windowingStrategyProto, RehydratedComponents.this);
}
 
Example #23
Source File: SplittableDoFnOperator.java    From beam with Apache License 2.0
public SplittableDoFnOperator(
    DoFn<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>, OutputT> doFn,
    String stepName,
    Coder<WindowedValue<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>>> windowedInputCoder,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    TupleTag<OutputT> mainOutputTag,
    List<TupleTag<?>> additionalOutputTags,
    OutputManagerFactory<OutputT> outputManagerFactory,
    WindowingStrategy<?, ?> windowingStrategy,
    Map<Integer, PCollectionView<?>> sideInputTagMapping,
    Collection<PCollectionView<?>> sideInputs,
    PipelineOptions options,
    Coder<?> keyCoder,
    KeySelector<WindowedValue<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>>, ?> keySelector) {
  super(
      doFn,
      stepName,
      windowedInputCoder,
      outputCoders,
      mainOutputTag,
      additionalOutputTags,
      outputManagerFactory,
      windowingStrategy,
      sideInputTagMapping,
      sideInputs,
      options,
      keyCoder,
      keySelector,
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
 
Example #24
Source File: AppliedCombineFn.java    From beam with Apache License 2.0
public static <K, InputT, AccumT, OutputT>
    AppliedCombineFn<K, InputT, AccumT, OutputT> withAccumulatorCoder(
        GlobalCombineFn<? super InputT, AccumT, OutputT> fn,
        Coder<AccumT> accumCoder,
        Iterable<PCollectionView<?>> sideInputViews,
        KvCoder<K, InputT> kvCoder,
        WindowingStrategy<?, ?> windowingStrategy) {
  // Casting down the K and InputT is safe because they're only used as inputs.
  @SuppressWarnings("unchecked")
  GlobalCombineFn<InputT, AccumT, OutputT> clonedFn =
      (GlobalCombineFn<InputT, AccumT, OutputT>) SerializableUtils.clone(fn);
  return create(clonedFn, accumCoder, sideInputViews, kvCoder, windowingStrategy);
}
 
Example #25
Source File: AppliedCombineFn.java    From beam with Apache License 2.0
private AppliedCombineFn(
    GlobalCombineFn<InputT, AccumT, OutputT> fn,
    Coder<AccumT> accumulatorCoder,
    Iterable<PCollectionView<?>> sideInputViews,
    KvCoder<K, InputT> kvCoder,
    WindowingStrategy<?, ?> windowingStrategy) {
  this.fn = fn;
  this.accumulatorCoder = accumulatorCoder;
  this.sideInputViews = sideInputViews;
  this.kvCoder = kvCoder;
  this.windowingStrategy = windowingStrategy;
}
 
Example #26
Source File: AbstractParDoP.java    From beam with Apache License 2.0
abstract Processor getEx(
    DoFn<InputT, OutputT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<TupleTag<?>, int[]> outputCollToOrdinals,
    SerializablePipelineOptions pipelineOptions,
    TupleTag<OutputT> mainOutputTag,
    Coder<InputT> inputCoder,
    Map<PCollectionView<?>, Coder<?>> sideInputCoders,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    Coder<InputT> inputValueCoder,
    Map<TupleTag<?>, Coder<?>> outputValueCoders,
    Map<Integer, PCollectionView<?>> ordinalToSideInput,
    String ownerId,
    String stepId);
 
Example #27
Source File: PartialGroupByKeyParDoFnsTest.java    From beam with Apache License 2.0
@Test
public void testCreateWithCombinerAndStreamingSideInputs() throws Exception {
  StreamingOptions options = PipelineOptionsFactory.as(StreamingOptions.class);
  options.setStreaming(true);

  Coder keyCoder = StringUtf8Coder.of();
  Coder valueCoder = BigEndianIntegerCoder.of();
  KvCoder<String, Integer> kvCoder = KvCoder.of(keyCoder, valueCoder);

  TestOutputReceiver receiver =
      new TestOutputReceiver(
          new ElementByteSizeObservableCoder(WindowedValue.getValueOnlyCoder(kvCoder)),
          counterSet,
          NameContextsForTests.nameContextForTest());

  when(mockSideInputReader.isEmpty()).thenReturn(false);
  when(mockStreamingStepContext.stateInternals()).thenReturn((StateInternals) mockStateInternals);
  when(mockStateInternals.state(Matchers.<StateNamespace>any(), Matchers.<StateTag>any()))
      .thenReturn(mockState);
  when(mockState.read()).thenReturn(Maps.newHashMap());

  ParDoFn pgbk =
      PartialGroupByKeyParDoFns.create(
          options,
          kvCoder,
          AppliedCombineFn.withInputCoder(
              Sum.ofIntegers(),
              CoderRegistry.createDefault(),
              kvCoder,
              ImmutableList.<PCollectionView<?>>of(),
              WindowingStrategy.globalDefault()),
          mockSideInputReader,
          receiver,
          mockStreamingStepContext);
  assertTrue(pgbk instanceof StreamingSideInputPGBKParDoFn);
}
 
Example #28
Source File: GroupAlsoByWindowProperties.java    From beam with Apache License 2.0
/**
 * Tests that the given GABW implementation correctly groups elements into merged sessions with
 * output timestamps at the end of the merged window.
 */
public static void groupsElementsInMergedSessionsWithEndOfWindowTimestamp(
    GroupAlsoByWindowDoFnFactory<String, String, Iterable<String>> gabwFactory) throws Exception {

  WindowingStrategy<?, IntervalWindow> windowingStrategy =
      WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10)))
          .withTimestampCombiner(TimestampCombiner.END_OF_WINDOW);

  List<WindowedValue<KV<String, Iterable<String>>>> result =
      runGABW(
          gabwFactory,
          windowingStrategy,
          "k",
          WindowedValue.of(
              "v1", new Instant(0), Arrays.asList(window(0, 10)), PaneInfo.NO_FIRING),
          WindowedValue.of(
              "v2", new Instant(5), Arrays.asList(window(5, 15)), PaneInfo.NO_FIRING),
          WindowedValue.of(
              "v3", new Instant(15), Arrays.asList(window(15, 25)), PaneInfo.NO_FIRING));

  assertThat(result, hasSize(2));

  TimestampedValue<KV<String, Iterable<String>>> item0 =
      getOnlyElementInWindow(result, window(0, 15));
  assertThat(item0.getValue().getValue(), containsInAnyOrder("v1", "v2"));
  assertThat(item0.getTimestamp(), equalTo(window(0, 15).maxTimestamp()));

  TimestampedValue<KV<String, Iterable<String>>> item1 =
      getOnlyElementInWindow(result, window(15, 25));
  assertThat(item1.getValue().getValue(), contains("v3"));
  assertThat(item1.getTimestamp(), equalTo(window(15, 25).maxTimestamp()));
}
 
Example #29
Source File: EvaluationContextTest.java    From beam with Apache License 2.0
@Test
public void callAfterOutputMustHaveBeenProducedAlreadyAfterCallsImmediately() throws Exception {
  TransformResult<?> finishedResult = StepTransformResult.withoutHold(createdProducer).build();
  context.handleResult(null, ImmutableList.of(), finishedResult);

  final CountDownLatch callLatch = new CountDownLatch(1);
  context.extractFiredTimers();
  Runnable callback = callLatch::countDown;
  context.scheduleAfterOutputWouldBeProduced(
      downstream, GlobalWindow.INSTANCE, WindowingStrategy.globalDefault(), callback);
  assertThat(callLatch.await(1, TimeUnit.SECONDS), is(true));
}
 
Example #30
Source File: PipelineTranslationTest.java    From beam with Apache License 2.0
@Parameters(name = "{index}")
public static Iterable<Pipeline> testPipelines() {
  Pipeline trivialPipeline = Pipeline.create();
  trivialPipeline.apply(Create.of(1, 2, 3));

  Pipeline sideInputPipeline = Pipeline.create();
  final PCollectionView<String> singletonView =
      sideInputPipeline.apply(Create.of("foo")).apply(View.asSingleton());
  sideInputPipeline
      .apply(Create.of("main input"))
      .apply(
          ParDo.of(
                  new DoFn<String, String>() {
                    @ProcessElement
                    public void process(ProcessContext c) {
                      // actually never executed and no effect on translation
                      c.sideInput(singletonView);
                    }
                  })
              .withSideInputs(singletonView));

  Pipeline complexPipeline = Pipeline.create();
  BigEndianLongCoder customCoder = BigEndianLongCoder.of();
  PCollection<Long> elems = complexPipeline.apply(GenerateSequence.from(0L).to(207L));
  PCollection<Long> counted = elems.apply(Count.globally()).setCoder(customCoder);
  PCollection<Long> windowed =
      counted.apply(
          Window.<Long>into(FixedWindows.of(Duration.standardMinutes(7)))
              .triggering(
                  AfterWatermark.pastEndOfWindow()
                      .withLateFirings(AfterPane.elementCountAtLeast(19)))
              .accumulatingFiredPanes()
              .withAllowedLateness(Duration.standardMinutes(3L)));
  final WindowingStrategy<?, ?> windowedStrategy = windowed.getWindowingStrategy();
  PCollection<KV<String, Long>> keyed = windowed.apply(WithKeys.of("foo"));
  PCollection<KV<String, Iterable<Long>>> grouped = keyed.apply(GroupByKey.create());

  return ImmutableList.of(trivialPipeline, sideInputPipeline, complexPipeline);
}