org.apache.beam.sdk.transforms.windowing.AfterPane Java Examples

The following examples show how to use org.apache.beam.sdk.transforms.windowing.AfterPane. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: WriteFeatureSetSpecAck.java    From feast with Apache License 2.0 6 votes vote down vote up
/**
 * Emits the {@link FeatureSetReference} keys whose per-element count has reached {@code
 * sinksCount}.
 *
 * <p>The input is put into the global window with an accumulating trigger that fires after
 * every element, counted per element, filtered on {@code count >= sinksCount}, and finally
 * reduced to its keys.
 *
 * @param input the references to count
 * @return the keys of the counts that passed the filter
 */
@Override
public PCollection<FeatureSetReference> expand(PCollection<FeatureSetReference> input) {
  // Global window, accumulating panes, one firing per arriving element, no lateness.
  Window<FeatureSetReference> fireOnEveryElement =
      Window.<FeatureSetReference>into(new GlobalWindows())
          .accumulatingFiredPanes()
          .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
          .withAllowedLateness(Duration.ZERO);

  PCollection<KV<FeatureSetReference, Long>> readyCounts =
      input
          .apply("OnEveryElementTrigger", fireOnEveryElement)
          .apply("CountingReadySinks", Count.perElement());

  return readyCounts
      .apply(
          "WhenAllReady",
          Filter.by(
              (SerializableFunction<KV<FeatureSetReference, Long>, Boolean>)
                  refCount -> refCount.getValue() >= sinksCount))
      .apply(Keys.create());
}
 
Example #2
Source File: GroupByKeyTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Expects applying {@link GroupByKey} to fail with an "Unsafe trigger" {@link
 * IllegalArgumentException} when the input uses an end-of-window trigger with early firings and
 * an allowed lateness of 10 ms.
 */
@Test
public void testGroupByKeyFinishingEndOfWindowEarlyFiringsTriggerNotOk() {
  Window<KV<String, String>> earlyFiringsWithLateness =
      Window.<KV<String, String>>configure()
          .discardingFiredPanes()
          .triggering(
              AfterWatermark.pastEndOfWindow()
                  .withEarlyFirings(AfterPane.elementCountAtLeast(1)))
          .withAllowedLateness(Duration.millis(10));

  PCollection<KV<String, String>> input =
      p.apply(Create.of(KV.of("hello", "goodbye"))).apply(earlyFiringsWithLateness);

  // The expectation is registered only after the input is built, so it covers the
  // GroupByKey application below.
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("Unsafe trigger");
  input.apply(GroupByKey.create());
}
 
Example #3
Source File: ReduceFnRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that no watermark hold is reported after elements are injected into a session-windowed
 * tester whose input watermark has already been advanced to 40.
 */
@Test
public void testWatermarkHoldForLateNewWindow() throws Exception {
  Duration allowedLateness = Duration.standardMinutes(1);
  Duration gapDuration = Duration.millis(10);

  // Sessions with discarding panes; after the watermark passes, fire on every late element.
  WindowingStrategy<?, IntervalWindow> strategy =
      WindowingStrategy.of(Sessions.withGapDuration(gapDuration))
          .withMode(AccumulationMode.DISCARDING_FIRED_PANES)
          .withTrigger(
              Repeatedly.forever(
                  AfterWatermark.pastEndOfWindow()
                      .withLateFirings(AfterPane.elementCountAtLeast(1))))
          .withAllowedLateness(allowedLateness);
  ReduceFnTester<Integer, Iterable<Integer>, IntervalWindow> tester =
      ReduceFnTester.nonCombining(strategy);
  tester.setAutoAdvanceOutputWatermark(false);

  // Nothing has been processed yet: neither a hold nor an output watermark exists.
  assertEquals(null, tester.getWatermarkHold());
  assertEquals(null, tester.getOutputWatermark());

  tester.advanceInputWatermark(new Instant(40));

  injectElements(tester, 1);
  assertThat(tester.getWatermarkHold(), nullValue());

  injectElements(tester, 10);
  assertThat(tester.getWatermarkHold(), nullValue());
}
 
Example #4
Source File: CombineTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Sums 1..5 globally with a fanout of 2 under an accumulating, fire-per-element trigger, and
 * asserts that the output produced after {@code GetLast} contains the total 15.
 */
@Test
@Category(ValidatesRunner.class)
public void testHotKeyCombiningWithAccumulationMode() {
  Window<Integer> accumulatingPerElement =
      Window.<Integer>into(new GlobalWindows())
          .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
          .accumulatingFiredPanes()
          .withAllowedLateness(new Duration(0), ClosingBehavior.FIRE_ALWAYS);

  PCollection<Integer> input = pipeline.apply(Create.of(1, 2, 3, 4, 5));
  PCollection<Integer> windowed = input.apply(accumulatingPerElement);
  PCollection<Integer> output =
      windowed
          .apply(Sum.integersGlobally().withoutDefaults().withFanout(2))
          .apply(ParDo.of(new GetLast()));

  PAssert.that(output)
      .satisfies(
          sums -> {
            assertThat(sums, hasItem(15));
            return null;
          });

  pipeline.run();
}
 
Example #5
Source File: CombineTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Sums two 1s globally (with defaults) under an accumulating, fire-per-element trigger, and
 * asserts that the formatted pane output contains {@code "2: true"}.
 */
@Test
@Category({ValidatesRunner.class, UsesSideInputs.class})
public void testGlobalCombineWithDefaultsAndTriggers() {
  Window<Integer> accumulatingPerElement =
      Window.<Integer>into(new GlobalWindows())
          .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
          .accumulatingFiredPanes()
          .withAllowedLateness(new Duration(0), ClosingBehavior.FIRE_ALWAYS);

  PCollection<Integer> input = pipeline.apply(Create.of(1, 1));
  PCollection<Integer> windowed = input.apply(accumulatingPerElement);
  PCollection<String> output =
      windowed.apply(Sum.integersGlobally()).apply(ParDo.of(new FormatPaneInfo()));

  // The actual elements produced are nondeterministic. Could be one, could be two.
  // But it should certainly have a final element with the correct final sum.
  PAssert.that(output)
      .satisfies(
          formatted -> {
            assertThat(formatted, hasItem("2: true"));
            return null;
          });

  pipeline.run();
}
 
Example #6
Source File: TextIOReadTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the numbers 0..9 as windowed text files under a temporary directory while a second
 * branch of the same pipeline watches that directory for new files, and asserts that the lines
 * read back are exactly "0".."9".
 */
@Test
@Category({NeedsRunner.class, UsesUnboundedSplittableParDo.class})
public void testReadWatchForNewFiles() throws IOException, InterruptedException {
  final Path basePath = tempFolder.getRoot().toPath().resolve("readWatch");
  basePath.toFile().mkdir();

  String outputPrefix = basePath.resolve("data").toString();
  String inputGlob = basePath.resolve("*").toString();

  // 150 ms fixed windows, emitting a discarding pane for every element.
  Window<Long> perElementPanes =
      Window.<Long>into(FixedWindows.of(Duration.millis(150)))
          .withAllowedLateness(Duration.ZERO)
          .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
          .discardingFiredPanes();

  // Writer branch: one number every 100 ms.
  p.apply(GenerateSequence.from(0).to(10).withRate(1, Duration.millis(100)))
      .apply(perElementPanes)
      .apply(ToString.elements())
      .apply(TextIO.write().to(outputPrefix).withNumShards(1).withWindowedWrites());

  // Reader branch: poll every 100 ms, stop 3 s after the last new output.
  PCollection<String> lines =
      p.apply(
          TextIO.read()
              .from(inputGlob)
              .watchForNewFiles(
                  Duration.millis(100),
                  Watch.Growth.afterTimeSinceNewOutput(Duration.standardSeconds(3))));

  PAssert.that(lines).containsInAnyOrder("0", "1", "2", "3", "4", "5", "6", "7", "8", "9");
  p.run();
}
 
Example #7
Source File: BeamModel.java    From streamingbook with Apache License 2.0 5 votes vote down vote up
/**
 * Sums integers per key in fixed windows of {@code TWO_MINUTES} and formats the results as
 * strings.
 *
 * <p>The trigger fires at the end of the window, with early firings {@code ONE_MINUTE} of
 * processing time after the first element in a pane and a late firing for every late element.
 * Panes accumulate and lateness of up to 1000 days is allowed.
 *
 * @param input keyed integers to aggregate
 * @return the output of {@code FormatAsStrings} applied to the per-key sums
 */
@Override
public PCollection<String> expand(PCollection<KV<String, Integer>> input) {
    Window<KV<String, Integer>> earlyOnTimeLate =
        Window.<KV<String, Integer>>into(FixedWindows.of(TWO_MINUTES))
            .triggering(AfterWatermark.pastEndOfWindow()
                        .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane().plusDelayOf(ONE_MINUTE))
                        .withLateFirings(AfterPane.elementCountAtLeast(1)))
            .withAllowedLateness(Duration.standardDays(1000))
            .accumulatingFiredPanes();

    PCollection<KV<String, Integer>> windowed = input.apply(earlyOnTimeLate);
    return windowed
        .apply(Sum.integersPerKey())
        .apply(ParDo.of(new FormatAsStrings()));
}
 
Example #8
Source File: WriteTables.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Applies {@code WriteTablesDoFn} to the input and, on a side branch, groups the temporary file
 * names it emits and hands them to {@code GarbageCollectTemporaryFiles}.
 *
 * @param input sharded keys paired with lists of strings, passed to {@code WriteTablesDoFn}
 * @return the main output of {@code WriteTablesDoFn}
 */
@Override
public PCollection<KV<TableDestination, String>> expand(
    PCollection<KV<ShardedKey<DestinationT>, List<String>>> input) {
  PCollectionTuple writeTablesOutputs =
      input.apply(
          ParDo.of(new WriteTablesDoFn())
              .withSideInputs(sideInputs)
              .withOutputTags(mainOutputTag, TupleTagList.of(temporaryFilesTag)));

  // Global window, one discarding pane per element.
  Window<KV<Void, String>> perElementPanes =
      Window.<KV<Void, String>>into(new GlobalWindows())
          .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
          .discardingFiredPanes();

  // Garbage collect temporary files.
  // We mustn't start garbage collecting files until we are assured that the WriteTablesDoFn has
  // succeeded in loading those files and won't be retried. Otherwise, we might fail part of the
  // way through deleting temporary files, and retry WriteTablesDoFn. This will then fail due
  // to missing files, causing either the entire workflow to fail or get stuck (depending on how
  // the runner handles persistent failures).
  writeTablesOutputs
      .get(temporaryFilesTag)
      .setCoder(StringUtf8Coder.of())
      .apply(WithKeys.of((Void) null))
      .setCoder(KvCoder.of(VoidCoder.of(), StringUtf8Coder.of()))
      .apply(perElementPanes)
      .apply(GroupByKey.create())
      .apply(Values.create())
      .apply(ParDo.of(new GarbageCollectTemporaryFiles()));

  PCollection<KV<TableDestination, String>> mainOutput = writeTablesOutputs.get(mainOutputTag);
  return mainOutput;
}
 
Example #9
Source File: PubsubUnboundedSink.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Windows, shards, groups and writes the incoming Pubsub messages.
 *
 * <p>Messages go into the global window with a repeating, discarding trigger that fires as soon
 * as either {@code publishBatchSize} elements have arrived or {@code maxLatency} of processing
 * time has passed since the first element in the pane. They are then processed by {@code
 * ShardFn}, grouped by key, and passed to {@code WriterFn}.
 *
 * @param input messages to publish
 * @return {@link PDone} in the input's pipeline
 */
@Override
public PDone expand(PCollection<PubsubMessage> input) {
  Window<PubsubMessage> batchOrTimeout =
      Window.<PubsubMessage>into(new GlobalWindows())
          .triggering(
              Repeatedly.forever(
                  AfterFirst.of(
                      AfterPane.elementCountAtLeast(publishBatchSize),
                      AfterProcessingTime.pastFirstElementInPane().plusDelayOf(maxLatency))))
          .discardingFiredPanes();

  PCollection<PubsubMessage> windowed = input.apply("PubsubUnboundedSink.Window", batchOrTimeout);

  windowed
      .apply("PubsubUnboundedSink.Shard", ParDo.of(new ShardFn(numShards, recordIdMethod)))
      .setCoder(KvCoder.of(VarIntCoder.of(), CODER))
      .apply(GroupByKey.create())
      .apply(
          "PubsubUnboundedSink.Writer",
          ParDo.of(
              new WriterFn(
                  pubsubFactory,
                  topic,
                  timestampAttribute,
                  idAttribute,
                  publishBatchSize,
                  publishBatchBytes)));

  return PDone.in(input.getPipeline());
}
 
Example #10
Source File: KafkaExactlyOnceSink.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the exactly-once Kafka write: window, reshard, group, sequence, group again, write.
 *
 * <p>When the spec does not give a positive shard count, the number of partitions of the target
 * topic is used instead.
 *
 * @param input producer records to write
 * @return the output of the {@code ExactlyOnceWriter} step
 * @throws IllegalStateException (via {@code checkState}) if no positive shard count could be
 *     determined
 */
@Override
public PCollection<Void> expand(PCollection<ProducerRecord<K, V>> input) {

  int numShards = spec.getNumShards();
  if (numShards <= 0) {
    // Fall back to one shard per topic partition; the consumer is closed right after the lookup.
    try (Consumer<?, ?> consumer = openConsumer(spec)) {
      numShards = consumer.partitionsFor(spec.getTopic()).size();
      LOG.info(
          "Using {} shards for exactly-once writer, matching number of partitions "
              + "for topic '{}'",
          numShards,
          spec.getTopic());
    }
  }
  checkState(numShards > 0, "Could not set number of shards");

  // Everything into global window.
  Window<ProducerRecord<K, V>> globalPerElement =
      Window.<ProducerRecord<K, V>>into(new GlobalWindows())
          .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
          .discardingFiredPanes();
  String reshardStepName = String.format("Shuffle across %d shards", numShards);

  return input
      .apply(globalPerElement)
      .apply(reshardStepName, ParDo.of(new Reshard<>(numShards)))
      .apply("Persist sharding", GroupByKey.create())
      .apply("Assign sequential ids", ParDo.of(new Sequencer<>()))
      .apply("Persist ids", GroupByKey.create())
      .apply(
          String.format("Write to Kafka topic '%s'", spec.getTopic()),
          ParDo.of(new ExactlyOnceWriter<>(spec, input.getCoder())));
}
 
Example #11
Source File: PipelineTranslationTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Supplies the pipelines used as test parameters: a trivial one, one with a side input, and one
 * combining a custom coder, fixed windows with a late-firing trigger, and a GroupByKey.
 *
 * @return the three pipelines, in that order
 */
@Parameters(name = "{index}")
public static Iterable<Pipeline> testPipelines() {
  // 1. A single Create.
  Pipeline trivialPipeline = Pipeline.create();
  trivialPipeline.apply(Create.of(1, 2, 3));

  // 2. A ParDo that declares a singleton side input.
  Pipeline sideInputPipeline = Pipeline.create();
  final PCollectionView<String> singletonView =
      sideInputPipeline.apply(Create.of("foo")).apply(View.asSingleton());
  DoFn<String, String> sideInputReader =
      new DoFn<String, String>() {
        @ProcessElement
        public void process(ProcessContext c) {
          // actually never executed and no effect on translation
          c.sideInput(singletonView);
        }
      };
  sideInputPipeline
      .apply(Create.of("main input"))
      .apply(ParDo.of(sideInputReader).withSideInputs(singletonView));

  // 3. Custom coder, non-default windowing, keyed grouping.
  Pipeline complexPipeline = Pipeline.create();
  BigEndianLongCoder customCoder = BigEndianLongCoder.of();
  PCollection<Long> elems = complexPipeline.apply(GenerateSequence.from(0L).to(207L));
  PCollection<Long> counted = elems.apply(Count.globally()).setCoder(customCoder);
  Window<Long> sevenMinuteWindows =
      Window.<Long>into(FixedWindows.of(Duration.standardMinutes(7)))
          .triggering(
              AfterWatermark.pastEndOfWindow()
                  .withLateFirings(AfterPane.elementCountAtLeast(19)))
          .accumulatingFiredPanes()
          .withAllowedLateness(Duration.standardMinutes(3L));
  PCollection<Long> windowed = counted.apply(sevenMinuteWindows);
  final WindowingStrategy<?, ?> windowedStrategy = windowed.getWindowingStrategy();
  PCollection<KV<String, Long>> keyed = windowed.apply(WithKeys.of("foo"));
  PCollection<KV<String, Iterable<Long>>> grouped = keyed.apply(GroupByKey.create());

  return ImmutableList.of(trivialPipeline, sideInputPipeline, complexPipeline);
}
 
Example #12
Source File: PCollectionTranslationTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Supplies the PCollections used as test parameters.
 *
 * <p>A further collection with a custom coder and custom windowing is applied to the same
 * pipeline but is not part of the returned list.
 *
 * @return {@code ints, longs, windowedLongs, coderLongs, groupedStrings}, in that order
 */
@Parameters(name = "{index}: {0}")
public static Iterable<PCollection<?>> data() {
  Pipeline pipeline = TestPipeline.create();

  PCollection<Integer> ints = pipeline.apply("ints", Create.of(1, 2, 3));
  PCollection<Long> longs = pipeline.apply("unbounded longs", GenerateSequence.from(0));

  Duration tenMinutes = Duration.standardMinutes(10L);
  PCollection<Long> windowedLongs =
      longs.apply("into fixed windows", Window.into(FixedWindows.of(tenMinutes)));

  PCollection<KV<String, String>> kvs =
      pipeline.apply(
          "kvs", Create.of(KV.of("foo", "spam"), KV.of("bar", "ham"), KV.of("baz", "eggs")));
  PCollection<KV<String, Iterable<String>>> groupedStrings =
      kvs.apply("group", GroupByKey.create());

  PCollection<Long> counts =
      pipeline.apply("counts with alternative coder", GenerateSequence.from(0).to(10));
  PCollection<Long> coderLongs = counts.setCoder(BigEndianLongCoder.of());

  // Applied for its effect on the pipeline only; the result is not returned.
  PCollection<Integer> customCodedInts =
      pipeline.apply(
          "intsWithCustomCoder",
          Create.of(1, 2).withCoder(new AutoValue_PCollectionTranslationTest_CustomIntCoder()));
  customCodedInts.apply(
      "into custom windows",
      Window.into(new CustomWindows())
          .triggering(
              AfterWatermark.pastEndOfWindow()
                  .withEarlyFirings(
                      AfterFirst.of(
                          AfterPane.elementCountAtLeast(5),
                          AfterProcessingTime.pastFirstElementInPane()
                              .plusDelayOf(Duration.millis(227L)))))
          .accumulatingFiredPanes()
          .withAllowedLateness(Duration.standardMinutes(12L)));

  return ImmutableList.of(ints, longs, windowedLongs, coderLongs, groupedStrings);
}
 
Example #13
Source File: ReduceFnRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the watermark hold reported while late elements are added to session windows: none
 * after the first late element, and the expected garbage-collection hold after the second one,
 * which remains after the input watermark advances to 1000.
 */
@Test
public void testMergingLateWatermarkHolds() throws Exception {
  MetricsContainerImpl container = new MetricsContainerImpl("any");
  MetricsEnvironment.setCurrentContainer(container);

  Duration gapDuration = Duration.millis(10);
  Duration allowedLateness = Duration.standardMinutes(100);
  WindowingStrategy<?, IntervalWindow> strategy =
      WindowingStrategy.of(Sessions.withGapDuration(gapDuration))
          .withMode(AccumulationMode.DISCARDING_FIRED_PANES)
          .withTrigger(
              Repeatedly.forever(
                  AfterWatermark.pastEndOfWindow()
                      .withLateFirings(AfterPane.elementCountAtLeast(10))))
          .withAllowedLateness(allowedLateness);
  ReduceFnTester<Integer, Iterable<Integer>, IntervalWindow> tester =
      ReduceFnTester.nonCombining(strategy);
  tester.setAutoAdvanceOutputWatermark(false);

  // Input watermark -> null
  assertEquals(null, tester.getWatermarkHold());
  assertEquals(null, tester.getOutputWatermark());

  tester.advanceInputWatermark(new Instant(20));

  // Add two late elements that cause a window to merge.
  injectElements(tester, Arrays.asList(3));
  assertThat(tester.getWatermarkHold(), nullValue());
  injectElements(tester, Arrays.asList(4));

  // We expect a GC hold to be one less than the end of window plus the allowed lateness.
  Instant endOfWindow = new Instant(4).plus(gapDuration);
  Instant expectedGcHold = endOfWindow.plus(allowedLateness).minus(1);
  assertEquals(expectedGcHold, tester.getWatermarkHold());

  tester.advanceInputWatermark(new Instant(1000));
  assertEquals(expectedGcHold, tester.getWatermarkHold());
}
 
Example #14
Source File: ReduceFnRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
@Test
public void noEmptyPanesFinalIfNonEmpty() throws Exception {
  ReduceFnTester<Integer, Iterable<Integer>, IntervalWindow> tester =
      ReduceFnTester.nonCombining(
          WindowingStrategy.of(FixedWindows.of(Duration.millis(10)))
              .withTrigger(
                  Repeatedly.forever(
                      AfterFirst.of(
                          AfterPane.elementCountAtLeast(2), AfterWatermark.pastEndOfWindow())))
              .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES)
              .withAllowedLateness(Duration.millis(100))
              .withTimestampCombiner(TimestampCombiner.EARLIEST)
              .withClosingBehavior(ClosingBehavior.FIRE_IF_NON_EMPTY));

  tester.advanceInputWatermark(new Instant(0));
  tester.injectElements(
      TimestampedValue.of(1, new Instant(1)), TimestampedValue.of(2, new Instant(2)));
  tester.advanceInputWatermark(new Instant(20));
  tester.advanceInputWatermark(new Instant(250));

  List<WindowedValue<Iterable<Integer>>> output = tester.extractOutput();
  assertThat(
      output,
      contains(
          // Trigger with 2 elements
          isSingleWindowedValue(containsInAnyOrder(1, 2), 1, 0, 10),
          // Trigger for the empty on time pane
          isSingleWindowedValue(containsInAnyOrder(1, 2), 9, 0, 10)));
}
 
Example #15
Source File: ReduceFnRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
@Test
public void noEmptyPanesFinalAlways() throws Exception {
  ReduceFnTester<Integer, Iterable<Integer>, IntervalWindow> tester =
      ReduceFnTester.nonCombining(
          WindowingStrategy.of(FixedWindows.of(Duration.millis(10)))
              .withTrigger(
                  Repeatedly.forever(
                      AfterFirst.of(
                          AfterPane.elementCountAtLeast(2), AfterWatermark.pastEndOfWindow())))
              .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES)
              .withAllowedLateness(Duration.millis(100))
              .withTimestampCombiner(TimestampCombiner.EARLIEST)
              .withClosingBehavior(ClosingBehavior.FIRE_ALWAYS));

  tester.advanceInputWatermark(new Instant(0));
  tester.injectElements(
      TimestampedValue.of(1, new Instant(1)), TimestampedValue.of(2, new Instant(2)));
  tester.advanceInputWatermark(new Instant(20));
  tester.advanceInputWatermark(new Instant(250));

  List<WindowedValue<Iterable<Integer>>> output = tester.extractOutput();
  assertThat(
      output,
      contains(
          // Trigger with 2 elements
          isSingleWindowedValue(containsInAnyOrder(1, 2), 1, 0, 10),
          // Trigger for the empty on time pane
          isSingleWindowedValue(containsInAnyOrder(1, 2), 9, 0, 10),
          // Trigger for the final pane
          isSingleWindowedValue(containsInAnyOrder(1, 2), 9, 0, 10)));
}
 
Example #16
Source File: ReduceFnRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
@Test
public void testPaneInfoFinalAndOnTime() throws Exception {
  ReduceFnTester<Integer, Iterable<Integer>, IntervalWindow> tester =
      ReduceFnTester.nonCombining(
          WindowingStrategy.of(FixedWindows.of(Duration.millis(10)))
              .withTrigger(
                  Repeatedly.forever(AfterPane.elementCountAtLeast(2))
                      .orFinally(AfterWatermark.pastEndOfWindow()))
              .withMode(AccumulationMode.DISCARDING_FIRED_PANES)
              .withAllowedLateness(Duration.millis(100))
              .withClosingBehavior(ClosingBehavior.FIRE_ALWAYS));

  tester.advanceInputWatermark(new Instant(0));

  // Should trigger due to element count
  tester.injectElements(
      TimestampedValue.of(1, new Instant(1)), TimestampedValue.of(2, new Instant(2)));

  assertThat(
      tester.extractOutput(),
      contains(
          WindowMatchers.valueWithPaneInfo(
              PaneInfo.createPane(true, false, Timing.EARLY, 0, -1))));

  tester.advanceInputWatermark(new Instant(150));
  assertThat(
      tester.extractOutput(),
      contains(
          WindowMatchers.valueWithPaneInfo(
              PaneInfo.createPane(false, true, Timing.ON_TIME, 1, 0))));
}
 
Example #17
Source File: ReduceFnRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that an empty on-time isFinished pane is fired when OnTimeBehavior is FIRE_ALWAYS
 * and ClosingBehavior is FIRE_IF_NON_EMPTY.
 *
 * <p>This test exists only for backward compatibility.
 */
@Test
public void testEmptyOnTimeWithOnTimeBehaviorBackwardCompatibility() throws Exception {
  WindowingStrategy<?, IntervalWindow> strategy =
      WindowingStrategy.of((WindowFn<?, IntervalWindow>) FixedWindows.of(Duration.millis(10)))
          .withTimestampCombiner(TimestampCombiner.EARLIEST)
          .withTrigger(
              AfterWatermark.pastEndOfWindow().withEarlyFirings(AfterPane.elementCountAtLeast(1)))
          .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES)
          .withAllowedLateness(Duration.ZERO)
          .withClosingBehavior(ClosingBehavior.FIRE_IF_NON_EMPTY);

  ReduceFnTester<Integer, Integer, IntervalWindow> tester =
      ReduceFnTester.combining(strategy, Sum.ofIntegers(), VarIntCoder.of());

  tester.advanceInputWatermark(new Instant(0));
  tester.advanceProcessingTime(new Instant(0));

  tester.injectElements(TimestampedValue.of(1, new Instant(1)));

  // Should fire empty on time isFinished pane
  tester.advanceInputWatermark(new Instant(11));

  List<WindowedValue<Integer>> output = tester.extractOutput();
  assertEquals(2, output.size());

  WindowedValue<Integer> earlyPane = output.get(0);
  PaneInfo expectedEarly = PaneInfo.createPane(true, false, Timing.EARLY, 0, -1);
  assertThat(earlyPane, WindowMatchers.valueWithPaneInfo(expectedEarly));

  WindowedValue<Integer> onTimePane = output.get(1);
  PaneInfo expectedOnTime = PaneInfo.createPane(false, true, Timing.ON_TIME, 1, 0);
  assertThat(onTimePane, WindowMatchers.valueWithPaneInfo(expectedOnTime));
}
 
Example #18
Source File: ReduceFnRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Moving the watermark to end-of-time should fire a non-empty ON_TIME pane in the GlobalWindow
 * that holds the elements left over from the count-based EARLY panes.
 */
@Test
public void fireNonEmptyOnDrainInGlobalWindow() throws Exception {
  WindowingStrategy<?, GlobalWindow> strategy =
      WindowingStrategy.of(new GlobalWindows())
          .withTrigger(Repeatedly.forever(AfterPane.elementCountAtLeast(3)))
          .withMode(AccumulationMode.DISCARDING_FIRED_PANES);
  ReduceFnTester<Integer, Iterable<Integer>, GlobalWindow> tester =
      ReduceFnTester.nonCombining(strategy);

  tester.advanceInputWatermark(new Instant(0));

  final int n = 20;
  // Each early pane takes three elements; the remainder is left for the drain pane.
  final int earlyPaneCount = n / 3;
  final int leftoverCount = n % 3;

  for (int value = 0; value < n; value++) {
    tester.injectElements(TimestampedValue.of(value, new Instant(value)));
  }

  List<WindowedValue<Iterable<Integer>>> output = tester.extractOutput();
  assertEquals(earlyPaneCount, output.size());
  for (int i = 0; i < output.size(); i++) {
    WindowedValue<Iterable<Integer>> earlyPane = output.get(i);
    assertEquals(Timing.EARLY, earlyPane.getPane().getTiming());
    assertEquals(i, earlyPane.getPane().getIndex());
    assertEquals(3, Iterables.size(earlyPane.getValue()));
  }

  tester.advanceInputWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);

  output = tester.extractOutput();
  assertEquals(1, output.size());
  WindowedValue<Iterable<Integer>> drainPane = output.get(0);
  assertEquals(Timing.ON_TIME, drainPane.getPane().getTiming());
  assertEquals(earlyPaneCount, drainPane.getPane().getIndex());
  assertEquals(leftoverCount, Iterables.size(drainPane.getValue()));
}
 
Example #19
Source File: ReduceFnRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * A late element must still get a garbage collection hold, so that it can produce a late pane
 * instead of being dropped for lateness.
 */
@Test
public void setGarbageCollectionHoldOnLateElements() throws Exception {
  WindowingStrategy<?, IntervalWindow> strategy =
      WindowingStrategy.of(FixedWindows.of(Duration.millis(10)))
          .withTrigger(
              AfterWatermark.pastEndOfWindow()
                  .withLateFirings(AfterPane.elementCountAtLeast(2)))
          .withMode(AccumulationMode.DISCARDING_FIRED_PANES)
          .withAllowedLateness(Duration.millis(100))
          .withClosingBehavior(ClosingBehavior.FIRE_IF_NON_EMPTY);
  ReduceFnTester<Integer, Iterable<Integer>, IntervalWindow> tester =
      ReduceFnTester.nonCombining(strategy);

  Instant start = new Instant(0);
  tester.advanceInputWatermark(start);
  tester.advanceOutputWatermark(start);
  tester.injectElements(TimestampedValue.of(1, new Instant(1)));

  // Fire ON_TIME pane @ 9 with 1

  Instant lastAllowedInstant = new Instant(109);
  tester.advanceInputWatermark(lastAllowedInstant);
  tester.advanceOutputWatermark(lastAllowedInstant);
  tester.injectElements(TimestampedValue.of(2, new Instant(2)));
  // We should have set a garbage collection hold for the final pane.
  assertEquals(new Instant(109), tester.getWatermarkHold());

  Instant expiry = new Instant(110);
  tester.advanceInputWatermark(expiry);
  tester.advanceOutputWatermark(expiry);

  // Fire final LATE pane @ 9 with 2

  List<WindowedValue<Iterable<Integer>>> output = tester.extractOutput();
  assertEquals(2, output.size());
}
 
Example #20
Source File: CreateStreamTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs two independent {@code CreateStream} sources (strings and integers) in one pipeline,
 * each with its own element-count trigger, and asserts that both produce all of their elements.
 */
@Test
public void testMultipleStreams() throws IOException {
  CreateStream<String> source =
      CreateStream.of(StringUtf8Coder.of(), batchDuration())
          .nextBatch("foo", "bar")
          .advanceNextBatchWatermarkToInfinity();
  CreateStream<Integer> other =
      CreateStream.of(VarIntCoder.of(), batchDuration())
          .nextBatch(1, 2, 3, 4)
          .advanceNextBatchWatermarkToInfinity();

  // Strings: fire once two elements have arrived.
  PCollection<String> rawStrings = p.apply("CreateStrings", source);
  Window<String> afterTwoStrings =
      Window.<String>configure()
          .triggering(AfterPane.elementCountAtLeast(2))
          .withAllowedLateness(Duration.ZERO)
          .accumulatingFiredPanes();
  PCollection<String> createStrings = rawStrings.apply("WindowStrings", afterTwoStrings);
  PAssert.that(createStrings).containsInAnyOrder("foo", "bar");

  // Integers: fire once four elements have arrived.
  PCollection<Integer> rawInts = p.apply("CreateInts", other);
  Window<Integer> afterFourInts =
      Window.<Integer>configure()
          .triggering(AfterPane.elementCountAtLeast(4))
          .withAllowedLateness(Duration.ZERO)
          .accumulatingFiredPanes();
  PCollection<Integer> createInts = rawInts.apply("WindowInts", afterFourInts);
  PAssert.that(createInts).containsInAnyOrder(1, 2, 3, 4);

  p.run();
}
 
Example #21
Source File: BatchGroupAlsoByWindowFnsTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * For fixed windows with an element-count trigger, {@code createForIterable} is expected to
 * return a {@code BatchGroupAlsoByWindowViaIteratorsFn}.
 */
@Test
public void testCreateNoncombiningWithTrigger() throws Exception {
  Coder<Long> inputCoder = VarLongCoder.of();
  FixedWindows tenMillisWindows = FixedWindows.of(Duration.millis(10));
  WindowingStrategy<?, IntervalWindow> windowingStrategy =
      WindowingStrategy.of(tenMillisWindows).withTrigger(AfterPane.elementCountAtLeast(1));

  assertThat(
      BatchGroupAlsoByWindowsDoFns.createForIterable(
          windowingStrategy, new InMemoryStateInternalsFactory<>(), inputCoder),
      instanceOf(BatchGroupAlsoByWindowViaIteratorsFn.class));
}
 
Example #22
Source File: BatchGroupAlsoByWindowFnsTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * For session windows with an element-count trigger and a combine function, {@code create} is
 * expected to return a {@code BatchGroupAlsoByWindowAndCombineFn}.
 */
@Test
public void testCreateCombiningWithTrigger() throws Exception {
  AppliedCombineFn<String, Long, ?, Long> appliedFn =
      AppliedCombineFn.withInputCoder(
          Sum.ofLongs(),
          CoderRegistry.createDefault(),
          KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of()));

  Sessions tenMillisSessions = Sessions.withGapDuration(Duration.millis(10));
  WindowingStrategy<?, IntervalWindow> windowingStrategy =
      WindowingStrategy.of(tenMillisSessions).withTrigger(AfterPane.elementCountAtLeast(1));

  assertThat(
      BatchGroupAlsoByWindowsDoFns.create(windowingStrategy, appliedFn),
      instanceOf(BatchGroupAlsoByWindowAndCombineFn.class));
}
 
Example #23
Source File: GroupByKeyTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Applying {@link GroupByKey} must succeed when the input uses an end-of-window trigger with
 * early firings and zero allowed lateness.
 */
@Test
public void testGroupByKeyFinishingEndOfWindowEarlyFiringsTriggerOk() {
  Window<KV<String, String>> earlyFiringsNoLateness =
      Window.<KV<String, String>>configure()
          .discardingFiredPanes()
          .triggering(
              AfterWatermark.pastEndOfWindow()
                  .withEarlyFirings(AfterPane.elementCountAtLeast(1)))
          .withAllowedLateness(Duration.ZERO);

  PCollection<KV<String, String>> input =
      p.apply(Create.of(KV.of("hello", "goodbye"))).apply(earlyFiringsNoLateness);

  // OK
  input.apply(GroupByKey.create());
}
 
Example #24
Source File: KafkaExactlyOnceSink.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the exactly-once Kafka write: window, reshard, group, sequence, group again, write.
 *
 * <p>When the spec does not give a positive shard count, the number of partitions of the target
 * topic is used instead.
 *
 * @param input key/value pairs to write
 * @return the output of the {@code ExactlyOnceWriter} step
 * @throws IllegalStateException (via {@code checkState}) if no positive shard count could be
 *     determined
 */
@Override
public PCollection<Void> expand(PCollection<KV<K, V>> input) {

  int numShards = spec.getNumShards();
  if (numShards <= 0) {
    // Fall back to one shard per topic partition; the consumer is closed right after the lookup.
    try (Consumer<?, ?> consumer = openConsumer(spec)) {
      numShards = consumer.partitionsFor(spec.getTopic()).size();
      LOG.info(
          "Using {} shards for exactly-once writer, matching number of partitions "
              + "for topic '{}'",
          numShards,
          spec.getTopic());
    }
  }
  checkState(numShards > 0, "Could not set number of shards");

  // Everything into global window.
  Window<KV<K, V>> globalPerElement =
      Window.<KV<K, V>>into(new GlobalWindows())
          .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
          .discardingFiredPanes();
  String reshardStepName = String.format("Shuffle across %d shards", numShards);

  return input
      .apply(globalPerElement)
      .apply(reshardStepName, ParDo.of(new Reshard<>(numShards)))
      .apply("Persist sharding", GroupByKey.create())
      .apply("Assign sequential ids", ParDo.of(new Sequencer<>()))
      .apply("Persist ids", GroupByKey.create())
      .apply(
          String.format("Write to Kafka topic '%s'", spec.getTopic()),
          ParDo.of(new ExactlyOnceWriter<>(spec, input.getCoder())));
}
 
Example #25
Source File: ReadFeatureSetSpecs.java    From feast with Apache License 2.0 5 votes vote down vote up
/**
 * Reads feature set specs from Kafka and keeps, per key, the spec with the highest version.
 *
 * <p>Records are read as bytes from the configured topic starting at the earliest offset with
 * auto-commit disabled, parsed, filtered by {@code FilterRelevantFunction}, and placed in the
 * global window with an accumulating trigger that fires after every element. The per-key
 * combine picks the spec with the greatest version, and the result is passed to {@code
 * CreateFeatureSetReference}.
 *
 * @param input the pipeline start
 * @return the output of {@code CreateFeatureSetReference}, with an Avro/Proto KV coder set
 */
@Override
public PCollection<KV<FeatureSetReference, FeatureSetSpec>> expand(PBegin input) {
  // Global window, accumulating panes, one firing per arriving element, no lateness.
  Window<KV<String, FeatureSetSpec>> accumulatingPerElement =
      Window.<KV<String, FeatureSetSpec>>into(new GlobalWindows())
          .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
          .accumulatingFiredPanes()
          .withAllowedLateness(Duration.ZERO);

  return input
      .apply(
          KafkaIO.readBytes()
              .withBootstrapServers(
                  getSpecsStreamingUpdateConfig().getSource().getBootstrapServers())
              .withTopic(getSpecsStreamingUpdateConfig().getSource().getTopic())
              .withConsumerConfigUpdates(
                  ImmutableMap.of(
                      ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,
                      "earliest",
                      ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG,
                      false)))
      .apply("ParseFeatureSetSpec", ParDo.of(new KafkaRecordToFeatureSetSpec()))
      .apply("OnlyRelevantSpecs", Filter.by(new FilterRelevantFunction(getSource(), getStores())))
      .apply(accumulatingPerElement)
      .apply(
          Combine.perKey(
              (SerializableFunction<Iterable<FeatureSetSpec>, FeatureSetSpec>)
                  candidates -> {
                    // Sort newest-first and take the head.
                    ArrayList<FeatureSetSpec> byVersionDesc = Lists.newArrayList(candidates);
                    byVersionDesc.sort(
                        Comparator.comparing(FeatureSetSpec::getVersion).reversed());
                    return byVersionDesc.get(0);
                  }))
      .apply("CreateFeatureSetReferenceKey", ParDo.of(new CreateFeatureSetReference()))
      .setCoder(
          KvCoder.of(
              AvroCoder.of(FeatureSetReference.class), ProtoCoder.of(FeatureSetSpec.class)));
}
 
Example #26
Source File: BeamSqlDslAggregationTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a grouped SUM over an input placed in the global window with a repeated element-count
 * trigger, and checks both the resulting window fn and the per-pane sums.
 */
@Test
public void testSupportsGlobalWindowWithCustomTrigger() throws Exception {
  pipeline.enableAbandonedNodeEnforcement(false);

  DateTime baseTime = parseTimestampWithoutTimeZone("2017-1-1 0:0:0");

  Schema schema =
      Schema.builder()
          .addInt32Field("f_intGroupingKey")
          .addInt32Field("f_intValue")
          .addDateTimeField("f_timestamp")
          .build();

  // Flattened (grouping key, value, timestamp) triples; every row shares grouping key 0.
  Object[] values =
      new Object[] {
        0, 1, baseTime.plusSeconds(0),
        0, 2, baseTime.plusSeconds(1),
        0, 3, baseTime.plusSeconds(2),
        0, 4, baseTime.plusSeconds(3),
        0, 5, baseTime.plusSeconds(4),
        0, 6, baseTime.plusSeconds(6)
      };

  // Fire a pane for every two elements and discard elements that have already been emitted.
  Window<Row> everyTwoElements =
      Window.<Row>into(new GlobalWindows())
          .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(2)))
          .discardingFiredPanes()
          .withOnTimeBehavior(Window.OnTimeBehavior.FIRE_IF_NON_EMPTY);

  PCollection<Row> windowedRows =
      createTestPCollection(schema, values, "f_timestamp").apply(everyTwoElements);

  String query = "SELECT SUM(f_intValue) AS `sum` FROM PCOLLECTION GROUP BY f_intGroupingKey";

  PCollection<Row> result = windowedRows.apply("sql", SqlTransform.query(query));

  assertEquals(new GlobalWindows(), result.getWindowingStrategy().getWindowFn());
  // Expected sums per pane: 1+2, 3+4 and 5+6.
  PAssert.that(result).containsInAnyOrder(rowsWithSingleIntField("sum", Arrays.asList(3, 7, 11)));

  pipeline.run();
}
 
Example #27
Source File: TextIOWriteTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Regression test for https://issues.apache.org/jira/browse/BEAM-3169: building a windowed write
 * behind a non-repeated element-count trigger is expected to fail with an "Unsafe trigger"
 * {@link IllegalArgumentException}.
 */
@Test
@Category(NeedsRunner.class)
public void testWindowedWritesWithOnceTrigger() throws Throwable {
  p.enableAbandonedNodeEnforcement(false);
  expectedException.expect(IllegalArgumentException.class);
  expectedException.expectMessage("Unsafe trigger");

  // Taken at face value, this trigger should let all data be written. Its continuation,
  // however, is elementCountAtLeast(1), so a buggy implementation that ran a GBK before
  // renaming files would rename only 1 file.
  Window<String> fireAfterThreeElements =
      Window.<String>into(FixedWindows.of(Duration.standardSeconds(1)))
          .triggering(AfterPane.elementCountAtLeast(3))
          .withAllowedLateness(Duration.standardMinutes(1))
          .discardingFiredPanes();

  PCollection<String> data = p.apply(Create.of("0", "1", "2")).apply(fireAfterThreeElements);

  String outputPrefix = new File(tempFolder.getRoot(), "windowed-writes").getAbsolutePath();
  PCollection<String> filenames =
      data.apply(
              TextIO.write()
                  .to(outputPrefix)
                  .withNumShards(2)
                  .withWindowedWrites()
                  .<Void>withOutputFilenames())
          .getPerDestinationOutputFilenames()
          .apply(Values.create());
}
 
Example #28
Source File: BeamModel.java    From streamingbook with Apache License 2.0 5 votes vote down vote up
/**
 * Windows the input into sessions with a {@code ONE_MINUTE} gap, sums the integers per key and
 * formats the sums through {@code FormatAsStrings}.
 */
@Override
public PCollection<String> expand(PCollection<KV<String, Integer>> input) {
    // Early firings: processing time, ONE_MINUTE after the first element in the pane.
    // Late firings: on every late element. Fired panes accumulate.
    Window<KV<String, Integer>> sessionWindows =
        Window.<KV<String, Integer>>into(Sessions.withGapDuration(ONE_MINUTE))
            .triggering(
                AfterWatermark.pastEndOfWindow()
                    .withEarlyFirings(
                        AfterProcessingTime.pastFirstElementInPane().plusDelayOf(ONE_MINUTE))
                    .withLateFirings(AfterPane.elementCountAtLeast(1)))
            .withAllowedLateness(Duration.standardDays(1000))
            .accumulatingFiredPanes();

    return input
        .apply(sessionWindows)
        .apply(Sum.integersPerKey())
        .apply(ParDo.of(new FormatAsStrings()));
}
 
Example #29
Source File: GroupByKeyTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Applying a GroupByKey after a bare {@code AfterPane.elementCountAtLeast(1)} trigger is expected
 * to fail with an "Unsafe trigger" {@link IllegalArgumentException}.
 */
@Test
public void testGroupByKeyFinishingTriggerRejected() {
  Window<KV<String, String>> bareElementCountTrigger =
      Window.<KV<String, String>>configure()
          .discardingFiredPanes()
          .triggering(AfterPane.elementCountAtLeast(1));

  PCollection<KV<String, String>> input =
      p.apply(Create.of(KV.of("hello", "goodbye"))).apply(bareElementCountTrigger);

  // Register the expectation only now, so that it applies to the GroupByKey application below.
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("Unsafe trigger");
  input.apply(GroupByKey.create());
}
 
Example #30
Source File: GroupByKeyTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Applying a GroupByKey after an end-of-window trigger with element-count late firings is
 * expected to succeed without an exception.
 */
@Test
public void testGroupByKeyEndOfWindowLateFiringsOk() {
  Window<KV<String, String>> endOfWindowWithLateFirings =
      Window.<KV<String, String>>configure()
          .discardingFiredPanes()
          .triggering(
              AfterWatermark.pastEndOfWindow()
                  .withLateFirings(AfterPane.elementCountAtLeast(1)))
          .withAllowedLateness(Duration.millis(10));

  PCollection<KV<String, String>> input =
      p.apply(Create.of(KV.of("hello", "goodbye"))).apply(endOfWindowWithLateFirings);

  // OK: this application must not throw.
  input.apply(GroupByKey.create());
}