Java Code Examples for org.apache.beam.sdk.transforms.windowing.FixedWindows#of()

The following examples show how to use org.apache.beam.sdk.transforms.windowing.FixedWindows#of(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source File: LateDataUtilsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that when a window's end plus the allowed lateness lies beyond the global window, the
 * reported garbage collection time equals the global window's maximum timestamp.
 */
@Test
public void garbageCollectionTimeAfterEndOfGlobalWindowWithLateness() {
  Duration lateness = Duration.millis(Long.MAX_VALUE);
  FixedWindows fiveMinuteWindows = FixedWindows.of(Duration.standardMinutes(5));
  WindowingStrategy<?, ?> windowingStrategy =
      WindowingStrategy.globalDefault()
          .withWindowFn(fiveMinuteWindows)
          .withAllowedLateness(lateness);

  IntervalWindow assigned = fiveMinuteWindows.assignWindow(new Instant(-100));

  // Precondition: the naive expiry (window end + lateness) is past the end of the global window.
  Instant naiveExpiry = assigned.maxTimestamp().plus(lateness);
  assertThat(naiveExpiry, Matchers.greaterThan(GlobalWindow.INSTANCE.maxTimestamp()));

  // The computed garbage collection time must match the global window's max timestamp.
  Instant gcTime = LateDataUtils.garbageCollectionTime(assigned, windowingStrategy);
  assertThat(gcTime, equalTo(GlobalWindow.INSTANCE.maxTimestamp()));
}
 
Example 2
Source File: CountByKeyTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a named CountByKey with windowing, trigger, accumulation mode and allowed lateness, then
 * checks that each setting is visible on the produced operator.
 */
@Test
public void testBuild() {
  final PCollection<String> input = TestUtils.createMockDataset(TypeDescriptors.strings());
  final FixedWindows hourlyWindows = FixedWindows.of(org.joda.time.Duration.standardHours(1));
  final DefaultTrigger defaultTrigger = DefaultTrigger.of();
  final Duration lateness = Duration.millis(1000);

  final PCollection<KV<String, Long>> counts =
      CountByKey.named("CountByKey1")
          .of(input)
          .keyBy(element -> element)
          .windowBy(hourlyWindows)
          .triggeredBy(defaultTrigger)
          .discardingFiredPanes()
          .withAllowedLateness(lateness)
          .output();
  final CountByKey operator = (CountByKey) TestUtils.getProducer(counts);

  // Operator identity and key extraction.
  assertTrue(operator.getName().isPresent());
  assertEquals("CountByKey1", operator.getName().get());
  assertNotNull(operator.getKeyExtractor());

  // Windowing configuration.
  assertTrue(operator.getWindow().isPresent());
  final WindowDesc<?> windowDesc = WindowDesc.of((Window<?>) operator.getWindow().get());
  assertEquals(hourlyWindows, windowDesc.getWindowFn());
  assertEquals(defaultTrigger, windowDesc.getTrigger());
  assertEquals(AccumulationMode.DISCARDING_FIRED_PANES, windowDesc.getAccumulationMode());
  assertEquals(lateness, windowDesc.getAllowedLateness());
}
 
Example 3
Source File: CountByKeyTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that windowing configured inside an {@code applyIf(true, ...)} branch is present on the
 * produced CountByKey operator.
 */
@Test
public void testWindow_applyIf() {
  final PCollection<String> input = TestUtils.createMockDataset(TypeDescriptors.strings());
  final FixedWindows hourlyWindows = FixedWindows.of(org.joda.time.Duration.standardHours(1));
  final DefaultTrigger defaultTrigger = DefaultTrigger.of();

  final PCollection<KV<String, Long>> counts =
      CountByKey.named("CountByKey1")
          .of(input)
          .keyBy(element -> element)
          .applyIf(
              true,
              builder ->
                  builder
                      .windowBy(hourlyWindows)
                      .triggeredBy(defaultTrigger)
                      .discardingFiredPanes())
          .output();
  final CountByKey operator = (CountByKey) TestUtils.getProducer(counts);

  assertTrue(operator.getWindow().isPresent());
  final WindowDesc<?> windowDesc = WindowDesc.of((Window<?>) operator.getWindow().get());
  assertEquals(hourlyWindows, windowDesc.getWindowFn());
  assertEquals(defaultTrigger, windowDesc.getTrigger());
  assertEquals(AccumulationMode.DISCARDING_FIRED_PANES, windowDesc.getAccumulationMode());
}
 
Example 4
Source File: DistinctTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a named Distinct with windowing, trigger, accumulation mode and allowed lateness, then
 * checks that the name and window settings are visible on the produced operator.
 */
@Test
public void testBuild() {
  final PCollection<String> input = TestUtils.createMockDataset(TypeDescriptors.strings());
  final FixedWindows hourlyWindows = FixedWindows.of(org.joda.time.Duration.standardHours(1));
  final DefaultTrigger defaultTrigger = DefaultTrigger.of();
  final Duration lateness = Duration.millis(1000);

  final PCollection<String> distinctValues =
      Distinct.named("Distinct1")
          .of(input)
          .windowBy(hourlyWindows)
          .triggeredBy(defaultTrigger)
          .discardingFiredPanes()
          .withAllowedLateness(lateness)
          .output();
  final Distinct operator = (Distinct) TestUtils.getProducer(distinctValues);

  // Operator identity.
  assertTrue(operator.getName().isPresent());
  assertEquals("Distinct1", operator.getName().get());

  // Windowing configuration.
  assertTrue(operator.getWindow().isPresent());
  @SuppressWarnings("unchecked")
  final WindowDesc<?> desc = WindowDesc.of((Window) operator.getWindow().get());
  assertEquals(hourlyWindows, desc.getWindowFn());
  assertEquals(defaultTrigger, desc.getTrigger());
  assertEquals(AccumulationMode.DISCARDING_FIRED_PANES, desc.getAccumulationMode());
  assertEquals(lateness, desc.getAllowedLateness());
}
 
Example 5
Source File: WatermarkCallbackExecutorTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Registers two callbacks for the same step and window and expects both to have counted the latch
 * down after the watermark is fired at the window's end.
 */
@Test
public void multipleCallbacksShouldFireFires() throws Exception {
  final int callbackCount = 2;
  CountDownLatch fired = new CountDownLatch(callbackCount);
  Duration tenMinutes = Duration.standardMinutes(10);
  WindowFn<Object, IntervalWindow> fixedWindows = FixedWindows.of(tenMinutes);
  IntervalWindow firstWindow =
      new IntervalWindow(new Instant(0L), new Instant(0L).plus(Duration.standardMinutes(10)));

  // Each registration gets its own strategy and callback instance, sharing the one latch.
  for (int i = 0; i < callbackCount; i++) {
    executor.callOnGuaranteedFiring(
        create, firstWindow, WindowingStrategy.of(fixedWindows), new CountDownLatchCallback(fired));
  }

  Instant watermark = new Instant(0L).plus(Duration.standardMinutes(10));
  executor.fireForWatermark(create, watermark);

  boolean allFired = fired.await(500, TimeUnit.MILLISECONDS);
  assertThat(allFired, equalTo(true));
}
 
Example 6
Source File: LateDataUtilsTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the window assigned to the maximum timestamp ends at the global window's maximum
 * timestamp and that its garbage collection time equals that same instant.
 */
@Test
public void garbageCollectionTimeAfterEndOfGlobalWindow() {
  FixedWindows fiveMinuteWindows = FixedWindows.of(Duration.standardMinutes(5));
  WindowingStrategy<?, ?> windowingStrategy =
      WindowingStrategy.globalDefault().withWindowFn(fiveMinuteWindows);

  Instant latestTimestamp = new Instant(BoundedWindow.TIMESTAMP_MAX_VALUE);
  IntervalWindow lastWindow = fiveMinuteWindows.assignWindow(latestTimestamp);

  assertThat(lastWindow.maxTimestamp(), equalTo(GlobalWindow.INSTANCE.maxTimestamp()));

  Instant gcTime = LateDataUtils.garbageCollectionTime(lastWindow, windowingStrategy);
  assertThat(gcTime, equalTo(GlobalWindow.INSTANCE.maxTimestamp()));
}
 
Example 7
Source File: LateDataUtilsTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that for a window near the epoch the garbage collection time is the window's maximum
 * timestamp plus the allowed lateness.
 */
@Test
public void beforeEndOfGlobalWindowSame() {
  Duration lateness = Duration.standardMinutes(2);
  FixedWindows fiveMinuteWindows = FixedWindows.of(Duration.standardMinutes(5));
  WindowingStrategy<?, ?> windowingStrategy =
      WindowingStrategy.globalDefault()
          .withWindowFn(fiveMinuteWindows)
          .withAllowedLateness(lateness);

  IntervalWindow assigned = fiveMinuteWindows.assignWindow(new Instant(10));

  Instant gcTime = LateDataUtils.garbageCollectionTime(assigned, windowingStrategy);
  Instant expected = assigned.maxTimestamp().plus(lateness);
  assertThat(gcTime, equalTo(expected));
}
 
Example 8
Source File: GroupAlsoByWindowParDoFnFactoryTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Checks that a WindowingStrategy created from a window function reports that same function. */
@Test
public void testJavaWindowingStrategyDeserialization() throws Exception {
  WindowFn seventeenMilliWindows = FixedWindows.of(Duration.millis(17));
  WindowingStrategy strategy = WindowingStrategy.of(seventeenMilliWindows);

  assertThat(strategy.getWindowFn(), equalTo(seventeenMilliWindows));
}
 
Example 9
Source File: PubsubWordCount.java    From cloud-bigtable-examples with Apache License 2.0 5 votes vote down vote up
/**
 * <p>Builds and runs a streaming Dataflow pipeline made of the following steps:</p>
 * <ol>
 *   <li> Read strings from the Cloud Pubsub topic given by the options
 *   <li> Window them into fixed windows whose size in minutes comes from the options
 *   <li> Extract words and count each element
 *   <li> Convert every count with {@code MUTATION_TRANSFORM}
 *   <li> Write the results to the configured Cloud Bigtable table
 * </ol>
 *
 * <p>After the pipeline run returns, message injection into Cloud Pubsub is started.</p>
 *
 * @param args Arguments to use to configure the Dataflow Pipeline.  The first three are required
 *   when running via managed resource in Google Cloud Platform.  Those options should be omitted
 *   for LOCAL runs.  The next four arguments are to configure the Bigtable connection. The last
 *   two items are for Cloud Pubsub.
 *        --runner=BlockingDataflowPipelineRunner
 *        --project=[dataflow project] \\
 *        --stagingLocation=gs://[your google storage bucket] \\
 *        --bigtableProjectId=[bigtable project] \\
 *        --bigtableInstanceId=[bigtable instance id] \\
 *        --bigtableTableId=[bigtable tableName]
 *        --inputFile=[file path on GCS]
 *        --pubsubTopic=projects/[project name]/topics/[topic name]
 * @throws Exception if option parsing, pipeline execution or message injection fails
 */

public static void main(String[] args) throws Exception {
  // Parse and validate the command line into the combined Bigtable/Pubsub options view.
  BigtablePubsubOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(BigtablePubsubOptions.class);

  // Target table: project, instance and table id all come from the parsed options.
  CloudBigtableTableConfiguration tableConfig =
      new CloudBigtableTableConfiguration.Builder()
          .withProjectId(options.getBigtableProjectId())
          .withInstanceId(options.getBigtableInstanceId())
          .withTableId(options.getBigtableTableId())
          .build();

  // The Dataflow runner is forced regardless of any --runner argument so that the pipelines
  // can be cancelled automatically and the two jobs can run at the same time.
  options.setRunner(DataflowRunner.class);

  DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
  dataflowOptions.setStreaming(true);

  Pipeline pipeline = Pipeline.create(options);

  Duration windowSize = Duration.standardMinutes(options.getWindowSize());
  FixedWindows fixedWindows = FixedWindows.of(windowSize);

  pipeline
      .apply(PubsubIO.readStrings().fromTopic(options.getPubsubTopic()))
      .apply(Window.<String> into(fixedWindows))
      .apply(ParDo.of(new ExtractWordsFn()))
      .apply(Count.<String> perElement())
      .apply(ParDo.of(MUTATION_TRANSFORM))
      .apply(CloudBigtableIO.writeToTable(tableConfig));

  // NOTE(review): for a streaming job waitUntilFinish() may not return until the job is
  // cancelled, which would delay the injector below - confirm this ordering is intended.
  pipeline.run().waitUntilFinish();

  // Start a second job to inject messages into a Cloud Pubsub topic
  injectMessages(options);
}
 
Example 10
Source File: ParDoTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a DoFn that echoes each element and additionally emits "finish" from its finish-bundle
 * hook into the fixed window assigned to timestamp 3, then validates the output with
 * {@code Checker}.
 */
@Test
@Category(ValidatesRunner.class)
public void testWindowingInStartAndFinishBundle() {

  final FixedWindows oneMilliWindows = FixedWindows.of(Duration.millis(1));

  DoFn<String, String> echoAndEmitOnFinish =
      new DoFn<String, String>() {
        @ProcessElement
        public void processElement(
            @Element String element,
            @Timestamp Instant timestamp,
            OutputReceiver<String> r) {
          r.output(element);
          System.out.println("Process: " + element + ":" + timestamp.getMillis());
        }

        @FinishBundle
        public void finishBundle(FinishBundleContext c) {
          // The finish-bundle output needs an explicit timestamp and window.
          Instant finishTimestamp = new Instant(3);
          c.output("finish", finishTimestamp, oneMilliWindows.assignWindow(finishTimestamp));
          System.out.println("Finish: 3");
        }
      };

  PCollection<String> output =
      pipeline
          .apply(Create.timestamped(TimestampedValue.of("elem", new Instant(1))))
          .apply(Window.into(oneMilliWindows))
          .apply(ParDo.of(echoAndEmitOnFinish))
          .apply(ParDo.of(new PrintingDoFn()));

  PAssert.that(output).satisfies(new Checker());

  pipeline.run();
}
 
Example 11
Source File: WatermarkCallbackExecutorTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Registers a callback for the {@code sum} step and fires the watermark for the {@code create}
 * step; the latch is expected to stay untouched within the timeout.
 */
@Test
public void unrelatedStepShouldNotFire() throws Exception {
  CountDownLatch fired = new CountDownLatch(1);
  Duration tenMinutes = Duration.standardMinutes(10);
  WindowFn<Object, IntervalWindow> fixedWindows = FixedWindows.of(tenMinutes);
  IntervalWindow firstWindow =
      new IntervalWindow(new Instant(0L), new Instant(0L).plus(Duration.standardMinutes(10)));

  executor.callOnGuaranteedFiring(
      sum, firstWindow, WindowingStrategy.of(fixedWindows), new CountDownLatchCallback(fired));

  // The watermark is advanced for a different step than the one the callback was registered on.
  Instant watermark = new Instant(0L).plus(Duration.standardMinutes(20));
  executor.fireForWatermark(create, watermark);

  boolean callbackRan = fired.await(500, TimeUnit.MILLISECONDS);
  assertThat(callbackRan, equalTo(false));
}
 
Example 12
Source File: AssignWindowsRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the runner re-assigns a value to the fixed window for its timestamp while keeping
 * the value, timestamp and pane info, for both a global-window input and an interval-window
 * input.
 */
@Test
public void singleInputSingleOutputSucceeds() throws Exception {
  FixedWindows tenMinuteWindows = FixedWindows.of(Duration.standardMinutes(10L));
  AssignWindowsRunner<Integer, IntervalWindow> runner =
      AssignWindowsRunner.create(tenMinuteWindows);

  // Case 1: a value that arrives in the global window.
  assertThat(
      runner.assignWindows(WindowedValue.valueInGlobalWindow(1)),
      equalTo(
          WindowedValue.of(
              1,
              BoundedWindow.TIMESTAMP_MIN_VALUE,
              tenMinuteWindows.assignWindow(BoundedWindow.TIMESTAMP_MIN_VALUE),
              PaneInfo.NO_FIRING)));

  // Case 2: a value that already sits in an unrelated three-minute interval window.
  IntervalWindow previousWindow =
      new IntervalWindow(new Instant(-120000L), Duration.standardMinutes(3L));
  IntervalWindow expectedWindow = tenMinuteWindows.assignWindow(new Instant(-10L));
  assertThat(
      runner.assignWindows(
          WindowedValue.of(
              2, new Instant(-10L), previousWindow, PaneInfo.ON_TIME_AND_ONLY_FIRING)),
      equalTo(
          WindowedValue.of(
              2, new Instant(-10L), expectedWindow, PaneInfo.ON_TIME_AND_ONLY_FIRING)));
}
 
Example 13
Source File: TopPerKeyTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a named TopPerKey with key, value and score extractors plus windowing settings, then
 * checks that each of them is visible on the produced operator.
 */
@Test
public void testBuild() {
  final PCollection<String> input = TestUtils.createMockDataset(TypeDescriptors.strings());
  final FixedWindows hourlyWindows = FixedWindows.of(org.joda.time.Duration.standardHours(1));
  final DefaultTrigger defaultTrigger = DefaultTrigger.of();
  final Duration lateness = Duration.millis(1000);

  final PCollection<Triple<String, Long, Long>> top =
      TopPerKey.named("TopPerKey1")
          .of(input)
          .keyBy(element -> element)
          .valueBy(element -> 1L)
          .scoreBy(element -> 1L)
          .windowBy(hourlyWindows)
          .triggeredBy(defaultTrigger)
          .discardingFiredPanes()
          .withAllowedLateness(lateness)
          .output();
  final TopPerKey operator = (TopPerKey) TestUtils.getProducer(top);

  // Operator identity and extractors.
  assertTrue(operator.getName().isPresent());
  assertEquals("TopPerKey1", operator.getName().get());
  assertNotNull(operator.getKeyExtractor());
  assertNotNull(operator.getValueExtractor());
  assertNotNull(operator.getScoreExtractor());

  // Windowing configuration.
  assertTrue(operator.getWindow().isPresent());
  @SuppressWarnings("unchecked")
  final WindowDesc<?> desc = WindowDesc.of((Window) operator.getWindow().get());
  assertEquals(hourlyWindows, desc.getWindowFn());
  assertEquals(defaultTrigger, desc.getTrigger());
  assertEquals(AccumulationMode.DISCARDING_FIRED_PANES, desc.getAccumulationMode());
  assertEquals(lateness, desc.getAllowedLateness());
}
 
Example 14
Source File: ReduceByKeyTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a named, combine-style ReduceByKey with windowing settings, then checks that the name,
 * extractors, combine functions and window settings are visible on the produced operator.
 */
@Test
public void testBuild() {
  final PCollection<String> input = TestUtils.createMockDataset(TypeDescriptors.strings());
  final FixedWindows hourlyWindows = FixedWindows.of(org.joda.time.Duration.standardHours(1));
  final DefaultTrigger defaultTrigger = DefaultTrigger.of();
  final Duration lateness = Duration.standardSeconds(1000);

  final PCollection<KV<String, Long>> sums =
      ReduceByKey.named("ReduceByKey1")
          .of(input)
          .keyBy(element -> element)
          .valueBy(element -> 1L)
          .combineBy(Sums.ofLongs())
          .windowBy(hourlyWindows)
          .triggeredBy(defaultTrigger)
          .discardingFiredPanes()
          .withAllowedLateness(lateness)
          .output();
  final ReduceByKey operator = (ReduceByKey) TestUtils.getProducer(sums);

  // Operator identity and extractors.
  assertTrue(operator.getName().isPresent());
  assertEquals("ReduceByKey1", operator.getName().get());
  assertNotNull(operator.getKeyExtractor());
  assertNotNull(operator.getValueExtractor());

  // Combine-function plumbing.
  assertTrue(operator.isCombineFnStyle());
  assertNotNull(operator.getAccumulatorFactory());
  assertNotNull(operator.getAccumulate());
  assertNotNull(operator.getAccumulatorType());
  assertNotNull(operator.getMergeAccumulators());
  assertNotNull(operator.getOutputFn());

  // Windowing configuration.
  assertTrue(operator.getWindow().isPresent());
  @SuppressWarnings("unchecked")
  final WindowDesc<?> desc = WindowDesc.of((Window) operator.getWindow().get());
  assertEquals(hourlyWindows, desc.getWindowFn());
  assertEquals(defaultTrigger, desc.getTrigger());
  assertEquals(AccumulationMode.DISCARDING_FIRED_PANES, desc.getAccumulationMode());
  assertEquals(lateness, desc.getAllowedLateness());
}
 
Example 15
Source File: WatermarkCallbackExecutorTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Registers a callback for a ten-minute window and fires the watermark at the five-minute mark;
 * the latch is expected to stay untouched within the timeout.
 */
@Test
public void noCallbacksShouldFire() throws Exception {
  CountDownLatch fired = new CountDownLatch(1);
  Duration tenMinutes = Duration.standardMinutes(10);
  WindowFn<Object, IntervalWindow> fixedWindows = FixedWindows.of(tenMinutes);
  IntervalWindow firstWindow =
      new IntervalWindow(new Instant(0L), new Instant(0L).plus(Duration.standardMinutes(10)));

  executor.callOnGuaranteedFiring(
      create, firstWindow, WindowingStrategy.of(fixedWindows), new CountDownLatchCallback(fired));

  // The watermark stops halfway through the window.
  Instant watermark = new Instant(0L).plus(Duration.standardMinutes(5));
  executor.fireForWatermark(create, watermark);

  boolean callbackRan = fired.await(500, TimeUnit.MILLISECONDS);
  assertThat(callbackRan, equalTo(false));
}
 
Example 16
Source File: CombineTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Uses a windowed global integer sum as a singleton side input and checks the value seen by main
 * input elements: 4 in the window that holds the side-input elements and 0 in the window of the
 * minimum-timestamp element.
 */
@Test
@Category({ValidatesRunner.class, UsesSideInputs.class})
public void testWindowedCombineGloballyAsSingletonView() {
  FixedWindows oneMinuteWindows = FixedWindows.of(Duration.standardMinutes(1));

  // Side input: both elements carry timestamp 100, so they land in the same fixed window.
  final PCollectionView<Integer> sumView =
      pipeline
          .apply(
              "CreateSideInput",
              Create.timestamped(
                  TimestampedValue.of(1, new Instant(100)),
                  TimestampedValue.of(3, new Instant(100))))
          .apply("WindowSideInput", Window.into(oneMinuteWindows))
          .apply("CombineSideInput", Sum.integersGlobally().asSingletonView());

  TimestampedValue<Void> inPopulatedWindow = TimestampedValue.of(null, new Instant(100));
  TimestampedValue<Void> inEmptyWindow = TimestampedValue.atMinimumTimestamp(null);

  // Emits whatever the side input holds for the window of the current main-input element.
  DoFn<Void, Integer> emitSideInput =
      new DoFn<Void, Integer>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          c.output(c.sideInput(sumView));
        }
      };

  PCollection<Integer> output =
      pipeline
          .apply(
              "CreateMainInput",
              Create.timestamped(inPopulatedWindow, inEmptyWindow).withCoder(VoidCoder.of()))
          .apply("WindowMainInput", Window.into(oneMinuteWindows))
          .apply("OutputSideInput", ParDo.of(emitSideInput).withSideInputs(sumView));

  PAssert.that(output).containsInAnyOrder(4, 0);
  PAssert.that(output)
      .inWindow(oneMinuteWindows.assignWindow(inPopulatedWindow.getTimestamp()))
      .containsInAnyOrder(4);
  PAssert.that(output)
      .inWindow(oneMinuteWindows.assignWindow(inEmptyWindow.getTimestamp()))
      .containsInAnyOrder(0);
  pipeline.run();
}
 
Example 17
Source File: CreateStreamTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Feeds three batches through a one-second fixed window with early firings and discarding mode,
 * then asserts which elements show up in the window overall, in the combined non-late panes, in
 * the on-time pane and in the final pane.
 */
@Test
public void testDiscardingMode() throws IOException {
  CreateStream<String> stream =
      CreateStream.of(StringUtf8Coder.of(), batchDuration())
          .nextBatch(
              TimestampedValue.of("firstPane", new Instant(100)),
              TimestampedValue.of("alsoFirstPane", new Instant(200)))
          .advanceWatermarkForNextBatch(new Instant(1001L))
          .nextBatch(TimestampedValue.of("onTimePane", new Instant(500)))
          .advanceNextBatchWatermarkToInfinity()
          .nextBatch(
              TimestampedValue.of("finalLatePane", new Instant(750)),
              TimestampedValue.of("alsoFinalLatePane", new Instant(250)));

  FixedWindows oneSecondWindows = FixedWindows.of(Duration.millis(1000L));
  Duration lateness = Duration.millis(5000L);

  // Early firing after two elements, no late firings, panes discarded once fired.
  Window<String> windowing =
      Window.<String>into(oneSecondWindows)
          .triggering(
              AfterWatermark.pastEndOfWindow()
                  .withEarlyFirings(AfterPane.elementCountAtLeast(2))
                  .withLateFirings(Never.ever()))
          .discardingFiredPanes()
          .withAllowedLateness(lateness);

  PCollection<String> grouped =
      p.apply(stream)
          .apply(windowing)
          .apply(WithKeys.of(1))
          .apply(GroupByKey.create())
          .apply(Values.create())
          .apply(Flatten.iterables());

  IntervalWindow firstWindow = oneSecondWindows.assignWindow(new Instant(100));
  PAssert.that(grouped)
      .inWindow(firstWindow)
      .containsInAnyOrder(
          "firstPane", "alsoFirstPane", "onTimePane", "finalLatePane", "alsoFinalLatePane");
  PAssert.that(grouped)
      .inCombinedNonLatePanes(firstWindow)
      .containsInAnyOrder("firstPane", "alsoFirstPane", "onTimePane");
  PAssert.that(grouped).inOnTimePane(firstWindow).containsInAnyOrder("onTimePane");
  PAssert.that(grouped)
      .inFinalPane(firstWindow)
      .containsInAnyOrder("finalLatePane", "alsoFinalLatePane");

  p.run();
}
 
Example 18
Source File: ReduceFnRunnerTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that the garbage collection time for a fixed window does not overflow the end of time.
 *
 * <p>The element timestamp is chosen so that its window ends before the global window's maximum
 * timestamp while the window end plus the allowed lateness lies after it. Both conditions are
 * asserted before the runner is exercised.
 */
@Test
public void testFixedWindowEndOfTimeGarbageCollection() throws Exception {
  Duration allowedLateness = Duration.standardDays(365);
  Duration windowSize = Duration.millis(10);
  WindowFn<Object, IntervalWindow> windowFn = FixedWindows.of(windowSize);

  // This timestamp falls into a window where the end of the window is before the end of the
  // global window - the "end of time" - yet its expiration time is after.
  final Instant elementTimestamp =
      GlobalWindow.INSTANCE.maxTimestamp().minus(allowedLateness).plus(1);

  // Resolve the window through assignWindows with a context that only exposes the timestamp;
  // element() and window() throw if they are consulted.
  IntervalWindow window =
      Iterables.getOnlyElement(
          windowFn.assignWindows(
              windowFn.new AssignContext() {
                @Override
                public Object element() {
                  throw new UnsupportedOperationException();
                }

                @Override
                public Instant timestamp() {
                  return elementTimestamp;
                }

                @Override
                public BoundedWindow window() {
                  throw new UnsupportedOperationException();
                }
              }));

  // Preconditions of the scenario: window ends before the end of time, expiry lands after it.
  assertTrue(window.maxTimestamp().isBefore(GlobalWindow.INSTANCE.maxTimestamp()));
  assertTrue(
      window.maxTimestamp().plus(allowedLateness).isAfter(GlobalWindow.INSTANCE.maxTimestamp()));

  // Exercise the runner with a combining (integer sum) tester in discarding mode, with the
  // trigger firing at the end of the window and never for late data.

  WindowingStrategy<?, IntervalWindow> strategy =
      WindowingStrategy.of((WindowFn<?, IntervalWindow>) windowFn)
          .withTimestampCombiner(TimestampCombiner.EARLIEST)
          .withTrigger(AfterWatermark.pastEndOfWindow().withLateFirings(Never.ever()))
          .withMode(AccumulationMode.DISCARDING_FIRED_PANES)
          .withAllowedLateness(allowedLateness);

  ReduceFnTester<Integer, Integer, IntervalWindow> tester =
      ReduceFnTester.combining(strategy, Sum.ofIntegers(), VarIntCoder.of());

  tester.injectElements(TimestampedValue.of(13, elementTimestamp));

  // Should fire ON_TIME pane and there will be a checkState that the cleanup time
  // is prior to timestamp max value
  tester.advanceInputWatermark(window.maxTimestamp());

  // Nothing in the ON_TIME pane (not governed by triggers, but by ReduceFnRunner)
  assertThat(tester.extractOutput(), emptyIterable());

  tester.injectElements(TimestampedValue.of(42, elementTimestamp));

  // Now the final pane should fire, demonstrating that the GC time was truncated
  // (the expected value 55 is the sum of both injected elements, 13 + 42)
  tester.advanceInputWatermark(GlobalWindow.INSTANCE.maxTimestamp());
  assertThat(tester.extractOutput(), contains(isWindowedValue(equalTo(55))));
}
 
Example 19
Source File: CreateViewTransformTest.java    From incubator-nemo with Apache License 2.0 4 votes vote down vote up
/**
 * Drives a CreateViewTransform with windowed elements and watermarks over one-second fixed
 * windows and checks what reaches the output collector:
 *
 * <ul>
 *   <li>after watermark 1003, one output for the window of {@code ts1} with value 3 and an
 *       output watermark equal to that window's maximum timestamp;
 *   <li>after watermark 1400, no outputs and no watermarks;
 *   <li>after watermark 2100, one output for the window of {@code ts4} with value 4 and an
 *       output watermark equal to that window's maximum timestamp;
 *   <li>after {@code close()}, no further outputs.
 * </ul>
 */
@Test
@SuppressWarnings("unchecked")
public void test() {

  final FixedWindows fixedwindows = FixedWindows.of(Duration.standardSeconds(1));

  final CreateViewTransform<String, Integer> viewTransform =
    new CreateViewTransform(new SumViewFn());

  final Instant ts1 = new Instant(1);
  final Instant ts2 = new Instant(100);
  final Instant ts3 = new Instant(300);
  final Watermark watermark = new Watermark(1003);
  final Instant ts4 = new Instant(1200);
  final Watermark watermark2 = new Watermark(1400);
  final Instant ts5 = new Instant(1600);
  final Instant ts6 = new Instant(1800);
  final Instant ts7 = new Instant(1900);
  final Watermark watermark3 = new Watermark(2100);


  final Transform.Context context = mock(Transform.Context.class);
  final TestOutputCollector<Integer> oc = new TestOutputCollector();
  viewTransform.prepare(context, oc);

  viewTransform.onData(WindowedValue.of(
    KV.of(null, "hello"), ts1, fixedwindows.assignWindow(ts1), PaneInfo.NO_FIRING));

  viewTransform.onData(WindowedValue.of(
    KV.of(null, "world"), ts2, fixedwindows.assignWindow(ts2), PaneInfo.NO_FIRING));

  viewTransform.onData(WindowedValue.of(
    KV.of(null, "hello"), ts3, fixedwindows.assignWindow(ts3), PaneInfo.NO_FIRING));

  viewTransform.onWatermark(watermark);

  // materialized data
  assertEquals(Arrays.asList(fixedwindows.assignWindow(ts1)), oc.outputs.get(0).getWindows());
  // Integer.valueOf replaces the deprecated Integer(int) constructor; equality is unchanged.
  assertEquals(Integer.valueOf(3), oc.outputs.get(0).getValue());

  // check output watermark
  assertEquals(fixedwindows.assignWindow(ts1).maxTimestamp().getMillis(),
    oc.watermarks.get(0).getTimestamp());

  oc.outputs.clear();
  oc.watermarks.clear();


  viewTransform.onData(WindowedValue.of(
    KV.of(null, "a"), ts4, fixedwindows.assignWindow(ts4), PaneInfo.NO_FIRING));

  // do not emit anything
  viewTransform.onWatermark(watermark2);
  assertEquals(0, oc.outputs.size());
  assertEquals(0, oc.watermarks.size());

  viewTransform.onData(WindowedValue.of(
    KV.of(null, "a"), ts5, fixedwindows.assignWindow(ts5), PaneInfo.NO_FIRING));

  viewTransform.onData(WindowedValue.of(
    KV.of(null, "a"), ts6, fixedwindows.assignWindow(ts6), PaneInfo.NO_FIRING));

  viewTransform.onData(WindowedValue.of(
    KV.of(null, "b"), ts7, fixedwindows.assignWindow(ts7), PaneInfo.NO_FIRING));

  // emit windowed value
  viewTransform.onWatermark(watermark3);

  // materialized data
  assertEquals(Arrays.asList(fixedwindows.assignWindow(ts4)), oc.outputs.get(0).getWindows());
  assertEquals(Integer.valueOf(4), oc.outputs.get(0).getValue());

  // check output watermark
  assertEquals(fixedwindows.assignWindow(ts4).maxTimestamp().getMillis(),
    oc.watermarks.get(0).getTimestamp());

  oc.outputs.clear();

  viewTransform.close();
  assertEquals(0, oc.outputs.size());
}
 
Example 20
Source File: BeamWindowStepHandler.java    From kettle-beam with Apache License 2.0 4 votes vote down vote up
/**
 * Applies the windowing configured on a Beam Window step to the input collection and registers
 * the resulting collection in the step collection map under the step's name.
 *
 * <p>Supported window types are fixed, sliding, session and global. When any of the start, end or
 * max window field names is configured, a {@code WindowInfoFn} is applied after the windowing.
 *
 * @param log channel used to report that the step was handled
 * @param stepMeta the step being handled; its meta interface must be a {@code BeamWindowMeta}
 * @param stepCollectionMap map that receives the resulting collection, keyed by step name
 * @param pipeline the pipeline being built (not used directly in this method)
 * @param inputRowMeta row layout of the input, handed to {@code WindowInfoFn} as JSON
 * @param previousSteps the preceding steps; only their count is logged
 * @param input the collection to window
 * @throws KettleException if no window type is set, the window type is not supported, or the
 *     duration (or, for sliding windows, the "every" interval) is missing or out of range
 */
@Override public void handleStep( LogChannelInterface log, StepMeta stepMeta, Map<String, PCollection<KettleRow>> stepCollectionMap,
                                  Pipeline pipeline, RowMetaInterface inputRowMeta, List<StepMeta> previousSteps,
                                  PCollection<KettleRow> input ) throws KettleException {

  BeamWindowMeta beamWindowMeta = (BeamWindowMeta) stepMeta.getStepMetaInterface();

  if ( StringUtils.isEmpty( beamWindowMeta.getWindowType() ) ) {
    throw new KettleException( "Please specify a window type in Beam Window step '" + stepMeta.getName() + "'" );
  }

  // An unparsable or missing duration falls back to -1 and is rejected per window type below.
  String duration = transMeta.environmentSubstitute( beamWindowMeta.getDuration() );
  long durationSeconds = Const.toLong( duration, -1L );

  PCollection<KettleRow> stepPCollection;

  if ( BeamDefaults.WINDOW_TYPE_FIXED.equals( beamWindowMeta.getWindowType() ) ) {

    if ( durationSeconds <= 0 ) {
      throw new KettleException( "Please specify a valid positive window size (duration) for Beam window step '" + stepMeta.getName() + "'" );
    }

    FixedWindows fixedWindows = FixedWindows
      .of( Duration.standardSeconds( durationSeconds ) );
    stepPCollection = input.apply( Window.into( fixedWindows ) );

  } else if ( BeamDefaults.WINDOW_TYPE_SLIDING.equals( beamWindowMeta.getWindowType() ) ) {

    if ( durationSeconds <= 0 ) {
      throw new KettleException( "Please specify a valid positive window size (duration) for Beam window step '" + stepMeta.getName() + "'" );
    }

    String every = transMeta.environmentSubstitute( beamWindowMeta.getEvery() );
    long everySeconds = Const.toLong( every, -1L );

    // Reject a missing or non-positive slide interval here with a step-specific message instead
    // of handing the -1 fallback to SlidingWindows.
    if ( everySeconds <= 0 ) {
      throw new KettleException( "Please specify a valid positive slide interval (every) for Beam window step '" + stepMeta.getName() + "'" );
    }

    SlidingWindows slidingWindows = SlidingWindows
      .of( Duration.standardSeconds( durationSeconds ) )
      .every( Duration.standardSeconds( everySeconds ) );
    stepPCollection = input.apply( Window.into( slidingWindows ) );

  } else if ( BeamDefaults.WINDOW_TYPE_SESSION.equals( beamWindowMeta.getWindowType() ) ) {

    if ( durationSeconds < 600 ) {
      throw new KettleException(
        "Please specify a window size (duration) of at least 600 (10 minutes) for Beam window step '" + stepMeta.getName() + "'.  This is the minimum gap between session windows." );
    }

    Sessions sessionWindows = Sessions
      .withGapDuration( Duration.standardSeconds( durationSeconds ) );
    stepPCollection = input.apply( Window.into( sessionWindows ) );

  } else if ( BeamDefaults.WINDOW_TYPE_GLOBAL.equals( beamWindowMeta.getWindowType() ) ) {

    stepPCollection = input.apply( Window.into( new GlobalWindows() ) );

  } else {
    throw new KettleException( "Beam Window type '" + beamWindowMeta.getWindowType() + "' is not supported in step '" + stepMeta.getName() + "'" );
  }

  // Now get window information about the window if we asked about it...
  //
  if ( StringUtils.isNotEmpty( beamWindowMeta.getStartWindowField() ) ||
    StringUtils.isNotEmpty( beamWindowMeta.getEndWindowField() ) ||
    StringUtils.isNotEmpty( beamWindowMeta.getMaxWindowField() ) ) {

    // The field names are passed as max, start, end. The end field used to be filled with the
    // max field name a second time, so a configured end-of-window field was never handed over.
    // NOTE(review): assumes the fourth WindowInfoFn constructor argument is the end-of-window
    // field name - confirm against the constructor.
    WindowInfoFn windowInfoFn = new WindowInfoFn(
      stepMeta.getName(),
      transMeta.environmentSubstitute( beamWindowMeta.getMaxWindowField() ),
      transMeta.environmentSubstitute( beamWindowMeta.getStartWindowField() ),
      transMeta.environmentSubstitute( beamWindowMeta.getEndWindowField() ),
      JsonRowMeta.toJson( inputRowMeta ),
      stepPluginClasses,
      xpPluginClasses
    );

    stepPCollection = stepPCollection.apply( ParDo.of( windowInfoFn ) );
  }

  // Save this in the map
  //
  stepCollectionMap.put( stepMeta.getName(), stepPCollection );
  log.logBasic( "Handled step (WINDOW) : " + stepMeta.getName() + ", gets data from " + previousSteps.size() + " previous step(s)" );
}