org.apache.beam.sdk.transforms.windowing.SlidingWindows Java Examples

The following examples show how to use org.apache.beam.sdk.transforms.windowing.SlidingWindows. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CombineTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Counts each distinct element of a timestamped input that has been assigned to 2 ms sliding
 * windows advancing every 1 ms, and expects the per-window counts 1, 2, 1 for both "a" and "b".
 */
@Test
public void testCountPerElementWithSlidingWindows() {
  SlidingWindows slidingFn = SlidingWindows.of(Duration.millis(2)).every(Duration.millis(1));

  // Two "a" values followed by two "b" values, one millisecond apart.
  PCollection<String> timestamped =
      pipeline.apply(
          Create.timestamped(
              TimestampedValue.of("a", new Instant(1)),
              TimestampedValue.of("a", new Instant(2)),
              TimestampedValue.of("b", new Instant(3)),
              TimestampedValue.of("b", new Instant(4))));
  PCollection<String> windowed = timestamped.apply(Window.into(slidingFn));

  PCollection<KV<String, Long>> counts = windowed.apply(Count.perElement());

  PAssert.that(counts)
      .containsInAnyOrder(
          KV.of("a", 1L), KV.of("a", 2L), KV.of("a", 1L),
          KV.of("b", 1L), KV.of("b", 2L), KV.of("b", 1L));
  pipeline.run();
}
 
Example #2
Source File: CombineTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Combines integers globally with a binary "keep the larger value" function over 3 ms sliding
 * windows advancing every 1 ms, without default values, and checks the per-window results.
 */
@Test
public void testBinaryCombineWithSlidingWindows() {
  // Binary function handed to the combine: returns the larger of its two arguments.
  SerializableBiFunction<Integer, Integer, Integer> larger =
      (left, right) -> left > right ? left : right;

  PCollection<Integer> maxima =
      pipeline
          .apply(
              Create.timestamped(
                  TimestampedValue.of(1, new Instant(1)),
                  TimestampedValue.of(3, new Instant(2)),
                  TimestampedValue.of(5, new Instant(3))))
          .apply(Window.into(SlidingWindows.of(Duration.millis(3)).every(Duration.millis(1))))
          .apply(Combine.globally(Combine.BinaryCombineFn.of(larger)).withoutDefaults());

  PAssert.that(maxima).containsInAnyOrder(1, 3, 5, 5, 5);
  pipeline.run();
}
 
Example #3
Source File: CombineTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Sums integer values per key over 3 ms sliding windows advancing every 1 ms. All six inputs
 * share key 1, and the expected output holds one sum per window.
 */
@Test
public void testCombinePerKeyWithSlidingWindows() {
  PCollection<KV<Integer, Integer>> timestamped =
      pipeline.apply(
          Create.timestamped(
              TimestampedValue.of(KV.of(1, 1), new Instant(1)),
              TimestampedValue.of(KV.of(1, 3), new Instant(2)),
              TimestampedValue.of(KV.of(1, 5), new Instant(3)),
              TimestampedValue.of(KV.of(1, 2), new Instant(1)),
              TimestampedValue.of(KV.of(1, 4), new Instant(2)),
              TimestampedValue.of(KV.of(1, 6), new Instant(3))));

  PCollection<KV<Integer, Integer>> sums =
      timestamped
          .apply(Window.into(SlidingWindows.of(Duration.millis(3)).every(Duration.millis(1))))
          .apply(Sum.integersPerKey());

  PAssert.that(sums)
      .containsInAnyOrder(
          KV.of(1, 3), // 1 + 2
          KV.of(1, 10), // 1 + 2 + 3 + 4
          KV.of(1, 21), // 1 + 3 + 5 + 2 + 4 + 6
          KV.of(1, 18), // 3 + 4 + 5 + 6
          KV.of(1, 11)); // 5 + 6
  pipeline.run();
}
 
Example #4
Source File: BeamAggregationRel.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Adjusts the input statistics for the effect of this node's windowing.
 *
 * <p>When no window function is set the input statistics are returned unchanged. For sliding
 * windows the row count and rate are multiplied by {@code size / period}, because each tuple is
 * repeated in that many windows. For any other window function the factor is 1.
 *
 * @param inputStat statistics of the input relation
 * @return {@code inputStat} itself when there is no window function, otherwise new statistics
 *     with scaled row count and rate and a window of {@code BeamIOSourceRel.CONSTANT_WINDOW_SIZE}
 */
private NodeStats computeWindowingCostEffect(NodeStats inputStat) {
  if (windowFn == null) {
    return inputStat;
  }
  WindowFn<?, ?> w = windowFn;
  double multiplicationFactor = 1;
  // If the window is SlidingWindow, the number of tuples will increase. (Because, some of the
  // tuples repeat in multiple windows).
  if (w instanceof SlidingWindows) {
    SlidingWindows sliding = (SlidingWindows) w;
    // Use millisecond precision: getStandardSeconds() truncates to whole seconds, so a
    // sub-second period would become 0 and turn the factor into Infinity or NaN.
    multiplicationFactor =
        ((double) sliding.getSize().getMillis()) / sliding.getPeriod().getMillis();
  }

  return NodeStats.create(
      inputStat.getRowCount() * multiplicationFactor,
      inputStat.getRate() * multiplicationFactor,
      BeamIOSourceRel.CONSTANT_WINDOW_SIZE);
}
 
Example #5
Source File: WindowingStrategyTranslation.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Rebuilds a {@link WindowFn} from its {@code FunctionSpec} by dispatching on the spec's URN.
 *
 * <p>Global, fixed, sliding and session windows are reconstructed from their payload messages;
 * the serialized-Java URN is deserialized from the raw payload bytes.
 *
 * @param windowFnSpec the function spec carrying the URN and payload
 * @return the reconstructed window function
 * @throws IllegalArgumentException if the URN is not one of the handled ones, or if the payload
 *     cannot be parsed as the message expected for that URN
 */
public static WindowFn<?, ?> windowFnFromProto(FunctionSpec windowFnSpec) {
  try {
    final String urn = windowFnSpec.getUrn();

    if (urn.equals(getUrn(GlobalWindowsPayload.Enum.PROPERTIES))) {
      return new GlobalWindows();
    }

    if (urn.equals(getUrn(FixedWindowsPayload.Enum.PROPERTIES))) {
      FixedWindowsPayload fixed = FixedWindowsPayload.parseFrom(windowFnSpec.getPayload());
      Duration size = Duration.millis(Durations.toMillis(fixed.getSize()));
      Duration offset = Duration.millis(Timestamps.toMillis(fixed.getOffset()));
      return FixedWindows.of(size).withOffset(offset);
    }

    if (urn.equals(getUrn(SlidingWindowsPayload.Enum.PROPERTIES))) {
      SlidingWindowsPayload sliding = SlidingWindowsPayload.parseFrom(windowFnSpec.getPayload());
      Duration size = Duration.millis(Durations.toMillis(sliding.getSize()));
      Duration period = Duration.millis(Durations.toMillis(sliding.getPeriod()));
      Duration offset = Duration.millis(Timestamps.toMillis(sliding.getOffset()));
      return SlidingWindows.of(size).every(period).withOffset(offset);
    }

    if (urn.equals(getUrn(SessionWindowsPayload.Enum.PROPERTIES))) {
      SessionWindowsPayload session = SessionWindowsPayload.parseFrom(windowFnSpec.getPayload());
      Duration gap = Duration.millis(Durations.toMillis(session.getGapSize()));
      return Sessions.withGapDuration(gap);
    }

    if (urn.equals(SERIALIZED_JAVA_WINDOWFN_URN)) {
      byte[] serialized = windowFnSpec.getPayload().toByteArray();
      return (WindowFn<?, ?>) SerializableUtils.deserializeFromByteArray(serialized, "WindowFn");
    }

    throw new IllegalArgumentException(
        "Unknown or unsupported WindowFn: " + windowFnSpec.getUrn());
  } catch (InvalidProtocolBufferException e) {
    String message =
        String.format(
            "%s for %s with URN %s did not contain expected proto message for payload",
            FunctionSpec.class.getSimpleName(),
            WindowFn.class.getSimpleName(),
            windowFnSpec.getUrn());
    throw new IllegalArgumentException(message, e);
  }
}
 
Example #6
Source File: WindowRuntime.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Assigns the incoming records to session, fixed or sliding windows according to the component
 * properties.
 *
 * <p>A window length below 1 is reported through {@code TalendRuntimeException}. Session
 * windowing takes precedence when enabled; otherwise a slide length below 1 selects fixed
 * windows and any other slide length selects sliding windows.
 */
@Override
public PCollection<IndexedRecord> expand(PCollection<IndexedRecord> indexedRecordPCollection) {
    if (properties.windowLength.getValue() < 1) {
        TalendRuntimeException.build(CommonErrorCodes.UNEXPECTED_ARGUMENT).setAndThrow(properties.windowLength.getName(),
                String.valueOf(properties.windowLength.getValue()));
    }

    // Session windows: the window length is used as the gap duration.
    if (properties.windowSession.getValue()) {
        return indexedRecordPCollection.apply(Window.<IndexedRecord> into(
                Sessions.withGapDuration(Duration.millis(properties.windowLength.getValue().intValue()))));
    }

    // Fixed windows when no usable slide length is configured.
    final boolean fixed = properties.windowSlideLength.getValue() < 1;
    if (fixed) {
        return indexedRecordPCollection.apply(
                Window.<IndexedRecord> into(FixedWindows.of(new Duration(properties.windowLength.getValue().intValue()))));
    }

    // Sliding windows: window length is the size, slide length is the period.
    return indexedRecordPCollection.apply(
            Window.<IndexedRecord> into(SlidingWindows.of(new Duration(properties.windowLength.getValue().intValue()))
                    .every(new Duration(properties.windowSlideLength.getValue().intValue()))));
}
 
Example #7
Source File: MultiStepCombineTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code MultiStepCombineFn} per key over 6 ms sliding windows advancing every 3 ms and
 * checks the results both for three specific windows and across all windows.
 */
@Test
public void testMultiStepCombineWindowed() {
  Duration size = Duration.millis(6L);
  SlidingWindows windowFn = SlidingWindows.of(size).every(Duration.millis(3L));

  PCollection<KV<String, Long>> timestamped =
      pipeline.apply(
          Create.timestamped(
              TimestampedValue.of(KV.of("foo", 1L), new Instant(1L)),
              TimestampedValue.of(KV.of("bar", 2L), new Instant(2L)),
              TimestampedValue.of(KV.of("bizzle", 3L), new Instant(3L)),
              TimestampedValue.of(KV.of("bar", 4L), new Instant(4L)),
              TimestampedValue.of(KV.of("bizzle", 11L), new Instant(11L))));

  PCollection<KV<String, Long>> combined =
      timestamped.apply(Window.into(windowFn)).apply(Combine.perKey(new MultiStepCombineFn()));

  // Window starting at 0.
  PAssert.that("Windows should combine only elements in their windows", combined)
      .inWindow(new IntervalWindow(new Instant(0L), size))
      .containsInAnyOrder(KV.of("foo", 1L), KV.of("bar", 6L), KV.of("bizzle", 3L));

  // Window starting at -3.
  PAssert.that("Elements should appear in all the windows they are assigned to", combined)
      .inWindow(new IntervalWindow(new Instant(-3L), size))
      .containsInAnyOrder(KV.of("foo", 1L), KV.of("bar", 2L));

  // Window starting at 6.
  PAssert.that(combined)
      .inWindow(new IntervalWindow(new Instant(6L), size))
      .containsInAnyOrder(KV.of("bizzle", 11L));

  // Every result, regardless of window.
  PAssert.that(combined)
      .containsInAnyOrder(
          KV.of("foo", 1L),
          KV.of("foo", 1L),
          KV.of("bar", 6L),
          KV.of("bar", 2L),
          KV.of("bar", 4L),
          KV.of("bizzle", 11L),
          KV.of("bizzle", 11L),
          KV.of("bizzle", 3L),
          KV.of("bizzle", 3L));
  pipeline.run();
}
 
Example #8
Source File: SparkCombineFnTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Exercises a keyed {@code SparkCombineFn} with a non-merging accumulator over 3000 ms sliding
 * windows advancing every 1000 ms: two combiners are created, a value is merged into the first,
 * the combiners are merged, and the extracted outputs are compared as "value:timestamp" strings.
 */
@Test
public void testSlidingCombineFnNonMerging() throws Exception {
  WindowingStrategy<Object, IntervalWindow> strategy =
      WindowingStrategy.of(SlidingWindows.of(Duration.millis(3000)).every(Duration.millis(1000)));

  SparkCombineFn<KV<String, Integer>, Integer, Long, Long> keyedFn =
      SparkCombineFn.keyed(
          combineFn,
          opts,
          Collections.emptyMap(),
          strategy,
          SparkCombineFn.WindowedAccumulator.Type.NON_MERGING);

  // Three inputs for the same key at 5000, 1500 and 500 ms after the epoch.
  Instant epoch = Instant.ofEpochMilli(0);
  WindowedValue<KV<String, Integer>> at5000 =
      input("key", 1, epoch.plus(5000), strategy.getWindowFn());
  WindowedValue<KV<String, Integer>> at1500 =
      input("key", 2, epoch.plus(1500), strategy.getWindowFn());
  WindowedValue<KV<String, Integer>> at500 =
      input("key", 3, epoch.plus(500), strategy.getWindowFn());

  SparkCombineFn.WindowedAccumulator<KV<String, Integer>, Integer, Long, ?> left =
      keyedFn.createCombiner(at5000);
  SparkCombineFn.WindowedAccumulator<KV<String, Integer>, Integer, Long, ?> right =
      keyedFn.createCombiner(at500);
  keyedFn.mergeValue(left, at1500);
  SparkCombineFn.WindowedAccumulator<KV<String, Integer>, Integer, Long, ?> merged =
      keyedFn.mergeCombiners(left, right);

  Iterable<WindowedValue<Long>> results = keyedFn.extractOutput(merged);
  assertEquals(7, Iterables.size(results));

  // Render every output as "<value>:<timestamp millis>" so the comparison ignores ordering.
  List<String> rendered =
      StreamSupport.stream(results.spliterator(), false)
          .map(result -> result.getValue() + ":" + result.getTimestamp().getMillis())
          .collect(Collectors.toList());
  assertUnorderedEquals(
      Lists.newArrayList("3:999", "5:1999", "5:2999", "2:3999", "1:5999", "1:6999", "1:7999"),
      rendered);
}
 
Example #9
Source File: WindowingStrategyTranslationTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Supplies the parameterized cases: the global default strategy, fixed, sliding and session
 * window strategies, and three fully configured strategies built on
 * {@code REPRESENTATIVE_WINDOW_FN}.
 */
@Parameters(name = "{index}: {0}")
public static Iterable<ToProtoAndBackSpec> data() {
  ImmutableList.Builder<ToProtoAndBackSpec> specs = ImmutableList.builder();

  // Plain window functions with default strategy settings.
  specs.add(toProtoAndBackSpec(WindowingStrategy.globalDefault()));
  specs.add(
      toProtoAndBackSpec(
          WindowingStrategy.of(
              FixedWindows.of(Duration.millis(11)).withOffset(Duration.millis(3)))));
  specs.add(
      toProtoAndBackSpec(
          WindowingStrategy.of(
              SlidingWindows.of(Duration.millis(37))
                  .every(Duration.millis(3))
                  .withOffset(Duration.millis(2)))));
  specs.add(
      toProtoAndBackSpec(WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(389)))));

  // Strategies that vary closing behavior, accumulation mode, lateness and timestamp combiner.
  specs.add(
      toProtoAndBackSpec(
          WindowingStrategy.of(REPRESENTATIVE_WINDOW_FN)
              .withClosingBehavior(ClosingBehavior.FIRE_ALWAYS)
              .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES)
              .withTrigger(REPRESENTATIVE_TRIGGER)
              .withAllowedLateness(Duration.millis(71))
              .withTimestampCombiner(TimestampCombiner.EARLIEST)));
  specs.add(
      toProtoAndBackSpec(
          WindowingStrategy.of(REPRESENTATIVE_WINDOW_FN)
              .withClosingBehavior(ClosingBehavior.FIRE_IF_NON_EMPTY)
              .withMode(AccumulationMode.DISCARDING_FIRED_PANES)
              .withTrigger(REPRESENTATIVE_TRIGGER)
              .withAllowedLateness(Duration.millis(93))
              .withTimestampCombiner(TimestampCombiner.LATEST)));
  specs.add(
      toProtoAndBackSpec(
          WindowingStrategy.of(REPRESENTATIVE_WINDOW_FN)
              .withClosingBehavior(ClosingBehavior.FIRE_IF_NON_EMPTY)
              .withMode(AccumulationMode.RETRACTING_FIRED_PANES)
              .withTrigger(REPRESENTATIVE_TRIGGER)
              .withAllowedLateness(Duration.millis(100))
              .withTimestampCombiner(TimestampCombiner.LATEST)));

  return specs.build();
}
 
Example #10
Source File: WindowIntoTranslationTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Supplies the window functions used as test parameters: fixed, global, session, sliding and a
 * {@code CustomWindows} instance, in that order.
 */
@Parameters(name = "{index}: {0}")
public static Iterable<WindowFn<?, ?>> data() {
  return ImmutableList.<WindowFn<?, ?>>of(
      FixedWindows.of(Duration.standardMinutes(10L)),
      new GlobalWindows(),
      Sessions.withGapDuration(Duration.standardMinutes(15L)),
      SlidingWindows.of(Duration.standardMinutes(5L)).every(Duration.standardMinutes(1L)),
      new CustomWindows());
}
 
Example #11
Source File: PAssertTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Tests that windowed {@code containsInAnyOrder} is actually order-independent.
 *
 * <p>Four integers are placed into 200 ms sliding windows that advance every 100 ms with a
 * 50 ms offset. Each of the five windows is then asserted on, with the expected values listed
 * in an order that differs from the input order where a window holds two elements.
 */
@Test
@Category(ValidatesRunner.class)
public void testWindowedContainsInAnyOrder() throws Exception {
  SlidingWindows offsetSlidingFn =
      SlidingWindows.of(Duration.millis(200L))
          .every(Duration.millis(100L))
          .withOffset(Duration.millis(50L));

  PCollection<Integer> timestamped =
      pipeline.apply(
          Create.timestamped(
              TimestampedValue.of(1, new Instant(100L)),
              TimestampedValue.of(2, new Instant(200L)),
              TimestampedValue.of(3, new Instant(300L)),
              TimestampedValue.of(4, new Instant(400L))));
  PCollection<Integer> pcollection = timestamped.apply(Window.into(offsetSlidingFn));

  IntervalWindow from50Before = new IntervalWindow(new Instant(-50L), new Instant(150L));
  IntervalWindow from50 = new IntervalWindow(new Instant(50L), new Instant(250L));
  IntervalWindow from150 = new IntervalWindow(new Instant(150L), new Instant(350L));
  IntervalWindow from250 = new IntervalWindow(new Instant(250L), new Instant(450L));
  IntervalWindow from350 = new IntervalWindow(new Instant(350L), new Instant(550L));

  PAssert.that(pcollection).inWindow(from50Before).containsInAnyOrder(1);
  PAssert.that(pcollection).inWindow(from50).containsInAnyOrder(2, 1);
  PAssert.that(pcollection).inWindow(from150).containsInAnyOrder(2, 3);
  PAssert.that(pcollection).inWindow(from250).containsInAnyOrder(4, 3);
  PAssert.that(pcollection).inWindow(from350).containsInAnyOrder(4);
  pipeline.run();
}
 
Example #12
Source File: GroupByKeyTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Groups by key after assigning the input to 5 ms sliding windows advancing every 3 ms, then
 * checks the grouped output both across all windows and inside each of the three windows.
 */
@Test
@Category(ValidatesRunner.class)
public void testGroupByKeyMultipleWindows() {
  Duration size = Duration.millis(5L);

  PCollection<KV<String, Integer>> timestamped =
      p.apply(
          Create.timestamped(
              TimestampedValue.of(KV.of("foo", 1), new Instant(1)),
              TimestampedValue.of(KV.of("foo", 4), new Instant(4)),
              TimestampedValue.of(KV.of("bar", 3), new Instant(3))));
  PCollection<KV<String, Integer>> windowedInput =
      timestamped.apply(Window.into(SlidingWindows.of(size).every(Duration.millis(3L))));

  PCollection<KV<String, Iterable<Integer>>> grouped = windowedInput.apply(GroupByKey.create());

  // All groups, regardless of window.
  PAssert.that(grouped)
      .satisfies(
          containsKvs(kv("foo", 1, 4), kv("foo", 1), kv("foo", 4), kv("bar", 3), kv("bar", 3)));

  // Groups per window, for the windows starting at -3, 0 and 3.
  PAssert.that(grouped)
      .inWindow(new IntervalWindow(new Instant(-3L), size))
      .satisfies(containsKvs(kv("foo", 1)));
  PAssert.that(grouped)
      .inWindow(new IntervalWindow(new Instant(0L), size))
      .satisfies(containsKvs(kv("foo", 1, 4), kv("bar", 3)));
  PAssert.that(grouped)
      .inWindow(new IntervalWindow(new Instant(3L), size))
      .satisfies(containsKvs(kv("foo", 4), kv("bar", 3)));

  p.run();
}
 
Example #13
Source File: WaitTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code testWaitWithParameters} with 15-second fixed windows as the first window function
 * and 7-second sliding windows advancing every second as the second one.
 */
@Test
@Category({NeedsRunner.class, UsesTestStreamWithProcessingTime.class})
public void testWaitWithSignalInSlidingWindows() {
  Duration duration = Duration.standardMinutes(1);
  Duration lateness = Duration.standardSeconds(15);
  int numMainElements = 20;
  int numSignalElements = 20;
  FixedWindows fixedFn = FixedWindows.of(Duration.standardSeconds(15));
  SlidingWindows slidingFn =
      SlidingWindows.of(Duration.standardSeconds(7)).every(Duration.standardSeconds(1));

  testWaitWithParameters(
      duration, lateness, numMainElements, fixedFn, numSignalElements, slidingFn);
}
 
Example #14
Source File: WindowedWordCount.java    From incubator-nemo with Apache License 2.0 5 votes vote down vote up
/**
 * Entry point of the windowed word-count example: windows the source, sums the values per key,
 * formats each pair as {@code "key: value"} and writes one file per window.
 *
 * @param args {@code args[0]} is the output file path and {@code args[1]} the window type;
 *     {@code "fixed"} selects 5-second fixed windows and any other value selects 10-second
 *     sliding windows advancing every 5 seconds. The whole array is also passed to
 *     {@code getSource}.
 */
public static void main(final String[] args) {
  final String outputFilePath = args[0];
  final String windowType = args[1];

  final boolean useFixedWindows = windowType.equals("fixed");
  final Window<KV<String, Long>> windowFn = useFixedWindows
    ? Window.<KV<String, Long>>into(FixedWindows.of(Duration.standardSeconds(5)))
    : Window.<KV<String, Long>>into(
        SlidingWindows.of(Duration.standardSeconds(10)).every(Duration.standardSeconds(5)));

  final PipelineOptions options = NemoPipelineOptionsFactory.create();
  options.setJobName("WindowedWordCount");

  final Pipeline p = Pipeline.create(options);

  // Renders each summed pair as a single output line.
  final SimpleFunction<KV<String, Long>, String> toLine =
    new SimpleFunction<KV<String, Long>, String>() {
      @Override
      public String apply(final KV<String, Long> kv) {
        return kv.getKey() + ": " + kv.getValue();
      }
    };

  getSource(p, args)
    .apply(windowFn)
    .apply(Sum.longsPerKey())
    .apply(MapElements.<KV<String, Long>, String>via(toLine))
    .apply(new WriteOneFilePerWindow(outputFilePath, 1));

  p.run().waitUntilFinish();
}
 
Example #15
Source File: AutoCompleteTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the top two completions per prefix over sliding windows of size 2 ms, created
 * without an explicit period, and checks the candidates expected for each of the three windows.
 */
@Test
public void testWindowedAutoComplete() {
  List<TimestampedValue<String>> timestampedWords =
      Arrays.asList(
          TimestampedValue.of("xA", new Instant(1)),
          TimestampedValue.of("xA", new Instant(1)),
          TimestampedValue.of("xB", new Instant(1)),
          TimestampedValue.of("xB", new Instant(2)),
          TimestampedValue.of("xB", new Instant(2)));

  PCollection<KV<String, List<CompletionCandidate>>> completions =
      p.apply(Create.timestamped(timestampedWords))
          .apply(Window.into(SlidingWindows.of(new Duration(2))))
          .apply(new ComputeTopCompletions(2, recursive));

  PAssert.that(completions)
      .containsInAnyOrder(
          // First window: two "xA" and one "xB".
          KV.of("x", parseList("xA:2", "xB:1")),
          KV.of("xA", parseList("xA:2")),
          KV.of("xB", parseList("xB:1")),

          // Second window: two "xA" and three "xB".
          KV.of("x", parseList("xB:3", "xA:2")),
          KV.of("xA", parseList("xA:2")),
          KV.of("xB", parseList("xB:3")),

          // Third window: two "xB" only.
          KV.of("x", parseList("xB:2")),
          KV.of("xB", parseList("xB:2")));
  p.run().waitUntilFinish();
}
 
Example #16
Source File: TrafficMaxLaneFlow.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds and runs the max-lane-flow pipeline: reads timestamped lines from the input file,
 * extracts flow info, applies sliding windows sized by the options, runs {@code MaxLaneFlow}
 * and writes the resulting rows to the configured BigQuery table.
 *
 * @param options pipeline options supplying the input file, window settings and BigQuery target
 * @throws IOException declared by this method; not thrown directly by the code shown here
 */
public static void runTrafficMaxLaneFlow(TrafficMaxLaneFlowOptions options) throws IOException {
  // Let ExampleUtils prepare the resources this example needs.
  ExampleUtils exampleUtils = new ExampleUtils(options);
  exampleUtils.setup();

  Pipeline pipeline = Pipeline.create(options);

  // Destination table, taken from the options.
  TableReference outputTable = new TableReference();
  outputTable.setProjectId(options.getProject());
  outputTable.setDatasetId(options.getBigQueryDataset());
  outputTable.setTableId(options.getBigQueryTable());

  // Window size and slide are both given in minutes.
  SlidingWindows slidingFn =
      SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
          .every(Duration.standardMinutes(options.getWindowSlideEvery()));

  pipeline
      .apply("ReadLines", new ReadFileAndExtractTimestamps(options.getInputFile()))
      .apply(ParDo.of(new ExtractFlowInfoFn()))
      .apply(Window.into(slidingFn))
      .apply(new MaxLaneFlow())
      .apply(
          BigQueryIO.writeTableRows().to(outputTable).withSchema(FormatMaxesFn.getSchema()));

  PipelineResult result = pipeline.run();

  // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
  exampleUtils.waitToFinish(result);
}
 
Example #17
Source File: TrafficRoutes.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds and runs the traffic-routes pipeline: reads timestamped lines from the input file,
 * extracts station speeds, applies sliding windows sized by the options, runs
 * {@code TrackSpeed} and writes the resulting rows to the configured BigQuery table.
 *
 * @param options pipeline options supplying the input file, window settings and BigQuery target
 * @throws IOException declared by this method; not thrown directly by the code shown here
 */
public static void runTrafficRoutes(TrafficRoutesOptions options) throws IOException {
  // Let ExampleUtils prepare the resources this example needs.
  ExampleUtils exampleUtils = new ExampleUtils(options);
  exampleUtils.setup();

  Pipeline pipeline = Pipeline.create(options);

  // Destination table, taken from the options.
  TableReference outputTable = new TableReference();
  outputTable.setProjectId(options.getProject());
  outputTable.setDatasetId(options.getBigQueryDataset());
  outputTable.setTableId(options.getBigQueryTable());

  // Window size and slide are both given in minutes.
  SlidingWindows slidingFn =
      SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
          .every(Duration.standardMinutes(options.getWindowSlideEvery()));

  pipeline
      .apply("ReadLines", new ReadFileAndExtractTimestamps(options.getInputFile()))
      // row... => <station route, station speed> ...
      .apply(ParDo.of(new ExtractStationSpeedFn()))
      .apply(Window.into(slidingFn))
      .apply(new TrackSpeed())
      .apply(
          BigQueryIO.writeTableRows().to(outputTable).withSchema(FormatStatsFn.getSchema()));

  PipelineResult result = pipeline.run();

  // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
  exampleUtils.waitToFinish(result);
}
 
Example #18
Source File: WindowedBroadcast.java    From incubator-nemo with Apache License 2.0 5 votes vote down vote up
/**
 * Entry point of the windowed-broadcast example.
 *
 * <p>The source is placed into 2-second sliding windows advancing every second and also exposed
 * as a list side input. Every element is checked against that list: a contained element yields
 * an output line, a missing one raises a {@link RuntimeException}. The lines are written one
 * file per window.
 *
 * @param args {@code args[0]} is the output file path
 */
public static void main(final String[] args) {
  final String outputFilePath = args[0];

  final Window<Long> windowFn = Window.<Long>into(
    SlidingWindows.of(Duration.standardSeconds(2)).every(Duration.standardSeconds(1)));

  final PipelineOptions options = NemoPipelineOptionsFactory.create();
  options.setJobName("WindowedBroadcast");

  final Pipeline p = Pipeline.create(options);

  final PCollection<Long> windowedElements = getSource(p).apply(windowFn);
  final PCollectionView<List<Long>> windowedView = windowedElements.apply(View.asList());

  final PCollection<String> checkedLines = windowedElements.apply(
    ParDo.of(new DoFn<Long, String>() {
      @ProcessElement
      public void processElement(final ProcessContext c) {
        final Long current = c.element();
        final List<Long> sideInputElements = c.sideInput(windowedView);
        System.out.println(current + " / " + sideInputElements);
        if (sideInputElements.contains(current)) {
          c.output(current + " is in " + sideInputElements);
        } else {
          throw new RuntimeException(current + " not in " + sideInputElements.toString());
        }
      }
    }).withSideInputs(windowedView));

  checkedLines.apply(new WriteOneFilePerWindow(outputFilePath, 1));

  p.run().waitUntilFinish();
}
 
Example #19
Source File: BeamWindowStepHandler.java    From kettle-beam with Apache License 2.0 4 votes vote down vote up
/**
 * Applies the windowing configured on a Beam Window step to the input collection and registers
 * the result in the step collection map under the step's name.
 *
 * <p>Fixed, sliding, session and global window types are supported. The duration (and, for
 * sliding windows, the "every" value) is read in seconds after variable substitution. When a
 * start, end or max window field is configured, a {@code WindowInfoFn} is applied afterwards.
 *
 * @param log channel used for the final basic log line
 * @param stepMeta the step being handled; its meta must be a {@code BeamWindowMeta}
 * @param stepCollectionMap map that receives the resulting collection keyed by step name
 * @param pipeline the Beam pipeline (not used directly in this method)
 * @param inputRowMeta row metadata of the input, passed on to {@code WindowInfoFn} as JSON
 * @param previousSteps the steps feeding this one; only their count is logged
 * @param input the collection to window
 * @throws KettleException if no window type is given, the type is not supported, or the
 *     duration / slide values are invalid for the selected type
 */
@Override public void handleStep( LogChannelInterface log, StepMeta stepMeta, Map<String, PCollection<KettleRow>> stepCollectionMap,
                                  Pipeline pipeline, RowMetaInterface inputRowMeta, List<StepMeta> previousSteps,
                                  PCollection<KettleRow> input ) throws KettleException {

  BeamWindowMeta beamWindowMeta = (BeamWindowMeta) stepMeta.getStepMetaInterface();

  if ( StringUtils.isEmpty( beamWindowMeta.getWindowType() ) ) {
    throw new KettleException( "Please specify a window type in Beam Window step '" + stepMeta.getName() + "'" );
  }

  // A value that cannot be parsed becomes -1 and is rejected by the checks below.
  String duration = transMeta.environmentSubstitute( beamWindowMeta.getDuration() );
  long durationSeconds = Const.toLong( duration, -1L );

  PCollection<KettleRow> stepPCollection;

  if ( BeamDefaults.WINDOW_TYPE_FIXED.equals( beamWindowMeta.getWindowType() ) ) {

    if ( durationSeconds <= 0 ) {
      throw new KettleException( "Please specify a valid positive window size (duration) for Beam window step '" + stepMeta.getName() + "'" );
    }

    FixedWindows fixedWindows = FixedWindows
      .of( Duration.standardSeconds( durationSeconds ) );
    stepPCollection = input.apply( Window.into( fixedWindows ) );

  } else if ( BeamDefaults.WINDOW_TYPE_SLIDING.equals( beamWindowMeta.getWindowType() ) ) {

    if ( durationSeconds <= 0 ) {
      throw new KettleException( "Please specify a valid positive window size (duration) for Beam window step '" + stepMeta.getName() + "'" );
    }

    String every = transMeta.environmentSubstitute( beamWindowMeta.getEvery() );
    long everySeconds = Const.toLong( every, -1L );

    // Validate the slide like the window size, so a missing or invalid value is reported as a
    // KettleException naming the step instead of failing inside SlidingWindows.
    if ( everySeconds <= 0 ) {
      throw new KettleException( "Please specify a valid positive window slide (every) for Beam window step '" + stepMeta.getName() + "'" );
    }

    SlidingWindows slidingWindows = SlidingWindows
      .of( Duration.standardSeconds( durationSeconds ) )
      .every( Duration.standardSeconds( everySeconds ) );
    stepPCollection = input.apply( Window.into( slidingWindows ) );

  } else if ( BeamDefaults.WINDOW_TYPE_SESSION.equals( beamWindowMeta.getWindowType() ) ) {

    if ( durationSeconds < 600 ) {
      throw new KettleException(
        "Please specify a window size (duration) of at least 600 (10 minutes) for Beam window step '" + stepMeta.getName() + "'.  This is the minimum gap between session windows." );
    }

    Sessions sessionWindows = Sessions
      .withGapDuration( Duration.standardSeconds( durationSeconds ) );
    stepPCollection = input.apply( Window.into( sessionWindows ) );

  } else if ( BeamDefaults.WINDOW_TYPE_GLOBAL.equals( beamWindowMeta.getWindowType() ) ) {

    stepPCollection = input.apply( Window.into( new GlobalWindows() ) );

  } else {
    throw new KettleException( "Beam Window type '" + beamWindowMeta.getWindowType() + "' is not supported in step '" + stepMeta.getName() + "'" );
  }

  // Now get window information about the window if we asked about it...
  //
  if ( StringUtils.isNotEmpty( beamWindowMeta.getStartWindowField() ) ||
    StringUtils.isNotEmpty( beamWindowMeta.getEndWindowField() ) ||
    StringUtils.isNotEmpty( beamWindowMeta.getMaxWindowField() ) ) {

    // NOTE(review): the field arguments are assumed to be (max, start, end) in that order;
    // the original passed the max field twice and never the end field. Confirm against the
    // WindowInfoFn constructor.
    WindowInfoFn windowInfoFn = new WindowInfoFn(
      stepMeta.getName(),
      transMeta.environmentSubstitute( beamWindowMeta.getMaxWindowField() ),
      transMeta.environmentSubstitute( beamWindowMeta.getStartWindowField() ),
      transMeta.environmentSubstitute( beamWindowMeta.getEndWindowField() ),
      JsonRowMeta.toJson( inputRowMeta ),
      stepPluginClasses,
      xpPluginClasses
    );

    stepPCollection = stepPCollection.apply( ParDo.of( windowInfoFn ) );
  }

  // Save this in the map
  //
  stepCollectionMap.put( stepMeta.getName(), stepPCollection );
  log.logBasic( "Handled step (WINDOW) : " + stepMeta.getName() + ", gets data from " + previousSteps.size() + " previous step(s)" );
}
 
Example #20
Source File: WindowEvaluatorFactoryTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Evaluates a {@code Window.into} transform using 6-day sliding windows that advance every
 * 3 days, and checks that each input value is re-emitted once per input window with the
 * expected pair of sliding windows assigned to it.
 */
@Test
public void multipleWindowsWindowFnSucceeds() throws Exception {
  Duration size = Duration.standardDays(6);
  Duration period = Duration.standardDays(3);
  Window<Long> slidingTransform = Window.into(SlidingWindows.of(size).every(period));
  PCollection<Long> windowed = input.apply(slidingTransform);

  CommittedBundle<Long> inputBundle = createInputBundle();
  UncommittedBundle<Long> expectedBundle = createOutputBundle(windowed, inputBundle);

  TransformResult<Long> result = runEvaluator(windowed, inputBundle);

  assertThat(
      Iterables.getOnlyElement(result.getOutputBundles()), Matchers.equalTo(expectedBundle));
  CommittedBundle<Long> committed = expectedBundle.commit(Instant.now());

  // The four sliding windows referenced by the expectations below, named by their bounds.
  BoundedWindow epochToSize = new IntervalWindow(EPOCH, EPOCH.plus(size));
  BoundedWindow periodToPeriodPlusSize =
      new IntervalWindow(EPOCH.plus(period), EPOCH.plus(period).plus(size));
  BoundedWindow minusSizeToEpoch = new IntervalWindow(EPOCH.minus(size), EPOCH);
  BoundedWindow minusSizePlusPeriodToPeriod =
      new IntervalWindow(EPOCH.minus(size).plus(period), EPOCH.plus(period));

  assertThat(
      committed.getElements(),
      containsInAnyOrder(
          // Value in global window mapped to one windowed value in multiple windows
          isWindowedValue(
              valueInGlobalWindow.getValue(),
              valueInGlobalWindow.getTimestamp(),
              ImmutableSet.of(epochToSize, minusSizePlusPeriodToPeriod),
              NO_FIRING),

          // Value in interval window mapped to one windowed value in multiple windows
          isWindowedValue(
              valueInIntervalWindow.getValue(),
              valueInIntervalWindow.getTimestamp(),
              ImmutableSet.of(minusSizeToEpoch, minusSizePlusPeriodToPeriod),
              valueInIntervalWindow.getPane()),

          // Value in three windows mapped to three windowed values in the same multiple windows
          isWindowedValue(
              valueInGlobalAndTwoIntervalWindows.getValue(),
              valueInGlobalAndTwoIntervalWindows.getTimestamp(),
              ImmutableSet.of(epochToSize, periodToPeriodPlusSize),
              valueInGlobalAndTwoIntervalWindows.getPane()),
          isWindowedValue(
              valueInGlobalAndTwoIntervalWindows.getValue(),
              valueInGlobalAndTwoIntervalWindows.getTimestamp(),
              ImmutableSet.of(epochToSize, periodToPeriodPlusSize),
              valueInGlobalAndTwoIntervalWindows.getPane()),
          isWindowedValue(
              valueInGlobalAndTwoIntervalWindows.getValue(),
              valueInGlobalAndTwoIntervalWindows.getTimestamp(),
              ImmutableSet.of(epochToSize, periodToPeriodPlusSize),
              valueInGlobalAndTwoIntervalWindows.getPane())));
}
 
Example #21
Source File: GroupAlsoByWindowProperties.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that the supplied GABW implementation groups a short run of same-key elements by
 * sliding window and combines the values found in each window with {@code combineFn}.
 *
 * <p>Every input element is placed in two overlapping windows, so the three inputs are expected
 * to yield exactly one combined output for each of three distinct windows.
 *
 * @param gabwFactory factory for the GABW implementation under test
 * @param combineFn combiner used both by the implementation and to compute the expected values
 */
public static void combinesElementsInSlidingWindows(
    GroupAlsoByWindowDoFnFactory<String, Long, Long> gabwFactory,
    CombineFn<Long, ?, Long> combineFn)
    throws Exception {

  // 20 ms windows that start every 10 ms; output timestamps use the EARLIEST combiner.
  SlidingWindows slidingFn = SlidingWindows.of(Duration.millis(20)).every(Duration.millis(10));
  WindowingStrategy<?, IntervalWindow> strategy =
      WindowingStrategy.of(slidingFn).withTimestampCombiner(TimestampCombiner.EARLIEST);

  // Inputs, each already assigned to the two sliding windows that contain its timestamp.
  WindowedValue<Long> oneAtFive =
      WindowedValue.of(
          1L, new Instant(5), Arrays.asList(window(-10, 10), window(0, 20)), PaneInfo.NO_FIRING);
  WindowedValue<Long> twoAtFifteen =
      WindowedValue.of(
          2L, new Instant(15), Arrays.asList(window(0, 20), window(10, 30)), PaneInfo.NO_FIRING);
  WindowedValue<Long> fourAtEighteen =
      WindowedValue.of(
          4L, new Instant(18), Arrays.asList(window(0, 20), window(10, 30)), PaneInfo.NO_FIRING);

  List<WindowedValue<KV<String, Long>>> grouped =
      runGABW(gabwFactory, strategy, "k", oneAtFive, twoAtFifteen, fourAtEighteen);

  assertThat(grouped, hasSize(3));

  // Window [-10, 10): only the first element.
  TimestampedValue<KV<String, Long>> earliest =
      getOnlyElementInWindow(grouped, window(-10, 10));
  assertThat(earliest.getValue().getKey(), equalTo("k"));
  Long expectedEarliest = combineFn.apply(ImmutableList.of(1L));
  assertThat(earliest.getValue().getValue(), equalTo(expectedEarliest));
  assertThat(earliest.getTimestamp(), equalTo(new Instant(5L)));

  // Window [0, 20): all three elements.
  TimestampedValue<KV<String, Long>> middle = getOnlyElementInWindow(grouped, window(0, 20));
  assertThat(middle.getValue().getKey(), equalTo("k"));
  Long expectedMiddle = combineFn.apply(ImmutableList.of(1L, 2L, 4L));
  assertThat(middle.getValue().getValue(), equalTo(expectedMiddle));
  // The WindowFn moves the timestamp so that it lies past the end of the preceding window.
  assertThat(middle.getTimestamp(), equalTo(new Instant(10L)));

  // Window [10, 30): the last two elements.
  TimestampedValue<KV<String, Long>> latest = getOnlyElementInWindow(grouped, window(10, 30));
  assertThat(latest.getValue().getKey(), equalTo("k"));
  Long expectedLatest = combineFn.apply(ImmutableList.of(2L, 4L));
  assertThat(latest.getValue().getValue(), equalTo(expectedLatest));
  // The WindowFn moves the timestamp so that it lies past the end of the preceding window.
  assertThat(latest.getTimestamp(), equalTo(new Instant(20L)));
}
 
Example #22
Source File: GroupAlsoByWindowProperties.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that the supplied GABW implementation buckets a short run of same-key elements into
 * sliding windows and reports the expected output timestamp for each window.
 *
 * <p>Each of the two input elements is placed in two overlapping windows, so three distinct
 * windows are expected in the output.
 *
 * @param gabwFactory factory for the GABW implementation under test
 */
public static void groupsElementsIntoSlidingWindowsWithMinTimestamp(
    GroupAlsoByWindowDoFnFactory<String, String, Iterable<String>> gabwFactory) throws Exception {

  // 20 ms windows that start every 10 ms; output timestamps use the EARLIEST combiner.
  SlidingWindows slidingFn = SlidingWindows.of(Duration.millis(20)).every(Duration.millis(10));
  WindowingStrategy<?, IntervalWindow> strategy =
      WindowingStrategy.of(slidingFn).withTimestampCombiner(TimestampCombiner.EARLIEST);

  // Inputs, each already assigned to the two sliding windows that contain its timestamp.
  WindowedValue<String> firstValue =
      WindowedValue.of(
          "v1", new Instant(5), Arrays.asList(window(-10, 10), window(0, 20)), PaneInfo.NO_FIRING);
  WindowedValue<String> secondValue =
      WindowedValue.of(
          "v2", new Instant(15), Arrays.asList(window(0, 20), window(10, 30)), PaneInfo.NO_FIRING);

  List<WindowedValue<KV<String, Iterable<String>>>> grouped =
      runGABW(gabwFactory, strategy, "key", firstValue, secondValue);

  assertThat(grouped, hasSize(3));

  // Window [-10, 10): only "v1", at its own timestamp.
  TimestampedValue<KV<String, Iterable<String>>> earliest =
      getOnlyElementInWindow(grouped, window(-10, 10));
  assertThat(earliest.getValue().getValue(), contains("v1"));
  assertThat(earliest.getTimestamp(), equalTo(new Instant(5)));

  // Window [0, 20): both values.
  TimestampedValue<KV<String, Iterable<String>>> middle =
      getOnlyElementInWindow(grouped, window(0, 20));
  assertThat(middle.getValue().getValue(), containsInAnyOrder("v1", "v2"));
  // The WindowFn moves the timestamp so that it lies past the end of the preceding window.
  assertThat(middle.getTimestamp(), equalTo(new Instant(10)));

  // Window [10, 30): only "v2".
  TimestampedValue<KV<String, Iterable<String>>> latest =
      getOnlyElementInWindow(grouped, window(10, 30));
  assertThat(latest.getValue().getValue(), contains("v2"));
  // The WindowFn moves the timestamp so that it lies past the end of the preceding window.
  assertThat(latest.getTimestamp(), equalTo(new Instant(20)));
}
 
Example #23
Source File: StreamingSideInputDoFnRunnerTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that an element assigned to two main-input windows is held back, separately for each
 * window, while the side input it needs is reported as not ready.
 */
@Test
public void testMultipleWindowsNotReady() throws Exception {
  PCollectionView<String> view = createView();

  // No side-input notifications are pending, and every fetch attempt reports "not ready".
  when(stepContext.getSideInputNotifications())
      .thenReturn(Arrays.<Windmill.GlobalDataId>asList());
  when(stepContext.issueSideInputFetch(
          eq(view), any(BoundedWindow.class), eq(SideInputState.UNKNOWN)))
      .thenReturn(false);

  ListOutputManager outputManager = new ListOutputManager();

  List<PCollectionView<String>> views = Arrays.asList(view);
  StreamingSideInputFetcher<String, IntervalWindow> sideInputFetcher = createFetcher(views);
  StreamingSideInputDoFnRunner<String, String, IntervalWindow> runner =
      createRunner(
          SlidingWindows.of(Duration.millis(10)).every(Duration.millis(10)),
          outputManager,
          views,
          sideInputFetcher);

  // A single element that lives in two overlapping main-input windows.
  IntervalWindow window1 = new IntervalWindow(new Instant(0), new Instant(10));
  IntervalWindow window2 = new IntervalWindow(new Instant(-5), new Instant(5));
  Instant elementTime = new Instant(1L);
  WindowedValue<String> elem =
      WindowedValue.of("e", elementTime, Arrays.asList(window1, window2), PaneInfo.NO_FIRING);

  runner.startBundle();
  runner.processElement(elem);
  runner.finishBundle();

  // Nothing may reach the main output while the side input is missing.
  assertTrue(outputManager.getOutput(mainOutputTag).isEmpty());

  // The element must have been recorded under the blocked-window map and buffered once per
  // main-input window.
  ValueState<Map<IntervalWindow, Set<GlobalDataRequest>>> blockedMapState =
      state.state(
          StateNamespaces.global(),
          StreamingSideInputFetcher.blockedMapAddr(WINDOW_FN.windowCoder()));
  Map<IntervalWindow, Set<GlobalDataRequest>> blockedMap = blockedMapState.read();

  // Both main-input windows are expected to wait on the same request, whose version is the
  // encoding of window1. NOTE(review): presumably the view's window mapping sends both main
  // windows to [0, 10) - confirm against createView().
  Windmill.GlobalDataId awaitedId =
      Windmill.GlobalDataId.newBuilder()
          .setTag(view.getTagInternal().getId())
          .setVersion(
              ByteString.copyFrom(
                  CoderUtils.encodeToByteArray(IntervalWindow.getCoder(), window1)))
          .build();
  Windmill.GlobalDataRequest awaitedRequest =
      Windmill.GlobalDataRequest.newBuilder()
          .setDataId(awaitedId)
          .setExistenceWatermarkDeadline(9000)
          .build();

  assertThat(blockedMap.get(window1), equalTo(Collections.singleton(awaitedRequest)));
  assertThat(blockedMap.get(window2), equalTo(Collections.singleton(awaitedRequest)));

  // Each window's bag holds the single-window explosion of the element for that window.
  assertThat(
      sideInputFetcher.elementBag(window1).read(),
      contains(Iterables.get(elem.explodeWindows(), 0)));
  assertThat(
      sideInputFetcher.elementBag(window2).read(),
      contains(Iterables.get(elem.explodeWindows(), 1)));

  // A watermark hold at the element's timestamp is kept for both windows.
  assertEquals(sideInputFetcher.watermarkHold(window1).read(), elementTime);
  assertEquals(sideInputFetcher.watermarkHold(window2).read(), elementTime);
}
 
Example #24
Source File: StreamingGroupAlsoByWindowFnsTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the iterable-producing streaming GABW fn over sliding windows (size 20 ms, period 10 ms)
 * while the input watermark is already at 15, then checks the three window outputs and the
 * late-data drop counter.
 */
@Test
public void testSlidingWindowsAndLateData() throws Exception {
  // Install a metrics container so the dropped-element counter can be inspected at the end.
  MetricsContainerImpl container = new MetricsContainerImpl("step");
  MetricsEnvironment.setCurrentContainer(container);
  TupleTag<KV<String, Iterable<String>>> outputTag = new TupleTag<>();
  ListOutputManager outputManager = new ListOutputManager();
  WindowingStrategy<? super String, IntervalWindow> windowingStrategy =
      WindowingStrategy.of(SlidingWindows.of(Duration.millis(20)).every(Duration.millis(10)))
          .withTimestampCombiner(TimestampCombiner.EARLIEST);
  GroupAlsoByWindowFn<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> fn =
      StreamingGroupAlsoByWindowsDoFns.createForIterable(
          windowingStrategy,
          new StepContextStateInternalsFactory<String>(stepContext),
          StringUtf8Coder.of());
  DoFnRunner<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> runner =
      makeRunner(outputTag, outputManager, windowingStrategy, fn);

  // The watermark (15) is already beyond the end of window [-10, 10) when elements arrive.
  when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(15));

  // First bundle: deliver three elements for the same key.
  runner.startBundle();

  WorkItem.Builder workItem1 = WorkItem.newBuilder();
  workItem1.setKey(ByteString.copyFromUtf8(KEY));
  workItem1.setWorkToken(WORK_TOKEN);
  InputMessageBundle.Builder messageBundle = workItem1.addMessageBundlesBuilder();
  messageBundle.setSourceComputationId(SOURCE_COMPUTATION_ID);

  Coder<String> valueCoder = StringUtf8Coder.of();
  // "v1" at t=5, in windows [-10, 10) and [0, 20).
  addElement(
      messageBundle,
      Arrays.asList(window(-10, 10), window(0, 20)),
      new Instant(5),
      valueCoder,
      "v1");
  // "v0" at t=2, in windows [-10, 10) and [0, 20).
  addElement(
      messageBundle,
      Arrays.asList(window(-10, 10), window(0, 20)),
      new Instant(2),
      valueCoder,
      "v0");
  // "v2" at t=15, in windows [0, 20) and [10, 30).
  addElement(
      messageBundle,
      Arrays.asList(window(0, 20), window(10, 30)),
      new Instant(15),
      valueCoder,
      "v2");

  runner.processElement(createValue(workItem1, valueCoder));

  runner.finishBundle();

  // Second bundle: deliver one watermark timer per window (at 9, 19 and 29, i.e. one
  // millisecond before each window's end) and move the watermark to 30.
  runner.startBundle();

  WorkItem.Builder workItem2 = WorkItem.newBuilder();
  workItem2.setKey(ByteString.copyFromUtf8(KEY));
  workItem2.setWorkToken(WORK_TOKEN);
  addTimer(workItem2, window(-10, 10), new Instant(9), Timer.Type.WATERMARK);
  addTimer(workItem2, window(0, 20), new Instant(19), Timer.Type.WATERMARK);
  addTimer(workItem2, window(10, 30), new Instant(29), Timer.Type.WATERMARK);
  when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(30));

  runner.processElement(createValue(workItem2, valueCoder));

  runner.finishBundle();

  List<WindowedValue<KV<String, Iterable<String>>>> result = outputManager.getOutput(outputTag);

  assertThat(result.size(), equalTo(3));

  assertThat(
      result,
      containsInAnyOrder(
          // Window [-10, 10) still produces an output, but with no values in it.
          // NOTE(review): presumably "v0" and "v1" were dropped as late for this window,
          // which would account for the counter value of 2 asserted below - confirm.
          WindowMatchers.isSingleWindowedValue(
              isKv(equalTo(KEY), emptyIterable()),
              equalTo(window(-10, 10).maxTimestamp()),
              equalTo(window(-10, 10))),

          // For this sliding window, the minimum output timestamp was 10, since we didn't want to
          // overlap with the previous window that was [-10, 10).
          WindowMatchers.isSingleWindowedValue(
              isKv(equalTo(KEY), containsInAnyOrder("v0", "v1", "v2")),
              equalTo(window(-10, 10).maxTimestamp().plus(1)),
              equalTo(window(0, 20))),
          // Likewise, the output timestamp for [10, 30) is just past the end of [0, 20).
          WindowMatchers.isSingleWindowedValue(
              isKv(equalTo(KEY), containsInAnyOrder("v2")),
              equalTo(window(0, 20).maxTimestamp().plus(1)),
              equalTo(window(10, 30)))));

  // Read the cumulative "dropped due to lateness" counter from the installed container.
  long droppedValues =
      container
          .getCounter(
              MetricName.named(
                  LateDataDroppingDoFnRunner.class,
                  LateDataDroppingDoFnRunner.DROPPED_DUE_TO_LATENESS))
          .getCumulative()
          .longValue();
  assertThat(droppedValues, equalTo(2L));
}
 
Example #25
Source File: StreamingGroupAlsoByWindowFnsTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a streaming GABW runner over sliding windows (size 20 ms, period 10 ms) with the input
 * watermark initially at 5, then checks the contents and output timestamp of each of the three
 * resulting windows.
 */
@Test
public void testSlidingWindows() throws Exception {
  TupleTag<KV<String, Iterable<String>>> outputTag = new TupleTag<>();
  ListOutputManager outputManager = new ListOutputManager();
  DoFnRunner<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> runner =
      makeRunner(
          outputTag,
          outputManager,
          WindowingStrategy.of(SlidingWindows.of(Duration.millis(20)).every(Duration.millis(10)))
              .withTimestampCombiner(TimestampCombiner.EARLIEST));

  // The watermark (5) has not yet passed the end of any window used below.
  when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(5));

  // First bundle: deliver three elements for the same key.
  runner.startBundle();

  WorkItem.Builder workItem1 = WorkItem.newBuilder();
  workItem1.setKey(ByteString.copyFromUtf8(KEY));
  workItem1.setWorkToken(WORK_TOKEN);
  InputMessageBundle.Builder messageBundle = workItem1.addMessageBundlesBuilder();
  messageBundle.setSourceComputationId(SOURCE_COMPUTATION_ID);

  Coder<String> valueCoder = StringUtf8Coder.of();
  // "v1" at t=5, in windows [-10, 10) and [0, 20).
  addElement(
      messageBundle,
      Arrays.asList(window(-10, 10), window(0, 20)),
      new Instant(5),
      valueCoder,
      "v1");
  // "v0" at t=2, in windows [-10, 10) and [0, 20).
  addElement(
      messageBundle,
      Arrays.asList(window(-10, 10), window(0, 20)),
      new Instant(2),
      valueCoder,
      "v0");
  // "v2" at t=15, in windows [0, 20) and [10, 30).
  addElement(
      messageBundle,
      Arrays.asList(window(0, 20), window(10, 30)),
      new Instant(15),
      valueCoder,
      "v2");

  runner.processElement(createValue(workItem1, valueCoder));

  runner.finishBundle();

  // Second bundle: deliver one watermark timer per window (at 9, 19 and 29, i.e. one
  // millisecond before each window's end) and move the watermark to 30.
  runner.startBundle();

  WorkItem.Builder workItem2 = WorkItem.newBuilder();
  workItem2.setKey(ByteString.copyFromUtf8(KEY));
  workItem2.setWorkToken(WORK_TOKEN);
  addTimer(workItem2, window(-10, 10), new Instant(9), Timer.Type.WATERMARK);
  addTimer(workItem2, window(0, 20), new Instant(19), Timer.Type.WATERMARK);
  addTimer(workItem2, window(10, 30), new Instant(29), Timer.Type.WATERMARK);
  when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(30));

  runner.processElement(createValue(workItem2, valueCoder));

  runner.finishBundle();

  List<WindowedValue<KV<String, Iterable<String>>>> result = outputManager.getOutput(outputTag);

  assertThat(result.size(), equalTo(3));

  assertThat(
      result,
      containsInAnyOrder(
          // Window [-10, 10) holds "v0" and "v1"; its output timestamp is 2, the smaller of
          // the two element timestamps.
          WindowMatchers.isSingleWindowedValue(
              isKv(equalTo(KEY), containsInAnyOrder("v0", "v1")),
              equalTo(new Instant(2)),
              equalTo(window(-10, 10))),

          // For this sliding window, the minimum output timestamp was 10, since we didn't want to
          // overlap with the previous window that was [-10, 10).
          WindowMatchers.isSingleWindowedValue(
              isKv(equalTo(KEY), containsInAnyOrder("v0", "v1", "v2")),
              equalTo(window(-10, 10).maxTimestamp().plus(1)),
              equalTo(window(0, 20))),
          // Likewise, the output timestamp for [10, 30) is just past the end of [0, 20).
          WindowMatchers.isSingleWindowedValue(
              isKv(equalTo(KEY), containsInAnyOrder("v2")),
              equalTo(window(0, 20).maxTimestamp().plus(1)),
              equalTo(window(10, 30)))));
}
 
Example #26
Source File: DefaultTriggerStateMachineTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Exercises the default trigger state machine over sliding windows of 100 ms that start every
 * 50 ms, checking which windows report ready-to-fire as the input watermark advances and that
 * no window is ever marked finished.
 */
@Test
public void testDefaultTriggerSlidingWindows() throws Exception {
  // The three windows touched by the injected elements, in start order.
  IntervalWindow earliest = new IntervalWindow(new Instant(-50), new Instant(50));
  IntervalWindow middle = new IntervalWindow(new Instant(0), new Instant(100));
  IntervalWindow latest = new IntervalWindow(new Instant(50), new Instant(150));

  tester =
      TriggerStateMachineTester.forTrigger(
          DefaultTriggerStateMachine.of(),
          SlidingWindows.of(Duration.millis(100)).every(Duration.millis(50)));

  // Element 1 lands in [-50, 50) and [0, 100); element 50 lands in [0, 100) and [50, 150).
  tester.injectElements(1, 50);

  // Before the watermark moves, nothing is ready.
  assertFalse(tester.shouldFire(earliest));
  assertFalse(tester.shouldFire(middle));
  assertFalse(tester.shouldFire(latest));

  // Watermark 50: only the earliest window is ready ...
  tester.advanceInputWatermark(new Instant(50));
  assertTrue(tester.shouldFire(earliest));
  assertFalse(tester.shouldFire(middle));
  assertFalse(tester.shouldFire(latest));
  // ... and it remains ready even after it has fired.
  tester.fireIfShouldFire(earliest);
  assertTrue(tester.shouldFire(earliest));
  assertFalse(tester.shouldFire(middle));
  assertFalse(tester.shouldFire(latest));

  // Watermark 99: still only the earliest window.
  tester.advanceInputWatermark(new Instant(99));
  assertTrue(tester.shouldFire(earliest));
  assertFalse(tester.shouldFire(middle));
  assertFalse(tester.shouldFire(latest));

  // Watermark 100: the earliest and the middle windows are both ready.
  tester.advanceInputWatermark(new Instant(100));
  assertTrue(tester.shouldFire(earliest));
  assertTrue(tester.shouldFire(middle));
  assertFalse(tester.shouldFire(latest));
  tester.fireIfShouldFire(earliest);

  // Firing never marks a window as finished.
  assertFalse(tester.isMarkedFinished(earliest));
  assertFalse(tester.isMarkedFinished(middle));
  assertFalse(tester.isMarkedFinished(latest));
}
 
Example #27
Source File: ReduceFnRunnerTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Checks the "dropped due to closed window" counter when an element is assigned to several
 * sliding windows and the trigger has already finished for some, or all, of them.
 */
@Test
public void testDropDataMultipleWindowsFinishedTrigger() throws Exception {
  MetricsContainerImpl container = new MetricsContainerImpl("any");
  MetricsEnvironment.setCurrentContainer(container);

  // The counter that is inspected after every injection below.
  MetricName closedWindowDrops =
      MetricName.named(ReduceFnRunner.class, ReduceFnRunner.DROPPED_DUE_TO_CLOSED_WINDOW);

  // 100 ms windows every 30 ms, firing once past the end of the window, lateness 1000 ms.
  ReduceFnTester<Integer, Integer, IntervalWindow> tester =
      ReduceFnTester.combining(
          WindowingStrategy.of(SlidingWindows.of(Duration.millis(100)).every(Duration.millis(30)))
              .withTrigger(AfterWatermark.pastEndOfWindow())
              .withAllowedLateness(Duration.millis(1000)),
          Sum.ofIntegers(),
          VarIntCoder.of());

  // 10@23 is assigned to [-60, 40), [-30, 70), [0, 100);
  // 12@40 is assigned to [-30, 70), [0, 100), [30, 130).
  tester.injectElements(
      TimestampedValue.of(10, new Instant(23)), TimestampedValue.of(12, new Instant(40)));

  long droppedWhileAllOpen = container.getCounter(closedWindowDrops).getCumulative();
  assertEquals(0, droppedWhileAllOpen);

  // Once the watermark reaches 70, the trigger has closed [-30, 70).
  tester.advanceInputWatermark(new Instant(70));
  // 14@60 is assigned to [-30, 70), [0, 100), [30, 130); of these only [-30, 70) is closed.
  // NOTE(review): with size 100 / period 30 this timestamp presumably also falls in
  // [60, 160), which is still open and does not affect the count - confirm.
  tester.injectElements(TimestampedValue.of(14, new Instant(60)));

  long droppedAfterFirstClose = container.getCounter(closedWindowDrops).getCumulative();
  assertEquals(1, droppedAfterFirstClose);

  // At watermark 130 every window of the next element is closed.
  tester.advanceInputWatermark(new Instant(130));
  // 16@40 is assigned to [-30, 70), [0, 100), [30, 130): three more drops, four in total.
  tester.injectElements(TimestampedValue.of(16, new Instant(40)));

  long droppedAfterAllClosed = container.getCounter(closedWindowDrops).getCumulative();
  assertEquals(4, droppedAfterAllClosed);
}
 
Example #28
Source File: BeamTableFunctionScanRel.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the configuration of this DoFn.
 *
 * @param windowFn sliding-window function kept in {@code this.windowFn}
 * @param windowFieldIndex field index kept in {@code this.windowFieldIndex}
 * @param schema schema kept as the output schema of this DoFn
 */
public SlidingWindowDoFn(SlidingWindows windowFn, int windowFieldIndex, Schema schema) {
  this.outputSchema = schema;
  this.windowFieldIndex = windowFieldIndex;
  this.windowFn = windowFn;
}
 
Example #29
Source File: BeamAggregationRule.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the Beam {@link WindowFn} that corresponds to the SQL windowing function identified by
 * {@code operatorKind}.
 *
 * <p>Supported {@link SqlKind}s:
 *
 * <ul>
 *   <li>{@link SqlKind#TUMBLE}, mapped to {@link FixedWindows};
 *   <li>{@link SqlKind#HOP}, mapped to {@link SlidingWindows};
 *   <li>{@link SqlKind#SESSION}, mapped to {@link Sessions};
 * </ul>
 *
 * <p>For example:
 *
 * <pre>{@code
 * SELECT event_timestamp, COUNT(*)
 * FROM PCOLLECTION
 * GROUP BY TUMBLE(event_timestamp, INTERVAL '1' HOUR)
 * }</pre>
 *
 * <p>SQL window functions accept an optional {@code window_offset} parameter that indicates how
 * the window definition is offset from the event time. The offset is zero when not specified.
 *
 * <p>The Beam model does not support an offset for session windows, so this method throws
 * {@link UnsupportedOperationException} when an offset is given for {@link SqlKind#SESSION}.
 *
 * @param parameters operands of the SQL window function call; durations are read from index 1
 *     onwards
 * @param operatorKind kind of the SQL window function
 * @return the matching window function, or {@code null} for any other {@code operatorKind}
 * @throws UnsupportedOperationException if a third parameter is supplied for a session window
 */
private static @Nullable WindowFn createWindowFn(List<RexNode> parameters, SqlKind operatorKind) {
  switch (operatorKind) {
    case TUMBLE:
      {
        // Fixed-size, non-overlapping time windows, e.g. "every hour, aggregate the elements
        // of the previous hour".
        //
        // SQL syntax:
        //   TUMBLE(monotonic_field, window_size [, window_offset])
        //
        // Example:
        //   TUMBLE(event_timestamp_field, INTERVAL '1' HOUR)
        FixedWindows tumbling = FixedWindows.of(durationParameter(parameters, 1));
        return parameters.size() == 3
            ? tumbling.withOffset(durationParameter(parameters, 2))
            : tumbling;
      }
    case HOP:
      {
        // Fixed-size, overlapping time windows, e.g. "every minute, aggregate the elements of
        // the previous hour".
        //
        // SQL syntax:
        //   HOP(monotonic_field, emit_frequency, window_size [, window_offset])
        SlidingWindows hopping =
            SlidingWindows.of(durationParameter(parameters, 2))
                .every(durationParameter(parameters, 1));
        return parameters.size() == 4
            ? hopping.withOffset(durationParameter(parameters, 3))
            : hopping;
      }
    case SESSION:
      {
        // Session windows, e.g. "aggregate events until a gap of 1 minute without events".
        //
        // SQL syntax:
        //   SESSION(monotonic_field, session_gap)
        //
        // Example:
        //   SESSION(event_timestamp_field, INTERVAL '1' MINUTE)
        Sessions gapSessions = Sessions.withGapDuration(durationParameter(parameters, 1));
        if (parameters.size() != 3) {
          return gapSessions;
        }
        throw new UnsupportedOperationException(
            "Specifying alignment (offset) is not supported for session windows");
      }
    default:
      return null;
  }
}
 
Example #30
Source File: BeamWindowStepHandler.java    From hop with Apache License 2.0 4 votes vote down vote up
/**
 * Applies the window configured on a Beam Window transform to {@code input} and registers the
 * resulting collection in {@code stepCollectionMap} under the transform's name.
 *
 * <p>Supported window types are fixed, sliding, session and global. The duration (and, for
 * sliding windows, the "every" period) are resolved through variable substitution and
 * interpreted as whole seconds. When any of the start/end/max window fields is configured, an
 * extra {@code WindowInfoFn} step is appended to add window information to the rows.
 *
 * @param log channel used to report that the transform was handled
 * @param transformMeta the Beam Window transform being converted
 * @param stepCollectionMap output map from transform name to the collection it produces
 * @param pipeline the Beam pipeline under construction (not used directly here)
 * @param inputRowMeta row layout of {@code input}
 * @param previousSteps transforms feeding this one; only their count is logged
 * @param input the collection to window
 * @throws HopException if the window type is missing or unsupported, if the duration is not a
 *     positive number (or is below 600 seconds for session windows), or if the sliding-window
 *     period is not a positive number
 */
@Override public void handleStep( ILogChannel log, TransformMeta transformMeta, Map<String, PCollection<HopRow>> stepCollectionMap,
                                  Pipeline pipeline, IRowMeta inputRowMeta, List<TransformMeta> previousSteps,
                                  PCollection<HopRow> input ) throws HopException {

  BeamWindowMeta beamWindowMeta = (BeamWindowMeta) transformMeta.getTransform();

  if ( StringUtils.isEmpty( beamWindowMeta.getWindowType() ) ) {
    throw new HopException( "Please specify a window type in Beam Window transform '" + transformMeta.getName() + "'" );
  }

  // An unparsable or missing duration becomes -1 and is rejected by the per-type checks below.
  String duration = pipelineMeta.environmentSubstitute( beamWindowMeta.getDuration() );
  long durationSeconds = Const.toLong( duration, -1L );

  PCollection<HopRow> stepPCollection;

  if ( BeamDefaults.WINDOW_TYPE_FIXED.equals( beamWindowMeta.getWindowType() ) ) {

    if ( durationSeconds <= 0 ) {
      throw new HopException( "Please specify a valid positive window size (duration) for Beam window transform '" + transformMeta.getName() + "'" );
    }

    FixedWindows fixedWindows = FixedWindows
      .of( Duration.standardSeconds( durationSeconds ) );
    stepPCollection = input.apply( Window.into( fixedWindows ) );

  } else if ( BeamDefaults.WINDOW_TYPE_SLIDING.equals( beamWindowMeta.getWindowType() ) ) {

    if ( durationSeconds <= 0 ) {
      throw new HopException( "Please specify a valid positive window size (duration) for Beam window transform '" + transformMeta.getName() + "'" );
    }

    String every = pipelineMeta.environmentSubstitute( beamWindowMeta.getEvery() );
    long everySeconds = Const.toLong( every, -1L );

    // Reject a missing or non-positive period here, with a message that names the transform,
    // instead of handing a negative duration to SlidingWindows.
    if ( everySeconds <= 0 ) {
      throw new HopException( "Please specify a valid positive sliding period (every) for Beam window transform '" + transformMeta.getName() + "'" );
    }

    SlidingWindows slidingWindows = SlidingWindows
      .of( Duration.standardSeconds( durationSeconds ) )
      .every( Duration.standardSeconds( everySeconds ) );
    stepPCollection = input.apply( Window.into( slidingWindows ) );

  } else if ( BeamDefaults.WINDOW_TYPE_SESSION.equals( beamWindowMeta.getWindowType() ) ) {

    if ( durationSeconds < 600 ) {
      throw new HopException(
        "Please specify a window size (duration) of at least 600 (10 minutes) for Beam window transform '" + transformMeta.getName() + "'.  This is the minimum gap between session windows." );
    }

    Sessions sessionWindows = Sessions
      .withGapDuration( Duration.standardSeconds( durationSeconds ) );
    stepPCollection = input.apply( Window.into( sessionWindows ) );

  } else if ( BeamDefaults.WINDOW_TYPE_GLOBAL.equals( beamWindowMeta.getWindowType() ) ) {

    stepPCollection = input.apply( Window.into( new GlobalWindows() ) );

  } else {
    throw new HopException( "Beam Window type '" + beamWindowMeta.getWindowType() + "' is not supported in transform '" + transformMeta.getName() + "'" );
  }

  // Now get window information about the window if we asked about it...
  //
  if ( StringUtils.isNotEmpty( beamWindowMeta.getStartWindowField() ) ||
    StringUtils.isNotEmpty( beamWindowMeta.getEndWindowField() ) ||
    StringUtils.isNotEmpty( beamWindowMeta.getMaxWindowField() ) ) {

    // Forward each configured field exactly once, so that the end-window field is honored.
    // NOTE(review): argument order assumed to be (name, max, start, end, ...) - confirm
    // against the WindowInfoFn constructor.
    WindowInfoFn windowInfoFn = new WindowInfoFn(
      transformMeta.getName(),
      pipelineMeta.environmentSubstitute( beamWindowMeta.getMaxWindowField() ),
      pipelineMeta.environmentSubstitute( beamWindowMeta.getStartWindowField() ),
      pipelineMeta.environmentSubstitute( beamWindowMeta.getEndWindowField() ),
      JsonRowMeta.toJson( inputRowMeta ),
      transformPluginClasses,
      xpPluginClasses
    );

    stepPCollection = stepPCollection.apply( ParDo.of( windowInfoFn ) );
  }

  // Save this in the map
  //
  stepCollectionMap.put( transformMeta.getName(), stepPCollection );
  log.logBasic( "Handled transform (WINDOW) : " + transformMeta.getName() + ", gets data from " + previousSteps.size() + " previous transform(s)" );
}