org.apache.beam.sdk.transforms.windowing.Window Java Examples

The following examples show how to use org.apache.beam.sdk.transforms.windowing.Window. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ReshuffleTest.java    From beam with Apache License 2.0 7 votes vote down vote up
/**
 * Checks that applying {@code Reshuffle} to a windowed input yields the same elements and the
 * same windowing strategy as the input.
 *
 * <p>NOTE(review): despite the method name, the input is assigned to ten-minute fixed windows,
 * not sliding windows. Confirm whether that is intentional.
 */
@Test
@Category(ValidatesRunner.class)
public void testReshuffleAfterSlidingWindows() {
  FixedWindows tenMinuteWindows = FixedWindows.of(Duration.standardMinutes(10L));

  PCollection<KV<String, Integer>> input =
      pipeline
          .apply(
              Create.of(ARBITRARY_KVS)
                  .withCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of())))
          .apply(Window.into(tenMinuteWindows));
  PCollection<KV<String, Integer>> output = input.apply(Reshuffle.of());

  // The elements must come through unchanged ...
  PAssert.that(output).containsInAnyOrder(ARBITRARY_KVS);
  // ... and so must the windowing strategy.
  assertEquals(input.getWindowingStrategy(), output.getWindowingStrategy());

  pipeline.run();
}
 
Example #2
Source File: SqlQuery3.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Assigns the events to fixed windows of {@code configuration.windowSizeSec} seconds, turns the
 * new-auction and new-person events into rows, evaluates {@code QUERY} over both row collections
 * and converts the result rows to {@code NameCityStateId}.
 */
@Override
public PCollection<NameCityStateId> expand(PCollection<Event> allEvents) {
  Duration windowSize = Duration.standardSeconds(configuration.windowSizeSec);
  PCollection<Event> windowed = allEvents.apply(Window.into(FixedWindows.of(windowSize)));

  // Rows for the events that carry a new auction.
  String auctionName = Auction.class.getSimpleName();
  String auctionFilterStep = getName() + ".Filter." + auctionName;
  String auctionRecordStep = getName() + ".ToRecords." + auctionName;
  PCollection<Row> auctions =
      windowed
          .apply(auctionFilterStep, Filter.by(event -> event.newAuction != null))
          .apply(auctionRecordStep, new SelectEvent(Type.AUCTION));

  // Rows for the events that carry a new person.
  String personName = Person.class.getSimpleName();
  String personFilterStep = getName() + ".Filter." + personName;
  String personRecordStep = getName() + ".ToRecords." + personName;
  PCollection<Row> people =
      windowed
          .apply(personFilterStep, Filter.by(event -> event.newPerson != null))
          .apply(personRecordStep, new SelectEvent(Type.PERSON));

  // The tuple tags are the table names the query refers to.
  PCollectionTuple inputStreams =
      PCollectionTuple.of(new TupleTag<>("Auction"), auctions)
          .and(new TupleTag<>("Person"), people);

  PCollection<Row> queryResult =
      inputStreams.apply(SqlTransform.query(QUERY).withQueryPlannerClass(plannerClass));
  return queryResult.apply(Convert.fromRows(NameCityStateId.class));
}
 
Example #3
Source File: LeaderBoard.java    From deployment-examples with MIT License 6 votes vote down vote up
/**
 * Windows the game events into fixed windows of {@code teamWindowDuration} and then extracts and
 * sums the team/score pairs.
 */
@Override
public PCollection<KV<String, Integer>> expand(PCollection<GameActionInfo> infos) {
  // Early (speculative) panes fire FIVE_MINUTES after the first element of a pane, late panes
  // TEN_MINUTES after the first late element; fired panes accumulate, so late data is processed
  // cumulatively.
  Window<GameActionInfo> teamWindows =
      Window.<GameActionInfo>into(FixedWindows.of(teamWindowDuration))
          .triggering(
              AfterWatermark.pastEndOfWindow()
                  .withEarlyFirings(
                      AfterProcessingTime.pastFirstElementInPane().plusDelayOf(FIVE_MINUTES))
                  .withLateFirings(
                      AfterProcessingTime.pastFirstElementInPane().plusDelayOf(TEN_MINUTES)))
          .withAllowedLateness(allowedLateness)
          .accumulatingFiredPanes();

  PCollection<GameActionInfo> windowedInfos =
      infos.apply("LeaderboardTeamFixedWindows", teamWindows);

  // Extract and sum teamname/score pairs from the event data.
  return windowedInfos.apply("ExtractTeamScore", new ExtractAndSumScore("team"));
}
 
Example #4
Source File: ReduceFnRunner.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Decides whether a pane has to be emitted.
 *
 * <p>A pane is emitted when it has elements, when it is an empty ON_TIME pane and the on-time
 * behavior is FIRE_ALWAYS, or when it is the final pane and the closing behavior is FIRE_ALWAYS.
 */
private boolean needToEmit(boolean isEmpty, boolean isFinished, PaneInfo.Timing timing) {
  // The checks short-circuit in the same order as three separate early returns would.
  return !isEmpty
      || (timing == Timing.ON_TIME
          && windowingStrategy.getOnTimeBehavior() == Window.OnTimeBehavior.FIRE_ALWAYS)
      || (isFinished && windowingStrategy.getClosingBehavior() == ClosingBehavior.FIRE_ALWAYS);
}
 
Example #5
Source File: DistinctTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that windowing configured inside {@code applyIf(true, ...)} is recorded on the built
 * {@code Distinct} operator: window function, trigger and accumulation mode.
 */
@Test
public void testWindow_applyIf() {
  final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings());
  final PCollection<String> uniq =
      Distinct.of(dataset)
          .applyIf(
              true,
              b ->
                  b.windowBy(FixedWindows.of(Duration.standardHours(1)))
                      .triggeredBy(DefaultTrigger.of())
                      .discardingFiredPanes())
          .output();
  final Distinct distinct = (Distinct) TestUtils.getProducer(uniq);
  assertTrue(distinct.getWindow().isPresent());
  // Casting to Window<?> instead of the raw Window type keeps the call type-safe, so no
  // unchecked-warning suppression is required.
  final WindowDesc<?> windowDesc = WindowDesc.of((Window<?>) distinct.getWindow().get());
  assertEquals(FixedWindows.of(Duration.standardHours(1)), windowDesc.getWindowFn());
  assertEquals(DefaultTrigger.of(), windowDesc.getTrigger());
  assertEquals(AccumulationMode.DISCARDING_FIRED_PANES, windowDesc.getAccumulationMode());
}
 
Example #6
Source File: GroupByKeyTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the output of {@code GroupByKey} over a one-minute fixed-windowed (empty) input
 * reports a window function compatible with one-minute fixed windows.
 */
@Test
@Category(NeedsRunner.class)
public void testIdentityWindowFnPropagation() {
  List<KV<String, Integer>> noPairs = Arrays.asList();

  PCollection<KV<String, Integer>> windowedInput =
      p.apply(
              Create.of(noPairs)
                  .withCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())))
          .apply(Window.into(FixedWindows.of(Duration.standardMinutes(1))));

  PCollection<KV<String, Iterable<Integer>>> grouped = windowedInput.apply(GroupByKey.create());

  p.run();

  boolean compatible =
      grouped
          .getWindowingStrategy()
          .getWindowFn()
          .isCompatible(FixedWindows.of(Duration.standardMinutes(1)));
  Assert.assertTrue(compatible);
}
 
Example #7
Source File: CombineTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Combines timestamped key/value pairs globally and per key after assigning them to session
 * windows with a five-millisecond gap.
 */
@Test
@Category(ValidatesRunner.class)
public void testSessionsCombine() {
  KvCoder<String, Integer> kvCoder = KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of());

  PCollection<KV<String, Integer>> sessioned =
      pipeline
          .apply(
              Create.timestamped(
                      TimestampedValue.of(KV.of("a", 1), new Instant(0L)),
                      TimestampedValue.of(KV.of("a", 1), new Instant(4L)),
                      TimestampedValue.of(KV.of("a", 4), new Instant(7L)),
                      TimestampedValue.of(KV.of("b", 1), new Instant(10L)),
                      TimestampedValue.of(KV.of("b", 13), new Instant(16L)))
                  .withCoder(kvCoder))
          .apply(Window.into(Sessions.withGapDuration(Duration.millis(5))));

  // Global sum over the values only.
  PCollection<Integer> values = sessioned.apply(Values.create());
  PCollection<Integer> sum =
      values.apply(Combine.globally(new SumInts()).withoutDefaults());

  // Per-key combination of the same input.
  PCollection<KV<String, String>> sumPerKey =
      sessioned.apply(Combine.perKey(new TestCombineFn()));

  PAssert.that(sum).containsInAnyOrder(7, 13);
  PAssert.that(sumPerKey)
      .containsInAnyOrder(Arrays.asList(KV.of("a", "114"), KV.of("b", "1"), KV.of("b", "13")));
  pipeline.run();
}
 
Example #8
Source File: BeamSqlDslJoinTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Self-joins an unbounded, fixed-windowed collection of orders through SQL and compares the
 * joined rows with the expected ones.
 */
@Test
public void testJoinsUnboundedWithinWindowsWithDefaultTrigger() throws Exception {
  String sql =
      "SELECT o1.order_id, o1.price, o1.site_id, o2.order_id, o2.price, o2.site_id  "
          + "FROM ORDER_DETAILS1 o1"
          + " JOIN ORDER_DETAILS2 o2"
          + " on "
          + " o1.order_id=o2.site_id AND o2.price=o1.site_id";

  FixedWindows fiftySecondWindows = FixedWindows.of(Duration.standardSeconds(50));
  PCollection<Row> orders = ordersUnbounded().apply("window", Window.into(fiftySecondWindows));

  // The same windowed collection is registered under both table names.
  PCollectionTuple inputs = tuple("ORDER_DETAILS1", orders, "ORDER_DETAILS2", orders);
  PCollection<Row> joined = inputs.apply("sql", SqlTransform.query(sql));

  PAssert.that(joined)
      .containsInAnyOrder(
          TestUtils.RowsBuilder.of(RESULT_ROW_TYPE)
              .addRows(1, 2, 2, 2, 2, 1, 1, 4, 3, 3, 3, 1)
              .getRows());

  pipeline.run();
}
 
Example #9
Source File: CombineTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Counts elements per value under sliding windows that are two milliseconds wide and start every
 * millisecond.
 */
@Test
public void testCountPerElementWithSlidingWindows() {
  SlidingWindows slidingWindows = SlidingWindows.of(Duration.millis(2)).every(Duration.millis(1));

  PCollection<String> windowedInput =
      pipeline
          .apply(
              Create.timestamped(
                  TimestampedValue.of("a", new Instant(1)),
                  TimestampedValue.of("a", new Instant(2)),
                  TimestampedValue.of("b", new Instant(3)),
                  TimestampedValue.of("b", new Instant(4))))
          .apply(Window.into(slidingWindows));
  PCollection<KV<String, Long>> counts = windowedInput.apply(Count.perElement());

  // Three counts are expected for each of the two values.
  PAssert.that(counts)
      .containsInAnyOrder(
          KV.of("a", 1L),
          KV.of("a", 2L),
          KV.of("a", 1L),
          KV.of("b", 1L),
          KV.of("b", 2L),
          KV.of("b", 1L));
  pipeline.run();
}
 
Example #10
Source File: ReduceWindowTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that windowing configured inside {@code applyIf(true, ...)} is recorded on the built
 * {@code ReduceWindow} operator: window function, trigger and accumulation mode.
 */
@Test
public void testWindow_applyIf() {
  final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings());
  final PCollection<Long> output =
      ReduceWindow.of(dataset)
          .reduceBy(e -> 1L)
          .withSortedValues(String::compareTo)
          .applyIf(
              true,
              b ->
                  b.windowBy(FixedWindows.of(org.joda.time.Duration.standardHours(1)))
                      .triggeredBy(DefaultTrigger.of())
                      .discardingFiredPanes())
          .output();
  final ReduceWindow rw = (ReduceWindow) TestUtils.getProducer(output);
  assertTrue(rw.getWindow().isPresent());
  // Casting to Window<?> instead of the raw Window type keeps the call type-safe, so no
  // unchecked-warning suppression is required.
  final WindowDesc<?> windowDesc = WindowDesc.of((Window<?>) rw.getWindow().get());
  assertEquals(
      FixedWindows.of(org.joda.time.Duration.standardHours(1)), windowDesc.getWindowFn());
  assertEquals(DefaultTrigger.of(), windowDesc.getTrigger());
  assertEquals(AccumulationMode.DISCARDING_FIRED_PANES, windowDesc.getAccumulationMode());
}
 
Example #11
Source File: BigQueryMerger.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Re-windows the input into the global window with a repeated processing-time trigger aligned to
 * {@code intervalDuration}, groups by key, and emits the first value of each group.
 */
@Override
public PCollection<KV<K, V>> expand(PCollection<KV<K, V>> input) {
  Window<KV<K, V>> periodicGlobalWindow =
      Window.<KV<K, V>>into(new GlobalWindows())
          .discardingFiredPanes()
          .triggering(
              Repeatedly.forever(
                  AfterProcessingTime.pastFirstElementInPane()
                      .plusDelayOf(Duration.ZERO)
                      .alignedTo(intervalDuration, org.joda.time.Instant.now())));

  // Emits only the first value the grouped iterable yields for a key; empty groups emit nothing.
  DoFn<KV<K, Iterable<V>>, KV<K, V>> keepFirstValue =
      new DoFn<KV<K, Iterable<V>>, KV<K, V>>() {
        @ProcessElement
        public void process(ProcessContext c) {
          LOG.debug("TS: {} | Element: {} | Pane: {}", c.timestamp(), c.element(), c.pane());
          Iterator<V> groupedValues = c.element().getValue().iterator();
          if (!groupedValues.hasNext()) {
            return;
          }
          c.output(KV.of(c.element().getKey(), groupedValues.next()));
        }
      };

  return input
      .apply(periodicGlobalWindow)
      .apply(GroupByKey.create())
      .apply(ParDo.of(keepFirstValue));
}
 
Example #12
Source File: PeriodicImpulse.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Produces a periodic sequence of instants described by {@code startTimestamp}, {@code
 * stopTimestamp} and {@code fireInterval}; when {@code applyWindowing} is set, the instants are
 * additionally assigned to fixed windows as long as the fire interval.
 */
@Override
public PCollection<Instant> expand(PBegin input) {
  PeriodicSequence.SequenceDefinition definition =
      new PeriodicSequence.SequenceDefinition(startTimestamp, stopTimestamp, fireInterval);

  PCollection<Instant> impulses =
      input
          .apply(Create.<PeriodicSequence.SequenceDefinition>of(definition))
          .apply(PeriodicSequence.create());

  if (!this.applyWindowing) {
    return impulses;
  }

  return impulses.apply(
      Window.<Instant>into(FixedWindows.of(Duration.millis(fireInterval.getMillis()))));
}
 
Example #13
Source File: CombineTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Sums the integers 1..5 globally with fanout under an accumulating, per-element trigger in the
 * global window and asserts that 15 is among the results passed through {@code GetLast}.
 */
@Test
@Category(ValidatesRunner.class)
public void testHotKeyCombiningWithAccumulationMode() {
  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3, 4, 5));

  // Fire after every element and keep accumulating across fired panes.
  Window<Integer> accumulatingPerElement =
      Window.<Integer>into(new GlobalWindows())
          .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
          .accumulatingFiredPanes()
          .withAllowedLateness(new Duration(0), ClosingBehavior.FIRE_ALWAYS);

  PCollection<Integer> output =
      numbers
          .apply(accumulatingPerElement)
          .apply(Sum.integersGlobally().withoutDefaults().withFanout(2))
          .apply(ParDo.of(new GetLast()));

  PAssert.that(output)
      .satisfies(
          results -> {
            assertThat(results, hasItem(15));
            return null;
          });

  pipeline.run();
}
 
Example #14
Source File: DistinctTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that the {@code Distinct} builder records the operator name and the complete window
 * description: window function, trigger, accumulation mode and allowed lateness.
 */
@Test
public void testBuild() {
  final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings());
  final FixedWindows windowing = FixedWindows.of(org.joda.time.Duration.standardHours(1));
  final DefaultTrigger trigger = DefaultTrigger.of();
  final PCollection<String> uniq =
      Distinct.named("Distinct1")
          .of(dataset)
          .windowBy(windowing)
          .triggeredBy(trigger)
          .discardingFiredPanes()
          .withAllowedLateness(Duration.millis(1000))
          .output();
  final Distinct distinct = (Distinct) TestUtils.getProducer(uniq);
  assertTrue(distinct.getName().isPresent());
  assertEquals("Distinct1", distinct.getName().get());

  assertTrue(distinct.getWindow().isPresent());
  // Casting to Window<?> instead of the raw Window type keeps the call type-safe, so no
  // unchecked-warning suppression is required.
  final WindowDesc<?> windowDesc = WindowDesc.of((Window<?>) distinct.getWindow().get());
  assertEquals(windowing, windowDesc.getWindowFn());
  assertEquals(trigger, windowDesc.getTrigger());
  assertEquals(AccumulationMode.DISCARDING_FIRED_PANES, windowDesc.getAccumulationMode());
  assertEquals(Duration.millis(1000), windowDesc.getAllowedLateness());
}
 
Example #15
Source File: CountByKeyTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that the {@code CountByKey} builder records the operator name, a key extractor and
 * the complete window description: window function, trigger, accumulation mode and allowed
 * lateness.
 */
@Test
public void testBuild() {
  final PCollection<String> words = TestUtils.createMockDataset(TypeDescriptors.strings());
  final FixedWindows hourlyWindows = FixedWindows.of(org.joda.time.Duration.standardHours(1));
  final DefaultTrigger defaultTrigger = DefaultTrigger.of();

  final PCollection<KV<String, Long>> output =
      CountByKey.named("CountByKey1")
          .of(words)
          .keyBy(word -> word)
          .windowBy(hourlyWindows)
          .triggeredBy(defaultTrigger)
          .discardingFiredPanes()
          .withAllowedLateness(Duration.millis(1000))
          .output();

  // Name and key extractor of the operator that produced the output.
  final CountByKey operator = (CountByKey) TestUtils.getProducer(output);
  assertTrue(operator.getName().isPresent());
  assertEquals("CountByKey1", operator.getName().get());
  assertNotNull(operator.getKeyExtractor());

  // Windowing settings recorded on the operator.
  assertTrue(operator.getWindow().isPresent());
  final WindowDesc<?> windowDesc = WindowDesc.of((Window<?>) operator.getWindow().get());
  assertEquals(hourlyWindows, windowDesc.getWindowFn());
  assertEquals(defaultTrigger, windowDesc.getTrigger());
  assertEquals(AccumulationMode.DISCARDING_FIRED_PANES, windowDesc.getAccumulationMode());
  assertEquals(Duration.millis(1000), windowDesc.getAllowedLateness());
}
 
Example #16
Source File: MyBeamJob.java    From hazelcast-jet-demos with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a pipeline that reads from {@code MyUnboundedSource}, reformats the elements, windows
 * them and writes them to files prefixed {@code beam-output} in the current directory.
 *
 * @param pipelineOptions options the pipeline is created with
 * @return the assembled pipeline; it is not run here
 */
public static Pipeline build(PipelineOptions pipelineOptions) {
    Pipeline pipeline = Pipeline.create(pipelineOptions);

    // Fixed windows of ONE_SECOND that fire repeatedly once a pane has its first element,
    // discard fired panes and allow ONE_SECOND of lateness.
    Window<String> windowing =
        Window.<String>into(FixedWindows.of(ONE_SECOND))
            .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()))
            .discardingFiredPanes()
            .withAllowedLateness(ONE_SECOND);

    pipeline
        .apply("unbounded-source", Read.from(new MyUnboundedSource("beam-input")))
        .apply("reformat-and-timestamp", ParDo.of(new MyEnrichAndReformatFn()))
        .apply("window", windowing)
        .apply(
            "sink",
            FileIO.<String>write()
                .via(TextIO.sink())
                .to(".")
                .withPrefix("beam-output")
                .withNumShards(1));

    return pipeline;
}
 
Example #17
Source File: StreamWordCount.java    From beam-starter with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a streaming word count on the Flink runner: reads string values from the Kafka topic
 * {@code beam}, windows them into fixed windows of {@code options.getWindowSize()} minutes,
 * counts the words and writes the formatted counts to {@code options.getOutput()}.
 */
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  options.setRunner(FlinkRunner.class);

  Pipeline p = Pipeline.create(options);

  KafkaIO.Read<byte[], String> kafkaIOReader =
      KafkaIO.read()
          .withBootstrapServers("192.168.99.100:32771")
          .withTopics(Arrays.asList("beam".split(",")))
          .updateConsumerProperties(ImmutableMap.of("auto.offset.reset", (Object) "earliest"))
          .withValueCoder(StringUtf8Coder.of());

  Window<String> fixedWindows =
      Window.<String>into(FixedWindows.of(Duration.standardMinutes(options.getWindowSize())));

  p.apply(kafkaIOReader.withoutMetadata())
      .apply(Values.<String>create())
      .apply(fixedWindows)
      .apply(new CountWords())
      .apply(MapElements.via(new FormatAsTextFn()))
      .apply("WriteCounts", TextIO.Write.to(options.getOutput()));

  p.run();
}
 
Example #18
Source File: PCollectionTranslationTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Supplies the PCollections used as test parameters: plain ints, unbounded longs, fixed-windowed
 * longs, longs with an alternative coder and grouped strings.
 *
 * <p>NOTE(review): the collection with the custom coder and custom windows is applied to the
 * pipeline but is not part of the returned list. Confirm whether it should be a parameter too.
 */
@Parameters(name = "{index}: {0}")
public static Iterable<PCollection<?>> data() {
  Pipeline pipeline = TestPipeline.create();
  FixedWindows tenMinuteWindows = FixedWindows.of(Duration.standardMinutes(10L));

  PCollection<Integer> ints = pipeline.apply("ints", Create.of(1, 2, 3));
  PCollection<Long> longs = pipeline.apply("unbounded longs", GenerateSequence.from(0));
  PCollection<Long> windowedLongs =
      longs.apply("into fixed windows", Window.into(tenMinuteWindows));

  PCollection<KV<String, Iterable<String>>> groupedStrings =
      pipeline
          .apply(
              "kvs", Create.of(KV.of("foo", "spam"), KV.of("bar", "ham"), KV.of("baz", "eggs")))
          .apply("group", GroupByKey.create());

  PCollection<Long> coderLongs =
      pipeline
          .apply("counts with alternative coder", GenerateSequence.from(0).to(10))
          .setCoder(BigEndianLongCoder.of());

  // Built for its effect on the pipeline only; the resulting collection is not returned.
  pipeline
      .apply(
          "intsWithCustomCoder",
          Create.of(1, 2).withCoder(new AutoValue_PCollectionTranslationTest_CustomIntCoder()))
      .apply(
          "into custom windows",
          Window.into(new CustomWindows())
              .triggering(
                  AfterWatermark.pastEndOfWindow()
                      .withEarlyFirings(
                          AfterFirst.of(
                              AfterPane.elementCountAtLeast(5),
                              AfterProcessingTime.pastFirstElementInPane()
                                  .plusDelayOf(Duration.millis(227L)))))
              .accumulatingFiredPanes()
              .withAllowedLateness(Duration.standardMinutes(12L)));

  return ImmutableList.of(ints, longs, windowedLongs, coderLongs, groupedStrings);
}
 
Example #19
Source File: SplittableDoFnTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the splittable DoFn from {@code sdfWithSideInput} over a main input in two-millisecond
 * windows with a singleton side input in four-millisecond windows, and checks that every main
 * element is paired with the expected side value.
 */
private void testWindowedSideInput(IsBounded bounded) {
  // Main input: the integers 0..7, each stamped with its own value as timestamp.
  PCollection<Integer> mainInput =
      p.apply(
              "main",
              Create.timestamped(
                  TimestampedValue.of(0, new Instant(0)),
                  TimestampedValue.of(1, new Instant(1)),
                  TimestampedValue.of(2, new Instant(2)),
                  TimestampedValue.of(3, new Instant(3)),
                  TimestampedValue.of(4, new Instant(4)),
                  TimestampedValue.of(5, new Instant(5)),
                  TimestampedValue.of(6, new Instant(6)),
                  TimestampedValue.of(7, new Instant(7))))
          .apply("window 2", Window.into(FixedWindows.of(Duration.millis(2))));

  // Side input: "a" at timestamp 0 and "b" at timestamp 4, one value per four-millisecond window.
  PCollection<String> sideValues =
      p.apply(
              "side",
              Create.timestamped(
                  TimestampedValue.of("a", new Instant(0)),
                  TimestampedValue.of("b", new Instant(4))))
          .apply("window 4", Window.into(FixedWindows.of(Duration.millis(4))));
  PCollectionView<String> sideInput = sideValues.apply("singleton", View.asSingleton());

  PCollection<String> paired =
      mainInput.apply(ParDo.of(sdfWithSideInput(bounded, sideInput)).withSideInputs(sideInput));

  PAssert.that(paired)
      .containsInAnyOrder("a:0", "a:1", "a:2", "a:3", "b:4", "b:5", "b:6", "b:7");

  p.run();
}
 
Example #20
Source File: WindowAssignTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a {@code Window.Assign} transform: the input message stream is flat-mapped through
 * a {@code WindowAssignOp} built from the output's window function, and the resulting stream is
 * registered for the transform's output.
 */
@Override
public void translate(
    Window.Assign<T> transform, TransformHierarchy.Node node, TranslationContext ctx) {
  final PCollection<T> windowedOutput = ctx.getOutput(transform);

  // The window function is taken from the output's windowing strategy.
  @SuppressWarnings("unchecked")
  final WindowFn<T, ?> assigner =
      (WindowFn<T, ?>) windowedOutput.getWindowingStrategy().getWindowFn();

  final MessageStream<OpMessage<T>> upstream = ctx.getMessageStream(ctx.getInput(transform));

  final MessageStream<OpMessage<T>> assigned =
      upstream.flatMap(OpAdapter.adapt(new WindowAssignOp<>(assigner)));

  ctx.registerMessageStream(windowedOutput, assigned);
}
 
Example #21
Source File: SparkMetricsPusherTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Streams six integers in two batches through a counting DoFn and, after waiting one second
 * longer than the metrics push period, checks that the test sink saw a counter value of 6.
 */
@Category(StreamingTest.class)
@Test
public void testInStreamingMode() throws Exception {
  Instant epoch = new Instant(0);
  Instant oneSecondLater = epoch.plus(Duration.standardSeconds(1L));

  CreateStream<Integer> source =
      CreateStream.of(VarIntCoder.of(), batchDuration())
          .emptyBatch()
          .advanceWatermarkForNextBatch(epoch)
          .nextBatch(
              TimestampedValue.of(1, epoch),
              TimestampedValue.of(2, epoch),
              TimestampedValue.of(3, epoch))
          .advanceWatermarkForNextBatch(oneSecondLater)
          .nextBatch(
              TimestampedValue.of(4, oneSecondLater),
              TimestampedValue.of(5, oneSecondLater),
              TimestampedValue.of(6, oneSecondLater))
          .advanceNextBatchWatermarkToInfinity();

  Window<Integer> threeSecondWindows =
      Window.<Integer>into(FixedWindows.of(Duration.standardSeconds(3L)))
          .withAllowedLateness(Duration.ZERO);

  pipeline.apply(source).apply(threeSecondWindows).apply(ParDo.of(new CountingDoFn()));

  pipeline.run();

  // Give the metrics pusher time to push: one push period plus one extra second.
  long pushPeriodSeconds =
      pipeline.getOptions().as(MetricsOptions.class).getMetricsPushPeriod();
  Thread.sleep((pushPeriodSeconds + 1L) * 1000);

  assertThat(TestMetricsSink.getCounterValue(COUNTER_NAME), is(6L));
}
 
Example #22
Source File: SparkMetricsSinkTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Streams six words in two batches through the word count and checks both the counted output
 * and the {@code emptyLines} metric, which is unset before the run and 1 after it.
 */
@Category(StreamingTest.class)
@Test
public void testInStreamingMode() throws Exception {
  assertThat(InMemoryMetrics.valueOf("emptyLines"), is(nullValue()));

  Duration batchInterval =
      Duration.millis(
          (pipeline.getOptions().as(SparkPipelineOptions.class)).getBatchIntervalMillis());
  Instant epoch = new Instant(0);
  Instant oneSecondLater = epoch.plus(Duration.standardSeconds(1L));

  CreateStream<String> source =
      CreateStream.of(StringUtf8Coder.of(), batchInterval)
          .emptyBatch()
          .advanceWatermarkForNextBatch(epoch)
          .nextBatch(
              TimestampedValue.of(WORDS.get(0), epoch),
              TimestampedValue.of(WORDS.get(1), epoch),
              TimestampedValue.of(WORDS.get(2), epoch))
          // The watermark moves two seconds ahead while the elements are stamped one second in.
          .advanceWatermarkForNextBatch(epoch.plus(Duration.standardSeconds(2L)))
          .nextBatch(
              TimestampedValue.of(WORDS.get(3), oneSecondLater),
              TimestampedValue.of(WORDS.get(4), oneSecondLater),
              TimestampedValue.of(WORDS.get(5), oneSecondLater))
          .advanceNextBatchWatermarkToInfinity();

  Window<String> threeSecondWindows =
      Window.<String>into(FixedWindows.of(Duration.standardSeconds(3L)))
          .withAllowedLateness(Duration.ZERO);

  PCollection<String> output =
      pipeline
          .apply(source)
          .apply(threeSecondWindows)
          .apply(new WordCount.CountWords())
          .apply(MapElements.via(new WordCount.FormatAsTextFn()));
  PAssert.that(output).containsInAnyOrder(EXPECTED_COUNTS);
  pipeline.run();

  assertThat(InMemoryMetrics.<Double>valueOf("emptyLines"), is(1d));
}
 
Example #23
Source File: SumByKey.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the operator. Name, output type, key extractor, key type and window are handed to the
 * superclass constructor; the value extractor is kept in this instance.
 *
 * @param name operator name, may be {@code null}
 * @param keyExtractor function that derives the key from an input element
 * @param keyType type descriptor of the key, may be {@code null}
 * @param valueExtractor function that derives the {@code Long} value from an input element
 * @param window windowing to apply, may be {@code null}
 * @param outputType type descriptor of the produced key/value pairs
 */
private SumByKey(
    @Nullable String name,
    UnaryFunction<InputT, KeyT> keyExtractor,
    @Nullable TypeDescriptor<KeyT> keyType,
    UnaryFunction<InputT, Long> valueExtractor,
    @Nullable Window<InputT> window,
    TypeDescriptor<KV<KeyT, Long>> outputType) {
  super(name, outputType, keyExtractor, keyType, window);
  this.valueExtractor = valueExtractor;
}
 
Example #24
Source File: DistinctTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code Distinct} with a representative-value function emits each key once when the
 * same keys arrive again after a processing-time trigger has already fired with accumulation.
 */
@Test
@Category({NeedsRunner.class, UsesTestStreamWithProcessingTime.class})
public void testTriggeredDistinctRepresentativeValues() {
  Instant base = new Instant(0);
  TestStream<KV<Integer, String>> values =
      TestStream.create(KvCoder.of(VarIntCoder.of(), StringUtf8Coder.of()))
          .advanceWatermarkTo(base)
          .addElements(
              TimestampedValue.of(KV.of(1, "k1"), base),
              TimestampedValue.of(KV.of(2, "k2"), base.plus(Duration.standardSeconds(10))),
              TimestampedValue.of(KV.of(3, "k3"), base.plus(Duration.standardSeconds(20))))
          // One minute of processing time passes between the two batches.
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              TimestampedValue.of(KV.of(1, "k1"), base.plus(Duration.standardSeconds(30))),
              TimestampedValue.of(KV.of(2, "k2"), base.plus(Duration.standardSeconds(40))),
              TimestampedValue.of(KV.of(3, "k3"), base.plus(Duration.standardSeconds(50))))
          .advanceWatermarkToInfinity();

  // One-minute windows firing repeatedly 30 seconds (processing time) after a pane's first
  // element, with accumulating panes.
  Window<KV<Integer, String>> triggeredWindows =
      Window.<KV<Integer, String>>into(FixedWindows.of(Duration.standardMinutes(1)))
          .triggering(
              Repeatedly.forever(
                  AfterProcessingTime.pastFirstElementInPane()
                      .plusDelayOf(Duration.standardSeconds(30))))
          .withAllowedLateness(Duration.ZERO)
          .accumulatingFiredPanes();

  PCollection<KV<Integer, String>> distinctValues =
      triggeredDistinctRepresentativePipeline
          .apply(values)
          .apply(triggeredWindows)
          .apply(
              Distinct.withRepresentativeValueFn(new Keys<Integer>())
                  .withRepresentativeType(TypeDescriptor.of(Integer.class)));

  PAssert.that(distinctValues).containsInAnyOrder(KV.of(1, "k1"), KV.of(2, "k2"), KV.of(3, "k3"));
  triggeredDistinctRepresentativePipeline.run();
}
 
Example #25
Source File: DistinctTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks {@code Distinct} per window: with 30-second fixed windows, each of the three windows
 * covering the first 90 seconds must contain exactly its own distinct values.
 */
@Test
@Category({NeedsRunner.class, UsesTestStream.class})
public void testWindowedDistinct() {
  Instant base = new Instant(0);
  // Boundaries of the three consecutive 30-second windows.
  Instant thirtySeconds = base.plus(Duration.standardSeconds(30));
  Instant sixtySeconds = base.plus(Duration.standardSeconds(60));
  Instant ninetySeconds = base.plus(Duration.standardSeconds(90));

  TestStream<String> values =
      TestStream.create(StringUtf8Coder.of())
          .advanceWatermarkTo(base)
          .addElements(
              TimestampedValue.of("k1", base),
              TimestampedValue.of("k2", base.plus(Duration.standardSeconds(10))),
              TimestampedValue.of("k3", base.plus(Duration.standardSeconds(20))),
              TimestampedValue.of("k1", base.plus(Duration.standardSeconds(30))),
              TimestampedValue.of("k2", base.plus(Duration.standardSeconds(40))),
              TimestampedValue.of("k3", base.plus(Duration.standardSeconds(50))),
              TimestampedValue.of("k4", base.plus(Duration.standardSeconds(60))),
              TimestampedValue.of("k5", base.plus(Duration.standardSeconds(70))),
              TimestampedValue.of("k6", base.plus(Duration.standardSeconds(80))))
          .advanceWatermarkToInfinity();

  PCollection<String> distinctValues =
      windowedDistinctPipeline
          .apply(values)
          .apply(Window.into(FixedWindows.of(Duration.standardSeconds(30))))
          .apply(Distinct.create());

  PAssert.that(distinctValues)
      .inWindow(new IntervalWindow(base, thirtySeconds))
      .containsInAnyOrder("k1", "k2", "k3");
  PAssert.that(distinctValues)
      .inWindow(new IntervalWindow(thirtySeconds, sixtySeconds))
      .containsInAnyOrder("k1", "k2", "k3");
  PAssert.that(distinctValues)
      .inWindow(new IntervalWindow(sixtySeconds, ninetySeconds))
      .containsInAnyOrder("k4", "k5", "k6");
  windowedDistinctPipeline.run();
}
 
Example #26
Source File: AutoCompleteTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the top-2 completions per prefix under sliding windows that are two milliseconds
 * wide, and compares them with the expected candidates of every window.
 */
@Test
public void testWindowedAutoComplete() {
  List<TimestampedValue<String>> timestampedWords =
      Arrays.asList(
          TimestampedValue.of("xA", new Instant(1)),
          TimestampedValue.of("xA", new Instant(1)),
          TimestampedValue.of("xB", new Instant(1)),
          TimestampedValue.of("xB", new Instant(2)),
          TimestampedValue.of("xB", new Instant(2)));
  SlidingWindows twoMillisWindows = SlidingWindows.of(new Duration(2));

  PCollection<String> input = p.apply(Create.timestamped(timestampedWords));

  PCollection<KV<String, List<CompletionCandidate>>> completions =
      input
          .apply(Window.into(twoMillisWindows))
          .apply(new ComputeTopCompletions(2, recursive));

  PAssert.that(completions)
      .containsInAnyOrder(
          // Window [0, 2): holds the elements stamped 1.
          KV.of("x", parseList("xA:2", "xB:1")),
          KV.of("xA", parseList("xA:2")),
          KV.of("xB", parseList("xB:1")),

          // Window [1, 3): holds all elements.
          KV.of("x", parseList("xB:3", "xA:2")),
          KV.of("xA", parseList("xA:2")),
          KV.of("xB", parseList("xB:3")),

          // Window [2, 4): holds the elements stamped 2.
          KV.of("x", parseList("xB:2")),
          KV.of("xB", parseList("xB:2")));
  p.run().waitUntilFinish();
}
 
Example #27
Source File: PipelineTranslationTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Supplies the pipelines used as test parameters: a trivial one, one that uses a side input, and
 * a more complex one with a custom coder, a triggered window and a GroupByKey.
 */
@Parameters(name = "{index}")
public static Iterable<Pipeline> testPipelines() {
  Pipeline trivialPipeline = Pipeline.create();
  trivialPipeline.apply(Create.of(1, 2, 3));

  Pipeline sideInputPipeline = Pipeline.create();
  final PCollectionView<String> singletonView =
      sideInputPipeline.apply(Create.of("foo")).apply(View.asSingleton());
  sideInputPipeline
      .apply(Create.of("main input"))
      .apply(
          ParDo.of(
                  new DoFn<String, String>() {
                    @ProcessElement
                    public void process(ProcessContext c) {
                      // actually never executed and no effect on translation
                      c.sideInput(singletonView);
                    }
                  })
              .withSideInputs(singletonView));

  Pipeline complexPipeline = Pipeline.create();
  BigEndianLongCoder customCoder = BigEndianLongCoder.of();
  PCollection<Long> elems = complexPipeline.apply(GenerateSequence.from(0L).to(207L));
  PCollection<Long> counted = elems.apply(Count.globally()).setCoder(customCoder);
  PCollection<Long> windowed =
      counted.apply(
          Window.<Long>into(FixedWindows.of(Duration.standardMinutes(7)))
              .triggering(
                  AfterWatermark.pastEndOfWindow()
                      .withLateFirings(AfterPane.elementCountAtLeast(19)))
              .accumulatingFiredPanes()
              .withAllowedLateness(Duration.standardMinutes(3L)));
  PCollection<KV<String, Long>> keyed = windowed.apply(WithKeys.of("foo"));
  // Applied only so the GroupByKey becomes part of the pipeline; the grouped collection itself
  // is not needed here.
  keyed.apply(GroupByKey.create());

  return ImmutableList.of(trivialPipeline, sideInputPipeline, complexPipeline);
}
 
Example #28
Source File: GroupByKeyTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code GroupByKey} can be applied to an input whose trigger is end-of-window with
 * late firings; the test passes when the application does not throw.
 */
@Test
public void testGroupByKeyEndOfWindowLateFiringsOk() {
  Window<KV<String, String>> lateFiringWindow =
      Window.<KV<String, String>>configure()
          .discardingFiredPanes()
          .triggering(
              AfterWatermark.pastEndOfWindow().withLateFirings(AfterPane.elementCountAtLeast(1)))
          .withAllowedLateness(Duration.millis(10));

  PCollection<KV<String, String>> windowedInput =
      p.apply(Create.of(KV.of("hello", "goodbye"))).apply(lateFiringWindow);

  // OK
  windowedInput.apply(GroupByKey.create());
}
 
Example #29
Source File: DistinctTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Regression test for the case where a speculative (early) trigger has already emitted every
 * value: the later on-time firing used to produce a null KV.
 */
@Test
@Category({NeedsRunner.class, UsesTestStreamWithProcessingTime.class})
public void testTriggeredDistinctRepresentativeValuesEmpty() {
  Instant base = new Instant(0);
  TestStream<KV<Integer, String>> values =
      TestStream.create(KvCoder.of(VarIntCoder.of(), StringUtf8Coder.of()))
          .advanceWatermarkTo(base)
          .addElements(TimestampedValue.of(KV.of(1, "k1"), base))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .advanceWatermarkToInfinity();

  // One-minute windows with an early firing 30 seconds (processing time) after a pane's first
  // element; fired panes are discarded.
  Window<KV<Integer, String>> earlyFiringWindows =
      Window.<KV<Integer, String>>into(FixedWindows.of(Duration.standardMinutes(1)))
          .triggering(
              AfterWatermark.pastEndOfWindow()
                  .withEarlyFirings(
                      AfterProcessingTime.pastFirstElementInPane()
                          .plusDelayOf(Duration.standardSeconds(30))))
          .withAllowedLateness(Duration.ZERO)
          .discardingFiredPanes();

  PCollection<KV<Integer, String>> distinctValues =
      triggeredDistinctRepresentativePipeline
          .apply(values)
          .apply(earlyFiringWindows)
          .apply(
              Distinct.withRepresentativeValueFn(new Keys<Integer>())
                  .withRepresentativeType(TypeDescriptor.of(Integer.class)));

  PAssert.that(distinctValues).containsInAnyOrder(KV.of(1, "k1"));
  triggeredDistinctRepresentativePipeline.run();
}
 
Example #30
Source File: SideInputLoadTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the input unchanged when the configured window count is 1; otherwise assigns it to
 * fixed windows whose length in milliseconds is the number of records divided by the window
 * count.
 */
private PCollection<KV<byte[], byte[]>> applyWindowingIfPresent(
    PCollection<KV<byte[], byte[]>> input) {
  if (options.getWindowCount() == 1) {
    return input;
  }
  long windowDurationMillis = sourceOptions.numRecords / options.getWindowCount();
  return input.apply(Window.into(FixedWindows.of(Duration.millis(windowDurationMillis))));
}