Java Code Examples for org.apache.beam.sdk.transforms.windowing.Window

The following examples show how to use org.apache.beam.sdk.transforms.windowing.Window. They are extracted from open source projects; the source project, source file, and license are listed above each example.
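Before the individual examples, here is a minimal sketch of the pattern most of them share: Window.into(...) selects a WindowFn, and the optional triggering, allowed-lateness, and accumulation-mode calls complete the windowing strategy. The sketch is not taken from any of the projects below; the method name and the events parameter are placeholders, and the usual org.apache.beam.sdk.transforms.windowing and org.joda.time.Duration imports are assumed.

// Window an unbounded PCollection<String> into fixed one-minute windows,
// fire early results 30 seconds after the first element of each pane, and
// accumulate panes across firings for up to one day of late data.
static PCollection<String> windowEvents(PCollection<String> events) {
  return events.apply(
      "WindowIntoFixedWindows",
      Window.<String>into(FixedWindows.of(Duration.standardMinutes(1)))
          .triggering(
              AfterWatermark.pastEndOfWindow()
                  .withEarlyFirings(
                      AfterProcessingTime.pastFirstElementInPane()
                          .plusDelayOf(Duration.standardSeconds(30))))
          .withAllowedLateness(Duration.standardDays(1))
          .accumulatingFiredPanes());
}
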
Example 1
Source Project: beam   Source File: ReshuffleTest.java    License: Apache License 2.0
@Test
@Category(ValidatesRunner.class)
public void testReshuffleAfterSlidingWindows() {

  PCollection<KV<String, Integer>> input =
      pipeline
          .apply(
              Create.of(ARBITRARY_KVS)
                  .withCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of())))
          .apply(Window.into(FixedWindows.of(Duration.standardMinutes(10L))));

  PCollection<KV<String, Integer>> output = input.apply(Reshuffle.of());

  PAssert.that(output).containsInAnyOrder(ARBITRARY_KVS);

  assertEquals(input.getWindowingStrategy(), output.getWindowingStrategy());

  pipeline.run();
}
 
Example 2
Source Project: beam   Source File: CountByKeyTest.java    License: Apache License 2.0
@Test
public void testBuild() {
  final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings());
  final FixedWindows windowing = FixedWindows.of(org.joda.time.Duration.standardHours(1));
  final DefaultTrigger trigger = DefaultTrigger.of();
  final PCollection<KV<String, Long>> counted =
      CountByKey.named("CountByKey1")
          .of(dataset)
          .keyBy(s -> s)
          .windowBy(windowing)
          .triggeredBy(trigger)
          .discardingFiredPanes()
          .withAllowedLateness(Duration.millis(1000))
          .output();
  final CountByKey count = (CountByKey) TestUtils.getProducer(counted);
  assertTrue(count.getName().isPresent());
  assertEquals("CountByKey1", count.getName().get());
  assertNotNull(count.getKeyExtractor());
  assertTrue(count.getWindow().isPresent());
  final WindowDesc<?> desc = WindowDesc.of((Window<?>) count.getWindow().get());
  assertEquals(windowing, desc.getWindowFn());
  assertEquals(trigger, desc.getTrigger());
  assertEquals(AccumulationMode.DISCARDING_FIRED_PANES, desc.getAccumulationMode());
  assertEquals(Duration.millis(1000), desc.getAllowedLateness());
}
 
Example 3
Source Project: beam   Source File: DistinctTest.java    License: Apache License 2.0
@Test
public void testBuild() {
  final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings());
  final FixedWindows windowing = FixedWindows.of(org.joda.time.Duration.standardHours(1));
  final DefaultTrigger trigger = DefaultTrigger.of();
  final PCollection<String> uniq =
      Distinct.named("Distinct1")
          .of(dataset)
          .windowBy(windowing)
          .triggeredBy(trigger)
          .discardingFiredPanes()
          .withAllowedLateness(Duration.millis(1000))
          .output();
  final Distinct distinct = (Distinct) TestUtils.getProducer(uniq);
  assertTrue(distinct.getName().isPresent());
  assertEquals("Distinct1", distinct.getName().get());

  assertTrue(distinct.getWindow().isPresent());
  @SuppressWarnings("unchecked")
  final WindowDesc<?> windowDesc = WindowDesc.of((Window) distinct.getWindow().get());
  assertEquals(windowing, windowDesc.getWindowFn());
  assertEquals(trigger, windowDesc.getTrigger());
  assertEquals(AccumulationMode.DISCARDING_FIRED_PANES, windowDesc.getAccumulationMode());
  assertEquals(Duration.millis(1000), windowDesc.getAllowedLateness());
}
 
Example 4
Source Project: beam   Source File: CombineTest.java    License: Apache License 2.0
@Test
public void testCountPerElementWithSlidingWindows() {
  PCollection<String> input =
      pipeline
          .apply(
              Create.timestamped(
                  TimestampedValue.of("a", new Instant(1)),
                  TimestampedValue.of("a", new Instant(2)),
                  TimestampedValue.of("b", new Instant(3)),
                  TimestampedValue.of("b", new Instant(4))))
          .apply(Window.into(SlidingWindows.of(Duration.millis(2)).every(Duration.millis(1))));
  PCollection<KV<String, Long>> output = input.apply(Count.perElement());
  PAssert.that(output)
      .containsInAnyOrder(
          KV.of("a", 1L),
          KV.of("a", 2L),
          KV.of("a", 1L),
          KV.of("b", 1L),
          KV.of("b", 2L),
          KV.of("b", 1L));
  pipeline.run();
}
 
Example 5
Source Project: beam   Source File: BeamSqlDslJoinTest.java    License: Apache License 2.0
@Test
public void testJoinsUnboundedWithinWindowsWithDefaultTrigger() throws Exception {

  String sql =
      "SELECT o1.order_id, o1.price, o1.site_id, o2.order_id, o2.price, o2.site_id  "
          + "FROM ORDER_DETAILS1 o1"
          + " JOIN ORDER_DETAILS2 o2"
          + " on "
          + " o1.order_id=o2.site_id AND o2.price=o1.site_id";

  PCollection<Row> orders =
      ordersUnbounded()
          .apply("window", Window.into(FixedWindows.of(Duration.standardSeconds(50))));
  PCollectionTuple inputs = tuple("ORDER_DETAILS1", orders, "ORDER_DETAILS2", orders);

  PAssert.that(inputs.apply("sql", SqlTransform.query(sql)))
      .containsInAnyOrder(
          TestUtils.RowsBuilder.of(RESULT_ROW_TYPE)
              .addRows(1, 2, 2, 2, 2, 1, 1, 4, 3, 3, 3, 1)
              .getRows());

  pipeline.run();
}
 
Example 6
Source Project: beam   Source File: ReduceWindowTest.java    License: Apache License 2.0
@Test
public void testWindow_applyIf() {
  final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings());
  final PCollection<Long> output =
      ReduceWindow.of(dataset)
          .reduceBy(e -> 1L)
          .withSortedValues(String::compareTo)
          .applyIf(
              true,
              b ->
                  b.windowBy(FixedWindows.of(org.joda.time.Duration.standardHours(1)))
                      .triggeredBy(DefaultTrigger.of())
                      .discardingFiredPanes())
          .output();
  final ReduceWindow rw = (ReduceWindow) TestUtils.getProducer(output);
  assertTrue(rw.getWindow().isPresent());
  @SuppressWarnings("unchecked")
  final WindowDesc<?> windowDesc = WindowDesc.of((Window) rw.getWindow().get());
  assertEquals(
      FixedWindows.of(org.joda.time.Duration.standardHours(1)), windowDesc.getWindowFn());
  assertEquals(DefaultTrigger.of(), windowDesc.getTrigger());
  assertEquals(AccumulationMode.DISCARDING_FIRED_PANES, windowDesc.getAccumulationMode());
}
 
Example 7
Source Project: beam   Source File: GroupByKeyTest.java    License: Apache License 2.0
@Test
@Category(NeedsRunner.class)
public void testIdentityWindowFnPropagation() {

  List<KV<String, Integer>> ungroupedPairs = Arrays.asList();

  PCollection<KV<String, Integer>> input =
      p.apply(
              Create.of(ungroupedPairs)
                  .withCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())))
          .apply(Window.into(FixedWindows.of(Duration.standardMinutes(1))));

  PCollection<KV<String, Iterable<Integer>>> output = input.apply(GroupByKey.create());

  p.run();

  Assert.assertTrue(
      output
          .getWindowingStrategy()
          .getWindowFn()
          .isCompatible(FixedWindows.of(Duration.standardMinutes(1))));
}
 
Example 8
Source Project: deployment-examples   Source File: LeaderBoard.java    License: MIT License
@Override
public PCollection<KV<String, Integer>> expand(PCollection<GameActionInfo> infos) {
  return infos
      .apply(
          "LeaderboardTeamFixedWindows",
          Window.<GameActionInfo>into(FixedWindows.of(teamWindowDuration))
              // We will get early (speculative) results as well as cumulative
              // processing of late data.
              .triggering(
                  AfterWatermark.pastEndOfWindow()
                      .withEarlyFirings(
                          AfterProcessingTime.pastFirstElementInPane()
                              .plusDelayOf(FIVE_MINUTES))
                      .withLateFirings(
                          AfterProcessingTime.pastFirstElementInPane()
                              .plusDelayOf(TEN_MINUTES)))
              .withAllowedLateness(allowedLateness)
              .accumulatingFiredPanes())
      // Extract and sum teamname/score pairs from the event data.
      .apply("ExtractTeamScore", new ExtractAndSumScore("team"));
}
 
Example 9
Source Project: DataflowTemplates   Source File: BigQueryMerger.java    License: Apache License 2.0
@Override
public PCollection<KV<K, V>> expand(PCollection<KV<K, V>> input) {
  return input
      .apply(
          Window.<KV<K, V>>into(new GlobalWindows())
              .discardingFiredPanes()
              .triggering(
                  Repeatedly.forever(
                      AfterProcessingTime.pastFirstElementInPane()
                          .plusDelayOf(Duration.ZERO)
                          .alignedTo(intervalDuration, org.joda.time.Instant.now()))))
      .apply(GroupByKey.create())
      .apply(
          ParDo.of(
              new DoFn<KV<K, Iterable<V>>, KV<K, V>>() {
                @ProcessElement
                public void process(ProcessContext c) {
                  LOG.debug(
                      "TS: {} | Element: {} | Pane: {}", c.timestamp(), c.element(), c.pane());
                  Iterator<V> it = c.element().getValue().iterator();
                  if (it.hasNext()) {
                    c.output(KV.of(c.element().getKey(), it.next()));
                  }
                }
              }));
}
 
Example 10
Source Project: beam   Source File: PeriodicImpulse.java    License: Apache License 2.0
@Override
public PCollection<Instant> expand(PBegin input) {
  PCollection<Instant> result =
      input
          .apply(
              Create.<PeriodicSequence.SequenceDefinition>of(
                  new PeriodicSequence.SequenceDefinition(
                      startTimestamp, stopTimestamp, fireInterval)))
          .apply(PeriodicSequence.create());

  if (this.applyWindowing) {
    result =
        result.apply(
            Window.<Instant>into(FixedWindows.of(Duration.millis(fireInterval.getMillis()))));
  }

  return result;
}
 
Example 11
Source Project: hazelcast-jet-demos   Source File: MyBeamJob.java    License: Apache License 2.0
public static Pipeline build(PipelineOptions pipelineOptions) {

    Pipeline pipeline = Pipeline.create(pipelineOptions);

    pipeline
        .apply("unbounded-source",
            Read.from(new MyUnboundedSource("beam-input")))
        .apply("reformat-and-timestamp",
            ParDo.of(new MyEnrichAndReformatFn()))
        .apply("window",
            Window.<String>into(FixedWindows.of(ONE_SECOND))
                .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()))
                .discardingFiredPanes()
                .withAllowedLateness(ONE_SECOND))
        .apply("sink",
            FileIO.<String>write()
                .via(TextIO.sink())
                .to(".")
                .withPrefix("beam-output")
                .withNumShards(1));

    return pipeline;
}
 
Example 12
Source Project: beam-starter   Source File: StreamWordCount.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {

    Options options = PipelineOptionsFactory.fromArgs(args).withValidation()
        .as(Options.class);
    options.setRunner(FlinkRunner.class);

    Pipeline p = Pipeline.create(options);

    KafkaIO.Read<byte[], String> kafkaIOReader = KafkaIO.read()
        .withBootstrapServers("192.168.99.100:32771")
        .withTopics(Arrays.asList("beam".split(",")))
        .updateConsumerProperties(ImmutableMap.of("auto.offset.reset", (Object)"earliest"))
        .withValueCoder(StringUtf8Coder.of());

    p.apply(kafkaIOReader.withoutMetadata())
        .apply(Values.<String>create())
        .apply(Window.<String>into(
          FixedWindows.of(Duration.standardMinutes(options.getWindowSize()))))
        .apply(new CountWords())
        .apply(MapElements.via(new FormatAsTextFn()))
        .apply("WriteCounts", TextIO.Write.to(options.getOutput()));

    p.run();
  }
 
Example 13
Source Project: beam   Source File: CombineTest.java    License: Apache License 2.0
@Test
@Category(ValidatesRunner.class)
public void testHotKeyCombiningWithAccumulationMode() {
  PCollection<Integer> input = pipeline.apply(Create.of(1, 2, 3, 4, 5));

  PCollection<Integer> output =
      input
          .apply(
              Window.<Integer>into(new GlobalWindows())
                  .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
                  .accumulatingFiredPanes()
                  .withAllowedLateness(new Duration(0), ClosingBehavior.FIRE_ALWAYS))
          .apply(Sum.integersGlobally().withoutDefaults().withFanout(2))
          .apply(ParDo.of(new GetLast()));

  PAssert.that(output)
      .satisfies(
          input1 -> {
            assertThat(input1, hasItem(15));
            return null;
          });

  pipeline.run();
}
 
Example 14
Source Project: beam   Source File: CombineTest.java    License: Apache License 2.0
@Test
@Category(ValidatesRunner.class)
public void testSessionsCombine() {
  PCollection<KV<String, Integer>> input =
      pipeline
          .apply(
              Create.timestamped(
                      TimestampedValue.of(KV.of("a", 1), new Instant(0L)),
                      TimestampedValue.of(KV.of("a", 1), new Instant(4L)),
                      TimestampedValue.of(KV.of("a", 4), new Instant(7L)),
                      TimestampedValue.of(KV.of("b", 1), new Instant(10L)),
                      TimestampedValue.of(KV.of("b", 13), new Instant(16L)))
                  .withCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())))
          .apply(Window.into(Sessions.withGapDuration(Duration.millis(5))));

  PCollection<Integer> sum =
      input.apply(Values.create()).apply(Combine.globally(new SumInts()).withoutDefaults());

  PCollection<KV<String, String>> sumPerKey = input.apply(Combine.perKey(new TestCombineFn()));

  PAssert.that(sum).containsInAnyOrder(7, 13);
  PAssert.that(sumPerKey)
      .containsInAnyOrder(Arrays.asList(KV.of("a", "114"), KV.of("b", "1"), KV.of("b", "13")));
  pipeline.run();
}
 
Example 15
Source Project: beam   Source File: SqlQuery3.java    License: Apache License 2.0
@Override
public PCollection<NameCityStateId> expand(PCollection<Event> allEvents) {
  PCollection<Event> windowed =
      allEvents.apply(
          Window.into(FixedWindows.of(Duration.standardSeconds(configuration.windowSizeSec))));

  String auctionName = Auction.class.getSimpleName();
  PCollection<Row> auctions =
      windowed
          .apply(getName() + ".Filter." + auctionName, Filter.by(e1 -> e1.newAuction != null))
          .apply(getName() + ".ToRecords." + auctionName, new SelectEvent(Type.AUCTION));

  String personName = Person.class.getSimpleName();
  PCollection<Row> people =
      windowed
          .apply(getName() + ".Filter." + personName, Filter.by(e -> e.newPerson != null))
          .apply(getName() + ".ToRecords." + personName, new SelectEvent(Type.PERSON));

  PCollectionTuple inputStreams =
      PCollectionTuple.of(new TupleTag<>("Auction"), auctions)
          .and(new TupleTag<>("Person"), people);

  return inputStreams
      .apply(SqlTransform.query(QUERY).withQueryPlannerClass(plannerClass))
      .apply(Convert.fromRows(NameCityStateId.class));
}
 
Example 16
Source Project: beam   Source File: ReduceFnRunner.java    License: Apache License 2.0
/** Do we need to emit? */
private boolean needToEmit(boolean isEmpty, boolean isFinished, PaneInfo.Timing timing) {
  if (!isEmpty) {
    // The pane has elements.
    return true;
  }
  if (timing == Timing.ON_TIME
      && windowingStrategy.getOnTimeBehavior() == Window.OnTimeBehavior.FIRE_ALWAYS) {
    // This is an empty ON_TIME pane.
    return true;
  }
  if (isFinished && windowingStrategy.getClosingBehavior() == ClosingBehavior.FIRE_ALWAYS) {
    // This is known to be the final pane, and the user has requested it even when empty.
    return true;
  }
  return false;
}
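The two empty-pane branches above read user-facing Window settings. As a sketch only (assuming the withOnTimeBehavior builder method and the two-argument withAllowedLateness overload shown in Example 13; the input collection is a placeholder), a pipeline would request those firings like this:

input.apply(
    Window.<String>into(FixedWindows.of(Duration.standardMinutes(1)))
        .triggering(DefaultTrigger.of())
        // Emit the ON_TIME pane even when it contains no elements.
        .withOnTimeBehavior(Window.OnTimeBehavior.FIRE_ALWAYS)
        // Emit the final pane when the window expires even when it is empty.
        .withAllowedLateness(Duration.standardMinutes(10), Window.ClosingBehavior.FIRE_ALWAYS)
        .discardingFiredPanes());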
 
Example 17
Source Project: beam   Source File: DistinctTest.java    License: Apache License 2.0
@Test
public void testWindow_applyIf() {
  final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings());
  final PCollection<String> uniq =
      Distinct.of(dataset)
          .applyIf(
              true,
              b ->
                  b.windowBy(FixedWindows.of(Duration.standardHours(1)))
                      .triggeredBy(DefaultTrigger.of())
                      .discardingFiredPanes())
          .output();
  final Distinct distinct = (Distinct) TestUtils.getProducer(uniq);
  assertTrue(distinct.getWindow().isPresent());
  @SuppressWarnings("unchecked")
  final WindowDesc<?> windowDesc = WindowDesc.of((Window) distinct.getWindow().get());
  assertEquals(
      FixedWindows.of(org.joda.time.Duration.standardHours(1)), windowDesc.getWindowFn());
  assertEquals(DefaultTrigger.of(), windowDesc.getTrigger());
  assertEquals(AccumulationMode.DISCARDING_FIRED_PANES, windowDesc.getAccumulationMode());
}
 
Example 18
public static void main(String[] args) throws IOException, GeneralSecurityException {

    TokenizePipelineOptions options =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(TokenizePipelineOptions.class);

    Pipeline p = Pipeline.create(options);
    p.apply(
            FileIO.match()
                .filepattern(options.getInputFile())
                .continuously(
                    Duration.standardSeconds(options.getPollingInterval()), Watch.Growth.never()))
        .apply(FileIO.readMatches().withCompression(Compression.UNCOMPRESSED))
        .apply(
            "Text File Reader",
            ParDo.of(
                new TextFileReader(
                    options.as(GcpOptions.class).getProject(),
                    options.getFileDecryptKeyName(),
                    options.getFileDecryptKey(),
                    options.getBatchSize(),
                    options.getCsek(),
                    options.getCsekhash())))
        .apply(
            "Tokenize Data",
            ParDo.of(
                new TokenizeData(
                    options.as(GcpOptions.class).getProject(),
                    options.getDeidentifyTemplateName(),
                    options.getInspectTemplateName())))
        .apply(
            Window.<String>into(FixedWindows.of(Duration.standardMinutes(options.getInterval()))))
        .apply(new WriteOneFilePerWindow(options.getOutputFile(), 1));

    p.run();
  }
 
Example 19
Source Project: beam   Source File: AbstractJoinTranslator.java    License: Apache License 2.0
@Override
public PCollection<KV<KeyT, OutputT>> translate(
    Join<LeftT, RightT, KeyT, OutputT> operator, PCollectionList<Object> inputs) {
  checkArgument(inputs.size() == 2, "Join expects exactly two inputs.");
  @SuppressWarnings("unchecked")
  final PCollection<LeftT> left = (PCollection) inputs.get(0);
  @SuppressWarnings("unchecked")
  final PCollection<RightT> right = (PCollection) inputs.get(1);
  PCollection<KV<KeyT, LeftT>> leftKeyed =
      left.apply(
          "extract-keys-left",
          new ExtractKey<>(
              operator.getLeftKeyExtractor(), TypeAwareness.orObjects(operator.getKeyType())));
  PCollection<KV<KeyT, RightT>> rightKeyed =
      right.apply(
          "extract-keys-right",
          new ExtractKey<>(
              operator.getRightKeyExtractor(), TypeAwareness.orObjects(operator.getKeyType())));
  // apply windowing if specified
  if (operator.getWindow().isPresent()) {
    @SuppressWarnings("unchecked")
    final Window<KV<KeyT, LeftT>> leftWindow = (Window) operator.getWindow().get();
    leftKeyed = leftKeyed.apply("window-left", leftWindow);
    @SuppressWarnings("unchecked")
    final Window<KV<KeyT, RightT>> rightWindow = (Window) operator.getWindow().get();
    rightKeyed = rightKeyed.apply("window-right", rightWindow);
  }

  return translate(operator, left, leftKeyed, right, rightKeyed)
      .setTypeDescriptor(
          operator
              .getOutputType()
              .orElseThrow(
                  () -> new IllegalStateException("Unable to infer output type descriptor.")));
}
 
Example 20
Source Project: beam   Source File: SplittableDoFnTest.java    License: Apache License 2.0
private void testOutputAfterCheckpoint(IsBounded bounded) {
  PCollection<Integer> outputs =
      p.apply(Create.of("foo"))
          .apply(ParDo.of(sdfWithMultipleOutputsPerBlock(bounded, 3)))
          .apply(Window.<Integer>configure().triggering(Never.ever()).discardingFiredPanes());
  PAssert.thatSingleton(outputs.apply(Count.globally()))
      .isEqualTo((long) SDFWithMultipleOutputsPerBlockBase.MAX_INDEX);
  p.run();
}
 
Example 21
Source Project: streamingbook   Source File: BeamModel.java    License: Apache License 2.0
@Override
public PCollection<String> expand(PCollection<KV<String, Integer>> input) {
    return input
        .apply(Window.<KV<String, Integer>>into(FixedWindows.of(TWO_MINUTES)))
        .apply(Sum.integersPerKey())
        .apply(ParDo.of(new FormatAsStrings()));
}
 
Example 22
Source Project: streamingbook   Source File: BeamModel.java    License: Apache License 2.0
@Override
public PCollection<String> expand(PCollection<KV<String, Integer>> input) {
    return input
        .apply(Window.<KV<String, Integer>>into(FixedWindows.of(TWO_MINUTES))
               .triggering(AfterWatermark.pastEndOfWindow()
                           .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane().plusDelayOf(ONE_MINUTE))
                           .withLateFirings(AfterPane.elementCountAtLeast(1)))
               .withAllowedLateness(Duration.standardDays(1000))
               .accumulatingFiredPanes())
        .apply(Sum.integersPerKey())
        .apply(ParDo.of(new FormatAsStrings()));
}
 
Example 23
Source Project: beam   Source File: GroupByKeyTest.java    License: Apache License 2.0
@Test
@Category(NeedsRunner.class)
public void testRemerge() {

  List<KV<String, Integer>> ungroupedPairs = Arrays.asList();

  PCollection<KV<String, Integer>> input =
      p.apply(
              Create.of(ungroupedPairs)
                  .withCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())))
          .apply(Window.into(Sessions.withGapDuration(Duration.standardMinutes(1))));

  PCollection<KV<String, Iterable<Iterable<Integer>>>> middle =
      input
          .apply("GroupByKey", GroupByKey.create())
          .apply("Remerge", Window.remerge())
          .apply("GroupByKeyAgain", GroupByKey.create())
          .apply("RemergeAgain", Window.remerge());

  p.run();

  Assert.assertTrue(
      middle
          .getWindowingStrategy()
          .getWindowFn()
          .isCompatible(Sessions.withGapDuration(Duration.standardMinutes(1))));
}
 
Example 24
Source Project: beam   Source File: BeamSideInputJoinRel.java    License: Apache License 2.0
@Override
public PCollection<Row> expand(PCollectionList<Row> pinput) {
  Schema leftSchema = pinput.get(0).getSchema();
  Schema rightSchema = pinput.get(1).getSchema();
  PCollection<Row> leftRows =
      pinput
          .get(0)
          .apply(
              "left_TimestampCombiner",
              Window.<Row>configure().withTimestampCombiner(TimestampCombiner.EARLIEST));
  PCollection<Row> rightRows =
      pinput
          .get(1)
          .apply(
              "right_TimestampCombiner",
              Window.<Row>configure().withTimestampCombiner(TimestampCombiner.EARLIEST));

  // extract the join fields
  List<Pair<RexNode, RexNode>> pairs = extractJoinRexNodes(condition);
  int leftRowColumnCount = BeamSqlRelUtils.getBeamRelInput(left).getRowType().getFieldCount();
  FieldAccessDescriptor leftKeyFields =
      BeamJoinTransforms.getJoinColumns(true, pairs, 0, leftSchema);
  FieldAccessDescriptor rightKeyFields =
      BeamJoinTransforms.getJoinColumns(false, pairs, leftRowColumnCount, rightSchema);

  return sideInputJoin(leftRows, rightRows, leftKeyFields, rightKeyFields);
}
 
Example 25
Source Project: incubator-nemo   Source File: WindowedWordCount.java    License: Apache License 2.0
/**
 * Main function for the MR BEAM program.
 *
 * @param args arguments.
 */
public static void main(final String[] args) {
  final String outputFilePath = args[0];
  final String windowType = args[1];

  final Window<KV<String, Long>> windowFn;
  if (windowType.equals("fixed")) {
    windowFn = Window.<KV<String, Long>>into(FixedWindows.of(Duration.standardSeconds(5)));
  } else {
    windowFn = Window.<KV<String, Long>>into(SlidingWindows.of(Duration.standardSeconds(10))
      .every(Duration.standardSeconds(5)));
  }

  final PipelineOptions options = NemoPipelineOptionsFactory.create();
  options.setJobName("WindowedWordCount");

  final Pipeline p = Pipeline.create(options);

  getSource(p, args)
    .apply(windowFn)
    .apply(Sum.longsPerKey())
    .apply(MapElements.<KV<String, Long>, String>via(new SimpleFunction<KV<String, Long>, String>() {
      @Override
      public String apply(final KV<String, Long> kv) {
        return kv.getKey() + ": " + kv.getValue();
      }
    }))
    .apply(new WriteOneFilePerWindow(outputFilePath, 1));

  p.run().waitUntilFinish();
}
 
Example 26
Source Project: incubator-nemo   Source File: WindowedBroadcast.java    License: Apache License 2.0
/**
 * Main function for the MR BEAM program.
 *
 * @param args arguments.
 */
public static void main(final String[] args) {
  final String outputFilePath = args[0];

  final Window<Long> windowFn = Window
    .<Long>into(SlidingWindows.of(Duration.standardSeconds(2))
      .every(Duration.standardSeconds(1)));

  final PipelineOptions options = NemoPipelineOptionsFactory.create();
  options.setJobName("WindowedBroadcast");

  final Pipeline p = Pipeline.create(options);

  final PCollection<Long> windowedElements = getSource(p).apply(windowFn);
  final PCollectionView<List<Long>> windowedView = windowedElements.apply(View.asList());

  windowedElements.apply(ParDo.of(new DoFn<Long, String>() {
      @ProcessElement
      public void processElement(final ProcessContext c) {
        final Long anElementInTheWindow = c.element();
        final List<Long> allElementsInTheWindow = c.sideInput(windowedView);
        System.out.println(anElementInTheWindow + " / " + allElementsInTheWindow);
        if (!allElementsInTheWindow.contains(anElementInTheWindow)) {
          throw new RuntimeException(anElementInTheWindow + " not in " + allElementsInTheWindow.toString());
        } else {
          c.output(anElementInTheWindow + " is in " + allElementsInTheWindow);
        }
      }
    }).withSideInputs(windowedView)
  ).apply(new WriteOneFilePerWindow(outputFilePath, 1));

  p.run().waitUntilFinish();
}
 
Example 27
Source Project: feast   Source File: ReadFeatureSetSpecs.java    License: Apache License 2.0
@Override
public PCollection<KV<FeatureSetReference, FeatureSetSpec>> expand(PBegin input) {
  return input
      .apply(
          KafkaIO.readBytes()
              .withBootstrapServers(
                  getSpecsStreamingUpdateConfig().getSource().getBootstrapServers())
              .withTopic(getSpecsStreamingUpdateConfig().getSource().getTopic())
              .withConsumerConfigUpdates(
                  ImmutableMap.of(
                      ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,
                      "earliest",
                      ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG,
                      false)))
      .apply("ParseFeatureSetSpec", ParDo.of(new KafkaRecordToFeatureSetSpec()))
      .apply("OnlyRelevantSpecs", Filter.by(new FilterRelevantFunction(getSource(), getStores())))
      .apply(
          Window.<KV<String, FeatureSetSpec>>into(new GlobalWindows())
              .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
              .accumulatingFiredPanes()
              .withAllowedLateness(Duration.ZERO))
      .apply(
          Combine.perKey(
              (SerializableFunction<Iterable<FeatureSetSpec>, FeatureSetSpec>)
                  specs -> {
                    ArrayList<FeatureSetSpec> featureSetSpecs = Lists.newArrayList(specs);
                    featureSetSpecs.sort(
                        Comparator.comparing(FeatureSetSpec::getVersion).reversed());
                    return featureSetSpecs.get(0);
                  }))
      .apply("CreateFeatureSetReferenceKey", ParDo.of(new CreateFeatureSetReference()))
      .setCoder(
          KvCoder.of(
              AvroCoder.of(FeatureSetReference.class), ProtoCoder.of(FeatureSetSpec.class)));
}
 
Example 28
Source Project: beam   Source File: WriteWithShardingFactory.java    License: Apache License 2.0
@Override
public PCollectionView<Integer> expand(PCollection<T> records) {
  return records
      .apply(Window.into(new GlobalWindows()))
      .apply("CountRecords", Count.globally())
      .apply("GenerateShardCount", ParDo.of(new CalculateShardsFn()))
      .apply(View.asSingleton());
}
 
Example 29
Source Project: deployment-examples   Source File: LeaderBoard.java    License: MIT License
@Override
public PCollection<KV<String, Integer>> expand(PCollection<GameActionInfo> input) {
  return input
      .apply(
          "LeaderboardUserGlobalWindow",
          Window.<GameActionInfo>into(new GlobalWindows())
              // Get periodic results every ten minutes.
              .triggering(
                  Repeatedly.forever(
                      AfterProcessingTime.pastFirstElementInPane().plusDelayOf(TEN_MINUTES)))
              .accumulatingFiredPanes()
              .withAllowedLateness(allowedLateness))
      // Extract and sum username/score pairs from the event data.
      .apply("ExtractUserScore", new ExtractAndSumScore("user"));
}
 
Example 30
Source Project: beam   Source File: BeamSqlDslJoinTest.java    License: Apache License 2.0
@Test
public void testRejectsGlobalWindowsWithEndOfWindowTrigger() throws Exception {

  String sql =
      "SELECT o1.order_id, o1.price, o1.site_id, o2.order_id, o2.price, o2.site_id  "
          + "FROM ORDER_DETAILS1 o1"
          + " JOIN ORDER_DETAILS2 o2"
          + " on "
          + " o1.order_id=o2.site_id AND o2.price=o1.site_id";

  PCollection<Row> orders =
      ordersUnbounded()
          .apply(
              "window",
              Window.<Row>into(new GlobalWindows())
                  .triggering(AfterWatermark.pastEndOfWindow())
                  .withAllowedLateness(Duration.ZERO)
                  .accumulatingFiredPanes());
  PCollectionTuple inputs = tuple("ORDER_DETAILS1", orders, "ORDER_DETAILS2", orders);

  thrown.expect(UnsupportedOperationException.class);
  thrown.expectMessage(
      stringContainsInOrder(Arrays.asList("once per window", "default trigger")));

  inputs.apply("sql", SqlTransform.query(sql));

  pipeline.run();
}