Java Code Examples for org.apache.beam.sdk.testing.TestStream#create()

The following examples show how to use org.apache.beam.sdk.testing.TestStream#create(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: TestUnboundedTable.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the reader for this table as a {@link TestStream} applied to {@code begin}.
 *
 * <p>For every entry of {@code timestampedRows} the watermark is advanced to epoch plus the
 * entry's key, and then each row of the entry is added with the event timestamp read from its
 * {@code timestampField} column. The watermark is finally advanced to infinity.
 *
 * @param begin the pipeline root the stream is applied to
 * @return the resulting collection, with its row schema set to {@link #getSchema()}
 */
@Override
public PCollection<Row> buildIOReader(PBegin begin) {
  TestStream.Builder<Row> stream = TestStream.create(schema);

  for (Pair<Duration, List<Row>> batch : timestampedRows) {
    Instant watermark = new Instant(0).plus(batch.getKey());
    stream = stream.advanceWatermarkTo(watermark);

    for (Row row : batch.getValue()) {
      Instant eventTime = new Instant(row.getDateTime(timestampField));
      stream = stream.addElements(TimestampedValue.of(row, eventTime));
    }
  }

  // The counter suffix gives every application of this transform a distinct name.
  String transformName = "MockedUnboundedTable_" + COUNTER.incrementAndGet();
  PCollection<Row> output = begin.apply(transformName, stream.advanceWatermarkToInfinity());
  return output.setRowSchema(getSchema());
}
 
Example 2
Source File: TestUtils.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates an unbounded {@link PCollection} of the configured rows inside the {@link Pipeline}
 * supplied through {@link #inPipeline(Pipeline)}.
 *
 * <p>When a timestamp field has been configured via {@link #withTimestampField(String)}, the
 * watermark is advanced to that field's value before each row is added. If no schema was set
 * explicitly, the schema of the first row is used (and remembered in {@code type}).
 *
 * @return the rows as a {@link TestStream}-backed collection whose watermark ends at infinity
 * @throws IllegalArgumentException if no pipeline was set or there are no rows
 */
public PCollection<Row> buildUnbounded() {
  checkArgument(pipeline != null);
  checkArgument(rows.size() > 0);

  // Fall back to the schema carried by the first row when none was given.
  if (type == null) {
    type = rows.get(0).getSchema();
  }

  final boolean tracksWatermark = timestampField != null;
  TestStream.Builder<Row> stream = TestStream.create(type);

  for (Row element : rows) {
    if (tracksWatermark) {
      Instant eventTime = new Instant(element.getDateTime(timestampField));
      stream = stream.advanceWatermarkTo(eventTime);
    }
    stream = stream.addElements(element);
  }

  PBegin root = PBegin.in(pipeline);
  return root.apply("unboundedPCollection", stream.advanceWatermarkToInfinity());
}
 
Example 3
Source File: ParDoTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds {@code numElements} values, in descending order, through a {@link TestStream} and hands
 * the resulting collection to {@code testTimeSortedInput}.
 */
@Test
@Category({
  ValidatesRunner.class,
  UsesStatefulParDo.class,
  UsesRequiresTimeSortedInput.class,
  UsesStrictTimerOrdering.class,
  UsesTestStream.class
})
public void testRequiresTimeSortedInputWithTestStream() {
  // Use enough elements that ending up in sorted order by pure chance is implausible.
  int numElements = 1000;

  // Values numElements, numElements - 1, ..., 1 (i.e. strictly descending).
  List<Long> descendingStamps =
      LongStream.range(0, numElements)
          .map(offset -> numElements - offset)
          .boxed()
          .collect(Collectors.toList());

  TestStream.Builder<Long> builder = TestStream.create(VarLongCoder.of());
  for (Long value : descendingStamps) {
    builder = builder.addElements(value);
  }

  TestStream<Long> events = builder.advanceWatermarkToInfinity();
  testTimeSortedInput(numElements, pipeline.apply(events));
}
 
Example 4
Source File: BeamSqlDslBase.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an unbounded collection containing every row of {@code rowsInTableA}.
 *
 * <p>Before each row is added the watermark is advanced to the row's {@code f_timestamp} value.
 * The resulting stream is windowed into fixed windows of 365 days.
 */
private PCollection<Row> prepareUnboundedPCollection1() {
  TestStream.Builder<Row> stream = TestStream.create(schemaInTableA);

  for (Row tableRow : rowsInTableA) {
    Instant eventTime = new Instant(tableRow.getDateTime("f_timestamp"));
    stream = stream.advanceWatermarkTo(eventTime).addElements(tableRow);
  }

  PCollection<Row> unbounded =
      PBegin.in(pipeline).apply("unboundedInput1", stream.advanceWatermarkToInfinity());

  return unbounded.apply(
      "unboundedInput1.fixedWindow1year",
      Window.into(FixedWindows.of(Duration.standardDays(365))));
}
 
Example 5
Source File: BeamSqlDslBase.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an unbounded collection containing only the first row of {@code rowsInTableA}.
 *
 * <p>The watermark is advanced to that row's {@code f_timestamp} value before the row is added.
 * The resulting stream is windowed into fixed windows of 365 days.
 */
private PCollection<Row> prepareUnboundedPCollection2() {
  Row firstRow = rowsInTableA.get(0);
  Instant eventTime = new Instant(firstRow.getDateTime("f_timestamp"));

  TestStream<Row> stream =
      TestStream.create(schemaInTableA)
          .advanceWatermarkTo(eventTime)
          .addElements(firstRow)
          .advanceWatermarkToInfinity();

  PCollection<Row> unbounded = PBegin.in(pipeline).apply("unboundedInput2", stream);

  return unbounded.apply(
      "unboundedInput2.fixedWindow1year",
      Window.into(FixedWindows.of(Duration.standardDays(365))));
}
 
Example 6
Source File: ParDoTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Feeds descending timestamped values through a {@link TestStream}, advancing the watermark
 * part-way through so that the remaining elements arrive behind it, then applies a global window
 * with 5000 ms of allowed lateness and hands the result to {@code testTimeSortedInput}.
 */
@Test
@Category({
  ValidatesRunner.class,
  UsesStatefulParDo.class,
  UsesRequiresTimeSortedInput.class,
  UsesStrictTimerOrdering.class,
  UsesTestStream.class
})
public void testRequiresTimeSortedInputWithLateDataAndAllowedLateness() {
  // Use enough elements that ending up in sorted order by pure chance is implausible.
  int numElements = 1000;

  // Values numElements, numElements - 1, ..., 1 (i.e. strictly descending).
  List<Long> descendingStamps =
      LongStream.range(0, numElements)
          .map(offset -> numElements - offset)
          .boxed()
          .collect(Collectors.toList());

  TestStream.Builder<Long> builder = TestStream.create(VarLongCoder.of());
  for (Long ts : descendingStamps) {
    Instant eventTime = Instant.ofEpochMilli(ts);
    builder = builder.addElements(TimestampedValue.of(ts, eventTime));
    if (ts == 100) {
      // Once the element stamped 100 is in, move the watermark there; every element added
      // afterwards carries a smaller timestamp and therefore sits behind the watermark.
      builder = builder.advanceWatermarkTo(eventTime);
    }
  }

  TestStream<Long> events = builder.advanceWatermarkToInfinity();
  testTimeSortedInput(
      numElements,
      pipeline
          .apply(events)
          .apply(
              Window.<Long>into(new GlobalWindows())
                  .withAllowedLateness(Duration.millis(5000))));
}
 
Example 7
Source File: ParDoTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Feeds descending timestamped values through a {@link TestStream}, advancing the watermark
 * part-way through so that the remaining elements arrive behind it, and hands the result to
 * {@code testTimeSortedInput} without exact validation of the dropped data.
 */
@Test
@Category({
  ValidatesRunner.class,
  UsesStatefulParDo.class,
  UsesRequiresTimeSortedInput.class,
  UsesStrictTimerOrdering.class,
  UsesTestStream.class
})
public void testRequiresTimeSortedInputWithLateData() {
  // Use enough elements that ending up in sorted order by pure chance is implausible.
  int numElements = 1000;

  // Values numElements, numElements - 1, ..., 1 (i.e. strictly descending).
  List<Long> descendingStamps =
      LongStream.range(0, numElements)
          .map(offset -> numElements - offset)
          .boxed()
          .collect(Collectors.toList());

  TestStream.Builder<Long> builder = TestStream.create(VarLongCoder.of());
  for (Long ts : descendingStamps) {
    Instant eventTime = Instant.ofEpochMilli(ts);
    builder = builder.addElements(TimestampedValue.of(ts, eventTime));
    if (ts == 100) {
      // Once the element stamped 100 is in, move the watermark there; every element added
      // afterwards carries a smaller timestamp and therefore sits behind the watermark.
      builder = builder.advanceWatermarkTo(eventTime);
    }
  }

  TestStream<Long> events = builder.advanceWatermarkToInfinity();
  testTimeSortedInput(
      numElements - 100,
      numElements - 1,
      pipeline.apply(events),
      // Which elements get dropped depends on the runner, so exact validation is switched off.
      false);
}
 
Example 8
Source File: WaitTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Produces a {@link TestStream} holding the values [0, numElements), interleaved with an equal
 * number of randomly chosen, non-decreasing watermark updates spread over {@code totalDuration}
 * starting at {@code base}. Each element's timestamp lies at most {@code allowedLateness} before
 * the watermark update that precedes it.
 *
 * <p>TODO: Consider moving this into TestStream if it's useful enough.
 *
 * @param name name under which the stream is applied to the pipeline
 * @param base instant from which watermarks and processing times are offset
 * @param totalDuration span over which watermarks and processing times are distributed
 * @param numElements number of elements (and watermark updates) to generate
 * @param allowedLateness upper bound on how far an element may trail its watermark
 * @return the generated stream applied to the pipeline {@code p}
 */
private PCollection<Long> generateStreamWithBoundedDisorder(
    String name,
    Instant base,
    Duration totalDuration,
    int numElements,
    Duration allowedLateness) {
  // Draw one random watermark per element, then sort so the updates never move backwards.
  List<Instant> sortedWatermarks = Lists.newArrayList();
  for (int n = 0; n < numElements; n++) {
    long randomOffset = (long) (totalDuration.getMillis() * Math.random());
    sortedWatermarks.add(base.plus(new Duration(randomOffset)));
  }
  Collections.sort(sortedWatermarks);

  // For every index emit a watermark update followed by an element that trails it by a random
  // amount below allowedLateness; both share the same, evenly spaced processing time.
  List<Event<Long>> timeline = Lists.newArrayList();
  for (int index = 0; index < numElements; index++) {
    long processingOffset =
        (long) (1.0 * index * totalDuration.getMillis() / (numElements + 1));
    Instant processingTime = base.plus(processingOffset);
    Instant watermark = sortedWatermarks.get(index);
    long lag = (long) (Math.random() * allowedLateness.getMillis());
    Instant elementTime = watermark.minus(lag);

    timeline.add(new Event<>(processingTime, watermark));
    timeline.add(new Event<>(processingTime, TimestampedValue.of((long) index, elementTime)));
  }

  // Replay the timeline into the builder, advancing processing time only when it moves forward.
  TestStream.Builder<Long> builder = TestStream.create(VarLongCoder.of());
  Instant previousProcessingTime = base;
  for (Event<Long> step : timeline) {
    Duration elapsed = new Duration(previousProcessingTime, step.processingTime);
    if (elapsed.getMillis() > 0) {
      builder = builder.advanceProcessingTime(elapsed);
    }
    previousProcessingTime = step.processingTime;

    builder =
        step.element != null
            ? builder.addElements(step.element)
            : builder.advanceWatermarkTo(step.watermarkUpdate);
  }

  return p.apply(name, builder.advanceWatermarkToInfinity());
}
 
Example 9
Source File: ParDoTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the time-sorted {@code DoFn} twice in a row over a {@link TestStream} that contains late
 * data: once directly in this method, and a second time through {@code
 * testTimeSortedInputAlreadyHavingStamps} on the output of the first pass.
 */
@Test
@Category({
  ValidatesRunner.class,
  UsesStatefulParDo.class,
  UsesRequiresTimeSortedInput.class,
  UsesStrictTimerOrdering.class,
  UsesTestStream.class
})
public void testTwoRequiresTimeSortedInputWithLateData() {
  // Use enough elements that ending up in sorted order by pure chance is implausible.
  int numElements = 1000;

  // Values numElements, numElements - 1, ..., 1 (i.e. strictly descending).
  List<Long> descendingStamps =
      LongStream.range(0, numElements)
          .map(offset -> numElements - offset)
          .boxed()
          .collect(Collectors.toList());

  TestStream.Builder<Long> builder = TestStream.create(VarLongCoder.of());
  for (Long ts : descendingStamps) {
    Instant eventTime = Instant.ofEpochMilli(ts);
    builder = builder.addElements(TimestampedValue.of(ts, eventTime));
    if (ts == 100) {
      // Once the element stamped 100 is in, move the watermark there; every element added
      // afterwards carries a smaller timestamp and therefore sits behind the watermark.
      builder = builder.advanceWatermarkTo(eventTime);
    }
  }

  // First pass: stamp each value with itself, key everything under "", and sort by time.
  PCollection<Long> first =
      pipeline
          .apply(builder.advanceWatermarkToInfinity())
          .apply(WithTimestamps.of(value -> Instant.ofEpochMilli(value)))
          .apply(
              "first.MapElements",
              MapElements.into(
                      TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.longs()))
                  .via(value -> KV.of("", value)))
          .apply("first.ParDo", ParDo.of(timeSortedDoFn()))
          .apply(MapElements.into(TypeDescriptors.longs()).via(value -> (long) value));

  // Second pass: run the check on the already sorted output to verify that no further data
  // is lost.
  testTimeSortedInputAlreadyHavingStamps(
      numElements - 100,
      numElements - 1,
      first,
      // Which elements get dropped depends on the runner, so exact validation is switched off.
      false);
}