org.apache.beam.sdk.testing.TestStream Java Examples

The following examples show how to use org.apache.beam.sdk.testing.TestStream. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ReshuffleTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@link Reshuffle} over a {@link TestStream} whose elements carry widely spread timestamps
 * and checks that all three values come out the other side.
 */
@Test
@Category({ValidatesRunner.class, UsesTestStream.class})
public void testReshuffleWithTimestampsStreaming() {
  // Timestamps under test: the epoch, 48 days after the epoch, and 48 days before
  // BoundedWindow.TIMESTAMP_MAX_VALUE.
  Duration margin = Duration.standardDays(48L);
  Instant epoch = new Instant(0L);
  Instant afterEpoch = epoch.plus(margin);
  Instant beforeMax = BoundedWindow.TIMESTAMP_MAX_VALUE.minus(margin);

  // The watermark is advanced before any element is added, so the element stamped at the epoch
  // is already behind the watermark when it enters the stream.
  TestStream.Builder<Long> events = TestStream.create(VarLongCoder.of());
  events = events.advanceWatermarkTo(afterEpoch);
  events =
      events.addElements(
          TimestampedValue.of(0L, epoch),
          TimestampedValue.of(1L, afterEpoch),
          TimestampedValue.of(2L, beforeMax));
  TestStream<Long> stream = events.advanceWatermarkToInfinity();

  // Key every value with the same empty key and place it into ten-minute fixed windows.
  PCollection<KV<String, Long>> windowed =
      pipeline
          .apply(stream)
          .apply(WithKeys.of(""))
          .apply(Window.into(FixedWindows.of(Duration.standardMinutes(10L))));

  PCollection<KV<String, Long>> afterReshuffle = windowed.apply(Reshuffle.of());
  PAssert.that(afterReshuffle.apply(Values.create())).containsInAnyOrder(0L, 1L, 2L);

  pipeline.run();
}
 
Example #2
Source File: TestStreamTranslation.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Produces a {@link RunnerApi.TestStreamPayload} from a {@link TestStream}.
 *
 * <p>Each event of the transform is translated with {@code eventToProto} using the transform's
 * value coder, and that coder is registered with {@code components} so the payload can refer to
 * it by id.
 *
 * @param transform the stream whose events and value coder are encoded
 * @param components the components the value coder is registered with
 * @return the payload holding the registered coder id and every translated event, in order
 * @throws IOException propagated from the statements outside the {@code try} block
 * @throws RuntimeException wrapping any {@link IOException} raised while translating the events
 */
static <T> RunnerApi.TestStreamPayload payloadForTestStream(
    final TestStream<T> transform, SdkComponents components) throws IOException {
  List<RunnerApi.TestStreamPayload.Event> translatedEvents = new ArrayList<>();
  try {
    for (TestStream.Event<T> streamEvent : transform.getEvents()) {
      RunnerApi.TestStreamPayload.Event translated =
          eventToProto(streamEvent, transform.getValueCoder());
      translatedEvents.add(translated);
    }
  } catch (IOException e) {
    // NOTE(review): the method already declares IOException, yet translation failures are
    // rethrown unchecked. Kept as is because callers may rely on the current exception type.
    throw new RuntimeException(e);
  }

  RunnerApi.TestStreamPayload.Builder payload = RunnerApi.TestStreamPayload.newBuilder();
  payload.setCoderId(components.registerCoder(transform.getValueCoder()));
  payload.addAllEvents(translatedEvents);
  return payload.build();
}
 
Example #3
Source File: SpannerIOWriteTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Streams six mutations, supplied out of numeric order, into a Spanner write that sets no
 * grouping factor, and verifies the batches follow the order in which the mutations were
 * received.
 */
@Test
public void streamingWritesNoGrouping() throws Exception {
  // Arguments to m(...) are deliberately not ascending: 1, 5, 2, 4, 3, 6.
  TestStream.Builder<Mutation> unsorted = TestStream.create(SerializableCoder.of(Mutation.class));
  unsorted = unsorted.addElements(m(1L), m(5L), m(2L), m(4L), m(3L), m(6L));
  TestStream<Mutation> testStream = unsorted.advanceWatermarkToInfinity();

  // No grouping factor is set on this write; only the row limit per batch is.
  SpannerIO.Write write =
      SpannerIO.write()
          .withProjectId("test-project")
          .withInstanceId("test-instance")
          .withDatabaseId("test-database")
          .withServiceFactory(serviceFactory)
          .withMaxNumRows(2);

  pipeline.apply(testStream).apply(write);
  pipeline.run();

  // Grouping/sorting must not have happened: batches mirror the received order.
  verifyBatches(batch(m(1L), m(5L)), batch(m(2L), m(4L)), batch(m(3L), m(6L)));
}
 
Example #4
Source File: TestStreamTranslationTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Translates the applied {@code testStream} to its proto form and checks both the URN of the
 * resulting spec and the contents of the decoded payload.
 */
@Test
public void testRegistrarEncodedProto() throws Exception {
  PCollection<String> streamOutput = p.apply(testStream);

  AppliedPTransform<PBegin, PCollection<String>, TestStream<String>> application =
      AppliedPTransform.of(
          "fakeName", PBegin.in(p).expand(), streamOutput.expand(), testStream, p);

  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));

  RunnerApi.FunctionSpec translatedSpec =
      PTransformTranslation.toProto(application, sdkComponents).getSpec();
  assertThat(translatedSpec.getUrn(), equalTo(TEST_STREAM_TRANSFORM_URN));

  // Decode the payload bytes and compare them against the original stream, event by event.
  RunnerApi.TestStreamPayload decodedPayload =
      TestStreamPayload.parseFrom(translatedSpec.getPayload());
  RehydratedComponents rehydrated =
      RehydratedComponents.forComponents(sdkComponents.toComponents());
  verifyTestStreamEncoding(testStream, decodedPayload, rehydrated);
}
 
Example #5
Source File: StatefulTeamScoreTest.java    From deployment-examples with MIT License 6 votes vote down vote up
/**
 * Verifies the output of {@link UpdateTeamScoreFn}, a {@link
 * org.apache.beam.sdk.transforms.DoFn}, when every event belongs to a single team.
 */
@Test
public void testScoreUpdatesOneTeam() {
  // Five events from two users; the assertion below keys all results by RED_ONE's team.
  // NOTE(review): presumably the numeric argument of event(...) is the score and the Duration
  // an offset from baseTime - confirm against the event(...) helper.
  TestStream.Builder<KV<String, GameActionInfo>> events =
      TestStream.create(KvCoder.of(StringUtf8Coder.of(), AvroCoder.of(GameActionInfo.class)));
  events = events.advanceWatermarkTo(baseTime);
  events =
      events.addElements(
          event(TestUser.RED_TWO, 99, Duration.standardSeconds(10)),
          event(TestUser.RED_ONE, 1, Duration.standardSeconds(20)),
          event(TestUser.RED_ONE, 0, Duration.standardSeconds(30)),
          event(TestUser.RED_TWO, 100, Duration.standardSeconds(40)),
          event(TestUser.RED_TWO, 201, Duration.standardSeconds(50)));
  TestStream<KV<String, GameActionInfo>> createEvents = events.advanceWatermarkToInfinity();

  PCollection<KV<String, Integer>> scoreUpdates =
      p.apply(createEvents).apply(ParDo.of(new UpdateTeamScoreFn(100)));

  String team = TestUser.RED_ONE.getTeam();

  // Exactly three updates are expected in the global window.
  PAssert.that(scoreUpdates)
      .inWindow(GlobalWindow.INSTANCE)
      .containsInAnyOrder(KV.of(team, 100), KV.of(team, 200), KV.of(team, 401));

  p.run().waitUntilFinish();
}
 
Example #6
Source File: TestStreamTranslationTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Asserts that {@code payload} is a faithful encoding of {@code testStream}: same value coder,
 * same number of events, and pairwise-equal events once decoded.
 *
 * @param testStream the original stream the payload was produced from
 * @param payload the proto payload under inspection
 * @param protoComponents components used to resolve the payload's coder id
 */
private static <T> void verifyTestStreamEncoding(
    TestStream<T> testStream,
    RunnerApi.TestStreamPayload payload,
    RehydratedComponents protoComponents)
    throws Exception {

  // This reverse direction is only valid for Java-based coders
  assertThat(protoComponents.getCoder(payload.getCoderId()), equalTo(testStream.getValueCoder()));

  int encodedEventCount = payload.getEventsList().size();
  assertThat(encodedEventCount, equalTo(testStream.getEvents().size()));

  // Walk the encoded events in order, decoding each and comparing it with the original event at
  // the same position.
  int position = 0;
  for (RunnerApi.TestStreamPayload.Event encodedEvent : payload.getEventsList()) {
    assertThat(
        TestStreamTranslation.eventFromProto(encodedEvent, testStream.getValueCoder()),
        equalTo(testStream.getEvents().get(position)));
    position++;
  }
}
 
Example #7
Source File: TestStreamTranslation.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Converts an {@link AppliedPTransform}, which may be a rehydrated transform or an original
 * {@link TestStream}, to a {@link TestStream}.
 *
 * @param application the applied transform to convert
 * @return the stream rebuilt from the proto payload of {@code application}
 * @throws IllegalArgumentException if the translated transform does not carry the test-stream URN
 * @throws IOException declared by this method; raised by the calls it makes
 */
public static <T> TestStream<T> getTestStream(
    AppliedPTransform<PBegin, PCollection<T>, PTransform<PBegin, PCollection<T>>> application)
    throws IOException {
  // For robustness the transform is always round-tripped through its proto form, even when
  // application.getTransform() is already a TestStream that could simply be returned.

  SdkComponents sdkComponents = SdkComponents.create(application.getPipeline().getOptions());
  RunnerApi.FunctionSpec spec =
      PTransformTranslation.toProto(application, sdkComponents).getSpec();

  String urn = spec.getUrn();
  checkArgument(
      TEST_STREAM_TRANSFORM_URN.equals(urn),
      "Attempt to get %s from a transform with wrong URN %s",
      TestStream.class.getSimpleName(),
      urn);

  RunnerApi.TestStreamPayload payload = RunnerApi.TestStreamPayload.parseFrom(spec.getPayload());
  RehydratedComponents rehydrated =
      RehydratedComponents.forComponents(sdkComponents.toComponents());

  return (TestStream<T>) testStreamFromProtoPayload(payload, rehydrated);
}
 
Example #8
Source File: SpannerIOWriteTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Streams six mutations, supplied out of numeric order, into a Spanner write configured with a
 * grouping factor, and verifies the resulting batches are sorted.
 */
@Test
public void streamingWritesWithGrouping() throws Exception {
  // Arguments to m(...) are deliberately not ascending: 1, 5, 2, 4, 3, 6.
  TestStream.Builder<Mutation> unsorted = TestStream.create(SerializableCoder.of(Mutation.class));
  unsorted = unsorted.addElements(m(1L), m(5L), m(2L), m(4L), m(3L), m(6L));
  TestStream<Mutation> testStream = unsorted.advanceWatermarkToInfinity();

  // Grouping is switched on here via the grouping factor.
  SpannerIO.Write groupingWrite =
      SpannerIO.write()
          .withProjectId("test-project")
          .withInstanceId("test-instance")
          .withDatabaseId("test-database")
          .withServiceFactory(serviceFactory)
          .withGroupingFactor(40)
          .withMaxNumRows(2);

  pipeline.apply(testStream).apply(groupingWrite);
  pipeline.run();

  // Output should be batches of sorted mutations.
  verifyBatches(batch(m(1L), m(2L)), batch(m(3L), m(4L)), batch(m(5L), m(6L)));
}
 
Example #9
Source File: StatefulParDoEvaluatorFactoryTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds {@code statefulConcat()} three elements in reverse timestamp order, the last of which is
 * added after the watermark has already moved past its timestamp, and expects the outputs
 * {@code "3"}, {@code "3:2"} and {@code "3:2:1"}.
 */
@Test
public void testRequiresTimeSortedInputWithLateDataAndAllowedLateness() {
  Instant now = Instant.ofEpochMilli(0);

  TestStream.Builder<KV<String, Integer>> events =
      TestStream.create(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()));
  // Values 1 and 2 carry timestamps now+2 and now+1 respectively.
  events = events.addElements(TimestampedValue.of(KV.of("", 1), now.plus(2)));
  events = events.addElements(TimestampedValue.of(KV.of("", 2), now.plus(1)));
  // Value 3 is stamped `now` but is added once the watermark is at now+1; the window below
  // allows 2 ms of lateness.
  events = events.advanceWatermarkTo(now.plus(1));
  events = events.addElements(TimestampedValue.of(KV.of("", 3), now));
  TestStream<KV<String, Integer>> stream = events.advanceWatermarkToInfinity();

  PCollection<KV<String, Integer>> input =
      pipeline
          .apply(stream)
          .apply(
              Window.<KV<String, Integer>>into(new GlobalWindows())
                  .withAllowedLateness(Duration.millis(2)));

  PCollection<String> concatenated = input.apply(ParDo.of(statefulConcat()));
  PAssert.that(concatenated).containsInAnyOrder("3", "3:2", "3:2:1");
  pipeline.run();
}
 
Example #10
Source File: BigQueryChangeApplierTest.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds two insert records built on {@code RECORD_SCHEMA1} with the same first value
 * ({@code "k1"}) through {@code BigQueryChangeApplier.buildTableSchemaCollection} and asserts
 * that a single table-schema entry is produced.
 */
@Test
void testSchemasEmittedOnlyOnChanges() {
  // The byte payload is encoded with an explicit charset so the fixture does not depend on the
  // platform default. The class is fully qualified because this method must not rely on an
  // import that may be absent from the file.
  TestStream<Row> testStream = TestStream
      .create(SerializableCoder.of(Row.class))
      .addElements(testInsertRecord(
          Row.withSchema(RECORD_SCHEMA1)
              .addValues(
                  "k1",
                  1,
                  DateTime.now(),
                  "bytes".getBytes(java.nio.charset.StandardCharsets.UTF_8))
              .build()),
          testInsertRecord(
              Row.withSchema(RECORD_SCHEMA1)
                  .addValues(
                      "k1",
                      2,
                      DateTime.now(),
                      "bytes".getBytes(java.nio.charset.StandardCharsets.UTF_8))
                  .build()))
      .advanceWatermarkTo(Instant.now())
      .advanceWatermarkToInfinity();

  Pipeline p = Pipeline.create();

  PCollection<Row> input = p.apply(testStream).setRowSchema(UPDATE_RECORD_SCHEMA);

  PCollection<KV<String, KV<Schema, Schema>>> tableSchemaCollection =
      BigQueryChangeApplier.buildTableSchemaCollection(input);

  // Two records with an unchanged schema must yield exactly one schema entry for the table.
  PAssert.that(tableSchemaCollection).containsInAnyOrder(
      KV.of(TABLE_NAME, KV.of(KEY_SCHEMA, RECORD_SCHEMA1)));
  p.run().waitUntilFinish();
}
 
Example #11
Source File: StatefulTeamScoreTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies the output of {@link UpdateTeamScoreFn}, a {@link
 * org.apache.beam.sdk.transforms.DoFn}, when every event belongs to a single team.
 */
@Test
public void testScoreUpdatesOneTeam() {
  // Five events from two users; the assertion below keys all results by RED_ONE's team.
  // NOTE(review): presumably the numeric argument of event(...) is the score and the Duration
  // an offset from baseTime - confirm against the event(...) helper.
  TestStream.Builder<KV<String, GameActionInfo>> events =
      TestStream.create(KvCoder.of(StringUtf8Coder.of(), AvroCoder.of(GameActionInfo.class)));
  events = events.advanceWatermarkTo(baseTime);
  events =
      events.addElements(
          event(TestUser.RED_TWO, 99, Duration.standardSeconds(10)),
          event(TestUser.RED_ONE, 1, Duration.standardSeconds(20)),
          event(TestUser.RED_ONE, 0, Duration.standardSeconds(30)),
          event(TestUser.RED_TWO, 100, Duration.standardSeconds(40)),
          event(TestUser.RED_TWO, 201, Duration.standardSeconds(50)));
  TestStream<KV<String, GameActionInfo>> createEvents = events.advanceWatermarkToInfinity();

  PCollection<KV<String, Integer>> scoreUpdates =
      p.apply(createEvents).apply(ParDo.of(new UpdateTeamScoreFn(100)));

  String team = TestUser.RED_ONE.getTeam();

  // Exactly three updates are expected in the global window.
  PAssert.that(scoreUpdates)
      .inWindow(GlobalWindow.INSTANCE)
      .containsInAnyOrder(KV.of(team, 100), KV.of(team, 200), KV.of(team, 401));

  p.run().waitUntilFinish();
}
 
Example #12
Source File: ReifyTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Applies {@code Reify.windows()} to a single timestamped element and checks the reified value
 * reports the same timestamp, the global window and {@code PaneInfo.NO_FIRING}.
 */
@Test
@Category({NeedsRunner.class, UsesTestStream.class})
public void globalWindowNoKeys() {
  Instant elementTime = new Instant(123L);

  TestStream<String> singleElement =
      TestStream.create(StringUtf8Coder.of())
          .addElements(TimestampedValue.of("dei", elementTime))
          .advanceWatermarkToInfinity();

  PCollection<ValueInSingleWindow<String>> reified =
      pipeline.apply(singleElement).apply(Reify.windows());

  PAssert.that(reified)
      .containsInAnyOrder(
          ValueInSingleWindow.of("dei", elementTime, GlobalWindow.INSTANCE, PaneInfo.NO_FIRING));
  pipeline.run();
}
 
Example #13
Source File: TestUtils.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an unbounded {@link PCollection} in the {@link Pipeline} set by {@link
 * #inPipeline(Pipeline)}.
 *
 * <p>When a timestamp field has been set with {@link #withTimestampField(String)}, the watermark
 * is advanced to that field's value of each row before the row itself is added.
 *
 * <p>If no type has been set yet, the schema of the first row is stored as the type.
 *
 * @return the collection produced by applying the assembled stream under the name {@code
 *     "unboundedPCollection"}
 * @throws IllegalArgumentException if no pipeline has been set or there are no rows
 */
public PCollection<Row> buildUnbounded() {
  checkArgument(pipeline != null);
  checkArgument(rows.size() > 0);

  if (type == null) {
    type = rows.get(0).getSchema();
  }

  TestStream.Builder<Row> stream = TestStream.create(type);

  for (Row current : rows) {
    // Move the watermark first (only when a timestamp field is configured), then emit the row.
    TestStream.Builder<Row> positioned =
        timestampField == null
            ? stream
            : stream.advanceWatermarkTo(new Instant(current.getDateTime(timestampField)));
    stream = positioned.addElements(current);
  }

  TestStream<Row> completed = stream.advanceWatermarkToInfinity();
  return PBegin.in(pipeline).apply("unboundedPCollection", completed);
}
 
Example #14
Source File: ParDoTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Adds the values {@code numElements} down to 1, one element per call, to a {@link TestStream}
 * and hands the resulting collection to {@code testTimeSortedInput}.
 */
@Test
@Category({
  ValidatesRunner.class,
  UsesStatefulParDo.class,
  UsesRequiresTimeSortedInput.class,
  UsesStrictTimerOrdering.class,
  UsesTestStream.class
})
public void testRequiresTimeSortedInputWithTestStream() {
  // Use enough elements that a random shuffle ending up in sorted order can be ruled out.
  int numElements = 1000;
  TestStream.Builder<Long> stream = TestStream.create(VarLongCoder.of());
  // Descending order: numElements, numElements - 1, ..., 1.
  for (long stamp = numElements; stamp >= 1; stamp--) {
    stream = stream.addElements(Long.valueOf(stamp));
  }
  testTimeSortedInput(numElements, pipeline.apply(stream.advanceWatermarkToInfinity()));
}
 
Example #15
Source File: ParDoTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a stream of 100 elements spaced one second apart, advancing the watermark after every
 * tenth element, and passes it to {@code testEventTimeTimerOrderingWithInputPTransform}.
 */
@Test
@Category({
  ValidatesRunner.class,
  UsesTimersInParDo.class,
  UsesTestStream.class,
  UsesStatefulParDo.class,
  UsesStrictTimerOrdering.class
})
public void testEventTimeTimerOrdering() throws Exception {
  final int numTestElements = 100;
  final Instant now = new Instant(1500000000000L);
  TestStream.Builder<KV<String, String>> builder =
      TestStream.create(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()))
          .advanceWatermarkTo(new Instant(0));

  for (int index = 0; index < numTestElements; index++) {
    Instant elementTime = now.plus(index * 1000);
    builder =
        builder.addElements(
            TimestampedValue.of(KV.of("dummy", String.valueOf(index)), elementTime));

    // After every tenth element the watermark moves to one second past that element.
    int emitted = index + 1;
    if (emitted % 10 == 0) {
      builder = builder.advanceWatermarkTo(now.plus(emitted * 1000));
    }
  }

  testEventTimeTimerOrderingWithInputPTransform(
      now, numTestElements, builder.advanceWatermarkToInfinity());
}
 
Example #16
Source File: ParDoTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code TwoTimerTest} over a stream containing one element with a null key and null value,
 * using a start instant and an end instant 100 ms later.
 */
@Test
@Category({
  ValidatesRunner.class,
  UsesTimersInParDo.class,
  UsesTestStream.class,
  UsesStrictTimerOrdering.class
})
public void testTwoTimersSettingEachOther() {
  Instant start = new Instant(1500000000000L);
  Instant finish = start.plus(100);

  TestStream.Builder<KV<Void, Void>> events =
      TestStream.create(KvCoder.of(VoidCoder.of(), VoidCoder.of()));
  events = events.addElements(KV.of(null, null));
  TestStream<KV<Void, Void>> singleElement = events.advanceWatermarkToInfinity();

  pipeline.apply(TwoTimerTest.of(start, finish, singleElement));
  pipeline.run();
}
 
Example #17
Source File: FileBasedDeadLetterQueueReconsumerTest.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Creates three JSON files with identical contents, streams the values returned by {@code
 * createJsonFile} through {@code FileIO.matchAll()} and the dead-letter-queue reconsumer, and
 * expects every entry of {@code JSON_FILE_CONTENTS_1} to be read three times.
 */
@Test
public void testAllFilesAreConsumed() throws IOException {
  TestStream.Builder<String> files = TestStream.create(StringUtf8Coder.of());
  // Two files are announced together, the third in a separate step.
  files =
      files.addElements(
          createJsonFile("dlqFile1.json", JSON_FILE_CONTENTS_1),
          createJsonFile("dlqFile2.json", JSON_FILE_CONTENTS_1));
  files = files.addElements(createJsonFile("dlqFile3.json", JSON_FILE_CONTENTS_1));
  TestStream<String> inputFiles = files.advanceWatermarkToInfinity();

  PCollection<String> consumedLines = p.apply(inputFiles)
      .apply(FileIO.matchAll())
      .apply(FileBasedDeadLetterQueueReconsumer.moveAndConsumeMatches());

  // Each entry is repeated three times because the same contents were written to three files.
  PAssert.that(consumedLines)
      .containsInAnyOrder(
          Stream.of(JSON_FILE_CONTENTS_1)
              .flatMap(entry -> Stream.of(entry, entry, entry))
              .collect(Collectors.toList()));

  p.run().waitUntilFinish();
}
 
Example #18
Source File: SpannerIOWriteTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Streams three pairs of mutations, advancing processing time by one minute between pairs, into
 * a Spanner write and verifies that three batches of two mutations each are produced.
 */
@Test
public void streamingWrites() throws Exception {
  Duration oneMinute = Duration.standardMinutes(1);

  TestStream.Builder<Mutation> events = TestStream.create(SerializableCoder.of(Mutation.class));
  events = events.addElements(m(1L), m(2L));
  events = events.advanceProcessingTime(oneMinute);
  events = events.addElements(m(3L), m(4L));
  events = events.advanceProcessingTime(oneMinute);
  events = events.addElements(m(5L), m(6L));
  TestStream<Mutation> testStream = events.advanceWatermarkToInfinity();

  SpannerIO.Write write =
      SpannerIO.write()
          .withProjectId("test-project")
          .withInstanceId("test-instance")
          .withDatabaseId("test-database")
          .withServiceFactory(serviceFactory);

  pipeline.apply(testStream).apply(write);
  pipeline.run();

  verifyBatches(batch(m(1L), m(2L)), batch(m(3L), m(4L)), batch(m(5L), m(6L)));
}
 
Example #19
Source File: StateAndTimersTest.java    From streamingbook with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the test input: impressions and visits for two (logical) attributable impressions and
 * one unattributable impression, added to the stream out of event-time order with the watermark
 * advanced between groups.
 */
private static TestStream<KV<String, VisitOrImpression>> createStream() {
  // The fixtures are declared in event-time order; the stream below adds them in a different
  // order.
  Impression signupImpression =
      new Impression(
          123L, "http://search.com?q=xyz", "http://xyz.com/", Utils.parseTime("12:01:00"));
  Visit signupVisit =
      new Visit(
          "http://xyz.com/",
          Utils.parseTime("12:01:10"),
          "http://search.com?q=xyz",
          false /*isGoal*/);
  Visit signupGoal =
      new Visit(
          "http://xyz.com/join-mailing-list",
          Utils.parseTime("12:01:30"),
          "http://xyz.com/",
          true /*isGoal*/);

  Impression shoppingImpression =
      new Impression(
          456L, "http://search.com?q=thing", "http://xyz.com/thing", Utils.parseTime("12:02:00"));
  Impression shoppingImpressionDup =
      new Impression(
          789L, "http://search.com?q=thing", "http://xyz.com/thing", Utils.parseTime("12:02:10"));
  Visit shoppingVisit1 =
      new Visit(
          "http://xyz.com/thing",
          Utils.parseTime("12:02:30"),
          "http://search.com?q=thing",
          false /*isGoal*/);
  Visit shoppingVisit2 =
      new Visit(
          "http://xyz.com/thing/add-to-cart",
          Utils.parseTime("12:03:00"),
          "http://xyz.com/thing",
          false /*isGoal*/);
  Visit shoppingVisit3 =
      new Visit(
          "http://xyz.com/thing/purchase",
          Utils.parseTime("12:03:20"),
          "http://xyz.com/thing/add-to-cart",
          false /*isGoal*/);
  Visit shoppingGoal =
      new Visit(
          "http://xyz.com/thing/receipt",
          Utils.parseTime("12:03:45"),
          "http://xyz.com/thing/purchase",
          true /*isGoal*/);

  Impression unattributedImpression =
      new Impression(
          0L,
          "http://search.com?q=thing",
          "http://xyz.com/other-thing",
          Utils.parseTime("12:04:00"));
  Visit unattributedVisit =
      new Visit(
          "http://xyz.com/other-thing",
          Utils.parseTime("12:04:20"),
          "http://search.com?q=other thing",
          false /*isGoal*/);

  // Create a stream of visits and impressions, with data arriving out of order.
  TestStream.Builder<KV<String, VisitOrImpression>> stream =
      TestStream.create(KvCoder.of(StringUtf8Coder.of(), AvroCoder.of(VisitOrImpression.class)));

  stream = stream.advanceWatermarkTo(Utils.parseTime("12:00:00"));
  stream = stream.addElements(visitOrImpression(shoppingVisit2, null));
  stream = stream.addElements(visitOrImpression(shoppingGoal, null));
  stream = stream.addElements(visitOrImpression(shoppingVisit3, null));
  stream = stream.addElements(visitOrImpression(signupGoal, null));

  stream = stream.advanceWatermarkTo(Utils.parseTime("12:00:30"));
  stream = stream.addElements(visitOrImpression(null, signupImpression));

  stream = stream.advanceWatermarkTo(Utils.parseTime("12:01:00"));
  stream = stream.addElements(visitOrImpression(null, shoppingImpression));
  stream = stream.addElements(visitOrImpression(signupVisit, null));

  stream = stream.advanceWatermarkTo(Utils.parseTime("12:01:30"));
  stream = stream.addElements(visitOrImpression(null, shoppingImpressionDup));
  stream = stream.addElements(visitOrImpression(shoppingVisit1, null));

  stream = stream.advanceWatermarkTo(Utils.parseTime("12:03:45"));
  stream = stream.addElements(visitOrImpression(null, unattributedImpression));

  stream = stream.advanceWatermarkTo(Utils.parseTime("12:04:00"));
  stream = stream.addElements(visitOrImpression(unattributedVisit, null));

  return stream.advanceWatermarkToInfinity();
}
 
Example #20
Source File: TestJetRunner.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Adds a vertex for the applied {@link TestStream} to the DAG under construction and registers
 * the transform's output as an edge starting at that vertex.
 *
 * @return the vertex that was added for the transform
 */
@Override
public Vertex translate(
    Pipeline pipeline,
    AppliedPTransform<?, ?, ?> appliedTransform,
    TransformHierarchy.Node node,
    JetTranslationContext context) {
  String fullName = appliedTransform.getFullName();
  DAGBuilder dag = context.getDagBuilder();
  String newVertexId = dag.newVertexId(fullName);

  TestStream<T> stream = (TestStream<T>) appliedTransform.getTransform();

  // Per the original author: the events in the transform are not serializable, so they have to
  // be translated (here: encoded to bytes with a TestStreamCoder); the collection is also
  // flattened.
  Map.Entry<TupleTag<?>, PValue> outputEntry = Utils.getOutput(appliedTransform);
  Coder outputCoder = Utils.getCoder((PCollection) outputEntry.getValue());
  TestStream.TestStreamCoder<T> streamCoder =
      TestStream.TestStreamCoder.of(stream.getValueCoder());
  byte[] encodedStream = getEncodedPayload(stream, streamCoder);
  Vertex streamVertex =
      dag.addVertex(newVertexId, TestStreamP.supplier(encodedStream, streamCoder, outputCoder));

  // Register the single output so downstream translators can attach to this vertex.
  String edgeId = Utils.getTupleTagId(outputEntry.getValue());
  dag.registerCollectionOfEdge(edgeId, outputEntry.getKey().getId());
  dag.registerEdgeStartPoint(edgeId, streamVertex, outputCoder);
  return streamVertex;
}
 
Example #21
Source File: DeduplicateTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Exercises {@code Deduplicate.values()} in the event-time domain: {@code k1}..{@code k3} are
 * sent twice and must appear once each, while {@code "maybedup"} is re-sent after the watermark
 * has moved a further {@code Deduplicate.DEFAULT_DURATION} and may appear once or twice.
 */
@Test
@Category({NeedsRunner.class, UsesTestStream.class})
public void testEventTime() {
  Instant base = new Instant(0);
  Instant afterOneMinute = base.plus(Duration.standardMinutes(1));

  TestStream.Builder<String> events =
      TestStream.create(StringUtf8Coder.of()).advanceWatermarkTo(base);
  events =
      events.addElements(
          TimestampedValue.of("k1", base),
          TimestampedValue.of("k2", base.plus(Duration.standardSeconds(10))),
          TimestampedValue.of("k3", base.plus(Duration.standardSeconds(20))),
          TimestampedValue.of("maybedup", base.plus(Duration.standardSeconds(59))));
  events = events.advanceWatermarkTo(afterOneMinute);
  // Second occurrence of each key, added once the watermark is one minute in.
  events =
      events.addElements(
          TimestampedValue.of("k1", base.plus(Duration.standardSeconds(30))),
          TimestampedValue.of("k2", base.plus(Duration.standardSeconds(40))),
          TimestampedValue.of("k3", base.plus(Duration.standardSeconds(50))));
  events = events.advanceWatermarkTo(afterOneMinute.plus(Deduplicate.DEFAULT_DURATION));
  events =
      events.addElements(TimestampedValue.of("maybedup", base.plus(Duration.standardSeconds(59))));
  TestStream<String> values = events.advanceWatermarkToInfinity();

  PCollection<String> distinctValues =
      p.apply(values).apply(Deduplicate.<String>values().withTimeDomain(TimeDomain.EVENT_TIME));
  PAssert.that(distinctValues)
      .satisfies(
          (Iterable<String> input) -> {
            assertEquals(1, Iterables.frequency(input, "k1"));
            assertEquals(1, Iterables.frequency(input, "k2"));
            assertEquals(1, Iterables.frequency(input, "k3"));
            // Either outcome is accepted for the late re-send.
            int maybeDupCount = Iterables.frequency(input, "maybedup");
            assertTrue(maybeDupCount == 1 || maybeDupCount == 2);
            return null;
          });
  p.run();
}
 
Example #22
Source File: TestStreamTranslation.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the {@link RunnerApi.FunctionSpec} for a {@link TestStream}: the test-stream URN plus
 * the serialized payload produced by {@code payloadForTestStream}.
 */
private <T> RunnerApi.FunctionSpec translateTyped(
    final TestStream<T> testStream, SdkComponents components) throws IOException {
  RunnerApi.FunctionSpec.Builder spec = RunnerApi.FunctionSpec.newBuilder();
  spec.setUrn(TEST_STREAM_TRANSFORM_URN);
  spec.setPayload(payloadForTestStream(testStream, components).toByteString());
  return spec.build();
}
 
Example #23
Source File: DeduplicateTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Exercises {@code Deduplicate.values()} with processing time advanced between groups of
 * elements: {@code k1}..{@code k3} are sent twice and must appear once each, while {@code
 * "maybedup"} is re-sent after a further {@code Deduplicate.DEFAULT_DURATION} of processing time
 * and may appear once or twice.
 */
@Test
@Category({NeedsRunner.class, UsesTestStreamWithProcessingTime.class})
public void testProcessingTime() {
  Instant base = new Instant(0);

  TestStream.Builder<String> events =
      TestStream.create(StringUtf8Coder.of()).advanceWatermarkTo(base);
  events =
      events.addElements(
          TimestampedValue.of("k1", base),
          TimestampedValue.of("k2", base.plus(Duration.standardSeconds(10))),
          TimestampedValue.of("k3", base.plus(Duration.standardSeconds(20))),
          TimestampedValue.of("maybedup", base.plus(Duration.standardSeconds(59))));
  events = events.advanceProcessingTime(Duration.standardMinutes(1));
  // Second occurrence of each key, one minute of processing time later.
  events =
      events.addElements(
          TimestampedValue.of("k1", base.plus(Duration.standardSeconds(30))),
          TimestampedValue.of("k2", base.plus(Duration.standardSeconds(40))),
          TimestampedValue.of("k3", base.plus(Duration.standardSeconds(50))));
  events = events.advanceProcessingTime(Deduplicate.DEFAULT_DURATION);
  events =
      events.addElements(TimestampedValue.of("maybedup", base.plus(Duration.standardSeconds(59))));
  TestStream<String> values = events.advanceWatermarkToInfinity();

  PCollection<String> distinctValues = p.apply(values).apply(Deduplicate.values());
  PAssert.that(distinctValues)
      .satisfies(
          (Iterable<String> input) -> {
            assertEquals(1, Iterables.frequency(input, "k1"));
            assertEquals(1, Iterables.frequency(input, "k2"));
            assertEquals(1, Iterables.frequency(input, "k3"));
            // Either outcome is accepted for the late re-send.
            int maybeDupCount = Iterables.frequency(input, "maybedup");
            assertTrue(maybeDupCount == 1 || maybeDupCount == 2);
            return null;
          });
  p.run();
}
 
Example #24
Source File: DeduplicateTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Deduplicates key/value pairs using {@code Keys<Integer>} as the representative-value function
 * with an explicit representative coder; each of the three pairs is sent twice and must be
 * emitted once.
 */
@Test
@Category({NeedsRunner.class, UsesTestStreamWithProcessingTime.class})
public void testRepresentativeValuesWithCoder() {
  Instant base = new Instant(0);

  TestStream.Builder<KV<Integer, String>> events =
      TestStream.create(KvCoder.of(VarIntCoder.of(), StringUtf8Coder.of()))
          .advanceWatermarkTo(base);
  events =
      events.addElements(
          TimestampedValue.of(KV.of(1, "k1"), base),
          TimestampedValue.of(KV.of(2, "k2"), base.plus(Duration.standardSeconds(10))),
          TimestampedValue.of(KV.of(3, "k3"), base.plus(Duration.standardSeconds(20))));
  events = events.advanceProcessingTime(Duration.standardMinutes(1));
  // The same three pairs again, with later timestamps.
  events =
      events.addElements(
          TimestampedValue.of(KV.of(1, "k1"), base.plus(Duration.standardSeconds(30))),
          TimestampedValue.of(KV.of(2, "k2"), base.plus(Duration.standardSeconds(40))),
          TimestampedValue.of(KV.of(3, "k3"), base.plus(Duration.standardSeconds(50))));
  TestStream<KV<Integer, String>> values = events.advanceWatermarkToInfinity();

  PCollection<KV<Integer, String>> distinctValues =
      p.apply(values)
          .apply(
              Deduplicate.withRepresentativeValueFn(new Keys<Integer>())
                  .withRepresentativeCoder(VarIntCoder.of()));

  PAssert.that(distinctValues).containsInAnyOrder(KV.of(1, "k1"), KV.of(2, "k2"), KV.of(3, "k3"));
  p.run();
}
 
Example #25
Source File: DeduplicateTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Deduplicates key/value pairs using {@code Keys<Integer>} as the representative-value function;
 * each of the three pairs is sent twice and must be emitted once.
 *
 * <p>NOTE(review): despite the method name, no trigger and no representative type are configured
 * here - the transform is given a representative coder. Confirm this is the intended setup.
 */
@Test
@Category({NeedsRunner.class, UsesTestStreamWithProcessingTime.class})
public void testTriggeredRepresentativeValuesWithType() {
  Instant base = new Instant(0);

  TestStream.Builder<KV<Integer, String>> events =
      TestStream.create(KvCoder.of(VarIntCoder.of(), StringUtf8Coder.of()))
          .advanceWatermarkTo(base);
  events =
      events.addElements(
          TimestampedValue.of(KV.of(1, "k1"), base),
          TimestampedValue.of(KV.of(2, "k2"), base.plus(Duration.standardSeconds(10))),
          TimestampedValue.of(KV.of(3, "k3"), base.plus(Duration.standardSeconds(20))));
  events = events.advanceProcessingTime(Duration.standardMinutes(1));
  // The same three pairs again, with later timestamps.
  events =
      events.addElements(
          TimestampedValue.of(KV.of(1, "k1"), base.plus(Duration.standardSeconds(30))),
          TimestampedValue.of(KV.of(2, "k2"), base.plus(Duration.standardSeconds(40))),
          TimestampedValue.of(KV.of(3, "k3"), base.plus(Duration.standardSeconds(50))));
  TestStream<KV<Integer, String>> values = events.advanceWatermarkToInfinity();

  PCollection<KV<Integer, String>> distinctValues =
      p.apply(values)
          .apply(
              Deduplicate.withRepresentativeValueFn(new Keys<Integer>())
                  .withRepresentativeCoder(VarIntCoder.of()));

  PAssert.that(distinctValues).containsInAnyOrder(KV.of(1, "k1"), KV.of(2, "k2"), KV.of(3, "k3"));
  p.run();
}
 
Example #26
Source File: TestStreamTranslationTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Supplies the parameterized test cases: an empty stream, a stream with only a watermark
 * advance, a stream with a single timestamped element, and a stream with only a processing-time
 * advance.
 */
@Parameters(name = "{index}: {0}")
public static Iterable<TestStream<?>> data() {
  TestStream<Integer> noEvents = TestStream.create(VarIntCoder.of()).advanceWatermarkToInfinity();

  TestStream<Integer> watermarkOnly =
      TestStream.create(VarIntCoder.of())
          .advanceWatermarkTo(new Instant(42))
          .advanceWatermarkToInfinity();

  TestStream<Integer> singleElement =
      TestStream.create(VarIntCoder.of())
          .addElements(TimestampedValue.of(3, new Instant(17)))
          .advanceWatermarkToInfinity();

  TestStream<String> processingTimeOnly =
      TestStream.create(StringUtf8Coder.of())
          .advanceProcessingTime(Duration.millis(82))
          .advanceWatermarkToInfinity();

  return ImmutableList.of(noEvents, watermarkOnly, singleElement, processingTimeOnly);
}
 
Example #27
Source File: ParDoTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Adds the values {@code numElements} down to 1, each stamped with its own value in epoch
 * milliseconds, advancing the watermark to 100 ms right after the element stamped 100 is added.
 * The collection is windowed with 5000 ms of allowed lateness before being handed to {@code
 * testTimeSortedInput}.
 */
@Test
@Category({
  ValidatesRunner.class,
  UsesStatefulParDo.class,
  UsesRequiresTimeSortedInput.class,
  UsesStrictTimerOrdering.class,
  UsesTestStream.class
})
public void testRequiresTimeSortedInputWithLateDataAndAllowedLateness() {
  // Use enough elements that a random shuffle ending up in sorted order can be ruled out.
  int numElements = 1000;
  TestStream.Builder<Long> input = TestStream.create(VarLongCoder.of());
  // Descending order: numElements, numElements - 1, ..., 1.
  for (long stamp = numElements; stamp >= 1; stamp--) {
    input = input.addElements(TimestampedValue.of(stamp, Instant.ofEpochMilli(stamp)));
    if (stamp == 100) {
      // Every element added after this point carries a timestamp below the watermark, i.e. the
      // rest of the input is late data.
      input = input.advanceWatermarkTo(Instant.ofEpochMilli(stamp));
    }
  }
  testTimeSortedInput(
      numElements,
      pipeline
          .apply(input.advanceWatermarkToInfinity())
          .apply(
              Window.<Long>into(new GlobalWindows())
                  .withAllowedLateness(Duration.millis(5000))));
}
 
Example #28
Source File: ParDoTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Adds the values {@code numElements} down to 1, each stamped with its own value in epoch
 * milliseconds, advancing the watermark to 100 ms right after the element stamped 100 is added.
 * No allowed lateness is configured before the collection is handed to {@code
 * testTimeSortedInput}.
 */
@Test
@Category({
  ValidatesRunner.class,
  UsesStatefulParDo.class,
  UsesRequiresTimeSortedInput.class,
  UsesStrictTimerOrdering.class,
  UsesTestStream.class
})
public void testRequiresTimeSortedInputWithLateData() {
  // Use enough elements that a random shuffle ending up in sorted order can be ruled out.
  int numElements = 1000;
  TestStream.Builder<Long> input = TestStream.create(VarLongCoder.of());
  // Descending order: numElements, numElements - 1, ..., 1.
  for (long stamp = numElements; stamp >= 1; stamp--) {
    input = input.addElements(TimestampedValue.of(stamp, Instant.ofEpochMilli(stamp)));
    if (stamp == 100) {
      // Every element added after this point carries a timestamp below the watermark, i.e. the
      // rest of the input is late data.
      input = input.advanceWatermarkTo(Instant.ofEpochMilli(stamp));
    }
  }
  testTimeSortedInput(
      numElements - 100,
      numElements - 1,
      pipeline.apply(input.advanceWatermarkToInfinity()),
      // cannot validate exactly which data gets dropped, because that is runner dependent
      false);
}
 
Example #29
Source File: TestStreamTranslation.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a proto {@link RunnerApi.TestStreamPayload.Event} back into the matching {@link
 * TestStream.Event}.
 *
 * @param protoEvent the encoded event
 * @param coder the coder used to decode the elements of an element event
 * @return a watermark, processing-time or element event, depending on the proto's event case
 * @throws IllegalArgumentException if the proto carries no event or an unrecognized one
 * @throws IOException declared by this method; raised by the calls it makes
 */
static <T> TestStream.Event<T> eventFromProto(
    RunnerApi.TestStreamPayload.Event protoEvent, Coder<T> coder) throws IOException {
  switch (protoEvent.getEventCase()) {
    case WATERMARK_EVENT:
      {
        long newWatermark = protoEvent.getWatermarkEvent().getNewWatermark();
        return TestStream.WatermarkEvent.advanceTo(new Instant(newWatermark));
      }
    case PROCESSING_TIME_EVENT:
      {
        long advanceMillis = protoEvent.getProcessingTimeEvent().getAdvanceDuration();
        return TestStream.ProcessingTimeEvent.advanceBy(Duration.millis(advanceMillis));
      }
    case ELEMENT_EVENT:
      {
        // Decode every encoded element with the supplied coder, keeping its timestamp.
        List<TimestampedValue<T>> decoded = new ArrayList<>();
        for (RunnerApi.TestStreamPayload.TimestampedElement encoded :
            protoEvent.getElementEvent().getElementsList()) {
          T value =
              CoderUtils.decodeFromByteArray(coder, encoded.getEncodedElement().toByteArray());
          decoded.add(TimestampedValue.of(value, new Instant(encoded.getTimestamp())));
        }
        return TestStream.ElementEvent.add(decoded);
      }
    case EVENT_NOT_SET:
    default:
      throw new IllegalArgumentException(
          String.format(
              "Unsupported type of %s: %s",
              RunnerApi.TestStreamPayload.Event.class.getCanonicalName(),
              protoEvent.getEventCase()));
  }
}
 
Example #30
Source File: DeduplicateTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Applies {@code Deduplicate.values()} after 30-second fixed windowing and checks the contents of
 * three consecutive windows: {@code k1}..{@code k3} appear in both of the first two windows and
 * {@code k4}..{@code k6} in the third.
 */
@Test
@Category({NeedsRunner.class, UsesTestStream.class})
public void testInDifferentWindows() {
  Instant base = new Instant(0);
  // Boundaries of the three 30-second windows inspected below.
  Instant at30s = base.plus(Duration.standardSeconds(30));
  Instant at60s = base.plus(Duration.standardSeconds(60));
  Instant at90s = base.plus(Duration.standardSeconds(90));

  TestStream.Builder<String> events =
      TestStream.create(StringUtf8Coder.of()).advanceWatermarkTo(base);
  events =
      events.addElements(
          TimestampedValue.of("k1", base),
          TimestampedValue.of("k2", base.plus(Duration.standardSeconds(10))),
          TimestampedValue.of("k3", base.plus(Duration.standardSeconds(20))),
          TimestampedValue.of("k1", base.plus(Duration.standardSeconds(30))),
          TimestampedValue.of("k2", base.plus(Duration.standardSeconds(40))),
          TimestampedValue.of("k3", base.plus(Duration.standardSeconds(50))),
          TimestampedValue.of("k4", base.plus(Duration.standardSeconds(60))),
          TimestampedValue.of("k5", base.plus(Duration.standardSeconds(70))),
          TimestampedValue.of("k6", base.plus(Duration.standardSeconds(80))));
  TestStream<String> values = events.advanceWatermarkToInfinity();

  PCollection<String> distinctValues =
      p.apply(values)
          .apply(Window.into(FixedWindows.of(Duration.standardSeconds(30))))
          .apply(Deduplicate.values());

  PAssert.that(distinctValues)
      .inWindow(new IntervalWindow(base, at30s))
      .containsInAnyOrder("k1", "k2", "k3");
  PAssert.that(distinctValues)
      .inWindow(new IntervalWindow(at30s, at60s))
      .containsInAnyOrder("k1", "k2", "k3");
  PAssert.that(distinctValues)
      .inWindow(new IntervalWindow(at60s, at90s))
      .containsInAnyOrder("k4", "k5", "k6");
  p.run();
}