Java Code Examples for org.apache.beam.sdk.testing.TestPipeline#create()

The following examples show how to use org.apache.beam.sdk.testing.TestPipeline#create(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestDataflowRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Tests that when a batch job terminates in a failure state even if all assertions passed, it
 * throws an error to that effect.
 */
@Test
public void testRunBatchJobThatFails() throws Exception {
  Pipeline p = TestPipeline.create(options);
  PCollection<Integer> pc = p.apply(Create.of(1, 2, 3));
  PAssert.that(pc).containsInAnyOrder(1, 2, 3);

  // The mocked job always reports FAILED as its state.
  DataflowPipelineJob mockJob = Mockito.mock(DataflowPipelineJob.class);
  when(mockJob.getState()).thenReturn(State.FAILED);
  when(mockJob.getProjectId()).thenReturn("test-project");
  when(mockJob.getJobId()).thenReturn("test-job");

  // The delegate runner hands back the failed job for any pipeline.
  DataflowRunner mockRunner = Mockito.mock(DataflowRunner.class);
  when(mockRunner.run(any(Pipeline.class))).thenReturn(mockJob);

  TestDataflowRunner runner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  // Metrics are stubbed with the "success" flag set, so only the job state signals failure.
  when(mockClient.getJobMetrics(anyString()))
      .thenReturn(generateMockMetricResponse(true /* success */, false /* tentative */));
  expectedException.expect(RuntimeException.class);
  runner.run(p, mockRunner);
  // Only reached when run() returned normally, i.e. the job failure was not surfaced as the
  // RuntimeException registered above.
  fail("RuntimeException expected");
}
 
Example 2
Source File: TestDataflowRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Streaming case: a pipeline that terminates without a {@link PAssert} failure must have its
 * {@link TestPipelineOptions#setOnSuccessMatcher(SerializableMatcher) on success matcher}
 * invoked.
 */
@Test
public void testStreamingOnSuccessMatcherWhenPipelineSucceeds() throws Exception {
  options.setStreaming(true);
  Pipeline pipeline = TestPipeline.create(options);
  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3));
  PAssert.that(numbers).containsInAnyOrder(1, 2, 3);

  // A job that identifies itself and reports DONE, both when polled and when waited on.
  final DataflowPipelineJob job = Mockito.mock(DataflowPipelineJob.class);
  when(job.getProjectId()).thenReturn("test-project");
  when(job.getJobId()).thenReturn("test-job");
  when(job.getState()).thenReturn(State.DONE);

  // The delegate runner returns that job for whatever pipeline it is given.
  DataflowRunner delegate = Mockito.mock(DataflowRunner.class);
  when(delegate.run(any(Pipeline.class))).thenReturn(job);

  TestDataflowRunner runner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  // NOTE(review): TestSuccessMatcher(job, 1) presumably checks the matcher is used once - confirm.
  TestPipelineOptions testOptions = options.as(TestPipelineOptions.class);
  testOptions.setOnSuccessMatcher(new TestSuccessMatcher(job, 1));

  when(job.waitUntilFinish(any(Duration.class), any(JobMessagesHandler.class)))
      .thenReturn(State.DONE);
  when(mockClient.getJobMetrics(anyString()))
      .thenReturn(generateMockMetricResponse(true /* success */, true /* tentative */));

  runner.run(pipeline, delegate);
}
 
Example 3
Source File: PCollectionListTest.java    From beam with Apache License 2.0 6 votes vote down vote up
@Test
public void testEquals() {
  Pipeline pipeline = TestPipeline.create();
  PCollection<String> meta = pipeline.apply("Meta", Create.of("foo", "bar"));
  PCollection<String> pythonic = pipeline.apply("Pythonic", Create.of("spam, ham"));
  PCollection<String> syntactic = pipeline.apply("Syntactic", Create.of("eggs", "baz"));

  // Different construction paths for the same ordered contents; all must be equal.
  PCollectionList<String> chained = PCollectionList.of(meta).and(pythonic).and(syntactic);
  PCollectionList<String> chainedAgain = PCollectionList.of(meta).and(pythonic).and(syntactic);
  PCollectionList<String> pairThenOne =
      PCollectionList.of(ImmutableList.of(meta, pythonic)).and(syntactic);

  // Same members in a different order: order is considered, so this is its own group.
  PCollectionList<String> reordered = PCollectionList.of(meta).and(syntactic).and(pythonic);

  // NOTE(review): the following cases were disabled in the original and remain disabled here:
  //   group: PCollectionList.empty(p), PCollectionList.empty(p)
  //   group: PCollectionList.of(first).and(second)
  //   same-contents group members:
  //     PCollectionList.<String>empty(p).and(first).and(second).and(third)
  //     PCollectionList.of(ImmutableList.of(first, second, third))
  //     PCollectionList.of(first).and(ImmutableList.of(second, third))
  new EqualsTester()
      .addEqualityGroup(chained, chainedAgain, pairThenOne)
      .addEqualityGroup(reordered)
      .addEqualityGroup(PCollectionList.empty(TestPipeline.create()))
      .testEquals();
}
 
Example 4
Source File: TestDataflowRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Streaming case: when the pipeline terminates in a failed state, the {@link
 * TestPipelineOptions#setOnSuccessMatcher(SerializableMatcher) on success matcher} must not be
 * invoked.
 */
@Test
public void testStreamingOnSuccessMatcherWhenPipelineFails() throws Exception {
  options.setStreaming(true);
  Pipeline pipeline = TestPipeline.create(options);
  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3));
  PAssert.that(numbers).containsInAnyOrder(1, 2, 3);

  // A job that identifies itself and reports FAILED, both when polled and when waited on.
  final DataflowPipelineJob job = Mockito.mock(DataflowPipelineJob.class);
  when(job.getProjectId()).thenReturn("test-project");
  when(job.getJobId()).thenReturn("test-job");
  when(job.getState()).thenReturn(State.FAILED);

  // The delegate runner returns that job for whatever pipeline it is given.
  DataflowRunner delegate = Mockito.mock(DataflowRunner.class);
  when(delegate.run(any(Pipeline.class))).thenReturn(job);

  TestDataflowRunner runner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  TestPipelineOptions testOptions = options.as(TestPipelineOptions.class);
  testOptions.setOnSuccessMatcher(new TestFailureMatcher());

  when(job.waitUntilFinish(any(Duration.class), any(JobMessagesHandler.class)))
      .thenReturn(State.FAILED);

  // Had the on-success matcher been invoked, the run would have crashed with an AssertionError
  // instead of the RuntimeException expected here.
  expectedException.expect(RuntimeException.class);
  runner.run(pipeline, delegate);
}
 
Example 5
Source File: AbstractOperatorTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Executes a single test case on a freshly created test pipeline: builds the case's output,
 * registers its validation, runs the pipeline until it finishes and finally validates the
 * accumulators.
 *
 * @param tc the test case to execute
 */
@SuppressWarnings("unchecked")
public <T> void execute(TestCase<T> tc) {

  final SingleJvmAccumulatorProvider.Factory accumulators =
      SingleJvmAccumulatorProvider.Factory.get();

  // Hand the accumulator factory to the pipeline through the Euphoria view of the options.
  final PipelineOptions opts = PipelineOptionsFactory.create();
  opts.as(EuphoriaOptions.class).setAccumulatorProviderFactory(accumulators);

  final Pipeline p = TestPipeline.create(opts);
  // Register a Kryo-based coder for Object.class on this pipeline's coder registry.
  p.getCoderRegistry().registerCoderForClass(Object.class, KryoCoder.of(opts));

  final PCollection<T> result = tc.getOutput(p);
  tc.validate(result);

  p.run().waitUntilFinish();
  tc.validateAccumulators(accumulators);
}
 
Example 6
Source File: TCompBoundedSourceSinkAdapterTest.java    From components with Apache License 2.0 6 votes vote down vote up
@Test
public void testSource() {
    Pipeline pipeline = TestPipeline.create();

    // Source configuration: the data "a;b;c" with ";" set as the row delimiter.
    FixedFlowProperties props = new FixedFlowProperties("fixedFlowProperties");
    props.init();
    props.data.setValue("a;b;c");
    props.rowDelimited.setValue(";");

    FixedFlowSource flowSource = new FixedFlowSource();
    flowSource.initialize(null, props);
    TCompBoundedSourceAdapter adapter = new TCompBoundedSourceAdapter(flowSource);

    // Emits the first field of every record as a string.
    DoFn<IndexedRecord, String> firstFieldAsString = new DoFn<IndexedRecord, String>() {
        @DoFn.ProcessElement
        public void processElement(ProcessContext c) throws Exception {
            c.output(c.element().get(0).toString());
        }
    };

    PCollection<String> result =
            pipeline.apply(Read.from(adapter)).apply(ParDo.of(firstFieldAsString));

    PAssert.that(result).containsInAnyOrder(Arrays.asList("a", "b", "c"));

    pipeline.run();
}
 
Example 7
Source File: WriteFilesTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the given WriteFiles transform over {@code inputs} (after applying {@code transform}) and
 * then checks the written file contents. The pipeline additionally verifies that the
 * per-destination output files exist. When the write has a shard-count provider and is not a
 * windowed write, the configured shard count is passed on to the content check as well.
 */
private void runShardedWrite(
    List<String> inputs,
    PTransform<PCollection<String>, PCollection<String>> transform,
    String baseName,
    WriteFiles<String, ?, String> write,
    BiFunction<Integer, List<String>, Void> shardContentChecker)
    throws IOException {
  // Flag to validate that the pipeline options are passed to the Sink
  WriteOptions options = TestPipeline.testingPipelineOptions().as(WriteOptions.class);
  options.setTestFlag("test_value");
  Pipeline p = TestPipeline.create(options);

  // Element k (1-based) gets timestamp k.
  List<Long> timestamps = new ArrayList<>();
  for (long ts = 1; ts <= inputs.size(); ts++) {
    timestamps.add(ts);
  }

  PCollection<String> transformed =
      p.apply(Create.timestamped(inputs, timestamps).withCoder(StringUtf8Coder.of()))
          .apply(transform);
  transformed
      .apply(write)
      .getPerDestinationOutputFilenames()
      .apply(new VerifyFilesExist<>());
  p.run();

  // A shard count is only known for non-windowed writes that carry a shard provider.
  Optional<Integer> numShards;
  if (write.getNumShardsProvider() == null || write.getWindowedWrites()) {
    numShards = Optional.absent();
  } else {
    numShards = Optional.of(write.getNumShardsProvider().get());
  }
  checkFileContents(baseName, inputs, numShards, !write.getWindowedWrites(), shardContentChecker);
}
 
Example 8
Source File: TCompBoundedSourceSinkAdapterTest.java    From components with Apache License 2.0 5 votes vote down vote up
@Test
public void testSink() {
    Pipeline pipeline = TestPipeline.create();

    // Sink configuration: the data "b;c;a" with ";" set as the row delimiter.
    AssertResultProperties props = new AssertResultProperties("assertResultProperties");
    props.init();
    props.data.setValue("b;c;a");
    props.rowDelimited.setValue(";");

    AssertResultSink resultSink = new AssertResultSink();
    resultSink.initialize(null, props);
    TCompSinkAdapter sink = new TCompSinkAdapter(resultSink);

    // The schema is captured as a string and re-parsed for every element inside the DoFn.
    final String serializedSchema = props.schema.getValue().toString();

    // Wraps each input string into a record, storing the string in field 0.
    DoFn<String, IndexedRecord> toRecord = new DoFn<String, IndexedRecord>() {
        @DoFn.ProcessElement
        public void processElement(ProcessContext c) throws Exception {
            IndexedRecord row = new GenericData.Record(new Schema.Parser().parse(serializedSchema));
            row.put(0,c.element());
            c.output(row);
        }
    };

    pipeline.apply(Create.of("a", "b", "c"))
            .apply(ParDo.of(toRecord))
            .setCoder(LazyAvroCoder.of())
            .apply(Write.to(sink));

    pipeline.run();
}
 
Example 9
Source File: TestDataflowRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
@Test
public void testGetJobMetricsThatFailsForException() throws Exception {
  Pipeline pipeline = TestPipeline.create(options);
  pipeline.apply(Create.of(1, 2, 3));
  DataflowPipelineJob spiedJob =
      spy(new DataflowPipelineJob(mockClient, "test-job", options, null));

  // Every metrics lookup on the client throws an IOException.
  when(mockClient.getJobMetrics(anyString())).thenThrow(new IOException());

  // The runner is expected to answer with null in that case.
  TestDataflowRunner runner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  assertNull(runner.getJobMetrics(spiedJob));
}
 
Example 10
Source File: KuduIOIT.java    From beam with Apache License 2.0 5 votes vote down vote up
// Runs the write step followed by three read steps, in this fixed order.
@Test
public void testWriteThenRead() throws Exception {
  runWrite();
  runReadAll();
  // readPipeline is replaced with a fresh TestPipeline before each of the remaining read steps.
  // NOTE(review): presumably because the previous step already ran the old pipeline - confirm.
  readPipeline = TestPipeline.create();
  runReadProjectedColumns();
  readPipeline = TestPipeline.create();
  runReadWithPredicates();
}
 
Example 11
Source File: DatastoreV1Test.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a read configured via {@link DatastoreV1.Read#withLiteralGqlQuery(String)} followed by
 * a write, to ensure that {@link ValueProvider} values are not accessed at pipeline construction
 * time. The pipeline is only constructed, never run.
 */
@Test
public void testRuntimeOptionsNotCalledInApplyGqlQuery() {
  RuntimeTestOptions runtimeOptions = PipelineOptionsFactory.as(RuntimeTestOptions.class);
  Pipeline p = TestPipeline.create(runtimeOptions);

  // Project id and GQL query both come straight from the options getters.
  DatastoreV1.Read gqlRead =
      DatastoreIO.v1()
          .read()
          .withProjectId(runtimeOptions.getDatastoreProject())
          .withLiteralGqlQuery(runtimeOptions.getGqlQuery());

  p.apply(gqlRead)
      .apply(DatastoreIO.v1().write().withProjectId(runtimeOptions.getDatastoreProject()));
}
 
Example 12
Source File: DatastoreV1Test.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a read configured via {@link DatastoreV1.Read#withQuery(Query)} followed by a write, to
 * ensure that {@link ValueProvider} values are not accessed at pipeline construction time. The
 * pipeline is only constructed, never run.
 */
@Test
public void testRuntimeOptionsNotCalledInApplyQuery() {
  RuntimeTestOptions runtimeOptions = PipelineOptionsFactory.as(RuntimeTestOptions.class);
  Pipeline p = TestPipeline.create(runtimeOptions);

  // Project id and namespace come straight from the options getters; the query is fixed.
  DatastoreV1.Read queryRead =
      DatastoreIO.v1()
          .read()
          .withProjectId(runtimeOptions.getDatastoreProject())
          .withQuery(QUERY)
          .withNamespace(runtimeOptions.getNamespace());

  p.apply(queryRead)
      .apply(DatastoreIO.v1().write().withProjectId(runtimeOptions.getDatastoreProject()));
}
 
Example 13
Source File: TestDataflowRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Tests that a streaming pipeline which reports an error message for a non-assertion reason
 * makes the test run fail; a {@link RuntimeException} is what this test expects.
 *
 * <p>This is a known limitation/bug of the runner that it does not distinguish the two modes of
 * failure.
 */
@Test
public void testStreamingPipelineFailsIfException() throws Exception {
  options.setStreaming(true);
  Pipeline p = TestPipeline.create(options);
  PCollection<Integer> numbers = p.apply(Create.of(1, 2, 3));
  PAssert.that(numbers).containsInAnyOrder(1, 2, 3);

  DataflowPipelineJob job = Mockito.mock(DataflowPipelineJob.class);
  when(job.getProjectId()).thenReturn("test-project");
  when(job.getJobId()).thenReturn("test-job");
  when(job.getState()).thenReturn(State.RUNNING);
  // Waiting on the job feeds one error-level message to the supplied handler and then reports
  // the job as CANCELLED.
  when(job.waitUntilFinish(any(Duration.class), any(JobMessagesHandler.class)))
      .thenAnswer(
          call -> {
            JobMessage error = new JobMessage();
            error.setMessageText("FooException");
            error.setTime(TimeUtil.toCloudTime(Instant.now()));
            error.setMessageImportance("JOB_MESSAGE_ERROR");
            JobMessagesHandler handler = (JobMessagesHandler) call.getArguments()[1];
            handler.process(Arrays.asList(error));
            return State.CANCELLED;
          });

  DataflowRunner delegate = Mockito.mock(DataflowRunner.class);
  when(delegate.run(any(Pipeline.class))).thenReturn(job);

  when(mockClient.getJobMetrics(anyString()))
      .thenReturn(generateMockMetricResponse(false /* success */, true /* tentative */));
  TestDataflowRunner runner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);

  expectedException.expect(RuntimeException.class);
  runner.run(p, delegate);
}
 
Example 14
Source File: TestDataflowRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
@Test
public void testGetJobMetricsThatSucceeds() throws Exception {
  DataflowPipelineJob spiedJob =
      spy(new DataflowPipelineJob(mockClient, "test-job", options, null));
  Pipeline pipeline = TestPipeline.create(options);
  pipeline.apply(Create.of(1, 2, 3));

  // The client answers every metrics lookup with the canned response.
  when(mockClient.getJobMetrics(anyString()))
      .thenReturn(generateMockMetricResponse(true /* success */, true /* tentative */));

  TestDataflowRunner runner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  JobMetrics fetched = runner.getJobMetrics(spiedJob);

  // Exactly one metric, equal to what the mock generator produces for the same flags.
  assertEquals(1, fetched.getMetrics().size());
  assertEquals(
      generateMockMetrics(true /* success */, true /* tentative */), fetched.getMetrics());
}
 
Example 15
Source File: DirectGroupByKeyOverrideFactoryTest.java    From beam with Apache License 2.0 5 votes vote down vote up
@Test
public void getInputSucceeds() {
  TestPipeline pipeline = TestPipeline.create();
  KvCoder<String, Integer> kvCoder = KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of());
  PCollection<KV<String, Integer>> ungrouped =
      pipeline.apply(Create.of(KV.of("foo", 1)).withCoder(kvCoder));
  PCollection<KV<String, Iterable<Integer>>> grouped = ungrouped.apply(GroupByKey.create());

  // Look up the transform application that produced the grouped collection and ask the factory
  // for its replacement.
  AppliedPTransform<?, ?, ?> groupByKeyApplication = DirectGraphs.getProducer(grouped);
  PTransformReplacement<
          PCollection<KV<String, Integer>>, PCollection<KV<String, Iterable<Integer>>>>
      replacement = factory.getReplacementTransform((AppliedPTransform) groupByKeyApplication);

  // The replacement's input must be the collection that was fed into GroupByKey.
  assertThat(replacement.getInput(), Matchers.<PCollection<?>>equalTo(ungrouped));
}
 
Example 16
Source File: TestDataflowRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
@Test
public void testBatchOnSuccessMatcherWhenPipelineFails() throws Exception {
  Pipeline pipeline = TestPipeline.create(options);
  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3));
  PAssert.that(numbers).containsInAnyOrder(1, 2, 3);

  // A job that identifies itself and reports FAILED.
  final DataflowPipelineJob job = Mockito.mock(DataflowPipelineJob.class);
  when(job.getProjectId()).thenReturn("test-project");
  when(job.getJobId()).thenReturn("test-job");
  when(job.getState()).thenReturn(State.FAILED);

  DataflowRunner delegate = Mockito.mock(DataflowRunner.class);
  when(delegate.run(any(Pipeline.class))).thenReturn(job);

  TestDataflowRunner runner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  options.as(TestPipelineOptions.class).setOnSuccessMatcher(new TestFailureMatcher());

  when(mockClient.getJobMetrics(anyString()))
      .thenReturn(generateMockMetricResponse(false /* success */, true /* tentative */));

  // Capture the AssertionError (if any) so the follow-up checks run outside the try block.
  AssertionError caught = null;
  try {
    runner.run(pipeline, delegate);
  } catch (AssertionError expected) {
    caught = expected;
  }
  if (caught == null) {
    fail("Expected an exception on pipeline failure.");
  }
  // The runner must have waited on the job exactly once before failing.
  verify(job, Mockito.times(1))
      .waitUntilFinish(any(Duration.class), any(JobMessagesHandler.class));
}
 
Example 17
Source File: PCollectionListTest.java    From beam with Apache License 2.0 5 votes vote down vote up
@Test
public void testTagNames() {
  Pipeline pipeline = TestPipeline.create();
  PCollection<String> one = pipeline.apply("first", Create.of("1"));
  PCollection<String> two = pipeline.apply("second", Create.of("2"));
  PCollection<String> three = pipeline.apply("third", Create.of("3"));

  PCollectionList<String> list = PCollectionList.of(one).and(two).and(three);

  // The tag id at each position is expected to be that position's index, as a string.
  for (int position = 0; position < 3; position++) {
    assertThat(
        list.pcollections.get(position).getTag().id, equalTo(Integer.toString(position)));
  }
}
 
Example 18
Source File: PCollectionListTest.java    From beam with Apache License 2.0 5 votes vote down vote up
@Test
public void testExpandWithDuplicates() {
  Pipeline pipeline = TestPipeline.create();
  PCollection<Long> repeated = pipeline.apply("CreateOne", Create.of(1L, 2L, 3L));

  // The very same collection is added three times.
  PCollectionList<Long> once = PCollectionList.of(repeated);
  PCollectionList<Long> thrice = once.and(repeated).and(repeated);

  // Expansion must keep all three occurrences rather than collapsing the duplicates.
  assertThat(thrice.expand().values(), containsInAnyOrder(repeated, repeated, repeated));
}
 
Example 19
Source File: BeamSortRelTest.java    From beam with Apache License 2.0 5 votes vote down vote up
@Test
public void testOrderBy_exception() {
  thrown.expect(UnsupportedOperationException.class);
  thrown.expectMessage("`ORDER BY` is only supported for GlobalWindows");

  // ORDER BY combined with a TUMBLE grouping; the fragments are joined with no separator.
  String sql =
      String.join(
          "",
          "INSERT INTO SUB_ORDER_RAM(order_id, site_id)  SELECT ",
          " order_id, COUNT(*) ",
          "FROM ORDER_DETAILS ",
          "GROUP BY order_id, TUMBLE(order_time, INTERVAL '1' HOUR)",
          "ORDER BY order_id asc limit 11");

  compilePipeline(sql, TestPipeline.create());
}
 
Example 20
Source File: PCollectionListTest.java    From beam with Apache License 2.0 4 votes vote down vote up
@Test
public void testIterationOrder() {
  Pipeline pipeline = TestPipeline.create();
  PCollection<Long> firstCreate = pipeline.apply("CreateOne", Create.of(1L, 2L, 3L));
  PCollection<Long> bounded = pipeline.apply("CountBounded", GenerateSequence.from(0).to(23));
  PCollection<Long> unbounded = pipeline.apply("CountUnbounded", GenerateSequence.from(0));
  PCollection<Long> secondCreate = pipeline.apply("CreateTwo", Create.of(-1L, -2L));
  PCollection<Long> timeLimited =
      pipeline.apply(
          "CountLimited", GenerateSequence.from(0).withMaxReadTime(Duration.standardSeconds(5)));

  // Case 1: a list built from an existing list keeps that list's order.
  // contains() is the order-sensitive matcher.
  ImmutableList<PCollection<Long>> sequences = ImmutableList.of(bounded, timeLimited, unbounded);
  PCollectionList<Long> fromList = PCollectionList.of(sequences);
  assertThat(fromList.getAll(), contains(bounded, timeLimited, unbounded));

  // Case 2: and(...) appends the new value after the existing ones.
  PCollectionList<Long> extended = fromList.and(secondCreate);
  assertThat(extended.getAll(), contains(bounded, timeLimited, unbounded, secondCreate));

  // Case 3: a list grown purely through builder calls returns values in insertion order.
  PCollectionList<Long> builtUp =
      PCollectionList.<Long>empty(pipeline)
          .and(unbounded)
          .and(firstCreate)
          .and(ImmutableList.of(bounded, timeLimited));
  assertThat(builtUp.getAll(), contains(unbounded, firstCreate, bounded, timeLimited));

  // Expanding the same instance twice must give the same tag->value mapping. Stability across
  // different list instances is not required.
  Map<TupleTag<?>, PValue> expanded = builtUp.expand();
  assertThat(expanded, equalTo(builtUp.expand()));

  // The expansion holds exactly the list's members (order not checked here).
  List<PCollection<Long>> members =
      ImmutableList.of(unbounded, firstCreate, bounded, timeLimited);
  assertThat(expanded.values(), containsInAnyOrder(members.toArray()));
}