org.apache.beam.sdk.testing.ValidatesRunner Java Examples

The following examples show how to use org.apache.beam.sdk.testing.ValidatesRunner. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ReshuffleTest.java    From beam with Apache License 2.0 7 votes vote down vote up
/**
 * Applies {@code Reshuffle} to a windowed collection of key/value pairs and checks that both the
 * contents and the windowing strategy are the same before and after the reshuffle.
 *
 * <p>NOTE(review): the method name mentions sliding windows, but the windowing applied below is
 * {@code FixedWindows} of ten minutes; confirm which one is intended.
 */
@Test
@Category(ValidatesRunner.class)
public void testReshuffleAfterSlidingWindows() {
  PCollection<KV<String, Integer>> unwindowed =
      pipeline.apply(
          Create.of(ARBITRARY_KVS).withCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of())));
  PCollection<KV<String, Integer>> windowed =
      unwindowed.apply(Window.into(FixedWindows.of(Duration.standardMinutes(10L))));

  PCollection<KV<String, Integer>> reshuffled = windowed.apply(Reshuffle.of());

  // The reshuffle must neither add nor drop elements.
  PAssert.that(reshuffled).containsInAnyOrder(ARBITRARY_KVS);

  // The reshuffle must leave the windowing strategy untouched.
  assertEquals(windowed.getWindowingStrategy(), reshuffled.getWindowingStrategy());

  pipeline.run();
}
 
Example #2
Source File: WindowingTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code WindowedCount} over three timestamped strings using {@code SlidingWindows} and
 * compares the result with five expected entries.
 *
 * <p>The expected entries are built by the {@code output(...)} helper, which is declared outside
 * this method; the meaning of its numeric arguments is not visible here.
 */
@Test
@Category(ValidatesRunner.class)
public void testNonPartitioningWindowing() {
  PCollection<String> timestampedInput =
      p.apply(
          Create.timestamped(
              TimestampedValue.of("a", new Instant(1)),
              TimestampedValue.of("a", new Instant(7)),
              TimestampedValue.of("b", new Instant(8))));

  // Windows of size 10 with a period of 5.
  SlidingWindows windowFn = SlidingWindows.of(new Duration(10)).every(new Duration(5));
  PCollection<String> counted = timestampedInput.apply(new WindowedCount(windowFn));

  PAssert.that(counted)
      .containsInAnyOrder(
          output("a", 1, 1, -5, 5),
          output("a", 2, 5, 0, 10),
          output("a", 1, 10, 5, 15),
          output("b", 1, 8, 0, 10),
          output("b", 1, 10, 5, 15));

  p.run();
}
 
Example #3
Source File: ReifyTimestampsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Gives each key/value pair a timestamp equal to its integer value, applies
 * {@code ReifyTimestamps.inValues()}, and checks that every value is now wrapped in a
 * {@code TimestampedValue} carrying that same timestamp.
 */
@Test
@Category(ValidatesRunner.class)
public void inValuesSucceeds() {
  PCollection<KV<String, Integer>> untimestamped =
      pipeline.apply(
          Create.of(KV.of("foo", 0), KV.of("foo", 1), KV.of("bar", 2), KV.of("baz", 3)));
  PCollection<KV<String, Integer>> withTimestamps =
      untimestamped.apply(WithTimestamps.of(kv -> new Instant(kv.getValue().longValue())));

  PCollection<KV<String, TimestampedValue<Integer>>> withReifiedTimestamps =
      withTimestamps.apply(ReifyTimestamps.inValues());

  PAssert.that(withReifiedTimestamps)
      .containsInAnyOrder(
          KV.of("foo", TimestampedValue.of(0, new Instant(0))),
          KV.of("foo", TimestampedValue.of(1, new Instant(1))),
          KV.of("bar", TimestampedValue.of(2, new Instant(2))),
          KV.of("baz", TimestampedValue.of(3, new Instant(3))));

  pipeline.run();
}
 
Example #4
Source File: ReshuffleTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Groups session-windowed key/value pairs by key, passes the grouped result through
 * {@code Reshuffle}, and checks that the contents satisfy {@code AssertThatHasExpectedContents}
 * and that the windowing strategy is the same before and after the reshuffle.
 */
@Test
@Category(ValidatesRunner.class)
public void testReshuffleAfterSessionsAndGroupByKey() {
  PCollection<KV<String, Integer>> sessionWindowed =
      pipeline
          .apply(
              Create.of(GBK_TESTABLE_KVS)
                  .withCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of())))
          .apply(Window.into(Sessions.withGapDuration(Duration.standardMinutes(10))));
  PCollection<KV<String, Iterable<Integer>>> grouped = sessionWindowed.apply(GroupByKey.create());

  PCollection<KV<String, Iterable<Integer>>> reshuffled = grouped.apply(Reshuffle.of());

  PAssert.that(reshuffled).satisfies(new AssertThatHasExpectedContents());

  // The reshuffle must leave the windowing strategy untouched.
  assertEquals(grouped.getWindowingStrategy(), reshuffled.getWindowingStrategy());

  pipeline.run();
}
 
Example #5
Source File: GroupByKeyTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Groups two elements that share a key, using a windowing function configured with
 * {@code TimestampCombiner.LATEST}, and hands the grouped output to {@code AssertTimestamp}
 * constructed with the later of the two input timestamps.
 */
@Test
@Category(ValidatesRunner.class)
public void testTimestampCombinerLatest() {
  final Instant earlier = new Instant(0);
  final Instant later = new Instant(10);

  p.apply(
          Create.timestamped(
              TimestampedValue.of(KV.of(0, "hello"), earlier),
              TimestampedValue.of(KV.of(0, "goodbye"), later)))
      .apply(
          Window.<KV<Integer, String>>into(FixedWindows.of(Duration.standardMinutes(10)))
              .withTimestampCombiner(TimestampCombiner.LATEST))
      .apply(GroupByKey.create())
      // The combined output is checked against the later input timestamp.
      .apply(ParDo.of(new AssertTimestamp(later)));

  p.run();
}
 
Example #6
Source File: ParDoTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds a {@code TestStream} of timestamped elements into
 * {@code testEventTimeTimerOrderingWithInputPTransform}.
 *
 * <p>One hundred elements keyed {@code "dummy"} are added with timestamps 1000 apart, starting at
 * a fixed instant. The watermark is advanced after every tenth element and finally to infinity.
 */
@Test
@Category({
  ValidatesRunner.class,
  UsesTimersInParDo.class,
  UsesTestStream.class,
  UsesStatefulParDo.class,
  UsesStrictTimerOrdering.class
})
public void testEventTimeTimerOrdering() throws Exception {
  final int elementCount = 100;
  final Instant baseTime = new Instant(1500000000000L);
  TestStream.Builder<KV<String, String>> streamBuilder =
      TestStream.create(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()))
          .advanceWatermarkTo(new Instant(0));

  for (int added = 1; added <= elementCount; added++) {
    final int index = added - 1;
    streamBuilder =
        streamBuilder.addElements(
            TimestampedValue.of(KV.of("dummy", "" + index), baseTime.plus(index * 1000)));
    // Move the watermark forward once every ten elements.
    if (added % 10 == 0) {
      streamBuilder = streamBuilder.advanceWatermarkTo(baseTime.plus(added * 1000));
    }
  }

  TestStream<KV<String, String>> stream = streamBuilder.advanceWatermarkToInfinity();
  testEventTimeTimerOrderingWithInputPTransform(baseTime, elementCount, stream);
}
 
Example #7
Source File: FlattenTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Flattens a single keyed collection whose coder is a {@code SerializableCoder}, switches the
 * flattened output to a {@code KvCoder}, groups by key, and checks that the values coming out
 * match {@code LINES}.
 */
@Test
@Category(ValidatesRunner.class)
public void testFlattenWithDifferentInputAndOutputCoders() {
  // This test exists to prevent a regression in Dataflow. It tests a
  // GroupByKey preceded by a Flatten with an SDK-specific input coder.
  PCollection<KV<String, String>> keyedLines =
      p.apply(Create.of(LINES))
          .apply(WithKeys.of("a"))
          .setCoder(SerializableCoder.of(new TypeDescriptor<KV<String, String>>() {}));

  // The flattened output uses a different coder from the flatten input.
  PCollection<KV<String, String>> flattened =
      PCollectionList.of(keyedLines)
          .apply(Flatten.pCollections())
          .setCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()));

  PCollection<KV<String, Iterable<String>>> grouped = flattened.apply(GroupByKey.create());
  PCollection<Iterable<String>> groupedValues = grouped.apply(Values.create());
  PCollection<String> ungrouped =
      groupedValues.apply(
          FlatMapElements.into(TypeDescriptors.strings())
              .via((Iterable<String> values) -> values));

  PAssert.that(ungrouped).containsInAnyOrder(LINES);
  p.run();
}
 
Example #8
Source File: MapElementsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the primitive display data reported for a {@code MapElements} transform contains a
 * {@code "class"} item whose value is the class of the mapping function.
 */
@Test
@Category(ValidatesRunner.class)
public void testPrimitiveDisplayData() {
  // An identity function; only its class matters for this test.
  SimpleFunction<Integer, ?> identityFn =
      new SimpleFunction<Integer, Integer>() {
        @Override
        public Integer apply(Integer value) {
          return value;
        }
      };
  MapElements<Integer, ?> transform = MapElements.via(identityFn);

  Set<DisplayData> primitiveDisplayData =
      DisplayDataEvaluator.create().displayDataForPrimitiveTransforms(transform);

  assertThat(
      "MapElements should include the mapFn in its primitive display data",
      primitiveDisplayData,
      hasItem(hasDisplayItem("class", identityFn.getClass())));
}
 
Example #9
Source File: ParDoLifecycleTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Runs an {@code ExceptionThrowingStatefulFn} configured with
 * {@code MethodForException.FINISH_BUNDLE} and expects the pipeline run to throw.
 *
 * <p>When the run throws, {@code validate} is called with the call states SETUP, START_BUNDLE,
 * PROCESS_ELEMENT, FINISH_BUNDLE and TEARDOWN. If the run completes normally the test fails.
 */
@Test
@Category({ValidatesRunner.class, UsesStatefulParDo.class, UsesParDoLifecycle.class})
public void testTeardownCalledAfterExceptionInFinishBundleStateful() {
  ExceptionThrowingFn throwingFn =
      new ExceptionThrowingStatefulFn(MethodForException.FINISH_BUNDLE);
  p.apply(Create.of(KV.of("a", 1), KV.of("b", 2), KV.of("a", 3)))
      .apply(ParDo.of(throwingFn));

  try {
    p.run();
    // Reaching this line means the run did not throw.
    fail("Pipeline should have failed with an exception");
  } catch (Exception expected) {
    validate(
        CallState.SETUP,
        CallState.START_BUNDLE,
        CallState.PROCESS_ELEMENT,
        CallState.FINISH_BUNDLE,
        CallState.TEARDOWN);
  }
}
 
Example #10
Source File: KvSwapTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Applies {@code KvSwap} to the entries of {@code TABLE} and checks that each expected pair comes
 * out with key and value exchanged, including one pair whose swapped key is {@code null}.
 */
@Test
@Category(ValidatesRunner.class)
public void testKvSwap() {
  // Values go through a NullableCoder, and the expected output includes a null key.
  KvCoder<String, Integer> tableCoder =
      KvCoder.of(StringUtf8Coder.of(), NullableCoder.of(BigEndianIntegerCoder.of()));
  PCollection<KV<String, Integer>> table =
      p.apply(Create.of(Arrays.asList(TABLE)).withCoder(tableCoder));

  PCollection<KV<Integer, String>> swapped = table.apply(KvSwap.create());

  PAssert.that(swapped)
      .containsInAnyOrder(
          KV.of(1, "one"),
          KV.of(2, "two"),
          KV.of(3, "three"),
          KV.of(4, "four"),
          KV.of(4, "dup"),
          KV.of(5, "dup"),
          KV.of((Integer) null, "null"));
  p.run();
}
 
Example #11
Source File: CombineTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Combines session-windowed key/value pairs both globally (summing the values with
 * {@code SumInts}) and per key (with {@code TestCombineFn}), then checks both results.
 *
 * <p>The session gap is {@code Duration.millis(5)}. The expected global sums are 7 and 13, and
 * the expected per-key results are {@code "114"} for key "a" and {@code "1"} and {@code "13"}
 * for key "b".
 */
@Test
@Category(ValidatesRunner.class)
public void testSessionsCombine() {
  PCollection<KV<String, Integer>> timestamped =
      pipeline.apply(
          Create.timestamped(
                  TimestampedValue.of(KV.of("a", 1), new Instant(0L)),
                  TimestampedValue.of(KV.of("a", 1), new Instant(4L)),
                  TimestampedValue.of(KV.of("a", 4), new Instant(7L)),
                  TimestampedValue.of(KV.of("b", 1), new Instant(10L)),
                  TimestampedValue.of(KV.of("b", 13), new Instant(16L)))
              .withCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())));
  PCollection<KV<String, Integer>> sessioned =
      timestamped.apply(Window.into(Sessions.withGapDuration(Duration.millis(5))));

  // Global combine over the values only.
  PCollection<Integer> valuesOnly = sessioned.apply(Values.create());
  PCollection<Integer> globalSums =
      valuesOnly.apply(Combine.globally(new SumInts()).withoutDefaults());

  // Per-key combine over the same windowed input.
  PCollection<KV<String, String>> perKeyResults =
      sessioned.apply(Combine.perKey(new TestCombineFn()));

  PAssert.that(globalSums).containsInAnyOrder(7, 13);
  PAssert.that(perKeyResults)
      .containsInAnyOrder(Arrays.asList(KV.of("a", "114"), KV.of("b", "1"), KV.of("b", "13")));
  pipeline.run();
}
 
Example #12
Source File: WindowingTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code WindowedCount} over three timestamped strings using {@code Sessions} windows and
 * compares the result with two expected entries.
 *
 * <p>The expected entries are built by the {@code output(...)} helper, which is declared outside
 * this method; the meaning of its numeric arguments is not visible here.
 */
@Test
@Category(ValidatesRunner.class)
public void testMergingWindowing() {
  PCollection<String> timestampedInput =
      p.apply(
          Create.timestamped(
              TimestampedValue.of("a", new Instant(1)),
              TimestampedValue.of("a", new Instant(5)),
              TimestampedValue.of("a", new Instant(20))));

  Sessions sessionWindows = Sessions.withGapDuration(new Duration(10));
  PCollection<String> counted = timestampedInput.apply(new WindowedCount(sessionWindows));

  PAssert.that(counted)
      .containsInAnyOrder(output("a", 2, 1, 1, 15), output("a", 1, 20, 20, 30));

  p.run();
}
 
Example #13
Source File: UserScoreTest.java    From deployment-examples with MIT License 6 votes vote down vote up
/**
 * Parses {@code GAME_EVENTS2} with {@code ParseEventFn}, maps each parsed event to a user/score
 * pair, and expects the resulting collection to be empty.
 */
@Test
@Category(ValidatesRunner.class)
public void testUserScoresBadInput() throws Exception {
  PCollection<String> rawEvents =
      p.apply(Create.of(GAME_EVENTS2).withCoder(StringUtf8Coder.of()));

  PCollection<GameActionInfo> parsedEvents = rawEvents.apply(ParDo.of(new ParseEventFn()));
  PCollection<KV<String, Integer>> userScores =
      parsedEvents.apply(
          MapElements.into(
                  TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers()))
              .via((GameActionInfo event) -> KV.of(event.getUser(), event.getScore())));

  // Nothing should survive parsing.
  PAssert.that(userScores).empty();

  p.run().waitUntilFinish();
}
 
Example #14
Source File: UserScoreTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds {@code GAME_EVENTS2} through {@code ParseEventFn} and a user/score mapping, and asserts
 * that no elements come out.
 */
@Test
@Category(ValidatesRunner.class)
public void testUserScoresBadInput() throws Exception {
  PCollection<String> badEvents =
      p.apply(Create.of(GAME_EVENTS2).withCoder(StringUtf8Coder.of()));

  PCollection<GameActionInfo> parsed = badEvents.apply(ParDo.of(new ParseEventFn()));
  PCollection<KV<String, Integer>> scoresByUser =
      parsed.apply(
          MapElements.into(
                  TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers()))
              .via((GameActionInfo info) -> KV.of(info.getUser(), info.getScore())));

  // The mapped output is expected to contain no elements at all.
  PAssert.that(scoresByUser).empty();

  p.run().waitUntilFinish();
}
 
Example #15
Source File: UserScoreTest.java    From deployment-examples with MIT License 6 votes vote down vote up
/**
 * Parses {@code GAME_EVENTS}, applies {@code ExtractAndSumScore("team")}, and checks the result
 * against {@code TEAM_SUMS}.
 */
@Test
@Category(ValidatesRunner.class)
public void testTeamScoreSums() throws Exception {
  PCollection<String> events = p.apply(Create.of(GAME_EVENTS));

  // Parse each event, then extract and sum teamname/score pairs from the event data.
  PCollection<KV<String, Integer>> teamScores =
      events
          .apply(ParDo.of(new ParseEventFn()))
          .apply("ExtractTeamScore", new ExtractAndSumScore("team"));

  // Verify the summed score of every team.
  PAssert.that(teamScores).containsInAnyOrder(TEAM_SUMS);

  p.run().waitUntilFinish();
}
 
Example #16
Source File: CombineTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a per-key {@link Combine} from a Java 8 lambda that sums its inputs, and checks the
 * per-key totals.
 */
@Test
@Category(ValidatesRunner.class)
public void testCombinePerKeyLambda() {
  PCollection<KV<String, Integer>> keyedInts =
      pipeline.apply(Create.of(KV.of("a", 1), KV.of("b", 2), KV.of("a", 3), KV.of("c", 4)));

  PCollection<KV<String, Integer>> totalsPerKey =
      keyedInts.apply(
          Combine.perKey(
              values -> {
                int total = 0;
                for (int value : values) {
                  total += value;
                }
                return total;
              }));

  PAssert.that(totalsPerKey).containsInAnyOrder(KV.of("a", 4), KV.of("b", 2), KV.of("c", 4));
  pipeline.run();
}
 
Example #17
Source File: ParDoTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Applies {@code TestDoFn} to an empty collection of integers and checks the output with
 * {@code ParDoTest.HasExpectedOutput} built for that same empty input.
 */
@Test
@Category(ValidatesRunner.class)
public void testParDoEmpty() {
  List<Integer> noInputs = Arrays.asList();

  // The coder is given explicitly for the empty input.
  PCollection<Integer> emptyInput =
      pipeline.apply(Create.of(noInputs).withCoder(VarIntCoder.of()));
  PCollection<String> output = emptyInput.apply("TestDoFn", ParDo.of(new TestDoFn()));

  PAssert.that(output).satisfies(ParDoTest.HasExpectedOutput.forInput(noInputs));

  pipeline.run();
}
 
Example #18
Source File: ParDoTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Declares a {@code ValueState<MyInteger>} without an explicit coder and expects a
 * {@code RuntimeException} whose message contains "Unable to infer a coder for ValueState and no
 * Coder was specified."
 *
 * <p>The output collection does get a {@code MyIntegerCoder} via {@code setCoder}; only the state
 * spec is left without one.
 */
@Test
@Category({ValidatesRunner.class, UsesStatefulParDo.class})
public void testValueStateCoderInferenceFailure() throws Exception {
  final String stateId = "foo";
  MyIntegerCoder outputCoder = MyIntegerCoder.of();

  DoFn<KV<String, Integer>, MyInteger> statefulFn =
      new DoFn<KV<String, Integer>, MyInteger>() {

        // No coder is passed to the state spec.
        @StateId(stateId)
        private final StateSpec<ValueState<MyInteger>> intState = StateSpecs.value();

        @ProcessElement
        public void processElement(
            @StateId(stateId) ValueState<MyInteger> counter,
            OutputReceiver<MyInteger> receiver) {
          MyInteger current = MoreObjects.firstNonNull(counter.read(), new MyInteger(0));
          receiver.output(current);
          counter.write(new MyInteger(current.getValue() + 1));
        }
      };

  thrown.expect(RuntimeException.class);
  thrown.expectMessage("Unable to infer a coder for ValueState and no Coder was specified.");

  pipeline
      .apply(Create.of(KV.of("hello", 42), KV.of("hello", 97), KV.of("hello", 84)))
      .apply(ParDo.of(statefulFn))
      .setCoder(outputCoder);

  pipeline.run();
}
 
Example #19
Source File: CreateTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a collection that interleaves {@code null} with non-null strings, using a
 * {@code SerializableCoder}, and checks that all five elements come out.
 */
@Test
@Category(ValidatesRunner.class)
public void testCreateWithNullsAndValues() throws Exception {
  SerializableCoder<String> stringCoder = SerializableCoder.of(String.class);
  PCollection<String> created =
      p.apply(Create.of(null, "test1", null, "test2", null).withCoder(stringCoder));

  PAssert.that(created).containsInAnyOrder(null, "test1", null, "test2", null);
  p.run();
}
 
Example #20
Source File: FlattenTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Splits strings into even-length and odd-length outputs with a multi-output {@code ParDo},
 * flattens the two outputs back together, and asserts on the merged collection as well as on
 * each individual output.
 */
@Test
@Category(ValidatesRunner.class)
public void testFlattenMultiplePCollectionsHavingMultipleConsumers() {
  PCollection<String> words = p.apply(Create.of("AA", "BBB", "CC"));
  final TupleTag<String> evenTag = new TupleTag<String>() {};
  final TupleTag<String> oddTag = new TupleTag<String>() {};

  PCollectionTuple splitByParity =
      words.apply(
          ParDo.of(
                  new DoFn<String, String>() {
                    @ProcessElement
                    public void processElement(ProcessContext ctx) {
                      boolean oddLength = ctx.element().length() % 2 != 0;
                      if (oddLength) {
                        ctx.output(oddTag, ctx.element());
                      } else {
                        // Even-length strings go to the main output.
                        ctx.output(ctx.element());
                      }
                    }
                  })
              .withOutputTags(evenTag, TupleTagList.of(oddTag)));

  PCollection<String> evenWords = splitByParity.get(evenTag);
  PCollection<String> oddWords = splitByParity.get(oddTag);

  PCollectionList<String> bothOutputs = PCollectionList.of(evenWords).and(oddWords);
  PCollection<String> merged = bothOutputs.apply(Flatten.pCollections());

  PAssert.that(merged).containsInAnyOrder("AA", "BBB", "CC");
  PAssert.that(evenWords).containsInAnyOrder("AA", "CC");
  PAssert.that(oddWords).containsInAnyOrder("BBB");

  p.run();
}
 
Example #21
Source File: ParDoTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Applies {@code TestDoFn} with three singleton side inputs attached, only two of which are
 * passed to the {@code TestDoFn} constructor, and checks the output with
 * {@code ParDoTest.HasExpectedOutput} for side-input values 11 and 222.
 */
@Test
@Category({ValidatesRunner.class, UsesSideInputs.class})
public void testParDoWithSideInputs() {
  List<Integer> inputs = Arrays.asList(3, -42, 666);

  PCollectionView<Integer> firstView =
      pipeline
          .apply("CreateSideInput1", Create.of(11))
          .apply("ViewSideInput1", View.asSingleton());
  // Attached to the ParDo below but not handed to TestDoFn.
  PCollectionView<Integer> unreadView =
      pipeline
          .apply("CreateSideInputUnread", Create.of(-3333))
          .apply("ViewSideInputUnread", View.asSingleton());
  PCollectionView<Integer> secondView =
      pipeline
          .apply("CreateSideInput2", Create.of(222))
          .apply("ViewSideInput2", View.asSingleton());

  PCollection<Integer> mainInput = pipeline.apply(Create.of(inputs));
  PCollection<String> output =
      mainInput.apply(
          ParDo.of(new TestDoFn(Arrays.asList(firstView, secondView), Arrays.asList()))
              .withSideInputs(firstView, unreadView, secondView));

  PAssert.that(output)
      .satisfies(ParDoTest.HasExpectedOutput.forInput(inputs).andSideInputs(11, 222));

  pipeline.run();
}
 
Example #22
Source File: GroupByKeyTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Verify that runners correctly hash/group on the encoded value and not the value itself.
 *
 * <p>The input holds ten entries for each of five {@code BadEqualityKey} keys, every entry with
 * value {@code 1L}. After grouping and counting, the result is checked with
 * {@code AssertThatCountPerKeyCorrect} and {@code AssertThatAllKeysExist}.
 */
@Test
@Category({ValidatesRunner.class, DataflowPortabilityApiUnsupported.class})
public void testGroupByKeyWithBadEqualsHashCode() throws Exception {
  final int valuesPerKey = 10;
  final int keyCount = 5;

  p.getCoderRegistry()
      .registerCoderProvider(
          CoderProviders.fromStaticMethods(BadEqualityKey.class, DeterministicKeyCoder.class));

  // Build the input: one pass over all keys for each repetition.
  List<KV<BadEqualityKey, Long>> entries = new ArrayList<>();
  for (int repetition = 0; repetition < valuesPerKey; repetition++) {
    for (int keyId = 0; keyId < keyCount; keyId++) {
      entries.add(KV.of(new BadEqualityKey(keyId), 1L));
    }
  }

  // We first ensure that the values are randomly partitioned in the beginning.
  // Some runners might otherwise keep all values on the machine where
  // they are initially created.
  PCollection<KV<BadEqualityKey, Long>> shuffled =
      p.apply(Create.of(entries))
          .apply(ParDo.of(new AssignRandomKey()))
          .apply(Reshuffle.of())
          .apply(Values.create());

  // Make the GroupByKey and Count implicit, in real-world code
  // this would be a Count.perKey()
  PCollection<KV<BadEqualityKey, Iterable<Long>>> grouped = shuffled.apply(GroupByKey.create());
  PCollection<KV<BadEqualityKey, Long>> counts =
      grouped.apply(Combine.groupedValues(new CountFn()));

  PAssert.that(counts).satisfies(new AssertThatCountPerKeyCorrect(valuesPerKey));

  PCollection<BadEqualityKey> distinctKeys = counts.apply(Keys.create());
  PAssert.that(distinctKeys).satisfies(new AssertThatAllKeysExist(keyCount));

  p.run();
}
 
Example #23
Source File: ViewTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Reads a windowed singleton side input from a main input that uses the same fixed windowing.
 *
 * <p>The side input is the per-window integer sum: 1 for the value at instant 1, and 2 + 3 = 5
 * for the values at instants 11 and 13. The main elements at instants 4, 15 and 7 are expected
 * to come out as "A1", "B5" and "C1".
 */
@Test
@Category(ValidatesRunner.class)
public void testWindowedSideInputFixedToFixed() {
  PCollection<Integer> sideValues =
      pipeline.apply(
          "CreateSideInput",
          Create.timestamped(
              TimestampedValue.of(1, new Instant(1)),
              TimestampedValue.of(2, new Instant(11)),
              TimestampedValue.of(3, new Instant(13))));
  final PCollectionView<Integer> view =
      sideValues
          .apply("WindowSideInput", Window.into(FixedWindows.of(Duration.millis(10))))
          .apply(Sum.integersGlobally().withoutDefaults())
          .apply(View.asSingleton());

  PCollection<String> mainValues =
      pipeline.apply(
          "CreateMainInput",
          Create.timestamped(
              TimestampedValue.of("A", new Instant(4)),
              TimestampedValue.of("B", new Instant(15)),
              TimestampedValue.of("C", new Instant(7))));
  PCollection<String> windowedMain =
      mainValues.apply("WindowMainInput", Window.into(FixedWindows.of(Duration.millis(10))));

  PCollection<String> combined =
      windowedMain.apply(
          "OutputMainAndSideInputs",
          ParDo.of(
                  new DoFn<String, String>() {
                    @ProcessElement
                    public void processElement(ProcessContext ctx) {
                      // Append the side-input value to the main element.
                      ctx.output(ctx.element() + ctx.sideInput(view));
                    }
                  })
              .withSideInputs(view));

  PAssert.that(combined).containsInAnyOrder("A1", "B5", "C1");

  pipeline.run();
}
 
Example #24
Source File: ParDoTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Delegates to {@code runTestOutputTimestampDefault} with {@code false}.
 *
 * <p>NOTE(review): the helper is declared outside this snippet; judging by this method's name the
 * flag presumably selects the bounded variant, but confirm against the helper's signature.
 */
@Test
@Category({
  ValidatesRunner.class,
  UsesTimersInParDo.class,
  DataflowPortabilityApiUnsupported.class
})
public void testOutputTimestampDefaultBounded() throws Exception {
  runTestOutputTimestampDefault(false);
}
 
Example #25
Source File: ParDoTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Sends a single element through {@code TwoTimerDoFn} from a {@code TestStream} and expects the
 * output to be exactly {@code "It works"}.
 */
@Test
@Category({ValidatesRunner.class, UsesTestStream.class})
public void duplicateTimerSetting() {
  TestStream<KV<String, String>> singleElementStream =
      TestStream.create(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()))
          .addElements(KV.of("key1", "v1"))
          .advanceWatermarkToInfinity();

  PCollection<KV<String, String>> elements = pipeline.apply(singleElementStream);
  PCollection<String> timerOutput = elements.apply(ParDo.of(new TwoTimerDoFn()));

  PAssert.that(timerOutput).containsInAnyOrder("It works");

  pipeline.run().waitUntilFinish();
}
 
Example #26
Source File: ViewTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up a multimap side input whose keys are encoded with
 * {@code NonDeterministicStringCoder}.
 *
 * <p>Each main-input word is looked up by its first letter, and one output pair is emitted per
 * value stored under that letter.
 */
@Test
@Category({ValidatesRunner.class, DataflowPortabilityApiUnsupported.class})
public void testMultimapSideInputWithNonDeterministicKeyCoder() {
  PCollection<KV<String, Integer>> sideEntries =
      pipeline.apply(
          "CreateSideInput",
          Create.of(KV.of("a", 1), KV.of("a", 1), KV.of("a", 2), KV.of("b", 3))
              .withCoder(KvCoder.of(new NonDeterministicStringCoder(), VarIntCoder.of())));
  final PCollectionView<Map<String, Iterable<Integer>>> view =
      sideEntries.apply(View.asMultimap());

  PCollection<String> words =
      pipeline.apply("CreateMainInput", Create.of("apple", "banana", "blackberry"));
  PCollection<KV<String, Integer>> lookedUp =
      words.apply(
          "OutputSideInputs",
          ParDo.of(
                  new DoFn<String, KV<String, Integer>>() {
                    @ProcessElement
                    public void processElement(ProcessContext ctx) {
                      // The side-input key is the first character of the word.
                      String firstLetter = ctx.element().substring(0, 1);
                      for (Integer value : ctx.sideInput(view).get(firstLetter)) {
                        ctx.output(KV.of(ctx.element(), value));
                      }
                    }
                  })
              .withSideInputs(view));

  PAssert.that(lookedUp)
      .containsInAnyOrder(
          KV.of("apple", 1),
          KV.of("apple", 1),
          KV.of("apple", 2),
          KV.of("banana", 3),
          KV.of("blackberry", 3));

  pipeline.run();
}
 
Example #27
Source File: WordCountTest.java    From deployment-examples with MIT License 5 votes vote down vote up
/**
 * Example of testing a PTransform with in-memory input: applies {@code CountWords} to
 * {@code WORDS}, formats the result with {@code FormatAsTextFn}, and compares it with
 * {@code COUNTS_ARRAY}.
 */
@Test
@Category(ValidatesRunner.class)
public void testCountWords() throws Exception {
  PCollection<String> words = p.apply(Create.of(WORDS).withCoder(StringUtf8Coder.of()));

  PCollection<String> formattedCounts =
      words
          .apply(new CountWords())
          .apply(MapElements.via(new FormatAsTextFn()));

  PAssert.that(formattedCounts).containsInAnyOrder(COUNTS_ARRAY);

  p.run().waitUntilFinish();
}
 
Example #28
Source File: ParDoLifecycleTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Flattens two integer collections and runs {@code CallSequenceEnforcingFn} over the result.
 *
 * <p>This method makes no assertions of its own; any checking happens inside the function,
 * which is declared outside this snippet.
 */
@Test
@Category({ValidatesRunner.class, UsesParDoLifecycle.class})
public void testFnCallSequence() {
  PCollectionList<Integer> bothInputs =
      PCollectionList.of(p.apply("Impolite", Create.of(1, 2, 4)))
          .and(p.apply("Polite", Create.of(3, 5, 6, 7)));

  bothInputs
      .apply(Flatten.pCollections())
      .apply(ParDo.of(new CallSequenceEnforcingFn<>()));

  p.run();
}
 
Example #29
Source File: ViewTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Reads a map side input through {@code entrySet()} and re-emits every entry as a key/value
 * pair.
 *
 * <p>The single main-input element is the expected map size; inside the function it is compared
 * with both {@code size()} and {@code entrySet().size()} of the side input.
 */
@Test
@Category(ValidatesRunner.class)
public void testMapAsEntrySetSideInput() {
  final PCollectionView<Map<String, Integer>> view =
      pipeline
          .apply("CreateSideInput", Create.of(KV.of("a", 1), KV.of("b", 3)))
          .apply(View.asMap());

  PCollection<Integer> expectedSize =
      pipeline.apply("CreateMainInput", Create.of(2 /* size */));
  PCollection<KV<String, Integer>> emittedEntries =
      expectedSize.apply(
          "OutputSideInputs",
          ParDo.of(
                  new DoFn<Integer, KV<String, Integer>>() {
                    @ProcessElement
                    public void processElement(ProcessContext ctx) {
                      // The element carries the number of entries the map must have.
                      assertEquals((int) ctx.element(), ctx.sideInput(view).size());
                      assertEquals((int) ctx.element(), ctx.sideInput(view).entrySet().size());
                      for (Entry<String, Integer> mapEntry : ctx.sideInput(view).entrySet()) {
                        ctx.output(KV.of(mapEntry.getKey(), mapEntry.getValue()));
                      }
                    }
                  })
              .withSideInputs(view));

  PAssert.that(emittedEntries).containsInAnyOrder(KV.of("a", 1), KV.of("b", 3));

  pipeline.run();
}
 
Example #30
Source File: ParDoTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Applies {@code TestDoFn} to three integers and checks the output with
 * {@code ParDoTest.HasExpectedOutput} built for the same input list.
 */
@Test
@Category(ValidatesRunner.class)
public void testParDo() {
  List<Integer> inputs = Arrays.asList(3, -42, 666);

  PCollection<Integer> numbers = pipeline.apply(Create.of(inputs));
  PCollection<String> processed = numbers.apply(ParDo.of(new TestDoFn()));

  PAssert.that(processed).satisfies(ParDoTest.HasExpectedOutput.forInput(inputs));

  pipeline.run();
}