org.apache.beam.sdk.values.PCollection Java Examples

The following examples show how to use org.apache.beam.sdk.values.PCollection. Each example is taken from an open source project; the source file, originating project, and license are listed above it.
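Before the examples, here is a minimal, self-contained sketch of the pattern most of them share: build a PCollection from in-memory data with Create, apply a transform, and verify the result with PAssert. The class and test names below are illustrative and do not come from any of the projects listed.

import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.testing.TestPipeline;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.TypeDescriptors;
import org.junit.Rule;
import org.junit.Test;

public class PCollectionBasicsTest {

  @Rule public final transient TestPipeline pipeline = TestPipeline.create();

  @Test
  public void testCreateTransformAssert() {
    // Build a bounded PCollection<String> from a few in-memory elements.
    PCollection<String> words = pipeline.apply(Create.of("hello", "beam"));

    // Applying a transform yields a new, immutable PCollection; the input is unchanged.
    PCollection<String> upper =
        words.apply(
            MapElements.into(TypeDescriptors.strings()).via((String s) -> s.toUpperCase()));

    // Assert on the contents and run the pipeline.
    PAssert.that(upper).containsInAnyOrder("HELLO", "BEAM");
    pipeline.run().waitUntilFinish();
  }
}
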
Example #1
Source File: FlatMapElementsTest.java    From beam with Apache License 2.0
/**
 * Basic test of {@link FlatMapElements} with a lambda (which is instantiated as a {@link
 * ProcessFunction}).
 */
@Test
@Category(NeedsRunner.class)
public void testFlatMapBasicWithLambda() throws Exception {
  PCollection<Integer> output =
      pipeline
          .apply(Create.of(1, 2, 3))
          .apply(
              FlatMapElements
                  // Note that the input type annotation is required.
                  .into(TypeDescriptors.integers())
                  .via((Integer i) -> ImmutableList.of(i, -i)));

  PAssert.that(output).containsInAnyOrder(1, 3, -1, -3, 2, -2);
  pipeline.run();
}
 
Example #2
Source File: EvaluationContext.java    From beam with Apache License 2.0
/**
 * Add the output of a transform to the context map, and cache it if it satisfies {@link
 * #shouldCache(PTransform, PValue)}.
 *
 * @param transform from which Dataset was created
 * @param pvalue output of transform
 * @param dataset created Dataset from transform
 */
private void putDataset(
    @Nullable PTransform<?, ? extends PValue> transform, PValue pvalue, Dataset dataset) {
  try {
    dataset.setName(pvalue.getName());
  } catch (IllegalStateException e) {
    // name not set, ignore
  }
  if (shouldCache(transform, pvalue)) {
    // we cache only PCollection
    Coder<?> coder = ((PCollection<?>) pvalue).getCoder();
    Coder<? extends BoundedWindow> wCoder =
        ((PCollection<?>) pvalue).getWindowingStrategy().getWindowFn().windowCoder();
    dataset.cache(storageLevel(), WindowedValue.getFullCoder(coder, wCoder));
  }
  datasets.put(pvalue, dataset);
  leaves.add(dataset);
}
 
Example #3
Source File: TestBoundedTable.java    From beam with Apache License 2.0
@Override
public POutput buildIOWriter(PCollection<Row> input) {
  input.apply(
      ParDo.of(
          new DoFn<Row, Void>() {
            @ProcessElement
            public void processElement(ProcessContext c) {
              CONTENT.add(c.element());
            }

            @Teardown
            public void close() {
              CONTENT.clear();
            }
          }));
  return PDone.in(input.getPipeline());
}
 
Example #4
Source File: NativeWrappedIOTest.java    From component-runtime with Apache License 2.0
@Test
public void source() {
    final String plugin = COMPONENTS.getTestPlugins().iterator().next();
    final PTransform<PBegin, PCollection<JsonObject>> jdbc = PTransform.class
            .cast(COMPONENTS
                    .asManager()
                    .createComponent("beamtest", "source", ComponentManager.ComponentType.MAPPER, 1, emptyMap())
                    .orElseThrow(() -> new IllegalArgumentException("no beamtest#source component")));
    PAssert
            .that(pipeline.apply(jdbc).setCoder(JsonpJsonObjectCoder.of(plugin)))
            .satisfies((SerializableFunction<Iterable<JsonObject>, Void>) input -> {
                assertEquals("test", input.iterator().next().getString("id"));
                return null;
            });
    pipeline.run().waitUntilFinish();
}
 
Example #5
Source File: SampleTest.java    From beam with Apache License 2.0
@Test
@Category(NeedsRunner.class)
public void testSampleAnyZero() {
  PCollection<Integer> input =
      pipeline.apply(
          Create.timestamped(ImmutableList.of(tv(0), tv(1), tv(2), tv(3), tv(4), tv(5)))
              .withCoder(BigEndianIntegerCoder.of()));
  PCollection<Integer> output =
      input
          .apply(Window.into(FixedWindows.of(Duration.standardSeconds(3))))
          .apply(Sample.any(0));

  PAssert.that(output)
      .inWindow(new IntervalWindow(new Instant(0), Duration.standardSeconds(3)))
      .satisfies(new VerifyCorrectSample<>(0, EMPTY));
  PAssert.that(output)
      .inWindow(new IntervalWindow(new Instant(3000), Duration.standardSeconds(3)))
      .satisfies(new VerifyCorrectSample<>(0, EMPTY));
  pipeline.run();
}
 
Example #6
Source File: PTransformTranslationTest.java    From beam with Apache License 2.0
@Test
public void toAndFromProto() throws IOException {
  SdkComponents components = SdkComponents.create(spec.getTransform().getPipeline().getOptions());
  RunnerApi.PTransform converted = convert(spec, components);
  Components protoComponents = components.toComponents();

  // Sanity checks
  assertThat(converted.getInputsCount(), equalTo(spec.getTransform().getInputs().size()));
  assertThat(converted.getOutputsCount(), equalTo(spec.getTransform().getOutputs().size()));
  assertThat(converted.getSubtransformsCount(), equalTo(spec.getChildren().size()));

  assertThat(converted.getUniqueName(), equalTo(spec.getTransform().getFullName()));
  for (PValue inputValue : spec.getTransform().getInputs().values()) {
    PCollection<?> inputPc = (PCollection<?>) inputValue;
    protoComponents.getPcollectionsOrThrow(components.registerPCollection(inputPc));
  }
  for (PValue outputValue : spec.getTransform().getOutputs().values()) {
    PCollection<?> outputPc = (PCollection<?>) outputValue;
    protoComponents.getPcollectionsOrThrow(components.registerPCollection(outputPc));
  }
}
 
Example #7
Source File: ForwardingPTransformTest.java    From beam with Apache License 2.0
@Test
public void getDefaultOutputCoderDelegates() throws Exception {
  @SuppressWarnings("unchecked")
  PCollection<Integer> input =
      PCollection.createPrimitiveOutputInternal(
          null /* pipeline */,
          WindowingStrategy.globalDefault(),
          PCollection.IsBounded.BOUNDED,
          null /* coder */);
  @SuppressWarnings("unchecked")
  PCollection<String> output =
      PCollection.createPrimitiveOutputInternal(
          null /* pipeline */,
          WindowingStrategy.globalDefault(),
          PCollection.IsBounded.BOUNDED,
          null /* coder */);
  @SuppressWarnings("unchecked")
  Coder<String> outputCoder = mock(Coder.class);

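  // The forwarding transform should delegate both expand() and output-coder inference to the wrapped delegate.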
  when(delegate.expand(input)).thenReturn(output);
  when(delegate.getDefaultOutputCoder(input, output)).thenReturn(outputCoder);
  assertThat(forwarding.expand(input).getCoder(), equalTo(outputCoder));
}
 
Example #8
Source File: BeamSalUhfSpecialTypeAndValueTest.java    From beam with Apache License 2.0
@Test
public void testIsInf() throws Exception {
  Schema resultType =
      Schema.builder()
          .addBooleanField("field_1")
          .addBooleanField("field_2")
          .addBooleanField("field_3")
          .addBooleanField("field_4")
          .build();
  Row resultRow = Row.withSchema(resultType).addValues(true, true, true, true).build();

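  // All four input fields are expected to hold infinite values, so every IS_INF call returns true.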
  String sql =
      "SELECT IS_INF(f_float_1), IS_INF(f_double_1), IS_INF(f_float_2), IS_INF(f_double_2) FROM PCOLLECTION";
  PCollection<Row> result = boundedInputFloatDouble.apply("testUdf", SqlTransform.query(sql));
  PAssert.that(result).containsInAnyOrder(resultRow);
  pipeline.run().waitUntilFinish();
}
 
Example #9
Source File: OuterRightJoinTest.java    From beam with Apache License 2.0
@Test
public void testJoinNoneToOneMapping() {
  leftListOfKv.add(KV.of("Key2", 4L));
  PCollection<KV<String, Long>> leftCollection = p.apply("CreateLeft", Create.of(leftListOfKv));

  rightListOfKv.add(KV.of("Key3", "bar"));
  PCollection<KV<String, String>> rightCollection =
      p.apply("CreateRight", Create.of(rightListOfKv));

  PCollection<KV<String, KV<Long, String>>> output =
      Join.rightOuterJoin(leftCollection, rightCollection, -1L);

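  // "Key3" has no matching element on the left, so the left side falls back to the join's null value -1L.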
  expectedResult.add(KV.of("Key3", KV.of(-1L, "bar")));
  PAssert.that(output).containsInAnyOrder(expectedResult);
  p.run();
}
 
Example #10
Source File: JoinTest.java    From beam with Apache License 2.0
@Test
public void testBuild_ImplicitName() {
  final Pipeline pipeline = TestUtils.createTestPipeline();
  final PCollection<String> left =
      TestUtils.createMockDataset(pipeline, TypeDescriptors.strings());
  final PCollection<String> right =
      TestUtils.createMockDataset(pipeline, TypeDescriptors.strings());
  final PCollection<KV<Integer, String>> joined =
      Join.of(left, right)
          .by(String::length, String::length)
          .using(
              (String l, String r, Collector<String> c) -> {
                // no-op
              })
          .output();
  final Join join = (Join) TestUtils.getProducer(joined);
  assertFalse(join.getName().isPresent());
}
 
Example #11
Source File: FlattenTranslatorBatch.java    From beam with Apache License 2.0
@Override
public void translateTransform(
    PTransform<PCollectionList<T>, PCollection<T>> transform, TranslationContext context) {
  Collection<PValue> pcollectionList = context.getInputs().values();
  Dataset<WindowedValue<T>> result = null;
  if (pcollectionList.isEmpty()) {
    result = context.emptyDataset();
  } else {
    for (PValue pValue : pcollectionList) {
      checkArgument(
          pValue instanceof PCollection,
          "Got non-PCollection input to flatten: %s of type %s",
          pValue,
          pValue.getClass().getSimpleName());
      @SuppressWarnings("unchecked")
      PCollection<T> pCollection = (PCollection<T>) pValue;
      Dataset<WindowedValue<T>> current = context.getDataset(pCollection);
      if (result == null) {
        result = current;
      } else {
        result = result.union(current);
      }
    }
  }
  context.putDataset(context.getOutput(), result);
}
 
Example #12
Source File: ReadEvaluatorFactory.java    From beam with Apache License 2.0
@Override
public Collection<CommittedBundle<SourceShard<T>>> getInitialInputs(
    AppliedPTransform<PBegin, PCollection<T>, PTransform<PBegin, PCollection<T>>>
        appliedTransform,
    int targetParallelism)
    throws Exception {
  switch (ReadTranslation.sourceIsBounded(appliedTransform)) {
    case BOUNDED:
      // This cast could be made unnecessary, but too much bounded polymorphism
      return (Collection)
          boundedInputProvider.getInitialInputs(appliedTransform, targetParallelism);
    case UNBOUNDED:
      // This cast could be made unnecessary, but too much bounded polymorphism
      return (Collection)
          unboundedInputProvider.getInitialInputs(appliedTransform, targetParallelism);
    default:
      throw new IllegalArgumentException("PCollection is neither bounded nor unbounded?!?");
  }
}
 
Example #13
Source File: SimpleFileIOInputRuntimeTest.java    From components with Apache License 2.0
/**
 * Basic unit test using all default values (except for the path) on an in-memory DFS cluster.
 */
@Test
public void testBasicAvro() throws IOException, URISyntaxException {
    RecordSet rs = getSimpleTestData(0);
    writeRandomAvroFile(mini.getFs(), "/user/test/input.avro", rs);
    String fileSpec = mini.getFs().getUri().resolve("/user/test/input.avro").toString();

    // Configure the component.
    SimpleFileIOInputProperties inputProps = createInputComponentProperties();
    inputProps.getDatasetProperties().format.setValue(SimpleFileIOFormat.AVRO);
    inputProps.getDatasetProperties().path.setValue(fileSpec);

    // Create the runtime.
    SimpleFileIOInputRuntime runtime = new SimpleFileIOInputRuntime();
    runtime.initialize(null, inputProps);

    // Use the runtime in a direct pipeline to test.
    final Pipeline p = beam.createPipeline();
    PCollection<IndexedRecord> readLines = p.apply(runtime);

    // Check the expected values.
    PAssert.that(readLines).containsInAnyOrder(rs.getAllData());

    // And run the test.
    p.run().waitUntilFinish();
}
 
Example #14
Source File: ViewTest.java    From beam with Apache License 2.0
@Test
@Category(ValidatesRunner.class)
public void testSingletonSideInput() {

  final PCollectionView<Integer> view =
      pipeline.apply("Create47", Create.of(47)).apply(View.asSingleton());

  PCollection<Integer> output =
      pipeline
          .apply("Create123", Create.of(1, 2, 3))
          .apply(
              "OutputSideInputs",
              ParDo.of(
                      new DoFn<Integer, Integer>() {
                        @ProcessElement
                        public void processElement(ProcessContext c) {
                          c.output(c.sideInput(view));
                        }
                      })
                  .withSideInputs(view));

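  // Each of the three main-input elements is replaced by the singleton side-input value 47.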
  PAssert.that(output).containsInAnyOrder(47, 47, 47);

  pipeline.run();
}
 
Example #15
Source File: PubsubIOTest.java    From beam with Apache License 2.0
@Test
public void testReadMessagesWithCoderAndParseFn() {
  Coder<PubsubMessage> coder = PubsubMessagePayloadOnlyCoder.of();
  List<PubsubMessage> inputs =
      ImmutableList.of(
          new PubsubMessage("foo".getBytes(StandardCharsets.UTF_8), new HashMap<>()),
          new PubsubMessage("bar".getBytes(StandardCharsets.UTF_8), new HashMap<>()));
  setupTestClient(inputs, coder);

  PCollection<String> read =
      readPipeline.apply(
          PubsubIO.readMessagesWithCoderAndParseFn(
                  StringUtf8Coder.of(), new StringPayloadParseFn())
              .fromSubscription(SUBSCRIPTION.getPath())
              .withClock(CLOCK)
              .withClientFactory(clientFactory));

  List<String> outputs = ImmutableList.of("foo", "bar");
  PAssert.that(read).containsInAnyOrder(outputs);
  readPipeline.run();
}
 
Example #16
Source File: JacksonTransformsTest.java    From beam with Apache License 2.0
@Test(expected = Pipeline.PipelineExecutionException.class)
public void failParsingWithoutCustomMapper() {
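  // The EXTRA_PROPERTIES_JSONS inputs cannot be parsed without a custom-configured ObjectMapper,
  // so the pipeline is expected to fail at execution time.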
  PCollection<MyPojo> output =
      pipeline
          .apply(Create.of(EXTRA_PROPERTIES_JSONS))
          .apply(ParseJsons.of(MyPojo.class))
          .setCoder(SerializableCoder.of(MyPojo.class));

  PAssert.that(output).empty();

  pipeline.run();
}
 
Example #17
Source File: SplittableDoFnTest.java    From beam with Apache License 2.0
private void testWindowedSideInput(IsBounded bounded) {
  PCollection<Integer> mainInput =
      p.apply(
              "main",
              Create.timestamped(
                  TimestampedValue.of(0, new Instant(0)),
                  TimestampedValue.of(1, new Instant(1)),
                  TimestampedValue.of(2, new Instant(2)),
                  TimestampedValue.of(3, new Instant(3)),
                  TimestampedValue.of(4, new Instant(4)),
                  TimestampedValue.of(5, new Instant(5)),
                  TimestampedValue.of(6, new Instant(6)),
                  TimestampedValue.of(7, new Instant(7))))
          .apply("window 2", Window.into(FixedWindows.of(Duration.millis(2))));

  PCollectionView<String> sideInput =
      p.apply(
              "side",
              Create.timestamped(
                  TimestampedValue.of("a", new Instant(0)),
                  TimestampedValue.of("b", new Instant(4))))
          .apply("window 4", Window.into(FixedWindows.of(Duration.millis(4))))
          .apply("singleton", View.asSingleton());

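  // The 2 ms main-input windows nest inside the 4 ms side-input windows, so elements 0-3 observe "a" and 4-7 observe "b".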
  PCollection<String> res =
      mainInput.apply(ParDo.of(sdfWithSideInput(bounded, sideInput)).withSideInputs(sideInput));

  PAssert.that(res).containsInAnyOrder("a:0", "a:1", "a:2", "a:3", "b:4", "b:5", "b:6", "b:7");

  p.run();
}
 
Example #18
Source File: TaskTest.java    From beam with Apache License 2.0
@Test
public void combine_binaryCombineFn_lambda() {
  Create.Values<BigInteger> values = Create.of(
      BigInteger.valueOf(10), BigInteger.valueOf(20), BigInteger.valueOf(30),
      BigInteger.valueOf(40), BigInteger.valueOf(50)
  );
  PCollection<BigInteger> numbers = testPipeline.apply(values);

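  // Task.applyTransform is expected to combine the five values into their sum: 10 + 20 + 30 + 40 + 50 = 150.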
  PCollection<BigInteger> results = Task.applyTransform(numbers);

  PAssert.that(results)
      .containsInAnyOrder(BigInteger.valueOf(150));

  testPipeline.run().waitUntilFinish();
}
 
Example #19
Source File: ViewTest.java    From beam with Apache License 2.0
@Test
@Category(ValidatesRunner.class)
public void testWindowedSideInputFixedToGlobal() {

  final PCollectionView<Integer> view =
      pipeline
          .apply(
              "CreateSideInput",
              Create.timestamped(
                  TimestampedValue.of(1, new Instant(1)),
                  TimestampedValue.of(2, new Instant(11)),
                  TimestampedValue.of(3, new Instant(13))))
          .apply("WindowSideInput", Window.into(new GlobalWindows()))
          .apply(Sum.integersGlobally())
          .apply(View.asSingleton());

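  // The side input is re-windowed into the global window, so the sum 1 + 2 + 3 = 6 is visible from every fixed window of the main input.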
  PCollection<String> output =
      pipeline
          .apply(
              "CreateMainInput",
              Create.timestamped(
                  TimestampedValue.of("A", new Instant(4)),
                  TimestampedValue.of("B", new Instant(15)),
                  TimestampedValue.of("C", new Instant(7))))
          .apply("WindowMainInput", Window.into(FixedWindows.of(Duration.millis(10))))
          .apply(
              "OutputMainAndSideInputs",
              ParDo.of(
                      new DoFn<String, String>() {
                        @ProcessElement
                        public void processElement(ProcessContext c) {
                          c.output(c.element() + c.sideInput(view));
                        }
                      })
                  .withSideInputs(view));

  PAssert.that(output).containsInAnyOrder("A6", "B6", "C6");

  pipeline.run();
}
 
Example #20
Source File: HadoopFormatIOReadTest.java    From beam with Apache License 2.0
@Test
public void testReadingData() {
  HadoopFormatIO.Read<Text, Employee> read =
      HadoopFormatIO.<Text, Employee>read().withConfiguration(serConf.get());
  List<KV<Text, Employee>> expected = TestEmployeeDataSet.getEmployeeData();
  PCollection<KV<Text, Employee>> actual = p.apply("ReadTest", read);
  PAssert.that(actual).containsInAnyOrder(expected);
  p.run();
}
 
Example #21
Source File: HllCountTest.java    From beam with Apache License 2.0
@Test
@Category(NeedsRunner.class)
public void testExtractGloballyForEmptySketch() {
  PCollection<Long> result = p.apply(Create.of(EMPTY_SKETCH)).apply(HllCount.Extract.globally());

  PAssert.thatSingleton(result).isEqualTo(0L);
  p.run();
}
 
Example #22
Source File: JacksonTransformsTest.java    From beam with Apache License 2.0
@Test
public void testParsingInvalidJsonsWithFailuresDefaultHandler() {
  WithFailures.Result<PCollection<MyPojo>, KV<String, Map<String, String>>> result =
      pipeline
          .apply(Create.of(Iterables.concat(VALID_JSONS, INVALID_JSONS)))
          .apply(ParseJsons.of(MyPojo.class).exceptionsVia());

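  // Valid inputs land in result.output(); invalid JSON is routed to the failure collection as a KV of the raw input and an error-detail map.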
  result.output().setCoder(SerializableCoder.of(MyPojo.class));

  PAssert.that(result.output()).containsInAnyOrder(POJOS);
  assertParsingWithErrorMapHandler(result);

  pipeline.run();
}
 
Example #23
Source File: SolrIOTest.java    From beam with Apache License 2.0
@Test
public void testRead() throws Exception {
  SolrIOTestUtils.insertTestDocuments(SOLR_COLLECTION, NUM_DOCS, solrClient);

  PCollection<SolrDocument> output =
      pipeline.apply(
          SolrIO.read()
              .withConnectionConfiguration(connectionConfiguration)
              .from(SOLR_COLLECTION)
              .withBatchSize(101));
  PAssert.thatSingleton(output.apply("Count", Count.globally())).isEqualTo(NUM_DOCS);
  pipeline.run();
}
 
Example #24
Source File: ParDoSchemaTest.java    From beam with Apache License 2.0
@Test
@Category(ValidatesRunner.class)
public void testSchemaFieldDescriptorSelectionUnboxing() {
  List<ForExtraction> pojoList =
      Lists.newArrayList(
          new AutoValue_ParDoSchemaTest_ForExtraction(1, "a", Lists.newArrayList(1, 2)),
          new AutoValue_ParDoSchemaTest_ForExtraction(2, "b", Lists.newArrayList(2, 3)),
          new AutoValue_ParDoSchemaTest_ForExtraction(3, "c", Lists.newArrayList(3, 4)));

  PCollection<String> output =
      pipeline
          .apply(Create.of(pojoList))
          .apply(
              ParDo.of(
                  new DoFn<ForExtraction, String>() {
                    @FieldAccess("stringSelector")
                    final FieldAccessDescriptor stringSelector =
                        FieldAccessDescriptor.withFieldNames("stringField");

                    @FieldAccess("intSelector")
                    final FieldAccessDescriptor intSelector =
                        FieldAccessDescriptor.withFieldNames("integerField");

                    @FieldAccess("intsSelector")
                    final FieldAccessDescriptor intsSelector =
                        FieldAccessDescriptor.withFieldNames("ints");

                    @ProcessElement
                    public void process(
                        @FieldAccess("stringSelector") String stringField,
                        @FieldAccess("intSelector") int integerField,
                        @FieldAccess("intsSelector") int[] intArray,
                        OutputReceiver<String> r) {
                      r.output(
                          stringField + ":" + integerField + ":" + Arrays.toString(intArray));
                    }
                  }));
  PAssert.that(output).containsInAnyOrder("a:1:[1, 2]", "b:2:[2, 3]", "c:3:[3, 4]");
  pipeline.run();
}
 
Example #25
Source File: VerifyBamIdTest.java    From dataflow-java with Apache License 2.0
@Test
public void testCombineReads() throws Exception {
  PCollection<KV<Position, AlleleFreq>> refCounts = p.apply("createInput", Create.of(refCountList));
  PAssert.that(refCounts).containsInAnyOrder(refCountList);

  Read read = Read.newBuilder()
      .setProperPlacement(true)
      .setAlignment(LinearAlignment.newBuilder()
          .setPosition(com.google.genomics.v1.Position.newBuilder()
              .setReferenceName("1")
              .setPosition(123))
          .addCigar(CigarUnit.newBuilder()
              .setOperation(Operation.ALIGNMENT_MATCH)
              .setOperationLength(3)))
      .setAlignedSequence("ATG")
      .addAllAlignedQuality(ImmutableList.of(3, 4, 5))
      .build();

  PCollection<Read> reads = p.apply(Create.of(read));
  PAssert.that(reads).containsInAnyOrder(read);

  PCollection<KV<Position, ReadCounts>> results =
      VerifyBamId.combineReads(reads, 1.0, "", refCounts);

  ReadCounts one = new ReadCounts();
  one.setRefFreq(0.8);
  one.addReadQualityCount(ReadQualityCount.Base.REF, 3, 1L);
  ReadCounts two = new ReadCounts();
  two.setRefFreq(0.5);
  two.addReadQualityCount(ReadQualityCount.Base.NONREF, 4, 1L);
  ReadCounts three = new ReadCounts();
  three.setRefFreq(0.6);
  three.addReadQualityCount(ReadQualityCount.Base.OTHER, 5, 1L);

  PAssert.that(results)
      .containsInAnyOrder(KV.of(position1, one), KV.of(position2, two), KV.of(position3, three));
  p.run();
}
 
Example #26
Source File: BeamModel.java    From streamingbook with Apache License 2.0
@Override
public PCollection<String> expand(PCollection<KV<String, Integer>> input) {
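    // Two-minute fixed windows with early firings one minute after the first element in a pane,
    // a late firing for each late element, two minutes of allowed lateness, and accumulating panes;
    // sums are then computed per key and formatted as strings.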
    return input
        .apply(Window.<KV<String, Integer>>into(FixedWindows.of(TWO_MINUTES))
               .triggering(AfterWatermark.pastEndOfWindow()
                           .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane().plusDelayOf(ONE_MINUTE))
                           .withLateFirings(AfterPane.elementCountAtLeast(1)))
               .withAllowedLateness(TWO_MINUTES)
               .accumulatingFiredPanes())
        .apply(Sum.integersPerKey())
        .apply(ParDo.of(new FormatAsStrings()));
}
 
Example #27
Source File: FlattenTest.java    From beam with Apache License 2.0
@Test
@Category(ValidatesRunner.class)
public void testFlattenPCollections() {
  List<List<String>> inputs = Arrays.asList(LINES, NO_LINES, LINES2, NO_LINES, LINES, NO_LINES);

  PCollection<String> output =
      makePCollectionListOfStrings(p, inputs).apply(Flatten.pCollections());

  PAssert.that(output).containsInAnyOrder(flattenLists(inputs));
  p.run();
}
 
Example #28
Source File: CreateTest.java    From beam with Apache License 2.0
@Test
@Category(NeedsRunner.class)
public void testCreateParameterizedType() throws Exception {
  PCollection<TimestampedValue<String>> output =
      p.apply(
          Create.of(
              TimestampedValue.of("a", new Instant(0)),
              TimestampedValue.of("b", new Instant(0))));

  PAssert.that(output)
      .containsInAnyOrder(
          TimestampedValue.of("a", new Instant(0)), TimestampedValue.of("b", new Instant(0)));

  p.run();
}
 
Example #29
Source File: RegexTest.java    From beam with Apache License 2.0
@Test
@Category(NeedsRunner.class)
public void testSplits() {

  PCollection<String> output =
      p.apply(Create.of("The  quick   brown fox jumps over    the lazy dog"))
          .apply(Regex.split("\\W+"));

  PAssert.that(output)
      .containsInAnyOrder("The", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog");
  p.run();
}
 
Example #30
Source File: SdkComponentsTest.java    From beam with Apache License 2.0
@Test
public void registerPCollectionExistingNameCollision() throws IOException {
  PCollection<Long> pCollection =
      pipeline.apply("FirstCount", GenerateSequence.from(0)).setName("foo");
  String firstId = components.registerPCollection(pCollection);
  PCollection<Long> duplicate =
      pipeline.apply("SecondCount", GenerateSequence.from(0)).setName("foo");
  String secondId = components.registerPCollection(duplicate);
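  // Both PCollections were explicitly named "foo"; the second registration must receive a uniquified id.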
  assertThat(firstId, equalTo("foo"));
  assertThat(secondId, containsString("foo"));
  assertThat(secondId, not(equalTo("foo")));
  components.toComponents().getPcollectionsOrThrow(firstId);
  components.toComponents().getPcollectionsOrThrow(secondId);
}