Java Code Examples for org.apache.beam.sdk.values.PCollection

The following examples show how to use org.apache.beam.sdk.values.PCollection. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: beam   Source File: SampleTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
@Category(NeedsRunner.class)
public void testSampleAnyZero() {
  // Six timestamped integers, split into 3-second fixed windows.
  PCollection<Integer> source =
      pipeline.apply(
          Create.timestamped(ImmutableList.of(tv(0), tv(1), tv(2), tv(3), tv(4), tv(5)))
              .withCoder(BigEndianIntegerCoder.of()));
  PCollection<Integer> sampled =
      source
          .apply(Window.into(FixedWindows.of(Duration.standardSeconds(3))))
          .apply(Sample.any(0));

  // Sample.any(0) must yield an empty sample in each window.
  IntervalWindow firstWindow = new IntervalWindow(new Instant(0), Duration.standardSeconds(3));
  IntervalWindow secondWindow =
      new IntervalWindow(new Instant(3000), Duration.standardSeconds(3));
  PAssert.that(sampled).inWindow(firstWindow).satisfies(new VerifyCorrectSample<>(0, EMPTY));
  PAssert.that(sampled).inWindow(secondWindow).satisfies(new VerifyCorrectSample<>(0, EMPTY));
  pipeline.run();
}
 
Example 2
Source Project: beam   Source File: TestBoundedTable.java    License: Apache License 2.0 6 votes vote down vote up
@Override
public POutput buildIOWriter(PCollection<Row> input) {
  // Accumulate every written row into the static CONTENT buffer; wipe it on teardown.
  DoFn<Row, Void> collectFn =
      new DoFn<Row, Void>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          CONTENT.add(c.element());
        }

        @Teardown
        public void close() {
          CONTENT.clear();
        }
      };
  input.apply(ParDo.of(collectFn));
  return PDone.in(input.getPipeline());
}
 
Example 3
Source Project: component-runtime   Source File: NativeWrappedIOTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void source() {
    final String plugin = COMPONENTS.getTestPlugins().iterator().next();
    // Resolve the "beamtest#source" mapper from the component manager, then cast it to a PTransform.
    final Object component = COMPONENTS
            .asManager()
            .createComponent("beamtest", "source", ComponentManager.ComponentType.MAPPER, 1, emptyMap())
            .orElseThrow(() -> new IllegalArgumentException("no beamtest#source component"));
    final PTransform<PBegin, PCollection<JsonObject>> jdbc = PTransform.class.cast(component);
    final PCollection<JsonObject> records =
            pipeline.apply(jdbc).setCoder(JsonpJsonObjectCoder.of(plugin));
    // The source is expected to emit a record whose "id" field is "test".
    PAssert.that(records).satisfies((SerializableFunction<Iterable<JsonObject>, Void>) input -> {
        assertEquals("test", input.iterator().next().getString("id"));
        return null;
    });
    pipeline.run().waitUntilFinish();
}
 
Example 4
Source Project: beam   Source File: PTransformTranslationTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void toAndFromProto() throws IOException {
  SdkComponents sdkComponents =
      SdkComponents.create(spec.getTransform().getPipeline().getOptions());
  RunnerApi.PTransform proto = convert(spec, sdkComponents);
  Components protoComponents = sdkComponents.toComponents();

  // Sanity checks: input/output/child counts and the unique name must round-trip.
  assertThat(proto.getInputsCount(), equalTo(spec.getTransform().getInputs().size()));
  assertThat(proto.getOutputsCount(), equalTo(spec.getTransform().getOutputs().size()));
  assertThat(proto.getSubtransformsCount(), equalTo(spec.getChildren().size()));

  assertThat(proto.getUniqueName(), equalTo(spec.getTransform().getFullName()));
  // Every input and output PCollection must be registered in the proto components.
  for (PValue input : spec.getTransform().getInputs().values()) {
    protoComponents.getPcollectionsOrThrow(
        sdkComponents.registerPCollection((PCollection<?>) input));
  }
  for (PValue output : spec.getTransform().getOutputs().values()) {
    protoComponents.getPcollectionsOrThrow(
        sdkComponents.registerPCollection((PCollection<?>) output));
  }
}
 
Example 5
Source Project: beam   Source File: FlatMapElementsTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that {@link FlatMapElements} accepts a lambda, which is instantiated as a {@link
 * ProcessFunction}.
 */
@Test
@Category(NeedsRunner.class)
public void testFlatMapBasicWithLambda() throws Exception {
  // Each i fans out to the pair (i, -i). The explicit input type on the lambda is required.
  PCollection<Integer> result =
      pipeline
          .apply(Create.of(1, 2, 3))
          .apply(
              FlatMapElements
                  .into(TypeDescriptors.integers())
                  .via((Integer i) -> ImmutableList.of(i, -i)));

  PAssert.that(result).containsInAnyOrder(1, 3, -1, -3, 2, -2);
  pipeline.run();
}
 
Example 6
Source Project: beam   Source File: ForwardingPTransformTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void getDefaultOutputCoderDelegates() throws Exception {
  // Primitive PCollections with a null pipeline/coder are enough for a delegation check.
  @SuppressWarnings("unchecked")
  PCollection<Integer> in =
      PCollection.createPrimitiveOutputInternal(
          null /* pipeline */,
          WindowingStrategy.globalDefault(),
          PCollection.IsBounded.BOUNDED,
          null /* coder */);
  @SuppressWarnings("unchecked")
  PCollection<String> out =
      PCollection.createPrimitiveOutputInternal(
          null /* pipeline */,
          WindowingStrategy.globalDefault(),
          PCollection.IsBounded.BOUNDED,
          null /* coder */);
  @SuppressWarnings("unchecked")
  Coder<String> expectedCoder = mock(Coder.class);

  // The forwarding transform must hand both calls through to the delegate.
  when(delegate.expand(in)).thenReturn(out);
  when(delegate.getDefaultOutputCoder(in, out)).thenReturn(expectedCoder);
  assertThat(forwarding.expand(in).getCoder(), equalTo(expectedCoder));
}
 
Example 7
Source Project: beam   Source File: BeamSalUhfSpecialTypeAndValueTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testIsInf() throws Exception {
  // All four IS_INF calls are expected to evaluate to true.
  Schema schema =
      Schema.builder()
          .addBooleanField("field_1")
          .addBooleanField("field_2")
          .addBooleanField("field_3")
          .addBooleanField("field_4")
          .build();
  Row expected = Row.withSchema(schema).addValues(true, true, true, true).build();

  String sql =
      "SELECT IS_INF(f_float_1), IS_INF(f_double_1), IS_INF(f_float_2), IS_INF(f_double_2) FROM PCOLLECTION";
  PCollection<Row> result = boundedInputFloatDouble.apply("testUdf", SqlTransform.query(sql));
  PAssert.that(result).containsInAnyOrder(expected);
  pipeline.run().waitUntilFinish();
}
 
Example 8
Source Project: beam   Source File: OuterRightJoinTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testJoinNoneToOneMapping() {
  // "Key2" exists only on the left, "Key3" only on the right.
  leftListOfKv.add(KV.of("Key2", 4L));
  PCollection<KV<String, Long>> left = p.apply("CreateLeft", Create.of(leftListOfKv));

  rightListOfKv.add(KV.of("Key3", "bar"));
  PCollection<KV<String, String>> right = p.apply("CreateRight", Create.of(rightListOfKv));

  // Right outer join: unmatched right keys pick up the -1L default on the left side.
  PCollection<KV<String, KV<Long, String>>> joined = Join.rightOuterJoin(left, right, -1L);

  expectedResult.add(KV.of("Key3", KV.of(-1L, "bar")));
  PAssert.that(joined).containsInAnyOrder(expectedResult);
  p.run();
}
 
Example 9
Source Project: beam   Source File: JoinTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testBuild_ImplicitName() {
  final Pipeline testPipeline = TestUtils.createTestPipeline();
  final PCollection<String> left =
      TestUtils.createMockDataset(testPipeline, TypeDescriptors.strings());
  final PCollection<String> right =
      TestUtils.createMockDataset(testPipeline, TypeDescriptors.strings());
  // Build a join without an explicit name; the producer must report no name.
  final PCollection<KV<Integer, String>> joined =
      Join.of(left, right)
          .by(String::length, String::length)
          .using(
              (String l, String r, Collector<String> c) -> {
                // no-op
              })
          .output();
  final Join join = (Join) TestUtils.getProducer(joined);
  assertFalse(join.getName().isPresent());
}
 
Example 10
Source Project: beam   Source File: FlattenTranslatorBatch.java    License: Apache License 2.0 6 votes vote down vote up
@Override
public void translateTransform(
    PTransform<PCollectionList<T>, PCollection<T>> transform, TranslationContext context) {
  // Flatten unions the datasets of all input PCollections; no inputs yields an empty dataset.
  Collection<PValue> inputs = context.getInputs().values();
  Dataset<WindowedValue<T>> flattened = null;
  if (inputs.isEmpty()) {
    flattened = context.emptyDataset();
  } else {
    for (PValue input : inputs) {
      checkArgument(
          input instanceof PCollection,
          "Got non-PCollection input to flatten: %s of type %s",
          input,
          input.getClass().getSimpleName());
      @SuppressWarnings("unchecked")
      PCollection<T> pCollection = (PCollection<T>) input;
      Dataset<WindowedValue<T>> current = context.getDataset(pCollection);
      // First dataset seeds the accumulator; the rest are unioned onto it.
      flattened = (flattened == null) ? current : flattened.union(current);
    }
  }
  context.putDataset(context.getOutput(), flattened);
}
 
Example 11
Source Project: beam   Source File: ReadEvaluatorFactory.java    License: Apache License 2.0 6 votes vote down vote up
@Override
public Collection<CommittedBundle<SourceShard<T>>> getInitialInputs(
    AppliedPTransform<PBegin, PCollection<T>, PTransform<PBegin, PCollection<T>>>
        appliedTransform,
    int targetParallelism)
    throws Exception {
  // Dispatch on the boundedness of the source. The raw Collection casts could be
  // made unnecessary, but there is too much bounded polymorphism involved.
  switch (ReadTranslation.sourceIsBounded(appliedTransform)) {
    case BOUNDED:
      return (Collection)
          boundedInputProvider.getInitialInputs(appliedTransform, targetParallelism);
    case UNBOUNDED:
      return (Collection)
          unboundedInputProvider.getInitialInputs(appliedTransform, targetParallelism);
    default:
      throw new IllegalArgumentException("PCollection is neither bounded nor unbounded?!?");
  }
}
 
Example 12
Source Project: components   Source File: SimpleFileIOInputRuntimeTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Basic unit test using all default values (except for the path) on an in-memory DFS cluster.
 */
@Test
public void testBasicAvro() throws IOException, URISyntaxException {
    // Write a random Avro file into the mini DFS cluster as the fixture.
    RecordSet records = getSimpleTestData(0);
    writeRandomAvroFile(mini.getFs(), "/user/test/input.avro", records);
    String fileSpec = mini.getFs().getUri().resolve("/user/test/input.avro").toString();

    // Configure the input component to read that file as Avro.
    SimpleFileIOInputProperties inputProps = createInputComponentProperties();
    inputProps.getDatasetProperties().format.setValue(SimpleFileIOFormat.AVRO);
    inputProps.getDatasetProperties().path.setValue(fileSpec);

    // Create and initialize the runtime under test.
    SimpleFileIOInputRuntime runtime = new SimpleFileIOInputRuntime();
    runtime.initialize(null, inputProps);

    // Run it inside a direct pipeline and verify every record comes back.
    final Pipeline p = beam.createPipeline();
    PCollection<IndexedRecord> readLines = p.apply(runtime);
    PAssert.that(readLines).containsInAnyOrder(records.getAllData());
    p.run().waitUntilFinish();
}
 
Example 13
Source Project: beam   Source File: ViewTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
@Category(ValidatesRunner.class)
public void testSingletonSideInput() {
  // A singleton view holding 47, consumed as a side input by every element.
  final PCollectionView<Integer> view =
      pipeline.apply("Create47", Create.of(47)).apply(View.asSingleton());

  DoFn<Integer, Integer> emitSideInput =
      new DoFn<Integer, Integer>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          c.output(c.sideInput(view));
        }
      };

  PCollection<Integer> output =
      pipeline
          .apply("Create123", Create.of(1, 2, 3))
          .apply("OutputSideInputs", ParDo.of(emitSideInput).withSideInputs(view));

  // Three input elements, each replaced by the singleton side-input value.
  PAssert.that(output).containsInAnyOrder(47, 47, 47);

  pipeline.run();
}
 
Example 14
Source Project: beam   Source File: PubsubIOTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testReadMessagesWithCoderAndParseFn() {
  // Seed the fake Pubsub client with two payload-only messages.
  Coder<PubsubMessage> messageCoder = PubsubMessagePayloadOnlyCoder.of();
  List<PubsubMessage> messages =
      ImmutableList.of(
          new PubsubMessage("foo".getBytes(StandardCharsets.UTF_8), new HashMap<>()),
          new PubsubMessage("bar".getBytes(StandardCharsets.UTF_8), new HashMap<>()));
  setupTestClient(messages, messageCoder);

  // Read with an explicit output coder plus a parse function that decodes payloads.
  PCollection<String> read =
      readPipeline.apply(
          PubsubIO.readMessagesWithCoderAndParseFn(
                  StringUtf8Coder.of(), new StringPayloadParseFn())
              .fromSubscription(SUBSCRIPTION.getPath())
              .withClock(CLOCK)
              .withClientFactory(clientFactory));

  PAssert.that(read).containsInAnyOrder(ImmutableList.of("foo", "bar"));
  readPipeline.run();
}
 
Example 15
Source Project: beam   Source File: EvaluationContext.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Add output of transform to context map and possibly cache if it conforms {@link
 * #shouldCache(PTransform, PValue)}.
 *
 * @param transform from which Dataset was created
 * @param pvalue output of transform
 * @param dataset created Dataset from transform
 */
private void putDataset(
    @Nullable PTransform<?, ? extends PValue> transform, PValue pvalue, Dataset dataset) {
  try {
    dataset.setName(pvalue.getName());
  } catch (IllegalStateException e) {
    // name not set, ignore
  }
  if (shouldCache(transform, pvalue)) {
    // we cache only PCollection — cast once instead of repeating the unchecked cast
    PCollection<?> pcollection = (PCollection<?>) pvalue;
    Coder<?> coder = pcollection.getCoder();
    Coder<? extends BoundedWindow> wCoder =
        pcollection.getWindowingStrategy().getWindowFn().windowCoder();
    dataset.cache(storageLevel(), WindowedValue.getFullCoder(coder, wCoder));
  }
  datasets.put(pvalue, dataset);
  leaves.add(dataset);
}
 
Example 16
Source Project: beam   Source File: ParDoTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
@Category({ValidatesRunner.class, UsesTestStream.class})
public void duplicateTimerSetting() {
  // Single element keyed stream; the DoFn under test sets the same timer twice.
  TestStream<KV<String, String>> input =
      TestStream.create(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()))
          .addElements(KV.of("key1", "v1"))
          .advanceWatermarkToInfinity();

  PCollection<String> output = pipeline.apply(input).apply(ParDo.of(new TwoTimerDoFn()));
  PAssert.that(output).containsInAnyOrder("It works");

  pipeline.run().waitUntilFinish();
}
 
Example 17
Source Project: beam   Source File: FlattenTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
@Category(ValidatesRunner.class)
public void testFlattenPCollections() {
  // Mix of empty and non-empty line lists to flatten.
  List<List<String>> lineSets =
      Arrays.asList(LINES, NO_LINES, LINES2, NO_LINES, LINES, NO_LINES);

  PCollection<String> flattened =
      makePCollectionListOfStrings(p, lineSets).apply(Flatten.pCollections());

  PAssert.that(flattened).containsInAnyOrder(flattenLists(lineSets));
  p.run();
}
 
Example 18
Source Project: beam   Source File: TaskTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void combine_binaryCombineFn_lambda() {
  // Five BigIntegers summing to 150.
  Create.Values<BigInteger> inputValues =
      Create.of(
          BigInteger.valueOf(10), BigInteger.valueOf(20), BigInteger.valueOf(30),
          BigInteger.valueOf(40), BigInteger.valueOf(50));
  PCollection<BigInteger> input = testPipeline.apply(inputValues);

  PCollection<BigInteger> summed = Task.applyTransform(input);

  PAssert.that(summed).containsInAnyOrder(BigInteger.valueOf(150));

  testPipeline.run().waitUntilFinish();
}
 
Example 19
Source Project: beam   Source File: RegexTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
@Category(NeedsRunner.class)
public void testSplits() {
  // Split on runs of non-word characters; repeated spaces collapse into one delimiter.
  PCollection<String> words =
      p.apply(Create.of("The  quick   brown fox jumps over    the lazy dog"))
          .apply(Regex.split("\\W+"));

  PAssert.that(words)
      .containsInAnyOrder("The", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog");
  p.run();
}
 
Example 20
Source Project: beam   Source File: CreateTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
@Category(NeedsRunner.class)
public void testCreateParameterizedType() throws Exception {
  // Create must handle a parameterized element type such as TimestampedValue<String>.
  TimestampedValue<String> a = TimestampedValue.of("a", new Instant(0));
  TimestampedValue<String> b = TimestampedValue.of("b", new Instant(0));

  PCollection<TimestampedValue<String>> output = p.apply(Create.of(a, b));

  PAssert.that(output).containsInAnyOrder(a, b);

  p.run();
}
 
Example 21
Source Project: beam   Source File: BigQueryIOPushDownIT.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void readUsingDirectReadMethod() {
  // Collect all planner rules, then drop the push-down rule so this run measures
  // the DIRECT_READ path without BigQuery predicate/project push-down.
  List<RelOptRule> ruleList = new ArrayList<>();
  for (RuleSet x : getRuleSets()) {
    x.iterator().forEachRemaining(ruleList::add);
  }
  // Remove push-down rule
  ruleList.remove(BeamIOPushDownRule.INSTANCE);

  // Metastore backed by a perf-metric-reporting table provider.
  InMemoryMetaStore inMemoryMetaStore = new InMemoryMetaStore();
  inMemoryMetaStore.registerProvider(
      new BigQueryPerfTableProvider(NAMESPACE, FIELDS_READ_METRIC));
  // NOTE(review): sqlEnv appears to be a test-class field reused by other cases — confirm.
  sqlEnv =
      BeamSqlEnv.builder(inMemoryMetaStore)
          .setPipelineOptions(PipelineOptionsFactory.create())
          .setRuleSets(new RuleSet[] {RuleSets.ofList(ruleList)})
          .build();
  sqlEnv.executeDdl(String.format(CREATE_TABLE_STATEMENT, Method.DIRECT_READ.toString()));

  // Plan the query, convert it to a PCollection, and time the read.
  BeamRelNode beamRelNode = sqlEnv.parseQuery(SELECT_STATEMENT);
  PCollection<Row> output =
      BeamSqlRelUtils.toPCollection(pipeline, beamRelNode)
          .apply(ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC)));

  PipelineResult result = pipeline.run();
  result.waitUntilFinish();
  collectAndPublishMetrics(result, "_directread");
}
 
Example 22
Source Project: beam   Source File: DocumentationExamplesTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Note that this one is not mentioned in the documentation due to the high number of RBK
 * examples and its rather low explanatory value. Please consider including it in the future.
 */
@Test
public void reduceByKeyTestOperatorContextManyOutputs() {

  PCollection<String> animals =
      pipeline.apply(Create.of("mouse", "rat", "elephant", "cat", "X", "duck"));

  // Key by name length, map each name to 1, and emit both the count and twice the count
  // per key — demonstrating a reducer producing multiple outputs via the Collector.
  PCollection<KV<Integer, Long>> countOfAnimalNamesByLength =
      ReduceByKey.named("to-letters-couts")
          .of(animals)
          .keyBy(String::length)
          .valueBy(e -> 1)
          .reduceBy(
              (Stream<Integer> s, Collector<Long> collector) -> {
                long count = s.count();
                collector.collect(count);
                collector.collect(2L * count);
              })
          .output();

  // Each key appears twice: once with its count and once with double the count.
  PAssert.that(countOfAnimalNamesByLength)
      .containsInAnyOrder(
          asList(
              KV.of(1, 1L),
              KV.of(3, 2L),
              KV.of(4, 1L),
              KV.of(5, 1L),
              KV.of(8, 1L),
              KV.of(1, 2L),
              KV.of(3, 4L),
              KV.of(4, 2L),
              KV.of(5, 2L),
              KV.of(8, 2L)));

  pipeline.run();
}
 
Example 23
Source Project: beam   Source File: BeamSqlDslAggregationTest.java    License: Apache License 2.0 5 votes vote down vote up
// Runs a 31-day TUMBLE-window aggregation over the given input and checks the
// three expected monthly windows starting 2016-12-08.
private void runTumbleWindowFor31Days(PCollection<Row> input) throws Exception {
  String sql =
      "SELECT f_int2, COUNT(*) AS `getFieldCount`,"
          + " TUMBLE_START(f_timestamp, INTERVAL '31' DAY) AS `window_start`, "
          + " TUMBLE_END(f_timestamp, INTERVAL '31' DAY) AS `window_end` "
          + " FROM TABLE_A"
          + " GROUP BY f_int2, TUMBLE(f_timestamp, INTERVAL '31' DAY)";
  PCollection<Row> result =
      PCollectionTuple.of(new TupleTag<>("TABLE_A"), input)
          .apply("testTumbleWindow", SqlTransform.query(sql));

  // NOTE(review): the count column is aliased `getFieldCount` in the SQL but declared as
  // "size" here — presumably only positions/types matter for the comparison; confirm.
  Schema resultType =
      Schema.builder()
          .addInt32Field("f_int2")
          .addInt64Field("size")
          .addDateTimeField("window_start")
          .addDateTimeField("window_end")
          .build();

  // One row per 31-day window; each window holds a single element (count == 1).
  List<Row> expectedRows =
      TestUtils.RowsBuilder.of(resultType)
          .addRows(
              0,
              1L,
              parseTimestampWithUTCTimeZone("2016-12-08 00:00:00"),
              parseTimestampWithUTCTimeZone("2017-01-08 00:00:00"),
              0,
              1L,
              parseTimestampWithUTCTimeZone("2017-01-08 00:00:00"),
              parseTimestampWithUTCTimeZone("2017-02-08 00:00:00"),
              0,
              1L,
              parseTimestampWithUTCTimeZone("2017-02-08 00:00:00"),
              parseTimestampWithUTCTimeZone("2017-03-11 00:00:00"))
          .getRows();

  PAssert.that(result).containsInAnyOrder(expectedRows);

  pipeline.run().waitUntilFinish();
}
 
Example 24
Source Project: beam   Source File: SplittableParDoOverrides.java    License: Apache License 2.0 5 votes vote down vote up
@Override
public PTransformReplacement<PCollection<InputT>, PCollectionTuple> getReplacementTransform(
    AppliedPTransform<PCollection<InputT>, PCollectionTuple, ParDo.MultiOutput<InputT, OutputT>>
        appliedTransform) {
  // Replace the multi-output ParDo with its splittable equivalent, keeping the main input.
  PCollection<InputT> mainInput = PTransformReplacements.getSingletonMainInput(appliedTransform);
  return PTransformReplacement.of(mainInput, SplittableParDo.forAppliedParDo(appliedTransform));
}
 
Example 25
Source Project: beam   Source File: KafkaIO.java    License: Apache License 2.0 5 votes vote down vote up
@Override
public PDone expand(PCollection<V> input) {
  // Pair every value with a null key so the KV-based write transform can be reused.
  SimpleFunction<V, KV<K, V>> withNullKey =
      new SimpleFunction<V, KV<K, V>>() {
        @Override
        public KV<K, V> apply(V element) {
          return KV.of(null, element);
        }
      };
  return input
      .apply("Kafka values with default key", MapElements.via(withNullKey))
      .setCoder(KvCoder.of(new NullOnlyCoder<>(), input.getCoder()))
      .apply(kvWriteTransform);
}
 
Example 26
Source Project: beam   Source File: BeamSqlDslAggregationTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testSupportsAggregationWithFilterWithoutProjection() throws Exception {
  pipeline.enableAbandonedNodeEnforcement(false);

  // Rows of (groupingKey, value) pairs.
  Schema schema =
      Schema.builder().addInt32Field("f_intGroupingKey").addInt32Field("f_intValue").build();

  PCollection<Row> rows =
      pipeline
          .apply(
              Create.of(
                  TestUtils.rowsBuilderOf(schema)
                      .addRows(
                          0, 1,
                          0, 2,
                          1, 3,
                          2, 4,
                          2, 5)
                      .getRows()))
          .setRowSchema(schema);

  // Aggregate with a WHERE filter but no projection of the grouping key.
  String sql =
      "SELECT SUM(f_intValue) FROM PCOLLECTION WHERE f_intValue < 5 GROUP BY f_intGroupingKey";

  PCollection<Row> result = rows.apply("sql", SqlTransform.query(sql));

  // Row (2,5) is filtered out, so groups sum to 3 (0: 1+2), 3 (1: 3), and 4 (2: 4).
  PAssert.that(result).containsInAnyOrder(rowsWithSingleIntField("sum", Arrays.asList(3, 3, 4)));

  pipeline.run();
}
 
Example 27
Source Project: beam   Source File: CombineTranslation.java    License: Apache License 2.0 5 votes vote down vote up
private static <InputT, AccumT> Coder<AccumT> extractAccumulatorCoder(
    GlobalCombineFn<InputT, AccumT, ?> combineFn,
    AppliedPTransform<PCollection<InputT>, ?, Combine.Globally<InputT, ?>> transform)
    throws IOException {
  try {
    // The combine's sole non-additional input supplies the element coder.
    @SuppressWarnings("unchecked")
    PCollection<InputT> input =
        (PCollection<InputT>)
            Iterables.getOnlyElement(TransformInputs.nonAdditionalInputs(transform));
    CoderRegistry registry = transform.getPipeline().getCoderRegistry();
    return combineFn.getAccumulatorCoder(registry, input.getCoder());
  } catch (CannotProvideCoderException e) {
    throw new IOException("Could not obtain a Coder for the accumulator", e);
  }
}
 
Example 28
Source Project: beam   Source File: PartitionTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testZeroNumPartitions() {
  PCollection<Integer> input = pipeline.apply(Create.of(591));

  // Partition.of requires a strictly positive partition count.
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("numPartitions must be > 0");
  input.apply(Partition.of(0, new IdentityFn()));
}
 
Example 29
Source Project: beam   Source File: FilterExamplesTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
@Category(ValidatesRunner.class)
public void testProjectionFn() {
  // Apply ProjectionFn to three rows and compare against the expected projections.
  PCollection<TableRow> rows = p.apply(Create.of(row1, row2, row3));
  PCollection<TableRow> projected = rows.apply(ParDo.of(new ProjectionFn()));

  PAssert.that(projected).containsInAnyOrder(outRow1, outRow2, outRow3);
  p.run().waitUntilFinish();
}
 
Example 30
Source Project: component-runtime   Source File: BeamSampleSource.java    License: Apache License 2.0 5 votes vote down vote up
@Override
public PCollection<JsonObject> expand(final PBegin input) {
    return input.apply(Create.of((Void) null)).apply(ParDo.of(new DoFn<Void, JsonObject>() {

        @ProcessElement
        public void processElement(final ProcessContext context) throws Exception {
            context
                    .output(jsonBuilderFactory
                            .createObjectBuilder()
                            .add(configuration.getColumnName(), configuration.getValue())
                            .build());
        }
    }));
}