org.apache.beam.sdk.transforms.ParDo Java Examples

The following examples show how to use org.apache.beam.sdk.transforms.ParDo. They are drawn from open-source projects; the source file, originating project, and license are noted above each example.
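At its core, ParDo applies a user-defined DoFn to every element of a PCollection, emitting zero or more outputs per element. Most of the examples below are variations on this basic pattern; here is a minimal sketch for orientation (the names are illustrative, not taken from any of the projects below):

PCollection<String> words = ...; // some upstream PCollection<String>
PCollection<Integer> lengths =
    words.apply(
        "ComputeLengths",
        ParDo.of(
            new DoFn<String, Integer>() {
              @ProcessElement
              public void processElement(@Element String word, OutputReceiver<Integer> out) {
                // One output per input element; a DoFn may also emit zero or many.
                out.output(word.length());
              }
            }));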
Example #1
Source File: BigQueryMerger.java    From DataflowTemplates with Apache License 2.0
@Override
public PCollection<KV<K, V>> expand(PCollection<KV<K, V>> input) {
  return input
      .apply(
          Window.<KV<K, V>>into(new GlobalWindows())
              .discardingFiredPanes()
              .triggering(
                  Repeatedly.forever(
                      AfterProcessingTime.pastFirstElementInPane()
                          .plusDelayOf(Duration.ZERO)
                          .alignedTo(intervalDuration, org.joda.time.Instant.now()))))
      .apply(GroupByKey.create())
      .apply(
          ParDo.of(
              new DoFn<KV<K, Iterable<V>>, KV<K, V>>() {
                @ProcessElement
                public void process(ProcessContext c) {
                  LOG.debug(
                      "TS: {} | Element: {} | Pane: {}", c.timestamp(), c.element(), c.pane());
                  Iterator<V> it = c.element().getValue().iterator();
                  if (it.hasNext()) {
                    c.output(KV.of(c.element().getKey(), it.next()));
                  }
                }
              }));
}
 
Example #2
Source File: StarterPipeline.java    From beam with Apache License 2.0
public static void main(String[] args) {
  Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).withValidation().create());

  p.apply(Create.of("Hello", "World"))
      .apply(
          MapElements.via(
              new SimpleFunction<String, String>() {
                @Override
                public String apply(String input) {
                  return input.toUpperCase();
                }
              }))
      .apply(
          ParDo.of(
              new DoFn<String, Void>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  LOG.info(c.element());
                }
              }));

  p.run();
}
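For comparison, the MapElements step above could equally be written as a ParDo; a minimal sketch of the equivalent uppercase step:

// Equivalent to the MapElements.via(SimpleFunction) step, expressed as a DoFn.
.apply(
    ParDo.of(
        new DoFn<String, String>() {
          @ProcessElement
          public void processElement(@Element String word, OutputReceiver<String> out) {
            out.output(word.toUpperCase());
          }
        }))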
 
Example #3
Source File: SpannerWriteIT.java    From beam with Apache License 2.0
@Test
public void testWrite() throws Exception {
  int numRecords = 100;
  p.apply(GenerateSequence.from(0).to(numRecords))
      .apply(ParDo.of(new GenerateMutations(options.getTable())))
      .apply(
          SpannerIO.write()
              .withProjectId(project)
              .withInstanceId(options.getInstanceId())
              .withDatabaseId(databaseName));

  PipelineResult result = p.run();
  result.waitUntilFinish();
  assertThat(result.getState(), is(PipelineResult.State.DONE));
  assertThat(countNumberOfRecords(), equalTo((long) numRecords));
}
 
Example #4
Source File: UserScore.java    From beam with Apache License 2.0
/** Run a batch pipeline. */
// [START DocInclude_USMain]
public static void main(String[] args) throws Exception {
  // Begin constructing a pipeline configured by commandline flags.
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline pipeline = Pipeline.create(options);

  // Read events from a text file and parse them.
  pipeline
      .apply(TextIO.read().from(options.getInput()))
      .apply("ParseGameEvent", ParDo.of(new ParseEventFn()))
      // Extract and sum username/score pairs from the event data.
      .apply("ExtractUserScore", new ExtractAndSumScore("user"))
      .apply(
          "WriteUserScoreSums", new WriteToText<>(options.getOutput(), configureOutput(), false));

  // Run the batch pipeline.
  pipeline.run().waitUntilFinish();
}
 
Example #5
Source File: BatchStatefulParDoOverrides.java    From beam with Apache License 2.0
@Override
public PCollection<OutputT> expand(PCollection<KV<K, InputT>> input) {
  DoFn<KV<K, InputT>, OutputT> fn = originalParDo.getFn();
  verifyFnIsStateful(fn);
  DataflowRunner.verifyDoFnSupportedBatch(fn);
  DataflowRunner.verifyStateSupportForWindowingStrategy(input.getWindowingStrategy());

  if (isFnApi) {
    return input.apply(Reshuffle.of()).apply(originalParDo);
  }

  PTransform<
          PCollection<? extends KV<K, Iterable<KV<Instant, WindowedValue<KV<K, InputT>>>>>>,
          PCollection<OutputT>>
      statefulParDo =
          ParDo.of(new BatchStatefulDoFn<>(fn)).withSideInputs(originalParDo.getSideInputs());

  return input.apply(new GbkBeforeStatefulParDo<>()).apply(statefulParDo);
}
 
Example #6
Source File: TFRecordIOTest.java    From beam with Apache License 2.0
private void runTestWrite(String[] elems, String... base64) throws IOException {
  File tmpFile =
      Files.createTempFile(tempFolder.getRoot().toPath(), "file", ".tfrecords").toFile();
  String filename = tmpFile.getPath();

  PCollection<byte[]> input =
      writePipeline
          .apply(Create.of(Arrays.asList(elems)))
          .apply(ParDo.of(new StringToByteArray()));

  TFRecordIO.Write write = TFRecordIO.write().to(filename).withoutSharding();
  input.apply(write);

  writePipeline.run();

  FileInputStream fis = new FileInputStream(tmpFile);
  String written = BaseEncoding.base64().encode(ByteStreams.toByteArray(fis));
  // bytes written may vary depending on the order of elems
  assertThat(written, is(in(base64)));
}
 
Example #7
Source File: NexmarkLauncher.java    From beam with Apache License 2.0
/** Return source of events from Kafka. */
private PCollection<Event> sourceEventsFromKafka(Pipeline p, final Instant now) {
  checkArgument((options.getBootstrapServers() != null), "Missing --bootstrapServers");
  NexmarkUtils.console("Reading events from Kafka Topic %s", options.getKafkaTopic());

  KafkaIO.Read<Long, byte[]> read =
      KafkaIO.<Long, byte[]>read()
          .withBootstrapServers(options.getBootstrapServers())
          .withTopic(options.getKafkaTopic())
          .withKeyDeserializer(LongDeserializer.class)
          .withValueDeserializer(ByteArrayDeserializer.class)
          .withStartReadTime(now)
          .withMaxNumRecords(
              options.getNumEvents() != null ? options.getNumEvents() : Long.MAX_VALUE);

  return p.apply(queryName + ".ReadKafkaEvents", read.withoutMetadata())
      .apply(queryName + ".KafkaToEvents", ParDo.of(BYTEARRAY_TO_EVENT));
}
 
Example #8
Source File: InsertFetchAndFilterStreamingSideInputNodesTest.java    From beam with Apache License 2.0
@Test
public void testSdkParDoWithoutSideInput() throws Exception {
  Pipeline p = Pipeline.create();
  PCollection<String> pc = p.apply(Create.of("a", "b", "c"));
  pc.apply(ParDo.of(new TestDoFn(null)));
  RunnerApi.Pipeline pipeline = PipelineTranslation.toProto(p);

  Node predecessor = createParDoNode("predecessor");
  Node mainInput = InstructionOutputNode.create(new InstructionOutput(), "fakeId");
  Node sideInputParDo = createParDoNode("noSideInput");

  MutableNetwork<Node, Edge> network = createEmptyNetwork();
  network.addNode(predecessor);
  network.addNode(mainInput);
  network.addNode(sideInputParDo);
  network.addEdge(predecessor, mainInput, DefaultEdge.create());
  network.addEdge(mainInput, sideInputParDo, DefaultEdge.create());

  Network<Node, Edge> inputNetwork = ImmutableNetwork.copyOf(network);
  network = InsertFetchAndFilterStreamingSideInputNodes.with(pipeline).forNetwork(network);

  assertThatNetworksAreIdentical(inputNetwork, network);
}
 
Example #9
Source File: ParDoTest.java    From beam with Apache License 2.0
@Test
public void testSideInputAsMap() {
  PCollectionView<Map<String, Integer>> sideInputView =
      pipeline
          .apply("Create sideInput", Create.of(KV.of("key1", 1), KV.of("key2", 2)))
          .apply(View.asMap());
  PCollection<Integer> input =
      pipeline
          .apply("Create input", Create.of(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))
          .apply(
              ParDo.of(
                      new DoFn<Integer, Integer>() {
                        @ProcessElement
                        public void processElement(ProcessContext c) {
                          Map<String, Integer> sideInputValue = c.sideInput(sideInputView);
                          if (!sideInputValue.containsKey("key" + c.element())) {
                            c.output(c.element());
                          }
                        }
                      })
                  .withSideInputs(sideInputView));
  PAssert.that(input).containsInAnyOrder(3, 4, 5, 6, 7, 8, 9, 10);
  pipeline.run();
}
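One caveat worth noting: View.asMap() requires the side-input keys to be unique. If duplicate keys are possible, View.asMultimap() is the usual alternative; a sketch under that assumption:

// Hypothetical variant of the side input above, allowing duplicate keys.
PCollectionView<Map<String, Iterable<Integer>>> sideInputView =
    pipeline
        .apply("Create sideInput", Create.of(KV.of("key1", 1), KV.of("key1", 2)))
        .apply(View.asMultimap());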
 
Example #10
Source File: ReadSourceStreamingTest.java    From beam with Apache License 2.0
private static void runProgram(String resultPath) {
  Pipeline p = FlinkTestPipeline.createForStreaming();

  p.apply(GenerateSequence.from(0).to(10))
      .apply(
          ParDo.of(
              new DoFn<Long, String>() {
                @ProcessElement
                public void processElement(ProcessContext c) throws Exception {
                  c.output(c.element().toString());
                }
              }))
      .apply(TextIO.write().to(resultPath));

  p.run();
}
 
Example #11
Source File: BeamSideInputJoinRelTest.java    From beam with Apache License 2.0
@Test
public void testInnerJoin_boundedTableOnTheLeftSide() throws Exception {
  String sql =
      "SELECT o1.order_id, o1.sum_site_id, o2.buyer FROM "
          + " ORDER_DETAILS1 o2 "
          + " JOIN "
          + "(select order_id, sum(site_id) as sum_site_id FROM ORDER_DETAILS "
          + "          GROUP BY order_id, TUMBLE(order_time, INTERVAL '1' HOUR)) o1 "
          + " on "
          + " o1.order_id=o2.order_id";

  PCollection<Row> rows = compilePipeline(sql, pipeline);
  PAssert.that(rows.apply(ParDo.of(new TestUtils.BeamSqlRow2StringDoFn())))
      .containsInAnyOrder(
          TestUtils.RowsBuilder.of(
                  Schema.FieldType.INT32, "order_id",
                  Schema.FieldType.INT32, "sum_site_id",
                  Schema.FieldType.STRING, "buyer")
              .addRows(1, 3, "james", 2, 5, "bond")
              .getStringRows());
  pipeline.run();
}
 
Example #12
Source File: GenerateSequenceTest.java    From beam with Apache License 2.0
@Test
@Category(NeedsRunner.class)
public void testUnboundedInputTimestamps() {
  long numElements = 1000;

  PCollection<Long> input =
      p.apply(GenerateSequence.from(0).to(numElements).withTimestampFn(new ValueAsTimestampFn()));
  addCountingAsserts(input, 0, numElements);

  PCollection<Long> diffs =
      input
          .apply("TimestampDiff", ParDo.of(new ElementValueDiff()))
          .apply("DistinctTimestamps", Distinct.create());
  // This assert also confirms that diffs only has one unique value.
  PAssert.thatSingleton(diffs).isEqualTo(0L);

  p.run();
}
 
Example #13
Source File: MyBeamJob.java    From hazelcast-jet-demos with Apache License 2.0
public static Pipeline build(PipelineOptions pipelineOptions) {
  Pipeline pipeline = Pipeline.create(pipelineOptions);

  pipeline
      .apply("unbounded-source",
          Read.from(new MyUnboundedSource("beam-input")))
      .apply("reformat-and-timestamp",
          ParDo.of(new MyEnrichAndReformatFn()))
      .apply("window",
          Window.<String>into(FixedWindows.of(ONE_SECOND))
              .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()))
              .discardingFiredPanes()
              .withAllowedLateness(ONE_SECOND))
      .apply("sink",
          FileIO.<String>write()
              .via(TextIO.sink())
              .to(".")
              .withPrefix("beam-output")
              .withNumShards(1));

  return pipeline;
}
 
Example #14
Source File: BigQueryDeadletterSink.java    From feast with Apache License 2.0
@Override
public PDone expand(PCollection<FailedElement> input) {
  TimePartitioning partition = new TimePartitioning().setType("DAY");
  partition.setField(TIMESTAMP_COLUMN);
  input
      .apply("FailedElementToTableRow", ParDo.of(new FailedElementToTableRowFn()))
      .apply(
          "WriteFailedElementsToBigQuery",
          BigQueryIO.writeTableRows()
              .to(getTableSpec())
              .withJsonSchema(getJsonSchema())
              .withTimePartitioning(partition)
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));
  return PDone.in(input.getPipeline());
}
 
Example #15
Source File: DirectRunnerTest.java    From beam with Apache License 2.0
/**
 * Tests that a {@link DoFn} that mutates an output with a good equals() fails in the {@link
 * DirectRunner}.
 */
@Test
public void testMutatingOutputThenTerminateDoFnError() throws Exception {
  Pipeline pipeline = getPipeline();

  pipeline
      .apply(Create.of(42))
      .apply(
          ParDo.of(
              new DoFn<Integer, List<Integer>>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  List<Integer> outputList = Arrays.asList(1, 2, 3, 4);
                  c.output(outputList);
                  outputList.set(0, 37);
                }
              }));

  thrown.expect(IllegalMutationException.class);
  thrown.expectMessage("output");
  thrown.expectMessage("must not be mutated");
  pipeline.run();
}
 
Example #16
Source File: KinesisIOIT.java    From beam with Apache License 2.0
/** Write test dataset into Kinesis stream. */
private void runWrite() {
  pipelineWrite
      .apply("Generate Sequence", GenerateSequence.from(0).to((long) numberOfRows))
      .apply("Prepare TestRows", ParDo.of(new TestRow.DeterministicallyConstructTestRowFn()))
      .apply("Prepare Kinesis input records", ParDo.of(new ConvertToBytes()))
      .apply(
          "Write to Kinesis",
          KinesisIO.write()
              .withStreamName(options.getAwsKinesisStream())
              .withPartitioner(new RandomPartitioner())
              .withAWSClientsProvider(
                  options.getAwsAccessKey(),
                  options.getAwsSecretKey(),
                  Regions.fromName(options.getAwsKinesisRegion())));

  pipelineWrite.run().waitUntilFinish();
}
 
Example #17
Source File: BulkCompressorTest.java    From DataflowTemplates with Apache License 2.0
/** Tests the {@link BulkCompressor.Compressor} performs compression properly. */
@Test
public void testCompressFile() throws Exception {
  // Setup test
  final Compression compression = Compression.GZIP;

  final ValueProvider<String> outputDirectoryProvider =
      pipeline.newProvider(tempFolderCompressedPath.toString());

  final ValueProvider<Compression> compressionProvider = StaticValueProvider.of(compression);

  final Metadata metadata = FileSystems.matchSingleFileSpec(textFile.toString());

  // Execute the compressor
  PCollection<String> lines = pipeline
      .apply("Create File Input", Create.of(metadata))
      .apply("Compress", ParDo.of(new Compressor(outputDirectoryProvider, compressionProvider)))
      .apply("Read the Files", TextIO.readAll().withCompression(Compression.AUTO));

  // Test the result
  PAssert.that(lines).containsInAnyOrder(FILE_CONTENT);
  pipeline.run();
}
 
Example #18
Source File: UserScoreTest.java    From deployment-examples with MIT License
/** Test that bad input data is dropped appropriately. */
@Test
@Category(ValidatesRunner.class)
public void testUserScoresBadInput() throws Exception {

  PCollection<String> input = p.apply(Create.of(GAME_EVENTS2).withCoder(StringUtf8Coder.of()));

  PCollection<KV<String, Integer>> extract =
      input
          .apply(ParDo.of(new ParseEventFn()))
          .apply(
              MapElements.into(
                      TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers()))
                  .via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore())));

  PAssert.that(extract).empty();

  p.run().waitUntilFinish();
}
 
Example #19
Source File: RabbitMqIO.java    From beam with Apache License 2.0
@Override
public PCollection<?> expand(PCollection<RabbitMqMessage> input) {
  checkArgument(
      exchange() != null || queue() != null, "Either exchange or queue has to be specified");
  if (exchange() != null) {
    checkArgument(queue() == null, "Queue can't be set in the same time as exchange");
  }
  if (queue() != null) {
    checkArgument(exchange() == null, "Exchange can't be set in the same time as queue");
  }
  if (queueDeclare()) {
    checkArgument(queue() != null, "Queue is required for the queue declare");
  }
  if (exchangeDeclare()) {
    checkArgument(exchange() != null, "Exchange is required for the exchange declare");
  }
  return input.apply(ParDo.of(new WriteFn(this)));
}
 
Example #20
Source File: Log.java    From beam with Apache License 2.0
@Override
public PCollection<T> expand(PCollection<T> input) {
  return input.apply(
      ParDo.of(
          new DoFn<T, T>() {

            @ProcessElement
            public void processElement(
                @Element T element, OutputReceiver<T> out, BoundedWindow window) {

              String message = prefix + element.toString();

              if (!(window instanceof GlobalWindow)) {
                message = message + "  Window:" + window.toString();
              }

              LOG.info(message);

              out.output(element);
            }
          }));
}
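The enclosing Log class is not shown in this excerpt, so the exact entry point is an assumption; if it exposes a static factory wrapping its constructor, usage might look like this:

// Hypothetical factory method; only the expand() above is visible in the excerpt.
PCollection<String> logged = input.apply("LogElements", Log.ofElements("my-prefix: "));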
 
Example #21
Source File: DirectRunnerTest.java    From beam with Apache License 2.0
/**
 * Tests that a {@link DoFn} that mutates an input with a bad equals() still fails in the {@link
 * DirectRunner}.
 */
@Test
public void testMutatingInputCoderDoFnError() throws Exception {
  Pipeline pipeline = getPipeline();

  pipeline
      .apply(Create.of(new byte[] {0x1, 0x2, 0x3}, new byte[] {0x4, 0x5, 0x6}))
      .apply(
          ParDo.of(
              new DoFn<byte[], Integer>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  byte[] inputArray = c.element();
                  inputArray[0] = 0xa;
                  c.output(13);
                }
              }));

  thrown.expect(IllegalMutationException.class);
  thrown.expectMessage("Input");
  thrown.expectMessage("must not be mutated");
  pipeline.run();
}
 
Example #22
Source File: BeamSideInputLookupJoinRelTest.java    From beam with Apache License 2.0
@Test
public void testLookupTableRightOuterJoinWithBoundedTable() throws Exception {
  String sql =
      "SELECT o1.order_id, o2.site_name FROM "
          + " SITE_LKP o2 "
          + " RIGHT OUTER JOIN "
          + " ORDER_DETAILS1 o1 "
          + " on "
          + " o1.site_id=o2.site_id ";
  PCollection<Row> rows = compilePipeline(sql, pipeline);
  PAssert.that(rows.apply(ParDo.of(new TestUtils.BeamSqlRow2StringDoFn())))
      .containsInAnyOrder(
          TestUtils.RowsBuilder.ofNullable(
                  Schema.FieldType.INT32,
                  "order_id",
                  nullable,
                  Schema.FieldType.STRING,
                  "site_name",
                  nullable)
              .addRows(1, "SITE1")
              .addRows(2, null)
              .addRows(3, null)
              .getStringRows());
  pipeline.run();
}
 
Example #23
Source File: Task.java    From beam with Apache License 2.0
static PCollectionTuple applyTransform(
    PCollection<Integer> numbers, TupleTag<Integer> numBelow100Tag,
    TupleTag<Integer> numAbove100Tag) {

  return numbers.apply(ParDo.of(new DoFn<Integer, Integer>() {

    @ProcessElement
    public void processElement(@Element Integer number, MultiOutputReceiver out) {
      if (number <= 100) {
        out.get(numBelow100Tag).output(number);
      } else {
        out.get(numAbove100Tag).output(number);
      }
    }

  }).withOutputTags(numBelow100Tag, TupleTagList.of(numAbove100Tag)));
}
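A sketch of how a caller might invoke this; the tag declarations and result handling below are illustrative, not part of the original file:

// TupleTags are created as anonymous subclasses so the element type is captured.
TupleTag<Integer> numBelow100Tag = new TupleTag<Integer>() {};
TupleTag<Integer> numAbove100Tag = new TupleTag<Integer>() {};

PCollectionTuple outputs = applyTransform(numbers, numBelow100Tag, numAbove100Tag);
PCollection<Integer> below100 = outputs.get(numBelow100Tag);  // elements <= 100
PCollection<Integer> above100 = outputs.get(numAbove100Tag);  // elements > 100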
 
Example #24
Source File: BeamUncollectRel.java    From beam with Apache License 2.0
@Override
public PCollection<Row> expand(PCollectionList<Row> pinput) {
  checkArgument(
      pinput.size() == 1,
      "Wrong number of inputs for %s: %s",
      BeamUncollectRel.class.getSimpleName(),
      pinput);
  PCollection<Row> upstream = pinput.get(0);

  // Each row of the input contains a single array of things to be emitted; Calcite knows
  // what the row looks like
  Schema outputSchema = CalciteUtils.toSchema(getRowType());

  PCollection<Row> uncollected =
      upstream.apply(ParDo.of(new UncollectDoFn(outputSchema))).setRowSchema(outputSchema);

  return uncollected;
}
 
Example #25
Source File: AvroToBigtable.java    From DataflowTemplates with Apache License 2.0
public static PipelineResult run(Options options) {
  Pipeline pipeline = Pipeline.create(options);

  BigtableIO.Write write =
      BigtableIO.write()
          .withProjectId(options.getBigtableProjectId())
          .withInstanceId(options.getBigtableInstanceId())
          .withTableId(options.getBigtableTableId());

  pipeline
      .apply("Read from Avro", AvroIO.read(BigtableRow.class).from(options.getInputFilePattern()))
      .apply(
          "Transform to Bigtable",
          ParDo.of(
              AvroToBigtableFn.createWithSplitLargeRows(
                  options.getSplitLargeRows(), MAX_MUTATIONS_PER_ROW)))
      .apply("Write to Bigtable", write);

  return pipeline.run();
}
 
Example #26
Source File: StatefulTeamScoreTest.java    From beam with Apache License 2.0
/**
 * Tests that {@link UpdateTeamScoreFn} {@link org.apache.beam.sdk.transforms.DoFn} outputs
 * correctly for one team.
 */
@Test
public void testScoreUpdatesOneTeam() {

  TestStream<KV<String, GameActionInfo>> createEvents =
      TestStream.create(KvCoder.of(StringUtf8Coder.of(), AvroCoder.of(GameActionInfo.class)))
          .advanceWatermarkTo(baseTime)
          .addElements(
              event(TestUser.RED_TWO, 99, Duration.standardSeconds(10)),
              event(TestUser.RED_ONE, 1, Duration.standardSeconds(20)),
              event(TestUser.RED_ONE, 0, Duration.standardSeconds(30)),
              event(TestUser.RED_TWO, 100, Duration.standardSeconds(40)),
              event(TestUser.RED_TWO, 201, Duration.standardSeconds(50)))
          .advanceWatermarkToInfinity();

  PCollection<KV<String, Integer>> teamScores =
      p.apply(createEvents).apply(ParDo.of(new UpdateTeamScoreFn(100)));

  String redTeam = TestUser.RED_ONE.getTeam();

  PAssert.that(teamScores)
      .inWindow(GlobalWindow.INSTANCE)
      .containsInAnyOrder(KV.of(redTeam, 100), KV.of(redTeam, 200), KV.of(redTeam, 401));

  p.run().waitUntilFinish();
}
 
Example #27
Source File: LocalSpannerIO.java    From DataflowTemplates with Apache License 2.0
@Override
public PCollectionView<Transaction> expand(PBegin input) {
  getSpannerConfig().validate();

  return input
      .apply(Create.of(1))
      .apply("Create transaction", ParDo.of(new LocalCreateTransactionFn(this)))
      .apply("As PCollectionView", View.asSingleton());
}
 
Example #28
Source File: WordCount.java    From beam with Apache License 2.0
@Override
public PCollection<KV<String, Long>> expand(PCollection<String> lines) {

  // Convert lines of text into individual words.
  PCollection<String> words = lines.apply(ParDo.of(new ExtractWordsFn()));

  // Count the number of times each word occurs.
  return words.apply(Count.perElement());
}
 
Example #29
Source File: AvroToBigtableTest.java    From DataflowTemplates with Apache License 2.0
@Test
public void applyAvroToBigtableFnSplitLargeRows() throws Exception {
  ValueProvider<Boolean> splitlargeRows = ValueProvider.StaticValueProvider.of(true);
  BigtableRow avroRow1 = createAvroRow("row1");
  addAvroCell(avroRow1, "family1", "column1", 1, "value1");
  addAvroCell(avroRow1, "family1", "column1", 2, "value2");
  addAvroCell(avroRow1, "family1", "column2", 1, "value3");
  addAvroCell(avroRow1, "family2", "column1", 1, "value4");
  BigtableRow avroRow2 = createAvroRow("row2");
  addAvroCell(avroRow2, "family2", "column2", 2, "value2");
  List<BigtableRow> avroRows = ImmutableList.of(avroRow1, avroRow2);

  KV<ByteString, Iterable<Mutation>> rowMutations1 = createBigtableRowMutations("row1");
  addBigtableMutation(rowMutations1, "family1", "column1", 1, "value1");
  addBigtableMutation(rowMutations1, "family1", "column1", 2, "value2");
  KV<ByteString, Iterable<Mutation>> rowMutations2 = createBigtableRowMutations("row1");
  addBigtableMutation(rowMutations2, "family1", "column2", 1, "value3");
  addBigtableMutation(rowMutations2, "family2", "column1", 1, "value4");
  KV<ByteString, Iterable<Mutation>> rowMutations3 = createBigtableRowMutations("row2");
  addBigtableMutation(rowMutations3, "family2", "column2", 2, "value2");
  List<KV<ByteString, Iterable<Mutation>>> expectedBigtableRows =
      ImmutableList.of(rowMutations1, rowMutations2, rowMutations3);

  PCollection<KV<ByteString, Iterable<Mutation>>> bigtableRows =
      pipeline
          .apply("Create", Create.of(avroRows))
          .apply(
              "Transform to Bigtable",
              ParDo.of(AvroToBigtableFn.createWithSplitLargeRows(splitlargeRows, 2)));

  PAssert.that(bigtableRows).containsInAnyOrder(expectedBigtableRows);
  pipeline.run();
}
 
Example #30
Source File: BoundedSideInputJoin.java    From beam with Apache License 2.0
@Override
public PCollection<Bid> expand(PCollection<Event> events) {

  checkState(getSideInput() != null, "Configuration error: side input is null");

  final PCollectionView<Map<Long, String>> sideInputMap = getSideInput().apply(View.asMap());

  return events
      // Only want the bid events; easier to fake some side input data
      .apply(NexmarkQueryUtil.JUST_BIDS)

      // Map the conversion function over all bids.
      .apply(
          name + ".JoinToFiles",
          ParDo.of(
                  new DoFn<Bid, Bid>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) {
                      Bid bid = c.element();
                      c.output(
                          new Bid(
                              bid.auction,
                              bid.bidder,
                              bid.price,
                              bid.dateTime,
                              c.sideInput(sideInputMap)
                                  .get(bid.bidder % configuration.sideInputRowCount)));
                    }
                  })
              .withSideInputs(sideInputMap));
}