com.google.cloud.dataflow.sdk.transforms.MapElements Java Examples

The following examples show how to use com.google.cloud.dataflow.sdk.transforms.MapElements. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: TimestampRides.java From cloud-dataflow-nyc-taxi-tycoon with Apache License 2.0

5 votes

/**
 * Builds and runs a pipeline that reads {@code TableRow} messages from a Pub/Sub topic,
 * parses each row's "timestamp" field to epoch milliseconds, keeps the maximum value per
 * one-second fixed window, and writes that maximum back out as a single-field row.
 *
 * @param args command-line arguments, parsed and validated as {@code CustomPipelineOptions}
 */
public static void main(String[] args) {
  CustomPipelineOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(CustomPipelineOptions.class);
  Pipeline pipeline = Pipeline.create(options);

  // Fully qualified Pub/Sub topic paths for the source and the sink.
  String sourceTopic =
      String.format("projects/%s/topics/%s", options.getSourceProject(), options.getSourceTopic());
  String sinkTopic =
      String.format("projects/%s/topics/%s", options.getSinkProject(), options.getSinkTopic());

  pipeline
      .apply(
          PubsubIO.Read.named("read from PubSub")
              .topic(sourceTopic)
              .timestampLabel("ts")
              .withCoder(TableRowJsonCoder.of()))
      .apply("window 1s", Window.into(FixedWindows.of(Duration.standardSeconds(1))))
      // ISO date-time string -> epoch milliseconds.
      .apply(
          "parse timestamps",
          MapElements.via(
                  (TableRow row) -> {
                    String isoTimestamp = row.get("timestamp").toString();
                    return Instant.from(DateTimeFormatter.ISO_DATE_TIME.parse(isoTimestamp))
                        .toEpochMilli();
                  })
              .withOutputType(TypeDescriptor.of(Long.class)))
      // No default value is emitted for windows that received no elements.
      .apply("max timestamp in window", Max.longsGlobally().withoutDefaults())
      // Wrap the winning epoch-millis value back into a row with a single "timestamp" field.
      .apply(
          "transform",
          MapElements.via(
                  (Long maxMillis) ->
                      new TableRow().set("timestamp", Instant.ofEpochMilli(maxMillis).toString()))
              .withOutputType(TypeDescriptor.of(TableRow.class)))
      .apply(
          PubsubIO.Write.named("write to PubSub")
              .topic(sinkTopic)
              .withCoder(TableRowJsonCoder.of()));

  pipeline.run();
}

Example #2

Source File: ExactDollarRides.java From cloud-dataflow-nyc-taxi-tycoon with Apache License 2.0

5 votes

/**
 * Builds and runs a pipeline that reads {@code TableRow} messages from a Pub/Sub topic,
 * extracts each row's "meter_increment" as a double, sums the values per one-minute fixed
 * window (with early and late firings, accumulating panes), formats the sums with
 * {@code TransformRides}, and writes the result to the sink topic.
 *
 * @param args command-line arguments, parsed and validated as {@code CustomPipelineOptions}
 */
public static void main(String[] args) {
  CustomPipelineOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(CustomPipelineOptions.class);
  Pipeline pipeline = Pipeline.create(options);

  // Fully qualified Pub/Sub topic paths for the source and the sink.
  String sourceTopic =
      String.format("projects/%s/topics/%s", options.getSourceProject(), options.getSourceTopic());
  String sinkTopic =
      String.format("projects/%s/topics/%s", options.getSinkProject(), options.getSinkTopic());

  pipeline
      .apply(
          PubsubIO.Read.named("read from PubSub")
              .topic(sourceTopic)
              .timestampLabel("ts")
              .withCoder(TableRowJsonCoder.of()))
      .apply(
          "extract dollars",
          MapElements.via(
                  (TableRow ride) -> {
                    String increment = ride.get("meter_increment").toString();
                    return Double.parseDouble(increment);
                  })
              .withOutputType(TypeDescriptor.of(Double.class)))
      .apply("fixed window", Window.into(FixedWindows.of(Duration.standardMinutes(1))))
      // Early firing 1s (processing time) after the first element in a pane; after the
      // watermark passes the end of the window, fire again for every late element.
      .apply(
          "trigger",
          Window.<Double>triggering(
                  AfterWatermark.pastEndOfWindow()
                      .withEarlyFirings(
                          AfterProcessingTime.pastFirstElementInPane()
                              .plusDelayOf(Duration.standardSeconds(1)))
                      .withLateFirings(AfterPane.elementCountAtLeast(1)))
              .accumulatingFiredPanes()
              .withAllowedLateness(Duration.standardMinutes(5)))
      .apply("sum whole window", Sum.doublesGlobally().withoutDefaults())
      .apply("format rides", ParDo.of(new TransformRides()))
      .apply(
          PubsubIO.Write.named("WriteToPubsub")
              .topic(sinkTopic)
              .withCoder(TableRowJsonCoder.of()));

  pipeline.run();
}

Example #3

Source File: LatestRides.java From cloud-dataflow-nyc-taxi-tycoon with Apache License 2.0

5 votes

/**
 * Builds and runs a pipeline that reads {@code TableRow} messages from a Pub/Sub topic,
 * keys them by their "ride_id" field, groups them into session windows with a 60-minute
 * gap, combines each key's rows with {@code LatestPointCombine}, drops the key, and writes
 * the resulting rows to the sink topic.
 *
 * @param args command-line arguments, parsed and validated as {@code CustomPipelineOptions}
 */
public static void main(String[] args) {
  CustomPipelineOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(CustomPipelineOptions.class);
  Pipeline pipeline = Pipeline.create(options);

  // Fully qualified Pub/Sub topic paths for the source and the sink.
  String sourceTopic =
      String.format("projects/%s/topics/%s", options.getSourceProject(), options.getSourceTopic());
  String sinkTopic =
      String.format("projects/%s/topics/%s", options.getSinkProject(), options.getSinkTopic());

  pipeline
      .apply(
          PubsubIO.Read.named("read from PubSub")
              .topic(sourceTopic)
              .timestampLabel("ts")
              .withCoder(TableRowJsonCoder.of()))
      .apply(
          "key rides by rideid",
          MapElements.via(
                  (TableRow row) -> {
                    String rideId = row.get("ride_id").toString();
                    return KV.of(rideId, row);
                  })
              .withOutputType(new TypeDescriptor<KV<String, TableRow>>() {}))
      // Early firing 2000 ms (processing time) after the first element in a pane; panes
      // accumulate and no lateness is allowed.
      .apply(
          "session windows on rides with early firings",
          Window.<KV<String, TableRow>>into(Sessions.withGapDuration(Duration.standardMinutes(60)))
              .triggering(
                  AfterWatermark.pastEndOfWindow()
                      .withEarlyFirings(
                          AfterProcessingTime.pastFirstElementInPane()
                              .plusDelayOf(Duration.millis(2000))))
              .accumulatingFiredPanes()
              .withAllowedLateness(Duration.ZERO))
      .apply("group ride points on same ride", Combine.perKey(new LatestPointCombine()))
      .apply(
          "discard key",
          MapElements.via((KV<String, TableRow> keyedRide) -> keyedRide.getValue())
              .withOutputType(TypeDescriptor.of(TableRow.class)))
      .apply(
          PubsubIO.Write.named("WriteToPubsub")
              .topic(sinkTopic)
              .withCoder(TableRowJsonCoder.of()));

  pipeline.run();
}

Example #4

Source File: WordCountITCase.java From flink-dataflow with Apache License 2.0

5 votes

/**
 * Runs a batch pipeline that creates a collection from {@code WORDS}, applies
 * {@code WordCount.CountWords}, formats each result with {@code WordCount.FormatAsTextFn},
 * and writes the text output to {@code resultPath}.
 *
 * @throws Exception declared by the overridden method; not thrown explicitly here
 */
@Override
protected void testProgram() throws Exception {
	Pipeline pipeline = FlinkTestPipeline.createForBatch();

	// Single chain: in-memory source with an explicit UTF-8 string coder, then count,
	// format, and write.
	pipeline
			.apply(Create.of(WORDS))
			.setCoder(StringUtf8Coder.of())
			.apply(new WordCount.CountWords())
			.apply(MapElements.via(new WordCount.FormatAsTextFn()))
			.apply(TextIO.Write.to(resultPath));

	pipeline.run();
}