com.google.cloud.dataflow.sdk.transforms.MapElements Java Examples

The following examples show how to use com.google.cloud.dataflow.sdk.transforms.MapElements. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TimestampRides.java    From cloud-dataflow-nyc-taxi-tycoon with Apache License 2.0 5 votes vote down vote up
public static void main(String[] args) {
  CustomPipelineOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(CustomPipelineOptions.class);
  Pipeline p = Pipeline.create(options);

  p.apply(PubsubIO.Read.named("read from PubSub")
      .topic(String.format("projects/%s/topics/%s", options.getSourceProject(), options.getSourceTopic()))
      .timestampLabel("ts")
      .withCoder(TableRowJsonCoder.of()))

   .apply("window 1s", Window.into(FixedWindows.of(Duration.standardSeconds(1))))

   .apply("parse timestamps",
      MapElements.via(
        (TableRow e) ->
          Instant.from(DateTimeFormatter.ISO_DATE_TIME.parse(e.get("timestamp").toString())).toEpochMilli())
      .withOutputType(TypeDescriptor.of(Long.class)))

   .apply("max timestamp in window", Max.longsGlobally().withoutDefaults())

   .apply("transform",
      MapElements.via(
        (Long t) -> {
          TableRow ride = new TableRow();
          ride.set("timestamp", Instant.ofEpochMilli(t).toString());
          return ride;
        })
      .withOutputType(TypeDescriptor.of(TableRow.class)))

   .apply(PubsubIO.Write.named("write to PubSub")
      .topic(String.format("projects/%s/topics/%s", options.getSinkProject(), options.getSinkTopic()))
      .withCoder(TableRowJsonCoder.of()));
  p.run();
}
 
Example #2
Source File: ExactDollarRides.java    From cloud-dataflow-nyc-taxi-tycoon with Apache License 2.0 5 votes vote down vote up
public static void main(String[] args) {
  CustomPipelineOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(CustomPipelineOptions.class);
  Pipeline p = Pipeline.create(options);

  p.apply(PubsubIO.Read.named("read from PubSub")
      .topic(String.format("projects/%s/topics/%s", options.getSourceProject(), options.getSourceTopic()))
      .timestampLabel("ts")
      .withCoder(TableRowJsonCoder.of()))

   .apply("extract dollars",
      MapElements.via((TableRow x) -> Double.parseDouble(x.get("meter_increment").toString()))
        .withOutputType(TypeDescriptor.of(Double.class)))

   .apply("fixed window", Window.into(FixedWindows.of(Duration.standardMinutes(1))))
   .apply("trigger",
      Window.<Double>triggering(
        AfterWatermark.pastEndOfWindow()
          .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.standardSeconds(1)))
          .withLateFirings(AfterPane.elementCountAtLeast(1)))
        .accumulatingFiredPanes()
        .withAllowedLateness(Duration.standardMinutes(5)))

   .apply("sum whole window", Sum.doublesGlobally().withoutDefaults())
   .apply("format rides", ParDo.of(new TransformRides()))

   .apply(PubsubIO.Write.named("WriteToPubsub")
      .topic(String.format("projects/%s/topics/%s", options.getSinkProject(), options.getSinkTopic()))
      .withCoder(TableRowJsonCoder.of()));
  p.run();
}
 
Example #3
Source File: LatestRides.java    From cloud-dataflow-nyc-taxi-tycoon with Apache License 2.0 5 votes vote down vote up
public static void main(String[] args) {
  CustomPipelineOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(CustomPipelineOptions.class);
  Pipeline p = Pipeline.create(options);

  p.apply(PubsubIO.Read.named("read from PubSub")
      .topic(String.format("projects/%s/topics/%s", options.getSourceProject(), options.getSourceTopic()))
      .timestampLabel("ts")
      .withCoder(TableRowJsonCoder.of()))

   .apply("key rides by rideid",
      MapElements.via((TableRow ride) -> KV.of(ride.get("ride_id").toString(), ride))
        .withOutputType(new TypeDescriptor<KV<String, TableRow>>() {}))

   .apply("session windows on rides with early firings",
      Window.<KV<String, TableRow>>into(
        Sessions.withGapDuration(Duration.standardMinutes(60)))
          .triggering(
            AfterWatermark.pastEndOfWindow()
              .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(2000))))
          .accumulatingFiredPanes()
          .withAllowedLateness(Duration.ZERO))

   .apply("group ride points on same ride", Combine.perKey(new LatestPointCombine()))

   .apply("discard key",
      MapElements.via((KV<String, TableRow> a) -> a.getValue())
        .withOutputType(TypeDescriptor.of(TableRow.class)))

   .apply(PubsubIO.Write.named("WriteToPubsub")
      .topic(String.format("projects/%s/topics/%s", options.getSinkProject(), options.getSinkTopic()))
      .withCoder(TableRowJsonCoder.of()));
  p.run();
}
 
Example #4
Source File: WordCountITCase.java    From flink-dataflow with Apache License 2.0 5 votes vote down vote up
@Override
protected void testProgram() throws Exception {

	Pipeline p = FlinkTestPipeline.createForBatch();

	PCollection<String> input = p.apply(Create.of(WORDS)).setCoder(StringUtf8Coder.of());

	input
			.apply(new WordCount.CountWords())
			.apply(MapElements.via(new WordCount.FormatAsTextFn()))
			.apply(TextIO.Write.to(resultPath));

	p.run();
}