Java Code Examples for org.apache.beam.sdk.values.PDone#in()

The following examples show how to use org.apache.beam.sdk.values.PDone#in(). You can go to the original project or source file by following the links above each example.
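PDone is the trivial terminal output returned by sink-style transforms that produce no output PCollection; PDone.in(Pipeline) constructs it for a given pipeline and is conventionally returned at the end of expand(). A minimal sketch of the pattern, assuming standard Beam SDK imports (the LogSink transform and its DoFn are hypothetical):

import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PDone;

public class LogSink extends PTransform<PCollection<String>, PDone> {
  @Override
  public PDone expand(PCollection<String> input) {
    // Perform the side-effecting write; no PCollection flows out of this step.
    input.apply(
        ParDo.of(
            new DoFn<String, Void>() {
              @ProcessElement
              public void processElement(ProcessContext c) {
                System.out.println(c.element());
              }
            }));
    // Terminate this branch of the pipeline.
    return PDone.in(input.getPipeline());
  }
}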
Example 1
Source File: KafkaIO.java    From beam with Apache License 2.0
@Override
public PDone expand(PCollection<ProducerRecord<K, V>> input) {
  checkArgument(
      getProducerConfig().get(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG) != null,
      "withBootstrapServers() is required");

  checkArgument(getKeySerializer() != null, "withKeySerializer() is required");
  checkArgument(getValueSerializer() != null, "withValueSerializer() is required");

  if (isEOS()) {
    checkArgument(getTopic() != null, "withTopic() is required when isEOS() is true");
    KafkaExactlyOnceSink.ensureEOSSupport();

    // TODO: Verify that the group_id does not have existing state stored on Kafka unless
    //       this is an upgrade. This avoids issues with the simple mistake of reusing a
    //       group_id across multiple runs or across multiple jobs. This is checked when
    //       the sink transform initializes while processing the output; it might be
    //       better to check here to catch this common mistake earlier.

    input.apply(new KafkaExactlyOnceSink<>(this));
  } else {
    input.apply(ParDo.of(new KafkaWriter<>(this)));
  }
  return PDone.in(input.getPipeline());
}
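For reference, a hypothetical configuration that satisfies the checkArgument validations above ("records", the broker address, and the topic name are placeholders):

// records is assumed to be a PCollection<ProducerRecord<String, String>> from an upstream step.
records.apply(
    KafkaIO.<String, String>writeRecords()
        .withBootstrapServers("broker-1:9092")
        .withTopic("results")
        .withKeySerializer(StringSerializer.class)
        .withValueSerializer(StringSerializer.class));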
 
Example 2
Source File: TFRecordIO.java    From beam with Apache License 2.0
@Override
public PDone expand(PCollection<byte[]> input) {
  checkState(
      getOutputPrefix() != null,
      "need to set the output prefix of a TFRecordIO.Write transform");
  WriteFiles<byte[], Void, byte[]> write =
      WriteFiles.to(
          new TFRecordSink(
              getOutputPrefix(), getShardTemplate(), getFilenameSuffix(), getCompression()));
  if (getNumShards() > 0) {
    write = write.withNumShards(getNumShards());
  }
  if (getNoSpilling()) {
    write = write.withNoSpilling();
  }
  input.apply("Write", write);
  return PDone.in(input.getPipeline());
}
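A hypothetical usage of this Write transform (the prefix, suffix, and shard count are placeholders; serializedExamples is assumed to be a PCollection<byte[]>):

serializedExamples.apply(
    TFRecordIO.write()
        .to("/tmp/output/tfrecords")
        .withSuffix(".tfrecord")
        .withNumShards(3));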
 
Example 3
Source File: JdbcIO.java    From beam with Apache License 2.0
@Override
public PDone expand(PCollection<T> input) {
  // FIXME: validate the table input and reject invalid tables
  if (input.hasSchema() && !hasStatementAndSetter()) {
    checkArgument(
        inner.getTable() != null, "table cannot be null if statement is not provided");
    Schema schema = input.getSchema();
    List<SchemaUtil.FieldWithIndex> fields = getFilteredFields(schema);
    inner =
        inner.withStatement(
            JdbcUtil.generateStatement(
                inner.getTable(),
                fields.stream()
                    .map(SchemaUtil.FieldWithIndex::getField)
                    .collect(Collectors.toList())));
    inner =
        inner.withPreparedStatementSetter(
            new AutoGeneratedPreparedStatementSetter(fields, input.getToRowFunction()));
  }

  inner.expand(input);
  return PDone.in(input.getPipeline());
}
 
Example 4
Source File: BeamSqlBuiltinFunctionsIntegrationTestBase.java    From beam with Apache License 2.0
@Override
public PDone expand(PBegin begin) {
  PCollection<Boolean> result =
      begin
          .apply(Create.of(DUMMY_ROW).withRowSchema(DUMMY_SCHEMA))
          .apply(SqlTransform.query("SELECT " + expr))
          .apply(MapElements.into(TypeDescriptors.booleans()).via(row -> row.getBoolean(0)));

  PAssert.that(result)
      .satisfies(
          input -> {
            assertTrue("Test expression is false: " + expr, Iterables.getOnlyElement(input));
            return null;
          });
  return PDone.in(begin.getPipeline());
}
 
Example 5
Source File: WriteToBigQuery.java    From beam with Apache License 2.0
@Override
public PDone expand(PCollection<InputT> teamAndScore) {
  teamAndScore
      .apply("ConvertToRow", ParDo.of(new BuildRowFn()))
      .apply(
          BigQueryIO.writeTableRows()
              .to(getTable(projectId, datasetId, tableName))
              .withSchema(getSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));
  return PDone.in(teamAndScore.getPipeline());
}
 
Example 6
Source File: PAssert.java    From beam with Apache License 2.0
@Override
public PDone expand(PCollection<T> input) {
  input
      .apply("GroupGlobally", new GroupGlobally<>(rewindowingStrategy))
      .apply("GetPane", MapElements.via(paneExtractor))
      .setCoder(IterableCoder.of(input.getCoder()))
      .apply("RunChecks", ParDo.of(new SingletonCheckerDoFn<>(checkerFn, site)))
      .apply("VerifyAssertions", new DefaultConcludeTransform());

  return PDone.in(input.getPipeline());
}
 
Example 7
Source File: TransformTreeTest.java    From beam with Apache License 2.0
@Override
public PDone expand(PCollection<Integer> input) {
  // Apply an operation so that this is a composite transform.
  input.apply(Count.perElement());

  return PDone.in(input.getPipeline());
}
 
Example 8
Source File: WriteWindowedToBigQuery.java    From deployment-examples with MIT License
@Override
public PDone expand(PCollection<T> teamAndScore) {
  teamAndScore
      .apply("ConvertToRow", ParDo.of(new BuildRowFn()))
      .apply(
          BigQueryIO.writeTableRows()
              .to(getTable(projectId, datasetId, tableName))
              .withSchema(getSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));
  return PDone.in(teamAndScore.getPipeline());
}
 
Example 9
Source File: JmsIO.java    From beam with Apache License 2.0
@Override
public PDone expand(PCollection<String> input) {
  checkArgument(getConnectionFactory() != null, "withConnectionFactory() is required");
  checkArgument(
      getQueue() != null || getTopic() != null,
      "Either withQueue(queue) or withTopic(topic) is required");
  checkArgument(
      getQueue() == null || getTopic() == null,
      "withQueue(queue) and withTopic(topic) are exclusive");

  input.apply(ParDo.of(new WriterFn(this)));
  return PDone.in(input.getPipeline());
}
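A hypothetical configuration that passes the checks above (the broker URL and queue name are placeholders; ActiveMQConnectionFactory stands in for any JMS ConnectionFactory, and "messages" is assumed to be a PCollection<String>):

ConnectionFactory connectionFactory =
    new ActiveMQConnectionFactory("tcp://localhost:61616");
messages.apply(
    JmsIO.write()
        .withConnectionFactory(connectionFactory)
        .withQueue("my-queue"));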
 
Example 10
Source File: BeamEnumerableConverterTest.java    From beam with Apache License 2.0
@Override
public POutput buildIOWriter(PCollection<Row> input) {
  input.apply(
      ParDo.of(
          new DoFn<Row, Void>() {
            @ProcessElement
            public void processElement(ProcessContext context) {}
          }));
  return PDone.in(input.getPipeline());
}
 
Example 11
Source File: DataflowPipelineTranslatorTest.java    From beam with Apache License 2.0
@Override
public PDone expand(PCollection<Integer> input) {
  // Apply an operation so that this is a composite transform.
  input.apply(Count.perElement());

  return PDone.in(input.getPipeline());
}
 
Example 12
Source File: ElasticsearchIO.java    From beam with Apache License 2.0
@Override
public PDone expand(PCollection<String> input) {
  ConnectionConfiguration connectionConfiguration = getConnectionConfiguration();
  checkState(connectionConfiguration != null, "withConnectionConfiguration() is required");
  input.apply(ParDo.of(new WriteFn(this)));
  return PDone.in(input.getPipeline());
}
 
Example 13
Source File: BeamIOWrappingTest.java    From component-runtime with Apache License 2.0
@Override
public PDone expand(final PCollection<Sample> input) {
    new BeamSink(reverse).expand(input.apply(ParDo.of(new DoFn<Sample, String>() {

        @ProcessElement
        public void onElement(final ProcessContext ctx) {
            ctx.output(ctx.element().getData());
        }
    })));
    return PDone.in(input.getPipeline());
}
 
Example 14
Source File: GenericSourceSink.java    From incubator-nemo with Apache License 2.0
/**
 * Write data.
 * NEMO-365: This method could later be replaced using the HadoopFormatIO class.
 *
 * @param dataToWrite data to write
 * @param path        path to write data
 * @return returns {@link PDone}
 */
public static PDone write(final PCollection<String> dataToWrite,
                          final String path) {
  if (isHDFSPath(path)) {
    dataToWrite.apply(ParDo.of(new HDFSWrite(path)));
    return PDone.in(dataToWrite.getPipeline());
  } else {
    return dataToWrite.apply(TextIO.write().to(path));
  }
}
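As the Javadoc notes, write() dispatches on the path scheme, so usage is the same for both branches (the paths and the "lines" collection are placeholders):

// lines is assumed to be a PCollection<String> produced upstream.
GenericSourceSink.write(lines, "hdfs://namenode:8020/output/result"); // HDFS branch
GenericSourceSink.write(lines, "/tmp/output/result");                 // TextIO branch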
 
Example 15
Source File: AvroIO.java    From beam with Apache License 2.0
@Override
public PDone expand(PCollection<T> input) {
  input.apply(inner);
  return PDone.in(input.getPipeline());
}
 
Example 16
Source File: Println.java    From gcp-ingestion with Mozilla Public License 2.0
@Override
public PDone expand(PCollection<String> input) {
  input.apply(ParDo.of(fn));
  return PDone.in(input.getPipeline());
}
 
Example 17
Source File: ConsoleIO.java    From beam with Apache License 2.0
@Override
public PDone expand(PCollection<T> input) {
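  // No per-element logic here: the runner is expected to translate this transform
  // natively, so expand() only terminates the pipeline branch.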
  return PDone.in(input.getPipeline());
}
 
Example 18
Source File: SqsIO.java    From beam with Apache License 2.0
@Override
public PDone expand(PCollection<SendMessageRequest> input) {
  input.apply(ParDo.of(new SqsWriteFn(this)));
  return PDone.in(input.getPipeline());
}
 
Example 19
Source File: BeamOutputTransform.java    From hop with Apache License 2.0
@Override public PDone expand( PCollection<HopRow> input ) {

    try {
      // Only initialize once on this node/vm
      //
      BeamHop.init(transformPluginClasses, xpPluginClasses);

      // Inflate the metadata on the node where this is running...
      //
      IRowMeta rowMeta = JsonRowMeta.fromJson( rowMetaJson );

      // This is the end of a computing chain, we write out the results
      // We write a bunch of Strings, one per line basically
      //
      PCollection<String> stringCollection = input.apply( transformName, ParDo.of( new HopToStringFn( transformName, outputLocation, separator, enclosure, rowMetaJson, transformPluginClasses, xpPluginClasses ) ) );

      // We need to transform these lines into a file and then we're PDone
      //
      TextIO.Write write = TextIO.write();
      if ( StringUtils.isNotEmpty(outputLocation)) {
        String outputPrefix = outputLocation;
        if (!outputPrefix.endsWith( File.separator)) {
          outputPrefix+=File.separator;
        }
        if (StringUtils.isNotEmpty( filePrefix )) {
          outputPrefix+=filePrefix;
        }
        write = write.to( outputPrefix );
      }
      if (StringUtils.isNotEmpty( fileSuffix )) {
        write = write.withSuffix( fileSuffix );
      }

      // For streaming data sources...
      //
      if (windowed) {
        write = write.withWindowedWrites().withNumShards( 4 ); // TODO config
      }

      stringCollection.apply(write);

      // Get it over with
      //
      return PDone.in(input.getPipeline());

    } catch ( Exception e ) {
      numErrors.inc();
      LOG.error( "Error in beam output transform", e );
      throw new RuntimeException( "Error in beam output transform", e );
    }
  }
 
Example 20
Source File: TestTableProvider.java    From beam with Apache License 2.0
@Override
public POutput buildIOWriter(PCollection<Row> input) {
  input.apply(ParDo.of(new CollectorFn(tableWithRows))).setRowSchema(input.getSchema());
  return PDone.in(input.getPipeline());
}