org.apache.beam.sdk.PipelineResult Java Examples

The following examples show how to use org.apache.beam.sdk.PipelineResult. Each example lists the project it comes from, the source file, and the license it is distributed under.
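Before looking at the project-specific examples, the snippet below is a minimal, self-contained sketch of the PipelineResult lifecycle most of them follow: run the pipeline, wait for a terminal state, then optionally inspect metrics. The class name and the trivial Create step are illustrative assumptions, not taken from any of the projects listed here.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.metrics.MetricQueryResults;
import org.apache.beam.sdk.metrics.MetricResult;
import org.apache.beam.sdk.metrics.MetricsFilter;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;

public class PipelineResultBasics {

  public static void main(String[] args) {
    PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();
    Pipeline pipeline = Pipeline.create(options);

    // Trivial pipeline: materialize a few elements so there is something to run.
    pipeline.apply("CreateValues", Create.of("a", "b", "c"));

    // run() submits the pipeline and returns a PipelineResult handle.
    PipelineResult result = pipeline.run();

    // waitUntilFinish() blocks until the pipeline reaches a terminal state
    // (DONE, FAILED, or CANCELLED) and returns that state.
    PipelineResult.State state = result.waitUntilFinish();
    System.out.println("Pipeline finished in state: " + state);

    // The same handle exposes the metrics reported during execution.
    MetricQueryResults metrics =
        result.metrics().queryMetrics(MetricsFilter.builder().build());
    for (MetricResult<Long> counter : metrics.getCounters()) {
      System.out.println(counter.getName() + " = " + counter.getAttempted());
    }
  }
}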
Example #1
Source File: MongoDBIOIT.java    From beam with Apache License 2.0
private void collectAndPublishMetrics(PipelineResult writeResult, PipelineResult readResult) {
  String uuid = UUID.randomUUID().toString();
  String timestamp = Timestamp.now().toString();

  Set<Function<MetricsReader, NamedTestResult>> readSuppliers = getReadSuppliers(uuid, timestamp);
  Set<Function<MetricsReader, NamedTestResult>> writeSuppliers =
      getWriteSuppliers(uuid, timestamp);
  IOITMetrics readMetrics =
      new IOITMetrics(readSuppliers, readResult, NAMESPACE, uuid, timestamp);
  IOITMetrics writeMetrics =
      new IOITMetrics(writeSuppliers, writeResult, NAMESPACE, uuid, timestamp);
  readMetrics.publish(bigQueryDataset, bigQueryTable);
  readMetrics.publishToInflux(settings);
  writeMetrics.publish(bigQueryDataset, bigQueryTable);
  writeMetrics.publishToInflux(settings);
}
 
Example #2
Source File: SpannerWriteIT.java    From beam with Apache License 2.0
@Test
public void testReportFailures() throws Exception {
  int numRecords = 100;
  p.apply(GenerateSequence.from(0).to(2 * numRecords))
      .apply(ParDo.of(new GenerateMutations(options.getTable(), new DivBy2())))
      .apply(
          SpannerIO.write()
              .withProjectId(project)
              .withInstanceId(options.getInstanceId())
              .withDatabaseId(databaseName)
              .withFailureMode(SpannerIO.FailureMode.REPORT_FAILURES));

  PipelineResult result = p.run();
  result.waitUntilFinish();
  assertThat(result.getState(), is(PipelineResult.State.DONE));
  assertThat(countNumberOfRecords(), equalTo((long) numRecords));
}
 
Example #3
Source File: ParquetToBigtable.java    From DataflowTemplates with Apache License 2.0
public static PipelineResult run(Options options) {
  Pipeline pipeline = Pipeline.create(options);

  BigtableIO.Write write =
          BigtableIO.write()
                  .withProjectId(options.getBigtableProjectId())
                  .withInstanceId(options.getBigtableInstanceId())
                  .withTableId(options.getBigtableTableId());

  /**
   * Steps: 1) Read records from Parquet File. 2) Convert a GenericRecord to a
   * KV<ByteString,Iterable<Mutation>>. 3) Write KV to Bigtable's table.
   */
  pipeline
      .apply(
          "Read from Parquet",
          ParquetIO.read(BigtableRow.getClassSchema()).from(options.getInputFilePattern()))
      .apply(
          "Transform to Bigtable",
          ParDo.of(
              ParquetToBigtableFn.createWithSplitLargeRows(
                  options.getSplitLargeRows(), MAX_MUTATIONS_PER_ROW)))
      .apply("Write to Bigtable", write);

  return pipeline.run();
}
 
Example #4
Source File: DirectRunnerTest.java    From beam with Apache License 2.0
@Test
public void testWaitUntilFinishTimeout() throws Exception {
  DirectOptions options = PipelineOptionsFactory.as(DirectOptions.class);
  options.setBlockOnRun(false);
  options.setRunner(DirectRunner.class);
  Pipeline p = Pipeline.create(options);
  p.apply(Create.of(1L))
      .apply(
          ParDo.of(
              new DoFn<Long, Long>() {
                @ProcessElement
                public void hang(ProcessContext context) throws InterruptedException {
                  // Hangs "forever"
                  Thread.sleep(Long.MAX_VALUE);
                }
              }));
  PipelineResult result = p.run();
  // The pipeline should never complete;
  assertThat(result.getState(), is(State.RUNNING));
  // Must time out, otherwise this test will never complete
  result.waitUntilFinish(Duration.millis(1L));
  assertEquals(null, result.getState());
}
 
Example #5
Source File: PubsubIntegrationTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test(timeout = 30000)
public void canReadPubsubInput() throws Exception {
  List<String> inputLines = Lines.resources("testdata/basic-messages-nonempty.ndjson");
  publishLines(inputLines);

  pipeline.getOptions().as(DirectOptions.class).setBlockOnRun(false);

  SinkOptions.Parsed sinkOptions = pipeline.getOptions().as(SinkOptions.Parsed.class);
  sinkOptions.setInput(pipeline.newProvider(subscriptionName.toString()));

  PCollection<String> output = pipeline.apply(InputType.pubsub.read(sinkOptions))
      .apply("encodeJson", OutputFileFormat.json.encode());

  PAssert.that(output).containsInAnyOrder(inputLines);

  // This runs in the background and returns immediately due to setBlockOnRun above.
  PipelineResult result = pipeline.run();

  // The wait here is determined empirically; it's not entirely clear why it takes this long.
  System.err.println("Waiting 15 seconds to make sure we've processed all messages...");
  result.waitUntilFinish(Duration.millis(15000));
  System.err.println("Done waiting; now cancelling the pipeline so the test can finish.");
  result.cancel();
}
 
Example #6
Source File: IndexedRecordToJsonTest.java    From component-runtime with Apache License 2.0
@Test
public void test() {
    PAssert
            .that(pipeline
                    .apply(Create
                            .of(newIndexedRecord("first"), newIndexedRecord("second"))
                            .withCoder(AvroCoder.of(IndexedRecord.class, getSchema())))
                    .apply(new IndexedRecordToJson()))
            .satisfies(values -> {
                assertEquals(asList("first", "second"),
                        StreamSupport
                                .stream(values.spliterator(), false)
                                .map(k -> k.getString("name"))
                                .sorted()
                                .collect(toList()));
                return null;
            });
    assertEquals(PipelineResult.State.DONE, pipeline.run().waitUntilFinish());
}
 
Example #7
Source File: PubsubIntegrationTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test(timeout = 30000)
public void canSendPubsubOutput() throws Exception {
  final List<String> inputLines = Lines.resources("testdata/pubsub-integration/input.ndjson");

  pipeline.getOptions().as(DirectOptions.class).setBlockOnRun(false);

  SinkOptions.Parsed sinkOptions = pipeline.getOptions().as(SinkOptions.Parsed.class);
  sinkOptions.setOutput(pipeline.newProvider(topicName.toString()));
  // We would normally use pipeline.newProvider instead of StaticValueProvider in tests,
  // but something about this configuration causes the pipeline to stall when CompressPayload
  // accesses a method on the underlying enum value when defined via pipeline.newProvider.
  sinkOptions.setOutputPubsubCompression(StaticValueProvider.of(Compression.UNCOMPRESSED));

  pipeline.apply(Create.of(inputLines)).apply(InputFileFormat.json.decode())
      .apply(OutputType.pubsub.write(sinkOptions));

  final PipelineResult result = pipeline.run();

  System.err.println("Waiting for subscriber to receive messages published in the pipeline...");
  List<String> expectedLines = Lines.resources("testdata/pubsub-integration/truncated.ndjson");
  List<String> received = receiveLines(expectedLines.size());
  assertThat(received, matchesInAnyOrder(expectedLines));
  result.cancel();
}
 
Example #8
Source File: TextToPubsub.java    From DataflowTemplates with Apache License 2.0
/**
 * Executes the pipeline with the provided execution
 * parameters.
 *
 * @param options The execution parameters.
 */
public static PipelineResult run(Options options) {
  // Create the pipeline.
  Pipeline pipeline = Pipeline.create(options);

  /*
   * Steps:
   *  1) Read from the text source.
   *  2) Write each text record to Pub/Sub
   */
  pipeline
      .apply("Read Text Data", TextIO.read().from(options.getInputFilePattern()))
      .apply("Write to PubSub", PubsubIO.writeStrings().to(options.getOutputTopic()));

  return pipeline.run();
}
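Template-style entry points such as the run(Options) method above are usually driven by a small main method that parses the command-line arguments into the template's Options interface and then decides whether to block on the result. The sketch below is a hypothetical driver, not the template's actual main; whether the real template blocks on waitUntilFinish is an assumption here.

public static void main(String[] args) {
  // Parse and validate the execution parameters, then hand off to run().
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  PipelineResult result = run(options);
  // Optionally block until the launched job reaches a terminal state.
  result.waitUntilFinish();
}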
 
Example #9
Source File: TalendIOTest.java    From component-runtime with Apache License 2.0
@Test
public void output() {
    Output.DATA.clear();
    pipeline
            .apply(Create.of(new Sample("a"), new Sample("b")).withCoder(JsonbCoder.of(Sample.class, PLUGIN)))
            .apply(UUID.randomUUID().toString(), toRecord())
            .setCoder(SchemaRegistryCoder.of())
            .apply(new ViewsMappingTransform(emptyMap(), PLUGIN))
            .apply(TalendIO.write(new BaseTestProcessor() {

                @Override
                public void onNext(final InputFactory input, final OutputFactory factory) {
                    final Object read = input.read(Branches.DEFAULT_BRANCH);
                    Output.DATA.add(Record.class.cast(read).getString("data"));
                }
            }));
    assertEquals(PipelineResult.State.DONE, pipeline.run().getState());
    assertThat(Output.DATA, containsInAnyOrder("a", "b"));
}
 
Example #10
Source File: PubsubIntegrationTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test(timeout = 30000)
public void canSendGzippedPayloads() throws Exception {
  final List<String> inputLines = Lines.resources("testdata/pubsub-integration/input.ndjson");

  pipeline.getOptions().as(DirectOptions.class).setBlockOnRun(false);

  SinkOptions sinkOptions = pipeline.getOptions().as(SinkOptions.class);
  sinkOptions.setOutputType(OutputType.pubsub);
  sinkOptions.setOutput(pipeline.newProvider(topicName.toString()));
  SinkOptions.Parsed options = SinkOptions.parseSinkOptions(sinkOptions);

  pipeline.apply(Create.of(inputLines)).apply(InputFileFormat.json.decode())
      .apply(options.getOutputType().write(options));

  final PipelineResult result = pipeline.run();

  System.err.println("Waiting for subscriber to receive messages published in the pipeline...");
  List<String> expectedLines = Lines.resources("testdata/pubsub-integration/gzipped.ndjson");
  List<String> received = receiveLines(expectedLines.size());
  assertThat(received, matchesInAnyOrder(expectedLines));
  result.cancel();
}
 
Example #11
Source File: WordCountTest.java    From DataflowTemplates with Apache License 2.0
@Test
@Category(NeedsRunner.class)
public void testWordCountSimple() {
  PCollection<KV<String, Long>> pc =
      pipeline.apply(Create.of(INPUT_STRS)).apply(new CountWords());
  PAssert.that(pc).containsInAnyOrder(KV.of("hello", 2L), KV.of(("world"), 1L));
  PipelineResult result = pipeline.run();
  result.waitUntilFinish();

  Map<String, Long> expectedCounters = new HashMap<>();
  expectedCounters.put("emptyLines", 2L);
  for (MetricResult c :
      result.metrics().queryMetrics(MetricsFilter.builder().build()).getCounters()) {
    String name = c.getName().getName();
    if (expectedCounters.containsKey(name)) {
      assertEquals(expectedCounters.get(name), c.getCommitted());
      expectedCounters.remove(name);
    }
  }
  assertTrue(expectedCounters.isEmpty());
}
 
Example #12
Source File: ExportTimestampTest.java    From DataflowTemplates with Apache License 2.0
private void exportAndImportDbAtTime(String sourceDb, String destDb,
                                     String jobIdName, String ts,
                                     TestPipeline exportPipeline,
                                     TestPipeline importPipeline) {
  ValueProvider.StaticValueProvider<String> destination = ValueProvider.StaticValueProvider
      .of(tmpDir);
  ValueProvider.StaticValueProvider<String> jobId = ValueProvider.StaticValueProvider
      .of(jobIdName);
  ValueProvider.StaticValueProvider<String> source = ValueProvider.StaticValueProvider
      .of(tmpDir + "/" + jobIdName);
  ValueProvider.StaticValueProvider<String> timestamp = ValueProvider.StaticValueProvider.of(ts);
  SpannerConfig sourceConfig = spannerServer.getSpannerConfig(sourceDb);
  exportPipeline.apply("Export", new ExportTransform(sourceConfig, destination,
                                                     jobId, timestamp));
  PipelineResult exportResult = exportPipeline.run();
  exportResult.waitUntilFinish();

  SpannerConfig copyConfig = spannerServer.getSpannerConfig(destDb);
  importPipeline.apply("Import", new ImportTransform(
      copyConfig, source, ValueProvider.StaticValueProvider.of(true),
      ValueProvider.StaticValueProvider.of(true),
      ValueProvider.StaticValueProvider.of(true)));
  PipelineResult importResult = importPipeline.run();
  importResult.waitUntilFinish();
}
 
Example #13
Source File: BeamPipelineEngine.java    From hop with Apache License 2.0
private PipelineResult executePipeline( org.apache.beam.sdk.Pipeline pipeline ) throws HopException {

    RunnerType runnerType = beamEngineRunConfiguration.getRunnerType();
    switch ( runnerType ) {
      case Direct:
        return DirectRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
      case Flink:
        return FlinkRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
      case DataFlow:
        return DataflowRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
      case Spark:
        return SparkRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
      default:
        throw new HopException( "Execution on runner '" + runnerType.name() + "' is not supported yet." );
    }
  }
 
Example #14
Source File: BigQueryDatasetRuntime.java    From components with Apache License 2.0
public void getSampleDeprecated(int limit, Consumer<IndexedRecord> consumer) {
    // Create a pipeline using the input component to get records.
    DirectOptions options = BeamLocalRunnerOption.getOptions();
    final Pipeline p = Pipeline.create(options);

    // Create an input runtime based on the properties.
    BigQueryInputRuntime inputRuntime = new BigQueryInputRuntime();
    BigQueryInputProperties inputProperties = new BigQueryInputProperties(null);
    inputProperties.init();
    inputProperties.setDatasetProperties(properties);
    inputRuntime.initialize(new BeamJobRuntimeContainer(options), inputProperties);

    try (DirectConsumerCollector<IndexedRecord> collector = DirectConsumerCollector.of(consumer)) {
        // Collect a sample of the input records.
        p
                .apply(inputRuntime) //
                .apply(Sample.<IndexedRecord> any(limit))
                .apply(collector);
        PipelineResult pr = p.run();
        pr.waitUntilFinish();
    }
}
 
Example #15
Source File: TalendIOTest.java    From component-runtime with Apache License 2.0
@Test
public void processorMulti() {
    final PCollection<SampleLength> out = pipeline
            .apply(Create.of(new Sample("a"), new Sample("bb")).withCoder(JsonbCoder.of(Sample.class, PLUGIN)))
            .apply(UUID.randomUUID().toString(), toRecord())
            .setCoder(SchemaRegistryCoder.of())
            .apply(new ViewsMappingTransform(emptyMap(), PLUGIN))
            .apply(TalendFn.asFn(new BaseTestProcessor() {

                @Override
                public void onNext(final InputFactory input, final OutputFactory factory) {
                    final Object read = input.read(Branches.DEFAULT_BRANCH);
                    factory
                            .create(Branches.DEFAULT_BRANCH)
                            .emit(new Sample(Record.class.cast(read).getString("data")));
                }
            }))
            .apply(toSampleLength());
    PAssert.that(out.apply(UUID.randomUUID().toString(), toInt())).containsInAnyOrder(1, 2);
    assertEquals(PipelineResult.State.DONE, pipeline.run().getState());
}
 
Example #16
Source File: MongoDBIOIT.java    From beam with Apache License 2.0
@Test
public void testWriteAndRead() {
  initialCollectionSize = getCollectionSizeInBytes(collection);

  writePipeline
      .apply("Generate sequence", GenerateSequence.from(0).to(options.getNumberOfRecords()))
      .apply("Produce documents", MapElements.via(new LongToDocumentFn()))
      .apply("Collect write time metric", ParDo.of(new TimeMonitor<>(NAMESPACE, "write_time")))
      .apply(
          "Write documents to MongoDB",
          MongoDbIO.write()
              .withUri(mongoUrl)
              .withDatabase(options.getMongoDBDatabaseName())
              .withCollection(collection));
  PipelineResult writeResult = writePipeline.run();
  writeResult.waitUntilFinish();

  finalCollectionSize = getCollectionSizeInBytes(collection);

  PCollection<String> consolidatedHashcode =
      readPipeline
          .apply(
              "Read all documents",
              MongoDbIO.read()
                  .withUri(mongoUrl)
                  .withDatabase(options.getMongoDBDatabaseName())
                  .withCollection(collection))
          .apply("Collect read time metrics", ParDo.of(new TimeMonitor<>(NAMESPACE, "read_time")))
          .apply("Map documents to Strings", MapElements.via(new DocumentToStringFn()))
          .apply("Calculate hashcode", Combine.globally(new HashingFn()));

  String expectedHash = getHashForRecordCount(options.getNumberOfRecords(), EXPECTED_HASHES);
  PAssert.thatSingleton(consolidatedHashcode).isEqualTo(expectedHash);

  PipelineResult readResult = readPipeline.run();
  readResult.waitUntilFinish();
  collectAndPublishMetrics(writeResult, readResult);
}
 
Example #17
Source File: SimpleFileIOOutputRuntimeUnboundedTest.java    From components with Apache License 2.0
/**
 * Basic unit test writing to Avro.
 */
@Test
public void testBasicAvroUnboundedWithWindow() throws IOException, URISyntaxException {
    String fileSpec = mini
            .getLocalFs()
            .getUri()
            .resolve(new Path(mini.newFolder().toString(), "output.avro").toUri())
            .toString();

    // Configure the component.
    SimpleFileIOOutputProperties props = createOutputComponentProperties();
    props.getDatasetProperties().path.setValue(fileSpec);
    props.getDatasetProperties().format.setValue(SimpleFileIOFormat.AVRO);

    // Create the runtime.
    SimpleFileIOOutputRuntime runtime = new SimpleFileIOOutputRuntime();
    runtime.initialize(null, props);

    // Use the runtime in a direct pipeline to test.
    final Pipeline p = beam.createPipeline();
    PCollection<IndexedRecord> input = p //
            .apply(GenerateSequence.from(0).withRate(10, Duration.millis(1000))) //
            .apply(ParDo.of(new GenerateDoFn()))
            .apply(Window.<IndexedRecord> into(FixedWindows.of(Duration.millis(30000))));

    input.apply(runtime);

    // And run the test.
    PipelineResult pr = p.run();

    // Check the expected values.
    // TODO(rskraba): Implement a comparison for the file on disk.
    // mini.assertReadFile(mini.getLocalFs(), fileSpec, "1;one", "2;two");
}
 
Example #18
Source File: JobInvocationTest.java    From beam with Apache License 2.0
@Test(timeout = 10_000)
public void testStateAfterCompletion() throws Exception {
  jobInvocation.start();
  assertThat(jobInvocation.getState(), is(JobApi.JobState.Enum.RUNNING));

  TestPipelineResult pipelineResult = new TestPipelineResult(PipelineResult.State.DONE);
  runner.setResult(pipelineResult);

  awaitJobState(jobInvocation, JobApi.JobState.Enum.DONE);
}
 
Example #19
Source File: MetricsTest.java    From beam with Apache License 2.0
private static MetricQueryResults queryTestMetrics(PipelineResult result) {
  return result
      .metrics()
      .queryMetrics(
          MetricsFilter.builder()
              .addNameFilter(MetricNameFilter.inNamespace(MetricsTest.class))
              .build());
}
 
Example #20
Source File: TestPipeline.java    From beam with Apache License 2.0
/** Like {@link #run} but with the given potentially modified options. */
@Override
public PipelineResult run(PipelineOptions options) {
  checkState(
      enforcement.isPresent(),
      "Is your TestPipeline declaration missing a @Rule annotation? Usage: "
          + "@Rule public final transient TestPipeline pipeline = TestPipeline.create();");

  final PipelineResult pipelineResult;
  try {
    enforcement.get().beforePipelineExecution();
    PipelineOptions updatedOptions =
        MAPPER.convertValue(MAPPER.valueToTree(options), PipelineOptions.class);
    updatedOptions
        .as(TestValueProviderOptions.class)
        .setProviderRuntimeValues(StaticValueProvider.of(providerRuntimeValues));
    pipelineResult = super.run(updatedOptions);
    verifyPAssertsSucceeded(this, pipelineResult);
  } catch (RuntimeException exc) {
    Throwable cause = exc.getCause();
    if (cause instanceof AssertionError) {
      throw (AssertionError) cause;
    } else {
      throw exc;
    }
  }

  // If we reach this point, the pipeline has been run and no exceptions have been thrown during
  // its execution.
  enforcement.get().afterPipelineExecution();
  return pipelineResult;
}
 
Example #21
Source File: SparkStructuredStreamingPipelineResult.java    From beam with Apache License 2.0
private State awaitTermination(Duration duration)
    throws TimeoutException, ExecutionException, InterruptedException {
  pipelineExecution.get(duration.getMillis(), TimeUnit.MILLISECONDS);
  // Throws an exception if the job is not finished successfully in the given time.
  // TODO: all streaming functionality
  return PipelineResult.State.DONE;
}
 
Example #22
Source File: ExportPipeline.java    From DataflowTemplates with Apache License 2.0
/**
 * Runs a pipeline to export a Cloud Spanner database to Avro files.
 *
 * @param args arguments to the pipeline
 */
public static void main(String[] args) {

  ExportPipelineOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(ExportPipelineOptions.class);

  Pipeline p = Pipeline.create(options);

  SpannerConfig spannerConfig =
      SpannerConfig.create()
          .withProjectId(options.getSpannerProjectId())
          .withHost(options.getSpannerHost())
          .withInstanceId(options.getInstanceId())
          .withDatabaseId(options.getDatabaseId());
  p.begin()
      .apply(
          "Run Export",
          new ExportTransform(spannerConfig, options.getOutputDir(), options.getTestJobId(),
                              options.getSnapshotTime()));
  PipelineResult result = p.run();
  if (options.getWaitUntilFinish() &&
      /* Only if template location is null, there is a dataflow job to wait for. Else it's
       * template generation which doesn't start a dataflow job.
       */
      options.as(DataflowPipelineOptions.class).getTemplateLocation() == null) {
    result.waitUntilFinish();
  }
}
 
Example #23
Source File: SparkPipelineStateTest.java    From beam with Apache License 2.0
private void testFailedPipeline(final SparkPipelineOptions options) throws Exception {

    SparkPipelineResult result = null;

    try {
      final Pipeline pipeline = Pipeline.create(options);
      pipeline
          .apply(getValues(options))
          .setCoder(StringUtf8Coder.of())
          .apply(
              MapElements.via(
                  new SimpleFunction<String, String>() {

                    @Override
                    public String apply(final String input) {
                      throw new MyCustomException(FAILED_THE_BATCH_INTENTIONALLY);
                    }
                  }));

      result = (SparkPipelineResult) pipeline.run();
      result.waitUntilFinish();
    } catch (final Exception e) {
      assertThat(e, instanceOf(Pipeline.PipelineExecutionException.class));
      assertThat(e.getCause(), instanceOf(MyCustomException.class));
      assertThat(e.getCause().getMessage(), is(FAILED_THE_BATCH_INTENTIONALLY));
      assertThat(result.getState(), is(PipelineResult.State.FAILED));
      result.cancel();
      return;
    }

    fail("An injected failure did not affect the pipeline as expected.");
  }
 
Example #24
Source File: BigQueryMergeValidatorTemplate.java    From DataflowTemplates with Apache License 2.0
/**
 * Runs the pipeline with the supplied options.
 *
 * @param options The execution parameters to the pipeline.
 * @return The result of the pipeline execution.
 */
public static PipelineResult run(Options options) {
  // Create the pipeline
  Pipeline pipeline = Pipeline.create(options);
  String replicaTable = options.getReplicaTable();
  String stagingTable = options.getStagingTable();

  pipeline
      .apply(Create.of(1))
      .apply(
          ParDo.of(
              new DoFn<Integer, MergeInfo>() {
                @ProcessElement
                public void process(ProcessContext c) {
                  MergeInfo mergeInfo =
                      MergeInfo.create(
                          "_metadata_timestamp",
                          "_metadata_deleted",
                          replicaTable,
                          stagingTable,
                          ALL_FIELDS,
                          ALL_PK_FIELDS);
                  c.output(mergeInfo);
                }
              }))
      .apply(new BigQueryMerger(
          Duration.standardMinutes(1), null, MergeConfiguration.bigQueryConfiguration()));

  return pipeline.run();
}
 
Example #25
Source File: BigQueryToElasticsearch.java    From DataflowTemplates with Apache License 2.0
/**
 * Runs the pipeline with the supplied options.
 *
 * @param options The execution parameters to the pipeline.
 * @return The result of the pipeline execution.
 */
private static PipelineResult run(BigQueryToElasticsearchReadOptions options) {

  // Create the pipeline.
  Pipeline pipeline = Pipeline.create(options);

  /*
   * Steps: 1) Read records from BigQuery via BigQueryIO.
   *        2) Create json string from Table Row.
   *        3) Write records to Elasticsearch.
   *
   *
   * Step #1: Read from BigQuery. If a query is provided then it is used to get the TableRows.
   */
  pipeline
      .apply(
          "ReadFromBigQuery",
          ReadBigQuery.newBuilder()
              .setOptions(options.as(BigQueryToElasticsearchReadOptions.class))
              .build())

      /*
       * Step #2: Convert table rows to JSON documents.
       */
      .apply("TableRowsToJsonDocument", ParDo.of(new TableRowToJsonFn()))

      /*
       * Step #3: Write converted records to Elasticsearch
       */
      .apply(
          "WriteToElasticsearch",
          WriteToElasticsearch.newBuilder()
              .setOptions(options.as(WriteToElasticsearchOptions.class))
              .build());

  return pipeline.run();
}
 
Example #26
Source File: ParquetIOIT.java    From beam with Apache License 2.0
private void collectAndPublishMetrics(PipelineResult result) {
  String uuid = UUID.randomUUID().toString();
  String timestamp = Timestamp.now().toString();
  Set<Function<MetricsReader, NamedTestResult>> metricSuppliers =
      fillMetricSuppliers(uuid, timestamp);
  final IOITMetrics metrics =
      new IOITMetrics(metricSuppliers, result, PARQUET_NAMESPACE, uuid, timestamp);
  metrics.publish(bigQueryDataset, bigQueryTable);
  metrics.publishToInflux(settings);
}
 
Example #27
Source File: CopyDbTest.java    From DataflowTemplates with Apache License 2.0
private void runTest() {
  String tmpDirPath = tmpDir.getRoot().getAbsolutePath();
  ValueProvider.StaticValueProvider<String> destination = ValueProvider.StaticValueProvider
      .of(tmpDirPath);
  ValueProvider.StaticValueProvider<String> jobId = ValueProvider.StaticValueProvider
      .of("jobid");
  ValueProvider.StaticValueProvider<String> source = ValueProvider.StaticValueProvider
      .of(tmpDirPath + "/jobid");

  SpannerConfig sourceConfig = spannerServer.getSpannerConfig(sourceDb);
  exportPipeline.apply("Export", new ExportTransform(sourceConfig, destination, jobId));
  PipelineResult exportResult = exportPipeline.run();
  exportResult.waitUntilFinish();

  SpannerConfig destConfig = spannerServer.getSpannerConfig(destinationDb);
  importPipeline.apply(
      "Import",
      new ImportTransform(
          destConfig,
          source,
          ValueProvider.StaticValueProvider.of(true),
          ValueProvider.StaticValueProvider.of(true),
          ValueProvider.StaticValueProvider.of(true)));
  PipelineResult importResult = importPipeline.run();
  importResult.waitUntilFinish();

  PCollection<Long> mismatchCount =
      comparePipeline.apply("Compare", new CompareDatabases(sourceConfig, destConfig));
  PAssert.that(mismatchCount).satisfies((x) -> {
    assertEquals(Lists.newArrayList(x), Lists.newArrayList(0L));
    return null;
  });
  PipelineResult compareResult = comparePipeline.run();
  compareResult.waitUntilFinish();

  Ddl sourceDdl = readDdl(sourceDb);
  Ddl destinationDdl = readDdl(destinationDb);

  assertThat(sourceDdl.prettyPrint(), equalToIgnoringWhiteSpace(destinationDdl.prettyPrint()));
}
 
Example #28
Source File: MetricsReaderTest.java    From beam with Apache License 2.0
@Test
public void testTimeIsMinusOneIfTimeMetricIsTooFarFromNow() {
  List<Integer> sampleInputData = Arrays.asList(1, 5, 5, 5, 5);

  createTestPipeline(sampleInputData, new MonitorWithTimeDistribution());
  PipelineResult result = testPipeline.run();

  MetricsReader reader = new MetricsReader(result, NAMESPACE, 900000000001L);

  assertEquals(-1, reader.getStartTimeMetric("timeDist"));
  assertEquals(-1, reader.getEndTimeMetric("timeDist"));
}
 
Example #29
Source File: TrafficRoutes.java    From beam with Apache License 2.0
public static void runTrafficRoutes(TrafficRoutesOptions options) throws IOException {
  // Using ExampleUtils to set up required resources.
  ExampleUtils exampleUtils = new ExampleUtils(options);
  exampleUtils.setup();

  Pipeline pipeline = Pipeline.create(options);
  TableReference tableRef = new TableReference();
  tableRef.setProjectId(options.getProject());
  tableRef.setDatasetId(options.getBigQueryDataset());
  tableRef.setTableId(options.getBigQueryTable());

  pipeline
      .apply("ReadLines", new ReadFileAndExtractTimestamps(options.getInputFile()))
      // row... => <station route, station speed> ...
      .apply(ParDo.of(new ExtractStationSpeedFn()))
      // map the incoming data stream into sliding windows.
      .apply(
          Window.into(
              SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
                  .every(Duration.standardMinutes(options.getWindowSlideEvery()))))
      .apply(new TrackSpeed())
      .apply(BigQueryIO.writeTableRows().to(tableRef).withSchema(FormatStatsFn.getSchema()));

  // Run the pipeline.
  PipelineResult result = pipeline.run();

  // ExampleUtils will try to cancel the pipeline and the injector before the program exists.
  exampleUtils.waitToFinish(result);
}
 
Example #30
Source File: TestJetRunner.java    From beam with Apache License 2.0
@Override
public PipelineResult run(Pipeline pipeline) {
  Collection<JetInstance> instances = initMemberInstances(factory);
  try {
    PipelineResult result = delegate.run(pipeline);
    if (result instanceof FailedRunningPipelineResults) {
      throw ((FailedRunningPipelineResults) result).getCause();
    }
    result.waitUntilFinish();
    return result;
  } finally {
    killMemberInstances(instances, factory);
  }
}