Java Code Examples for org.apache.beam.sdk.PipelineResult#cancel()

The following examples show how to use org.apache.beam.sdk.PipelineResult#cancel(). Each example is taken from an open-source project; the source file and license are noted above the code.
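Before diving in, here is a minimal sketch of the pattern most of the examples follow: run the pipeline, wait with a bounded timeout, and cancel if the job has not reached a terminal state. The class name and the 30-second timeout are illustrative, not taken from any of the projects below.

import java.io.IOException;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.joda.time.Duration;

public class CancelAfterTimeout {
  public static void main(String[] args) throws IOException {
    PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();
    Pipeline pipeline = Pipeline.create(options);
    // ... apply transforms to the pipeline here ...

    PipelineResult result = pipeline.run();

    // waitUntilFinish(Duration) may return null (e.g. on Dataflow when the
    // duration elapses) or a non-terminal state, depending on the runner.
    PipelineResult.State state = result.waitUntilFinish(Duration.standardSeconds(30));
    if (state == null || !state.isTerminal()) {
      // cancel() declares IOException; some runners instead throw
      // UnsupportedOperationException if cancellation is not supported.
      result.cancel();
    }
  }
}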
Example 1
Source File: WordCountTimeOut1Sec.java    From incubator-nemo with Apache License 2.0
/**
 * Main function for the word count Beam program, which cancels the job after a one-second wait.
 *
 * @param args arguments.
 */
public static void main(final String[] args) {
  final String inputFilePath = args[0];
  final String outputFilePath = args[1];
  final PipelineOptions options = NemoPipelineOptionsFactory.create();
  options.setJobName("WordCountTimeOut1Sec");

  final Pipeline p = generateWordCountPipeline(options, inputFilePath, outputFilePath);
  final PipelineResult pr = p.run();
  // Wait at most one second, then cancel whatever is still running.
  pr.waitUntilFinish(org.joda.time.Duration.standardSeconds(1));
  try {
    pr.cancel();
  } catch (final IOException e) {
    LOG.info("IOException while cancelling job", e);
  }
}
 
Example 2
Source File: PubsubIntegrationTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test(timeout = 30000)
public void canReadPubsubInput() throws Exception {
  List<String> inputLines = Lines.resources("testdata/basic-messages-nonempty.ndjson");
  publishLines(inputLines);

  pipeline.getOptions().as(DirectOptions.class).setBlockOnRun(false);

  SinkOptions.Parsed sinkOptions = pipeline.getOptions().as(SinkOptions.Parsed.class);
  sinkOptions.setInput(pipeline.newProvider(subscriptionName.toString()));

  PCollection<String> output = pipeline.apply(InputType.pubsub.read(sinkOptions))
      .apply("encodeJson", OutputFileFormat.json.encode());

  PAssert.that(output).containsInAnyOrder(inputLines);

  // This runs in the background and returns immediately due to setBlockOnRun above.
  PipelineResult result = pipeline.run();

  // The wait here is determined empirically; it's not entirely clear why it takes this long.
  System.err.println("Waiting 15 seconds to make sure we've processed all messages...");
  result.waitUntilFinish(Duration.millis(15000));
  System.err.println("Done waiting; now cancelling the pipeline so the test can finish.");
  result.cancel();
}
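Note that setBlockOnRun(false) is an option of the DirectRunner: with it, pipeline.run() returns immediately and the streaming job keeps running in the background, which is why the test must stop it explicitly with cancel() before finishing.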
 
Example 3
Source File: PubsubIntegrationTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test(timeout = 30000)
public void canSendPubsubOutput() throws Exception {
  final List<String> inputLines = Lines.resources("testdata/pubsub-integration/input.ndjson");

  pipeline.getOptions().as(DirectOptions.class).setBlockOnRun(false);

  SinkOptions.Parsed sinkOptions = pipeline.getOptions().as(SinkOptions.Parsed.class);
  sinkOptions.setOutput(pipeline.newProvider(topicName.toString()));
  // We would normally use pipeline.newProvider instead of StaticValueProvider in tests,
  // but something about this configuration causes the pipeline to stall when CompressPayload
  // accesses a method on the underlying enum value when defined via pipeline.newProvider.
  sinkOptions.setOutputPubsubCompression(StaticValueProvider.of(Compression.UNCOMPRESSED));

  pipeline.apply(Create.of(inputLines)).apply(InputFileFormat.json.decode())
      .apply(OutputType.pubsub.write(sinkOptions));

  final PipelineResult result = pipeline.run();

  System.err.println("Waiting for subscriber to receive messages published in the pipeline...");
  List<String> expectedLines = Lines.resources("testdata/pubsub-integration/truncated.ndjson");
  List<String> received = receiveLines(expectedLines.size());
  assertThat(received, matchesInAnyOrder(expectedLines));
  result.cancel();
}
 
Example 4
Source File: PubsubIntegrationTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test(timeout = 30000)
public void canSendGzippedPayloads() throws Exception {
  final List<String> inputLines = Lines.resources("testdata/pubsub-integration/input.ndjson");

  pipeline.getOptions().as(DirectOptions.class).setBlockOnRun(false);

  SinkOptions sinkOptions = pipeline.getOptions().as(SinkOptions.class);
  sinkOptions.setOutputType(OutputType.pubsub);
  sinkOptions.setOutput(pipeline.newProvider(topicName.toString()));
  SinkOptions.Parsed options = SinkOptions.parseSinkOptions(sinkOptions);

  pipeline.apply(Create.of(inputLines)).apply(InputFileFormat.json.decode())
      .apply(options.getOutputType().write(options));

  final PipelineResult result = pipeline.run();

  System.err.println("Waiting for subscriber to receive messages published in the pipeline...");
  List<String> expectedLines = Lines.resources("testdata/pubsub-integration/gzipped.ndjson");
  List<String> received = receiveLines(expectedLines.size());
  assertThat(received, matchesInAnyOrder(expectedLines));
  result.cancel();
}
 
Example 5
Source File: PubsubReadIT.java    From beam with Apache License 2.0
@Test
public void testReadPublicData() throws Exception {
  // The pipeline will never terminate on its own
  pipeline.getOptions().as(DirectOptions.class).setBlockOnRun(false);

  PCollection<String> messages =
      pipeline.apply(
          PubsubIO.readStrings()
              .fromTopic("projects/pubsub-public-data/topics/taxirides-realtime"));

  messages.apply(
      "waitForAnyMessage", signal.signalSuccessWhen(messages.getCoder(), anyMessages -> true));

  Supplier<Void> start = signal.waitForStart(Duration.standardMinutes(5));
  pipeline.apply(signal.signalStart());
  PipelineResult job = pipeline.run();
  start.get();

  signal.waitForSuccess(Duration.standardSeconds(30));
  // A runner may not support cancel
  try {
    job.cancel();
  } catch (UnsupportedOperationException exc) {
    // noop
  }
}
 
Example 6
Source File: PubsubIntegrationTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test(timeout = 30000)
public void canSendPubsubErrorOutput() throws Exception {
  final List<String> inputLines = Lines
      .resources("testdata/pubsub-integration/error-input.ndjson");

  pipeline.getOptions().as(DirectOptions.class).setBlockOnRun(false);

  SinkOptions.Parsed sinkOptions = pipeline.getOptions().as(SinkOptions.Parsed.class);
  sinkOptions.setInput(pipeline.newProvider("test input"));
  sinkOptions.setJobName("test job name");
  sinkOptions.setErrorOutput(pipeline.newProvider(topicName.toString()));
  // We would normally use pipeline.newProvider instead of StaticValueProvider in tests,
  // but something about this configuration causes the pipeline to stall when CompressPayload
  // accesses a method on the underlying enum value when defined via pipeline.newProvider.
  sinkOptions.setErrorOutputPubsubCompression(StaticValueProvider.of(Compression.UNCOMPRESSED));

  pipeline.apply(Create.of(inputLines)).apply(InputFileFormat.json.decode())
      .apply(ErrorOutputType.pubsub.write(sinkOptions));

  final PipelineResult result = pipeline.run();

  System.err.println("Waiting for subscriber to receive messages published in the pipeline...");
  List<String> expectedLines = Lines.resources("testdata/pubsub-integration/error-output.ndjson");
  List<String> received = receiveLines(expectedLines.size());
  assertThat(received, matchesInAnyOrder(expectedLines));
  result.cancel();
}
 
Example 7
Source File: BeamHelper.java    From dbeam with Apache License 2.0
public static PipelineResult waitUntilDone(
    final PipelineResult result, final Duration exportTimeout) {
  // The terminal state might be null; for example,
  // org.apache.beam.runners.dataflow.DataflowPipelineJob#waitUntilFinish can return null.
  @Nullable
  final PipelineResult.State terminalState =
      result.waitUntilFinish(org.joda.time.Duration.millis(exportTimeout.toMillis()));
  if (terminalState == null || !terminalState.isTerminal()) {
    try {
      result.cancel();
    } catch (IOException e) {
      throw new Pipeline.PipelineExecutionException(
          new Exception(
              String.format(
                  "Job exceeded timeout of %s, but was not possible to cancel, "
                      + "finished with terminalState %s",
                  exportTimeout.toString(), terminalState),
              e));
    }
    throw new Pipeline.PipelineExecutionException(
        new Exception("Job cancelled after exceeding timeout " + exportTimeout.toString()));
  }
  if (!terminalState.equals(PipelineResult.State.DONE)) {
    throw new Pipeline.PipelineExecutionException(
        new Exception("Job finished with terminalState " + terminalState.toString()));
  }
  return result;
}
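A hedged usage sketch for this helper (hypothetical call site; the exportTimeout parameter is a java.time.Duration, which is what the toMillis() call above implies):

// Hypothetical caller of waitUntilDone.
PipelineResult result = pipeline.run();
BeamHelper.waitUntilDone(result, java.time.Duration.ofMinutes(30));
// Reaching this point means the job finished in State.DONE within the timeout;
// otherwise waitUntilDone threw a Pipeline.PipelineExecutionException.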
 
Example 8
Source File: BeamEnumerableConverter.java    From beam with Apache License 2.0
private static PipelineResult limitRun(
    PipelineOptions options,
    BeamRelNode node,
    DoFn<Row, Void> doFn,
    Queue<Row> values,
    int limitCount) {
  options.as(DirectOptions.class).setBlockOnRun(false);
  Pipeline pipeline = Pipeline.create(options);
  PCollection<Row> resultCollection = BeamSqlRelUtils.toPCollection(pipeline, node);
  resultCollection.apply(ParDo.of(doFn));

  PipelineResult result = pipeline.run();

  State state;
  while (true) {
    // Check the pipeline state once per second.
    state = result.waitUntilFinish(Duration.standardSeconds(1));
    if (state != null && state.isTerminal()) {
      if (PipelineResult.State.FAILED.equals(state)) {
        throw new RuntimeException("Pipeline failed for unknown reason");
      }
      break;
    }

    try {
      if (values.size() >= limitCount) {
        result.cancel();
        break;
      }
    } catch (IOException e) {
      LOG.warn(e.toString());
      break;
    }
  }

  return result;
}
 
Example 9
Source File: JobFailure.java    From beam with Apache License 2.0
static void handleFailure(
    final PipelineResult pipelineResult, final List<NamedTestResult> testResults)
    throws IOException {
  Optional<JobFailure> failure = lookForFailure(pipelineResult, testResults);

  if (failure.isPresent()) {
    JobFailure jobFailure = failure.get();

    if (jobFailure.requiresCancelling) {
      pipelineResult.cancel();
    }

    throw new RuntimeException(jobFailure.cause);
  }
}
 
Example 10
Source File: PubsubReadIT.java    From beam with Apache License 2.0
@Test
public void testReadPubsubMessageId() throws Exception {
  // The pipeline will never terminate on its own
  pipeline.getOptions().as(DirectOptions.class).setBlockOnRun(false);

  PCollection<PubsubMessage> messages =
      pipeline.apply(
          PubsubIO.readMessagesWithAttributesAndMessageId()
              .fromTopic("projects/pubsub-public-data/topics/taxirides-realtime"));

  messages.apply(
      "isMessageIdNonNull",
      signal.signalSuccessWhen(messages.getCoder(), new NonEmptyMessageIdCheck()));

  Supplier<Void> start = signal.waitForStart(Duration.standardMinutes(5));
  pipeline.apply(signal.signalStart());
  PipelineResult job = pipeline.run();
  start.get();

  signal.waitForSuccess(Duration.standardMinutes(1));
  // A runner may not support cancel
  try {
    job.cancel();
  } catch (UnsupportedOperationException exc) {
    // noop
  }
}
 
Example 11
Source File: FhirIOReadIT.java    From beam with Apache License 2.0
@Test
public void testFhirIORead() throws Exception {
  pipeline.getOptions().as(DirectOptions.class).setBlockOnRun(false);

  FhirIO.Read.Result result =
      pipeline
          .apply(PubsubIO.readStrings().fromSubscription(pubsubSubscription))
          .apply(FhirIO.readResources());

  PCollection<String> resources = result.getResources();
  resources.apply(
      "waitForAnyMessage", signal.signalSuccessWhen(resources.getCoder(), anyResources -> true));
  // wait for any resource

  Supplier<Void> start = signal.waitForStart(Duration.standardMinutes(5));
  pipeline.apply(signal.signalStart());
  PipelineResult job = pipeline.run();
  start.get();
  signal.waitForSuccess(Duration.standardSeconds(30));

  // A runner may not support cancel
  try {
    job.cancel();
  } catch (UnsupportedOperationException exc) {
    // noop
  }
}
 
Example 12
Source File: KafkaIOIT.java    From beam with Apache License 2.0
private void cancelIfTimeouted(PipelineResult readResult, PipelineResult.State readState)
    throws IOException {

  // TODO(lgajowy) this solution works for Dataflow only: waitUntilFinish(Duration)
  //  returns null when the provided duration is exceeded.
  if (readState == null) {
    readResult.cancel();
  }
}
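A hedged sketch of how such a helper is typically driven: pass it the (possibly null) state returned by a bounded waitUntilFinish. The ten-minute timeout is illustrative.

// Hypothetical call site; readResult comes from pipeline.run().
PipelineResult.State readState = readResult.waitUntilFinish(Duration.standardMinutes(10));
cancelIfTimeouted(readResult, readState);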
 
Example 13
Source File: WindowedWordCount.java    From deployment-examples with MIT License
static void runWindowedWordCount(Options options) throws IOException {
  final String output = options.getOutput();
  final Instant minTimestamp = new Instant(options.getMinTimestampMillis());
  final Instant maxTimestamp = new Instant(options.getMaxTimestampMillis());

  Pipeline pipeline = Pipeline.create(options);

  /*
   * Concept #1: the Beam SDK lets us run the same pipeline with either a bounded or
   * unbounded input source.
   */
  PCollection<String> input =
      pipeline
          /* Read from the GCS file. */
          .apply(TextIO.read().from(options.getInputFile()))
          // Concept #2: Add an element timestamp, using an artificial time just to show
          // windowing.
          // See AddTimestampFn for more detail on this.
          .apply(ParDo.of(new AddTimestampFn(minTimestamp, maxTimestamp)));

  /*
   * Concept #3: Window into fixed windows. The fixed window size for this example defaults to 1
   * minute (you can change this with a command-line option). See the documentation for more
   * information on how fixed windows work, and for information on the other types of windowing
   * available (e.g., sliding windows).
   */
  PCollection<String> windowedWords =
      input.apply(
          Window.into(FixedWindows.of(Duration.standardMinutes(options.getWindowSize()))));

  /*
   * Concept #4: Re-use our existing CountWords transform that does not have knowledge of
   * windows over a PCollection containing windowed values.
   */
  PCollection<KV<String, Long>> wordCounts = windowedWords.apply(new WordCount.CountWords());

  /*
   * Concept #5: Format the results and write to a sharded file partitioned by window, using a
   * simple ParDo operation. Because there may be failures followed by retries, the
   * writes must be idempotent, but the details of writing to files are elided here.
   */
  wordCounts
      .apply(MapElements.via(new WordCount.FormatAsTextFn()))
      .apply(new WriteOneFilePerWindow(output, options.getNumShards()));

  PipelineResult result = pipeline.run();
  try {
    result.waitUntilFinish();
  } catch (Exception exc) {
    result.cancel();
  }
}
 