Java Code Examples for org.apache.beam.sdk.PipelineResult#State

The following examples show how to use org.apache.beam.sdk.PipelineResult#State. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Main function for the MR BEAM program.
 *
 * <p>Runs the pipeline built by {@code generateWordCountPipeline}, waits at most one second
 * for it to finish, and then requests cancellation of the job.
 *
 * @param args arguments; {@code args[0]} is the input file path and {@code args[1]} is the
 *     output file path.
 */
public static void main(final String[] args) {
  final String inputFilePath = args[0];
  final String outputFilePath = args[1];
  final PipelineOptions options = NemoPipelineOptionsFactory.create();
  options.setJobName("WordCountTimeOut1Sec");

  final Pipeline p = generateWordCountPipeline(options, inputFilePath, outputFilePath);
  final PipelineResult pr = p.run();
  // The returned state is not needed: the job is cancelled right after the bounded wait.
  pr.waitUntilFinish(org.joda.time.Duration.standardSeconds(1));
  try {
    pr.cancel();
  } catch (final IOException e) {
    // Hand the exception to the logger so the cause of the failed cancellation is kept.
    LOG.info("IOException while cancelling job", e);
  }
}
 
Example 2
Source Project: beam   File: JobFailure.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Translates a pipeline state into a {@link JobFailure} when the state is not acceptable.
 *
 * <ul>
 *   <li>{@code RUNNING} / {@code UNKNOWN}: a "Job timeout." failure built with {@code true}.
 *   <li>{@code CANCELLED} / {@code FAILED} / {@code STOPPED} / {@code UPDATED}: an
 *       "Invalid job state" failure built with {@code false}.
 *   <li>any other state: no failure.
 * </ul>
 *
 * @param state the state to inspect
 * @return the failure matching {@code state}, or an empty optional
 */
private static Optional<JobFailure> lookForInvalidState(PipelineResult.State state) {
  final Optional<JobFailure> failure;
  switch (state) {
    case UNKNOWN:
    case RUNNING:
      failure = of(new JobFailure("Job timeout.", true));
      break;

    case UPDATED:
    case STOPPED:
    case FAILED:
    case CANCELLED:
      // %s renders the enum through toString(), same as an explicit call.
      failure = of(new JobFailure(format("Invalid job state: %s.", state), false));
      break;

    default:
      failure = empty();
      break;
  }
  return failure;
}
 
Example 3
Source Project: dbeam   File: BeamHelper.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Blocks until the pipeline finishes or {@code exportTimeout} elapses.
 *
 * <p>If the wait returns {@code null} or a non-terminal state, the job is cancelled and a
 * {@link Pipeline.PipelineExecutionException} is thrown. A terminal state other than
 * {@code DONE} also results in a {@link Pipeline.PipelineExecutionException}.
 *
 * @param result the running pipeline's result handle
 * @param exportTimeout maximum time to wait for the pipeline
 * @return {@code result}, when the pipeline finished in state {@code DONE}
 */
public static PipelineResult waitUntilDone(
    final PipelineResult result, final Duration exportTimeout) {
  // terminal state might be null, such as:
  // {{ @link org.apache.beam.runners.dataflow.DataflowPipelineJob.waitUntilFinish }}
  @Nullable
  final PipelineResult.State terminalState =
      result.waitUntilFinish(org.joda.time.Duration.millis(exportTimeout.toMillis()));

  final boolean reachedTerminalState = terminalState != null && terminalState.isTerminal();
  if (reachedTerminalState) {
    if (PipelineResult.State.DONE.equals(terminalState)) {
      return result;
    }
    throw new Pipeline.PipelineExecutionException(
        new Exception("Job finished with terminalState " + terminalState.toString()));
  }

  // Timed out (or state unknown): try to cancel, then report the timeout either way.
  try {
    result.cancel();
  } catch (IOException cancelFailure) {
    final String message =
        String.format(
            "Job exceeded timeout of %s, but was not possible to cancel, "
                + "finished with terminalState %s",
            exportTimeout.toString(), terminalState);
    throw new Pipeline.PipelineExecutionException(new Exception(message, cancelFailure));
  }
  throw new Pipeline.PipelineExecutionException(
      new Exception("Job cancelled after exceeding timeout " + exportTimeout.toString()));
}
 
Example 4
Source Project: beam   File: NonMergingGroupByKeyTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs a batch pipeline with re-iterable GroupByKey results enabled and expects it to finish
 * in state {@code DONE}.
 */
@Test
public void testEnabledReIterationDoesNotThrowAnException() {
  final Pipeline pipeline = FlinkTestPipeline.createForBatch();
  final FlinkPipelineOptions flinkOptions = pipeline.getOptions().as(FlinkPipelineOptions.class);
  flinkOptions.setReIterableGroupByKeyResult(true);

  pipeline
      .apply(Create.of(Arrays.asList(KV.of("a", 1), KV.of("b", 2), KV.of("c", 3))))
      .apply(GroupByKey.create())
      .apply(ParDo.of(new ReiterateDoFn<>()));

  final PipelineResult result = pipeline.run();
  final PipelineResult.State finalState = result.waitUntilFinish();
  Assert.assertEquals(PipelineResult.State.DONE, finalState);
}
 
Example 5
Source Project: beam   File: AvroTableProviderTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes two rows into an Avro-backed external table through Beam SQL, reads them back with a
 * filter, and checks both the resulting rows and the final state of the read pipeline.
 */
@Test
public void testReadAndWriteAvroTable() {
  File destinationFile = new File(tempFolder.getRoot(), "person-info.avro");

  BeamSqlEnv env = BeamSqlEnv.inMemory(new AvroTableProvider());
  env.executeDdl(
      String.format(
          "CREATE EXTERNAL TABLE PersonInfo %s TYPE avro LOCATION '%s'",
          AVRO_FIELD_NAMES, destinationFile.getAbsolutePath()));

  BeamSqlRelUtils.toPCollection(
      writePipeline,
      env.parseQuery(
          "INSERT INTO PersonInfo VALUES ('Alan', 22, 'England'), ('John', 42, 'USA')"));

  // The write must complete before the read pipeline is run.
  writePipeline.run().waitUntilFinish();

  PCollection<Row> rows =
      BeamSqlRelUtils.toPCollection(
          readPipeline, env.parseQuery("SELECT age, country FROM PersonInfo where age > 25"));

  PAssert.that(rows)
      .containsInAnyOrder(Row.withSchema(OUTPUT_ROW_SCHEMA).addValues(42L, "USA").build());

  PipelineResult.State state = readPipeline.run().waitUntilFinish();
  // Expected value goes first so a failure message reports expected/actual correctly.
  assertEquals(State.DONE, state);
}
 
Example 6
Source Project: beam   File: DataStoreReadWriteIT.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Inserts one row into a {@code datastoreV1} external table via Beam SQL, then reads the table
 * back and checks the output schema and the final state of the read pipeline.
 */
@Test
public void testDataStoreV1SqlWriteRead_withoutKey() {
  final BeamSqlEnv env = BeamSqlEnv.inMemory(new DataStoreV1TableProvider());
  final String project = options.getProject();

  // Register the external table with a single VARCHAR column.
  final String ddl =
      "CREATE EXTERNAL TABLE TEST( \n"
          + "   `content` VARCHAR \n"
          + ") \n"
          + "TYPE 'datastoreV1' \n"
          + "LOCATION '"
          + project
          + "/"
          + KIND
          + "'";
  env.executeDdl(ddl);

  // Write one row and block until the write pipeline has finished.
  BeamSqlRelUtils.toPCollection(
      writePipeline, env.parseQuery("INSERT INTO TEST VALUES ( '3000' )"));
  writePipeline.run().waitUntilFinish();

  final PCollection<Row> readRows =
      BeamSqlRelUtils.toPCollection(readPipeline, env.parseQuery("SELECT * FROM TEST"));
  assertThat(readRows.getSchema(), equalTo(SOURCE_SCHEMA_WITHOUT_KEY));

  final PipelineResult readResult = readPipeline.run();
  final PipelineResult.State finalState =
      readResult.waitUntilFinish(Duration.standardMinutes(5));
  assertThat(finalState, equalTo(State.DONE));
}
 
Example 7
Source Project: beam   File: JobFailure.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Looks for a failure of the job, checking the pipeline state first and the test results
 * second.
 *
 * @param pipelineResult result handle whose current state is inspected
 * @param testResults results passed to {@code lookForMetricResultFailure} when the state
 *     yields no failure
 * @return the state-related failure if present, otherwise whatever the metric check returns
 */
private static Optional<JobFailure> lookForFailure(
    PipelineResult pipelineResult, List<NamedTestResult> testResults) {
  final Optional<JobFailure> stateFailure = lookForInvalidState(pipelineResult.getState());
  // The metric check is only evaluated when the state itself is acceptable.
  return stateFailure.isPresent() ? stateFailure : lookForMetricResultFailure(testResults);
}
 
Example 8
Source Project: beam   File: KafkaIOIT.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Cancels the read pipeline when {@code readState} is {@code null}.
 *
 * @param readResult result handle of the read pipeline
 * @param readState state returned by the bounded wait; {@code null} is treated as a timeout
 * @throws IOException if cancelling the pipeline fails
 */
private void cancelIfTimeouted(PipelineResult readResult, PipelineResult.State readState)
    throws IOException {

  // TODO(lgajowy) this solution works for dataflow only - it returns null when
  //  waitUntilFinish(Duration duration) exceeds provided duration.
  final boolean timedOut = readState == null;
  if (!timedOut) {
    return;
  }
  readResult.cancel();
}
 
Example 9
Source Project: beam   File: MetricsPusher.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Pushes metrics once and, if a pipeline result is available and its state is terminal,
 * tears down.
 */
private void run() {
  pushMetrics();
  if (pipelineResult == null) {
    // No result handle to inspect; nothing more to do.
    return;
  }
  final boolean finished = pipelineResult.getState().isTerminal();
  if (finished) {
    tearDown();
  }
}
 
Example 10
Source Project: beam   File: BigQueryReadWriteIT.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Inserts one row covering all tested column types into BigQuery, reads it back through a
 * Beam SQL external table configured with the EXPORT read method, and checks the row and the
 * final pipeline state.
 *
 * @throws IOException declared by the test; propagated from the calls in the body
 */
@Test
public void testSQLRead_withExport() throws IOException {
  bigQueryTestingTypes.insertRows(
      SOURCE_SCHEMA_TWO,
      row(
          SOURCE_SCHEMA_TWO,
          9223372036854775807L,
          (byte) 127,
          (short) 32767,
          2147483647,
          (float) 1.0,
          1.0,
          true,
          parseTimestampWithUTCTimeZone("2018-05-28 20:17:40.123"),
          "varchar",
          "char",
          Arrays.asList("123", "456")));

  BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(new BigQueryTableProvider());

  // The LOCATION clause is closed with "' \n" so that TBLPROPERTIES is separated from the
  // preceding string literal by whitespace.
  String createTableStatement =
      "CREATE EXTERNAL TABLE TEST( \n"
          + "   c_bigint BIGINT, \n"
          + "   c_tinyint TINYINT, \n"
          + "   c_smallint SMALLINT, \n"
          + "   c_integer INTEGER, \n"
          + "   c_float FLOAT, \n"
          + "   c_double DOUBLE, \n"
          + "   c_boolean BOOLEAN, \n"
          + "   c_timestamp TIMESTAMP, \n"
          + "   c_varchar VARCHAR, \n "
          + "   c_char CHAR, \n"
          + "   c_arr ARRAY<VARCHAR> \n"
          + ") \n"
          + "TYPE 'bigquery' \n"
          + "LOCATION '"
          + bigQueryTestingTypes.tableSpec()
          + "' \n"
          + "TBLPROPERTIES "
          + "'{ "
          + METHOD_PROPERTY
          + ": \""
          + Method.EXPORT.toString()
          + "\" }'";
  sqlEnv.executeDdl(createTableStatement);

  String selectTableStatement = "SELECT * FROM TEST";
  PCollection<Row> output =
      BeamSqlRelUtils.toPCollection(readPipeline, sqlEnv.parseQuery(selectTableStatement));

  PAssert.that(output)
      .containsInAnyOrder(
          row(
              SOURCE_SCHEMA_TWO,
              9223372036854775807L,
              (byte) 127,
              (short) 32767,
              2147483647,
              (float) 1.0,
              1.0,
              true,
              parseTimestampWithUTCTimeZone("2018-05-28 20:17:40.123"),
              "varchar",
              "char",
              Arrays.asList("123", "456")));
  PipelineResult.State state = readPipeline.run().waitUntilFinish(Duration.standardMinutes(5));
  assertThat(state, equalTo(State.DONE));
}
 
Example 11
Source Project: beam   File: BigQueryReadWriteIT.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Writes one row covering all tested column types into a BigQuery external table through Beam
 * SQL, reads the table back, and checks the row and the final state of the read pipeline.
 */
@Test
public void testSQLWriteAndRead() {
  BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(new BigQueryTableProvider());

  String createTableStatement =
      "CREATE EXTERNAL TABLE TEST( \n"
          + "   c_bigint BIGINT, \n"
          + "   c_tinyint TINYINT, \n"
          + "   c_smallint SMALLINT, \n"
          + "   c_integer INTEGER, \n"
          + "   c_float FLOAT, \n"
          + "   c_double DOUBLE, \n"
          + "   c_boolean BOOLEAN, \n"
          + "   c_timestamp TIMESTAMP, \n"
          + "   c_varchar VARCHAR, \n "
          + "   c_char CHAR, \n"
          + "   c_arr ARRAY<VARCHAR> \n"
          + ") \n"
          + "TYPE 'bigquery' \n"
          + "LOCATION '"
          + bigQueryTestingTypes.tableSpec()
          + "'";
  sqlEnv.executeDdl(createTableStatement);

  String insertStatement =
      "INSERT INTO TEST VALUES ("
          + "9223372036854775807, "
          + "127, "
          + "32767, "
          + "2147483647, "
          + "1.0, "
          + "1.0, "
          + "TRUE, "
          + "TIMESTAMP '2018-05-28 20:17:40.123', "
          + "'varchar', "
          + "'char', "
          + "ARRAY['123', '456']"
          + ")";

  // Parse the INSERT once; the parsed plan is attached directly to the write pipeline.
  BeamSqlRelUtils.toPCollection(pipeline, sqlEnv.parseQuery(insertStatement));
  pipeline.run().waitUntilFinish(Duration.standardMinutes(5));

  String selectTableStatement = "SELECT * FROM TEST";
  PCollection<Row> output =
      BeamSqlRelUtils.toPCollection(readPipeline, sqlEnv.parseQuery(selectTableStatement));

  PAssert.that(output)
      .containsInAnyOrder(
          row(
              SOURCE_SCHEMA_TWO,
              9223372036854775807L,
              (byte) 127,
              (short) 32767,
              2147483647,
              (float) 1.0,
              1.0,
              true,
              parseTimestampWithUTCTimeZone("2018-05-28 20:17:40.123"),
              "varchar",
              "char",
              Arrays.asList("123", "456")));
  PipelineResult.State state = readPipeline.run().waitUntilFinish(Duration.standardMinutes(5));
  assertThat(state, equalTo(State.DONE));
}
 
Example 12
Source Project: beam   File: BigQueryReadWriteIT.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Writes one row covering all tested column types into a BigQuery external table configured
 * with the EXPORT read method, reads the table back, and checks the row and the final state
 * of the read pipeline.
 */
@Test
public void testSQLWriteAndRead_withExport() {
  BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(new BigQueryTableProvider());

  String createTableStatement =
      "CREATE EXTERNAL TABLE TEST( \n"
          + "   c_bigint BIGINT, \n"
          + "   c_tinyint TINYINT, \n"
          + "   c_smallint SMALLINT, \n"
          + "   c_integer INTEGER, \n"
          + "   c_float FLOAT, \n"
          + "   c_double DOUBLE, \n"
          + "   c_boolean BOOLEAN, \n"
          + "   c_timestamp TIMESTAMP, \n"
          + "   c_varchar VARCHAR, \n "
          + "   c_char CHAR, \n"
          + "   c_arr ARRAY<VARCHAR> \n"
          + ") \n"
          + "TYPE 'bigquery' \n"
          + "LOCATION '"
          + bigQueryTestingTypes.tableSpec()
          + "' \n"
          + "TBLPROPERTIES "
          + "'{ "
          + METHOD_PROPERTY
          + ": \""
          + Method.EXPORT.toString()
          + "\" }'";
  sqlEnv.executeDdl(createTableStatement);

  String insertStatement =
      "INSERT INTO TEST VALUES ("
          + "9223372036854775807, "
          + "127, "
          + "32767, "
          + "2147483647, "
          + "1.0, "
          + "1.0, "
          + "TRUE, "
          + "TIMESTAMP '2018-05-28 20:17:40.123', "
          + "'varchar', "
          + "'char', "
          + "ARRAY['123', '456']"
          + ")";

  // Parse the INSERT once; the parsed plan is attached directly to the write pipeline.
  BeamSqlRelUtils.toPCollection(pipeline, sqlEnv.parseQuery(insertStatement));
  pipeline.run().waitUntilFinish(Duration.standardMinutes(5));

  String selectTableStatement = "SELECT * FROM TEST";
  PCollection<Row> output =
      BeamSqlRelUtils.toPCollection(readPipeline, sqlEnv.parseQuery(selectTableStatement));

  PAssert.that(output)
      .containsInAnyOrder(
          row(
              SOURCE_SCHEMA_TWO,
              9223372036854775807L,
              (byte) 127,
              (short) 32767,
              2147483647,
              (float) 1.0,
              1.0,
              true,
              parseTimestampWithUTCTimeZone("2018-05-28 20:17:40.123"),
              "varchar",
              "char",
              Arrays.asList("123", "456")));
  PipelineResult.State state = readPipeline.run().waitUntilFinish(Duration.standardMinutes(5));
  assertThat(state, equalTo(State.DONE));
}
 
Example 13
Source Project: beam   File: BigQueryReadWriteIT.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Writes one row covering all tested column types into a BigQuery external table configured
 * with the DIRECT_READ read method, reads the table back, and checks the row and the final
 * state of the read pipeline.
 */
@Test
public void testSQLWriteAndRead_withDirectRead() {
  BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(new BigQueryTableProvider());

  String createTableStatement =
      "CREATE EXTERNAL TABLE TEST( \n"
          + "   c_bigint BIGINT, \n"
          + "   c_tinyint TINYINT, \n"
          + "   c_smallint SMALLINT, \n"
          + "   c_integer INTEGER, \n"
          + "   c_float FLOAT, \n"
          + "   c_double DOUBLE, \n"
          + "   c_boolean BOOLEAN, \n"
          + "   c_timestamp TIMESTAMP, \n"
          + "   c_varchar VARCHAR, \n "
          + "   c_char CHAR, \n"
          + "   c_arr ARRAY<VARCHAR> \n"
          + ") \n"
          + "TYPE 'bigquery' \n"
          + "LOCATION '"
          + bigQueryTestingTypes.tableSpec()
          + "' \n"
          + "TBLPROPERTIES "
          + "'{ "
          + METHOD_PROPERTY
          + ": \""
          + Method.DIRECT_READ.toString()
          + "\" }'";
  sqlEnv.executeDdl(createTableStatement);

  String insertStatement =
      "INSERT INTO TEST VALUES ("
          + "9223372036854775807, "
          + "127, "
          + "32767, "
          + "2147483647, "
          + "1.0, "
          + "1.0, "
          + "TRUE, "
          + "TIMESTAMP '2018-05-28 20:17:40.123', "
          + "'varchar', "
          + "'char', "
          + "ARRAY['123', '456']"
          + ")";

  // Parse the INSERT once; the parsed plan is attached directly to the write pipeline.
  BeamSqlRelUtils.toPCollection(pipeline, sqlEnv.parseQuery(insertStatement));
  pipeline.run().waitUntilFinish(Duration.standardMinutes(5));

  String selectTableStatement = "SELECT * FROM TEST";
  PCollection<Row> output =
      BeamSqlRelUtils.toPCollection(readPipeline, sqlEnv.parseQuery(selectTableStatement));

  PAssert.that(output)
      .containsInAnyOrder(
          row(
              SOURCE_SCHEMA_TWO,
              9223372036854775807L,
              (byte) 127,
              (short) 32767,
              2147483647,
              (float) 1.0,
              1.0,
              true,
              parseTimestampWithUTCTimeZone("2018-05-28 20:17:40.123"),
              "varchar",
              "char",
              Arrays.asList("123", "456")));
  PipelineResult.State state = readPipeline.run().waitUntilFinish(Duration.standardMinutes(5));
  assertThat(state, equalTo(State.DONE));
}
 
Example 14
Source Project: beam   File: BigQueryReadWriteIT.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Writes one row into a BigQuery external table configured with the DIRECT_READ read method,
 * then selects three columns in a non-schema order and checks the shape of the planned query
 * (a Calc on top of a push-down IO source), the output schema, the row, and the final state
 * of the read pipeline.
 */
@Test
public void testSQLRead_withDirectRead_withProjectPushDown() {
  BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(new BigQueryTableProvider());

  String createTableStatement =
      "CREATE EXTERNAL TABLE TEST( \n"
          + "   c_bigint BIGINT, \n"
          + "   c_tinyint TINYINT, \n"
          + "   c_smallint SMALLINT, \n"
          + "   c_integer INTEGER, \n"
          + "   c_float FLOAT, \n"
          + "   c_double DOUBLE, \n"
          + "   c_boolean BOOLEAN, \n"
          + "   c_timestamp TIMESTAMP, \n"
          + "   c_varchar VARCHAR, \n "
          + "   c_char CHAR, \n"
          + "   c_arr ARRAY<VARCHAR> \n"
          + ") \n"
          + "TYPE 'bigquery' \n"
          + "LOCATION '"
          + bigQueryTestingTypes.tableSpec()
          + "' \n"
          + "TBLPROPERTIES "
          + "'{ "
          + METHOD_PROPERTY
          + ": \""
          + Method.DIRECT_READ.toString()
          + "\" }'";
  sqlEnv.executeDdl(createTableStatement);

  String insertStatement =
      "INSERT INTO TEST VALUES ("
          + "9223372036854775807, "
          + "127, "
          + "32767, "
          + "2147483647, "
          + "1.0, "
          + "1.0, "
          + "TRUE, "
          + "TIMESTAMP '2018-05-28 20:17:40.123', "
          + "'varchar', "
          + "'char', "
          + "ARRAY['123', '456']"
          + ")";

  // Parse the INSERT once; the parsed plan is attached directly to the write pipeline.
  BeamSqlRelUtils.toPCollection(pipeline, sqlEnv.parseQuery(insertStatement));
  pipeline.run().waitUntilFinish(Duration.standardMinutes(5));

  String selectTableStatement = "SELECT c_integer, c_varchar, c_tinyint FROM TEST";
  BeamRelNode relNode = sqlEnv.parseQuery(selectTableStatement);
  PCollection<Row> output = BeamSqlRelUtils.toPCollection(readPipeline, relNode);

  // Calc is not dropped because BigQuery does not support field reordering yet.
  assertThat(relNode, instanceOf(BeamCalcRel.class));
  assertThat(relNode.getInput(0), instanceOf(BeamPushDownIOSourceRel.class));
  // IO projects fields in the same order they are defined in the schema.
  assertThat(
      relNode.getInput(0).getRowType().getFieldNames(),
      containsInAnyOrder("c_tinyint", "c_integer", "c_varchar"));
  // Field reordering is done in a Calc
  assertThat(
      output.getSchema(),
      equalTo(
          Schema.builder()
              .addNullableField("c_integer", INT32)
              .addNullableField("c_varchar", STRING)
              .addNullableField("c_tinyint", BYTE)
              .build()));

  PAssert.that(output)
      .containsInAnyOrder(row(output.getSchema(), 2147483647, "varchar", (byte) 127));
  PipelineResult.State state = readPipeline.run().waitUntilFinish(Duration.standardMinutes(5));
  assertThat(state, equalTo(State.DONE));
}
 
Example 15
Source Project: beam   File: SparkPipelineResult.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Offers {@code CANCELLED} as the new state and returns the value of {@code state}
 * afterwards.
 *
 * @return the value of {@code state} after the offer
 * @throws IOException declared by the overridden method
 */
@Override
public PipelineResult.State cancel() throws IOException {
  final PipelineResult.State requested = PipelineResult.State.CANCELLED;
  offerNewState(requested);
  return state;
}
 
Example 16
/**
 * Offers {@code CANCELLED} as the new state and returns the value of {@code state}
 * afterwards.
 *
 * @return the value of {@code state} after the offer
 * @throws IOException declared by the overridden method
 */
@Override
public PipelineResult.State cancel() throws IOException {
  final PipelineResult.State requested = PipelineResult.State.CANCELLED;
  offerNewState(requested);
  return state;
}
 
Example 17
Source Project: beam   File: DataStoreReadWriteIT.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Reads a {@code datastoreV1} external table declared with all supported column types and
 * checks the output schema and the final state of the read pipeline.
 */
@Test
public void testReadAllSupportedTypes() {
  final BeamSqlEnv env = BeamSqlEnv.inMemory(new DataStoreV1TableProvider());
  final String project = options.getProject();

  final Schema wantedSchema =
      Schema.builder()
          .addNullableField("__key__", VARBINARY)
          .addNullableField("boolean", BOOLEAN)
          .addNullableField("datetime", DATETIME)
          // TODO: flattening of nested fields by Calcite causes some issues.
          /*.addRowField("embeddedentity",
          Schema.builder()
              .addNullableField("property1", STRING)
              .addNullableField("property2", INT64)
              .build())*/
          .addNullableField("floatingnumber", DOUBLE)
          .addNullableField("integer", INT64)
          .addNullableField("primitivearray", FieldType.array(STRING))
          .addNullableField("string", STRING)
          .addNullableField("text", STRING)
          .build();

  // Column list mirrors the expected schema above (nested entity column left out as well).
  final String ddl =
      "CREATE EXTERNAL TABLE TEST( \n"
          + "   `__key__` VARBINARY, \n"
          + "   `boolean` BOOLEAN, \n"
          + "   `datetime` TIMESTAMP, \n"
          // + "   `embeddedentity` ROW(`property1` VARCHAR, `property2` BIGINT), \n"
          + "   `floatingnumber` DOUBLE, \n"
          + "   `integer` BIGINT, \n"
          + "   `primitivearray` ARRAY<VARCHAR>, \n"
          + "   `string` VARCHAR, \n"
          + "   `text` VARCHAR"
          + ") \n"
          + "TYPE 'datastoreV1' \n"
          + "LOCATION '"
          + project
          + "/"
          + KIND_ALL_TYPES
          + "'";
  env.executeDdl(ddl);

  final PCollection<Row> readRows =
      BeamSqlRelUtils.toPCollection(readPipeline, env.parseQuery("SELECT * FROM TEST"));
  assertThat(readRows.getSchema(), equalTo(wantedSchema));

  final PipelineResult readResult = readPipeline.run();
  final PipelineResult.State finalState =
      readResult.waitUntilFinish(Duration.standardMinutes(5));
  assertThat(finalState, equalTo(State.DONE));
}
 
Example 18
Source Project: beam   File: SparkPipelineResult.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Returns the current value of {@code state}.
 *
 * @return the value held in {@code state}
 */
@Override
public PipelineResult.State getState() {
  final PipelineResult.State current = state;
  return current;
}
 
Example 19
/**
 * Returns the current value of {@code state}.
 *
 * @return the value held in {@code state}
 */
@Override
public PipelineResult.State getState() {
  final PipelineResult.State current = state;
  return current;
}
 
Example 20
/**
 * Waits for the pipeline to finish by delegating to {@code waitUntilFinish(Duration)} with a
 * duration of {@code Long.MAX_VALUE} milliseconds.
 *
 * @return whatever the delegated call returns
 */
@Override
public PipelineResult.State waitUntilFinish() {
  final long maxMillis = Long.MAX_VALUE;
  return waitUntilFinish(Duration.millis(maxMillis));
}