org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method Java Examples

The following examples show how to use org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method. Each example lists its source file and the open source project and license it comes from.
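
As a quick orientation before the examples, here is a minimal, hypothetical sketch of how withMethod selects the write path. The table spec, schema variable, and rows collection are placeholders, not taken from any example below.

import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method;

// Sketch only: "rows" is an existing PCollection<TableRow>; the table spec
// and "schema" are placeholders.
rows.apply(
    BigQueryIO.writeTableRows()
        .to("my-project:my_dataset.my_table")
        .withSchema(schema)
        // FILE_LOADS batches rows into BigQuery load jobs;
        // STREAMING_INSERTS writes through the streaming insert API;
        // DEFAULT lets the runner choose based on whether the input is bounded.
        .withMethod(Method.FILE_LOADS));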
Example #1
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testWriteToTableDecorator() throws Exception {
  TableRow row1 = new TableRow().set("name", "a").set("number", "1");
  TableRow row2 = new TableRow().set("name", "b").set("number", "2");

  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(new TableFieldSchema().setName("number").setType("INTEGER")));
  p.apply(Create.of(row1, row2))
      .apply(
          BigQueryIO.writeTableRows()
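              // The "$20171127" suffix is a partition decorator: rows are
              // written into the table's 2017-11-27 daily partition.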
              .to("project-id:dataset-id.table-id$20171127")
              .withTestServices(fakeBqServices)
              .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
              .withSchema(schema)
              .withoutValidation());
  p.run();
}
 
Example #2
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testSchemaWriteStreams() throws Exception {
  p.apply(
          Create.of(
              new SchemaPojo("a", 1),
              new SchemaPojo("b", 2),
              new SchemaPojo("c", 3),
              new SchemaPojo("d", 4)))
      .apply(
          BigQueryIO.<SchemaPojo>write()
              .to("project-id:dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withMethod(Method.STREAMING_INSERTS)
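              // useBeamSchema() derives the table schema from the Beam schema
              // registered for SchemaPojo, so no withSchema(...) call is needed.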
              .useBeamSchema()
              .withTestServices(fakeBqServices)
              .withoutValidation());
  p.run();

  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(
          new TableRow().set("name", "a").set("number", "1"),
          new TableRow().set("name", "b").set("number", "2"),
          new TableRow().set("name", "c").set("number", "3"),
          new TableRow().set("name", "d").set("number", "4")));
}
 
Example #3
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testSchemaWriteLoads() throws Exception {
  p.apply(
          Create.of(
              new SchemaPojo("a", 1),
              new SchemaPojo("b", 2),
              new SchemaPojo("c", 3),
              new SchemaPojo("d", 4)))
      .apply(
          BigQueryIO.<SchemaPojo>write()
              .to("project-id:dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withMethod(Method.FILE_LOADS)
              .useBeamSchema()
              .withTestServices(fakeBqServices)
              .withoutValidation());
  p.run();

  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(
          new TableRow().set("name", "a").set("number", "1"),
          new TableRow().set("name", "b").set("number", "2"),
          new TableRow().set("name", "c").set("number", "3"),
          new TableRow().set("name", "d").set("number", "4")));
}
 
Example #4
Source File: DataCatalogBigQueryIT.java    From beam with Apache License 2.0
@Test
public void testRead() throws Exception {
  TableReference bqTable = bigQuery.tableReference();

  // Streaming inserts do not work with DIRECT_READ mode: newly inserted rows
  // can lag by several hours, so this test writes with FILE_LOADS.
  PCollection<Row> data =
      writePipeline.apply(Create.of(row(1, "name1"), row(2, "name2"), row(3, "name3")));
  data.apply(
      BigQueryIO.<Row>write()
          .withSchema(BigQueryUtils.toTableSchema(ID_NAME_SCHEMA))
          .withFormatFunction(BigQueryUtils.toTableRow())
          .withMethod(Method.FILE_LOADS)
          .to(bqTable));
  writePipeline.run().waitUntilFinish(Duration.standardMinutes(2));

  String tableId =
      String.format(
          "bigquery.`table`.`%s`.`%s`.`%s`",
          bqTable.getProjectId(), bqTable.getDatasetId(), bqTable.getTableId());

  readPipeline
      .getOptions()
      .as(BeamSqlPipelineOptions.class)
      .setPlannerName(queryPlanner.getCanonicalName());

  try (DataCatalogTableProvider tableProvider =
      DataCatalogTableProvider.create(
          readPipeline.getOptions().as(DataCatalogPipelineOptions.class))) {
    PCollection<Row> result =
        readPipeline.apply(
            "query",
            SqlTransform.query("SELECT id, name FROM " + tableId)
                .withDefaultTableProvider("datacatalog", tableProvider));

    PAssert.that(result).containsInAnyOrder(row(1, "name1"), row(2, "name2"), row(3, "name3"));
    readPipeline.run().waitUntilFinish(Duration.standardMinutes(2));
  }
}
 
Example #5
Source File: BigQueryKmsKeyIT.java    From beam with Apache License 2.0
/**
 * Tests query and write jobs with KMS key settings, and verifies that the output table is
 * created with the given KMS key.
 */
private void testQueryAndWrite(Method method) throws Exception {
  String outputTableId = "testQueryAndWrite_" + method.name();
  String outputTableSpec = project + ":" + BIG_QUERY_DATASET_ID + "." + outputTableId;

  options.setTempLocation(options.getTempRoot() + "/bq_it_temp");
  Pipeline p = Pipeline.create(options);
  // Reading triggers BQ query and extract jobs. Writing triggers either a load job or performs a
  // streaming insert (depending on method).
  p.apply(
          BigQueryIO.readTableRows()
              .fromQuery("SELECT * FROM (SELECT \"foo\" as fruit)")
              .withKmsKey(kmsKey))
      .apply(
          BigQueryIO.writeTableRows()
              .to(outputTableSpec)
              .withSchema(OUTPUT_SCHEMA)
              .withMethod(method)
              .withKmsKey(kmsKey));
  p.run().waitUntilFinish();

  Table table = BQ_CLIENT.getTableResource(project, BIG_QUERY_DATASET_ID, outputTableId);
  assertNotNull(String.format("table not found: %s", outputTableId), table);
  assertNotNull(
      "output table has no EncryptionConfiguration", table.getEncryptionConfiguration());
  assertEquals(kmsKey, table.getEncryptionConfiguration().getKmsKeyName());
}
 
Example #6
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
void schemaUpdateOptionsTest(
    BigQueryIO.Write.Method insertMethod, Set<SchemaUpdateOption> schemaUpdateOptions)
    throws Exception {
  TableRow row = new TableRow().set("date", "2019-01-01").set("number", "1");

  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("date").setType("DATE"),
                  new TableFieldSchema().setName("number").setType("INTEGER")));

  Write<TableRow> writeTransform =
      BigQueryIO.writeTableRows()
          .to("project-id:dataset-id.table-id")
          .withTestServices(fakeBqServices)
          .withMethod(insertMethod)
          .withSchema(schema)
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
          .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND)
          .withSchemaUpdateOptions(schemaUpdateOptions);

  p.apply(Create.<TableRow>of(row)).apply(writeTransform);
  p.run();

  List<String> expectedOptions =
      schemaUpdateOptions.stream().map(Enum::name).collect(Collectors.toList());

  for (Job job : fakeJobService.getAllJobs()) {
    JobConfigurationLoad configuration = job.getConfiguration().getLoad();
    assertEquals(expectedOptions, configuration.getSchemaUpdateOptions());
  }
}
 
Example #7
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testWriteValidateFailsWithAvroFormatAndStreamingInserts() {
  p.enableAbandonedNodeEnforcement(false);

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("Writing avro formatted data is only supported for FILE_LOADS");
  p.apply(Create.empty(INPUT_RECORD_CODER))
      .apply(
          BigQueryIO.<InputRecord>write()
              .to("dataset.table")
              .withSchema(new TableSchema())
              .withAvroFormatFunction(r -> new GenericData.Record(r.getSchema()))
              .withMethod(Method.STREAMING_INSERTS)
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED));
}
 
Example #8
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testWriteWithoutInsertId() throws Exception {
  TableRow row1 = new TableRow().set("name", "a").set("number", 1);
  TableRow row2 = new TableRow().set("name", "b").set("number", 2);
  TableRow row3 = new TableRow().set("name", "c").set("number", 3);
  p.apply(Create.of(row1, row2, row3).withCoder(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
              .withSchema(
                  new TableSchema()
                      .setFields(
                          ImmutableList.of(
                              new TableFieldSchema().setName("name").setType("STRING"),
                              new TableFieldSchema().setName("number").setType("INTEGER"))))
              .withTestServices(fakeBqServices)
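              // ignoreInsertIds() skips the per-row insert IDs that BigQuery
              // uses for best-effort deduplication, trading deduplication for
              // higher streaming throughput.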
              .ignoreInsertIds()
              .withoutValidation());
  p.run();
  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(row1, row2, row3));
  // Verify no insert id is added.
  assertThat(
      fakeDatasetService.getAllIds("project-id", "dataset-id", "table-id"), containsInAnyOrder());
}
 
Example #9
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testFailuresNoRetryPolicy() throws Exception {
  TableRow row1 = new TableRow().set("name", "a").set("number", "1");
  TableRow row2 = new TableRow().set("name", "b").set("number", "2");
  TableRow row3 = new TableRow().set("name", "c").set("number", "3");

  TableDataInsertAllResponse.InsertErrors ephemeralError =
      new TableDataInsertAllResponse.InsertErrors()
          .setErrors(ImmutableList.of(new ErrorProto().setReason("timeout")));

  fakeDatasetService.failOnInsert(
      ImmutableMap.of(
          row1, ImmutableList.of(ephemeralError, ephemeralError),
          row2, ImmutableList.of(ephemeralError, ephemeralError)));

  p.apply(Create.of(row1, row2, row3))
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
              .withSchema(
                  new TableSchema()
                      .setFields(
                          ImmutableList.of(
                              new TableFieldSchema().setName("name").setType("STRING"),
                              new TableFieldSchema().setName("number").setType("INTEGER"))))
              .withTestServices(fakeBqServices)
              .withoutValidation());
  p.run();

  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(row1, row2, row3));
}
 
Example #10
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testTriggeredFileLoads() throws Exception {
  List<TableRow> elements = Lists.newArrayList();
  for (int i = 0; i < 30; ++i) {
    elements.add(new TableRow().set("number", i));
  }

  TestStream<TableRow> testStream =
      TestStream.create(TableRowJsonCoder.of())
          .addElements(
              elements.get(0), Iterables.toArray(elements.subList(1, 10), TableRow.class))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              elements.get(10), Iterables.toArray(elements.subList(11, 20), TableRow.class))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              elements.get(20), Iterables.toArray(elements.subList(21, 30), TableRow.class))
          .advanceWatermarkToInfinity();

  p.apply(testStream)
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withSchema(
                  new TableSchema()
                      .setFields(
                          ImmutableList.of(
                              new TableFieldSchema().setName("number").setType("INTEGER"))))
              .withTestServices(fakeBqServices)
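              // An unbounded input with FILE_LOADS requires an explicit
              // triggering frequency (how often load jobs are issued) and a
              // fixed number of file shards.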
              .withTriggeringFrequency(Duration.standardSeconds(30))
              .withNumFileShards(2)
              .withMethod(BigQueryIO.Write.Method.FILE_LOADS)
              .withoutValidation());
  p.run();

  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(Iterables.toArray(elements, TableRow.class)));
}
 
Example #11
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testClusteringTableFunction() throws Exception {
  TableRow row1 = new TableRow().set("date", "2018-01-01").set("number", "1");
  TableRow row2 = new TableRow().set("date", "2018-01-02").set("number", "2");

  TimePartitioning timePartitioning = new TimePartitioning().setType("DAY").setField("date");
  Clustering clustering = new Clustering().setFields(ImmutableList.of("date"));
  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("date").setType("DATE"),
                  new TableFieldSchema().setName("number").setType("INTEGER")));
  p.apply(Create.of(row1, row2))
      .apply(
          BigQueryIO.writeTableRows()
              .to(
                  (ValueInSingleWindow<TableRow> vsw) -> {
                    String tableSpec =
                        "project-id:dataset-id.table-" + vsw.getValue().get("number");
                    return new TableDestination(
                        tableSpec,
                        null,
                        new TimePartitioning().setType("DAY").setField("date"),
                        new Clustering().setFields(ImmutableList.of("date")));
                  })
              .withTestServices(fakeBqServices)
              .withMethod(BigQueryIO.Write.Method.FILE_LOADS)
              .withSchema(schema)
              .withClustering()
              .withoutValidation());
  p.run();
  Table table =
      fakeDatasetService.getTable(
          BigQueryHelpers.parseTableSpec("project-id:dataset-id.table-1"));
  assertEquals(schema, table.getSchema());
  assertEquals(timePartitioning, table.getTimePartitioning());
  assertEquals(clustering, table.getClustering());
}
 
Example #12
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testWriteFileSchemaUpdateOptionAll() throws Exception {
  Set<SchemaUpdateOption> options = EnumSet.allOf(SchemaUpdateOption.class);
  schemaUpdateOptionsTest(BigQueryIO.Write.Method.FILE_LOADS, options);
}
 
Example #13
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testWriteFileSchemaUpdateOptionAllowFieldRelaxation() throws Exception {
  Set<SchemaUpdateOption> options = EnumSet.of(SchemaUpdateOption.ALLOW_FIELD_RELAXATION);
  schemaUpdateOptionsTest(BigQueryIO.Write.Method.FILE_LOADS, options);
}
 
Example #14
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testWriteFileSchemaUpdateOptionAllowFieldAddition() throws Exception {
  Set<SchemaUpdateOption> options = EnumSet.of(SchemaUpdateOption.ALLOW_FIELD_ADDITION);
  schemaUpdateOptionsTest(BigQueryIO.Write.Method.FILE_LOADS, options);
}
 
Example #15
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testExtendedErrorRetrieval() throws Exception {
  TableRow row1 = new TableRow().set("name", "a").set("number", "1");
  TableRow row2 = new TableRow().set("name", "b").set("number", "2");
  TableRow row3 = new TableRow().set("name", "c").set("number", "3");
  String tableSpec = "project-id:dataset-id.table-id";

  TableDataInsertAllResponse.InsertErrors ephemeralError =
      new TableDataInsertAllResponse.InsertErrors()
          .setErrors(ImmutableList.of(new ErrorProto().setReason("timeout")));
  TableDataInsertAllResponse.InsertErrors persistentError =
      new TableDataInsertAllResponse.InsertErrors()
          .setErrors(Lists.newArrayList(new ErrorProto().setReason("invalidQuery")));

  fakeDatasetService.failOnInsert(
      ImmutableMap.of(
          row1, ImmutableList.of(ephemeralError, ephemeralError),
          row2, ImmutableList.of(ephemeralError, ephemeralError, persistentError)));

  PCollection<BigQueryInsertError> failedRows =
      p.apply(Create.of(row1, row2, row3))
          .apply(
              BigQueryIO.writeTableRows()
                  .to(tableSpec)
                  .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                  .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
                  .withSchema(
                      new TableSchema()
                          .setFields(
                              ImmutableList.of(
                                  new TableFieldSchema().setName("name").setType("STRING"),
                                  new TableFieldSchema().setName("number").setType("INTEGER"))))
                  .withFailedInsertRetryPolicy(InsertRetryPolicy.retryTransientErrors())
                  .withTestServices(fakeBqServices)
                  .withoutValidation()
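                  // withExtendedErrorInfo() switches the failed-inserts output
                  // to BigQueryInsertError values that carry the row, the
                  // error, and the destination table.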
                  .withExtendedErrorInfo())
          .getFailedInsertsWithErr();

  // row2 finally fails with a non-retryable error, so we expect to see it in the collection of
  // failed rows.
  PAssert.that(failedRows)
      .containsInAnyOrder(
          new BigQueryInsertError(
              row2, persistentError, BigQueryHelpers.parseTableSpec(tableSpec)));
  p.run();

  // Only row1 and row3 were successfully inserted.
  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(row1, row3));
}
 
Example #16
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testRetryPolicy() throws Exception {
  TableRow row1 = new TableRow().set("name", "a").set("number", "1");
  TableRow row2 = new TableRow().set("name", "b").set("number", "2");
  TableRow row3 = new TableRow().set("name", "c").set("number", "3");

  TableDataInsertAllResponse.InsertErrors ephemeralError =
      new TableDataInsertAllResponse.InsertErrors()
          .setErrors(ImmutableList.of(new ErrorProto().setReason("timeout")));
  TableDataInsertAllResponse.InsertErrors persistentError =
      new TableDataInsertAllResponse.InsertErrors()
          .setErrors(ImmutableList.of(new ErrorProto().setReason("invalidQuery")));

  fakeDatasetService.failOnInsert(
      ImmutableMap.of(
          row1, ImmutableList.of(ephemeralError, ephemeralError),
          row2, ImmutableList.of(ephemeralError, ephemeralError, persistentError)));

  PCollection<TableRow> failedRows =
      p.apply(Create.of(row1, row2, row3))
          .apply(
              BigQueryIO.writeTableRows()
                  .to("project-id:dataset-id.table-id")
                  .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                  .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
                  .withSchema(
                      new TableSchema()
                          .setFields(
                              ImmutableList.of(
                                  new TableFieldSchema().setName("name").setType("STRING"),
                                  new TableFieldSchema().setName("number").setType("INTEGER"))))
                  .withFailedInsertRetryPolicy(InsertRetryPolicy.retryTransientErrors())
                  .withTestServices(fakeBqServices)
                  .withoutValidation())
          .getFailedInserts();
  // row2 finally fails with a non-retryable error, so we expect to see it in the collection of
  // failed rows.
  PAssert.that(failedRows).containsInAnyOrder(row2);
  p.run();

  // Only row1 and row3 were successfully inserted.
  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(row1, row3));
}
 
Example #17
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testTriggeredFileLoadsWithTempTables() throws Exception {
  List<TableRow> elements = Lists.newArrayList();
  for (int i = 0; i < 30; ++i) {
    elements.add(new TableRow().set("number", i));
  }

  TestStream<TableRow> testStream =
      TestStream.create(TableRowJsonCoder.of())
          .addElements(
              elements.get(0), Iterables.toArray(elements.subList(1, 10), TableRow.class))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              elements.get(10), Iterables.toArray(elements.subList(11, 20), TableRow.class))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              elements.get(20), Iterables.toArray(elements.subList(21, 30), TableRow.class))
          .advanceWatermarkToInfinity();

  p.apply(testStream)
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withSchema(
                  new TableSchema()
                      .setFields(
                          ImmutableList.of(
                              new TableFieldSchema().setName("number").setType("INTEGER"))))
              .withTestServices(fakeBqServices)
              .withTriggeringFrequency(Duration.standardSeconds(30))
              .withNumFileShards(2)
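              // The 1-byte and 1-file partition caps force the multi-partition
              // path, which loads into temporary tables and then copies the
              // results into the destination table.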
              .withMaxBytesPerPartition(1)
              .withMaxFilesPerPartition(1)
              .withMethod(BigQueryIO.Write.Method.FILE_LOADS)
              .withoutValidation());
  p.run();

  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(Iterables.toArray(elements, TableRow.class)));
}
 
Example #18
Source File: PubsubAvroToBigQuery.java    From DataflowTemplates with Apache License 2.0
/**
 * Runs the pipeline with the supplied options.
 *
 * @param options execution parameters to the pipeline
 * @return result of the pipeline execution as a {@link PipelineResult}
 */
private static PipelineResult run(PubsubAvroToBigQueryOptions options) {

  // Create the pipeline.
  Pipeline pipeline = Pipeline.create(options);

  Schema schema = SchemaUtils.getAvroSchema(options.getSchemaPath());

  WriteResult writeResults =
      pipeline
          .apply(
              "Read Avro records",
              PubsubIO.readAvroGenericRecords(schema)
                  .fromSubscription(options.getInputSubscription()))

          .apply(
              "Write to BigQuery",
              BigQueryIO.<GenericRecord>write()
                  .to(options.getOutputTableSpec())
                  .useBeamSchema()
                  .withMethod(Method.STREAMING_INSERTS)
                  .withWriteDisposition(WriteDisposition.valueOf(options.getWriteDisposition()))
                  .withCreateDisposition(
                      CreateDisposition.valueOf(options.getCreateDisposition()))
                  .withFailedInsertRetryPolicy(InsertRetryPolicy.retryTransientErrors())
                  .withExtendedErrorInfo());

  writeResults
      .getFailedInsertsWithErr()
      .apply(
          "Create error payload",
          ErrorConverters.BigQueryInsertErrorToPubsubMessage.<GenericRecord>newBuilder()
              .setPayloadCoder(AvroCoder.of(schema))
              .setTranslateFunction(
                  BigQueryConverters.TableRowToGenericRecordFn.of(schema))
              .build())
      .apply(
          "Write failed records",
          PubsubIO.writeMessages().to(options.getOutputTopic()));

  // Execute the pipeline and return the result.
  return pipeline.run();
}
 
Example #19
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test(expected = IllegalArgumentException.class)
public void testClusteringThrowsWithoutPartitioning() throws Exception {
  p.enableAbandonedNodeEnforcement(false);
  testTimePartitioningClustering(Method.STREAMING_INSERTS, false, true);
}
 
Example #20
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testClusteringBatchLoads() throws Exception {
  testClustering(BigQueryIO.Write.Method.FILE_LOADS);
}
 
Example #21
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testClusteringStreamingInserts() throws Exception {
  testClustering(BigQueryIO.Write.Method.STREAMING_INSERTS);
}
 
Example #22
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testTimePartitioningBatchLoads() throws Exception {
  testTimePartitioning(BigQueryIO.Write.Method.FILE_LOADS);
}
 
Example #23
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testTimePartitioningStreamingInserts() throws Exception {
  testTimePartitioning(BigQueryIO.Write.Method.STREAMING_INSERTS);
}
 
Example #24
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
void testClustering(BigQueryIO.Write.Method insertMethod) throws Exception {
  testTimePartitioningClustering(insertMethod, true, true);
}
 
Example #25
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
void testTimePartitioning(BigQueryIO.Write.Method insertMethod) throws Exception {
  testTimePartitioningClustering(insertMethod, true, false);
}
 
Example #26
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
void testTimePartitioningClustering(
    BigQueryIO.Write.Method insertMethod, boolean enablePartitioning, boolean enableClustering)
    throws Exception {
  TableRow row1 = new TableRow().set("date", "2018-01-01").set("number", "1");
  TableRow row2 = new TableRow().set("date", "2018-01-02").set("number", "2");

  TimePartitioning timePartitioning = new TimePartitioning().setType("DAY").setField("date");
  Clustering clustering = new Clustering().setFields(ImmutableList.of("date"));
  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("date").setType("DATE"),
                  new TableFieldSchema().setName("number").setType("INTEGER")));

  Write<TableRow> writeTransform =
      BigQueryIO.writeTableRows()
          .to("project-id:dataset-id.table-id")
          .withTestServices(fakeBqServices)
          .withMethod(insertMethod)
          .withSchema(schema)
          .withoutValidation();

  if (enablePartitioning) {
    writeTransform = writeTransform.withTimePartitioning(timePartitioning);
  }
  if (enableClustering) {
    writeTransform = writeTransform.withClustering(clustering);
  }

  p.apply(Create.of(row1, row2)).apply(writeTransform);
  p.run();
  Table table =
      fakeDatasetService.getTable(
          BigQueryHelpers.parseTableSpec("project-id:dataset-id.table-id"));

  assertEquals(schema, table.getSchema());
  if (enablePartitioning) {
    assertEquals(timePartitioning, table.getTimePartitioning());
  }
  if (enableClustering) {
    assertEquals(clustering, table.getClustering());
  }
}
 
Example #27
Source File: BigQueryKmsKeyIT.java    From beam with Apache License 2.0
@Test
public void testWithStreamingInserts() throws Exception {
  testQueryAndWrite(Method.STREAMING_INSERTS);
}
 
Example #28
Source File: BigQueryKmsKeyIT.java    From beam with Apache License 2.0
@Test
public void testWithFileLoads() throws Exception {
  testQueryAndWrite(Method.FILE_LOADS);
}
 
Example #29
Source File: BigQueryChangeApplier.java    From DataflowTemplates with Apache License 2.0
@Override
public PDone expand(PCollection<Row> input) {
  Pipeline p = input.getPipeline();
  Schema inputCollectionSchema = input.getSchema();

  PCollection<KV<String, KV<Schema, Schema>>> tableSchemaCollection =
      buildTableSchemaCollection(input);
  PCollectionView<Map<String, KV<Schema, Schema>>> schemaMapView = tableSchemaCollection
      .apply(View.asMap());

  PCollection<TableRow> updatesToWrite = formatIntoTableRows(input);

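  // Route each change record to a per-table changelog destination; the target
  // table is chosen at runtime from the schema map side input.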
  updatesToWrite.apply(
      BigQueryIO.writeTableRows()
          .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
          .withWriteDisposition(WriteDisposition.WRITE_APPEND)
          .withMethod(Method.STREAMING_INSERTS)
          .to(new ChangelogTableDynamicDestinations(
              changeLogDataset, gcpProjectId, schemaMapView)));

  String jobPrefix =
      String.format(
          "beam_cdc_%s_%s_", gcpProjectId.replace(':', '_').replace('.', '_'), replicaDataset);

  // If the input collection does not have a primary key field, then we do not need to issue
  // periodic merge requests.
  if (inputCollectionSchema.hasField(DataflowCdcRowFormat.PRIMARY_KEY)) {
    p.apply("MergeHeartbeat",
        GenerateSequence
            .from(0)
            .withRate(1, Duration.standardSeconds(updateFrequencySeconds)))
        .apply("KeyByTable", ParDo.of(new KeySchemasByTableFn(schemaMapView))
            .withSideInputs(schemaMapView))
        .apply("BuildMergeStatements",
            ParDo.of(
                new MergeStatementBuildingFn(changeLogDataset, replicaDataset, gcpProjectId)))
        .setCoder(SerializableCoder.of(
            TypeDescriptors.kvs(
                TypeDescriptors.strings(),
                TypeDescriptor.of(BigQueryAction.class))))
        .apply("IssueMergeStatements",
            ParDo.of(new BigQueryStatementIssuingFn(jobPrefix)));
  }
  return PDone.in(p);
}