org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TypedRead.Method Java Examples

The following examples show how to use org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TypedRead.Method. They are drawn from the Apache Beam and DataflowTemplates projects; the source file and license are noted with each example.
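For orientation: TypedRead.Method selects how BigQueryIO reads rows. EXPORT runs a BigQuery export job and reads the exported files, DIRECT_READ streams rows through the BigQuery Storage Read API, and DEFAULT lets the connector choose. As a minimal sketch (the project, dataset, and table names are placeholders), a Storage API read looks like this:

import com.google.api.services.bigquery.model.TableRow;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TypedRead.Method;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;

public class StorageApiReadSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    // DIRECT_READ streams rows over the Storage Read API instead of staging
    // an export job's files on Google Cloud Storage.
    PCollection<TableRow> rows =
        p.apply(
            "ReadWithStorageApi",
            BigQueryIO.readTableRows()
                .from("my-project:my_dataset.my_table") // placeholder table spec
                .withMethod(Method.DIRECT_READ));

    p.run().waitUntilFinish();
  }
}

The examples below exercise this same withMethod(...) setting from unit tests, Beam SQL table providers, and Dataflow templates.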
Example #1
Source File: BigQueryIOStorageReadTest.java    From beam with Apache License 2.0
@Test
public void testBuildTableBasedSourceWithReadOptions() {
  TableReadOptions readOptions =
      TableReadOptions.newBuilder()
          .addSelectedFields("field1")
          .addSelectedFields("field2")
          .setRowRestriction("int_field > 5")
          .build();
  BigQueryIO.TypedRead<TableRow> typedRead =
      BigQueryIO.read(new TableRowParser())
          .withCoder(TableRowJsonCoder.of())
          .withMethod(Method.DIRECT_READ)
          .from("foo.com:project:dataset.table")
          .withReadOptions(readOptions);
  checkTypedReadTableObject(typedRead, "foo.com:project", "dataset", "table");
  assertEquals(typedRead.getReadOptions(), readOptions);
}
 
Example #2
Source File: BigQueryIOStorageReadTest.java    From beam with Apache License 2.0
private void checkTypedReadTableObject(
    TypedRead typedRead, String project, String dataset, String table) {
  assertEquals(project, typedRead.getTable().getProjectId());
  assertEquals(dataset, typedRead.getTable().getDatasetId());
  assertEquals(table, typedRead.getTable().getTableId());
  assertNull(typedRead.getQuery());
  assertEquals(Method.DIRECT_READ, typedRead.getMethod());
}
 
Example #3
Source File: BigQueryIOPushDownIT.java    From beam with Apache License 2.0
@Test
public void readUsingDirectReadMethodPushDown() {
  sqlEnv.executeDdl(String.format(CREATE_TABLE_STATEMENT, Method.DIRECT_READ.toString()));

  BeamRelNode beamRelNode = sqlEnv.parseQuery(SELECT_STATEMENT);
  PCollection<Row> output =
      BeamSqlRelUtils.toPCollection(pipeline, beamRelNode)
          .apply(ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC)));

  PipelineResult result = pipeline.run();
  result.waitUntilFinish();
  collectAndPublishMetrics(result, "_directread_pushdown");
}
 
Example #4
Source File: BigQueryIOPushDownIT.java    From beam with Apache License 2.0
@Test
public void readUsingDirectReadMethod() {
  List<RelOptRule> ruleList = new ArrayList<>();
  for (RuleSet x : getRuleSets()) {
    x.iterator().forEachRemaining(ruleList::add);
  }
  // Remove push-down rule
  ruleList.remove(BeamIOPushDownRule.INSTANCE);

  InMemoryMetaStore inMemoryMetaStore = new InMemoryMetaStore();
  inMemoryMetaStore.registerProvider(
      new BigQueryPerfTableProvider(NAMESPACE, FIELDS_READ_METRIC));
  sqlEnv =
      BeamSqlEnv.builder(inMemoryMetaStore)
          .setPipelineOptions(PipelineOptionsFactory.create())
          .setRuleSets(new RuleSet[] {RuleSets.ofList(ruleList)})
          .build();
  sqlEnv.executeDdl(String.format(CREATE_TABLE_STATEMENT, Method.DIRECT_READ.toString()));

  BeamRelNode beamRelNode = sqlEnv.parseQuery(SELECT_STATEMENT);
  PCollection<Row> output =
      BeamSqlRelUtils.toPCollection(pipeline, beamRelNode)
          .apply(ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC)));

  PipelineResult result = pipeline.run();
  result.waitUntilFinish();
  collectAndPublishMetrics(result, "_directread");
}
 
Example #5
Source File: BigQueryIOPushDownIT.java    From beam with Apache License 2.0
@Test
public void readUsingDefaultMethod() {
  sqlEnv.executeDdl(String.format(CREATE_TABLE_STATEMENT, Method.DEFAULT.toString()));

  BeamRelNode beamRelNode = sqlEnv.parseQuery(SELECT_STATEMENT);
  PCollection<Row> output =
      BeamSqlRelUtils.toPCollection(pipeline, beamRelNode)
          .apply(ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC)));

  PipelineResult result = pipeline.run();
  result.waitUntilFinish();
  collectAndPublishMetrics(result, "_default");
}
 
Example #6
Source File: BigQueryIO.java    From beam with Apache License 2.0
private Method resolveMethod(PCollection<T> input) {
  if (getMethod() != Method.DEFAULT) {
    return getMethod();
  }
  // By default, when writing an Unbounded PCollection, we use StreamingInserts and
  // BigQuery's streaming import API.
  return (input.isBounded() == IsBounded.UNBOUNDED)
      ? Method.STREAMING_INSERTS
      : Method.FILE_LOADS;
}
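Note that this snippet comes from the write path: the Method resolved here is BigQueryIO.Write.Method (FILE_LOADS vs. STREAMING_INSERTS), a separate enum from TypedRead.Method, though both expose a DEFAULT value that the connector resolves to a concrete method.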
 
Example #7
Source File: BigQueryIOStorageReadTest.java    From beam with Apache License 2.0
@Test
public void testBuildTableBasedSource() {
  BigQueryIO.TypedRead<TableRow> typedRead =
      BigQueryIO.read(new TableRowParser())
          .withCoder(TableRowJsonCoder.of())
          .withMethod(Method.DIRECT_READ)
          .from("foo.com:project:dataset.table");
  checkTypedReadTableObject(typedRead, "foo.com:project", "dataset", "table");
  assertTrue(typedRead.getValidate());
}
 
Example #8
Source File: BigQueryIOStorageReadTest.java    From beam with Apache License 2.0
@Test
public void testBuildTableBasedSourceWithoutValidation() {
  BigQueryIO.TypedRead<TableRow> typedRead =
      BigQueryIO.read(new TableRowParser())
          .withCoder(TableRowJsonCoder.of())
          .withMethod(Method.DIRECT_READ)
          .from("foo.com:project:dataset.table")
          .withoutValidation();
  checkTypedReadTableObject(typedRead, "foo.com:project", "dataset", "table");
  assertFalse(typedRead.getValidate());
}
 
Example #9
Source File: BigQueryIOStorageReadTest.java    From beam with Apache License 2.0
@Test
public void testBuildTableBasedSourceWithDefaultProject() {
  BigQueryIO.TypedRead<TableRow> typedRead =
      BigQueryIO.read(new TableRowParser())
          .withCoder(TableRowJsonCoder.of())
          .withMethod(Method.DIRECT_READ)
          .from("myDataset.myTable");
  checkTypedReadTableObject(typedRead, null, "myDataset", "myTable");
}
 
Example #10
Source File: BigQueryIOStorageReadTest.java    From beam with Apache License 2.0
@Test
public void testBuildTableBasedSourceWithTableReference() {
  TableReference tableReference =
      new TableReference()
          .setProjectId("foo.com:project")
          .setDatasetId("dataset")
          .setTableId("table");
  BigQueryIO.TypedRead<TableRow> typedRead =
      BigQueryIO.read(new TableRowParser())
          .withCoder(TableRowJsonCoder.of())
          .withMethod(Method.DIRECT_READ)
          .from(tableReference);
  checkTypedReadTableObject(typedRead, "foo.com:project", "dataset", "table");
}
 
Example #11
Source File: BigQueryConverters.java    From DataflowTemplates with Apache License 2.0
@Override
public PCollection<TableRow> expand(PBegin pipeline) {

  if (options().getQuery() == null) {
    LOG.info("No query provided, reading directly from: " + options().getInputTableSpec());
    return pipeline.apply(
        "ReadFromBigQuery",
        BigQueryIO.readTableRows()
            .from(options().getInputTableSpec())
            .withTemplateCompatibility()
            .withMethod(Method.DIRECT_READ)
            .withCoder(TableRowJsonCoder.of()));

  } else {
    LOG.info("Using query: " + options().getQuery());

    if (!options().getUseLegacySql()) {

      LOG.info("Using Standard SQL");
      return pipeline.apply(
          "ReadFromBigQueryWithQuery",
          BigQueryIO.readTableRows()
              .fromQuery(options().getQuery())
              .withTemplateCompatibility()
              .usingStandardSql()
              .withCoder(TableRowJsonCoder.of()));
    } else {

      LOG.info("Using Legacy SQL");
      return pipeline.apply(
          "ReadFromBigQueryWithQuery",
          BigQueryIO.readTableRows()
              .fromQuery(options().getQuery())
              .withTemplateCompatibility()
              .withCoder(TableRowJsonCoder.of()));
    }
  }
}
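Note that DIRECT_READ is applied only on the table-read branch of this transform; both query branches fall back to the connector's default read method.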
 
Example #12
Source File: BigQueryIOStorageReadTest.java    From beam with Apache License 2.0
@Test
public void testBuildSourceWithTableAndFlatten() {
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage(
      "Invalid BigQueryIO.Read: Specifies a table with a result flattening preference,"
          + " which only applies to queries");
  p.apply(
      "ReadMyTable",
      BigQueryIO.read(new TableRowParser())
          .withCoder(TableRowJsonCoder.of())
          .withMethod(Method.DIRECT_READ)
          .from("foo.com:project:dataset.table")
          .withoutResultFlattening());
  p.run();
}
 
Example #13
Source File: BigQueryIOStorageReadTest.java    From beam with Apache License 2.0
@Test
public void testBuildSourceWithTableAndSqlDialect() {
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage(
      "Invalid BigQueryIO.Read: Specifies a table with a SQL dialect preference,"
          + " which only applies to queries");
  p.apply(
      "ReadMyTable",
      BigQueryIO.read(new TableRowParser())
          .withCoder(TableRowJsonCoder.of())
          .withMethod(Method.DIRECT_READ)
          .from("foo.com:project:dataset.table")
          .usingStandardSql());
  p.run();
}
 
Example #14
Source File: BigQueryIOStorageReadTest.java    From beam with Apache License 2.0
@Test
public void testBuildSourceWithReadOptionsAndSelectedFields() {
  thrown.expect(IllegalStateException.class);
  thrown.expectMessage("withReadOptions() already called");
  p.apply(
      "ReadMyTable",
      BigQueryIO.read(new TableRowParser())
          .withCoder(TableRowJsonCoder.of())
          .withMethod(Method.DIRECT_READ)
          .from("foo.com:project:dataset.table")
          .withReadOptions(TableReadOptions.newBuilder().build())
          .withSelectedFields(Lists.newArrayList("field1")));
}
 
Example #15
Source File: BigQueryIOStorageReadTest.java    From beam with Apache License 2.0
@Test
public void testBuildSourceWithReadOptionsAndRowRestriction() {
  thrown.expect(IllegalStateException.class);
  thrown.expectMessage("withReadOptions() already called");
  p.apply(
      "ReadMyTable",
      BigQueryIO.read(new TableRowParser())
          .withCoder(TableRowJsonCoder.of())
          .withMethod(Method.DIRECT_READ)
          .from("foo.com:project:dataset.table")
          .withReadOptions(TableReadOptions.newBuilder().build())
          .withRowRestriction("field > 1"));
}
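Together, Examples #14 and #15 show that withReadOptions() is mutually exclusive with the individual setters: once read options are supplied as a single TableReadOptions object, calling withSelectedFields() or withRowRestriction() fails with an IllegalStateException.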
 
Example #16
Source File: BigQueryIOStorageReadTest.java    From beam with Apache License 2.0
@Test
public void testDisplayData() {
  String tableSpec = "foo.com:project:dataset.table";
  BigQueryIO.TypedRead<TableRow> typedRead =
      BigQueryIO.read(new TableRowParser())
          .withCoder(TableRowJsonCoder.of())
          .withMethod(Method.DIRECT_READ)
          .from(tableSpec);
  DisplayData displayData = DisplayData.from(typedRead);
  assertThat(displayData, hasDisplayItem("table", tableSpec));
}
 
Example #17
Source File: BigQueryIOStorageReadTest.java    From beam with Apache License 2.0
@Test
public void testEvaluatedDisplayData() {
  DisplayDataEvaluator evaluator = DisplayDataEvaluator.create();
  BigQueryIO.TypedRead<TableRow> typedRead =
      BigQueryIO.read(new TableRowParser())
          .withCoder(TableRowJsonCoder.of())
          .withMethod(Method.DIRECT_READ)
          .from("foo.com:project:dataset.table");
  Set<DisplayData> displayData = evaluator.displayDataForPrimitiveSourceTransforms(typedRead);
  assertThat(displayData, hasItem(hasDisplayItem("table")));
}
 
Example #18
Source File: BigQueryIOStorageReadTest.java    From beam with Apache License 2.0
@Test
public void testName() {
  assertEquals(
      "BigQueryIO.TypedRead",
      BigQueryIO.read(new TableRowParser())
          .withCoder(TableRowJsonCoder.of())
          .withMethod(Method.DIRECT_READ)
          .from("foo.com:project:dataset.table")
          .getName());
}
 
Example #19
Source File: BigQueryIOStorageReadIT.java    From beam with Apache License 2.0
private void runBigQueryIOStorageReadPipeline() {
  Pipeline p = Pipeline.create(options);
  PCollection<Long> count =
      p.apply(
              "Read",
              BigQueryIO.read(TableRowParser.INSTANCE)
                  .from(options.getInputTable())
                  .withMethod(Method.DIRECT_READ))
          .apply("Count", Count.globally());
  PAssert.thatSingleton(count).isEqualTo(options.getNumRecords());
  p.run().waitUntilFinish();
}
 
Example #20
Source File: BigQueryIOStorageQueryIT.java    From beam with Apache License 2.0
private void runBigQueryIOStorageQueryPipeline() {
  Pipeline p = Pipeline.create(options);
  PCollection<Long> count =
      p.apply(
              "Query",
              BigQueryIO.read(TableRowParser.INSTANCE)
                  .fromQuery("SELECT * FROM `" + options.getInputTable() + "`")
                  .usingStandardSql()
                  .withMethod(Method.DIRECT_READ))
          .apply("Count", Count.globally());
  PAssert.thatSingleton(count).isEqualTo(options.getNumRecords());
  p.run().waitUntilFinish();
}
 
Example #21
Source File: BigQueryTornadoesIT.java    From beam with Apache License 2.0
@Test
public void testE2eBigQueryTornadoesWithStorageApi() throws Exception {
  BigQueryTornadoesITOptions options =
      TestPipeline.testingPipelineOptions().as(BigQueryTornadoesITOptions.class);
  options.setReadMethod(Method.DIRECT_READ);
  options.setOutput(
      String.format(
          "%s.%s",
          "BigQueryTornadoesIT", "monthly_tornadoes_storage_" + System.currentTimeMillis()));

  runE2EBigQueryTornadoesTest(options);
}
 
Example #22
Source File: BigQueryTable.java    From beam with Apache License 2.0
@Override
public BeamSqlTableFilter constructFilter(List<RexNode> filter) {
  if (method.equals(Method.DIRECT_READ)) {
    return new BigQueryFilter(filter);
  }

  return super.constructFilter(filter);
}
 
Example #23
Source File: BigQueryTableProviderTest.java    From beam with Apache License 2.0
@Test
public void testDefaultMethod_whenPropertiesAreNotSet() {
  Table table = fakeTable("hello");
  BigQueryTable sqlTable = (BigQueryTable) provider.buildBeamSqlTable(table);

  assertEquals(Method.DIRECT_READ, sqlTable.method);
}
 
Example #24
Source File: BigQueryTableProviderTest.java    From beam with Apache License 2.0
@Test
public void testSelectDefaultMethodExplicitly() {
  Table table =
      fakeTableWithProperties(
          "hello", "{ " + METHOD_PROPERTY + ": " + "\"" + Method.DEFAULT.toString() + "\" }");
  BigQueryTable sqlTable = (BigQueryTable) provider.buildBeamSqlTable(table);

  assertEquals(Method.DEFAULT, sqlTable.method);
}
 
Example #25
Source File: BigQueryTableProviderTest.java    From beam with Apache License 2.0
@Test
public void testSelectDirectReadMethod() {
  Table table =
      fakeTableWithProperties(
          "hello", "{ " + METHOD_PROPERTY + ": " + "\"" + Method.DIRECT_READ.toString() + "\" }");
  BigQueryTable sqlTable = (BigQueryTable) provider.buildBeamSqlTable(table);

  assertEquals(Method.DIRECT_READ, sqlTable.method);
}
 
Example #26
Source File: BigQueryTableProviderTest.java    From beam with Apache License 2.0
@Test
public void testSelectExportMethod() {
  Table table =
      fakeTableWithProperties(
          "hello", "{ " + METHOD_PROPERTY + ": " + "\"" + Method.EXPORT.toString() + "\" }");
  BigQueryTable sqlTable = (BigQueryTable) provider.buildBeamSqlTable(table);

  assertEquals(Method.EXPORT, sqlTable.method);
}
 
Example #27
Source File: BigQueryTable.java    From beam with Apache License 2.0
@Override
public PCollection<Row> buildIOReader(
    PBegin begin, BeamSqlTableFilter filters, List<String> fieldNames) {
  if (!method.equals(Method.DIRECT_READ)) {
    LOG.info("Predicate/project push-down only available for `DIRECT_READ` method, skipping.");
    return buildIOReader(begin);
  }

  final FieldAccessDescriptor resolved =
      FieldAccessDescriptor.withFieldNames(fieldNames).resolve(getSchema());
  final Schema newSchema = SelectHelpers.getOutputSchema(getSchema(), resolved);

  TypedRead<Row> typedRead = getBigQueryTypedRead(newSchema);

  if (!(filters instanceof DefaultTableFilter)) {
    BigQueryFilter bigQueryFilter = (BigQueryFilter) filters;
    if (!bigQueryFilter.getSupported().isEmpty()) {
      String rowRestriction = generateRowRestrictions(getSchema(), bigQueryFilter.getSupported());
      if (!rowRestriction.isEmpty()) {
        LOG.info("Pushing down the following filter: " + rowRestriction);
        typedRead = typedRead.withRowRestriction(rowRestriction);
      }
    }
  }

  if (!fieldNames.isEmpty()) {
    typedRead = typedRead.withSelectedFields(fieldNames);
  }

  return begin.apply("Read Input BQ Rows with push-down", typedRead);
}
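Outside Beam SQL, the push-down performed above amounts to setting the selected fields and row restriction directly on the TypedRead. A minimal sketch, reusing the helpers shown in the test examples above (the field names and predicate are placeholders):

BigQueryIO.TypedRead<TableRow> pushedDown =
    BigQueryIO.read(new TableRowParser())
        .withCoder(TableRowJsonCoder.of())
        .withMethod(Method.DIRECT_READ)
        .from("foo.com:project:dataset.table")
        // Projection push-down: only these columns are read from the table.
        .withSelectedFields(Lists.newArrayList("field1", "field2"))
        // Predicate push-down: rows are filtered server-side by the Storage API.
        .withRowRestriction("field1 > 5");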
 
Example #28
Source File: BigQueryHllSketchCompatibilityIT.java    From beam with Apache License 2.0
private void readSketchFromBigQuery(String tableId, Long expectedCount) {
  String tableSpec = String.format("%s.%s", DATASET_ID, tableId);
  String query =
      String.format(
          "SELECT HLL_COUNT.INIT(%s) AS %s FROM %s",
          DATA_FIELD_NAME, QUERY_RESULT_FIELD_NAME, tableSpec);

  SerializableFunction<SchemaAndRecord, byte[]> parseQueryResultToByteArray =
      input ->
// The BigQuery BYTES type maps to java.nio.ByteBuffer on the Java side
          HllCount.getSketchFromByteBuffer(
              (ByteBuffer) input.getRecord().get(QUERY_RESULT_FIELD_NAME));

  TestPipelineOptions options =
      TestPipeline.testingPipelineOptions().as(TestPipelineOptions.class);

  Pipeline p = Pipeline.create(options);
  PCollection<Long> result =
      p.apply(
              BigQueryIO.read(parseQueryResultToByteArray)
                  .fromQuery(query)
                  .usingStandardSql()
                  .withMethod(Method.DIRECT_READ)
                  .withCoder(ByteArrayCoder.of()))
          .apply(HllCount.MergePartial.globally()) // no-op, only for testing MergePartial
          .apply(HllCount.Extract.globally());
  PAssert.thatSingleton(result).isEqualTo(expectedCount);
  p.run().waitUntilFinish();
}
 
Example #29
Source File: BigQueryTornadoesIT.java    From beam with Apache License 2.0
@Test
public void testE2EBigQueryTornadoesWithExport() throws Exception {
  BigQueryTornadoesITOptions options =
      TestPipeline.testingPipelineOptions().as(BigQueryTornadoesITOptions.class);
  options.setReadMethod(Method.EXPORT);
  options.setOutput(
      String.format(
          "%s.%s", "BigQueryTornadoesIT", "monthly_tornadoes_" + System.currentTimeMillis()));

  runE2EBigQueryTornadoesTest(options);
}
 
Example #30
Source File: BigQueryToParquet.java    From DataflowTemplates with Apache License 2.0
/**
 * Runs the pipeline with the supplied options.
 *
 * @param options The execution parameters to the pipeline.
 * @return The result of the pipeline execution.
 */
private static PipelineResult run(BigQueryToParquetOptions options) {

  // Create the pipeline.
  Pipeline pipeline = Pipeline.create(options);

  TableReadOptions.Builder builder = TableReadOptions.newBuilder();

  /* Add fields to filter export on, if any. */
  if (options.getFields() != null) {
    builder.addAllSelectedFields(Arrays.asList(options.getFields().split(",\\s*")));
  }

  TableReadOptions tableReadOptions = builder.build();
  BigQueryStorageClient client = BigQueryStorageClientFactory.create();
  ReadSession session =
      ReadSessionFactory.create(client, options.getTableRef(), tableReadOptions);

  // Extract schema from ReadSession
  Schema schema = getTableSchema(session);
  client.close();

  /*
   * Steps: 1) Read records from BigQuery via BigQueryIO.
   *        2) Write records to Google Cloud Storage in Parquet format.
   */
  pipeline
      /*
       * Step 1: Read records via BigQueryIO using supplied schema as a PCollection of
       *         {@link GenericRecord}.
       */
      .apply(
          "ReadFromBigQuery",
          BigQueryIO.read(SchemaAndRecord::getRecord)
              .from(options.getTableRef())
              .withTemplateCompatibility()
              .withMethod(Method.DIRECT_READ)
              .withCoder(AvroCoder.of(schema))
              .withReadOptions(tableReadOptions))
      /*
       * Step 2: Write records to Google Cloud Storage as one or more Parquet files
       *         via {@link ParquetIO}.
       */
      .apply(
          "WriteToParquet",
          FileIO.<GenericRecord>write()
              .via(ParquetIO.sink(schema))
              .to(options.getBucket())
              .withNumShards(options.getNumShards())
              .withSuffix(FILE_SUFFIX));

  // Execute the pipeline and return the result.
  return pipeline.run();
}
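One caveat on this template: the BigQueryStorageClient and ReadSession types used here come from the pre-v1 (v1beta1) BigQuery Storage API client; in the v1 client library the equivalent entry point is BigQueryReadClient, so the schema-extraction step may need adjusting against newer client versions.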