com.google.cloud.bigquery.StandardTableDefinition Java Examples

The following examples show how to use com.google.cloud.bigquery.StandardTableDefinition. Each example is drawn from an open-source project; the source file, project, and license are noted above each snippet.
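Before working through the examples, here is a minimal, self-contained sketch of the typical flow, assuming default credentials and placeholder dataset and table names: build a Schema, wrap it in a StandardTableDefinition, attach both to a TableInfo, and create the table through a BigQuery client.

import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.LegacySQLTypeName;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.StandardTableDefinition;
import com.google.cloud.bigquery.Table;
import com.google.cloud.bigquery.TableId;
import com.google.cloud.bigquery.TableInfo;

public class CreateTableSketch {
  public static void main(String[] args) {
    // Credentials and project are resolved from the environment.
    BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();

    // "my_dataset" and "my_table" are placeholder names.
    TableId tableId = TableId.of("my_dataset", "my_table");
    Schema schema = Schema.of(Field.of("name", LegacySQLTypeName.STRING));

    // StandardTableDefinition marks this as a native table, as opposed to
    // a view (ViewDefinition) or an external table (ExternalTableDefinition).
    StandardTableDefinition definition = StandardTableDefinition.of(schema);
    Table table = bigquery.create(TableInfo.of(tableId, definition));
    System.out.println("Created " + table.getTableId());
  }
}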
Example #1
Source File: BigQuerySnippets.java    From google-cloud-java with Apache License 2.0
/** Example of creating a table. */
// [TARGET create(TableInfo, TableOption...)]
// [VARIABLE "my_dataset_name"]
// [VARIABLE "my_table_name"]
// [VARIABLE "string_field"]
public Table createTable(String datasetName, String tableName, String fieldName) {
  // [START bigquery_create_table]
  TableId tableId = TableId.of(datasetName, tableName);
  // Table field definition
  Field field = Field.of(fieldName, LegacySQLTypeName.STRING);
  // Table schema definition
  Schema schema = Schema.of(field);
  TableDefinition tableDefinition = StandardTableDefinition.of(schema);
  TableInfo tableInfo = TableInfo.newBuilder(tableId, tableDefinition).build();
  Table table = bigquery.create(tableInfo);
  // [END bigquery_create_table]
  return table;
}
 
Example #2
Source File: DatasetSnippets.java    From google-cloud-java with Apache License 2.0
/** Example of creating a table in the dataset with schema and time partitioning. */
// [TARGET create(String, TableDefinition, TableOption...)]
// [VARIABLE "my_table"]
// [VARIABLE "my_field"]
public Table createTable(String tableName, String fieldName) {
  // [START ]
  Schema schema = Schema.of(Field.of(fieldName, LegacySQLTypeName.STRING));
  StandardTableDefinition definition =
      StandardTableDefinition.newBuilder()
          .setSchema(schema)
          .setTimePartitioning(TimePartitioning.of(TimePartitioning.Type.DAY))
          .build();
  Table table = dataset.create(tableName, definition);
  // [END ]
  return table;
}
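
TimePartitioning.of(TimePartitioning.Type.DAY) above partitions the table by ingestion time. To partition on a DATE or TIMESTAMP column instead, the builder form accepts a field name; a minimal variant, where event_ts is a placeholder column that would have to exist in the schema:

TimePartitioning partitioning =
    TimePartitioning.newBuilder(TimePartitioning.Type.DAY)
        .setField("event_ts") // placeholder DATE or TIMESTAMP column
        .build();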
 
Example #3
Source File: CreateTableAndLoadData.java    From google-cloud-java with Apache License 2.0
public static void main(String... args) throws InterruptedException, TimeoutException {
  BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
  TableId tableId = TableId.of("dataset", "table");
  Table table = bigquery.getTable(tableId);
  if (table == null) {
    System.out.println("Creating table " + tableId);
    Field integerField = Field.of("fieldName", LegacySQLTypeName.INTEGER);
    Schema schema = Schema.of(integerField);
    table = bigquery.create(TableInfo.of(tableId, StandardTableDefinition.of(schema)));
  }
  System.out.println("Loading data into table " + tableId);
  Job loadJob = table.load(FormatOptions.csv(), "gs://bucket/path");
  loadJob = loadJob.waitFor();
  if (loadJob.getStatus().getError() != null) {
    System.out.println("Job completed with errors");
  } else {
    System.out.println("Job succeeded");
  }
}
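
Note that "gs://bucket/path" is a placeholder URI. table.load(...) starts an asynchronous load job; waitFor() blocks until the job finishes, and the returned job's status carries any error that occurred.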
 
Example #4
Source File: BigQueryMapper.java    From DataflowTemplates with Apache License 2.0
private Table createBigQueryTable(TableId tableId) {
  // Create Blank BigQuery Table
  LOG.info(String.format("Creating Table: %s", tableId.toString()));

  List<Field> fieldList = new ArrayList<Field>();
  Schema schema = Schema.of(fieldList);

  StandardTableDefinition.Builder tableDefinitionBuilder =
      StandardTableDefinition.newBuilder().setSchema(schema);
  if (dayPartitioning) {
    tableDefinitionBuilder.setTimePartitioning(
        TimePartitioning.newBuilder(TimePartitioning.Type.DAY).build());
  }
  TableInfo tableInfo = TableInfo.newBuilder(tableId, tableDefinitionBuilder.build()).build();
  Table table = bigquery.create(tableInfo);

  return table;
}
 
Example #5
Source File: BigQueryMapper.java    From DataflowTemplates with Apache License 2.0
/**
 * Returns {@code Table} after creating the table with no columns in BigQuery.
 *
 * @param tableId a TableId referencing the BigQuery table being requested.
 */
private Table createBigQueryTable(TableId tableId) {
  // Create Blank BigQuery Table
  List<Field> fieldList = new ArrayList<Field>();
  Schema schema = Schema.of(fieldList);

  StandardTableDefinition.Builder tableDefinitionBuilder =
      StandardTableDefinition.newBuilder().setSchema(schema);
  if (dayPartitioning) {
    tableDefinitionBuilder.setTimePartitioning(
        TimePartitioning.newBuilder(TimePartitioning.Type.DAY).build());
  }
  TableInfo tableInfo = TableInfo.newBuilder(tableId, tableDefinitionBuilder.build()).build();
  Table table = bigquery.create(tableInfo);

  return table;
}
 
Example #6
Source File: BigQueryStatementIssuingFn.java    From DataflowTemplates with Apache License 2.0
private Table createBigQueryTable(BigQueryAction action) {
  TableDefinition definition = StandardTableDefinition.of(
      BigQuerySchemaUtils.beamSchemaToBigQueryClientSchema(action.tableSchema));

  TableId tableId = TableId.of(action.projectId, action.dataset, action.tableName);
  TableInfo tableInfo = TableInfo.newBuilder(tableId, definition).build();

  LOG.info("Creating a new BigQuery table: {}", tableInfo);

  try {
    return bigQueryClient.create(tableInfo);
  } catch (BigQueryException e) {
    if (e.getMessage().startsWith("Already Exists")) {
      return null;
    } else {
      throw e;
    }
  }
}
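
Catching BigQueryException and matching on "Already Exists" makes the create call safe when several workers race to create the same table. The alternative in Example #3, probing with bigquery.getTable(tableId) before creating, is simpler but leaves a window in which another worker can create the table between the check and the create.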
 
Example #7
Source File: CloudSnippets.java    From google-cloud-java with Apache License 2.0
/** Example of loading a Parquet file from GCS to a table. */
public void loadTableGcsParquet(String datasetName) throws InterruptedException {
  // [START bigquery_load_table_gcs_parquet]
  String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet";
  TableId tableId = TableId.of(datasetName, "us_states");
  LoadJobConfiguration configuration =
      LoadJobConfiguration.builder(tableId, sourceUri)
          .setFormatOptions(FormatOptions.parquet())
          .build();
  // Load the table
  Job loadJob = bigquery.create(JobInfo.of(configuration));
  loadJob = loadJob.waitFor();
  // Check the table
  StandardTableDefinition destinationTable = bigquery.getTable(tableId).getDefinition();
  System.out.println("State: " + loadJob.getStatus().getState());
  System.out.printf("Loaded %d rows.\n", destinationTable.getNumRows());
  // [END bigquery_load_table_gcs_parquet]
}
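
The assignment to StandardTableDefinition succeeds without an explicit cast because Table.getDefinition() is declared with a generic return type; if the table were actually a view, the hidden cast would fail at runtime with a ClassCastException. Example #14 performs the same conversion with an explicit cast.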
 
Example #8
Source File: CloudSnippets.java    From google-cloud-java with Apache License 2.0
/** Example of copying multiple tables to a destination. */
public void copyTables(String datasetId, String destinationTableId) throws InterruptedException {
  generateTableWithDdl(datasetId, "table1");
  generateTableWithDdl(datasetId, "table2");

  // [START bigquery_copy_table_multiple_source]
  TableId destinationTable = TableId.of(datasetId, destinationTableId);
  CopyJobConfiguration configuration =
      CopyJobConfiguration.newBuilder(
              destinationTable,
              Arrays.asList(TableId.of(datasetId, "table1"), TableId.of(datasetId, "table2")))
          .build();

  // Copy the tables.
  Job job = bigquery.create(JobInfo.of(configuration));
  job = job.waitFor();

  // Check the table
  StandardTableDefinition table = bigquery.getTable(destinationTable).getDefinition();
  System.out.println("State: " + job.getStatus().getState());
  System.out.printf("Copied %d rows.\n", table.getNumRows());
  // [END bigquery_copy_table_multiple_source]
}
 
Example #9
Source File: BQTableDefinitionTest.java    From beast with Apache License 2.0
@Test
public void shouldCreatePartitionedTable() {
    when(bqConfig.isBQTablePartitioningEnabled()).thenReturn(true);
    when(bqConfig.getBQTablePartitionKey()).thenReturn("timestamp_field");
    Schema bqSchema = Schema.of(
            Field.newBuilder("timestamp_field", LegacySQLTypeName.TIMESTAMP).build()
    );

    BQTableDefinition bqTableDefinition = new BQTableDefinition(bqConfig);
    StandardTableDefinition tableDefinition = bqTableDefinition.getTableDefinition(bqSchema);

    Schema returnedSchema = tableDefinition.getSchema();
    assertEquals(returnedSchema.getFields().size(), bqSchema.getFields().size());
    assertEquals(returnedSchema.getFields().get(0).getName(), bqSchema.getFields().get(0).getName());
    assertEquals(returnedSchema.getFields().get(0).getMode(), bqSchema.getFields().get(0).getMode());
    assertEquals(returnedSchema.getFields().get(0).getType(), bqSchema.getFields().get(0).getType());
    assertEquals("timestamp_field", tableDefinition.getTimePartitioning().getField());
}
 
Example #10
Source File: PutBigQueryStreamingIT.java    From nifi with Apache License 2.0
private void createTable(String tableName) {
    TableId tableId = TableId.of(dataset.getDatasetId().getDataset(), tableName);

    // Table field definition
    Field id = Field.newBuilder("id", LegacySQLTypeName.INTEGER).setMode(Mode.REQUIRED).build();
    Field name = Field.newBuilder("name", LegacySQLTypeName.STRING).setMode(Mode.NULLABLE).build();
    Field alias = Field.newBuilder("alias", LegacySQLTypeName.STRING).setMode(Mode.REPEATED).build();

    Field zip = Field.newBuilder("zip", LegacySQLTypeName.STRING).setMode(Mode.NULLABLE).build();
    Field city = Field.newBuilder("city", LegacySQLTypeName.STRING).setMode(Mode.NULLABLE).build();
    Field addresses = Field.newBuilder("addresses", LegacySQLTypeName.RECORD, zip, city).setMode(Mode.REPEATED).build();

    Field position = Field.newBuilder("position", LegacySQLTypeName.STRING).setMode(Mode.NULLABLE).build();
    Field company = Field.newBuilder("company", LegacySQLTypeName.STRING).setMode(Mode.NULLABLE).build();
    Field job = Field.newBuilder("job", LegacySQLTypeName.RECORD, position, company).setMode(Mode.NULLABLE).build();

    // Table schema definition
    schema = Schema.of(id, name, alias, addresses, job);
    TableDefinition tableDefinition = StandardTableDefinition.of(schema);
    TableInfo tableInfo = TableInfo.newBuilder(tableId, tableDefinition).build();

    // create table
    bigquery.create(tableInfo);
}
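
In this schema, Mode.REPEATED turns a column into an ARRAY and LegacySQLTypeName.RECORD into a STRUCT, so addresses is an array of (zip, city) structs while job is a single nullable (position, company) struct.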
 
Example #11
Source File: ITDatasetSnippets.java    From google-cloud-java with Apache License 2.0
@Test
public void testListTablesNotEmpty() {
  String expectedTableName = "test_table";

  dataset.create(expectedTableName, StandardTableDefinition.newBuilder().build());
  Page<Table> tables = datasetSnippets.list();
  Iterator<Table> iterator = tables.iterateAll().iterator();
  assertTrue(iterator.hasNext());

  Table actualTable = iterator.next();
  assertEquals(expectedTableName, actualTable.getTableId().getTable());
  assertFalse(iterator.hasNext());

  bigquery.delete(TableId.of(DATASET, expectedTableName));
}
 
Example #12
Source File: BigQueryExample.java    From google-cloud-java with Apache License 2.0
@Override
TableInfo parse(String... args) throws Exception {
  if (args.length >= 3) {
    String dataset = args[0];
    String table = args[1];
    TableId tableId = TableId.of(dataset, table);
    return TableInfo.of(tableId, StandardTableDefinition.of(parseSchema(args, 2, args.length)));
  }
  throw new IllegalArgumentException("Missing required arguments.");
}
 
Example #13
Source File: BigQueryClient.java    From beam with Apache License 2.0
private void createTable(TableId tableId, Schema schema) {
  TableInfo tableInfo =
      TableInfo.newBuilder(tableId, StandardTableDefinition.of(schema))
          .setFriendlyName(tableId.getTable())
          .build();

  client.create(tableInfo, FIELD_OPTIONS);
}
 
Example #14
Source File: BigQuerySnippets.java    From google-cloud-java with Apache License 2.0
/** Example of loading a newline-delimited JSON file with textual fields from GCS to a table. */
// [TARGET create(JobInfo, JobOption...)]
// [VARIABLE "my_dataset_name"]
// [VARIABLE "my_table_name"]
public Long writeRemoteFileToTable(String datasetName, String tableName)
    throws InterruptedException {
  // [START bigquery_load_table_gcs_json]
  String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.json";
  TableId tableId = TableId.of(datasetName, tableName);
  // Table field definition
  Field[] fields =
      new Field[] {
        Field.of("name", LegacySQLTypeName.STRING),
        Field.of("post_abbr", LegacySQLTypeName.STRING)
      };
  // Table schema definition
  Schema schema = Schema.of(fields);
  LoadJobConfiguration configuration =
      LoadJobConfiguration.builder(tableId, sourceUri)
          .setFormatOptions(FormatOptions.json())
          .setCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
          .setSchema(schema)
          .build();
  // Load the table
  Job loadJob = bigquery.create(JobInfo.of(configuration));
  loadJob = loadJob.waitFor();
  // Check the table
  System.out.println("State: " + loadJob.getStatus().getState());
  return ((StandardTableDefinition) bigquery.getTable(tableId).getDefinition()).getNumRows();
  // [END bigquery_load_table_gcs_json]
}
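
CreateDisposition.CREATE_IF_NEEDED lets the load job create the destination table with the supplied schema when it does not already exist; with CREATE_NEVER the job would instead fail on a missing table.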
 
Example #15
Source File: CloudSnippets.java    From google-cloud-java with Apache License 2.0
/** Example of undeleting a table. */
public void undeleteTable(String datasetId) throws InterruptedException {
  generateTableWithDdl(datasetId, "oops_undelete_me");

  // [START bigquery_undelete_table]
  // String datasetId = "my_dataset";
  String tableId = "oops_undelete_me";

  // Record the current time.  We'll use this as the snapshot time
  // for recovering the table.
  long snapTime = Instant.now().toEpochMilli();

  // "Accidentally" delete the table.
  bigquery.delete(TableId.of(datasetId, tableId));

  // Construct the restore-from tableID using a snapshot decorator.
  String snapshotTableId = String.format("%s@%d", tableId, snapTime);
  // Choose a new table ID for the recovered table data.
  String recoverTableId = String.format("%s_recovered", tableId);

  // Construct and run a copy job.
  CopyJobConfiguration configuration =
      CopyJobConfiguration.newBuilder(
              TableId.of(datasetId, recoverTableId), TableId.of(datasetId, snapshotTableId))
          .build();
  Job job = bigquery.create(JobInfo.of(configuration));
  job = job.waitFor();

  // Check the table
  StandardTableDefinition table =
      bigquery.getTable(TableId.of(datasetId, recoverTableId)).getDefinition();
  System.out.println("State: " + job.getStatus().getState());
  System.out.printf("Recovered %d rows.\n", table.getNumRows());
  // [END bigquery_undelete_table]
}
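
The @&lt;milliseconds&gt; suffix is a snapshot decorator: it addresses the table's state as of that timestamp, which BigQuery keeps readable for a limited window even after deletion, so copying from the decorated ID restores the deleted data into a new table.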
 
Example #16
Source File: ITDatasetSnippets.java    From google-cloud-java with Apache License 2.0
@Test
public void testGetTable() {
  String expectedTableName = "test_table";

  dataset.create(expectedTableName, StandardTableDefinition.newBuilder().build());
  Table actualTable = datasetSnippets.getTable(expectedTableName);

  assertNotNull(actualTable);
  assertEquals(expectedTableName, actualTable.getTableId().getTable());

  bigquery.delete(TableId.of(DATASET, expectedTableName));
}
 
Example #17
Source File: ITBigQuerySnippets.java    From google-cloud-java with Apache License 2.0
@Test
public void testInsertAllAndListTableData() throws IOException, InterruptedException {
  String tableName = "test_insert_all_and_list_table_data";
  String fieldName1 = "booleanField";
  String fieldName2 = "bytesField";
  String fieldName3 = "recordField";
  String fieldName4 = "stringField";
  TableId tableId = TableId.of(DATASET, tableName);
  Schema schema =
      Schema.of(
          Field.of(fieldName1, LegacySQLTypeName.BOOLEAN),
          Field.of(fieldName2, LegacySQLTypeName.BYTES),
          Field.of(
              fieldName3,
              LegacySQLTypeName.RECORD,
              Field.of(fieldName4, LegacySQLTypeName.STRING)));
  TableInfo table = TableInfo.of(tableId, StandardTableDefinition.of(schema));
  assertNotNull(bigquery.create(table));
  InsertAllResponse response = bigquerySnippets.insertAll(DATASET, tableName);
  assertFalse(response.hasErrors());
  assertTrue(response.getInsertErrors().isEmpty());
  Page<FieldValueList> listPage = bigquerySnippets.listTableDataFromId(DATASET, tableName);
  while (Iterators.size(listPage.iterateAll().iterator()) < 1) {
    Thread.sleep(500);
    listPage = bigquerySnippets.listTableDataFromId(DATASET, tableName);
  }
  FieldValueList row = listPage.getValues().iterator().next();
  assertEquals(true, row.get(0).getBooleanValue());
  assertArrayEquals(new byte[] {0xA, 0xD, 0xD, 0xE, 0xD}, row.get(1).getBytesValue());
  assertEquals("Hello, World!", row.get(2).getRecordValue().get(0).getStringValue());

  listPage = bigquerySnippets.listTableDataSchema(DATASET, tableName, schema, fieldName1);
  row = listPage.getValues().iterator().next();
  assertNotNull(row.get(fieldName1));
  assertArrayEquals(new byte[] {0xA, 0xD, 0xD, 0xE, 0xD}, row.get(fieldName2).getBytesValue());

  bigquerySnippets.listTableDataSchemaId();

  assertTrue(bigquerySnippets.deleteTable(DATASET, tableName));
}
 
Example #18
Source File: ITTableSnippets.java    From google-cloud-java with Apache License 2.0
@Before
public void before() {
  ++nextTableNumber;
  StandardTableDefinition.Builder builder = StandardTableDefinition.newBuilder();
  builder.setSchema(SCHEMA);
  table = bigquery.create(TableInfo.of(getTableId(), builder.build()));
  bigquery.create(TableInfo.of(getCopyTableId(), builder.build()));
  tableSnippets = new TableSnippets(table);
}
 
Example #19
Source File: ITTableSnippets.java    From google-cloud-java with Apache License 2.0
@Test
public void testDelete() {
  Table doomedTable =
      bigquery.create(TableInfo.of(DOOMED_TABLE_ID, StandardTableDefinition.of(SCHEMA)));
  TableSnippets doomedTableSnippets = new TableSnippets(doomedTable);
  assertTrue(doomedTableSnippets.delete());
}
 
Example #20
Source File: CreateStore.java    From quetzal with Eclipse Public License 2.0
public Table createTable(String tableName, Field[] fields) {	
	TableId tableId = TableId.of(datasetName, tableName);
	Schema schema = Schema.of(fields);
	TableDefinition tableDefinition = StandardTableDefinition.of(schema);
	TableInfo tableInfo = TableInfo.newBuilder(tableId, tableDefinition).build();
	Table t = bigquery.create(tableInfo);
	System.err.println("created " + t.getTableId());
	return t;
}
 
Example #21
Source File: BigQueryDatasetRuntimeTestIT.java    From components with Apache License 2.0
@BeforeClass
public static void initDatasetAndTable() throws IOException {
    BigQuery bigquery = BigQueryConnection.createClient(createDatastore());
    for (String dataset : datasets) {
        DatasetId datasetId = DatasetId.of(BigQueryTestConstants.PROJECT, dataset);
        bigquery.create(DatasetInfo.of(datasetId));
    }

    for (String table : tables) {
        TableDefinition tableDefinition =
                StandardTableDefinition.of(Schema.of(Field.of("test", LegacySQLTypeName.STRING)));
        TableId tableId = TableId.of(BigQueryTestConstants.PROJECT, datasets.get(0), table);
        bigquery.create(TableInfo.of(tableId, tableDefinition));
    }
}
 
Example #22
Source File: BQTableDefinition.java    From beast with Apache License 2.0
public StandardTableDefinition getTableDefinition(Schema schema) {
    StandardTableDefinition tableDefinition = StandardTableDefinition.newBuilder()
            .setSchema(schema)
            .build();
    if (!bqConfig.isBQTablePartitioningEnabled()) {
        return tableDefinition;
    }
    return getPartitionedTableDefinition(schema);
}
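
getPartitionedTableDefinition is not reproduced on this page. The following is a minimal sketch of what it plausibly does, inferred from the tests in Examples #9 and #29; the method body and the exception's constructor are assumptions, not beast's actual source:

private StandardTableDefinition getPartitionedTableDefinition(Schema schema) {
    String partitionKey = bqConfig.getBQTablePartitionKey();
    if (partitionKey == null) {
        // Example #29 expects this exception when partitioning is enabled
        // but no partition key is configured (constructor signature assumed).
        throw new BQPartitionKeyNotSpecified("partition key not specified");
    }
    // Example #9 expects the configured key to become the partitioning field.
    return StandardTableDefinition.newBuilder()
            .setSchema(schema)
            .setTimePartitioning(
                    TimePartitioning.newBuilder(TimePartitioning.Type.DAY)
                            .setField(partitionKey)
                            .build())
            .build();
}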
 
Example #23
Source File: BigQueryIntegrationTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test
public void canSetStrictSchemaMode() throws Exception {
  String table = "my_test_table";
  String tableSpec = String.format("%s.%s", dataset, table);
  TableId tableId = TableId.of(dataset, table);

  bigquery.create(DatasetInfo.newBuilder(dataset).build());
  bigquery
      .create(TableInfo
          .newBuilder(tableId,
              StandardTableDefinition
                  .of(Schema.of(Field.of("client_id", LegacySQLTypeName.STRING),
                      Field.of("additional_properties", LegacySQLTypeName.STRING),
                      Field.of("submission_timestamp", LegacySQLTypeName.TIMESTAMP)))
                  .toBuilder().setTimePartitioning(TIME_PARTITIONING).setClustering(CLUSTERING)
                  .build())
          .build());

  String input = Resources
      .getResource("testdata/bigquery-integration/input-varied-doctypes.ndjson").getPath();
  String output = String.format("%s:%s", projectId, tableSpec);

  PipelineResult result = Sink.run(new String[] { "--inputFileFormat=json", "--inputType=file",
      "--input=" + input, "--outputType=bigquery", "--bqWriteMethod=streaming",
      "--bqStrictSchemaDocTypes=my-namespace/my-test", "--output=" + output,
      "--errorOutputType=stderr" });

  result.waitUntilFinish();

  assertThat(stringValuesQueryWithRetries("SELECT additional_properties FROM " + tableSpec),
      matchesInAnyOrder(Lists.newArrayList("{\"type\":\"main\"}", null, "{\"type\":\"main\"}")));
}
 
Example #24
Source File: BigQueryIntegrationTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test
public void canRecoverFailedInsertsInStreamingMode() throws Exception {
  String table = "my_test_table";
  String tableSpec = String.format("%s.%s", dataset, table);
  TableId tableId = TableId.of(dataset, table);

  bigquery.create(DatasetInfo.newBuilder(dataset).build());

  bigquery.create(TableInfo.newBuilder(tableId,
      StandardTableDefinition.of(Schema.of(Field.of("client_id", LegacySQLTypeName.STRING),
          Field.newBuilder("extra_required_field", LegacySQLTypeName.STRING)
              .setMode(Mode.REQUIRED).build())))
      .build());

  String input = Resources.getResource("testdata/json-payload.ndjson").getPath();
  String output = String.format("%s:%s", projectId, tableSpec);
  String errorOutput = outputPath + "/error/out";

  PipelineResult result = Sink.run(new String[] { "--inputFileFormat=text", "--inputType=file",
      "--input=" + input, "--outputType=bigquery", "--output=" + output, "--errorOutputType=file",
      "--bqWriteMethod=streaming", "--errorOutputFileCompression=UNCOMPRESSED",
      "--errorOutput=" + errorOutput });

  result.waitUntilFinish();

  assertTrue(stringValuesQuery("SELECT client_id FROM " + tableSpec).isEmpty());

  List<String> expectedErrorLines = Lines.resources("testdata/json-payload-wrapped.ndjson");
  List<String> errorOutputLines = Lines.files(outputPath + "/error/out*.ndjson");
  assertThat(errorOutputLines, Matchers.hasSize(expectedErrorLines.size()));
}
 
Example #25
Source File: BigQueryIntegrationTest.java    From gcp-ingestion with Mozilla Public License 2.0
private void canWriteWithMixedMethod(String streamingDocTypes) throws Exception {
  String table = "my_test_table";
  TableId tableId = TableId.of(dataset, table);

  bigquery.create(DatasetInfo.newBuilder(dataset).build());
  bigquery
      .create(TableInfo
          .newBuilder(tableId,
              StandardTableDefinition
                  .of(Schema.of(Field.of("client_id", LegacySQLTypeName.STRING),
                      Field.of("type", LegacySQLTypeName.STRING),
                      Field.of("submission_timestamp", LegacySQLTypeName.TIMESTAMP)))
                  .toBuilder().setTimePartitioning(TIME_PARTITIONING).setClustering(CLUSTERING)
                  .build())
          .build());

  String input = Resources
      .getResource("testdata/bigquery-integration/input-varied-doctypes.ndjson").getPath();
  String output = String.format("%s:%s.%s", projectId, dataset, "${document_type}_table");
  String errorOutput = outputPath + "/error/out";

  PipelineResult result = Sink.run(new String[] { "--inputFileFormat=json", "--inputType=file",
      "--input=" + input, "--outputType=bigquery", "--output=" + output, "--bqWriteMethod=mixed",
      "--bqStreamingDocTypes=" + streamingDocTypes, "--errorOutputType=file",
      "--tempLocation=gs://gcp-ingestion-static-test-bucket/temp/bq-loads",
      "--schemasLocation=schemas.tar.gz", "--errorOutputFileCompression=UNCOMPRESSED",
      "--errorOutput=" + errorOutput });

  result.waitUntilFinish();

  String tableSpec = String.format("%s.%s", dataset, table);
  assertThat(stringValuesQueryWithRetries("SELECT client_id FROM " + tableSpec),
      matchesInAnyOrder(ImmutableList.of("abc123")));

  List<String> errorOutputLines = Lines.files(outputPath + "/error/out*.ndjson");
  assertThat(errorOutputLines, Matchers.hasSize(2));
}
 
Example #26
Source File: BigQueryIntegrationTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test
public void canWriteViaFileLoads() throws Exception {
  String table = "my_test_table";
  TableId tableId = TableId.of(dataset, table);

  bigquery.create(DatasetInfo.newBuilder(dataset).build());
  bigquery
      .create(TableInfo
          .newBuilder(tableId,
              StandardTableDefinition
                  .of(Schema.of(Field.of("client_id", LegacySQLTypeName.STRING),
                      Field.of("type", LegacySQLTypeName.STRING),
                      Field.of("submission_timestamp", LegacySQLTypeName.TIMESTAMP)))
                  .toBuilder().setTimePartitioning(TIME_PARTITIONING).setClustering(CLUSTERING)
                  .build())
          .build());

  String input = Resources
      .getResource("testdata/bigquery-integration/input-varied-doctypes.ndjson").getPath();
  String output = String.format("%s:%s.%s", projectId, dataset, "${document_type}_table");
  String errorOutput = outputPath + "/error/out";

  PipelineResult result = Sink.run(new String[] { "--inputFileFormat=json", "--inputType=file",
      "--input=" + input, "--outputType=bigquery", "--output=" + output,
      "--bqWriteMethod=file_loads", "--errorOutputType=file",
      "--tempLocation=gs://gcp-ingestion-static-test-bucket/temp/bq-loads",
      "--schemasLocation=schemas.tar.gz", "--errorOutputFileCompression=UNCOMPRESSED",
      "--errorOutput=" + errorOutput });

  result.waitUntilFinish();

  String tableSpec = String.format("%s.%s", dataset, table);
  assertThat(stringValuesQueryWithRetries("SELECT client_id FROM " + tableSpec),
      matchesInAnyOrder(ImmutableList.of("abc123")));

  List<String> errorOutputLines = Lines.files(outputPath + "/error/out*.ndjson");
  assertThat(errorOutputLines, Matchers.hasSize(2));
}
 
Example #27
Source File: BigQueryIntegrationTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test
public void canWriteToDynamicTables() throws Exception {
  String table = "my_test_table";
  TableId tableId = TableId.of(dataset, table);

  bigquery.create(DatasetInfo.newBuilder(dataset).build());
  bigquery.create(TableInfo.newBuilder(tableId,
      StandardTableDefinition.of(Schema.of(Field.of("client_id", LegacySQLTypeName.STRING),
          Field.of("type", LegacySQLTypeName.STRING))))
      .build());

  String input = Resources
      .getResource("testdata/bigquery-integration/input-varied-doctypes.ndjson").getPath();
  String output = String.format("%s:%s.%s", projectId, dataset, "${document_type}_table");
  String errorOutput = outputPath + "/error/out";

  PipelineResult result = Sink.run(new String[] { "--inputFileFormat=json", "--inputType=file",
      "--input=" + input, "--outputType=bigquery", "--output=" + output,
      "--bqWriteMethod=streaming", "--errorOutputType=file", "--schemasLocation=schemas.tar.gz",
      "--errorOutputFileCompression=UNCOMPRESSED", "--errorOutput=" + errorOutput });

  result.waitUntilFinish();

  String tableSpec = String.format("%s.%s", dataset, table);
  assertThat(stringValuesQueryWithRetries("SELECT client_id FROM " + tableSpec),
      matchesInAnyOrder(ImmutableList.of("abc123")));

  List<String> errorOutputLines = Lines.files(outputPath + "/error/out*.ndjson");
  assertThat(errorOutputLines, Matchers.hasSize(2));
}
 
Example #28
Source File: BigQueryIntegrationTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test
public void canWriteToBigQuery() throws Exception {
  String table = "my_test_table";
  String tableSpec = String.format("%s.%s", dataset, table);
  TableId tableId = TableId.of(dataset, table);

  bigquery.create(DatasetInfo.newBuilder(dataset).build());
  bigquery
      .create(TableInfo
          .newBuilder(tableId,
              StandardTableDefinition
                  .of(Schema.of(Field.of("client_id", LegacySQLTypeName.STRING),
                      Field.of("type", LegacySQLTypeName.STRING),
                      Field.of("submission_timestamp", LegacySQLTypeName.TIMESTAMP)))
                  .toBuilder().setTimePartitioning(TIME_PARTITIONING).setClustering(CLUSTERING)
                  .build())
          .build());

  String input = Resources
      .getResource("testdata/bigquery-integration/input-with-attributes.ndjson").getPath();
  String output = String.format("%s:%s", projectId, tableSpec);

  PipelineResult result = Sink.run(new String[] { "--inputFileFormat=json", "--inputType=file",
      "--input=" + input, "--outputType=bigquery", "--bqWriteMethod=streaming",
      "--schemasLocation=schemas.tar.gz", "--output=" + output, "--errorOutputType=stderr" });

  result.waitUntilFinish();

  assertThat(stringValuesQueryWithRetries("SELECT submission_timestamp FROM " + tableSpec),
      matchesInAnyOrder(Lists.newArrayList(null, null, "1561983194.123456")));
  assertThat(stringValuesQueryWithRetries("SELECT client_id FROM " + tableSpec),
      matchesInAnyOrder(ImmutableList.of("abc123", "abc123", "def456")));
}
 
Example #29
Source File: BQTableDefinitionTest.java    From beast with Apache License 2.0
@Test (expected = BQPartitionKeyNotSpecified.class)
public void shouldThrowErrorIfPartitionFieldNotSet() {
    when(bqConfig.isBQTablePartitioningEnabled()).thenReturn(true);
    Schema bqSchema = Schema.of(
            Field.newBuilder("int_field", LegacySQLTypeName.INTEGER).build()
    );

    BQTableDefinition bqTableDefinition = new BQTableDefinition(bqConfig);
    StandardTableDefinition tableDefinition = bqTableDefinition.getTableDefinition(bqSchema);
    tableDefinition.getSchema();
}
 
Example #30
Source File: BQTableDefinitionTest.java    From beast with Apache License 2.0
@Test
public void shouldReturnTableDefinitionIfPartitionDisabled() {
    when(bqConfig.isBQTablePartitioningEnabled()).thenReturn(false);
    Schema bqSchema = Schema.of(
            Field.newBuilder("int_field", LegacySQLTypeName.INTEGER).build()
    );

    BQTableDefinition bqTableDefinition = new BQTableDefinition(bqConfig);
    StandardTableDefinition tableDefinition = bqTableDefinition.getTableDefinition(bqSchema);
    Schema returnedSchema = tableDefinition.getSchema();
    assertEquals(returnedSchema.getFields().size(), bqSchema.getFields().size());
    assertEquals(returnedSchema.getFields().get(0).getName(), bqSchema.getFields().get(0).getName());
    assertEquals(returnedSchema.getFields().get(0).getMode(), bqSchema.getFields().get(0).getMode());
    assertEquals(returnedSchema.getFields().get(0).getType(), bqSchema.getFields().get(0).getType());
}