com.google.cloud.bigquery.DatasetInfo Java Examples

The following examples show how to use com.google.cloud.bigquery.DatasetInfo. They are extracted from open source projects; the source file, project, and license are noted above each example.
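Most of the snippets below share one pattern: build an immutable DatasetInfo via DatasetInfo.newBuilder(...) or DatasetInfo.of(...), then pass it to BigQuery.create(). As a minimal, self-contained sketch of that pattern (the dataset name "my_dataset" is a placeholder, and Application Default Credentials are assumed):

import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.Dataset;
import com.google.cloud.bigquery.DatasetId;
import com.google.cloud.bigquery.DatasetInfo;

public class DatasetInfoBasics {
  public static void main(String[] args) {
    // Application Default Credentials and the default project are assumed.
    BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();

    // An immutable description of the dataset; "my_dataset" is a placeholder.
    DatasetInfo datasetInfo = DatasetInfo.newBuilder("my_dataset").build();

    // Create the dataset only if it does not already exist.
    if (bigquery.getDataset(DatasetId.of("my_dataset")) == null) {
      Dataset created = bigquery.create(datasetInfo);
      System.out.println("Created dataset " + created.getDatasetId());
    }
  }
}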
Example #1
Source File: AbstractBigQueryIT.java    From nifi with Apache License 2.0
@BeforeClass
public static void beforeClass() throws IOException {
    final Map<PropertyDescriptor, String> propertiesMap = new HashMap<>();
    propertiesMap.put(CredentialPropertyDescriptors.SERVICE_ACCOUNT_JSON_FILE, SERVICE_ACCOUNT_JSON);
    Credentials credentials = credentialsProviderFactory.getGoogleCredentials(propertiesMap, new ProxyAwareTransportFactory(null));

    BigQueryOptions bigQueryOptions = BigQueryOptions.newBuilder()
            .setProjectId(PROJECT_ID)
            .setCredentials(credentials)
            .build();

    bigquery = bigQueryOptions.getService();

    DatasetInfo datasetInfo = DatasetInfo.newBuilder(RemoteBigQueryHelper.generateDatasetName()).build();
    dataset = bigquery.create(datasetInfo);
}
 
Example #2
Source File: BigQuerySnippets.java    From google-cloud-java with Apache License 2.0
/** Example of creating a dataset. */
// [TARGET create(DatasetInfo, DatasetOption...)]
// [VARIABLE "my_dataset_name"]
public Dataset createDataset(String datasetName) {
  // [START bigquery_create_dataset]
  Dataset dataset = null;
  DatasetInfo datasetInfo = DatasetInfo.newBuilder(datasetName).build();
  try {
    // the dataset was created
    dataset = bigquery.create(datasetInfo);
  } catch (BigQueryException e) {
    // the dataset was not created
  }
  // [END bigquery_create_dataset]
  return dataset;
}
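Note that the published snippet deliberately swallows the BigQueryException; production code should inspect it instead, since BigQuery responds with HTTP 409 (reason "duplicate") when a dataset with the same name already exists.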
 
Example #3
Source File: DatasetInfoSnippets.java    From google-cloud-java with Apache License 2.0
/** Update the ACLs for a dataset. */
// [TARGET getAcl()]
// [VARIABLE bigquery.getDataset(DatasetId.of("my_dataset"))]
public List<Acl> updateDatasetAccess(DatasetInfo dataset) {
  // [START bigquery_update_dataset_access]
  List<Acl> beforeAcls = dataset.getAcl();

  // Make a copy of the ACLs so that they can be modified.
  ArrayList<Acl> acls = new ArrayList<>(beforeAcls);
  acls.add(Acl.of(new Acl.User("[email protected]"), Acl.Role.READER));
  DatasetInfo.Builder builder = dataset.toBuilder();
  builder.setAcl(acls);

  bigquery.update(builder.build()); // API request.
  // [END bigquery_update_dataset_access]

  return beforeAcls;
}
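The same read-modify-write pattern works for revoking access: copy the ACL list, filter out the unwanted entry, and send the update. A hedged sketch in the style of the snippet above (the removeReaderAcl helper and its email parameter are illustrative, not part of DatasetInfoSnippets; the bigquery field is assumed from the same snippet class):

/** Remove a user's READER entry from a dataset's ACL (illustrative sketch). */
public List<Acl> removeReaderAcl(DatasetInfo dataset, String email) {
  // Copy the ACLs so the list returned by getAcl() can be modified.
  ArrayList<Acl> acls = new ArrayList<>(dataset.getAcl());
  acls.removeIf(
      acl -> acl.getEntity().equals(new Acl.User(email))
          && Acl.Role.READER.equals(acl.getRole()));
  bigquery.update(dataset.toBuilder().setAcl(acls).build()); // API request.
  return acls;
}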
 
Example #4
Source File: BigQueryOutput.java    From flo with Apache License 2.0
@Override
public StagingTableId provide(EvalContext evalContext) {
  final String location = getDatasetOrThrow().getLocation();

  final TableId stagingTableId = bigQuery().createStagingTableId(tableId, location);
  final DatasetId stagingDatasetId = DatasetId.of(stagingTableId.getProject(), stagingTableId.getDataset());

  if (bigQuery().getDataset(stagingDatasetId) == null) {
    bigQuery().create(DatasetInfo
        .newBuilder(stagingDatasetId)
        .setLocation(location)
        .setDefaultTableLifetime(Duration.ofDays(1).toMillis())
        .build());
    LOG.info("created staging dataset: {}", stagingDatasetId);
  }

  return StagingTableId.of(this, stagingTableId);
}
 
Example #5
Source File: BigQueryDatasetRuntime.java    From components with Apache License 2.0
private TableResult queryWithLarge(BigQuery bigquery, QueryJobConfiguration queryRequest, String projectId,
                                   BigQuery.JobOption... options) {
    String tempDataset = genTempName("dataset");
    String tempTable = genTempName("table");
    bigquery.create(DatasetInfo.of(tempDataset));
    TableId tableId = TableId.of(projectId, tempDataset, tempTable);
    QueryJobConfiguration jobConfiguration = QueryJobConfiguration
            .newBuilder(queryRequest.getQuery())
            .setAllowLargeResults(true)
            .setUseLegacySql(queryRequest.useLegacySql())
            .setDestinationTable(tableId)
            .build();
    try {
        return query(bigquery, jobConfiguration, projectId, options);
    } finally {
        bigquery.delete(tableId);
    }
}
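A design note: legacy SQL can only return large results when allowLargeResults is set together with an explicit destination table, which is why this helper routes the query through a temporary dataset and table. Also note that the finally block deletes only the temporary table; the temporary dataset itself is left behind unless the caller cleans it up.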
 
Example #6
Source File: BQClientTest.java    From beast with Apache License 2.0
@Test
public void shouldIgnoreExceptionIfDatasetAlreadyExists() {
    when(bqConfig.isBQTablePartitioningEnabled()).thenReturn(true);
    when(bqConfig.getBQTablePartitionKey()).thenReturn("partition_column");
    when(bqConfig.getTable()).thenReturn("bq-table");
    when(bqConfig.getDataset()).thenReturn("bq-proto");
    bqClient = new BQClient(bigquery, bqConfig);

    ArrayList<Field> bqSchemaFields = new ArrayList<Field>() {{
        add(Field.newBuilder("test-1", LegacySQLTypeName.INTEGER).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder("partition_column", LegacySQLTypeName.TIMESTAMP).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder(Constants.OFFSET_COLUMN_NAME, LegacySQLTypeName.INTEGER).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder(Constants.TOPIC_COLUMN_NAME, LegacySQLTypeName.STRING).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder(Constants.LOAD_TIME_COLUMN_NAME, LegacySQLTypeName.TIMESTAMP).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder(Constants.TIMESTAMP_COLUMN_NAME, LegacySQLTypeName.TIMESTAMP).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder(Constants.PARTITION_COLUMN_NAME, LegacySQLTypeName.INTEGER).setMode(Field.Mode.NULLABLE).build());
    }};

    TableDefinition tableDefinition = getPartitionedTableDefinition(bqSchemaFields);
    TableId tableId = TableId.of(bqConfig.getDataset(), bqConfig.getTable());
    TableInfo tableInfo = TableInfo.newBuilder(tableId, tableDefinition).build();

    when(bigquery.getDataset(tableId.getDataset())).thenReturn(dataset);
    when(dataset.exists()).thenReturn(false);
    when(table.exists()).thenReturn(false);
    when(bigquery.getTable(tableId)).thenReturn(table);
    when(bigquery.create(tableInfo)).thenReturn(table);

    bqClient.upsertTable(bqSchemaFields);
    verify(bigquery).create(DatasetInfo.of(tableId.getDataset()));
    verify(bigquery).create(tableInfo);
    verify(bigquery, never()).update(tableInfo);
}
 
Example #7
Source File: ITCloudSnippets.java    From google-cloud-java with Apache License 2.0
@BeforeClass
public static void beforeClass() {
  bigquery = RemoteBigQueryHelper.create().getOptions().getService();
  cloudSnippets = new CloudSnippets(bigquery);
  bigquery.create(DatasetInfo.newBuilder(DATASET).build());
  bout = new ByteArrayOutputStream();
  out = new PrintStream(bout);
  System.setOut(out);
}
 
Example #8
Source File: ITTableSnippets.java    From google-cloud-java with Apache License 2.0
@BeforeClass
public static void beforeClass() {
  bigquery = RemoteBigQueryHelper.create().getOptions().getService();
  bigquery.create(DatasetInfo.newBuilder(DATASET_NAME).build());
  bigquery.create(DatasetInfo.newBuilder(COPY_DATASET_NAME).build());
  storage = RemoteStorageHelper.create().getOptions().getService();
  storage.create(BucketInfo.of(BUCKET_NAME));
}
 
Example #9
Source File: ITBigQuerySnippets.java    From google-cloud-java with Apache License 2.0
@BeforeClass
public static void beforeClass() {
  bigquery = RemoteBigQueryHelper.create().getOptions().getService();
  bigquerySnippets = new BigQuerySnippets(bigquery);
  bigquery.create(DatasetInfo.newBuilder(DATASET).build());
  bout = new ByteArrayOutputStream();
  out = new PrintStream(bout);
  System.setOut(out);
}
 
Example #10
Source File: ITDatasetInfoSnippets.java    From google-cloud-java with Apache License 2.0
@BeforeClass
public static void beforeClass() {
  bigquery = RemoteBigQueryHelper.create().getOptions().getService();
  datasetInfoSnippets = new DatasetInfoSnippets(bigquery);
  bigquery.create(DatasetInfo.newBuilder(DATASET).build());
  bout = new ByteArrayOutputStream();
  out = new PrintStream(bout);
  System.setOut(out);
}
 
Example #11
Source File: ITDatasetSnippets.java    From google-cloud-java with Apache License 2.0
@Test
public void testDelete() {
  String datasetName = RemoteBigQueryHelper.generateDatasetName();
  DatasetInfo dataset = DatasetInfo.newBuilder(datasetName).build();
  DatasetSnippets datasetSnippets = new DatasetSnippets(bigquery.create(dataset));
  assertTrue(datasetSnippets.deleteDataset());
}
 
Example #12
Source File: ITDatasetSnippets.java    From google-cloud-java with Apache License 2.0
@BeforeClass
public static void beforeClass() {
  bigquery = RemoteBigQueryHelper.create().getOptions().getService();
  dataset = bigquery.create(DatasetInfo.newBuilder(DATASET).build());
  nonExistingDataset = bigquery.create(DatasetInfo.newBuilder(NON_EXISTING_DATASET).build());
  nonExistingDataset.delete(DatasetDeleteOption.deleteContents());
}
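A usage note: DatasetDeleteOption.deleteContents() lets the delete succeed even if the dataset still contains tables; without it, deleting a non-empty dataset fails.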
 
Example #13
Source File: BigQuerySnippets.java    From google-cloud-java with Apache License 2.0
/** Example of updating a dataset by changing its description. */
// [TARGET update(DatasetInfo, DatasetOption...)]
// [VARIABLE "my_dataset_name"]
// [VARIABLE "some_new_description"]
public Dataset updateDataset(String datasetName, String newDescription) {
  // [START bigquery_update_dataset_description]
  Dataset oldDataset = bigquery.getDataset(datasetName);
  DatasetInfo datasetInfo = oldDataset.toBuilder().setDescription(newDescription).build();
  Dataset newDataset = bigquery.update(datasetInfo);
  // [END bigquery_update_dataset_description]
  return newDataset;
}
 
Example #14
Source File: DatasetInfoSnippets.java    From google-cloud-java with Apache License 2.0
/** Update the default table expiration time for a dataset. */
// [TARGET getDefaultTableLifetime()]
// [VARIABLE bigquery.getDataset(DatasetId.of("my_dataset"))]
public Long updateDatasetExpiration(DatasetInfo dataset) {
  // [START bigquery_update_dataset_expiration]
  Long beforeExpiration = dataset.getDefaultTableLifetime();

  Long oneDayMilliseconds = 24 * 60 * 60 * 1000L;
  DatasetInfo.Builder builder = dataset.toBuilder();
  builder.setDefaultTableLifetime(oneDayMilliseconds);
  bigquery.update(builder.build()); // API request.
  // [END bigquery_update_dataset_expiration]

  return beforeExpiration;
}
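A usage note: the default table lifetime is expressed in milliseconds and applies only to tables created after the update; tables that already exist in the dataset keep their current expiration.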
 
Example #15
Source File: BigQueryDatasetRuntimeTestIT.java    From components with Apache License 2.0
@BeforeClass
public static void initDatasetAndTable() throws IOException {
    BigQuery bigquery = BigQueryConnection.createClient(createDatastore());
    for (String dataset : datasets) {
        DatasetId datasetId = DatasetId.of(BigQueryTestConstants.PROJECT, dataset);
        bigquery.create(DatasetInfo.of(datasetId));
    }

    for (String table : tables) {
        TableDefinition tableDefinition =
                StandardTableDefinition.of(Schema.of(Field.of("test", LegacySQLTypeName.STRING)));
        TableId tableId = TableId.of(BigQueryTestConstants.PROJECT, datasets.get(0), table);
        bigquery.create(TableInfo.of(tableId, tableDefinition));
    }
}
 
Example #16
Source File: BigQueryOutputTest.java    From flo with Apache License 2.0
@Test
public void shouldCreateStagingDatasetIfDoesNotExist() {
  when(bigQuery.getDataset(DATASET_ID)).thenReturn(dataset);

  final BigQueryOutput bigQueryOutput = BigQueryOutput.create(() -> floBigQueryClient, TABLE_ID);

  bigQueryOutput.provide(null);

  verify(bigQuery).create(datasetInfoCaptor.capture());
  final DatasetInfo createdStagingDataset = datasetInfoCaptor.getValue();
  final DatasetId expectedDatasetId = DatasetId.of(DATASET_ID.getProject(), "_incoming_" + LOCATION);
  assertThat(createdStagingDataset.getDatasetId(), is(expectedDatasetId));
  assertThat(createdStagingDataset.getLocation(), is(LOCATION));
  assertThat(createdStagingDataset.getDefaultTableLifetime(), is(Duration.ofDays(1).toMillis()));
}
 
Example #17
Source File: BigQueryDataset.java    From gcp-ingestion with Mozilla Public License 2.0
/** Find credentials in the environment and create a dataset in BigQuery. */
@Override
protected void starting(Description description) {
  RemoteBigQueryHelper bqHelper = RemoteBigQueryHelper.create();
  bigquery = bqHelper.getOptions().getService();
  project = bqHelper.getOptions().getProjectId();
  dataset = RemoteBigQueryHelper.generateDatasetName();
  bigquery.create(DatasetInfo.newBuilder(dataset).build());
}
 
Example #18
Source File: BigQueryIntegrationTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test
public void canWriteToBigQuery() throws Exception {
  String table = "my_test_table";
  String tableSpec = String.format("%s.%s", dataset, table);
  TableId tableId = TableId.of(dataset, table);

  bigquery.create(DatasetInfo.newBuilder(dataset).build());
  bigquery
      .create(TableInfo
          .newBuilder(tableId,
              StandardTableDefinition
                  .of(Schema.of(Field.of("client_id", LegacySQLTypeName.STRING),
                      Field.of("type", LegacySQLTypeName.STRING),
                      Field.of("submission_timestamp", LegacySQLTypeName.TIMESTAMP)))
                  .toBuilder().setTimePartitioning(TIME_PARTITIONING).setClustering(CLUSTERING)
                  .build())
          .build());

  String input = Resources
      .getResource("testdata/bigquery-integration/input-with-attributes.ndjson").getPath();
  String output = String.format("%s:%s", projectId, tableSpec);

  PipelineResult result = Sink.run(new String[] { "--inputFileFormat=json", "--inputType=file",
      "--input=" + input, "--outputType=bigquery", "--bqWriteMethod=streaming",
      "--schemasLocation=schemas.tar.gz", "--output=" + output, "--errorOutputType=stderr" });

  result.waitUntilFinish();

  assertThat(stringValuesQueryWithRetries("SELECT submission_timestamp FROM " + tableSpec),
      matchesInAnyOrder(Lists.newArrayList(null, null, "1561983194.123456")));
  assertThat(stringValuesQueryWithRetries("SELECT client_id FROM " + tableSpec),
      matchesInAnyOrder(ImmutableList.of("abc123", "abc123", "def456")));
}
 
Example #19
Source File: BigQueryIntegrationTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test
public void canWriteToDynamicTables() throws Exception {
  String table = "my_test_table";
  TableId tableId = TableId.of(dataset, table);

  bigquery.create(DatasetInfo.newBuilder(dataset).build());
  bigquery.create(TableInfo.newBuilder(tableId,
      StandardTableDefinition.of(Schema.of(Field.of("client_id", LegacySQLTypeName.STRING),
          Field.of("type", LegacySQLTypeName.STRING))))
      .build());

  String input = Resources
      .getResource("testdata/bigquery-integration/input-varied-doctypes.ndjson").getPath();
  String output = String.format("%s:%s.%s", projectId, dataset, "${document_type}_table");
  String errorOutput = outputPath + "/error/out";

  PipelineResult result = Sink.run(new String[] { "--inputFileFormat=json", "--inputType=file",
      "--input=" + input, "--outputType=bigquery", "--output=" + output,
      "--bqWriteMethod=streaming", "--errorOutputType=file", "--schemasLocation=schemas.tar.gz",
      "--errorOutputFileCompression=UNCOMPRESSED", "--errorOutput=" + errorOutput });

  result.waitUntilFinish();

  String tableSpec = String.format("%s.%s", dataset, table);
  assertThat(stringValuesQueryWithRetries("SELECT client_id FROM " + tableSpec),
      matchesInAnyOrder(ImmutableList.of("abc123")));

  List<String> errorOutputLines = Lines.files(outputPath + "/error/out*.ndjson");
  assertThat(errorOutputLines, Matchers.hasSize(2));
}
 
Example #20
Source File: BigQueryIntegrationTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test
public void canWriteViaFileLoads() throws Exception {
  String table = "my_test_table";
  TableId tableId = TableId.of(dataset, table);

  bigquery.create(DatasetInfo.newBuilder(dataset).build());
  bigquery
      .create(TableInfo
          .newBuilder(tableId,
              StandardTableDefinition
                  .of(Schema.of(Field.of("client_id", LegacySQLTypeName.STRING),
                      Field.of("type", LegacySQLTypeName.STRING),
                      Field.of("submission_timestamp", LegacySQLTypeName.TIMESTAMP)))
                  .toBuilder().setTimePartitioning(TIME_PARTITIONING).setClustering(CLUSTERING)
                  .build())
          .build());

  String input = Resources
      .getResource("testdata/bigquery-integration/input-varied-doctypes.ndjson").getPath();
  String output = String.format("%s:%s.%s", projectId, dataset, "${document_type}_table");
  String errorOutput = outputPath + "/error/out";

  PipelineResult result = Sink.run(new String[] { "--inputFileFormat=json", "--inputType=file",
      "--input=" + input, "--outputType=bigquery", "--output=" + output,
      "--bqWriteMethod=file_loads", "--errorOutputType=file",
      "--tempLocation=gs://gcp-ingestion-static-test-bucket/temp/bq-loads",
      "--schemasLocation=schemas.tar.gz", "--errorOutputFileCompression=UNCOMPRESSED",
      "--errorOutput=" + errorOutput });

  result.waitUntilFinish();

  String tableSpec = String.format("%s.%s", dataset, table);
  assertThat(stringValuesQueryWithRetries("SELECT client_id FROM " + tableSpec),
      matchesInAnyOrder(ImmutableList.of("abc123")));

  List<String> errorOutputLines = Lines.files(outputPath + "/error/out*.ndjson");
  assertThat(errorOutputLines, Matchers.hasSize(2));
}
 
Example #21
Source File: BigQueryOutput.java    From flo with Apache License 2.0
private DatasetInfo getDatasetOrThrow() {
  final DatasetId datasetId = DatasetId.of(tableId.getProject(), tableId.getDataset());

  final DatasetInfo dataset = bigQuery().getDataset(datasetId);

  if (dataset == null) {
    LOG.error("Could not find dataset {}", datasetId);
    throw new IllegalArgumentException(
        "Dataset does not exist. Please create it before attempting to write to it.");
  }

  return dataset;
}
 
Example #22
Source File: BigQueryIntegrationTest.java    From gcp-ingestion with Mozilla Public License 2.0
private void canWriteWithMixedMethod(String streamingDocTypes) throws Exception {
  String table = "my_test_table";
  TableId tableId = TableId.of(dataset, table);

  bigquery.create(DatasetInfo.newBuilder(dataset).build());
  bigquery
      .create(TableInfo
          .newBuilder(tableId,
              StandardTableDefinition
                  .of(Schema.of(Field.of("client_id", LegacySQLTypeName.STRING),
                      Field.of("type", LegacySQLTypeName.STRING),
                      Field.of("submission_timestamp", LegacySQLTypeName.TIMESTAMP)))
                  .toBuilder().setTimePartitioning(TIME_PARTITIONING).setClustering(CLUSTERING)
                  .build())
          .build());

  String input = Resources
      .getResource("testdata/bigquery-integration/input-varied-doctypes.ndjson").getPath();
  String output = String.format("%s:%s.%s", projectId, dataset, "${document_type}_table");
  String errorOutput = outputPath + "/error/out";

  PipelineResult result = Sink.run(new String[] { "--inputFileFormat=json", "--inputType=file",
      "--input=" + input, "--outputType=bigquery", "--output=" + output, "--bqWriteMethod=mixed",
      "--bqStreamingDocTypes=" + streamingDocTypes, "--errorOutputType=file",
      "--tempLocation=gs://gcp-ingestion-static-test-bucket/temp/bq-loads",
      "--schemasLocation=schemas.tar.gz", "--errorOutputFileCompression=UNCOMPRESSED",
      "--errorOutput=" + errorOutput });

  result.waitUntilFinish();

  String tableSpec = String.format("%s.%s", dataset, table);
  assertThat(stringValuesQueryWithRetries("SELECT client_id FROM " + tableSpec),
      matchesInAnyOrder(ImmutableList.of("abc123")));

  List<String> errorOutputLines = Lines.files(outputPath + "/error/out*.ndjson");
  assertThat(errorOutputLines, Matchers.hasSize(2));
}
 
Example #23
Source File: Search.java    From java-docs-samples with Apache License 2.0
@Before
public void setUp() {
  bigquery = BigQueryOptions.getDefaultInstance().getService();
  if (bigquery.getDataset(datasetName) == null) {
    bigquery.create(DatasetInfo.newBuilder(datasetName).build());
  }
  bout = new ByteArrayOutputStream();
  out = new PrintStream(bout);
  System.setOut(out);
}
 
Example #24
Source File: QuickStartIT.java    From java-docs-samples with Apache License 2.0
@Before
public void setUp() {
  bigquery = BigQueryOptions.getDefaultInstance().getService();
  if (bigquery.getDataset(datasetName) == null) {
    Dataset dataset = bigquery.create(DatasetInfo.newBuilder(datasetName).build());
  }
  bout = new ByteArrayOutputStream();
  out = new PrintStream(bout);
  System.setOut(out);
}
 
Example #25
Source File: BigQueryIntegrationTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test
public void canRecoverFailedInsertsInStreamingMode() throws Exception {
  String table = "my_test_table";
  String tableSpec = String.format("%s.%s", dataset, table);
  TableId tableId = TableId.of(dataset, table);

  bigquery.create(DatasetInfo.newBuilder(dataset).build());

  bigquery.create(TableInfo.newBuilder(tableId,
      StandardTableDefinition.of(Schema.of(Field.of("client_id", LegacySQLTypeName.STRING),
          Field.newBuilder("extra_required_field", LegacySQLTypeName.STRING)
              .setMode(Mode.REQUIRED).build())))
      .build());

  String input = Resources.getResource("testdata/json-payload.ndjson").getPath();
  String output = String.format("%s:%s", projectId, tableSpec);
  String errorOutput = outputPath + "/error/out";

  PipelineResult result = Sink.run(new String[] { "--inputFileFormat=text", "--inputType=file",
      "--input=" + input, "--outputType=bigquery", "--output=" + output, "--errorOutputType=file",
      "--bqWriteMethod=streaming", "--errorOutputFileCompression=UNCOMPRESSED",
      "--errorOutput=" + errorOutput });

  result.waitUntilFinish();

  assertTrue(stringValuesQuery("SELECT client_id FROM " + tableSpec).isEmpty());

  List<String> expectedErrorLines = Lines.resources("testdata/json-payload-wrapped.ndjson");
  List<String> errorOutputLines = Lines.files(outputPath + "/error/out*.ndjson");
  assertThat(errorOutputLines, Matchers.hasSize(expectedErrorLines.size()));
}
 
Example #26
Source File: BigQueryIntegrationTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test
public void canSetStrictSchemaMode() throws Exception {
  String table = "my_test_table";
  String tableSpec = String.format("%s.%s", dataset, table);
  TableId tableId = TableId.of(dataset, table);

  bigquery.create(DatasetInfo.newBuilder(dataset).build());
  bigquery
      .create(TableInfo
          .newBuilder(tableId,
              StandardTableDefinition
                  .of(Schema.of(Field.of("client_id", LegacySQLTypeName.STRING),
                      Field.of("additional_properties", LegacySQLTypeName.STRING),
                      Field.of("submission_timestamp", LegacySQLTypeName.TIMESTAMP)))
                  .toBuilder().setTimePartitioning(TIME_PARTITIONING).setClustering(CLUSTERING)
                  .build())
          .build());

  String input = Resources
      .getResource("testdata/bigquery-integration/input-varied-doctypes.ndjson").getPath();
  String output = String.format("%s:%s", projectId, tableSpec);

  PipelineResult result = Sink.run(new String[] { "--inputFileFormat=json", "--inputType=file",
      "--input=" + input, "--outputType=bigquery", "--bqWriteMethod=streaming",
      "--bqStrictSchemaDocTypes=my-namespace/my-test", "--output=" + output,
      "--errorOutputType=stderr" });

  result.waitUntilFinish();

  assertThat(stringValuesQueryWithRetries("SELECT additional_properties FROM " + tableSpec),
      matchesInAnyOrder(Lists.newArrayList("{\"type\":\"main\"}", null, "{\"type\":\"main\"}")));
}
 
Example #27
Source File: BigQueryIntegrationTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test
public void canLoadRawFormat() throws Exception {
  String table = "my_test_table";
  String tableSpec = String.format("%s.%s", dataset, table);
  TableId tableId = TableId.of(dataset, table);

  bigquery.create(DatasetInfo.newBuilder(dataset).build());
  bigquery
      .create(TableInfo
          .newBuilder(tableId,
              StandardTableDefinition
                  .of(Schema.of(Field.of("args", LegacySQLTypeName.STRING),
                      Field.of("content_length", LegacySQLTypeName.STRING),
                      Field.of("date", LegacySQLTypeName.STRING),
                      Field.of("dnt", LegacySQLTypeName.STRING),
                      Field.of("host", LegacySQLTypeName.STRING),
                      Field.of("method", LegacySQLTypeName.STRING),
                      Field.of("payload", LegacySQLTypeName.BYTES),
                      Field.of("protocol", LegacySQLTypeName.STRING),
                      Field.of("remote_addr", LegacySQLTypeName.STRING),
                      Field.of("submission_timestamp", LegacySQLTypeName.TIMESTAMP),
                      Field.of("uri", LegacySQLTypeName.STRING),
                      Field.of("user_agent", LegacySQLTypeName.STRING),
                      Field.of("x_debug_id", LegacySQLTypeName.STRING),
                      Field.of("x_forwarded_for", LegacySQLTypeName.STRING),
                      Field.of("x_pingsender_version", LegacySQLTypeName.STRING),
                      Field.of("x_pipeline_proxy", LegacySQLTypeName.STRING)))
                  .toBuilder().setTimePartitioning(TIME_PARTITIONING).setClustering(CLUSTERING)
                  .build())
          .build());

  String input = Resources.getResource("testdata/bigquery-integration/input-raw-format.ndjson")
      .getPath();
  String output = String.format("%s:%s", projectId, tableSpec);

  PipelineResult result = Sink
      .run(new String[] { "--inputFileFormat=json", "--inputType=file", "--input=" + input,
          "--outputType=bigquery", "--bqWriteMethod=streaming", "--decompressInputPayloads=false",
          "--outputTableRowFormat=raw", "--output=" + output, "--errorOutputType=stderr" });

  result.waitUntilFinish();

  assertEquals(
      Json.asString(ImmutableMap.<String, String>builder()
          .put("submission_timestamp", "2020-01-12T21:02:18.123456Z")
          .put("uri",
              "/submit/telemetry/6c49ec73-4350-45a0-9c8a-6c8f5aded0cf/main/Firefox/58.0.2"
                  + "/release/20180206200532")
          .put("protocol", "HTTP/1.1").put("method", "POST").put("args", "v=4")
          .put("remote_addr", "172.31.32.5").put("content_length", "4722")
          .put("date", "Mon, 12 Jan 2020 21:02:18 GMT").put("dnt", "1")
          .put("host", "incoming.telemetry.mozilla.org").put("user_agent", "pingsender/1.0")
          .put("x_forwarded_for", "10.98.132.74, 103.3.237.12").put("x_pingsender_version", "1.0")
          .put("x_debug_id", "my_debug_session_1")
          .put("x_pipeline_proxy", "2020-01-12T21:02:18.123456Z")
          // ensure this value stayed compressed when loaded
          .put("payload", "H4sIAJBj8l4AAytJLS4BAAx+f9gEAAAA").build()),
      String.join("\n",
          stringValuesQueryWithRetries("SELECT TO_JSON_STRING(t) FROM " + tableSpec + " AS t")));
}
 
Example #28
Source File: IntegrationTestUtils.java    From spark-bigquery-connector with Apache License 2.0
public static void createDataset(String dataset) {
    BigQuery bq = getBigquery();
    DatasetId datasetId = DatasetId.of(dataset);
    logger.warn("Creating test dataset: {}", datasetId);
    bq.create(DatasetInfo.of(datasetId));
}
 
Example #29
Source File: BigQueryExample.java    From google-cloud-java with Apache License 2.0
@Override
public void run(BigQuery bigquery, DatasetId datasetId) {
  bigquery.create(DatasetInfo.newBuilder(datasetId).build());
  System.out.println("Created dataset " + datasetId);
}
 
Example #30
Source File: InsertDataAndQueryTable.java    From google-cloud-java with Apache License 2.0
public static void main(String... args) throws InterruptedException {
  // Create a service instance
  BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();

  // Create a dataset
  String datasetId = "my_dataset_id";
  bigquery.create(DatasetInfo.newBuilder(datasetId).build());

  TableId tableId = TableId.of(datasetId, "my_table_id");
  // Table field definition
  Field stringField = Field.of("StringField", LegacySQLTypeName.STRING);
  // Table schema definition
  Schema schema = Schema.of(stringField);
  // Create a table
  StandardTableDefinition tableDefinition = StandardTableDefinition.of(schema);
  bigquery.create(TableInfo.of(tableId, tableDefinition));

  // Define rows to insert
  Map<String, Object> firstRow = new HashMap<>();
  Map<String, Object> secondRow = new HashMap<>();
  firstRow.put("StringField", "value1");
  secondRow.put("StringField", "value2");
  // Create an insert request
  InsertAllRequest insertRequest =
      InsertAllRequest.newBuilder(tableId).addRow(firstRow).addRow(secondRow).build();
  // Insert rows
  InsertAllResponse insertResponse = bigquery.insertAll(insertRequest);
  // Check if errors occurred
  if (insertResponse.hasErrors()) {
    System.out.println("Errors occurred while inserting rows");
  }

  // Create a query request
  QueryJobConfiguration queryConfig =
      QueryJobConfiguration.newBuilder("SELECT * FROM my_dataset_id.my_table_id").build();
  // Read rows
  System.out.println("Table rows:");
  for (FieldValueList row : bigquery.query(queryConfig).iterateAll()) {
    System.out.println(row);
  }
}