com.google.cloud.bigquery.TableId Java Examples

The following examples show how to use com.google.cloud.bigquery.TableId. They are drawn from open source Java projects; the source file and license are noted above each example.
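Before the project examples, a minimal standalone sketch of the class itself; the project, dataset, and table names below are placeholders, not taken from any example.

// TableId is a simple value object. of() has a two-argument form that relies on
// the client's default project, and a fully qualified three-argument form.
TableId withDefaultProject = TableId.of("my_dataset", "my_table");
TableId fullyQualified = TableId.of("my-project", "my_dataset", "my_table");

// Component accessors; getProject() returns null for the two-argument form.
String project = fullyQualified.getProject(); // "my-project"
String dataset = fullyQualified.getDataset(); // "my_dataset"
String table = fullyQualified.getTable();     // "my_table"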
Example #1
Source File: BigQueryOutput.java    From flo with Apache License 2.0
@Override
public StagingTableId provide(EvalContext evalContext) {
  final String location = getDatasetOrThrow().getLocation();

  final TableId stagingTableId = bigQuery().createStagingTableId(tableId, location);
  final DatasetId stagingDatasetId = DatasetId.of(stagingTableId.getProject(), stagingTableId.getDataset());

  if (bigQuery().getDataset(stagingDatasetId) == null) {
    bigQuery().create(DatasetInfo
        .newBuilder(stagingDatasetId)
        .setLocation(location)
        .setDefaultTableLifetime(Duration.ofDays(1).toMillis())
        .build());
    LOG.info("created staging dataset: {}", stagingDatasetId);
  }

  return StagingTableId.of(this, stagingTableId);
}
 
Example #2
Source File: BigQueryMapper.java    From DataflowTemplates with Apache License 2.0
private Boolean addNewTableField(TableId tableId, TableRow row, String rowKey,
    List<Field> newFieldList, Map<String, LegacySQLTypeName> inputSchema) {
  // Call Get Schema and Extract New Field Type
  Field newField;

  if (inputSchema.containsKey(rowKey)) {
    newField = Field.of(rowKey, inputSchema.get(rowKey));
  } else {
    newField = Field.of(rowKey, LegacySQLTypeName.STRING);
  }

  newFieldList.add(newField);

  // Currently we always add new fields for each call
  // TODO: add an option to ignore new field and why boolean?
  return true;
}
 
Example #3
Source File: BigQueryOperatorTest.java    From flo with Apache License 2.0
@Test
public void shouldRunQueryJobInTestMode() throws Exception {
  final TableId table = TableId.of("foo", "bar", "baz");

  final Task<TableId> task = Task.named("task")
      .ofType(TableId.class)
      .output(BigQueryOutput.create(table))
      .operator(BigQueryOperator.create())
      .process((stagingTable, bq) -> bq.job(
          JobInfo.of(QueryJobConfiguration.newBuilder("SELECT foo FROM input")
              .setDestinationTable(stagingTable.tableId())
              .build()))
          .success(response -> stagingTable.publish()));

  try (TestScope scope = FloTesting.scope()) {

    final TableId result = FloRunner.runTask(task).future()
        .get(30, SECONDS);

    assertThat(result, is(table));
    assertThat(BigQueryMocking.mock().tablePublished(table), is(true));
    assertThat(BigQueryMocking.mock().tableExists(table), is(true));
  }
}
 
Example #4
Source File: BigQueryOperatorTest.java    From flo with Apache License 2.0
@Test
public void shouldRunCopyJobInTestMode() throws Exception {
  final TableId srcTable = TableId.of("foo", "bar", "src");
  final TableId dstTable = TableId.of("foo", "bar", "dst");

  final Task<TableId> task = Task.named("task")
      .ofType(TableId.class)
      .operator(BigQueryOperator.create())
      .process(bq -> bq.job(
          JobInfo.of(CopyJobConfiguration.of(dstTable, srcTable)))
          .success(response -> dstTable));

  try (TestScope scope = FloTesting.scope()) {

    final TableId result = FloRunner.runTask(task).future()
        .get(30, SECONDS);

    assertThat(result, is(dstTable));
  }
}
 
Example #5
Source File: ReadSessionCreator.java    From presto with Apache License 2.0
TableInfo createTableFromQuery()
{
    TableId destinationTable = bigQueryClient.createDestinationTable(table);
    log.debug("destinationTable is %s", destinationTable);
    JobInfo jobInfo = JobInfo.of(
            QueryJobConfiguration
                    .newBuilder(query)
                    .setDestinationTable(destinationTable)
                    .build());
    log.debug("running query %s", jobInfo);
    Job job = waitForJob(bigQueryClient.create(jobInfo));
    log.debug("job has finished. %s", job);
    if (job.getStatus().getError() != null) {
        throw convertToBigQueryException(job.getStatus().getError());
    }
    // add expiration time to the table
    TableInfo createdTable = bigQueryClient.getTable(destinationTable);
    long expirationTime = createdTable.getCreationTime() +
            TimeUnit.HOURS.toMillis(config.viewExpirationTimeInHours);
    Table updatedTable = bigQueryClient.update(createdTable.toBuilder()
            .setExpirationTime(expirationTime)
            .build());
    return updatedTable;
}
 
Example #6
Source File: TableSnippets.java    From google-cloud-java with Apache License 2.0
/** Example copying the table to a destination table. */
// [TARGET copy(TableId, JobOption...)]
// [VARIABLE "my_dataset"]
// [VARIABLE "my_destination_table"]
public Job copyTableId(String dataset, String tableName) throws BigQueryException {
  // [START bigquery_copy_table]
  TableId destinationId = TableId.of(dataset, tableName);
  JobOption options = JobOption.fields(JobField.STATUS, JobField.USER_EMAIL);
  Job job = table.copy(destinationId, options);
  // Wait for the job to complete.
  try {
    Job completedJob =
        job.waitFor(
            RetryOption.initialRetryDelay(Duration.ofSeconds(1)),
            RetryOption.totalTimeout(Duration.ofMinutes(3)));
    if (completedJob != null && completedJob.getStatus().getError() == null) {
      // Job completed successfully.
    } else {
      // Handle error case.
    }
  } catch (InterruptedException e) {
    // Handle interrupted wait
  }
  // [END bigquery_copy_table]
  return job;
}
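As a follow-up, a minimal sketch of how the table instance this snippet operates on could be obtained; the client setup and table names are assumptions, not part of the original snippet.

// Hypothetical setup: fetch the source table, then copy it to a destination TableId.
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
Table table = bigquery.getTable(TableId.of("my_dataset", "my_source_table"));
Job job = table.copy(TableId.of("my_dataset", "my_destination_table"));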
 
Example #7
Source File: BigQueryTasksTest.java    From flo with Apache License 2.0
@Test
public void lookupLatestDailyShouldThrowNotReadyForNonExistentOrTooOldTable() throws Exception {
  final Task<TableId> lookup = BigQueryTasks.lookupLatestDaily(() -> {
        FloBigQueryClient bq = mock(FloBigQueryClient.class);
        when(bq.tableExists(any())).thenReturn(false);
        return bq;
      },
      "foo",
      "bar",
      "baz",
      Date.parse("2018-01-01"),
      7);
  exception.expectCause(instanceOf(NotReady.class));
  FloRunner.runTask(lookup)
      .future().get(30, TimeUnit.SECONDS);
}
 
Example #8
Source File: MergeInfoMapper.java    From DataflowTemplates with Apache License 2.0
@Override
public PCollection<MergeInfo> expand(PCollection<KV<TableId, TableRow>> input) {
  return input.apply(
      MapElements.into(TypeDescriptor.of(MergeInfo.class))
          .via(
              element -> {
                return MergeInfo.create(
                    METADATA_TIMESTAMP, // TODO should be list pulled from Datastream API
                    METADATA_DELETED,
                    String.format("%s.%s",
                        // Staging Table // TODO these should possibly be passed separately
                        BigQueryConverters
                            .formatStringTemplate(stagingDataset, element.getValue()),
                        BigQueryConverters
                            .formatStringTemplate(stagingTable, element.getValue())),
                    String.format("%s.%s", // Replica Table
                        BigQueryConverters
                            .formatStringTemplate(replicaDataset, element.getValue()),
                        BigQueryConverters
                            .formatStringTemplate(replicaTable, element.getValue())),
                    ImmutableList.copyOf(element.getValue().keySet()),
                    ImmutableList.of("ID"));
              }));
}
 
Example #9
Source File: CreateTableAndLoadData.java    From google-cloud-java with Apache License 2.0
public static void main(String... args) throws InterruptedException, TimeoutException {
  BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
  TableId tableId = TableId.of("dataset", "table");
  Table table = bigquery.getTable(tableId);
  if (table == null) {
    System.out.println("Creating table " + tableId);
    Field integerField = Field.of("fieldName", LegacySQLTypeName.INTEGER);
    Schema schema = Schema.of(integerField);
    table = bigquery.create(TableInfo.of(tableId, StandardTableDefinition.of(schema)));
  }
  System.out.println("Loading data into table " + tableId);
  Job loadJob = table.load(FormatOptions.csv(), "gs://bucket/path");
  loadJob = loadJob.waitFor();
  if (loadJob.getStatus().getError() != null) {
    System.out.println("Job completed with errors");
  } else {
    System.out.println("Job succeeded");
  }
}
 
Example #10
Source File: BigQueryMapper.java    From DataflowTemplates with Apache License 2.0
@Override
public PCollection<OutputT> expand(PCollection<InputT> tableKVPCollection) {
  return tableKVPCollection.apply(
      "TableRowExtractDestination",
      MapElements.via(
          new SimpleFunction<InputT, OutputT>() {
            @Override
            public OutputT apply(InputT input) {
              /*
                  We run validation against every event to ensure that all of
                  its columns exist in the destination table.
                  If a column is in the event but not yet in BigQuery,
                  the column is added to the table before the event can continue.
              */
              setUp();
              TableId tableId = getTableId(input);
              TableRow row = getTableRow(input);
              Map<String, LegacySQLTypeName> inputSchema = getObjectSchema(input);
              int retries = getMapperRetries();

              applyMapperToTableRow(tableId, row, inputSchema, retries);
              return getOutputObject(input);
            }
          }));
}
 
Example #11
Source File: BigQueryIntegrationTest.java    From gcp-ingestion with Mozilla Public License 2.0
private void canWriteWithMixedMethod(String streamingDocTypes) throws Exception {
  String table = "my_test_table";
  TableId tableId = TableId.of(dataset, table);

  bigquery.create(DatasetInfo.newBuilder(dataset).build());
  bigquery
      .create(TableInfo
          .newBuilder(tableId,
              StandardTableDefinition
                  .of(Schema.of(Field.of("client_id", LegacySQLTypeName.STRING),
                      Field.of("type", LegacySQLTypeName.STRING),
                      Field.of("submission_timestamp", LegacySQLTypeName.TIMESTAMP)))
                  .toBuilder().setTimePartitioning(TIME_PARTITIONING).setClustering(CLUSTERING)
                  .build())
          .build());

  String input = Resources
      .getResource("testdata/bigquery-integration/input-varied-doctypes.ndjson").getPath();
  String output = String.format("%s:%s.%s", projectId, dataset, "${document_type}_table");
  String errorOutput = outputPath + "/error/out";

  PipelineResult result = Sink.run(new String[] { "--inputFileFormat=json", "--inputType=file",
      "--input=" + input, "--outputType=bigquery", "--output=" + output, "--bqWriteMethod=mixed",
      "--bqStreamingDocTypes=" + streamingDocTypes, "--errorOutputType=file",
      "--tempLocation=gs://gcp-ingestion-static-test-bucket/temp/bq-loads",
      "--schemasLocation=schemas.tar.gz", "--errorOutputFileCompression=UNCOMPRESSED",
      "--errorOutput=" + errorOutput });

  result.waitUntilFinish();

  String tableSpec = String.format("%s.%s", dataset, table);
  assertThat(stringValuesQueryWithRetries("SELECT client_id FROM " + tableSpec),
      matchesInAnyOrder(ImmutableList.of("abc123")));

  List<String> errorOutputLines = Lines.files(outputPath + "/error/out*.ndjson");
  assertThat(errorOutputLines, Matchers.hasSize(2));
}
 
Example #12
Source File: BigQueryMappers.java    From DataflowTemplates with Apache License 2.0
@Override
public KV<TableId, TableRow> getOutputObject(KV<TableId, TableRow> input) {
  TableId tableId = getTableId(input);
  TableRow tableRow = getTableRow(input);
  TableRow cleanedTableRow = getCleanedTableRow(tableId, tableRow);

  return KV.of(tableId, cleanedTableRow);
}
 
Example #13
Source File: BigQueryTasksTest.java    From flo with Apache License 2.0
@Test
public void lookupShouldBeSerializable() throws SerializationException {
  final ByteArrayOutputStream baos = new ByteArrayOutputStream();
  final Task<TableId> task = BigQueryTasks.lookup("foo", "bar", "baz");
  Serialization.serialize(task, baos);
  final ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
  final Task<TableId> deserializedTask = Serialization.deserialize(bais);
  assertThat(deserializedTask, is(notNullValue()));
}
 
Example #14
Source File: BigQueryIntegrationTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test
public void canWriteToDynamicTables() throws Exception {
  String table = "my_test_table";
  TableId tableId = TableId.of(dataset, table);

  bigquery.create(DatasetInfo.newBuilder(dataset).build());
  bigquery.create(TableInfo.newBuilder(tableId,
      StandardTableDefinition.of(Schema.of(Field.of("client_id", LegacySQLTypeName.STRING),
          Field.of("type", LegacySQLTypeName.STRING))))
      .build());

  String input = Resources
      .getResource("testdata/bigquery-integration/input-varied-doctypes.ndjson").getPath();
  String output = String.format("%s:%s.%s", projectId, dataset, "${document_type}_table");
  String errorOutput = outputPath + "/error/out";

  PipelineResult result = Sink.run(new String[] { "--inputFileFormat=json", "--inputType=file",
      "--input=" + input, "--outputType=bigquery", "--output=" + output,
      "--bqWriteMethod=streaming", "--errorOutputType=file", "--schemasLocation=schemas.tar.gz",
      "--errorOutputFileCompression=UNCOMPRESSED", "--errorOutput=" + errorOutput });

  result.waitUntilFinish();

  String tableSpec = String.format("%s.%s", dataset, table);
  assertThat(stringValuesQueryWithRetries("SELECT client_id FROM " + tableSpec),
      matchesInAnyOrder(ImmutableList.of("abc123")));

  List<String> errorOutputLines = Lines.files(outputPath + "/error/out*.ndjson");
  assertThat(errorOutputLines, Matchers.hasSize(2));
}
 
Example #15
Source File: BigQueryIntegrationTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test
public void canWriteToBigQuery() throws Exception {
  String table = "my_test_table";
  String tableSpec = String.format("%s.%s", dataset, table);
  TableId tableId = TableId.of(dataset, table);

  bigquery.create(DatasetInfo.newBuilder(dataset).build());
  bigquery
      .create(TableInfo
          .newBuilder(tableId,
              StandardTableDefinition
                  .of(Schema.of(Field.of("client_id", LegacySQLTypeName.STRING),
                      Field.of("type", LegacySQLTypeName.STRING),
                      Field.of("submission_timestamp", LegacySQLTypeName.TIMESTAMP)))
                  .toBuilder().setTimePartitioning(TIME_PARTITIONING).setClustering(CLUSTERING)
                  .build())
          .build());

  String input = Resources
      .getResource("testdata/bigquery-integration/input-with-attributes.ndjson").getPath();
  String output = String.format("%s:%s", projectId, tableSpec);

  PipelineResult result = Sink.run(new String[] { "--inputFileFormat=json", "--inputType=file",
      "--input=" + input, "--outputType=bigquery", "--bqWriteMethod=streaming",
      "--schemasLocation=schemas.tar.gz", "--output=" + output, "--errorOutputType=stderr" });

  result.waitUntilFinish();

  assertThat(stringValuesQueryWithRetries("SELECT submission_timestamp FROM " + tableSpec),
      matchesInAnyOrder(Lists.newArrayList(null, null, "1561983194.123456")));
  assertThat(stringValuesQueryWithRetries("SELECT client_id FROM " + tableSpec),
      matchesInAnyOrder(ImmutableList.of("abc123", "abc123", "def456")));
}
 
Example #16
Source File: BigQueryIntegrationTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test
public void canSetStrictSchemaMode() throws Exception {
  String table = "my_test_table";
  String tableSpec = String.format("%s.%s", dataset, table);
  TableId tableId = TableId.of(dataset, table);

  bigquery.create(DatasetInfo.newBuilder(dataset).build());
  bigquery
      .create(TableInfo
          .newBuilder(tableId,
              StandardTableDefinition
                  .of(Schema.of(Field.of("client_id", LegacySQLTypeName.STRING),
                      Field.of("additional_properties", LegacySQLTypeName.STRING),
                      Field.of("submission_timestamp", LegacySQLTypeName.TIMESTAMP)))
                  .toBuilder().setTimePartitioning(TIME_PARTITIONING).setClustering(CLUSTERING)
                  .build())
          .build());

  String input = Resources
      .getResource("testdata/bigquery-integration/input-varied-doctypes.ndjson").getPath();
  String output = String.format("%s:%s", projectId, tableSpec);

  PipelineResult result = Sink.run(new String[] { "--inputFileFormat=json", "--inputType=file",
      "--input=" + input, "--outputType=bigquery", "--bqWriteMethod=streaming",
      "--bqStrictSchemaDocTypes=my-namespace/my-test", "--output=" + output,
      "--errorOutputType=stderr" });

  result.waitUntilFinish();

  assertThat(stringValuesQueryWithRetries("SELECT additional_properties FROM " + tableSpec),
      matchesInAnyOrder(Lists.newArrayList("{\"type\":\"main\"}", null, "{\"type\":\"main\"}")));
}
 
Example #17
Source File: BigQueryClient.java    From beam with Apache License 2.0
/** Inserts multiple rows of the same schema to a BigQuery table. */
public void insertAll(Collection<Map<String, ?>> rows, String table) {
  TableId tableId = TableId.of(projectId, dataset, table);

  InsertAllRequest.Builder builder = InsertAllRequest.newBuilder(tableId);

  for (Map<String, ?> row : rows) {
    builder.addRow(row);
  }

  InsertAllResponse response = client.insertAll(builder.build());
  handleBigQueryResponseExceptions(response);
}
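A short usage sketch for the helper above; the bigQueryClient instance and the column names are assumptions, not part of the original source.

// Hypothetical call site: each map is one row, keyed by column name.
Map<String, Object> values = new HashMap<>();
values.put("client_id", "abc123");
values.put("type", "main");

List<Map<String, ?>> rows = new ArrayList<>();
rows.add(values);
bigQueryClient.insertAll(rows, "my_table");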
 
Example #18
Source File: BigQuery.java    From gcp-ingestion with Mozilla Public License 2.0
private Batch(TableId tableId) {
  super();
  this.tableId = tableId;
  builder = InsertAllRequest.newBuilder(tableId)
      // ignore row values for columns not present in the table
      .setIgnoreUnknownValues(true)
      // insert all valid rows when invalid rows are present in the request
      .setSkipInvalidRows(true);
}
 
Example #19
Source File: BqSinkTest.java    From beast with Apache License 2.0
@Before
public void setUp() {
    tableId = TableId.of("test-dataset", "test-table");
    builder = InsertAllRequest.newBuilder(tableId);
    bqRow = new BQRowWithInsertId();
    BQErrorHandler errorHandlerInstance = new OOBErrorHandler(new DefaultLogWriter());
    sink = new BqSink(bigquery, tableId, new BQResponseParser(), errorHandlerInstance, bqRow);
    when(successfulResponse.hasErrors()).thenReturn(false);
    when(bigquery.insertAll(any())).thenReturn(successfulResponse);
    when(failureResponse.hasErrors()).thenReturn(true);
    insertErrors = new HashMap<>();
    List<BigQueryError> columnError = Arrays.asList(new BigQueryError("failed since type mismatched", "column location", "message"));
    insertErrors.put(0L, columnError);
    when(failureResponse.getInsertErrors()).thenReturn(insertErrors);
}
 
Example #20
Source File: ITDatasetSnippets.java    From google-cloud-java with Apache License 2.0
@Test
public void testCreateTable() {
  String expectedTableName = "test_table";
  String expectedFieldName = "test_field";

  Table actualTable = datasetSnippets.createTable(expectedTableName, expectedFieldName);
  assertNotNull(actualTable);
  assertEquals(expectedTableName, actualTable.getTableId().getTable());
  assertEquals(1, actualTable.getDefinition().getSchema().getFields().size());

  Field actualField = actualTable.getDefinition().getSchema().getFields().get(0);
  assertEquals(expectedFieldName, actualField.getName());

  bigquery.delete(TableId.of(DATASET, expectedTableName));
}
 
Example #21
Source File: BigQueryDynamicConverters.java    From DataflowTemplates with Apache License 2.0
public TableId getTableId(TableRow input) {
    String datasetName = BigQueryConverters.formatStringTemplate(datasetTemplate.get(), input);
    String tableName = BigQueryConverters.formatStringTemplate(tableTemplate.get(), input);

    if (projectId == null) {
        return TableId.of(datasetName, tableName);
    } else {
        return TableId.of(projectId.get(), datasetName, tableName);
    }
}
 
Example #22
Source File: PubsubMessageToTableRow.java    From gcp-ingestion with Mozilla Public License 2.0
/**
 * Given a KV containing a destination and a message, return the message content as a {@link
 * TableRow} ready to pass to {@link org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO}.
 */
public TableRow kvToTableRow(KV<TableDestination, PubsubMessage> kv) {
  if (format == null) {
    format = createFormat();
  }
  final TableReference ref = kv.getKey().getTableReference();
  final TableId tableId = TableId.of(ref.getProjectId(), ref.getDatasetId(), ref.getTableId());
  final PubsubMessage message = kv.getValue();
  return Json.asTableRow(format.apply(tableId, message.getAttributeMap(), message.getPayload()));
}
 
Example #23
Source File: BigQueryMockingTest.java    From flo with Apache License 2.0
@Test
public void shouldCreateCustomStagingTableId()
    throws InterruptedException, ExecutionException, TimeoutException {
  final TableId expectedFinal = TableId.of("foo", "bar", "tab");
  final TableId expectedStaging = TableId.of("test_foo", "test_bar", "test_tab");

  final Task<TableId> task = Task.named("task")
      .ofType(TableId.class)
      .output(BigQueryOutput.create(expectedFinal))
      .process(actual -> {
        assertThat(actual.tableId(), is(expectedStaging));
        return actual.publish();
      });

  try (final TestScope scope = FloTesting.scope()) {
    BigQueryMocking.mock().stagingTableId(expectedFinal, expectedStaging);

    assertThat(BigQueryMocking.mock().tableExists(expectedFinal), is(false));
    assertThat(BigQueryMocking.mock().tablePublished(expectedFinal), is(false));
    assertThat(BigQueryMocking.mock().tableExists(expectedStaging), is(true));
    assertThat(BigQueryMocking.mock().tablePublished(expectedStaging), is(false));

    final TableId resultTableId = FloRunner.runTask(task).future().get(30, TimeUnit.SECONDS);

    assertThat(resultTableId, is(expectedFinal));
    assertThat(BigQueryMocking.mock().tableExists(expectedFinal), is(true));
    assertThat(BigQueryMocking.mock().tablePublished(expectedFinal), is(true));

    assertThat(BigQueryMocking.mock().tableExists(expectedStaging), is(false));
    assertThat(BigQueryMocking.mock().tablePublished(expectedStaging), is(false));
  }
}
 
Example #24
Source File: BigQueryOutputTest.java    From flo with Apache License 2.0
@Test
public void shouldBeRunnable() throws Exception {
  final String nonExistentProject = UUID.randomUUID().toString();
  final Task<TableId> task = Task.named("task").ofType(TableId.class)
      .output(BigQueryOutput.create(nonExistentProject, "foo", "bar"))
      .process(StagingTableId::tableId);

  final Future<TableId> future = FloRunner.runTask(task).future();
  try {
    future.get(30, TimeUnit.SECONDS);
  } catch (ExecutionException e) {
    final Throwable rootCause = Throwables.getRootCause(e);
    if (rootCause instanceof GoogleJsonResponseException) {
      // Seems we managed to make a request, so the lookup context was successfully invoked. We're done here.
    } else if (rootCause instanceof IllegalArgumentException
        && rootCause.getMessage().startsWith("A project ID is required")) {
      // Seems we got as far as to instantiate the BigQuery client. We're done here.
    } else if (rootCause instanceof IllegalArgumentException &&
        rootCause.getMessage().startsWith("Dataset does not exist.")) {
      // Seems we managed to make a request, so the lookup context was successfully invoked. We're done here.
    } else if (rootCause instanceof BigQueryException &&
        rootCause.getMessage().equals("The project " + nonExistentProject + " has not enabled BigQuery.")) {
      // Seems we managed to make a request, so the lookup context was successfully invoked. We're done here.
    } else {
      // Not sure what error we got here, might be a serialization problem. Be conservative and fail.
      throw new AssertionError("Unknown error, might be serialization problem that needs fixing?", e);
    }
  }
}
 
Example #25
Source File: BigQueryTasks.java    From flo with Apache License 2.0
@VisibleForTesting
static Task<TableId> lookup(F0<FloBigQueryClient> bigQuerySupplier, TableId tableId) {
  return Task.named("bigquery.lookup", tableId.getProject(), tableId.getDataset(), tableId.getTable())
      .ofType(TableId.class)
      .operator(BigQueryLookupOperator.of(bigQuerySupplier))
      .process(bq -> bq.lookup(tableId));
}
 
Example #26
Source File: BigQueryMockingTest.java    From flo with Apache License 2.0
@Test
public void lookupShouldReturnMockedTable() throws Exception {
  final Task<TableId> lookup = BigQueryTasks.lookup("foo", "bar", "tab");
  try (TestScope scope = FloTesting.scope()) {
    assertThat(BigQueryMocking.mock().tablePublished("foo", "bar", "tab"), is(false));
    assertThat(BigQueryMocking.mock().tableExists("foo", "bar", "tab"), is(false));
    BigQueryMocking.mock().table("foo", "bar", "tab");
    assertThat(BigQueryMocking.mock().tablePublished("foo", "bar", "tab"), is(false));
    assertThat(BigQueryMocking.mock().tableExists("foo", "bar", "tab"), is(true));
    final TableId tableId = FloRunner.runTask(lookup).future().get(30, TimeUnit.SECONDS);
    assertThat(tableId, is(TableId.of("foo", "bar", "tab")));
    assertThat(BigQueryMocking.mock().tablePublished("foo", "bar", "tab"), is(false));
    assertThat(BigQueryMocking.mock().tableExists("foo", "bar", "tab"), is(true));
  }
}
 
Example #27
Source File: BigQueryLookup.java    From flo with Apache License 2.0
@Override
public Optional<TableId> lookup(FloBigQueryClient bq) {
  for (int i = 0; i <= lookBackDays; i++) {
    Date date = Date.of(start.localDate().minusDays(i));
    String table = tableName + "_" + BigQueryTasks.formatTableDate(date);
    TableId tableId = TableId.of(project, dataset, table);

    if (bq.tableExists(tableId)) {
      return Optional.of(tableId);
    }
  }

  return Optional.empty();
}
 
Example #28
Source File: BqIntegrationTest.java    From beast with Apache License 2.0
@Ignore
@Test
public void testBQErrorsAreStoredInGCS() throws InvalidProtocolBufferException {
    Instant validNow = Instant.now();
    TestMessage validMessage1 = getTestMessage("VALID-11-testValidMessageInBQ", validNow);
    TestMessage validMessage2 = getTestMessage("VALID-12-testValidMessageInBQ", validNow);

    Instant inValidLater = Instant.now().plus(Duration.ofDays(185));
    Instant inValidBefore = Instant.now().minus(Duration.ofDays(365));
    TestMessage inValidMessage1 = getTestMessage("INVALID-21-testBQErrorsAreStoredInGCS", inValidLater);
    TestMessage inValidMessage2 = getTestMessage("INVALID-21-testBQErrorsAreStoredInGCS", inValidLater);

    ColumnMapping columnMapping = new ColumnMapping();
    columnMapping.put("1", "order_number");
    columnMapping.put("2", "order_url");
    columnMapping.put("3", "order_details");
    columnMapping.put("4", "created_at");

    List<Record> validRecords = getKafkaConsumerRecords(columnMapping, validNow, "testBQErrorsAreStoredInGCS-valid", 1,
            1L, clock, validMessage1, validMessage2);
    List<Record> inValidRecords = getKafkaConsumerRecords(columnMapping, validNow, "testBQErrorsAreStoredInGCS-valid", 2,
            10L, clock, inValidMessage1, inValidMessage2);

    List<Record> allRecords = new ArrayList<>();
    allRecords.addAll(validRecords);
    allRecords.addAll(inValidRecords);

    final Storage gcsStore = authenticatedGCStorageInstance();

    //Insert into BQ
    TableId tableId = TableId.of("playground", "test_nested_messages");
    BQErrorHandler errorHandler = new OOBErrorHandler(new GCSErrorWriter(gcsStore, gcsBucket, "test-integ-beast"));
    BqSink bqSink = new BqSink(authenticatedBQ(), tableId, new BQResponseParser(), errorHandler, bqRow);
    Status push = bqSink.push(new Records(allRecords));
    assertTrue("Invalid Message should have been inserted into GCS Sink and success status should be true", push.isSuccess());
}
 
Example #29
Source File: BigQuerySchemaStore.java    From gcp-ingestion with Mozilla Public License 2.0
@Override
public Schema getSchema(TableId tableId, Map<String, String> attributes) {
  if (tableId == null) {
    // Always throws SchemaNotFoundException
    return getSchema(attributes);
  }
  if (tableSchemaCache == null) {
    // We need to be very careful about settings for the cache here. We have had significant
    // issues in the past due to exceeding limits on BigQuery API requests; see
    // https://bugzilla.mozilla.org/show_bug.cgi?id=1623000
    tableSchemaCache = CacheBuilder.newBuilder().expireAfterWrite(Duration.ofMinutes(10))
        .build();
  }
  if (bqService == null) {
    bqService = BigQueryOptions.newBuilder().setProjectId(tableId.getProject())
        .setRetrySettings(RETRY_SETTINGS).build().getService();
  }
  try {
    return Optional.of(tableSchemaCache.get(tableId, () -> {
      Table table = bqService.getTable(tableId);
      if (table != null) {
        return table.getDefinition().getSchema();
      } else {
        return null;
      }
    })).orElseThrow(() -> SchemaNotFoundException.forName(tableId.toString()));
  } catch (ExecutionException e) {
    throw new UncheckedExecutionException(e.getCause());
  }
}
 
Example #30
Source File: BqIntegrationTest.java    From beast with Apache License 2.0
@Ignore
@Test
public void shouldPushTestNestedRepeatedMessages() throws InvalidProtocolBufferException {
    Instant now = Instant.now();
    long second = now.getEpochSecond();
    ProtoParser protoParser = new ProtoParser(StencilClientFactory.getClient(), TestNestedRepeatedMessage.class.getName());
    TestNestedRepeatedMessage protoMessage = TestNestedRepeatedMessage.newBuilder()
            .addRepeatedMessage(ProtoUtil.generateTestMessage(now))
            .addRepeatedMessage(ProtoUtil.generateTestMessage(now))
            .build();

    TableId tableId = TableId.of("bqsinktest", "nested_messages");
    BqSink bqSink = new BqSink(authenticatedBQ(), tableId, new BQResponseParser(), gcsSinkHandler, bqRow);

    ColumnMapping columnMapping = new ColumnMapping();
    ColumnMapping nested = new ColumnMapping();
    nested.put("record_name", "messsages");
    nested.put("1", "order_number");
    nested.put("2", "order_url");
    columnMapping.put("2", nested);
    ConsumerRecordConverter customConverter = new ConsumerRecordConverter(new RowMapper(columnMapping), protoParser, clock);

    ConsumerRecord<byte[], byte[]> consumerRecord = new ConsumerRecord<>("topic", 1, 1, second, TimestampType.CREATE_TIME,
            0, 0, 1, null, protoMessage.toByteArray());

    List<Record> records = customConverter.convert(Collections.singleton(consumerRecord));
    Status push = bqSink.push(new Records(records));
    assertTrue(push.isSuccess());
}