com.google.api.services.bigquery.model.TableReference Java Examples

The following examples show how to use com.google.api.services.bigquery.model.TableReference. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BigQueryToTableIT.java    From beam with Apache License 2.0 6 votes vote down vote up
/** Registers the test options, then provisions the shared dataset and the new-types table. */
@BeforeClass
public static void setupTestEnvironment() throws Exception {
  PipelineOptionsFactory.register(BigQueryToTableOptions.class);
  project = TestPipeline.testingPipelineOptions().as(GcpOptions.class).getProject();

  // A single BQ dataset is shared by all test cases.
  BQ_CLIENT.createNewDataset(project, BIG_QUERY_DATASET_ID);

  // Describe the table used by the new-type query test cases.
  TableReference newTypesTableRef = new TableReference();
  newTypesTableRef.setTableId(BigQueryToTableIT.NEW_TYPES_QUERY_TABLE_NAME);
  newTypesTableRef.setDatasetId(BIG_QUERY_DATASET_ID);
  newTypesTableRef.setProjectId(project);

  Table newTypesTable = new Table();
  newTypesTable.setSchema(BigQueryToTableIT.NEW_TYPES_QUERY_TABLE_SCHEMA);
  newTypesTable.setTableReference(newTypesTableRef);

  // Create the table first, then insert its data.
  BQ_CLIENT.createNewTable(project, BIG_QUERY_DATASET_ID, newTypesTable);
  BQ_CLIENT.insertDataToTable(
      project,
      BIG_QUERY_DATASET_ID,
      BigQueryToTableIT.NEW_TYPES_QUERY_TABLE_NAME,
      BigQueryToTableIT.NEW_TYPES_QUERY_TABLE_DATA);
}
 
Example #2
Source File: BigQueryStorageTableSource.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Static factory that forwards every argument, unchanged and in the same order, to the
 * {@code BigQueryStorageTableSource} constructor and returns the new instance.
 */
public static <T> BigQueryStorageTableSource<T> create(
    ValueProvider<TableReference> tableRefProvider,
    @Nullable TableReadOptions readOptions,
    @Nullable ValueProvider<List<String>> selectedFields,
    @Nullable ValueProvider<String> rowRestriction,
    SerializableFunction<SchemaAndRecord, T> parseFn,
    Coder<T> outputCoder,
    BigQueryServices bqServices) {
  BigQueryStorageTableSource<T> source =
      new BigQueryStorageTableSource<>(
          tableRefProvider,
          readOptions,
          selectedFields,
          rowRestriction,
          parseFn,
          outputCoder,
          bqServices);
  return source;
}
 
Example #3
Source File: BqDdlOperatorFactory.java    From digdag with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@code Table} from the given config. The dataset comes from the config or, failing
 * that, from {@code defaultDataset}; the project comes from the config or
 * {@code defaultProjectId}.
 *
 * @throws ConfigException if neither the config nor the default supplies a dataset
 */
private Table table(String defaultProjectId, Optional<DatasetReference> defaultDataset, TableConfig config)
{
    Optional<String> resolvedDataset =
            config.dataset().or(defaultDataset.transform(DatasetReference::getDatasetId));
    if (!resolvedDataset.isPresent()) {
        throw new ConfigException("Bad table reference or configuration: Missing 'dataset'");
    }

    TableReference reference = new TableReference();
    reference.setProjectId(config.project().or(defaultProjectId));
    reference.setDatasetId(resolvedDataset.get());
    reference.setTableId(config.id());

    // Optional settings that are absent in the config are passed through as null.
    Table result = new Table();
    result.setTableReference(reference);
    result.setSchema(config.schema().orNull());
    result.setFriendlyName(config.friendly_name().orNull());
    result.setExpirationTime(
            config.expiration_time()
                    .transform(p -> p.getTimestamp().toInstant(request.getTimeZone()).toEpochMilli())
                    .orNull());
    result.setTimePartitioning(config.time_partitioning().orNull());
    result.setView(config.view().orNull());
    return result;
}
 
Example #4
Source File: BigQueryHelpersTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks {@code BigQueryHelpers.createTempTableReference} with and without an explicit dataset:
 * without one, the dataset and table ids are expected to be derived from the job UUID; with one,
 * the supplied dataset id is expected to be used while project and table ids stay the same.
 */
@Test
public void testCreateTempTableReference() {
  String projectId = "this-is-my-project";
  String jobUuid = "this-is-my-job";
  TableReference noDataset =
      BigQueryHelpers.createTempTableReference(projectId, jobUuid, Optional.empty());

  // assertEquals takes (expected, actual); this order keeps failure messages correctly labelled.
  assertEquals(projectId, noDataset.getProjectId());
  assertEquals("temp_dataset_" + jobUuid, noDataset.getDatasetId());
  assertEquals("temp_table_" + jobUuid, noDataset.getTableId());

  // The literal is never null, so Optional.of states the intent directly.
  Optional<String> dataset = Optional.of("my-tmp-dataset");
  TableReference tempTableReference =
      BigQueryHelpers.createTempTableReference(projectId, jobUuid, dataset);

  assertEquals(noDataset.getProjectId(), tempTableReference.getProjectId());
  assertEquals(dataset.get(), tempTableReference.getDatasetId());
  assertEquals(noDataset.getTableId(), tempTableReference.getTableId());

  // The setter returns the same reference, so the new dataset id is readable right away.
  assertEquals(dataset.get(), noDataset.setDatasetId(dataset.get()).getDatasetId());
}
 
Example #5
Source File: UpdateSnapshotViewAction.java    From nomulus with Apache License 2.0 6 votes vote down vote up
/**
 * Updates the given table through the BigQuery client. If the update fails with a 404 the table
 * is inserted instead; any other {@code GoogleJsonResponseException} is logged as a warning and
 * not rethrown.
 */
private static void updateTable(Bigquery bigquery, Table table) throws IOException {
  TableReference tableRef = table.getTableReference();
  try {
    bigquery
        .tables()
        .update(tableRef.getProjectId(), tableRef.getDatasetId(), tableRef.getTableId(), table)
        .execute();
  } catch (GoogleJsonResponseException e) {
    boolean tableMissing = e.getDetails() != null && e.getDetails().getCode() == 404;
    if (!tableMissing) {
      logger.atWarning().withCause(e).log(
          "UpdateSnapshotViewAction failed, caught exception %s", e.getDetails());
      return;
    }
    // The table does not exist yet, so create it.
    bigquery.tables().insert(tableRef.getProjectId(), tableRef.getDatasetId(), table).execute();
  }
}
 
Example #6
Source File: BigQueryServicesImplTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Stubs a 200 response carrying the test table and checks that {@code tryCreateTable} returns an
 * equal table after reading the status code, content and content type exactly once each.
 */
@Test
public void testCreateTableSucceeds() throws IOException {
  TableReference ref = new TableReference();
  ref.setProjectId("project");
  ref.setDatasetId("dataset");
  ref.setTableId("table");
  Table testTable = new Table();
  testTable.setTableReference(ref);

  when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
  when(response.getStatusCode()).thenReturn(200);
  when(response.getContent()).thenReturn(toStream(testTable));

  BigQueryServicesImpl.DatasetServiceImpl services =
      new BigQueryServicesImpl.DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
  RetryBoundedBackOff backOff = new RetryBoundedBackOff(0, BackOff.ZERO_BACKOFF);
  Table created = services.tryCreateTable(testTable, backOff, Sleeper.DEFAULT);

  assertEquals(testTable, created);
  verify(response, times(1)).getStatusCode();
  verify(response, times(1)).getContent();
  verify(response, times(1)).getContentType();
}
 
Example #7
Source File: BigQueryServicesImplTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Stubs a 401 response and expects {@code isTableEmpty} to throw an {@code IOException} whose
 * message names the table id.
 */
@Test
public void testIsTableEmptyThrows() throws Exception {
  when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
  when(response.getStatusCode()).thenReturn(401);

  TableReference tableRef = new TableReference();
  tableRef.setProjectId("projectId");
  tableRef.setDatasetId("datasetId");
  tableRef.setTableId("tableId");

  BigQueryServicesImpl.DatasetServiceImpl datasetService =
      new BigQueryServicesImpl.DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());

  // The expectation has to be registered before the call that is meant to throw.
  String expectedMessage =
      String.format("Unable to list table data: %s", tableRef.getTableId());
  thrown.expect(IOException.class);
  thrown.expectMessage(expectedMessage);

  datasetService.isTableEmpty(tableRef, BackOff.STOP_BACKOFF, Sleeper.DEFAULT);
}
 
Example #8
Source File: BigQueryStrings.java    From hadoop-connectors with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a table specification string into a TableReference. The project part is optional
 * (a caller may rely on a "default" project); when it is omitted, getProjectId() on the result
 * returns null.
 *
 * @param tableRefString A string of the form [projectId]:[datasetId].[tableId].
 * @return a TableReference with the parsed components.
 */
public static TableReference parseTableReference(String tableRefString) {
  // Logic mirrored from cloud/helix/clients/cli/bigquery_client.py.
  TableReference parsed = new TableReference();

  // Everything before the LAST ':' is the project id; the rest is datasetId.tableId.
  int lastColon = tableRefString.lastIndexOf(':');
  String datasetAndTableString;
  if (lastColon == -1) {
    datasetAndTableString = tableRefString;
  } else {
    parsed.setProjectId(tableRefString.substring(0, lastColon));
    // Skip the ':' itself.
    datasetAndTableString = tableRefString.substring(lastColon + 1);
  }

  Preconditions.checkArgument(datasetAndTableString.matches(DATASET_AND_TABLE_REGEX),
      "Invalid datasetAndTableString '%s'; must match regex '%s'.",
      datasetAndTableString, DATASET_AND_TABLE_REGEX);

  List<String> datasetThenTable = DOT_SPLITTER.splitToList(datasetAndTableString);
  parsed.setDatasetId(datasetThenTable.get(0));
  parsed.setTableId(datasetThenTable.get(1));
  return parsed;
}
 
Example #9
Source File: BigQueryHelpers.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Passes when the table does not exist ({@code getTable} returns null) or exists and is empty;
 * otherwise {@code checkState} fails. Lookup failures are rethrown as {@code RuntimeException},
 * and the interrupt flag is restored first if the thread was interrupted.
 */
static void verifyTableNotExistOrEmpty(DatasetService datasetService, TableReference tableRef) {
  try {
    if (datasetService.getTable(tableRef) == null) {
      // No such table: nothing to verify.
      return;
    }
    checkState(
        datasetService.isTableEmpty(tableRef),
        "BigQuery table is not empty: %s.",
        toTableSpec(tableRef));
  } catch (InterruptedException e) {
    Thread.currentThread().interrupt();
    throw new RuntimeException(
        "unable to confirm BigQuery table emptiness for table " + toTableSpec(tableRef), e);
  } catch (IOException e) {
    throw new RuntimeException(
        "unable to confirm BigQuery table emptiness for table " + toTableSpec(tableRef), e);
  }
}
 
Example #10
Source File: BigQueryHelpers.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Looks up the dataset of {@code table} and translates failures: an {@code IOException} that the
 * error extractor classifies as "item not found" becomes an {@code IllegalArgumentException},
 * runtime exceptions are rethrown unchanged, and anything else is wrapped in a
 * {@code RuntimeException}.
 */
static void verifyDatasetPresence(DatasetService datasetService, TableReference table) {
  try {
    datasetService.getDataset(table.getProjectId(), table.getDatasetId());
  } catch (Exception e) {
    ApiErrorExtractor errorExtractor = new ApiErrorExtractor();
    boolean datasetNotFound =
        (e instanceof IOException) && errorExtractor.itemNotFound((IOException) e);
    if (datasetNotFound) {
      throw new IllegalArgumentException(
          String.format(RESOURCE_NOT_FOUND_ERROR, "dataset", toTableSpec(table)), e);
    }
    if (e instanceof RuntimeException) {
      throw (RuntimeException) e;
    }
    throw new RuntimeException(
        String.format(
            UNABLE_TO_CONFIRM_PRESENCE_OF_RESOURCE_ERROR, "dataset", toTableSpec(table)),
        e);
  }
}
 
Example #11
Source File: BigQueryHelpers.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Looks up {@code table} and translates failures: an {@code IOException} that the error
 * extractor classifies as "item not found" becomes an {@code IllegalArgumentException}, runtime
 * exceptions are rethrown unchanged, and anything else is wrapped in a {@code RuntimeException}.
 */
static void verifyTablePresence(DatasetService datasetService, TableReference table) {
  try {
    datasetService.getTable(table);
  } catch (Exception e) {
    ApiErrorExtractor errorExtractor = new ApiErrorExtractor();
    boolean tableNotFound =
        (e instanceof IOException) && errorExtractor.itemNotFound((IOException) e);
    if (tableNotFound) {
      throw new IllegalArgumentException(
          String.format(RESOURCE_NOT_FOUND_ERROR, "table", toTableSpec(table)), e);
    }
    if (e instanceof RuntimeException) {
      throw (RuntimeException) e;
    }
    throw new RuntimeException(
        String.format(
            UNABLE_TO_CONFIRM_PRESENCE_OF_RESOURCE_ERROR, "table", toTableSpec(table)),
        e);
  }
}
 
Example #12
Source File: FakeJobService.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Runs an extract job against the dataset service: reads all rows and the schema of the source
 * table, writes the rows to every destination URI, records the per-destination file counts in
 * the job statistics, and returns a status whose state is "DONE".
 */
private JobStatus runExtractJob(Job job, JobConfigurationExtract extract)
    throws InterruptedException, IOException {
  TableReference source = extract.getSourceTable();

  List<TableRow> sourceRows =
      datasetService.getAllRows(
          source.getProjectId(), source.getDatasetId(), source.getTableId());
  TableSchema sourceSchema = datasetService.getTable(source).getSchema();

  List<Long> fileCountPerDestination = Lists.newArrayList();
  for (String destinationUri : extract.getDestinationUris()) {
    long written = writeRows(source.getTableId(), sourceRows, sourceSchema, destinationUri);
    fileCountPerDestination.add(written);
  }

  JobStatistics4 extractStatistics =
      new JobStatistics4().setDestinationUriFileCounts(fileCountPerDestination);
  job.setStatistics(new JobStatistics().setExtract(extractStatistics));

  JobStatus finished = new JobStatus();
  finished.setState("DONE");
  return finished;
}
 
Example #13
Source File: BigqueryConnection.java    From nomulus with Apache License 2.0 6 votes vote down vote up
/**
 * Brings the given Bigquery table in line with the metadata carried by the input, inserting the
 * table into the dataset when it does not exist yet.
 *
 * <p>The input DestinationTable is returned unchanged.
 *
 * <p>This can be invoked directly to update a table on demand, or handed to
 * Futures.transform() so that a table produced asynchronously by a load or query job gets
 * updated (e.g. to add a description to it).
 */
private DestinationTable updateTable(final DestinationTable destinationTable) {
  Table table = destinationTable.getTable();
  TableReference tableRef = table.getTableReference();
  try {
    boolean alreadyExists = checkTableExists(tableRef.getDatasetId(), tableRef.getTableId());
    if (!alreadyExists) {
      bigquery
          .tables()
          .insert(tableRef.getProjectId(), tableRef.getDatasetId(), table)
          .execute();
    } else {
      // patch(), not update(): patch changes only the properties that are specified, whereas
      // update would change everything and blank out the unspecified properties.
      bigquery
          .tables()
          .patch(tableRef.getProjectId(), tableRef.getDatasetId(), tableRef.getTableId(), table)
          .execute();
    }
  } catch (IOException e) {
    throw BigqueryJobFailureException.create(e);
  }
  return destinationTable;
}
 
Example #14
Source File: BigQueryIO.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Performs early validation of the destination when the user specified an accessible table and
 * validation is enabled: the dataset must exist, the table must exist for {@code CREATE_NEVER},
 * and the table must be absent or empty for {@code WRITE_EMPTY}.
 */
@Override
public void validate(PipelineOptions pipelineOptions) {
  BigQueryOptions options = pipelineOptions.as(BigQueryOptions.class);

  // Nothing to check unless the user specified a table that can be read now.
  if (getJsonTableRef() == null || !getJsonTableRef().isAccessible() || !getValidate()) {
    return;
  }

  TableReference destination = getTableWithDefaultProject(options).get();
  DatasetService datasetService = getBigQueryServices().getDatasetService(options);

  // Check for destination table presence and emptiness for early failure notification.
  // Note that a presence check can fail when the table or dataset is created by an earlier
  // stage of the pipeline. For these cases the #withoutValidation method can be used to
  // disable the check.
  BigQueryHelpers.verifyDatasetPresence(datasetService, destination);

  boolean mustAlreadyExist =
      getCreateDisposition() == BigQueryIO.Write.CreateDisposition.CREATE_NEVER;
  if (mustAlreadyExist) {
    BigQueryHelpers.verifyTablePresence(datasetService, destination);
  }

  boolean mustBeEmpty = getWriteDisposition() == BigQueryIO.Write.WriteDisposition.WRITE_EMPTY;
  if (mustBeEmpty) {
    BigQueryHelpers.verifyTableNotExistOrEmpty(datasetService, destination);
  }
}
 
Example #15
Source File: BigQueryIO.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the table to write, or {@code null} if writing with {@code tableFunction}.
 *
 * <p>A table reference without a project id is completed with the project from
 * {@code bqOptions}. A provider that is not yet accessible is returned as is.
 */
@Nullable
ValueProvider<TableReference> getTableWithDefaultProject(BigQueryOptions bqOptions) {
  ValueProvider<TableReference> provider = getTable();
  if (provider == null) {
    return null;
  }

  if (!provider.isAccessible()) {
    LOG.info(
        "Using a dynamic value for table input. This must contain a project"
            + " in the table reference: {}",
        provider);
    return provider;
  }

  TableReference resolved = provider.get();
  if (!Strings.isNullOrEmpty(resolved.getProjectId())) {
    // The user already named a project.
    return provider;
  }

  // If user does not specify a project we assume the table to be located in
  // the default project.
  resolved.setProjectId(bqOptions.getProject());
  return NestedValueProvider.of(
      StaticValueProvider.of(BigQueryHelpers.toJsonString(resolved)),
      new JsonTableRefToTableRef());
}
 
Example #16
Source File: FederatedBigQueryOutputCommitter.java    From hadoop-connectors with Apache License 2.0 6 votes vote down vote up
/**
 * Commits through the delegate first, then runs a federated import job on BigQuery for the data
 * in the output path.
 */
@Override
public void commitJob(JobContext context) throws IOException {
  super.commitJob(context);

  // Read the destination settings from the job configuration.
  Configuration jobConf = context.getConfiguration();
  TableReference targetTable = BigQueryOutputConfiguration.getTableReference(jobConf);
  String projectForJob = BigQueryOutputConfiguration.getJobProjectId(jobConf);
  Optional<BigQueryTableSchema> targetSchema =
      BigQueryOutputConfiguration.getTableSchema(jobConf);
  BigQueryFileFormat fileFormat = BigQueryOutputConfiguration.getFileFormat(jobConf);
  List<String> gcsUris = getOutputFileURIs();

  // A missing schema is passed as null.
  getBigQueryHelper()
      .importFederatedFromGcs(
          projectForJob,
          targetTable,
          !targetSchema.isPresent() ? null : targetSchema.get().get(),
          fileFormat,
          gcsUris);
}
 
Example #17
Source File: BigQueryUtilTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Calls {@code getTable} on a service backed by the mock client and verifies the table get. */
@Test
public void testTableGet() throws InterruptedException, IOException {
  onTableGet(basicTableSchema());

  TableDataList emptyData = new TableDataList();
  emptyData.setTotalRows(0L);
  onTableList(emptyData);

  BigQueryServicesImpl.DatasetServiceImpl services =
      new BigQueryServicesImpl.DatasetServiceImpl(mockClient, options);

  TableReference tableRef = new TableReference();
  tableRef.setProjectId("project");
  tableRef.setDatasetId("dataset");
  tableRef.setTableId("table");
  services.getTable(tableRef);

  verifyTableGet();
}
 
Example #18
Source File: IndirectBigQueryOutputCommitter.java    From hadoop-connectors with Apache License 2.0 5 votes vote down vote up
/**
 * Commits through the delegate first, then runs an import job on BigQuery for the data in the
 * output path and finally calls {@code cleanup}.
 */
@Override
public void commitJob(JobContext context) throws IOException {
  super.commitJob(context);

  // Read the destination settings from the job configuration.
  Configuration jobConf = context.getConfiguration();
  TableReference targetTable = BigQueryOutputConfiguration.getTableReference(jobConf);
  String projectForJob = BigQueryOutputConfiguration.getJobProjectId(jobConf);
  String writeMode = BigQueryOutputConfiguration.getWriteDisposition(jobConf);
  String createMode = BigQueryOutputConfiguration.getCreateDisposition(jobConf);
  Optional<BigQueryTableSchema> targetSchema =
      BigQueryOutputConfiguration.getTableSchema(jobConf);
  Optional<BigQueryTimePartitioning> partitioning =
      BigQueryOutputConfiguration.getTablePartitioning(jobConf);
  String kmsKey = BigQueryOutputConfiguration.getKmsKeyName(jobConf);
  BigQueryFileFormat fileFormat = BigQueryOutputConfiguration.getFileFormat(jobConf);
  List<String> gcsUris = getOutputFileURIs();

  try {
    // A missing schema or partitioning is passed as null.
    getBigQueryHelper()
        .importFromGcs(
            projectForJob,
            targetTable,
            !targetSchema.isPresent() ? null : targetSchema.get().get(),
            !partitioning.isPresent() ? null : partitioning.get().get(),
            kmsKey,
            fileFormat,
            createMode,
            writeMode,
            gcsUris,
            true);
  } catch (InterruptedException e) {
    // Restore the interrupt flag before reporting the failure as an IOException.
    Thread.currentThread().interrupt();
    throw new IOException("Failed to import GCS into BigQuery", e);
  }

  cleanup(context);
}
 
Example #19
Source File: PubsubMessageToTableRow.java    From gcp-ingestion with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Converts a destination/message pair into a {@link TableRow} ready to pass to
 * {@link org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO}. The formatter is created on first use.
 */
public TableRow kvToTableRow(KV<TableDestination, PubsubMessage> kv) {
  if (format == null) {
    format = createFormat();
  }
  final TableReference destination = kv.getKey().getTableReference();
  final PubsubMessage source = kv.getValue();
  final TableId target =
      TableId.of(
          destination.getProjectId(), destination.getDatasetId(), destination.getTableId());
  return Json.asTableRow(format.apply(target, source.getAttributeMap(), source.getPayload()));
}
 
Example #20
Source File: TestBigQuery.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a table with a randomized name in the target dataset, stores it in the {@code table}
 * field and returns it.
 *
 * @throws IllegalStateException if a table with the generated reference already exists
 */
private Table createTable(Description description) throws IOException, InterruptedException {
  TableReference tableReference = new TableReference();
  tableReference.setProjectId(pipelineOptions.getProject());
  tableReference.setDatasetId(pipelineOptions.getTargetDataset());
  tableReference.setTableId(createRandomizedName(description));

  // Assemble the definition in a local so the field only ever sees a fully built table.
  Table definition = new Table();
  definition.setTableReference(tableReference);
  definition.setSchema(BigQueryUtils.toTableSchema(schema));
  definition.setDescription(
      "Table created for "
          + description.getDisplayName()
          + " by TestBigQueryRule. "
          + "Should be automatically cleaned up after test completion.");
  table = definition;

  boolean leftover = datasetService.getTable(tableReference) != null;
  if (leftover) {
    throw new IllegalStateException(
        "Table '"
            + tableReference
            + "' already exists. "
            + "It should have been cleaned up by the test rule.");
  }

  datasetService.createTable(table);
  return table;
}
 
Example #21
Source File: BigQueryServicesImplTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Stubs a 403 "rateLimitExceeded" response followed by a 200 response carrying the table, and
 * checks that {@code getTable} returns that table after reading each response part twice.
 */
@Test
public void testGetTableSucceeds() throws Exception {
  TableReference tableRef = new TableReference();
  tableRef.setProjectId("projectId");
  tableRef.setDatasetId("datasetId");
  tableRef.setTableId("tableId");

  Table testTable = new Table().setTableReference(tableRef);

  when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
  when(response.getStatusCode()).thenReturn(403).thenReturn(200);
  when(response.getContent())
      .thenReturn(toStream(errorWithReasonAndStatus("rateLimitExceeded", 403)))
      .thenReturn(toStream(testTable));

  BigQueryServicesImpl.DatasetServiceImpl datasetService =
      new BigQueryServicesImpl.DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());

  Table fetched = datasetService.getTable(tableRef, null, BackOff.ZERO_BACKOFF, Sleeper.DEFAULT);

  assertEquals(testTable, fetched);
  verify(response, times(2)).getStatusCode();
  verify(response, times(2)).getContent();
  verify(response, times(2)).getContentType();
}
 
Example #22
Source File: BigQuerySourceBase.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Starts an AVRO extract job for {@code table}, polls it, and returns the extract file paths.
 *
 * @throws IOException if the polled job did not end with status {@code SUCCEEDED}
 */
private List<ResourceId> executeExtract(
    String jobId,
    TableReference table,
    JobService jobService,
    String executingProject,
    String extractDestinationDir,
    String bqLocation)
    throws InterruptedException, IOException {

  JobReference jobRef = new JobReference();
  jobRef.setProjectId(executingProject);
  jobRef.setLocation(bqLocation);
  jobRef.setJobId(jobId);

  String destinationUri = BigQueryIO.getExtractDestinationUri(extractDestinationDir);
  JobConfigurationExtract extractConfig = new JobConfigurationExtract();
  extractConfig.setSourceTable(table);
  extractConfig.setDestinationFormat("AVRO");
  extractConfig.setDestinationUris(ImmutableList.of(destinationUri));

  LOG.info("Starting BigQuery extract job: {}", jobId);
  jobService.startExtractJob(jobRef, extractConfig);
  Job finishedJob = jobService.pollJob(jobRef, JOB_POLL_MAX_RETRIES);

  boolean succeeded = BigQueryHelpers.parseStatus(finishedJob) == Status.SUCCEEDED;
  if (!succeeded) {
    throw new IOException(
        String.format(
            "Extract job %s failed, status: %s.",
            finishedJob.getJobReference().getJobId(),
            BigQueryHelpers.statusToPrettyString(finishedJob.getStatus())));
  }

  LOG.info("BigQuery extract job completed: {}", jobId);

  return BigQueryIO.getExtractFilePaths(extractDestinationDir, finishedJob);
}
 
Example #23
Source File: OpinionAnalysisPipeline.java    From dataflow-opinion-analysis with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the reference to the sentiment table from the project and BigQuery dataset named in
 * the pipeline options.
 */
private static TableReference getSentimentTableReference(IndexerPipelineOptions options) {
	return new TableReference()
			.setProjectId(options.getProject())
			.setDatasetId(options.getBigQueryDataset())
			.setTableId(IndexerPipelineUtils.SENTIMENT_TABLE);
}
 
Example #24
Source File: BigQueryIOReadTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the estimated size of a table source adds the streaming buffer's estimated bytes
 * to the encoded size of the rows.
 */
@Test
public void testEstimatedSizeWithStreamingBuffer() throws Exception {
  List<TableRow> data =
      ImmutableList.of(
          new TableRow().set("name", "a").set("number", 1L),
          new TableRow().set("name", "b").set("number", 2L),
          new TableRow().set("name", "c").set("number", 3L),
          new TableRow().set("name", "d").set("number", 4L),
          new TableRow().set("name", "e").set("number", 5L),
          new TableRow().set("name", "f").set("number", 6L));

  TableReference table = BigQueryHelpers.parseTableSpec("project:data_set.table_name");
  fakeDatasetService.createDataset("project", "data_set", "", "", null);

  TableSchema nameAndNumberSchema =
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("name").setType("STRING"),
                  new TableFieldSchema().setName("number").setType("INTEGER")));
  Streamingbuffer tenByteBuffer =
      new Streamingbuffer().setEstimatedBytes(BigInteger.valueOf(10));
  Table tableWithBuffer =
      new Table()
          .setTableReference(table)
          .setSchema(nameAndNumberSchema)
          .setStreamingBuffer(tenByteBuffer);
  fakeDatasetService.createTable(tableWithBuffer);
  fakeDatasetService.insertAll(table, data, null);

  String stepUuid = "testStepUuid";
  BoundedSource<TableRow> bqSource =
      BigQueryTableSourceDef.create(fakeBqServices, ValueProvider.StaticValueProvider.of(table))
          .toSource(stepUuid, TableRowJsonCoder.of(), BigQueryIO.TableRowParser.INSTANCE);

  PipelineOptions options = PipelineOptionsFactory.create();

  // Each row should have 24 bytes (See StringUtf8Coder in detail):
  //   first 1 byte indicating length and following 23 bytes: {"name":"a","number":1}
  // 10 bytes comes from the estimated bytes of the Streamingbuffer
  long expectedSize = 24L * data.size() + 10;
  assertEquals(expectedSize, bqSource.getEstimatedSizeBytes(options));
}
 
Example #25
Source File: BqClient.java    From digdag with Apache License 2.0 5 votes vote down vote up
/**
 * Empties a table by deleting it (addressed through its own table reference) and then creating
 * it again from the same definition under {@code projectId}.
 */
void emptyTable(String projectId, Table table)
        throws IOException
{
    TableReference existing = table.getTableReference();
    String ownerProject = existing.getProjectId();
    String dataset = existing.getDatasetId();
    String tableId = existing.getTableId();
    deleteTable(ownerProject, dataset, tableId);
    createTable(projectId, table);
}
 
Example #26
Source File: AbstractBigQueryInputFormat.java    From hadoop-connectors with Apache License 2.0 5 votes vote down vote up
/**
 * Chooses the export for the configured input table: a no-op federated export when the table's
 * type equals {@code EXTERNAL_TABLE_TYPE}, otherwise an unsharded export to {@code exportPath}.
 */
private static Export constructExport(
    Configuration configuration,
    ExportFileFormat format,
    String exportPath,
    BigQueryHelper bigQueryHelper,
    InputFormat<LongWritable, Text> delegateInputFormat)
    throws IOException {
  logger.atFine().log("constructExport() with export path %s", exportPath);

  // Pull the required settings out of the configuration.
  Map<String, String> required =
      getMandatoryConfig(configuration, MANDATORY_CONFIG_PROPERTIES_INPUT);
  String jobProjectId = required.get(PROJECT_ID.getKey());
  String inputProjectId = required.get(INPUT_PROJECT_ID.getKey());
  String datasetId = required.get(INPUT_DATASET_ID.getKey());
  String tableName = required.get(INPUT_TABLE_ID.getKey());

  TableReference inputTableRef = new TableReference();
  inputTableRef.setDatasetId(datasetId);
  inputTableRef.setProjectId(inputProjectId);
  inputTableRef.setTableId(tableName);
  Table table = bigQueryHelper.getTable(inputTableRef);

  Export export;
  if (EXTERNAL_TABLE_TYPE.equals(table.getType())) {
    logger.atInfo().log("Table is already external, so skipping export");
    export =
        new NoopFederatedExportToCloudStorage(
            configuration, format, bigQueryHelper, jobProjectId, table, delegateInputFormat);
  } else {
    export =
        new UnshardedExportToCloudStorage(
            configuration,
            exportPath,
            format,
            bigQueryHelper,
            jobProjectId,
            table,
            delegateInputFormat);
  }
  return export;
}
 
Example #27
Source File: BigQueryHelpers.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the given table and reports its row count.
 *
 * @return The number of rows in the table or null if it cannot get any estimate.
 */
@Nullable
public static BigInteger getNumRows(BigQueryOptions options, TableReference tableRef)
    throws InterruptedException, IOException {

  DatasetService datasetService = new BigQueryServicesImpl().getDatasetService(options);
  Table found = datasetService.getTable(tableRef);
  // A null lookup result means there is no table to report on.
  return found == null ? null : found.getNumRows();
}
 
Example #28
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates three tables, adds a fourth reference with no backing table, and checks that
 * {@code removeTemporaryTables} logs a deletion for each reference and leaves none behind.
 */
@Test
public void testRemoveTemporaryTables() throws Exception {
  String projectId = "project";
  String datasetId = "dataset";
  FakeDatasetService datasetService = new FakeDatasetService();
  datasetService.createDataset(projectId, datasetId, "", "", null);

  List<TableReference> tableRefs = Lists.newArrayList();
  for (String tableId : new String[] {"table1", "table2", "table3"}) {
    tableRefs.add(
        BigQueryHelpers.parseTableSpec(
            String.format("%s:%s.%s", projectId, datasetId, tableId)));
  }
  for (TableReference existing : tableRefs) {
    datasetService.createTable(new Table().setTableReference(existing));
  }

  // Add one more table to delete that does not actually exist.
  TableReference neverCreated =
      BigQueryHelpers.parseTableSpec(String.format("%s:%s.%s", projectId, datasetId, "table4"));
  tableRefs.add(neverCreated);

  WriteRename.removeTemporaryTables(datasetService, tableRefs);

  for (TableReference removed : tableRefs) {
    loggedWriteRename.verifyDebug("Deleting table " + toJsonString(removed));
    checkState(
        datasetService.getTable(removed) == null, "Table " + removed + " was not deleted!");
  }
}
 
Example #29
Source File: BigQueryStorageTableSource.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a source for the table supplied by {@code tableRefProvider}.
 *
 * <p>All arguments except the provider are handed to the superclass constructor. The provider is
 * validated with {@code checkNotNull} and stored, and {@code cachedTable} starts out as an
 * empty {@code AtomicReference}.
 */
private BigQueryStorageTableSource(
    ValueProvider<TableReference> tableRefProvider,
    @Nullable TableReadOptions readOptions,
    @Nullable ValueProvider<List<String>> selectedFields,
    @Nullable ValueProvider<String> rowRestriction,
    SerializableFunction<SchemaAndRecord, T> parseFn,
    Coder<T> outputCoder,
    BigQueryServices bqServices) {
  super(readOptions, selectedFields, rowRestriction, parseFn, outputCoder, bqServices);
  // The provider is the only required argument this class keeps for itself.
  this.tableReferenceProvider = checkNotNull(tableRefProvider, "tableRefProvider");
  // No table is cached yet; the reference is created empty.
  cachedTable = new AtomicReference<>();
}
 
Example #30
Source File: WriteRename.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Bundles a retry job with its table destination and a list of temp table references.
 *
 * <p>The arguments are stored as given; the {@code tempTables} list is not copied.
 */
public PendingJobData(
    BigQueryHelpers.PendingJob retryJob,
    TableDestination tableDestination,
    List<TableReference> tempTables) {
  this.retryJob = retryJob;
  this.tableDestination = tableDestination;
  this.tempTables = tempTables;
}