com.google.api.services.bigquery.model.Dataset Java Examples

The following examples show how to use com.google.api.services.bigquery.model.Dataset. All of them are taken from open-source projects; the original project and source file are noted above each example.
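
Before the individual examples, here is a minimal, self-contained sketch of the pattern most of them share: building a Dataset around a DatasetReference and inserting it. The class and method names in this sketch are illustrative only, and it assumes an already-authorized Bigquery client.

import com.google.api.services.bigquery.Bigquery;
import com.google.api.services.bigquery.model.Dataset;
import com.google.api.services.bigquery.model.DatasetReference;

import java.io.IOException;

class DatasetSketch {
  // Minimal sketch: create a dataset, assuming `bigquery` is already authorized.
  static void createDataset(Bigquery bigquery, String projectId, String datasetId)
      throws IOException {
    Dataset dataset =
        new Dataset()
            .setDatasetReference(
                new DatasetReference().setProjectId(projectId).setDatasetId(datasetId));
    bigquery.datasets().insert(projectId, dataset).execute();
  }
}
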
Example #1
Source File: BigQueryHelpers.java    From beam with Apache License 2.0
static String getDatasetLocation(
    DatasetService datasetService, String projectId, String datasetId) {
  Dataset dataset;
  try {
    dataset = datasetService.getDataset(projectId, datasetId);
  } catch (Exception e) {
    if (e instanceof InterruptedException) {
      Thread.currentThread().interrupt();
    }
    throw new RuntimeException(
        String.format(
            "unable to obtain dataset for dataset %s in project %s", datasetId, projectId),
        e);
  }
  return dataset.getLocation();
}
 
Example #2
Source File: BqDdlOperatorFactory.java    From digdag with Apache License 2.0
private Dataset dataset(String defaultProjectId, JsonNode node)
{
    if (node.isTextual()) {
        return new Dataset()
                .setDatasetReference(datasetReference(defaultProjectId, node.asText()));
    }
    else {
        DatasetConfig config;
        try {
            config = objectMapper.readValue(node.traverse(), DatasetConfig.class);
        }
        catch (IOException e) {
            throw new ConfigException("Invalid dataset reference or configuration: " + node, e);
        }
        return dataset(defaultProjectId, config);
    }
}
 
Example #3
Source File: CheckedBigquery.java    From nomulus with Apache License 2.0
/**
 * Ensures the dataset exists by trying to create it. Note that it's not appreciably cheaper
 * to check for dataset existence than it is to try to create it and check for exceptions.
 */
// Note that these are not static so they can be mocked for testing.
private void ensureDataset(Bigquery bigquery, String projectId, String datasetId)
    throws IOException {
  try {
    bigquery.datasets()
        .insert(projectId,
            new Dataset().setDatasetReference(
                new DatasetReference()
                    .setProjectId(projectId)
                    .setDatasetId(datasetId)))
        .execute();
  } catch (IOException e) {
    // Swallow errors about a duplicate dataset, and throw any other ones.
    if (!BigqueryJobFailureException.create(e).getReason().equals("duplicate")) {
      throw e;
    }
  }
}
 
Example #4
Source File: CheckedBigqueryTest.java    From nomulus with Apache License 2.0
@Before
public void before() throws Exception {
  when(bigquery.datasets()).thenReturn(bigqueryDatasets);
  when(bigqueryDatasets.insert(eq("Project-Id"), any(Dataset.class)))
      .thenReturn(bigqueryDatasetsInsert);
  when(bigquery.tables()).thenReturn(bigqueryTables);
  when(bigqueryTables.insert(eq("Project-Id"), any(String.class), any(Table.class)))
      .thenReturn(bigqueryTablesInsert);
  checkedBigquery = new CheckedBigquery();
  checkedBigquery.bigquery = bigquery;
  checkedBigquery.bigquerySchemas =
      new ImmutableMap.Builder<String, ImmutableList<TableFieldSchema>>()
          .put(
              "Table-Id",
              ImmutableList.of(new TableFieldSchema().setName("column1").setType(STRING.name())))
          .put(
              "Table2",
              ImmutableList.of(new TableFieldSchema().setName("column1").setType(STRING.name())))
          .build();
}
 
Example #5
Source File: BqClient.java    From digdag with Apache License 2.0
void createDataset(String projectId, Dataset dataset)
        throws IOException
{
    try {
        client.datasets().insert(projectId, dataset)
                .execute();
    }
    catch (GoogleJsonResponseException e) {
        if (e.getStatusCode() == HttpStatusCodes.STATUS_CODE_CONFLICT) {
            logger.debug("Dataset already exists: {}:{}", dataset.getDatasetReference());
        }
        else {
            throw e;
        }
    }
}
 
Example #6
Source File: CheckedBigqueryTest.java    From nomulus with Apache License 2.0
@Test
public void testSuccess_datastoreAndTableCreation() throws Exception {
  checkedBigquery.ensureDataSetAndTableExist("Project-Id", "Dataset2", "Table2");

  ArgumentCaptor<Dataset> datasetArg = ArgumentCaptor.forClass(Dataset.class);
  verify(bigqueryDatasets).insert(eq("Project-Id"), datasetArg.capture());
  assertThat(datasetArg.getValue().getDatasetReference().getProjectId())
      .isEqualTo("Project-Id");
  assertThat(datasetArg.getValue().getDatasetReference().getDatasetId())
      .isEqualTo("Dataset2");
  verify(bigqueryDatasetsInsert).execute();

  ArgumentCaptor<Table> tableArg = ArgumentCaptor.forClass(Table.class);
  verify(bigqueryTables).insert(eq("Project-Id"), eq("Dataset2"), tableArg.capture());
  TableReference ref = tableArg.getValue().getTableReference();
  assertThat(ref.getProjectId()).isEqualTo("Project-Id");
  assertThat(ref.getDatasetId()).isEqualTo("Dataset2");
  assertThat(ref.getTableId()).isEqualTo("Table2");
  assertThat(tableArg.getValue().getSchema().getFields())
      .containsExactly(new TableFieldSchema().setName("column1").setType(STRING.name()));
  verify(bigqueryTablesInsert).execute();
}
 
Example #7
Source File: UpdateSnapshotViewActionTest.java    From nomulus with Apache License 2.0
@Before
public void before() throws Exception {
  when(checkedBigquery.ensureDataSetExists(anyString(), anyString())).thenReturn(bigquery);
  when(bigquery.datasets()).thenReturn(bigqueryDatasets);
  when(bigqueryDatasets.insert(anyString(), any(Dataset.class)))
      .thenReturn(bigqueryDatasetsInsert);
  when(bigquery.tables()).thenReturn(bigqueryTables);
  when(bigqueryTables.update(anyString(), anyString(), anyString(), any(Table.class)))
      .thenReturn(bigqueryTablesUpdate);

  action = new UpdateSnapshotViewAction();
  action.checkedBigquery = checkedBigquery;
  action.datasetId = "some_dataset";
  action.kindName = "fookind";
  action.viewName = "latest_datastore_export";
  action.projectId = "myproject";
  action.tableId = "12345_fookind";
}
 
Example #8
Source File: UploadDatastoreBackupActionTest.java    From nomulus with Apache License 2.0
@Before
public void before() throws Exception {
  when(checkedBigquery.ensureDataSetExists("Project-Id", BACKUP_DATASET)).thenReturn(bigquery);
  when(bigquery.jobs()).thenReturn(bigqueryJobs);
  when(bigqueryJobs.insert(eq("Project-Id"), any(Job.class))).thenReturn(bigqueryJobsInsert);
  when(bigquery.datasets()).thenReturn(bigqueryDatasets);
  when(bigqueryDatasets.insert(eq("Project-Id"), any(Dataset.class)))
      .thenReturn(bigqueryDatasetsInsert);
  action = new UploadDatastoreBackupAction();
  action.checkedBigquery = checkedBigquery;
  action.bigqueryPollEnqueuer = bigqueryPollEnqueuer;
  action.projectId = "Project-Id";
  action.backupFolderUrl = "gs://bucket/path";
  action.backupId = "2018-12-05T17:46:39_92612";
  action.backupKinds = "one,two,three";
}
 
Example #9
Source File: ExampleUtils.java    From beam with Apache License 2.0
private void setupBigQueryTable(
    String projectId, String datasetId, String tableId, TableSchema schema) throws IOException {
  if (bigQueryClient == null) {
    bigQueryClient = newBigQueryClient(options.as(BigQueryOptions.class)).build();
  }

  Datasets datasetService = bigQueryClient.datasets();
  if (executeNullIfNotFound(datasetService.get(projectId, datasetId)) == null) {
    Dataset newDataset =
        new Dataset()
            .setDatasetReference(
                new DatasetReference().setProjectId(projectId).setDatasetId(datasetId));
    datasetService.insert(projectId, newDataset).execute();
  }

  Tables tableService = bigQueryClient.tables();
  Table table = executeNullIfNotFound(tableService.get(projectId, datasetId, tableId));
  if (table == null) {
    Table newTable =
        new Table()
            .setSchema(schema)
            .setTableReference(
                new TableReference()
                    .setProjectId(projectId)
                    .setDatasetId(datasetId)
                    .setTableId(tableId));
    tableService.insert(projectId, datasetId, newTable).execute();
  } else if (!table.getSchema().equals(schema)) {
    throw new RuntimeException(
        "Table exists and schemas do not match, expecting: "
            + schema.toPrettyString()
            + ", actual: "
            + table.getSchema().toPrettyString());
  }
}
 
Example #10
Source File: GcpUtil.java    From digdag with Apache License 2.0
static Dataset createDataset(Bigquery bq, String projectId, String datasetId)
        throws IOException, RetryExecutor.RetryGiveupException
{
    Dataset dataset = new Dataset()
            .setDatasetReference(new DatasetReference()
                    .setDatasetId(datasetId));
    Dataset created = createDataset(bq, projectId, dataset);
    assertThat(datasetExists(bq, projectId, datasetId), is(true));
    return created;
}
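
The datasetExists helper asserted on above is not part of this listing. A plausible sketch (an assumption, not digdag's actual code) issues a datasets().get() and treats 404 as absence, using the same GoogleJsonResponseException and HttpStatusCodes handling as Example #5:

static boolean datasetExists(Bigquery bq, String projectId, String datasetId)
        throws IOException
{
    try {
        // A successful get means the dataset exists.
        bq.datasets().get(projectId, datasetId).execute();
        return true;
    }
    catch (GoogleJsonResponseException e) {
        // 404 means the dataset does not exist; anything else is a real error.
        if (e.getStatusCode() == HttpStatusCodes.STATUS_CODE_NOT_FOUND) {
            return false;
        }
        throw e;
    }
}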
 
Example #11
Source File: BigQueryIT.java    From digdag with Apache License 2.0
@Test
public void testLoad()
        throws Exception
{
    assumeThat(GCS_TEST_BUCKET, not(isEmptyOrNullString()));

    // Create source data object
    String objectName = GCS_PREFIX + "test.csv";
    byte[] data = Joiner.on('\n').join("a,b", "c,d").getBytes(UTF_8);
    InputStreamContent content = new InputStreamContent("text/csv", new ByteArrayInputStream(data))
            .setLength(data.length);
    StorageObject metadata = new StorageObject().setName(objectName);
    retryExecutor.run(() -> gcs.objects()
            .insert(GCS_TEST_BUCKET, metadata, content)
            .execute());

    // Create output dataset
    String datasetId = BQ_TAG + "_load_test";
    Dataset dataset = new Dataset().setDatasetReference(new DatasetReference()
            .setProjectId(gcpProjectId)
            .setDatasetId(datasetId));
    retryExecutor.run(() -> bq.datasets().insert(gcpProjectId, dataset)
            .execute());

    // Run load
    String tableId = "data";
    addWorkflow(projectDir, "acceptance/bigquery/load.dig");
    Id attemptId = pushAndStart(server.endpoint(), projectDir, "load", ImmutableMap.of(
            "source_bucket", GCS_TEST_BUCKET,
            "source_object", objectName,
            "target_dataset", datasetId,
            "target_table", tableId,
            "outfile", outfile.toString()));
    expect(Duration.ofMinutes(5), attemptSuccess(server.endpoint(), attemptId));
    assertThat(Files.exists(outfile), is(true));

    // Check that destination table was created
    Table destinationTable = retryExecutor.run(() -> bq.tables().get(gcpProjectId, datasetId, tableId).execute());
    assertThat(destinationTable.getTableReference().getTableId(), is(tableId));
}
 
Example #12
Source File: BqDdlOperatorFactory.java    From digdag with Apache License 2.0
private Dataset dataset(String defaultProjectId, DatasetConfig config)
{
    return new Dataset()
            .setDatasetReference(new DatasetReference()
                    .setProjectId(config.project().or(defaultProjectId))
                    .setDatasetId(config.id()))
            .setFriendlyName(config.friendly_name().orNull())
            .setDefaultTableExpirationMs(config.default_table_expiration().transform(d -> d.getDuration().toMillis()).orNull())
            .setLocation(config.location().orNull())
            .setAccess(config.access().orNull())
            .setLabels(config.labels().orNull());
}
 
Example #13
Source File: BqClient.java    From digdag with Apache License 2.0
void emptyDataset(String projectId, Dataset dataset)
        throws IOException
{
    String datasetId = dataset.getDatasetReference().getDatasetId();
    deleteDataset(projectId, datasetId);
    createDataset(projectId, dataset);
}
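
The deleteDataset method called above is likewise not shown here. A hypothetical sketch (digdag's real implementation may differ, for example by deleting tables individually first) could rely on the delete request's deleteContents flag:

void deleteDataset(String projectId, String datasetId)
        throws IOException
{
    try {
        // setDeleteContents(true) drops any tables still in the dataset.
        client.datasets().delete(projectId, datasetId)
                .setDeleteContents(true)
                .execute();
    }
    catch (GoogleJsonResponseException e) {
        // A dataset that is already gone is acceptable for "emptying" semantics.
        if (e.getStatusCode() != HttpStatusCodes.STATUS_CODE_NOT_FOUND) {
            throw e;
        }
    }
}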
 
Example #14
Source File: CheckedBigqueryTest.java    From nomulus with Apache License 2.0
@Test
public void testSuccess_datastoreCreation() throws Exception {
  checkedBigquery.ensureDataSetExists("Project-Id", "Dataset-Id");

  ArgumentCaptor<Dataset> datasetArg = ArgumentCaptor.forClass(Dataset.class);
  verify(bigqueryDatasets).insert(eq("Project-Id"), datasetArg.capture());
  assertThat(datasetArg.getValue().getDatasetReference().getProjectId())
      .isEqualTo("Project-Id");
  assertThat(datasetArg.getValue().getDatasetReference().getDatasetId())
      .isEqualTo("Dataset-Id");
  verify(bigqueryDatasetsInsert).execute();
}
 
Example #15
Source File: BigqueryConnection.java    From nomulus with Apache License 2.0
/**
 * Helper that creates a dataset with this name if it doesn't already exist, and returns true
 * if creation took place.
 */
public boolean createDatasetIfNeeded(String datasetName) throws IOException {
  if (!checkDatasetExists(datasetName)) {
    bigquery.datasets()
        .insert(getProjectId(), new Dataset().setDatasetReference(new DatasetReference()
            .setProjectId(getProjectId())
            .setDatasetId(datasetName)))
        .execute();
    logger.atInfo().log("Created dataset: %s:%s\n", getProjectId(), datasetName);
    return true;
  }
  return false;
}
 
Example #16
Source File: FakeDatasetService.java    From beam with Apache License 2.0
@Override
public Dataset getDataset(String projectId, String datasetId)
    throws IOException, InterruptedException {
  synchronized (tables) {
    Map<String, TableContainer> dataset = tables.get(projectId, datasetId);
    if (dataset == null) {
      throwNotFound(
          "Tried to get a dataset %s:%s, but no such table was set", projectId, datasetId);
    }
    return new Dataset()
        .setDatasetReference(
            new DatasetReference().setDatasetId(datasetId).setProjectId(projectId));
  }
}
 
Example #17
Source File: BigQueryServicesImpl.java    From beam with Apache License 2.0
/**
 * {@inheritDoc}
 *
 * <p>Tries executing the RPC for at most {@code MAX_RPC_RETRIES} times until it succeeds.
 *
 * @throws IOException if it exceeds {@code MAX_RPC_RETRIES} attempts.
 */
@Override
public Dataset getDataset(String projectId, String datasetId)
    throws IOException, InterruptedException {
  return executeWithRetries(
      client.datasets().get(projectId, datasetId),
      String.format(
          "Unable to get dataset: %s, aborting after %d retries.", datasetId, MAX_RPC_RETRIES),
      Sleeper.DEFAULT,
      createDefaultBackoff(),
      DONT_RETRY_NOT_FOUND);
}
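
Beam's executeWithRetries helper is not included in this listing. The idiom the javadoc describes — retry the RPC a bounded number of times with backoff, then fail — looks roughly like the sketch below. This is an illustration, not Beam's implementation, and the backoff constants are made up:

static <T> T executeWithSimpleRetries(java.util.concurrent.Callable<T> rpc, int maxRetries)
    throws IOException, InterruptedException {
  IOException lastException = null;
  for (int attempt = 0; attempt < maxRetries; attempt++) {
    try {
      return rpc.call();
    } catch (IOException e) {
      // Remember the failure and back off (exponentially) before the next attempt.
      lastException = e;
      Thread.sleep((long) (500 * Math.pow(2, attempt)));
    } catch (Exception e) {
      throw new IOException(e); // non-IO failures are not retried here
    }
  }
  throw new IOException("RPC failed after " + maxRetries + " attempts", lastException);
}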
 
Example #18
Source File: ExampleUtils.java    From deployment-examples with MIT License
private void setupBigQueryTable(
    String projectId, String datasetId, String tableId, TableSchema schema) throws IOException {
  if (bigQueryClient == null) {
    bigQueryClient = newBigQueryClient(options.as(BigQueryOptions.class)).build();
  }

  Datasets datasetService = bigQueryClient.datasets();
  if (executeNullIfNotFound(datasetService.get(projectId, datasetId)) == null) {
    Dataset newDataset =
        new Dataset()
            .setDatasetReference(
                new DatasetReference().setProjectId(projectId).setDatasetId(datasetId));
    datasetService.insert(projectId, newDataset).execute();
  }

  Tables tableService = bigQueryClient.tables();
  Table table = executeNullIfNotFound(tableService.get(projectId, datasetId, tableId));
  if (table == null) {
    Table newTable =
        new Table()
            .setSchema(schema)
            .setTableReference(
                new TableReference()
                    .setProjectId(projectId)
                    .setDatasetId(datasetId)
                    .setTableId(tableId));
    tableService.insert(projectId, datasetId, newTable).execute();
  } else if (!table.getSchema().equals(schema)) {
    throw new RuntimeException(
        "Table exists and schemas do not match, expecting: "
            + schema.toPrettyString()
            + ", actual: "
            + table.getSchema().toPrettyString());
  }
}
 
Example #19
Source File: BigQueryServices.java    From beam with Apache License 2.0
/** Gets the specified {@link Dataset} resource by dataset ID. */
Dataset getDataset(String projectId, String datasetId) throws IOException, InterruptedException;
 
Example #20
Source File: BigQueryIT.java    From digdag with Apache License 2.0
@Test
public void testExtract()
        throws Exception
{
    assumeThat(GCS_TEST_BUCKET, not(isEmptyOrNullString()));

    // Create source table
    String tableId = "data";
    String datasetId = BQ_TAG + "_extract_test";
    Dataset dataset = new Dataset().setDatasetReference(new DatasetReference()
            .setProjectId(gcpProjectId)
            .setDatasetId(datasetId));
    retryExecutor.run(() -> bq.datasets().insert(gcpProjectId, dataset)
            .execute());
    Table table = new Table().setTableReference(new TableReference()
            .setProjectId(gcpProjectId)
            .setTableId(tableId))
            .setSchema(new TableSchema()
                    .setFields(ImmutableList.of(
                            new TableFieldSchema().setName("foo").setType("STRING"),
                            new TableFieldSchema().setName("bar").setType("STRING")
                    )));
    retryExecutor.run(() -> bq.tables().insert(gcpProjectId, datasetId, table)
            .execute());

    // Populate source table
    TableDataInsertAllRequest content = new TableDataInsertAllRequest()
            .setRows(ImmutableList.of(
                    new TableDataInsertAllRequest.Rows().setJson(ImmutableMap.of(
                            "foo", "a",
                            "bar", "b")),
                    new TableDataInsertAllRequest.Rows().setJson(ImmutableMap.of(
                            "foo", "c",
                            "bar", "d"))));
    retryExecutor.run(() -> bq.tabledata().insertAll(gcpProjectId, datasetId, tableId, content)
            .execute());

    // Run extract
    String objectName = GCS_PREFIX + "test.csv";
    addWorkflow(projectDir, "acceptance/bigquery/extract.dig");
    Id attemptId = pushAndStart(server.endpoint(), projectDir, "extract", ImmutableMap.of(
            "src_dataset", datasetId,
            "src_table", tableId,
            "dst_bucket", GCS_TEST_BUCKET,
            "dst_object", objectName,
            "outfile", outfile.toString()));
    expect(Duration.ofMinutes(5), attemptSuccess(server.endpoint(), attemptId));
    assertThat(Files.exists(outfile), is(true));

    // Check that destination file was created
    StorageObject metadata = retryExecutor.run(() -> gcs.objects().get(GCS_TEST_BUCKET, objectName)
            .execute());
    assertThat(metadata.getName(), is(objectName));
    ByteArrayOutputStream data = new ByteArrayOutputStream();
    retryExecutor.run(() -> {
        try {
            gcs.objects().get(GCS_TEST_BUCKET, objectName)
                    .executeMediaAndDownloadTo(data);
        }
        catch (IOException e) {
            throw Throwables.propagate(e);
        }
    });
}
 
Example #21
Source File: GcpUtil.java    From digdag with Apache License 2.0
static Dataset createDataset(Bigquery bq, String projectId, Dataset dataset)
        throws RetryExecutor.RetryGiveupException
{
    return retryExecutor.run(() -> bq.datasets().insert(projectId, dataset).execute());
}
 
Example #22
Source File: BigQueryHelper.java    From hadoop-connectors with Apache License 2.0
/**
 * Imports data from GCS into BigQuery via a load job. Optionally polls for completion before
 * returning.
 *
 * @param projectId the project on whose behalf to perform the load.
 * @param tableRef the reference to the destination table.
 * @param schema the schema of the source data to populate the destination table by.
 * @param timePartitioning time partitioning to populate the destination table.
 * @param kmsKeyName the Cloud KMS encryption key used to protect the output table.
 * @param sourceFormat the file format of the source data.
 * @param createDisposition the create disposition of the output table.
 * @param writeDisposition the write disposition of the output table.
 * @param gcsPaths the location of the source data in GCS.
 * @param awaitCompletion if true, block and poll until job completes, otherwise return as soon as
 *     the job has been successfully dispatched.
 * @throws IOException
 * @throws InterruptedException if interrupted while waiting for job completion.
 */
public void importFromGcs(
    String projectId,
    TableReference tableRef,
    @Nullable TableSchema schema,
    @Nullable TimePartitioning timePartitioning,
    @Nullable String kmsKeyName,
    BigQueryFileFormat sourceFormat,
    String createDisposition,
    String writeDisposition,
    List<String> gcsPaths,
    boolean awaitCompletion)
    throws IOException, InterruptedException {
  logger.atInfo().log(
      "Importing into table '%s' from %s paths; path[0] is '%s'; awaitCompletion: %s;"
          + " timePartitioning: %s",
      lazy(() -> BigQueryStrings.toString(tableRef)),
      gcsPaths.size(),
      gcsPaths.isEmpty() ? "(empty)" : gcsPaths.get(0),
      awaitCompletion,
      timePartitioning);

  // Create load conf with minimal requirements.
  JobConfigurationLoad loadConfig = new JobConfigurationLoad();
  loadConfig.setSchema(schema);
  loadConfig.setSourceFormat(sourceFormat.getFormatIdentifier());
  loadConfig.setSourceUris(gcsPaths);
  loadConfig.setDestinationTable(tableRef);
  loadConfig.setTimePartitioning(timePartitioning);
  loadConfig.setCreateDisposition(createDisposition);
  loadConfig.setWriteDisposition(writeDisposition);
  if (!Strings.isNullOrEmpty(kmsKeyName)) {
    loadConfig.setDestinationEncryptionConfiguration(
        new EncryptionConfiguration().setKmsKeyName(kmsKeyName));
  }
  // Auto detect the schema if we're not given one, otherwise use the passed schema.
  if (schema == null) {
    logger.atInfo().log("No import schema provided, auto detecting schema.");
    loadConfig.setAutodetect(true);
  } else {
    logger.atInfo().log("Using provided import schema '%s'.", schema);
  }

  JobConfiguration config = new JobConfiguration();
  config.setLoad(loadConfig);

  // Get the dataset to determine the location
  Dataset dataset;
  try {
    dataset = service.datasets().get(tableRef.getProjectId(), tableRef.getDatasetId()).execute();
  } catch (IOException ioe) {
    throw new IOException(
        String.format(
            "Failed to get dataset '%s' in project '%s' for table '%s'",
            tableRef.getDatasetId(), tableRef.getProjectId(), tableRef),
        ioe);
  }

  JobReference jobReference =
      createJobReference(projectId, "direct-bigqueryhelper-import", dataset.getLocation());
  Job job = new Job();
  job.setConfiguration(config);
  job.setJobReference(jobReference);

  // Insert and run job.
  insertJobOrFetchDuplicate(projectId, job);

  if (awaitCompletion) {
    // Poll until job is complete.
    BigQueryUtils.waitForJobCompletion(getRawBigquery(), projectId, jobReference, () -> {});
  }
}
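
A hypothetical invocation of importFromGcs might look like the following. Every name here is a placeholder, `helper` is assumed to be an already-constructed BigQueryHelper, and the format constant should be checked against the BigQueryFileFormat enum shipped with your connector version:

TableReference tableRef = new TableReference()
    .setProjectId("my-project")
    .setDatasetId("my_dataset")
    .setTableId("my_table");

helper.importFromGcs(
    "my-project",
    tableRef,
    /* schema= */ null,                 // null triggers schema autodetection (see above)
    /* timePartitioning= */ null,
    /* kmsKeyName= */ null,
    BigQueryFileFormat.NEWLINE_DELIMITED_JSON,
    "CREATE_IF_NEEDED",
    "WRITE_APPEND",
    ImmutableList.of("gs://my-bucket/exports/*.json"),
    /* awaitCompletion= */ true);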
 
Example #23
Source File: AbstractBigQueryIoIntegrationTestBase.java    From hadoop-connectors with Apache License 2.0
@Before
public void setUp()
    throws IOException, GeneralSecurityException {
  MockitoAnnotations.initMocks(this);

  LoggerConfig.getConfig(GsonBigQueryInputFormat.class).setLevel(Level.FINE);
  LoggerConfig.getConfig(BigQueryUtils.class).setLevel(Level.FINE);
  LoggerConfig.getConfig(GsonRecordReader.class).setLevel(Level.FINE);

  bucketHelper = new TestBucketHelper("bq_integration_test");
  // A unique per-setUp String to avoid collisions between test runs.
  String testId = bucketHelper.getUniqueBucketPrefix();

  projectIdValue = TestConfiguration.getInstance().getProjectId();
  if (Strings.isNullOrEmpty(projectIdValue)) {
    projectIdValue = System.getenv(BIGQUERY_PROJECT_ID_ENVVARNAME);
  }

  checkArgument(
      !Strings.isNullOrEmpty(projectIdValue), "Must provide %s", BIGQUERY_PROJECT_ID_ENVVARNAME);
  testDataset = testId + "_dataset";
  testBucket = testId + "_bucket";

  // We have to create the output dataset ourselves.
  // TODO(user): Extract dataset creation into a library which is also used by
  // BigQueryOutputCommitter.
  Dataset outputDataset = new Dataset();
  DatasetReference datasetReference = new DatasetReference();
  datasetReference.setProjectId(projectIdValue);
  datasetReference.setDatasetId(testDataset);

  config = getConfigForGcsFromBigquerySettings(projectIdValue);
  BigQueryFactory factory = new BigQueryFactory();
  bigqueryInstance = factory.getBigQuery(config);

  Bigquery.Datasets datasets = bigqueryInstance.datasets();
  outputDataset.setDatasetReference(datasetReference);
  logger.atInfo().log(
      "Creating temporary dataset '%s' for project '%s'", testDataset, projectIdValue);
  datasets.insert(projectIdValue, outputDataset).execute();

  Path toCreate = new Path(String.format("gs://%s", testBucket));
  FileSystem fs = toCreate.getFileSystem(config);
  logger.atInfo().log("Creating temporary test bucket '%s'", toCreate);
  fs.mkdirs(toCreate);

  // Since the TaskAttemptContext and JobContexts are mostly used just to access a
  // "Configuration" object, we'll mock the two contexts to just return our fake configuration
  // object with which we'll provide the settings we want to test.
  config.clear();
  setConfigForGcsFromBigquerySettings();

  when(mockTaskAttemptContext.getConfiguration())
      .thenReturn(config);
  when(mockJobContext.getConfiguration())
      .thenReturn(config);

  // Have a realistic-looking fake TaskAttemptID.
  int taskNumber = 3;
  int taskAttempt = 2;
  int jobNumber = 42;
  String jobIdString = "jobid" + System.currentTimeMillis();
  JobID jobId = new JobID(jobIdString, jobNumber);
  TaskAttemptID taskAttemptId =
      new TaskAttemptID(new TaskID(jobId, false, taskNumber), taskAttempt);
  when(mockTaskAttemptContext.getTaskAttemptID())
      .thenReturn(taskAttemptId);
  when(mockJobContext.getJobID()).thenReturn(jobId);

  testTable = testId + "_table_" + jobIdString;
}
 
Example #24
Source File: BigQueryHelperTest.java    From hadoop-connectors with Apache License 2.0
@Before
public void setUp() throws IOException {
  MockitoAnnotations.initMocks(this);
  LoggerConfig.getConfig(GsonBigQueryInputFormat.class).setLevel(Level.FINE);

  // Create fake job reference.
  JobReference fakeJobReference = new JobReference().setProjectId(jobProjectId).setJobId(jobId);

  // Create the job result.
  jobStatus = new JobStatus();
  jobStatus.setState("DONE");
  jobStatus.setErrorResult(null);

  jobHandle = new Job();
  jobHandle.setStatus(jobStatus);
  jobHandle.setJobReference(fakeJobReference);

  // Mocks for Bigquery jobs.
  when(mockBigquery.jobs()).thenReturn(mockBigqueryJobs);

  // Mock getting Bigquery job.
  when(mockBigqueryJobs.get(any(String.class), any(String.class)))
      .thenReturn(mockBigqueryJobsGet);
  when(mockBigqueryJobsGet.setLocation(any(String.class))).thenReturn(mockBigqueryJobsGet);

  // Mock inserting Bigquery job.
  when(mockBigqueryJobs.insert(any(String.class), any(Job.class)))
      .thenReturn(mockBigqueryJobsInsert);

  // Fake table.
  fakeTableSchema = new TableSchema();
  fakeTable = new Table().setSchema(fakeTableSchema).setLocation("test_location");

  // Mocks for Bigquery tables.
  when(mockBigquery.tables()).thenReturn(mockBigqueryTables);
  when(mockBigqueryTables.get(any(String.class), any(String.class), any(String.class)))
      .thenReturn(mockBigqueryTablesGet);

  Datasets datasets = Mockito.mock(Datasets.class);
  Datasets.Get datasetsGet = Mockito.mock(Datasets.Get.class);
  Dataset dataset = new Dataset().setLocation("test_location");
  when(mockBigquery.datasets()).thenReturn(datasets);
  when(datasets.get(any(String.class), any(String.class))).thenReturn(datasetsGet);
  when(datasetsGet.execute()).thenReturn(dataset);

  // Create table reference.
  tableRef = new TableReference();
  tableRef.setProjectId(projectId);
  tableRef.setDatasetId(datasetId);
  tableRef.setTableId(tableId);

  helper = new BigQueryHelper(mockBigquery);
  helper.setErrorExtractor(mockErrorExtractor);
}
 
Example #25
Source File: BqDdlOperatorFactory.java    From digdag with Apache License 2.0
Optional<List<Dataset.Access>> access();