com.google.cloud.bigquery.FormatOptions Java Examples

The following examples show how to use com.google.cloud.bigquery.FormatOptions. Each example is drawn from an open source project; the source file, project, and license are noted above each snippet.
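Before diving in, here is a minimal, self-contained sketch of the pattern shared by most of the examples below: create a FormatOptions value with a static factory method and attach it to a load configuration. The class name, dataset, table, and GCS path are placeholders, not taken from any of the projects.

import com.google.cloud.bigquery.FormatOptions;
import com.google.cloud.bigquery.LoadJobConfiguration;
import com.google.cloud.bigquery.TableId;

public class FormatOptionsSketch {
  public static void main(String[] args) {
    // Static factories cover the common source formats.
    FormatOptions csv = FormatOptions.csv();
    FormatOptions json = FormatOptions.json();
    FormatOptions parquet = FormatOptions.parquet();

    // The chosen format is attached to a load (or write channel) configuration.
    TableId tableId = TableId.of("my_dataset", "my_table");
    LoadJobConfiguration configuration =
        LoadJobConfiguration.newBuilder(tableId, "gs://my_bucket/data.json")
            .setFormatOptions(json)
            .build();

    System.out.println(json.getType()); // the underlying BigQuery type string
  }
}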
Example #1
Source File: BigQueryTemplateIntegrationTests.java    From spring-cloud-gcp with Apache License 2.0
@Test
public void testLoadFile() throws IOException, ExecutionException, InterruptedException {
	ListenableFuture<Job> bigQueryJobFuture =
			bigQueryTemplate.writeDataToTable(TABLE_NAME, dataFile.getInputStream(), FormatOptions.csv());

	Job job = bigQueryJobFuture.get();
	assertThat(job.getStatus().getState()).isEqualTo(JobStatus.State.DONE);

	QueryJobConfiguration queryJobConfiguration = QueryJobConfiguration
			.newBuilder("SELECT * FROM test_dataset.template_test_table").build();
	TableResult result = this.bigQuery.query(queryJobConfiguration);

	assertThat(result.getTotalRows()).isEqualTo(1);
	assertThat(
			result.getValues().iterator().next().get("State").getStringValue()).isEqualTo("Alabama");
}
 
Example #2
Source File: PutBigQueryBatchIT.java    From nifi with Apache License 2.0
@Test
public void PutBigQueryBatchBadRecordTest() throws Exception {
    String methodName = Thread.currentThread().getStackTrace()[1].getMethodName();
    runner = setCredentialsControllerService(runner);
    runner.setProperty(AbstractGCPProcessor.GCP_CREDENTIALS_PROVIDER_SERVICE, CONTROLLER_SERVICE);
    runner.setProperty(BigQueryAttributes.DATASET_ATTR, dataset.getDatasetId().getDataset());
    runner.setProperty(BigQueryAttributes.TABLE_NAME_ATTR, methodName);
    runner.setProperty(BigQueryAttributes.SOURCE_TYPE_ATTR, FormatOptions.json().getType());
    runner.setProperty(BigQueryAttributes.TABLE_SCHEMA_ATTR, TABLE_SCHEMA_STRING);

    String str = "{\"field_1\":\"Daniel is great\"}\r\n";

    runner.enqueue(new ByteArrayInputStream(str.getBytes(StandardCharsets.UTF_8)));
    runner.run(1);
    runner.assertAllFlowFilesTransferred(AbstractBigQueryProcessor.REL_FAILURE, 1);
}
 
Example #3
Source File: PutBigQueryBatchIT.java    From nifi with Apache License 2.0
@Test
public void PutBigQueryBatchSmallPayloadTest() throws Exception {
    String methodName = Thread.currentThread().getStackTrace()[1].getMethodName();
    runner = setCredentialsControllerService(runner);
    runner.setProperty(AbstractGCPProcessor.GCP_CREDENTIALS_PROVIDER_SERVICE, CONTROLLER_SERVICE);
    runner.setProperty(BigQueryAttributes.DATASET_ATTR, dataset.getDatasetId().getDataset());
    runner.setProperty(BigQueryAttributes.TABLE_NAME_ATTR, methodName);
    runner.setProperty(BigQueryAttributes.SOURCE_TYPE_ATTR, FormatOptions.json().getType());
    runner.setProperty(BigQueryAttributes.TABLE_SCHEMA_ATTR, TABLE_SCHEMA_STRING);

    String str = "{\"field_1\":\"Daniel is great\",\"field_2\":\"Daniel is great\"}\r\n";

    runner.enqueue(new ByteArrayInputStream(str.getBytes(StandardCharsets.UTF_8)));
    runner.run(1);
    for (MockFlowFile flowFile : runner.getFlowFilesForRelationship(AbstractBigQueryProcessor.REL_SUCCESS)) {
        validateNoServiceExceptionAttribute(flowFile);
    }
    runner.assertAllFlowFilesTransferred(AbstractBigQueryProcessor.REL_SUCCESS, 1);
}
 
Example #4
Source File: CloudSnippets.java    From google-cloud-java with Apache License 2.0
/** Example of loading a parquet file from GCS to a table. */
public void loadTableGcsParquet(String datasetName) throws InterruptedException {
  // [START bigquery_load_table_gcs_parquet]
  String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet";
  TableId tableId = TableId.of(datasetName, "us_states");
  LoadJobConfiguration configuration =
      LoadJobConfiguration.builder(tableId, sourceUri)
          .setFormatOptions(FormatOptions.parquet())
          .build();
  // Load the table
  Job loadJob = bigquery.create(JobInfo.of(configuration));
  loadJob = loadJob.waitFor();
  // Check the table
  StandardTableDefinition destinationTable = bigquery.getTable(tableId).getDefinition();
  System.out.println("State: " + loadJob.getStatus().getState());
  System.out.printf("Loaded %d rows.\n", destinationTable.getNumRows());
  // [END bigquery_load_table_gcs_parquet]
}
 
Example #5
Source File: TableSnippets.java    From google-cloud-java with Apache License 2.0
/** Example loading data from a single Google Cloud Storage file. */
// [TARGET load(FormatOptions, String, JobOption...)]
// [VARIABLE "gs://my_bucket/filename.csv"]
public Job loadSingle(String sourceUri) {
  // [START bigquery_load_table_gcs_csv]
  Job job = table.load(FormatOptions.csv(), sourceUri);
  // Wait for the job to complete
  try {
    Job completedJob =
        job.waitFor(
            RetryOption.initialRetryDelay(Duration.ofSeconds(1)),
            RetryOption.totalTimeout(Duration.ofMinutes(3)));
    if (completedJob != null && completedJob.getStatus().getError() == null) {
      // Job completed successfully
    } else {
      // Handle error case
    }
  } catch (InterruptedException e) {
    // Handle interrupted wait
  }
  // [END bigquery_load_table_gcs_csv]
  return job;
}
 
Example #6
Source File: CreateTableAndLoadData.java    From google-cloud-java with Apache License 2.0
public static void main(String... args) throws InterruptedException, TimeoutException {
  BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
  TableId tableId = TableId.of("dataset", "table");
  Table table = bigquery.getTable(tableId);
  if (table == null) {
    System.out.println("Creating table " + tableId);
    Field integerField = Field.of("fieldName", LegacySQLTypeName.INTEGER);
    Schema schema = Schema.of(integerField);
    table = bigquery.create(TableInfo.of(tableId, StandardTableDefinition.of(schema)));
  }
  System.out.println("Loading data into table " + tableId);
  Job loadJob = table.load(FormatOptions.csv(), "gs://bucket/path");
  loadJob = loadJob.waitFor();
  if (loadJob.getStatus().getError() != null) {
    System.out.println("Job completed with errors");
  } else {
    System.out.println("Job succeeded");
  }
}
 
Example #7
Source File: BigQueryTemplateIntegrationTests.java    From spring-cloud-gcp with Apache License 2.0
@Test
public void testLoadBytes() throws ExecutionException, InterruptedException {
	byte[] byteArray =
			"CountyId,State,County\n1001,Alabama,Autauga County\n".getBytes();
	ByteArrayInputStream byteStream = new ByteArrayInputStream(byteArray);

	ListenableFuture<Job> bigQueryJobFuture =
			bigQueryTemplate.writeDataToTable(TABLE_NAME, byteStream, FormatOptions.csv());

	Job job = bigQueryJobFuture.get();
	assertThat(job.getStatus().getState()).isEqualTo(JobStatus.State.DONE);

	QueryJobConfiguration queryJobConfiguration = QueryJobConfiguration
			.newBuilder("SELECT * FROM test_dataset.template_test_table").build();
	TableResult result = this.bigQuery.query(queryJobConfiguration);

	assertThat(result.getTotalRows()).isEqualTo(1);
	assertThat(
			result.getValues().iterator().next().get("State").getStringValue()).isEqualTo("Alabama");
}
 
Example #8
Source File: BigQueryFileMessageHandlerIntegrationTests.java    From spring-cloud-gcp with Apache License 2.0
@Test
public void testLoadFile_cancel() {
	HashMap<String, Object> messageHeaders = new HashMap<>();
	messageHeaders.put(BigQuerySpringMessageHeaders.TABLE_NAME, TABLE_NAME);
	messageHeaders.put(BigQuerySpringMessageHeaders.FORMAT_OPTIONS, FormatOptions.csv());

	Message<File> message = MessageBuilder.createMessage(
			new File("src/test/resources/data.csv"),
			new MessageHeaders(messageHeaders));

	ListenableFuture<Job> jobFuture =
			(ListenableFuture<Job>) this.messageHandler.handleRequestMessage(message);
	jobFuture.cancel(true);

	await().atMost(10, TimeUnit.SECONDS).untilAsserted(() -> {
		// This asserts that the BigQuery job polling task is no longer in the scheduler after cancel.
		assertThat(this.taskScheduler.getScheduledThreadPoolExecutor().getQueue()).hasSize(0);
	});
}
 
Example #9
Source File: BigQueryFileMessageHandlerIntegrationTests.java    From spring-cloud-gcp with Apache License 2.0
@Test
public void testLoadFile_sync() throws InterruptedException {
	this.messageHandler.setSync(true);

	HashMap<String, Object> messageHeaders = new HashMap<>();
	messageHeaders.put(BigQuerySpringMessageHeaders.TABLE_NAME, TABLE_NAME);
	messageHeaders.put(BigQuerySpringMessageHeaders.FORMAT_OPTIONS, FormatOptions.csv());

	Message<File> message = MessageBuilder.createMessage(
			new File("src/test/resources/data.csv"),
			new MessageHeaders(messageHeaders));

	Job job = (Job) this.messageHandler.handleRequestMessage(message);
	assertThat(job).isNotNull();

	QueryJobConfiguration queryJobConfiguration = QueryJobConfiguration
			.newBuilder("SELECT * FROM test_dataset.test_table").build();
	TableResult result = this.bigquery.query(queryJobConfiguration);
	assertThat(result.getTotalRows()).isEqualTo(1);
}
 
Example #10
Source File: BigQueryFileMessageHandlerIntegrationTests.java    From spring-cloud-gcp with Apache License 2.0
@Test
public void testLoadFile() throws InterruptedException, ExecutionException {
	HashMap<String, Object> messageHeaders = new HashMap<>();
	this.messageHandler.setTableName(TABLE_NAME);
	this.messageHandler.setFormatOptions(FormatOptions.csv());

	Message<File> message = MessageBuilder.createMessage(
			new File("src/test/resources/data.csv"),
			new MessageHeaders(messageHeaders));

	ListenableFuture<Job> jobFuture =
			(ListenableFuture<Job>) this.messageHandler.handleRequestMessage(message);

	// Assert that a BigQuery polling task is scheduled successfully.
	await().atMost(Duration.FIVE_SECONDS)
			.untilAsserted(
					() -> assertThat(
							this.taskScheduler.getScheduledThreadPoolExecutor().getQueue()).hasSize(1));
	jobFuture.get();

	QueryJobConfiguration queryJobConfiguration = QueryJobConfiguration
			.newBuilder("SELECT * FROM test_dataset.test_table").build();
	TableResult result = this.bigquery.query(queryJobConfiguration);

	assertThat(result.getTotalRows()).isEqualTo(1);
	assertThat(
			result.getValues().iterator().next().get("State").getStringValue()).isEqualTo("Alabama");

	// This asserts that the BigQuery job polling task is no longer in the scheduler.
	assertThat(this.taskScheduler.getScheduledThreadPoolExecutor().getQueue()).hasSize(0);
}
 
Example #11
Source File: BigQueryTemplate.java    From spring-cloud-gcp with Apache License 2.0
@Override
public ListenableFuture<Job> writeDataToTable(
		String tableName, InputStream inputStream, FormatOptions dataFormatOptions) {
	TableId tableId = TableId.of(datasetName, tableName);

	WriteChannelConfiguration writeChannelConfiguration = WriteChannelConfiguration
			.newBuilder(tableId)
			.setFormatOptions(dataFormatOptions)
			.setAutodetect(this.autoDetectSchema)
			.setWriteDisposition(this.writeDisposition)
			.build();

	TableDataWriteChannel writer = bigQuery.writer(writeChannelConfiguration);

	try (OutputStream sink = Channels.newOutputStream(writer)) {
		// Write data from data input file to BigQuery
		StreamUtils.copy(inputStream, sink);
	}
	catch (IOException e) {
		throw new BigQueryException("Failed to write data to BigQuery tables.", e);
	}

	if (writer.getJob() == null) {
		throw new BigQueryException(
				"Failed to initialize the BigQuery write job.");
	}

	return createJobFuture(writer.getJob());
}
 
Example #12
Source File: BigQuerySampleConfiguration.java    From spring-cloud-gcp with Apache License 2.0
@Bean
@ServiceActivator(inputChannel = "bigQueryWriteDataChannel")
public MessageHandler messageSender(BigQueryTemplate bigQueryTemplate) {
	BigQueryFileMessageHandler messageHandler = new BigQueryFileMessageHandler(bigQueryTemplate);
	messageHandler.setFormatOptions(FormatOptions.csv());
	messageHandler.setOutputChannel(bigQueryJobReplyChannel());
	return messageHandler;
}
 
Example #13
Source File: TableSnippets.java    From google-cloud-java with Apache License 2.0
/** Example loading data from a list of Google Cloud Storage files. */
// [TARGET load(FormatOptions, List, JobOption...)]
// [VARIABLE "gs://my_bucket/filename1.csv"]
// [VARIABLE "gs://my_bucket/filename2.csv"]
public Job loadList(String gcsUrl1, String gcsUrl2) {
  // [START ]
  List<String> sourceUris = new ArrayList<>();
  sourceUris.add(gcsUrl1);
  sourceUris.add(gcsUrl2);
  Job job = table.load(FormatOptions.csv(), sourceUris);
  // Wait for the job to complete
  try {
    Job completedJob =
        job.waitFor(
            RetryOption.initialRetryDelay(Duration.ofSeconds(1)),
            RetryOption.totalTimeout(Duration.ofMinutes(3)));
    if (completedJob != null && completedJob.getStatus().getError() == null) {
      // Job completed successfully
    } else {
      // Handle error case
    }
  } catch (InterruptedException e) {
    // Handle interrupted wait
  }
  // [END ]
  return job;
}
 
Example #14
Source File: WebController.java    From spring-cloud-gcp with Apache License 2.0
/**
 * Handles a file upload using {@link BigQueryTemplate}.
 *
 * @param file the CSV file to upload to BigQuery
 * @param tableName name of the table to load data into
 * @return ModelAndView of the response to send back to users
 *
 * @throws IOException if the file cannot be loaded.
 */
@PostMapping("/uploadFile")
public ModelAndView handleFileUpload(
		@RequestParam("file") MultipartFile file, @RequestParam("tableName") String tableName)
		throws IOException {

	ListenableFuture<Job> loadJob = this.bigQueryTemplate.writeDataToTable(
			tableName, file.getInputStream(), FormatOptions.csv());

	return getResponse(loadJob, tableName);
}
 
Example #15
Source File: BigQuery.java    From gcp-ingestion with Mozilla Public License 2.0
@Override
protected CompletableFuture<Void> close() {
  List<String> sourceUris = sourceBlobIds.stream().map(BlobIdToString::apply)
      .collect(Collectors.toList());
  boolean loadSuccess = false;
  try {
    JobStatus status = bigQuery
        .create(JobInfo.of(LoadJobConfiguration.newBuilder(tableId, sourceUris)
            .setCreateDisposition(JobInfo.CreateDisposition.CREATE_NEVER)
            .setWriteDisposition(JobInfo.WriteDisposition.WRITE_APPEND)
            .setFormatOptions(FormatOptions.json()).setIgnoreUnknownValues(true)
            .setAutodetect(false).setMaxBadRecords(0).build()))
        .waitFor().getStatus();
    if (status.getError() != null) {
      throw new BigQueryErrors(ImmutableList.of(status.getError()));
    } else if (status.getExecutionErrors() != null
        && status.getExecutionErrors().size() > 0) {
      throw new BigQueryErrors(status.getExecutionErrors());
    }
    loadSuccess = true;
    return CompletableFuture.completedFuture(null);
  } catch (InterruptedException e) {
    throw new RuntimeException(e);
  } finally {
    if (delete == Delete.always || (delete == Delete.onSuccess && loadSuccess)) {
      try {
        storage.delete(sourceBlobIds);
      } catch (RuntimeException ignore2) {
        // don't fail batch when delete throws
      }
    }
  }
}
 
Example #16
Source File: BigQuerySnippets.java    From google-cloud-java with Apache License 2.0
/** Example of writing a local file to a table. */
// [TARGET writer(WriteChannelConfiguration)]
// [VARIABLE "my_dataset_name"]
// [VARIABLE "my_table_name"]
// [VARIABLE FileSystems.getDefault().getPath(".", "my-data.csv")]
// [VARIABLE "us"]
public long writeFileToTable(String datasetName, String tableName, Path csvPath, String location)
    throws IOException, InterruptedException, TimeoutException {
  // [START bigquery_load_from_file]
  TableId tableId = TableId.of(datasetName, tableName);
  WriteChannelConfiguration writeChannelConfiguration =
      WriteChannelConfiguration.newBuilder(tableId).setFormatOptions(FormatOptions.csv()).build();
  // Generally, location can be inferred based on the location of the referenced dataset.
  // However,
  // it can also be set explicitly to force job execution to be routed to a specific processing
  // location.  See https://cloud.google.com/bigquery/docs/locations for more info.
  JobId jobId = JobId.newBuilder().setLocation(location).build();
  TableDataWriteChannel writer = bigquery.writer(jobId, writeChannelConfiguration);
  // Write data to writer
  try (OutputStream stream = Channels.newOutputStream(writer)) {
    Files.copy(csvPath, stream);
  } finally {
    writer.close();
  }
  // Get load job
  Job job = writer.getJob();
  job = job.waitFor();
  LoadStatistics stats = job.getStatistics();
  return stats.getOutputRows();
  // [END bigquery_load_from_file]
}
 
Example #17
Source File: BigQuerySnippets.java    From google-cloud-java with Apache License 2.0
/** Example of loading a newline-delimited-json file with textual fields from GCS to a table. */
// [TARGET create(JobInfo, JobOption...)]
// [VARIABLE "my_dataset_name"]
// [VARIABLE "my_table_name"]
public Long writeRemoteFileToTable(String datasetName, String tableName)
    throws InterruptedException {
  // [START bigquery_load_table_gcs_json]
  String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.json";
  TableId tableId = TableId.of(datasetName, tableName);
  // Table field definition
  Field[] fields =
      new Field[] {
        Field.of("name", LegacySQLTypeName.STRING),
        Field.of("post_abbr", LegacySQLTypeName.STRING)
      };
  // Table schema definition
  Schema schema = Schema.of(fields);
  LoadJobConfiguration configuration =
      LoadJobConfiguration.builder(tableId, sourceUri)
          .setFormatOptions(FormatOptions.json())
          .setCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
          .setSchema(schema)
          .build();
  // Load the table
  Job loadJob = bigquery.create(JobInfo.of(configuration));
  loadJob = loadJob.waitFor();
  // Check the table
  System.out.println("State: " + loadJob.getStatus().getState());
  return ((StandardTableDefinition) bigquery.getTable(tableId).getDefinition()).getNumRows();
  // [END bigquery_load_table_gcs_json]
}
 
Example #18
Source File: BigQueryExample.java    From google-cloud-java with Apache License 2.0
@Override
TableInfo parse(String... args) throws Exception {
  if (args.length >= 5) {
    String dataset = args[0];
    String table = args[1];
    TableId tableId = TableId.of(dataset, table);
    ExternalTableDefinition externalTableDefinition =
        ExternalTableDefinition.of(
            args[args.length - 1],
            parseSchema(args, 3, args.length - 1),
            FormatOptions.of(args[2]));
    return TableInfo.of(tableId, externalTableDefinition);
  }
  throw new IllegalArgumentException("Missing required arguments.");
}
 
Example #19
Source File: BigQueryExample.java    From google-cloud-java with Apache License 2.0
@Override
JobInfo parse(String... args) throws Exception {
  if (args.length >= 4) {
    String dataset = args[0];
    String table = args[1];
    String format = args[2];
    TableId tableId = TableId.of(dataset, table);
    LoadJobConfiguration configuration =
        LoadJobConfiguration.of(
            tableId, Arrays.asList(args).subList(3, args.length), FormatOptions.of(format));
    return JobInfo.of(configuration);
  }
  throw new IllegalArgumentException("Missing required arguments.");
}
 
Example #20
Source File: BigQueryExample.java    From google-cloud-java with Apache License 2.0
@Override
Tuple<WriteChannelConfiguration, String> parse(String... args) throws Exception {
  if (args.length == 4) {
    String dataset = args[0];
    String table = args[1];
    String format = args[2];
    TableId tableId = TableId.of(dataset, table);
    WriteChannelConfiguration configuration =
        WriteChannelConfiguration.of(tableId, FormatOptions.of(format));
    return Tuple.of(configuration, args[3]);
  }
  throw new IllegalArgumentException("Missing required arguments.");
}
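Examples #18 through #20 build FormatOptions from a user-supplied string with FormatOptions.of(String). A short sketch of that lookup, using the type-string constants FormatOptions exposes (an illustration, not exhaustive; note that FormatOptions.json() corresponds to the type string NEWLINE_DELIMITED_JSON):

import com.google.cloud.bigquery.FormatOptions;

public class FormatOptionsOfSketch {
  public static void main(String[] args) {
    // of(String) resolves a format by its BigQuery type string; the strings
    // are also available as constants on FormatOptions.
    FormatOptions csv = FormatOptions.of(FormatOptions.CSV);         // "CSV"
    FormatOptions json = FormatOptions.of(FormatOptions.JSON);       // "NEWLINE_DELIMITED_JSON"
    FormatOptions avro = FormatOptions.of(FormatOptions.AVRO);       // "AVRO"
    FormatOptions parquet = FormatOptions.of(FormatOptions.PARQUET); // "PARQUET"

    System.out.println(csv.getType() + ", " + json.getType() + ", "
        + avro.getType() + ", " + parquet.getType());
  }
}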
 
Example #21
Source File: SparkBigQueryConfig.java    From spark-bigquery-connector with Apache License 2.0
public static SparkBigQueryConfig from(
        DataSourceOptions options,
        ImmutableMap<String, String> globalOptions,
        Configuration hadoopConfiguration,
        int defaultParallelism) {
    SparkBigQueryConfig config = new SparkBigQueryConfig();

    String tableParam = getRequiredOption(options, "table");
    Optional<String> datasetParam = getOption(options, "dataset");
    Optional<String> projectParam = firstPresent(getOption(options, "project"),
            Optional.ofNullable(hadoopConfiguration.get(GCS_CONFIG_PROJECT_ID_PROPERTY)));
    config.tableId = parseTableId(tableParam, datasetParam, projectParam);
    config.parentProjectId = getAnyOption(globalOptions, options, "parentProject");
    config.credentialsKey = getAnyOption(globalOptions, options, "credentials");
    config.credentialsFile = firstPresent(getAnyOption(globalOptions, options, "credentialsFile"),
            Optional.ofNullable(hadoopConfiguration.get(GCS_CONFIG_CREDENTIALS_FILE_PROPERTY)));
    config.accessToken = getAnyOption(globalOptions, options, "gcpAccessToken");
    config.filter = getOption(options, "filter");
    config.maxParallelism = toOptionalInt(getOptionFromMultipleParams(
            options, ImmutableList.of("maxParallelism", "parallelism"), DEFAULT_FALLBACK)
            .map(Integer::valueOf));
    config.defaultParallelism = defaultParallelism;
    config.temporaryGcsBucket = getAnyOption(globalOptions, options, "temporaryGcsBucket");
    config.intermediateFormat = getAnyOption(globalOptions, options, INTERMEDIATE_FORMAT_OPTION)
            .map(String::toUpperCase)
            .map(FormatOptions::of)
            .orElse(DEFAULT_INTERMEDIATE_FORMAT);
    if (!PERMITTED_INTERMEDIATE_FORMATS.contains(config.intermediateFormat)) {
        throw new IllegalArgumentException(
                format("Intermediate format '%s' is not supported. Supported formats are %s",
                        config.intermediateFormat.getType(),
                        PERMITTED_INTERMEDIATE_FORMATS.stream()
                                .map(FormatOptions::getType)
                                .collect(joining(","))));
    }
    String readDataFormatParam = getAnyOption(globalOptions, options, READ_DATA_FORMAT_OPTION)
            .map(String::toUpperCase)
            .orElse(DEFAULT_READ_DATA_FORMAT.toString());
    if (!PERMITTED_READ_DATA_FORMATS.contains(readDataFormatParam)) {
        throw new IllegalArgumentException(
                format("Data read format '%s' is not supported. Supported formats are '%s'", readDataFormatParam, String.join(",", PERMITTED_READ_DATA_FORMATS))
        );
    }
    config.readDataFormat = DataFormat.valueOf(readDataFormatParam);
    config.combinePushedDownFilters = getAnyBooleanOption(
            globalOptions, options, "combinePushedDownFilters", true);
    config.viewsEnabled = getAnyBooleanOption(
            globalOptions, options, VIEWS_ENABLED_OPTION, false);
    config.materializationProject =
            getAnyOption(globalOptions, options,
                    ImmutableList.of("materializationProject", "viewMaterializationProject"));
    config.materializationDataset =
            getAnyOption(globalOptions, options,
                    ImmutableList.of("materializationDataset", "viewMaterializationDataset"));

    config.partitionField = getOption(options, "partitionField");
    config.partitionExpirationMs = toOptionalLong(getOption(options, "partitionExpirationMs").map(Long::valueOf));
    config.partitionRequireFilter = getOption(options, "partitionRequireFilter").map(Boolean::valueOf);
    config.partitionType = getOption(options, "partitionType");
    config.clusteredFields = getOption(options, "clusteredFields").map(s -> s.split(","));

    config.createDisposition = getOption(options, "createDisposition")
            .map(String::toUpperCase)
            .map(JobInfo.CreateDisposition::valueOf);

    config.optimizedEmptyProjection = getAnyBooleanOption(
            globalOptions, options, "optimizedEmptyProjection", true);

    boolean allowFieldAddition = getAnyBooleanOption(
            globalOptions, options, "allowFieldAddition", false);
    boolean allowFieldRelaxation = getAnyBooleanOption(
            globalOptions, options, "allowFieldRelaxation", false);
    ImmutableList.Builder<JobInfo.SchemaUpdateOption> loadSchemaUpdateOptions = ImmutableList.builder();
    if (allowFieldAddition) {
        loadSchemaUpdateOptions.add(JobInfo.SchemaUpdateOption.ALLOW_FIELD_ADDITION);
    }
    if (allowFieldRelaxation) {
        loadSchemaUpdateOptions.add(JobInfo.SchemaUpdateOption.ALLOW_FIELD_RELAXATION);
    }
    config.loadSchemaUpdateOptions = loadSchemaUpdateOptions.build();

    return config;
}
 
Example #22
Source File: SparkBigQueryConfigTest.java    From spark-bigquery-connector with Apache License 2.0
@Test
public void testConfigFromOptions() {
    Configuration hadoopConfiguration = new Configuration();
    DataSourceOptions options = new DataSourceOptions(ImmutableMap.<String, String>builder()
            .put("table","test_t")
            .put("dataset","test_d")
            .put("project","test_p")
            .put("filter","test > 0")
            .put("parentProject","test_pp")
            .put("maxParallelism","99")
            .put("viewsEnabled","true")
            .put("viewMaterializationProject","vmp")
            .put("viewMaterializationDataset","vmd")
            .put("readDataFormat","ARROW")
            .put("optimizedEmptyProjection","false")
            .put("createDisposition","CREATE_NEVER")
            .put("temporaryGcsBucket","some_bucket")
            .put("intermediateFormat","ORC")
            .put("partitionRequireFilter", "true")
            .put("partitionField","some_field")
            .put("partitionExpirationMs","999")
            .put("clusteredFields","field1,field2")
            .put("allowFieldAddition","true")
            .put("allowFieldRelaxation","true")
    .build());
    SparkBigQueryConfig config = SparkBigQueryConfig.from(options, ImmutableMap.of(), hadoopConfiguration, 10);
    assertThat(config.getTableId()).isEqualTo(TableId.of("test_p", "test_d", "test_t"));
    assertThat(config.getFilter()).isEqualTo(Optional.of("test > 0"));
    assertThat(config.getSchema()).isEqualTo(Optional.empty());
    assertThat(config.getMaxParallelism()).isEqualTo(OptionalInt.of(99));
    assertThat(config.getTemporaryGcsBucket()).isEqualTo(Optional.of("some_bucket"));
    assertThat(config.getIntermediateFormat()).isEqualTo(FormatOptions.orc());
    assertThat(config.getReadDataFormat()).isEqualTo(DataFormat.ARROW);
    assertThat(config.getMaterializationProject()).isEqualTo(Optional.of("vmp"));
    assertThat(config.getMaterializationDataset()).isEqualTo(Optional.of("vmd"));
    assertThat(config.getPartitionField()).isEqualTo(Optional.of("some_field"));
    assertThat(config.getPartitionExpirationMs()).isEqualTo(OptionalLong.of(999));
    assertThat(config.getPartitionRequireFilter()).isEqualTo(Optional.of(true));
    assertThat(config.getClusteredFields().get()).isEqualTo(new String[] {"field1", "field2"});
    assertThat(config.getCreateDisposition()).isEqualTo(Optional.of(JobInfo.CreateDisposition.CREATE_NEVER));
    assertThat(config.getLoadSchemaUpdateOptions()).isEqualTo(ImmutableList.of(JobInfo.SchemaUpdateOption.ALLOW_FIELD_ADDITION, JobInfo.SchemaUpdateOption.ALLOW_FIELD_RELAXATION));
    assertThat(config.getViewExpirationTimeInHours()).isEqualTo(24);
    assertThat(config.getMaxReadRowsRetries()).isEqualTo(3);
}
 
Example #23
Source File: SparkBigQueryConfig.java    From spark-bigquery-connector with Apache License 2.0
public FormatOptions getIntermediateFormat() {
    return intermediateFormat;
}
 
Example #24
Source File: BigQueryOperations.java    From spring-cloud-gcp with Apache License 2.0
/**
 * Writes data to a specified BigQuery table.
 *
 * @param tableName name of the table to write to
 * @param inputStream input stream of the table data to write
 * @param dataFormatOptions the format of the data to write
 * @return {@link ListenableFuture} containing the BigQuery Job indicating completion of
 * the operation
 *
 * @throws BigQueryException if errors occur when loading data to the BigQuery table
 */
ListenableFuture<Job> writeDataToTable(
		String tableName, InputStream inputStream, FormatOptions dataFormatOptions);
 
Example #25
Source File: BigQueryFileMessageHandler.java    From spring-cloud-gcp with Apache License 2.0
/**
 * Sets the handler's {@link FormatOptions} which describe the type/format of data files being
 * loaded. This overwrites any previous settings made by {@link #setFormatOptionsExpression}.
 * @param formatOptions the format of the data file being loaded
 */
public void setFormatOptions(FormatOptions formatOptions) {
	Assert.notNull(formatOptions, "Format options must not be null.");
	this.formatOptionsExpression = new ValueExpression<>(formatOptions);
}