Java Code Examples for com.google.api.services.bigquery.model.TableSchema

The following examples show how to use com.google.api.services.bigquery.model.TableSchema. These examples are extracted from open source projects; where available, the source project, file, and license are noted above each example.
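As a quick orientation, nearly every example below follows the same pattern: build a list of TableFieldSchema objects and attach it to a TableSchema via setFields. A minimal sketch of that pattern (the column names and types are illustrative placeholders, not taken from any of the projects below):

import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableSchema;

import java.util.Arrays;

public class TableSchemaSketch {
  public static TableSchema exampleSchema() {
    // Name and type are required for each column; mode defaults to NULLABLE when unset.
    return new TableSchema()
        .setFields(
            Arrays.asList(
                new TableFieldSchema().setName("id").setType("INTEGER").setMode("REQUIRED"),
                new TableFieldSchema().setName("payload").setType("STRING")));
  }
}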
Example 1
@Override
public TableSchema getSchema(String targetTable) {
  Map<String, KV<Schema, Schema>> schemaMap = this.sideInput(schemaMapView);
  KV<Schema, Schema> keyAndValueSchemas = schemaMap.get(targetTable);

  TableFieldSchema rowSchema = new TableFieldSchema()
      .setName("fullRecord")
      .setType("RECORD")
      .setMode("NULLABLE")   // This field is null for deletions
      .setFields(BigQueryUtils.toTableSchema(keyAndValueSchemas.getValue()).getFields());

  TableFieldSchema pkSchema = new TableFieldSchema()
      .setName("primaryKey")
      .setType("RECORD")
      .setFields(BigQueryUtils.toTableSchema(keyAndValueSchemas.getKey()).getFields());

  TableSchema changelogTableSchema = new TableSchema()
      .setFields(Arrays.asList(
          rowSchema,
          pkSchema,
          new TableFieldSchema().setName("operation").setType("STRING"),
          new TableFieldSchema().setName("timestampMs").setType("INT64"),
          new TableFieldSchema().setName("tableName").setType("STRING")));

  return changelogTableSchema;
}
 
Example 2
@Override
public TableSchema getSchema(KV<String, TableRow> destination) {
  TableRow bqRow = destination.getValue();
  TableSchema schema = new TableSchema();
  List<TableFieldSchema> fields = new ArrayList<TableFieldSchema>();
  List<TableCell> cells = bqRow.getF();
  for (int i = 0; i < cells.size(); i++) {
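    // A TableCell is a GenericJson, i.e. a Map<String, Object>; this pipeline
    // stores the column header as the cell's only key.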
    Map<String, Object> object = cells.get(i);
    String header = object.keySet().iterator().next();
    // Currently all BQ data types are set to String.
    fields.add(new TableFieldSchema().setName(checkHeaderName(header)).setType("STRING"));
  }

  schema.setFields(fields);
  return schema;
}
 
Example 3
Source Project: beam   Source File: BigQueryIOWriteTest.java    License: Apache License 2.0
@Test
public void testWriteEmptyPCollection() throws Exception {
  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(new TableFieldSchema().setName("number").setType("INTEGER")));

  p.apply(Create.empty(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withTestServices(fakeBqServices)
              .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND)
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withSchema(schema)
              .withoutValidation());
  p.run();

  checkNotNull(
      fakeDatasetService.getTable(
          BigQueryHelpers.parseTableSpec("project-id:dataset-id.table-id")));
}
 
Example 4
Source Project: feast   Source File: BigQueryFeatureSink.java    License: Apache License 2.0
/** @param featureSetSpecs Feature set specs to be written */
@Override
public PCollection<FeatureSetReference> prepareWrite(
    PCollection<KV<FeatureSetReference, FeatureSetProto.FeatureSetSpec>> featureSetSpecs) {
  PCollection<KV<FeatureSetReference, TableSchema>> schemas =
      featureSetSpecs
          .apply(
              "GenerateTableSchema",
              ParDo.of(
                  new FeatureSetSpecToTableSchema(
                      DatasetId.of(getProjectId(), getDatasetId()), getBQClient())))
          .setCoder(
              KvCoder.of(
                  AvroCoder.of(FeatureSetReference.class),
                  FeatureSetSpecToTableSchema.TableSchemaCoder.of()));

  schemasView =
      schemas
          .apply("ReferenceString", ParDo.of(new ReferenceToString()))
          .apply("View", View.asMultimap());

  return schemas.apply("Ready", Keys.create());
}
 
Example 5
Source Project: feast   Source File: FeatureSetSpecToTableSchema.java    License: Apache License 2.0
@ProcessElement
public void processElement(
    @Element KV<FeatureSetReference, FeatureSetProto.FeatureSetSpec> element,
    OutputReceiver<KV<FeatureSetReference, TableSchema>> output,
    ProcessContext context) {
  String specKey = element.getKey().getReference();

  Table existingTable = getExistingTable(specKey);
  Schema schema = createSchemaFromSpec(element.getValue(), specKey, existingTable);

  if (existingTable == null) {
    createTable(specKey, schema);
  }

  output.output(KV.of(element.getKey(), serializeSchema(schema)));
}
 
Example 6
@Override
public TableSchema getSchema(KV<TableId, TableRow> destination) {

  TableRow bqRow = destination.getValue();
  TableSchema schema = new TableSchema();
  List<TableFieldSchema> fields = new ArrayList<TableFieldSchema>();
  List<TableCell> cells = bqRow.getF();
  for (int i = 0; i < cells.size(); i++) {
    Map<String, Object> object = cells.get(i);
    String header = object.keySet().iterator().next();
    // Currently all BQ data types are set to String.
    // Why do we use checkHeaderName here and not elsewhere, TODO if we add this back in
    // fields.add(new TableFieldSchema().setName(checkHeaderName(header)).setType("STRING"));
    fields.add(new TableFieldSchema().setName(header).setType("STRING"));
  }

  schema.setFields(fields);
  return schema;
}
 
Example 7
Source Project: beam   Source File: BigQueryUtilsTest.java    License: Apache License 2.0
@Test
public void testToTableSchema_row() {
  TableSchema schema = toTableSchema(ROW_TYPE);

  assertThat(schema.getFields().size(), equalTo(1));
  TableFieldSchema field = schema.getFields().get(0);
  assertThat(field.getName(), equalTo("row"));
  assertThat(field.getType(), equalTo(StandardSQLTypeName.STRUCT.toString()));
  assertThat(field.getMode(), nullValue());
  assertThat(
      field.getFields(),
      containsInAnyOrder(
          ID,
          VALUE,
          NAME,
          TIMESTAMP_VARIANT1,
          TIMESTAMP_VARIANT2,
          TIMESTAMP_VARIANT3,
          TIMESTAMP_VARIANT4,
          VALID,
          BINARY));
}
 
Example 8
/**
 * Setup step {A}
 * Helper method that defines the BigQuery schema used for the output.
 */
private static TableSchema getWebResourceSchema() {
	List<TableFieldSchema> fields = new ArrayList<>();
	fields.add(new TableFieldSchema().setName("WebResourceHash").setType("STRING").setMode("REQUIRED"));
	fields.add(new TableFieldSchema().setName("Url").setType("STRING"));
	fields.add(new TableFieldSchema().setName("PublicationTime").setType("TIMESTAMP").setMode("REQUIRED"));
	fields.add(new TableFieldSchema().setName("PublicationDateId").setType("INTEGER").setMode("REQUIRED"));
	fields.add(new TableFieldSchema().setName("ProcessingTime").setType("TIMESTAMP").setMode("REQUIRED"));
	fields.add(new TableFieldSchema().setName("ProcessingDateId").setType("INTEGER").setMode("REQUIRED"));
	fields.add(new TableFieldSchema().setName("DocumentHash").setType("STRING"));
	fields.add(new TableFieldSchema().setName("DocumentCollectionId").setType("STRING"));
	fields.add(new TableFieldSchema().setName("CollectionItemId").setType("STRING"));
	fields.add(new TableFieldSchema().setName("Title").setType("STRING"));
	fields.add(new TableFieldSchema().setName("Domain").setType("STRING"));
	fields.add(new TableFieldSchema().setName("Author").setType("STRING"));
	fields.add(new TableFieldSchema().setName("ParentWebResourceHash").setType("STRING"));

	TableSchema schema = new TableSchema().setFields(fields);
	return schema;
}
 
Example 9
Source Project: quetzal   Source File: BigQueryLoader.java    License: Eclipse Public License 2.0
public static void write(String table, TableSchema schema, PCollection<JSONObject> data) {
	data.apply("convert to TableRow", ParDo.of(new DoFn<JSONObject,TableRow>() {
		private static final long serialVersionUID = -4204128594221801617L;
		@SuppressWarnings("unchecked")
		@ProcessElement
		public void processElement(ProcessContext c) {
			JSONObject obj = c.element();
			TableRow x = new TableRow();
			obj.keySet().forEach((Object key) -> {
				x.set((String) key, obj.get(key));
			});
			c.output(x);
		}
	})).apply(BigQueryIO.Write
			.withTableDescription(table)
			.withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)
			.withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
			.withSchema(schema)
			.to(table));
}
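This example targets the older Dataflow SDK, in which BigQueryIO.Write exposed static builder methods. On current Beam releases the same write goes through BigQueryIO.writeTableRows(); a minimal sketch, assuming rows is the PCollection<TableRow> produced by the conversion step above:

// Sketch against the Beam 2.x API; rows, table, and schema as in the original method.
rows.apply("write to BigQuery",
    BigQueryIO.writeTableRows()
        .to(table)
        .withSchema(schema)
        .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)
        .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED));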
 
Example 10
Source Project: beam   Source File: BigQueryIOWriteTest.java    License: Apache License 2.0
@Test
public void testWriteValidateFailsBothFormatFunctions() {
  p.enableAbandonedNodeEnforcement(false);

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage(
      "Only one of withFormatFunction or withAvroFormatFunction/withAvroWriter maybe set, not both.");
  p.apply(Create.empty(INPUT_RECORD_CODER))
      .apply(
          BigQueryIO.<InputRecord>write()
              .to("dataset.table")
              .withSchema(new TableSchema())
              .withFormatFunction(r -> new TableRow())
              .withAvroFormatFunction(r -> new GenericData.Record(r.getSchema()))
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED));
}
 
Example 11
Source Project: beam   Source File: FakeJobService.java    License: Apache License 2.0
private JobStatus runExtractJob(Job job, JobConfigurationExtract extract)
    throws InterruptedException, IOException {
  TableReference sourceTable = extract.getSourceTable();

  List<TableRow> rows =
      datasetService.getAllRows(
          sourceTable.getProjectId(), sourceTable.getDatasetId(), sourceTable.getTableId());
  TableSchema schema = datasetService.getTable(sourceTable).getSchema();
  List<Long> destinationFileCounts = Lists.newArrayList();
  for (String destination : extract.getDestinationUris()) {
    destinationFileCounts.add(writeRows(sourceTable.getTableId(), rows, schema, destination));
  }
  job.setStatistics(
      new JobStatistics()
          .setExtract(new JobStatistics4().setDestinationUriFileCounts(destinationFileCounts)));
  return new JobStatus().setState("DONE");
}
 
Example 12
Source Project: beam   Source File: BigQueryIOIT.java    License: Apache License 2.0
private void testWrite(BigQueryIO.Write<byte[]> writeIO, String metricName) {
  Pipeline pipeline = Pipeline.create(options);

  BigQueryIO.Write.Method method = BigQueryIO.Write.Method.valueOf(options.getWriteMethod());
  pipeline
      .apply("Read from source", Read.from(new SyntheticBoundedSource(sourceOptions)))
      .apply("Gather time", ParDo.of(new TimeMonitor<>(NAMESPACE, metricName)))
      .apply("Map records", ParDo.of(new MapKVToV()))
      .apply(
          "Write to BQ",
          writeIO
              .to(tableQualifier)
              .withCustomGcsTempLocation(ValueProvider.StaticValueProvider.of(tempRoot))
              .withMethod(method)
              .withSchema(
                  new TableSchema()
                      .setFields(
                          Collections.singletonList(
                              new TableFieldSchema().setName("data").setType("BYTES")))));

  PipelineResult pipelineResult = pipeline.run();
  pipelineResult.waitUntilFinish();
  extractAndPublishTime(pipelineResult, metricName);
}
 
Example 13
@Override
public TableSchema getSchema(KV<String, List<String>> destination) {

  TableSchema schema = Util.getSchema(destination.getValue());
  LOG.debug("***Schema {}", schema);
  return schema;
}
 
Example 14
Source Project: dlp-dataflow-deidentification   Source File: Util.java    License: Apache License 2.0
@SuppressWarnings("serial")
public static TableSchema getSchema(List<String> outputHeaders) {
  return new TableSchema()
      .setFields(
          new ArrayList<TableFieldSchema>() {

            {
              outputHeaders.forEach(
                  header -> {
                    add(new TableFieldSchema().setName(header).setType("STRING"));
                  });
            }
          });
}
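The anonymous ArrayList subclass above (double-brace initialization) is what makes the @SuppressWarnings("serial") annotation necessary. A behavior-equivalent sketch that avoids the extra class, assuming a java.util.stream.Collectors import:

public static TableSchema getSchema(List<String> outputHeaders) {
  return new TableSchema()
      .setFields(
          outputHeaders.stream()
              // Every output column is typed STRING, matching the original helper.
              .map(header -> new TableFieldSchema().setName(header).setType("STRING"))
              .collect(Collectors.toList()));
}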
 
Example 15
Source Project: dlp-dataflow-deidentification   Source File: UtilTest.java    License: Apache License 2.0
@Test
public void testGetSchema() {
  List<String> schema = new ArrayList<String>();
  schema.add("Column1");
  schema.add("Column2");

  TableSchema result = Util.getSchema(schema);

  assertEquals(result.getFields().get(0).getName(), "Column1");
  assertEquals(result.getFields().get(1).getName(), "Column2");
  assertEquals(result.getFields().get(0).getType(), "STRING");
  assertEquals(result.getFields().get(1).getType(), "STRING");
}
 
Example 16
Source Project: beam   Source File: DynamicDestinationsHelpers.java    License: Apache License 2.0
@Override
public TableSchema getSchema(TableDestination destination) {
  Map<String, String> mapValue = sideInput(schemaView);
  String schema = mapValue.get(destination.getTableSpec());
  checkArgument(
      schema != null,
      "Schema view must contain data for every destination used, "
          + "but view %s does not contain data for table destination %s "
          + "produced by %s",
      schemaView,
      destination.getTableSpec(),
      inner);
  return BigQueryHelpers.fromJsonString(schema, TableSchema.class);
}
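Note the JSON round-trip here: TableSchema is a generated API model class rather than a java.io.Serializable type, so the schema view carries schemas as JSON strings and rehydrates them with BigQueryHelpers.fromJsonString. Example 17 below shows the matching toJsonString direction.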
 
Example 17
Source Project: beam   Source File: BigQueryStorageStreamSource.java    License: Apache License 2.0
public static <T> BigQueryStorageStreamSource<T> create(
    ReadSession readSession,
    Stream stream,
    TableSchema tableSchema,
    SerializableFunction<SchemaAndRecord, T> parseFn,
    Coder<T> outputCoder,
    BigQueryServices bqServices) {
  return new BigQueryStorageStreamSource<>(
      readSession,
      stream,
      toJsonString(checkNotNull(tableSchema, "tableSchema")),
      parseFn,
      outputCoder,
      bqServices);
}
 
Example 18
Source Project: deployment-examples   Source File: ExampleUtils.java    License: MIT License
private void setupBigQueryTable(
    String projectId, String datasetId, String tableId, TableSchema schema) throws IOException {
  if (bigQueryClient == null) {
    bigQueryClient = newBigQueryClient(options.as(BigQueryOptions.class)).build();
  }

  Datasets datasetService = bigQueryClient.datasets();
  if (executeNullIfNotFound(datasetService.get(projectId, datasetId)) == null) {
    Dataset newDataset =
        new Dataset()
            .setDatasetReference(
                new DatasetReference().setProjectId(projectId).setDatasetId(datasetId));
    datasetService.insert(projectId, newDataset).execute();
  }

  Tables tableService = bigQueryClient.tables();
  Table table = executeNullIfNotFound(tableService.get(projectId, datasetId, tableId));
  if (table == null) {
    Table newTable =
        new Table()
            .setSchema(schema)
            .setTableReference(
                new TableReference()
                    .setProjectId(projectId)
                    .setDatasetId(datasetId)
                    .setTableId(tableId));
    tableService.insert(projectId, datasetId, newTable).execute();
  } else if (!table.getSchema().equals(schema)) {
    throw new RuntimeException(
        "Table exists and schemas do not match, expecting: "
            + schema.toPrettyString()
            + ", actual: "
            + table.getSchema().toPrettyString());
  }
}
 
Example 19
Source Project: quetzal   Source File: BigQueryLoader.java    License: Eclipse Public License 2.0
public static TableSchema predicateTable() {
	TableSchema x = new TableSchema();
	x.setFields(
			Arrays.asList(
					field("pred", "STRING"),
					field("onetoone", "BOOLEAN"),
					field("num_hashes", "INTEGER"),
					field("db2type", "STRING"),
					field("spills", "INTEGER"),
					field("hash0", "INTEGER"), 
					field("hash1", "INTEGER"), 
					field("hash2", "INTEGER")));
	return x;
}
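field(...) is a project-local helper not shown in this excerpt; presumably it wraps new TableFieldSchema().setName(name).setType(type).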
 
Example 20
Source Project: beam   Source File: BigQueryIOWriteTest.java    License: Apache License 2.0
@Test
public void testBuildWriteDisplayData() {
  String tableSpec = "project:dataset.table";
  TableSchema schema = new TableSchema().set("col1", "type1").set("col2", "type2");
  final String tblDescription = "foo bar table";

  BigQueryIO.Write<TableRow> write =
      BigQueryIO.writeTableRows()
          .to(tableSpec)
          .withSchema(schema)
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
          .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND)
          .withSchemaUpdateOptions(
              EnumSet.of(BigQueryIO.Write.SchemaUpdateOption.ALLOW_FIELD_ADDITION))
          .withTableDescription(tblDescription)
          .withoutValidation();

  DisplayData displayData = DisplayData.from(write);

  assertThat(displayData, hasDisplayItem("table"));
  assertThat(displayData, hasDisplayItem("schema"));
  assertThat(
      displayData,
      hasDisplayItem(
          "createDisposition", BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED.toString()));
  assertThat(
      displayData,
      hasDisplayItem(
          "writeDisposition", BigQueryIO.Write.WriteDisposition.WRITE_APPEND.toString()));
  assertThat(
      displayData,
      hasDisplayItem(
          "schemaUpdateOptions",
          EnumSet.of(BigQueryIO.Write.SchemaUpdateOption.ALLOW_FIELD_ADDITION).toString()));
  assertThat(displayData, hasDisplayItem("tableDescription", tblDescription));
  assertThat(displayData, hasDisplayItem("validation", false));
}
 
Example 21
Source Project: DataflowTemplates   Source File: BigQueryConvertersTest.java    License: Apache License 2.0
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity without a key when the
 * unique name column is missing.
 */
@Test
public void testAvroToEntityNoIdColumn() throws Exception {
  // Create test data
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName(shortStringField).setType("STRING"));
  TableSchema bqSchema = new TableSchema().setFields(fields);
  Record record =
      generateSingleFieldAvroRecord(
          shortStringField, "string", shortStringFieldDesc, shortStringFieldValue);
  SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema);
  // Run the test
  Entity outputEntity = converter.apply(inputBqData);
  assertFalse(outputEntity.hasKey());
}
 
Example 22
Source Project: DataflowTemplates   Source File: BigQueryConvertersTest.java    License: Apache License 2.0
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity without a key when the
 * unique name column exceeds the maximum size allowed of 1500 bytes.
 */
@Test
public void testAvroToEntityTooLongIdColumn() throws Exception {
  // Create test data
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName(idField).setType("STRING"));
  fields.add(new TableFieldSchema().setName(shortStringField).setType("STRING"));
  TableSchema bqSchema = new TableSchema().setFields(fields);
  Schema avroSchema =
      new Schema.Parser()
          .parse(
              String.format(
                  avroSchemaTemplate,
                  new StringBuilder()
                      .append(String.format(avroFieldTemplate, idField, "string", idFieldDesc))
                      .append(",")
                      .append(generateShortStringField())
                      .toString()));
  GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
  builder.set(idField, longStringFieldValue);
  builder.set(shortStringField, shortStringFieldValue);
  Record record = builder.build();
  SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema);
  // Run the test
  Entity outputEntity = converter.apply(inputBqData);
  assertFalse(outputEntity.hasKey());
}
 
Example 23
Source Project: beam   Source File: BigQueryIOReadTest.java    License: Apache License 2.0
@Test
public void testEstimatedSizeWithStreamingBuffer() throws Exception {
  List<TableRow> data =
      ImmutableList.of(
          new TableRow().set("name", "a").set("number", 1L),
          new TableRow().set("name", "b").set("number", 2L),
          new TableRow().set("name", "c").set("number", 3L),
          new TableRow().set("name", "d").set("number", 4L),
          new TableRow().set("name", "e").set("number", 5L),
          new TableRow().set("name", "f").set("number", 6L));

  TableReference table = BigQueryHelpers.parseTableSpec("project:data_set.table_name");
  fakeDatasetService.createDataset("project", "data_set", "", "", null);
  fakeDatasetService.createTable(
      new Table()
          .setTableReference(table)
          .setSchema(
              new TableSchema()
                  .setFields(
                      ImmutableList.of(
                          new TableFieldSchema().setName("name").setType("STRING"),
                          new TableFieldSchema().setName("number").setType("INTEGER"))))
          .setStreamingBuffer(new Streamingbuffer().setEstimatedBytes(BigInteger.valueOf(10))));
  fakeDatasetService.insertAll(table, data, null);

  String stepUuid = "testStepUuid";
  BoundedSource<TableRow> bqSource =
      BigQueryTableSourceDef.create(fakeBqServices, ValueProvider.StaticValueProvider.of(table))
          .toSource(stepUuid, TableRowJsonCoder.of(), BigQueryIO.TableRowParser.INSTANCE);

  PipelineOptions options = PipelineOptionsFactory.create();

  // Each row should have 24 bytes (see StringUtf8Coder for details):
  //   1 length byte followed by the 23-byte payload: {"name":"a","number":1}
  // The additional 10 bytes come from the estimated bytes of the Streamingbuffer.
  long expectedSize = 24L * data.size() + 10;
  assertEquals(expectedSize, bqSource.getEstimatedSizeBytes(options));
}
 
Example 24
Source Project: DataflowTemplates   Source File: BigQueryConvertersTest.java    License: Apache License 2.0
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity with a valid key when the
 * unique name column is string.
 */
@Test
public void testAvroToEntityStringIdColumn() throws Exception {
  // Create test data
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName(idField).setType("STRING"));
  fields.add(new TableFieldSchema().setName(shortStringField).setType("STRING"));
  TableSchema bqSchema = new TableSchema().setFields(fields);
  Schema avroSchema =
      new Schema.Parser()
          .parse(
              String.format(
                  avroSchemaTemplate,
                  new StringBuilder()
                      .append(String.format(avroFieldTemplate, idField, "string", idFieldDesc))
                      .append(",")
                      .append(generateShortStringField())
                      .toString()));
  GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
  builder.set(idField, idFieldValueStr);
  builder.set(shortStringField, shortStringFieldValue);
  Record record = builder.build();
  SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema);
  // Run the test
  Entity outputEntity = converter.apply(inputBqData);
  assertTrue(outputEntity.hasKey());
  assertEquals(idFieldValueStr, outputEntity.getKey().getPath(0).getName());
  validateMetadata(outputEntity);
}
 
Example 25
Source Project: DataflowTemplates   Source File: BigQueryConvertersTest.java    License: Apache License 2.0
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity with a default namespace
 * when the namespace is not specified.
 */
@Test
public void testAvroToEntityDefaultNamespace() throws Exception {
  // Create test data
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName(idField).setType("STRING"));
  fields.add(new TableFieldSchema().setName(shortStringField).setType("STRING"));
  TableSchema bqSchema = new TableSchema().setFields(fields);
  Schema avroSchema =
      new Schema.Parser()
          .parse(
              String.format(
                  avroSchemaTemplate,
                  new StringBuilder()
                      .append(String.format(avroFieldTemplate, idField, "int", idFieldDesc))
                      .append(",")
                      .append(generateShortStringField())
                      .toString()));
  GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
  builder.set(idField, 1);
  builder.set(shortStringField, shortStringFieldValue);
  Record record = builder.build();
  SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema);
  // Run the test
  AvroToEntity noNamespaceConverter =
      AvroToEntity.newBuilder()
          .setEntityKind(entityKind)
          .setUniqueNameColumn(uniqueNameColumn)
          .build();
  Entity outputEntity = noNamespaceConverter.apply(inputBqData);
  // Assess results
  assertTrue(outputEntity.hasKey());
  assertEquals("", outputEntity.getKey().getPartitionId().getNamespaceId());
}
 
Example 26
Source Project: beam   Source File: BigQueryIOWriteTest.java    License: Apache License 2.0
@Test
public void testWriteWithoutInsertId() throws Exception {
  TableRow row1 = new TableRow().set("name", "a").set("number", 1);
  TableRow row2 = new TableRow().set("name", "b").set("number", 2);
  TableRow row3 = new TableRow().set("name", "c").set("number", 3);
  p.apply(Create.of(row1, row2, row3).withCoder(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
              .withSchema(
                  new TableSchema()
                      .setFields(
                          ImmutableList.of(
                              new TableFieldSchema().setName("name").setType("STRING"),
                              new TableFieldSchema().setName("number").setType("INTEGER"))))
              .withTestServices(fakeBqServices)
              .ignoreInsertIds()
              .withoutValidation());
  p.run();
  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(row1, row2, row3));
  // Verify no insert id is added.
  assertThat(
      fakeDatasetService.getAllIds("project-id", "dataset-id", "table-id"), containsInAnyOrder());
}
 
Example 27
Source Project: quetzal   Source File: BigQueryLoader.java    License: Eclipse Public License 2.0
public static TableSchema datatypeTable() {
	TableSchema x = new TableSchema();
	x.setFields(
			Arrays.asList(
					field("datatype_name", "STRING"),
					field("datatype_id", "INTEGER")));
	return x;
}
 
Example 28
Source Project: beam   Source File: BigQuerySourceBase.java    License: Apache License 2.0
protected ExtractResult extractFiles(PipelineOptions options) throws Exception {
  BigQueryOptions bqOptions = options.as(BigQueryOptions.class);
  TableReference tableToExtract = getTableToExtract(bqOptions);
  BigQueryServices.DatasetService datasetService = bqServices.getDatasetService(bqOptions);
  Table table = datasetService.getTable(tableToExtract);
  if (table == null) {
    throw new IOException(
        String.format(
            "Cannot start an export job since table %s does not exist",
            BigQueryHelpers.toTableSpec(tableToExtract)));
  }

  TableSchema schema = table.getSchema();
  JobService jobService = bqServices.getJobService(bqOptions);
  String extractJobId = getExtractJobId(createJobIdToken(options.getJobName(), stepUuid));
  final String extractDestinationDir =
      resolveTempLocation(bqOptions.getTempLocation(), "BigQueryExtractTemp", stepUuid);
  String bqLocation =
      BigQueryHelpers.getDatasetLocation(
          datasetService, tableToExtract.getProjectId(), tableToExtract.getDatasetId());
  List<ResourceId> tempFiles =
      executeExtract(
          extractJobId,
          tableToExtract,
          jobService,
          bqOptions.getProject(),
          extractDestinationDir,
          bqLocation);
  return new ExtractResult(schema, tempFiles);
}
 
Example 29
Source Project: hadoop-connectors   Source File: BigQueryTableHelper.java    License: Apache License 2.0
/**
 * Creates {@link TableSchema} from the JSON representation of the table fields.
 *
 * @param fieldsJson JSON fields to convert to {@link TableSchema}
 * @return {@link TableSchema}
 * @throws IOException if the fields JSON cannot be parsed
 */
static TableSchema createTableSchemaFromFields(String fieldsJson) throws IOException {
  List<TableFieldSchema> fields = new ArrayList<>();
  JsonParser parser = JacksonFactory.getDefaultInstance().createJsonParser(fieldsJson);
  parser.parseArrayAndClose(fields, TableFieldSchema.class);

  return new TableSchema().setFields(fields);
}
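For reference, the fields JSON this helper expects is the array portion of a BigQuery schema definition. A hypothetical call, with made-up field names:

// Hypothetical input: a JSON array of BigQuery field definitions.
String fieldsJson =
    "[{\"name\":\"word\",\"type\":\"STRING\"},"
        + " {\"name\":\"count\",\"type\":\"INTEGER\",\"mode\":\"REQUIRED\"}]";
TableSchema schema = createTableSchemaFromFields(fieldsJson);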
 
Example 30
Source Project: beam   Source File: BigQuerySchemaUpdateOptionsIT.java    License: Apache License 2.0
@Test
public void testAllowFieldAddition() throws Exception {
  String tableName = makeTestTable();

  Set<SchemaUpdateOption> schemaUpdateOptions =
      EnumSet.of(BigQueryIO.Write.SchemaUpdateOption.ALLOW_FIELD_ADDITION);

  TableSchema newSchema =
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("new_field").setType("STRING"),
                  new TableFieldSchema().setName("optional_field").setType("STRING"),
                  new TableFieldSchema()
                      .setName("required_field")
                      .setType("STRING")
                      .setMode("REQUIRED")));

  String[] values = {"meow", "bark"};
  TableRow rowToInsert =
      new TableRow().set("new_field", values[0]).set("required_field", values[1]);

  String testQuery =
      String.format(
          "SELECT new_field, required_field FROM [%s.%s];", BIG_QUERY_DATASET_ID, tableName);

  List<List<String>> expectedResult = Arrays.asList(Arrays.asList(values));
  runWriteTest(schemaUpdateOptions, tableName, newSchema, rowToInsert, testQuery, expectedResult);
}