Java Code Examples for com.google.api.services.bigquery.model.TableFieldSchema

The following examples show how to use com.google.api.services.bigquery.model.TableFieldSchema. These examples are extracted from open source projects; the source project, file, and license are noted above each example.
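As the examples below illustrate, a TableFieldSchema describes a single column: its name, its type, an optional mode (NULLABLE, REQUIRED, or REPEATED), and, for RECORD columns, a list of nested fields. A minimal construction sketch (the column names here are illustrative, not taken from any example below):

import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableSchema;
import java.util.Arrays;

TableSchema schema = new TableSchema().setFields(Arrays.asList(
    new TableFieldSchema().setName("id").setType("STRING").setMode("REQUIRED"),
    new TableFieldSchema().setName("tags").setType("STRING").setMode("REPEATED"),
    new TableFieldSchema()
        .setName("payload")
        .setType("RECORD") // nested columns are attached to the RECORD field
        .setFields(Arrays.asList(
            new TableFieldSchema().setName("body").setType("STRING")))));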
Example 1
Source Project: beam   Source File: BigQueryUtils.java    License: Apache License 2.0
private static Schema fromTableFieldSchema(List<TableFieldSchema> tableFieldSchemas) {
  Schema.Builder schemaBuilder = Schema.builder();
  for (TableFieldSchema tableFieldSchema : tableFieldSchemas) {
    FieldType fieldType =
        fromTableFieldSchemaType(tableFieldSchema.getType(), tableFieldSchema.getFields());

    Optional<Mode> fieldMode = Optional.ofNullable(tableFieldSchema.getMode()).map(Mode::valueOf);
    if (fieldMode.filter(m -> m == Mode.REPEATED).isPresent()) {
      fieldType = FieldType.array(fieldType);
    }

    // if the mode is not defined or if it is set to NULLABLE, then the field is nullable
    boolean nullable =
        !fieldMode.isPresent() || fieldMode.filter(m -> m == Mode.NULLABLE).isPresent();
    Field field = Field.of(tableFieldSchema.getName(), fieldType).withNullable(nullable);
    if (tableFieldSchema.getDescription() != null
        && !"".equals(tableFieldSchema.getDescription())) {
      field = field.withDescription(tableFieldSchema.getDescription());
    }
    schemaBuilder.addField(field);
  }
  return schemaBuilder.build();
}
 
Example 2
@Override
public TableSchema getSchema(String targetTable) {
  Map<String, KV<Schema, Schema>> schemaMap = this.sideInput(schemaMapView);
  KV<Schema, Schema> keyAndValueSchemas = schemaMap.get(targetTable);

  TableFieldSchema rowSchema = new TableFieldSchema()
      .setName("fullRecord")
      .setType("RECORD")
      .setMode("NULLABLE")   // This field is null for deletions
      .setFields(BigQueryUtils.toTableSchema(keyAndValueSchemas.getValue()).getFields());

  TableFieldSchema pkSchema = new TableFieldSchema()
      .setName("primaryKey")
      .setType("RECORD")
      .setFields(BigQueryUtils.toTableSchema(keyAndValueSchemas.getKey()).getFields());

  TableSchema changelogTableSchema = new TableSchema()
      .setFields(Arrays.asList(
          rowSchema,
          pkSchema,
          new TableFieldSchema().setName("operation").setType("STRING"),
          new TableFieldSchema().setName("timestampMs").setType("INT64"),
          new TableFieldSchema().setName("tableName").setType("STRING")));

  return changelogTableSchema;
}
 
Example 3
Source Project: beam   Source File: BigQuerySchemaUpdateOptionsIT.java    License: Apache License 2.0
@Test
public void testAllowFieldRelaxation() throws Exception {
  String tableName = makeTestTable();

  Set<SchemaUpdateOption> schemaUpdateOptions =
      EnumSet.of(BigQueryIO.Write.SchemaUpdateOption.ALLOW_FIELD_RELAXATION);

  TableSchema newSchema =
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("optional_field").setType("STRING")));

  String value = "hellooo";
  TableRow rowToInsert = new TableRow().set("optional_field", value);

  String testQuery =
      String.format("SELECT optional_field FROM [%s.%s];", BIG_QUERY_DATASET_ID, tableName);

  List<List<String>> expectedResult = Arrays.asList(Arrays.asList(value));
  runWriteTest(schemaUpdateOptions, tableName, newSchema, rowToInsert, testQuery, expectedResult);
}
 
Example 4
@Override
public TableSchema getSchema(KV<TableId, TableRow> destination) {

  TableRow bqRow = destination.getValue();
  TableSchema schema = new TableSchema();
  List<TableFieldSchema> fields = new ArrayList<TableFieldSchema>();
  List<TableCell> cells = bqRow.getF();
  for (int i = 0; i < cells.size(); i++) {
    Map<String, Object> object = cells.get(i);
    String header = object.keySet().iterator().next();
    /** currently all BQ data types are set to String */
    // Why do we use checkHeaderName here and not elsewhere, TODO if we add this back in
    // fields.add(new TableFieldSchema().setName(checkHeaderName(header)).setType("STRING"));
    fields.add(new TableFieldSchema().setName(header).setType("STRING"));
  }

  schema.setFields(fields);
  return schema;
}
 
Example 5
Source Project: beam   Source File: BigQueryIOWriteTest.java    License: Apache License 2.0
@Test
public void testWriteEmptyPCollection() throws Exception {
  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(new TableFieldSchema().setName("number").setType("INTEGER")));

  p.apply(Create.empty(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withTestServices(fakeBqServices)
              .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND)
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withSchema(schema)
              .withoutValidation());
  p.run();

  checkNotNull(
      fakeDatasetService.getTable(
          BigQueryHelpers.parseTableSpec("project-id:dataset-id.table-id")));
}
 
Example 6
Source Project: DataflowTemplates   Source File: BigQueryConvertersTest.java    License: Apache License 2.0
/**
 * Tests that BigQueryConverters.validateKeyColumn() throws IllegalArgumentException when the
 * BigQuery column is NULL.
 */
@Test
public void testValidateKeyColumnNull() {
  TableFieldSchema column = new TableFieldSchema().setName(nullField).setType("STRING");
  Record record = generateSingleFieldAvroRecord(nullField, "null", nullFieldDesc, null);
  boolean isThrown = false;
  String message = null;
  try {
    BigQueryConverters.validateKeyColumn(column, record.get(nullField));
  } catch (IllegalArgumentException e) {
    isThrown = true;
    message = e.getMessage();
  }
  assertTrue(isThrown);
  assertTrue(message != null);
  assertEquals(
      message,
      String.format("Column [%s] with NULL value cannot be set as Entity name.", nullField));
}
 
Example 7
Source Project: DataflowTemplates   Source File: BigQueryConvertersTest.java    License: Apache License 2.0
/**
 * Tests that BigQueryConverters.validateKeyColumn() throws IllegalArgumentException when the
 * BigQuery column is a STRING exceeding 1500 bytes.
 */
@Test
public void testValidateKeyColumnStringLong() {
  TableFieldSchema column = new TableFieldSchema().setName(longStringField).setType("STRING");
  Record record =
      generateSingleFieldAvroRecord(
          longStringField, "string", longStringFieldDesc, longStringFieldValue);
  boolean isThrown = false;
  String message = null;
  try {
    BigQueryConverters.validateKeyColumn(column, record.get(longStringField));
  } catch (IllegalArgumentException e) {
    isThrown = true;
    message = e.getMessage();
  }
  assertTrue(isThrown);
  assertTrue(message != null);
  assertEquals(
      message,
      String.format(
          "Column [%s] exceeding %d bytes cannot be set as Entity name.",
          longStringField, BigQueryConverters.MAX_STRING_SIZE_BYTES));
}
 
Example 8
Source Project: hadoop-connectors   Source File: BigQueryUtilsTest.java    License: Apache License 2.0
/**
 * Tests the getSchemaFromString() method of BigQueryUtils for a simple schema.
 */
@Test
public void testGetSchemaFromString() {
  // Set fields schema for testing.
  String fields =
      "[{'name': 'MyName', 'type': 'STRING'},"
      + "{'name': 'Number', 'type': 'INTEGER', 'mode': 'sample'}]";
  List<TableFieldSchema> list = BigQueryUtils.getSchemaFromString(fields);
  assertThat(list).hasSize(2);
  assertThat(list.get(0).getName()).isEqualTo("MyName");
  assertThat(list.get(0).getType()).isEqualTo("STRING");

  assertThat(list.get(1).getName()).isEqualTo("Number");
  assertThat(list.get(1).getType()).isEqualTo("INTEGER");
  assertThat(list.get(1).getMode()).isEqualTo("sample");
}
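The schema string is simply a JSON array of column definitions. A sketch of parsing a nested column with the same method, assuming the parser accepts a nested 'fields' key for RECORD types (the column names are illustrative):

String nested =
    "[{'name': 'Address', 'type': 'RECORD', 'fields': ["
    + "{'name': 'City', 'type': 'STRING'},"
    + "{'name': 'Zip', 'type': 'INTEGER'}]}]";
List<TableFieldSchema> nestedList = BigQueryUtils.getSchemaFromString(nested);
assertThat(nestedList.get(0).getFields()).hasSize(2);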
 
Example 9
Source Project: nomulus   Source File: CheckedBigqueryTest.java    License: Apache License 2.0
@Before
public void before() throws Exception {
  when(bigquery.datasets()).thenReturn(bigqueryDatasets);
  when(bigqueryDatasets.insert(eq("Project-Id"), any(Dataset.class)))
      .thenReturn(bigqueryDatasetsInsert);
  when(bigquery.tables()).thenReturn(bigqueryTables);
  when(bigqueryTables.insert(eq("Project-Id"), any(String.class), any(Table.class)))
      .thenReturn(bigqueryTablesInsert);
  checkedBigquery = new CheckedBigquery();
  checkedBigquery.bigquery = bigquery;
  checkedBigquery.bigquerySchemas =
      new ImmutableMap.Builder<String, ImmutableList<TableFieldSchema>>()
          .put(
              "Table-Id",
              ImmutableList.of(new TableFieldSchema().setName("column1").setType(STRING.name())))
          .put(
              "Table2",
              ImmutableList.of(new TableFieldSchema().setName("column1").setType(STRING.name())))
          .build();
}
 
Example 10
Source Project: DataflowTemplates   Source File: BigQueryConvertersTest.java    License: Apache License 2.0
/**
 * Tests that BigQueryConverters.columnToValue() throws IllegalArgumentException when the BigQuery
 * column is an invalid TIMESTAMP.
 */
@Test
public void testColumnToValueTimestampInvalid() {
  TableFieldSchema column =
      new TableFieldSchema().setName(invalidTimestampField).setType("TIMESTAMP");
  Record record =
      generateSingleFieldAvroRecord(
          invalidTimestampField,
          "long",
          invalidTimestampFieldDesc,
          invalidTimestampFieldValueNanos);
  boolean isThrown = false;
  try {
    Value value = BigQueryConverters.columnToValue(column, record.get(invalidTimestampField));
  } catch (IllegalArgumentException e) {
    isThrown = true;
  }
  assertTrue(isThrown);
}
 
Example 11
Source Project: nomulus   Source File: IcannReportingStager.java    License: Apache License 2.0
/** Adds a row's values to an existing list of integers (totals). */
private void addToTotal(List<Integer> totals, Map<TableFieldSchema, Object> row) {
  List<Integer> rowVals =
      row.values()
          .stream()
          // Ignore TLD, Registrar name and IANA id
          .skip(3)
          .map((Object o) -> Integer.parseInt(o.toString()))
          .collect(toImmutableList());
  checkState(
      rowVals.size() == totals.size(),
      "Number of elements in totals not equal to number of elements in row!");
  for (int i = 0; i < rowVals.size(); i++) {
    totals.set(i, totals.get(i) + rowVals.get(i));
  }
}
 
Example 12
Source Project: nomulus   Source File: CheckedBigquery.java    License: Apache License 2.0
/** Ensures the table exists in Bigquery. */
private void ensureTable(Bigquery bigquery, TableReference table, List<TableFieldSchema> schema)
    throws IOException {
  try {
    bigquery.tables().insert(table.getProjectId(), table.getDatasetId(), new Table()
        .setSchema(new TableSchema().setFields(schema))
        .setTableReference(table))
        .execute();
    logger.atInfo().log(
        "Created BigQuery table %s:%s.%s",
        table.getProjectId(), table.getDatasetId(), table.getTableId());
  } catch (IOException e) {
    // Swallow errors about a table that exists, and throw any other ones.
    if (!BigqueryJobFailureException.create(e).getReason().equals("duplicate")) {
      throw e;
    }
  }
}
 
Example 13
Source Project: zeppelin   Source File: BigQueryInterpreter.java    License: Apache License 2.0
public static String printRows(final GetQueryResultsResponse response) {
  StringBuilder msg = new StringBuilder();
  try {
    List<String> schemNames = new ArrayList<String>();
    for (TableFieldSchema schem: response.getSchema().getFields()) {
      schemNames.add(schem.getName());
    }
    msg.append(Joiner.on(TAB).join(schemNames));
    msg.append(NEWLINE);
    for (TableRow row : response.getRows()) {
      List<String> fieldValues = new ArrayList<String>();
      for (TableCell field : row.getF()) {
        fieldValues.add(field.getV().toString());
      }
      msg.append(Joiner.on(TAB).join(fieldValues));
      msg.append(NEWLINE);
    }
    return msg.toString();
  } catch (NullPointerException ex) {
    throw new NullPointerException("SQL Execution returned an error!");
  }
}
 
Example 14
Source Project: beam   Source File: BigQueryServicesImplTest.java    License: Apache License 2.0
/** Tests that table creation succeeds when the table already exists. */
@Test
public void testCreateTableSucceedsAlreadyExists() throws IOException {
  TableReference ref =
      new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("column1").setType("String"),
                  new TableFieldSchema().setName("column2").setType("Integer")));
  Table testTable = new Table().setTableReference(ref).setSchema(schema);

  when(response.getStatusCode()).thenReturn(409); // 409 means already exists

  BigQueryServicesImpl.DatasetServiceImpl services =
      new BigQueryServicesImpl.DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
  Table ret =
      services.tryCreateTable(
          testTable, new RetryBoundedBackOff(0, BackOff.ZERO_BACKOFF), Sleeper.DEFAULT);

  assertNull(ret);
  verify(response, times(1)).getStatusCode();
  verify(response, times(1)).getContent();
  verify(response, times(1)).getContentType();
}
 
Example 15
Source Project: DataflowTemplates   Source File: BigQueryConvertersTest.java    License: Apache License 2.0
/**
 * Tests that {@link BigQueryConverters.AvroToEntity} creates an Entity without a valid key when a
 * field is of type Record.
 */
@Test
public void testAvroToEntityRecordField() throws Exception {
  // Create test data
  TableFieldSchema column = generateNestedTableFieldSchema();
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(column);
  TableSchema bqSchema = new TableSchema().setFields(fields);
  Record record = generateNestedAvroRecord();
  SchemaAndRecord inputBqData = new SchemaAndRecord(record, bqSchema);
  // Run the test
  Entity outputEntity = converter.apply(inputBqData);
  // Assess results
  String expectedCauseMessage = String.format("Column [address] of type [RECORD] not supported.");
  assertTrue(!outputEntity.hasKey());
  assertEquals(
      expectedCauseMessage, outputEntity.getPropertiesMap().get("cause").getStringValue());
  assertEquals(record.toString(), outputEntity.getPropertiesMap().get("row").getStringValue());
}
 
Example 16
Source Project: dataflow-opinion-analysis   Source File: IndexerPipeline.java    License: Apache License 2.0
/**
 * Setup step {A}
 * Helper method that defines the BigQuery schema used for the output.
 */
private static TableSchema getWebResourceSchema() {
	List<TableFieldSchema> fields = new ArrayList<>();
	fields.add(new TableFieldSchema().setName("WebResourceHash").setType("STRING").setMode("REQUIRED"));
	fields.add(new TableFieldSchema().setName("Url").setType("STRING"));
	fields.add(new TableFieldSchema().setName("PublicationTime").setType("TIMESTAMP").setMode("REQUIRED"));
	fields.add(new TableFieldSchema().setName("PublicationDateId").setType("INTEGER").setMode("REQUIRED"));
	fields.add(new TableFieldSchema().setName("ProcessingTime").setType("TIMESTAMP").setMode("REQUIRED"));
	fields.add(new TableFieldSchema().setName("ProcessingDateId").setType("INTEGER").setMode("REQUIRED"));
	fields.add(new TableFieldSchema().setName("DocumentHash").setType("STRING"));
	fields.add(new TableFieldSchema().setName("DocumentCollectionId").setType("STRING"));
	fields.add(new TableFieldSchema().setName("CollectionItemId").setType("STRING"));
	fields.add(new TableFieldSchema().setName("Title").setType("STRING"));
	fields.add(new TableFieldSchema().setName("Domain").setType("STRING"));
	fields.add(new TableFieldSchema().setName("Author").setType("STRING"));
	fields.add(new TableFieldSchema().setName("ParentWebResourceHash").setType("STRING"));
	fields.add(new TableFieldSchema().setName("MetaFields").setType("STRING").setMode("REPEATED"));

	TableSchema schema = new TableSchema().setFields(fields);
	return schema;
}
 
Example 17
Source Project: nomulus   Source File: IcannReportingStager.java    License: Apache License 2.0
/** Creates and stores activity reports on GCS, returns a list of files stored. */
private ImmutableList<String> stageActivityReports(
    YearMonth yearMonth,
    String subdir,
    String headerRow,
    ImmutableCollection<Map<TableFieldSchema, Object>> rows)
    throws IOException {
  ImmutableList.Builder<String> manifestBuilder = new ImmutableList.Builder<>();
  // Create a report csv for each tld from query table, and upload to GCS
  for (Map<TableFieldSchema, Object> row : rows) {
    // Get the tld (first cell in each row)
    String tld = row.values().iterator().next().toString();
    if (isNullOrEmpty(tld)) {
      throw new RuntimeException("Found an empty row in the activity report table!");
    }
    ImmutableList<String> rowStrings = ImmutableList.of(constructRow(row.values()));
    // Create and upload the activity report with a single row
    manifestBuilder.add(
        saveReportToGcs(
            tld, yearMonth, subdir, createReport(headerRow, rowStrings), ReportType.ACTIVITY));
  }
  return manifestBuilder.build();
}
 
Example 18
Source Project: beam   Source File: BigQueryAvroUtils.java    License: Apache License 2.0
private static TableRow convertGenericRecordToTableRow(
    GenericRecord record, List<TableFieldSchema> fields) {
  TableRow row = new TableRow();
  for (TableFieldSchema subSchema : fields) {
    // Per https://cloud.google.com/bigquery/docs/reference/v2/tables#schema, the name field
    // is required, so it may not be null.
    Field field = record.getSchema().getField(subSchema.getName());
    Object convertedValue =
        getTypedCellValue(field.schema(), subSchema, record.get(field.name()));
    if (convertedValue != null) {
      // To match the JSON files exported by BigQuery, do not include null values in the output.
      row.set(field.name(), convertedValue);
    }
  }

  return row;
}
 
Example 19
Source Project: gcp-ingestion   Source File: Json.java    License: Mozilla Public License 2.0
/**
 * Read a {@link Schema} from a byte array.
 *
 * <p>{@link Schema} does not natively support Jackson deserialization, so we rely on a
 * roundabout method inspired by https://github.com/googleapis/google-cloud-java/issues/2753.
 *
 * @exception IOException if {@code data} does not contain a valid {@link Schema}.
 */
public static Schema readBigQuerySchema(byte[] data) throws IOException {
  List<TableFieldSchema> fieldsList = (List<TableFieldSchema>) JSON_FACTORY //
      .createJsonParser(new String(data, Charsets.UTF_8)) //
      .parseArray(ArrayList.class, TableFieldSchema.class);
  TableSchema tableSchema = new TableSchema().setFields(fieldsList);

  try {
    return (Schema) SCHEMA_FROM_PB.invoke(null, tableSchema);
  } catch (IllegalAccessException | InvocationTargetException e) {
    throw new RuntimeException(e);
  }
}
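Callers pass this method the raw bytes of a BigQuery schema JSON file. A usage sketch (the file path is hypothetical):

// Hypothetical file containing e.g. [{"name": "id", "type": "STRING"}]
byte[] data = java.nio.file.Files.readAllBytes(java.nio.file.Paths.get("bq-schema.json"));
Schema schema = Json.readBigQuerySchema(data);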
 
Example 20
Source Project: feast   Source File: FeatureSetSpecToTableSchema.java    License: Apache License 2.0
/**
 * Converts a table schema into a JSON-like object (prepared for serialization).
 *
 * @param schema bq table schema
 * @return json-like schema
 */
private TableSchema serializeSchema(Schema schema) {
  TableSchema tableSchema = new TableSchema();
  FieldList fields = schema.getFields();
  List<TableFieldSchema> tableFieldSchemas =
      fields.stream()
          .map(
              field -> {
                TableFieldSchema f =
                    new TableFieldSchema()
                        .setName(field.getName())
                        .setType(field.getType().name());

                if (field.getMode() != null) {
                  f.setMode(field.getMode().name());
                }

                if (field.getDescription() != null) {
                  f.setDescription(field.getDescription());
                }
                return f;
              })
          .collect(Collectors.toList());

  tableSchema.setFields(tableFieldSchemas);
  return tableSchema;
}
 
Example 21
Source Project: beam   Source File: BigQueryUtils.java    License: Apache License 2.0
private static List<TableFieldSchema> toTableFieldSchema(Schema schema) {
  List<TableFieldSchema> fields = new ArrayList<>(schema.getFieldCount());
  for (Field schemaField : schema.getFields()) {
    FieldType type = schemaField.getType();

    TableFieldSchema field = new TableFieldSchema().setName(schemaField.getName());
    if (schemaField.getDescription() != null && !"".equals(schemaField.getDescription())) {
      field.setDescription(schemaField.getDescription());
    }

    if (!schemaField.getType().getNullable()) {
      field.setMode(Mode.REQUIRED.toString());
    }
    if (type.getTypeName().isCollectionType()) {
      type = type.getCollectionElementType();
      if (type.getTypeName().isCollectionType() || type.getTypeName().isMapType()) {
        throw new IllegalArgumentException("Array of collection is not supported in BigQuery.");
      }
      field.setMode(Mode.REPEATED.toString());
    }
    if (TypeName.ROW == type.getTypeName()) {
      Schema subType = type.getRowSchema();
      field.setFields(toTableFieldSchema(subType));
    }
    if (TypeName.MAP == type.getTypeName()) {
      throw new IllegalArgumentException("Maps are not supported in BigQuery.");
    }
    field.setType(toStandardSQLTypeName(type).toString());

    fields.add(field);
  }
  return fields;
}
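This helper is the recursive core behind Beam's public BigQueryUtils.toTableSchema, and the inverse of Example 1's fromTableFieldSchema. A round-trip sketch using the public API (the field names are illustrative):

import com.google.api.services.bigquery.model.TableSchema;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryUtils;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.schemas.Schema.FieldType;

Schema beamSchema = Schema.builder()
    .addStringField("name")                      // becomes a REQUIRED STRING column
    .addNullableField("score", FieldType.DOUBLE) // becomes a NULLABLE FLOAT64 column
    .build();
TableSchema bqSchema = BigQueryUtils.toTableSchema(beamSchema); // Beam -> BigQuery
Schema roundTripped = BigQueryUtils.fromTableSchema(bqSchema);  // BigQuery -> Beam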
 
Example 22
Source Project: DataflowTemplates   Source File: BigQueryToTFRecord.java    License: Apache License 2.0
/**
 * The {@link BigQueryToTFRecord#record2Example(SchemaAndRecord)} method takes in a
 * SchemaAndRecord object returned from a BigQueryIO.read() step and builds a TensorFlow Example
 * from the record.
 */
@VisibleForTesting
protected static byte[] record2Example(SchemaAndRecord schemaAndRecord) {
  Example.Builder example = Example.newBuilder();
  Features.Builder features = example.getFeaturesBuilder();
  GenericRecord record = schemaAndRecord.getRecord();
  for (TableFieldSchema field : schemaAndRecord.getTableSchema().getFields()) {
    Feature feature = buildFeature(record.get(field.getName()), field.getType());
    features.putFeature(field.getName(), feature);
  }
  return example.build().toByteArray();
}
 
Example 23
Source Project: beam   Source File: BigQueryIOWriteTest.java    License: Apache License 2.0
void schemaUpdateOptionsTest(
    BigQueryIO.Write.Method insertMethod, Set<SchemaUpdateOption> schemaUpdateOptions)
    throws Exception {
  TableRow row = new TableRow().set("date", "2019-01-01").set("number", "1");

  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("date").setType("DATE"),
                  new TableFieldSchema().setName("number").setType("INTEGER")));

  Write<TableRow> writeTransform =
      BigQueryIO.writeTableRows()
          .to("project-id:dataset-id.table-id")
          .withTestServices(fakeBqServices)
          .withMethod(insertMethod)
          .withSchema(schema)
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
          .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND)
          .withSchemaUpdateOptions(schemaUpdateOptions);

  p.apply(Create.<TableRow>of(row)).apply(writeTransform);
  p.run();

  List<String> expectedOptions =
      schemaUpdateOptions.stream().map(Enum::name).collect(Collectors.toList());

  for (Job job : fakeJobService.getAllJobs()) {
    JobConfigurationLoad configuration = job.getConfiguration().getLoad();
    assertEquals(expectedOptions, configuration.getSchemaUpdateOptions());
  }
}
 
Example 24
Source Project: DataflowTemplates   Source File: BigQueryConvertersTest.java    License: Apache License 2.0
/**
 * Tests that BigQueryConverters.columnToValue() returns an indexed String {@link Value} when the
 * BigQuery column is a STRING of less than 1500 bytes.
 */
@Test
public void testColumnToValueStringShort() {
  TableFieldSchema column = new TableFieldSchema().setName(shortStringField).setType("STRING");
  Record record =
      generateSingleFieldAvroRecord(
          shortStringField, "string", shortStringFieldDesc, shortStringFieldValue);
  Value value = BigQueryConverters.columnToValue(column, record.get(shortStringField));
  assertEquals(shortStringFieldValue, value.getStringValue());
  assertFalse(value.getExcludeFromIndexes());
}
 
Example 25
Source Project: DataflowTemplates   Source File: BigQueryConvertersTest.java    License: Apache License 2.0
/**
 * Tests that BigQueryConverters.columnToValue() returns a non-indexed String {@link Value} when
 * the BigQuery column is a STRING longer than 1500 bytes.
 */
@Test
public void testColumnToValueStringLong() {
  TableFieldSchema column = new TableFieldSchema().setName(longStringField).setType("STRING");
  Record record =
      generateSingleFieldAvroRecord(
          longStringField, "string", longStringFieldDesc, longStringFieldValue);
  Value value = BigQueryConverters.columnToValue(column, record.get(longStringField));
  assertEquals(longStringFieldValue, value.getStringValue());
  assertTrue(value.getExcludeFromIndexes());
}
 
Example 26
Source Project: nomulus   Source File: BigqueryConnection.java    License: Apache License 2.0
/**
 * Returns the query results for the given job as an ImmutableTable, row-keyed by row number
 * (indexed from 1), column-keyed by the TableFieldSchema for that field, and with the value
 * object as the cell value.  Note that null values will not actually be null (since we're using
 * ImmutableTable) but they can be checked for using Data.isNull().
 *
 * <p>This table is fully materialized in memory (not lazily loaded), so it should not be used
 * with queries expected to return large results.
 */
private ImmutableTable<Integer, TableFieldSchema, Object> getQueryResults(Job job) {
  try {
    ImmutableTable.Builder<Integer, TableFieldSchema, Object> builder =
        new ImmutableTable.Builder<>();
    String pageToken = null;
    int rowNumber = 1;
    while (true) {
      GetQueryResultsResponse queryResults = bigquery.jobs()
            .getQueryResults(getProjectId(), job.getJobReference().getJobId())
            .setPageToken(pageToken)
            .execute();
      // If the job isn't complete yet, retry; getQueryResults() waits for up to 10 seconds on
      // each invocation so this will effectively poll for completion.
      if (queryResults.getJobComplete()) {
        List<TableFieldSchema> schemaFields = queryResults.getSchema().getFields();
        for (TableRow row : queryResults.getRows()) {
          Iterator<TableFieldSchema> fieldIterator = schemaFields.iterator();
          Iterator<TableCell> cellIterator = row.getF().iterator();
          while (fieldIterator.hasNext() && cellIterator.hasNext()) {
            builder.put(rowNumber, fieldIterator.next(), cellIterator.next().getV());
          }
          rowNumber++;
        }
        pageToken = queryResults.getPageToken();
        if (pageToken == null) {
          break;
        }
      }
    }
    return builder.build();
  } catch (IOException e) {
    throw BigqueryJobFailureException.create(e);
  }
}
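Because ImmutableTable cannot hold null cells, SQL NULLs come back as sentinel objects; as the Javadoc above notes, callers detect them with Data.isNull(). A consumption sketch (the job variable and the first-row access are illustrative):

import com.google.api.client.util.Data;

ImmutableTable<Integer, TableFieldSchema, Object> results = getQueryResults(job);
results.row(1).forEach((field, value) -> {
  if (!Data.isNull(value)) { // NULL cells are non-null sentinels, not Java nulls
    System.out.printf("%s = %s%n", field.getName(), value);
  }
});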
 
Example 27
Source Project: DataflowTemplates   Source File: BigQueryConvertersTest.java    License: Apache License 2.0
/**
 * Tests that BigQueryConverters.columnToValue() returns a Double {@link Value} when the BigQuery
 * column is a FLOAT.
 */
@Test
public void testColumnToValueFloat() {
  TableFieldSchema column = new TableFieldSchema().setName(floatField).setType("FLOAT");
  Record record =
      generateSingleFieldAvroRecord(floatField, "float", floatFieldDesc, floatFieldValue);
  Value value = BigQueryConverters.columnToValue(column, record.get(floatField));
  assertEquals(floatFieldValue, value.getDoubleValue(), 0.001);
}
 
Example 28
Source Project: DataflowTemplates   Source File: BigQueryConvertersTest.java    License: Apache License 2.0
/**
 * Tests that BigQueryConverters.columnToValue() returns a Double {@link Value} when the BigQuery
 * column is a FLOAT64.
 */
@Test
public void testColumnToValueFloat64() {
  TableFieldSchema column = new TableFieldSchema().setName(float64Field).setType("FLOAT64");
  Record record =
      generateSingleFieldAvroRecord(float64Field, "float", float64FieldDesc, float64FieldValue);
  Value value = BigQueryConverters.columnToValue(column, record.get(float64Field));
  assertEquals(float64FieldValue, value.getDoubleValue(), 0.001);
}