com.google.cloud.bigquery.Schema Java Examples

The following examples show how to use com.google.cloud.bigquery.Schema. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: PutBigQueryStreamingIT.java    From nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a table named {@code tableName} in the dataset referenced by {@code dataset}.
 *
 * <p>The table schema consists of a required {@code id}, a nullable {@code name}, a repeated
 * {@code alias}, a repeated {@code addresses} record (zip, city) and a nullable {@code job}
 * record (position, company). The built schema is assigned to the enclosing {@code schema}
 * variable before the table is created.
 *
 * @param tableName name of the table to create
 */
private void createTable(String tableName) {
    final TableId target = TableId.of(dataset.getDatasetId().getDataset(), tableName);

    // Top-level scalar columns
    final Field idColumn = Field.newBuilder("id", LegacySQLTypeName.INTEGER)
            .setMode(Mode.REQUIRED)
            .build();
    final Field nameColumn = Field.newBuilder("name", LegacySQLTypeName.STRING)
            .setMode(Mode.NULLABLE)
            .build();
    final Field aliasColumn = Field.newBuilder("alias", LegacySQLTypeName.STRING)
            .setMode(Mode.REPEATED)
            .build();

    // Repeated "addresses" record and its sub-fields
    final Field zipColumn = Field.newBuilder("zip", LegacySQLTypeName.STRING)
            .setMode(Mode.NULLABLE)
            .build();
    final Field cityColumn = Field.newBuilder("city", LegacySQLTypeName.STRING)
            .setMode(Mode.NULLABLE)
            .build();
    final Field addressesColumn =
            Field.newBuilder("addresses", LegacySQLTypeName.RECORD, zipColumn, cityColumn)
                    .setMode(Mode.REPEATED)
                    .build();

    // Nullable "job" record and its sub-fields
    final Field positionColumn = Field.newBuilder("position", LegacySQLTypeName.STRING)
            .setMode(Mode.NULLABLE)
            .build();
    final Field companyColumn = Field.newBuilder("company", LegacySQLTypeName.STRING)
            .setMode(Mode.NULLABLE)
            .build();
    final Field jobColumn =
            Field.newBuilder("job", LegacySQLTypeName.RECORD, positionColumn, companyColumn)
                    .setMode(Mode.NULLABLE)
                    .build();

    // Assemble the schema and create the table from it
    schema = Schema.of(idColumn, nameColumn, aliasColumn, addressesColumn, jobColumn);
    final TableDefinition definition = StandardTableDefinition.of(schema);
    bigquery.create(TableInfo.newBuilder(target, definition).build());
}
 
Example #2
Source File: CreateTableAndLoadData.java    From google-cloud-java with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the table {@code dataset.table} if it does not exist yet, then loads CSV data from
 * {@code gs://bucket/path} into it and reports the outcome on standard output.
 *
 * @param args unused
 * @throws InterruptedException if the thread is interrupted while waiting for the load job
 * @throws TimeoutException kept in the signature for compatibility with existing callers
 */
public static void main(String... args) throws InterruptedException, TimeoutException {
  BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
  TableId tableId = TableId.of("dataset", "table");
  Table table = bigquery.getTable(tableId);
  if (table == null) {
    System.out.println("Creating table " + tableId);
    Field integerField = Field.of("fieldName", LegacySQLTypeName.INTEGER);
    Schema schema = Schema.of(integerField);
    table = bigquery.create(TableInfo.of(tableId, StandardTableDefinition.of(schema)));
  }
  System.out.println("Loading data into table " + tableId);
  Job loadJob = table.load(FormatOptions.csv(), "gs://bucket/path");
  loadJob = loadJob.waitFor();
  if (loadJob == null) {
    // waitFor() yields null when the job no longer exists; avoid dereferencing it.
    System.out.println("Job no longer exists");
  } else if (loadJob.getStatus().getError() != null) {
    System.out.println("Job completed with errors");
  } else {
    System.out.println("Job succeeded");
  }
}
 
Example #3
Source File: DatasetSnippets.java    From google-cloud-java with Apache License 2.0 6 votes vote down vote up
/** Example of creating a table in the dataset with schema and time partitioning. */
// [TARGET create(String, TableDefinition, TableOption...)]
// [VARIABLE "my_table"]
// [VARIABLE "my_field"]
public Table createTable(String tableName, String fieldName) {
  // [START ]
  // A single STRING column named after fieldName
  Field stringField = Field.of(fieldName, LegacySQLTypeName.STRING);
  Schema schema = Schema.of(stringField);
  // Day-based time partitioning
  TimePartitioning dailyPartitioning = TimePartitioning.of(TimePartitioning.Type.DAY);
  StandardTableDefinition definition =
      StandardTableDefinition.newBuilder()
          .setSchema(schema)
          .setTimePartitioning(dailyPartitioning)
          .build();
  Table table = dataset.create(tableName, definition);
  // [END ]
  return table;
}
 
Example #4
Source File: BQTableDefinitionTest.java    From beast with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that, with partitioning enabled and a partition key configured, the table definition
 * produced by {@code BQTableDefinition} keeps the schema's field count, name, mode and type and
 * is time-partitioned on {@code timestamp_field}.
 */
@Test
public void shouldCreatePartitionedTable() {
    when(bqConfig.isBQTablePartitioningEnabled()).thenReturn(true);
    when(bqConfig.getBQTablePartitionKey()).thenReturn("timestamp_field");
    Schema bqSchema = Schema.of(
            Field.newBuilder("timestamp_field", LegacySQLTypeName.TIMESTAMP).build()
    );

    BQTableDefinition bqTableDefinition = new BQTableDefinition(bqConfig);
    StandardTableDefinition tableDefinition = bqTableDefinition.getTableDefinition(bqSchema);

    Schema returnedSchema = tableDefinition.getSchema();
    // assertEquals takes (expected, actual): the input schema is the expectation here.
    assertEquals(bqSchema.getFields().size(), returnedSchema.getFields().size());
    assertEquals(bqSchema.getFields().get(0).getName(), returnedSchema.getFields().get(0).getName());
    assertEquals(bqSchema.getFields().get(0).getMode(), returnedSchema.getFields().get(0).getMode());
    assertEquals(bqSchema.getFields().get(0).getType(), returnedSchema.getFields().get(0).getType());
    assertEquals("timestamp_field", tableDefinition.getTimePartitioning().getField());
}
 
Example #5
Source File: BQUtilsTest.java    From beast with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code BQUtils.compareBQSchemaFields} returns {@code false} when the second schema
 * contains one field more than the first.
 */
@Test
public void compareBQSchemaFieldIfSchemaIsChanged() {
    // Populate a plain ArrayList; double-brace initialization would create an anonymous
    // ArrayList subclass for no benefit.
    ArrayList<Field> bqSchemaFields = new ArrayList<>();
    bqSchemaFields.add(Field.newBuilder("test-1", LegacySQLTypeName.INTEGER).setMode(Field.Mode.NULLABLE).build());
    bqSchemaFields.add(Field.newBuilder("test-2", LegacySQLTypeName.STRING).setMode(Field.Mode.NULLABLE).build());
    bqSchemaFields.add(Field.newBuilder(Constants.OFFSET_COLUMN_NAME, LegacySQLTypeName.INTEGER).setMode(Field.Mode.NULLABLE).build());
    bqSchemaFields.add(Field.newBuilder(Constants.TOPIC_COLUMN_NAME, LegacySQLTypeName.STRING).setMode(Field.Mode.NULLABLE).build());
    bqSchemaFields.add(Field.newBuilder(Constants.LOAD_TIME_COLUMN_NAME, LegacySQLTypeName.TIMESTAMP).setMode(Field.Mode.NULLABLE).build());
    bqSchemaFields.add(Field.newBuilder(Constants.TIMESTAMP_COLUMN_NAME, LegacySQLTypeName.TIMESTAMP).setMode(Field.Mode.NULLABLE).build());
    bqSchemaFields.add(Field.newBuilder(Constants.PARTITION_COLUMN_NAME, LegacySQLTypeName.INTEGER).setMode(Field.Mode.NULLABLE).build());

    // The updated schema is a copy of the original plus one extra column.
    ArrayList<Field> updatedBQSchemaFields = new ArrayList<>(bqSchemaFields);
    updatedBQSchemaFields.add(Field.newBuilder("new-field", LegacySQLTypeName.INTEGER).setMode(Field.Mode.NULLABLE).build());

    boolean areEqual = BQUtils.compareBQSchemaFields(Schema.of(bqSchemaFields), Schema.of(updatedBQSchemaFields));
    assertFalse(areEqual);
}
 
Example #6
Source File: BigQuerySnippets.java    From google-cloud-java with Apache License 2.0 6 votes vote down vote up
/** Example of creating a table with a single STRING column. */
// [TARGET create(TableInfo, TableOption...)]
// [VARIABLE "my_dataset_name"]
// [VARIABLE "my_table_name"]
// [VARIABLE "string_field"]
public Table createTable(String datasetName, String tableName, String fieldName) {
  // [START bigquery_create_table]
  // Identity of the table to create
  TableId tableId = TableId.of(datasetName, tableName);
  // Schema made of one STRING field
  Schema schema = Schema.of(Field.of(fieldName, LegacySQLTypeName.STRING));
  // Table metadata combining identity and definition
  TableDefinition tableDefinition = StandardTableDefinition.of(schema);
  TableInfo.Builder infoBuilder = TableInfo.newBuilder(tableId, tableDefinition);
  Table table = bigquery.create(infoBuilder.build());
  // [END bigquery_create_table]
  return table;
}
 
Example #7
Source File: BigQueryMapper.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a BigQuery table that has no columns and returns it.
 *
 * <p>When {@code dayPartitioning} is set, the table is created with day-based time partitioning.
 *
 * @param tableId a TableId referencing the BigQuery table being requested.
 * @return the {@code Table} returned by the create call
 */
private Table createBigQueryTable(TableId tableId) {
  // An empty field list produces a blank schema
  Schema emptySchema = Schema.of(new ArrayList<Field>());

  StandardTableDefinition.Builder definition =
      StandardTableDefinition.newBuilder().setSchema(emptySchema);
  if (dayPartitioning) {
    TimePartitioning daily = TimePartitioning.newBuilder(TimePartitioning.Type.DAY).build();
    definition.setTimePartitioning(daily);
  }

  TableInfo.Builder infoBuilder = TableInfo.newBuilder(tableId, definition.build());
  return bigquery.create(infoBuilder.build());
}
 
Example #8
Source File: BigQuerySnippets.java    From google-cloud-java with Apache License 2.0 6 votes vote down vote up
/** Example of listing table rows with schema, reading one named field from every row. */
// [TARGET listTableData(String, String, Schema, TableDataListOption...)]
// [VARIABLE "my_dataset_name"]
// [VARIABLE "my_table_name"]
// [VARIABLE ...]
// [VARIABLE "field"]
public TableResult listTableDataSchema(
    String datasetName, String tableName, Schema schema, String field) {
  // [START ]
  TableResult tableData = bigquery.listTableData(datasetName, tableName, schema);
  // Look the field up by name in each row
  tableData.iterateAll().forEach(row -> row.get(field));
  // [END ]
  return tableData;
}
 
Example #9
Source File: BigQuerySnippets.java    From google-cloud-java with Apache License 2.0 6 votes vote down vote up
/** Example of listing table rows with schema, printing the {@code word} of the first row. */
// [TARGET listTableData(TableId, Schema, TableDataListOption...)]
public FieldValueList listTableDataSchemaId() {
  // [START ]
  Field word = Field.of("word", LegacySQLTypeName.STRING);
  Field wordCount = Field.of("word_count", LegacySQLTypeName.STRING);
  Field corpus = Field.of("corpus", LegacySQLTypeName.STRING);
  Field corpusDate = Field.of("corpus_date", LegacySQLTypeName.STRING);
  Schema schema = Schema.of(word, wordCount, corpus, corpusDate);

  TableId shakespeare = TableId.of("bigquery-public-data", "samples", "shakespeare");
  TableResult tableData = bigquery.listTableData(shakespeare, schema);

  // Only the first row of the current page is read
  FieldValueList row = tableData.getValues().iterator().next();
  String firstWord = row.get("word").getStringValue();
  System.out.println(firstWord);
  // [END ]
  return row;
}
 
Example #10
Source File: TestBigQueryDelegate.java    From datacollector with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code BigQueryDelegate.fieldsToMap} converts the values from
 * {@code createTestValues()} into the expected fields, keyed by the names of the schema from
 * {@code createTestSchema()} ("a" through "j", including the nested record "j").
 */
@Test
public void fieldsToMap() throws Exception {
  Schema schema = createTestSchema();
  List<FieldValue> fieldValues = createTestValues();

  BigQueryDelegate delegate = new BigQueryDelegate(mockBigquery, useLegacySql);
  LinkedHashMap<String, com.streamsets.pipeline.api.Field> map = delegate.fieldsToMap(schema.getFields(), fieldValues);
  assertTrue(map.containsKey("a"));
  assertEquals("a string", map.get("a").getValueAsString());
  assertArrayEquals("bytes".getBytes(), map.get("b").getValueAsByteArray());
  List<com.streamsets.pipeline.api.Field> c = map.get("c").getValueAsList();
  assertEquals(1L, c.get(0).getValueAsLong());
  assertEquals(2L, c.get(1).getValueAsLong());
  assertEquals(3L, c.get(2).getValueAsLong());
  assertEquals(2.0d, map.get("d").getValueAsDouble(), 1e-15);
  assertTrue(map.get("e").getValueAsBoolean());
  assertEquals(new Date(1351700038292L), map.get("f").getValueAsDatetime());
  assertEquals((new SimpleDateFormat("HH:mm:ss.SSS")).parse("08:39:01.123"), map.get("g").getValueAsDatetime());
  assertEquals((new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS")).parse("2019-02-05T23:59:59.123"), map.get("h").getValueAsDatetime());
  // Pattern spelled "yyyy" (was the typo "yyy", which parses identically but reads as a mistake).
  assertEquals((new SimpleDateFormat("yyyy-MM-dd")).parse("2019-02-05"), map.get("i").getValueAsDate());
  Map<String, com.streamsets.pipeline.api.Field> j = map.get("j").getValueAsListMap();
  assertEquals("nested string", j.get("x").getValueAsString());
  Map<String, com.streamsets.pipeline.api.Field> y = j.get("y").getValueAsListMap();
  assertEquals("z", y.get("z").getValueAsString());
}
 
Example #11
Source File: TestBigQueryDelegate.java    From datacollector with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a schema with fields "a" to "j": STRING, BYTES, repeated INTEGER, FLOAT, BOOLEAN,
 * TIMESTAMP, TIME, DATETIME, DATE and a RECORD "j" holding a STRING "x" and a nested RECORD "y"
 * with a single STRING "z".
 *
 * @return the schema described above
 */
public static Schema createTestSchema() {
  Field repeatedIntegers =
      Field.newBuilder("c", LegacySQLTypeName.INTEGER).setMode(Field.Mode.REPEATED).build();

  // Nested records are assembled innermost first
  Field innerRecord =
      Field.of("y", LegacySQLTypeName.RECORD, Field.of("z", LegacySQLTypeName.STRING));
  Field outerRecord =
      Field.of("j", LegacySQLTypeName.RECORD, Field.of("x", LegacySQLTypeName.STRING), innerRecord);

  return Schema.of(
      Field.of("a", LegacySQLTypeName.STRING),
      Field.of("b", LegacySQLTypeName.BYTES),
      repeatedIntegers,
      Field.of("d", LegacySQLTypeName.FLOAT),
      Field.of("e", LegacySQLTypeName.BOOLEAN),
      Field.of("f", LegacySQLTypeName.TIMESTAMP),
      Field.of("g", LegacySQLTypeName.TIME),
      Field.of("h", LegacySQLTypeName.DATETIME),
      Field.of("i", LegacySQLTypeName.DATE),
      outerRecord);
}
 
Example #12
Source File: BigQueryAvroRegistry.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Infers the Avro schema of one BigQuery field, taking the field mode into account.
 *
 * <p>A NULLABLE or unset mode wraps the basic type as nullable, a REPEATED mode turns it into an
 * array of the basic type, and any other mode returns the basic type unchanged.
 *
 * @param field the BigQuery field
 * @return the Avro schema for the field
 */
private org.apache.avro.Schema inferSchemaField(Field field) {
    Field.Mode mode = field.getMode();

    // Schema of the field's "basic" type, ignoring the mode.
    org.apache.avro.Schema basicSchema = inferSchemaFieldWithoutMode(field);

    // A missing mode counts as NULLABLE, the BigQuery default.
    if (mode == null || mode == Field.Mode.NULLABLE) {
        return AvroUtils.wrapAsNullable(basicSchema);
    }
    // REPEATED fields are arrays:
    // https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#array-type
    if (mode == Field.Mode.REPEATED) {
        return SchemaBuilder.array().items(basicSchema);
    }
    return basicSchema;
}
 
Example #13
Source File: Json.java    From gcp-ingestion with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Read a {@link Schema} from a byte array holding a UTF-8 encoded JSON array of field
 * definitions.
 *
 * <p>{@link Schema} does not natively support Jackson deserialization, so we rely on a
 * roundabout method inspired by https://github.com/googleapis/google-cloud-java/issues/2753:
 * the fields are parsed into a {@link TableSchema}, which is then converted by reflectively
 * invoking {@code SCHEMA_FROM_PB}.
 *
 * @exception IOException if {@code data} does not contain a valid {@link Schema}.
 */
public static Schema readBigQuerySchema(byte[] data) throws IOException {
  String json = new String(data, Charsets.UTF_8);
  List<TableFieldSchema> parsedFields = (List<TableFieldSchema>) JSON_FACTORY //
      .createJsonParser(json) //
      .parseArray(ArrayList.class, TableFieldSchema.class);

  TableSchema tableSchema = new TableSchema();
  tableSchema.setFields(parsedFields);

  Object converted;
  try {
    converted = SCHEMA_FROM_PB.invoke(null, tableSchema);
  } catch (IllegalAccessException | InvocationTargetException e) {
    throw new RuntimeException(e);
  }
  return (Schema) converted;
}
 
Example #14
Source File: PubsubMessageToObjectNode.java    From gcp-ingestion with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Turn message data into an {@link ObjectNode}.
 *
 * <p>{@code data} must not be compressed.
 *
 * <p>The parsed JSON is also reshaped to match details of our table schemas in BigQuery. The
 * metadata entry is taken out before that reshaping and put back afterwards.
 *
 * <p>If {@code schemasLocation} wasn't provided then {@link TableId} is used to get schemas
 * directly from BigQuery.
 */
@Override
public ObjectNode apply(TableId tableId, Map<String, String> attributes, byte[] data) {
  final Schema schema = schemaStore.getSchema(tableId, attributes);

  final ObjectNode contents;
  try {
    contents = Json.readObjectNode(data);
  } catch (IOException e) {
    throw new UncheckedIOException(e);
  }

  // Set metadata aside so the transformation below leaves it untouched.
  final JsonNode metadata = contents.remove(AddMetadata.METADATA);

  // With a strict schema no additional-properties node is passed to the transformation.
  final ObjectNode additionalProperties;
  if (strictSchema.test(attributes)) {
    additionalProperties = null;
  } else {
    additionalProperties = Json.createObjectNode();
  }

  // Apply the BQ-specific changes to the payload structure.
  transformForBqSchema(contents, schema.getFields(), additionalProperties);

  if (metadata != null) {
    contents.set(AddMetadata.METADATA, metadata);
  }
  if (additionalProperties != null) {
    final String serialized = Json.asString(additionalProperties);
    contents.put(FieldName.ADDITIONAL_PROPERTIES, serialized);
  }
  return contents;
}
 
Example #15
Source File: JsonTest.java    From gcp-ingestion with Mozilla Public License 2.0 5 votes vote down vote up
/** Checks that a one-field JSON schema is read back with a STRING-typed first field. */
@Test
public void testReadBigQuerySchema() throws Exception {
  String json = "[{\"mode\":\"NULLABLE\",\"name\":\"document_id\",\"type\": \"STRING\"}]";
  byte[] encoded = json.getBytes(StandardCharsets.UTF_8);

  Schema schema = Json.readBigQuerySchema(encoded);

  LegacySQLTypeName firstFieldType = schema.getFields().get(0).getType();
  assertEquals(LegacySQLTypeName.STRING, firstFieldType);
}
 
Example #16
Source File: BigQuerySchemaStore.java    From gcp-ingestion with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Returns the schema of the BigQuery table {@code tableId}.
 *
 * <p>Schemas are served from a lazily created cache whose entries expire 10 minutes after
 * being written; on a miss the table is fetched through a lazily created BigQuery client.
 * A {@code null} {@code tableId} is delegated to {@code getSchema(attributes)}.
 *
 * @param tableId the table whose schema is requested, may be {@code null}
 * @param attributes message attributes, only used when {@code tableId} is {@code null}
 * @return the table's schema
 * @throws SchemaNotFoundException if BigQuery reports no table for {@code tableId}
 */
@Override
public Schema getSchema(TableId tableId, Map<String, String> attributes) {
  if (tableId == null) {
    // Always throws SchemaNotFoundException
    return getSchema(attributes);
  }
  if (tableSchemaCache == null) {
    // We need to be very careful about settings for the cache here. We have had significant
    // issues in the past due to exceeding limits on BigQuery API requests; see
    // https://bugzilla.mozilla.org/show_bug.cgi?id=1623000
    tableSchemaCache = CacheBuilder.newBuilder().expireAfterWrite(Duration.ofMinutes(10))
        .build();
  }
  if (bqService == null) {
    bqService = BigQueryOptions.newBuilder().setProjectId(tableId.getProject())
        .setRetrySettings(RETRY_SETTINGS).build().getService();
  }
  try {
    return tableSchemaCache.get(tableId, () -> {
      Table table = bqService.getTable(tableId);
      if (table == null) {
        // A cache loader must not return null, so a missing table is signalled by throwing;
        // the exception is unwrapped again below.
        throw SchemaNotFoundException.forName(tableId.toString());
      }
      return table.getDefinition().getSchema();
    });
  } catch (UncheckedExecutionException e) {
    // The cache wraps unchecked loader failures; surface the not-found case directly so
    // callers see SchemaNotFoundException as intended.
    if (e.getCause() instanceof SchemaNotFoundException) {
      throw (SchemaNotFoundException) e.getCause();
    }
    throw e;
  } catch (ExecutionException e) {
    throw new UncheckedExecutionException(e.getCause());
  }
}
 
Example #17
Source File: BigQueryAvroRegistry.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Infers an Avro record schema from a BigQuery schema.
 *
 * <p>A schema without fields yields an empty record named "EmptyRecord"; otherwise the result
 * is a record named "BigQuerySchema" with one field, without default, per BigQuery field.
 *
 * @param schema the BigQuery schema
 * @return the inferred Avro record schema
 */
private org.apache.avro.Schema inferBigQuerySchema(Schema schema) {
    List<Field> bqFields = schema.getFields();
    if (bqFields.isEmpty()) {
        return SchemaBuilder.builder().record("EmptyRecord").fields().endRecord();
    }

    SchemaBuilder.FieldAssembler<org.apache.avro.Schema> assembler =
            SchemaBuilder.record("BigQuerySchema").fields();
    for (Field bqField : bqFields) {
        String fieldName = bqField.getName();
        org.apache.avro.Schema avroType = inferSchemaField(bqField);
        // Append the field under its BigQuery name, without a default value.
        assembler = assembler.name(fieldName).type(avroType).noDefault();
    }
    return assembler.endRecord();
}
 
Example #18
Source File: BQClient.java    From beast with Apache License 2.0 5 votes vote down vote up
/**
 * Builds table metadata for {@code tableID} from the given fields and the labels configured in
 * {@code bqConfig}, then passes it to {@code upsertDatasetAndTable}.
 *
 * @param bqSchemaFields the fields making up the table schema
 * @throws BigQueryException declared for failures of the underlying BigQuery calls
 */
public void upsertTable(List<Field> bqSchemaFields) throws BigQueryException {
    TableDefinition definition = getTableDefinition(Schema.of(bqSchemaFields));
    TableInfo.Builder infoBuilder = TableInfo.newBuilder(tableID, definition)
            .setLabels(bqConfig.getTableLabels());
    upsertDatasetAndTable(infoBuilder.build());
}
 
Example #19
Source File: BigQuerySnippets.java    From google-cloud-java with Apache License 2.0 5 votes vote down vote up
/** Example of loading a newline-delimited-json file with textual fields from GCS to a table. */
// [TARGET create(JobInfo, JobOption...)]
// [VARIABLE "my_dataset_name"]
// [VARIABLE "my_table_name"]
public Long writeRemoteFileToTable(String datasetName, String tableName)
    throws InterruptedException {
  // [START bigquery_load_table_gcs_json]
  String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.json";
  TableId tableId = TableId.of(datasetName, tableName);
  // Schema with the two STRING columns of the source file
  Schema schema =
      Schema.of(
          Field.of("name", LegacySQLTypeName.STRING),
          Field.of("post_abbr", LegacySQLTypeName.STRING));
  // Load job configuration: JSON input, create the table if needed
  LoadJobConfiguration configuration =
      LoadJobConfiguration.builder(tableId, sourceUri)
          .setFormatOptions(FormatOptions.json())
          .setCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
          .setSchema(schema)
          .build();
  // Start the load job and wait for it to finish
  Job loadJob = bigquery.create(JobInfo.of(configuration)).waitFor();
  // Report the job state, then read the row count from the table definition
  System.out.println("State: " + loadJob.getStatus().getState());
  StandardTableDefinition loadedDefinition = bigquery.getTable(tableId).getDefinition();
  return loadedDefinition.getNumRows();
  // [END bigquery_load_table_gcs_json]
}
 
Example #20
Source File: TableSnippets.java    From google-cloud-java with Apache License 2.0 5 votes vote down vote up
/** Example of listing rows in the table given a schema, reading one named field per row. */
// [TARGET list(Schema, TableDataListOption...)]
// [VARIABLE ...]
// [VARIABLE "my_field"]
public Page<FieldValueList> list(Schema schema, String field) {
  // [START ]
  Page<FieldValueList> page = table.list(schema);
  // Look the field up by name in each row
  page.iterateAll().forEach(row -> row.get(field));
  // [END ]
  return page;
}
 
Example #21
Source File: BigQueryDatasetRuntimeTestIT.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Creates every dataset listed in {@code datasets} and, inside the first of them, every table
 * listed in {@code tables}, each with a single STRING column named "test".
 */
@BeforeClass
public static void initDatasetAndTable() throws IOException {
    BigQuery client = BigQueryConnection.createClient(createDatastore());

    for (String datasetName : datasets) {
        client.create(DatasetInfo.of(DatasetId.of(BigQueryTestConstants.PROJECT, datasetName)));
    }

    for (String tableName : tables) {
        Schema singleColumn = Schema.of(Field.of("test", LegacySQLTypeName.STRING));
        TableDefinition definition = StandardTableDefinition.of(singleColumn);
        // All tables are placed in the first dataset
        TableId target = TableId.of(BigQueryTestConstants.PROJECT, datasets.get(0), tableName);
        client.create(TableInfo.of(target, definition));
    }
}
 
Example #22
Source File: BigQueryAvroRegistry.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Converts one BigQuery field value according to the Avro schema of its field.
 *
 * <p>A null value of a nullable schema yields {@code null}. Primitive values become a formatted
 * timestamp, a double, a boolean or the raw value depending on the schema; repeated values
 * become a list of converted elements; record values are converted by {@code convertFileds}.
 *
 * @param fieldValue the BigQuery value
 * @param fieldSchema the Avro schema of the field, possibly nullable
 * @return the converted value
 */
private Object convertField(FieldValue fieldValue, org.apache.avro.Schema fieldSchema) {
    if (AvroUtils.isNullable(fieldSchema) && fieldValue.isNull()) {
        return null;
    }
    org.apache.avro.Schema valueSchema = AvroUtils.unwrapIfNullable(fieldSchema);
    switch (fieldValue.getAttribute()) {
    case PRIMITIVE: {
        String dbType = valueSchema.getProp(TALEND_COLUMN_DB_TYPE);
        if (BigQueryType.TIMESTAMP.toString().equals(dbType)) {
            // Scale the timestamp value down by 1,000,000 before formatting it.
            double scaled = fieldValue.getTimestampValue() / 1000000.0;
            return formatTimestamp(Double.toString(scaled));
        }
        if (AvroUtils.isSameType(valueSchema, AvroUtils._double())) {
            return fieldValue.getDoubleValue();
        }
        if (AvroUtils.isSameType(valueSchema, AvroUtils._boolean())) {
            return fieldValue.getBooleanValue();
        }
        return fieldValue.getValue();
    }
    case REPEATED: {
        List<Object> converted = new ArrayList<>();
        for (FieldValue element : fieldValue.getRepeatedValue()) {
            converted.add(convertField(element, valueSchema.getElementType()));
        }
        return converted;
    }
    case RECORD:
        return convertFileds(fieldValue.getRecordValue(), valueSchema);
    }
    throw TalendRuntimeException.build(CommonErrorCodes.UNEXPECTED_ARGUMENT).create();
}
 
Example #23
Source File: BigQuerySchemaUtils.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Beam schema into a BigQuery client {@code Schema} by converting each Beam field
 * with {@code beamFieldToBigQueryClientField}, keeping the field order.
 *
 * @param tableSchema the Beam schema to convert
 * @return the BigQuery client schema
 */
public static Schema beamSchemaToBigQueryClientSchema(
    org.apache.beam.sdk.schemas.Schema tableSchema) {
  final int fieldCount = tableSchema.getFieldCount();
  final ArrayList<Field> converted = new ArrayList<>(fieldCount);

  for (org.apache.beam.sdk.schemas.Schema.Field beamField : tableSchema.getFields()) {
    Field clientField = beamFieldToBigQueryClientField(beamField);
    converted.add(clientField);
  }
  return Schema.of(converted);
}
 
Example #24
Source File: BigQueryAvroRegistry.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a list of BigQuery values into a map keyed by Avro field name.
 *
 * <p>The value at position {@code i} is paired with the Avro field at position {@code i} of
 * {@code schema} and converted with {@code convertField}.
 *
 * @param fields the BigQuery values, in schema order
 * @param schema the Avro record schema describing the values
 * @return a map from Avro field name to converted value
 */
public Map<String, Object> convertFileds(List<FieldValue> fields, org.apache.avro.Schema schema) {
    Map<String, Object> converted = new HashMap<>();
    int position = 0;
    for (FieldValue value : fields) {
        // Values and Avro fields are matched purely by position.
        org.apache.avro.Schema.Field avroField = schema.getFields().get(position);
        converted.put(avroField.name(), convertField(value, avroField.schema()));
        position++;
    }
    return converted;
}
 
Example #25
Source File: BigQueryClient.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a table with the given schema, using the table name as its friendly name.
 *
 * @param tableId identity of the table to create
 * @param schema schema of the new table
 */
private void createTable(TableId tableId, Schema schema) {
  StandardTableDefinition definition = StandardTableDefinition.of(schema);
  TableInfo.Builder infoBuilder =
      TableInfo.newBuilder(tableId, definition).setFriendlyName(tableId.getTable());

  client.create(infoBuilder.build(), FIELD_OPTIONS);
}
 
Example #26
Source File: BigQueryClient.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a new table with the given schema unless a table with that name already exists.
 *
 * @param tableName name of the desired table
 * @param schema schema of consequent table fields (name, type pairs); each type is resolved
 *     with {@code LegacySQLTypeName.valueOf}.
 */
public void createTableIfNotExists(String tableName, Map<String, String> schema) {
  TableId tableId = TableId.of(projectId, dataset, tableName);

  // Nothing to do when the lookup finds the table
  if (client.getTable(tableId, FIELD_OPTIONS) != null) {
    return;
  }

  List<Field> schemaFields =
      schema.entrySet().stream()
          .map(column -> Field.of(column.getKey(), LegacySQLTypeName.valueOf(column.getValue())))
          .collect(Collectors.toList());
  Schema tableSchema = Schema.of(schemaFields);

  createTable(tableId, tableSchema);
}
 
Example #27
Source File: BigQueryAvroRegistry.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Registers a schema inferrer for BigQuery {@code Schema} objects that delegates to
 * {@code inferBigQuerySchema}.
 */
private BigQueryAvroRegistry() {
    SerializableFunction<Schema, org.apache.avro.Schema> schemaInferrer =
            new SerializableFunction<Schema, org.apache.avro.Schema>() {

                @Override
                public org.apache.avro.Schema apply(Schema bigQuerySchema) {
                    return inferBigQuerySchema(bigQuerySchema);
                }
            };
    registerSchemaInferrer(Schema.class, schemaInferrer);
}
 
Example #28
Source File: BigQueryAvroRegistry.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Derives a BigQuery {@code TableSchema} from an Avro record schema by converting each Avro
 * field with {@code tryArrayFieldSchema}.
 *
 * @param schema the Avro record schema
 * @return the BigQuery table schema, or {@code null} when the Avro schema has no fields
 */
public TableSchema guessBigQuerySchema(org.apache.avro.Schema schema) {
    List<org.apache.avro.Schema.Field> avroFields = schema.getFields();
    if (avroFields.isEmpty()) {
        return null;
    }

    List<TableFieldSchema> bqFields = new ArrayList<>();
    for (org.apache.avro.Schema.Field avroField : avroFields) {
        TableFieldSchema bqField = tryArrayFieldSchema(avroField);
        bqFields.add(bqField);
    }

    TableSchema tableSchema = new TableSchema();
    tableSchema.setFields(bqFields);
    return tableSchema;
}
 
Example #29
Source File: BigQueryAvroRegistry.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Converts one Avro field into a BigQuery field schema, deriving the mode from the Avro type.
 *
 * <p>A non-nullable Avro schema sets {@code REQUIRED_MODE}. An array schema (after unwrapping
 * nullability) sets {@code REPEATED_MODE} and continues with the array's element type.
 *
 * @param field the Avro field
 * @return the BigQuery field schema
 */
private TableFieldSchema tryArrayFieldSchema(org.apache.avro.Schema.Field field) {
    TableFieldSchema bqField = new TableFieldSchema().setName(field.name());

    org.apache.avro.Schema declaredSchema = field.schema();
    if (!AvroUtils.isNullable(declaredSchema)) {
        bqField = bqField.setMode(REQUIRED_MODE);
    }

    org.apache.avro.Schema unwrapped = AvroUtils.unwrapIfNullable(declaredSchema);
    boolean isArray = unwrapped.getType() == org.apache.avro.Schema.Type.ARRAY;
    if (isArray) {
        // The REPEATED mode replaces any mode set above; the type comes from the elements.
        return tryFieldSchema(bqField.setMode(REPEATED_MODE), unwrapped.getElementType());
    }
    return tryFieldSchema(bqField, unwrapped);
}
 
Example #30
Source File: BigQueryAvroRegistry.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Sets the BigQuery type of {@code fieldSchema} from the Avro schema and, for Avro records,
 * attaches the converted child fields.
 *
 * @param fieldSchema the BigQuery field schema to complete
 * @param avroSchema the Avro schema of the field
 * @return the completed BigQuery field schema
 */
private TableFieldSchema tryFieldSchema(TableFieldSchema fieldSchema, org.apache.avro.Schema avroSchema) {
    TableFieldSchema typedField = fieldSchema.setType(getBQFieldType(avroSchema));

    // Only records carry nested fields.
    if (avroSchema.getType() != org.apache.avro.Schema.Type.RECORD) {
        return typedField;
    }

    List<TableFieldSchema> nestedFields = new ArrayList<>();
    for (org.apache.avro.Schema.Field avroChild : avroSchema.getFields()) {
        nestedFields.add(tryArrayFieldSchema(avroChild));
    }
    typedField.setFields(nestedFields);
    return typedField;
}