Java Code Examples for com.google.cloud.bigquery.LegacySQLTypeName

The following examples show how to use com.google.cloud.bigquery.LegacySQLTypeName. These examples are extracted from open source projects; the source project, file, and license are noted above each example where available.
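Before the examples, a minimal self-contained sketch (not drawn from any of the projects below) of the most common LegacySQLTypeName operations: declaring typed fields for a Schema, parsing a type from its string name, and mapping a legacy type to its standard SQL counterpart. The class name LegacySqlTypeNameDemo is illustrative.

import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.LegacySQLTypeName;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.StandardSQLTypeName;

public class LegacySqlTypeNameDemo {
    public static void main(String[] args) {
        // Build a simple two-column schema from legacy type constants.
        Schema schema = Schema.of(
                Field.of("name", LegacySQLTypeName.STRING),
                Field.of("count", LegacySQLTypeName.INTEGER));

        // Parse a type from its string form; valueOfStrict rejects unknown names.
        LegacySQLTypeName parsed = LegacySQLTypeName.valueOfStrict("TIMESTAMP");

        // Every legacy type maps to a standard SQL type (INTEGER -> INT64).
        StandardSQLTypeName standard = LegacySQLTypeName.INTEGER.getStandardType();

        System.out.println(schema.getFields().size() + " " + parsed + " " + standard);
    }
}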
Example 1
Source Project: nifi   Source File: BigQueryUtils.java    License: Apache License 2.0
public static Field mapToField(Map fMap) {
    String typeStr = fMap.get("type").toString();
    String nameStr = fMap.get("name").toString();
    String modeStr = fMap.get("mode").toString();
    LegacySQLTypeName type = null;

    if (typeStr.equals("BOOLEAN")) {
        type = LegacySQLTypeName.BOOLEAN;
    } else if (typeStr.equals("STRING")) {
        type = LegacySQLTypeName.STRING;
    } else if (typeStr.equals("BYTES")) {
        type = LegacySQLTypeName.BYTES;
    } else if (typeStr.equals("INTEGER")) {
        type = LegacySQLTypeName.INTEGER;
    } else if (typeStr.equals("FLOAT")) {
        type = LegacySQLTypeName.FLOAT;
    } else if (typeStr.equals("TIMESTAMP") || typeStr.equals("DATE")
            || typeStr.equals("TIME") || typeStr.equals("DATETIME")) {
        type = LegacySQLTypeName.TIMESTAMP;
    } else if (typeStr.equals("RECORD")) {
        type = LegacySQLTypeName.RECORD;
    }

    return Field.newBuilder(nameStr, type).setMode(Field.Mode.valueOf(modeStr)).build();
}
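A hypothetical invocation of mapToField follows; the map contents are illustrative and assume, as the method does, that the "name", "type", and "mode" keys are all present:

Map<String, Object> fieldDef = new HashMap<>();
fieldDef.put("name", "created_at");
fieldDef.put("type", "TIMESTAMP");
fieldDef.put("mode", "NULLABLE");
// Produces a Field named "created_at" of type TIMESTAMP with mode NULLABLE.
Field field = BigQueryUtils.mapToField(fieldDef);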
 
Example 2
@Test
public void testListWithNulls() throws Exception {
  ObjectNode additionalProperties = Json.createObjectNode();
  ObjectNode parent = Json.readObjectNode(
      ("{\"modules\":[{\"base_addr\":\"0x1390000\"},null]}").getBytes(StandardCharsets.UTF_8));
  List<Field> bqFields = ImmutableList.of(Field
      .newBuilder("modules", LegacySQLTypeName.RECORD,
          Field.of("base_addr", LegacySQLTypeName.STRING)) //
      .setMode(Mode.REPEATED).build() //
  ); //
  Map<String, Object> expected = Json.readMap("{\"modules\":[{\"base_addr\":\"0x1390000\"},{}]}");
  TRANSFORM.transformForBqSchema(parent, bqFields, additionalProperties);
  assertEquals(expected, Json.asMap(parent));

  Map<String, Object> expectedAdditional = Json.readMap("{\"modules\":[{},null]}");
  assertEquals(expectedAdditional, Json.asMap(additionalProperties));
}
 
Example 3
Source Project: DataflowTemplates   Source File: BigQueryMapper.java    License: Apache License 2.0
/**
 * Extracts and applies new column information to BigQuery by comparing the TableRow against the
 * BigQuery Table. Retries the supplied number of times before failing.
 *
 * @param tableId a TableId referencing the BigQuery table to be loaded to.
 * @param row a TableRow with the raw data to be loaded into BigQuery.
 * @param inputSchema The source schema lookup to be used in mapping.
 * @param retries Number of remaining retries before error is raised.
 */
private void applyMapperToTableRow(
    TableId tableId, TableRow row, Map<String, LegacySQLTypeName> inputSchema, int retries) {
  try {
    updateTableIfRequired(tableId, row, inputSchema);
  } catch (Exception e) {
    if (retries > 0) {
      LOG.info("RETRY TABLE UPDATE - enter: {}", String.valueOf(retries));
      try {
        Thread.sleep(2000);
      } catch (InterruptedException i) {
        throw e;
      }
      LOG.info("RETRY TABLE UPDATE - apply: {}", String.valueOf(retries));
      applyMapperToTableRow(tableId, row, inputSchema, retries - 1);
    } else {
      LOG.info("RETRY TABLE UPDATE - throw: {}", String.valueOf(retries));
      throw e;
    }
  }
}
 
Example 4
Source Project: DataflowTemplates   Source File: BigQueryMapper.java    License: Apache License 2.0
private Boolean addNewTableField(TableId tableId, TableRow row, String rowKey,
    List<Field> newFieldList, Map<String, LegacySQLTypeName> inputSchema) {
  // Call Get Schema and Extract New Field Type
  Field newField;

  if (inputSchema.containsKey(rowKey)) {
    newField = Field.of(rowKey, inputSchema.get(rowKey));
  } else {
    newField = Field.of(rowKey, LegacySQLTypeName.STRING);
  }

  newFieldList.add(newField);

  // Currently we always add new fields for each call
  // TODO: add an option to ignore new field and why boolean?
  return true;
}
 
Example 5
Source Project: DataflowTemplates   Source File: BigQueryMapper.java    License: Apache License 2.0
@Override
public PCollection<OutputT> expand(PCollection<InputT> tableKVPCollection) {
  return tableKVPCollection.apply(
      "TableRowExtractDestination",
      MapElements.via(
          new SimpleFunction<InputT, OutputT>() {
            @Override
            public OutputT apply(InputT input) {
              /*
                  We run validation against every event to ensure all columns
                  exist in source.
                  If a column is in the event and not in BigQuery,
                  the column is added to the table before the event can continue.
              */
              setUp();
              TableId tableId = getTableId(input);
              TableRow row = getTableRow(input);
              Map<String, LegacySQLTypeName> inputSchema = getObjectSchema(input);
              int retries = getMapperRetries();

              applyMapperToTableRow(tableId, row, inputSchema, retries);
              return getOutputObject(input);
            }
          }));
}
 
Example 6
Source Project: beast   Source File: ConverterTest.java    License: Apache License 2.0
@Test
public void shouldTestConvertToSchemaForRepeatedFields() {
    ProtoField protoField = new ProtoField(new ArrayList<ProtoField>() {{
        add(new ProtoField("field1_map",
                DescriptorProtos.FieldDescriptorProto.Type.TYPE_INT32,
                DescriptorProtos.FieldDescriptorProto.Label.LABEL_REPEATED));
        add(new ProtoField("field2_repeated",
                DescriptorProtos.FieldDescriptorProto.Type.TYPE_STRING,
                DescriptorProtos.FieldDescriptorProto.Label.LABEL_REPEATED));

    }});

    List<Field> fields = converter.generateBigquerySchema(protoField);

    assertEquals(protoField.getFields().size(), fields.size());
    assertBqField(protoField.getFields().get(0).getName(), LegacySQLTypeName.INTEGER, Field.Mode.REPEATED, fields.get(0));
    assertBqField(protoField.getFields().get(1).getName(), LegacySQLTypeName.STRING, Field.Mode.REPEATED, fields.get(1));
}
 
Example 7
Source Project: google-cloud-java   Source File: DatasetSnippets.java    License: Apache License 2.0
/** Example of creating a table in the dataset with schema and time partitioning. */
// [TARGET create(String, TableDefinition, TableOption...)]
// [VARIABLE "my_table"]
// [VARIABLE "my_field"]
public Table createTable(String tableName, String fieldName) {
  // [START ]
  Schema schema = Schema.of(Field.of(fieldName, LegacySQLTypeName.STRING));
  StandardTableDefinition definition =
      StandardTableDefinition.newBuilder()
          .setSchema(schema)
          .setTimePartitioning(TimePartitioning.of(TimePartitioning.Type.DAY))
          .build();
  Table table = dataset.create(tableName, definition);
  // [END ]
  return table;
}
 
Example 8
@Test
public void testNestedListAdditionalProperties() throws Exception {
  ObjectNode additionalProperties = Json.createObjectNode();
  ObjectNode parent = Json.readObjectNode("{\n" //
      + "  \"payload\": [[{\"a\":1}],[{\"a\":2},{\"a\":3,\"b\":4}]]\n" //
      + "}\n");
  List<Field> bqFields = ImmutableList.of(Field.newBuilder("payload", LegacySQLTypeName.RECORD, //
      Field.newBuilder("list", LegacySQLTypeName.RECORD, //
          Field.of("a", LegacySQLTypeName.INTEGER)) //
          .setMode(Mode.REPEATED).build() //
  ).setMode(Mode.REPEATED).build()); //
  Map<String, Object> expected = Json
      .readMap("{\"payload\":[{\"list\":[{\"a\":1}]},{\"list\":[{\"a\":2},{\"a\":3}]}]}");
  TRANSFORM.transformForBqSchema(parent, bqFields, additionalProperties);
  assertEquals(expected, Json.asMap(parent));

  Map<String, Object> expectedAdditional = Json.readMap("{\"payload\":[null,[{},{\"b\":4}]]}");
  assertEquals(expectedAdditional, Json.asMap(additionalProperties));
}
 
Example 9
Source Project: beast   Source File: BQTableDefinitionTest.java    License: Apache License 2.0
@Test
public void shouldCreatePartitionedTable() {
    when(bqConfig.isBQTablePartitioningEnabled()).thenReturn(true);
    when(bqConfig.getBQTablePartitionKey()).thenReturn("timestamp_field");
    Schema bqSchema = Schema.of(
            Field.newBuilder("timestamp_field", LegacySQLTypeName.TIMESTAMP).build()
    );

    BQTableDefinition bqTableDefinition = new BQTableDefinition(bqConfig);
    StandardTableDefinition tableDefinition = bqTableDefinition.getTableDefinition(bqSchema);

    Schema returnedSchema = tableDefinition.getSchema();
    assertEquals(returnedSchema.getFields().size(), bqSchema.getFields().size());
    assertEquals(returnedSchema.getFields().get(0).getName(), bqSchema.getFields().get(0).getName());
    assertEquals(returnedSchema.getFields().get(0).getMode(), bqSchema.getFields().get(0).getMode());
    assertEquals(returnedSchema.getFields().get(0).getType(), bqSchema.getFields().get(0).getType());
    assertEquals("timestamp_field", tableDefinition.getTimePartitioning().getField());
}
 
Example 10
Source Project: google-cloud-java   Source File: BigQuerySnippets.java    License: Apache License 2.0
/** Example of listing table rows with schema. */
// [TARGET listTableData(TableId, Schema, TableDataListOption...)]
public FieldValueList listTableDataSchemaId() {
  // [START ]
  Schema schema =
      Schema.of(
          Field.of("word", LegacySQLTypeName.STRING),
          Field.of("word_count", LegacySQLTypeName.STRING),
          Field.of("corpus", LegacySQLTypeName.STRING),
          Field.of("corpus_date", LegacySQLTypeName.STRING));
  TableResult tableData =
      bigquery.listTableData(
          TableId.of("bigquery-public-data", "samples", "shakespeare"), schema);
  FieldValueList row = tableData.getValues().iterator().next();
  System.out.println(row.get("word").getStringValue());
  // [END ]
  return row;
}
 
Example 11
Source Project: DataflowTemplates   Source File: BigQueryMapper.java    License: Apache License 2.0
/**
 * Extracts and applies new column information to BigQuery by comparing the TableRow against the
 * BigQuery Table.
 *
 * @param tableId a TableId referencing the BigQuery table to be loaded to.
 * @param row a TableRow with the raw data to be loaded into BigQuery.
 * @param inputSchema The source schema lookup to be used in mapping.
 */
private void updateTableIfRequired(
    TableId tableId, TableRow row, Map<String, LegacySQLTypeName> inputSchema) {
  Table table = getOrCreateBigQueryTable(tableId);
  FieldList tableFields = table.getDefinition().getSchema().getFields();

  Set<String> rowKeys = row.keySet();
  Boolean tableWasUpdated = false;
  List<Field> newFieldList = new ArrayList<Field>();
  for (String rowKey : rowKeys) {
    // Check if rowKey (column from data) is in the BQ Table
    try {
      Field tableField = tableFields.get(rowKey);
    } catch (IllegalArgumentException e) {
      tableWasUpdated = addNewTableField(tableId, row, rowKey, newFieldList, inputSchema);
    }
  }

  if (tableWasUpdated) {
    LOG.info("Updating Table");
    updateBigQueryTable(tableId, table, tableFields, newFieldList);
  }
}
 
Example 12
@Test
public void testTupleIntoStructAdditionalProperties() throws Exception {
  ObjectNode additionalProperties = Json.createObjectNode();
  ObjectNode parent = Json.readObjectNode("{\n" //
      + "  \"payload\": [26,{\"a\":83,\"b\":44}]\n" //
      + "}\n");
  List<Field> bqFields = ImmutableList.of(Field.newBuilder("payload", LegacySQLTypeName.RECORD, //
      Field.of("f0_", LegacySQLTypeName.INTEGER), //
      Field.of("f1_", LegacySQLTypeName.RECORD, //
          Field.of("a", LegacySQLTypeName.INTEGER))) //
      .setMode(Mode.NULLABLE).build());
  Map<String, Object> expected = Json.readMap("{\"payload\":{\"f0_\":26,\"f1_\":{\"a\":83}}}");
  TRANSFORM.transformForBqSchema(parent, bqFields, additionalProperties);
  assertEquals(expected, Json.asMap(parent));

  Map<String, Object> expectedAdditional = Json.readMap("{\"payload\":[null,{\"b\":44}]}");
  assertEquals(expectedAdditional, Json.asMap(additionalProperties));
}
 
Example 13
@Test
public void testCoerceMapValueToString() throws Exception {
  String mainPing = "{\"payload\":{\"processes\":{\"parent\":{\"scalars\":"
      + "{\"timestamps.first_paint\":5405}}}}}";
  ObjectNode parent = Json.readObjectNode(mainPing);
  ObjectNode additionalProperties = Json.createObjectNode();
  parent.put("64bit", true);
  parent.put("hi-fi", true);
  List<Field> bqFields = ImmutableList.of(Field.of("payload", LegacySQLTypeName.RECORD,
      Field.of("processes", LegacySQLTypeName.RECORD,
          Field.of("parent", LegacySQLTypeName.RECORD,
              Field.newBuilder("scalars", LegacySQLTypeName.RECORD, //
                  Field.of("key", LegacySQLTypeName.STRING), //
                  Field.of("value", LegacySQLTypeName.STRING)) //
                  .setMode(Mode.REPEATED).build()))));
  Map<String, Object> expected = ImmutableMap.of("payload",
      ImmutableMap.of("processes", ImmutableMap.of("parent", ImmutableMap.of("scalars",
          ImmutableList.of(ImmutableMap.of("key", "timestamps.first_paint", "value", "5405"))))));
  TRANSFORM.transformForBqSchema(parent, bqFields, additionalProperties);
  assertEquals(expected, Json.asMap(parent));
}
 
Example 14
Source Project: nifi   Source File: PutBigQueryStreamingIT.java    License: Apache License 2.0
private void createTable(String tableName) {
    TableId tableId = TableId.of(dataset.getDatasetId().getDataset(), tableName);

    // Table field definition
    Field id = Field.newBuilder("id", LegacySQLTypeName.INTEGER).setMode(Mode.REQUIRED).build();
    Field name = Field.newBuilder("name", LegacySQLTypeName.STRING).setMode(Mode.NULLABLE).build();
    Field alias = Field.newBuilder("alias", LegacySQLTypeName.STRING).setMode(Mode.REPEATED).build();

    Field zip = Field.newBuilder("zip", LegacySQLTypeName.STRING).setMode(Mode.NULLABLE).build();
    Field city = Field.newBuilder("city", LegacySQLTypeName.STRING).setMode(Mode.NULLABLE).build();
    Field addresses = Field.newBuilder("addresses", LegacySQLTypeName.RECORD, zip, city).setMode(Mode.REPEATED).build();

    Field position = Field.newBuilder("position", LegacySQLTypeName.STRING).setMode(Mode.NULLABLE).build();
    Field company = Field.newBuilder("company", LegacySQLTypeName.STRING).setMode(Mode.NULLABLE).build();
    Field job = Field.newBuilder("job", LegacySQLTypeName.RECORD, position, company).setMode(Mode.NULLABLE).build();

    // Table schema definition
    schema = Schema.of(id, name, alias, addresses, job);
    TableDefinition tableDefinition = StandardTableDefinition.of(schema);
    TableInfo tableInfo = TableInfo.newBuilder(tableId, tableDefinition).build();

    // create table
    bigquery.create(tableInfo);
}
 
Example 15
@Test
public void testAdditionalProperties() throws Exception {
  ObjectNode parent = Json.createObjectNode().set("outer", Json.createObjectNode()
      .put("otherfield", 3).set("mapField", Json.createObjectNode().put("foo", 3).put("bar", 4)));
  parent.put("clientId", "abc123");
  parent.put("otherStrangeIdField", 3);
  List<Field> bqFields = ImmutableList.of(Field.of("client_id", LegacySQLTypeName.STRING), //
      Field.of("outer", LegacySQLTypeName.RECORD, //
          MAP_FIELD));
  Map<String, Object> expected = ImmutableMap.of("client_id", "abc123", "outer",
      ImmutableMap.of("map_field", ImmutableList.of(ImmutableMap.of("key", "foo", "value", 3),
          ImmutableMap.of("key", "bar", "value", 4))));
  Map<String, Object> expectedAdditional = ImmutableMap.of("otherStrangeIdField", 3, "outer",
      ImmutableMap.of("otherfield", 3));
  ObjectNode additionalProperties = Json.createObjectNode();
  TRANSFORM.transformForBqSchema(parent, bqFields, additionalProperties);
  assertEquals(expected, Json.asMap(parent));
  assertEquals(expectedAdditional, Json.asMap(additionalProperties));
}
 
Example 16
@Test
public void testDoublyNestedList() throws Exception {
  ObjectNode additionalProperties = Json.createObjectNode();
  ObjectNode parent = Json.readObjectNode("{\n" //
      + "  \"payload\": [[[0],[1]],[[2]]]\n" //
      + "}\n");
  List<Field> bqFields = ImmutableList.of(Field.newBuilder("payload", LegacySQLTypeName.RECORD, //
      Field.newBuilder("list", LegacySQLTypeName.RECORD, //
          Field.newBuilder("list", LegacySQLTypeName.INTEGER).setMode(Mode.REPEATED).build()) //
          .setMode(Mode.REPEATED).build() //
  ).setMode(Mode.REPEATED).build()); //
  Map<String, Object> expected = Json.readMap("{\"payload\":[" //
      + "{\"list\":[{\"list\":[0]},{\"list\":[1]}]}," //
      + "{\"list\":[{\"list\":[2]}]}" //
      + "]}");
  TRANSFORM.transformForBqSchema(parent, bqFields, additionalProperties);
  assertEquals(expected, Json.asMap(parent));
}
 
Example 17
@Test
public void testRepeatedRecordAdditionalProperties() throws Exception {
  ObjectNode additionalProperties = Json.createObjectNode();
  ObjectNode parent = Json.readObjectNode("{\n" //
      + "  \"payload\": [\n" //
      + "     {\"a\": 1},\n" //
      + "     {\"a\": 2, \"b\": 22},\n" //
      + "     {\"a\": 3}\n" //
      + "]}\n");
  List<Field> bqFields = ImmutableList.of(Field.newBuilder("payload", LegacySQLTypeName.RECORD, //
      Field.of("a", LegacySQLTypeName.INTEGER)) //
      .setMode(Mode.REPEATED).build());
  Map<String, Object> expected = Json.readMap("{\"payload\":[{\"a\":1},{\"a\":2},{\"a\":3}]}");
  TRANSFORM.transformForBqSchema(parent, bqFields, additionalProperties);
  assertEquals(expected, Json.asMap(parent));

  Map<String, Object> expectedAdditional = Json.readMap("{\"payload\":[{},{\"b\":22},{}]}");
  assertEquals(expectedAdditional, Json.asMap(additionalProperties));
}
 
Example 18
Source Project: spark-bigquery-connector   Source File: SchemaConverters.java    License: Apache License 2.0
private static DataType getDataType(Field field) {
    if (LegacySQLTypeName.INTEGER.equals(field.getType())) {
        return DataTypes.LongType;
    } else if (LegacySQLTypeName.FLOAT.equals(field.getType())) {
        return DataTypes.DoubleType;
    } else if (LegacySQLTypeName.NUMERIC.equals(field.getType())) {
        return NUMERIC_SPARK_TYPE;
    } else if (LegacySQLTypeName.STRING.equals(field.getType())) {
        return DataTypes.StringType;
    } else if (LegacySQLTypeName.BOOLEAN.equals(field.getType())) {
        return DataTypes.BooleanType;
    } else if (LegacySQLTypeName.BYTES.equals(field.getType())) {
        return DataTypes.BinaryType;
    } else if (LegacySQLTypeName.DATE.equals(field.getType())) {
        return DataTypes.DateType;
    } else if (LegacySQLTypeName.TIMESTAMP.equals(field.getType())) {
        return DataTypes.TimestampType;
    } else if (LegacySQLTypeName.TIME.equals(field.getType())) {
        return DataTypes.LongType;
        // TODO(#5): add a timezone to allow parsing to timestamp
        // This can be safely cast to TimestampType, but doing so causes the date to be inferred
        // as the current date. It's safer to leave as a stable string and give the user the
        // option of casting themselves.
    } else if (LegacySQLTypeName.DATETIME.equals(field.getType())) {
        return DataTypes.StringType;
    } else if (LegacySQLTypeName.RECORD.equals(field.getType())) {
        List<StructField> structFields = field.getSubFields().stream()
                .map(SchemaConverters::convert)
                .collect(Collectors.toList());
        return new StructType(structFields.toArray(new StructField[0]));
    } else if (LegacySQLTypeName.GEOGRAPHY.equals(field.getType())) {
        return DataTypes.StringType;
    } else {
        throw new IllegalStateException("Unexpected type: " + field.getType());
    }
}
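As an illustration of the mapping above (callable only from within SchemaConverters, since getDataType is private), a TIMESTAMP field converts to Spark's TimestampType:

Field ts = Field.of("created", LegacySQLTypeName.TIMESTAMP);
DataType sparkType = getDataType(ts); // DataTypes.TimestampType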
 
Example 19
Source Project: google-cloud-java   Source File: BigQueryExample.java    License: Apache License 2.0
static Schema parseSchema(String[] args, int start, int end) {
  List<Field> schemaFields = new ArrayList<>();
  for (int i = start; i < end; i++) {
    String[] fieldsArray = args[i].split(":");
    if (fieldsArray.length != 2) {
      throw new IllegalArgumentException("Unrecognized field definition '" + args[i] + "'.");
    }
    String fieldName = fieldsArray[0];
    String typeString = fieldsArray[1].toLowerCase();
    LegacySQLTypeName fieldType;
    switch (typeString) {
      case "string":
        fieldType = LegacySQLTypeName.STRING;
        break;
      case "integer":
        fieldType = LegacySQLTypeName.INTEGER;
        break;
      case "timestamp":
        fieldType = LegacySQLTypeName.TIMESTAMP;
        break;
      case "float":
        fieldType = LegacySQLTypeName.FLOAT;
        break;
      case "boolean":
        fieldType = LegacySQLTypeName.BOOLEAN;
        break;
      case "bytes":
        fieldType = LegacySQLTypeName.BYTES;
        break;
      default:
        throw new IllegalArgumentException("Unrecognized field type '" + typeString + "'.");
    }
    schemaFields.add(Field.of(fieldName, fieldType));
  }
  return Schema.of(schemaFields);
}
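A sketch of driving this parser, with illustrative arguments in the name:type format it expects:

String[] args = {"word:string", "word_count:integer", "corpus_date:timestamp"};
// Produces a schema with fields word (STRING), word_count (INTEGER),
// and corpus_date (TIMESTAMP).
Schema schema = parseSchema(args, 0, args.length);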
 
Example 20
Source Project: presto   Source File: TestTypeConversions.java    License: Apache License 2.0
@Test
public void testConvertOneLevelRecordField()
{
    Field field = Field.of(
            "rec",
            LegacySQLTypeName.RECORD,
            Field.of("sub_s", LegacySQLTypeName.STRING),
            Field.of("sub_i", LegacySQLTypeName.INTEGER));
    ColumnMetadata metadata = Conversions.toColumnMetadata(field);
    RowType targetType = RowType.rowType(
            RowType.field("sub_s", VarcharType.VARCHAR),
            RowType.field("sub_i", BigintType.BIGINT));
    assertThat(metadata.getType()).isEqualTo(targetType);
}
 
Example 21
Source Project: presto   Source File: TestTypeConversions.java    License: Apache License 2.0
@Test
public void testConvertStringArrayField()
{
    Field field = Field.newBuilder("test", LegacySQLTypeName.STRING)
            .setMode(Field.Mode.REPEATED)
            .build();
    ColumnMetadata metadata = Conversions.toColumnMetadata(field);
    assertThat(metadata.getType()).isEqualTo(new ArrayType(VarcharType.VARCHAR));
}
 
Example 22
Source Project: beam   Source File: BigQueryClient.java    License: Apache License 2.0
/**
 * Creates a new table with the given schema if it does not already exist.
 *
 * @param tableName name of the desired table
 * @param schema map of field names to legacy SQL type names for the table's fields
 */
public void createTableIfNotExists(String tableName, Map<String, String> schema) {
  TableId tableId = TableId.of(projectId, dataset, tableName);

  if (client.getTable(tableId, FIELD_OPTIONS) == null) {
    List<Field> schemaFields =
        schema.entrySet().stream()
            .map(entry -> Field.of(entry.getKey(), LegacySQLTypeName.valueOf(entry.getValue())))
            .collect(Collectors.toList());

    createTable(tableId, Schema.of(schemaFields));
  }
}
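A hypothetical call to this helper; the field names are illustrative, and the type strings must name LegacySQLTypeName constants because the method resolves them with valueOf:

Map<String, String> schema = new HashMap<>();
schema.put("event_name", "STRING");
schema.put("event_count", "INTEGER");
// client is a BigQueryClient instance configured with project and dataset.
client.createTableIfNotExists("events", schema);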
 
Example 23
Source Project: DataflowTemplates   Source File: BigQueryMapper.java    License: Apache License 2.0
private void updateTableIfRequired(TableId tableId, TableRow row,
    Map<String, LegacySQLTypeName> inputSchema) {
  // Ensure Instance of BigQuery Exists
  if (this.bigquery == null) {
    this.bigquery =
        BigQueryOptions.newBuilder()
            .setProjectId(getProjectId())
            .build()
            .getService();
  }

  // Get BigQuery Table for Given Row
  Table table = getBigQueryTable(tableId);

  // Validate Table Schema
  FieldList tableFields = table.getDefinition().getSchema().getFields();

  Set<String> rowKeys = row.keySet();
  Boolean tableWasUpdated = false;
  List<Field> newFieldList = new ArrayList<Field>();
  for (String rowKey : rowKeys) {
    // Check if rowKey (column from data) is in the BQ Table
    try {
      Field tableField = tableFields.get(rowKey);
    } catch (IllegalArgumentException e) {
      tableWasUpdated = addNewTableField(tableId, row, rowKey, newFieldList, inputSchema);
    }
  }

  if (tableWasUpdated) {
    LOG.info("Updating Table");
    updateBigQueryTable(tableId, table, tableFields, newFieldList);
  }
}
 
Example 24
Source Project: beast   Source File: BQField.java    License: Apache License 2.0
private LegacySQLTypeName getType(ProtoField protoField) {
    LegacySQLTypeName typeFromFieldName = FIELD_NAME_TO_BQ_TYPE_MAP.get(protoField.getTypeName()) != null
            ? FIELD_NAME_TO_BQ_TYPE_MAP.get(protoField.getTypeName())
            : FIELD_TYPE_TO_BQ_TYPE_MAP.get(protoField.getType());
    if (typeFromFieldName == null) {
        statsClient.increment(String.format("proto.bq.typemapping.notfound.errors,field=%s,type=%s,typeName=%s", protoField.getName(), protoField.getType(), protoField.getTypeName()));
        throw new BQSchemaMappingException(String.format("No type mapping found for field: %s, fieldType: %s, typeName: %s", protoField.getName(), protoField.getType(), protoField.getTypeName()));
    }
    return typeFromFieldName;
}
 
Example 25
Source Project: beast   Source File: BQField.java    License: Apache License 2.0
public static final List<Field> getMetadataFields() {
    return new ArrayList<Field>() {{
        add(Field.newBuilder(Constants.OFFSET_COLUMN_NAME, LegacySQLTypeName.INTEGER).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder(Constants.TOPIC_COLUMN_NAME, LegacySQLTypeName.STRING).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder(Constants.LOAD_TIME_COLUMN_NAME, LegacySQLTypeName.TIMESTAMP).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder(Constants.TIMESTAMP_COLUMN_NAME, LegacySQLTypeName.TIMESTAMP).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder(Constants.PARTITION_COLUMN_NAME, LegacySQLTypeName.INTEGER).setMode(Field.Mode.NULLABLE).build());
    }};
}
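A sketch (not from the beast codebase) of appending these metadata columns to an existing field list when building a table schema; messageFields is hypothetical:

List<Field> allFields = new ArrayList<>(messageFields);
allFields.addAll(BQField.getMetadataFields());
Schema tableSchema = Schema.of(allFields);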
 
Example 26
Source Project: beast   Source File: ConverterTest.java    License: Apache License 2.0
@Test
public void shouldTestShouldConvertIntegerDataTypes() {
    List<DescriptorProtos.FieldDescriptorProto.Type> allIntTypes = new ArrayList<DescriptorProtos.FieldDescriptorProto.Type>() {{
        add(DescriptorProtos.FieldDescriptorProto.Type.TYPE_INT64);
        add(DescriptorProtos.FieldDescriptorProto.Type.TYPE_UINT64);
        add(DescriptorProtos.FieldDescriptorProto.Type.TYPE_INT32);
        add(DescriptorProtos.FieldDescriptorProto.Type.TYPE_UINT32);
        add(DescriptorProtos.FieldDescriptorProto.Type.TYPE_FIXED64);
        add(DescriptorProtos.FieldDescriptorProto.Type.TYPE_FIXED32);
        add(DescriptorProtos.FieldDescriptorProto.Type.TYPE_SFIXED32);
        add(DescriptorProtos.FieldDescriptorProto.Type.TYPE_SFIXED64);
        add(DescriptorProtos.FieldDescriptorProto.Type.TYPE_SINT32);
        add(DescriptorProtos.FieldDescriptorProto.Type.TYPE_SINT64);
    }};

    List<ProtoField> nestedBQFields = IntStream.range(0, allIntTypes.size())
            .mapToObj(index -> new ProtoField("field-" + index, allIntTypes.get(index), DescriptorProtos.FieldDescriptorProto.Label.LABEL_OPTIONAL))
            .collect(Collectors.toList());


    List<Field> fields = converter.generateBigquerySchema(new ProtoField(nestedBQFields));
    assertEquals(nestedBQFields.size(), fields.size());
    IntStream.range(0, nestedBQFields.size())
            .forEach(index -> {
                assertEquals(Field.Mode.NULLABLE, fields.get(index).getMode());
                assertEquals(nestedBQFields.get(index).getName(), fields.get(index).getName());
                assertEquals(LegacySQLTypeName.INTEGER, fields.get(index).getType());
            });
}
 
Example 27
Source Project: beast   Source File: ConverterTest.java    License: Apache License 2.0
@Test
public void shouldTestConvertToSchemaForTimestamp() {
    ProtoField protoField = new ProtoField(new ArrayList<ProtoField>() {{
        add(new ProtoField("field1_timestamp",
                Constants.ProtobufTypeName.TIMESTAMP_PROTOBUF_TYPE_NAME,
                DescriptorProtos.FieldDescriptorProto.Type.TYPE_MESSAGE,
                DescriptorProtos.FieldDescriptorProto.Label.LABEL_OPTIONAL));
    }});

    List<Field> fields = converter.generateBigquerySchema(protoField);

    assertEquals(protoField.getFields().size(), fields.size());
    assertBqField(protoField.getFields().get(0).getName(), LegacySQLTypeName.TIMESTAMP, Field.Mode.NULLABLE, fields.get(0));
}
 
Example 28
Source Project: DataflowTemplates   Source File: BigQueryMapper.java    License: Apache License 2.0
private Map<String, LegacySQLTypeName> getObjectSchema(InputT input) {
  Map<String, LegacySQLTypeName> inputSchema = getInputSchema(input);
  if (this.defaultSchema != null) {
    inputSchema.putAll(this.defaultSchema);
  }

  return inputSchema;
}
 
Example 29
Source Project: beast   Source File: ProtoUpdateListenerTest.java    License: Apache License 2.0
@Test(expected = BQTableUpdateFailure.class)
public void shouldThrowExceptionIfConverterFails() throws IOException {
    ProtoField returnedProtoField = new ProtoField();
    when(protoFieldFactory.getProtoField()).thenReturn(returnedProtoField);
    returnedProtoField.addField(new ProtoField("order_number", 1));
    returnedProtoField.addField(new ProtoField("order_url", 2));

    HashMap<String, DescriptorAndTypeName> descriptorsMap = new HashMap<String, DescriptorAndTypeName>() {{
        put(String.format("%s.%s", TestKey.class.getPackage(), TestKey.class.getName()), new DescriptorAndTypeName(TestKey.getDescriptor(), String.format(".%s.%s", TestKey.getDescriptor().getFile().getPackage(), TestKey.getDescriptor().getName())));
    }};
    when(protoMappingParser.parseFields(returnedProtoField, stencilConfig.getProtoSchema(), StencilUtils.getAllProtobufDescriptors(descriptorsMap), StencilUtils.getTypeNameToPackageNameMap(descriptorsMap))).thenReturn(returnedProtoField);
    ObjectNode objNode = JsonNodeFactory.instance.objectNode();
    objNode.put("1", "order_number");
    objNode.put("2", "order_url");
    String expectedProtoMapping = objectMapper.writeValueAsString(objNode);
    when(protoMappingConverter.generateColumnMappings(returnedProtoField.getFields())).thenReturn(expectedProtoMapping);

    ArrayList<Field> returnedSchemaFields = new ArrayList<Field>() {{
        add(Field.newBuilder("order_number", LegacySQLTypeName.INTEGER).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder("order_url", LegacySQLTypeName.STRING).setMode(Field.Mode.NULLABLE).build());
    }};
    when(protoMappingConverter.generateBigquerySchema(returnedProtoField)).thenReturn(returnedSchemaFields);

    ArrayList<Field> bqSchemaFields = new ArrayList<Field>() {{
        add(Field.newBuilder("order_number", LegacySQLTypeName.INTEGER).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder("order_url", LegacySQLTypeName.STRING).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder(Constants.OFFSET_COLUMN_NAME, LegacySQLTypeName.INTEGER).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder(Constants.TOPIC_COLUMN_NAME, LegacySQLTypeName.STRING).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder(Constants.LOAD_TIME_COLUMN_NAME, LegacySQLTypeName.TIMESTAMP).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder(Constants.TIMESTAMP_COLUMN_NAME, LegacySQLTypeName.TIMESTAMP).setMode(Field.Mode.NULLABLE).build());
        add(Field.newBuilder(Constants.PARTITION_COLUMN_NAME, LegacySQLTypeName.INTEGER).setMode(Field.Mode.NULLABLE).build());
    }};
    doThrow(new BigQueryException(10, "bigquery mapping has failed")).when(bqInstance).upsertTable(bqSchemaFields);

    protoUpdateListener.onProtoUpdate(stencilConfig.getStencilUrl(), descriptorsMap);
}