Java Code Examples for com.google.api.services.bigquery.model.TableSchema#setFields()

The following examples show how to use com.google.api.services.bigquery.model.TableSchema#setFields(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DLPTextToBigQueryStreaming.java    From dlp-dataflow-deidentification with Apache License 2.0 6 votes vote down vote up
/**
 * Derives a BigQuery schema from the cells of the row paired with the destination.
 *
 * <p>Each cell is read as a map whose first key is taken as the column header. The header is
 * passed through {@code checkHeaderName} and every column is declared as STRING.
 *
 * @param destination key/row pair; only the row (the value) is inspected
 * @return a schema with one STRING field per cell of the row
 */
@Override
public TableSchema getSchema(KV<String, TableRow> destination) {
  List<TableFieldSchema> columns = new ArrayList<>();
  for (Map<String, Object> cell : destination.getValue().getF()) {
    String columnName = cell.keySet().iterator().next();
    // Currently every BigQuery column is typed as STRING.
    TableFieldSchema column = new TableFieldSchema();
    column.setName(checkHeaderName(columnName));
    column.setType("STRING");
    columns.add(column);
  }
  return new TableSchema().setFields(columns);
}
 
Example 2
Source File: BigQueryDynamicConverters.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Derives a BigQuery schema from the cells of the row paired with the destination.
 *
 * <p>The first key of each cell is used verbatim as the column name and every column is declared
 * as STRING. Unlike a sibling implementation, the header is not passed through
 * {@code checkHeaderName}; whether that should be reinstated is an open TODO.
 *
 * @param destination table id/row pair; only the row (the value) is inspected
 * @return a schema with one STRING field per cell of the row
 */
@Override
public TableSchema getSchema(KV<TableId, TableRow> destination) {
  List<TableFieldSchema> columns = new ArrayList<>();
  for (Map<String, Object> cell : destination.getValue().getF()) {
    String columnName = cell.keySet().iterator().next();
    // Currently every BigQuery column is typed as STRING.
    TableFieldSchema column = new TableFieldSchema();
    column.setName(columnName);
    column.setType("STRING");
    columns.add(column);
  }
  return new TableSchema().setFields(columns);
}
 
Example 3
Source File: DLPTextToBigQueryStreaming.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the table schema for a destination from the row that accompanies it.
 *
 * <p>One STRING field is produced per cell; the field name is the cell's first key after it has
 * been run through {@code checkHeaderName}.
 *
 * @param destination key/row pair whose row supplies the column headers
 * @return the schema describing those columns
 */
@Override
public TableSchema getSchema(KV<String, TableRow> destination) {
  List<TableCell> rowCells = destination.getValue().getF();
  List<TableFieldSchema> stringColumns = new ArrayList<>();

  for (TableCell rowCell : rowCells) {
    Map<String, Object> entries = rowCell;
    String rawHeader = entries.keySet().iterator().next();
    // All BigQuery data types are currently set to STRING.
    stringColumns.add(
        new TableFieldSchema().setType("STRING").setName(checkHeaderName(rawHeader)));
  }

  TableSchema result = new TableSchema();
  result.setFields(stringColumns);
  return result;
}
 
Example 4
Source File: BigQueryDynamicConverters.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the table schema for a destination from the row that accompanies it.
 *
 * <p>One STRING field is produced per cell, named after the cell's first key exactly as found.
 * Header sanitising via {@code checkHeaderName} is deliberately not applied here; an open TODO
 * questions whether it should be added back.
 *
 * @param destination table id/row pair whose row supplies the column headers
 * @return the schema describing those columns
 */
@Override
public TableSchema getSchema(KV<TableId, TableRow> destination) {
  List<TableCell> rowCells = destination.getValue().getF();
  List<TableFieldSchema> stringColumns = new ArrayList<>();

  for (TableCell rowCell : rowCells) {
    Map<String, Object> entries = rowCell;
    String rawHeader = entries.keySet().iterator().next();
    // All BigQuery data types are currently set to STRING.
    stringColumns.add(new TableFieldSchema().setType("STRING").setName(rawHeader));
  }

  TableSchema result = new TableSchema();
  result.setFields(stringColumns);
  return result;
}
 
Example 5
Source File: FeatureSetSpecToTableSchema.java    From feast with Apache License 2.0 5 votes vote down vote up
/**
 * Copies a table schema into the API model {@link TableSchema} form.
 *
 * <p>Name and type are always copied for each field; mode and description are copied only when
 * they are non-null on the source field.
 *
 * @param schema source schema whose fields are converted
 * @return a new {@link TableSchema} holding one converted field per source field
 */
private TableSchema serializeSchema(Schema schema) {
  List<TableFieldSchema> converted =
      schema.getFields().stream()
          .map(
              source -> {
                TableFieldSchema target = new TableFieldSchema();
                target.setName(source.getName());
                target.setType(source.getType().name());

                // Optional attributes are only set when present on the source field.
                if (source.getMode() != null) {
                  target.setMode(source.getMode().name());
                }
                if (source.getDescription() != null) {
                  target.setDescription(source.getDescription());
                }
                return target;
              })
          .collect(Collectors.toList());

  return new TableSchema().setFields(converted);
}
 
Example 6
Source File: BigQueryToTFRecordTest.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Expects {@code record2Example} to fail for a schema containing an unknown column type.
 *
 * <p>The "bool1" column is deliberately declared with the misspelled type "BOLEAN"; the test
 * expects a {@link RuntimeException} whose message contains "Unsupported type: BOLEAN".
 */
@Test
public void testBigQueryToTFRecordWithExeception() throws Exception {
  expectedEx.expect(RuntimeException.class);
  expectedEx.expectMessage("Unsupported type: BOLEAN");

  // Autoboxing replaces the deprecated Long(long) constructor.
  Long i1 = 0L;
  double f1 = 0.0d;
  String s1 = "";
  byte[] b1 = new byte[8];

  record.put("int1", i1);
  record.put("float1", f1);
  record.put("string1", s1);
  record.put("bytes1", b1);
  record.put("bool1", true);

  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName("int1").setType("INTEGER"));
  fields.add(new TableFieldSchema().setName("float1").setType("FLOAT"));
  fields.add(new TableFieldSchema().setName("string1").setType("STRING"));
  fields.add(new TableFieldSchema().setName("bytes1").setType("BYTES"));
  // Intentionally invalid type name: this is what should trigger the expected exception.
  fields.add(new TableFieldSchema().setName("bool1").setType("BOLEAN"));
  final TableSchema tableSchema = new TableSchema();
  tableSchema.setFields(fields);
  final SchemaAndRecord schemaAndRecord = new SchemaAndRecord(record, tableSchema);

  // The result is not captured: the call itself is expected to throw.
  record2Example(schemaAndRecord);
}
 
Example 7
Source File: BigQueryLoader.java    From quetzal with Eclipse Public License 2.0 5 votes vote down vote up
/**
 * Creates the schema of the datatype table.
 *
 * @return a schema with a STRING "datatype_name" column and an INTEGER "datatype_id" column
 */
public static TableSchema datatypeTable() {
	return new TableSchema()
			.setFields(
					Arrays.asList(
							field("datatype_name", "STRING"),
							field("datatype_id", "INTEGER")));
}
 
Example 8
Source File: BigQueryLoader.java    From quetzal with Eclipse Public License 2.0 5 votes vote down vote up
/**
 * Creates the schema of the predicate table.
 *
 * @return a schema with the columns pred, onetoone, num_hashes, db2type, spills and hash0..hash2
 */
public static TableSchema predicateTable() {
	return new TableSchema()
			.setFields(
					Arrays.asList(
							field("pred", "STRING"),
							field("onetoone", "BOOLEAN"),
							field("num_hashes", "INTEGER"),
							field("db2type", "STRING"),
							field("spills", "INTEGER"),
							field("hash0", "INTEGER"),
							field("hash1", "INTEGER"),
							field("hash2", "INTEGER")));
}
 
Example 9
Source File: BeamBQOutputTransform.java    From hop with Apache License 2.0 4 votes vote down vote up
/**
 * Writes the incoming Hop rows to the configured BigQuery table.
 *
 * <p>The row metadata is inflated from {@code rowMetaJson}, translated into a BigQuery table
 * schema, and a {@code BigQueryIO} write is applied to the input using the configured create and
 * write dispositions.
 *
 * @param input the rows to write
 * @return {@code PDone} for the input's pipeline
 * @throws RuntimeException if anything fails while building the transform (the original exception
 *     is kept as the cause), including a value type with no BigQuery mapping
 */
@Override public PDone expand( PCollection<HopRow> input ) {

    try {
      // Only initialize once on this node/vm
      //
      BeamHop.init( transformPluginClasses, xpPluginClasses );

      // Inflate the metadata on the node where this is running...
      //
      IRowMeta rowMeta = JsonRowMeta.fromJson( rowMetaJson );


      // Which table do we write to?
      // The project id is optional and only set when one was provided.
      //
      TableReference tableReference = new TableReference();
      if ( StringUtils.isNotEmpty( projectId ) ) {
        tableReference.setProjectId( projectId );
      }
      tableReference.setDatasetId( datasetId );
      tableReference.setTableId( tableId );

      // Translate every Hop value type into the matching BigQuery column type.
      //
      TableSchema tableSchema = new TableSchema();
      List<TableFieldSchema> schemaFields = new ArrayList<>();
      for ( IValueMeta valueMeta : rowMeta.getValueMetaList() ) {
        TableFieldSchema schemaField = new TableFieldSchema();
        schemaField.setName( valueMeta.getName() );
        switch(valueMeta.getType()){
          case IValueMeta.TYPE_STRING: schemaField.setType( "STRING" ); break;
          case IValueMeta.TYPE_INTEGER: schemaField.setType( "INTEGER" ); break;
          case IValueMeta.TYPE_DATE: schemaField.setType( "DATETIME" ); break;
          case IValueMeta.TYPE_BOOLEAN: schemaField.setType( "BOOLEAN" ); break;
          case IValueMeta.TYPE_NUMBER: schemaField.setType( "FLOAT" ); break;
          default:
            throw new RuntimeException( "Conversion from Hop value "+valueMeta.toString()+" to BigQuery TableRow isn't supported yet" );
        }
        schemaFields.add(schemaField);
      }
      tableSchema.setFields( schemaFields );

      SerializableFunction<HopRow, TableRow> formatFunction = new HopToBQTableRowFn( transformName, rowMetaJson, transformPluginClasses, xpPluginClasses );

      BigQueryIO.Write.CreateDisposition createDisposition;
      if (createIfNeeded) {
        createDisposition = BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED;
      }  else {
        createDisposition = BigQueryIO.Write.CreateDisposition.CREATE_NEVER;
      }

      BigQueryIO.Write.WriteDisposition writeDisposition;
      if (truncateTable) {
        // Truncation was requested: replace the table contents instead of appending to them.
        writeDisposition = BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE;
      } else if (failIfNotEmpty) {
        writeDisposition = BigQueryIO.Write.WriteDisposition.WRITE_EMPTY;
      } else {
        writeDisposition = BigQueryIO.Write.WriteDisposition.WRITE_APPEND;
      }

      BigQueryIO.Write<HopRow> bigQueryWrite = BigQueryIO
        .<HopRow>write()
        .to( tableReference )
        .withSchema( tableSchema )
        .withCreateDisposition( createDisposition )
        .withWriteDisposition( writeDisposition )
        .withFormatFunction( formatFunction );

      // TODO: pass the results along the way at some point
      //
      input.apply( transformName, bigQueryWrite );

      // End of the line
      //
      return PDone.in( input.getPipeline() );

    } catch ( Exception e ) {
      // Count and log the failure, then rethrow with the original exception as the cause.
      numErrors.inc();
      LOG.error( "Error in Beam BigQuery output transform", e );
      throw new RuntimeException( "Error in Beam BigQuery output transform", e );
    }
  }
 
Example 10
Source File: S3Import.java    From dlp-dataflow-deidentification with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a BigQuery schema by inspecting the cells of the row paired with the destination.
 *
 * <p>For each cell the first key is the column header and the type is whatever
 * {@code Util.typeCheck} reports for the value's string form. A value typed as RECORD is split on
 * commas into {@code name=value} elements, each of which becomes a nested field typed by
 * {@code Util.typeCheck} as well.
 *
 * @param destination key/row pair; the row is expected to carry its cells via {@code getF()}
 * @return the schema derived from the row's cells
 */
@Override
public TableSchema getSchema(KV<String, TableRow> destination) {
  List<TableFieldSchema> columns = new ArrayList<>();

  // When the TableRow was created in earlier steps, setF() was used to set up the TableCells
  // so that the table schema can be constructed from them here.
  for (Map<String, Object> cell : destination.getValue().getF()) {
    String header = cell.keySet().iterator().next();
    String rawValue = cell.get(header).toString();
    String type = Util.typeCheck(rawValue);
    LOG.debug("Type {}, header {}, value {}", type, header, rawValue);

    TableFieldSchema column = new TableFieldSchema().setName(header).setType(type);
    if (type.equals("RECORD")) {
      List<TableFieldSchema> nestedFields = new ArrayList<>();
      for (String record : rawValue.split(",")) {
        // Drop the element's first character, then separate its name from its value.
        String[] element = record.substring(1).split("=");
        // Drop the value's last character before type detection.
        String elementValue = element[1].substring(0, element[1].length() - 1);
        String elementType = Util.typeCheck(elementValue.trim());
        LOG.debug(
            "element header {} , element type {}, element Value {}",
            element[0],
            elementType,
            elementValue);
        nestedFields.add(new TableFieldSchema().setName(element[0]).setType(elementType));
      }
      column.setFields(nestedFields);
    }
    columns.add(column);
  }

  return new TableSchema().setFields(columns);
}
 
Example 11
Source File: BeamBQOutputTransform.java    From kettle-beam with Apache License 2.0 4 votes vote down vote up
/**
 * Writes the incoming Kettle rows to the configured BigQuery table.
 *
 * <p>The row metadata is inflated from {@code rowMetaJson}, translated into a BigQuery table
 * schema, and a {@code BigQueryIO} write is applied to the input using the configured create and
 * write dispositions.
 *
 * @param input the rows to write
 * @return {@code PDone} for the input's pipeline
 * @throws RuntimeException if anything fails while building the transform (the original exception
 *     is kept as the cause), including a value type with no BigQuery mapping
 */
@Override public PDone expand( PCollection<KettleRow> input ) {

    try {
      // Only initialize once on this node/vm
      //
      BeamKettle.init( stepPluginClasses, xpPluginClasses );

      // Inflate the metadata on the node where this is running...
      //
      RowMetaInterface rowMeta = JsonRowMeta.fromJson( rowMetaJson );


      // Which table do we write to?
      // The project id is optional and only set when one was provided.
      //
      TableReference tableReference = new TableReference();
      if ( StringUtils.isNotEmpty( projectId ) ) {
        tableReference.setProjectId( projectId );
      }
      tableReference.setDatasetId( datasetId );
      tableReference.setTableId( tableId );

      // Translate every Kettle value type into the matching BigQuery column type.
      //
      TableSchema tableSchema = new TableSchema();
      List<TableFieldSchema> schemaFields = new ArrayList<>();
      for ( ValueMetaInterface valueMeta : rowMeta.getValueMetaList() ) {
        TableFieldSchema schemaField = new TableFieldSchema();
        schemaField.setName( valueMeta.getName() );
        switch(valueMeta.getType()){
          case ValueMetaInterface.TYPE_STRING: schemaField.setType( "STRING" ); break;
          case ValueMetaInterface.TYPE_INTEGER: schemaField.setType( "INTEGER" ); break;
          case ValueMetaInterface.TYPE_DATE: schemaField.setType( "DATETIME" ); break;
          case ValueMetaInterface.TYPE_BOOLEAN: schemaField.setType( "BOOLEAN" ); break;
          case ValueMetaInterface.TYPE_NUMBER: schemaField.setType( "FLOAT" ); break;
          default:
            throw new RuntimeException( "Conversion from Kettle value "+valueMeta.toString()+" to BigQuery TableRow isn't supported yet" );
        }
        schemaFields.add(schemaField);
      }
      tableSchema.setFields( schemaFields );

      SerializableFunction<KettleRow, TableRow> formatFunction = new KettleToBQTableRowFn( stepname, rowMetaJson, stepPluginClasses, xpPluginClasses );

      BigQueryIO.Write.CreateDisposition createDisposition;
      if (createIfNeeded) {
        createDisposition = BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED;
      }  else {
        createDisposition = BigQueryIO.Write.CreateDisposition.CREATE_NEVER;
      }

      BigQueryIO.Write.WriteDisposition writeDisposition;
      if (truncateTable) {
        // Truncation was requested: replace the table contents instead of appending to them.
        writeDisposition = BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE;
      } else if (failIfNotEmpty) {
        writeDisposition = BigQueryIO.Write.WriteDisposition.WRITE_EMPTY;
      } else {
        writeDisposition = BigQueryIO.Write.WriteDisposition.WRITE_APPEND;
      }

      BigQueryIO.Write<KettleRow> bigQueryWrite = BigQueryIO
        .<KettleRow>write()
        .to( tableReference )
        .withSchema( tableSchema )
        .withCreateDisposition( createDisposition )
        .withWriteDisposition( writeDisposition )
        .withFormatFunction( formatFunction );

      // TODO: pass the results along the way at some point
      //
      input.apply( stepname, bigQueryWrite );

      // End of the line
      //
      return PDone.in( input.getPipeline() );

    } catch ( Exception e ) {
      // Count and log the failure, then rethrow with the original exception as the cause.
      numErrors.inc();
      LOG.error( "Error in Beam BigQuery output transform", e );
      throw new RuntimeException( "Error in Beam BigQuery output transform", e );
    }
  }
 
Example 12
Source File: BigQueryToTFRecordTest.java    From DataflowTemplates with Apache License 2.0 4 votes vote down vote up
/**
 * Test {@link BigQueryToTFRecord} correctly outputs TFRecord.
 *
 * <p>A record with one column of each tested type is converted via {@code record2Example}; the
 * parsed result is compared, feature by feature, against a hand-built expected {@code Example}.
 */
@Test
public void record2ExampleTest() throws InvalidProtocolBufferException {

  // Autoboxing replaces the deprecated Long(long) constructor.
  Long i1 = 0L;
  double f1 = 0.0d;
  String s1 = "";
  byte[] b1 = new byte[8];

  record.put("int1", i1);
  record.put("float1", f1);
  record.put("string1", s1);
  record.put("bytes1", b1);
  record.put("bool1", true);

  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName("int1").setType("INTEGER"));
  fields.add(new TableFieldSchema().setName("float1").setType("FLOAT"));
  fields.add(new TableFieldSchema().setName("string1").setType("STRING"));
  fields.add(new TableFieldSchema().setName("bytes1").setType("BYTES"));
  fields.add(new TableFieldSchema().setName("bool1").setType("BOOLEAN"));
  final TableSchema tableSchema = new TableSchema();
  tableSchema.setFields(fields);
  final SchemaAndRecord schemaAndRecord = new SchemaAndRecord(record, tableSchema);

  // Build the expected Example by hand.
  Example.Builder example = Example.newBuilder();
  Features.Builder features = example.getFeaturesBuilder();
  Feature.Builder int1 = Feature.newBuilder();
  Feature.Builder float1 = Feature.newBuilder();
  Feature.Builder string1 = Feature.newBuilder();
  Feature.Builder bytes1 = Feature.newBuilder();
  Feature.Builder bool1 = Feature.newBuilder();

  int1.getInt64ListBuilder().addValue(i1);
  float1.getFloatListBuilder().addValue((float) f1);
  string1.getBytesListBuilder().addValue(ByteString.copyFromUtf8(s1));
  bytes1.getBytesListBuilder().addValue(ByteString.copyFrom(b1));
  // The expected encoding of boolean true is the int64 value 1.
  bool1.getInt64ListBuilder().addValue(1);

  features.putFeature("int1", int1.build());
  features.putFeature("float1", float1.build());
  features.putFeature("string1", string1.build());
  features.putFeature("bytes1", bytes1.build());
  features.putFeature("bool1", bool1.build());

  byte[] gotBytes = record2Example(schemaAndRecord);
  Example gotExample = Example.parseFrom(gotBytes);

  Map<String, Feature> gotFeatures = gotExample.getFeatures().getFeatureMap();
  final Example wantExample = example.build();
  Map<String, Feature> wantFeatures = wantExample.getFeatures().getFeatureMap();

  // Compare feature by feature, in the same order as the columns were declared.
  String[] featureNames = {"int1", "float1", "string1", "bytes1", "bool1"};
  for (String featureName : featureNames) {
    Assert.assertThat(gotFeatures.get(featureName), equalTo(wantFeatures.get(featureName)));
  }
}
 
Example 13
Source File: FieldSchemaListBuilder.java    From bigquery-etl-dataflow-sample with Apache License 2.0 4 votes vote down vote up
/**
 * Wraps the fields collected so far in a new BigQuery {@code TableSchema}.
 *
 * @return a fresh TableSchema whose fields are this builder's {@code schemaFields}
 */
public TableSchema schema() {
  return new TableSchema().setFields(schemaFields);
}
 
Example 14
Source File: BigQueryAvroUtilsTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Checks the Avro schema that {@code BigQueryAvroUtils.toGenericAvroSchema} generates for the
 * shared {@code fields} fixture, one field at a time.
 *
 * <p>"number" is expected as a plain LONG, "associates" as an array of records, and every other
 * field as a union of NULL with its value type.
 */
@Test
public void testConvertBigQuerySchemaToAvroSchema() {
  TableSchema tableSchema = new TableSchema();
  tableSchema.setFields(fields);
  Schema avroSchema =
      BigQueryAvroUtils.toGenericAvroSchema("testSchema", tableSchema.getFields());

  // Expected shapes for nullable columns: a union of NULL and the value type.
  Schema optionalString =
      Schema.createUnion(Schema.create(Type.NULL), Schema.create(Type.STRING));
  Schema optionalLong = Schema.createUnion(Schema.create(Type.NULL), Schema.create(Type.LONG));
  Schema optionalDouble =
      Schema.createUnion(Schema.create(Type.NULL), Schema.create(Type.DOUBLE));
  Schema optionalBytes =
      Schema.createUnion(Schema.create(Type.NULL), Schema.create(Type.BYTES));
  Schema optionalBoolean =
      Schema.createUnion(Schema.create(Type.NULL), Schema.create(Type.BOOLEAN));

  // Expected shapes for the two nested record columns, each with a single "species" field.
  Schema scionRecord =
      Schema.createRecord(
          "scion",
          "Translated Avro Schema for scion",
          "org.apache.beam.sdk.io.gcp.bigquery",
          false,
          ImmutableList.of(new Field("species", optionalString, null, (Object) null)));
  Schema associatesRecord =
      Schema.createRecord(
          "associates",
          "Translated Avro Schema for associates",
          "org.apache.beam.sdk.io.gcp.bigquery",
          false,
          ImmutableList.of(new Field("species", optionalString, null, (Object) null)));

  assertThat(avroSchema.getField("number").schema(), equalTo(Schema.create(Type.LONG)));
  assertThat(avroSchema.getField("species").schema(), equalTo(optionalString));
  assertThat(avroSchema.getField("quality").schema(), equalTo(optionalDouble));
  assertThat(avroSchema.getField("quantity").schema(), equalTo(optionalLong));
  assertThat(avroSchema.getField("birthday").schema(), equalTo(optionalLong));
  assertThat(avroSchema.getField("birthdayMoney").schema(), equalTo(optionalBytes));
  assertThat(avroSchema.getField("flighted").schema(), equalTo(optionalBoolean));
  assertThat(avroSchema.getField("sound").schema(), equalTo(optionalBytes));
  assertThat(avroSchema.getField("anniversaryDate").schema(), equalTo(optionalString));
  assertThat(avroSchema.getField("anniversaryDatetime").schema(), equalTo(optionalString));
  assertThat(avroSchema.getField("anniversaryTime").schema(), equalTo(optionalString));
  assertThat(avroSchema.getField("geoPositions").schema(), equalTo(optionalString));

  assertThat(
      avroSchema.getField("scion").schema(),
      equalTo(Schema.createUnion(Schema.create(Type.NULL), scionRecord)));
  assertThat(
      avroSchema.getField("associates").schema(),
      equalTo(Schema.createArray(associatesRecord)));
}