Java Code Examples for org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO#Write

The following examples show how to use org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO#Write. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BigQueryIOIT.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a pipeline that reads from a {@code SyntheticBoundedSource}, writes the values to BigQuery
 * through {@code writeIO}, waits for completion and passes the result to
 * {@code extractAndPublishTime}.
 *
 * @param writeIO the partially configured BigQuery write to complete and apply
 * @param metricName the metric name handed to the time monitor and to the publishing step
 */
private void testWrite(BigQueryIO.Write<byte[]> writeIO, String metricName) {
  Pipeline pipeline = Pipeline.create(options);
  BigQueryIO.Write.Method method = BigQueryIO.Write.Method.valueOf(options.getWriteMethod());

  // The target table has a single BYTES column named "data".
  TableFieldSchema dataField = new TableFieldSchema().setName("data").setType("BYTES");
  TableSchema schema = new TableSchema().setFields(Collections.singletonList(dataField));

  BigQueryIO.Write<byte[]> configuredWrite =
      writeIO
          .to(tableQualifier)
          .withCustomGcsTempLocation(ValueProvider.StaticValueProvider.of(tempRoot))
          .withMethod(method)
          .withSchema(schema);

  pipeline
      .apply("Read from source", Read.from(new SyntheticBoundedSource(sourceOptions)))
      .apply("Gather time", ParDo.of(new TimeMonitor<>(NAMESPACE, metricName)))
      .apply("Map records", ParDo.of(new MapKVToV()))
      .apply("Write to BQ", configuredWrite);

  PipelineResult result = pipeline.run();
  result.waitUntilFinish();
  extractAndPublishTime(result, metricName);
}
 
Example 2
Source File: BigQueryOutputRuntime.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Applies the configured write operation to the given BigQuery write transform.
 *
 * <p>The write operation is only honoured when the table operation is {@code NONE} or
 * {@code CREATE_IF_NOT_EXISTS}; for any other table operation the transform is returned
 * unchanged and, if a write operation was set, an info message is logged.
 *
 * @param bigQueryIOPTransform the write transform to configure
 * @return the transform with the write disposition applied, or the same transform when the
 *     write operation is ignored
 * @throws RuntimeException if the write operation is neither {@code APPEND} nor
 *     {@code WRITE_TO_EMPTY}
 */
private BigQueryIO.Write setWriteOperation(BigQueryIO.Write bigQueryIOPTransform) {
    BigQueryOutputProperties.TableOperation tableOp = properties.tableOperation.getValue();
    boolean honoursWriteOperation = tableOp == BigQueryOutputProperties.TableOperation.NONE
            || tableOp == BigQueryOutputProperties.TableOperation.CREATE_IF_NOT_EXISTS;

    if (!honoursWriteOperation) {
        if (properties.writeOperation.getValue() != null) {
            LOG.info("Write operation " + properties.writeOperation.getValue()
                    + " be ignored when Table operation is " + tableOp);
        }
        return bigQueryIOPTransform;
    }

    switch (properties.writeOperation.getValue()) {
    case APPEND:
        return bigQueryIOPTransform.withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND);
    case WRITE_TO_EMPTY:
        return bigQueryIOPTransform.withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_EMPTY);
    default:
        throw new RuntimeException("To be implemented: " + properties.writeOperation.getValue());
    }
}
 
Example 3
Source File: NexmarkLauncher.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code formattedResults} to a BigQuery table.
 *
 * <p>The table spec is obtained from {@code NexmarkUtils.tableSpec}. The schema has a STRING
 * {@code result} column and a REPEATED RECORD {@code records} column whose entries carry an
 * INTEGER {@code index} and a STRING {@code value}. The table is created if needed and rows are
 * appended.
 */
private void sinkResultsToBigQuery(
    PCollection<String> formattedResults, long now, String version) {
  String tableSpec = NexmarkUtils.tableSpec(options, queryName, now, version);

  // Fields of each nested "records" entry.
  TableFieldSchema indexField = new TableFieldSchema().setName("index").setType("INTEGER");
  TableFieldSchema valueField = new TableFieldSchema().setName("value").setType("STRING");

  // Top-level columns.
  TableFieldSchema resultField = new TableFieldSchema().setName("result").setType("STRING");
  TableFieldSchema recordsField =
      new TableFieldSchema()
          .setName("records")
          .setMode("REPEATED")
          .setType("RECORD")
          .setFields(ImmutableList.of(indexField, valueField));

  TableSchema tableSchema =
      new TableSchema().setFields(ImmutableList.of(resultField, recordsField));

  NexmarkUtils.console("Writing results to BigQuery table %s", tableSpec);

  // Kept as a raw type on purpose: the final apply() below relies on it.
  BigQueryIO.Write io =
      BigQueryIO.write()
          .to(tableSpec)
          .withSchema(tableSchema)
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
          .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND);

  String toRowStepName = queryName + ".StringToTableRow";
  String writeStepName = queryName + ".WriteBigQueryResults";
  formattedResults
      .apply(toRowStepName, ParDo.of(new StringToTableRow()))
      .apply(writeStepName, io);
}
 
Example 4
Source File: BigQueryIOIT.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code testWrite} with a write that formats each byte array as a {@code TableRow} whose
 * {@code data} field holds the bytes, reporting under {@code WRITE_TIME_METRIC_NAME}.
 */
private void testJsonWrite() {
  BigQueryIO.Write<byte[]> jsonWrite =
      BigQueryIO.<byte[]>write()
          .withFormatFunction(payload -> new TableRow().set("data", payload));
  testWrite(jsonWrite, WRITE_TIME_METRIC_NAME);
}
 
Example 5
Source File: BigQueryIOIT.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code testWrite} with a truncating write that converts each element to an Avro
 * {@code GenericRecord} whose {@code data} field wraps the element bytes, reporting under
 * {@code AVRO_WRITE_TIME_METRIC_NAME}.
 */
private void testAvroWrite() {
  BigQueryIO.Write<byte[]> avroWrite =
      BigQueryIO.<byte[]>write()
          .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)
          .withAvroFormatFunction(
              request -> {
                // Build the record against the schema supplied with the request.
                GenericRecord avroRecord = new GenericData.Record(request.getSchema());
                avroRecord.put("data", ByteBuffer.wrap(request.getElement()));
                return avroRecord;
              });
  testWrite(avroWrite, AVRO_WRITE_TIME_METRIC_NAME);
}
 
Example 6
Source File: BigQueryOutputRuntime.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Converts the incoming records with {@code IndexedRecordToTableRowFn} and writes them to the
 * BigQuery table named by the datastore project and the dataset's dataset/table properties.
 *
 * @param in the records to write
 * @return {@link PDone} for the pipeline of {@code in}
 */
@Override
public PDone expand(PCollection<IndexedRecord> in) {
    TableReference target = new TableReference()
            .setProjectId(datastore.projectName.getValue())
            .setDatasetId(dataset.bqDataset.getValue())
            .setTableId(dataset.tableName.getValue());

    // Table operation is applied first, then the write operation.
    BigQueryIO.Write bigQueryIOPTransform =
            setWriteOperation(setTableOperation(BigQueryIO.writeTableRows().to(target)));

    in.apply(ParDo.of(new IndexedRecordToTableRowFn())).apply(bigQueryIOPTransform);
    return PDone.in(in.getPipeline());
}
 
Example 7
Source File: BeamBQOutputTransform.java    From hop with Apache License 2.0 4 votes vote down vote up
/**
 * Builds and applies the BigQuery write for the incoming Hop rows.
 *
 * <p>The target table is taken from {@code projectId} (only when non-empty), {@code datasetId}
 * and {@code tableId}; the table schema is derived from the row metadata in {@code rowMetaJson}.
 * The write disposition is {@code WRITE_TRUNCATE} when {@code truncateTable} is set, otherwise
 * {@code WRITE_EMPTY} when {@code failIfNotEmpty} is set, otherwise {@code WRITE_APPEND}.
 *
 * @param input the rows to write
 * @return {@link PDone} for the pipeline of {@code input}
 * @throws RuntimeException wrapping any exception raised while building or applying the write,
 *     including an unsupported Hop value type
 */
@Override public PDone expand( PCollection<HopRow> input ) {

    try {
      // Only initialize once on this node/vm
      //
      BeamHop.init( transformPluginClasses, xpPluginClasses );

      // Inflate the metadata on the node where this is running...
      //
      IRowMeta rowMeta = JsonRowMeta.fromJson( rowMetaJson );


      // Which table do we write to?
      //
      TableReference tableReference = new TableReference();
      if ( StringUtils.isNotEmpty( projectId ) ) {
        tableReference.setProjectId( projectId );
      }
      tableReference.setDatasetId( datasetId );
      tableReference.setTableId( tableId );

      // Map every Hop value type onto the corresponding BigQuery column type
      //
      TableSchema tableSchema = new TableSchema();
      List<TableFieldSchema> schemaFields = new ArrayList<>();
      for ( IValueMeta valueMeta : rowMeta.getValueMetaList() ) {
        TableFieldSchema schemaField = new TableFieldSchema();
        schemaField.setName( valueMeta.getName() );
        switch(valueMeta.getType()){
          case IValueMeta.TYPE_STRING: schemaField.setType( "STRING" ); break;
          case IValueMeta.TYPE_INTEGER: schemaField.setType( "INTEGER" ); break;
          case IValueMeta.TYPE_DATE: schemaField.setType( "DATETIME" ); break;
          case IValueMeta.TYPE_BOOLEAN: schemaField.setType( "BOOLEAN" ); break;
          case IValueMeta.TYPE_NUMBER: schemaField.setType( "FLOAT" ); break;
          default:
            throw new RuntimeException( "Conversion from Hop value "+valueMeta.toString()+" to BigQuery TableRow isn't supported yet" );
        }
        schemaFields.add(schemaField);
      }
      tableSchema.setFields( schemaFields );

      SerializableFunction<HopRow, TableRow> formatFunction = new HopToBQTableRowFn( transformName, rowMetaJson, transformPluginClasses, xpPluginClasses );

      BigQueryIO.Write.CreateDisposition createDisposition;
      if (createIfNeeded) {
        createDisposition = BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED;
      }  else {
        createDisposition = BigQueryIO.Write.CreateDisposition.CREATE_NEVER;
      }

      // Truncation takes precedence over the "fail if not empty" check.
      // This branch previously selected WRITE_APPEND, so the truncate option had no effect.
      //
      BigQueryIO.Write.WriteDisposition writeDisposition;
      if (truncateTable) {
        writeDisposition = BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE;
      } else {
        if (failIfNotEmpty) {
          writeDisposition = BigQueryIO.Write.WriteDisposition.WRITE_EMPTY;
        } else {
          writeDisposition = BigQueryIO.Write.WriteDisposition.WRITE_APPEND;
        }
      }

      BigQueryIO.Write<HopRow> bigQueryWrite = BigQueryIO
        .<HopRow>write()
        .to( tableReference )
        .withSchema( tableSchema )
        .withCreateDisposition( createDisposition )
        .withWriteDisposition( writeDisposition )
        .withFormatFunction( formatFunction );

      // TODO: pass the results along the way at some point
      //
      input.apply( transformName, bigQueryWrite );

      // End of the line
      //
      return PDone.in( input.getPipeline() );

    } catch ( Exception e ) {
      // Count the failure, log it and rethrow with the original cause attached
      //
      numErrors.inc();
      LOG.error( "Error in Beam BigQuery output transform", e );
      throw new RuntimeException( "Error in Beam BigQuery output transform", e );
    }
  }
 
Example 8
Source File: BeamBQOutputTransform.java    From kettle-beam with Apache License 2.0 4 votes vote down vote up
/**
 * Builds and applies the BigQuery write for the incoming Kettle rows.
 *
 * <p>The target table is taken from {@code projectId} (only when non-empty), {@code datasetId}
 * and {@code tableId}; the table schema is derived from the row metadata in {@code rowMetaJson}.
 * The write disposition is {@code WRITE_TRUNCATE} when {@code truncateTable} is set, otherwise
 * {@code WRITE_EMPTY} when {@code failIfNotEmpty} is set, otherwise {@code WRITE_APPEND}.
 *
 * @param input the rows to write
 * @return {@link PDone} for the pipeline of {@code input}
 * @throws RuntimeException wrapping any exception raised while building or applying the write,
 *     including an unsupported Kettle value type
 */
@Override public PDone expand( PCollection<KettleRow> input ) {

    try {
      // Only initialize once on this node/vm
      //
      BeamKettle.init( stepPluginClasses, xpPluginClasses );

      // Inflate the metadata on the node where this is running...
      //
      RowMetaInterface rowMeta = JsonRowMeta.fromJson( rowMetaJson );


      // Which table do we write to?
      //
      TableReference tableReference = new TableReference();
      if ( StringUtils.isNotEmpty( projectId ) ) {
        tableReference.setProjectId( projectId );
      }
      tableReference.setDatasetId( datasetId );
      tableReference.setTableId( tableId );

      // Map every Kettle value type onto the corresponding BigQuery column type
      //
      TableSchema tableSchema = new TableSchema();
      List<TableFieldSchema> schemaFields = new ArrayList<>();
      for ( ValueMetaInterface valueMeta : rowMeta.getValueMetaList() ) {
        TableFieldSchema schemaField = new TableFieldSchema();
        schemaField.setName( valueMeta.getName() );
        switch(valueMeta.getType()){
          case ValueMetaInterface.TYPE_STRING: schemaField.setType( "STRING" ); break;
          case ValueMetaInterface.TYPE_INTEGER: schemaField.setType( "INTEGER" ); break;
          case ValueMetaInterface.TYPE_DATE: schemaField.setType( "DATETIME" ); break;
          case ValueMetaInterface.TYPE_BOOLEAN: schemaField.setType( "BOOLEAN" ); break;
          case ValueMetaInterface.TYPE_NUMBER: schemaField.setType( "FLOAT" ); break;
          default:
            throw new RuntimeException( "Conversion from Kettle value "+valueMeta.toString()+" to BigQuery TableRow isn't supported yet" );
        }
        schemaFields.add(schemaField);
      }
      tableSchema.setFields( schemaFields );

      SerializableFunction<KettleRow, TableRow> formatFunction = new KettleToBQTableRowFn( stepname, rowMetaJson, stepPluginClasses, xpPluginClasses );

      BigQueryIO.Write.CreateDisposition createDisposition;
      if (createIfNeeded) {
        createDisposition = BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED;
      }  else {
        createDisposition = BigQueryIO.Write.CreateDisposition.CREATE_NEVER;
      }

      // Truncation takes precedence over the "fail if not empty" check.
      // This branch previously selected WRITE_APPEND, so the truncate option had no effect.
      //
      BigQueryIO.Write.WriteDisposition writeDisposition;
      if (truncateTable) {
        writeDisposition = BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE;
      } else {
        if (failIfNotEmpty) {
          writeDisposition = BigQueryIO.Write.WriteDisposition.WRITE_EMPTY;
        } else {
          writeDisposition = BigQueryIO.Write.WriteDisposition.WRITE_APPEND;
        }
      }

      BigQueryIO.Write<KettleRow> bigQueryWrite = BigQueryIO
        .<KettleRow>write()
        .to( tableReference )
        .withSchema( tableSchema )
        .withCreateDisposition( createDisposition )
        .withWriteDisposition( writeDisposition )
        .withFormatFunction( formatFunction );

      // TODO: pass the results along the way at some point
      //
      input.apply( stepname, bigQueryWrite );

      // End of the line
      //
      return PDone.in( input.getPipeline() );

    } catch ( Exception e ) {
      // Count the failure, log it and rethrow with the original cause attached
      //
      numErrors.inc();
      LOG.error( "Error in Beam BigQuery output transform", e );
      throw new RuntimeException( "Error in Beam BigQuery output transform", e );
    }
  }
 
Example 9
Source File: BigQueryOutputRuntime.java    From components with Apache License 2.0 4 votes vote down vote up
/**
 * Applies the configured table operation to the given BigQuery write transform.
 *
 * <p>For {@code CREATE_IF_NOT_EXISTS} and {@code DROP_IF_EXISTS_AND_CREATE} a BigQuery schema is
 * derived from the dataset's design schema and is mandatory. {@code DROP_IF_EXISTS_AND_CREATE}
 * and {@code TRUNCATE} also set the write disposition to {@code WRITE_TRUNCATE}.
 *
 * @param bigQueryIOPTransform the write transform to configure
 * @return the transform with create (and possibly write) disposition applied
 * @throws RuntimeException if a table must be created but no usable schema is available, or if
 *     the table operation is not one of the handled values
 */
private BigQueryIO.Write setTableOperation(BigQueryIO.Write bigQueryIOPTransform) {
    BigQueryOutputProperties.TableOperation tableOp = properties.tableOperation.getValue();
    boolean createsTable = tableOp == BigQueryOutputProperties.TableOperation.CREATE_IF_NOT_EXISTS
            || tableOp == BigQueryOutputProperties.TableOperation.DROP_IF_EXISTS_AND_CREATE;

    TableSchema bqSchema = null;
    if (createsTable) {
        Schema designSchema = properties.getDatasetProperties().main.schema.getValue();
        boolean usableDesignSchema = designSchema != null
                && !AvroUtils.isSchemaEmpty(designSchema)
                && !AvroUtils.isIncludeAllFields(designSchema);
        if (usableDesignSchema) {
            bqSchema = BigQueryAvroRegistry.get().guessBigQuerySchema(designSchema);
        }
        // Creating a table without a schema is not possible.
        if (bqSchema == null) {
            throw new RuntimeException("Need to specify schema to create table");
        }
    }

    switch (tableOp) {
    case NONE:
        return bigQueryIOPTransform.withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER);
    case CREATE_IF_NOT_EXISTS:
        return bigQueryIOPTransform
                .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                .withSchema(bqSchema);
    case DROP_IF_EXISTS_AND_CREATE:
        return bigQueryIOPTransform
                .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)
                .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                .withSchema(bqSchema);
    case TRUNCATE:
        return bigQueryIOPTransform
                .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)
                .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER);
    default:
        throw new RuntimeException("To be implemented: " + tableOp);
    }
}