Java Code Examples for com.google.api.services.bigquery.model.TableReference#setTableId()

The following examples show how to use com.google.api.services.bigquery.model.TableReference#setTableId(). They are drawn from open-source projects; the source file, project, and license are listed above each example.
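Before the project examples, here is a minimal sketch of building a TableReference by hand; the project, dataset, and table identifiers are placeholders. The generated model setters return the instance itself, so the calls can be chained.

import com.google.api.services.bigquery.model.TableReference;

public class TableReferenceSketch {
  public static void main(String[] args) {
    // Placeholder identifiers, for illustration only.
    TableReference tableRef = new TableReference()
        .setProjectId("my-project")
        .setDatasetId("my_dataset")
        .setTableId("my_table");

    // Prints "my-project:my_dataset.my_table".
    System.out.println(
        tableRef.getProjectId() + ":" + tableRef.getDatasetId() + "." + tableRef.getTableId());
  }
}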
Example 1
Source File: BigQueryStrings.java    From hadoop-connectors with Apache License 2.0
/**
 * Parses a string into a TableReference; projectId may be omitted if the caller defines a
 * "default" project; in such a case, getProjectId() of the returned TableReference will
 * return null.
 *
 * @param tableRefString A string of the form [projectId]:[datasetId].[tableId].
 * @return a TableReference with the parsed components.
 */
public static TableReference parseTableReference(String tableRefString) {
  // Logic mirrored from cloud/helix/clients/cli/bigquery_client.py.
  TableReference tableRef = new TableReference();
  int projectIdEnd = tableRefString.lastIndexOf(':');
  String datasetAndTableString = tableRefString;
  if (projectIdEnd != -1) {
    tableRef.setProjectId(tableRefString.substring(0, projectIdEnd));

    // Omit the ':' from the remaining datasetId.tableId substring.
    datasetAndTableString = tableRefString.substring(projectIdEnd + 1);
  }

  Preconditions.checkArgument(datasetAndTableString.matches(DATASET_AND_TABLE_REGEX),
      "Invalid datasetAndTableString '%s'; must match regex '%s'.",
      datasetAndTableString, DATASET_AND_TABLE_REGEX);

  List<String> idParts = DOT_SPLITTER.splitToList(datasetAndTableString);
  tableRef.setDatasetId(idParts.get(0));
  tableRef.setTableId(idParts.get(1));
  return tableRef;
}
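A brief usage sketch for the parser above, with hypothetical table strings; as the Javadoc notes, the project id may be omitted, in which case getProjectId() returns null:

TableReference full = BigQueryStrings.parseTableReference("my-project:my_dataset.my_table");
// full.getProjectId() -> "my-project"; full.getDatasetId() -> "my_dataset"; full.getTableId() -> "my_table"

TableReference partial = BigQueryStrings.parseTableReference("my_dataset.my_table");
// partial.getProjectId() -> null; the caller is expected to supply a default project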
 
Example 2
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testCreateNeverWithStreaming() throws Exception {
  p.enableAbandonedNodeEnforcement(false);

  TableReference tableRef = new TableReference();
  tableRef.setDatasetId("dataset");
  tableRef.setTableId("sometable");

  PCollection<TableRow> tableRows =
      p.apply(GenerateSequence.from(0))
          .apply(
              MapElements.via(
                  new SimpleFunction<Long, TableRow>() {
                    @Override
                    public TableRow apply(Long input) {
                      return null;
                    }
                  }))
          .setCoder(TableRowJsonCoder.of());
  tableRows.apply(
      BigQueryIO.writeTableRows()
          .to(tableRef)
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER)
          .withoutValidation());
}
 
Example 3
Source File: PartitionedTableRef.java    From dataflow-opinion-analysis with Apache License 2.0
/**
 * input - a tuple that contains the data element (TableRow), the window, the timestamp, and the pane
 */
@Override
public TableDestination apply(ValueInSingleWindow<TableRow> input) {

    String partition;

    if (this.isTimeField) {
        String sTime = (String) input.getValue().get(this.fieldName);
        Instant time = Instant.parse(sTime);
        partition = time.toString(partitionFormatter);
    } else {
        partition = ((Integer) input.getValue().get(this.fieldName)).toString();
    }

    TableReference reference = new TableReference();
    reference.setProjectId(this.projectId);
    reference.setDatasetId(this.datasetId);
    reference.setTableId(this.partitionPrefix + partition);
    return new TableDestination(reference, null);
}
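As a hypothetical illustration of the routing above: with a partitionPrefix of "pageviews$" and a yyyyMMdd partitionFormatter, a row whose time field parses to 2020-01-15 is routed to the table id "pageviews$20200115", which is BigQuery's ingestion-time partition decorator syntax.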
 
Example 4
Source File: TrafficRoutes.java    From beam with Apache License 2.0
public static void runTrafficRoutes(TrafficRoutesOptions options) throws IOException {
  // Using ExampleUtils to set up required resources.
  ExampleUtils exampleUtils = new ExampleUtils(options);
  exampleUtils.setup();

  Pipeline pipeline = Pipeline.create(options);
  TableReference tableRef = new TableReference();
  tableRef.setProjectId(options.getProject());
  tableRef.setDatasetId(options.getBigQueryDataset());
  tableRef.setTableId(options.getBigQueryTable());

  pipeline
      .apply("ReadLines", new ReadFileAndExtractTimestamps(options.getInputFile()))
      // row... => <station route, station speed> ...
      .apply(ParDo.of(new ExtractStationSpeedFn()))
      // map the incoming data stream into sliding windows.
      .apply(
          Window.into(
              SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
                  .every(Duration.standardMinutes(options.getWindowSlideEvery()))))
      .apply(new TrackSpeed())
      .apply(BigQueryIO.writeTableRows().to(tableRef).withSchema(FormatStatsFn.getSchema()));

  // Run the pipeline.
  PipelineResult result = pipeline.run();

  // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
  exampleUtils.waitToFinish(result);
}
 
Example 5
Source File: BigQueryOutputRuntime.java    From components with Apache License 2.0
@Override
public PDone expand(PCollection<IndexedRecord> in) {
    TableReference table = new TableReference();
    table.setProjectId(datastore.projectName.getValue());
    table.setDatasetId(dataset.bqDataset.getValue());
    table.setTableId(dataset.tableName.getValue());

    BigQueryIO.Write bigQueryIOPTransform = BigQueryIO.writeTableRows().to(table);

    bigQueryIOPTransform = setTableOperation(bigQueryIOPTransform);
    bigQueryIOPTransform = setWriteOperation(bigQueryIOPTransform);

    in.apply(ParDo.of(new IndexedRecordToTableRowFn())).apply(bigQueryIOPTransform);
    return PDone.in(in.getPipeline());
}
 
Example 6
Source File: BigQueryInputRuntime.java    From components with Apache License 2.0
@Override
public PCollection<IndexedRecord> expand(PBegin in) {
    BigQueryIO.TypedRead<TableRow> bigQueryIOPTransform;
    switch (dataset.sourceType.getValue()) {
    case TABLE_NAME: {
        TableReference table = new TableReference();
        table.setProjectId(datastore.projectName.getValue());
        table.setDatasetId(dataset.bqDataset.getValue());
        table.setTableId(dataset.tableName.getValue());
        // TODO use {@link BigQueryIO#read(SerializableFunction)} instead of readTableRows for
        // better performance: it avoids a redundant type conversion, but take care with each
        // field's value type when applying it
        bigQueryIOPTransform = BigQueryIO.readTableRows().from(table);
        break;
    }
    case QUERY: {
        // TODO use {@link BigQueryIO#read(SerializableFunction)} instead of readTableRows for
        // better performance: it reduces redundant type conversion, but take care with each
        // field's value type when applying it
        bigQueryIOPTransform = BigQueryIO.readTableRows().fromQuery(dataset.query.getValue());
        if (!dataset.useLegacySql.getValue()) {
            bigQueryIOPTransform = bigQueryIOPTransform.usingStandardSql();
        } else {
            // flattenResults only needs to be considered for legacy SQL: standard SQL does
            // not support flattened results, while legacy SQL flattens results by default.
            // withoutResultFlattening does not work well on legacy SQL until the schema issue
            // is fixed, because BigQueryDatasetRuntime.getSchema does use flattened results.
            // bigQueryIOPTransform = bigQueryIOPTransform.withoutResultFlattening();
        }
        break;
    }
    default:
        throw new RuntimeException("To be implemented: " + dataset.sourceType.getValue());
    }

    return in
            .apply(bigQueryIOPTransform)
            .apply(ParDo.of(new TableRowToIndexedRecordFn(defaultOutputCoder.getSchema())))
            .setCoder(defaultOutputCoder);
}
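The TODO comments above point at Beam's typed read, BigQueryIO.read(SerializableFunction). A minimal sketch of that alternative, assuming the table reference from the TABLE_NAME case and a placeholder field name, might look like this:

// Parse each SchemaAndRecord (Avro GenericRecord plus table schema) directly,
// instead of materializing intermediate TableRow objects.
PCollection<String> names =
    in.apply(
        BigQueryIO.read(
                (SchemaAndRecord schemaAndRecord) ->
                    String.valueOf(schemaAndRecord.getRecord().get("name")))
            .from(table)
            .withCoder(StringUtf8Coder.of()));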
 
Example 7
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
private void testWriteValidatesDataset(boolean unbounded) throws Exception {
  TableReference tableRef = new TableReference();
  tableRef.setDatasetId("somedataset");
  tableRef.setTableId("sometable");

  PCollection<TableRow> tableRows;
  if (unbounded) {
    tableRows =
        p.apply(GenerateSequence.from(0))
            .apply(
                MapElements.via(
                    new SimpleFunction<Long, TableRow>() {
                      @Override
                      public TableRow apply(Long input) {
                        return null;
                      }
                    }))
            .setCoder(TableRowJsonCoder.of());
  } else {
    tableRows = p.apply(Create.empty(TableRowJsonCoder.of()));
  }

  thrown.expect(RuntimeException.class);
  // The message will be one of the following, depending on the execution environment.
  thrown.expectMessage(
      Matchers.either(Matchers.containsString("Unable to confirm BigQuery dataset presence"))
          .or(Matchers.containsString("BigQuery dataset not found for table")));
  tableRows.apply(
      BigQueryIO.writeTableRows()
          .to(tableRef)
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
          .withSchema(new TableSchema())
          .withTestServices(fakeBqServices));
  p.run();
}
 
Example 8
Source File: TriggerExample.java    From beam with Apache License 2.0
/** Constructs a table reference. */
private static TableReference getTableReference(String project, String dataset, String table) {
  TableReference tableRef = new TableReference();
  tableRef.setProjectId(project);
  tableRef.setDatasetId(dataset);
  tableRef.setTableId(table);
  return tableRef;
}
 
Example 9
Source File: TrafficMaxLaneFlow.java    From beam with Apache License 2.0
public static void runTrafficMaxLaneFlow(TrafficMaxLaneFlowOptions options) throws IOException {
  // Using ExampleUtils to set up required resources.
  ExampleUtils exampleUtils = new ExampleUtils(options);
  exampleUtils.setup();

  Pipeline pipeline = Pipeline.create(options);
  TableReference tableRef = new TableReference();
  tableRef.setProjectId(options.getProject());
  tableRef.setDatasetId(options.getBigQueryDataset());
  tableRef.setTableId(options.getBigQueryTable());

  pipeline
      .apply("ReadLines", new ReadFileAndExtractTimestamps(options.getInputFile()))
      // row... => <station route, station speed> ...
      .apply(ParDo.of(new ExtractFlowInfoFn()))
      // map the incoming data stream into sliding windows.
      .apply(
          Window.into(
              SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
                  .every(Duration.standardMinutes(options.getWindowSlideEvery()))))
      .apply(new MaxLaneFlow())
      .apply(BigQueryIO.writeTableRows().to(tableRef).withSchema(FormatMaxesFn.getSchema()));

  // Run the pipeline.
  PipelineResult result = pipeline.run();

  // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
  exampleUtils.waitToFinish(result);
}
 
Example 10
Source File: WriteToBigQuery.java    From beam with Apache License 2.0
/** Utility to construct an output table reference. */
static TableReference getTable(String projectId, String datasetId, String tableName) {
  TableReference table = new TableReference();
  table.setDatasetId(datasetId);
  table.setProjectId(projectId);
  table.setTableId(tableName);
  return table;
}
 
Example 11
Source File: OpinionAnalysisPipeline.java    From dataflow-opinion-analysis with Apache License 2.0
private static TableReference getSentimentTableReference(IndexerPipelineOptions options) {
	TableReference tableRef = new TableReference();
	tableRef.setProjectId(options.getProject());
	tableRef.setDatasetId(options.getBigQueryDataset());
	tableRef.setTableId(IndexerPipelineUtils.SENTIMENT_TABLE);
	return tableRef;
}
 
Example 12
Source File: OpinionAnalysisPipeline.java    From dataflow-opinion-analysis with Apache License 2.0
private static TableReference getDocumentTableReference(IndexerPipelineOptions options) {
	TableReference tableRef = new TableReference();
	tableRef.setProjectId(options.getProject());
	tableRef.setDatasetId(options.getBigQueryDataset());
	tableRef.setTableId(IndexerPipelineUtils.DOCUMENT_TABLE);
	return tableRef;
}
 
Example 13
Source File: OpinionAnalysisPipeline.java    From dataflow-opinion-analysis with Apache License 2.0
private static TableReference getWebResourceTableReference(IndexerPipelineOptions options) {
	TableReference tableRef = new TableReference();
	tableRef.setProjectId(options.getProject());
	tableRef.setDatasetId(options.getBigQueryDataset());
	tableRef.setTableId(IndexerPipelineUtils.WEBRESOURCE_TABLE);
	return tableRef;
}
 
Example 14
Source File: WriteToBigQuery.java    From deployment-examples with MIT License
/** Utility to construct an output table reference. */
static TableReference getTable(String projectId, String datasetId, String tableName) {
  TableReference table = new TableReference();
  table.setDatasetId(datasetId);
  table.setProjectId(projectId);
  table.setTableId(tableName);
  return table;
}
 
Example 15
Source File: BeamBQInputTransform.java    From hop with Apache License 2.0
@Override public PCollection<HopRow> expand( PBegin input ) {
  try {
    // Only initialize once on this node/vm
    //
    BeamHop.init(transformPluginClasses, xpPluginClasses);

    // Function to convert from Avro to Hop rows
    //
    BQSchemaAndRecordToHopFn toHopFn = new BQSchemaAndRecordToHopFn( transformName, rowMetaJson, transformPluginClasses, xpPluginClasses );

    TableReference tableReference = new TableReference();
    if (StringUtils.isNotEmpty( projectId )) {
      tableReference.setProjectId( projectId );
    }
    tableReference.setDatasetId( datasetId );
    tableReference.setTableId( tableId );

    BigQueryIO.TypedRead<HopRow> bqTypedRead;

    if (StringUtils.isEmpty( query )) {
      bqTypedRead = BigQueryIO
        .read( toHopFn )
        .from( tableReference )
      ;
    } else {
      bqTypedRead = BigQueryIO
        .read( toHopFn )
        .fromQuery( query )
      ;
    }

    // Apply the function
    //
    PCollection<HopRow> output = input.apply( bqTypedRead );

    return output;

  } catch ( Exception e ) {
    numErrors.inc();
    LOG.error( "Error in beam input transform", e );
    throw new RuntimeException( "Error in beam input transform", e );
  }
}
 
Example 16
Source File: BigQueryHelperTest.java    From hadoop-connectors with Apache License 2.0
@Before
public void setUp() throws IOException {
  MockitoAnnotations.initMocks(this);
  LoggerConfig.getConfig(GsonBigQueryInputFormat.class).setLevel(Level.FINE);

  // Create fake job reference.
  JobReference fakeJobReference = new JobReference().setProjectId(jobProjectId).setJobId(jobId);

  // Create the job result.
  jobStatus = new JobStatus();
  jobStatus.setState("DONE");
  jobStatus.setErrorResult(null);

  jobHandle = new Job();
  jobHandle.setStatus(jobStatus);
  jobHandle.setJobReference(fakeJobReference);

  // Mocks for Bigquery jobs.
  when(mockBigquery.jobs()).thenReturn(mockBigqueryJobs);

  // Mock getting Bigquery job.
  when(mockBigqueryJobs.get(any(String.class), any(String.class)))
      .thenReturn(mockBigqueryJobsGet);
  when(mockBigqueryJobsGet.setLocation(any(String.class))).thenReturn(mockBigqueryJobsGet);

  // Mock inserting Bigquery job.
  when(mockBigqueryJobs.insert(any(String.class), any(Job.class)))
      .thenReturn(mockBigqueryJobsInsert);

  // Fake table.
  fakeTableSchema = new TableSchema();
  fakeTable = new Table().setSchema(fakeTableSchema).setLocation("test_location");

  // Mocks for Bigquery tables.
  when(mockBigquery.tables()).thenReturn(mockBigqueryTables);
  when(mockBigqueryTables.get(any(String.class), any(String.class), any(String.class)))
      .thenReturn(mockBigqueryTablesGet);

  Datasets datasets = Mockito.mock(Datasets.class);
  Datasets.Get datasetsGet = Mockito.mock(Datasets.Get.class);
  Dataset dataset = new Dataset().setLocation("test_location");
  when(mockBigquery.datasets()).thenReturn(datasets);
  when(datasets.get(any(String.class), any(String.class))).thenReturn(datasetsGet);
  when(datasetsGet.execute()).thenReturn(dataset);

  // Create table reference.
  tableRef = new TableReference();
  tableRef.setProjectId(projectId);
  tableRef.setDatasetId(datasetId);
  tableRef.setTableId(tableId);

  helper = new BigQueryHelper(mockBigquery);
  helper.setErrorExtractor(mockErrorExtractor);
}
 
Example 17
Source File: GsonBigQueryInputFormatTest.java    From hadoop-connectors with Apache License 2.0
/**
 * Creates an in-memory GHFS.
 *
 * @throws IOException on IOError.
 */
@Before
public void setUp()
    throws IOException {
  MockitoAnnotations.initMocks(this);
  LoggerConfig.getConfig(GsonBigQueryInputFormat.class).setLevel(Level.FINE);

  // Set the Hadoop job configuration.
  config = new JobConf(InMemoryGoogleHadoopFileSystem.getSampleConfiguration());
  config.set(BigQueryConfiguration.PROJECT_ID.getKey(), jobProjectId);
  config.set(BigQueryConfiguration.INPUT_PROJECT_ID.getKey(), dataProjectId);
  config.set(BigQueryConfiguration.INPUT_DATASET_ID.getKey(), intermediateDataset);
  config.set(BigQueryConfiguration.INPUT_TABLE_ID.getKey(), intermediateTable);
  config.set(BigQueryConfiguration.TEMP_GCS_PATH.getKey(), "gs://test_bucket/other_path");
  config.setClass(
      INPUT_FORMAT_CLASS.getKey(),
      GsonBigQueryInputFormat.class,
      AbstractBigQueryInputFormat.class);
  config.setBoolean(BigQueryConfiguration.DELETE_EXPORT_FILES_FROM_GCS.getKey(), true);

  CredentialConfigurationUtil.addTestConfigurationSettings(config);

  // Create a GoogleHadoopFileSystem to use to initialize and write to
  // the in-memory GcsFs.
  ghfs = new InMemoryGoogleHadoopFileSystem();

  JobReference fakeJobReference =
      new JobReference()
          .setProjectId(jobProjectId)
          .setJobId("bigquery-job-1234")
          .setLocation("test-job-location");

  // Create the job result.
  jobStatus = new JobStatus();
  jobStatus.setState("DONE");
  jobStatus.setErrorResult(null);

  jobHandle = new Job();
  jobHandle.setStatus(jobStatus);
  jobHandle.setJobReference(fakeJobReference);

  // Create table reference.
  tableRef = new TableReference();
  tableRef.setProjectId(dataProjectId);
  tableRef.setDatasetId("test_dataset");
  tableRef.setTableId("test_table");

  table = new Table().setTableReference(tableRef).setLocation("test_location");

  when(mockBigQueryHelper.getRawBigquery())
      .thenReturn(mockBigquery);

  // Mocks for Bigquery jobs.
  when(mockBigquery.jobs())
      .thenReturn(mockBigqueryJobs);

  // Mock getting Bigquery job.
  when(mockBigqueryJobs.get(any(String.class), any(String.class)))
      .thenReturn(mockBigqueryJobsGet);
  when(mockBigqueryJobsGet.setLocation(any(String.class))).thenReturn(mockBigqueryJobsGet);
  when(mockBigqueryJobsGet.execute())
      .thenReturn(jobHandle);

  // Mock inserting Bigquery job.
  when(mockBigqueryJobs.insert(any(String.class), any(Job.class)))
      .thenReturn(mockBigqueryJobsInsert);
  when(mockBigqueryJobsInsert.execute())
      .thenReturn(jobHandle);

  // Mocks for Bigquery tables.
  when(mockBigquery.tables())
      .thenReturn(mockBigqueryTables);

  // Mocks for getting Bigquery table.
  when(mockBigqueryTables.get(any(String.class), any(String.class), any(String.class)))
      .thenReturn(mockBigqueryTablesGet);
  when(mockBigqueryTablesGet.execute())
      .thenReturn(table);

  when(mockBigQueryHelper.getTable(any(TableReference.class)))
      .thenReturn(table);

  when(mockBigQueryHelper.createJobReference(
          any(String.class), any(String.class), any(String.class)))
      .thenReturn(fakeJobReference);
  when(mockBigQueryHelper.insertJobOrFetchDuplicate(any(String.class), any(Job.class)))
      .thenReturn(jobHandle);
}
 
Example 18
Source File: BeamBQOutputTransform.java    From kettle-beam with Apache License 2.0
@Override public PDone expand( PCollection<KettleRow> input ) {

    try {
      // Only initialize once on this node/vm
      //
      BeamKettle.init( stepPluginClasses, xpPluginClasses );

      // Inflate the metadata on the node where this is running...
      //
      RowMetaInterface rowMeta = JsonRowMeta.fromJson( rowMetaJson );


      // Which table do we write to?
      //
      TableReference tableReference = new TableReference();
      if ( StringUtils.isNotEmpty( projectId ) ) {
        tableReference.setProjectId( projectId );
      }
      tableReference.setDatasetId( datasetId );
      tableReference.setTableId( tableId );

      TableSchema tableSchema = new TableSchema();
      List<TableFieldSchema> schemaFields = new ArrayList<>();
      for ( ValueMetaInterface valueMeta : rowMeta.getValueMetaList() ) {
        TableFieldSchema schemaField = new TableFieldSchema();
        schemaField.setName( valueMeta.getName() );
        switch(valueMeta.getType()){
          case ValueMetaInterface.TYPE_STRING: schemaField.setType( "STRING" ); break;
          case ValueMetaInterface.TYPE_INTEGER: schemaField.setType( "INTEGER" ); break;
          case ValueMetaInterface.TYPE_DATE: schemaField.setType( "DATETIME" ); break;
          case ValueMetaInterface.TYPE_BOOLEAN: schemaField.setType( "BOOLEAN" ); break;
          case ValueMetaInterface.TYPE_NUMBER: schemaField.setType( "FLOAT" ); break;
          default:
            throw new RuntimeException( "Conversion from Kettle value "+valueMeta.toString()+" to BigQuery TableRow isn't supported yet" );
        }
        schemaFields.add(schemaField);
      }
      tableSchema.setFields( schemaFields );

      SerializableFunction<KettleRow, TableRow> formatFunction = new KettleToBQTableRowFn( stepname, rowMetaJson, stepPluginClasses, xpPluginClasses );

      BigQueryIO.Write.CreateDisposition createDisposition;
      if (createIfNeeded) {
        createDisposition = BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED;
      } else {
        createDisposition = BigQueryIO.Write.CreateDisposition.CREATE_NEVER;
      }

      BigQueryIO.Write.WriteDisposition writeDisposition;
      if (truncateTable) {
        // Replace any existing table contents.
        writeDisposition = BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE;
      } else if (failIfNotEmpty) {
        writeDisposition = BigQueryIO.Write.WriteDisposition.WRITE_EMPTY;
      } else {
        writeDisposition = BigQueryIO.Write.WriteDisposition.WRITE_APPEND;
      }

      BigQueryIO.Write<KettleRow> bigQueryWrite = BigQueryIO
        .<KettleRow>write()
        .to( tableReference )
        .withSchema( tableSchema )
        .withCreateDisposition( createDisposition )
        .withWriteDisposition( writeDisposition )
        .withFormatFunction( formatFunction );

      // TODO: pass the results along the way at some point
      //
      input.apply( stepname, bigQueryWrite );

      // End of the line
      //
      return PDone.in( input.getPipeline() );

    } catch ( Exception e ) {
      numErrors.inc();
      LOG.error( "Error in Beam BigQuery output transform", e );
      throw new RuntimeException( "Error in Beam BigQuery output transform", e );
    }
  }
 
Example 19
Source File: BeamBQInputTransform.java    From kettle-beam with Apache License 2.0
@Override public PCollection<KettleRow> expand( PBegin input ) {
  try {
    // Only initialize once on this node/vm
    //
    BeamKettle.init(stepPluginClasses, xpPluginClasses);

    // Function to convert from Avro to Kettle rows
    //
    BQSchemaAndRecordToKettleFn toKettleFn = new BQSchemaAndRecordToKettleFn( stepname, rowMetaJson, stepPluginClasses, xpPluginClasses );

    TableReference tableReference = new TableReference();
    if (StringUtils.isNotEmpty( projectId )) {
      tableReference.setProjectId( projectId );
    }
    tableReference.setDatasetId( datasetId );
    tableReference.setTableId( tableId );

    BigQueryIO.TypedRead<KettleRow> bqTypedRead;

    if (StringUtils.isEmpty( query )) {
      bqTypedRead = BigQueryIO
        .read( toKettleFn )
        .from( tableReference )
      ;
    } else {
      bqTypedRead = BigQueryIO
        .read( toKettleFn )
        .fromQuery( query )
      ;
    }

    // Apply the function
    //
    PCollection<KettleRow> output = input.apply( bqTypedRead );

    return output;

  } catch ( Exception e ) {
    numErrors.inc();
    LOG.error( "Error in beam input transform", e );
    throw new RuntimeException( "Error in beam input transform", e );
  }
}
 
Example 20
Source File: BeamBQOutputTransform.java    From hop with Apache License 2.0
@Override public PDone expand( PCollection<HopRow> input ) {

    try {
      // Only initialize once on this node/vm
      //
      BeamHop.init( transformPluginClasses, xpPluginClasses );

      // Inflate the metadata on the node where this is running...
      //
      IRowMeta rowMeta = JsonRowMeta.fromJson( rowMetaJson );


      // Which table do we write to?
      //
      TableReference tableReference = new TableReference();
      if ( StringUtils.isNotEmpty( projectId ) ) {
        tableReference.setProjectId( projectId );
      }
      tableReference.setDatasetId( datasetId );
      tableReference.setTableId( tableId );

      TableSchema tableSchema = new TableSchema();
      List<TableFieldSchema> schemaFields = new ArrayList<>();
      for ( IValueMeta valueMeta : rowMeta.getValueMetaList() ) {
        TableFieldSchema schemaField = new TableFieldSchema();
        schemaField.setName( valueMeta.getName() );
        switch(valueMeta.getType()){
          case IValueMeta.TYPE_STRING: schemaField.setType( "STRING" ); break;
          case IValueMeta.TYPE_INTEGER: schemaField.setType( "INTEGER" ); break;
          case IValueMeta.TYPE_DATE: schemaField.setType( "DATETIME" ); break;
          case IValueMeta.TYPE_BOOLEAN: schemaField.setType( "BOOLEAN" ); break;
          case IValueMeta.TYPE_NUMBER: schemaField.setType( "FLOAT" ); break;
          default:
            throw new RuntimeException( "Conversion from Hop value "+valueMeta.toString()+" to BigQuery TableRow isn't supported yet" );
        }
        schemaFields.add(schemaField);
      }
      tableSchema.setFields( schemaFields );

      SerializableFunction<HopRow, TableRow> formatFunction = new HopToBQTableRowFn( transformName, rowMetaJson, transformPluginClasses, xpPluginClasses );

      BigQueryIO.Write.CreateDisposition createDisposition;
      if (createIfNeeded) {
        createDisposition = BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED;
      } else {
        createDisposition = BigQueryIO.Write.CreateDisposition.CREATE_NEVER;
      }

      BigQueryIO.Write.WriteDisposition writeDisposition;
      if (truncateTable) {
        // Replace any existing table contents.
        writeDisposition = BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE;
      } else if (failIfNotEmpty) {
        writeDisposition = BigQueryIO.Write.WriteDisposition.WRITE_EMPTY;
      } else {
        writeDisposition = BigQueryIO.Write.WriteDisposition.WRITE_APPEND;
      }

      BigQueryIO.Write<HopRow> bigQueryWrite = BigQueryIO
        .<HopRow>write()
        .to( tableReference )
        .withSchema( tableSchema )
        .withCreateDisposition( createDisposition )
        .withWriteDisposition( writeDisposition )
        .withFormatFunction( formatFunction );

      // TODO: pass the results along the way at some point
      //
      input.apply( transformName, bigQueryWrite );

      // End of the line
      //
      return PDone.in( input.getPipeline() );

    } catch ( Exception e ) {
      numErrors.inc();
      LOG.error( "Error in Beam BigQuery output transform", e );
      throw new RuntimeException( "Error in Beam BigQuery output transform", e );
    }
  }