Java Code Examples for com.google.api.services.bigquery.model.TableRow

The following examples show how to use com.google.api.services.bigquery.model.TableRow. These examples are extracted from open source projects; to see each one in context, follow the link to the original project or source file above the example.
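
TableRow extends GenericJson, so a row behaves like a JSON map: set(name, value) returns the row for chaining, get(name) reads a field back, and setF/getF handle the positional-cell form that query responses use. As a quick orientation before the examples, here is a minimal sketch (the field names are invented for illustration):

import com.google.api.services.bigquery.model.TableCell;
import com.google.api.services.bigquery.model.TableRow;
import java.util.Arrays;

public class TableRowBasics {
  public static void main(String[] args) {
    // Named fields: set() returns the row, so calls chain.
    TableRow row = new TableRow().set("name", "a").set("number", 1);

    // Reads are plain map lookups returning Object.
    String name = (String) row.get("name");

    // Positional cells, as used by the QueryResponse examples below.
    TableRow positional =
        new TableRow()
            .setF(Arrays.asList(new TableCell().setV("abc"), new TableCell().setV("2")));
    Object firstValue = positional.getF().get(0).getV();

    System.out.println(name + " / " + firstValue);
  }
}
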
Example 1
Source Project: beam   Source File: TopWikipediaSessions.java    License: Apache License 2.0
@Override
public PCollection<String> expand(PCollection<TableRow> input) {
  return input
      .apply(ParDo.of(new ExtractUserAndTimestamp()))
      .apply(
          "SampleUsers",
          ParDo.of(
              new DoFn<String, String>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
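                  // Deterministic sampling: keep an element only when its non-negative hash falls within samplingThreshold of the int range.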
                  if (Math.abs((long) c.element().hashCode())
                      <= Integer.MAX_VALUE * samplingThreshold) {
                    c.output(c.element());
                  }
                }
              }))
      .apply(new ComputeSessions())
      .apply("SessionsToStrings", ParDo.of(new SessionsToStringsDoFn()))
      .apply(new TopPerMonth())
      .apply("FormatOutput", ParDo.of(new FormatOutputDoFn()));
}
 
Example 2
Source Project: DataflowTemplates   Source File: ErrorConverters.java    License: Apache License 2.0
@ProcessElement
public void processElement(ProcessContext context) {
  FailsafeElement<String, String> failsafeElement = context.element();
  final String message = failsafeElement.getOriginalPayload();

  // Format the timestamp for insertion
  String timestamp =
      TIMESTAMP_FORMATTER.print(context.timestamp().toDateTime(DateTimeZone.UTC));

  // Build the table row
  final TableRow failedRow =
      new TableRow()
          .set("timestamp", timestamp)
          .set("errorMessage", failsafeElement.getErrorMessage())
          .set("stacktrace", failsafeElement.getStacktrace());

  // Only set the payload if it's populated on the message.
  if (message != null) {
    failedRow
        .set("payloadString", message)
        .set("payloadBytes", message.getBytes(StandardCharsets.UTF_8));
  }

  context.output(failedRow);
}
 
Example 3
Source Project: beam   Source File: BigQueryIOStorageQueryTest.java    License: Apache License 2.0
@Test
public void testQuerySourceCreateReader() throws Exception {
  BigQueryStorageQuerySource<TableRow> querySource =
      BigQueryStorageQuerySource.create(
          /* stepUuid = */ "testStepUuid",
          ValueProvider.StaticValueProvider.of("SELECT * FROM `dataset.table`"),
          /* flattenResults = */ false,
          /* useLegacySql = */ false,
          /* priority = */ QueryPriority.INTERACTIVE,
          /* location = */ "asia-northeast1",
          /* queryTempDataset = */ null,
          /* kmsKey = */ null,
          new TableRowParser(),
          TableRowJsonCoder.of(),
          fakeBigQueryServices);

  thrown.expect(UnsupportedOperationException.class);
  thrown.expectMessage("BigQuery storage source must be split before reading");
  querySource.createReader(options);
}
 
Example 4
Source Project: beam   Source File: BigQueryInsertErrorCoderTest.java    License: Apache License 2.0
@Test
public void testDecodeEncodeEqual() throws Exception {
  BigQueryInsertError value =
      new BigQueryInsertError(
          new TableRow().setF(Collections.singletonList(new TableCell().setV("Value"))),
          new TableDataInsertAllResponse.InsertErrors()
              .setIndex(0L)
              .setErrors(
                  Collections.singletonList(
                      new ErrorProto()
                          .setReason("a Reason")
                          .setLocation("A location")
                          .setMessage("A message")
                          .setDebugInfo("The debug info"))),
          new TableReference()
              .setProjectId("dummy-project-id")
              .setDatasetId("dummy-dataset-id")
              .setTableId("dummy-table-id"));

  CoderProperties.coderDecodeEncodeEqual(TEST_CODER, value);
}
 
Example 5
Source Project: beam   Source File: BigQueryIOWriteTest.java    License: Apache License 2.0
@Test
public void testWriteFailedJobs() throws Exception {
  p.apply(
          Create.of(
                  new TableRow().set("name", "a").set("number", 1),
                  new TableRow().set("name", "b").set("number", 2),
                  new TableRow().set("name", "c").set("number", 3))
              .withCoder(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to("dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER)
              .withTestServices(fakeBqServices)
              .withoutValidation());

  thrown.expect(RuntimeException.class);
  thrown.expectMessage("Failed to create job with prefix");
  thrown.expectMessage("reached max retries");
  thrown.expectMessage("last failed job");

  p.run();
}
 
Example 6
@Override
public void processElement(ProcessContext c) {
  Double dollars = c.element();
  TableRow r = new TableRow();
  r.set("dollar_turnover", dollars);
  // the timing can be:
  // EARLY: the dollar amount is not yet final
  // ON_TIME: dataflow thinks the dollar amount is final but late data are still possible
  // LATE: late data has arrived
  r.set("dollar_timing", c.pane().getTiming()); // EARLY, ON_TIME or LATE
  r.set("dollar_window", ((IntervalWindow) c.window()).start().getMillis() / 1000.0 / 60.0); // timestamp in fractional minutes

  LOG.info("Outputting $ value {}} at {} with marker {} for window {}",
    dollars.toString(), new Date().getTime(), c.pane().getTiming().toString(), c.window().hashCode());
  c.output(r);
}
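
The EARLY/ON_TIME/LATE timings above only occur when the upstream windowing has a trigger that fires both before and after the watermark. Here is a sketch of a configuration that would produce all three, assuming the usual org.apache.beam.sdk.transforms.windowing imports and an upstream PCollection<Double> named dollars (the durations are illustrative, not taken from the original pipeline):

PCollection<Double> windowedDollars = dollars.apply(
    Window.<Double>into(FixedWindows.of(Duration.standardMinutes(2)))
        .triggering(
            AfterWatermark.pastEndOfWindow()
                // speculative EARLY panes while the window is still open
                .withEarlyFirings(
                    AfterProcessingTime.pastFirstElementInPane()
                        .plusDelayOf(Duration.standardSeconds(30)))
                // one LATE pane per late element
                .withLateFirings(AfterPane.elementCountAtLeast(1)))
        .withAllowedLateness(Duration.standardMinutes(10))
        .accumulatingFiredPanes());
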
 
Example 7
Source Project: beam   Source File: FakeJobService.java    License: Apache License 2.0
private long writeRows(
    String tableId, List<TableRow> rows, TableSchema schema, String destinationPattern)
    throws IOException {
  Schema avroSchema = BigQueryUtils.toGenericAvroSchema(tableId, schema.getFields());
  List<TableRow> rowsToWrite = Lists.newArrayList();
  int shard = 0;
  for (TableRow row : rows) {
    rowsToWrite.add(row);
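    // Flush a new shard file after every five buffered rows.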
    if (rowsToWrite.size() == 5) {
      writeRowsHelper(rowsToWrite, avroSchema, destinationPattern, shard++);
      rowsToWrite.clear();
    }
  }
  if (!rowsToWrite.isEmpty()) {
    writeRowsHelper(rowsToWrite, avroSchema, destinationPattern, shard++);
  }
  return shard;
}
 
Example 8
Source Project: beam   Source File: BigqueryMatcherTest.java    License: Apache License 2.0
private QueryResponse createResponseContainingTestData() {
  TableCell field1 = new TableCell();
  field1.setV("abc");
  TableCell field2 = new TableCell();
  field2.setV("2");
  TableCell field3 = new TableCell();
  field3.setV("testing BigQuery matcher.");
  TableRow row = new TableRow();
  row.setF(Lists.newArrayList(field1, field2, field3));

  QueryResponse response = new QueryResponse();
  response.setJobComplete(true);
  response.setRows(Lists.newArrayList(row));
  response.setTotalRows(BigInteger.ONE);
  return response;
}
 
Example 9
Source Project: beam   Source File: TriggerExampleTest.java    License: Apache License 2.0
@Test
@Category(ValidatesRunner.class)
public void testTotalFlow() {
  PCollection<KV<String, Integer>> flow =
      pipeline
          .apply(Create.timestamped(TIME_STAMPED_INPUT))
          .apply(ParDo.of(new ExtractFlowInfo()));

  PCollection<TableRow> totalFlow =
      flow.apply(Window.into(FixedWindows.of(Duration.standardMinutes(1))))
          .apply(new TotalFlow("default"));

  PCollection<String> results = totalFlow.apply(ParDo.of(new FormatResults()));

  PAssert.that(results)
      .containsInAnyOrder(canonicalFormat(OUT_ROW_1), canonicalFormat(OUT_ROW_2));
  pipeline.run().waitUntilFinish();
}
 
Example 10
Source Project: beam   Source File: FakeJobService.java    License: Apache License 2.0
private boolean validateDispositions(
    Table table, CreateDisposition createDisposition, WriteDisposition writeDisposition)
    throws InterruptedException, IOException {
  if (table == null) {
    if (createDisposition == CreateDisposition.CREATE_NEVER) {
      return false;
    }
  } else if (writeDisposition == WriteDisposition.WRITE_TRUNCATE) {
    datasetService.deleteTable(table.getTableReference());
  } else if (writeDisposition == WriteDisposition.WRITE_EMPTY) {
    List<TableRow> allRows =
        datasetService.getAllRows(
            table.getTableReference().getProjectId(),
            table.getTableReference().getDatasetId(),
            table.getTableReference().getTableId());
    if (!allRows.isEmpty()) {
      return false;
    }
  }
  return true;
}
 
Example 11
Source Project: DataflowTemplates   Source File: MergeInfoMapper.java    License: Apache License 2.0
@Override
public PCollection<MergeInfo> expand(PCollection<KV<TableId, TableRow>> input) {
  return input.apply(
      MapElements.into(TypeDescriptor.of(MergeInfo.class))
          .via(
              element -> {
                return MergeInfo.create(
                    METADATA_TIMESTAMP, // TODO should be list pulled from Datastream API
                    METADATA_DELETED,
                    String.format("%s.%s",
                        // Staging Table // TODO these should possibly be passed separately
                        BigQueryConverters
                            .formatStringTemplate(stagingDataset, element.getValue()),
                        BigQueryConverters
                            .formatStringTemplate(stagingTable, element.getValue())),
                    String.format("%s.%s", // Replica Table
                        BigQueryConverters
                            .formatStringTemplate(replicaDataset, element.getValue()),
                        BigQueryConverters
                            .formatStringTemplate(replicaTable, element.getValue())),
                    ImmutableList.copyOf(element.getValue().keySet()),
                    ImmutableList.of("ID"));
              }));
}
 
Example 12
@Override
public TableSchema getSchema(KV<TableId, TableRow> destination) {

  TableRow bqRow = destination.getValue();
  TableSchema schema = new TableSchema();
  List<TableFieldSchema> fields = new ArrayList<TableFieldSchema>();
  List<TableCell> cells = bqRow.getF();
  for (int i = 0; i < cells.size(); i++) {
    Map<String, Object> object = cells.get(i);
    String header = object.keySet().iterator().next();
    /** currently all BQ data types are set to String */
    // Why do we use checkHeaderName here and not elsewhere, TODO if we add this back in
    // fields.add(new TableFieldSchema().setName(checkHeaderName(header)).setType("STRING"));
    fields.add(new TableFieldSchema().setName(header).setType("STRING"));
  }

  schema.setFields(fields);
  return schema;
}
 
Example 13
@Test
public void test_parse_CSV_format_successfully_with_tablerow() throws Exception {

    List<String> input = new ArrayList<>();

    input.add("2018,8,13,Wikinews,English,Spanish football: Sevilla signs Aleix Vidal from FC Barcelona,12331");

    List<TableRow> output = fnTester.processBundle(input);

    Assert.assertThat(output, is(not(empty())));

    Assert.assertThat(output.get(0).get("year"), is(equalTo("2018")));
    Assert.assertThat(output.get(0).get("month"), is(equalTo("8")));
    Assert.assertThat(output.get(0).get("day"), is(equalTo("13")));
    Assert.assertThat(output.get(0).get("wikimedia_project"), is(equalTo("Wikinews")));
    Assert.assertThat(output.get(0).get("language"), is(equalTo("English")));
    Assert.assertThat(output.get(0).get("title"), is(equalTo("Spanish football: Sevilla signs Aleix Vidal from FC Barcelona")));
    Assert.assertThat(output.get(0).get("views"), is(equalTo("12331")));
}
 
Example 14
Source Project: DataflowTemplates   Source File: BigQueryMapper.java    License: Apache License 2.0
/**
 * Extracts and applies new column information to BigQuery by comparing the TableRow against the
 * BigQuery Table. Retries the supplied number of times before failing.
 *
 * @param tableId a TableId referencing the BigQuery table to load into.
 * @param row a TableRow with the raw data to be loaded into BigQuery.
 * @param inputSchema the source schema lookup to be used in mapping.
 * @param retries the number of remaining retries before an error is raised.
 */
private void applyMapperToTableRow(
    TableId tableId, TableRow row, Map<String, LegacySQLTypeName> inputSchema, int retries) {
  try {
    updateTableIfRequired(tableId, row, inputSchema);
  } catch (Exception e) {
    if (retries > 0) {
      LOG.info("RETRY TABLE UPDATE - enter: {}", String.valueOf(retries));
      try {
        Thread.sleep(2000);
      } catch (InterruptedException i) {
        throw e;
      }
      LOG.info("RETRY TABLE UPDATE - apply: {}", String.valueOf(retries));
      applyMapperToTableRow(tableId, row, inputSchema, retries - 1);
    } else {
      LOG.info("RETRY TABLE UPDATE - throw: {}", String.valueOf(retries));
      throw e;
    }
  }
}
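
Two design points are worth noting in this retry loop: the delay between attempts is a fixed two seconds rather than an exponential backoff, and if the sleep is interrupted the original exception is rethrown immediately, abandoning the remaining retries. The recursive call bounds the total number of attempts by the initial retries value.
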
 
Example 15
Source Project: beam   Source File: BigQueryIOWriteTest.java    License: Apache License 2.0
@Test
public void testWriteUnknown() throws Exception {
  p.apply(
          Create.of(
                  new TableRow().set("name", "a").set("number", 1),
                  new TableRow().set("name", "b").set("number", 2),
                  new TableRow().set("name", "c").set("number", 3))
              .withCoder(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER)
              .withTestServices(fakeBqServices)
              .withoutValidation());

  thrown.expect(RuntimeException.class);
  thrown.expectMessage("Failed to create job");
  p.run();
}
 
Example 16
Source Project: flink-dataflow   Source File: JoinExamples.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {
	Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
	Pipeline p = Pipeline.create(options);
	// The following two 'apply' calls create the two inputs to our pipeline, one for each
	// of our two input sources.
	PCollection<TableRow> eventsTable = p.apply(BigQueryIO.Read.from(GDELT_EVENTS_TABLE));
	PCollection<TableRow> countryCodes = p.apply(BigQueryIO.Read.from(COUNTRY_CODES));
	PCollection<String> formattedResults = joinEvents(eventsTable, countryCodes);
	formattedResults.apply(TextIO.Write.to(options.getOutput()));
	p.run();
}
 
Example 17
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
    if (c.element().equalsIgnoreCase(HEADER)) return;
    String[] split = c.element().split(",");
    if (split.length > 7) return;
    TableRow row = new TableRow();
    for (int i = 0; i < split.length; i++) {
        TableFieldSchema col = getTableSchema().getFields().get(i);
        row.set(col.getName(), split[i]);
    }
    c.output(row);
}
 
Example 18
Source Project: components   Source File: BigQueryInputRuntime.java    License: Apache License 2.0
@Override
public PCollection<IndexedRecord> expand(PBegin in) {
    BigQueryIO.TypedRead<TableRow> bigQueryIOPTransform;
    switch (dataset.sourceType.getValue()) {
    case TABLE_NAME: {
        TableReference table = new TableReference();
        table.setProjectId(datastore.projectName.getValue());
        table.setDatasetId(dataset.bqDataset.getValue());
        table.setTableId(dataset.tableName.getValue());
        // TODO: use {@link BigQueryIO#read(SerializableFunction)} instead of readTableRows for better performance.
        // It avoids a redundant type conversion, but take care with each field's type when applying it.
        bigQueryIOPTransform = BigQueryIO.readTableRows().from(table);
        break;
    }
    case QUERY: {
        // TODO: use {@link BigQueryIO#read(SerializableFunction)} instead of readTableRows for better performance.
        // It avoids a redundant type conversion, but take care with each field's type when applying it.
        bigQueryIOPTransform = BigQueryIO.readTableRows().fromQuery(dataset.query.getValue());
        if (!dataset.useLegacySql.getValue()) {
            bigQueryIOPTransform = bigQueryIOPTransform.usingStandardSql();
        } else {
            // flattenResults only matters for legacy SQL: standard SQL does not support
            // flattening results, while legacy SQL flattens them by default.
            // withoutResultFlattening does not work well on legacy SQL until the schema issue
            // is fixed, since BigQueryDatasetRuntime.getSchema does use the flattened result.
            // bigQueryIOPTransform = bigQueryIOPTransform.withoutResultFlattening();
        }
        break;
    }
    default:
        throw new RuntimeException("To be implemented: " + dataset.sourceType.getValue());
    }

    return in
            .apply(bigQueryIOPTransform)
            .apply(ParDo.of(new TableRowToIndexedRecordFn(defaultOutputCoder.getSchema())))
            .setCoder(defaultOutputCoder);
}
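
The TODO comments above point at BigQueryIO.read(SerializableFunction), which parses Avro-backed SchemaAndRecord values directly instead of materializing TableRow objects first. A minimal sketch of that alternative for the table case, assuming a hypothetical parseRecord helper that turns a GenericRecord into the desired IndexedRecord:

BigQueryIO.TypedRead<IndexedRecord> avroRead =
    BigQueryIO.read(
            (SerializableFunction<SchemaAndRecord, IndexedRecord>)
                schemaAndRecord -> parseRecord(schemaAndRecord.getRecord())) // parseRecord is hypothetical
        .from(table)
        .withCoder(defaultOutputCoder);
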
 
Example 19
Source Project: beam   Source File: HealthcareIOErrorToTableRow.java    License: Apache License 2.0
@Override
public TableRow apply(HealthcareIOError<T> err) {
  TableRow out = new TableRow();
  out.set("dataElement", err.getDataResource().toString());
  out.set(TIMESTAMP_FIELD_NAME, err.getObservedTime().toString(DATETIME_FORMATTER));
  out.set("message", err.getErrorMessage());
  out.set("stacktrace", err.getStackTrace());
  out.set("statusCode", err.getStatusCode());
  return out;
}
 
Example 20
Source Project: beam   Source File: BigQueryRowCountIT.java    License: Apache License 2.0
@Test
public void testNonEmptyTable() {
  BigQueryTableProvider provider = new BigQueryTableProvider();
  Table table = getTable("testTable", bigQuery.tableSpec());

  pipeline
      .apply(
          Create.of(
                  new TableRow().set("id", 1).set("name", "name1"),
                  new TableRow().set("id", 2).set("name", "name2"),
                  new TableRow().set("id", 3).set("name", "name3"))
              .withCoder(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to(bigQuery.tableSpec())
              .withSchema(
                  new TableSchema()
                      .setFields(
                          ImmutableList.of(
                              new TableFieldSchema().setName("id").setType("INTEGER"),
                              new TableFieldSchema().setName("name").setType("STRING"))))
              .withoutValidation());
  pipeline.run().waitUntilFinish();

  BeamSqlTable sqlTable = provider.buildBeamSqlTable(table);
  BeamTableStatistics size1 = sqlTable.getTableStatistics(TestPipeline.testingPipelineOptions());

  assertNotNull(size1);
  assertEquals(3d, size1.getRowCount(), 0.1);
}
 
Example 21
@Override
public TableDestination getTable(KV<String, TableRow> destination) {
  TableDestination dest =
      new TableDestination(destination.getKey(), "pii-tokenized output data from dataflow");
  LOG.debug("Table Destination {}", dest.getTableSpec());
  return dest;
}
 
Example 22
Source Project: DataflowTemplates   Source File: ErrorConverters.java    License: Apache License 2.0
/**
 * Converts a {@link PubsubMessage} attribute map into {@link TableRow} records which can be saved
 * to BigQuery. Each entry within the attribute map is converted into a row object containing two
 * columns: "key" & "value". This allows for the attribute map to be saved to BigQuery without
 * needing to handle schema changes due to new attributes.
 *
 * @param attributeMap A key-value map of attributes from a {@link PubsubMessage}
 * @return A list of {@link TableRow} objects, one for each map entry.
 */
private static List<TableRow> attributeMapToTableRows(Map<String, String> attributeMap) {
  final List<TableRow> attributeTableRows = Lists.newArrayList();
  if (attributeMap != null) {
    attributeMap.forEach(
        (key, value) ->
            attributeTableRows.add(new TableRow().set("key", key).set("value", value)));
  }

  return attributeTableRows;
}
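
As a usage sketch, a two-entry attribute map yields two key/value rows (the attribute names here are invented):

Map<String, String> attributes = ImmutableMap.of("source", "sensor-1", "region", "us-east1");
List<TableRow> rows = attributeMapToTableRows(attributes);
// rows: [{key=source, value=sensor-1}, {key=region, value=us-east1}]
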
 
Example 23
Source Project: beam   Source File: BigQueryInsertErrorCoder.java    License: Apache License 2.0
@Override
public BigQueryInsertError decode(InputStream inStream) throws IOException {
  TableDataInsertAllResponse.InsertErrors err =
      MAPPER.readValue(
          StringUtf8Coder.of().decode(inStream), TableDataInsertAllResponse.InsertErrors.class);
  TableRow row = TableRowJsonCoder.of().decode(inStream);
  TableReference ref = BigQueryHelpers.parseTableSpec(StringUtf8Coder.of().decode(inStream));
  return new BigQueryInsertError(row, err, ref);
}
 
Example 24
/**
 * Given a KV containing a destination and a message, return the message content as a {@link
 * TableRow} ready to pass to {@link org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO}.
 */
public TableRow kvToTableRow(KV<TableDestination, PubsubMessage> kv) {
  if (format == null) {
    format = createFormat();
  }
  final TableReference ref = kv.getKey().getTableReference();
  final TableId tableId = TableId.of(ref.getProjectId(), ref.getDatasetId(), ref.getTableId());
  final PubsubMessage message = kv.getValue();
  return Json.asTableRow(format.apply(tableId, message.getAttributeMap(), message.getPayload()));
}
 
Example 25
Source Project: beam   Source File: MaxPerKeyExamples.java    License: Apache License 2.0
@ProcessElement
public void processElement(ProcessContext c) {
  TableRow row =
      new TableRow()
          .set("month", c.element().getKey())
          .set("max_mean_temp", c.element().getValue());
  c.output(row);
}
 
Example 26
Source Project: beam   Source File: BigQueryIOStorageReadTest.java    License: Apache License 2.0
@Test
public void testStreamSourceEstimatedSizeBytes() throws Exception {

  BigQueryStorageStreamSource<TableRow> streamSource =
      BigQueryStorageStreamSource.create(
          ReadSession.getDefaultInstance(),
          Stream.getDefaultInstance(),
          TABLE_SCHEMA,
          new TableRowParser(),
          TableRowJsonCoder.of(),
          new FakeBigQueryServices());

  assertEquals(0, streamSource.getEstimatedSizeBytes(options));
}
 
Example 27
Source Project: DataflowTemplates   Source File: BigQueryConverters.java    License: Apache License 2.0
@Override
public PCollection<TableRow> expand(PCollection<String> stringPCollection) {
  return stringPCollection.apply(
      "JsonToTableRow",
      MapElements.via(
          new SimpleFunction<String, TableRow>() {
            @Override
            public TableRow apply(String json) {
              return convertJsonToTableRow(json);
            }
          }));
}
 
Example 28
Source Project: deployment-examples   Source File: WriteToBigQuery.java    License: MIT License
@ProcessElement
public void processElement(ProcessContext c, BoundedWindow window) {

  TableRow row = new TableRow();
  for (Map.Entry<String, FieldInfo<InputT>> entry : fieldInfo.entrySet()) {
    String key = entry.getKey();
    FieldInfo<InputT> fcnInfo = entry.getValue();
    FieldFn<InputT> fcn = fcnInfo.getFieldFn();
    row.set(key, fcn.apply(c, window));
  }
  c.output(row);
}
 
Example 29
Source Project: deployment-examples   Source File: WriteWindowedToBigQuery.java    License: MIT License
@ProcessElement
public void processElement(ProcessContext c, BoundedWindow window) {

  TableRow row = new TableRow();
  for (Map.Entry<String, FieldInfo<T>> entry : fieldInfo.entrySet()) {
    String key = entry.getKey();
    FieldInfo<T> fcnInfo = entry.getValue();
    row.set(key, fcnInfo.getFieldFn().apply(c, window));
  }
  c.output(row);
}
 
Example 30
Source Project: beam   Source File: FakeJobService.java    License: Apache License 2.0
private List<TableRow> readAvroTableRows(String filename, TableSchema tableSchema)
    throws IOException {
  List<TableRow> tableRows = Lists.newArrayList();
  FileReader<GenericRecord> dfr =
      DataFileReader.openReader(new File(filename), new GenericDatumReader<>());

  while (dfr.hasNext()) {
    GenericRecord record = dfr.next(null);
    tableRows.add(BigQueryUtils.convertGenericRecordToTableRow(record, tableSchema));
  }
  return tableRows;
}