com.google.api.services.bigquery.model.TableRow Java Examples

The following examples show how to use com.google.api.services.bigquery.model.TableRow. Each snippet is taken from an open-source project; the source file, project, and license are noted in the heading above it.
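Before the project examples, here is a minimal, self-contained sketch of the class in action. TableRow is a Map-backed model class from the BigQuery API client; the column names used here ("name", "age") are illustrative assumptions only, not taken from any project below.

import com.google.api.services.bigquery.model.TableRow;

public class TableRowSketch {
  public static void main(String[] args) {
    // set() stores a column value and returns the row, so calls chain fluently.
    TableRow row = new TableRow().set("name", "a").set("age", 30);

    // Values are read back as plain Objects keyed by column name.
    System.out.println(row.get("name")); // prints "a"
    System.out.println(row.get("age"));  // prints 30
  }
}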
Example #1
Source File: TopWikipediaSessions.java    From beam with Apache License 2.0
@Override
public PCollection<String> expand(PCollection<TableRow> input) {
  return input
      .apply(ParDo.of(new ExtractUserAndTimestamp()))
      .apply(
          "SampleUsers",
          ParDo.of(
              new DoFn<String, String>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
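                  // Hash-based sampling: keep the element only when the absolute
                  // value of its hash falls within the sampling fraction of the
                  // non-negative integer range.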
                  if (Math.abs((long) c.element().hashCode())
                      <= Integer.MAX_VALUE * samplingThreshold) {
                    c.output(c.element());
                  }
                }
              }))
      .apply(new ComputeSessions())
      .apply("SessionsToStrings", ParDo.of(new SessionsToStringsDoFn()))
      .apply(new TopPerMonth())
      .apply("FormatOutput", ParDo.of(new FormatOutputDoFn()));
}
 
Example #2
Source File: ErrorConverters.java    From DataflowTemplates with Apache License 2.0
@ProcessElement
public void processElement(ProcessContext context) {
  FailsafeElement<String, String> failsafeElement = context.element();
  final String message = failsafeElement.getOriginalPayload();

  // Format the timestamp for insertion
  String timestamp =
      TIMESTAMP_FORMATTER.print(context.timestamp().toDateTime(DateTimeZone.UTC));

  // Build the table row
  final TableRow failedRow =
      new TableRow()
          .set("timestamp", timestamp)
          .set("errorMessage", failsafeElement.getErrorMessage())
          .set("stacktrace", failsafeElement.getStacktrace());

  // Only set the payload if it's populated on the message.
  if (message != null) {
    failedRow
        .set("payloadString", message)
        .set("payloadBytes", message.getBytes(StandardCharsets.UTF_8));
  }

  context.output(failedRow);
}
 
Example #3
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testWriteFailedJobs() throws Exception {
  p.apply(
          Create.of(
                  new TableRow().set("name", "a").set("number", 1),
                  new TableRow().set("name", "b").set("number", 2),
                  new TableRow().set("name", "c").set("number", 3))
              .withCoder(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to("dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER)
              .withTestServices(fakeBqServices)
              .withoutValidation());

  thrown.expect(RuntimeException.class);
  thrown.expectMessage("Failed to create job with prefix");
  thrown.expectMessage("reached max retries");
  thrown.expectMessage("last failed job");

  p.run();
}
 
Example #4
Source File: FakeJobService.java    From beam with Apache License 2.0
private long writeRows(
    String tableId, List<TableRow> rows, TableSchema schema, String destinationPattern)
    throws IOException {
  Schema avroSchema = BigQueryUtils.toGenericAvroSchema(tableId, schema.getFields());
  List<TableRow> rowsToWrite = Lists.newArrayList();
  int shard = 0;
  for (TableRow row : rows) {
    rowsToWrite.add(row);
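    // Flush a shard to the destination after every five buffered rows.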
    if (rowsToWrite.size() == 5) {
      writeRowsHelper(rowsToWrite, avroSchema, destinationPattern, shard++);
      rowsToWrite.clear();
    }
  }
  if (!rowsToWrite.isEmpty()) {
    writeRowsHelper(rowsToWrite, avroSchema, destinationPattern, shard++);
  }
  return shard;
}
 
Example #5
Source File: TemplatePipelineTest.java    From gcp-batch-ingestion-bigquery with Apache License 2.0
@Test
public void test_parse_CSV_format_successfully_with_tablerow() throws Exception {

    List<String> input = new ArrayList<>();

    input.add("2018,8,13,Wikinews,English,Spanish football: Sevilla signs Aleix Vidal from FC Barcelona,12331");

    List<TableRow> output = fnTester.processBundle(input);

    Assert.assertThat(output, is(not(empty())));

    Assert.assertThat(output.get(0).get("year"), is(equalTo("2018")));
    Assert.assertThat(output.get(0).get("month"), is(equalTo("8")));
    Assert.assertThat(output.get(0).get("day"), is(equalTo("13")));
    Assert.assertThat(output.get(0).get("wikimedia_project"), is(equalTo("Wikinews")));
    Assert.assertThat(output.get(0).get("language"), is(equalTo("English")));
    Assert.assertThat(output.get(0).get("title"), is(equalTo("Spanish football: Sevilla signs Aleix Vidal from FC Barcelona")));
    Assert.assertThat(output.get(0).get("views"), is(equalTo("12331")));
}
 
Example #6
Source File: ExactDollarRides.java    From cloud-dataflow-nyc-taxi-tycoon with Apache License 2.0
@Override
public void processElement(ProcessContext c) {
  Double dollars = c.element();
  TableRow r = new TableRow();
  r.set("dollar_turnover", dollars);
  // the timing can be:
  // EARLY: the dollar amount is not yet final
  // ON_TIME: Dataflow considers the dollar amount final, but late data may still arrive
  // LATE: late data has arrived
  r.set("dollar_timing", c.pane().getTiming()); // EARLY, ON_TIME or LATE
  r.set("dollar_window", ((IntervalWindow) c.window()).start().getMillis() / 1000.0 / 60.0); // timestamp in fractional minutes

  LOG.info("Outputting $ value {}} at {} with marker {} for window {}",
    dollars.toString(), new Date().getTime(), c.pane().getTiming().toString(), c.window().hashCode());
  c.output(r);
}
 
Example #7
Source File: BigQueryIOStorageQueryTest.java    From beam with Apache License 2.0
@Test
public void testQuerySourceCreateReader() throws Exception {
  BigQueryStorageQuerySource<TableRow> querySource =
      BigQueryStorageQuerySource.create(
          /* stepUuid = */ "testStepUuid",
          ValueProvider.StaticValueProvider.of("SELECT * FROM `dataset.table`"),
          /* flattenResults = */ false,
          /* useLegacySql = */ false,
          /* priority = */ QueryPriority.INTERACTIVE,
          /* location = */ "asia-northeast1",
          /* queryTempDataset = */ null,
          /* kmsKey = */ null,
          new TableRowParser(),
          TableRowJsonCoder.of(),
          fakeBigQueryServices);

  thrown.expect(UnsupportedOperationException.class);
  thrown.expectMessage("BigQuery storage source must be split before reading");
  querySource.createReader(options);
}
 
Example #8
Source File: BigQueryMapper.java    From DataflowTemplates with Apache License 2.0
/**
 * Extracts and applies new column information to BigQuery by comparing the TableRow against the
 * BigQuery Table. Retries the supplied number of times before failing.
 *
 * @param tableId a TableId referencing the BigQuery table to load into.
 * @param row a TableRow with the raw data to be loaded into BigQuery.
 * @param inputSchema the source schema lookup to be used in mapping.
 * @param retries the number of remaining retries before the error is raised.
 */
private void applyMapperToTableRow(
    TableId tableId, TableRow row, Map<String, LegacySQLTypeName> inputSchema, int retries) {
  try {
    updateTableIfRequired(tableId, row, inputSchema);
  } catch (Exception e) {
    if (retries > 0) {
      LOG.info("RETRY TABLE UPDATE - enter: {}", String.valueOf(retries));
      try {
        Thread.sleep(2000);
      } catch (InterruptedException i) {
        throw e;
      }
      LOG.info("RETRY TABLE UPDATE - apply: {}", String.valueOf(retries));
      applyMapperToTableRow(tableId, row, inputSchema, retries - 1);
    } else {
      LOG.info("RETRY TABLE UPDATE - throw: {}", String.valueOf(retries));
      throw e;
    }
  }
}
 
Example #9
Source File: BigQueryDynamicConverters.java    From DataflowTemplates with Apache License 2.0
@Override
public TableSchema getSchema(KV<TableId, TableRow> destination) {

  TableRow bqRow = destination.getValue();
  TableSchema schema = new TableSchema();
  List<TableFieldSchema> fields = new ArrayList<TableFieldSchema>();
  List<TableCell> cells = bqRow.getF();
  for (int i = 0; i < cells.size(); i++) {
    Map<String, Object> object = cells.get(i);
    String header = object.keySet().iterator().next();
    // Currently all BQ data types are set to String.
    // TODO: decide whether checkHeaderName should be used here as it is elsewhere:
    // fields.add(new TableFieldSchema().setName(checkHeaderName(header)).setType("STRING"));
    fields.add(new TableFieldSchema().setName(header).setType("STRING"));
  }

  schema.setFields(fields);
  return schema;
}
 
Example #10
Source File: BigqueryMatcherTest.java    From beam with Apache License 2.0
private QueryResponse createResponseContainingTestData() {
  TableCell field1 = new TableCell();
  field1.setV("abc");
  TableCell field2 = new TableCell();
  field2.setV("2");
  TableCell field3 = new TableCell();
  field3.setV("testing BigQuery matcher.");
  TableRow row = new TableRow();
  row.setF(Lists.newArrayList(field1, field2, field3));

  QueryResponse response = new QueryResponse();
  response.setJobComplete(true);
  response.setRows(Lists.newArrayList(row));
  response.setTotalRows(BigInteger.ONE);
  return response;
}
 
Example #11
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testWriteUnknown() throws Exception {
  p.apply(
          Create.of(
                  new TableRow().set("name", "a").set("number", 1),
                  new TableRow().set("name", "b").set("number", 2),
                  new TableRow().set("name", "c").set("number", 3))
              .withCoder(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER)
              .withTestServices(fakeBqServices)
              .withoutValidation());

  thrown.expect(RuntimeException.class);
  thrown.expectMessage("Failed to create job");
  p.run();
}
 
Example #12
Source File: TriggerExampleTest.java    From beam with Apache License 2.0
@Test
@Category(ValidatesRunner.class)
public void testTotalFlow() {
  PCollection<KV<String, Integer>> flow =
      pipeline
          .apply(Create.timestamped(TIME_STAMPED_INPUT))
          .apply(ParDo.of(new ExtractFlowInfo()));

  PCollection<TableRow> totalFlow =
      flow.apply(Window.into(FixedWindows.of(Duration.standardMinutes(1))))
          .apply(new TotalFlow("default"));

  PCollection<String> results = totalFlow.apply(ParDo.of(new FormatResults()));

  PAssert.that(results)
      .containsInAnyOrder(canonicalFormat(OUT_ROW_1), canonicalFormat(OUT_ROW_2));
  pipeline.run().waitUntilFinish();
}
 
Example #13
Source File: MergeInfoMapper.java    From DataflowTemplates with Apache License 2.0
@Override
public PCollection<MergeInfo> expand(PCollection<KV<TableId, TableRow>> input) {
  return input.apply(
      MapElements.into(TypeDescriptor.of(MergeInfo.class))
          .via(
              element -> {
                return MergeInfo.create(
                    METADATA_TIMESTAMP, // TODO should be list pulled from Datastream API
                    METADATA_DELETED,
                    String.format("%s.%s",
                        // Staging Table // TODO these should possibly be passed separately
                        BigQueryConverters
                            .formatStringTemplate(stagingDataset, element.getValue()),
                        BigQueryConverters
                            .formatStringTemplate(stagingTable, element.getValue())),
                    String.format("%s.%s", // Replica Table
                        BigQueryConverters
                            .formatStringTemplate(replicaDataset, element.getValue()),
                        BigQueryConverters
                            .formatStringTemplate(replicaTable, element.getValue())),
                    ImmutableList.copyOf(element.getValue().keySet()),
                    ImmutableList.of("ID"));
              }));
}
 
Example #14
Source File: FakeJobService.java    From beam with Apache License 2.0
private boolean validateDispositions(
    Table table, CreateDisposition createDisposition, WriteDisposition writeDisposition)
    throws InterruptedException, IOException {
  if (table == null) {
    if (createDisposition == CreateDisposition.CREATE_NEVER) {
      return false;
    }
  } else if (writeDisposition == WriteDisposition.WRITE_TRUNCATE) {
    datasetService.deleteTable(table.getTableReference());
  } else if (writeDisposition == WriteDisposition.WRITE_EMPTY) {
    List<TableRow> allRows =
        datasetService.getAllRows(
            table.getTableReference().getProjectId(),
            table.getTableReference().getDatasetId(),
            table.getTableReference().getTableId());
    if (!allRows.isEmpty()) {
      return false;
    }
  }
  return true;
}
 
Example #15
Source File: BigQueryInsertErrorCoderTest.java    From beam with Apache License 2.0
@Test
public void testDecodeEncodeEqual() throws Exception {
  BigQueryInsertError value =
      new BigQueryInsertError(
          new TableRow().setF(Collections.singletonList(new TableCell().setV("Value"))),
          new TableDataInsertAllResponse.InsertErrors()
              .setIndex(0L)
              .setErrors(
                  Collections.singletonList(
                      new ErrorProto()
                          .setReason("a Reason")
                          .setLocation("A location")
                          .setMessage("A message")
                          .setDebugInfo("The debug info"))),
          new TableReference()
              .setProjectId("dummy-project-id")
              .setDatasetId("dummy-dataset-id")
              .setTableId("dummy-table-id"));

  CoderProperties.coderDecodeEncodeEqual(TEST_CODER, value);
}
 
Example #16
Source File: JoinExamples.java    From flink-dataflow with Apache License 2.0
@Override
public void processElement(ProcessContext c) {
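	// Emit (FIPS country code, human-readable country name) pairs for the join.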
	TableRow row = c.element();
	String countryCode = (String) row.get("FIPSCC");
	String countryName = (String) row.get("HumanName");
	c.output(KV.of(countryCode, countryName));
}
 
Example #17
Source File: BigQueryToTableIT.java    From beam with Apache License 2.0
private void verifyNewTypesQueryRes(String outputTable) throws Exception {
  List<String> newTypeQueryExpectedRes =
      ImmutableList.of(
          "abc=,2000-01-01,00:00:00",
          "dec=,3000-12-31,23:59:59.990000",
          "xyw=,2011-01-01,23:59:59.999999");
  List<TableRow> tableRows =
      getTableRowsFromQuery(
          String.format("SELECT bytes, date, time FROM [%s];", outputTable), MAX_RETRY);
  List<String> tableResult =
      tableRows.stream()
          .map(
              row -> {
                String res = "";
                for (TableCell cell : row.getF()) {
                  if (res.isEmpty()) {
                    res = cell.getV().toString();
                  } else {
                    res = res + "," + cell.getV().toString();
                  }
                }
                return res;
              })
          .sorted()
          .collect(Collectors.toList());
  assertEquals(newTypeQueryExpectedRes, tableResult);
}
 
Example #18
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testWriteWithBrokenGetTable() throws Exception {
  p.apply(Create.<TableRow>of(new TableRow().set("foo", "bar")))
      .apply(
          BigQueryIO.writeTableRows()
              .to(input -> null)
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER)
              .withTestServices(fakeBqServices)
              .withoutValidation());

  thrown.expectMessage("result of tableFunction can not be null");
  thrown.expectMessage("foo");
  p.run();
}
 
Example #19
Source File: FilterRides.java    From cloud-dataflow-nyc-taxi-tycoon with Apache License 2.0
@Override
public void processElement(ProcessContext c) {
  TableRow ride = c.element();

  // Keep only rides inside the lower Manhattan bounding box.
  float lat = Float.parseFloat(ride.get("latitude").toString());
  float lon = Float.parseFloat(ride.get("longitude").toString());
  if (lon > -74.747 && lon < -73.969 && lat > 40.699 && lat < 40.720) {
    c.output(ride);
    LOG.info("Accepted ride lat: {} lon: {} ", lat, lon);
  }
}
 
Example #20
Source File: BigqueryMatcher.java    From beam with Apache License 2.0
private String generateHash(@Nonnull List<TableRow> rows) {
  List<HashCode> rowHashes = Lists.newArrayList();
  for (TableRow row : rows) {
    List<String> cellsInOneRow = Lists.newArrayList();
    for (TableCell cell : row.getF()) {
      cellsInOneRow.add(Objects.toString(cell.getV()));
    }
    // Sort once per row (rather than on every insertion) so the hash is
    // independent of cell order.
    Collections.sort(cellsInOneRow);
    rowHashes.add(Hashing.sha1().hashString(cellsInOneRow.toString(), StandardCharsets.UTF_8));
  }
  return Hashing.combineUnordered(rowHashes).toString();
}
 
Example #21
Source File: BigQueryTornadoesTest.java    From beam with Apache License 2.0
@Test
@Category(ValidatesRunner.class)
public void testFormatCounts() {
  PCollection<KV<Integer, Long>> inputs =
      p.apply(Create.of(KV.of(3, 0L), KV.of(4, Long.MAX_VALUE), KV.of(5, Long.MIN_VALUE)));
  PCollection<TableRow> result = inputs.apply(ParDo.of(new FormatCountsFn()));
  PAssert.that(result)
      .containsInAnyOrder(
          new TableRow().set("month", 3).set("tornado_count", 0),
          new TableRow().set("month", 4).set("tornado_count", Long.MAX_VALUE),
          new TableRow().set("month", 5).set("tornado_count", Long.MIN_VALUE));
  p.run().waitUntilFinish();
}
 
Example #22
Source File: TemplatePipeline.java    From gcp-batch-ingestion-bigquery with Apache License 2.0
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
    if (c.element().equalsIgnoreCase(HEADER)) return;
    String[] split = c.element().split(",");
    if (split.length > 7) return;
    TableRow row = new TableRow();
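    // Map each CSV column onto the schema field at the same position.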
    for (int i = 0; i < split.length; i++) {
        TableFieldSchema col = getTableSchema().getFields().get(i);
        row.set(col.getName(), split[i]);
    }
    c.output(row);
}
 
Example #23
Source File: BigQueryIOStorageReadTest.java    From beam with Apache License 2.0
@Test
public void testEvaluatedDisplayData() {
  DisplayDataEvaluator evaluator = DisplayDataEvaluator.create();
  BigQueryIO.TypedRead<TableRow> typedRead =
      BigQueryIO.read(new TableRowParser())
          .withCoder(TableRowJsonCoder.of())
          .withMethod(Method.DIRECT_READ)
          .from("foo.com:project:dataset.table");
  Set<DisplayData> displayData = evaluator.displayDataForPrimitiveSourceTransforms(typedRead);
  assertThat(displayData, hasItem(hasDisplayItem("table")));
}
 
Example #24
Source File: BigQueryConverters.java    From DataflowTemplates with Apache License 2.0
@ProcessElement
public void processElement(ProcessContext context) {
  TableRow row = context.element();
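  // Try the JSON conversion; on failure, route the row to the dead-letter
  // output with the error message and stack trace attached.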
  try {
    context.output(FailsafeElement.of(row, tableRowToJson(row)));
    successCounter.inc();
  } catch (Exception e) {
    context.output(
        this.transformDeadletterOutTag,
        FailsafeElement.of(row, row.toString())
            .setErrorMessage(e.getMessage())
            .setStacktrace(Throwables.getStackTraceAsString(e)));
    failedCounter.inc();
  }
}
 
Example #25
Source File: BigQueryHllSketchCompatibilityIT.java    From beam with Apache License 2.0
private void writeSketchToBigQuery(List<String> testData, String expectedChecksum) {
  String tableSpec = String.format("%s.%s", DATASET_ID, SKETCH_TABLE_ID);
  String query =
      String.format("SELECT HLL_COUNT.EXTRACT(%s) FROM %s", SKETCH_FIELD_NAME, tableSpec);
  TableSchema tableSchema =
      new TableSchema()
          .setFields(
              Collections.singletonList(
                  new TableFieldSchema().setName(SKETCH_FIELD_NAME).setType(SKETCH_FIELD_TYPE)));

  TestPipelineOptions options =
      TestPipeline.testingPipelineOptions().as(TestPipelineOptions.class);
  Pipeline p = Pipeline.create(options);
  p.apply(Create.of(testData).withType(TypeDescriptor.of(String.class)))
      .apply(HllCount.Init.forStrings().globally())
      .apply(
          BigQueryIO.<byte[]>write()
              .to(tableSpec)
              .withSchema(tableSchema)
              .withFormatFunction(
                  sketch ->
                      // Empty sketch is represented by empty byte array in Beam and by null in
                      // BigQuery
                      new TableRow().set(SKETCH_FIELD_NAME, sketch.length == 0 ? null : sketch))
              .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
  p.run().waitUntilFinish();

  // BigqueryMatcher will send a query to retrieve the estimated count and verifies its
  // correctness using checksum.
  assertThat(
      createQueryUsingStandardSql(APP_NAME, PROJECT_ID, query),
      queryResultHasChecksum(expectedChecksum));
}
 
Example #26
Source File: WriteResult.java    From beam with Apache License 2.0
/**
 * Returns a {@link PCollection} containing the {@link TableRow}s that didn't make it to BQ.
 *
 * <p>Only use this method if you haven't enabled {@link
 * BigQueryIO.Write#withExtendedErrorInfo()}. Otherwise, use {@link
 * WriteResult#getFailedInsertsWithErr()}.
 */
public PCollection<TableRow> getFailedInserts() {
  checkArgument(
      failedInsertsTag != null,
      "Cannot use getFailedInserts as this WriteResult uses extended errors"
          + " information. Use getFailedInsertsWithErr instead");
  return failedInserts;
}
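As a usage note for this method, here is a hedged sketch of retrieving failed rows from a streaming write. It assumes `rows` is an existing PCollection<TableRow>; the table spec is a hypothetical placeholder, not taken from the Beam source.

// Assumes `rows` is a PCollection<TableRow> already in the pipeline.
WriteResult result =
    rows.apply(
        BigQueryIO.writeTableRows()
            .to("my-project:my_dataset.my_table") // hypothetical table spec
            .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS));

// Without withExtendedErrorInfo(), failed inserts come back as raw TableRows.
PCollection<TableRow> failed = result.getFailedInserts();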
 
Example #27
Source File: ErrorConverters.java    From DataflowTemplates with Apache License 2.0
@ProcessElement
public void processElement(ProcessContext context) {
  FailsafeElement<KV<String, String>, String> failsafeElement = context.element();
  KV<String, String> message = failsafeElement.getOriginalPayload();

  // Format the timestamp for insertion
  String timestamp =
      TIMESTAMP_FORMATTER.print(context.timestamp().toDateTime(DateTimeZone.UTC));

  String payloadString =
      "key: "
          + (message.getKey() == null ? "" : message.getKey())
          + "value: "
          + (message.getValue() == null ? "" : message.getValue());

  byte[] payloadBytes =
      (message.getValue() == null
          ? "".getBytes(StandardCharsets.UTF_8)
          : message.getValue().getBytes(StandardCharsets.UTF_8));

  // Build the table row
  TableRow failedRow =
      new TableRow()
          .set("timestamp", timestamp)
          .set("errorMessage", failsafeElement.getErrorMessage())
          .set("stacktrace", failsafeElement.getStacktrace())
          .set("payloadString", payloadString)
          .set("payloadBytes", payloadBytes);

  context.output(failedRow);
}
 
Example #28
Source File: TrafficRoutes.java    From beam with Apache License 2.0
@ProcessElement
public void processElement(ProcessContext c) {
  RouteInfo routeInfo = c.element().getValue();
  TableRow row =
      new TableRow()
          .set("avg_speed", routeInfo.getAvgSpeed())
          .set("slowdown_event", routeInfo.getSlowdownEvent())
          .set("route", c.element().getKey())
          .set("window_timestamp", c.timestamp().toString());
  c.output(row);
}
 
Example #29
Source File: StreamingWriteTables.java    From beam with Apache License 2.0
StreamingWriteTables<ElementT> withToTableRow(
    SerializableFunction<ElementT, TableRow> toTableRow) {
  return new StreamingWriteTables<>(
      bigQueryServices,
      retryPolicy,
      extendedErrorInfo,
      skipInvalidRows,
      ignoreUnknownValues,
      ignoreInsertIds,
      elementCoder,
      toTableRow);
}
 
Example #30
Source File: JoinExamples.java    From beam with Apache License 2.0
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline p = Pipeline.create(options);
  // The following two 'apply' calls create the pipeline's two inputs,
  // one for each of the input source tables.
  PCollection<TableRow> eventsTable =
      p.apply(BigQueryIO.readTableRows().from(GDELT_EVENTS_TABLE));
  PCollection<TableRow> countryCodes = p.apply(BigQueryIO.readTableRows().from(COUNTRY_CODES));
  PCollection<String> formattedResults = joinEvents(eventsTable, countryCodes);
  formattedResults.apply(TextIO.write().to(options.getOutput()));
  p.run().waitUntilFinish();
}