Java Code Examples for org.apache.beam.sdk.extensions.sql.SqlTransform

The following examples show how to use org.apache.beam.sdk.extensions.sql.SqlTransform. They are extracted from open-source projects. You can vote up the examples you find useful or vote down those you don't, and follow the links above each example to reach the original project or source file. Related API usage is available in the sidebar.
Example 1
Source Project: beam   Source File: ExternalSqlTransformRegistrar.java    License: Apache License 2.0 6 votes vote down vote up
@Override
public PTransform<PInput, PCollection<Row>> buildExternal(Configuration configuration) {
  // Start from a transform for the configured query text.
  SqlTransform transform = SqlTransform.query(configuration.query);
  String dialect = configuration.dialect;
  if (dialect != null) {
    // Resolve the dialect name case-insensitively against the registered planners.
    Class<? extends QueryPlanner> plannerClass = DIALECTS.get(dialect.toLowerCase());
    if (plannerClass == null) {
      String message =
          String.format(
              "Received unknown SQL Dialect '%s'. Known dialects: %s", dialect, DIALECTS.keySet());
      throw new IllegalArgumentException(message);
    }
    transform = transform.withQueryPlannerClass(plannerClass);
  }
  return transform;
}
 
Example 2
Source Project: beam   Source File: BeamSqlDataCatalogExample.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Entry point: parses pipeline options, validates them, and runs a Beam SQL query against
 * Data Catalog tables, writing the results as text files.
 */
public static void main(String[] args) throws Exception {
  LOG.info("Args: {}", Arrays.asList(args));
  DCExamplePipelineOptions options =
      PipelineOptionsFactory.fromArgs(args).as(DCExamplePipelineOptions.class);
  LOG.info("Query: {}\nOutput: {}", options.getQueryString(), options.getOutputFilePrefix());

  // Fail fast on bad arguments before constructing the pipeline
  // (previously the pipeline was created before validation).
  validateArgs(options);

  Pipeline pipeline = Pipeline.create(options);

  // DataCatalogTableProvider is AutoCloseable; try-with-resources guarantees cleanup
  // even if pipeline construction or execution throws.
  try (DataCatalogTableProvider tableProvider =
      DataCatalogTableProvider.create(options.as(DataCatalogPipelineOptions.class))) {
    pipeline
        .apply(
            "SQL Query",
            SqlTransform.query(options.getQueryString())
                .withDefaultTableProvider("datacatalog", tableProvider))
        .apply("Convert to Strings", rowsToStrings())
        .apply("Write output", TextIO.write().to(options.getOutputFilePrefix()));

    pipeline.run().waitUntilFinish();
  }
}
 
Example 3
Source Project: beam   Source File: BeamSqlHiveSchemaTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testJoinPCollectionWithHCatalog() throws Exception {
  initializeHCatalog();

  // Main input registered under the implicit PCOLLECTION table name.
  PCollection<Row> input =
      pipeline.apply("pcollection", create(row(1, "pcollection_1"), row(2, "pcollection_2")));

  // Join the in-pipeline PCOLLECTION against the HCatalog-backed hive table.
  SqlTransform joinQuery =
      SqlTransform.query(
              "SELECT hive.f_int, (hive.f_str || ' ' || pcollection.f_string) AS f_string \n"
                  + "FROM `hive`.`default`.`mytable` AS hive \n"
                  + "   INNER JOIN \n"
                  + " PCOLLECTION AS pcollection \n"
                  + "   ON pcollection.f_int = hive.f_int")
          .withTableProvider("hive", hiveTableProvider());
  PCollection<Row> joined = input.apply(joinQuery);

  PAssert.that(joined)
      .containsInAnyOrder(row(1, "record 1 pcollection_1"), row(2, "record 2 pcollection_2"));
  pipeline.run();
}
 
Example 4
Source Project: beam   Source File: CustomTableResolverTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testSimpleId() {
  // Register a single table with two rows in the custom provider.
  CustomResolutionTestTableProvider provider = new CustomResolutionTestTableProvider();
  provider.createTable(
      Table.builder().name("testtable").schema(BASIC_SCHEMA).type("test").build());
  provider.addRows("testtable", row(1, "one"), row(2, "two"));

  // An unqualified table name should resolve via the default provider.
  SqlTransform sql =
      SqlTransform.query("SELECT id, name FROM testtable")
          .withDefaultTableProvider("testprovider", provider);
  PCollection<Row> result = pipeline.apply(sql);

  PAssert.that(result).containsInAnyOrder(row(1, "one"), row(2, "two"));

  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
 
Example 5
Source Project: beam   Source File: CustomTableResolverTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testSimpleIdWithExplicitDefaultSchema() throws Exception {
  // Register a single table with two rows in the custom provider.
  CustomResolutionTestTableProvider provider = new CustomResolutionTestTableProvider();
  provider.createTable(
      Table.builder().name("testtable").schema(BASIC_SCHEMA).type("test").build());
  provider.addRows("testtable", row(1, "one"), row(2, "two"));

  // Explicitly qualifying with the default schema name should still resolve the table.
  SqlTransform sql =
      SqlTransform.query("SELECT id, name FROM testprovider.testtable")
          .withDefaultTableProvider("testprovider", provider);
  PCollection<Row> result = pipeline.apply(sql);

  PAssert.that(result).containsInAnyOrder(row(1, "one"), row(2, "two"));

  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
 
Example 6
Source Project: beam   Source File: CustomTableResolverTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testSimpleIdWithExplicitDefaultSchemaWithMultipleProviders() throws Exception {
  // Default provider owns "testtable".
  CustomResolutionTestTableProvider defaultProvider = new CustomResolutionTestTableProvider();
  defaultProvider.createTable(
      Table.builder().name("testtable").schema(BASIC_SCHEMA).type("test").build());
  defaultProvider.addRows("testtable", row(1, "one"), row(2, "two"));

  // A second, explicitly-registered provider owns "testtable2".
  CustomResolutionTestTableProvider secondProvider = new CustomResolutionTestTableProvider();
  secondProvider.createTable(
      Table.builder().name("testtable2").schema(BASIC_SCHEMA).type("test").build());
  secondProvider.addRows("testtable2", row(3, "three"), row(4, "four"));

  // The qualified name should resolve against the non-default provider.
  SqlTransform sql =
      SqlTransform.query("SELECT id, name FROM testprovider2.testtable2")
          .withTableProvider("testprovider2", secondProvider)
          .withDefaultTableProvider("testprovider", defaultProvider);
  PCollection<Row> result = pipeline.apply(sql);

  PAssert.that(result).containsInAnyOrder(row(3, "three"), row(4, "four"));

  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
 
Example 7
Source Project: beam   Source File: CustomTableResolverTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testSimpleIdWithExplicitNonDefaultSchema() throws Exception {
  // Default provider owns "testtable".
  CustomResolutionTestTableProvider defaultProvider = new CustomResolutionTestTableProvider();
  defaultProvider.createTable(
      Table.builder().name("testtable").schema(BASIC_SCHEMA).type("test").build());
  defaultProvider.addRows("testtable", row(1, "one"), row(2, "two"));

  // Non-default provider owns "testtable2".
  CustomResolutionTestTableProvider secondProvider = new CustomResolutionTestTableProvider();
  secondProvider.createTable(
      Table.builder().name("testtable2").schema(BASIC_SCHEMA).type("test").build());
  secondProvider.addRows("testtable2", row(3, "three"), row(4, "four"));

  // Qualifying with the non-default schema name should hit the second provider's rows.
  SqlTransform sql =
      SqlTransform.query("SELECT id, name FROM testprovider2.testtable2")
          .withTableProvider("testprovider2", secondProvider)
          .withDefaultTableProvider("testprovider", defaultProvider);
  PCollection<Row> result = pipeline.apply(sql);

  PAssert.that(result).containsInAnyOrder(row(3, "three"), row(4, "four"));

  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
 
Example 8
Source Project: beam   Source File: CustomTableResolverTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testCompoundIdInDefaultSchema() throws Exception {
  // The provider's custom resolution joins compound-id parts with underscores,
  // so "testtable.blah" should resolve to the table named "testtable_blah".
  CustomResolutionTestTableProvider provider = new CustomResolutionTestTableProvider();
  provider.createTable(
      Table.builder().name("testtable_blah").schema(BASIC_SCHEMA).type("test").build());
  provider.addRows("testtable_blah", row(1, "one"), row(2, "two"));

  SqlTransform sql =
      SqlTransform.query("SELECT id, name FROM testtable.blah")
          .withDefaultTableProvider("testprovider", provider);
  PCollection<Row> result = pipeline.apply(sql);

  PAssert.that(result).containsInAnyOrder(row(1, "one"), row(2, "two"));

  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
 
Example 9
Source Project: beam   Source File: CustomTableResolverTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testCompoundIdInExplicitDefaultSchema() throws Exception {
  // Compound id prefixed with the default schema name should resolve to
  // the underscore-joined table "testtable_blah" via custom resolution.
  CustomResolutionTestTableProvider provider = new CustomResolutionTestTableProvider();
  provider.createTable(
      Table.builder().name("testtable_blah").schema(BASIC_SCHEMA).type("test").build());
  provider.addRows("testtable_blah", row(1, "one"), row(2, "two"));

  SqlTransform sql =
      SqlTransform.query("SELECT id, name FROM testprovider.testtable.blah")
          .withDefaultTableProvider("testprovider", provider);
  PCollection<Row> result = pipeline.apply(sql);

  PAssert.that(result).containsInAnyOrder(row(1, "one"), row(2, "two"));

  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
 
Example 10
Source Project: beam   Source File: CustomTableResolverTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testLongCompoundIdInDefaultSchema() throws Exception {
  // Four-part compound id should resolve to the underscore-joined table name.
  CustomResolutionTestTableProvider provider = new CustomResolutionTestTableProvider();
  provider.createTable(
      Table.builder().name("testtable_blah_foo_bar").schema(BASIC_SCHEMA).type("test").build());
  provider.addRows("testtable_blah_foo_bar", row(1, "one"), row(2, "two"));

  SqlTransform sql =
      SqlTransform.query("SELECT id, name FROM testtable.blah.foo.bar")
          .withDefaultTableProvider("testprovider", provider);
  PCollection<Row> result = pipeline.apply(sql);

  PAssert.that(result).containsInAnyOrder(row(1, "one"), row(2, "two"));

  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
 
Example 11
Source Project: beam   Source File: CustomTableResolverTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testLongCompoundIdInDefaultSchemaWithMultipleProviders() throws Exception {
  // Both providers register a table with the same underscore-joined name,
  // but with different rows, so the assertion shows which provider resolved.
  CustomResolutionTestTableProvider defaultProvider = new CustomResolutionTestTableProvider();
  defaultProvider.createTable(
      Table.builder().name("testtable_blah_foo_bar").schema(BASIC_SCHEMA).type("test").build());
  defaultProvider.addRows("testtable_blah_foo_bar", row(1, "one"), row(2, "two"));

  CustomResolutionTestTableProvider secondProvider = new CustomResolutionTestTableProvider();
  secondProvider.createTable(
      Table.builder().name("testtable_blah_foo_bar").schema(BASIC_SCHEMA).type("test").build());
  secondProvider.addRows("testtable_blah_foo_bar", row(3, "three"), row(4, "four"));

  // Unqualified compound id: expect resolution against the DEFAULT provider's rows.
  SqlTransform sql =
      SqlTransform.query("SELECT id, name FROM testtable.blah.foo.bar")
          .withTableProvider("testprovider2", secondProvider)
          .withDefaultTableProvider("testprovider", defaultProvider);
  PCollection<Row> result = pipeline.apply(sql);

  PAssert.that(result).containsInAnyOrder(row(1, "one"), row(2, "two"));

  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
 
Example 12
Source Project: beam   Source File: CustomTableResolverTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testLongCompoundIdInExplicitDefaultSchema() throws Exception {
  // Compound id prefixed by the default schema name should strip the prefix and
  // resolve the remaining parts to the underscore-joined table name.
  CustomResolutionTestTableProvider provider = new CustomResolutionTestTableProvider();
  provider.createTable(
      Table.builder().name("testtable_blah_foo_bar").schema(BASIC_SCHEMA).type("test").build());
  provider.addRows("testtable_blah_foo_bar", row(1, "one"), row(2, "two"));

  SqlTransform sql =
      SqlTransform.query("SELECT id, name FROM testprovider.testtable.blah.foo.bar")
          .withDefaultTableProvider("testprovider", provider);
  PCollection<Row> result = pipeline.apply(sql);

  PAssert.that(result).containsInAnyOrder(row(1, "one"), row(2, "two"));

  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
 
Example 13
Source Project: beam   Source File: CustomTableResolverTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testLongCompoundIdInNonDefaultSchemaSameTableNames() throws Exception {
  // Both providers register identically-named tables with different rows so the
  // assertion reveals which provider the compound id resolved against.
  CustomResolutionTestTableProvider defaultProvider = new CustomResolutionTestTableProvider();
  defaultProvider.createTable(
      Table.builder().name("testtable_blah_foo_bar").schema(BASIC_SCHEMA).type("test").build());
  defaultProvider.addRows("testtable_blah_foo_bar", row(1, "one"), row(2, "two"));

  CustomResolutionTestTableProvider secondProvider = new CustomResolutionTestTableProvider();
  secondProvider.createTable(
      Table.builder().name("testtable_blah_foo_bar").schema(BASIC_SCHEMA).type("test").build());
  secondProvider.addRows("testtable_blah_foo_bar", row(3, "three"), row(4, "four"));

  // Prefixing with the non-default schema should hit the SECOND provider's rows.
  SqlTransform sql =
      SqlTransform.query("SELECT id, name FROM testprovider2.testtable.blah.foo.bar")
          .withTableProvider("testprovider2", secondProvider)
          .withDefaultTableProvider("testprovider", defaultProvider);
  PCollection<Row> result = pipeline.apply(sql);

  PAssert.that(result).containsInAnyOrder(row(3, "three"), row(4, "four"));

  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
 
Example 14
Source Project: beam   Source File: CustomTableResolverTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testLongCompoundIdInNonDefaultSchemaDifferentNames() throws Exception {
  // Default provider's table; should NOT be selected by the qualified query below.
  CustomResolutionTestTableProvider defaultProvider = new CustomResolutionTestTableProvider();
  defaultProvider.createTable(
      Table.builder().name("testtable_blah_foo_bar").schema(BASIC_SCHEMA).type("test").build());
  defaultProvider.addRows("testtable_blah_foo_bar", row(1, "one"), row(2, "two"));

  // Non-default provider's table with a distinct underscore-joined name.
  CustomResolutionTestTableProvider secondProvider = new CustomResolutionTestTableProvider();
  secondProvider.createTable(
      Table.builder()
          .name("testtable2_blah2_foo2_bar2")
          .schema(BASIC_SCHEMA)
          .type("test")
          .build());
  secondProvider.addRows("testtable2_blah2_foo2_bar2", row(3, "three"), row(4, "four"));

  // Qualified compound id should resolve entirely within the non-default provider.
  SqlTransform sql =
      SqlTransform.query("SELECT id, name FROM testprovider2.testtable2.blah2.foo2.bar2")
          .withTableProvider("testprovider2", secondProvider)
          .withDefaultTableProvider("testprovider", defaultProvider);
  PCollection<Row> result = pipeline.apply(sql);

  PAssert.that(result).containsInAnyOrder(row(3, "three"), row(4, "four"));

  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
 
Example 15
@Override
public PDone expand(PBegin begin) {
  // Evaluate the SQL expression against a single dummy row and pull the
  // boolean result out of the first column.
  PCollection<Boolean> evaluated =
      begin
          .apply(Create.of(DUMMY_ROW).withRowSchema(DUMMY_SCHEMA))
          .apply(SqlTransform.query("SELECT " + expr))
          .apply(MapElements.into(TypeDescriptors.booleans()).via(row -> row.getBoolean(0)));

  // Assert the single evaluated value is true.
  PAssert.that(evaluated)
      .satisfies(
          values -> {
            assertTrue("Test expression is false: " + expr, Iterables.getOnlyElement(values));
            return null;
          });
  return PDone.in(begin.getPipeline());
}
 
Example 16
Source Project: beam   Source File: BeamSalUhfSpecialTypeAndValueTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testIsInf() throws Exception {
  // One boolean output column per IS_INF call in the query.
  Schema schema =
      Schema.builder()
          .addBooleanField("field_1")
          .addBooleanField("field_2")
          .addBooleanField("field_3")
          .addBooleanField("field_4")
          .build();
  Row expected = Row.withSchema(schema).addValues(true, true, true, true).build();

  String sql =
      "SELECT IS_INF(f_float_1), IS_INF(f_double_1), IS_INF(f_float_2), IS_INF(f_double_2) FROM PCOLLECTION";
  PCollection<Row> result = boundedInputFloatDouble.apply("testUdf", SqlTransform.query(sql));
  PAssert.that(result).containsInAnyOrder(expected);
  pipeline.run().waitUntilFinish();
}
 
Example 17
Source Project: beam   Source File: BeamSalUhfSpecialTypeAndValueTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testIsNan() throws Exception {
  // One boolean output column per IS_NAN call in the query.
  Schema schema =
      Schema.builder()
          .addBooleanField("field_1")
          .addBooleanField("field_2")
          .addBooleanField("field_3")
          .addBooleanField("field_4")
          .build();
  Row expected = Row.withSchema(schema).addValues(false, false, true, true).build();

  String sql =
      "SELECT IS_NAN(f_float_2), IS_NAN(f_double_2), IS_NAN(f_float_3), IS_NAN(f_double_3) FROM PCOLLECTION";
  PCollection<Row> result = boundedInputFloatDouble.apply("testUdf", SqlTransform.query(sql));
  PAssert.that(result).containsInAnyOrder(expected);
  pipeline.run().waitUntilFinish();
}
 
Example 18
Source Project: beam   Source File: BeamSalUhfSpecialTypeAndValueTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testMd5() throws Exception {
  // Expected MD5 digests for the three inputs present in the bounded byte input.
  Schema schema = Schema.builder().addByteArrayField("field").build();
  Row expectedFoobar =
      Row.withSchema(schema).addValues(DigestUtils.md5("foobar".getBytes(UTF_8))).build();
  Row expectedSpace =
      Row.withSchema(schema).addValues(DigestUtils.md5(" ".getBytes(UTF_8))).build();
  Row expectedUnicode =
      Row.withSchema(schema)
          .addValues(DigestUtils.md5("abcABCжщфЖЩФ".getBytes(UTF_8)))
          .build();

  String sql = "SELECT MD5(f_bytes) FROM PCOLLECTION WHERE f_func = 'HashingFn'";
  PCollection<Row> result = boundedInputBytes.apply("testUdf", SqlTransform.query(sql));
  PAssert.that(result).containsInAnyOrder(expectedFoobar, expectedSpace, expectedUnicode);
  pipeline.run().waitUntilFinish();
}
 
Example 19
Source Project: beam   Source File: BeamSalUhfSpecialTypeAndValueTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testSHA1() throws Exception {
  // Expected SHA-1 digests for the three inputs present in the bounded byte input.
  Schema schema = Schema.builder().addByteArrayField("field").build();
  Row expectedFoobar =
      Row.withSchema(schema).addValues(DigestUtils.sha1("foobar".getBytes(UTF_8))).build();
  Row expectedSpace =
      Row.withSchema(schema).addValues(DigestUtils.sha1(" ".getBytes(UTF_8))).build();
  Row expectedUnicode =
      Row.withSchema(schema)
          .addValues(DigestUtils.sha1("abcABCжщфЖЩФ".getBytes(UTF_8)))
          .build();

  String sql = "SELECT SHA1(f_bytes) FROM PCOLLECTION WHERE f_func = 'HashingFn'";
  PCollection<Row> result = boundedInputBytes.apply("testUdf", SqlTransform.query(sql));
  PAssert.that(result).containsInAnyOrder(expectedFoobar, expectedSpace, expectedUnicode);
  pipeline.run().waitUntilFinish();
}
 
Example 20
Source Project: beam   Source File: BeamSalUhfSpecialTypeAndValueTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testSHA256() throws Exception {
  // Expected SHA-256 digests for the three inputs present in the bounded byte input.
  Schema schema = Schema.builder().addByteArrayField("field").build();
  Row expectedFoobar =
      Row.withSchema(schema).addValues(DigestUtils.sha256("foobar".getBytes(UTF_8))).build();
  Row expectedSpace =
      Row.withSchema(schema).addValues(DigestUtils.sha256(" ".getBytes(UTF_8))).build();
  Row expectedUnicode =
      Row.withSchema(schema)
          .addValues(DigestUtils.sha256("abcABCжщфЖЩФ".getBytes(UTF_8)))
          .build();

  String sql = "SELECT SHA256(f_bytes) FROM PCOLLECTION WHERE f_func = 'HashingFn'";
  PCollection<Row> result = boundedInputBytes.apply("testUdf", SqlTransform.query(sql));
  PAssert.that(result).containsInAnyOrder(expectedFoobar, expectedSpace, expectedUnicode);
  pipeline.run().waitUntilFinish();
}
 
Example 21
Source Project: beam   Source File: BeamSalUhfSpecialTypeAndValueTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testSHA512() throws Exception {
  // Expected SHA-512 digests for the three inputs present in the bounded byte input.
  Schema schema = Schema.builder().addByteArrayField("field").build();
  Row expectedFoobar =
      Row.withSchema(schema).addValues(DigestUtils.sha512("foobar".getBytes(UTF_8))).build();
  Row expectedSpace =
      Row.withSchema(schema).addValues(DigestUtils.sha512(" ".getBytes(UTF_8))).build();
  Row expectedUnicode =
      Row.withSchema(schema)
          .addValues(DigestUtils.sha512("abcABCжщфЖЩФ".getBytes(UTF_8)))
          .build();

  String sql = "SELECT SHA512(f_bytes) FROM PCOLLECTION WHERE f_func = 'HashingFn'";
  PCollection<Row> result = boundedInputBytes.apply("testUdf", SqlTransform.query(sql));
  PAssert.that(result).containsInAnyOrder(expectedFoobar, expectedSpace, expectedUnicode);
  pipeline.run().waitUntilFinish();
}
 
Example 22
Source Project: beam   Source File: SqlQuery3.java    License: Apache License 2.0 6 votes vote down vote up
@Override
public PCollection<NameCityStateId> expand(PCollection<Event> allEvents) {
  // Apply fixed windows of the configured size so the SQL join below is scoped per window.
  PCollection<Event> windowed =
      allEvents.apply(
          Window.into(FixedWindows.of(Duration.standardSeconds(configuration.windowSizeSec))));

  // Keep only new-auction events and convert them to Rows.
  String auctionName = Auction.class.getSimpleName();
  PCollection<Row> auctions =
      windowed
          .apply(getName() + ".Filter." + auctionName, Filter.by(e1 -> e1.newAuction != null))
          .apply(getName() + ".ToRecords." + auctionName, new SelectEvent(Type.AUCTION));

  // Keep only new-person events and convert them to Rows.
  String personName = Person.class.getSimpleName();
  PCollection<Row> people =
      windowed
          .apply(getName() + ".Filter." + personName, Filter.by(e -> e.newPerson != null))
          .apply(getName() + ".ToRecords." + personName, new SelectEvent(Type.PERSON));

  // Tag the two streams so QUERY can reference them as the "Auction" and "Person" tables.
  PCollectionTuple inputStreams =
      PCollectionTuple.of(new TupleTag<>("Auction"), auctions)
          .and(new TupleTag<>("Person"), people);

  // Run the SQL query with the configured planner and convert result Rows to POJOs.
  return inputStreams
      .apply(SqlTransform.query(QUERY).withQueryPlannerClass(plannerClass))
      .apply(Convert.fromRows(NameCityStateId.class));
}
 
Example 23
Source Project: beam   Source File: DataCatalogBigQueryIT.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testRead() throws Exception {
  TableReference bqTable = bigQuery.tableReference();

  // Streaming inserts do not work with DIRECT_READ mode, there is a several hour lag.
  PCollection<Row> data =
      writePipeline.apply(Create.of(row(1, "name1"), row(2, "name2"), row(3, "name3")));
  data.apply(
      BigQueryIO.<Row>write()
          .withSchema(BigQueryUtils.toTableSchema(ID_NAME_SCHEMA))
          .withFormatFunction(BigQueryUtils.toTableRow())
          .withMethod(Method.FILE_LOADS)
          .to(bqTable));
  // Run the write pipeline to completion so the table is populated before reading it back.
  writePipeline.run().waitUntilFinish(Duration.standardMinutes(2));

  // Build the Data Catalog identifier: bigquery.`table`.<project>.<dataset>.<table>.
  String tableId =
      String.format(
          "bigquery.`table`.`%s`.`%s`.`%s`",
          bqTable.getProjectId(), bqTable.getDatasetId(), bqTable.getTableId());

  // Configure the read pipeline to use the query planner under test.
  readPipeline
      .getOptions()
      .as(BeamSqlPipelineOptions.class)
      .setPlannerName(queryPlanner.getCanonicalName());

  // The provider is AutoCloseable; try-with-resources guarantees cleanup after the read.
  try (DataCatalogTableProvider tableProvider =
      DataCatalogTableProvider.create(
          readPipeline.getOptions().as(DataCatalogPipelineOptions.class))) {
    PCollection<Row> result =
        readPipeline.apply(
            "query",
            SqlTransform.query("SELECT id, name FROM " + tableId)
                .withDefaultTableProvider("datacatalog", tableProvider));

    PAssert.that(result).containsInAnyOrder(row(1, "name1"), row(2, "name2"), row(3, "name3"));
    readPipeline.run().waitUntilFinish(Duration.standardMinutes(2));
  }
}
 
Example 24
Source Project: beam   Source File: DataCatalogGCSIT.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testReadFromGCS() throws Exception {
  // Fully-qualified Data Catalog entry id for the GCS-backed sample table.
  String gcsEntryId =
      "`datacatalog`" // this is part of the resource name in DataCatalog, so it has to be
          + ".`entry`" // different from the table provider name ("dc" in this test)
          + ".`apache-beam-testing`"
          + ".`us-central1`"
          + ".`samples`"
          + ".`integ_test_small_csv_test_1`";

  // The provider is AutoCloseable; try-with-resources guarantees cleanup after the read.
  try (DataCatalogTableProvider tableProvider =
      DataCatalogTableProvider.create(
          pipeline.getOptions().as(DataCatalogPipelineOptions.class))) {
    PCollection<Row> result =
        pipeline.apply(
            "query",
            SqlTransform.query("SELECT id, name, type FROM " + gcsEntryId)
                .withDefaultTableProvider("dc", tableProvider));

    // Block on run so the assertion is checked before the test method returns.
    pipeline.getOptions().as(DirectOptions.class).setBlockOnRun(true);
    PAssert.that(result)
        .containsInAnyOrder(
            row(1, "customer1", "test"),
            row(2, "customer2", "test"),
            row(3, "customer1", "test"),
            row(4, "customer2", "test"));
    pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
  }
}
 
Example 25
Source Project: beam   Source File: BeamSqlHiveSchemaTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testSelectFromHCatalog() throws Exception {
  initializeHCatalog();

  // Fully-qualified reference: provider, database, table.
  String query =
      String.format(
          "SELECT f_str, f_int FROM `hive`.`%s`.`%s`", TEST_DATABASE, TEST_TABLE);
  PCollection<KV<String, Integer>> output =
      readAfterWritePipeline
          .apply(SqlTransform.query(query).withTableProvider("hive", hiveTableProvider()))
          .apply(ParDo.of(new ToKV()));
  PAssert.that(output).containsInAnyOrder(getExpectedRecordsAsKV(TEST_RECORDS_COUNT));
  readAfterWritePipeline.run();
}
 
Example 26
Source Project: beam   Source File: BeamSqlHiveSchemaTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testSelectFromImplicitDefaultDb() throws Exception {
  initializeHCatalog();

  // Two-part reference: provider and table, with the database left implicit.
  String query = String.format("SELECT f_str, f_int FROM `hive`.`%s`", TEST_TABLE);
  PCollection<KV<String, Integer>> output =
      readAfterWritePipeline
          .apply(SqlTransform.query(query).withTableProvider("hive", hiveTableProvider()))
          .apply(ParDo.of(new ToKV()));
  PAssert.that(output).containsInAnyOrder(getExpectedRecordsAsKV(TEST_RECORDS_COUNT));
  readAfterWritePipeline.run();
}
 
Example 27
Source Project: beam   Source File: BeamSqlHiveSchemaTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testSelectFromImplicitDefaultSchema() throws Exception {
  initializeHCatalog();

  // Database and table only: the provider name is implicit via the default provider.
  String query =
      String.format("SELECT f_str, f_int FROM `%s`.`%s`", TEST_DATABASE, TEST_TABLE);
  PCollection<KV<String, Integer>> output =
      readAfterWritePipeline
          .apply(SqlTransform.query(query).withDefaultTableProvider("hive", hiveTableProvider()))
          .apply(ParDo.of(new ToKV()));
  PAssert.that(output).containsInAnyOrder(getExpectedRecordsAsKV(TEST_RECORDS_COUNT));
  readAfterWritePipeline.run();
}
 
Example 28
Source Project: beam   Source File: BeamSqlHiveSchemaTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testSelectFromImplicitDefaultSchemaAndDB() throws Exception {
  initializeHCatalog();

  // Bare table name: both provider and database resolved via the default provider.
  String query = String.format("SELECT f_str, f_int FROM `%s`", TEST_TABLE);
  PCollection<KV<String, Integer>> output =
      readAfterWritePipeline
          .apply(SqlTransform.query(query).withDefaultTableProvider("hive", hiveTableProvider()))
          .apply(ParDo.of(new ToKV()));
  PAssert.that(output).containsInAnyOrder(getExpectedRecordsAsKV(TEST_RECORDS_COUNT));
  readAfterWritePipeline.run();
}
 
Example 29
Source Project: beam   Source File: BeamSqlHiveSchemaTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testJoinMultipleExtraProvidersWithMain() throws Exception {
  initializeHCatalog();

  // Main input, addressable in SQL as the implicit PCOLLECTION table.
  PCollection<Row> inputMain =
      pipeline.apply("mainInput", create(row(1, "pcollection_1"), row(2, "pcollection_2")));

  // Extra input exposed through its own table provider under `extraSchema`.`extraTable`.
  PCollection<Row> inputExtra =
      pipeline.apply("extraInput", create(row(1, "_extra_table_1"), row(2, "_extra_table_2")));

  // Three-way join across the extra provider table, the fully-qualified
  // hive table, and the main PCOLLECTION, all keyed on f_int.
  PCollection<Row> result =
      inputMain.apply(
          SqlTransform.query(
                  "SELECT \n"
                      + "   x_tbl.f_int as f_int, \n"
                      + "   (p_tbl.f_string || x_tbl.f_string || ' ' || h_tbl.f_str) AS f_string \n"
                      + "FROM \n"
                      + "     `extraSchema`.`extraTable` AS x_tbl \n"
                      + "  INNER JOIN \n"
                      + "     `hive`.`default`.`mytable` AS h_tbl \n"
                      + "        ON h_tbl.f_int = x_tbl.f_int \n"
                      + "  INNER JOIN \n"
                      + "     PCOLLECTION AS p_tbl \n"
                      + "        ON p_tbl.f_int = x_tbl.f_int")
              .withTableProvider("extraSchema", extraTableProvider("extraTable", inputExtra))
              .withTableProvider("hive", hiveTableProvider()));

  PAssert.that(result)
      .containsInAnyOrder(
          row(1, "pcollection_1_extra_table_1 record 1"),
          row(2, "pcollection_2_extra_table_2 record 2"));
  pipeline.run();
}
 
Example 30
Source Project: beam   Source File: BeamSqlHiveSchemaTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testJoinMultipleExtraProvidersWithImplicitHiveDB() throws Exception {
  initializeHCatalog();

  // Main input, addressable in SQL as the implicit PCOLLECTION table.
  PCollection<Row> inputMain =
      pipeline.apply("mainInput", create(row(1, "pcollection_1"), row(2, "pcollection_2")));

  // Extra input exposed through its own table provider under `extraSchema`.`extraTable`.
  PCollection<Row> inputExtra =
      pipeline.apply("extraInput", create(row(1, "_extra_table_1"), row(2, "_extra_table_2")));

  // Same three-way join as the fully-qualified variant, but the hive table is
  // referenced as `hive`.`mytable`, leaving the database implicit.
  PCollection<Row> result =
      inputMain.apply(
          SqlTransform.query(
                  "SELECT \n"
                      + "   x_tbl.f_int as f_int, \n"
                      + "   (p_tbl.f_string || x_tbl.f_string || ' ' || h_tbl.f_str) AS f_string \n"
                      + "FROM \n"
                      + "     `extraSchema`.`extraTable` AS x_tbl \n"
                      + "  INNER JOIN \n"
                      + "     `hive`.`mytable` AS h_tbl \n"
                      + "        ON h_tbl.f_int = x_tbl.f_int \n"
                      + "  INNER JOIN \n"
                      + "     PCOLLECTION AS p_tbl \n"
                      + "        ON p_tbl.f_int = x_tbl.f_int")
              .withTableProvider("extraSchema", extraTableProvider("extraTable", inputExtra))
              .withTableProvider("hive", hiveTableProvider()));

  PAssert.that(result)
      .containsInAnyOrder(
          row(1, "pcollection_1_extra_table_1 record 1"),
          row(2, "pcollection_2_extra_table_2 record 2"));
  pipeline.run();
}