Java Code Examples for org.apache.beam.sdk.values.Row

The following examples show how to use org.apache.beam.sdk.values.Row. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: beam   Source File: RowJsonTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that reading a JSON object into a field declared as an array fails with an
 * {@link UnsupportedRowJsonException} whose message contains "Expected JSON array".
 */
@Test
public void testThrowsForMismatchedArrayField() throws Exception {
  FieldType elementType = FieldType.array(FieldType.INT32);
  Schema arraySchema =
      Schema.builder().addArrayField("f_arrayOfIntArrays", elementType).build();

  // The schema declares an array here, but the payload supplies a JSON object.
  String json =
      "{\n"
          + "\"f_arrayOfIntArrays\" : { }\n"
          + "}";

  thrown.expect(UnsupportedRowJsonException.class);
  thrown.expectMessage("Expected JSON array");

  RowJsonSerializer serializer = RowJsonSerializer.forSchema(arraySchema);
  RowJsonDeserializer deserializer = RowJsonDeserializer.forSchema(arraySchema);
  newObjectMapperWith(serializer, deserializer).readValue(json, Row.class);
}
 
Example 2
Source Project: beam   Source File: BeamSqlDslExistsTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs an EXISTS sub-query over CUSTOMER and ORDERS and asserts the single row the pipeline is
 * expected to emit.
 */
@Test
public void testExistsSubquery() {
  String query =
      "select * from CUSTOMER "
          + " where exists ( "
          + " select * from ORDERS "
          + " where o_custkey = c_custkey )";

  PCollection<Row> customers = compilePipeline(query, pipeline);

  // Expected output: one row with the (c_custkey, c_acctbal, c_city) columns.
  TestUtils.RowsBuilder expected =
      TestUtils.RowsBuilder.of(
              Schema.FieldType.INT32, "c_custkey",
              Schema.FieldType.DOUBLE, "c_acctbal",
              Schema.FieldType.STRING, "c_city")
          .addRows(1, 1.0, "Seattle");
  PAssert.that(customers).containsInAnyOrder(expected.getRows());

  pipeline.run().waitUntilFinish();
}
 
Example 3
Source Project: beam   Source File: Group.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Expands the input into rows with two fields: a row field (named by {@code getKeyField()})
 * and an iterable-of-rows field (named by {@code getValueField()}).
 *
 * @param input the collection to group; its schema is used for the value rows
 * @return a collection of rows carrying the output schema built here
 */
@Override
public PCollection<Row> expand(PCollection<InputT> input) {
  Schema inputSchema = input.getSchema();
  Schema keySchema = getKeySchema(inputSchema);
  Schema outputSchema =
      Schema.builder()
          .addRowField(getKeyField(), keySchema)
          .addIterableField(getValueField(), FieldType.row(inputSchema))
          .build();

  // Wraps each (key, values) pair into a single two-field Row of the output schema.
  DoFn<KV<Row, Iterable<Row>>, Row> toRowFn =
      new DoFn<KV<Row, Iterable<Row>>, Row>() {
        @ProcessElement
        public void process(@Element KV<Row, Iterable<Row>> kv, OutputReceiver<Row> out) {
          Row combined =
              Row.withSchema(outputSchema)
                  .attachValues(Lists.newArrayList(kv.getKey(), kv.getValue()));
          out.output(combined);
        }
      };

  return input
      .apply("ToKvs", getToKvs())
      .apply("ToRow", ParDo.of(toRowFn))
      .setRowSchema(outputSchema);
}
 
Example 4
Source Project: beam   Source File: JavaFieldSchemaTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Converts a Row holding an iterable of strings into a {@code PojoWithIterable} and checks that
 * the POJO's iterable reflects both the initial contents and a later addition to the source
 * list.
 */
@Test
public void testIterableFieldFromRow() throws NoSuchSchemaException {
  SchemaRegistry schemaRegistry = SchemaRegistry.createDefault();
  Schema inferredSchema = schemaRegistry.getSchema(PojoWithIterable.class);
  SchemaTestUtils.assertSchemaEquivalent(POJO_WITH_ITERABLE, inferredSchema);

  List<String> source = Lists.newArrayList("one", "two");
  // The cast passes the list as a single field value rather than spreading it as varargs.
  Row sourceRow = Row.withSchema(POJO_WITH_ITERABLE).attachValues((Object) source);
  PojoWithIterable pojo =
      schemaRegistry.getFromRowFunction(PojoWithIterable.class).apply(sourceRow);
  assertEquals(source, Lists.newArrayList(pojo.strings));

  // The converted iterable must be a live view: growing the source list shows up in the POJO.
  source.add("three");
  assertEquals(source, Lists.newArrayList(pojo.strings));
}
 
Example 5
/**
 * Feeds two insert records built from {@code RECORD_SCHEMA1} through
 * {@code BigQueryChangeApplier.buildTableSchemaCollection} and asserts that the resulting
 * collection holds exactly one (table name, (key schema, record schema)) entry.
 */
@Test
void testSchemasEmittedOnlyOnChanges() {
  // The payload bytes are encoded with an explicit charset so the test data does not depend on
  // the JVM's platform-default encoding.
  TestStream<Row> testStream = TestStream
      .create(SerializableCoder.of(Row.class))
      .addElements(testInsertRecord(
          Row.withSchema(RECORD_SCHEMA1)
              .addValues(
                  "k1", 1, DateTime.now(),
                  "bytes".getBytes(java.nio.charset.StandardCharsets.UTF_8))
              .build()),
          testInsertRecord(
              Row.withSchema(RECORD_SCHEMA1)
                  .addValues(
                      "k1", 2, DateTime.now(),
                      "bytes".getBytes(java.nio.charset.StandardCharsets.UTF_8))
                  .build()))
      .advanceWatermarkTo(Instant.now())
      .advanceWatermarkToInfinity();

  Pipeline p = Pipeline.create();

  PCollection<Row> input = p.apply(testStream).setRowSchema(UPDATE_RECORD_SCHEMA);

  PCollection<KV<String, KV<Schema, Schema>>> tableSchemaCollection =
      BigQueryChangeApplier.buildTableSchemaCollection(input);

  PAssert.that(tableSchemaCollection).containsInAnyOrder(
      KV.of(TABLE_NAME, KV.of(KEY_SCHEMA, RECORD_SCHEMA1)));
  p.run().waitUntilFinish();
}
 
Example 6
Source Project: beam   Source File: SqlQuery0.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a pass-through transform that round-trips every row through {@code coder}, adding the
 * encoded length to a "bytes" counter and emitting the decoded row.
 *
 * @param coder the coder used both to encode and to decode each element
 * @return a ParDo that outputs the decoded copy of each input row
 */
private PTransform<? super PCollection<Row>, PCollection<Row>> logBytesMetric(
    final Coder<Row> coder) {

  DoFn<Row, Row> meteringFn =
      new DoFn<Row, Row>() {
        private final Counter bytesMetric = Metrics.counter(name, "bytes");

        @ProcessElement
        public void processElement(@Element Row element, OutputReceiver<Row> o)
            throws IOException {
          // Encode to measure the serialized size.
          ByteArrayOutputStream encoded = new ByteArrayOutputStream();
          coder.encode(element, encoded, Coder.Context.OUTER);
          byte[] payload = encoded.toByteArray();
          bytesMetric.inc((long) payload.length);

          // Emit the decoded copy rather than the original element.
          o.output(coder.decode(new ByteArrayInputStream(payload), Coder.Context.OUTER));
        }
      };
  return ParDo.of(meteringFn);
}
 
Example 7
Source Project: beam   Source File: JdbcIOTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads rows through {@code JdbcIO.readRows()} using a data source configuration and a
 * parameterized query, then checks both the reported schema and the selected row.
 */
@Test
public void testReadRowsWithDataSourceConfiguration() {
  JdbcIO.ReadRows read =
      JdbcIO.readRows()
          .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration.create(dataSource))
          .withQuery(String.format("select name,id from %s where name = ?", readTableName))
          .withStatementPreparator(
              statement -> statement.setString(1, TestRow.getNameForSeed(1)));
  PCollection<Row> rows = pipeline.apply(read);

  // Both columns are expected to be reported as nullable.
  Schema.Field nameField =
      Schema.Field.of("NAME", LogicalTypes.variableLengthString(JDBCType.VARCHAR, 500))
          .withNullable(true);
  Schema.Field idField = Schema.Field.of("ID", Schema.FieldType.INT32).withNullable(true);
  Schema expectedSchema = Schema.of(nameField, idField);

  assertEquals(expectedSchema, rows.getSchema());

  PCollection<Row> selected = rows.apply(Select.fieldNames("NAME", "ID"));
  Row expectedRow = Row.withSchema(expectedSchema).addValues("Testval1", 1).build();
  PAssert.that(selected).containsInAnyOrder(ImmutableList.of(expectedRow));

  pipeline.run();
}
 
Example 8
Source Project: beam   Source File: RowJsonTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * With {@code NullBehavior.REQUIRE_MISSING}, a JSON document that omits the nullable
 * {@code f_string} field must deserialize to a row whose {@code f_string} value is null.
 */
@Test
public void testRequireMissingAcceptsMissingField() throws Exception {
  Schema schema =
      Schema.builder()
          .addByteField("f_byte")
          .addNullableField("f_string", FieldType.STRING)
          .build();

  // Only the required field is present in the payload.
  String json = "{\"f_byte\": 12}";

  RowJsonDeserializer deserializer =
      RowJsonDeserializer.forSchema(schema).withNullBehavior(NullBehavior.REQUIRE_MISSING);
  Row parsed = newObjectMapperWith(deserializer).readValue(json, Row.class);

  Row expected =
      Row.withSchema(schema)
          .withFieldValue("f_byte", (byte) 12)
          .withFieldValue("f_string", null)
          .build();
  assertThat(parsed, equalTo(expected));
}
 
Example 9
Source Project: beam   Source File: GroupTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Groups a single {@code Basic} element by {@code field1} and checks that the resulting schema
 * is equivalent to a "key" row field plus a "value" iterable of {@code BASIC_SCHEMA} rows.
 */
@Test
@Category(NeedsRunner.class)
public void testOutputCoders() {
  Schema expectedKeySchema = Schema.builder().addStringField("field1").build();
  Schema expectedSchema =
      Schema.builder()
          .addRowField("key", expectedKeySchema)
          .addIterableField("value", FieldType.row(BASIC_SCHEMA))
          .build();

  PCollection<Row> groupedRows =
      pipeline
          .apply(Create.of(Basic.of("key1", 1, "value1")))
          .apply(Group.byFieldNames("field1"));

  Schema actualSchema = groupedRows.getSchema();
  assertTrue(actualSchema.equivalent(expectedSchema));

  pipeline.run();
}
 
Example 10
/**
 * Parses a query whose WHERE clause mixes two predicates and checks the resulting plan shape,
 * the IO source digest, the pushed-down field names, and the query output.
 */
@Test
public void testIOSourceRel_withSupportedAndUnsupportedPredicate() {
  String query = "SELECT name FROM TEST where id+unused1=101 and id=1";
  String expectedDigest =
      "BeamPushDownIOSourceRel.BEAM_LOGICAL(table=[beam, TEST],usedFields=[name, id, unused1],TestTableFilter=[supported{=($1, 1)}, unsupported{=(+($1, $0), 101)}])";

  BeamRelNode beamRelNode = sqlEnv.parseQuery(query);
  PCollection<Row> result = BeamSqlRelUtils.toPCollection(pipeline, beamRelNode);

  // A Calc node is expected on top of the IO source node.
  assertThat(beamRelNode, instanceOf(BeamCalcRel.class));
  assertThat(beamRelNode.getInput(0), instanceOf(BeamIOSourceRel.class));
  assertEquals(expectedDigest, beamRelNode.getInput(0).getDigest());

  // Make sure project push-down was done
  List<String> sourceFieldNames = beamRelNode.getInput(0).getRowType().getFieldNames();
  assertThat(sourceFieldNames, containsInAnyOrder("name", "id", "unused1"));

  Schema expectedSchema = Schema.builder().addStringField("name").build();
  assertEquals(expectedSchema, result.getSchema());
  PAssert.that(result).containsInAnyOrder(row(result.getSchema(), "one"));

  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
 
Example 11
Source Project: beam   Source File: SchemaUtilTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Maps a mocked JDBC result set whose first column is an SQL array of three ints and checks
 * that {@code SchemaUtil.BeamRowMapper} produces a row holding the list [10, 20, 30].
 */
@Test
public void testBeamRowMapperArray() throws Exception {
  Schema arraySchema =
      Schema.builder().addField("array", Schema.FieldType.array(Schema.FieldType.INT32)).build();
  Row expected =
      Row.withSchema(arraySchema).addValues((Object) ImmutableList.of(10, 20, 30)).build();

  // Result set backing the array: three elements, then exhaustion.
  ResultSet elements = mock(ResultSet.class);
  when(elements.next()).thenReturn(true, true, true, false);
  when(elements.getInt(eq(1))).thenReturn(10, 20, 30);

  Array sqlArray = mock(Array.class);
  when(sqlArray.getResultSet()).thenReturn(elements);

  // Outer result set: column 1 yields the mocked array.
  ResultSet outer = mock(ResultSet.class);
  when(outer.getArray(eq(1))).thenReturn(sqlArray);

  SchemaUtil.BeamRowMapper mapper = SchemaUtil.BeamRowMapper.of(arraySchema);
  Row actual = mapper.mapRow(outer);

  assertEquals(expected, actual);
}
 
Example 12
Source Project: beam   Source File: BeamSqlDslUdfUdafTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Test that correct exception is thrown when subclass of {@link CombineFn} is not parameterized.
 * BEAM-3777
 *
 * <p>Expects a {@code ParseException} whose cause message contains "CombineFn must be
 * parameterized" when the raw {@code RawCombineFn} is registered as a UDAF.
 */
@Test
public void testRawCombineFnSubclass() {
  exceptions.expect(ParseException.class);
  exceptions.expectCause(hasMessage(containsString("CombineFn must be parameterized")));
  // NOTE(review): this test never calls pipeline.run(); presumably that is why abandoned-node
  // enforcement is switched off here — confirm.
  pipeline.enableAbandonedNodeEnforcement(false);

  String sql1 =
      "SELECT f_int2, squaresum(f_int) AS `squaresum`" + " FROM PCOLLECTION GROUP BY f_int2";

  // Only the failure matters, so the resulting collection is not kept.
  boundedInput1.apply(
      "testUdaf", SqlTransform.query(sql1).registerUdaf("squaresum", new RawCombineFn()));
}
 
Example 13
Source Project: beam   Source File: DataStoreTableTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Drops the {@code __key__} field from {@code SCHEMA} and {@code ROW}, converts the reduced row
 * with {@code RowToEntity.createTest}, and asserts that the output equals {@code ENTITY}.
 */
@Test
public void testRowToEntityConverterWithoutKey() {
  // Schema identical to SCHEMA minus the "__key__" field.
  Schema keylessSchema =
      Schema.builder()
          .addFields(
              SCHEMA.getFields().stream()
                  .filter(field -> !field.getName().equals("__key__"))
                  .collect(Collectors.toList()))
          .build();

  // Copy the remaining field values out of ROW, in schema order.
  Row keylessRow =
      Row.withSchema(keylessSchema)
          .addValues(
              keylessSchema.getFieldNames().stream()
                  .map(ROW::getValue)
                  .collect(Collectors.toList()))
          .build();

  PCollection<Entity> entities =
      pipeline
          .apply(Create.of(keylessRow))
          .setRowSchema(keylessSchema)
          .apply(RowToEntity.createTest(UUID_VALUE, "__key__", KIND));

  PAssert.that(entities).containsInAnyOrder(ENTITY);

  pipeline.run().waitUntilFinish();
}
 
Example 14
Source Project: beam   Source File: Cast.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a row of {@code outputSchema} by looking up each output field by name in
 * {@code inputSchema} and casting the corresponding input value to the output field's type.
 *
 * @param input the row to convert; may be null
 * @param inputSchema schema used to locate each output field's source position and type
 * @param outputSchema schema of the row to produce
 * @return the converted row, or null when {@code input} is null
 */
public static Row castRow(Row input, Schema inputSchema, Schema outputSchema) {
  if (input == null) {
    return null;
  }

  Row.Builder builder = Row.withSchema(outputSchema);
  for (Schema.Field target : outputSchema.getFields()) {
    // Fields are matched by name, so the input and output orders may differ.
    int sourceIndex = inputSchema.indexOf(target.getName());
    Schema.Field source = inputSchema.getField(sourceIndex);

    Object original = input.getValue(sourceIndex);
    builder.addValue(castValue(original, source.getType(), target.getType()));
  }

  return builder.build();
}
 
Example 15
Source Project: beam   Source File: StreamingSQLTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Plans a TUMBLE table-valued-function aggregation with the ZetaSQL planner and asserts the two
 * (count, window_start) rows expected from the pipeline.
 */
@Test
public void testTVFTumbleAggregation() {
  String query =
      "SELECT COUNT(*) as field_count, "
          + "window_start "
          + "FROM TUMBLE((select * from KeyValue), descriptor(ts), 'INTERVAL 1 SECOND') "
          + "GROUP BY window_start";
  ZetaSQLQueryPlanner planner = new ZetaSQLQueryPlanner(config);
  BeamRelNode relNode = planner.convertToBeamRel(query);

  PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, relNode);

  final Schema schema =
      Schema.builder().addInt64Field("field_count").addDateTimeField("window_start").build();

  // Two expected windows, one second apart, each with a count of one.
  ISOChronology utc = ISOChronology.getInstanceUTC();
  Row laterWindow =
      Row.withSchema(schema).addValues(1L, new DateTime(2018, 7, 1, 21, 26, 7, utc)).build();
  Row earlierWindow =
      Row.withSchema(schema).addValues(1L, new DateTime(2018, 7, 1, 21, 26, 6, utc)).build();
  PAssert.that(stream).containsInAnyOrder(laterWindow, earlierWindow);

  pipeline.run().waitUntilFinish(Duration.standardMinutes(PIPELINE_EXECUTION_WAITTIME_MINUTES));
}
 
Example 16
Source Project: beam   Source File: BeamCoGBKJoinRelBoundedVsBoundedTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Inner-joins ORDER_DETAILS1 with ORDER_DETAILS2 on two equality conditions and asserts the
 * joined output against a six-column INT32 schema.
 */
@Test
public void testInnerJoin() throws Exception {
  String query =
      "SELECT *  "
          + "FROM ORDER_DETAILS1 o1"
          + " JOIN ORDER_DETAILS2 o2"
          + " on "
          + " o1.order_id=o2.site_id AND o2.price=o1.site_id";

  PCollection<Row> joined = compilePipeline(query, pipeline);

  // The three left-side columns are followed by the right-side columns, suffixed with "0".
  Schema joinedSchema =
      Schema.builder()
          .addField("order_id", Schema.FieldType.INT32)
          .addField("site_id", Schema.FieldType.INT32)
          .addField("price", Schema.FieldType.INT32)
          .addField("order_id0", Schema.FieldType.INT32)
          .addField("site_id0", Schema.FieldType.INT32)
          .addField("price0", Schema.FieldType.INT32)
          .build();
  PAssert.that(joined)
      .containsInAnyOrder(
          TestUtils.RowsBuilder.of(joinedSchema).addRows(2, 3, 3, 1, 2, 3).getRows());
  pipeline.run();
}
 
Example 17
Source Project: beam   Source File: BeamSalUhfSpecialTypeAndValueTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs the SQL {@code SHA512} function over the hashing-function input rows and compares the
 * output with digests computed by {@code DigestUtils.sha512} for three sample strings.
 */
@Test
public void testSHA512() throws Exception {
  Schema digestSchema = Schema.builder().addByteArrayField("field").build();

  // Expected digests: an ASCII word, a single space, and a mixed ASCII/Cyrillic string.
  Row asciiDigest =
      Row.withSchema(digestSchema)
          .addValues(DigestUtils.sha512("foobar".getBytes(UTF_8)))
          .build();
  Row spaceDigest =
      Row.withSchema(digestSchema).addValues(DigestUtils.sha512(" ".getBytes(UTF_8))).build();
  Row mixedDigest =
      Row.withSchema(digestSchema)
          .addValues(DigestUtils.sha512("abcABCжщфЖЩФ".getBytes(UTF_8)))
          .build();

  String query = "SELECT SHA512(f_bytes) FROM PCOLLECTION WHERE f_func = 'HashingFn'";
  PCollection<Row> hashed = boundedInputBytes.apply("testUdf", SqlTransform.query(query));
  PAssert.that(hashed).containsInAnyOrder(asciiDigest, spaceDigest, mixedDigest);
  pipeline.run().waitUntilFinish();
}
 
Example 18
Source Project: beam   Source File: TestTableProviderWithFilterPushDown.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Selects every column with a filter on {@code name} and checks that the plan root is the IO
 * source node itself, that all five fields are projected, and that the matching row is emitted.
 */
@Test
public void testIOSourceRel_selectAll_withSupportedFilter_shouldDropCalc() {
  String query = "SELECT * FROM TEST where name='two'";

  BeamRelNode relNode = sqlEnv.parseQuery(query);
  PCollection<Row> output = BeamSqlRelUtils.toPCollection(pipeline, relNode);

  // Calc is dropped, because all fields are projected in the same order and filter is
  // pushed-down.
  assertThat(relNode, instanceOf(BeamIOSourceRel.class));

  List<String> projectedFields = relNode.getRowType().getFieldNames();
  assertThat(projectedFields, containsInAnyOrder("unused1", "id", "name", "unused2", "b"));

  assertEquals(BASIC_SCHEMA, output.getSchema());
  PAssert.that(output)
      .containsInAnyOrder(row(output.getSchema(), 200, 2, "two", (short) 200, false));

  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
 
Example 19
Source Project: beam   Source File: BeamCoGBKJoinRelBoundedVsBoundedTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Right-outer-joins ORDER_DETAILS1 with ORDER_DETAILS2 and asserts the output against a schema
 * whose left-side columns are nullable.
 */
@Test
public void testRightOuterJoin() throws Exception {
  String query =
      "SELECT *  "
          + "FROM ORDER_DETAILS1 o1"
          + " RIGHT OUTER JOIN ORDER_DETAILS2 o2"
          + " on "
          + " o1.order_id=o2.site_id AND o2.price=o1.site_id";

  PCollection<Row> joined = compilePipeline(query, pipeline);

  // Left-side columns are nullable; right-side columns are not.
  Schema joinedSchema =
      Schema.builder()
          .addNullableField("order_id", Schema.FieldType.INT32)
          .addNullableField("site_id", Schema.FieldType.INT32)
          .addNullableField("price", Schema.FieldType.INT32)
          .addField("order_id0", Schema.FieldType.INT32)
          .addField("site_id0", Schema.FieldType.INT32)
          .addField("price0", Schema.FieldType.INT32)
          .build();
  PAssert.that(joined)
      .containsInAnyOrder(
          TestUtils.RowsBuilder.of(joinedSchema)
              .addRows(
                  2, 3, 3, 1, 2, 3,
                  null, null, null, 2, 3, 3,
                  null, null, null, 3, 4, 5)
              .getRows());
  pipeline.run();
}
 
Example 20
Source Project: beam   Source File: BeamIntersectRelTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Intersects the (order_id, site_id, price) projections of ORDER_DETAILS1 and ORDER_DETAILS2
 * and asserts the rows expected to be common to both.
 */
@Test
public void testIntersect() {
  String query =
      "SELECT order_id, site_id, price "
          + "FROM ORDER_DETAILS1 "
          + " INTERSECT "
          + "SELECT order_id, site_id, price "
          + "FROM ORDER_DETAILS2 ";

  PCollection<Row> common = compilePipeline(query, pipeline);

  TestUtils.RowsBuilder expected =
      TestUtils.RowsBuilder.of(
              Schema.FieldType.INT64, "order_id",
              Schema.FieldType.INT32, "site_id",
              Schema.FieldType.DECIMAL, "price")
          .addRows(
              1L, 1, new BigDecimal(1.0),
              2L, 2, new BigDecimal(2.0));
  PAssert.that(common).containsInAnyOrder(expected.getRows());

  pipeline.run().waitUntilFinish();
}
 
Example 21
Source Project: beam   Source File: JavaBeanSchemaTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks schema inference for {@code SimpleBeanWithAnnotations}, then verifies the bean-to-row
 * conversion field by field and the row-to-bean conversion by equality.
 */
@Test
public void testAnnotations() throws NoSuchSchemaException {
  SchemaRegistry schemaRegistry = SchemaRegistry.createDefault();
  Schema inferred = schemaRegistry.getSchema(SimpleBeanWithAnnotations.class);
  SchemaTestUtils.assertSchemaEquivalent(SIMPLE_BEAN_SCHEMA, inferred);

  // Bean -> Row: every one of the twelve fields must carry the value set by createAnnotated.
  SimpleBeanWithAnnotations bean = createAnnotated("string");
  Row asRow = schemaRegistry.getToRowFunction(SimpleBeanWithAnnotations.class).apply(bean);
  assertEquals(12, asRow.getFieldCount());
  assertEquals("string", asRow.getString("str"));
  assertEquals((byte) 1, (Object) asRow.getByte("aByte"));
  assertEquals((short) 2, (Object) asRow.getInt16("aShort"));
  assertEquals((int) 3, (Object) asRow.getInt32("anInt"));
  assertEquals((long) 4, (Object) asRow.getInt64("aLong"));
  assertTrue(asRow.getBoolean("aBoolean"));
  assertEquals(DATE.toInstant(), asRow.getDateTime("dateTime"));
  assertEquals(DATE.toInstant(), asRow.getDateTime("instant"));
  assertArrayEquals(BYTE_ARRAY, asRow.getBytes("bytes"));
  assertArrayEquals(BYTE_ARRAY, asRow.getBytes("byteBuffer"));
  assertEquals(BigDecimal.ONE, asRow.getDecimal("bigDecimal"));
  assertEquals("stringbuilder", asRow.getString("stringBuilder"));

  // Row -> Bean: converting the reference row must reproduce the original bean.
  Row referenceRow = createSimpleRow("string");
  SimpleBeanWithAnnotations roundTripped =
      schemaRegistry.getFromRowFunction(SimpleBeanWithAnnotations.class).apply(referenceRow);
  assertEquals(bean, roundTripped);
}
 
Example 22
Source Project: beam   Source File: CustomTableResolverTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Joins tables from two table providers using long dotted identifiers and asserts the joined
 * (id, name) rows.
 */
@Test
public void testJoinWithLongCompoundIds() throws Exception {
  // Default provider: table addressed in SQL as testtable.blah.foo.bar.
  CustomResolutionTestTableProvider defaultProvider = new CustomResolutionTestTableProvider();
  Table defaultTable =
      Table.builder().name("testtable_blah_foo_bar").schema(BASIC_SCHEMA).type("test").build();
  defaultProvider.createTable(defaultTable);
  defaultProvider.addRows("testtable_blah_foo_bar", row(3, "customer"), row(2, "nobody"));

  // Second provider: table addressed in SQL as testprovider2.testtable.blah.foo.bar2.
  CustomResolutionTestTableProvider secondProvider = new CustomResolutionTestTableProvider();
  Table secondTable =
      Table.builder().name("testtable_blah_foo_bar2").schema(BASIC_SCHEMA).type("test").build();
  secondProvider.createTable(secondTable);
  secondProvider.addRows("testtable_blah_foo_bar2", row(4, "customer"), row(1, "nobody"));

  String query =
      "SELECT testprovider2.testtable.blah.foo.bar2.id, testtable.blah.foo.bar.name \n"
          + "FROM \n"
          + "  testprovider2.testtable.blah.foo.bar2 \n"
          + "JOIN \n"
          + "  testtable.blah.foo.bar \n"
          + "USING(name)";
  PCollection<Row> joined =
      pipeline.apply(
          SqlTransform.query(query)
              .withTableProvider("testprovider2", secondProvider)
              .withDefaultTableProvider("testprovider", defaultProvider));

  PAssert.that(joined).containsInAnyOrder(row(4, "customer"), row(1, "nobody"));

  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
 
Example 23
Source Project: beam   Source File: AvroTable.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes the input rows as Avro generic records to {@code filePattern} without sharding.
 *
 * @param input the rows to write
 * @return the result of applying the Avro write transform
 */
@Override
public PDone buildIOWriter(PCollection<Row> input) {
  PTransform<PCollection<Row>, PCollection<GenericRecord>> rowToRecord =
      GenericRecordWriteConverter.builder().beamSchema(schema).build();

  AvroIO.Write<GenericRecord> avroWrite =
      AvroIO.writeGenericRecords(AvroUtils.toAvroSchema(schema, tableName, null))
          .to(filePattern)
          .withoutSharding();

  // NOTE(review): the step label says "GenericRecordToRow" although the converter's declared
  // type maps Row to GenericRecord; the label is kept unchanged since it is a runtime name.
  return input.apply("GenericRecordToRow", rowToRecord).apply("AvroIOWrite", avroWrite);
}
 
Example 24
Source Project: beam   Source File: CustomTableResolverTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Unions tables from two table providers using long dotted identifiers and asserts that all
 * four distinct (id, name) rows appear in the output.
 */
@Test
public void testUnionWithLongCompoundIds() throws Exception {
  // Default provider: table addressed in SQL as testtable.blah.foo.bar.
  CustomResolutionTestTableProvider defaultProvider = new CustomResolutionTestTableProvider();
  Table defaultTable =
      Table.builder().name("testtable_blah_foo_bar").schema(BASIC_SCHEMA).type("test").build();
  defaultProvider.createTable(defaultTable);
  defaultProvider.addRows("testtable_blah_foo_bar", row(3, "customer"), row(2, "nobody"));

  // Second provider: table addressed in SQL as testprovider2.testtable.blah.foo.bar2.
  CustomResolutionTestTableProvider secondProvider = new CustomResolutionTestTableProvider();
  Table secondTable =
      Table.builder().name("testtable_blah_foo_bar2").schema(BASIC_SCHEMA).type("test").build();
  secondProvider.createTable(secondTable);
  secondProvider.addRows("testtable_blah_foo_bar2", row(4, "customer"), row(1, "nobody"));

  String query =
      "SELECT id, name \n"
          + "FROM \n"
          + "  testprovider2.testtable.blah.foo.bar2 \n"
          + "UNION \n"
          + "    SELECT id, name \n"
          + "      FROM \n"
          + "        testtable.blah.foo.bar \n";
  PCollection<Row> unioned =
      pipeline.apply(
          SqlTransform.query(query)
              .withTableProvider("testprovider2", secondProvider)
              .withDefaultTableProvider("testprovider", defaultProvider));

  PAssert.that(unioned)
      .containsInAnyOrder(
          row(4, "customer"), row(1, "nobody"), row(3, "customer"), row(2, "nobody"));

  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
 
Example 25
Source Project: beam   Source File: DoFnOutputReceivers.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns a receiver that accepts {@link Row} values for the given output tag.
 *
 * <p>Fails with the exception raised by {@code checkNotNull}/{@code checkState} when the output
 * coders are absent, when no coder is registered for {@code tag}, or when that coder is not a
 * {@link SchemaCoder}.
 *
 * @param tag the output tag to look up in {@code outputCoders}
 * @return a row receiver created by {@code DoFnOutputReceivers.rowReceiver}
 */
@Override
public <T> OutputReceiver<Row> getRowReceiver(TupleTag<T> tag) {
  Coder<T> outputCoder = (Coder<T>) checkNotNull(outputCoders).get(tag);
  // Message templates are used so the text is only formatted when a check actually fails,
  // instead of concatenating strings on every call.
  checkState(outputCoder != null, "No output tag for %s", tag);
  checkState(
      outputCoder instanceof SchemaCoder,
      "Output with tag %s must have a schema in order to call  getRowReceiver",
      tag);
  return DoFnOutputReceivers.rowReceiver(context, tag, (SchemaCoder<T>) outputCoder);
}
 
Example 26
Source Project: beam   Source File: ParDoSchemaTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * A DoFn that asks for a {@link Row} element over a collection of plain strings is expected to
 * be rejected with an {@link IllegalArgumentException}.
 */
@Test
@Category(NeedsRunner.class)
public void testNoSchema() {
  thrown.expect(IllegalArgumentException.class);

  // The element parameter is declared as Row although the input elements are Strings.
  DoFn<String, Void> rowConsumingFn =
      new DoFn<String, Void>() {
        @ProcessElement
        public void process(@Element Row row) {}
      };
  pipeline.apply(Create.of("a", "b", "c")).apply(ParDo.of(rowConsumingFn));
  pipeline.run();
}
 
Example 27
Source Project: beam   Source File: Create.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns a {@link Create.TimestampedValues} PTransform like this one, configured with a
 * {@link SchemaCoder} built from the given {@code Schema}, type descriptor and conversion
 * functions.
 *
 * @param schema the schema describing the elements
 * @param typeDescriptor the type descriptor of the element type
 * @param toRowFunction converts an element to a {@link Row}
 * @param fromRowFunction converts a {@link Row} back to an element
 */
@Experimental(Kind.SCHEMAS)
public TimestampedValues<T> withSchema(
    Schema schema,
    TypeDescriptor<T> typeDescriptor,
    SerializableFunction<T, Row> toRowFunction,
    SerializableFunction<Row, T> fromRowFunction) {
  SchemaCoder<T> schemaCoder =
      SchemaCoder.of(schema, typeDescriptor, toRowFunction, fromRowFunction);
  return withCoder(schemaCoder);
}
 
Example 28
Source Project: beam   Source File: SchemaRegistryTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Supplies a to-row function for {@code TestSchemaClass} only.
 *
 * @param typeDescriptor the type a conversion function is requested for
 * @return a function producing an empty-schema row for {@code TestSchemaClass}, or null for any
 *     other type
 */
@Override
public <T> SerializableFunction<T, Row> toRowFunction(TypeDescriptor<T> typeDescriptor) {
  boolean isTestSchemaClass = typeDescriptor.equals(TypeDescriptor.of(TestSchemaClass.class));
  if (!isTestSchemaClass) {
    return null;
  }
  // The input value is ignored; every element maps to a row of the empty schema.
  return ignored -> Row.withSchema(EMPTY_SCHEMA).build();
}
 
Example 29
Source Project: beam   Source File: SchemaTestUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Matches a {@link Row} whose field at {@code fieldIndex} corresponds to {@code expected},
 * with the comparison chosen by the type name of {@code fieldType}.
 *
 * @param item the candidate object; anything that is not a {@link Row} never matches
 * @return true when the field value matches the expected value under the rules below
 * @throws RuntimeException for MAP fields, which are not implemented
 */
@Override
public boolean matches(Object item) {
  if (!(item instanceof Row)) {
    return false;
  }
  Row candidate = (Row) item;
  switch (fieldType.getTypeName()) {
    case ROW:
      // Nested rows are compared recursively, provided the actual field is a composite type.
      boolean actualIsComposite =
          candidate.getSchema().getField(fieldIndex).getType().getTypeName().isCompositeType();
      if (actualIsComposite) {
        Row nested = candidate.getRow(fieldIndex);
        return new RowEquivalent((Row) expected).matches(nested);
      }
      return false;
    case ARRAY:
      // Element order is ignored.
      Row[] wantedElements = ((List<Row>) expected).toArray(new Row[0]);
      return containsInAnyOrder(wantedElements).matches(candidate.getArray(fieldIndex));
    case ITERABLE:
      // Element order is ignored; the actual iterable is copied into a list first.
      Row[] wantedItems = Iterables.toArray((Iterable<Row>) expected, Row.class);
      List<Row> actualItems = Lists.newArrayList(candidate.getIterable(fieldIndex));
      return containsInAnyOrder(wantedItems).matches(actualItems);
    case MAP:
      throw new RuntimeException("Not yet implemented for maps");
    default:
      return equalTo(expected).matches(candidate.getValue(fieldIndex));
  }
}
 
Example 30
Source Project: beam   Source File: AddFieldsTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Adding a non-nullable INT32 field with a null default value is expected to fail with an
 * {@link IllegalArgumentException}.
 */
@Test
@Category(NeedsRunner.class)
public void addNonNullableField() {
  Schema schema = Schema.builder().addStringField("field1").build();
  thrown.expect(IllegalArgumentException.class);

  Row inputRow = Row.withSchema(schema).addValue("value").build();
  pipeline
      .apply(Create.of(inputRow).withRowSchema(schema))
      // "field2" is declared without nullability, yet its default value is null.
      .apply(AddFields.<Row>create().field("field2", Schema.FieldType.INT32, null));
  pipeline.run();
}