Java Code Examples for org.apache.beam.sdk.schemas.Schema#of()

The following examples show how to use org.apache.beam.sdk.schemas.Schema#of(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BeamSqlDslProjectTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * A trivial program must project exactly its input fields, neither dropping nor re-ordering them.
 * Here a single INT64 column is selected under an alias.
 *
 * @see <a href="https://issues.apache.org/jira/browse/BEAM-6810">BEAM-6810</a>
 */
@Test
public void testTrivialProjection() {
  Schema sourceSchema = Schema.of(Schema.Field.of("c_int64", Schema.FieldType.INT64));
  Schema projectedSchema = Schema.of(Schema.Field.of("abc", Schema.FieldType.INT64));

  Row sourceRow = Row.withSchema(sourceSchema).addValue(42L).build();
  Row projectedRow = Row.withSchema(projectedSchema).addValue(42L).build();

  PCollection<Row> projected =
      pipeline
          .apply(Create.of(sourceRow).withRowSchema(sourceSchema))
          .apply(SqlTransform.query("SELECT c_int64 as abc FROM PCOLLECTION"));

  // The output schema must carry the alias and keep the INT64 type.
  Assert.assertEquals(projectedSchema, projected.getSchema());

  // The single value must pass through unchanged.
  PAssert.that(projected).containsInAnyOrder(projectedRow);

  pipeline.run();
}
 
Example 2
Source File: ClickHouseIOTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Writes two values and one null into an Int64 column declared with {@code DEFAULT -1} and checks
 * the resulting column sum.
 */
@Test
public void testInt64WithDefault() throws Exception {
  executeSql("CREATE TABLE test_int64_with_default (f0 Int64 DEFAULT -1) ENGINE=Log");

  Schema nullableInt64 = Schema.of(Schema.Field.nullable("f0", FieldType.INT64));
  Row one = Row.withSchema(nullableInt64).addValue(1L).build();
  Row missing = Row.withSchema(nullableInt64).addValue(null).build();
  Row three = Row.withSchema(nullableInt64).addValue(3L).build();

  pipeline
      .apply(Create.of(one, missing, three).withRowSchema(nullableInt64))
      .apply(write("test_int64_with_default"));
  pipeline.run().waitUntilFinish();

  // Expected total is 3, i.e. 1 + (-1) + 3 with the null row counted as the column default.
  long total = executeQueryAsLong("SELECT SUM(f0) FROM test_int64_with_default");
  assertEquals(3L, total);
}
 
Example 3
Source File: JdbcIOTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Reads rows through {@code JdbcIO.readRows()} configured with a {@code DataSourceConfiguration}
 * and a parameterized query, then checks the reported schema and the selected row.
 */
@Test
public void testReadRowsWithDataSourceConfiguration() {
  String parameterizedQuery =
      String.format("select name,id from %s where name = ?", readTableName);

  PCollection<Row> readRows =
      pipeline.apply(
          JdbcIO.readRows()
              .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration.create(dataSource))
              .withQuery(parameterizedQuery)
              // Binds the single query parameter to the name generated for seed 1.
              .withStatementPreparator(
                  statement -> statement.setString(1, TestRow.getNameForSeed(1))));

  // Both expected fields are nullable; NAME uses a variable-length VARCHAR(500) logical type.
  Schema.Field nameField =
      Schema.Field.of("NAME", LogicalTypes.variableLengthString(JDBCType.VARCHAR, 500))
          .withNullable(true);
  Schema.Field idField = Schema.Field.of("ID", Schema.FieldType.INT32).withNullable(true);
  Schema expectedSchema = Schema.of(nameField, idField);

  assertEquals(expectedSchema, readRows.getSchema());

  Row expectedRow = Row.withSchema(expectedSchema).addValues("Testval1", 1).build();
  PCollection<Row> selected = readRows.apply(Select.fieldNames("NAME", "ID"));
  PAssert.that(selected).containsInAnyOrder(ImmutableList.of(expectedRow));

  pipeline.run();
}
 
Example 4
Source File: ClickHouseIOTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Writes two values and one null into a {@code Nullable(Int64)} column and checks the sum, the
 * total row count and the non-null count.
 */
@Test
public void testNullableInt64() throws Exception {
  executeSql("CREATE TABLE test_nullable_int64 (f0 Nullable(Int64)) ENGINE=Log");

  Schema nullableInt64 = Schema.of(Schema.Field.nullable("f0", FieldType.INT64));
  Row one = Row.withSchema(nullableInt64).addValue(1L).build();
  Row missing = Row.withSchema(nullableInt64).addValue(null).build();
  Row three = Row.withSchema(nullableInt64).addValue(3L).build();

  pipeline
      .apply(Create.of(one, missing, three).withRowSchema(nullableInt64))
      .apply(write("test_nullable_int64"));
  pipeline.run().waitUntilFinish();

  long total = executeQueryAsLong("SELECT SUM(f0) FROM test_nullable_int64");
  long allRows = executeQueryAsLong("SELECT COUNT(*) FROM test_nullable_int64");
  long nonNullRows = executeQueryAsLong("SELECT COUNT(f0) FROM test_nullable_int64");

  // 1 + 3 = 4; three rows were written, two of them with a non-null f0.
  assertEquals(4L, total);
  assertEquals(3L, allRows);
  assertEquals(2L, nonNullRows);
}
 
Example 5
Source File: CastTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/** Casts an (INT16, INT32) row to (INT32, INT64) with {@code Cast.widening}. */
@Test
@Category(NeedsRunner.class)
public void testTypeWiden() {
  Schema narrowSchema =
      Schema.of(
          Schema.Field.of("f0", Schema.FieldType.INT16),
          Schema.Field.of("f1", Schema.FieldType.INT32));
  Row narrowRow = Row.withSchema(narrowSchema).addValues((short) 1, 2).build();

  Schema wideSchema =
      Schema.of(
          Schema.Field.of("f0", Schema.FieldType.INT32),
          Schema.Field.of("f1", Schema.FieldType.INT64));
  Row wideRow = Row.withSchema(wideSchema).addValues(1, 2L).build();

  PCollection<Row> source = pipeline.apply(Create.of(narrowRow).withRowSchema(narrowSchema));
  PCollection<Row> widened = source.apply(Cast.widening(wideSchema));

  PAssert.that(widened).containsInAnyOrder(wideRow);

  pipeline.run();
}
 
Example 6
Source File: RowCoderTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds two rows holding equal-content, distinct {@code byte[]} instances nested in an array of
 * arrays and runs the consistent-with-equals coder property check on them.
 */
@Test
public void testConsistentWithEqualsArrayOfArrayOfBytes() throws Exception {
  FieldType nestedBytes = FieldType.array(FieldType.array(FieldType.BYTES));
  Schema schema = Schema.of(Schema.Field.of("f1", nestedBytes));
  RowCoder rowCoder = RowCoder.of(schema);

  // Two separately allocated arrays with the same content.
  List<List<byte[]>> firstValue =
      Collections.singletonList(Collections.singletonList(new byte[] {1, 2, 3, 4}));
  Row firstRow = Row.withSchema(schema).addValue(firstValue).build();

  List<List<byte[]>> secondValue =
      Collections.singletonList(Collections.singletonList(new byte[] {1, 2, 3, 4}));
  Row secondRow = Row.withSchema(schema).addValue(secondValue).build();

  // The property check only runs when the coder reports itself consistent with equals.
  Assume.assumeTrue(rowCoder.consistentWithEquals());

  CoderProperties.coderConsistentWithEquals(rowCoder, firstRow, secondRow);
}
 
Example 7
Source File: ClickHouseIOTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/** Writes one row with a nested INT64 array and checks the sum of all inner elements. */
@Test
public void testArrayOfArrayOfInt64() throws Exception {
  executeSql("CREATE TABLE test_array_of_array_of_int64 (f0 Array(Array(Int64))) ENGINE=Log");

  FieldType nestedInt64 = FieldType.array(FieldType.array(FieldType.INT64));
  Schema schema = Schema.of(Schema.Field.of("f0", nestedInt64));

  Row nestedRow =
      Row.withSchema(schema)
          .addValue(
              Arrays.asList(Arrays.asList(1L, 2L), Arrays.asList(2L, 3L), Arrays.asList(3L, 4L)))
          .build();

  pipeline
      .apply(Create.of(nestedRow).withRowSchema(schema))
      .apply(write("test_array_of_array_of_int64"));
  pipeline.run().waitUntilFinish();

  // (1 + 2) + (2 + 3) + (3 + 4) = 15
  long total =
      executeQueryAsLong(
          "SELECT SUM(arraySum(arrayMap(x -> arraySum(x), f0))) "
              + "FROM test_array_of_array_of_int64");
  assertEquals(15L, total);
}
 
Example 8
Source File: CastTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that a widening cast is rejected when a nested row field would have to go from INT64
 * to INT32, and that the error message names the nested field path.
 */
@Test
public void testCastInnerRowFail() {
  // Input: f0 is a row of (INT16, INT64), f1 is INT32.
  Schema nestedSource =
      Schema.of(
          Schema.Field.of("f0", Schema.FieldType.INT16),
          Schema.Field.of("f1", Schema.FieldType.INT64));
  Schema source =
      Schema.of(
          Schema.Field.of("f0", Schema.FieldType.row(nestedSource)),
          Schema.Field.of("f1", Schema.FieldType.INT32));

  // Output: f0 is a row of (INT32, INT32), f1 is INT64.
  Schema nestedTarget =
      Schema.of(
          Schema.Field.of("f0", Schema.FieldType.INT32),
          Schema.Field.of("f1", Schema.FieldType.INT32));
  Schema target =
      Schema.of(
          Schema.Field.of("f0", Schema.FieldType.row(nestedTarget)),
          Schema.Field.of("f1", Schema.FieldType.INT64));

  expectedException.expect(IllegalArgumentException.class);
  expectedException.expectMessage(containsString("f0.f1: Can't cast 'INT64' to 'INT32'"));

  Cast.widening(target).verifyCompatibility(source);
}
 
Example 9
Source File: JdbcIOTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Reads {@code RowWithSchema} beans through {@code JdbcIO.read()} after registering the bean with
 * the pipeline's schema registry, then checks the reported schema and the selected row.
 */
@Test
public void testReadWithSchema() {
  pipeline.getSchemaRegistry().registerJavaBean(RowWithSchema.class);

  SerializableFunction<Void, DataSource> providerFn = ignored -> dataSource;
  JdbcIO.RowMapper<RowWithSchema> beanMapper =
      resultSet -> new RowWithSchema(resultSet.getString("NAME"), resultSet.getInt("ID"));
  String parameterizedQuery =
      String.format("select name,id from %s where name = ?", readTableName);

  PCollection<RowWithSchema> beans =
      pipeline.apply(
          JdbcIO.<RowWithSchema>read()
              .withDataSourceProviderFn(providerFn)
              .withQuery(parameterizedQuery)
              .withRowMapper(beanMapper)
              .withCoder(SerializableCoder.of(RowWithSchema.class))
              // Binds the single query parameter to the name generated for seed 1.
              .withStatementPreparator(
                  statement -> statement.setString(1, TestRow.getNameForSeed(1))));

  Schema expectedSchema =
      Schema.of(
          Schema.Field.of("name", Schema.FieldType.STRING),
          Schema.Field.of("id", Schema.FieldType.INT32));

  assertEquals(expectedSchema, beans.getSchema());

  Row expectedRow = Row.withSchema(expectedSchema).addValues("Testval1", 1).build();
  PCollection<Row> selected = beans.apply(Select.fieldNames("name", "id"));
  PAssert.that(selected).containsInAnyOrder(ImmutableList.of(expectedRow));

  pipeline.run();
}
 
Example 10
Source File: CastTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Applies {@code Cast.narrowing} from (INT32, nullable INT64) to (INT16, nullable INT32). The
 * opposite of testWeakenNullable.
 */
@Test
@Category(NeedsRunner.class)
public void testIgnoreNullable() {
  Schema wideSchema =
      Schema.of(
          Schema.Field.of("f0", Schema.FieldType.INT32),
          Schema.Field.nullable("f1", Schema.FieldType.INT64));
  Row wideRow = Row.withSchema(wideSchema).addValues(1, 2L).build();

  Schema narrowSchema =
      Schema.of(
          Schema.Field.of("f0", Schema.FieldType.INT16),
          Schema.Field.nullable("f1", Schema.FieldType.INT32));
  Row narrowRow = Row.withSchema(narrowSchema).addValues((short) 1, 2).build();

  PCollection<Row> source = pipeline.apply(Create.of(wideRow).withRowSchema(wideSchema));
  PCollection<Row> narrowed = source.apply(Cast.narrowing(narrowSchema));

  PAssert.that(narrowed).containsInAnyOrder(narrowRow);

  pipeline.run();
}
 
Example 11
Source File: CastTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Casts an (INT32, INT64) row to (INT16, INT32) with {@code Cast.narrowing}. The same as
 * testTypeWiden, but casting in the opposite direction.
 */
@Test
@Category(NeedsRunner.class)
public void testTypeNarrow() {
  Schema wideSchema =
      Schema.of(
          Schema.Field.of("f0", Schema.FieldType.INT32),
          Schema.Field.of("f1", Schema.FieldType.INT64));
  Row wideRow = Row.withSchema(wideSchema).addValues(1, 2L).build();

  Schema narrowSchema =
      Schema.of(
          Schema.Field.of("f0", Schema.FieldType.INT16),
          Schema.Field.of("f1", Schema.FieldType.INT32));
  Row narrowRow = Row.withSchema(narrowSchema).addValues((short) 1, 2).build();

  PCollection<Row> source = pipeline.apply(Create.of(wideRow).withRowSchema(wideSchema));
  PCollection<Row> narrowed = source.apply(Cast.narrowing(narrowSchema));

  PAssert.that(narrowed).containsInAnyOrder(narrowRow);

  pipeline.run();
}
 
Example 12
Source File: AddFieldsTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Starts from a row with an empty schema and adds four fields under the dotted path
 * {@code nested.*}, then checks both the resulting schema and the resulting row.
 */
@Test
@Category(NeedsRunner.class)
public void recursivelyAddNestedFields() {
  Schema emptySchema = Schema.of();
  Row emptyRow = Row.withSchema(emptySchema).build();

  // Only field1 is given a default value ("value"); the other three are added without one.
  PCollection<Row> withNested =
      pipeline
          .apply(Create.of(emptyRow).withRowSchema(emptySchema))
          .apply(
              AddFields.<Row>create()
                  .field("nested.field1", Schema.FieldType.STRING, "value")
                  .field("nested.field2", Schema.FieldType.INT32)
                  .field("nested.field3", Schema.FieldType.array(Schema.FieldType.STRING))
                  .field("nested.field4", Schema.FieldType.iterable(Schema.FieldType.STRING)));

  // Expected shape: a nullable "nested" row whose field1 is a plain string field and whose
  // remaining fields are nullable.
  Schema nestedSchema =
      Schema.builder()
          .addStringField("field1")
          .addNullableField("field2", Schema.FieldType.INT32)
          .addNullableField("field3", Schema.FieldType.array(Schema.FieldType.STRING))
          .addNullableField("field4", Schema.FieldType.iterable(Schema.FieldType.STRING))
          .build();
  Schema outerSchema =
      Schema.builder().addNullableField("nested", Schema.FieldType.row(nestedSchema)).build();

  assertEquals(outerSchema, withNested.getSchema());

  Row nestedRow = Row.withSchema(nestedSchema).addValues("value", null, null, null).build();
  Row outerRow = Row.withSchema(outerSchema).addValue(nestedRow).build();
  PAssert.that(withNested).containsInAnyOrder(outerRow);

  pipeline.run();
}
 
Example 13
Source File: AtomicInsertTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Produces {@code size} single-field INT64 rows holding the values {@code 0 .. size - 1}. The
 * rows are created as one iterable element and flattened afterwards.
 */
@Override
public PCollection<Row> expand(PBegin input) {
  Schema schema = Schema.of(Schema.Field.of("f0", Schema.FieldType.INT64));

  Iterable<Row> bundle =
      IntStream.range(0, size)
          .mapToObj(i -> Row.withSchema(schema).addValue((long) i).build())
          .collect(Collectors.toList());

  // make sure we get one big bundle: the whole list is a single Create element
  PCollection<Iterable<Row>> singleElement =
      input
          .getPipeline()
          .apply(
              Create.<Iterable<Row>>of(bundle).withCoder(IterableCoder.of(RowCoder.of(schema))));

  return singleElement.apply(Flatten.iterables()).setRowSchema(schema);
}
 
Example 14
Source File: JdbcIOTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Reads rows through {@code JdbcIO.readRows()} with the filter value formatted directly into the
 * query text (no statement preparator), then checks the reported schema and the selected row.
 */
@Test
public void testReadRowsWithoutStatementPreparator() {
  String expectedName = TestRow.getNameForSeed(1);
  String literalQuery =
      String.format("select name,id from %s where name = '%s'", readTableName, expectedName);
  SerializableFunction<Void, DataSource> providerFn = ignored -> dataSource;

  PCollection<Row> readRows =
      pipeline.apply(
          JdbcIO.readRows().withDataSourceProviderFn(providerFn).withQuery(literalQuery));

  // Both expected fields are nullable; NAME uses a variable-length VARCHAR(500) logical type.
  Schema.Field nameField =
      Schema.Field.of("NAME", LogicalTypes.variableLengthString(JDBCType.VARCHAR, 500))
          .withNullable(true);
  Schema.Field idField = Schema.Field.of("ID", Schema.FieldType.INT32).withNullable(true);
  Schema expectedSchema = Schema.of(nameField, idField);

  assertEquals(expectedSchema, readRows.getSchema());

  Row expectedRow = Row.withSchema(expectedSchema).addValues(expectedName, 1).build();
  PCollection<Row> selected = readRows.apply(Select.fieldNames("NAME", "ID"));
  PAssert.that(selected).containsInAnyOrder(ImmutableList.of(expectedRow));

  pipeline.run();
}
 
Example 15
Source File: RowTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Two rows whose BYTES field is set from {@code ByteBuffer}s wrapping distinct arrays with the
 * same content must compare equal.
 */
@Test
public void testByteBufferEquality() {
  Schema bytesSchema = Schema.of(Schema.Field.of("bytes", Schema.FieldType.BYTES));

  byte[] firstBytes = new byte[] {1, 2, 3, 4};
  byte[] secondBytes = new byte[] {1, 2, 3, 4};

  Row first = Row.withSchema(bytesSchema).addValue(ByteBuffer.wrap(firstBytes)).build();
  Row second = Row.withSchema(bytesSchema).addValue(ByteBuffer.wrap(secondBytes)).build();

  assertEquals(first, second);
}
 
Example 16
Source File: RowTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Two rows whose BYTES field is set from distinct {@code byte[]} instances with the same content
 * must compare equal.
 */
@Test
public void testByteArrayEquality() {
  Schema bytesSchema = Schema.of(Schema.Field.of("bytes", Schema.FieldType.BYTES));

  byte[] firstBytes = new byte[] {1, 2, 3, 4};
  byte[] secondBytes = new byte[] {1, 2, 3, 4};

  Row first = Row.withSchema(bytesSchema).addValue(firstBytes).build();
  Row second = Row.withSchema(bytesSchema).addValue(secondBytes).build();

  assertEquals(first, second);
}
 
Example 17
Source File: SqlBoundedSideInputJoin.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Filters the bid events, exposes the side input as (id, extra) rows, runs the configured SQL
 * query over both inputs tagged as "bid" and "side", and converts the result rows to {@code Bid}.
 */
@Override
public PCollection<Bid> expand(PCollection<Event> events) {
  PCollection<Row> bidRows =
      events
          .apply(Filter.by(NexmarkQueryUtil.IS_BID))
          .apply(getName() + ".SelectEvent", new SelectEvent(Event.Type.BID));

  checkState(getSideInput() != null, "Configuration error: side input is null");

  // Tags under which the two inputs are placed in the tuple passed to the SQL transform.
  TupleTag<Row> sideTag = new TupleTag<Row>("side") {};
  TupleTag<Row> bidTag = new TupleTag<Row>("bid") {};

  Schema sideSchema =
      Schema.of(
          Schema.Field.of("id", Schema.FieldType.INT64),
          Schema.Field.of("extra", Schema.FieldType.STRING));

  // Attach the schema to the KV side input, with conversions in both directions, and
  // convert it to rows.
  PCollection<Row> sideRows =
      getSideInput()
          .setSchema(
              sideSchema,
              TypeDescriptors.kvs(TypeDescriptors.longs(), TypeDescriptors.strings()),
              pair -> Row.withSchema(sideSchema).addValues(pair.getKey(), pair.getValue()).build(),
              sideRow -> KV.of(sideRow.getInt64("id"), sideRow.getString("extra")))
          .apply("SideToRows", Convert.toRows());

  // The query template takes the configured side-input row count as its format argument.
  String sql = String.format(query, configuration.sideInputRowCount);

  PCollectionTuple joinInputs = PCollectionTuple.of(bidTag, bidRows).and(sideTag, sideRows);
  return joinInputs
      .apply(SqlTransform.query(sql).withQueryPlannerClass(plannerClass))
      .apply("ResultToBid", Convert.fromRows(Bid.class));
}
 
Example 18
Source File: BeamSqlDslProjectTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Selects the hex bytes literal {@code x'baadcafe'} and checks the resulting BYTES value. */
@Test
public void testBytesLiteral() {
  Schema bytesSchema = Schema.of(Schema.Field.of("c_bytes", Schema.FieldType.BYTES));

  // 0xBA, 0xAD, 0xCA, 0xFE written as signed bytes.
  Row expectedRow = Row.withSchema(bytesSchema).addValue(new byte[] {-70, -83, -54, -2}).build();

  PCollection<Row> selected =
      PCollectionTuple.empty(pipeline).apply(SqlTransform.query("SELECT x'baadcafe' as c_bytes"));

  PAssert.that(selected).containsInAnyOrder(expectedRow);

  pipeline.run();
}
 
Example 19
Source File: CastValidatorTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the widening validator for one (input, output) pair of type names: no errors are
 * expected when {@code input} is at or before {@code output} in {@code NUMERIC_ORDER}, and at
 * least one error otherwise.
 *
 * @param input type name of the source field
 * @param output type name of the target field
 */
private void testWideningOrder(TypeName input, TypeName output) {
  Schema sourceSchema = Schema.of(Schema.Field.of("f0", FieldType.of(input)));
  Schema targetSchema = Schema.of(Schema.Field.of("f0", FieldType.of(output)));

  List<Cast.CompatibilityError> problems = Cast.Widening.of().apply(sourceSchema, targetSchema);

  boolean inputNotAfterOutput = NUMERIC_ORDER.indexOf(input) <= NUMERIC_ORDER.indexOf(output);
  if (!inputNotAfterOutput) {
    assertThat(input + " is after " + output, problems, not(empty()));
    return;
  }
  assertThat(input + " is before " + output, problems, empty());
}
 
Example 20
Source File: ClickHouseIOTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Writes a single row of fifteen array-typed fields into a ClickHouse table whose columns cover
 * date/time, floating point, signed and unsigned integer, string and enum array types, then reads
 * the row back and compares the string rendering of every column.
 */
@Test
public void testArrayOfPrimitiveTypes() throws Exception {
  // Beam-side schema; field fN is written to the table column of the same name below.
  Schema schema =
      Schema.of(
          Schema.Field.of("f0", FieldType.array(FieldType.DATETIME)),
          Schema.Field.of("f1", FieldType.array(FieldType.DATETIME)),
          Schema.Field.of("f2", FieldType.array(FieldType.FLOAT)),
          Schema.Field.of("f3", FieldType.array(FieldType.DOUBLE)),
          Schema.Field.of("f4", FieldType.array(FieldType.BYTE)),
          Schema.Field.of("f5", FieldType.array(FieldType.INT16)),
          Schema.Field.of("f6", FieldType.array(FieldType.INT32)),
          Schema.Field.of("f7", FieldType.array(FieldType.INT64)),
          Schema.Field.of("f8", FieldType.array(FieldType.STRING)),
          Schema.Field.of("f9", FieldType.array(FieldType.INT16)),
          Schema.Field.of("f10", FieldType.array(FieldType.INT32)),
          Schema.Field.of("f11", FieldType.array(FieldType.INT64)),
          Schema.Field.of("f12", FieldType.array(FieldType.INT64)),
          Schema.Field.of("f13", FieldType.array(FieldType.STRING)),
          Schema.Field.of("f14", FieldType.array(FieldType.STRING)));
  // One addArray call per field, in schema order (f0 .. f14).
  Row row1 =
      Row.withSchema(schema)
          .addArray(
              new DateTime(2030, 10, 1, 0, 0, 0, DateTimeZone.UTC),
              new DateTime(2031, 10, 1, 0, 0, 0, DateTimeZone.UTC))
          .addArray(
              new DateTime(2030, 10, 9, 8, 7, 6, DateTimeZone.UTC),
              new DateTime(2031, 10, 9, 8, 7, 6, DateTimeZone.UTC))
          .addArray(2.2f, 3.3f)
          .addArray(3.3, 4.4)
          .addArray((byte) 4, (byte) 5)
          .addArray((short) 5, (short) 6)
          .addArray(6, 7)
          .addArray(7L, 8L)
          .addArray("eight", "nine")
          .addArray((short) 9, (short) 10)
          .addArray(10, 11)
          .addArray(11L, 12L)
          .addArray(12L, 13L)
          .addArray("abc", "cde")
          .addArray("cde", "abc")
          .build();

  executeSql(
      "CREATE TABLE test_array_of_primitive_types ("
          + "f0  Array(Date),"
          + "f1  Array(DateTime),"
          + "f2  Array(Float32),"
          + "f3  Array(Float64),"
          + "f4  Array(Int8),"
          + "f5  Array(Int16),"
          + "f6  Array(Int32),"
          + "f7  Array(Int64),"
          + "f8  Array(String),"
          + "f9  Array(UInt8),"
          + "f10 Array(UInt16),"
          + "f11 Array(UInt32),"
          + "f12 Array(UInt64),"
          + "f13 Array(Enum8('abc' = 1, 'cde' = 2)),"
          + "f14 Array(Enum16('abc' = -1, 'cde' = -2))"
          + ") ENGINE=Log");

  pipeline
      .apply(Create.of(row1).withRowSchema(schema))
      .apply(write("test_array_of_primitive_types"));

  pipeline.run().waitUntilFinish();

  try (ResultSet rs = executeQuery("SELECT * FROM test_array_of_primitive_types")) {
    // Fail with a clear message when nothing was written, instead of ignoring the cursor
    // result and failing later inside the first getString call.
    if (!rs.next()) {
      throw new AssertionError("Expected one row in test_array_of_primitive_types, found none");
    }

    assertEquals("['2030-10-01','2031-10-01']", rs.getString("f0"));
    assertEquals("['2030-10-09 08:07:06','2031-10-09 08:07:06']", rs.getString("f1"));
    assertEquals("[2.2,3.3]", rs.getString("f2"));
    assertEquals("[3.3,4.4]", rs.getString("f3"));
    assertEquals("[4,5]", rs.getString("f4"));
    assertEquals("[5,6]", rs.getString("f5"));
    assertEquals("[6,7]", rs.getString("f6"));
    assertEquals("[7,8]", rs.getString("f7"));
    assertEquals("['eight','nine']", rs.getString("f8"));
    assertEquals("[9,10]", rs.getString("f9"));
    assertEquals("[10,11]", rs.getString("f10"));
    assertEquals("[11,12]", rs.getString("f11"));
    assertEquals("[12,13]", rs.getString("f12"));
    assertEquals("['abc','cde']", rs.getString("f13"));
    assertEquals("['cde','abc']", rs.getString("f14"));
  }
}