org.apache.beam.sdk.schemas.Schema Java Examples

The following examples show how to use org.apache.beam.sdk.schemas.Schema. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Cast.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a row for {@code outputSchema} by casting the matching values of {@code input}.
 *
 * <p>Every output field is located in {@code inputSchema} by name, so the two schemas may list
 * their fields in different orders.
 *
 * @param input row to convert; may be {@code null}
 * @param inputSchema schema used to find and type the values read from {@code input}
 * @param outputSchema schema of the row to produce
 * @return the converted row, or {@code null} when {@code input} is {@code null}
 */
public static Row castRow(Row input, Schema inputSchema, Schema outputSchema) {
  if (input == null) {
    return null;
  }

  Row.Builder builder = Row.withSchema(outputSchema);
  for (Schema.Field targetField : outputSchema.getFields()) {
    // Match by name rather than by position.
    int sourceIndex = inputSchema.indexOf(targetField.getName());
    Schema.Field sourceField = inputSchema.getField(sourceIndex);

    Object sourceValue = input.getValue(sourceIndex);
    Object converted = castValue(sourceValue, sourceField.getType(), targetField.getType());
    builder.addValue(converted);
  }
  return builder.build();
}
 
Example #2
Source File: BeamSideInputJoinRelTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Right-outer-joins ORDER_DETAILS1 against a windowed aggregate over ORDER_DETAILS and compares
 * the stringified result rows; {@code buyer} is nullable because the left side may be missing.
 */
@Test
public void testRightOuterJoin() throws Exception {
  Schema joinedSchema =
      Schema.builder()
          .addField("order_id", Schema.FieldType.INT32)
          .addField("sum_site_id", Schema.FieldType.INT32)
          .addNullableField("buyer", Schema.FieldType.STRING)
          .build();

  String sql =
      "SELECT o1.order_id, o1.sum_site_id, o2.buyer FROM "
          + " ORDER_DETAILS1 o2 "
          + " RIGHT OUTER JOIN "
          + "(select order_id, sum(site_id) as sum_site_id FROM ORDER_DETAILS "
          + "          GROUP BY order_id, TUMBLE(order_time, INTERVAL '1' HOUR)) o1 "
          + " on "
          + " o1.order_id=o2.order_id";

  PCollection<Row> joined = compilePipeline(sql, pipeline);
  PAssert.that(joined.apply(ParDo.of(new TestUtils.BeamSqlRow2StringDoFn())))
      .containsInAnyOrder(
          TestUtils.RowsBuilder.of(joinedSchema)
              .addRows(1, 3, "james", 2, 5, "bond", 3, 3, null)
              .getStringRows());
  pipeline.run();
}
 
Example #3
Source File: RowUtils.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the row to use at {@code rowPosition}.
 *
 * <p>A direct override for the position wins. Otherwise, if some override exists below this
 * position, the row is rebuilt field by field through {@code matcher}. With neither, {@code
 * value} is returned untouched.
 *
 * @param rowPosition position of the row being processed
 * @param schema schema of the row
 * @param value current row; a {@code null} row is rebuilt from {@code null} field values
 * @param matcher dispatcher invoked for every field of a rebuilt row
 * @return the override value, a rebuilt row, or {@code value} itself
 */
@Override
public Row processRow(
    RowPosition rowPosition, Schema schema, Row value, RowFieldMatcher matcher) {
  FieldOverride rowOverride = override(rowPosition);
  if (rowOverride != null) {
    return (Row) rowOverride.getOverrideValue();
  }
  if (!fieldOverrides.hasOverrideBelow(rowPosition.descriptor)) {
    return value;
  }

  int fieldCount = schema.getFieldCount();
  List<Object> rebuilt = Lists.newArrayListWithCapacity(fieldCount);
  for (int idx = 0; idx < fieldCount; idx++) {
    FieldAccessDescriptor childDescriptor =
        FieldAccessDescriptor.withFieldIds(rowPosition.descriptor, idx).resolve(topSchema);
    Object current = null;
    if (value != null) {
      current = value.getValue(idx);
    }
    Object matched =
        matcher.match(
            this, schema.getField(idx).getType(), new RowPosition(childDescriptor), current);
    rebuilt.add(matched);
  }
  return new RowWithStorage(schema, rebuilt);
}
 
Example #4
Source File: SelectHelpersTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Selecting {@code map{}.*} should produce one map-typed output field per field of the map's
 * row value, both in the derived schema and in the selected row.
 */
@Test
public void testSelectMapOfRowSelectAll() {
  FieldAccessDescriptor descriptor =
      FieldAccessDescriptor.withFieldNames("map{}.*").resolve(MAP_SCHEMA);

  // Schema check.
  Schema actualSchema = SelectHelpers.getOutputSchema(MAP_SCHEMA, descriptor);
  Schema wantSchema =
      Schema.builder()
          .addMapField("field1", FieldType.INT32, FieldType.STRING)
          .addMapField("field2", FieldType.INT32, FieldType.INT32)
          .addMapField("field3", FieldType.INT32, FieldType.DOUBLE)
          .addMapField("field_extra", FieldType.INT32, FieldType.STRING)
          .build();
  assertEquals(wantSchema, actualSchema);

  // Value check: each output map carries key 1 and the matching FLAT_ROW value.
  Row actualRow = selectRow(MAP_SCHEMA, descriptor, MAP_ROW);
  Row.Builder wantRow = Row.withSchema(wantSchema);
  wantRow.addValue(ImmutableMap.of(1, FLAT_ROW.getValue(0)));
  wantRow.addValue(ImmutableMap.of(1, FLAT_ROW.getValue(1)));
  wantRow.addValue(ImmutableMap.of(1, FLAT_ROW.getValue(2)));
  wantRow.addValue(ImmutableMap.of(1, FLAT_ROW.getValue(3)));
  assertEquals(wantRow.build(), actualRow);
}
 
Example #5
Source File: PubsubJsonTableProviderTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * The provider should build a table whose schema equals the declared message schema
 * (timestamp, attribute map and an empty payload row).
 */
@Test
public void testCreatesTable() {
  PubsubJsonTableProvider tableProvider = new PubsubJsonTableProvider();

  Schema emptyPayload = Schema.builder().build();
  Schema declaredSchema =
      Schema.builder()
          .addDateTimeField("event_timestamp")
          .addMapField("attributes", VARCHAR, VARCHAR)
          .addRowField("payload", emptyPayload)
          .build();
  Table definition = tableDefinition().schema(declaredSchema).build();

  BeamSqlTable built = tableProvider.buildBeamSqlTable(definition);

  assertNotNull(built);
  assertEquals(declaredSchema, built.getSchema());
}
 
Example #6
Source File: BeamSalUhfSpecialTypeAndValueTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/** IS_INF should report {@code true} for all four selected float/double columns. */
@Test
public void testIsInf() throws Exception {
  String sql =
      "SELECT IS_INF(f_float_1), IS_INF(f_double_1), IS_INF(f_float_2), IS_INF(f_double_2) FROM PCOLLECTION";

  Schema fourBooleans =
      Schema.builder()
          .addBooleanField("field_1")
          .addBooleanField("field_2")
          .addBooleanField("field_3")
          .addBooleanField("field_4")
          .build();
  Row allTrue = Row.withSchema(fourBooleans).addValues(true, true, true, true).build();

  PCollection<Row> actual = boundedInputFloatDouble.apply("testUdf", SqlTransform.query(sql));
  PAssert.that(actual).containsInAnyOrder(allTrue);
  pipeline.run().waitUntilFinish();
}
 
Example #7
Source File: SchemaTestUtils.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Compares two iterables pairwise, in iteration order, using {@code fieldsEquivalent}.
 *
 * @param expected reference elements
 * @param actual elements under test
 * @param elementType field type passed to {@code fieldsEquivalent} for every pair
 * @return {@code true} when both are the same instance, or when they yield the same number of
 *     elements and every pair is equivalent
 */
static boolean iterablesEquivalent(
    Iterable<Object> expected, Iterable<Object> actual, Schema.FieldType elementType) {
  if (expected == actual) {
    return true;
  }
  Iterator<Object> actualIter = actual.iterator();
  Iterator<Object> expectedIter = expected.iterator();
  while (expectedIter.hasNext()) {
    Object wanted = expectedIter.next();
    // Fail when "actual" is shorter or the pair at this position differs.
    boolean pairMatches =
        actualIter.hasNext() && fieldsEquivalent(wanted, actualIter.next(), elementType);
    if (!pairMatches) {
      return false;
    }
  }
  // Leftover elements mean "actual" is longer than "expected".
  return !actualIter.hasNext();
}
 
Example #8
Source File: TestBigQuery.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Converts each row to a BigQuery table row and inserts them all with one {@code insertAll} call
 * against the project, dataset and table this object is configured with.
 *
 * @param rowSchema schema of the rows; not read by this method
 * @param rows rows to insert
 * @return the response of the {@code insertAll} request
 * @throws IOException declared for the client call
 */
@Experimental(Kind.SCHEMAS)
public TableDataInsertAllResponse insertRows(Schema rowSchema, Row... rows) throws IOException {
  List<Rows> payload =
      Arrays.stream(rows)
          .map(
              beamRow -> {
                Rows wrapped = new Rows();
                return wrapped.setJson(BigQueryUtils.toTableRow(beamRow));
              })
          .collect(ImmutableList.toImmutableList());
  Bigquery client = newBigQueryClient(pipelineOptions);

  return client
      .tabledata()
      .insertAll(
          pipelineOptions.getProject(),
          pipelineOptions.getTargetDataset(),
          table.getTableReference().getTableId(),
          new TableDataInsertAllRequest().setRows(payload))
      .execute();
}
 
Example #9
Source File: BeamSqlDslExistsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/** An EXISTS sub-query correlated on the customer key should yield the single expected row. */
@Test
public void testExistsSubquery() {
  String sql =
      String.join(
          "",
          "select * from CUSTOMER ",
          " where exists ( ",
          " select * from ORDERS ",
          " where o_custkey = c_custkey )");

  PCollection<Row> customers = compilePipeline(sql, pipeline);
  PAssert.that(customers)
      .containsInAnyOrder(
          TestUtils.RowsBuilder.of(
                  Schema.FieldType.INT32, "c_custkey",
                  Schema.FieldType.DOUBLE, "c_acctbal",
                  Schema.FieldType.STRING, "c_city")
              .addRows(1, 1.0, "Seattle")
              .getRows());

  pipeline.run().waitUntilFinish();
}
 
Example #10
Source File: RowCoderTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Two rows holding equal but distinct {@code byte[]} instances inside a nested array must satisfy
 * the coder's consistent-with-equals property; skipped when the coder does not claim it.
 */
@Test
public void testConsistentWithEqualsArrayOfArrayOfBytes() throws Exception {
  FieldType nestedBytes = FieldType.array(FieldType.array(FieldType.BYTES));
  Schema schema = Schema.of(Schema.Field.of("f1", nestedBytes));
  RowCoder coder = RowCoder.of(schema);

  // Build the same payload twice so the arrays are equal in content but not in identity.
  List<List<byte[]>> firstPayload =
      Collections.singletonList(Collections.singletonList(new byte[] {1, 2, 3, 4}));
  Row firstRow = Row.withSchema(schema).addValue(firstPayload).build();

  List<List<byte[]>> secondPayload =
      Collections.singletonList(Collections.singletonList(new byte[] {1, 2, 3, 4}));
  Row secondRow = Row.withSchema(schema).addValue(secondPayload).build();

  Assume.assumeTrue(coder.consistentWithEquals());

  CoderProperties.coderConsistentWithEquals(coder, firstRow, secondRow);
}
 
Example #11
Source File: TestTableProviderWithFilterAndProjectPushDown.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * A query mixing a supported and an unsupported predicate: checks the plan shape, the digest of
 * the IO source node, the pushed-down field names and the final result.
 */
@Test
public void testIOSourceRel_withSupportedAndUnsupportedPredicate() {
  String query = "SELECT name FROM TEST where id+unused1=101 and id=1";

  BeamRelNode planRoot = sqlEnv.parseQuery(query);
  PCollection<Row> output = BeamSqlRelUtils.toPCollection(pipeline, planRoot);

  // Plan shape: a Calc on top of the IO source.
  assertThat(planRoot, instanceOf(BeamCalcRel.class));
  assertThat(planRoot.getInput(0), instanceOf(BeamIOSourceRel.class));
  assertEquals(
      "BeamPushDownIOSourceRel.BEAM_LOGICAL(table=[beam, TEST],usedFields=[name, id, unused1],TestTableFilter=[supported{=($1, 1)}, unsupported{=(+($1, $0), 101)}])",
      planRoot.getInput(0).getDigest());

  // Make sure project push-down was done
  List<String> sourceFieldNames = planRoot.getInput(0).getRowType().getFieldNames();
  assertThat(sourceFieldNames, containsInAnyOrder("name", "id", "unused1"));

  Schema nameOnly = Schema.builder().addStringField("name").build();
  assertEquals(nameOnly, output.getSchema());
  PAssert.that(output).containsInAnyOrder(row(output.getSchema(), "one"));

  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
 
Example #12
Source File: BeamSalUhfSpecialTypeAndValueTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/** MD5 over the selected byte columns should match digests computed directly with DigestUtils. */
@Test
public void testMd5() throws Exception {
  Schema digestSchema = Schema.builder().addByteArrayField("field").build();

  byte[] asciiDigest = DigestUtils.md5("foobar".getBytes(UTF_8));
  byte[] blankDigest = DigestUtils.md5(" ".getBytes(UTF_8));
  byte[] mixedDigest = DigestUtils.md5("abcABCжщфЖЩФ".getBytes(UTF_8));

  Row asciiRow = Row.withSchema(digestSchema).addValues(asciiDigest).build();
  Row blankRow = Row.withSchema(digestSchema).addValues(blankDigest).build();
  Row mixedRow = Row.withSchema(digestSchema).addValues(mixedDigest).build();

  String sql = "SELECT MD5(f_bytes) FROM PCOLLECTION WHERE f_func = 'HashingFn'";
  PCollection<Row> actual = boundedInputBytes.apply("testUdf", SqlTransform.query(sql));
  PAssert.that(actual).containsInAnyOrder(asciiRow, blankRow, mixedRow);
  pipeline.run().waitUntilFinish();
}
 
Example #13
Source File: SelectHelpersTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Selecting {@code f3.f2.f1.f0} through array -> row -> array -> string nesting should yield an
 * array-of-array-of-string field named {@code f0}.
 */
@Test
public void testArrayRowArray() {
  // Schemas, innermost first.
  Schema leafSchema = Schema.builder().addStringField("f0").build();
  Schema innerArraySchema = Schema.builder().addArrayField("f1", FieldType.row(leafSchema)).build();
  Schema wrapperSchema = Schema.builder().addRowField("f2", innerArraySchema).build();
  Schema outerArraySchema =
      Schema.builder().addArrayField("f3", FieldType.row(wrapperSchema)).build();

  // Matching rows, innermost first.
  Row leafRow = Row.withSchema(leafSchema).addValue("first").build();
  Row innerArrayRow = Row.withSchema(innerArraySchema).addArray(leafRow, leafRow).build();
  Row wrapperRow = Row.withSchema(wrapperSchema).addValue(innerArrayRow).build();
  Row outerArrayRow = Row.withSchema(outerArraySchema).addArray(wrapperRow, wrapperRow).build();

  FieldAccessDescriptor descriptor =
      FieldAccessDescriptor.withFieldNames("f3.f2.f1.f0").resolve(outerArraySchema);

  Schema actualSchema = SelectHelpers.getOutputSchema(outerArraySchema, descriptor);
  Schema wantSchema =
      Schema.builder().addArrayField("f0", FieldType.array(FieldType.STRING)).build();
  assertEquals(wantSchema, actualSchema);

  Row actualRow = selectRow(outerArraySchema, descriptor, outerArrayRow);
  Row wantRow =
      Row.withSchema(actualSchema)
          .addArray(Lists.newArrayList("first", "first"), Lists.newArrayList("first", "first"))
          .build();
  assertEquals(wantRow, actualRow);
}
 
Example #14
Source File: BeamSortRelTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/** ORDER BY with {@code limit 4} should emit the four expected rows. */
@Test
public void testOrderBy_basic() {
  String sql =
      String.join(
          "",
          "INSERT INTO SUB_ORDER_RAM(order_id, site_id, price)  SELECT ",
          " order_id, site_id, price ",
          "FROM ORDER_DETAILS ",
          "ORDER BY order_id asc, site_id desc limit 4");

  PCollection<Row> sorted = compilePipeline(sql, pipeline);
  PAssert.that(sorted)
      .containsInAnyOrder(
          TestUtils.RowsBuilder.of(
                  Schema.FieldType.INT64, "order_id",
                  Schema.FieldType.INT32, "site_id",
                  Schema.FieldType.DOUBLE, "price")
              .addRows(1L, 2, 1.0, 1L, 1, 2.0, 2L, 4, 3.0, 2L, 1, 4.0)
              .getRows());
  pipeline.run().waitUntilFinish();
}
 
Example #15
Source File: BeamSortRelTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/** ORDER BY with {@code limit 11}; the assertion lists ten expected rows. */
@Test
public void testOrderBy_bigFetch() {
  String sql =
      String.join(
          "",
          "INSERT INTO SUB_ORDER_RAM(order_id, site_id, price)  SELECT ",
          " order_id, site_id, price ",
          "FROM ORDER_DETAILS ",
          "ORDER BY order_id asc, site_id desc limit 11");

  PCollection<Row> sorted = compilePipeline(sql, pipeline);
  PAssert.that(sorted)
      .containsInAnyOrder(
          TestUtils.RowsBuilder.of(
                  Schema.FieldType.INT64, "order_id",
                  Schema.FieldType.INT32, "site_id",
                  Schema.FieldType.DOUBLE, "price")
              .addRows(
                  1L, 2, 1.0, 1L, 1, 2.0, 2L, 4, 3.0, 2L, 1, 4.0, 5L, 5, 5.0, 6L, 6, 6.0, 7L, 7,
                  7.0, 8L, 8888, 8.0, 8L, 999, 9.0, 10L, 100, 10.0)
              .getRows());
  pipeline.run().waitUntilFinish();
}
 
Example #16
Source File: BeamSqlMapTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Looking up {@code 'key11'} in the map column of a two-element input should give {@code 11} for
 * one row and {@code null} for the other.
 */
@Test
public void testAccessMapElement() {
  PCollection<Row> input = pCollectionOf2Elements();

  Schema elementSchema =
      Schema.builder().addNullableField("f_mapElem", Schema.FieldType.INT32).build();
  Row keyPresent = Row.withSchema(elementSchema).addValues(11).build();
  Row keyAbsent = Row.withSchema(elementSchema).addValue(null).build();

  PCollection<Row> looked =
      input.apply(
          "sqlQuery", SqlTransform.query("SELECT f_intStringMap['key11'] FROM PCOLLECTION"));

  PAssert.that(looked).containsInAnyOrder(keyPresent, keyAbsent);

  pipeline.run();
}
 
Example #17
Source File: BeamSqlDslProjectTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Trivial programs project precisely their input fields, without dropping or re-ordering them.
 * Here a single INT64 column is selected under a new alias and both the output schema and the
 * value are checked.
 *
 * @see <a href="https://issues.apache.org/jira/browse/BEAM-6810">BEAM-6810</a>
 */
@Test
public void testTrivialProjection() {
  Schema sourceSchema = Schema.of(Schema.Field.of("c_int64", Schema.FieldType.INT64));
  Schema aliasedSchema = Schema.of(Schema.Field.of("abc", Schema.FieldType.INT64));
  String sql = "SELECT c_int64 as abc FROM PCOLLECTION";

  Row sourceRow = Row.withSchema(sourceSchema).addValue(42L).build();
  PCollection<Row> source = pipeline.apply(Create.of(sourceRow).withRowSchema(sourceSchema));

  PCollection<Row> projected = source.apply(SqlTransform.query(sql));

  Assert.assertEquals(aliasedSchema, projected.getSchema());

  Row aliasedRow = Row.withSchema(aliasedSchema).addValue(42L).build();
  PAssert.that(projected).containsInAnyOrder(aliasedRow);

  pipeline.run();
}
 
Example #18
Source File: Row.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Compares two field values according to {@code fieldType}.
 *
 * <p>Logical types are compared through their base type, byte arrays by content, and arrays,
 * iterables and maps through the matching {@code deepEqualsFor...} helper. Every other type falls
 * back to {@link Objects#equals(Object, Object)}.
 *
 * @param a first value; may be {@code null}
 * @param b second value; may be {@code null}
 * @param fieldType schema type describing both values
 * @return {@code true} when the values are equal under the rules above; if either value is
 *     {@code null}, only when both are
 */
public static boolean deepEquals(Object a, Object b, Schema.FieldType fieldType) {
  if (a == null || b == null) {
    return a == b;
  }

  TypeName typeName = fieldType.getTypeName();
  if (typeName == TypeName.LOGICAL_TYPE) {
    // Unwrap and compare using the underlying representation.
    return deepEquals(a, b, fieldType.getLogicalType().getBaseType());
  }
  if (typeName == Schema.TypeName.BYTES) {
    return Arrays.equals((byte[]) a, (byte[]) b);
  }
  if (typeName == TypeName.ARRAY) {
    return deepEqualsForCollection(
        (Collection<Object>) a, (Collection<Object>) b, fieldType.getCollectionElementType());
  }
  if (typeName == TypeName.ITERABLE) {
    return deepEqualsForIterable(
        (Iterable<Object>) a, (Iterable<Object>) b, fieldType.getCollectionElementType());
  }
  if (typeName == Schema.TypeName.MAP) {
    return deepEqualsForMap(
        (Map<Object, Object>) a, (Map<Object, Object>) b, fieldType.getMapValueType());
  }
  return Objects.equals(a, b);
}
 
Example #19
Source File: BeamBuiltinAggregations.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * {@link CombineFn} for Sum based on {@link Sum} and {@link Combine.BinaryCombineFn}.
 *
 * @param fieldType type of the values to add up
 * @return the summing function matching the type name of {@code fieldType}
 * @throws UnsupportedOperationException for any type name other than BYTE, INT16, INT32, INT64,
 *     FLOAT, DOUBLE or DECIMAL
 */
static CombineFn createSum(Schema.FieldType fieldType) {
  final CombineFn sumFn;
  switch (fieldType.getTypeName()) {
    case BYTE:
      sumFn = new ByteSum();
      break;
    case INT16:
      sumFn = new ShortSum();
      break;
    case INT32:
      sumFn = Sum.ofIntegers();
      break;
    case INT64:
      sumFn = Sum.ofLongs();
      break;
    case FLOAT:
      sumFn = new FloatSum();
      break;
    case DOUBLE:
      sumFn = Sum.ofDoubles();
      break;
    case DECIMAL:
      sumFn = new BigDecimalSum();
      break;
    default:
      throw new UnsupportedOperationException(
          String.format("[%s] is not supported in SUM", fieldType));
  }
  return sumFn;
}
 
Example #20
Source File: SelectHelpersTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Selecting {@code map{}.field1} should produce a single map-typed output field named {@code
 * field1}, both in the derived schema and in the selected row.
 */
@Test
public void testSelectMapOfRowSelectSingle() {
  FieldAccessDescriptor fieldAccessDescriptor =
      FieldAccessDescriptor.withFieldNames("map{}.field1").resolve(MAP_SCHEMA);
  Schema outputSchema = SelectHelpers.getOutputSchema(MAP_SCHEMA, fieldAccessDescriptor);

  // The unused "expectedValueSchema" local was removed: it was built but never asserted against.
  Schema expectedSchema =
      Schema.builder().addMapField("field1", FieldType.INT32, FieldType.STRING).build();
  assertEquals(expectedSchema, outputSchema);

  Row row = selectRow(MAP_SCHEMA, fieldAccessDescriptor, MAP_ROW);
  Row expectedRow = Row.withSchema(expectedSchema).addValue(ImmutableMap.of(1, "first")).build();
  assertEquals(expectedRow, row);
}
 
Example #21
Source File: AddFields.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the add-fields information for a composite field type.
 *
 * <p>For a row the nested information comes from its row schema. For arrays, iterables and maps
 * it comes from the element (or map value) type, and the resulting type is wrapped back into the
 * same kind of container. The output type keeps the nullability of {@code inputFieldType}.
 *
 * @param inputFieldType composite type that the nested fields are added to
 * @param nestedFields fields to add below {@code inputFieldType}
 * @return the nested information with its output field type replaced by the rebuilt type
 * @throws RuntimeException if {@code inputFieldType} is not a ROW, ARRAY, ITERABLE or MAP
 */
private static AddFieldsInformation getAddFieldsInformation(
    Schema.FieldType inputFieldType, Collection<NewField> nestedFields) {
  AddFieldsInformation nestedInfo;
  Schema.FieldType rebuiltType;
  switch (inputFieldType.getTypeName()) {
    case ROW:
      nestedInfo = getAddFieldsInformation(inputFieldType.getRowSchema(), nestedFields);
      rebuiltType = nestedInfo.getOutputFieldType();
      break;

    case ARRAY:
      nestedInfo =
          getAddFieldsInformation(inputFieldType.getCollectionElementType(), nestedFields);
      rebuiltType = Schema.FieldType.array(nestedInfo.getOutputFieldType());
      break;

    case ITERABLE:
      nestedInfo =
          getAddFieldsInformation(inputFieldType.getCollectionElementType(), nestedFields);
      rebuiltType = Schema.FieldType.iterable(nestedInfo.getOutputFieldType());
      break;

    case MAP:
      // Only the value side is rewritten; the key type is carried over unchanged.
      nestedInfo = getAddFieldsInformation(inputFieldType.getMapValueType(), nestedFields);
      rebuiltType =
          Schema.FieldType.map(inputFieldType.getMapKeyType(), nestedInfo.getOutputFieldType());
      break;

    default:
      throw new RuntimeException("Cannot select a subfield of a non-composite type.");
  }

  Schema.FieldType withInputNullability =
      rebuiltType.withNullable(inputFieldType.getNullable());
  return nestedInfo.toBuilder().setOutputFieldType(withInputNullability).build();
}
 
Example #22
Source File: CassandraRowMapperFnTest.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/** A DOUBLE column primed with {@code Double.MIN_VALUE} should map to a nullable DOUBLE field. */
@Test
public void testMapDoubleColumn() {
  Double primed = Double.MIN_VALUE;
  primeWithType(primed, DOUBLE);
  ResultSet resultSet = getResultSet();

  Schema singleColumn = Schema.builder().addNullableField("col", FieldType.DOUBLE).build();
  Row wantRow = Row.withSchema(singleColumn).addValue(primed).build();

  Row mapped = cassandraRowMapper.map(resultSet).next();
  assertEquals(wantRow, mapped);
}
 
Example #23
Source File: BeamValuesRelTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/** INSERT ... VALUES with explicit int casts should produce one row of two INT32 fields. */
@Test
public void testValues_castInt() throws Exception {
  String insertStatement =
      "insert into int_table (c0, c1) values(cast(1 as int), cast(2 as int))";

  PCollection<Row> inserted = compilePipeline(insertStatement, pipeline);
  PAssert.that(inserted)
      .containsInAnyOrder(
          TestUtils.RowsBuilder.of(
                  Schema.FieldType.INT32, "c0",
                  Schema.FieldType.INT32, "c1")
              .addRows(1, 2)
              .getRows());
  pipeline.run();
}
 
Example #24
Source File: RowTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/** A row with an array-of-map field should return the same list it was built from. */
@Test
public void testCreatesArrayOfMap() {
  ImmutableList.Builder<Map<Integer, String>> maps = ImmutableList.builder();
  maps.add(ImmutableMap.of(1, "value1"));
  maps.add(ImmutableMap.of(2, "value2"));
  List<Map<Integer, String>> data = maps.build();

  Schema.Field arrayOfMaps =
      Schema.Field.of(
          "array", FieldType.array(FieldType.map(FieldType.INT32, FieldType.STRING)));
  Schema type = Stream.of(arrayOfMaps).collect(toSchema());

  Row built = Row.withSchema(type).addArray(data).build();
  assertEquals(data, built.getArray("array"));
}
 
Example #25
Source File: BeamSortRelTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * ORDER BY ... NULLS FIRST with {@code limit 4} over a table whose {@code site_id} column
 * contains nulls.
 */
@Test
public void testOrderBy_nullsFirst() {
  Schema orderSchema =
      Schema.builder()
          .addField("order_id", Schema.FieldType.INT64)
          .addNullableField("site_id", Schema.FieldType.INT32)
          .addField("price", Schema.FieldType.DOUBLE)
          .build();

  // Source table with data, and an empty sink table of the same shape.
  registerTable(
      "ORDER_DETAILS",
      TestBoundedTable.of(orderSchema)
          .addRows(1L, 2, 1.0, 1L, null, 2.0, 2L, 1, 3.0, 2L, null, 4.0, 5L, 5, 5.0));
  registerTable("SUB_ORDER_RAM", TestBoundedTable.of(orderSchema));

  String sql =
      String.join(
          "",
          "INSERT INTO SUB_ORDER_RAM(order_id, site_id, price)  SELECT ",
          " order_id, site_id, price ",
          "FROM ORDER_DETAILS ",
          "ORDER BY order_id asc, site_id desc NULLS FIRST limit 4");

  PCollection<Row> sorted = compilePipeline(sql, pipeline);
  PAssert.that(sorted)
      .containsInAnyOrder(
          TestUtils.RowsBuilder.of(orderSchema)
              .addRows(1L, null, 2.0, 1L, 2, 1.0, 2L, null, 4.0, 2L, 1, 3.0)
              .getRows());
  pipeline.run().waitUntilFinish();
}
 
Example #26
Source File: JdbcIO.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Applies the read {@code ParDo} to {@code input}, optionally reparallelizes the result, and
 * attaches a schema to the output when the registry has one for the coder's encoded type.
 *
 * @param input collection of query parameters
 * @return the collection of read results
 */
@Override
public PCollection<OutputT> expand(PCollection<ParameterT> input) {
  PCollection<OutputT> results =
      input
          .apply(
              ParDo.of(
                  new ReadFn<>(
                      getDataSourceProviderFn(),
                      getQuery(),
                      getParameterSetter(),
                      getRowMapper(),
                      getFetchSize())))
          .setCoder(getCoder());

  if (getOutputParallelization()) {
    results = results.apply(new Reparallelize<>());
  }

  try {
    TypeDescriptor<OutputT> encodedType = getCoder().getEncodedTypeDescriptor();
    SchemaRegistry schemaRegistry = input.getPipeline().getSchemaRegistry();
    Schema registeredSchema = schemaRegistry.getSchema(encodedType);
    results.setSchema(
        registeredSchema,
        encodedType,
        schemaRegistry.getToRowFunction(encodedType),
        schemaRegistry.getFromRowFunction(encodedType));
  } catch (NoSuchSchemaException ignored) {
    // Best effort: when no schema is registered for the type, the output is returned
    // without one.
  }

  return results;
}
 
Example #27
Source File: BeamCoGBKJoinRelBoundedVsBoundedTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * FULL OUTER JOIN of ORDER_DETAILS1 and ORDER_DETAILS2 on two conditions; every output column is
 * nullable because either side of a joined row may be missing.
 */
@Test
public void testFullOuterJoin() throws Exception {
  Schema joinedSchema =
      Schema.builder()
          .addNullableField("order_id", Schema.FieldType.INT32)
          .addNullableField("site_id", Schema.FieldType.INT32)
          .addNullableField("price", Schema.FieldType.INT32)
          .addNullableField("order_id0", Schema.FieldType.INT32)
          .addNullableField("site_id0", Schema.FieldType.INT32)
          .addNullableField("price0", Schema.FieldType.INT32)
          .build();

  String sql =
      "SELECT *  "
          + "FROM ORDER_DETAILS1 o1"
          + " FULL OUTER JOIN ORDER_DETAILS2 o2"
          + " on "
          + " o1.order_id=o2.site_id AND o2.price=o1.site_id";

  PCollection<Row> joined = compilePipeline(sql, pipeline);
  PAssert.that(joined)
      .containsInAnyOrder(
          TestUtils.RowsBuilder.of(joinedSchema)
              .addRows(
                  2, 3, 3, 1, 2, 3, 1, 2, 3, null, null, null, 3, 4, 5, null, null, null, null,
                  null, null, 2, 3, 3, null, null, null, 3, 4, 5)
              .getRows());
  pipeline.run();
}
 
Example #28
Source File: DataStoreV1Table.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an entire {@code Row} to an appropriate DataStore {@code Entity.Builder}, adding one
 * property per schema field under the field's name.
 *
 * @param schema schema whose fields decide which values are read from {@code row}.
 * @param row {@code Row} to convert.
 * @return resulting {@code Entity.Builder}.
 */
private Entity.Builder constructEntityFromRow(Schema schema, Row row) {
  Entity.Builder builder = Entity.newBuilder();
  for (Schema.Field schemaField : schema.getFields()) {
    String propertyName = schemaField.getName();
    Value propertyValue = mapObjectToValue(row.getValue(propertyName));
    builder.putProperties(propertyName, propertyValue);
  }
  return builder;
}
 
Example #29
Source File: PubsubJsonTableProviderTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * The provider should build a table whose schema equals a flat declared schema (timestamp plus
 * one string field).
 */
@Test
public void testCreatesFlatTable() {
  PubsubJsonTableProvider tableProvider = new PubsubJsonTableProvider();

  Schema.Builder flat = Schema.builder();
  flat.addDateTimeField("event_timestamp");
  flat.addStringField("someField");
  Schema declaredSchema = flat.build();

  Table definition = tableDefinition().schema(declaredSchema).build();
  BeamSqlTable built = tableProvider.buildBeamSqlTable(definition);

  assertNotNull(built);
  assertEquals(declaredSchema, built.getSchema());
}
 
Example #30
Source File: CassandraRowMapperFnTest.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/** A random UUID column should map to a nullable STRING field holding the UUID's text form. */
@Test
public void testType4UUIDColumn() {
  UUID randomId = java.util.UUID.randomUUID();
  primeWithType(randomId, UUID);
  ResultSet resultSet = getResultSet();

  Schema singleColumn = Schema.builder().addNullableField("col", FieldType.STRING).build();
  Row wantRow = Row.withSchema(singleColumn).addValue(randomId.toString()).build();

  Row mapped = cassandraRowMapper.map(resultSet).next();
  assertEquals(wantRow, mapped);
}