Java Code Examples for org.apache.beam.sdk.values.Row#Builder

The following examples show how to use org.apache.beam.sdk.values.Row#Builder. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: AvroUtils.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Converts an AVRO record into a Beam {@link Row} strictly, i.e. without widening or narrowing
 * any field during conversion. When {@code schema} is {@code null}, the Beam schema is inferred
 * from the record's AVRO schema.
 */
public static Row toBeamRowStrict(GenericRecord record, @Nullable Schema schema) {
  org.apache.avro.Schema recordAvroSchema = record.getSchema();
  Schema beamSchema = (schema != null) ? schema : toBeamSchema(recordAvroSchema);

  Row.Builder rowBuilder = Row.withSchema(beamSchema);
  for (Schema.Field beamField : beamSchema.getFields()) {
    String fieldName = beamField.getName();
    Object avroValue = record.get(fieldName);
    org.apache.avro.Schema avroFieldSchema = recordAvroSchema.getField(fieldName).schema();
    // Values are appended in Beam schema field order.
    rowBuilder.addValue(convertAvroFieldStrict(avroValue, avroFieldSchema, beamField.getType()));
  }
  return rowBuilder.build();
}
 
Example 2
Source File: Cast.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a row of {@code outputSchema} from {@code input}. Each output field is looked up by
 * name in {@code inputSchema} and its value is converted with {@code castValue}. Returns
 * {@code null} for a {@code null} input.
 */
public static Row castRow(Row input, Schema inputSchema, Schema outputSchema) {
  if (input == null) {
    return null;
  }

  Row.Builder castedRow = Row.withSchema(outputSchema);
  int outputFieldCount = outputSchema.getFieldCount();
  for (int outputIdx = 0; outputIdx < outputFieldCount; outputIdx++) {
    Schema.Field targetField = outputSchema.getField(outputIdx);

    // Fields are matched by name, so input and output field order may differ.
    int sourceIdx = inputSchema.indexOf(targetField.getName());
    Schema.Field sourceField = inputSchema.getField(sourceIdx);

    castedRow.addValue(
        castValue(input.getValue(sourceIdx), sourceField.getType(), targetField.getType()));
  }

  return castedRow.build();
}
 
Example 3
Source File: BeamTableFunctionScanRel.java    From beam with Apache License 2.0 5 votes vote down vote up
// Emits one row per input row of the element's value: the input row's values followed by the
// start and end of the current window (which is cast to IntervalWindow).
@ProcessElement
public void processElement(
    @Element KV<Row, Iterable<Row>> element, BoundedWindow window, OutputReceiver<Row> out) {
  IntervalWindow currentWindow = (IntervalWindow) window;
  for (Row inputRow : element.getValue()) {
    out.output(
        Row.withSchema(outputSchema)
            .addValues(inputRow.getValues())
            .addValue(currentWindow.start())
            .addValue(currentWindow.end())
            .build());
  }
}
 
Example 4
Source File: SchemaUtil.java    From beam with Apache License 2.0 5 votes vote down vote up
// Builds a Row from the current ResultSet position using one extractor per schema field.
@Override
public Row mapRow(ResultSet rs) throws Exception {
  Row.Builder resultRow = Row.withSchema(schema);
  int fieldCount = schema.getFieldCount();
  // Extractors are indexed from 0 while the column index handed to them starts at 1.
  for (int column = 1; column <= fieldCount; column++) {
    resultRow.addValue(fieldExtractors.get(column - 1).extract(rs, column));
  }
  return resultRow.build();
}
 
Example 5
Source File: SchemaTranslation.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a proto row into a Beam {@link Row} using the row schema of {@code fieldType}. The
 * i-th proto value is converted with the type of the i-th schema field.
 */
public static Object rowFromProto(SchemaApi.Row row, FieldType fieldType) {
  Row.Builder rowBuilder = Row.withSchema(fieldType.getRowSchema());
  int valueCount = row.getValuesCount();
  for (int index = 0; index < valueCount; index++) {
    FieldType valueType = fieldType.getRowSchema().getField(index).getType();
    rowBuilder.addValue(fieldValueFromProto(valueType, row.getValues(index)));
  }
  return rowBuilder.build();
}
 
Example 6
Source File: OneOfType.java    From beam with Apache License 2.0 5 votes vote down vote up
// Builds a row of oneOfSchema in which only the field matching the input's case type carries
// the input's value; every other field is null.
@Override
public Row toBaseType(Value input) {
  int populatedIndex = oneOfSchema.indexOf(enumerationType.toString(input.getCaseType()));
  Row.Builder rowBuilder = Row.withSchema(oneOfSchema);
  int fieldCount = oneOfSchema.getFieldCount();
  for (int index = 0; index < fieldCount; index++) {
    Object fieldValue = null;
    if (index == populatedIndex) {
      fieldValue = input.getValue();
    }
    rowBuilder = rowBuilder.addValue(fieldValue);
  }
  return rowBuilder.build();
}
 
Example 7
Source File: SelectHelpers.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Select out of a given {@link Row} object, appending the selected values to {@code output}.
 *
 * <p>A {@code null} {@code input} is treated as a row whose fields are all {@code null}: every
 * selected top-level field yields {@code null}, and nested selections receive a {@code null}
 * value instead of dereferencing the missing row.
 *
 * @param inputSchema schema describing {@code input}
 * @param input the row to select from; may be {@code null}
 * @param output builder that receives the selected values
 * @param fieldAccessDescriptor describes which fields (and nested fields) to select
 */
private static void selectIntoRow(
    Schema inputSchema,
    Row input,
    Row.Builder output,
    FieldAccessDescriptor fieldAccessDescriptor) {
  if (fieldAccessDescriptor.getAllFields()) {
    List<Object> values =
        (input != null)
            ? input.getValues()
            : Collections.nCopies(inputSchema.getFieldCount(), null);
    output.addValues(values);
    return;
  }

  for (int fieldId : fieldAccessDescriptor.fieldIdsAccessed()) {
    // TODO: Once we support specific qualifiers (like array slices), extract them here.
    output.addValue((input != null) ? input.getValue(fieldId) : null);
  }

  Schema outputSchema = output.getSchema();
  for (Map.Entry<FieldDescriptor, FieldAccessDescriptor> nested :
      fieldAccessDescriptor.getNestedFieldsAccessed().entrySet()) {
    FieldDescriptor field = nested.getKey();
    FieldAccessDescriptor nestedAccess = nested.getValue();
    FieldType nestedInputType = inputSchema.getField(field.getFieldId()).getType();
    FieldType nestedOutputType = outputSchema.getField(output.nextFieldId()).getType();
    // Guard the dereference the same way the branches above do; previously a null input
    // threw a NullPointerException here.
    // NOTE(review): selectIntoRowWithQualifiers is not visible here - confirm it accepts a
    // null value for every qualifier kind.
    Object nestedValue = (input != null) ? input.getValue(field.getFieldId()) : null;
    selectIntoRowWithQualifiers(
        field.getQualifiers(),
        0,
        nestedValue,
        output,
        nestedAccess,
        nestedInputType,
        nestedOutputType);
  }
}
 
Example 8
Source File: SelectHelpers.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Selects a sub-row of {@code input} as described by {@code fieldAccessDescriptor}. When all
 * fields are selected the input row itself is returned.
 */
private static Row selectRow(
    Row input,
    FieldAccessDescriptor fieldAccessDescriptor,
    Schema inputSchema,
    Schema outputSchema) {
  if (!fieldAccessDescriptor.getAllFields()) {
    Row.Builder selected = Row.withSchema(outputSchema);
    selectIntoRow(inputSchema, input, selected, fieldAccessDescriptor);
    return selected.build();
  }
  return input;
}
 
Example 9
Source File: SchemaAggregateFn.java    From beam with Apache License 2.0 5 votes vote down vote up
@Override
public Row extractOutput(Object[] accumulator) {
  // The composed combine fn yields one result per registered aggregate; each is copied into
  // the output row in field-aggregation order.
  CoCombineResult combinedResults = getComposedCombineFn().extractOutput(accumulator);
  Row.Builder outputRow = Row.withSchema(getOutputSchema());
  for (FieldAggregation aggregation : getFieldAggregations()) {
    outputRow.addValue(combinedResults.get(aggregation.combineTag));
  }
  return outputRow.build();
}
 
Example 10
Source File: BeamTableFunctionScanRel.java    From beam with Apache License 2.0 5 votes vote down vote up
// For every window assigned to the row's timestamp field, emits the input row's values
// followed by that window's start and end.
@ProcessElement
public void processElement(ProcessContext c) {
  Row inputRow = c.element();
  for (IntervalWindow assigned :
      windowFn.assignWindows(inputRow.getDateTime(windowFieldIndex).toInstant())) {
    c.output(
        Row.withSchema(outputSchema)
            .addValues(inputRow.getValues())
            .addValue(assigned.start())
            .addValue(assigned.end())
            .build());
  }
}
 
Example 11
Source File: BeamTableFunctionScanRel.java    From beam with Apache License 2.0 5 votes vote down vote up
// Emits the input row's values followed by the start and end of the single window assigned
// to the row's timestamp field.
@ProcessElement
public void processElement(ProcessContext c) {
  Row inputRow = c.element();
  IntervalWindow assigned =
      windowFn.assignWindow(inputRow.getDateTime(windowFieldIndex).toInstant());
  c.output(
      Row.withSchema(outputSchema)
          .addValues(inputRow.getValues())
          .addValue(assigned.start())
          .addValue(assigned.end())
          .build());
}
 
Example 12
Source File: BeamTableFunctionScanRel.java    From beam with Apache License 2.0 5 votes vote down vote up
// Emits the input row keyed by a row made of the values at the indices listed in keyIndex.
@ProcessElement
public void processElement(ProcessContext c) {
  Row inputRow = c.element();
  Row.Builder keyBuilder = Row.withSchema(keySchema);
  for (Integer fieldIndex : keyIndex) {
    keyBuilder.addValue(inputRow.getValue(fieldIndex));
  }
  c.output(KV.of(keyBuilder.build(), inputRow));
}
 
Example 13
Source File: DataStoreV1Table.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a Beam {@code Row} from the properties of an {@code Entity}.
 *
 * @param schema Target row {@code Schema}.
 * @param values A map of property names and values.
 * @return resulting Beam {@code Row}.
 */
private Row extractRowFromProperties(Schema schema, Map<String, Value> values) {
  Row.Builder rowBuilder = Row.withSchema(schema);
  // The property map is not guaranteed to be ordered like the schema, so values are looked
  // up by field name while walking the schema fields.
  // Maybe metadata:
  // https://cloud.google.com/appengine/docs/standard/python/datastore/metadataqueries
  // TODO: figure out in what order the elements are in (without relying on Beam schema).
  for (Schema.Field schemaField : schema.getFields()) {
    Value property = values.get(schemaField.getName());
    rowBuilder.addValue(convertValueToObject(schemaField.getType(), property));
  }
  return rowBuilder.build();
}
 
Example 14
Source File: ProtoDynamicMessageSchema.java    From beam with Apache License 2.0 5 votes vote down vote up
// Builds a Row of the context's schema by applying each converter, in order, to the input
// (cast to a proto Message).
@Override
public Row apply(T input) {
  Row.Builder rowBuilder = Row.withSchema(context.getSchema());
  for (Convert converter : converters) {
    rowBuilder.addValue(converter.getFromProtoMessage((Message) input));
  }
  return rowBuilder.build();
}
 
Example 15
Source File: DebeziumSourceRecordToDataflowCdcFormatTranslator.java    From DataflowTemplates with Apache License 2.0 4 votes vote down vote up
/**
 * Translates a Debezium source record into a Beam {@link Row}.
 *
 * <p>The resulting row holds, in order: the translated operation, the qualified table name
 * (the record topic), the "after" image of the record (may be {@code null}), the "ts_ms"
 * timestamp and, when the record has a key, the primary key row. The row schema is built the
 * first time a table is seen and cached in {@code knownSchemas}.
 *
 * @param record the source record to translate
 * @return the translated row, or {@code null} when the record has no value or
 *     {@code translateOperation} returns {@code null} for its operation
 */
public Row translate(SourceRecord record) {
  LOG.debug("Source Record from Debezium: {}", record);

  String qualifiedTableName = record.topic();

  Struct recordValue = (Struct) record.value();
  if (recordValue == null) {
    return null;
  }

  // TODO: Consider including before value in the Row.
  Struct afterValue = recordValue.getStruct("after");
  Row afterValueRow = afterValue == null ? null : handleValue(afterValue.schema(), afterValue);
  LOG.debug("Beam Row is {}", afterValueRow);

  Row primaryKey = null;
  boolean hasPK = true;
  if (record.key() == null) {
    hasPK = false;
  } else {
    primaryKey = handleValue(record.keySchema(), record.key());
    LOG.debug("Key Schema: {} | Key Value: {}", primaryKey.getSchema(), primaryKey);
  }

  String sourceRecordOp = recordValue.getString("op");
  String operation = translateOperation(sourceRecordOp);
  if (operation == null) {
    return null;
  }

  Long timestampMs = recordValue.getInt64("ts_ms");

  if (!knownSchemas.containsKey(qualifiedTableName)) {
    // A record without an "after" image has no Row to take the full-record schema from.
    // Fall back to the schema declared for the "after" field instead of dereferencing the
    // null row, which previously threw a NullPointerException.
    org.apache.beam.sdk.schemas.Schema fullRecordSchema =
        afterValueRow != null
            ? afterValueRow.getSchema()
            : kafkaSchemaToBeamRowSchema(recordValue.schema().field("after").schema());

    org.apache.beam.sdk.schemas.Schema.Builder schemaBuilder = org.apache.beam.sdk.schemas.Schema
        .builder()
        .addStringField(DataflowCdcRowFormat.OPERATION)
        .addStringField(DataflowCdcRowFormat.TABLE_NAME)
        .addField(org.apache.beam.sdk.schemas.Schema.Field.nullable(
            DataflowCdcRowFormat.FULL_RECORD, FieldType.row(fullRecordSchema)))
        .addInt64Field(DataflowCdcRowFormat.TIMESTAMP_MS);

    if (hasPK) {
      schemaBuilder.addRowField(DataflowCdcRowFormat.PRIMARY_KEY, primaryKey.getSchema());
    }
    knownSchemas.put(qualifiedTableName, schemaBuilder.build());
  }
  org.apache.beam.sdk.schemas.Schema finalBeamSchema = knownSchemas.get(qualifiedTableName);

  // Values must be added in the same order the fields were declared in the schema above.
  Row.Builder beamRowBuilder = Row.withSchema(finalBeamSchema)
      .addValue(operation)
      .addValue(qualifiedTableName)
      .addValue(afterValueRow)
      .addValue(timestampMs);

  if (hasPK) {
    beamRowBuilder.addValue(primaryKey);
  }

  return beamRowBuilder.build();
}
 
Example 16
Source File: DebeziumSourceRecordToDataflowCdcFormatTranslator.java    From DataflowTemplates with Apache License 2.0 4 votes vote down vote up
// Converts a Kafka Connect value (cast to Struct) into a Beam Row whose schema is derived
// from the given Kafka Connect schema.
private static Row handleValue(org.apache.kafka.connect.data.Schema schema, Object value) {
  Row.Builder beamRowBuilder = Row.withSchema(kafkaSchemaToBeamRowSchema(schema));
  Struct struct = (Struct) value;
  return kafkaSourceRecordToBeamRow(struct, beamRowBuilder);
}
 
Example 17
Source File: DebeziumSourceRecordToDataflowCdcFormatTranslator.java    From DataflowTemplates with Apache License 2.0 4 votes vote down vote up
/**
 * Copies the fields of a Kafka Connect {@link Struct} into a Beam {@link Row}.
 *
 * <p>Fields are read from {@code value} by name, in the order of the builder's schema, using
 * the accessor that matches each field's Beam type. Nested ROW fields are converted
 * recursively; a {@code null} nested struct is added as a {@code null} value, like the scalar
 * cases do.
 *
 * @param value the struct to read field values from
 * @param rowBuilder builder whose schema drives the conversion and which receives the values
 * @return the row built from {@code rowBuilder}
 * @throws DataException for MAP, ARRAY and any other unsupported field type
 */
private static Row kafkaSourceRecordToBeamRow(Struct value, Row.Builder rowBuilder) {
  org.apache.beam.sdk.schemas.Schema beamSchema = rowBuilder.getSchema();
  for (org.apache.beam.sdk.schemas.Schema.Field f : beamSchema.getFields()) {
    switch (f.getType().getTypeName()) {
      case INT16:
        rowBuilder.addValue(value.getInt16(f.getName()));
        break;
      case INT32:
        rowBuilder.addValue(value.getInt32(f.getName()));
        break;
      case INT64:
        rowBuilder.addValue(value.getInt64(f.getName()));
        break;
      case FLOAT:
        rowBuilder.addValue(value.getFloat32(f.getName()));
        break;
      case DOUBLE:
        rowBuilder.addValue(value.getFloat64(f.getName()));
        break;
      case BOOLEAN:
        rowBuilder.addValue(value.getBoolean(f.getName()));
        break;
      case STRING:
        rowBuilder.addValue(value.getString(f.getName()));
        break;
      case BYTES:
        rowBuilder.addValue(value.getBytes(f.getName()));
        break;
      case ROW: {
        // Do not recurse into a missing nested struct; pass the null through instead.
        Struct nestedStruct = value.getStruct(f.getName());
        Row nestedRow =
            nestedStruct == null
                ? null
                : kafkaSourceRecordToBeamRow(
                    nestedStruct, Row.withSchema(f.getType().getRowSchema()));
        rowBuilder.addValue(nestedRow);
        break;
      }
      case MAP:
        throw new DataException("Map types are not supported.");
      case ARRAY:
        throw new DataException("Array types are not supported.");
      default:
        // String.format needs %s; the former "{}" placeholder left the type name out.
        throw new DataException(
            String.format("Unsupported data type: %s", f.getType().getTypeName()));
    }
  }
  return rowBuilder.build();
}