Java Code Examples for org.apache.beam.sdk.schemas.Schema#getField()

The following examples show how to use org.apache.beam.sdk.schemas.Schema#getField(). You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to the original project or source file. You can also check out related API usage in the sidebar.
Example 1
Source File: RowJson.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Serializes {@code row} as a single JSON object, emitting one entry per field of {@code schema}
 * in schema order.
 *
 * <p>A null value held by a nullable field is written as a JSON null, or omitted altogether when
 * {@code dropNullsOnWrite} is set. Every other value is handed to {@code writeValue}.
 *
 * @param row the row whose values are written
 * @param schema the schema that supplies field names and types
 * @param gen the generator receiving the JSON output
 * @throws IOException if the generator fails to write
 */
private void writeRow(Row row, Schema schema, JsonGenerator gen) throws IOException {
  gen.writeStartObject();
  final int fieldCount = schema.getFieldCount();
  for (int index = 0; index < fieldCount; ++index) {
    Field current = schema.getField(index);
    Object fieldValue = row.getValue(index);
    // Nullability is only consulted when the value really is null.
    boolean nullInNullableField = fieldValue == null && current.getType().getNullable();

    if (nullInNullableField && dropNullsOnWrite) {
      // Dropped nulls leave no trace in the output, not even the field name.
      continue;
    }

    gen.writeFieldName(current.getName());
    if (nullInNullableField) {
      gen.writeNull();
    } else {
      writeValue(gen, current.getType(), fieldValue);
    }
  }
  gen.writeEndObject();
}
 
Example 2
Source File: Cast.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a row conforming to {@code outputSchema} from {@code input}, which is laid out according
 * to {@code inputSchema}.
 *
 * <p>Each output field is matched to the input field of the same name, and the value is converted
 * with {@code castValue} from the input field type to the output field type.
 *
 * @param input the row to cast; may be null
 * @param inputSchema the schema describing {@code input}
 * @param outputSchema the schema of the returned row
 * @return the cast row, or null when {@code input} is null
 */
public static Row castRow(Row input, Schema inputSchema, Schema outputSchema) {
  if (input == null) {
    return null;
  }

  Row.Builder casted = Row.withSchema(outputSchema);
  int targetIdx = 0;
  while (targetIdx < outputSchema.getFieldCount()) {
    Schema.Field target = outputSchema.getField(targetIdx);

    // Fields are paired by name, so the source position may differ from the target position.
    int sourceIdx = inputSchema.indexOf(target.getName());
    Schema.Field source = inputSchema.getField(sourceIdx);

    casted.addValue(castValue(input.getValue(sourceIdx), source.getType(), target.getType()));
    targetIdx++;
  }

  return casted.build();
}
 
Example 3
Source File: CalciteUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Converts the type of the field at {@code fieldIndex} in {@code schema} to a {@code RelDataType},
 * carrying over the field type's nullability.
 *
 * @param dataTypeFactory the factory used to create the resulting type
 * @param schema the schema containing the field
 * @param fieldIndex the position of the field within {@code schema}
 * @return the converted type with nullability applied
 */
private static RelDataType toRelDataType(
    RelDataTypeFactory dataTypeFactory, Schema schema, int fieldIndex) {
  Schema.FieldType beamType = schema.getField(fieldIndex).getType();
  return dataTypeFactory.createTypeWithNullability(
      toRelDataType(dataTypeFactory, beamType), beamType.getNullable());
}
 
Example 4
Source File: AvroUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Convert from a Beam Row to an AVRO GenericRecord. If a Schema is not provided, one is inferred
 * from the Beam schema on the row.
 *
 * @param row the Beam row to convert
 * @param avroSchema the AVRO schema of the resulting record, or null to infer it from the row's
 *     schema
 * @return a record holding one value per field of the row's schema
 * @throws IllegalArgumentException if the provided AVRO schema has a different number of fields
 *     than the row schema, or lacks a field named like one of the row's fields
 */
public static GenericRecord toGenericRecord(
    Row row, @Nullable org.apache.avro.Schema avroSchema) {
  Schema beamSchema = row.getSchema();
  // Use the provided AVRO schema if present (after a field-count sanity check), otherwise infer
  // an AVRO schema from the row schema.
  if (avroSchema == null) {
    avroSchema = toAvroSchema(beamSchema);
  } else if (avroSchema.getFields().size() != beamSchema.getFieldCount()) {
    throw new IllegalArgumentException(
        "AVRO schema doesn't match row schema. Row schema "
            + beamSchema
            + ". AVRO schema "
            + avroSchema);
  }

  GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
  for (int i = 0; i < beamSchema.getFieldCount(); ++i) {
    Schema.Field field = beamSchema.getField(i);
    // Matching field counts do not guarantee matching names; a lookup miss returns null, so
    // report it as a schema mismatch instead of failing later with a NullPointerException.
    org.apache.avro.Schema.Field avroField = avroSchema.getField(field.getName());
    if (avroField == null) {
      throw new IllegalArgumentException(
          "AVRO schema doesn't match row schema. No AVRO field named "
              + field.getName()
              + ". Row schema "
              + beamSchema
              + ". AVRO schema "
              + avroSchema);
    }
    builder.set(
        field.getName(),
        genericFromBeamField(field.getType(), avroField.schema(), row.getValue(i)));
  }
  return builder.build();
}
 
Example 5
Source File: AddFields.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Computes the output schema and the default values that result from adding {@code fieldsToAdd}
 * to {@code inputSchema}.
 *
 * <p>The output schema is assembled in three passes: the existing fields are copied in order (a
 * field that receives new subfields is retyped using the nested result), the new fields at this
 * level are appended together with their default values, and finally the roots of nested
 * additions that are missing from the input schema are created recursively.
 *
 * @param inputSchema the schema being extended
 * @param fieldsToAdd the new fields to add, at this level or nested below it
 * @return an {@code AddFieldsInformation} built from the row type of the extended schema, the
 *     default values of the fields added at this level, and a list with one slot per output
 *     field holding the nested information resolved for that field, or null where there is none
 */
private static AddFieldsInformation getAddFieldsInformation(
    Schema inputSchema, Collection<NewField> fieldsToAdd) {
  // New fields whose descriptor accesses fields directly are added at this level.
  List<NewField> newTopLevelFields =
      fieldsToAdd.stream()
          .filter(n -> !n.getDescriptor().getFieldsAccessed().isEmpty())
          .collect(Collectors.toList());
  // New fields whose descriptor accesses nested fields are handled by recursion below.
  List<NewField> newNestedFields =
      fieldsToAdd.stream()
          .filter(n -> !n.getDescriptor().getNestedFieldsAccessed().isEmpty())
          .collect(Collectors.toList());
  // Group all nested fields together by the field at the current level. For example, if adding
  // a.b, a.c, a.d this map will contain a -> {a.b, a.c, a.d}.
  Multimap<String, NewField> newNestedFieldsMap =
      Multimaps.index(newNestedFields, NewField::getName);

  // Nested results keyed by the position of the owning field in the output schema.
  Map<Integer, AddFieldsInformation> resolvedNestedNewValues = Maps.newHashMap();
  Schema.Builder builder = Schema.builder();
  for (int i = 0; i < inputSchema.getFieldCount(); ++i) {
    Schema.Field field = inputSchema.getField(i);
    Collection<NewField> nestedFields = newNestedFieldsMap.get(field.getName());

    // If this field is a nested field and new subfields are added further down the tree, add
    // those subfields before adding to the current schema. Otherwise we just add this field as
    // is to the new schema.
    if (!nestedFields.isEmpty()) {
      nestedFields = nestedFields.stream().map(NewField::descend).collect(Collectors.toList());

      // NOTE(review): this call passes a FieldType, so it resolves to an overload that is not
      // part of this method.
      AddFieldsInformation nestedInformation =
          getAddFieldsInformation(field.getType(), nestedFields);
      field = field.withType(nestedInformation.getOutputFieldType());
      resolvedNestedNewValues.put(i, nestedInformation);
    }
    builder.addField(field);
  }

  // Add any new fields at this level.
  List<Object> newValuesThisLevel = new ArrayList<>(newTopLevelFields.size());
  for (NewField newField : newTopLevelFields) {
    builder.addField(newField.getName(), newField.getFieldType());
    newValuesThisLevel.add(newField.getDefaultValue());
  }

  // If there are any nested field additions left that are not already processed, that means
  // that the root of the nested field doesn't exist in the schema. In this case we'll walk down
  // the new nested fields and recursively create each nested level as necessary.
  // NOTE(review): the descriptor and qualifier checks below run for every entry, including
  // roots that already exist in the input schema; only the final hasField test skips those.
  for (Map.Entry<String, Collection<NewField>> newNested :
      newNestedFieldsMap.asMap().entrySet()) {
    String fieldName = newNested.getKey();

    // If the user specifies the same nested field twice in different ways (e.g. a[].x, a{}.x),
    // more than one distinct descriptor remains and getOnlyElement rejects the input.
    FieldAccessDescriptor.FieldDescriptor fieldDescriptor =
        Iterables.getOnlyElement(
            newNested.getValue().stream()
                .map(NewField::getFieldDescriptor)
                .distinct()
                .collect(Collectors.toList()));
    // Start from a nullable empty row and wrap it in one nullable array per qualifier.
    FieldType fieldType = Schema.FieldType.row(Schema.of()).withNullable(true);
    for (Qualifier qualifier : fieldDescriptor.getQualifiers()) {
      // The problem with adding recursive map fields is that we don't know what the map key
      // type should be. In a field descriptor of the form mapField{}.subField, the subField is
      // assumed to be in the map value. Since in this code path the mapField field does not
      // already exist this means we need to create the new map field, and we have no way of
      // knowing what type the key should be. Alternatives would be to always create a default
      // key type (e.g. FieldType.STRING) or extend our selector syntax to allow specifying key
      // types.
      checkArgument(
          !qualifier.getKind().equals(Qualifier.Kind.MAP), "Map qualifiers not supported here");
      fieldType = FieldType.array(fieldType).withNullable(true);
    }
    if (!inputSchema.hasField(fieldName)) {
      // This is a brand-new nested field with no matching field in the input schema. We will
      // recursively create a nested schema to match it.
      Collection<NewField> nestedNewFields =
          newNested.getValue().stream().map(NewField::descend).collect(Collectors.toList());
      AddFieldsInformation addFieldsInformation =
          getAddFieldsInformation(fieldType, nestedNewFields);
      builder.addField(fieldName, addFieldsInformation.getOutputFieldType());
      // Presumably getLastFieldId is the position of the field just added; verify against
      // Schema.Builder.
      resolvedNestedNewValues.put(builder.getLastFieldId(), addFieldsInformation);
    }
  }
  Schema schema = builder.build();

  // Expand the sparse map into a list with one slot per output field; fields without nested
  // additions keep a null slot.
  List<AddFieldsInformation> nestedNewValueList =
      new ArrayList<>(Collections.nCopies(schema.getFieldCount(), null));
  for (Map.Entry<Integer, AddFieldsInformation> entry : resolvedNestedNewValues.entrySet()) {
    nestedNewValueList.set(entry.getKey(), entry.getValue());
  }
  return AddFieldsInformation.of(
      Schema.FieldType.row(schema), newValuesThisLevel, nestedNewValueList);
}
 
Example 6
Source File: DropFields.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a descriptor selecting every field of {@code inputSchema} that {@code input} does
 * <em>not</em> select.
 *
 * <p>A field selected as a whole by {@code input} is left out. A field of which only subfields
 * are selected is handled recursively: its list and map qualifiers are unwrapped to reach the
 * row schema, and the complement of the nested selection is computed against that schema. Any
 * other field is selected by name.
 *
 * @param inputSchema the schema the descriptors refer to
 * @param input the descriptor whose selection is inverted
 * @return the complementary descriptor, resolved against {@code inputSchema}
 */
FieldAccessDescriptor complement(Schema inputSchema, FieldAccessDescriptor input) {
  Set<String> unselectedNames = Sets.newHashSet();
  Map<FieldAccessDescriptor.FieldDescriptor, FieldAccessDescriptor> unselectedNested =
      Maps.newHashMap();

  for (int fieldId = 0; fieldId < inputSchema.getFieldCount(); ++fieldId) {
    boolean selectedAsWhole = input.fieldIdsAccessed().contains(fieldId);
    if (selectedAsWhole) {
      // Fully selected fields have no place in the complement.
      continue;
    }

    Field current = inputSchema.getField(fieldId);
    Map<Integer, FieldAccessDescriptor.FieldDescriptor> nestedById =
        input.getNestedFieldsAccessed().keySet().stream()
            .collect(
                Collectors.toMap(
                    FieldAccessDescriptor.FieldDescriptor::getFieldId,
                    descriptor -> descriptor));
    FieldAccessDescriptor.FieldDescriptor nestedKey = nestedById.get(fieldId);

    if (nestedKey == null) {
      // Neither the field nor any of its subfields is selected, so the complement selects it.
      unselectedNames.add(current.getName());
      continue;
    }

    // Only some subfields are selected: peel off the container qualifiers to reach the row
    // type, then recurse to find the subfields that remain.
    FieldType elementType = current.getType();
    for (FieldAccessDescriptor.FieldDescriptor.Qualifier qualifier : nestedKey.getQualifiers()) {
      switch (qualifier.getKind()) {
        case LIST:
          elementType = elementType.getCollectionElementType();
          break;
        case MAP:
          elementType = elementType.getMapValueType();
          break;
        default:
          throw new RuntimeException("Unexpected field descriptor type.");
      }
    }
    checkArgument(elementType.getTypeName().isCompositeType());

    FieldAccessDescriptor selectedSubfields = input.getNestedFieldsAccessed().get(nestedKey);
    unselectedNested.put(nestedKey, complement(elementType.getRowSchema(), selectedSubfields));
  }

  FieldAccessDescriptor result = FieldAccessDescriptor.withFieldNames(unselectedNames);
  for (Map.Entry<FieldAccessDescriptor.FieldDescriptor, FieldAccessDescriptor> nested :
      unselectedNested.entrySet()) {
    result = result.withNestedField(nested.getKey(), nested.getValue());
  }
  return result.resolve(inputSchema);
}