Java Code Examples for org.apache.beam.sdk.schemas.Schema#getFieldCount()

The following examples show how to use org.apache.beam.sdk.schemas.Schema#getFieldCount(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BeamTableUtils.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Parses the CSV records contained in {@code line} with the given {@link CSVFormat} and turns
 * each of them into a {@link Row} conforming to the given {@link Schema}.
 *
 * <p>One "line" handed over by a reader such as {@link TextIO} may hold zero, one or several
 * records: that depends on whether the reader split its input on the same characters that
 * delimit CSV records, and on whether the {@link CSVFormat} skips blank lines.
 *
 * @throws IllegalArgumentException if a record's field count differs from the schema's field
 *     count, or if parsing fails with an {@link IOException}
 */
public static Iterable<Row> csvLines2BeamRows(CSVFormat csvFormat, String line, Schema schema) {
  // After Beam splits a file, an empty line can arrive as an empty string. CSVParser only treats
  // that as an empty record when a record terminator is present, so make sure there is one.
  final String terminated =
      line.endsWith(csvFormat.getRecordSeparator())
          ? line
          : line + csvFormat.getRecordSeparator();

  try (CSVParser parser = CSVParser.parse(terminated, csvFormat)) {
    List<Row> converted = new ArrayList<>();
    for (CSVRecord record : parser.getRecords()) {
      final int expectedFields = schema.getFieldCount();
      final int actualFields = record.size();
      if (actualFields != expectedFields) {
        throw new IllegalArgumentException(
            String.format("Expect %d fields, but actually %d", expectedFields, actualFields));
      }
      // Cast every raw string to the type declared for the field at the same position.
      Row row =
          IntStream.range(0, expectedFields)
              .mapToObj(idx -> autoCastField(schema.getField(idx), record.get(idx)))
              .collect(toRow(schema));
      converted.add(row);
    }
    return converted;
  } catch (IOException e) {
    throw new IllegalArgumentException(
        String.format("Could not parse CSV records from %s with format %s", terminated, csvFormat),
        e);
  }
}
 
Example 2
Source File: CoGroupTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Asserts that {@code actual} contains, in any order, one row matching each row of {@code
 * expected}.
 *
 * <p>For every expected row, field 0 is read with {@code getRow} and every further field is
 * asserted to be of type {@code ITERABLE}; each field gets its own {@code
 * RowFieldMatcherIterableFieldAnyOrder} and all of them must match the same actual row.
 *
 * @return always {@code null}
 */
private static Void containsJoinedFields(List<Row> expected, Iterable<Row> actual) {
  List<Matcher<? super Row>> rowMatchers = Lists.newArrayList();
  for (Row expectedRow : expected) {
    Schema rowSchema = expectedRow.getSchema();
    int fieldCount = rowSchema.getFieldCount();

    List<Matcher> perFieldMatchers = Lists.newArrayList();
    perFieldMatchers.add(
        new RowFieldMatcherIterableFieldAnyOrder(rowSchema, 0, expectedRow.getRow(0)));

    int fieldIndex = 1;
    while (fieldIndex < fieldCount) {
      assertEquals(TypeName.ITERABLE, rowSchema.getField(fieldIndex).getType().getTypeName());
      perFieldMatchers.add(
          new RowFieldMatcherIterableFieldAnyOrder(
              rowSchema, fieldIndex, expectedRow.getIterable(fieldIndex)));
      ++fieldIndex;
    }
    rowMatchers.add(allOf(perFieldMatchers.toArray(new Matcher[0])));
  }
  assertThat(actual, containsInAnyOrder(rowMatchers.toArray(new Matcher[0])));
  return null;
}
 
Example 3
Source File: RowUtils.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Produces the row to use at {@code rowPosition}.
 *
 * <ul>
 *   <li>If {@code override(rowPosition)} yields an override, its value is returned (cast to
 *       {@link Row}).
 *   <li>Otherwise, if {@code fieldOverrides} reports an override somewhere below this position,
 *       a new row is assembled by passing every field through {@code matcher}.
 *   <li>Otherwise {@code value} is returned untouched.
 * </ul>
 */
@Override
public Row processRow(
    RowPosition rowPosition, Schema schema, Row value, RowFieldMatcher matcher) {
  FieldOverride override = override(rowPosition);
  if (override != null) {
    return (Row) override.getOverrideValue();
  }
  if (!fieldOverrides.hasOverrideBelow(rowPosition.descriptor)) {
    return value;
  }

  final int fieldCount = schema.getFieldCount();
  List<Object> rebuiltValues = Lists.newArrayListWithCapacity(fieldCount);
  for (int fieldId = 0; fieldId < fieldCount; fieldId++) {
    FieldAccessDescriptor childDescriptor =
        FieldAccessDescriptor.withFieldIds(rowPosition.descriptor, fieldId).resolve(topSchema);
    // The incoming row may be null; each of its fields is then handed to the matcher as null.
    Object currentValue = null;
    if (value != null) {
      currentValue = value.getValue(fieldId);
    }
    Object matched =
        matcher.match(
            this, schema.getField(fieldId).getType(), new RowPosition(childDescriptor), currentValue);
    rebuiltValues.add(matched);
  }
  return new RowWithStorage(schema, rebuiltValues);
}
 
Example 4
Source File: RowJson.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Writes {@code row} as a JSON object, emitting the fields in schema order.
 *
 * <p>A null value in a nullable field is either skipped entirely (when {@code dropNullsOnWrite}
 * is set) or written as a JSON null. Every other value is delegated to {@code writeValue}.
 */
private void writeRow(Row row, Schema schema, JsonGenerator gen) throws IOException {
  gen.writeStartObject();
  final int fieldCount = schema.getFieldCount();
  for (int index = 0; index < fieldCount; index++) {
    Field field = schema.getField(index);
    Object value = row.getValue(index);
    boolean nullable = field.getType().getNullable();
    boolean nullInNullableField = nullable && value == null;

    if (nullInNullableField && dropNullsOnWrite) {
      // Omit the field altogether: neither its name nor a null is written.
      continue;
    }

    gen.writeFieldName(field.getName());
    if (nullInNullableField) {
      gen.writeNull();
    } else {
      writeValue(gen, field.getType(), value);
    }
  }
  gen.writeEndObject();
}
 
Example 5
Source File: Cast.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a row with {@code outputSchema} from {@code input}.
 *
 * <p>Each output field is filled from the input field of the same name (located through {@code
 * inputSchema.indexOf}) after passing its value through {@code castValue}.
 *
 * @return the cast row, or {@code null} when {@code input} is {@code null}
 */
public static Row castRow(Row input, Schema inputSchema, Schema outputSchema) {
  if (input == null) {
    return null;
  }

  Row.Builder casted = Row.withSchema(outputSchema);
  final int outputFieldCount = outputSchema.getFieldCount();
  int outputIndex = 0;
  while (outputIndex < outputFieldCount) {
    Schema.Field target = outputSchema.getField(outputIndex);

    // Fields are paired by name, so the source position may differ from the target position.
    int sourceIndex = inputSchema.indexOf(target.getName());
    Schema.Field source = inputSchema.getField(sourceIndex);

    casted.addValue(castValue(input.getValue(sourceIndex), source.getType(), target.getType()));
    outputIndex++;
  }

  return casted.build();
}
 
Example 6
Source File: Convert.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the row schema of the single field of {@code schema}, provided the schema has exactly
 * one field and that field's type is a composite type; returns {@code null} in every other case.
 */
@Nullable
private static Schema getBoxedNestedSchema(Schema schema) {
  if (schema.getFieldCount() == 1) {
    FieldType onlyFieldType = schema.getField(0).getType();
    if (onlyFieldType.getTypeName().isCompositeType()) {
      return onlyFieldType.getRowSchema();
    }
  }
  return null;
}
 
Example 7
Source File: BigQueryUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Translates every field of {@code schema} into a {@link TableFieldSchema}.
 *
 * <p>Non-nullable fields get mode REQUIRED, collection fields get mode REPEATED (which replaces
 * REQUIRED) and are described by their element type, and ROW types are translated recursively.
 *
 * @throws IllegalArgumentException for a collection whose elements are collections or maps, and
 *     for map types
 */
private static List<TableFieldSchema> toTableFieldSchema(Schema schema) {
  List<TableFieldSchema> tableFields = new ArrayList<>(schema.getFieldCount());
  for (Field beamField : schema.getFields()) {
    TableFieldSchema tableField = new TableFieldSchema().setName(beamField.getName());

    String description = beamField.getDescription();
    if (description != null && !description.isEmpty()) {
      tableField.setDescription(description);
    }

    FieldType declaredType = beamField.getType();
    if (!declaredType.getNullable()) {
      tableField.setMode(Mode.REQUIRED.toString());
    }

    // For collections the element type is what gets translated below.
    FieldType valueType = declaredType;
    if (declaredType.getTypeName().isCollectionType()) {
      valueType = declaredType.getCollectionElementType();
      TypeName elementTypeName = valueType.getTypeName();
      if (elementTypeName.isCollectionType() || elementTypeName.isMapType()) {
        throw new IllegalArgumentException("Array of collection is not supported in BigQuery.");
      }
      tableField.setMode(Mode.REPEATED.toString());
    }

    TypeName valueTypeName = valueType.getTypeName();
    if (valueTypeName == TypeName.ROW) {
      tableField.setFields(toTableFieldSchema(valueType.getRowSchema()));
    } else if (valueTypeName == TypeName.MAP) {
      throw new IllegalArgumentException("Maps are not supported in BigQuery.");
    }
    tableField.setType(toStandardSQLTypeName(valueType).toString());

    tableFields.add(tableField);
  }
  return tableFields;
}
 
Example 8
Source File: AvroUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Convert from a Beam Row to an AVRO GenericRecord. If a Schema is not provided, one is inferred
 * from the Beam schema on the row.
 *
 * @param row the row to convert; its Beam schema determines which fields are written
 * @param avroSchema the AVRO schema of the resulting record, or {@code null} to infer it from the
 *     row's schema
 * @return a record holding one value per field of the row's schema
 * @throws IllegalArgumentException if the provided AVRO schema has a different number of fields
 *     than the row's schema, or has no field named like one of the row's fields
 */
public static GenericRecord toGenericRecord(
    Row row, @Nullable org.apache.avro.Schema avroSchema) {
  Schema beamSchema = row.getSchema();
  if (avroSchema == null) {
    // No AVRO schema provided: infer one from the row schema.
    avroSchema = toAvroSchema(beamSchema);
  } else if (avroSchema.getFields().size() != beamSchema.getFieldCount()) {
    throw new IllegalArgumentException(
        "AVRO schema doesn't match row schema. Row schema "
            + beamSchema
            + ". AVRO schema "
            + avroSchema);
  }

  GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema);
  for (int i = 0; i < beamSchema.getFieldCount(); ++i) {
    Schema.Field field = beamSchema.getField(i);
    // Fields are paired by name. A provided AVRO schema can have the right number of fields yet
    // different names; getField then returns null, so fail with a clear message instead of an NPE.
    org.apache.avro.Schema.Field avroField = avroSchema.getField(field.getName());
    if (avroField == null) {
      throw new IllegalArgumentException(
          "AVRO schema has no field named "
              + field.getName()
              + ". Row schema "
              + beamSchema
              + ". AVRO schema "
              + avroSchema);
    }
    builder.set(
        field.getName(),
        genericFromBeamField(field.getType(), avroField.schema(), row.getValue(i)));
  }
  return builder.build();
}
 
Example 9
Source File: DoFnSchemaInformation.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Specifies a parameter that is a selection from an input schema (specified using FieldAccess).
 * This method is called when the input parameter is a Java type that does not itself have a
 * schema, e.g. long, or String. In this case we expect the selection predicate to return a
 * single-field row with a field of the output type.
 *
 * @param inputCoder The coder for the ParDo's input elements.
 * @param selectDescriptor The descriptor describing which field to select.
 * @param selectOutputSchema The schema of the selected parameter.
 * @param elementT The type of the method's input parameter.
 * @return a {@code DoFnSchemaInformation} built from this one whose element converters are the
 *     existing converters followed by the new unboxing converter
 * @throws RuntimeException if {@code selectOutputSchema} does not have exactly one field, or if
 *     that field has a composite type
 */
DoFnSchemaInformation withUnboxPrimitiveParameter(
    SchemaCoder inputCoder,
    FieldAccessDescriptor selectDescriptor,
    Schema selectOutputSchema,
    TypeDescriptor<?> elementT) {
  boolean singleField = selectOutputSchema.getFieldCount() == 1;
  if (!singleField) {
    throw new RuntimeException("Parameter has no schema and the input is not a simple type.");
  }
  boolean compositeField =
      selectOutputSchema.getField(0).getType().getTypeName().isCompositeType();
  if (compositeField) {
    throw new RuntimeException("Parameter has no schema and the input is not a primitive type.");
  }

  // Keep the converters already registered and append the unboxing one after them.
  ImmutableList.Builder<SerializableFunction<?, ?>> allConverters = ImmutableList.builder();
  allConverters.addAll(getElementConverters());
  allConverters.add(
      UnboxingConversionFunction.of(
          inputCoder.getSchema(),
          inputCoder.getToRowFunction(),
          selectDescriptor,
          selectOutputSchema,
          elementT));
  List<SerializableFunction<?, ?>> converters = allConverters.build();

  return toBuilder().setElementConverters(converters).build();
}
 
Example 10
Source File: ZetaSqlBeamTranslationUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a ZetaSQL struct value into a Beam {@link Row} with the given schema.
 *
 * <p>The struct's fields are converted position by position using the type of the schema field
 * at the same index. With {@code verifyValues} the row is assembled through {@code
 * addValues(...).build()}; without it the values are attached via {@code attachValues}.
 */
public static Row zetaSqlStructValueToBeamRow(
    Value structValue, Schema schema, boolean verifyValues) {
  List<Object> javaValues = new ArrayList<>(schema.getFieldCount());
  int position = 0;
  for (Value fieldValue : structValue.getFieldList()) {
    javaValues.add(
        zetaSqlValueToJavaObject(fieldValue, schema.getField(position).getType(), verifyValues));
    position++;
  }

  if (verifyValues) {
    return Row.withSchema(schema).addValues(javaValues).build();
  }
  return Row.withSchema(schema).attachValues(javaValues);
}
 
Example 11
Source File: TextTableProvider.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the text-backed table described by {@code table}.
 *
 * <p>The "format" property (default "csv") selects between CSV, JSON and line-oriented tables.
 * Any value other than "csv", "lines" or "json" is taken to be a legacy CSV format name.
 *
 * @throws InvalidTableException if format "lines" is used with a schema that is not exactly one
 *     STRING column
 */
@Override
public BeamSqlTable buildBeamSqlTable(Table table) {
  Schema schema = table.getSchema();
  String filePattern = table.getLocation();
  JSONObject properties = table.getProperties();

  String requestedFormat = MoreObjects.firstNonNull(properties.getString("format"), "csv");
  String deadLetterFile = properties.getString("deadLetterFile");

  // Backwards compatibility: previously "type": "text" meant CSV and "format" was where the
  // CSV format went. So assume that any other format is the CSV format.
  boolean recognized = ImmutableSet.of("csv", "lines", "json").contains(requestedFormat);
  String format = recognized ? requestedFormat : "csv";
  @Nullable String legacyCsvFormat = recognized ? null : requestedFormat;

  if ("json".equals(format)) {
    return new TextJsonTable(
        schema, filePattern, JsonToRow.create(schema, deadLetterFile), RowToJson.create());
  }

  if ("lines".equals(format)) {
    if (schema.getFieldCount() != 1
        || !schema.getField(0).getType().getTypeName().equals(TypeName.STRING)) {
      throw new InvalidTableException(
          "Table with type 'text' and format 'lines' "
              + "must have exactly one STRING/VARCHAR/CHAR column ");
    }
    return new TextTable(
        schema, filePattern, new LinesReadConverter(), new LinesWriteConverter());
  }

  if ("csv".equals(format)) {
    // An explicit "csvformat" property wins over the legacy format name; with neither, use the
    // default CSV format.
    String csvFormatName = properties.getString("csvformat");
    if (csvFormatName == null) {
      csvFormatName = legacyCsvFormat;
    }
    CSVFormat csvFormat =
        csvFormatName == null ? CSVFormat.DEFAULT : CSVFormat.valueOf(csvFormatName);
    return new TextTable(
        schema, filePattern, new CsvToRow(schema, csvFormat), new RowToCsv(csvFormat));
  }

  throw new InvalidTableException(
      "Table with type 'text' must have format 'csv' or 'lines' or 'json'");
}
 
Example 12
Source File: AddFields.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Computes what is needed to add {@code fieldsToAdd} to {@code inputSchema}: the resulting row
 * type, the default values of the fields added at this level, and, per output field index, the
 * information for nested additions (or {@code null} where there are none).
 *
 * <p>Nested additions are handled by recursion through a {@code getAddFieldsInformation} overload
 * that takes a {@code FieldType}; that overload is not shown here.
 *
 * @param inputSchema the schema the new fields are added to
 * @param fieldsToAdd the fields to add, at this level or below it
 * @return an {@code AddFieldsInformation} built from the output row type, this level's default
 *     values and the per-field nested information
 */
private static AddFieldsInformation getAddFieldsInformation(
    Schema inputSchema, Collection<NewField> fieldsToAdd) {
  // Fields whose descriptor accesses a field directly are added at this level.
  List<NewField> newTopLevelFields =
      fieldsToAdd.stream()
          .filter(n -> !n.getDescriptor().getFieldsAccessed().isEmpty())
          .collect(Collectors.toList());
  // Fields whose descriptor accesses nested fields are added somewhere below this level.
  List<NewField> newNestedFields =
      fieldsToAdd.stream()
          .filter(n -> !n.getDescriptor().getNestedFieldsAccessed().isEmpty())
          .collect(Collectors.toList());
  // Group all nested fields together by the field at the current level. For example, if adding
  // a.b, a.c, a.d this map will contain a -> {a.b, a.c, a.d}.
  Multimap<String, NewField> newNestedFieldsMap =
      Multimaps.index(newNestedFields, NewField::getName);

  // Keyed by field index in the output schema being built.
  Map<Integer, AddFieldsInformation> resolvedNestedNewValues = Maps.newHashMap();
  Schema.Builder builder = Schema.builder();
  for (int i = 0; i < inputSchema.getFieldCount(); ++i) {
    Schema.Field field = inputSchema.getField(i);
    Collection<NewField> nestedFields = newNestedFieldsMap.get(field.getName());

    // If this field is a nested field and new subfields are added further down the tree, add
    // those subfields before adding to the current schema. Otherwise we just add this field as
    // is to the new schema.
    if (!nestedFields.isEmpty()) {
      nestedFields = nestedFields.stream().map(NewField::descend).collect(Collectors.toList());

      AddFieldsInformation nestedInformation =
          getAddFieldsInformation(field.getType(), nestedFields);
      field = field.withType(nestedInformation.getOutputFieldType());
      resolvedNestedNewValues.put(i, nestedInformation);
    }
    builder.addField(field);
  }

  // Add any new fields at this level.
  List<Object> newValuesThisLevel = new ArrayList<>(newTopLevelFields.size());
  for (NewField newField : newTopLevelFields) {
    builder.addField(newField.getName(), newField.getFieldType());
    newValuesThisLevel.add(newField.getDefaultValue());
  }

  // If there are any nested field additions left that are not already processed, that means
  // that the root of the nested field doesn't exist in the schema. In this case we'll walk down
  // the new nested fields and recursively create each nested level as necessary.
  for (Map.Entry<String, Collection<NewField>> newNested :
      newNestedFieldsMap.asMap().entrySet()) {
    String fieldName = newNested.getKey();

    // All additions under the same name must share a single distinct field descriptor. If the
    // user specifies the same nested field twice in different ways (e.g. a[].x, a{}.x) there is
    // more than one, and Iterables.getOnlyElement is expected to reject that
    // (NOTE(review): relies on Guava's getOnlyElement contract - confirm).
    FieldAccessDescriptor.FieldDescriptor fieldDescriptor =
        Iterables.getOnlyElement(
            newNested.getValue().stream()
                .map(NewField::getFieldDescriptor)
                .distinct()
                .collect(Collectors.toList()));
    // Start from a nullable empty row and wrap it in one nullable array per qualifier.
    FieldType fieldType = Schema.FieldType.row(Schema.of()).withNullable(true);
    for (Qualifier qualifier : fieldDescriptor.getQualifiers()) {
      // The problem with adding recursive map fields is that we don't know what the map key
      // type should be. In a field descriptor of the form mapField{}.subField, the subField is
      // assumed to be in the map value. Since in this code path the mapField field does not
      // already exist this means we need to create the new map field, and we have no way of
      // knowing what type the key should be. Alternatives would be to always create a default
      // key type (e.g. FieldType.STRING) or extend our selector syntax to allow specifying key
      // types.
      checkArgument(
          !qualifier.getKind().equals(Qualifier.Kind.MAP), "Map qualifiers not supported here");
      fieldType = FieldType.array(fieldType).withNullable(true);
    }
    if (!inputSchema.hasField(fieldName)) {
      // This is a brand-new nested field with no matching field in the input schema. We will
      // recursively create a nested schema to match it.
      Collection<NewField> nestedNewFields =
          newNested.getValue().stream().map(NewField::descend).collect(Collectors.toList());
      AddFieldsInformation addFieldsInformation =
          getAddFieldsInformation(fieldType, nestedNewFields);
      builder.addField(fieldName, addFieldsInformation.getOutputFieldType());
      // The field was just appended, so the builder's last field id is its index.
      resolvedNestedNewValues.put(builder.getLastFieldId(), addFieldsInformation);
    }
  }
  Schema schema = builder.build();

  // Expand the sparse index -> information map into a list with one slot per output field;
  // fields without nested additions keep a null slot.
  List<AddFieldsInformation> nestedNewValueList =
      new ArrayList<>(Collections.nCopies(schema.getFieldCount(), null));
  for (Map.Entry<Integer, AddFieldsInformation> entry : resolvedNestedNewValues.entrySet()) {
    nestedNewValueList.set(entry.getKey(), entry.getValue());
  }
  return AddFieldsInformation.of(
      Schema.FieldType.row(schema), newValuesThisLevel, nestedNewValueList);
}
 
Example 13
Source File: DropFields.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@link FieldAccessDescriptor} that selects all fields <em>not</em> selected by
 * {@code input}.
 *
 * <p>A field accessed directly by {@code input} is left out. A field of which {@code input}
 * accesses only subfields is kept, with the complement of those subfields computed recursively.
 * Every other field is selected as a whole.
 *
 * @param inputSchema the schema {@code input} refers to
 * @param input the descriptor to complement
 * @return the complementary descriptor, resolved against {@code inputSchema}
 * @throws IllegalArgumentException if a field with nested accesses does not end, after applying
 *     its qualifiers, in a composite type
 */
FieldAccessDescriptor complement(Schema inputSchema, FieldAccessDescriptor input) {
  Set<String> fieldNamesToSelect = Sets.newHashSet();
  Map<FieldAccessDescriptor.FieldDescriptor, FieldAccessDescriptor> nestedFieldsToSelect =
      Maps.newHashMap();

  // Index the nested accesses by field id. This depends only on 'input', so it is built once
  // here rather than once per schema field.
  Map<Integer, FieldAccessDescriptor.FieldDescriptor> nestedFieldsById =
      input.getNestedFieldsAccessed().keySet().stream()
          .collect(Collectors.toMap(k -> k.getFieldId(), k -> k));

  for (int i = 0; i < inputSchema.getFieldCount(); ++i) {
    if (input.fieldIdsAccessed().contains(i)) {
      // This field is selected, so exclude it from the complement.
      continue;
    }
    Field field = inputSchema.getField(i);

    FieldAccessDescriptor.FieldDescriptor fieldDescriptor = nestedFieldsById.get(i);
    if (fieldDescriptor != null) {
      // Some subfields are selected, so recursively calculate the complementary subfields to
      // select. First unwrap the list/map qualifiers to reach the type holding the subfields.
      FieldType fieldType = field.getType();
      for (FieldAccessDescriptor.FieldDescriptor.Qualifier qualifier :
          fieldDescriptor.getQualifiers()) {
        switch (qualifier.getKind()) {
          case LIST:
            fieldType = fieldType.getCollectionElementType();
            break;
          case MAP:
            fieldType = fieldType.getMapValueType();
            break;
          default:
            throw new RuntimeException("Unexpected field descriptor type.");
        }
      }
      checkArgument(fieldType.getTypeName().isCompositeType());
      FieldAccessDescriptor nestedDescriptor =
          input.getNestedFieldsAccessed().get(fieldDescriptor);
      nestedFieldsToSelect.put(
          fieldDescriptor, complement(fieldType.getRowSchema(), nestedDescriptor));
    } else {
      // Neither the field nor the subfield is selected. This means we should select it.
      fieldNamesToSelect.add(field.getName());
    }
  }

  FieldAccessDescriptor fieldAccess = FieldAccessDescriptor.withFieldNames(fieldNamesToSelect);
  for (Map.Entry<FieldAccessDescriptor.FieldDescriptor, FieldAccessDescriptor> entry :
      nestedFieldsToSelect.entrySet()) {
    fieldAccess = fieldAccess.withNestedField(entry.getKey(), entry.getValue());
  }
  return fieldAccess.resolve(inputSchema);
}
 
Example 14
Source File: ConvertHelpers.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Get the coder used for converting from an inputSchema to a given type.
 *
 * <p>If {@code outputType} is {@link Row}, the input schema is forwarded as is. Otherwise the
 * schema registered for {@code outputType} is looked up; if there is none, or it is not
 * assignable from the input schema, the input is "unboxed" when it consists of a single field.
 *
 * @param inputSchema the schema of the values to convert
 * @param outputType the type to convert to
 * @param schemaRegistry the registry used to look up the schema and conversion functions of
 *     {@code outputType}
 * @return the output schema coder (possibly {@code null} when no schema is registered for the
 *     output type) together with the unboxed field type, if unboxing applies
 * @throws RuntimeException if the input and output schemas are not equivalent and unboxing the
 *     input does not make them so
 */
public static <T> ConvertedSchemaInformation<T> getConvertedSchemaInformation(
    Schema inputSchema, TypeDescriptor<T> outputType, SchemaRegistry schemaRegistry) {
  ConvertedSchemaInformation<T> convertedSchema = null;
  boolean toRow = outputType.equals(TypeDescriptor.of(Row.class));
  if (toRow) {
    // If the output is of type Row, then just forward the schema of the input type to the
    // output.
    convertedSchema =
        new ConvertedSchemaInformation<>((SchemaCoder<T>) SchemaCoder.of(inputSchema), null);
  } else {
    // Otherwise, try to find a schema for the output type in the schema registry.
    Schema outputSchema = null;
    SchemaCoder<T> outputSchemaCoder = null;
    try {
      outputSchema = schemaRegistry.getSchema(outputType);
      outputSchemaCoder =
          SchemaCoder.of(
              outputSchema,
              outputType,
              schemaRegistry.getToRowFunction(outputType),
              schemaRegistry.getFromRowFunction(outputType));
    } catch (NoSuchSchemaException ignored) {
      // Intentionally ignored: an output type without a registered schema is handled below,
      // where outputSchema stays null and the input is unboxed if possible.
    }
    FieldType unboxedType = null;
    // TODO: Properly handle nullable.
    if (outputSchema == null || !outputSchema.assignableToIgnoreNullable(inputSchema)) {
      // The schema is not convertible directly. Attempt to unbox it and see if the schema matches
      // then.
      Schema checkedSchema = inputSchema;
      if (inputSchema.getFieldCount() == 1) {
        unboxedType = inputSchema.getField(0).getType();
        // outputSchema may be null here (no registered schema). A composite field can then not
        // be matched against anything, so report it as not convertible instead of hitting a
        // NullPointerException.
        if (unboxedType.getTypeName().isCompositeType()
            && (outputSchema == null
                || !outputSchema.assignableToIgnoreNullable(unboxedType.getRowSchema()))) {
          checkedSchema = unboxedType.getRowSchema();
        } else {
          checkedSchema = null;
        }
      }
      if (checkedSchema != null) {
        throw new RuntimeException(
            "Cannot convert between types that don't have equivalent schemas."
                + " input schema: "
                + checkedSchema
                + " output schema: "
                + outputSchema);
      }
    }
    convertedSchema = new ConvertedSchemaInformation<T>(outputSchemaCoder, unboxedType);
  }
  return convertedSchema;
}
 
Example 15
Source File: RowCoderGenerator.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the generated {@link Coder} for rows of {@code schema}, creating and caching it under
 * the schema's UUID on first use.
 *
 * @throws RuntimeException if the generated coder class cannot be instantiated
 */
@SuppressWarnings("unchecked")
public static Coder<Row> generate(Schema schema) {
  // Using ConcurrentHashMap::computeIfAbsent here would deadlock in case of nested
  // coders. Using HashMap::computeIfAbsent generates ConcurrentModificationExceptions in Java 11.
  Coder<Row> cached = GENERATED_CODERS.get(schema.getUUID());
  if (cached != null) {
    return cached;
  }

  TypeDescription.Generic coderType =
      TypeDescription.Generic.Builder.parameterizedType(Coder.class, Row.class).build();
  DynamicType.Builder<Coder> builder =
      (DynamicType.Builder<Coder>) BYTE_BUDDY.subclass(coderType);
  builder = implementMethods(schema, builder);

  // One component coder per schema field. We use withNullable(false) as nulls are handled by
  // the RowCoder and the individual component coders therefore do not need to handle nulls.
  final int fieldCount = schema.getFieldCount();
  Coder[] componentCoders = new Coder[fieldCount];
  for (int index = 0; index < fieldCount; index++) {
    componentCoders[index] =
        SchemaCoder.coderForFieldType(schema.getField(index).getType().withNullable(false));
  }

  // The generated class keeps the component coders in a private final field that is set through
  // a public constructor taking the coder array.
  builder =
      builder
          .defineField(
              CODERS_FIELD_NAME, Coder[].class, Visibility.PRIVATE, FieldManifestation.FINAL)
          .defineConstructor(Modifier.PUBLIC)
          .withParameters(Coder[].class)
          .intercept(new GeneratedCoderConstructor());

  Coder<Row> generated;
  try {
    generated =
        builder
            .make()
            .load(Coder.class.getClassLoader(), ClassLoadingStrategy.Default.INJECTION)
            .getLoaded()
            .getDeclaredConstructor(Coder[].class)
            .newInstance((Object) componentCoders);
  } catch (InstantiationException
      | IllegalAccessException
      | NoSuchMethodException
      | InvocationTargetException e) {
    throw new RuntimeException("Unable to generate coder for schema " + schema, e);
  }
  GENERATED_CODERS.put(schema.getUUID(), generated);
  return generated;
}
 
Example 16
Source File: AvroByteBuddyUtils.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Generates a {@link SchemaUserTypeCreator} whose {@code create} method invokes a constructor of
 * {@code clazz}.
 *
 * <p>The constructor used is the declared constructor whose parameter count equals the number of
 * fields in {@code schema}; if several qualify, the last one found is used.
 *
 * @param clazz the class to instantiate
 * @param schema the schema whose field count selects the constructor
 * @return an instance of the generated creator class
 * @throws RuntimeException if no constructor matches, or if the generated class cannot be
 *     instantiated (the underlying reflection failure is attached as the cause)
 */
private static <T> SchemaUserTypeCreator createCreator(Class<T> clazz, Schema schema) {
  Constructor<?> baseConstructor = null;
  for (Constructor<?> constructor : clazz.getDeclaredConstructors()) {
    // TODO: This assumes that Avro only generates one constructor with this many fields.
    if (constructor.getParameterCount() == schema.getFieldCount()) {
      baseConstructor = constructor;
    }
  }
  if (baseConstructor == null) {
    throw new RuntimeException("No matching constructor found for class " + clazz);
  }

  // Generate a method call to create and invoke the SpecificRecord's constructor.
  MethodCall construct = MethodCall.construct(baseConstructor);
  // getParameterTypes() returns a fresh copy on every call, so fetch it once.
  Class<?>[] parameterTypes = baseConstructor.getParameterTypes();
  for (int i = 0; i < parameterTypes.length; ++i) {
    Class<?> baseType = parameterTypes[i];
    construct = construct.with(readAndConvertParameter(baseType, i), baseType);
  }

  try {
    DynamicType.Builder<SchemaUserTypeCreator> builder =
        BYTE_BUDDY
            .with(new InjectPackageStrategy(clazz))
            .subclass(SchemaUserTypeCreator.class)
            .method(ElementMatchers.named("create"))
            .intercept(construct);

    return builder
        .visit(new AsmVisitorWrapper.ForDeclaredMethods().writerFlags(ClassWriter.COMPUTE_FRAMES))
        .make()
        .load(
            ReflectHelpers.findClassLoader(clazz.getClassLoader()),
            ClassLoadingStrategy.Default.INJECTION)
        .getLoaded()
        .getDeclaredConstructor()
        .newInstance();
  } catch (InstantiationException
      | IllegalAccessException
      | NoSuchMethodException
      | InvocationTargetException e) {
    // Keep the original failure as the cause so the reason is not lost.
    throw new RuntimeException(
        "Unable to generate a creator for class " + clazz + " with schema " + schema, e);
  }
}
 
Example 17
Source File: JdbcIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Matches the fields of {@code schema} against the columns of the target table and returns, for
 * every table column that has a matching schema field, the column together with the index of
 * that field in {@code schema}.
 *
 * @param schema the schema of the input rows
 * @return the matched table fields with their index in {@code schema}, in table column order
 * @throws RuntimeException if the table metadata cannot be read, if {@code schema} has more
 *     fields than the table, if the nullability check on table columns absent from {@code
 *     schema} fails, or if not every schema field could be matched to a table column
 */
private List<SchemaUtil.FieldWithIndex> getFilteredFields(Schema schema) {
  Schema tableSchema;

  // NOTE(review): the table name is formatted straight into the SQL text (identifiers cannot be
  // bound as parameters), so it has to come from a trusted source.
  // try-with-resources closes the statement and the connection; no explicit close() is needed.
  try (Connection connection = inner.getDataSourceProviderFn().apply(null).getConnection();
      PreparedStatement statement =
          connection.prepareStatement(String.format("SELECT * FROM %s", inner.getTable()))) {
    tableSchema = SchemaUtil.toBeamSchema(statement.getMetaData());
  } catch (SQLException e) {
    throw new RuntimeException(
        "Error while determining columns from table: " + inner.getTable(), e);
  }

  if (tableSchema.getFieldCount() < schema.getFieldCount()) {
    throw new RuntimeException("Input schema has more fields than actual table.");
  }

  // Table columns that have no field of the same name (ignoring case) in the input schema.
  List<Schema.Field> missingFields =
      tableSchema.getFields().stream()
          .filter(
              tableField ->
                  schema.getFields().stream()
                      .noneMatch(s -> s.getName().equalsIgnoreCase(tableField.getName())))
          .collect(Collectors.toList());

  // allow insert only if missing fields are nullable
  if (checkNullabilityForFields(missingFields)) {
    throw new RuntimeException("Non nullable fields are not allowed without schema.");
  }

  // Pair every table column with the position of its first matching schema field; columns
  // without a match are dropped.
  List<SchemaUtil.FieldWithIndex> tableFilteredFields =
      tableSchema.getFields().stream()
          .map(
              tableField ->
                  schema.getFields().stream()
                      .filter(f -> SchemaUtil.compareSchemaField(tableField, f))
                      .findFirst()
                      .map(
                          match ->
                              SchemaUtil.FieldWithIndex.of(
                                  tableField, schema.getFields().indexOf(match)))
                      .orElse(null))
          .filter(Objects::nonNull)
          .collect(Collectors.toList());

  if (tableFilteredFields.size() != schema.getFieldCount()) {
    throw new RuntimeException("Provided schema doesn't match with database schema.");
  }

  return tableFilteredFields;
}