Java Code Examples for org.apache.kafka.connect.data.Schema#fields()

The following examples show how to use org.apache.kafka.connect.data.Schema#fields(). They are taken from open-source projects; the source file, project, and license are noted above each example.
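All of the examples share the same basic pattern: fields() is defined only for STRUCT schemas and returns the struct's fields as a List<Field> in declaration order, with each Field exposing name(), index(), and schema(). Below is a minimal, self-contained sketch of that pattern (the class name and field names are illustrative, not taken from any of the projects listed here):

import java.util.List;

import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaBuilder;

public class SchemaFieldsExample {

  public static void main(String[] args) {
    // Build a simple STRUCT schema; calling fields() on any non-STRUCT schema
    // throws a DataException.
    Schema schema = SchemaBuilder.struct().name("com.example.User")
        .field("id", Schema.INT64_SCHEMA)
        .field("name", Schema.STRING_SCHEMA)
        .field("email", Schema.OPTIONAL_STRING_SCHEMA)
        .build();

    // fields() returns the fields in declaration order.
    List<Field> fields = schema.fields();
    for (Field field : fields) {
      System.out.printf("%d: %s -> %s (optional=%b)%n",
          field.index(), field.name(), field.schema().type(), field.schema().isOptional());
    }
  }
}

The examples that follow apply this same loop to copy, convert, or validate struct fields one at a time.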
Example 1
Source File: AvroUtil.java    From ksql-fork-with-deep-learning-function with Apache License 2.0
private AbstractStreamCreateStatement addAvroFields(
    final AbstractStreamCreateStatement abstractStreamCreateStatement,
    final Schema schema,
    int schemaId
) {
  List<TableElement> elements = new ArrayList<>();
  for (Field field : schema.fields()) {
    TableElement tableElement = new TableElement(field.name().toUpperCase(), SchemaUtil
        .getSqlTypeName(field.schema()));
    elements.add(tableElement);
  }
  StringLiteral schemaIdLiteral = new StringLiteral(String.format("%d", schemaId));
  Map<String, Expression> properties =
      new HashMap<>(abstractStreamCreateStatement.getProperties());
  if (!abstractStreamCreateStatement.getProperties().containsKey(KsqlConstants.AVRO_SCHEMA_ID)) {
    properties.put(KsqlConstants.AVRO_SCHEMA_ID, schemaIdLiteral);
  }

  return abstractStreamCreateStatement.copyWith(elements, properties);
}
 
Example 2
Source File: BaseDocumentationTest.java    From connect-utils with Apache License 2.0
Plugin.SchemaInput buildSchemaInput(Schema schema, String fieldName) {
  ImmutableSchemaInput.Builder schemaInput = ImmutableSchemaInput.builder()
      .name(schema.name())
      .doc(schema.doc())
      .type(schema.type())
      .fieldName(fieldName)
      .isOptional(schema.isOptional());

  if (Schema.Type.STRUCT == schema.type()) {
    for (Field field : schema.fields()) {
      Plugin.SchemaInput fieldSchema = buildSchemaInput(field.schema(), field.name());
      schemaInput.addFields(fieldSchema);
    }
  } else if (Schema.Type.MAP == schema.type()) {
    schemaInput.key(buildSchemaInput(schema.keySchema()));
    schemaInput.value(buildSchemaInput(schema.valueSchema()));
  } else if (Schema.Type.ARRAY == schema.type()) {
    schemaInput.value(buildSchemaInput(schema.valueSchema()));
  }

  return schemaInput.build();
}
 
Example 3
Source File: SpoolDirCsvSourceTaskTest.java    From kafka-connect-spooldir with Apache License 2.0
void writeCSV(File outputFile, Schema schema, List<Struct> structs) throws IOException {
  try (Writer writer = new FileWriter(outputFile)) {
    try (ICSVWriter csvWriter = new CSVWriterBuilder(writer)
        .build()) {
      String[] header = schema.fields().stream().map(Field::name).toArray(String[]::new);
      csvWriter.writeNext(header);

      for (Struct struct : structs) {
        List<String> values = new ArrayList<>();
        for (Field field : schema.fields()) {
          values.add(struct.get(field).toString());
        }
        csvWriter.writeNext(values.toArray(new String[0]));
      }

      csvWriter.flush();
    }
  }
}
 
Example 4
Source File: AvroEventParser.java    From kafka-connect-hbase with Apache License 2.0
/**
 * Parses the value into a map of field name to serialized field bytes.
 *
 * @param schema the Connect schema describing the value
 * @param value  the Connect value to parse
 * @return a map from field name to the field's byte representation; empty if the
 *         value cannot be converted to a GenericRecord
 */
private Map<String, byte[]> parse(final Schema schema, final Object value) {
    final Map<String, byte[]> values = new LinkedHashMap<>();
    try {
        Object data = avroData.fromConnectData(schema, value);
        if (data == null || !(data instanceof GenericRecord)) {
            return EMPTY_MAP;
        }
        final GenericRecord record = (GenericRecord) data;
        final List<Field> fields = schema.fields();
        for (Field field : fields) {
            final byte[] fieldValue = toValue(record, field);
            if (fieldValue == null) {
                continue;
            }
            values.put(field.name(), fieldValue);
        }
        return values;
    } catch (Exception ex) {
        final String errorMsg = String.format("Failed to parse the schema [%s] , value [%s] with ex [%s]" ,
           schema, value, ex.getMessage());
        throw new EventParsingException(errorMsg, ex);
    }
}
 
Example 5
Source File: AggregateNode.java    From ksql-fork-with-deep-learning-function with Apache License 2.0
private Schema buildAggregateSchema(
    final Schema schema,
    final FunctionRegistry functionRegistry,
    final InternalSchema internalSchema
) {
  final SchemaBuilder schemaBuilder = SchemaBuilder.struct();
  final List<Field> fields = schema.fields();
  for (int i = 0; i < getRequiredColumnList().size(); i++) {
    schemaBuilder.field(fields.get(i).name(), fields.get(i).schema());
  }
  for (int aggFunctionVarSuffix = 0;
       aggFunctionVarSuffix < getFunctionList().size(); aggFunctionVarSuffix++) {
    String udafName = getFunctionList().get(aggFunctionVarSuffix).getName()
        .getSuffix();
    KsqlAggregateFunction aggregateFunction = functionRegistry.getAggregateFunction(
        udafName,
        internalSchema.getInternalExpressionList(
            getFunctionList().get(aggFunctionVarSuffix).getArguments()),
        schema
    );
    schemaBuilder.field(
        AggregateExpressionRewriter.AGGREGATE_FUNCTION_VARIABLE_PREFIX
        + aggFunctionVarSuffix,
        aggregateFunction.getReturnType()
    );
  }

  return schemaBuilder.build();
}
 
Example 6
Source File: HiveSchemaConverter.java    From streamx with Apache License 2.0
public static TypeInfo convertStruct(Schema schema) {
  final List<Field> fields = schema.fields();
  final List<String> names = new ArrayList<>(fields.size());
  final List<TypeInfo> types = new ArrayList<>(fields.size());
  for (Field field : fields) {
    names.add(field.name());
    types.add(convert(field.schema()));
  }
  return TypeInfoFactory.getStructTypeInfo(names, types);
}
 
Example 7
Source File: JsonEventParser.java    From kafka-connect-hbase with Apache License 2.0
/**
 * Parses the value into a map of field name to serialized field bytes.
 *
 * @param topic  the topic the record was read from
 * @param schema the Connect schema describing the value
 * @param value  the Connect value to parse
 * @param isKey  whether the given value is the record key or the record value
 * @return a map from field name to the field's byte representation; empty if the
 *         converted payload is null or empty
 * @throws EventParsingException if the value cannot be parsed
 */
public Map<String, byte[]> parse(final String topic, final Schema schema, final Object value, final boolean isKey)
    throws EventParsingException {
    final Map<String, byte[]> values = new LinkedHashMap<>();
    try {
        byte[] valueBytes = null;
        if(isKey) {
            valueBytes = keyConverter.fromConnectData(topic, schema, value);
        } else {
            valueBytes = valueConverter.fromConnectData(topic, schema, value);
        }
        if(valueBytes == null || valueBytes.length == 0) {
            return Collections.emptyMap();
        }

        final JsonNode valueNode = JSON_READER.readValue(valueBytes);
        final Map<String, Object> keyValues = OBJECT_MAPPER.convertValue(valueNode,
          new TypeReference<Map<String, Object>>() {});

        final List<Field> fields = schema.fields();
        for(Field field : fields) {
            final byte[] fieldValue = toValue(keyValues, field);
            if(fieldValue == null) {
                continue;
            }
            values.put(field.name(), fieldValue);
        }
        return values;
    } catch (Exception ex) {
        final String errorMsg = String.format("Failed to parse the schema [%s] , value [%s] with ex [%s]" ,
           schema, value, ex.getMessage());
        throw new EventParsingException(errorMsg, ex);
    }
}
 
Example 8
Source File: RowData.java    From debezium-incubator with Apache License 2.0
@Override
public Struct record(Schema schema) {
    Struct struct = new Struct(schema);
    for (Field field : schema.fields()) {
        Schema cellSchema = getFieldSchema(field.name(), schema);
        CellData cellData = cellMap.get(field.name());
        // only add the cell if it is not null
        if (cellData != null) {
            struct.put(field.name(), cellData.record(cellSchema));
        }
    }
    return struct;
}
 
Example 9
Source File: CellData.java    From debezium-incubator with Apache License 2.0
private Struct cloneValue(Schema valueSchema, Struct value) {
    Struct clonedValue = new Struct(valueSchema);
    for (Field field : valueSchema.fields()) {
        String fieldName = field.name();
        clonedValue.put(fieldName, value.get(fieldName));
    }
    return clonedValue;
}
 
Example 10
Source File: SchemaUtil.java    From ksql-fork-with-deep-learning-function with Apache License 2.0
/**
 * Remove the alias when reading/writing from outside
 */
public static Schema getSchemaWithNoAlias(Schema schema) {
  SchemaBuilder schemaBuilder = SchemaBuilder.struct();
  for (Field field : schema.fields()) {
    String name = getFieldNameWithNoAlias(field);
    schemaBuilder.field(name, field.schema());
  }
  return schemaBuilder.build();
}
 
Example 11
Source File: ParquetHiveUtilTest.java    From streamx with Apache License 2.0
@Test
public void testAlterSchema() throws Exception {
  prepareData(TOPIC, PARTITION);
  Partitioner partitioner = HiveTestUtils.getPartitioner();
  Schema schema = createSchema();
  hive.createTable(hiveDatabase, TOPIC, schema, partitioner);

  String location = "partition=" + String.valueOf(PARTITION);
  hiveMetaStore.addPartition(hiveDatabase, TOPIC, location);

  List<String> expectedColumnNames = new ArrayList<>();
  for (Field field: schema.fields()) {
    expectedColumnNames.add(field.name());
  }

  Table table = hiveMetaStore.getTable(hiveDatabase, TOPIC);
  List<String> actualColumnNames = new ArrayList<>();
  for (FieldSchema column: table.getSd().getCols()) {
    actualColumnNames.add(column.getName());
  }

  assertEquals(expectedColumnNames, actualColumnNames);

  Schema newSchema = createNewSchema();

  hive.alterSchema(hiveDatabase, TOPIC, newSchema);

  String[] expectedResult = {"true", "12", "12", "12.2", "12.2", "NULL", "12"};
  String result = HiveTestUtils.runHive(hiveExec, "SELECT * from " + TOPIC);
  String[] rows = result.split("\n");
  // Only 6 of the 7 records should have been delivered due to flush_size = 3
  assertEquals(6, rows.length);
  for (String row: rows) {
    String[] parts = HiveTestUtils.parseOutput(row);
    for (int j = 0; j < expectedResult.length; ++j) {
      assertEquals(expectedResult[j], parts[j]);
    }
  }
}
 
Example 12
Source File: SchemaUtil.java    From ksql-fork-with-deep-learning-function with Apache License 2.0
public static Schema buildSchemaWithAlias(final Schema schema, final String alias) {
  SchemaBuilder newSchema = SchemaBuilder.struct().name(schema.name());
  for (Field field : schema.fields()) {
    newSchema.field((alias + "." + field.name()), field.schema());
  }
  return newSchema;
}
 
Example 13
Source File: HiveSchemaConverter.java    From streamx with Apache License 2.0
public static List<FieldSchema> convertSchema(Schema schema) {
  List<FieldSchema> columns = new ArrayList<>();
  if (Schema.Type.STRUCT.equals(schema.type())) {
    for (Field field: schema.fields()) {
      columns.add(new FieldSchema(
          field.name(), convert(field.schema()).getTypeName(), field.schema().doc()));
    }
  }
  return columns;
}
 
Example 14
Source File: SchemaUtil.java    From ksql-fork-with-deep-learning-function with Apache License 2.0
public static Optional<Field> getFieldByName(final Schema schema, final String fieldName) {
  if (schema.fields() != null) {
    for (Field field : schema.fields()) {
      if (field.name().equals(fieldName)) {
        return Optional.of(field);
      } else if (field.name().equals(fieldName.substring(fieldName.indexOf(".") + 1))) {
        // Also match when the requested name is qualified with a source alias,
        // e.g. "alias.fieldName".
        return Optional.of(field);
      }
    }
  }
  return Optional.empty();
}
 
Example 15
Source File: AvroData.java    From apicurio-registry with Apache License 2.0
public static List<Field> fields(Schema schema) {
    Schema.Type type = schema.type();
    if (Schema.Type.STRUCT.equals(type)) {
        return schema.fields();
    } else {
        // Schema#fields() is only defined for STRUCT schemas (it throws a
        // DataException for other types), so return null instead of delegating.
        return null;
    }
}
 
Example 16
Source File: HiveIntegrationAvroTest.java    From streamx with Apache License 2.0
@Test
public void testHiveIntegrationAvro() throws Exception {
  Map<String, String> props = createProps();
  props.put(HdfsSinkConnectorConfig.HIVE_INTEGRATION_CONFIG, "true");
  HdfsSinkConnectorConfig config = new HdfsSinkConnectorConfig(props);

  DataWriter hdfsWriter = new DataWriter(config, context, avroData);
  hdfsWriter.recover(TOPIC_PARTITION);

  String key = "key";
  Schema schema = createSchema();
  Struct record = createRecord(schema);

  Collection<SinkRecord> sinkRecords = new ArrayList<>();
  for (long offset = 0; offset < 7; offset++) {
    SinkRecord sinkRecord =
        new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, key, schema, record, offset);

    sinkRecords.add(sinkRecord);
  }

  hdfsWriter.write(sinkRecords);
  hdfsWriter.close(assignment);
  hdfsWriter.stop();

  Table table = hiveMetaStore.getTable(hiveDatabase, TOPIC);
  List<String> expectedColumnNames = new ArrayList<>();
  for (Field field: schema.fields()) {
    expectedColumnNames.add(field.name());
  }

  List<String> actualColumnNames = new ArrayList<>();
  for (FieldSchema column: table.getSd().getCols()) {
    actualColumnNames.add(column.getName());
  }
  assertEquals(expectedColumnNames, actualColumnNames);

  List<String> expectedPartitions = new ArrayList<>();
  String directory = TOPIC + "/" + "partition=" + String.valueOf(PARTITION);
  expectedPartitions.add(FileUtils.directoryName(url, topicsDir, directory));

  List<String> partitions = hiveMetaStore.listPartitions(hiveDatabase, TOPIC, (short)-1);

  assertEquals(expectedPartitions, partitions);
}
 
Example 17
Source File: AbstractConverter.java    From connect-utils with Apache License 2.0
void convertStruct(final T result, Struct struct) {
  final Schema schema = struct.schema();

  for (final Field field : schema.fields()) {
    final String fieldName = field.name();
    log.trace("convertStruct() - Processing '{}'", field.name());
    final Object fieldValue = struct.get(field);

    try {
      if (null == fieldValue) {
        log.trace("convertStruct() - Setting '{}' to null.", fieldName);
        setNullField(result, fieldName);
        continue;
      }

      log.trace("convertStruct() - Field '{}'.field().schema().type() = '{}'", fieldName, field.schema().type());
      switch (field.schema().type()) {
        case STRING:
          log.trace("convertStruct() - Processing '{}' as string.", fieldName);
          setStringField(result, fieldName, (String) fieldValue);
          break;
        case INT8:
          log.trace("convertStruct() - Processing '{}' as int8.", fieldName);
          setInt8Field(result, fieldName, (Byte) fieldValue);
          break;
        case INT16:
          log.trace("convertStruct() - Processing '{}' as int16.", fieldName);
          setInt16Field(result, fieldName, (Short) fieldValue);
          break;
        case INT32:
          if (org.apache.kafka.connect.data.Date.LOGICAL_NAME.equals(field.schema().name())) {
            log.trace("convertStruct() - Processing '{}' as date.", fieldName);
            setDateField(result, fieldName, (Date) fieldValue);
          } else if (org.apache.kafka.connect.data.Time.LOGICAL_NAME.equals(field.schema().name())) {
            log.trace("convertStruct() - Processing '{}' as time.", fieldName);
            setTimeField(result, fieldName, (Date) fieldValue);
          } else {
            Integer int32Value = (Integer) fieldValue;
            log.trace("convertStruct() - Processing '{}' as int32.", fieldName);
            setInt32Field(result, fieldName, int32Value);
          }
          break;
        case INT64:

          if (Timestamp.LOGICAL_NAME.equals(field.schema().name())) {
            log.trace("convertStruct() - Processing '{}' as timestamp.", fieldName);
            setTimestampField(result, fieldName, (Date) fieldValue);
          } else {
            Long int64Value = (Long) fieldValue;
            log.trace("convertStruct() - Processing '{}' as int64.", fieldName);
            setInt64Field(result, fieldName, int64Value);
          }
          break;
        case BYTES:

          if (Decimal.LOGICAL_NAME.equals(field.schema().name())) {
            log.trace("convertStruct() - Processing '{}' as decimal.", fieldName);
            setDecimalField(result, fieldName, (BigDecimal) fieldValue);
          } else {
            byte[] bytes = (byte[]) fieldValue;
            log.trace("convertStruct() - Processing '{}' as bytes.", fieldName);
            setBytesField(result, fieldName, bytes);
          }
          break;
        case FLOAT32:
          log.trace("convertStruct() - Processing '{}' as float32.", fieldName);
          setFloat32Field(result, fieldName, (Float) fieldValue);
          break;
        case FLOAT64:
          log.trace("convertStruct() - Processing '{}' as float64.", fieldName);
          setFloat64Field(result, fieldName, (Double) fieldValue);
          break;
        case BOOLEAN:
          log.trace("convertStruct() - Processing '{}' as boolean.", fieldName);
          setBooleanField(result, fieldName, (Boolean) fieldValue);
          break;
        case STRUCT:
          log.trace("convertStruct() - Processing '{}' as struct.", fieldName);
          setStructField(result, fieldName, (Struct) fieldValue);
          break;
        case ARRAY:
          log.trace("convertStruct() - Processing '{}' as array.", fieldName);
          setArray(result, fieldName, schema, (List) fieldValue);
          break;
        case MAP:
          log.trace("convertStruct() - Processing '{}' as map.", fieldName);
          setMap(result, fieldName, schema, (Map) fieldValue);
          break;
        default:
          throw new DataException("Unsupported schema.type(): " + schema.type());
      }
    } catch (Exception ex) {
      throw new DataException(
          String.format("Exception thrown while processing field '%s'", fieldName),
          ex
      );
    }
  }
}
 
Example 18
Source File: HiveIntegrationAvroTest.java    From streamx with Apache License 2.0
@Test
public void testSyncWithHiveAvro() throws Exception {
  DataWriter hdfsWriter = new DataWriter(connectorConfig, context, avroData);
  hdfsWriter.recover(TOPIC_PARTITION);

  String key = "key";
  Schema schema = createSchema();
  Struct record = createRecord(schema);

  Collection<SinkRecord> sinkRecords = new ArrayList<>();
  for (long offset = 0; offset < 7; offset++) {
    SinkRecord sinkRecord =
        new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, key, schema, record, offset);
    sinkRecords.add(sinkRecord);
  }

  hdfsWriter.write(sinkRecords);
  hdfsWriter.close(assignment);
  hdfsWriter.stop();

  Map<String, String> props = createProps();
  props.put(HdfsSinkConnectorConfig.HIVE_INTEGRATION_CONFIG, "true");
  HdfsSinkConnectorConfig config = new HdfsSinkConnectorConfig(props);

  hdfsWriter = new DataWriter(config, context, avroData);
  hdfsWriter.syncWithHive();

  List<String> expectedColumnNames = new ArrayList<>();
  for (Field field: schema.fields()) {
    expectedColumnNames.add(field.name());
  }

  Table table = hiveMetaStore.getTable(hiveDatabase, TOPIC);
  List<String> actualColumnNames = new ArrayList<>();
  for (FieldSchema column: table.getSd().getCols()) {
    actualColumnNames.add(column.getName());
  }
  assertEquals(expectedColumnNames, actualColumnNames);

  List<String> expectedPartitions = new ArrayList<>();
  String directory = TOPIC + "/" + "partition=" + String.valueOf(PARTITION);
  expectedPartitions.add(FileUtils.directoryName(url, topicsDir, directory));

  List<String> partitions = hiveMetaStore.listPartitions(hiveDatabase, TOPIC, (short)-1);

  assertEquals(expectedPartitions, partitions);

  hdfsWriter.close(assignment);
  hdfsWriter.stop();
}
 
Example 19
Source File: HiveIntegrationParquetTest.java    From streamx with Apache License 2.0
@Test
public void testHiveIntegrationParquet() throws Exception {
  Map<String, String> props = createProps();
  props.put(HdfsSinkConnectorConfig.HIVE_INTEGRATION_CONFIG, "true");
  HdfsSinkConnectorConfig config = new HdfsSinkConnectorConfig(props);

  DataWriter hdfsWriter = new DataWriter(config, context, avroData);
  hdfsWriter.recover(TOPIC_PARTITION);

  String key = "key";
  Schema schema = createSchema();
  Struct record = createRecord(schema);

  Collection<SinkRecord> sinkRecords = new ArrayList<>();
  for (long offset = 0; offset < 7; offset++) {
    SinkRecord sinkRecord =
        new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, key, schema, record, offset);

    sinkRecords.add(sinkRecord);
  }
  hdfsWriter.write(sinkRecords);
  hdfsWriter.close(assignment);
  hdfsWriter.stop();

  Table table = hiveMetaStore.getTable(hiveDatabase, TOPIC);
  List<String> expectedColumnNames = new ArrayList<>();
  for (Field field: schema.fields()) {
    expectedColumnNames.add(field.name());
  }

  List<String> actualColumnNames = new ArrayList<>();
  for (FieldSchema column: table.getSd().getCols()) {
    actualColumnNames.add(column.getName());
  }
  assertEquals(expectedColumnNames, actualColumnNames);

  List<String> expectedPartitions = new ArrayList<>();
  String directory = TOPIC + "/" + "partition=" + String.valueOf(PARTITION);
  expectedPartitions.add(FileUtils.directoryName(url, topicsDir, directory));

  List<String> partitions = hiveMetaStore.listPartitions(hiveDatabase, TOPIC, (short)-1);

  assertEquals(expectedPartitions, partitions);
}
 
Example 20
Source File: HiveIntegrationParquetTest.java    From streamx with Apache License 2.0
@Test
public void testSyncWithHiveParquet() throws Exception {
  Map<String, String> props = createProps();
  HdfsSinkConnectorConfig connectorConfig = new HdfsSinkConnectorConfig(props);

  DataWriter hdfsWriter = new DataWriter(connectorConfig, context, avroData);
  hdfsWriter.recover(TOPIC_PARTITION);

  String key = "key";
  Schema schema = createSchema();
  Struct record = createRecord(schema);

  Collection<SinkRecord> sinkRecords = new ArrayList<>();
  for (long offset = 0; offset < 7; offset++) {
    SinkRecord sinkRecord =
        new SinkRecord(TOPIC, PARTITION, Schema.STRING_SCHEMA, key, schema, record, offset);
    sinkRecords.add(sinkRecord);
  }

  hdfsWriter.write(sinkRecords);
  hdfsWriter.close(assignment);
  hdfsWriter.stop();

  props = createProps();
  props.put(HdfsSinkConnectorConfig.HIVE_INTEGRATION_CONFIG, "true");
  HdfsSinkConnectorConfig config = new HdfsSinkConnectorConfig(props);

  hdfsWriter = new DataWriter(config, context, avroData);
  hdfsWriter.syncWithHive();

  List<String> expectedColumnNames = new ArrayList<>();
  for (Field field: schema.fields()) {
    expectedColumnNames.add(field.name());
  }

  Table table = hiveMetaStore.getTable(hiveDatabase, TOPIC);
  List<String> actualColumnNames = new ArrayList<>();
  for (FieldSchema column: table.getSd().getCols()) {
    actualColumnNames.add(column.getName());
  }
  assertEquals(expectedColumnNames, actualColumnNames);

  List<String> expectedPartitions = new ArrayList<>();
  String directory = TOPIC + "/" + "partition=" + String.valueOf(PARTITION);
  expectedPartitions.add(FileUtils.directoryName(url, topicsDir, directory));

  List<String> partitions = hiveMetaStore.listPartitions(hiveDatabase, TOPIC, (short)-1);

  assertEquals(expectedPartitions, partitions);

  hdfsWriter.close(assignment);
  hdfsWriter.stop();
}