Java Code Examples for org.apache.avro.Schema#createRecord()

The following examples show how to use org.apache.avro.Schema#createRecord(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AvroUtilitiesTest.java    From pxf with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the record schema that matches the avro file
 * server/pxf-hdfs/src/test/resources/avro/test.avro
 *
 * @return the "example_schema" record holding the fields id (long),
 *         username (string) and followers (array of strings)
 */
private Schema generateTestSchema() {
    List<Schema.Field> columns = new ArrayList<>();
    columns.add(new Schema.Field("id", Schema.create(Schema.Type.LONG), "Id of the user account", null));
    columns.add(new Schema.Field("username", Schema.create(Schema.Type.STRING), "Name of the user account", null));

    // followers is an ARRAY whose items are strings
    Schema followersSchema = Schema.createArray(Schema.create(Schema.Type.STRING));
    columns.add(new Schema.Field("followers", followersSchema, "Users followers", null));

    Schema record = Schema.createRecord("example_schema", "A basic schema for storing messages", "com.example", false);
    record.setFields(columns);
    return record;
}
 
Example 2
Source File: JiraSinkTest.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the schema and the output properties used by the tests
 */
@Before
public void setUp() {
    // single-column record: one string field named "json"
    Schema stringSchema = new AvroRegistry().getConverter(String.class).getSchema();
    schema = Schema.createRecord("jira", null, null, false,
            Collections.singletonList(new Schema.Field("json", stringSchema, null, null, Order.ASCENDING)));
    schema.addProp(TALEND_IS_LOCKED, "true");

    outputProperties = new TJiraOutputProperties("root");

    // connection settings
    outputProperties.connection.hostUrl.setValue("hostValue");
    outputProperties.connection.basicAuthentication.userId.setValue("userIdValue");
    outputProperties.connection.basicAuthentication.password.setValue("passwordValue");

    // output settings
    outputProperties.resource.setValue(Resource.ISSUE);
    outputProperties.schema.schema.setValue(schema);
    outputProperties.action.setValue(Action.INSERT);
    outputProperties.deleteSubtasks.setValue(true);
}
 
Example 3
Source File: MarkLogicOutputProperties.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the "marklogic" record schema with the two fields docId and docContent,
 * marks it with the TALEND_IS_LOCKED property and sets it as both the dataset
 * main schema and the flow schema.
 */
private void setupMainAndFlowSchemas() {
    Schema stringSchema = AvroUtils._string();
    Schema objectSchema = AvroUtils._bytes();

    // Talend type should be Object (but avro type is bytes[])
    objectSchema.addProp(SchemaConstants.JAVA_CLASS_FLAG, "java.lang.Object");

    Schema.Field docIdField = new Schema.Field("docId", stringSchema, null, (Object) null, Schema.Field.Order.ASCENDING);
    docIdField.addProp(SchemaConstants.TALEND_COLUMN_IS_KEY, "true");
    Schema.Field docContentField = new Schema.Field("docContent", objectSchema, null, (Object) null,
            Schema.Field.Order.IGNORE);
    List<Schema.Field> fields = new ArrayList<>();
    fields.add(docIdField);
    fields.add(docContentField);
    Schema initialSchema = Schema.createRecord("marklogic", null, null, false, fields);
    initialSchema.addProp(TALEND_IS_LOCKED, "true");
    // The local list is not cleared afterwards: the record schema keeps its own copy of
    // the fields, so emptying this list had no effect on the schema and the list is
    // simply discarded when the method returns.

    datasetProperties.main.schema.setValue(initialSchema);
    schemaFlow.schema.setValue(initialSchema);
}
 
Example 4
Source File: JDBCAvroRegistry.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Derives a record schema named "DYNAMIC" from result set metadata, creating one
 * field per column through {@code sqlType2Avro}.
 *
 * @param metadata metadata of the result set to describe
 * @return record schema with one field per column, in column order
 * @throws SQLException propagated from the metadata accessors
 */
protected Schema inferSchemaResultSetMetaData(ResultSetMetaData metadata) throws SQLException {
    List<Field> columns = new ArrayList<>();

    final int columnCount = metadata.getColumnCount();
    // JDBC column indexes are 1-based
    for (int column = 1; column <= columnCount; column++) {
        int precision = metadata.getPrecision(column);
        int scale = metadata.getScale(column);
        boolean nullable = metadata.isNullable(column) == ResultSetMetaData.columnNullable;

        int sqlType = metadata.getColumnType(column);
        String label = metadata.getColumnLabel(column);
        String columnName = metadata.getColumnName(column);

        // key information is not necessary for the result schema from the query statement
        columns.add(sqlType2Avro(precision, scale, sqlType, nullable, label, columnName, null, false));
    }

    return Schema.createRecord("DYNAMIC", null, null, false, columns);
}
 
Example 5
Source File: TJiraOutputPropertiesTest.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that {@link TJiraOutputProperties#afterAction()} installs the schema
 * expected for the Delete action once that action has been selected
 */
@Test
public void testAfterActionDelete() {
    // expected: locked "jira" record with a single string field "id"
    Schema stringSchema = new AvroRegistry().getConverter(String.class).getSchema();
    Schema expectedSchema = Schema.createRecord("jira", null, null, false,
            Collections.singletonList(new Schema.Field("id", stringSchema, null, null, Order.ASCENDING)));
    expectedSchema.addProp(TALEND_IS_LOCKED, "true");

    TJiraOutputProperties properties = new TJiraOutputProperties("root");
    properties.init();
    properties.action.setValue(Action.DELETE);
    properties.afterAction();

    assertThat(properties.schema.schema.getValue(), equalTo(expectedSchema));
}
 
Example 6
Source File: AvroRecordToPinotRowGeneratorTest.java    From incubator-pinot with Apache License 2.0 6 votes vote down vote up
// Extracts an Avro record holding only "incomingTime" while requesting both
// "incomingTime" and "outgoingTime", then checks the keys and the extracted value.
@Test
public void testIncomingTimeColumn()
    throws Exception {
  Schema.Field incomingTimeField = new Schema.Field("incomingTime", Schema.create(Schema.Type.LONG), null, null);
  Schema avroSchema = Schema.createRecord(Collections.singletonList(incomingTimeField));
  GenericData.Record avroRecord = new GenericData.Record(avroSchema);
  avroRecord.put("incomingTime", 12345L);

  Set<String> requestedFields = Sets.newHashSet("incomingTime", "outgoingTime");
  AvroRecordExtractor extractor = new AvroRecordExtractor();
  extractor.init(requestedFields, null);

  GenericRow extractedRow = new GenericRow();
  extractor.extract(avroRecord, extractedRow);

  Set<String> extractedKeys = extractedRow.getFieldToValueMap().keySet();
  Assert.assertTrue(extractedKeys.containsAll(Arrays.asList("incomingTime", "outgoingTime")));
  Assert.assertEquals(extractedRow.getValue("incomingTime"), 12345L);
}
 
Example 7
Source File: IssueAdapterFactoryTest.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the shared test schema and indexed record once, before any test runs
 */
@BeforeClass
public static void setUp() {
    // locked "jira" record with a single string field "json"
    Schema stringSchema = new AvroRegistry().getConverter(String.class).getSchema();
    testSchema = Schema.createRecord("jira", null, null, false,
            Collections.singletonList(new Schema.Field("json", stringSchema, null, null, Order.ASCENDING)));
    testSchema.addProp(TALEND_IS_LOCKED, "true");

    issueIndexedRecord = new IssueIndexedRecord(testJson, testSchema);
}
 
Example 8
Source File: SchemaGeneratorUtils.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a new schema from the elements of the inputSchema whose paths are *not* listed in keyPaths.
 * Nested records are processed recursively.
 *
 * @param inputSchema a schema
 * @param keyPaths paths of the elements that are considered as keys
 * @param currentPath path of the sub-element currently being processed
 * @return the new schema, or null when no element is left
 */
private static Schema extractValues(Schema inputSchema, List<String> keyPaths, String currentPath) {
    List<Schema.Field> valueFields = new ArrayList<>();
    for (Field candidate : inputSchema.getFields()) {
        // no separator in front of the first path segment
        String candidatePath = StringUtils.isEmpty(currentPath)
                ? currentPath + candidate.name()
                : currentPath + "." + candidate.name();
        if (keyPaths.contains(candidatePath)) {
            // key element: leave it out
            continue;
        }

        Schema unwrapped = getUnwrappedSchema(candidate);
        if (!unwrapped.getType().equals(Type.RECORD)) {
            // plain element: copy it directly
            valueFields.add(new Field(candidate.name(), candidate.schema(), candidate.doc(), candidate.defaultVal()));
            continue;
        }

        // nested record: keep it only when some of its elements remain
        Schema nestedValues = extractValues(unwrapped, keyPaths, candidatePath);
        if (nestedValues != null) {
            valueFields.add(new Field(candidate.name(), nestedValues, "", ""));
        }
    }

    if (valueFields.isEmpty()) {
        return null;
    }
    try {
        return Schema.createRecord("value_" + inputSchema.getName(), inputSchema.getDoc(),
                inputSchema.getNamespace(), inputSchema.isError(), valueFields);
    } catch (AvroRuntimeException e) {
        // thrown when trying to get the name of an anonymous type; fall back to an anonymous record
        return Schema.createRecord(valueFields);
    }
}
 
Example 9
Source File: TestAvroExport.java    From aliyun-maxcompute-data-collectors with Apache License 2.0 5 votes vote down vote up
// Builds the "myschema" record: the fixed columns "id" (int) and "msg" (string),
// followed by one column for each generator that provides an Avro schema.
private Schema buildAvroSchema(ColumnGenerator... extraCols) {
  List<Field> columns = new ArrayList<Field>();
  columns.add(buildAvroField("id", Schema.Type.INT));
  columns.add(buildAvroField("msg", Schema.Type.STRING));

  // the index advances only for generators that contribute a column
  int nextIdx = 0;
  for (ColumnGenerator generator : extraCols) {
    if (generator.getColumnAvroSchema() == null) {
      continue;
    }
    columns.add(buildAvroField(forIdx(nextIdx++),
        generator.getColumnAvroSchema()));
  }

  Schema record = Schema.createRecord("myschema", null, null, false);
  record.setFields(columns);
  return record;
}
 
Example 10
Source File: TestSpecificReadWrite.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
// Writes cars to a Parquet file, requests a projection without "optionalExtra" and
// "serviceHistory", and checks that those two come back null on every record read.
@Test
public void testProjection() throws IOException {
  Path path = writeCarsToParquetFile(1, CompressionCodecName.UNCOMPRESSED, false);
  Configuration conf = new Configuration(testConf);

  Schema carSchema = Car.getClassSchema();

  // copy every field except the two left out of the projection
  List<Schema.Field> keptFields = new ArrayList<Schema.Field>();
  for (Schema.Field source : carSchema.getFields()) {
    String name = source.name();
    boolean excluded = "optionalExtra".equals(name) || "serviceHistory".equals(name);
    if (!excluded) {
      keptFields.add(new Schema.Field(name, source.schema(), source.doc(), source.defaultVal()));
    }
  }

  Schema projectedSchema = Schema.createRecord(
      carSchema.getName(), carSchema.getDoc(), carSchema.getNamespace(), carSchema.isError());
  projectedSchema.setFields(keptFields);
  AvroReadSupport.setRequestedProjection(conf, projectedSchema);

  try (ParquetReader<Car> reader = new AvroParquetReader<Car>(conf, path)) {
    Car car;
    while ((car = reader.read()) != null) {
      // projected fields are still populated
      assertTrue(car.getDoors() == 4 || car.getDoors() == 5);
      assertNotNull(car.getEngine());
      assertNotNull(car.getMake());
      assertNotNull(car.getModel());
      assertEquals(2010, car.getYear());
      assertNotNull(car.getVin());
      // fields outside the projection are absent
      assertNull(car.getOptionalExtra());
      assertNull(car.getServiceHistory());
    }
  }
}
 
Example 11
Source File: AvroTypeSystem.java    From transport with BSD 2-Clause "Simplified" License 5 votes vote down vote up
// Builds a record schema with one field per entry of fieldTypes; when fieldNames
// is null the fields are named "field0", "field1", ...
@Override
protected Schema createStructType(List<String> fieldNames, List<Schema> fieldTypes) {
  List<Schema.Field> structFields = IntStream.range(0, fieldTypes.size())
      .mapToObj(i -> {
        String name = (fieldNames != null) ? fieldNames.get(i) : "field" + i;
        return new Schema.Field(name, fieldTypes.get(i), null, null);
      })
      .collect(Collectors.toList());
  return Schema.createRecord(structFields);
}
 
Example 12
Source File: JDBCAvroRegistryString.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Derives a record schema named "DYNAMIC" from result set metadata, creating one
 * field per column through {@code sqlType2Avro}. Each column label is first passed
 * through {@code NameUtil.correct} together with the names already used.
 *
 * @param metadata metadata of the result set to describe
 * @return record schema with one field per column, in column order
 * @throws SQLException propagated from the metadata accessors
 */
protected Schema inferSchemaResultSetMetaData(ResultSetMetaData metadata) throws SQLException {
    List<Field> columns = new ArrayList<>();
    Set<String> usedNames = new HashSet<String>();

    final int columnCount = metadata.getColumnCount();
    // JDBC column indexes are 1-based
    for (int column = 1; column <= columnCount; column++) {
        int precision = metadata.getPrecision(column);
        int scale = metadata.getScale(column);
        boolean nullable = metadata.isNullable(column) == ResultSetMetaData.columnNullable;

        int sqlType = metadata.getColumnType(column);
        String label = metadata.getColumnLabel(column);
        String columnName = metadata.getColumnName(column);

        // the name corrector receives the zero-based position of the column
        String uniqueName = NameUtil.correct(label, column - 1, usedNames);
        usedNames.add(uniqueName);

        // key information is not necessary for the result schema from the query statement
        columns.add(sqlType2Avro(precision, scale, sqlType, nullable, uniqueName, columnName, null, false));
    }
    return Schema.createRecord("DYNAMIC", null, null, false, columns);
}
 
Example 13
Source File: AvroKeyValueSinkWriter.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the generic record schema of a key/value pair.
 *
 * @param keySchema schema used for the KEY_FIELD field
 * @param valueSchema schema used for the VALUE_FIELD field
 * @return A record schema with two fields, KEY_FIELD followed by
 *         VALUE_FIELD.
 */
public static Schema getSchema(Schema keySchema, Schema valueSchema) {
	Schema pairSchema = Schema.createRecord(
			KEY_VALUE_PAIR_RECORD_NAME, "A key/value pair", KEY_VALUE_PAIR_RECORD_NAMESPACE, false);

	Schema.Field keyField = new Schema.Field(KEY_FIELD, keySchema, "The key", null);
	Schema.Field valueField = new Schema.Field(VALUE_FIELD, valueSchema, "The value", null);
	pairSchema.setFields(Arrays.asList(keyField, valueField));

	return pairSchema;
}
 
Example 14
Source File: AvroUtils.java    From Cubert with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the schema of the Avro data file behind the given input. When
 * PadDefaultNullsToSchema is set, a copy of that schema is returned in which every
 * field whose schema is a union starting with NULL gets a null default value.
 *
 * NOTE(review): the DataFileReader opened here is never closed; closing it would
 * presumably also close the caller's input - confirm before changing.
 *
 * @param input the Avro data file to read the schema from
 * @return the file schema, or its null-padded copy
 * @throws IOException propagated from opening the data file
 */
public static Schema getSchema(SeekableInput input) throws IOException
{
    DataFileReader<GenericRecord> reader =
            new DataFileReader<GenericRecord>(input, new GenericDatumReader<GenericRecord>());
    Schema fileSchema = reader.getSchema();

    if (!PadDefaultNullsToSchema)
    {
        return fileSchema;
    }

    // "cloned" fields, with the default value set to null where needed
    ArrayList<Field> clonedFields = new ArrayList<Field>();
    for (Field source: fileSchema.getFields())
    {
        Schema fieldSchema = source.schema();

        // pad only fields whose schema is a UNION with NULL as its first branch
        boolean nullFirstUnion = fieldSchema != null
            && fieldSchema.getType().equals(Type.UNION)
            && fieldSchema.getTypes().get(0).getType().equals(Type.NULL);

        JsonNode defaultNode;
        if (nullFirstUnion)
        {
            defaultNode = NullNode.getInstance();
        }
        else
        {
            defaultNode = source.defaultValue();
        }

        clonedFields.add(new Field(source.name(), fieldSchema, source.doc(), defaultNode));
    }

    Schema paddedSchema = Schema.createRecord(fileSchema.getName(), fileSchema.getDoc(),
            fileSchema.getNamespace(), fileSchema.isError());
    paddedSchema.setFields(clonedFields);
    return paddedSchema;
}
 
Example 15
Source File: TypeConverterUtils.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a new record schema from the input schema in which the element designated by
 * `converterPath` gets the schema of the requested output type.
 * <p>
 * One step is popped from `converterPath` per call, so the caller's stack is consumed.
 *
 * @param inputSchema the record schema to transform
 * @param converterPath remaining steps to the element to convert, next step on top
 * @param outputType output type of the converted element
 * @param outputFormat output format passed along with the output type
 * @return a record with the same name, doc, namespace and error flag as the input
 */
public static Schema convertSchema(Schema inputSchema, Stack<String> converterPath, TypeConverterOutputTypes outputType,
        String outputFormat) {
    List<Schema.Field> convertedFields = new ArrayList<>();
    String targetName = converterPath.pop();
    for (Schema.Field source : inputSchema.getFields()) {
        Schema unwrapped = AvroUtils.unwrapIfNullable(source.schema());
        Schema resultSchema;
        if (!source.name().equals(targetName)) {
            // Not on the path to convert: the schema is copied unchanged
            resultSchema = source.schema();
        } else if (converterPath.isEmpty()) {
            // This is the exact element to convert
            resultSchema = TypeConverterUtils.getSchema(outputType, outputFormat);
            // The output stays nullable when the input is nullable
            if (AvroUtils.isNullable(source.schema())) {
                resultSchema = AvroUtils.wrapAsNullable(resultSchema);
            }
        } else {
            // On the path but not at its end: descend into the nested schema
            resultSchema = TypeConverterUtils.convertSchema(unwrapped, converterPath, outputType, outputFormat);
        }
        convertedFields.add(new Schema.Field(source.name(), resultSchema, source.doc(), source.defaultVal()));
    }
    return Schema.createRecord(inputSchema.getName(), inputSchema.getDoc(), inputSchema.getNamespace(),
            inputSchema.isError(), convertedFields);
}
 
Example 16
Source File: SalesforceTestBase.java    From components with Apache License 2.0 5 votes vote down vote up
// Activates the "moduleName" property on the reference form, sets the module name and
// installs a record schema without fields that has "include all fields" switched on.
protected void setupModuleWithEmptySchema(SalesforceModuleProperties moduleProps, String module) throws Throwable {
    Form referenceForm = moduleProps.getForm(Form.REFERENCE);
    SalesforceModuleProperties activated = (SalesforceModuleProperties) PropertiesTestUtils
            .checkAndBeforeActivate(getComponentService(), referenceForm, "moduleName", moduleProps);
    activated.moduleName.setValue(module);

    Schema noFieldsSchema = Schema.createRecord(module, null, null, false);
    noFieldsSchema.setFields(new ArrayList<Schema.Field>());
    activated.main.schema.setValue(AvroUtils.setIncludeAllFields(noFieldsSchema, true));
}
 
Example 17
Source File: TestAvroSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
// Converts a Parquet int64 column annotated as DECIMAL(9,2) and expects a plain
// Avro long field.
@Test
public void testDecimalLongType() throws Exception {
  Schema.Field decField = new Schema.Field("dec", Schema.create(LONG), null, null);
  Schema expected = Schema.createRecord("myrecord", null, null, false, Arrays.asList(decField));

  String parquetSchema =
      "message myrecord {\n" +
          "  required int64 dec (DECIMAL(9,2));\n" +
          "}\n";

  // the decimal portion is lost because it isn't valid in Avro
  testParquetToAvroConversion(expected, parquetSchema);
}
 
Example 18
Source File: DefinitionToAvroVisitor.java    From bunsen with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the converter for a reference element, building it on first use and caching it
 * in visitedConverters under the full record name.
 *
 * @param elementName name of the visited element (not used by this method)
 * @param referenceTypes the reference types the element can contain
 * @param children the child fields of the reference element
 * @return the cached or newly built converter
 */
@Override
public HapiConverter<Schema> visitReference(String elementName,
    List<String> referenceTypes,
    List<StructureField<HapiConverter<Schema>>> children) {

  // Generate a record name based on the type of references it can contain.
  String recordName = String.join("", referenceTypes) + "Reference";

  String fullName = basePackage + "." + recordName;

  HapiConverter<Schema> converter = visitedConverters.get(fullName);

  if (converter == null) {

    // Add direct references. The list is extended with the children below, so it is
    // collected into an explicit ArrayList: Collectors.toList() makes no guarantee
    // that the list it returns is mutable.
    List<StructureField<HapiConverter<Schema>>> fieldsWithReferences =
        referenceTypes.stream()
            .map(refUri -> {

              // Keep only the part after the last '/'
              String relativeType = refUri.substring(refUri.lastIndexOf('/') + 1);

              return new StructureField<HapiConverter<Schema>>("reference",
                  relativeType + "Id",
                  null,
                  false,
                  false,
                  new RelativeValueConverter(relativeType));

            }).collect(Collectors.toCollection(java.util.ArrayList::new));

    fieldsWithReferences.addAll(children);

    // One nullable Avro field per structure field
    List<Field> fields = fieldsWithReferences.stream()
        .map(entry -> new Field(entry.fieldName(),
            nullable(entry.result().getDataType()),
            "Reference field",
            (Object) null))
        .collect(Collectors.toList());

    Schema schema = Schema.createRecord(recordName,
        "Structure for FHIR type " + recordName,
        basePackage,
        false, fields);

    converter = new CompositeToAvroConverter(null,
        fieldsWithReferences,
        schema,
        fhirSupport);

    visitedConverters.put(fullName, converter);
  }

  return converter;
}
 
Example 19
Source File: TestCircularReferences.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Round-trips a parent record and a child record that point at each other through
 * AvroTestUtil.write / AvroTestUtil.read and checks the values read back on both.
 */
@Test
public void test() throws IOException {
  // The data model gets the two conversions supplied by the ReferenceManager
  ReferenceManager manager = new ReferenceManager();
  GenericData model = new GenericData();
  model.addLogicalTypeConversion(manager.getTracker());
  model.addLogicalTypeConversion(manager.getHandler());

  // Created without fields here; its fields are set further down, once the child schema exists
  Schema parentSchema = Schema.createRecord("Parent", null, null, false);

  Schema placeholderSchema = Schema.createRecord("Placeholder", null, null, false);
  List<Schema.Field> placeholderFields = new ArrayList<Schema.Field>();
  placeholderFields.add( // at least one field is needed to be a valid schema
      new Schema.Field("id", Schema.create(Schema.Type.LONG), null, null));
  placeholderSchema.setFields(placeholderFields);

  // NOTE(review): presumably "id" names the field used to identify a referenced record - confirm
  Referenceable idRef = new Referenceable("id");

  // Schema of the child's "parent" field: null, a long, or the placeholder record
  Schema parentRefSchema = Schema.createUnion(
      Schema.create(Schema.Type.NULL),
      Schema.create(Schema.Type.LONG),
      idRef.addToSchema(placeholderSchema));

  // NOTE(review): presumably "parent" names the field that holds the reference - confirm
  Reference parentRef = new Reference("parent");

  // Child record: a string "c" plus the "parent" field
  List<Schema.Field> childFields = new ArrayList<Schema.Field>();
  childFields.add(new Schema.Field("c", Schema.create(Schema.Type.STRING), null, null));
  childFields.add(new Schema.Field("parent", parentRefSchema, null, null));
  Schema childSchema = parentRef.addToSchema(
      Schema.createRecord("Child", null, null, false, childFields));

  // Parent record: long "id", string "p" and the nested "child" record
  List<Schema.Field> parentFields = new ArrayList<Schema.Field>();
  parentFields.add(new Schema.Field("id", Schema.create(Schema.Type.LONG), null, null));
  parentFields.add(new Schema.Field("p", Schema.create(Schema.Type.STRING), null, null));
  parentFields.add(new Schema.Field("child", childSchema, null, null));
  parentSchema.setFields(parentFields);

  Schema schema = idRef.addToSchema(parentSchema);

  LOG.debug("Schema: {}", schema.toString(true));

  // Build the cycle: parent -> child -> parent
  Record parent = new Record(schema);
  parent.put("id", 1L);
  parent.put("p", "parent data!");

  Record child = new Record(childSchema);
  child.put("c", "child data!");
  child.put("parent", parent);

  parent.put("child", child);

  // serialization round trip
  File data = AvroTestUtil.write(temp, model, schema, parent);
  List<Record> records = AvroTestUtil.read(model, schema, data);

  Record actual = records.get(0);

  // because the record is a recursive structure, equals won't work
  Assert.assertEquals("Should correctly read back the parent id",
      1L, actual.get("id"));
  Assert.assertEquals("Should correctly read back the parent data",
      new Utf8("parent data!"), actual.get("p"));

  Record actualChild = (Record) actual.get("child");
  Assert.assertEquals("Should correctly read back the child data",
      new Utf8("child data!"), actualChild.get("c"));
  // The child's "parent" field must come back as a Record
  Object childParent = actualChild.get("parent");
  Assert.assertTrue("Should have a parent Record object",
      childParent instanceof Record);

  // The record reached through the child must carry the parent's values
  Record childParentRecord = (Record) actualChild.get("parent");
  Assert.assertEquals("Should have the right parent id",
      1L, childParentRecord.get("id"));
  Assert.assertEquals("Should have the right parent data",
      new Utf8("parent data!"), childParentRecord.get("p"));
}
 
Example 20
Source File: DefinitionToAvroVisitor.java    From bunsen with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the converter for a parent extension, building it on first use and caching it
 * in visitedConverters under the full record name. The record name is derived from the
 * part of the extension URL after the last '/'.
 *
 * @param elementName name of the visited element (not used by this method)
 * @param extensionUrl URL of the extension
 * @param children the child fields of the extension
 * @return the cached or newly built converter, or null when there are no children
 */
@Override
public HapiConverter<Schema> visitParentExtension(String elementName,
    String extensionUrl,
    List<StructureField<HapiConverter<Schema>>> children) {

  // Ignore extension fields that don't have declared content for now.
  if (children.isEmpty()) {
    return null;
  }

  String recordNamespace = DefinitionVisitorsUtil.namespaceFor(basePackage, extensionUrl);

  String localPart = extensionUrl.substring(extensionUrl.lastIndexOf('/') + 1);

  // The character class matches '-', '|' and '_'
  String[] parts = localPart.split("[-|_]");

  // Capitalize each part and join them. A leading separator or two adjacent separators
  // produce empty parts; those are skipped, because substring(0, 1) on an empty string
  // throws StringIndexOutOfBoundsException.
  String recordName = Arrays.stream(parts)
      .filter(part -> !part.isEmpty())
      .map(part -> part.substring(0, 1).toUpperCase() + part.substring(1))
      .collect(Collectors.joining());

  String fullName = recordNamespace + "." + recordName;

  HapiConverter<Schema> converter = visitedConverters.get(fullName);

  if (converter == null) {

    // One nullable Avro field per child
    List<Field> fields = children.stream()
        .map(entry ->
            new Field(entry.fieldName(),
                nullable(entry.result().getDataType()),
                "Doc here",
                (Object) null))
        .collect(Collectors.toList());

    Schema schema = Schema.createRecord(recordName,
        "Reference type.",
        recordNamespace,
        false, fields);

    converter = new CompositeToAvroConverter(null,
        children,
        schema,
        fhirSupport,
        extensionUrl);

    visitedConverters.put(fullName, converter);
  }

  return converter;
}