Java Code Examples for org.apache.avro.Schema#createRecord()

The following examples show how to use org.apache.avro.Schema#createRecord(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Extracts an Avro record that only carries {@code incomingTime} while two source fields are
 * requested, then checks that the resulting row has a key for both requested fields and that
 * the populated value was carried over.
 */
@Test
public void testIncomingTimeColumn()
    throws Exception {
  // Record schema with a single long field named "incomingTime".
  Schema.Field incomingTimeField =
      new Schema.Field("incomingTime", Schema.create(Schema.Type.LONG), null, null);
  Schema recordSchema = Schema.createRecord(Collections.singletonList(incomingTimeField));
  GenericData.Record record = new GenericData.Record(recordSchema);
  record.put("incomingTime", 12345L);

  // Ask for one field the record has and one it does not have.
  Set<String> requestedFields = Sets.newHashSet("incomingTime", "outgoingTime");
  AvroRecordExtractor extractor = new AvroRecordExtractor();
  extractor.init(requestedFields, null);

  GenericRow row = new GenericRow();
  extractor.extract(record, row);

  Assert.assertTrue(
      row.getFieldToValueMap().keySet().containsAll(Arrays.asList("incomingTime", "outgoingTime")));
  Assert.assertEquals(row.getValue("incomingTime"), 12345L);
}
 
Example 2
/**
 * Builds the two-column "marklogic" record schema (docId, docContent), tags it with the
 * TALEND_IS_LOCKED property, and installs it as both the dataset main schema and the flow
 * schema.
 */
private void setupMainAndFlowSchemas() {
    Schema stringSchema = AvroUtils._string();
    Schema objectSchema = AvroUtils._bytes();

    // Talend type should be Object (but avro type is bytes[])
    objectSchema.addProp(SchemaConstants.JAVA_CLASS_FLAG, "java.lang.Object");

    // docId: string column flagged as the key column.
    Schema.Field docIdField = new Schema.Field("docId", stringSchema, null, (Object) null, Schema.Field.Order.ASCENDING);
    docIdField.addProp(SchemaConstants.TALEND_COLUMN_IS_KEY, "true");
    // docContent: bytes column carrying the java.lang.Object class flag set above.
    Schema.Field docContentField = new Schema.Field("docContent", objectSchema, null, (Object) null,
            Schema.Field.Order.IGNORE);

    List<Schema.Field> fields = new ArrayList<>();
    fields.add(docIdField);
    fields.add(docContentField);
    // createRecord copies the fields into the schema's own list, so the local list does not
    // need to be cleared afterwards.
    Schema initialSchema = Schema.createRecord("marklogic", null, null, false, fields);
    initialSchema.addProp(TALEND_IS_LOCKED, "true");

    datasetProperties.main.schema.setValue(initialSchema);
    schemaFlow.schema.setValue(initialSchema);
}
 
Example 3
/**
 * Verifies that {@link TJiraOutputProperties#afterAction()} installs the expected Delete
 * schema (a single "id" string column) once the Delete action has been selected.
 */
@Test
public void testAfterActionDelete() {
    // Expected: "jira" record with one ascending string column "id", tagged TALEND_IS_LOCKED.
    Schema idColumnSchema = new AvroRegistry().getConverter(String.class).getSchema();
    Schema.Field idColumn = new Schema.Field("id", idColumnSchema, null, null, Order.ASCENDING);
    Schema expectedSchema = Schema.createRecord("jira", null, null, false, Collections.singletonList(idColumn));
    expectedSchema.addProp(TALEND_IS_LOCKED, "true");

    TJiraOutputProperties outputProps = new TJiraOutputProperties("root");
    outputProps.init();
    outputProps.action.setValue(Action.DELETE);

    outputProps.afterAction();

    Schema actualSchema = outputProps.schema.schema.getValue();
    assertThat(actualSchema, equalTo(expectedSchema));
}
 
Example 4
Source Project: components   File: JDBCAvroRegistry.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Derives a record schema named "DYNAMIC" from JDBC result-set metadata, creating one field
 * per column through {@code sqlType2Avro}.
 *
 * @param metadata metadata describing the columns of a result set
 * @return record schema holding one field per column, in column order
 * @throws SQLException if reading the metadata fails
 */
protected Schema inferSchemaResultSetMetaData(ResultSetMetaData metadata) throws SQLException {
    List<Field> avroFields = new ArrayList<>();

    final int columnCount = metadata.getColumnCount();
    // JDBC column positions start at 1.
    for (int column = 1; column <= columnCount; column++) {
        int precision = metadata.getPrecision(column);
        int scale = metadata.getScale(column);
        boolean nullable = metadata.isNullable(column) == ResultSetMetaData.columnNullable;

        int sqlType = metadata.getColumnType(column);
        String label = metadata.getColumnLabel(column);
        String columnName = metadata.getColumnName(column);

        // Key information is not necessary for the result schema of a query statement,
        // so every column is reported as non-key.
        avroFields.add(sqlType2Avro(precision, scale, sqlType, nullable, label, columnName, null, false));
    }

    return Schema.createRecord("DYNAMIC", null, null, false, avroFields);
}
 
Example 5
Source Project: components   File: JiraSinkTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the fixtures shared by the tests: a "jira" record schema with a single string
 * column "json" tagged with TALEND_IS_LOCKED, and output properties configured with
 * placeholder connection values for inserting issues.
 */
@Before
public void setUp() {
    Schema jsonColumnSchema = new AvroRegistry().getConverter(String.class).getSchema();
    Schema.Field jsonColumn = new Schema.Field("json", jsonColumnSchema, null, null, Order.ASCENDING);
    schema = Schema.createRecord("jira", null, null, false, Collections.singletonList(jsonColumn));
    schema.addProp(TALEND_IS_LOCKED, "true");

    outputProperties = new TJiraOutputProperties("root");

    // Connection: host and basic-authentication credentials.
    outputProperties.connection.hostUrl.setValue("hostValue");
    outputProperties.connection.basicAuthentication.userId.setValue("userIdValue");
    outputProperties.connection.basicAuthentication.password.setValue("passwordValue");

    // Output behaviour: insert issues using the schema built above, with deleteSubtasks on.
    outputProperties.resource.setValue(Resource.ISSUE);
    outputProperties.schema.schema.setValue(schema);
    outputProperties.action.setValue(Action.INSERT);
    outputProperties.deleteSubtasks.setValue(true);
}
 
Example 6
Source Project: pxf   File: AvroUtilitiesTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds in code the schema intended to match the avro file
 * server/pxf-hdfs/src/test/resources/avro/test.avro
 *
 * @return the "example_schema" record (namespace "com.example") with the fields
 *         id (long), username (string) and followers (array of string)
 */
private Schema generateTestSchema() {
    Schema idSchema = Schema.create(Schema.Type.LONG);
    Schema usernameSchema = Schema.create(Schema.Type.STRING);
    // followers is an ARRAY of strings
    Schema followersSchema = Schema.createArray(Schema.create(Schema.Type.STRING));

    List<Schema.Field> recordFields = new ArrayList<>();
    recordFields.add(new Schema.Field("id", idSchema, "Id of the user account", null));
    recordFields.add(new Schema.Field("username", usernameSchema, "Name of the user account", null));
    recordFields.add(new Schema.Field("followers", followersSchema, "Users followers", null));

    Schema recordSchema =
            Schema.createRecord("example_schema", "A basic schema for storing messages", "com.example", false);
    recordSchema.setFields(recordFields);
    return recordSchema;
}
 
Example 7
/**
 * Checks the conversion of a Parquet {@code int64} column annotated as DECIMAL(9,2): the
 * expected Avro schema is a "myrecord" record with a plain long field "dec".
 */
@Test
public void testDecimalLongType() throws Exception {
  Schema.Field decField = new Schema.Field("dec", Schema.create(LONG), null, null);
  Schema expected = Schema.createRecord("myrecord", null, null, false, Arrays.asList(decField));

  // the decimal portion is lost because it isn't valid in Avro
  String parquetMessage =
      "message myrecord {\n"
          + "  required int64 dec (DECIMAL(9,2));\n"
          + "}\n";
  testParquetToAvroConversion(expected, parquetMessage);
}
 
Example 8
Source Project: components   File: SalesforceTestBase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs the "moduleName" before-activate check on the reference form, selects {@code module},
 * and assigns the module an empty record schema that has been passed through
 * {@code AvroUtils.setIncludeAllFields(schema, true)}.
 *
 * @param moduleProps module properties to prepare
 * @param module module name, also used as the record name of the empty schema
 */
protected void setupModuleWithEmptySchema(SalesforceModuleProperties moduleProps, String module) throws Throwable {
    Form referenceForm = moduleProps.getForm(Form.REFERENCE);
    // The activation helper returns the properties instance that is used from here on.
    SalesforceModuleProperties activatedProps = (SalesforceModuleProperties) PropertiesTestUtils
            .checkAndBeforeActivate(getComponentService(), referenceForm, "moduleName", moduleProps);
    activatedProps.moduleName.setValue(module);

    Schema fieldlessSchema = Schema.createRecord(module, null, null, false);
    fieldlessSchema.setFields(new ArrayList<Schema.Field>());
    activatedProps.main.schema.setValue(AvroUtils.setIncludeAllFields(fieldlessSchema, true));
}
 
Example 9
Source Project: components   File: TypeConverterUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Transform input schema to a new schema.
 * <p>
 * Every field of {@code inputSchema} is copied. The field whose name matches the next step
 * popped from {@code converterPath} either gets the schema for {@code outputType} /
 * {@code outputFormat} (when the path is exhausted) or is converted recursively (when steps
 * remain). Note that {@code converterPath} is consumed: one step is popped per level.
 * <p>
 * NOTE(review): when descending, the nested schema is built from the unwrapped field schema
 * and is not wrapped as nullable again; confirm that this is intended.
 *
 * @param inputSchema record schema to copy
 * @param converterPath remaining path steps, next step on top of the stack
 * @param outputType type the target field is converted to
 * @param outputFormat format passed along with {@code outputType}
 * @return new record schema with the same name, doc, namespace and error flag as the input
 */
public static Schema convertSchema(Schema inputSchema, Stack<String> converterPath, TypeConverterOutputTypes outputType,
        String outputFormat) {
    List<Schema.Field> convertedFields = new ArrayList<>();
    String stepName = converterPath.pop();

    for (Schema.Field field : inputSchema.getFields()) {
        Schema unwrapped = AvroUtils.unwrapIfNullable(field.schema());
        Schema targetSchema;

        if (!field.name().equals(stepName)) {
            // Not on the path to convert: the schema is copied as is.
            targetSchema = field.schema();
        } else if (converterPath.isEmpty()) {
            // This is the exact element to convert.
            targetSchema = TypeConverterUtils.getSchema(outputType, outputFormat);
            // Keep the output nullable when the input was nullable.
            if (AvroUtils.isNullable(field.schema())) {
                targetSchema = AvroUtils.wrapAsNullable(targetSchema);
            }
        } else {
            // More steps remain: go down one level in the hierarchy.
            targetSchema = TypeConverterUtils.convertSchema(unwrapped, converterPath, outputType, outputFormat);
        }

        convertedFields.add(new Schema.Field(field.name(), targetSchema, field.doc(), field.defaultVal()));
    }

    return Schema.createRecord(inputSchema.getName(), inputSchema.getDoc(), inputSchema.getNamespace(),
            inputSchema.isError(), convertedFields);
}
 
Example 10
Source Project: Cubert   File: AvroUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads the schema of an Avro data file.
 * <p>
 * When {@code PadDefaultNullsToSchema} is set, the returned schema is a rebuilt record in
 * which every field whose schema is a union starting with NULL gets a null default value;
 * all other fields keep their existing default.
 * <p>
 * NOTE(review): the DataFileReader opened here is never closed in this method; confirm who
 * owns {@code input} before adding a close.
 *
 * @param input the Avro data file to inspect
 * @return the file schema, padded with null defaults if padding is enabled
 * @throws IOException if the file header cannot be read
 */
public static Schema getSchema(SeekableInput input) throws IOException
{
    DataFileReader<GenericRecord> reader =
            new DataFileReader<GenericRecord>(input, new GenericDatumReader<GenericRecord>());
    Schema fileSchema = reader.getSchema();

    if (!PadDefaultNullsToSchema)
    {
        return fileSchema;
    }

    // "cloned" fields, with the default value set to null where padding applies
    ArrayList<Field> clonedFields = new ArrayList<Field>();
    for (Field original : fileSchema.getFields())
    {
        Schema fieldSchema = original.schema();

        // pad only fields whose schema is a UNION with NULL as its first branch
        boolean nullFirstUnion = fieldSchema != null
            && fieldSchema.getType().equals(Type.UNION)
            && fieldSchema.getTypes().get(0).getType().equals(Type.NULL);

        JsonNode defaultNode;
        if (nullFirstUnion)
        {
            defaultNode = NullNode.getInstance();
        }
        else
        {
            defaultNode = original.defaultValue();
        }

        clonedFields.add(new Field(original.name(), original.schema(), original.doc(), defaultNode));
    }

    Schema paddedSchema = Schema.createRecord(fileSchema.getName(),
                                              fileSchema.getDoc(),
                                              fileSchema.getNamespace(),
                                              fileSchema.isError());
    paddedSchema.setFields(clonedFields);
    return paddedSchema;
}
 
Example 11
Source Project: flink   File: AvroKeyValueSinkWriter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds the generic record schema for a key/value pair.
 *
 * @param keySchema schema of the field named by KEY_FIELD
 * @param valueSchema schema of the field named by VALUE_FIELD
 * @return A record schema with two fields, the key field followed by
 *         the value field.
 */
public static Schema getSchema(Schema keySchema, Schema valueSchema) {
	Schema pairSchema = Schema.createRecord(
			KEY_VALUE_PAIR_RECORD_NAME, "A key/value pair", KEY_VALUE_PAIR_RECORD_NAMESPACE, false);

	Schema.Field keyField = new Schema.Field(KEY_FIELD, keySchema, "The key", null);
	Schema.Field valueField = new Schema.Field(VALUE_FIELD, valueSchema, "The value", null);
	pairSchema.setFields(Arrays.asList(keyField, valueField));

	return pairSchema;
}
 
Example 12
/**
 * Derives a record schema named "DYNAMIC" from JDBC result-set metadata. Each column label
 * is first passed through {@code NameUtil.correct} together with the names already used,
 * and the corrected name becomes the field name.
 *
 * @param metadata metadata describing the columns of a result set
 * @return record schema holding one field per column, in column order
 * @throws SQLException if reading the metadata fails
 */
protected Schema inferSchemaResultSetMetaData(ResultSetMetaData metadata) throws SQLException {
    List<Field> avroFields = new ArrayList<>();
    Set<String> usedNames = new HashSet<String>();

    final int columnCount = metadata.getColumnCount();
    // JDBC column positions start at 1; the name corrector receives the 0-based position.
    for (int column = 1; column <= columnCount; column++) {
        int precision = metadata.getPrecision(column);
        int scale = metadata.getScale(column);
        boolean nullable = metadata.isNullable(column) == ResultSetMetaData.columnNullable;

        int sqlType = metadata.getColumnType(column);
        String label = metadata.getColumnLabel(column);
        String columnName = metadata.getColumnName(column);

        String correctedName = NameUtil.correct(label, column - 1, usedNames);
        usedNames.add(correctedName);

        // Key information is not necessary for the result schema of a query statement,
        // so every column is reported as non-key.
        avroFields.add(sqlType2Avro(precision, scale, sqlType, nullable, correctedName, columnName, null, false));
    }

    return Schema.createRecord("DYNAMIC", null, null, false, avroFields);
}
 
Example 13
/**
 * Creates a record schema from parallel lists of field names and field types. When
 * {@code fieldNames} is null, fields are named "field0", "field1", ... by position.
 */
@Override
protected Schema createStructType(List<String> fieldNames, List<Schema> fieldTypes) {
  List<Schema.Field> structFields = IntStream.range(0, fieldTypes.size())
      .mapToObj(pos -> {
        String name = (fieldNames != null) ? fieldNames.get(pos) : "field" + pos;
        return new Schema.Field(name, fieldTypes.get(pos), null, null);
      })
      .collect(Collectors.toList());
  return Schema.createRecord(structFields);
}
 
Example 14
/**
 * Initializes the shared test arguments once: a "jira" record schema with a single string
 * column "json" tagged with TALEND_IS_LOCKED, and an indexed record built from the test
 * JSON and that schema.
 */
@BeforeClass
public static void setUp() {
    Schema jsonColumnSchema = new AvroRegistry().getConverter(String.class).getSchema();
    Schema.Field jsonColumn = new Schema.Field("json", jsonColumnSchema, null, null, Order.ASCENDING);

    testSchema = Schema.createRecord("jira", null, null, false, Collections.singletonList(jsonColumn));
    testSchema.addProp(TALEND_IS_LOCKED, "true");

    issueIndexedRecord = new IssueIndexedRecord(testJson, testSchema);
}
 
Example 15
/**
 * Writes a Parquet file of cars, requests a projection of the Car schema without the
 * {@code optionalExtra} and {@code serviceHistory} fields, and checks that every record read
 * back has those two fields null while the remaining fields are populated.
 */
@Test
public void testProjection() throws IOException {
  Path path = writeCarsToParquetFile(1, CompressionCodecName.UNCOMPRESSED, false);
  Configuration conf = new Configuration(testConf);

  Schema carSchema = Car.getClassSchema();

  // Copy every field except the two that the projection leaves out.
  List<Schema.Field> keptFields = new ArrayList<Schema.Field>();
  for (Schema.Field source : carSchema.getFields()) {
    String name = source.name();
    boolean leftOut = "optionalExtra".equals(name) || "serviceHistory".equals(name);
    if (!leftOut) {
      keptFields.add(new Schema.Field(name, source.schema(), source.doc(), source.defaultVal()));
    }
  }

  Schema projection = Schema.createRecord(
      carSchema.getName(), carSchema.getDoc(), carSchema.getNamespace(), carSchema.isError());
  projection.setFields(keptFields);
  AvroReadSupport.setRequestedProjection(conf, projection);

  try (ParquetReader<Car> reader = new AvroParquetReader<Car>(conf, path)) {
    Car car;
    while ((car = reader.read()) != null) {
      // Fields that are part of the projection.
      assertTrue(car.getDoors() == 4 || car.getDoors() == 5);
      assertNotNull(car.getEngine());
      assertNotNull(car.getMake());
      assertNotNull(car.getModel());
      assertEquals(2010, car.getYear());
      assertNotNull(car.getVin());
      // Fields left out of the projection.
      assertNull(car.getOptionalExtra());
      assertNull(car.getServiceHistory());
    }
  }
}
 
Example 16
/**
 * Builds the "myschema" record: the fixed fields id (int) and msg (string), followed by one
 * field for each extra column generator that reports a non-null Avro schema. Extra fields
 * are named by {@code forIdx}, counting only the generators that contribute a field.
 */
private Schema buildAvroSchema(ColumnGenerator... extraCols) {
  List<Field> recordFields = new ArrayList<Field>();
  recordFields.add(buildAvroField("id", Schema.Type.INT));
  recordFields.add(buildAvroField("msg", Schema.Type.STRING));

  int nextIdx = 0;
  for (ColumnGenerator generator : extraCols) {
    if (generator.getColumnAvroSchema() == null) {
      // Generators without an Avro schema contribute no field and consume no index.
      continue;
    }
    recordFields.add(buildAvroField(forIdx(nextIdx), generator.getColumnAvroSchema()));
    nextIdx++;
  }

  Schema recordSchema = Schema.createRecord("myschema", null, null, false);
  recordSchema.setFields(recordFields);
  return recordSchema;
}
 
Example 17
Source Project: components   File: SchemaGeneratorUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Create a new schema by extracting elements from the inputSchema that are *not* present in the keyPaths.
 * Record-typed fields are processed recursively; a nested record that ends up with no
 * remaining fields is left out entirely.
 * <p>
 * NOTE(review): nested record fields are rebuilt from the unwrapped schema with an empty doc
 * and "" as default value; confirm that this is intended.
 *
 * @param inputSchema a schema
 * @param keyPaths a list of path to element that will be considered as keys
 * @param currentPath the current subelement to extract
 * @return a new schema, or null when no field remains
 */
private static Schema extractValues(Schema inputSchema, List<String> keyPaths, String currentPath) {
    List<Schema.Field> valueFields = new ArrayList<>();

    for (Field field : inputSchema.getFields()) {
        // Paths are dot-separated; the top level has no prefix.
        String fieldPath = StringUtils.isEmpty(currentPath)
                ? currentPath + field.name()
                : currentPath + "." + field.name();

        if (keyPaths.contains(fieldPath)) {
            // Key element: not part of the values.
            continue;
        }

        Schema unwrapped = getUnwrappedSchema(field);
        if (!unwrapped.getType().equals(Type.RECORD)) {
            // Plain element: add it directly.
            valueFields.add(new Field(field.name(), field.schema(), field.doc(), field.defaultVal()));
            continue;
        }

        Schema nestedValues = extractValues(unwrapped, keyPaths, fieldPath);
        if (nestedValues != null) {
            valueFields.add(new Field(field.name(), nestedValues, "", ""));
        }
    }

    if (valueFields.isEmpty()) {
        return null;
    }

    try {
        return Schema.createRecord("value_" + inputSchema.getName(), inputSchema.getDoc(),
                inputSchema.getNamespace(), inputSchema.isError(), valueFields);
    } catch (AvroRuntimeException e) {
        // this will be thrown if we are trying to get the name of an anonymous type
        return Schema.createRecord(valueFields);
    }
}
 
Example 18
/**
 * Round-trips a recursive Parent/Child record structure through Avro serialization using the
 * logical-type conversions supplied by a {@code ReferenceManager}, then checks that the child
 * read back holds a parent record with the original parent data.
 */
@Test
public void test() throws IOException {
  // Register the manager's tracker and handler conversions on the data model.
  ReferenceManager manager = new ReferenceManager();
  GenericData model = new GenericData();
  model.addLogicalTypeConversion(manager.getTracker());
  model.addLogicalTypeConversion(manager.getHandler());

  // The parent schema is created without fields; they are set further down, once the child
  // schema exists.
  Schema parentSchema = Schema.createRecord("Parent", null, null, false);

  Schema placeholderSchema = Schema.createRecord("Placeholder", null, null, false);
  List<Schema.Field> placeholderFields = new ArrayList<Schema.Field>();
  placeholderFields.add( // at least one field is needed to be a valid schema
      new Schema.Field("id", Schema.create(Schema.Type.LONG), null, null));
  placeholderSchema.setFields(placeholderFields);

  Referenceable idRef = new Referenceable("id");

  // Schema of the child's "parent" field: null, a long, or the placeholder record
  // processed by idRef.
  Schema parentRefSchema = Schema.createUnion(
      Schema.create(Schema.Type.NULL),
      Schema.create(Schema.Type.LONG),
      idRef.addToSchema(placeholderSchema));

  Reference parentRef = new Reference("parent");

  // Child record: string "c" plus the "parent" reference field.
  List<Schema.Field> childFields = new ArrayList<Schema.Field>();
  childFields.add(new Schema.Field("c", Schema.create(Schema.Type.STRING), null, null));
  childFields.add(new Schema.Field("parent", parentRefSchema, null, null));
  Schema childSchema = parentRef.addToSchema(
      Schema.createRecord("Child", null, null, false, childFields));

  // Now complete the parent record: "id", "p" and the nested "child".
  List<Schema.Field> parentFields = new ArrayList<Schema.Field>();
  parentFields.add(new Schema.Field("id", Schema.create(Schema.Type.LONG), null, null));
  parentFields.add(new Schema.Field("p", Schema.create(Schema.Type.STRING), null, null));
  parentFields.add(new Schema.Field("child", childSchema, null, null));
  parentSchema.setFields(parentFields);

  Schema schema = idRef.addToSchema(parentSchema);

  LOG.debug("Schema: {}", schema.toString(true));

  // Build a parent and a child that point at each other.
  Record parent = new Record(schema);
  parent.put("id", 1L);
  parent.put("p", "parent data!");

  Record child = new Record(childSchema);
  child.put("c", "child data!");
  child.put("parent", parent);

  parent.put("child", child);

  // serialization round trip
  File data = AvroTestUtil.write(temp, model, schema, parent);
  List<Record> records = AvroTestUtil.read(model, schema, data);

  Record actual = records.get(0);

  // because the record is a recursive structure, equals won't work
  Assert.assertEquals("Should correctly read back the parent id",
      1L, actual.get("id"));
  Assert.assertEquals("Should correctly read back the parent data",
      new Utf8("parent data!"), actual.get("p"));

  Record actualChild = (Record) actual.get("child");
  Assert.assertEquals("Should correctly read back the child data",
      new Utf8("child data!"), actualChild.get("c"));
  Object childParent = actualChild.get("parent");
  Assert.assertTrue("Should have a parent Record object",
      childParent instanceof Record);

  // The child's parent must carry the same id and data as the record that was written.
  Record childParentRecord = (Record) actualChild.get("parent");
  Assert.assertEquals("Should have the right parent id",
      1L, childParentRecord.get("id"));
  Assert.assertEquals("Should have the right parent data",
      new Utf8("parent data!"), childParentRecord.get("p"));
}
 
Example 19
Source Project: bunsen   File: DefinitionToAvroVisitor.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Returns the converter for a reference element, creating and caching it on first use.
 * The record is named after the concatenated reference types plus "Reference" and holds one
 * "&lt;type&gt;Id" field per reference type followed by the fields of {@code children}.
 */
@Override
public HapiConverter<Schema> visitReference(String elementName,
    List<String> referenceTypes,
    List<StructureField<HapiConverter<Schema>>> children) {

  // Generate a record name based on the type of references it can contain.
  String recordName = String.join("", referenceTypes) + "Reference";
  String fullName = basePackage + "." + recordName;

  HapiConverter<Schema> cached = visitedConverters.get(fullName);
  if (cached != null) {
    return cached;
  }

  // Direct references first: one "<type>Id" field per reference type, where the type is the
  // last path segment of the reference URI.
  List<StructureField<HapiConverter<Schema>>> referenceAndChildFields =
      referenceTypes.stream()
          .map(refUri -> refUri.substring(refUri.lastIndexOf('/') + 1))
          .map(relativeType -> new StructureField<HapiConverter<Schema>>("reference",
              relativeType + "Id",
              null,
              false,
              false,
              new RelativeValueConverter(relativeType)))
          .collect(Collectors.toList());

  // Then the regular child fields.
  referenceAndChildFields.addAll(children);

  List<Field> avroFields = referenceAndChildFields.stream()
      .map(structureField -> new Field(structureField.fieldName(),
          nullable(structureField.result().getDataType()),
          "Reference field",
          (Object) null))
      .collect(Collectors.toList());

  Schema recordSchema = Schema.createRecord(recordName,
      "Structure for FHIR type " + recordName,
      basePackage,
      false, avroFields);

  HapiConverter<Schema> created = new CompositeToAvroConverter(null,
      referenceAndChildFields,
      recordSchema,
      fhirSupport);

  visitedConverters.put(fullName, created);
  return created;
}
 
Example 20
Source Project: bunsen   File: DefinitionToAvroVisitor.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Returns the converter for a parent extension, creating and caching it on first use.
 * The record name is derived from the last path segment of {@code extensionUrl}: the segment
 * is split on '-', '|' and '_' and the capitalized pieces are concatenated.
 *
 * @param elementName name of the visited element (not used by this method)
 * @param extensionUrl URL identifying the extension
 * @param children converters for the extension's declared content
 * @return the converter for the extension, or null when it has no declared content
 */
@Override
public HapiConverter<Schema> visitParentExtension(String elementName,
    String extensionUrl,
    List<StructureField<HapiConverter<Schema>>> children) {

  // Ignore extension fields that don't have declared content for now.
  if (children.isEmpty()) {
    return null;
  }

  String recordNamespace = DefinitionVisitorsUtil.namespaceFor(basePackage, extensionUrl);

  String localPart = extensionUrl.substring(extensionUrl.lastIndexOf('/') + 1);

  String[] parts = localPart.split("[-|_]");

  String recordName = Arrays.stream(parts)
      // Leading or consecutive separators yield empty tokens; skip them, because
      // substring(0, 1) would throw StringIndexOutOfBoundsException on an empty string.
      .filter(part -> !part.isEmpty())
      .map(part -> part.substring(0, 1).toUpperCase() + part.substring(1))
      .collect(Collectors.joining());

  String fullName = recordNamespace + "." + recordName;

  HapiConverter<Schema> converter = visitedConverters.get(fullName);

  if (converter == null) {

    // One nullable Avro field per child, in the order given.
    List<Field> fields = children.stream()
        .map(entry ->
            new Field(entry.fieldName(),
                nullable(entry.result().getDataType()),
                "Doc here",
                (Object) null))
        .collect(Collectors.toList());

    Schema schema = Schema.createRecord(recordName,
        "Reference type.",
        recordNamespace,
        false, fields);

    converter = new CompositeToAvroConverter(null,
        children,
        schema,
        fhirSupport,
        extensionUrl);

    // Cache under the fully qualified name so later visits of the same extension reuse it.
    visitedConverters.put(fullName, converter);
  }

  return converter;
}