Java Code Examples for org.apache.avro.Schema#createFixed()

The following examples show how to use org.apache.avro.Schema#createFixed(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Writes records whose "dec" field is a plain fixed(4) value and reads them back with a
 * schema where the same field carries the DECIMAL_9_2 logical type, expecting BigDecimals.
 */
@Test
public void testReadDecimalFixed() throws IOException {
  // Two structurally identical fixed schemas: one left plain, one passed through
  // DECIMAL_9_2.addToSchema (a second createFixed call keeps the plain one untouched).
  Schema plainFixed = Schema.createFixed("aFixed", null, null, 4);
  Schema plainRecord = record("R", field("dec", plainFixed));
  Schema annotatedFixed = DECIMAL_9_2.addToSchema(
      Schema.createFixed("aFixed", null, null, 4));
  Schema decimalRecord = record("R", field("dec", annotatedFixed));

  List<GenericRecord> expected = Arrays.asList(
      instance(decimalRecord, "dec", D1),
      instance(decimalRecord, "dec", D2));

  // Encode the decimals by hand so the test does not depend on the write side.
  Conversion<BigDecimal> decimalConversion = new Conversions.DecimalConversion();
  GenericRecord encodedFirst = instance(plainRecord, "dec",
      decimalConversion.toFixed(D1, plainFixed, DECIMAL_9_2));
  GenericRecord encodedSecond = instance(plainRecord, "dec",
      decimalConversion.toFixed(D2, plainFixed, DECIMAL_9_2));

  File dataFile = write(plainRecord, encodedFirst, encodedSecond);
  Assert.assertEquals("Should convert fixed to BigDecimals",
      expected, read(GENERIC, decimalRecord, dataFile));
}
 
Example 2
/**
 * Writes BigDecimal records using a schema whose "dec" field carries the DECIMAL_9_2 logical
 * type and reads them back with the plain fixed(4) schema, expecting the encoded fixed values.
 */
@Test
public void testWriteDecimalFixed() throws IOException {
  // Two structurally identical fixed schemas: one left plain, one passed through
  // DECIMAL_9_2.addToSchema (a second createFixed call keeps the plain one untouched).
  Schema plainFixed = Schema.createFixed("aFixed", null, null, 4);
  Schema plainRecord = record("R", field("dec", plainFixed));
  Schema annotatedFixed = DECIMAL_9_2.addToSchema(
      Schema.createFixed("aFixed", null, null, 4));
  Schema decimalRecord = record("R", field("dec", annotatedFixed));

  GenericRecord decimalFirst = instance(decimalRecord, "dec", D1);
  GenericRecord decimalSecond = instance(decimalRecord, "dec", D2);

  // Build the expected fixed values by hand so the test does not depend on the write side.
  Conversion<BigDecimal> decimalConversion = new Conversions.DecimalConversion();
  List<GenericRecord> expected = Arrays.asList(
      instance(plainRecord, "dec",
          decimalConversion.toFixed(D1, plainFixed, DECIMAL_9_2)),
      instance(plainRecord, "dec",
          decimalConversion.toFixed(D2, plainFixed, DECIMAL_9_2)));

  File dataFile = write(GENERIC, decimalRecord, decimalFirst, decimalSecond);
  Assert.assertEquals("Should read BigDecimals as fixed",
      expected, read(GENERIC, plainRecord, dataFile));
}
 
Example 3
Source Project: spork   File: TestAvroStorageUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that merging any two different complex schema kinds (record, array, map, union,
 * fixed) is rejected with an IOException naming both types.
 */
@Test
public void testMergeSchema3() throws IOException {
    Schema[] complexTypes = {
        Schema.createRecord(new ArrayList<Schema.Field>()),
        Schema.createArray(Schema.create(Schema.Type.INT)),
        Schema.createMap(Schema.create(Schema.Type.INT)),
        Schema.createUnion(new ArrayList<Schema>()),
        Schema.createFixed("fixed", null, null, 1),
    };

    for (int left = 0; left < complexTypes.length; left++) {
        for (int right = 0; right < complexTypes.length; right++) {
            // A type paired with itself is not covered by this test.
            if (left == right) {
                continue;
            }
            Schema x = complexTypes[left];
            Schema y = complexTypes[right];

            Schema merged;
            try {
                merged = AvroStorageUtils.mergeSchema(x, y);
            } catch (IOException e) {
                assertEquals("Cannot merge "+ x.getType()+ " with "+ y.getType(), e.getMessage());
                continue;
            }
            // Only reached when no exception was thrown.
            Assert.fail("exception is expected, but " + merged.getType() + " is returned");
        }
    }
}
 
Example 4
Source Project: spork   File: TestAvroStorageUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks merging of two FIXED schemas: equal sizes merge into a FIXED schema of that size,
 * different sizes are rejected with an IOException.
 */
@Test
public void testMergeSchema8() throws IOException {
    // Same size, different names: the merge succeeds.
    Schema firstOneByte = Schema.createFixed("fixed1", null, null, 1);
    Schema secondOneByte = Schema.createFixed("fixed2", null, null, 1);

    Schema merged = AvroStorageUtils.mergeSchema(firstOneByte, secondOneByte);
    assertEquals(Schema.Type.FIXED, merged.getType());
    assertEquals(firstOneByte.getFixedSize(), merged.getFixedSize());

    // Sizes 1 and 2: the merge must fail.
    Schema oneByte = Schema.createFixed("fixed1", null, null, 1);
    Schema twoBytes = Schema.createFixed("fixed2", null, null, 2);

    Schema unexpected;
    try {
        unexpected = AvroStorageUtils.mergeSchema(oneByte, twoBytes);
    } catch (IOException e) {
        assertTrue(e.getMessage().contains("Cannot merge FIXED types with different sizes"));
        return;
    }
    // Only reached when no exception was thrown.
    Assert.fail("exception is expected, but " + unexpected.getType() + " is returned");
}
 
Example 5
Source Project: nifi   File: TestAvroTypeUtil.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a one-field record whose "amount" field is a fixed(16) schema annotated as
 * decimal(18, 8), stores an encoded BigDecimal in it and hands the schema, the expected
 * value and the record to thenConvertAvroSchemaToRecordSchema for verification.
 */
@Test
public void testConvertAvroRecordToMapWithFieldTypeOfFixedAndLogicalTypeDecimal() {
    // Field schema equivalent to:
    // {"type":"fixed","name":"amount","size":16,"logicalType":"decimal","precision":18,"scale":8}
    final Schema amountSchema = Schema.createFixed("amount", null, null, 16);
    final LogicalTypes.Decimal decimal18Scale8 = LogicalTypes.decimal(18, 8);
    decimal18Scale8.addToSchema(amountSchema);

    // Record schema consisting of just the "amount" field
    final Schema.Field amountField = new Schema.Field("amount", amountSchema, null, (Object) null);
    final Schema recordSchema = Schema.createRecord(Collections.singletonList(amountField));

    // Example record carrying the decimal encoded as a fixed value
    final BigDecimal expectedBigDecimal = new BigDecimal("1234567890.12345678");
    final GenericRecord genericRecord = new GenericData.Record(recordSchema);
    final GenericFixed encodedAmount =
        new Conversions.DecimalConversion().toFixed(expectedBigDecimal, amountSchema, decimal18Scale8);
    genericRecord.put("amount", encodedAmount);

    // Convert the Avro schema to a Record schema and verify
    thenConvertAvroSchemaToRecordSchema(recordSchema, expectedBigDecimal, genericRecord);
}
 
Example 6
Source Project: nifi   File: TestAvroTypeUtil.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Converts the string "2.5" through AvroTypeUtil.convertToAvroObject against a fixed(18)
 * schema annotated as decimal(18, 8) and checks that decoding the resulting GenericFixed
 * yields 2.5 at scale 8.
 */
@Test
public void testFixedDecimalConversion(){
    final Schema decimalFixedSchema = Schema.createFixed("mydecimal", "no doc", "myspace", 18);
    final LogicalTypes.Decimal decimal18Scale8 = LogicalTypes.decimal(18, 8);
    decimal18Scale8.addToSchema(decimalFixedSchema);

    final Object converted = AvroTypeUtil.convertToAvroObject("2.5", decimalFixedSchema, StandardCharsets.UTF_8);
    assertTrue(converted instanceof GenericFixed);

    // Decode the fixed bytes back into a BigDecimal and compare at the schema's scale.
    final BigDecimal decoded = new Conversions.DecimalConversion()
        .fromFixed((GenericFixed) converted, decimalFixedSchema, decimal18Scale8);
    assertEquals(new BigDecimal("2.5").setScale(8), decoded);
}
 
Example 7
/**
 * Creates a Parquet file with one column per listed Avro type, exports it into a freshly
 * created table and verifies the row count and each column's min/max.
 */
public void testSupportedParquetTypes() throws IOException, SQLException {
  final int totalRecords = 1 * 10;
  String[] extraArgs = {};

  // Two-byte payload used for both the BYTES and the FIXED column.
  byte[] payload = { 1, 2 };
  Schema fixedSchema = Schema.createFixed("myfixed", null, null, 2);
  Schema enumSchema = Schema.createEnum("myenum", null, null,
      Lists.newArrayList("a", "b"));

  // NOTE(review): colGenerator arguments look like (Avro value, Avro schema,
  // expected DB value, SQL column type) - confirm against colGenerator.
  ColumnGenerator[] generators = new ColumnGenerator[] {
    colGenerator(true, Schema.create(Schema.Type.BOOLEAN), true, "BIT"),
    colGenerator(100, Schema.create(Schema.Type.INT), 100, "INTEGER"),
    colGenerator(200L, Schema.create(Schema.Type.LONG), 200L, "BIGINT"),
    // HSQLDB maps REAL to double, not float:
    colGenerator(1.0f, Schema.create(Schema.Type.FLOAT), 1.0d, "REAL"),
    colGenerator(2.0d, Schema.create(Schema.Type.DOUBLE), 2.0d, "DOUBLE"),
    colGenerator("s", Schema.create(Schema.Type.STRING), "s", "VARCHAR(8)"),
    colGenerator(ByteBuffer.wrap(payload), Schema.create(Schema.Type.BYTES),
        payload, "VARBINARY(8)"),
    colGenerator(new GenericData.Fixed(fixedSchema, payload), fixedSchema,
        payload, "BINARY(2)"),
    colGenerator(new GenericData.EnumSymbol(enumSchema, "a"), enumSchema,
        "a", "VARCHAR(8)"),
  };

  createParquetFile(0, totalRecords, generators);
  createTable(generators);
  runExport(getArgv(true, 10, 10, newStrArray(extraArgs, "-m", "" + 1)));
  verifyExport(totalRecords);

  for (int column = 0; column < generators.length; column++) {
    assertColMinAndMax(forIdx(column), generators[column]);
  }
}
 
Example 8
/**
 * Creates an Avro file with one column per listed Avro type, exports it into a freshly
 * created table and verifies the row count and each column's min/max.
 */
public void testSupportedAvroTypes() throws IOException, SQLException {
  final int totalRecords = 1 * 10;
  String[] extraArgs = {};

  // Two-byte payload used for both the BYTES and the FIXED column.
  byte[] payload = { 1, 2 };
  Schema fixedSchema = Schema.createFixed("myfixed", null, null, 2);
  Schema enumSchema = Schema.createEnum("myenum", null, null,
      Lists.newArrayList("a", "b"));

  // NOTE(review): colGenerator arguments look like (Avro value, Avro schema,
  // expected DB value, SQL column type) - confirm against colGenerator.
  ColumnGenerator[] generators = new ColumnGenerator[] {
    colGenerator(true, Schema.create(Schema.Type.BOOLEAN), true, "BIT"),
    colGenerator(100, Schema.create(Schema.Type.INT), 100, "INTEGER"),
    colGenerator(200L, Schema.create(Schema.Type.LONG), 200L, "BIGINT"),
    // HSQLDB maps REAL to double, not float:
    colGenerator(1.0f, Schema.create(Schema.Type.FLOAT), 1.0d, "REAL"),
    colGenerator(2.0d, Schema.create(Schema.Type.DOUBLE), 2.0d, "DOUBLE"),
    colGenerator("s", Schema.create(Schema.Type.STRING), "s", "VARCHAR(8)"),
    colGenerator(ByteBuffer.wrap(payload), Schema.create(Schema.Type.BYTES),
        payload, "VARBINARY(8)"),
    colGenerator(new GenericData.Fixed(fixedSchema, payload), fixedSchema,
        payload, "BINARY(2)"),
    colGenerator(new GenericData.EnumSymbol(enumSchema, "a"), enumSchema,
        "a", "VARCHAR(8)"),
  };

  createAvroFile(0, totalRecords, generators);
  createTable(generators);
  runExport(getArgv(true, 10, 10, newStrArray(extraArgs, "-m", "" + 1)));
  verifyExport(totalRecords);

  for (int column = 0; column < generators.length; column++) {
    assertColMinAndMax(forIdx(column), generators[column]);
  }
}
 
Example 9
/**
 * Builds a record with the schema loaded from /schema/defaultsTestOld.avsc, decodes it against
 * DefaultsTestRecord.SCHEMA$ with either the fast or the slow generic deserializer, and checks
 * the value of every field of the decoded record.
 *
 * NOTE(review): judging by the test name, the asserted values for fields that the old record
 * does not set are the defaults declared in DefaultsTestRecord.SCHEMA$ - confirm against the
 * schema files.
 *
 * @param whetherUseFastDeserializer true to decode via decodeGenericFast, false via decodeGenericSlow
 */
@Test(groups = {"deserializationTest"}, dataProvider = "SlowFastDeserializer")
@SuppressWarnings("unchecked")
public void shouldReadGenericDefaults(Boolean whetherUseFastDeserializer) throws IOException {
  // given
  Schema oldRecordSchema = Schema.parse(this.getClass().getResourceAsStream("/schema/defaultsTestOld.avsc"));
  GenericData.Record oldRecord = new GenericData.Record(oldRecordSchema);
  // Only the nested "oldSubRecord" is populated on the old record.
  GenericData.Record oldSubRecord = new GenericData.Record(oldRecordSchema.getField("oldSubRecord").schema());
  oldSubRecord.put("oldSubField", new Utf8("testValueOfSubField"));
  oldSubRecord.put("fieldToBeRemoved", 33);
  oldRecord.put("oldSubRecord", oldSubRecord);

  // when
  GenericRecord testRecord = null;
  if (whetherUseFastDeserializer) {
    testRecord = decodeGenericFast(DefaultsTestRecord.SCHEMA$, oldRecordSchema, genericDataAsDecoder(oldRecord));
  } else {
    testRecord = decodeGenericSlow(DefaultsTestRecord.SCHEMA$, oldRecordSchema, genericDataAsDecoder(oldRecord));
  }

  // then
  // The value that was actually written survives the round trip.
  Assert.assertEquals(oldSubRecord.get("oldSubField"),
      ((GenericData.Record) testRecord.get("oldSubRecord")).get("oldSubField"));
  Assert.assertEquals(new Utf8("defaultOldSubField"),
      ((GenericData.Record) testRecord.get("newFieldWithOldSubRecord")).get("oldSubField"));
  // Primitive fields and their "...Union" counterparts, which are expected to be null.
  Assert.assertEquals(42, (int) testRecord.get("testInt"));
  Assert.assertNull(testRecord.get("testIntUnion"));
  Assert.assertEquals(9223372036854775807L, (long) testRecord.get("testLong"));
  Assert.assertNull(testRecord.get("testLongUnion"));
  Assert.assertEquals(3.14d, (double) testRecord.get("testDouble"), 0);
  Assert.assertNull(testRecord.get("testDoubleUnion"));
  Assert.assertEquals(3.14f, (float) testRecord.get("testFloat"), 0);
  Assert.assertNull(testRecord.get("testFloatUnion"));
  Assert.assertEquals(true, testRecord.get("testBoolean"));
  Assert.assertNull(testRecord.get("testBooleanUnion"));
  Assert.assertEquals(ByteBuffer.wrap("1234".getBytes()), testRecord.get("testBytes"));
  Assert.assertNull(testRecord.get("testBytesUnion"));
  Assert.assertEquals(new Utf8("testStringValue"), testRecord.get("testString"));
  Assert.assertEquals(new Utf8("http://www.example.com"), testRecord.get("testStringable"));

  Assert.assertNull(testRecord.get("testStringUnion"));
  // Fixed fields: expected values are built against a locally created one-byte fixed schema.
  Schema fixedSchema = Schema.createFixed("DefaultsFixed", "", "", 1);
  GenericData.Fixed expectedFixed1 = AvroCompatibilityHelper.newFixedField(fixedSchema, new byte[]{(byte) '5'});
  Assert.assertEquals(expectedFixed1, testRecord.get("testFixed"));
  Assert.assertNull(testRecord.get("testFixedUnion"));
  GenericData.Fixed expectedFixed2 = AvroCompatibilityHelper.newFixedField(fixedSchema, new byte[]{(byte) '6'});
  Assert.assertTrue(Arrays.asList(expectedFixed2).equals(testRecord.get("testFixedArray")));

  // Single-element list containing null; reused below as the expected value of the
  // "...UnionArray" fields.
  List listWithNull = new LinkedList();
  listWithNull.add(null);
  Assert.assertTrue(listWithNull.equals(testRecord.get("testFixedUnionArray")));
  // Enum fields are compared through their string form or through symbols of a local enum schema.
  Assert.assertEquals("C", testRecord.get("testEnum").toString());
  Assert.assertNull(testRecord.get("testEnumUnion"));
  Schema enumSchema = Schema.createEnum("DefaultsNewEnum", "", "", Arrays.asList("A", "B"));
  Assert.assertTrue(Arrays.asList(Arrays.asList(AvroCompatibilityHelper.newEnumSymbol(enumSchema, "B")))
      .equals(testRecord.get("testNewEnumIntUnionArray")));

  Assert.assertEquals("E", ((List<GenericData.EnumSymbol>) testRecord.get("testEnumArray")).get(0).toString());
  Assert.assertEquals("B", ((List<GenericData.EnumSymbol>) testRecord.get("testEnumArray")).get(1).toString());
  Assert.assertTrue(listWithNull.equals(testRecord.get("testEnumUnionArray")));
  // Nested record fields, compared against records built by newGenericSubRecord.
  Assert.assertNull(testRecord.get("subRecordUnion"));
  Assert.assertEquals(newGenericSubRecord("valueOfSubField", null, "A"), testRecord.get("subRecord"));
  Assert.assertTrue(
      Arrays.asList(newGenericSubRecord("recordArrayValue", null, "A")).equals(testRecord.get("recordArray")));
  Assert.assertTrue(listWithNull.equals(testRecord.get("recordUnionArray")));

  // Map-valued fields; the expected maps are keyed by Utf8.
  Map stringableMap = new HashMap();
  stringableMap.put(new Utf8("http://www.example2.com"), new Utf8("123"));
  Assert.assertEquals(stringableMap, testRecord.get("stringableMap"));

  Map recordMap = new HashMap();
  recordMap.put(new Utf8("test"), newGenericSubRecord("recordMapValue", null, "A"));
  Assert.assertEquals(recordMap, testRecord.get("recordMap"));

  Map recordUnionMap = new HashMap();
  recordUnionMap.put(new Utf8("test"), null);
  Assert.assertEquals(recordUnionMap, testRecord.get("recordUnionMap"));
  Assert.assertTrue(
      new ArrayList(Collections.singletonList(recordUnionMap)).equals(testRecord.get("recordUnionMapArray")));

  Map recordUnionArrayMap = new HashMap();
  recordUnionArrayMap.put(new Utf8("test"), listWithNull);
  Assert.assertTrue(recordUnionArrayMap.equals(testRecord.get("recordUnionArrayMap")));
}
 
Example 10
/**
 * Creates an Avro fixed schema with an empty doc string in the
 * {@code com.adpilot.utils.generated.avro} namespace.
 *
 * @param name name of the fixed type
 * @param size size passed to {@code Schema.createFixed}
 * @return the new fixed schema
 */
public static Schema createFixedSchema(String name, int size) {
  final String doc = "";
  final String namespace = "com.adpilot.utils.generated.avro";
  return Schema.createFixed(name, doc, namespace, size);
}
 
Example 11
/**
 * Creates a fixed schema in {@code NAMESPACE} with no doc, a name of "Fixed" followed by five
 * random alphabetic characters, and a size drawn from {@code RandomUtils.nextInt(1, 10)}.
 *
 * @return the randomly named and sized fixed schema
 */
public static Schema generateRandomFixedSchema() {
    final String randomName = "Fixed" + RandomStringUtils.randomAlphabetic(5);
    final int randomSize = RandomUtils.nextInt(1, 10);
    return Schema.createFixed(randomName, null, NAMESPACE, randomSize);
}
 
Example 12
/**
 * Creates an Avro fixed schema with an empty doc string in {@code NAMESPACE}.
 *
 * @param name name of the fixed type
 * @param size size passed to {@code Schema.createFixed}
 * @return the new fixed schema
 */
public static Schema createFixedSchema(String name, int size) {
    final String doc = "";
    return Schema.createFixed(name, doc, NAMESPACE, size);
}
 
Example 13
Source Project: funcj   File: GenerateSchema.java    License: MIT License 4 votes vote down vote up
/**
 * Returns a one-byte fixed schema (no doc, no namespace) whose name is derived from
 * {@code path} and {@code name} via {@code toName}.
 */
@Override
public Schema byteP(IList<String> path, String name) {
    final String schemaName = toName(path, name);
    return Schema.createFixed(schemaName, null, null, 1);
}
 
Example 14
Source Project: funcj   File: AvroSchemaCodecFormat.java    License: MIT License 4 votes vote down vote up
/**
 * Returns a one-byte fixed schema (no doc, no namespace) named by {@code out}, which must be
 * a {@code String}; {@code value} is not used.
 */
@Override
public Object encodePrim(byte value, Object out) {
    final String schemaName = (String) out;
    return Schema.createFixed(schemaName, null, null, 1);
}
 
Example 15
Source Project: kite   File: AvroParquetMorphlineTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Writes one record covering every field of all.avsc to a Parquet file, reads it back through
 * the "readAvroParquetFileWithProjectionSubSchema" morphline and checks each field of the
 * single resulting record.
 */
@Test
public void testAll() throws Exception {
  Schema schema = new Schema.Parser().parse(new File("src/test/resources/test-avro-schemas/all.avsc"));

  // createTempFile is used only to obtain a unique path: the file itself is deleted again
  // before the writer is opened on that path.
  File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
  tmp.deleteOnExit();
  tmp.delete();
  Path file = new Path(tmp.getPath());

  GenericData.Record nestedRecord = new GenericRecordBuilder(
      schema.getField("mynestedrecord").schema())
          .set("mynestedint", 1).build();

  List<Integer> integerArray = Arrays.asList(1, 2, 3);
  GenericData.Array<Integer> genericIntegerArray = new GenericData.Array<Integer>(
      Schema.createArray(Schema.create(Schema.Type.INT)), integerArray);

  GenericFixed genericFixed = new GenericData.Fixed(
      Schema.createFixed("fixed", null, null, 1), new byte[] { (byte) 65 });

  List<Integer> emptyArray = new ArrayList<Integer>();
  ImmutableMap<String, Integer> emptyMap = new ImmutableMap.Builder<String, Integer>().build();

  GenericData.Record record = new GenericRecordBuilder(schema)
      .set("mynull", null)
      .set("myboolean", true)
      .set("myint", 1)
      .set("mylong", 2L)
      .set("myfloat", 3.1f)
      .set("mydouble", 4.1)
      .set("mybytes", ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)))
      .set("mystring", "hello")
      .set("mynestedrecord", nestedRecord)
      .set("myenum", "a")
      .set("myarray", genericIntegerArray)
      .set("myemptyarray", emptyArray)
      .set("myoptionalarray", genericIntegerArray)
      .set("mymap", ImmutableMap.of("a", 1, "b", 2))
      .set("myemptymap", emptyMap)
      .set("myfixed", genericFixed)
      .build();

  // Open the writer only once the record is ready, and always close it, so that a failure
  // while building or writing the record does not leave the writer open.
  AvroParquetWriter<GenericRecord> writer = new
      AvroParquetWriter<GenericRecord>(file, schema);
  try {
    writer.write(record);
  } finally {
    writer.close();
  }

  morphline = createMorphline("test-morphlines/readAvroParquetFileWithProjectionSubSchema");

  Record morphlineRecord = new Record();
  morphlineRecord.put(ReadAvroParquetFileBuilder.FILE_UPLOAD_URL, file.toString());
  collector.reset();

  assertTrue(morphline.process(morphlineRecord));

  assertEquals(1, collector.getRecords().size());
  GenericData.Record actualRecord = (GenericData.Record) collector.getFirstRecord().getFirstValue(Fields.ATTACHMENT_BODY);
  assertNotNull(actualRecord);
  assertEquals(null, actualRecord.get("mynull"));
  assertEquals(true, actualRecord.get("myboolean"));
  assertEquals(1, actualRecord.get("myint"));
  assertEquals(2L, actualRecord.get("mylong"));
  // NOTE(review): "myfloat" was written as 3.1f but is expected to be null here, presumably
  // because the projection sub-schema of the morphline leaves it out - confirm against the
  // morphline config.
  assertEquals(null, actualRecord.get("myfloat"));
  assertEquals(4.1, actualRecord.get("mydouble"));
  assertEquals(ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)), actualRecord.get("mybytes"));
  assertEquals("hello", actualRecord.get("mystring"));
  assertEquals("a", actualRecord.get("myenum"));
  assertEquals(nestedRecord, actualRecord.get("mynestedrecord"));
  assertEquals(integerArray, actualRecord.get("myarray"));
  assertEquals(emptyArray, actualRecord.get("myemptyarray"));
  assertEquals(integerArray, actualRecord.get("myoptionalarray"));
  assertEquals(ImmutableMap.of("a", 1, "b", 2), actualRecord.get("mymap"));
  assertEquals(emptyMap, actualRecord.get("myemptymap"));
  assertEquals(genericFixed, actualRecord.get("myfixed"));
}
 
Example 16
Source Project: incubator-gobblin   File: AvroUtils.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates a copy of the input {@link org.apache.avro.Schema} in which the namespaces of named
 * types (enum, fixed, record) are replaced according to a mapping. Container types (array, map,
 * union, record fields) are copied recursively; primitives are re-created.
 * @param schema {@link org.apache.avro.Schema} to copy.
 * @param namespaceOverride mapping from an existing namespace to its replacement; a namespace
 *        that is not a key of this map is kept as is.
 * @return A {@link org.apache.avro.Schema} that is a copy of schema, with namespaces replaced.
 */
public static Schema switchNamespace(Schema schema, Map<String, String> namespaceOverride) {
  final Schema.Type type = schema.getType();
  final Schema copy;

  switch (type) {
    // Primitives carry no namespace and are simply re-created.
    case BOOLEAN:
    case BYTES:
    case DOUBLE:
    case FLOAT:
    case INT:
    case LONG:
    case NULL:
    case STRING:
      copy = Schema.create(type);
      break;

    // Containers: copy the nested schema(s) recursively.
    case ARRAY:
      copy = Schema.createArray(switchNamespace(schema.getElementType(), namespaceOverride));
      break;
    case MAP:
      copy = Schema.createMap(switchNamespace(schema.getValueType(), namespaceOverride));
      break;
    case UNION: {
      List<Schema> copiedMembers = new ArrayList<>();
      List<Schema> members = schema.getTypes();
      if (members != null) {
        for (Schema member : members) {
          copiedMembers.add(switchNamespace(member, namespaceOverride));
        }
      }
      copy = Schema.createUnion(copiedMembers);
      break;
    }

    // Named types: look up the replacement namespace, falling back to the current one.
    case ENUM: {
      String enumNamespace = schema.getNamespace();
      if (namespaceOverride.containsKey(enumNamespace)) {
        enumNamespace = namespaceOverride.get(enumNamespace);
      }
      copy = Schema.createEnum(schema.getName(), schema.getDoc(), enumNamespace, schema.getEnumSymbols());
      break;
    }
    case FIXED: {
      String fixedNamespace = schema.getNamespace();
      if (namespaceOverride.containsKey(fixedNamespace)) {
        fixedNamespace = namespaceOverride.get(fixedNamespace);
      }
      copy = Schema.createFixed(schema.getName(), schema.getDoc(), fixedNamespace, schema.getFixedSize());
      break;
    }
    case RECORD: {
      String recordNamespace = schema.getNamespace();
      if (namespaceOverride.containsKey(recordNamespace)) {
        recordNamespace = namespaceOverride.get(recordNamespace);
      }
      // Fields are rebuilt around recursively copied field schemas.
      List<Schema.Field> copiedFields = new ArrayList<>();
      for (Schema.Field original : schema.getFields()) {
        Schema fieldSchema = switchNamespace(original.schema(), namespaceOverride);
        copiedFields.add(
            new Field(original.name(), fieldSchema, original.doc(), original.defaultValue(), original.order()));
      }
      copy = Schema.createRecord(schema.getName(), schema.getDoc(), recordNamespace, schema.isError());
      copy.setFields(copiedFields);
      break;
    }

    default:
      String exceptionMessage = String.format("Schema namespace replacement failed for \"%s\" ", schema);
      LOG.error(exceptionMessage);

      throw new AvroRuntimeException(exceptionMessage);
  }

  // Copy schema metadata
  copyProperties(schema, copy);

  return copy;
}
 
Example 17
Source Project: incubator-gobblin   File: AvroFlattener.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Flatten the Schema to un-nest recursive Records (to make it optimal for ORC).
 * Primitives, enums and fixed types are re-created as they are; records and unions are
 * delegated to {@code flattenRecord} / {@code flattenUnion}; array element and map value
 * schemas are flattened only when {@code flattenComplexTypes} is set.
 * @param schema Schema to flatten
 * @param shouldPopulateLineage is set to true if the field is going to be flattened and moved up the hierarchy -
 *                              so that lineage information can be tagged to it; which happens when there is a
 *                              Record within a Record OR Record within Option within Record and so on,
 *                              however not when there is a Record within Map or Array
 * @param flattenComplexTypes Flatten complex types recursively other than Record and Option
 * @return Flattened Avro Schema
 */
private Schema flatten(Schema schema, boolean shouldPopulateLineage, boolean flattenComplexTypes) {
  final Schema.Type type = schema.getType();
  final Schema result;

  switch (type) {
    // Primitives are simply cloned.
    case BOOLEAN:
    case BYTES:
    case DOUBLE:
    case FLOAT:
    case INT:
    case LONG:
    case NULL:
    case STRING:
      result = Schema.create(type);
      break;

    // Named non-record types are re-created from their own attributes.
    case ENUM:
      result =
          Schema.createEnum(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.getEnumSymbols());
      break;
    case FIXED:
      result =
          Schema.createFixed(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.getFixedSize());
      break;

    case ARRAY: {
      // Array might be an array of recursive Records, flatten them
      Schema elementSchema = schema.getElementType();
      if (flattenComplexTypes) {
        elementSchema = flatten(elementSchema, false);
      }
      result = Schema.createArray(elementSchema);
      break;
    }
    case MAP: {
      Schema valueSchema = schema.getValueType();
      if (flattenComplexTypes) {
        valueSchema = flatten(valueSchema, false);
      }
      result = Schema.createMap(valueSchema);
      break;
    }

    case RECORD:
      result = flattenRecord(schema, shouldPopulateLineage, flattenComplexTypes);
      break;
    case UNION:
      result = flattenUnion(schema, shouldPopulateLineage, flattenComplexTypes);
      break;

    default:
      String exceptionMessage = String.format("Schema flattening failed for \"%s\" ", schema);
      LOG.error(exceptionMessage);

      throw new AvroRuntimeException(exceptionMessage);
  }

  // Copy schema metadata
  copyProperties(schema, result);

  return result;
}
 
Example 18
Source Project: parquet-mr   File: TestReadWrite.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Round-trips one record covering every field of all.avsc (including an array with null
 * elements) through a Parquet file and checks each field of the record read back.
 */
@Test
public void testAll() throws Exception {
  Schema allTypesSchema = new Schema.Parser().parse(
      Resources.getResource("all.avsc").openStream());
  Path parquetFile = new Path(createTempFile().getPath());

  // Values for the complex-typed fields; kept in locals because the assertions reuse them.
  List<Integer> oneTwoThree = Arrays.asList(1, 2, 3);
  GenericData.Record nested = new GenericRecordBuilder(
      allTypesSchema.getField("mynestedrecord").schema())
      .set("mynestedint", 1)
      .build();
  List<Integer> noIntegers = new ArrayList<>();
  Schema arrayOfOptionalInts = Schema.createArray(
      optional(Schema.create(Schema.Type.INT)));
  GenericData.Array<Integer> intsWithNulls = new GenericData.Array<>(
      arrayOfOptionalInts, Arrays.asList(1, null, 2, null, 3));
  GenericFixed oneByteFixed = new GenericData.Fixed(
      Schema.createFixed("fixed", null, null, 1), new byte[] {(byte) 65});
  ImmutableMap<String, Integer> noEntries = new ImmutableMap.Builder<String, Integer>().build();

  try (ParquetWriter<GenericRecord> parquetWriter = AvroParquetWriter
      .<GenericRecord>builder(parquetFile)
      .withSchema(allTypesSchema)
      .withConf(testConf)
      .build()) {

    GenericData.Array<Integer> genericInts = new GenericData.Array<>(
        Schema.createArray(Schema.create(Schema.Type.INT)), oneTwoThree);

    GenericData.Record written = new GenericRecordBuilder(allTypesSchema)
        .set("mynull", null)
        .set("myboolean", true)
        .set("myint", 1)
        .set("mylong", 2L)
        .set("myfloat", 3.1f)
        .set("mydouble", 4.1)
        .set("mybytes", ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)))
        .set("mystring", "hello")
        .set("mynestedrecord", nested)
        .set("myenum", "a")
        .set("myarray", genericInts)
        .set("myemptyarray", noIntegers)
        .set("myoptionalarray", genericInts)
        .set("myarrayofoptional", intsWithNulls)
        .set("mymap", ImmutableMap.of("a", 1, "b", 2))
        .set("myemptymap", noEntries)
        .set("myfixed", oneByteFixed)
        .build();

    parquetWriter.write(written);
  }

  final GenericRecord readBack;
  try (AvroParquetReader<GenericRecord> parquetReader = new AvroParquetReader<>(testConf, parquetFile)) {
    readBack = parquetReader.read();
  }

  // The enum is expected as a plain string when "compat" is set, as an EnumSymbol otherwise.
  final Object expectedEnum;
  if (compat) {
    expectedEnum = "a";
  } else {
    expectedEnum = new GenericData.EnumSymbol(allTypesSchema.getField("myenum").schema(), "a");
  }

  assertNotNull(readBack);
  assertEquals(null, readBack.get("mynull"));
  assertEquals(true, readBack.get("myboolean"));
  assertEquals(1, readBack.get("myint"));
  assertEquals(2L, readBack.get("mylong"));
  assertEquals(3.1f, readBack.get("myfloat"));
  assertEquals(4.1, readBack.get("mydouble"));
  assertEquals(ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)), readBack.get("mybytes"));
  assertEquals(str("hello"), readBack.get("mystring"));
  assertEquals(expectedEnum, readBack.get("myenum"));
  assertEquals(nested, readBack.get("mynestedrecord"));
  assertEquals(oneTwoThree, readBack.get("myarray"));
  assertEquals(noIntegers, readBack.get("myemptyarray"));
  assertEquals(oneTwoThree, readBack.get("myoptionalarray"));
  assertEquals(intsWithNulls, readBack.get("myarrayofoptional"));
  assertEquals(ImmutableMap.of(str("a"), 1, str("b"), 2), readBack.get("mymap"));
  assertEquals(noEntries, readBack.get("myemptymap"));
  assertEquals(oneByteFixed, readBack.get("myfixed"));
}
 
Example 19
/**
 * Round-trips one record covering every field of all.avsc through a Parquet file written
 * with the AvroParquetWriter(file, schema) constructor and checks each field read back.
 */
@Test
public void testAll() throws Exception {
  Schema allTypesSchema = new Schema.Parser().parse(
      Resources.getResource("all.avsc").openStream());

  // createTempFile is used only to obtain a unique path: the file itself is deleted again
  // before the writer is opened on that path.
  File placeholder = File.createTempFile(getClass().getSimpleName(), ".tmp");
  placeholder.deleteOnExit();
  placeholder.delete();
  Path parquetFile = new Path(placeholder.getPath());

  // Values for the complex-typed fields; kept in locals because the assertions reuse them.
  GenericData.Record nested = new GenericRecordBuilder(
      allTypesSchema.getField("mynestedrecord").schema())
      .set("mynestedint", 1)
      .build();
  List<Integer> oneTwoThree = Arrays.asList(1, 2, 3);
  GenericData.Array<Integer> genericInts = new GenericData.Array<Integer>(
      Schema.createArray(Schema.create(Schema.Type.INT)), oneTwoThree);
  GenericFixed oneByteFixed = new GenericData.Fixed(
      Schema.createFixed("fixed", null, null, 1), new byte[] {(byte) 65});
  List<Integer> noIntegers = new ArrayList<Integer>();
  ImmutableMap noEntries = new ImmutableMap.Builder<String, Integer>().build();

  try (AvroParquetWriter<GenericRecord> parquetWriter = new AvroParquetWriter<>(parquetFile, allTypesSchema)) {
    GenericData.Record written = new GenericRecordBuilder(allTypesSchema)
        .set("mynull", null)
        .set("myboolean", true)
        .set("myint", 1)
        .set("mylong", 2L)
        .set("myfloat", 3.1f)
        .set("mydouble", 4.1)
        .set("mybytes", ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)))
        .set("mystring", "hello")
        .set("mynestedrecord", nested)
        .set("myenum", "a")
        .set("myarray", genericInts)
        .set("myemptyarray", noIntegers)
        .set("myoptionalarray", genericInts)
        .set("myarrayofoptional", genericInts)
        .set("mymap", ImmutableMap.of("a", 1, "b", 2))
        .set("myemptymap", noEntries)
        .set("myfixed", oneByteFixed)
        .build();

    parquetWriter.write(written);
  }

  try (AvroParquetReader<GenericRecord> parquetReader = new AvroParquetReader<>(testConf, parquetFile)) {
    GenericRecord readBack = parquetReader.read();

    // The enum is expected as a plain string when "compat" is set, as an EnumSymbol otherwise.
    final Object expectedEnum;
    if (compat) {
      expectedEnum = "a";
    } else {
      expectedEnum = new GenericData.EnumSymbol(allTypesSchema.getField("myenum").schema(), "a");
    }

    assertNotNull(readBack);
    assertEquals(null, readBack.get("mynull"));
    assertEquals(true, readBack.get("myboolean"));
    assertEquals(1, readBack.get("myint"));
    assertEquals(2L, readBack.get("mylong"));
    assertEquals(3.1f, readBack.get("myfloat"));
    assertEquals(4.1, readBack.get("mydouble"));
    assertEquals(ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)), readBack.get("mybytes"));
    assertEquals(str("hello"), readBack.get("mystring"));
    assertEquals(expectedEnum, readBack.get("myenum"));
    assertEquals(nested, readBack.get("mynestedrecord"));
    assertEquals(oneTwoThree, readBack.get("myarray"));
    assertEquals(noIntegers, readBack.get("myemptyarray"));
    assertEquals(oneTwoThree, readBack.get("myoptionalarray"));
    assertEquals(oneTwoThree, readBack.get("myarrayofoptional"));
    assertEquals(ImmutableMap.of(str("a"), 1, str("b"), 2), readBack.get("mymap"));
    assertEquals(noEntries, readBack.get("myemptymap"));
    assertEquals(oneByteFixed, readBack.get("myfixed"));
  }
}
 
Example 20
/**
 * Tries to write a record whose "myarrayofoptional" field contains null elements with the
 * AvroParquetWriter(file, schema) constructor and expects the attempt to fail with a message
 * mentioning "parquet.avro.write-old-list-structure".
 */
@Test
public void testArrayWithNullValues() throws Exception {
  Schema allTypesSchema = new Schema.Parser().parse(
      Resources.getResource("all.avsc").openStream());

  // createTempFile is used only to obtain a unique path: the file itself is deleted again
  // before the writer is opened on that path.
  File placeholder = File.createTempFile(getClass().getSimpleName(), ".tmp");
  placeholder.deleteOnExit();
  placeholder.delete();
  Path parquetFile = new Path(placeholder.getPath());

  GenericData.Record nested = new GenericRecordBuilder(
      allTypesSchema.getField("mynestedrecord").schema())
      .set("mynestedint", 1)
      .build();
  List<Integer> oneTwoThree = Arrays.asList(1, 2, 3);
  GenericData.Array<Integer> genericInts = new GenericData.Array<Integer>(
      Schema.createArray(Schema.create(Schema.Type.INT)), oneTwoThree);
  GenericFixed oneByteFixed = new GenericData.Fixed(
      Schema.createFixed("fixed", null, null, 1), new byte[] { (byte) 65 });
  List<Integer> noIntegers = new ArrayList<Integer>();
  ImmutableMap noEntries = new ImmutableMap.Builder<String, Integer>().build();

  // The array value that is expected to make the write fail.
  Schema arrayOfOptionalInts = Schema.createArray(
      optional(Schema.create(Schema.Type.INT)));
  GenericData.Array<Integer> intsWithNulls = new GenericData.Array<>(
      arrayOfOptionalInts, Arrays.asList(1, null, 2, null, 3));

  GenericData.Record toWrite = new GenericRecordBuilder(allTypesSchema)
      .set("mynull", null)
      .set("myboolean", true)
      .set("myint", 1)
      .set("mylong", 2L)
      .set("myfloat", 3.1f)
      .set("mydouble", 4.1)
      .set("mybytes", ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)))
      .set("mystring", "hello")
      .set("mynestedrecord", nested)
      .set("myenum", "a")
      .set("myarray", genericInts)
      .set("myemptyarray", noIntegers)
      .set("myoptionalarray", genericInts)
      .set("myarrayofoptional", intsWithNulls)
      .set("mymap", ImmutableMap.of("a", 1, "b", 2))
      .set("myemptymap", noEntries)
      .set("myfixed", oneByteFixed)
      .build();

  try (AvroParquetWriter<GenericRecord> parquetWriter = new AvroParquetWriter<>(parquetFile, allTypesSchema)) {
    parquetWriter.write(toWrite);
    fail("Should not succeed writing an array with null values");
  } catch (Exception expected) {
    Assert.assertTrue("Error message should provide context and help",
        expected.getMessage().contains("parquet.avro.write-old-list-structure"));
  }
}