org.apache.avro.generic.GenericFixed Java Examples

The following examples show how to use org.apache.avro.generic.GenericFixed. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AvroColumnDecoder.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a decoded Avro value into a {@link Slice} for the given column type.
 *
 * <p>Supported combinations, as implemented below:
 * <ul>
 *   <li>{@code VarcharType} with a {@code CharSequence} or {@code GenericEnumSymbol}: the value's
 *       {@code toString()} is UTF-8 encoded and passed through {@code truncateToLength}.</li>
 *   <li>{@code VarbinaryType} with a {@code ByteBuffer} or {@code GenericFixed}: the bytes are wrapped
 *       (not copied) by {@code Slices.wrappedBuffer}.</li>
 * </ul>
 *
 * @param value the decoded Avro value; may be null, in which case the conversion is rejected
 * @param type the target column type
 * @param columnName the column name, used only in the error message
 * @return the slice representation of {@code value}
 * @throws PrestoException with {@code DECODER_CONVERSION_NOT_SUPPORTED} for any other combination
 */
private static Slice getSlice(Object value, Type type, String columnName)
{
    if (type instanceof VarcharType && (value instanceof CharSequence || value instanceof GenericEnumSymbol)) {
        return truncateToLength(utf8Slice(value.toString()), type);
    }

    if (type instanceof VarbinaryType) {
        if (value instanceof ByteBuffer) {
            return Slices.wrappedBuffer((ByteBuffer) value);
        }
        if (value instanceof GenericFixed) {
            return Slices.wrappedBuffer(((GenericFixed) value).bytes());
        }
    }

    // A null value matches none of the instanceof checks above. Guard the class lookup so the
    // caller receives the intended PrestoException instead of a NullPointerException.
    Object valueClass = (value == null) ? null : value.getClass();
    throw new PrestoException(DECODER_CONVERSION_NOT_SUPPORTED, format("cannot decode object of '%s' as '%s' for column '%s'", valueClass, type, columnName));
}
 
Example #2
Source File: TestAvroTypeUtil.java    From nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Converts the string "2.5" against a fixed schema carrying a decimal(18, 8) logical type and
 * checks that decoding the resulting fixed value yields 2.5 at scale 8.
 */
@Test
public void testFixedDecimalConversion(){
    // An 18-byte fixed schema annotated with the decimal(18, 8) logical type.
    final Schema fixedSchema = Schema.createFixed("mydecimal", "no doc", "myspace", 18);
    final LogicalTypes.Decimal logicalDecimal = LogicalTypes.decimal(18, 8);
    logicalDecimal.addToSchema(fixedSchema);

    final Object avroValue = AvroTypeUtil.convertToAvroObject("2.5", fixedSchema, StandardCharsets.UTF_8);
    assertTrue(avroValue instanceof GenericFixed);

    // Decode the fixed bytes back into a BigDecimal and compare at the schema's scale.
    final Conversions.DecimalConversion decoder = new Conversions.DecimalConversion();
    final BigDecimal decoded = decoder.fromFixed((GenericFixed) avroValue, fixedSchema, logicalDecimal);
    final BigDecimal expected = new BigDecimal("2.5").setScale(8);
    assertEquals(expected, decoded);
}
 
Example #3
Source File: AvroRowDataDeserializationSchema.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a converter that turns an Avro decimal value into {@link DecimalData} using the
 * precision and scale of the given logical type.
 *
 * <p>The returned converter accepts a {@link GenericFixed}, a {@link ByteBuffer}, or a raw
 * {@code byte[]} holding the unscaled value. Any other input type fails the final cast with a
 * {@link ClassCastException}.
 *
 * @param decimalType the decimal type supplying precision and scale
 * @return a converter producing {@link DecimalData} from the unscaled bytes
 */
private static DeserializationRuntimeConverter createDecimalConverter(DecimalType decimalType) {
	final int precision = decimalType.getPrecision();
	final int scale = decimalType.getScale();
	return avroObject -> {
		final byte[] bytes;
		if (avroObject instanceof GenericFixed) {
			bytes = ((GenericFixed) avroObject).bytes();
		} else if (avroObject instanceof ByteBuffer) {
			// Read through a duplicate so the source buffer's position is left untouched;
			// otherwise converting the same datum a second time would see zero remaining bytes.
			final ByteBuffer view = ((ByteBuffer) avroObject).duplicate();
			bytes = new byte[view.remaining()];
			view.get(bytes);
		} else {
			bytes = (byte[]) avroObject;
		}
		return DecimalData.fromUnscaledBytes(bytes, precision, scale);
	};
}
 
Example #4
Source File: AvroUtils.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * For subtypes of {@link GenericFixed}, builds the stack manipulation that constructs the fixed
 * type from a {@code byte[]}; every other type is delegated to the superclass.
 */
@Override
protected StackManipulation convertDefault(TypeDescriptor<?> type) {
  if (!type.isSubtypeOf(TypeDescriptor.of(GenericFixed.class))) {
    return super.convertDefault(type);
  }

  // Emits the equivalent of: return new T((byte[]) value);
  final ForLoadedType bytesType = new ForLoadedType(byte[].class);
  final ForLoadedType fixedType = new ForLoadedType(type.getRawType());

  // Allocate the instance; the duplicated reference is the one consumed by the constructor call.
  final StackManipulation allocate = TypeCreation.of(fixedType);
  // The loaded parameter is cast to byte[] before being handed to the constructor.
  final StackManipulation castToBytes = TypeCasting.to(bytesType);
  // Invoke the single constructor that takes exactly one byte[] argument.
  final StackManipulation callConstructor =
      MethodInvocation.invoke(
          fixedType
              .getDeclaredMethods()
              .filter(
                  ElementMatchers.isConstructor()
                      .and(ElementMatchers.takesArguments(bytesType)))
              .getOnly());

  return new Compound(allocate, Duplication.SINGLE, readValue, castToBytes, callConstructor);
}
 
Example #5
Source File: AvroUtils.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * For subtypes of {@link GenericFixed}, builds the stack manipulation that reads the value and
 * calls its {@code bytes()} method; every other type is delegated to the superclass.
 */
@Override
protected StackManipulation convertDefault(TypeDescriptor<?> type) {
  if (!type.isSubtypeOf(TypeDescriptor.of(GenericFixed.class))) {
    return super.convertDefault(type);
  }

  // Emits the equivalent of: return value.bytes();
  final ForLoadedType fixedInterface = new ForLoadedType(GenericFixed.class);
  final ForLoadedType bytesType = new ForLoadedType(byte[].class);
  final StackManipulation callBytes =
      MethodInvocation.invoke(
          fixedInterface
              .getDeclaredMethods()
              .filter(
                  ElementMatchers.named("bytes")
                      .and(ElementMatchers.returns(bytesType)))
              .getOnly());

  return new Compound(readValue, callBytes);
}
 
Example #6
Source File: AvroDecoderTestUtil.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that a decoded primitive value matches the expected Avro value.
 *
 * <p>Comparison rules, in order:
 * <ul>
 *   <li>if either side is null, both must be null;</li>
 *   <li>a {@code CharSequence} actual is compared by {@code toString()} against a
 *       {@code CharSequence} or {@code GenericEnumSymbol} expected;</li>
 *   <li>a {@code SqlVarbinary} actual is compared byte-wise against a {@code GenericFixed}
 *       or the backing array of a {@code ByteBuffer};</li>
 *   <li>integral values are compared as {@code long}, real values as {@code double};</li>
 *   <li>anything else falls back to {@code assertEquals}.</li>
 * </ul>
 *
 * @param actual the value produced by the decoder
 * @param expected the value originally written to the Avro record
 */
public static void checkPrimitiveValue(Object actual, Object expected)
{
    if (actual == null || expected == null) {
        assertNull(expected);
        assertNull(actual);
    }
    else if (actual instanceof CharSequence) {
        assertTrue(expected instanceof CharSequence || expected instanceof GenericEnumSymbol);
        assertEquals(actual.toString(), expected.toString());
    }
    else if (actual instanceof SqlVarbinary) {
        if (expected instanceof GenericFixed) {
            assertEquals(((SqlVarbinary) actual).getBytes(), ((GenericFixed) expected).bytes());
        }
        else if (expected instanceof ByteBuffer) {
            assertEquals(((SqlVarbinary) actual).getBytes(), ((ByteBuffer) expected).array());
        }
        else {
            // The offending type here is the expected value's; actual is already known to be SqlVarbinary.
            fail(format("Unexpected value type %s", expected.getClass()));
        }
    }
    else if (isIntegralType(actual) && isIntegralType(expected)) {
        assertEquals(((Number) actual).longValue(), ((Number) expected).longValue());
    }
    else if (isRealType(actual) && isRealType(expected)) {
        assertEquals(((Number) actual).doubleValue(), ((Number) expected).doubleValue());
    }
    else {
        assertEquals(actual, expected);
    }
}
 
Example #7
Source File: PigAvroDatumReader.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Reads a fixed value through the superclass and re-wraps its bytes as a Pig
 * {@link DataByteArray}.
 */
@Override
protected Object readFixed(Object old, Schema expected, Decoder in) throws IOException {
    final Object decoded = super.readFixed(old, expected, in);
    final byte[] payload = ((GenericFixed) decoded).bytes();
    return new DataByteArray(payload);
}
 
Example #8
Source File: PigAvroDatumReader.java    From Cubert with Apache License 2.0 5 votes vote down vote up
/**
 * Delegates the fixed read to the superclass, then exposes the resulting bytes as a Pig
 * {@link DataByteArray} instead of an Avro fixed.
 */
@Override
protected Object readFixed(Object old, Schema expected, Decoder in) throws IOException {
    final GenericFixed avroFixed = (GenericFixed) super.readFixed(old, expected, in);
    return new DataByteArray(avroFixed.bytes());
}
 
Example #9
Source File: GeneratorFunctions.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Produces a fixed value of {@code size} random bytes drawn from the context's random source.
 * The schema is parsed from {@code jsonSchema} on first use and cached in {@code schema}.
 */
@Override
public GenericFixed apply(GeneratorContext input) {
    // Lazily materialise the schema from its JSON form the first time it is needed.
    if (null == schema) {
        final Schema.Parser parser = new Schema.Parser();
        schema = parser.parse(jsonSchema);
    }

    final byte[] randomBytes = new byte[size];
    input.getRandom().nextBytes(randomBytes);
    return new GenericData.Fixed(schema, randomBytes);
}
 
Example #10
Source File: AvroUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Maps any subtype of {@link GenericFixed} to {@code byte[]}; all other types are resolved by
 * the superclass.
 */
@Override
protected java.lang.reflect.Type convertDefault(TypeDescriptor<?> type) {
  final boolean isFixed = type.isSubtypeOf(TypeDescriptor.of(GenericFixed.class));
  if (!isFixed) {
    return super.convertDefault(type);
  }
  return byte[].class;
}
 
Example #11
Source File: UUIDConversion.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Encodes a UUID as a 16-byte fixed: most significant long first, then least significant,
 * both big-endian.
 */
@Override
public GenericFixed toFixed(UUID value, Schema schema, LogicalType type) {
  final byte[] uuidBytes = new byte[16];
  // The wrapper writes straight into uuidBytes, so the array is fully populated afterwards.
  ByteBuffer.wrap(uuidBytes)
      .order(ByteOrder.BIG_ENDIAN)
      .putLong(value.getMostSignificantBits())
      .putLong(value.getLeastSignificantBits());
  return new GenericData.Fixed(schema, uuidBytes);
}
 
Example #12
Source File: UUIDConversion.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes a UUID from a fixed value: the first eight bytes are the most significant long and
 * the next eight the least significant, both read big-endian.
 */
@Override
public UUID fromFixed(GenericFixed value, Schema schema, LogicalType type) {
  final ByteBuffer reader = ByteBuffer.wrap(value.bytes()).order(ByteOrder.BIG_ENDIAN);
  final long high = reader.getLong();
  final long low = reader.getLong();
  return new UUID(high, low);
}
 
Example #13
Source File: UUIDConversion.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Rebuilds a UUID from the bytes of a fixed value, reading two consecutive big-endian longs
 * (most significant bits first).
 */
@Override
public UUID fromFixed(GenericFixed value, Schema schema, LogicalType type) {
  final ByteBuffer source = ByteBuffer.wrap(value.bytes());
  source.order(ByteOrder.BIG_ENDIAN);
  // Constructor arguments are evaluated left to right, so the first getLong() yields the high bits.
  return new UUID(source.getLong(), source.getLong());
}
 
Example #14
Source File: UUIDConversion.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Serialises a UUID into a 16-byte fixed value as two big-endian longs, most significant
 * bits first.
 */
@Override
public GenericFixed toFixed(UUID value, Schema schema, LogicalType type) {
  final long high = value.getMostSignificantBits();
  final long low = value.getLeastSignificantBits();
  final ByteBuffer target = ByteBuffer.allocate(16).order(ByteOrder.BIG_ENDIAN);
  target.putLong(high).putLong(low);
  return new GenericData.Fixed(schema, target.array());
}
 
Example #15
Source File: AvroWriteSupportInt96Avro17.java    From datacollector with Apache License 2.0 4 votes vote down vote up
/**
 * Writes a single Avro value to {@code recordConsumer}, dispatching on the type of the
 * non-null variant of {@code avroSchema}.
 *
 * <p>LONG values whose Parquet primitive type is INT96 are written as a 12-byte binary
 * timestamp; all other LONG values are written as plain longs. Schema types not matched by
 * any branch below result in nothing being written.
 *
 * @param type the Parquet type of the field being written
 * @param avroSchema the Avro schema of the value; its non-null variant is resolved first
 * @param value the value to write; it is cast according to the resolved Avro type
 */
@SuppressWarnings("unchecked")
private void writeValue(Type type, Schema avroSchema, Object value) {
  Schema nonNullAvroSchema = AvroSchemaConverter.getNonNull(avroSchema);
  Schema.Type avroType = nonNullAvroSchema.getType();
  if (avroType.equals(Schema.Type.BOOLEAN)) {
    recordConsumer.addBoolean((Boolean) value);
  } else if (avroType.equals(Schema.Type.INT)) {
    // A Character is written as its numeric char value; everything else must be a Number.
    if (value instanceof Character) {
      recordConsumer.addInteger((Character) value);
    } else {
      recordConsumer.addInteger(((Number) value).intValue());
    }
  } else if (avroType.equals(Schema.Type.LONG)) {
    if (type.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveType.PrimitiveTypeName.INT96)) {
      final long NANOS_PER_HOUR = TimeUnit.HOURS.toNanos(1);
      final long NANOS_PER_MINUTE = TimeUnit.MINUTES.toNanos(1);
      final long NANOS_PER_SECOND = TimeUnit.SECONDS.toNanos(1);

      // The long is passed to new Date(...) below, i.e. it is treated as epoch milliseconds.
      long timestamp = ((Number) value).longValue();
      // Interpret the instant in the configured time zone, falling back to UTC when none is set.
      Calendar calendar;
      if (timeZoneId != null && ! timeZoneId.isEmpty()) {
        calendar = Calendar.getInstance(TimeZone.getTimeZone(timeZoneId));
      } else {
        calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
      }
      calendar.setTime(new Date(timestamp));

      // Calculate Julian days and nanoseconds in the day
      // Calendar.MONTH is zero-based, hence the +1 when building the LocalDate.
      LocalDate dt = LocalDate.of(calendar.get(Calendar.YEAR), calendar.get(Calendar.MONTH)+1, calendar.get(Calendar.DAY_OF_MONTH));
      int julianDays = (int) JulianFields.JULIAN_DAY.getFrom(dt);
      // NOTE(review): only hour, minute and second contribute here; Calendar.MILLISECOND is not
      // added, so the sub-second part of the timestamp appears to be dropped - confirm intended.
      long nanos = (calendar.get(Calendar.HOUR_OF_DAY) * NANOS_PER_HOUR)
          + (calendar.get(Calendar.MINUTE) * NANOS_PER_MINUTE)
          + (calendar.get(Calendar.SECOND) * NANOS_PER_SECOND);

      // Write INT96 timestamp
      // Layout: 8 bytes nanos-of-day followed by 4 bytes Julian day, both little-endian.
      byte[] timestampBuffer = new byte[12];
      ByteBuffer buf = ByteBuffer.wrap(timestampBuffer);
      buf.order(ByteOrder.LITTLE_ENDIAN).putLong(nanos).putInt(julianDays);

      // This is the properly encoded INT96 timestamp
      Binary timestampBinary = Binary.fromReusedByteArray(timestampBuffer);
      recordConsumer.addBinary(timestampBinary);
    } else {
      recordConsumer.addLong(((Number) value).longValue());
    }
  } else if (avroType.equals(Schema.Type.FLOAT)) {
    recordConsumer.addFloat(((Number) value).floatValue());
  } else if (avroType.equals(Schema.Type.DOUBLE)) {
    recordConsumer.addDouble(((Number) value).doubleValue());
  } else if (avroType.equals(Schema.Type.BYTES)) {
    // BYTES may arrive either as a raw array or as a ByteBuffer.
    if (value instanceof byte[]) {
      recordConsumer.addBinary(Binary.fromReusedByteArray((byte[]) value));
    } else {
      recordConsumer.addBinary(Binary.fromReusedByteBuffer((ByteBuffer) value));
    }
  } else if (avroType.equals(Schema.Type.STRING)) {
    recordConsumer.addBinary(fromAvroString(value));
  } else if (avroType.equals(Schema.Type.RECORD)) {
    writeRecord(type.asGroupType(), nonNullAvroSchema, value);
  } else if (avroType.equals(Schema.Type.ENUM)) {
    // Enums are written as the binary form of their toString() value.
    recordConsumer.addBinary(Binary.fromString(value.toString()));
  } else if (avroType.equals(Schema.Type.ARRAY)) {
    listWriter.writeList(type.asGroupType(), nonNullAvroSchema, value);
  } else if (avroType.equals(Schema.Type.MAP)) {
    writeMap(type.asGroupType(), nonNullAvroSchema, (Map<CharSequence, ?>) value);
  } else if (avroType.equals(Schema.Type.UNION)) {
    writeUnion(type.asGroupType(), nonNullAvroSchema, value);
  } else if (avroType.equals(Schema.Type.FIXED)) {
    recordConsumer.addBinary(Binary.fromReusedByteArray(((GenericFixed) value).bytes()));
  }
  // NOTE(review): there is no trailing else, so an unmatched schema type is silently ignored.
}
 
Example #16
Source File: AvroParquetMorphlineTest.java    From kite with Apache License 2.0 4 votes vote down vote up
/**
 * Writes one record covering every field of {@code all.avsc} to a Parquet file, reads it back
 * through the {@code readAvroParquetFileWithProjectionSubSchema} morphline, and checks each
 * field of the single resulting record.
 */
@Test
public void testAll() throws Exception {
  Schema schema = new Schema.Parser().parse(new File("src/test/resources/test-avro-schemas/all.avsc"));

  // Reserve a unique temp path, then delete the file so the writer can create it itself.
  File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
  tmp.deleteOnExit();
  tmp.delete();
  Path file = new Path(tmp.getPath());

  AvroParquetWriter<GenericRecord> writer = new
      AvroParquetWriter<GenericRecord>(file, schema);

  GenericData.Record nestedRecord = new GenericRecordBuilder(
      schema.getField("mynestedrecord").schema())
          .set("mynestedint", 1).build();

  List<Integer> integerArray = Arrays.asList(1, 2, 3);
  GenericData.Array<Integer> genericIntegerArray = new GenericData.Array<Integer>(
      Schema.createArray(Schema.create(Schema.Type.INT)), integerArray);

  GenericFixed genericFixed = new GenericData.Fixed(
      Schema.createFixed("fixed", null, null, 1), new byte[] { (byte) 65 });

  List<Integer> emptyArray = new ArrayList<Integer>();
  // Fully parameterized (was a raw ImmutableMap) so the declaration no longer triggers
  // raw-type/unchecked warnings.
  ImmutableMap<String, Integer> emptyMap = new ImmutableMap.Builder<String, Integer>().build();

  GenericData.Record record = new GenericRecordBuilder(schema)
      .set("mynull", null)
      .set("myboolean", true)
      .set("myint", 1)
      .set("mylong", 2L)
      .set("myfloat", 3.1f)
      .set("mydouble", 4.1)
      .set("mybytes", ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)))
      .set("mystring", "hello")
      .set("mynestedrecord", nestedRecord)
      .set("myenum", "a")
      .set("myarray", genericIntegerArray)
      .set("myemptyarray", emptyArray)
      .set("myoptionalarray", genericIntegerArray)
      .set("mymap", ImmutableMap.of("a", 1, "b", 2))
      .set("myemptymap", emptyMap)
      .set("myfixed", genericFixed)
      .build();

  writer.write(record);
  writer.close();

  morphline = createMorphline("test-morphlines/readAvroParquetFileWithProjectionSubSchema");

  Record morphlineRecord = new Record();
  morphlineRecord.put(ReadAvroParquetFileBuilder.FILE_UPLOAD_URL, file.toString());
  collector.reset();

  assertTrue(morphline.process(morphlineRecord));

  assertEquals(1, collector.getRecords().size());
  GenericData.Record actualRecord = (GenericData.Record) collector.getFirstRecord().getFirstValue(Fields.ATTACHMENT_BODY);
  assertNotNull(actualRecord);
  assertEquals(null, actualRecord.get("mynull"));
  assertEquals(true, actualRecord.get("myboolean"));
  assertEquals(1, actualRecord.get("myint"));
  assertEquals(2L, actualRecord.get("mylong"));
  // "myfloat" was written as 3.1f but is expected back as null - presumably the projection
  // sub-schema configured in the morphline omits it; confirm against the morphline config.
  assertEquals(null, actualRecord.get("myfloat"));
  assertEquals(4.1, actualRecord.get("mydouble"));
  assertEquals(ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)), actualRecord.get("mybytes"));
  assertEquals("hello", actualRecord.get("mystring"));
  assertEquals("a", actualRecord.get("myenum"));
  assertEquals(nestedRecord, actualRecord.get("mynestedrecord"));
  assertEquals(integerArray, actualRecord.get("myarray"));
  assertEquals(emptyArray, actualRecord.get("myemptyarray"));
  assertEquals(integerArray, actualRecord.get("myoptionalarray"));
  assertEquals(ImmutableMap.of("a", 1, "b", 2), actualRecord.get("mymap"));
  assertEquals(emptyMap, actualRecord.get("myemptymap"));
  assertEquals(genericFixed, actualRecord.get("myfixed"));
}
 
Example #17
Source File: Generator.java    From ksql-fork-with-deep-learning-function with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a fixed value for the given schema, filled with random bytes from {@code random}.
 * The byte count is taken from the schema's fixed size.
 */
private GenericFixed generateFixed(Schema schema) {
  final int length = schema.getFixedSize();
  final byte[] payload = new byte[length];
  random.nextBytes(payload);
  return new GenericData.Fixed(schema, payload);
}
 
Example #18
Source File: ParquetAvro.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Decodes a fixed decimal via the superclass, substituting the logical type looked up in
 * {@code decimalsByScale} by the incoming type's scale.
 */
@Override
public BigDecimal fromFixed(GenericFixed value, Schema schema, LogicalType type) {
  final ParquetDecimal parquetDecimal = (ParquetDecimal) type;
  final int scale = parquetDecimal.scale();
  return super.fromFixed(value, schema, decimalsByScale[scale]);
}
 
Example #19
Source File: TestReadWriteOldListBehavior.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that writing a record whose array field contains null elements fails, and that the
 * failure message mentions the {@code parquet.avro.write-old-list-structure} setting.
 */
@Test
public void testArrayWithNullValues() throws Exception {
  Schema schema = new Schema.Parser().parse(
      Resources.getResource("all.avsc").openStream());

  // Reserve a unique temp path, then delete the file so the writer can create it itself.
  File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
  tmp.deleteOnExit();
  tmp.delete();
  Path file = new Path(tmp.getPath());

  GenericData.Record nestedRecord = new GenericRecordBuilder(
      schema.getField("mynestedrecord").schema())
      .set("mynestedint", 1).build();

  List<Integer> integerArray = Arrays.asList(1, 2, 3);
  GenericData.Array<Integer> genericIntegerArray = new GenericData.Array<Integer>(
      Schema.createArray(Schema.create(Schema.Type.INT)), integerArray);

  GenericFixed genericFixed = new GenericData.Fixed(
      Schema.createFixed("fixed", null, null, 1), new byte[] { (byte) 65 });

  List<Integer> emptyArray = new ArrayList<Integer>();
  // Fully parameterized (was a raw ImmutableMap) so the declaration no longer triggers
  // raw-type/unchecked warnings.
  ImmutableMap<String, Integer> emptyMap = new ImmutableMap.Builder<String, Integer>().build();

  // The array under test: optional ints with nulls interleaved.
  Schema arrayOfOptionalIntegers = Schema.createArray(
      optional(Schema.create(Schema.Type.INT)));
  GenericData.Array<Integer> genericIntegerArrayWithNulls =
      new GenericData.Array<>(
          arrayOfOptionalIntegers,
          Arrays.asList(1, null, 2, null, 3));

  GenericData.Record record = new GenericRecordBuilder(schema)
      .set("mynull", null)
      .set("myboolean", true)
      .set("myint", 1)
      .set("mylong", 2L)
      .set("myfloat", 3.1f)
      .set("mydouble", 4.1)
      .set("mybytes", ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)))
      .set("mystring", "hello")
      .set("mynestedrecord", nestedRecord)
      .set("myenum", "a")
      .set("myarray", genericIntegerArray)
      .set("myemptyarray", emptyArray)
      .set("myoptionalarray", genericIntegerArray)
      .set("myarrayofoptional", genericIntegerArrayWithNulls)
      .set("mymap", ImmutableMap.of("a", 1, "b", 2))
      .set("myemptymap", emptyMap)
      .set("myfixed", genericFixed)
      .build();

  try (AvroParquetWriter<GenericRecord> writer = new AvroParquetWriter<>(file, schema)) {
    writer.write(record);
    // fail() raises an AssertionError, which the catch (Exception) below does not intercept.
    fail("Should not succeed writing an array with null values");
  } catch (Exception e) {
    Assert.assertTrue("Error message should provide context and help",
      e.getMessage().contains("parquet.avro.write-old-list-structure"));
  }
}
 
Example #20
Source File: TestReadWriteOldListBehavior.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Round-trips one record covering every field of {@code all.avsc} through a Parquet file and
 * checks each field of the record read back.
 */
@Test
public void testAll() throws Exception {
  Schema schema = new Schema.Parser().parse(
      Resources.getResource("all.avsc").openStream());

  // Reserve a unique temp path, then delete the file so the writer can create it itself.
  File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
  tmp.deleteOnExit();
  tmp.delete();
  Path file = new Path(tmp.getPath());

  GenericData.Record nestedRecord = new GenericRecordBuilder(
      schema.getField("mynestedrecord").schema())
          .set("mynestedint", 1).build();

  List<Integer> integerArray = Arrays.asList(1, 2, 3);
  GenericData.Array<Integer> genericIntegerArray = new GenericData.Array<Integer>(
      Schema.createArray(Schema.create(Schema.Type.INT)), integerArray);

  GenericFixed genericFixed = new GenericData.Fixed(
      Schema.createFixed("fixed", null, null, 1), new byte[]{(byte) 65});

  List<Integer> emptyArray = new ArrayList<Integer>();
  // Fully parameterized (was a raw ImmutableMap) so the declaration no longer triggers
  // raw-type/unchecked warnings.
  ImmutableMap<String, Integer> emptyMap = new ImmutableMap.Builder<String, Integer>().build();

  try(AvroParquetWriter<GenericRecord> writer = new AvroParquetWriter<>(file, schema)) {
    GenericData.Record record = new GenericRecordBuilder(schema)
      .set("mynull", null)
      .set("myboolean", true)
      .set("myint", 1)
      .set("mylong", 2L)
      .set("myfloat", 3.1f)
      .set("mydouble", 4.1)
      .set("mybytes", ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)))
      .set("mystring", "hello")
      .set("mynestedrecord", nestedRecord)
      .set("myenum", "a")
      .set("myarray", genericIntegerArray)
      .set("myemptyarray", emptyArray)
      .set("myoptionalarray", genericIntegerArray)
      .set("myarrayofoptional", genericIntegerArray)
      .set("mymap", ImmutableMap.of("a", 1, "b", 2))
      .set("myemptymap", emptyMap)
      .set("myfixed", genericFixed)
      .build();

    writer.write(record);
  }

  try(AvroParquetReader<GenericRecord> reader = new AvroParquetReader<>(testConf, file)) {
    GenericRecord nextRecord = reader.read();

    // The expected enum representation depends on the compat flag: a plain string when set,
    // otherwise a GenericData.EnumSymbol.
    Object expectedEnumSymbol = compat ? "a" :
      new GenericData.EnumSymbol(schema.getField("myenum").schema(), "a");

    assertNotNull(nextRecord);
    assertEquals(null, nextRecord.get("mynull"));
    assertEquals(true, nextRecord.get("myboolean"));
    assertEquals(1, nextRecord.get("myint"));
    assertEquals(2L, nextRecord.get("mylong"));
    assertEquals(3.1f, nextRecord.get("myfloat"));
    assertEquals(4.1, nextRecord.get("mydouble"));
    assertEquals(ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)), nextRecord.get("mybytes"));
    assertEquals(str("hello"), nextRecord.get("mystring"));
    assertEquals(expectedEnumSymbol, nextRecord.get("myenum"));
    assertEquals(nestedRecord, nextRecord.get("mynestedrecord"));
    assertEquals(integerArray, nextRecord.get("myarray"));
    assertEquals(emptyArray, nextRecord.get("myemptyarray"));
    assertEquals(integerArray, nextRecord.get("myoptionalarray"));
    assertEquals(integerArray, nextRecord.get("myarrayofoptional"));
    assertEquals(ImmutableMap.of(str("a"), 1, str("b"), 2), nextRecord.get("mymap"));
    assertEquals(emptyMap, nextRecord.get("myemptymap"));
    assertEquals(genericFixed, nextRecord.get("myfixed"));
  }
}
 
Example #21
Source File: TestReadWrite.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Round-trips one record covering every field of {@code all.avsc}, including an array with
 * null elements, through a Parquet file and checks each field of the record read back.
 */
@Test
public void testAll() throws Exception {
  final Schema avroSchema = new Schema.Parser().parse(
      Resources.getResource("all.avsc").openStream());
  final Path parquetFile = new Path(createTempFile().getPath());

  // Fixture values shared by the write and the verification phases.
  final List<Integer> oneTwoThree = Arrays.asList(1, 2, 3);
  final GenericData.Record nested =
      new GenericRecordBuilder(avroSchema.getField("mynestedrecord").schema())
          .set("mynestedint", 1)
          .build();
  final List<Integer> noIntegers = new ArrayList<>();
  final Schema optionalIntArraySchema =
      Schema.createArray(optional(Schema.create(Schema.Type.INT)));
  final GenericData.Array<Integer> intsWithNulls =
      new GenericData.Array<>(optionalIntArraySchema, Arrays.asList(1, null, 2, null, 3));
  final GenericFixed singleByteFixed = new GenericData.Fixed(
      Schema.createFixed("fixed", null, null, 1), new byte[]{(byte) 65});
  final ImmutableMap<String, Integer> noEntries =
      new ImmutableMap.Builder<String, Integer>().build();

  // Write phase: a single record through a builder-configured writer.
  try (ParquetWriter<GenericRecord> out = AvroParquetWriter
      .<GenericRecord>builder(parquetFile)
      .withSchema(avroSchema)
      .withConf(testConf)
      .build()) {

    final GenericData.Array<Integer> avroInts = new GenericData.Array<>(
        Schema.createArray(Schema.create(Schema.Type.INT)), oneTwoThree);

    final GenericData.Record written = new GenericRecordBuilder(avroSchema)
        .set("mynull", null)
        .set("myboolean", true)
        .set("myint", 1)
        .set("mylong", 2L)
        .set("myfloat", 3.1f)
        .set("mydouble", 4.1)
        .set("mybytes", ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)))
        .set("mystring", "hello")
        .set("mynestedrecord", nested)
        .set("myenum", "a")
        .set("myarray", avroInts)
        .set("myemptyarray", noIntegers)
        .set("myoptionalarray", avroInts)
        .set("myarrayofoptional", intsWithNulls)
        .set("mymap", ImmutableMap.of("a", 1, "b", 2))
        .set("myemptymap", noEntries)
        .set("myfixed", singleByteFixed)
        .build();

    out.write(written);
  }

  // Read phase: pull the first record back out of the file.
  final GenericRecord readBack;
  try (AvroParquetReader<GenericRecord> in = new AvroParquetReader<>(testConf, parquetFile)) {
    readBack = in.read();
  }

  // The expected enum representation depends on the compat flag.
  final Object expectedEnum;
  if (compat) {
    expectedEnum = "a";
  } else {
    expectedEnum = new GenericData.EnumSymbol(avroSchema.getField("myenum").schema(), "a");
  }

  assertNotNull(readBack);
  assertEquals(null, readBack.get("mynull"));
  assertEquals(true, readBack.get("myboolean"));
  assertEquals(1, readBack.get("myint"));
  assertEquals(2L, readBack.get("mylong"));
  assertEquals(3.1f, readBack.get("myfloat"));
  assertEquals(4.1, readBack.get("mydouble"));
  assertEquals(ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)), readBack.get("mybytes"));
  assertEquals(str("hello"), readBack.get("mystring"));
  assertEquals(expectedEnum, readBack.get("myenum"));
  assertEquals(nested, readBack.get("mynestedrecord"));
  assertEquals(oneTwoThree, readBack.get("myarray"));
  assertEquals(noIntegers, readBack.get("myemptyarray"));
  assertEquals(oneTwoThree, readBack.get("myoptionalarray"));
  assertEquals(intsWithNulls, readBack.get("myarrayofoptional"));
  assertEquals(ImmutableMap.of(str("a"), 1, str("b"), 2), readBack.get("mymap"));
  assertEquals(noEntries, readBack.get("myemptymap"));
  assertEquals(singleByteFixed, readBack.get("myfixed"));
}
 
Example #22
Source File: AvroWriteSupport.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Emits {@code value} through the {@code recordConsumer} call or nested writer that matches
 * the Avro schema type. Schema types without a case below write nothing.
 * The value must be non-null and the schema must not be nullable.
 *
 * @param type a Parquet type
 * @param avroSchema a non-nullable Avro schema
 * @param value a non-null value to write
 */
@SuppressWarnings("unchecked")
private void writeValueWithoutConversion(Type type, Schema avroSchema, Object value) {
  switch (avroSchema.getType()) {
    case BOOLEAN:
      recordConsumer.addBoolean((Boolean) value);
      return;
    case INT: {
      // A Character is written as its numeric char value; anything else must be a Number.
      final int asInt = (value instanceof Character)
          ? (Character) value
          : ((Number) value).intValue();
      recordConsumer.addInteger(asInt);
      return;
    }
    case LONG:
      recordConsumer.addLong(((Number) value).longValue());
      return;
    case FLOAT:
      recordConsumer.addFloat(((Number) value).floatValue());
      return;
    case DOUBLE:
      recordConsumer.addDouble(((Number) value).doubleValue());
      return;
    case FIXED: {
      final byte[] fixedBytes = ((GenericFixed) value).bytes();
      recordConsumer.addBinary(Binary.fromReusedByteArray(fixedBytes));
      return;
    }
    case BYTES: {
      // BYTES may arrive either as a raw array or as a ByteBuffer.
      final Binary payload = (value instanceof byte[])
          ? Binary.fromReusedByteArray((byte[]) value)
          : Binary.fromReusedByteBuffer((ByteBuffer) value);
      recordConsumer.addBinary(payload);
      return;
    }
    case STRING: {
      // Strings annotated as UUID go through the UUID-specific encoder.
      final boolean annotatedAsUuid =
          type.asPrimitiveType().getLogicalTypeAnnotation() instanceof UUIDLogicalTypeAnnotation;
      if (annotatedAsUuid) {
        recordConsumer.addBinary(fromUUIDString(value));
      } else {
        recordConsumer.addBinary(fromAvroString(value));
      }
      return;
    }
    case RECORD:
      writeRecord(type.asGroupType(), avroSchema, value);
      return;
    case ENUM: {
      final String symbol = value.toString();
      recordConsumer.addBinary(Binary.fromString(symbol));
      return;
    }
    case ARRAY:
      listWriter.writeList(type.asGroupType(), avroSchema, value);
      return;
    case MAP:
      writeMap(type.asGroupType(), avroSchema, (Map<CharSequence, ?>) value);
      return;
    case UNION:
      writeUnion(type.asGroupType(), avroSchema, value);
      return;
  }
}
 
Example #23
Source File: ParquetAvro.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Encodes a decimal as a fixed value via the superclass, substituting the logical type looked
 * up in {@code decimalsByScale} by the incoming type's scale.
 */
@Override
public GenericFixed toFixed(BigDecimal value, Schema schema, LogicalType type) {
  final ParquetDecimal parquetDecimal = (ParquetDecimal) type;
  final int scale = parquetDecimal.scale();
  return super.toFixed(value, schema, decimalsByScale[scale]);
}
 
Example #24
Source File: AvroScanner.java    From tajo with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the next record from the Avro file and copies its projected columns into
 * {@code outTuple}.
 *
 * <p>Null field values become {@code NullDatum}. RECORD, ENUM, MAP and UNION fields are
 * rejected with a {@link RuntimeException}, as is any type without a dedicated case.
 *
 * @return the populated tuple, or null if the end of the file has been reached
 */
@Override
public Tuple next() throws IOException {
  if (!dataFileReader.hasNext()) {
    return null;
  }

  final GenericRecord avroRecord = dataFileReader.next();
  for (int outIndex = 0; outIndex < projectionMap.length; outIndex++) {
    final int sourceIndex = projectionMap[outIndex];
    final Object fieldValue = avroRecord.get(sourceIndex);

    if (fieldValue == null) {
      outTuple.put(outIndex, NullDatum.get());
    } else {
      // Resolve the Avro type from the non-null variant of the field's schema.
      final Schema.Type avroType =
          getNonNull(avroFields.get(sourceIndex).schema()).getType();

      // Resolve the matching Tajo column type.
      final DataType dataType = schema.getColumn(sourceIndex).getDataType();
      final TajoDataTypes.Type tajoType = dataType.getType();

      switch (avroType) {
        case NULL:
          outTuple.put(outIndex, NullDatum.get());
          break;
        case BOOLEAN:
          outTuple.put(outIndex, DatumFactory.createBool((Boolean) fieldValue));
          break;
        case INT:
          outTuple.put(outIndex, convertInt(fieldValue, tajoType));
          break;
        case LONG:
          outTuple.put(outIndex, DatumFactory.createInt8((Long) fieldValue));
          break;
        case FLOAT:
          outTuple.put(outIndex, DatumFactory.createFloat4((Float) fieldValue));
          break;
        case DOUBLE:
          outTuple.put(outIndex, DatumFactory.createFloat8((Double) fieldValue));
          break;
        case BYTES:
          outTuple.put(outIndex, convertBytes(fieldValue, tajoType, dataType));
          break;
        case STRING:
          outTuple.put(outIndex, convertString(fieldValue, tajoType));
          break;
        case FIXED:
          outTuple.put(outIndex, new BlobDatum(((GenericFixed) fieldValue).bytes()));
          break;
        // Complex Avro types are rejected explicitly.
        case RECORD:
          throw new RuntimeException("Avro RECORD not supported.");
        case ENUM:
          throw new RuntimeException("Avro ENUM not supported.");
        case MAP:
          throw new RuntimeException("Avro MAP not supported.");
        case UNION:
          throw new RuntimeException("Avro UNION not supported.");
        default:
          throw new RuntimeException("Unknown type.");
      }
    }
  }
  return outTuple;
}
 
Example #25
Source File: AvroWriteSupportInt96Avro18.java    From datacollector with Apache License 2.0 4 votes vote down vote up
/**
 * Calls an appropriate write method based on the value.
 * Value must not be null and the schema must not be nullable.
 *
 * <p>A LONG whose Parquet primitive type is INT96 is interpreted as epoch milliseconds and is
 * written as a 12-byte little-endian value: the nanoseconds elapsed within the day followed by
 * the Julian day number. Both are computed in the configured time zone, or in UTC when no time
 * zone id is set.
 *
 * @param type a Parquet type
 * @param avroSchema a non-nullable Avro schema
 * @param value a non-null value to write
 */
@SuppressWarnings("unchecked")
private void writeValueWithoutConversion(Type type, Schema avroSchema, Object value) {
  switch (avroSchema.getType()) {
    case BOOLEAN:
      recordConsumer.addBoolean((Boolean) value);
      break;
    case INT:
      if (value instanceof Character) {
        recordConsumer.addInteger((Character) value);
      } else {
        recordConsumer.addInteger(((Number) value).intValue());
      }
      break;
    case LONG:
      if (type.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveType.PrimitiveTypeName.INT96)) {
        final long NANOS_PER_HOUR = TimeUnit.HOURS.toNanos(1);
        final long NANOS_PER_MINUTE = TimeUnit.MINUTES.toNanos(1);
        final long NANOS_PER_SECOND = TimeUnit.SECONDS.toNanos(1);
        final long NANOS_PER_MILLISECOND = TimeUnit.MILLISECONDS.toNanos(1);

        long timestamp = ((Number) value).longValue();
        Calendar calendar;
        if (timeZoneId != null && ! timeZoneId.isEmpty()) {
          calendar = Calendar.getInstance(TimeZone.getTimeZone(timeZoneId));
        } else {
          calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
        }
        calendar.setTime(new Date(timestamp));

        // Calculate Julian days and nanoseconds in the day.
        // Calendar.MONTH is zero-based while LocalDate expects 1..12, hence the +1.
        LocalDate dt = LocalDate.of(calendar.get(Calendar.YEAR), calendar.get(Calendar.MONTH)+1, calendar.get(Calendar.DAY_OF_MONTH));
        int julianDays = (int) JulianFields.JULIAN_DAY.getFrom(dt);
        // Include the millisecond field so the sub-second part of the source timestamp is kept
        // instead of being truncated to whole seconds.
        long nanos = (calendar.get(Calendar.HOUR_OF_DAY) * NANOS_PER_HOUR)
            + (calendar.get(Calendar.MINUTE) * NANOS_PER_MINUTE)
            + (calendar.get(Calendar.SECOND) * NANOS_PER_SECOND)
            + (calendar.get(Calendar.MILLISECOND) * NANOS_PER_MILLISECOND);

        // Write INT96 timestamp: 8 bytes of nanos-of-day, then 4 bytes of Julian day
        byte[] timestampBuffer = new byte[12];
        ByteBuffer buf = ByteBuffer.wrap(timestampBuffer);
        buf.order(ByteOrder.LITTLE_ENDIAN).putLong(nanos).putInt(julianDays);

        // This is the properly encoded INT96 timestamp
        Binary timestampBinary = Binary.fromReusedByteArray(timestampBuffer);
        recordConsumer.addBinary(timestampBinary);
      } else {
        recordConsumer.addLong(((Number) value).longValue());
      }
      break;
    case FLOAT:
      recordConsumer.addFloat(((Number) value).floatValue());
      break;
    case DOUBLE:
      recordConsumer.addDouble(((Number) value).doubleValue());
      break;
    case FIXED:
      recordConsumer.addBinary(Binary.fromReusedByteArray(((GenericFixed) value).bytes()));
      break;
    case BYTES:
      if (value instanceof byte[]) {
        recordConsumer.addBinary(Binary.fromReusedByteArray((byte[]) value));
      } else {
        recordConsumer.addBinary(Binary.fromReusedByteBuffer((ByteBuffer) value));
      }
      break;
    case STRING:
      recordConsumer.addBinary(fromAvroString(value));
      break;
    case RECORD:
      writeRecord(type.asGroupType(), avroSchema, value);
      break;
    case ENUM:
      recordConsumer.addBinary(Binary.fromString(value.toString()));
      break;
    case ARRAY:
      listWriter.writeList(type.asGroupType(), avroSchema, value);
      break;
    case MAP:
      writeMap(type.asGroupType(), avroSchema, (Map<CharSequence, ?>) value);
      break;
    case UNION:
      writeUnion(type.asGroupType(), avroSchema, value);
      break;
    default:
      // Any schema type not listed above is intentionally written as nothing.
      break;
  }
}
 
Example #26
Source File: AvroUtils.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Converts an Avro fixed value to the byte array used for a Beam fixed-bytes logical type.
 *
 * <p>The target field type must be a logical type whose identifier is
 * {@code FixedBytes.IDENTIFIER}; otherwise the precondition checks fail.
 *
 * @param fixed the Avro fixed value to convert
 * @param fieldType the target Beam field type
 * @return a copy of the bytes held by {@code fixed}
 */
private static Object convertFixedStrict(GenericFixed fixed, Schema.FieldType fieldType) {
  checkTypeName(fieldType.getTypeName(), TypeName.LOGICAL_TYPE, "fixed");

  final String logicalTypeId = fieldType.getLogicalType().getIdentifier();
  checkArgument(FixedBytes.IDENTIFIER.equals(logicalTypeId));

  // GenericFixed is mutable, so hand back a private copy rather than its backing array.
  final byte[] backing = fixed.bytes();
  return backing.clone();
}
 
Example #27
Source File: AvroUtils.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Strict conversion from AVRO to Beam, strict because it doesn't do widening or narrowing during
 * conversion.
 *
 * <p>Logical types are handled first: decimals (backed by either {@code bytes} or {@code fixed}),
 * timestamp-millis and date. Everything else is dispatched on the underlying Avro schema type.
 *
 * @param value {@link GenericRecord} or any nested value
 * @param avroSchema schema for value
 * @param fieldType target beam field type
 * @return value converted for {@link Row}, or {@code null} when {@code value} is {@code null}
 * @throws IllegalArgumentException if the Avro type is a union or the {@code null} type
 */
@SuppressWarnings("unchecked")
@Nullable
public static Object convertAvroFieldStrict(
    @Nullable Object value,
    @Nonnull org.apache.avro.Schema avroSchema,
    @Nonnull Schema.FieldType fieldType) {
  if (value == null) {
    return null;
  }

  TypeWithNullability type = new TypeWithNullability(avroSchema);
  LogicalType logicalType = LogicalTypes.fromSchema(type.type);
  if (logicalType != null) {
    if (logicalType instanceof LogicalTypes.Decimal) {
      Conversions.DecimalConversion decimalConversion = new Conversions.DecimalConversion();
      BigDecimal bigDecimal;
      if (value instanceof GenericFixed) {
        // Avro allows the decimal logical type on fixed as well as bytes; a fixed-backed
        // decimal arrives as GenericFixed and must not be cast to ByteBuffer.
        bigDecimal = decimalConversion.fromFixed((GenericFixed) value, type.type, logicalType);
      } else {
        ByteBuffer byteBuffer = (ByteBuffer) value;
        // duplicate() so that decoding does not move the caller's buffer position
        bigDecimal = decimalConversion.fromBytes(byteBuffer.duplicate(), type.type, logicalType);
      }
      return convertDecimal(bigDecimal, fieldType);
    } else if (logicalType instanceof LogicalTypes.TimestampMillis) {
      if (value instanceof ReadableInstant) {
        return convertDateTimeStrict(((ReadableInstant) value).getMillis(), fieldType);
      } else {
        return convertDateTimeStrict((Long) value, fieldType);
      }
    } else if (logicalType instanceof LogicalTypes.Date) {
      if (value instanceof ReadableInstant) {
        int epochDays = Days.daysBetween(Instant.EPOCH, (ReadableInstant) value).getDays();
        return convertDateStrict(epochDays, fieldType);
      } else {
        return convertDateStrict((Integer) value, fieldType);
      }
    }
  }

  switch (type.type.getType()) {
    case FIXED:
      return convertFixedStrict((GenericFixed) value, fieldType);

    case BYTES:
      return convertBytesStrict((ByteBuffer) value, fieldType);

    case STRING:
      return convertStringStrict((CharSequence) value, fieldType);

    case INT:
      return convertIntStrict((Integer) value, fieldType);

    case LONG:
      return convertLongStrict((Long) value, fieldType);

    case FLOAT:
      return convertFloatStrict((Float) value, fieldType);

    case DOUBLE:
      return convertDoubleStrict((Double) value, fieldType);

    case BOOLEAN:
      return convertBooleanStrict((Boolean) value, fieldType);

    case RECORD:
      return convertRecordStrict((GenericRecord) value, fieldType);

    case ENUM:
      // enums are either Java enums, or GenericEnumSymbol,
      // they don't share common interface, but override toString()
      return convertEnumStrict(value, fieldType);

    case ARRAY:
      return convertArrayStrict((List<Object>) value, type.type.getElementType(), fieldType);

    case MAP:
      return convertMapStrict(
          (Map<CharSequence, Object>) value, type.type.getValueType(), fieldType);

    case UNION:
      throw new IllegalArgumentException("Union types not yet supported");

    case NULL:
      throw new IllegalArgumentException("Can't convert 'null' to non-nullable field");

    default:
      throw new AssertionError("Unexpected AVRO Schema.Type: " + type.type.getType());
  }
}
 
Example #28
Source File: AvroRandomDataGenerator.java    From avro-util with BSD 2-Clause "Simplified" License 4 votes vote down vote up
/**
 * Builds a fixed value of the size declared by {@code schema}, filled with random bytes.
 *
 * @param schema the fixed schema that determines the byte length
 * @return a new fixed value for {@code schema} holding the generated bytes
 */
private GenericFixed generateFixed(Schema schema) {
  final int size = schema.getFixedSize();
  final byte[] payload = new byte[size];
  random.nextBytes(payload);
  return AvroCompatibilityHelper.newFixedField(schema, payload);
}
 
Example #29
Source File: ParquetAvro.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Decodes a fixed value as a decimal, delegating to the superclass with the entry of
 * {@code decimalsByScale} selected by the scale of the given {@code ParquetDecimal} type.
 */
@Override
public BigDecimal fromFixed(GenericFixed value, Schema schema, LogicalType type) {
  final ParquetDecimal parquetDecimal = (ParquetDecimal) type;
  final int scale = parquetDecimal.scale();
  return super.fromFixed(value, schema, decimalsByScale[scale]);
}
 
Example #30
Source File: ParquetAvro.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Encodes a decimal as a fixed value, delegating to the superclass with the entry of
 * {@code decimalsByScale} selected by the scale of the given {@code ParquetDecimal} type.
 */
@Override
public GenericFixed toFixed(BigDecimal value, Schema schema, LogicalType type) {
  final ParquetDecimal parquetDecimal = (ParquetDecimal) type;
  final int scale = parquetDecimal.scale();
  return super.toFixed(value, schema, decimalsByScale[scale]);
}