org.apache.avro.Conversions Java Examples

The following examples show how to use org.apache.avro.Conversions. Each example is taken from an open-source project; the source file, originating project, and license are noted above the code. Most examples center on Conversions.DecimalConversion, which converts between BigDecimal and the bytes or fixed representations of Avro's decimal logical type; a few also register Conversions.UUIDConversion.
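Before the project examples, here is a minimal, self-contained sketch of the pattern most of them follow: attach a decimal logical type to a schema and use Conversions.DecimalConversion to move between BigDecimal and the Avro-encoded value. The class name and the precision/scale of (10, 2) are arbitrary choices for illustration, not values taken from any project below.

import java.math.BigDecimal;
import java.nio.ByteBuffer;

import org.apache.avro.Conversions;
import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;

public class DecimalConversionSketch {

    public static void main(String[] args) {
        // Attach a decimal(10, 2) logical type to a plain BYTES schema.
        final Schema bytesSchema = Schema.create(Schema.Type.BYTES);
        final LogicalTypes.Decimal decimalType = LogicalTypes.decimal(10, 2);
        decimalType.addToSchema(bytesSchema);

        // Round-trip a BigDecimal through its Avro bytes representation.
        final Conversions.DecimalConversion conversion = new Conversions.DecimalConversion();
        final ByteBuffer encoded = conversion.toBytes(new BigDecimal("123.45"), bytesSchema, decimalType);
        final BigDecimal decoded = conversion.fromBytes(encoded, bytesSchema, decimalType);

        System.out.println(decoded); // prints 123.45
    }
}

The same conversion also provides toFixed/fromFixed for decimals backed by an Avro fixed type, and GenericData or ReflectData instances can register it via addLogicalTypeConversion so readers and writers apply it automatically, as several examples below show.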
Example #1
Source File: FetchParquetTest.java    From nifi with Apache License 2.0
private void writeParquetUsersWithDecimal(final File parquetFile, int numUsers) throws IOException {
    if (parquetFile.exists()) {
        Assert.assertTrue(parquetFile.delete());
    }

    final BigDecimal initialAmount = new BigDecimal("1234567.0123456789");
    final AvroParquetWriter.Builder<GenericRecord> writerBuilder = createAvroParquetWriter(parquetFile, schemaWithDecimal);

    final List<Schema> amountSchemaUnion = schemaWithDecimal.getField("amount").schema().getTypes();
    final Schema amountSchema = amountSchemaUnion.stream().filter(s -> s.getType() == Schema.Type.FIXED).findFirst().orElse(null);
    Assert.assertNotNull(amountSchema);

    final Conversions.DecimalConversion decimalConversion = new Conversions.DecimalConversion();

    try (final ParquetWriter<GenericRecord> writer = writerBuilder.build()) {
        for (int i=0; i < numUsers; i++) {
            final BigDecimal incrementedAmount = initialAmount.add(new BigDecimal("1"));
            final GenericRecord user = new GenericData.Record(schemaWithDecimal);
            user.put("name", "Bob" + i);
            user.put("amount", decimalConversion.toFixed(incrementedAmount, amountSchema, amountSchema.getLogicalType()));

            writer.write(user);
        }
    }

}
 
Example #2
Source File: AvroSchema.java    From pulsar with Apache License 2.0
public static void addLogicalTypeConversions(ReflectData reflectData, boolean jsr310ConversionEnabled) {
    reflectData.addLogicalTypeConversion(new Conversions.DecimalConversion());
    reflectData.addLogicalTypeConversion(new TimeConversions.DateConversion());
    reflectData.addLogicalTypeConversion(new TimeConversions.TimeMillisConversion());
    reflectData.addLogicalTypeConversion(new TimeConversions.TimeMicrosConversion());
    reflectData.addLogicalTypeConversion(new TimeConversions.TimestampMicrosConversion());
    if (jsr310ConversionEnabled) {
        reflectData.addLogicalTypeConversion(new TimeConversions.TimestampMillisConversion());
    } else {
        try {
            Class.forName("org.joda.time.DateTime");
            reflectData.addLogicalTypeConversion(new JodaTimeConversions.TimestampConversion());
        } catch (ClassNotFoundException e) {
            // Skip if the joda-time dependency has not been provided.
        }
    }
}
 
Example #3
Source File: TestAvroTypeUtil.java    From nifi with Apache License 2.0
@Test
public void testFixedDecimalConversion(){
    final LogicalTypes.Decimal decimalType = LogicalTypes.decimal(18, 8);
    final Schema fieldSchema = Schema.createFixed("mydecimal", "no doc", "myspace", 18);
    decimalType.addToSchema(fieldSchema);
    final Object convertedValue = AvroTypeUtil.convertToAvroObject("2.5", fieldSchema, StandardCharsets.UTF_8);
    assertTrue(convertedValue instanceof GenericFixed);
    final GenericFixed genericFixed = (GenericFixed)convertedValue;
    final BigDecimal bigDecimal = new Conversions.DecimalConversion().fromFixed(genericFixed, fieldSchema, decimalType);
    assertEquals(new BigDecimal("2.5").setScale(8), bigDecimal);
}
 
Example #4
Source File: TestGenericLogicalTypes.java    From parquet-mr with Apache License 2.0
@Test
public void testReadDecimalFixed() throws IOException {
  Schema fixedSchema = Schema.createFixed("aFixed", null, null, 4);
  Schema fixedRecord = record("R", field("dec", fixedSchema));
  Schema decimalSchema = DECIMAL_9_2.addToSchema(
      Schema.createFixed("aFixed", null, null, 4));
  Schema decimalRecord = record("R", field("dec", decimalSchema));

  GenericRecord r1 = instance(decimalRecord, "dec", D1);
  GenericRecord r2 = instance(decimalRecord, "dec", D2);
  List<GenericRecord> expected = Arrays.asList(r1, r2);

  Conversion<BigDecimal> conversion = new Conversions.DecimalConversion();

  // use the conversion directly instead of relying on the write side
  GenericRecord r1fixed = instance(fixedRecord, "dec",
      conversion.toFixed(D1, fixedSchema, DECIMAL_9_2));
  GenericRecord r2fixed = instance(fixedRecord, "dec",
      conversion.toFixed(D2, fixedSchema, DECIMAL_9_2));

  File test = write(fixedRecord, r1fixed, r2fixed);
  Assert.assertEquals("Should convert fixed to BigDecimals",
      expected, read(GENERIC, decimalRecord, test));
}
 
Example #5
Source File: TestGenericLogicalTypes.java    From parquet-mr with Apache License 2.0
@Test
public void testWriteDecimalFixed() throws IOException {
  Schema fixedSchema = Schema.createFixed("aFixed", null, null, 4);
  Schema fixedRecord = record("R", field("dec", fixedSchema));
  Schema decimalSchema = DECIMAL_9_2.addToSchema(
      Schema.createFixed("aFixed", null, null, 4));
  Schema decimalRecord = record("R", field("dec", decimalSchema));

  GenericRecord r1 = instance(decimalRecord, "dec", D1);
  GenericRecord r2 = instance(decimalRecord, "dec", D2);

  Conversion<BigDecimal> conversion = new Conversions.DecimalConversion();

  // use the conversion directly instead of relying on the write side
  GenericRecord r1fixed = instance(fixedRecord, "dec",
      conversion.toFixed(D1, fixedSchema, DECIMAL_9_2));
  GenericRecord r2fixed = instance(fixedRecord, "dec",
      conversion.toFixed(D2, fixedSchema, DECIMAL_9_2));
  List<GenericRecord> expected = Arrays.asList(r1fixed, r2fixed);

  File test = write(GENERIC, decimalRecord, r1, r2);
  Assert.assertEquals("Should read BigDecimals as fixed",
      expected, read(GENERIC, fixedRecord, test));
}
 
Example #6
Source File: TestGenericLogicalTypes.java    From parquet-mr with Apache License 2.0
@Test
public void testReadDecimalBytes() throws IOException {
  Schema bytesSchema = Schema.create(Schema.Type.BYTES);
  Schema bytesRecord = record("R", field("dec", bytesSchema));
  Schema decimalSchema = DECIMAL_9_2.addToSchema(Schema.create(Schema.Type.BYTES));
  Schema decimalRecord = record("R", field("dec", decimalSchema));

  GenericRecord r1 = instance(decimalRecord, "dec", D1);
  GenericRecord r2 = instance(decimalRecord, "dec", D2);
  List<GenericRecord> expected = Arrays.asList(r1, r2);

  Conversion<BigDecimal> conversion = new Conversions.DecimalConversion();

  // use the conversion directly instead of relying on the write side
  GenericRecord r1bytes = instance(bytesRecord, "dec",
      conversion.toBytes(D1, bytesSchema, DECIMAL_9_2));
  GenericRecord r2bytes = instance(bytesRecord, "dec",
      conversion.toBytes(D2, bytesSchema, DECIMAL_9_2));

  File test = write(bytesRecord, r1bytes, r2bytes);
  Assert.assertEquals("Should convert bytes to BigDecimals",
      expected, read(GENERIC, decimalRecord, test));
}
 
Example #7
Source File: TestGenericLogicalTypes.java    From parquet-mr with Apache License 2.0
@Test
public void testWriteDecimalBytes() throws IOException {
  Schema bytesSchema = Schema.create(Schema.Type.BYTES);
  Schema bytesRecord = record("R", field("dec", bytesSchema));
  Schema decimalSchema = DECIMAL_9_2.addToSchema(Schema.create(Schema.Type.BYTES));
  Schema decimalRecord = record("R", field("dec", decimalSchema));

  GenericRecord r1 = instance(decimalRecord, "dec", D1);
  GenericRecord r2 = instance(decimalRecord, "dec", D2);

  Conversion<BigDecimal> conversion = new Conversions.DecimalConversion();

  // use the conversion directly instead of relying on the write side
  GenericRecord r1bytes = instance(bytesRecord, "dec",
      conversion.toBytes(D1, bytesSchema, DECIMAL_9_2));
  GenericRecord r2bytes = instance(bytesRecord, "dec",
      conversion.toBytes(D2, bytesSchema, DECIMAL_9_2));

  List<GenericRecord> expected = Arrays.asList(r1bytes, r2bytes);

  File test = write(GENERIC, decimalRecord, r1, r2);
  Assert.assertEquals("Should read BigDecimals as bytes",
      expected, read(GENERIC, bytesRecord, test));
}
 
Example #8
Source File: TestAvroTypeUtil.java    From nifi with Apache License 2.0
@Test
public void testConvertAvroRecordToMapWithFieldTypeOfBinaryAndLogicalTypeDecimal() {
    // Create a field schema like {"type":"bytes","name":"amount","logicalType":"decimal","precision":18,"scale":8}
    final LogicalTypes.Decimal decimalType = LogicalTypes.decimal(18, 8);
    final Schema fieldSchema = Schema.create(Type.BYTES);
    decimalType.addToSchema(fieldSchema);

    // Create a field named "amount" using the field schema above
    final Schema.Field field = new Schema.Field("amount", fieldSchema, null, (Object)null);

    // Create an overall record schema with the amount field
    final Schema avroSchema = Schema.createRecord(Collections.singletonList(field));

    // Create an example Avro record with the amount field of type binary and a logical type of decimal
    final BigDecimal expectedBigDecimal = new BigDecimal("1234567890.12345678");
    final GenericRecord genericRecord = new GenericData.Record(avroSchema);
    genericRecord.put("amount", new Conversions.DecimalConversion().toBytes(expectedBigDecimal, fieldSchema, decimalType));

    // Convert the Avro schema to a Record schema
    thenConvertAvroSchemaToRecordSchema(avroSchema, expectedBigDecimal, genericRecord);
}
 
Example #9
Source File: TestAvroTypeUtil.java    From nifi with Apache License 2.0
@Test
public void testConvertAvroRecordToMapWithFieldTypeOfFixedAndLogicalTypeDecimal() {
    // Create a field schema like {"type":"fixed","name":"amount","size":16,"logicalType":"decimal","precision":18,"scale":8}
    final LogicalTypes.Decimal decimalType = LogicalTypes.decimal(18, 8);
    final Schema fieldSchema = Schema.createFixed("amount", null, null, 16);
    decimalType.addToSchema(fieldSchema);

    // Create a field named "amount" using the field schema above
    final Schema.Field field = new Schema.Field("amount", fieldSchema, null, (Object)null);

    // Create an overall record schema with the amount field
    final Schema avroSchema = Schema.createRecord(Collections.singletonList(field));

    // Create an example Avro record with the amount field of type fixed and a logical type of decimal
    final BigDecimal expectedBigDecimal = new BigDecimal("1234567890.12345678");
    final GenericRecord genericRecord = new GenericData.Record(avroSchema);
    genericRecord.put("amount", new Conversions.DecimalConversion().toFixed(expectedBigDecimal, fieldSchema, decimalType));

    // Convert the Avro schema to a Record schema
    thenConvertAvroSchemaToRecordSchema(avroSchema, expectedBigDecimal, genericRecord);
}
 
Example #10
Source File: PentahoAvroRecordReader.java    From pentaho-hadoop-shims with Apache License 2.0
private Object convertToPentahoType( int pentahoType, ByteBuffer avroData, Schema.Field field ) {
  Object pentahoData = null;
  if ( avroData != null ) {
    try {
      switch ( pentahoType ) {
        case ValueMetaInterface.TYPE_BIGNUMBER:
          Conversions.DecimalConversion converter = new Conversions.DecimalConversion();
          Schema schema = field.schema();
          if ( schema.getType().equals( Schema.Type.UNION ) ) {
            List<Schema> schemas = field.schema().getTypes();
            for ( Schema s : schemas ) {
              if ( !s.getName().equalsIgnoreCase( "null" ) ) {
                schema = s;
                break;
              }
            }
          }
          Object precision = schema.getObjectProp( AvroSpec.DECIMAL_PRECISION );
          Object scale = schema.getObjectProp( AvroSpec.DECIMAL_SCALE );
          LogicalTypes.Decimal decimalType =
            LogicalTypes.decimal( Integer.parseInt( precision.toString() ), Integer.parseInt( scale.toString() ) );
          pentahoData = converter.fromBytes( avroData, avroSchema, decimalType );
          break;
        case ValueMetaInterface.TYPE_BINARY:
          pentahoData = new byte[ avroData.remaining() ];
          avroData.get( (byte[]) pentahoData );
          break;
      }
    } catch ( Exception e ) {
      // If the type conversion fails, just ignore it; null will be returned.
    }
  }
  return pentahoData;
}
 
Example #11
Source File: AvroSpecificGenericMapper.java    From simplesource with Apache License 2.0
@Override
public D fromGeneric(final GenericRecord serialized) {
    GenericData.get().addLogicalTypeConversion(new Conversions.DecimalConversion());
    SpecificData specificData = SpecificData.get();
    specificData.addLogicalTypeConversion(new Conversions.DecimalConversion());
    return isNull(serialized) ? null : (D) specificData.deepCopy(serialized.getSchema(), serialized);
}
 
Example #12
Source File: AvroNestedReader.java    From pentaho-hadoop-shims with Apache License 2.0
/**
 * @param pentahoType the Avro input field definition, including the target Pentaho (Kettle) type
 * @param avroData    the raw Avro-encoded bytes for the field
 * @param fieldSchema the Avro schema of the field
 * @return the converted Kettle value, or null if the data could not be converted
 */
public Object convertToKettleValue( AvroInputField pentahoType, ByteBuffer avroData, Schema fieldSchema ) {
  Object pentahoData = null;
  if ( avroData != null ) {
    try {
      switch ( pentahoType.getPentahoType() ) {
        case ValueMetaInterface.TYPE_BIGNUMBER:
          Conversions.DecimalConversion converter = new Conversions.DecimalConversion();
          Schema schema = fieldSchema;
          if ( schema.getType().equals( Schema.Type.UNION ) ) {
            List<Schema> schemas = schema.getTypes();
            for ( Schema s : schemas ) {
              if ( !s.getName().equalsIgnoreCase( "null" ) ) {
                schema = s;
                break;
              }
            }
          }
          Object precision = schema.getObjectProp( AvroSpec.DECIMAL_PRECISION );
          Object scale = schema.getObjectProp( AvroSpec.DECIMAL_SCALE );
          LogicalTypes.Decimal decimalType =
            LogicalTypes.decimal( Integer.parseInt( precision.toString() ), Integer.parseInt( scale.toString() ) );
          pentahoData = converter.fromBytes( avroData, m_schemaToUse, decimalType );
          break;
        case ValueMetaInterface.TYPE_BINARY:
          pentahoData = new byte[ avroData.remaining() ];
          avroData.get( (byte[]) pentahoData );
          break;
      }
    } catch ( Exception e ) {
      // If the type conversion fails, just ignore it; null will be returned.
    }
  }
  return pentahoData;
}
 
Example #13
Source File: AvroToPdiConverter.java    From pentaho-hadoop-shims with Apache License 2.0
private Object convertToPentahoType( int pentahoType, ByteBuffer avroData, Schema field ) {
  Object pentahoData = null;
  if ( avroData != null ) {
    try {
      switch ( pentahoType ) {
        case ValueMetaInterface.TYPE_BIGNUMBER:
          Conversions.DecimalConversion converter = new Conversions.DecimalConversion();
          Schema schema = field;
          if ( schema.getType().equals( Schema.Type.UNION ) ) {
            List<Schema> schemas = field.getTypes();
            for ( Schema s : schemas ) {
              if ( !s.getName().equalsIgnoreCase( "null" ) ) {
                schema = s;
                break;
              }
            }
          }
          Object precision = schema.getObjectProp( AvroSpec.DECIMAL_PRECISION );
          Object scale = schema.getObjectProp( AvroSpec.DECIMAL_SCALE );
          LogicalTypes.Decimal decimalType =
            LogicalTypes.decimal( Integer.parseInt( precision.toString() ), Integer.parseInt( scale.toString() ) );
          pentahoData = converter.fromBytes( avroData, avroSchema, decimalType );
          break;
        case ValueMetaInterface.TYPE_BINARY:
          pentahoData = new byte[ avroData.remaining() ];
          avroData.get( (byte[]) pentahoData );
          break;
      }
    } catch ( Exception e ) {
      // If the type conversion fails, just ignore it; null will be returned.
    }
  }
  return pentahoData;
}
 
Example #14
Source File: TestAvroTypeUtil.java    From nifi with Apache License 2.0
@Test
public void testBytesDecimalConversion(){
    final LogicalTypes.Decimal decimalType = LogicalTypes.decimal(18, 8);
    final Schema fieldSchema = Schema.create(Type.BYTES);
    decimalType.addToSchema(fieldSchema);
    final Object convertedValue = AvroTypeUtil.convertToAvroObject("2.5", fieldSchema, StandardCharsets.UTF_8);
    assertTrue(convertedValue instanceof ByteBuffer);
    final ByteBuffer serializedBytes = (ByteBuffer)convertedValue;
    final BigDecimal bigDecimal = new Conversions.DecimalConversion().fromBytes(serializedBytes, fieldSchema, decimalType);
    assertEquals(new BigDecimal("2.5").setScale(8), bigDecimal);
}
 
Example #15
Source File: AvroUtilsTest.java    From beam with Apache License 2.0
private static GenericRecord getGenericRecord() {

    LogicalType decimalType =
        LogicalTypes.decimal(Integer.MAX_VALUE)
            .addToSchema(org.apache.avro.Schema.create(Type.BYTES))
            .getLogicalType();
    ByteBuffer encodedDecimal =
        new Conversions.DecimalConversion().toBytes(BIG_DECIMAL, null, decimalType);

    return new GenericRecordBuilder(getAvroSchema())
        .set("bool", true)
        .set("int", 43)
        .set("long", 44L)
        .set("float", (float) 44.1)
        .set("double", (double) 44.2)
        .set("string", new Utf8("string"))
        .set("bytes", ByteBuffer.wrap(BYTE_ARRAY))
        .set("decimal", encodedDecimal)
        .set("timestampMillis", DATE_TIME.getMillis())
        .set("row", getSubGenericRecord("row"))
        .set("array", ImmutableList.of(getSubGenericRecord("array"), getSubGenericRecord("array")))
        .set(
            "map",
            ImmutableMap.of(
                new Utf8("k1"),
                getSubGenericRecord("map"),
                new Utf8("k2"),
                getSubGenericRecord("map")))
        .build();
  }
 
Example #16
Source File: TestAvroTypeUtil.java    From nifi with Apache License 2.0
@Test
public void testToDecimalConversion() {
    final LogicalTypes.Decimal decimalType = LogicalTypes.decimal(18, 8);
    final Schema fieldSchema = Schema.create(Type.BYTES);
    decimalType.addToSchema(fieldSchema);

    final Map<Object, String> expects = new HashMap<>();

    // Double to Decimal
    expects.put(123d, "123.00000000");
    // Double can not represent exact 1234567890.12345678, so use 1 less digit to
    // test here.
    expects.put(1234567890.12345678d, "1234567890.12345670");
    expects.put(123456789.12345678d, "123456789.12345678");
    expects.put(1234567890123456d, "1234567890123456.00000000");
    // ROUND HALF UP.
    expects.put(0.1234567890123456d, "0.12345679");

    // BigDecimal to BigDecimal
    expects.put(new BigDecimal("123"), "123.00000000");
    expects.put(new BigDecimal("1234567890.12345678"), "1234567890.12345678");
    expects.put(new BigDecimal("123456789012345678"), "123456789012345678.00000000");
    // ROUND HALF UP.
    expects.put(new BigDecimal("0.123456789012345678"), "0.12345679");

    // String to BigDecimal
    expects.put("123", "123.00000000");
    expects.put("1234567890.12345678", "1234567890.12345678");
    expects.put("123456789012345678", "123456789012345678.00000000");
    expects.put("0.1234567890123456", "0.12345679");
    expects.put("Not a number", "java.lang.NumberFormatException");

    // Integer to BigDecimal
    expects.put(123, "123.00000000");
    expects.put(-1234567, "-1234567.00000000");

    // Long to BigDecimal
    expects.put(123L, "123.00000000");
    expects.put(123456789012345678L, "123456789012345678.00000000");

    expects.forEach((rawValue, expect) -> {
        final Object convertedValue;
        try {
            convertedValue = AvroTypeUtil.convertToAvroObject(rawValue, fieldSchema, StandardCharsets.UTF_8);
        } catch (Exception e) {
            if (expect.equals(e.getClass().getCanonicalName())) {
                // Expected behavior.
                return;
            }
            fail(String.format("Unexpected exception, %s with %s %s while expecting %s", e,
                    rawValue.getClass().getSimpleName(), rawValue, expect));
            return;
        }

        assertTrue(convertedValue instanceof ByteBuffer);
        final ByteBuffer serializedBytes = (ByteBuffer) convertedValue;

        final BigDecimal bigDecimal = new Conversions.DecimalConversion().fromBytes(serializedBytes, fieldSchema,
                decimalType);
        assertEquals(String.format("%s %s should be converted to %s", rawValue.getClass().getSimpleName(), rawValue,
                expect), expect, bigDecimal.toString());
    });

}
 
Example #17
Source File: TestJdbcCommon.java    From nifi with Apache License 2.0
private void testConvertToAvroStreamForBigDecimal(BigDecimal bigDecimal, int dbPrecision, int defaultPrecision, int expectedPrecision, int expectedScale) throws SQLException, IOException {

        final ResultSetMetaData metadata = mock(ResultSetMetaData.class);
        when(metadata.getColumnCount()).thenReturn(1);
        when(metadata.getColumnType(1)).thenReturn(Types.NUMERIC);
        when(metadata.getColumnName(1)).thenReturn("The.Chairman");
        when(metadata.getTableName(1)).thenReturn("1the::table");
        when(metadata.getPrecision(1)).thenReturn(dbPrecision);
        when(metadata.getScale(1)).thenReturn(expectedScale);

        final ResultSet rs = JdbcCommonTestUtils.resultSetReturningMetadata(metadata);

        when(rs.getObject(Mockito.anyInt())).thenReturn(bigDecimal);

        final ByteArrayOutputStream baos = new ByteArrayOutputStream();

        final JdbcCommon.AvroConversionOptions options = JdbcCommon.AvroConversionOptions
                .builder().convertNames(true).useLogicalTypes(true).defaultPrecision(defaultPrecision).build();
        JdbcCommon.convertToAvroStream(rs, baos, options, null);

        final byte[] serializedBytes = baos.toByteArray();

        final InputStream instream = new ByteArrayInputStream(serializedBytes);

        final GenericData genericData = new GenericData();
        genericData.addLogicalTypeConversion(new Conversions.DecimalConversion());

        final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(null, null, genericData);
        try (final DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(instream, datumReader)) {
            final Schema generatedUnion = dataFileReader.getSchema().getField("The_Chairman").schema();
            // null and decimal.
            assertEquals(2, generatedUnion.getTypes().size());
            final LogicalType logicalType = generatedUnion.getTypes().get(1).getLogicalType();
            assertNotNull(logicalType);
            assertEquals("decimal", logicalType.getName());
            LogicalTypes.Decimal decimalType = (LogicalTypes.Decimal) logicalType;
            assertEquals(expectedPrecision, decimalType.getPrecision());
            assertEquals(expectedScale, decimalType.getScale());

            GenericRecord record = null;
            while (dataFileReader.hasNext()) {
                record = dataFileReader.next(record);
                assertEquals("_1the__table", record.getSchema().getName());
                assertEquals(bigDecimal, record.get("The_Chairman"));
            }
        }
    }
 
Example #18
Source File: TestWriteAvroResult.java    From nifi with Apache License 2.0
private void testLogicalTypes(Schema schema) throws ParseException, IOException {
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();

    final List<RecordField> fields = new ArrayList<>();
    fields.add(new RecordField("timeMillis", RecordFieldType.TIME.getDataType()));
    fields.add(new RecordField("timeMicros", RecordFieldType.TIME.getDataType()));
    fields.add(new RecordField("timestampMillis", RecordFieldType.TIMESTAMP.getDataType()));
    fields.add(new RecordField("timestampMicros", RecordFieldType.TIMESTAMP.getDataType()));
    fields.add(new RecordField("date", RecordFieldType.DATE.getDataType()));
    fields.add(new RecordField("decimal", RecordFieldType.DECIMAL.getDecimalDataType(5,2)));
    final RecordSchema recordSchema = new SimpleRecordSchema(fields);

    final String expectedTime = "2017-04-04 14:20:33.789";
    final DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
    df.setTimeZone(TimeZone.getTimeZone("gmt"));
    final long timeLong = df.parse(expectedTime).getTime();

    final Map<String, Object> values = new HashMap<>();
    values.put("timeMillis", new Time(timeLong));
    values.put("timeMicros", new Time(timeLong));
    values.put("timestampMillis", new Timestamp(timeLong));
    values.put("timestampMicros", new Timestamp(timeLong));
    values.put("date", new Date(timeLong));
    values.put("decimal", new BigDecimal("123.45"));
    final Record record = new MapRecord(recordSchema, values);

    try (final RecordSetWriter writer = createWriter(schema, baos)) {
        writer.write(RecordSet.of(record.getSchema(), record));
    }

    final byte[] data = baos.toByteArray();

    try (final InputStream in = new ByteArrayInputStream(data)) {
        final GenericRecord avroRecord = readRecord(in, schema);
        final long secondsSinceMidnight = 33 + (20 * 60) + (14 * 60 * 60);
        final long millisSinceMidnight = (secondsSinceMidnight * 1000L) + 789;

        assertEquals((int) millisSinceMidnight, avroRecord.get("timeMillis"));
        assertEquals(millisSinceMidnight * 1000L, avroRecord.get("timeMicros"));
        assertEquals(timeLong, avroRecord.get("timestampMillis"));
        assertEquals(timeLong * 1000L, avroRecord.get("timestampMicros"));
        // Double value will be converted into logical decimal if Avro schema is defined as logical decimal.
        final Schema decimalSchema = schema.getField("decimal").schema();
        final LogicalType logicalType = decimalSchema.getLogicalType() != null
                ? decimalSchema.getLogicalType()
                // Union type doesn't return logical type. Find the first logical type defined within the union.
                : decimalSchema.getTypes().stream().map(s -> s.getLogicalType()).filter(Objects::nonNull).findFirst().get();
        final BigDecimal decimal = new Conversions.DecimalConversion().fromBytes((ByteBuffer) avroRecord.get("decimal"), decimalSchema, logicalType);
        assertEquals(new BigDecimal("123.45"), decimal);
    }
}
 
Example #19
Source File: TestGenericLogicalTypes.java    From parquet-mr with Apache License 2.0
@BeforeClass
public static void addDecimalAndUUID() {
  GENERIC.addLogicalTypeConversion(new Conversions.DecimalConversion());
  GENERIC.addLogicalTypeConversion(new Conversions.UUIDConversion());
}
 
Example #20
Source File: TestReflectLogicalTypes.java    From parquet-mr with Apache License 2.0
@BeforeClass
public static void addUUID() {
  REFLECT.addLogicalTypeConversion(new Conversions.UUIDConversion());
  REFLECT.addLogicalTypeConversion(new Conversions.DecimalConversion());
}
 
Example #21
Source File: BigQueryAvroUtils.java    From beam with Apache License 2.0
private static Object convertRequiredField(
    Type avroType, LogicalType avroLogicalType, TableFieldSchema fieldSchema, Object v) {
  // REQUIRED fields are represented as the corresponding Avro types. For example, a BigQuery
  // INTEGER type maps to an Avro LONG type.
  checkNotNull(v, "REQUIRED field %s should not be null", fieldSchema.getName());
  // Per https://cloud.google.com/bigquery/docs/reference/v2/tables#schema, the type field
  // is required, so it may not be null.
  String bqType = fieldSchema.getType();
  ImmutableCollection<Type> expectedAvroTypes = BIG_QUERY_TO_AVRO_TYPES.get(bqType);
  verifyNotNull(expectedAvroTypes, "Unsupported BigQuery type: %s", bqType);
  verify(
      expectedAvroTypes.contains(avroType),
      "Expected Avro schema types %s for BigQuery %s field %s, but received %s",
      expectedAvroTypes,
      bqType,
      fieldSchema.getName(),
      avroType);
  // For historical reasons, don't validate avroLogicalType except for with NUMERIC.
  // BigQuery represents NUMERIC in Avro format as BYTES with a DECIMAL logical type.
  switch (bqType) {
    case "STRING":
    case "DATETIME":
    case "GEOGRAPHY":
      // Avro will use a CharSequence to represent String objects, but it may not always use
      // java.lang.String; for example, it may prefer org.apache.avro.util.Utf8.
      verify(v instanceof CharSequence, "Expected CharSequence (String), got %s", v.getClass());
      return v.toString();
    case "DATE":
      if (avroType == Type.INT) {
        verify(v instanceof Integer, "Expected Integer, got %s", v.getClass());
        verifyNotNull(avroLogicalType, "Expected Date logical type");
        verify(avroLogicalType instanceof LogicalTypes.Date, "Expected Date logical type");
        return formatDate((Integer) v);
      } else {
        verify(v instanceof CharSequence, "Expected CharSequence (String), got %s", v.getClass());
        return v.toString();
      }
    case "TIME":
      if (avroType == Type.LONG) {
        verify(v instanceof Long, "Expected Long, got %s", v.getClass());
        verifyNotNull(avroLogicalType, "Expected TimeMicros logical type");
        verify(
            avroLogicalType instanceof LogicalTypes.TimeMicros,
            "Expected TimeMicros logical type");
        return formatTime((Long) v);
      } else {
        verify(v instanceof CharSequence, "Expected CharSequence (String), got %s", v.getClass());
        return v.toString();
      }
    case "INTEGER":
      verify(v instanceof Long, "Expected Long, got %s", v.getClass());
      return ((Long) v).toString();
    case "FLOAT":
      verify(v instanceof Double, "Expected Double, got %s", v.getClass());
      return v;
    case "NUMERIC":
      // NUMERIC data types are represented as BYTES with the DECIMAL logical type. They are
      // converted back to Strings with precision and scale determined by the logical type.
      verify(v instanceof ByteBuffer, "Expected ByteBuffer, got %s", v.getClass());
      verifyNotNull(avroLogicalType, "Expected Decimal logical type");
      verify(avroLogicalType instanceof LogicalTypes.Decimal, "Expected Decimal logical type");
      BigDecimal numericValue =
          new Conversions.DecimalConversion()
              .fromBytes((ByteBuffer) v, Schema.create(avroType), avroLogicalType);
      return numericValue.toString();
    case "BOOLEAN":
      verify(v instanceof Boolean, "Expected Boolean, got %s", v.getClass());
      return v;
    case "TIMESTAMP":
      // TIMESTAMP data types are represented as Avro LONG types, microseconds since the epoch.
      // Values may be negative since BigQuery timestamps start at 0001-01-01 00:00:00 UTC.
      verify(v instanceof Long, "Expected Long, got %s", v.getClass());
      return formatTimestamp((Long) v);
    case "RECORD":
      verify(v instanceof GenericRecord, "Expected GenericRecord, got %s", v.getClass());
      return convertGenericRecordToTableRow((GenericRecord) v, fieldSchema.getFields());
    case "BYTES":
      verify(v instanceof ByteBuffer, "Expected ByteBuffer, got %s", v.getClass());
      ByteBuffer byteBuffer = (ByteBuffer) v;
      byte[] bytes = new byte[byteBuffer.limit()];
      byteBuffer.get(bytes);
      return BaseEncoding.base64().encode(bytes);
    default:
      throw new UnsupportedOperationException(
          String.format(
              "Unexpected BigQuery field schema type %s for field named %s",
              fieldSchema.getType(), fieldSchema.getName()));
  }
}
 
Example #22
Source File: AvroUtils.java    From beam with Apache License 2.0
/**
 * Strict conversion from AVRO to Beam, strict because it doesn't do widening or narrowing during
 * conversion.
 *
 * @param value {@link GenericRecord} or any nested value
 * @param avroSchema schema for value
 * @param fieldType target beam field type
 * @return value converted for {@link Row}
 */
@SuppressWarnings("unchecked")
@Nullable
public static Object convertAvroFieldStrict(
    @Nullable Object value,
    @Nonnull org.apache.avro.Schema avroSchema,
    @Nonnull Schema.FieldType fieldType) {
  if (value == null) {
    return null;
  }

  TypeWithNullability type = new TypeWithNullability(avroSchema);
  LogicalType logicalType = LogicalTypes.fromSchema(type.type);
  if (logicalType != null) {
    if (logicalType instanceof LogicalTypes.Decimal) {
      ByteBuffer byteBuffer = (ByteBuffer) value;
      BigDecimal bigDecimal =
          new Conversions.DecimalConversion()
              .fromBytes(byteBuffer.duplicate(), type.type, logicalType);
      return convertDecimal(bigDecimal, fieldType);
    } else if (logicalType instanceof LogicalTypes.TimestampMillis) {
      if (value instanceof ReadableInstant) {
        return convertDateTimeStrict(((ReadableInstant) value).getMillis(), fieldType);
      } else {
        return convertDateTimeStrict((Long) value, fieldType);
      }
    } else if (logicalType instanceof LogicalTypes.Date) {
      if (value instanceof ReadableInstant) {
        int epochDays = Days.daysBetween(Instant.EPOCH, (ReadableInstant) value).getDays();
        return convertDateStrict(epochDays, fieldType);
      } else {
        return convertDateStrict((Integer) value, fieldType);
      }
    }
  }

  switch (type.type.getType()) {
    case FIXED:
      return convertFixedStrict((GenericFixed) value, fieldType);

    case BYTES:
      return convertBytesStrict((ByteBuffer) value, fieldType);

    case STRING:
      return convertStringStrict((CharSequence) value, fieldType);

    case INT:
      return convertIntStrict((Integer) value, fieldType);

    case LONG:
      return convertLongStrict((Long) value, fieldType);

    case FLOAT:
      return convertFloatStrict((Float) value, fieldType);

    case DOUBLE:
      return convertDoubleStrict((Double) value, fieldType);

    case BOOLEAN:
      return convertBooleanStrict((Boolean) value, fieldType);

    case RECORD:
      return convertRecordStrict((GenericRecord) value, fieldType);

    case ENUM:
      // enums are either Java enums, or GenericEnumSymbol,
      // they don't share common interface, but override toString()
      return convertEnumStrict(value, fieldType);

    case ARRAY:
      return convertArrayStrict((List<Object>) value, type.type.getElementType(), fieldType);

    case MAP:
      return convertMapStrict(
          (Map<CharSequence, Object>) value, type.type.getValueType(), fieldType);

    case UNION:
      throw new IllegalArgumentException("Union types not yet supported");

    case NULL:
      throw new IllegalArgumentException("Can't convert 'null' to non-nullable field");

    default:
      throw new AssertionError("Unexpected AVRO Schema.Type: " + type.type.getType());
  }
}
 
Example #23
Source File: HoodieTestDataGenerator.java    From hudi with Apache License 2.0
public static GenericRecord generateGenericRecord(String rowKey, String riderName, String driverName,
                                                  double timestamp, boolean isDeleteRecord,
                                                  boolean isFlattened) {
  GenericRecord rec = new GenericData.Record(isFlattened ? FLATTENED_AVRO_SCHEMA : AVRO_SCHEMA);
  rec.put("_row_key", rowKey);
  rec.put("timestamp", timestamp);
  rec.put("rider", riderName);
  rec.put("driver", driverName);
  rec.put("begin_lat", RAND.nextDouble());
  rec.put("begin_lon", RAND.nextDouble());
  rec.put("end_lat", RAND.nextDouble());
  rec.put("end_lon", RAND.nextDouble());

  if (isFlattened) {
    rec.put("fare", RAND.nextDouble() * 100);
    rec.put("currency", "USD");
  } else {
    rec.put("distance_in_meters", RAND.nextInt());
    rec.put("seconds_since_epoch", RAND.nextLong());
    rec.put("weight", RAND.nextFloat());
    byte[] bytes = "Canada".getBytes();
    rec.put("nation", ByteBuffer.wrap(bytes));
    long currentTimeMillis = System.currentTimeMillis();
    Date date = new Date(currentTimeMillis);
    rec.put("current_date", (int) date.toLocalDate().toEpochDay());
    rec.put("current_ts", currentTimeMillis);

    BigDecimal bigDecimal = new BigDecimal(String.format("%5f", RAND.nextFloat()));
    Schema decimalSchema = AVRO_SCHEMA.getField("height").schema();
    Conversions.DecimalConversion decimalConversions = new Conversions.DecimalConversion();
    GenericFixed genericFixed = decimalConversions.toFixed(bigDecimal, decimalSchema, LogicalTypes.decimal(10, 6));
    rec.put("height", genericFixed);

    rec.put("city_to_state", Collections.singletonMap("LA", "CA"));

    GenericRecord fareRecord = new GenericData.Record(AVRO_SCHEMA.getField("fare").schema());
    fareRecord.put("amount", RAND.nextDouble() * 100);
    fareRecord.put("currency", "USD");
    rec.put("fare", fareRecord);

    GenericArray<GenericRecord> tipHistoryArray = new GenericData.Array<>(1, AVRO_SCHEMA.getField("tip_history").schema());
    Schema tipSchema = new Schema.Parser().parse(AVRO_SCHEMA.getField("tip_history").schema().toString()).getElementType();
    GenericRecord tipRecord = new GenericData.Record(tipSchema);
    tipRecord.put("amount", RAND.nextDouble() * 100);
    tipRecord.put("currency", "USD");
    tipHistoryArray.add(tipRecord);
    rec.put("tip_history", tipHistoryArray);
  }

  if (isDeleteRecord) {
    rec.put("_hoodie_is_deleted", true);
  } else {
    rec.put("_hoodie_is_deleted", false);
  }
  return rec;
}