org.apache.avro.file.SeekableByteArrayInput Java Examples

The following examples show how to use org.apache.avro.file.SeekableByteArrayInput. Each example notes its original project, source file, and license.
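Before the project examples, here is a minimal, self-contained sketch written for this page (not taken from any project below). It shows the basic round trip: DataFileReader needs random access to an Avro container file, and SeekableByteArrayInput provides that over an in-memory byte array.

import java.io.ByteArrayOutputStream;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.file.SeekableByteArrayInput;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

public class SeekableByteArrayInputRoundTrip {
  public static void main(String[] args) throws Exception {
    Schema schema = SchemaBuilder.record("User").fields()
        .requiredString("name")
        .endRecord();

    GenericRecord user = new GenericData.Record(schema);
    user.put("name", "alice");

    // Write an Avro container file (header + data blocks) into a byte array.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try (DataFileWriter<GenericRecord> writer =
             new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
      writer.create(schema, out);
      writer.append(user);
    }

    // DataFileReader requires a SeekableInput; SeekableByteArrayInput wraps the
    // in-memory bytes, and the reader recovers the schema from the file header.
    try (DataFileReader<GenericRecord> reader = new DataFileReader<>(
        new SeekableByteArrayInput(out.toByteArray()),
        new GenericDatumReader<GenericRecord>())) {
      while (reader.hasNext()) {
        System.out.println(reader.next().get("name"));
      }
    }
  }
}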
Example #1
Source File: AvroRecordWriterTest.java    From data-highway with Apache License 2.0
@Test
public void typical() throws Exception {
  Schema schema = SchemaBuilder
      .builder()
      .record("record")
      .fields()
      .requiredLong("id")
      .requiredString("name")
      .endRecord();
  Record value = new GenericRecordBuilder(schema).set("id", 1L).set("name", "hello").build();
  ByteArrayOutputStream output = new ByteArrayOutputStream();

  Factory factory = new Factory(CodecFactory.nullCodec());
  RecordWriter writer = factory.create(schema, output);
  writer.write(value);
  writer.close();

  SeekableInput input = new SeekableByteArrayInput(output.toByteArray());
  DatumReader<Record> datumReader = new GenericDatumReader<>(schema);
  DataFileReader<Record> dataFileReader = new DataFileReader<>(input, datumReader);
  assertThat(dataFileReader.next(), is(value));
  assertThat(dataFileReader.hasNext(), is(false));
  dataFileReader.close();
}
 
Example #2
Source File: PutHiveStreaming.java    From nifi with Apache License 2.0
private void appendAvroRecords(ProcessSession session, byte[] avroHeader, DataFileWriter<GenericRecord> writer,
                               AtomicReference<FlowFile> flowFileRef, List<HiveStreamingRecord> hRecords) {

    flowFileRef.set(session.append(flowFileRef.get(), (out) -> {
        if (hRecords != null) {
            // Re-initialize the writer in append mode so that the Avro header is written only once.
            writer.appendTo(new SeekableByteArrayInput(avroHeader), out);
            try {
                for (HiveStreamingRecord hRecord : hRecords) {
                    writer.append(hRecord.getRecord());
                }
            } catch (IOException ioe) {
                // The records were put to Hive Streaming successfully, but there was an error while writing the
                // Avro records to the flow file. Log as an error and move on.
                logger.error("Error writing Avro records (which were sent successfully to Hive Streaming) to the flow file, " + ioe, ioe);
            }
        }
        writer.close();
    }));
}
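The appendTo call above is the heart of this example: DataFileWriter.appendTo(SeekableInput, OutputStream) reads an existing container file's header (schema, codec, sync marker) from the SeekableInput and then writes only new data blocks to the OutputStream, so the header is never written twice. Below is a standalone sketch of that pattern assembled for this page with illustrative names; the NiFi processor keeps the header bytes in avroHeader and appends to the flow file instead.

import java.io.ByteArrayOutputStream;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.file.SeekableByteArrayInput;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

public class AppendToSketch {
  public static void main(String[] args) throws Exception {
    Schema schema = SchemaBuilder.record("Event").fields()
        .requiredString("msg")
        .endRecord();

    // First pass: write a normal container file. Closing immediately yields a
    // valid file holding just the header; this plays the role of avroHeader.
    ByteArrayOutputStream header = new ByteArrayOutputStream();
    try (DataFileWriter<GenericRecord> writer =
             new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
      writer.create(schema, header);
    }

    // Second pass: reopen the header in append mode and add records to a new
    // stream. appendTo reads the schema and sync marker from the SeekableInput,
    // so only fresh data blocks land in `appended`.
    ByteArrayOutputStream appended = new ByteArrayOutputStream();
    try (DataFileWriter<GenericRecord> writer =
             new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
      writer.appendTo(new SeekableByteArrayInput(header.toByteArray()), appended);
      GenericRecord event = new GenericData.Record(schema);
      event.put("msg", "hello");
      writer.append(event);
    }
    // Concatenating header + appended yields one valid container file.
  }
}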
 
Example #3
Source File: AvroMessageParser.java    From datacollector with Apache License 2.0
public AvroMessageParser(
    ProtoConfigurableEntity.Context context,
    final Schema schema,
    final byte[] message,
    final String messageId,
    final OriginAvroSchemaSource schemaSource,
    boolean skipAvroUnionIndexes
) throws IOException {
  this.context = context;
  this.messageId = messageId;
  this.schemaSource = schemaSource;
  this.skipAvroUnionIndexes = skipAvroUnionIndexes;

  datumReader = new GenericDatumReader<>(schema); //Reader schema argument is optional
  if(schemaSource == OriginAvroSchemaSource.SOURCE) {
    dataFileReader = new DataFileReader<>(new SeekableByteArrayInput(message), datumReader);
  } else {
    decoder = DecoderFactory.get().binaryDecoder(new ByteArrayInputStream(message), null);
    avroRecord = new GenericData.Record(schema);
  }
}
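The constructor above picks one of two decode paths depending on where the schema lives. A message with an embedded schema is a complete container file, so a DataFileReader over a SeekableByteArrayInput can recover the schema from the header; a bare binary-encoded message has no header to seek within, so a plain ByteArrayInputStream with a BinaryDecoder and an externally supplied schema is used instead. A sketch of both paths, written for this page with illustrative names:

import java.io.ByteArrayInputStream;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.SeekableByteArrayInput;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DecoderFactory;

public final class AvroDecodePaths {

  // Path 1: the message is a full Avro container file, so the schema travels
  // with the bytes and the reader can be constructed without one.
  static GenericRecord readContainerFile(byte[] message) throws Exception {
    try (DataFileReader<GenericRecord> reader = new DataFileReader<>(
        new SeekableByteArrayInput(message), new GenericDatumReader<GenericRecord>())) {
      return reader.next();
    }
  }

  // Path 2: the message is a raw binary-encoded datum; the schema must come
  // from elsewhere, and no random access is needed, so a plain input stream
  // with a BinaryDecoder suffices.
  static GenericRecord readRawBinary(byte[] message, Schema schema) throws Exception {
    GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema);
    BinaryDecoder decoder =
        DecoderFactory.get().binaryDecoder(new ByteArrayInputStream(message), null);
    return datumReader.read(null, decoder);
  }
}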
 
Example #4
Source File: TestMergeContent.java    From localization_nifi with Apache License 2.0
private Map<String, GenericRecord> getGenericRecordMap(byte[] data, Schema schema, String key) throws IOException {
    // create a reader for the merged content
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema);
    SeekableByteArrayInput input = new SeekableByteArrayInput(data);
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(input, datumReader);

    // read all the records into a map to verify all the records are there
    Map<String,GenericRecord> records = new HashMap<>();
    while (dataFileReader.hasNext()) {
        GenericRecord user = dataFileReader.next();
        records.put(user.get(key).toString(), user);
    }
    return records;
}
 
Example #5
Source File: AvroToJsonConverter.java    From celos with Apache License 2.0
@Override
public FixFile convert(TestRun testRun, FixFile ff) throws IOException {
    byte[] bytes = IOUtils.toByteArray(ff.getContent());
    if (bytes.length == 0) {
        return ff;
    }
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    GenericDatumReader<Object> reader = new GenericDatumReader<>();
    FileReader<Object> fileReader =  DataFileReader.openReader(new SeekableByteArrayInput(bytes), reader);
    try {
        Schema schema = fileReader.getSchema();
        DatumWriter<Object> writer = new GenericDatumWriter<>(schema);
        JsonEncoder encoder = EncoderFactory.get().jsonEncoder(schema, os);

        for (Object datum : fileReader) {
            writer.write(datum, encoder);
        }
        encoder.flush();
    } finally {
        fileReader.close();
    }
    return new FixFile(new ByteArrayInputStream(os.toByteArray()));
}
 
Example #6
Source File: TestMergeContent.java    From nifi with Apache License 2.0
private Map<String, GenericRecord> getGenericRecordMap(byte[] data, Schema schema, String key) throws IOException {
    // create a reader for the merged content
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema);
    SeekableByteArrayInput input = new SeekableByteArrayInput(data);
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(input, datumReader);

    // read all the records into a map to verify all the records are there
    Map<String,GenericRecord> records = new HashMap<>();
    while (dataFileReader.hasNext()) {
        GenericRecord user = dataFileReader.next();
        records.put(user.get(key).toString(), user);
    }
    return records;
}
 
Example #7
Source File: TestAvroDataGenerator.java    From datacollector with Apache License 2.0
@Test
public void testAvroGeneratorDateType() throws Exception {
  Map<String, Field> map = new LinkedHashMap<>();
  map.put("d", Field.create(Field.Type.DATE, new Date(116, 0, 1)));
  Record record = RecordCreator.create();
  record.set(Field.create(map));

  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataGenerator gen = new AvroDataOutputStreamGenerator(
    false,
    baos,
    COMPRESSION_CODEC_DEFAULT,
    DATE_SCHEMA,
    new HashMap<String, Object>(),
    null,
    null,
    0
  );
  gen.write(record);
  gen.close();

  //reader schema must be extracted from the data file
  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(null);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
      new SeekableByteArrayInput(baos.toByteArray()), reader);
  Assert.assertTrue(dataFileReader.hasNext());
  GenericRecord readRecord = dataFileReader.next();

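  // 16801 = days from the Unix epoch (1970-01-01) to 2016-01-01; Avro's 'date'
  // logical type stores a date as an int day count.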
  Assert.assertEquals(16801, readRecord.get("d"));
  Assert.assertFalse(dataFileReader.hasNext());
}
 
Example #8
Source File: TestAvroDataGenerator.java    From datacollector with Apache License 2.0
@Test
public void testAvroGeneratorDecimalType() throws Exception {
  Map<String, Field> map = new LinkedHashMap<>();
  map.put("decimal", Field.create(Field.Type.DECIMAL, BigDecimal.valueOf(1.5)));
  Record record = RecordCreator.create();
  record.set(Field.create(map));

  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataGenerator gen = new AvroDataOutputStreamGenerator(
    false,
    baos,
    COMPRESSION_CODEC_DEFAULT,
    DECIMAL_SCHEMA,
    new HashMap<String, Object>(),
    null,
    null,
    0
  );
  gen.write(record);
  gen.close();

  //reader schema must be extracted from the data file
  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(null);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
      new SeekableByteArrayInput(baos.toByteArray()), reader);
  Assert.assertTrue(dataFileReader.hasNext());
  GenericRecord readRecord = dataFileReader.next();

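  // BigDecimal 1.5 at scale 1 has unscaled value 15 (0x0F); Avro's 'decimal'
  // logical type stores the unscaled value's two's-complement bytes.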
  Assert.assertArrayEquals(new byte[] {0x0F}, ((ByteBuffer)readRecord.get("decimal")).array());
  Assert.assertFalse(dataFileReader.hasNext());
}
 
Example #9
Source File: TestAvroDataGenerator.java    From datacollector with Apache License 2.0
@Test
public void testConvertIntToStringInUnion() throws Exception {
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataGenerator gen = new AvroDataOutputStreamGenerator(
    true,
    baos,
    COMPRESSION_CODEC_DEFAULT,
    null,
    null,
    null,
    null,
    0
  );

  Map<String, Field> rootField = new HashMap<>();
  rootField.put("string", Field.create(Field.Type.INTEGER, 10));

  Record r = RecordCreator.create();
  r.getHeader().setAttribute(BaseAvroDataGenerator.AVRO_SCHEMA_HEADER, STRING_UNION_SCHEMA);
  r.set(Field.create(rootField));
  gen.write(r);
  gen.close();

  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(null);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
    new SeekableByteArrayInput(baos.toByteArray()), reader);
  Assert.assertTrue(dataFileReader.hasNext());
  GenericRecord readRecord = dataFileReader.next();

  Assert.assertEquals(new Utf8("10"), readRecord.get("string"));
  Assert.assertFalse(dataFileReader.hasNext());
}
 
Example #10
Source File: TestAvroDataGenerator.java    From datacollector with Apache License 2.0
@Test
public void testSchemaInHeader() throws Exception {
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataGenerator gen = new AvroDataOutputStreamGenerator(
    true,
    baos,
    COMPRESSION_CODEC_DEFAULT,
    null,
    null,
    null,
    null,
    0
  );
  Record record = createRecord();
  record.getHeader().setAttribute(BaseAvroDataGenerator.AVRO_SCHEMA_HEADER, AVRO_SCHEMA);
  gen.write(record);
  gen.close();

  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(null);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
    new SeekableByteArrayInput(baos.toByteArray()), reader);
  Assert.assertTrue(dataFileReader.hasNext());
  GenericRecord readRecord = dataFileReader.next();

  Assert.assertEquals("hari", readRecord.get("name").toString());
  Assert.assertEquals(3100, readRecord.get("age"));
  Assert.assertFalse(dataFileReader.hasNext());
}
 
Example #11
Source File: TestAvroDataGenerator.java    From datacollector with Apache License 2.0
@Test
public void testAvroGeneratorListMapType() throws Exception {
  LinkedHashMap<String, Field> linkedHashMap = new LinkedHashMap<>();
  linkedHashMap.put("name", Field.create("Jon Natkins"));
  linkedHashMap.put("age", Field.create(29));
  linkedHashMap.put("emails", Field.create(ImmutableList.of(Field.create("[email protected]"))));
  linkedHashMap.put("boss", Field.create(Field.Type.MAP, null));
  Field listMapField = Field.createListMap(linkedHashMap);
  Record record = RecordCreator.create();
  record.set(listMapField);

  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataGenerator gen = new AvroDataOutputStreamGenerator(
      false,
      baos,
      COMPRESSION_CODEC_DEFAULT,
      SCHEMA,
      new HashMap<String, Object>(),
      null,
      null,
      0
  );
  gen.write(record);
  gen.close();

  //reader schema must be extracted from the data file
  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(null);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
      new SeekableByteArrayInput(baos.toByteArray()), reader);
  Assert.assertTrue(dataFileReader.hasNext());
  GenericRecord readRecord = dataFileReader.next();

  Assert.assertEquals("Jon Natkins", readRecord.get("name").toString());
  Assert.assertEquals(29, readRecord.get("age"));
  Assert.assertFalse(dataFileReader.hasNext());
}
 
Example #12
Source File: TestAvroDataGenerator.java    From datacollector with Apache License 2.0
private void testGenerateCompressed(String codecName) throws Exception {

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataGenerator gen = new AvroDataOutputStreamGenerator(
        false,
        baos,
        codecName,
        SCHEMA,
        AvroTypeUtil.getDefaultValuesFromSchema(SCHEMA, new HashSet<String>()),
        null,
        null,
        0
    );
    Record record = createRecord();
    gen.write(record);
    gen.close();

    //reader schema must be extracted from the data file
    GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(null);
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
        new SeekableByteArrayInput(baos.toByteArray()), reader);
    Assert.assertEquals(codecName, dataFileReader.getMetaString("avro.codec"));
    Assert.assertTrue(dataFileReader.hasNext());
    GenericRecord readRecord = dataFileReader.next();

    Assert.assertEquals("hari", readRecord.get("name").toString());
    Assert.assertEquals(3100, readRecord.get("age"));
    Assert.assertFalse(dataFileReader.hasNext());
  }
 
Example #13
Source File: TestAvroDataGenerator.java    From datacollector with Apache License 2.0
@Test
public void testGenerate() throws Exception {

  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataGenerator gen = new AvroDataOutputStreamGenerator(
    false,
    baos,
    COMPRESSION_CODEC_DEFAULT,
    SCHEMA,
    AvroTypeUtil.getDefaultValuesFromSchema(SCHEMA, new HashSet<String>()),
    null,
    null,
    0
  );
  Record record = createRecord();
  gen.write(record);
  gen.close();

  //reader schema must be extracted from the data file
  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(null);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
    new SeekableByteArrayInput(baos.toByteArray()), reader);
  Assert.assertTrue(dataFileReader.hasNext());
  GenericRecord readRecord = dataFileReader.next();

  Assert.assertEquals("hari", readRecord.get("name").toString());
  Assert.assertEquals(3100, readRecord.get("age"));
  Assert.assertFalse(dataFileReader.hasNext());
}
 
Example #14
Source File: TimelineMetadataUtils.java    From hudi with Apache License 2.0
public static <T extends SpecificRecordBase> T deserializeAvroMetadata(byte[] bytes, Class<T> clazz)
    throws IOException {
  DatumReader<T> reader = new SpecificDatumReader<>(clazz);
  FileReader<T> fileReader = DataFileReader.openReader(new SeekableByteArrayInput(bytes), reader);
  ValidationUtils.checkArgument(fileReader.hasNext(), "Could not deserialize metadata of type " + clazz);
  return fileReader.next();
}
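The helper works because the byte array holds a full container file with the schema embedded, which lets the SpecificDatumReader resolve the writer schema against the class's own schema. For context, the matching write side would look roughly like the sketch below; this is an illustration written for this page, and Hudi's actual serializer may differ in its details.

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.avro.specific.SpecificRecordBase;

// Sketch of the write side deserializeAvroMetadata expects: a single specific
// record wrapped in a container file so the schema travels with the bytes.
public final class MetadataSerializer {
  public static <T extends SpecificRecordBase> byte[] serializeAvroMetadata(T metadata)
      throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try (DataFileWriter<T> writer = new DataFileWriter<>(new SpecificDatumWriter<T>())) {
      writer.create(metadata.getSchema(), baos);
      writer.append(metadata);
    }
    return baos.toByteArray();
  }
}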
 
Example #15
Source File: FullSerializationRecordCoder.java    From component-runtime with Apache License 2.0
@Override
public Record decode(final InputStream inputStream) throws IOException {
    final DatumReader<IndexedRecord> datumReader = new GenericDatumReader<>();
    try (final DataFileReader<IndexedRecord> reader =
            new DataFileReader<>(new SeekableByteArrayInput(IOUtils.toByteArray(inputStream)), datumReader)) {
        return new AvroRecord(reader.next());
    }
}
 
Example #16
Source File: RegistrylessAvroConverterTest.java    From registryless-avro-converter with Apache License 2.0
@Test
void fromConnectDataWorksWithoutWriterSchema() throws Exception {
  RegistrylessAvroConverter sut = new RegistrylessAvroConverter();
  Map<String, Object> settings = new HashMap<String, Object>();
  sut.configure(settings, false);

  Schema dogSchema = SchemaBuilder.struct()
    .name("dog")
    .field("name", Schema.STRING_SCHEMA)
    .field("breed", Schema.STRING_SCHEMA)
    .build();

  Struct dogStruct = new Struct(dogSchema)
    .put("name", "Beamer")
    .put("breed", "Boarder Collie");

  byte[] result = sut.fromConnectData("test_topic", dogSchema, dogStruct);

  // Because of the way Avro works, the resulting byte array isn't deterministic,
  // so we read it back with the Avro tools and assert on the decoded record.
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
  GenericRecord instance = null;
  try (
    SeekableByteArrayInput sbai = new SeekableByteArrayInput(result);
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(sbai, datumReader);
  ) {
    instance = dataFileReader.next();

    assertEquals("Beamer", instance.get("name").toString());
    assertEquals("Boarder Collie", instance.get("breed").toString());
  } catch (IOException ioe) {
    throw new Exception("Failed to deserialize Avro data", ioe);
  }
}
 
Example #17
Source File: RegistrylessAvroConverterTest.java    From registryless-avro-converter with Apache License 2.0
@Test
void fromConnectDataWorksWithWriterSchema() throws Exception {
  // This only has to work in the project directory because this is a test. I'm not particularly
  // concerned if it works when the tests are packaged in JAR form right now. If we start doing
  // that then we'll do something clever-er.
  String validSchemaPath = new File("src/test/resources/schema/dog.avsc").getAbsolutePath();

  RegistrylessAvroConverter sut = new RegistrylessAvroConverter();
  Map<String, Object> settings = new HashMap<String, Object>();
  settings.put("schema.path", validSchemaPath);
  sut.configure(settings, false);

  Schema dogSchema = SchemaBuilder.struct()
    .name("dog")
    .field("name", Schema.STRING_SCHEMA)
    .field("breed", Schema.STRING_SCHEMA)
    .build();

  Struct dogStruct = new Struct(dogSchema)
    .put("name", "Beamer")
    .put("breed", "Boarder Collie");

  byte[] result = sut.fromConnectData("test_topic", dogSchema, dogStruct);

  // Because of the way Avro works, the resulting byte array isn't deterministic,
  // so we read it back with the Avro tools and assert on the decoded record.
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
  GenericRecord instance = null;
  try (
    SeekableByteArrayInput sbai = new SeekableByteArrayInput(result);
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(sbai, datumReader);
  ) {
    instance = dataFileReader.next();

    assertEquals("Beamer", instance.get("name").toString());
    assertEquals("Boarder Collie", instance.get("breed").toString());
  } catch (IOException ioe) {
    throw new Exception("Failed to deserialize Avro data", ioe);
  }
}
 
Example #18
Source File: RegistrylessAvroConverter.java    From registryless-avro-converter with Apache License 2.0
@Override
public SchemaAndValue toConnectData(String topic, byte[] value) {
  DatumReader<GenericRecord> datumReader;
  if (avroSchema != null) {
    datumReader = new GenericDatumReader<>(avroSchema);
  } else {
    datumReader = new GenericDatumReader<>();
  }
  GenericRecord instance = null;

  try (
    SeekableByteArrayInput sbai = new SeekableByteArrayInput(value);
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(sbai, datumReader);
  ) {
    instance = dataFileReader.next(instance);
    if (instance == null) {
      logger.warn("Instance was null");
    }

    if (avroSchema != null) {
      return avroDataHelper.toConnectData(avroSchema, instance);
    } else {
      return avroDataHelper.toConnectData(instance.getSchema(), instance);
    }
  } catch (IOException ioe) {
    throw new DataException(String.format("Failed to deserialize Avro data from topic %s", topic), ioe);
  }
}
 
Example #19
Source File: TestAvroDataGenerator.java    From datacollector with Apache License 2.0
@SuppressWarnings("unchecked")
@Test
public void testGenerateWithDefaults() throws Exception {

  Stage.Context context = ContextInfoCreator.createTargetContext("i", false, OnRecordError.TO_ERROR);

  DataFactory dataFactory = new DataGeneratorFactoryBuilder(context, DataGeneratorFormat.AVRO)
    .setCharset(Charset.forName("UTF-16"))
    .setConfig(SCHEMA_KEY, RECORD_SCHEMA)
    .setConfig(
        DEFAULT_VALUES_KEY,
        AvroTypeUtil.getDefaultValuesFromSchema(new Schema.Parser().parse(RECORD_SCHEMA), new HashSet<String>())
    )
    .build();
  Assert.assertTrue(dataFactory instanceof AvroDataGeneratorFactory);
  AvroDataGeneratorFactory factory = (AvroDataGeneratorFactory) dataFactory;

  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  AvroDataOutputStreamGenerator gen = (AvroDataOutputStreamGenerator) factory.getGenerator(baos);
  Assert.assertNotNull(gen);

  Record record = RecordCreator.create();
  Map<String, Field> employee = new HashMap<>();
  record.set(Field.create(employee));

  gen.write(record);
  gen.close();

  // reader schema must be extracted from the data file
  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(null);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
    new SeekableByteArrayInput(baos.toByteArray()), reader);
  Assert.assertTrue(dataFileReader.hasNext());
  GenericRecord result = dataFileReader.next();

  Assert.assertEquals("Hello", result.get("name").toString());
  Assert.assertEquals(25, result.get("age"));
  Assert.assertEquals(false, result.get("resident"));
  Assert.assertEquals("DIAMONDS", result.get("enum").toString());

  List<Utf8> emails = (List<Utf8>) result.get("emails");
  Assert.assertEquals(4, emails.size());
  Assert.assertEquals("SPADES", emails.get(0).toString());
  Assert.assertEquals("HEARTS", emails.get(1).toString());
  Assert.assertEquals("DIAMONDS", emails.get(2).toString());
  Assert.assertEquals("CLUBS", emails.get(3).toString());

  Assert.assertEquals(null, result.get("boss"));

  Map<Utf8, Object> phones = (Map<Utf8, Object>) result.get("phones");
  Assert.assertEquals(8675309, (long)phones.get(new Utf8("home")));
  Assert.assertEquals(8675308, (long)phones.get(new Utf8("mobile")));
}
 
Example #20
Source File: TestAvroDataGenerator.java    From datacollector with Apache License 2.0
@SuppressWarnings("unchecked")
@Test
public void testGenerateWithNestedRecordsAndDefaults() throws Exception {

  Stage.Context context = ContextInfoCreator.createTargetContext("i", false, OnRecordError.TO_ERROR);

  DataFactory dataFactory = new DataGeneratorFactoryBuilder(context, DataGeneratorFormat.AVRO)
      .setCharset(Charset.forName("UTF-16"))
      .setConfig(SCHEMA_KEY, NESTED_RECORD_SCHEMA)
      .setConfig(
          DEFAULT_VALUES_KEY,
          AvroTypeUtil.getDefaultValuesFromSchema(new Schema.Parser().parse(NESTED_RECORD_SCHEMA), new HashSet<String>())
      )
      .build();
  Assert.assertTrue(dataFactory instanceof AvroDataGeneratorFactory);
  AvroDataGeneratorFactory factory = (AvroDataGeneratorFactory) dataFactory;

  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  AvroDataOutputStreamGenerator gen = (AvroDataOutputStreamGenerator) factory.getGenerator(baos);
  Assert.assertNotNull(gen);

  Record record = RecordCreator.create();
  record.set(Field.create(Field.Type.LIST_MAP, ImmutableMap.builder()
      .put("name", Field.create(Field.Type.STRING, "my_name"))
      .put("parts", Field.create(Field.Type.MAP, ImmutableMap.builder()
          .put("required", Field.create(Field.Type.MAP, ImmutableMap.builder()
              .put("name", Field.create(Field.Type.STRING, "nothing"))
              .put("params", Field.create(Field.Type.MAP, ImmutableMap.builder()
                  .put("size", Field.create(Field.Type.STRING, "size"))
                  .put("randomField", Field.create(Field.Type.STRING, "random"))
                  .build()))
              .build()))
          .put("optional", Field.create(Field.Type.MAP, ImmutableMap.builder()
              .put("params", Field.create(Field.Type.MAP, ImmutableMap.builder()
                  .put("color", Field.create(Field.Type.STRING, "green"))
                  .put("randomField", Field.create(Field.Type.STRING, "random"))
                  .build()))
              .build()))
          .build()))
      .build()));
  gen.write(record);
  gen.close();

  // reader schema must be extracted from the data file
  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(null);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
      new SeekableByteArrayInput(baos.toByteArray()), reader);
  Assert.assertTrue(dataFileReader.hasNext());
  GenericRecord result = dataFileReader.next();

  Assert.assertEquals("my_name", result.get("name").toString());

  GenericRecord parts = (GenericRecord) result.get("parts");
  GenericRecord required = (GenericRecord) parts.get("required");
  Assert.assertEquals("nothing", required.get("name").toString());
  GenericRecord params1 = (GenericRecord) required.get("params");
  Assert.assertEquals("size", params1.get("size").toString());
  Assert.assertNull(params1.get("color"));
  Assert.assertNull(params1.get("randomField"));
  GenericRecord optional = (GenericRecord) parts.get("optional");
  Assert.assertNull(optional.get("name"));
  GenericRecord params2 = (GenericRecord) optional.get("params");
  Assert.assertNull(params2.get("size"));
  Assert.assertEquals("green", params2.get("color").toString());
  Assert.assertNull(params2.get("randomField"));
}
 
Example #21
Source File: JdbcAvroRecordTest.java    From dbeam with Apache License 2.0
@Test
public void shouldEncodeResultSetToValidAvro()
    throws ClassNotFoundException, SQLException, IOException {
  ResultSet rs =
      DbTestHelper.createConnection(CONNECTION_URL)
          .createStatement()
          .executeQuery("SELECT * FROM COFFEES");
  Schema schema =
      JdbcAvroSchema.createAvroSchema(rs, "dbeam_generated", "connection", "doc", false);
  JdbcAvroRecordConverter converter = JdbcAvroRecordConverter.create(rs);
  DataFileWriter<GenericRecord> dataFileWriter =
      new DataFileWriter<>(new GenericDatumWriter<>(schema));
  ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
  dataFileWriter.create(schema, outputStream);
  // convert and write
  while (rs.next()) {
    dataFileWriter.appendEncoded(converter.convertResultSetIntoAvroBytes());
  }
  dataFileWriter.flush();
  outputStream.close();
  // transform to generic record
  SeekableByteArrayInput inputStream = new SeekableByteArrayInput(outputStream.toByteArray());
  DataFileReader<GenericRecord> dataFileReader =
      new DataFileReader<>(inputStream, new GenericDatumReader<>(schema));
  final List<GenericRecord> records =
      StreamSupport.stream(dataFileReader.spliterator(), false).collect(Collectors.toList());

  Assert.assertEquals(2, records.size());
  GenericRecord record =
      records.stream()
          .filter(r -> Coffee.COFFEE1.name().equals(r.get(0).toString()))
          .findFirst()
          .orElseThrow(() -> new IllegalArgumentException("not found"));

  Assert.assertEquals(12, record.getSchema().getFields().size());
  Assert.assertEquals(schema, record.getSchema());
  Coffee actual =
      Coffee.create(
          record.get(0).toString(),
          Optional.ofNullable((Integer) record.get(1)),
          new java.math.BigDecimal(record.get(2).toString()),
          (Float) record.get(3),
          (Double) record.get(4),
          (Boolean) record.get(5),
          (Integer) record.get(6),
          (Long) record.get(7),
          new java.sql.Timestamp((Long) record.get(8)),
          Optional.ofNullable((Long) record.get(9)).map(Timestamp::new),
          TestHelper.byteBufferToUuid((ByteBuffer) record.get(10)),
          (Long) record.get(11));
  Assert.assertEquals(Coffee.COFFEE1, actual);
}
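The notable API in this test is appendEncoded: it takes a datum that has already been binary-encoded (as a ByteBuffer) and copies it into the current file block, skipping the object-to-bytes encoding that append(datum) would perform. A minimal sketch written for this page, independent of the dbeam helpers:

import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.EncoderFactory;

public class AppendEncodedSketch {
  public static void main(String[] args) throws Exception {
    Schema schema = SchemaBuilder.record("Row").fields().requiredString("id").endRecord();

    // Pre-encode a datum exactly as it would appear inside a file block.
    GenericRecord row = new GenericData.Record(schema);
    row.put("id", "row-1");
    ByteArrayOutputStream datumBytes = new ByteArrayOutputStream();
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(datumBytes, null);
    new GenericDatumWriter<GenericRecord>(schema).write(row, encoder);
    encoder.flush();

    // appendEncoded copies those bytes straight into the container file,
    // avoiding a decode/re-encode cycle.
    ByteArrayOutputStream file = new ByteArrayOutputStream();
    try (DataFileWriter<GenericRecord> writer =
             new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
      writer.create(schema, file);
      writer.appendEncoded(ByteBuffer.wrap(datumBytes.toByteArray()));
    }
  }
}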
 
Example #22
Source File: TestAvroDataGenerator.java    From datacollector with Apache License 2.0
@Test
public void testAvroGeneratorShortType() throws Exception {
  final String SCHEMA_JSON = "{\n"
  +"\"type\": \"record\",\n"
  +"\"name\": \"WithDecimal\",\n"
  +"\"fields\": [\n"
  +" {\"name\": \"short\", \"type\": \"int\"}"
  +"]}";
  final Schema SCHEMA = new Schema.Parser().parse(SCHEMA_JSON);

  Map<String, Field> map = new LinkedHashMap<>();
  map.put("short", Field.create(Field.Type.SHORT, (short)1));
  Record record = RecordCreator.create();
  record.set(Field.create(map));

  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataGenerator gen = new AvroDataOutputStreamGenerator(
    false,
    baos,
    COMPRESSION_CODEC_DEFAULT,
    SCHEMA,
    new HashMap<String, Object>(),
    null,
    null,
    0
  );
  gen.write(record);
  gen.close();

  //reader schema must be extracted from the data file
  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(null);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
      new SeekableByteArrayInput(baos.toByteArray()), reader);
  Assert.assertTrue(dataFileReader.hasNext());
  GenericRecord readRecord = dataFileReader.next();

  Object retrievedField = readRecord.get("short");
  Assert.assertEquals(1, retrievedField);

  Assert.assertFalse(dataFileReader.hasNext());
}
 
Example #23
Source File: TestFlumeFailoverTarget.java    From datacollector with Apache License 2.0
@Test
public void testWriteAvroRecords() throws InterruptedException, StageException, IOException {

  DataGeneratorFormatConfig dataGeneratorFormatConfig = new DataGeneratorFormatConfig();
  dataGeneratorFormatConfig.avroSchema = SdcAvroTestUtil.AVRO_SCHEMA1;
  dataGeneratorFormatConfig.avroSchemaSource = INLINE;
  dataGeneratorFormatConfig.includeSchema = true;
  dataGeneratorFormatConfig.avroCompression = AvroCompression.NULL;

  FlumeTarget flumeTarget = FlumeTestUtil.createFlumeTarget(
    FlumeTestUtil.createDefaultFlumeConfig(port, false),
    DataFormat.AVRO,
    dataGeneratorFormatConfig
  );
  TargetRunner targetRunner = new TargetRunner.Builder(FlumeDTarget.class, flumeTarget).build();

  targetRunner.runInit();
  List<Record> records = SdcAvroTestUtil.getRecords1();
  targetRunner.runWrite(records);
  targetRunner.runDestroy();

  List<GenericRecord> genericRecords = new ArrayList<>();
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(); //Reader schema argument is optional

  Transaction transaction = ch.getTransaction();
  transaction.begin();
  Event event = ch.take();
  while(event != null) {
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
      new SeekableByteArrayInput(event.getBody()), datumReader);
    while(dataFileReader.hasNext()) {
      genericRecords.add(dataFileReader.next());
    }
    event = ch.take();
  }
  transaction.commit();
  transaction.close();

  Assert.assertEquals(3, genericRecords.size());
  SdcAvroTestUtil.compare1(genericRecords);
}
 
Example #24
Source File: TestFlumeFailoverTarget.java    From datacollector with Apache License 2.0
@Test
public void testWriteAvroRecordsSingleEvent() throws InterruptedException, StageException, IOException {

  DataGeneratorFormatConfig dataGeneratorFormatConfig = new DataGeneratorFormatConfig();
  dataGeneratorFormatConfig.avroSchema = SdcAvroTestUtil.AVRO_SCHEMA1;
  dataGeneratorFormatConfig.avroSchemaSource = INLINE;
  dataGeneratorFormatConfig.includeSchema = true;
  dataGeneratorFormatConfig.avroCompression = AvroCompression.NULL;
  FlumeTarget flumeTarget = FlumeTestUtil.createFlumeTarget(
    FlumeTestUtil.createDefaultFlumeConfig(port, true),
    DataFormat.AVRO,
    dataGeneratorFormatConfig
  );
  TargetRunner targetRunner = new TargetRunner.Builder(FlumeDTarget.class, flumeTarget).build();

  targetRunner.runInit();
  List<Record> records = SdcAvroTestUtil.getRecords1();
  targetRunner.runWrite(records);
  targetRunner.runDestroy();

  List<GenericRecord> genericRecords = new ArrayList<>();
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(); //Reader schema argument is optional

  int eventCounter = 0;
  Transaction transaction = ch.getTransaction();
  transaction.begin();
  Event event = ch.take();
  while(event != null) {
    eventCounter++;
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
      new SeekableByteArrayInput(event.getBody()), datumReader);
    while(dataFileReader.hasNext()) {
      genericRecords.add(dataFileReader.next());
    }
    event = ch.take();
  }
  transaction.commit();
  transaction.close();

  Assert.assertEquals(1, eventCounter);
  Assert.assertEquals(3, genericRecords.size());
  SdcAvroTestUtil.compare1(genericRecords);
}