Java Code Examples for org.apache.avro.io.DatumReader

The following examples show how to use org.apache.avro.io.DatumReader. These examples are extracted from open source projects; the project, source file, and license are noted above each example.
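
Before the project-specific examples, here is a minimal, self-contained sketch of the typical workflow: the DatumReader decodes individual datums, while a DataFileReader drives iteration and supplies the writer schema from the file header. The file name users.avro is an assumption for illustration only.

import java.io.File;
import java.io.IOException;

import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;

public class ReadAvroFile {
    public static void main(String[] args) throws IOException {
        // No schema is passed here; GenericDatumReader picks up the
        // writer schema that DataFileReader reads from the file header.
        DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
        try (DataFileReader<GenericRecord> fileReader =
                new DataFileReader<>(new File("users.avro"), datumReader)) {
            GenericRecord record = null;
            while (fileReader.hasNext()) {
                // Reuse the record object across iterations to limit allocation.
                record = fileReader.next(record);
                System.out.println(record);
            }
        }
    }
}
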
Example 1
Source Project: Cubert   Source File: Purge.java    License: Apache License 2.0
private DataFileReader<GenericRecord> createDataFileReader(String filename,
                                                           boolean localFS) throws IOException
{
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> dataFileReader;

    if (localFS)
    {
        dataFileReader =
                new DataFileReader<GenericRecord>(new File(filename), datumReader);
    }
    else
    {
        Path path = new Path(filename);
        SeekableInput input = new FsInput(path, conf);
        dataFileReader = new DataFileReader<GenericRecord>(input, datumReader);
    }

    return dataFileReader;
}
 
Example 2
Source Project: kite   Source File: AvroEntitySerDe.java    License: Apache License 2.0
private void initKACRecordDatumMaps(String fieldName, Schema fieldSchema,
    Schema writtenFieldSchema) {
  Map<String, DatumReader<Object>> recordFieldReaderMap = new HashMap<String, DatumReader<Object>>();
  Map<String, DatumWriter<Object>> recordFieldWriterMap = new HashMap<String, DatumWriter<Object>>();
  kacRecordDatumReaders.put(fieldName, recordFieldReaderMap);
  kacRecordDatumWriters.put(fieldName, recordFieldWriterMap);
  for (Field recordField : fieldSchema.getFields()) {
    Field writtenRecordField = writtenFieldSchema
        .getField(recordField.name());
    if (writtenRecordField == null) {
      continue;
    }
    recordFieldReaderMap.put(recordField.name(),
        buildDatumReader(recordField.schema(), writtenRecordField.schema()));
    recordFieldWriterMap.put(recordField.name(),
        buildDatumWriter(recordField.schema()));
  }
}
 
Example 3
Source Project: kite   Source File: AvroEntitySerDe.java    License: Apache License 2.0
@Override
public Object deserializeColumnValueFromBytes(String fieldName, byte[] bytes) {
  Field field = avroSchema.getAvroSchema().getField(fieldName);
  DatumReader<Object> datumReader = fieldDatumReaders.get(fieldName);
  if (field == null) {
    throw new ValidationException("Invalid field name " + fieldName
        + " for schema " + avroSchema.toString());
  }
  if (datumReader == null) {
    throw new ValidationException("No datum reader for field name: "
        + fieldName);
  }

  ByteArrayInputStream byteIn = new ByteArrayInputStream(bytes);
  Decoder decoder = getColumnDecoder(field.schema(), byteIn);
  return AvroUtils.readAvroEntity(decoder, datumReader);
}
 
Example 4
Source Project: flink   Source File: AvroInputFormat.java    License: Apache License 2.0
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
	DatumReader<E> datumReader;

	if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
		datumReader = new GenericDatumReader<E>();
	} else {
		datumReader = org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)
			? new SpecificDatumReader<E>(avroValueType) : new ReflectDatumReader<E>(avroValueType);
	}
	if (LOG.isInfoEnabled()) {
		LOG.info("Opening split {}", split);
	}

	SeekableInput in = new FSDataInputStreamWrapper(stream, split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen());
	DataFileReader<E> dataFileReader = (DataFileReader) DataFileReader.openReader(in, datumReader);

	if (LOG.isDebugEnabled()) {
		LOG.debug("Loaded SCHEMA: {}", dataFileReader.getSchema());
	}

	end = split.getStart() + split.getLength();
	recordsReadSinceLastSync = 0;
	return dataFileReader;
}
 
Example 5
private DataFileStream<GenericRecord> createDataFileStream() throws Exception {
  DatumReader<GenericRecord> datumReader;
  if ( useFieldAsInputStream ) {
    datumReader = new GenericDatumReader<GenericRecord>();
    inputStream.reset();
    return new DataFileStream<GenericRecord>( inputStream, datumReader );
  }
  if ( schemaFileName != null && schemaFileName.length() > 0 ) {
    Schema schema = new Schema.Parser().parse( KettleVFS.getInputStream( schemaFileName, variableSpace ) );
    datumReader = new GenericDatumReader<GenericRecord>( schema );
  } else {
    datumReader = new GenericDatumReader<GenericRecord>();
  }
  FileObject fileObject = KettleVFS.getFileObject( fileName, variableSpace );
  if ( fileObject.isFile() ) {
    this.inputStream = fileObject.getContent().getInputStream();
    return new DataFileStream<>( inputStream, datumReader );
  } else {
    FileObject[] avroFiles = fileObject.findFiles( new FileExtensionSelector( "avro" ) );
    if ( !Utils.isEmpty( avroFiles ) ) {
      this.inputStream = avroFiles[ 0 ].getContent().getInputStream();
      return new DataFileStream<>( inputStream, datumReader );
    }
    return null;
  }
}
 
Example 6
Source Project: flink   Source File: AvroStreamingFileSinkITCase.java    License: Apache License 2.0
private static <T> void validateResults(File folder, DatumReader<T> datumReader, List<T> expected) throws Exception {
	File[] buckets = folder.listFiles();
	assertNotNull(buckets);
	assertEquals(1, buckets.length);

	File[] partFiles = buckets[0].listFiles();
	assertNotNull(partFiles);
	assertEquals(2, partFiles.length);

	for (File partFile : partFiles) {
		assertTrue(partFile.length() > 0);

		final List<T> fileContent = readAvroFile(partFile, datumReader);
		assertEquals(expected, fileContent);
	}
}
 
Example 7
Source Project: nifi   Source File: QueryDatabaseTableTest.java    License: Apache License 2.0
private long getNumberOfRecordsFromStream(InputStream in) throws IOException {
    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(in, datumReader)) {
        GenericRecord record = null;
        long recordsFromStream = 0;
        while (dataFileReader.hasNext()) {
            // Reuse record object by passing it to next(). This saves us from
            // allocating and garbage collecting many objects for files with
            // many items.
            record = dataFileReader.next(record);
            recordsFromStream += 1;
        }

        return recordsFromStream;
    }
}
 
Example 8
Source Project: HiveKa   Source File: KafkaAvroMessageDecoder.java    License: Apache License 2.0
public AvroGenericRecordWritable decode(byte[] payload) {
    try {
        MessageDecoderHelper helper = new MessageDecoderHelper(registry,
                topicName, payload).invoke();
        DatumReader<Record> reader = new GenericDatumReader<Record>(helper.getTargetSchema());

        log.debug("Trying to read kafka payload");
        log.debug("buffer: " + helper.getBuffer());
        log.debug("start: " + helper.getStart());
        log.debug("length: " + helper.getLength());
        log.debug("target schema: " + helper.getTargetSchema());
        log.debug("schema: " + helper.getSchema());

        GenericRecord record = reader.read(null, decoderFactory.binaryDecoder(helper.getBuffer().array(),
                helper.getStart(), helper.getLength(), null));
        log.debug("Read kafka payload as " + record);

        AvroGenericRecordWritable grw = new AvroGenericRecordWritable(record);
        grw.setFileSchema(latestSchema);

        return grw;
    } catch (IOException e) {
        throw new MessageDecoderException(e);
    }
}
 
Example 9
public void validateAvroFile(File file) throws IOException {
  // read the events back using GenericRecord
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileReader<GenericRecord> fileReader =
      new DataFileReader<GenericRecord>(file, reader);
  GenericRecord record = new GenericData.Record(fileReader.getSchema());
  int numEvents = 0;
  while (fileReader.hasNext()) {
    fileReader.next(record);
    ByteBuffer body = (ByteBuffer) record.get("body");
    CharsetDecoder decoder = Charsets.UTF_8.newDecoder();
    String bodyStr = decoder.decode(body).toString();
    System.out.println(bodyStr);
    numEvents++;
  }
  fileReader.close();
  Assert.assertEquals("Should have found a total of 3 events", 3, numEvents);
}
 
Example 10
Source Project: big-data-lite   Source File: ReadActivityFile.java    License: MIT License
/**
 * Reads the Avro file
 * @throws IOException
 */
private void readFile() throws IOException {
    // Deserialize Activities from disk
    
    File file = new File(filename);
            
    DatumReader<Activity> activityDatumReader = new SpecificDatumReader<Activity>(Activity.class);
    DataFileReader<Activity> dataFileReader = new DataFileReader<Activity>(file, activityDatumReader);

    Activity activity = null;
    int i = 0;
    
    while (dataFileReader.hasNext() && i < numrecs) {
        i++;
        activity = dataFileReader.next(activity);
        System.out.println(activity);
    }
}
 
Example 11
Source Project: xml-avro   Source File: Converter.java    License: Apache License 2.0
public static void avroToXml(File avroFile, File xmlFile) throws IOException {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(protocol.getType("Element"));
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(avroFile, datumReader);

    GenericRecord record = dataFileReader.next();

    Document doc;
    try {
        doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
    } catch (ParserConfigurationException e) {
        throw new RuntimeException(e);
    }

    Element el = unwrapElement(record, doc);
    doc.appendChild(el);

    saveDocument(doc, xmlFile);
}
 
Example 12
Source Project: nifi   Source File: TestSelectHive3QL.java    License: Apache License 2.0
private long getNumberOfRecordsFromStream(InputStream in) throws IOException {
    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(in, datumReader)) {
        GenericRecord record = null;
        long recordsFromStream = 0;
        while (dataFileReader.hasNext()) {
            // Reuse record object by passing it to next(). This saves us from
            // allocating and garbage collecting many objects for files with
            // many items.
            record = dataFileReader.next(record);
            recordsFromStream += 1;
        }

        return recordsFromStream;
    }
}
 
Example 13
@Override
public DatumReader<T> createDatumReader(Schema schema) {
    if (readerSchema == null) {
        return new ReflectDatumReader<>(schema);
    } else {
        return new ReflectDatumReader<>(schema, readerSchema);
    }
}
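
When both schemas are available, the two-argument ReflectDatumReader above performs Avro schema resolution: data written with the writer schema is projected onto the reader schema, with reader-side defaults filled in. As a hedged illustration of how such a reader might decode raw bytes (a sketch; the class and method names are assumptions, not part of this project):

import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.reflect.ReflectDatumReader;

public class ResolvingReadSketch {
    // Decodes one datum, resolving writerSchema against readerSchema.
    public static <T> T decode(byte[] bytes, Schema writerSchema, Schema readerSchema)
            throws IOException {
        DatumReader<T> reader = new ReflectDatumReader<>(writerSchema, readerSchema);
        BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(bytes, null);
        return reader.read(null, decoder);
    }
}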
 
Example 14
Source Project: incubator-gobblin   Source File: TestAvroExtractor.java    License: Apache License 2.0
public static List<GenericRecord> getRecordFromFile(String path)
    throws IOException {
  Configuration config = new Configuration();
  SeekableInput input = new FsInput(new Path(path), config);
  DatumReader<GenericRecord> reader1 = new GenericDatumReader<>();
  FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader1);
  List<GenericRecord> records = new ArrayList<>();
  for (GenericRecord datum : fileReader) {
    records.add(datum);
  }
  fileReader.close();
  return records;
}
 
Example 15
Source Project: kite   Source File: TestDataModelUtil.java    License: Apache License 2.0
@Test
public void testGetDatumReaderForGenericType() {
  Class<GenericData.Record> type = GenericData.Record.class;
  Schema writerSchema = StandardEvent.getClassSchema();
  DatumReader result = DataModelUtil.getDatumReaderForType(type, writerSchema);
  assertEquals(GenericDatumReader.class, result.getClass());
}
 
Example 16
Source Project: datafu   Source File: TestAvroJob.java    License: Apache License 2.0
private HashMap<Long,Long> loadOutputCounts(String timestamp) throws IOException
{
  HashMap<Long,Long> counts = new HashMap<Long,Long>();
  FileSystem fs = getFileSystem();
  Assert.assertTrue(fs.exists(new Path(_outputPath, timestamp)));
  for (FileStatus stat : fs.globStatus(new Path(_outputPath,timestamp + "/*.avro")))
  {
    _log.info(String.format("found: %s (%d bytes)",stat.getPath(),stat.getLen()));
    FSDataInputStream is = fs.open(stat.getPath());
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(is, reader);
    
    try
    {
      while (dataFileStream.hasNext())
      {
        GenericRecord r = dataFileStream.next();
        Long memberId = (Long)r.get("id");
        Long count = (Long)r.get("count");   
        Assert.assertFalse(counts.containsKey(memberId));
        counts.put(memberId, count);
      }
    }
    finally
    {
      dataFileStream.close();
    }
  }
  return counts;
}
 
Example 17
Source Project: nifi   Source File: TestJdbcCommon.java    License: Apache License 2.0
@Test
public void testConvertToAvroStreamForUnsignedIntegerWithPrecision1ReturnedAsLong_NIFI5612() throws SQLException, IOException {
    final String mockColumnName = "t_int";
    final ResultSetMetaData metadata = mock(ResultSetMetaData.class);
    when(metadata.getColumnCount()).thenReturn(1);
    when(metadata.getColumnType(1)).thenReturn(Types.INTEGER);
    when(metadata.isSigned(1)).thenReturn(false);
    when(metadata.getPrecision(1)).thenReturn(1);
    when(metadata.getColumnName(1)).thenReturn(mockColumnName);
    when(metadata.getTableName(1)).thenReturn("table");

    final ResultSet rs = JdbcCommonTestUtils.resultSetReturningMetadata(metadata);

    final Long ret = 0L;
    when(rs.getObject(Mockito.anyInt())).thenReturn(ret);

    final InputStream instream = JdbcCommonTestUtils.convertResultSetToAvroInputStream(rs);

    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (final DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(instream, datumReader)) {
        GenericRecord record = null;
        while (dataFileReader.hasNext()) {
            record = dataFileReader.next(record);
            assertEquals(Long.toString(ret), record.get(mockColumnName).toString());
        }
    }
}
 
Example 18
Source Project: nifi   Source File: TestJdbcCommon.java    License: Apache License 2.0
@Test
public void testConvertToAvroStreamForUnsignedIntegerWithPrecision10() throws SQLException, IOException {
    final String mockColumnName = "t_int";
    final ResultSetMetaData metadata = mock(ResultSetMetaData.class);
    when(metadata.getColumnCount()).thenReturn(1);
    when(metadata.getColumnType(1)).thenReturn(Types.INTEGER);
    when(metadata.isSigned(1)).thenReturn(false);
    when(metadata.getPrecision(1)).thenReturn(10);
    when(metadata.getColumnName(1)).thenReturn(mockColumnName);
    when(metadata.getTableName(1)).thenReturn("table");

    final ResultSet rs = JdbcCommonTestUtils.resultSetReturningMetadata(metadata);

    final Long ret = 0L;
    when(rs.getObject(Mockito.anyInt())).thenReturn(ret);

    final InputStream instream = JdbcCommonTestUtils.convertResultSetToAvroInputStream(rs);

    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (final DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(instream, datumReader)) {
        GenericRecord record = null;
        while (dataFileReader.hasNext()) {
            record = dataFileReader.next(record);
            assertEquals(Long.toString(ret), record.get(mockColumnName).toString());
        }
    }
}
 
Example 19
Source Project: spork   Source File: AvroArrayReader.java    License: Apache License 2.0
@Override
public void initialize(final InputSplit isplit, final TaskAttemptContext tc)
    throws IOException, InterruptedException {

  FileSplit fsplit = (FileSplit) isplit;
  start  = fsplit.getStart();
  end    = fsplit.getStart() + fsplit.getLength();
  DatumReader<GenericData.Array<Object>> datumReader
    = new GenericDatumReader<GenericData.Array<Object>>(schema);
  reader = DataFileReader.openReader(
      new FsInput(fsplit.getPath(), tc.getConfiguration()),
      datumReader);
  reader.sync(start);
}
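
A note on reader.sync(start): it advances the reader to the first sync marker at or after the split start, so each split processes whole blocks exactly once. The matching idiom on the read side is to stop once the reader passes the split end, e.g. (a sketch of the loop, not the project's actual code):

while (reader.hasNext() && !reader.pastSync(end)) {
    GenericData.Array<Object> array = reader.next();
    // ... hand the array off to the caller ...
}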
 
Example 20
Source Project: kite   Source File: AvroEntitySerDe.java    License: Apache License 2.0
private DatumReader<Object> buildDatumReader(Schema schema,
    Schema writtenSchema) {
  if (specific) {
    return new SpecificDatumReader<Object>(writtenSchema, schema);
  } else {
    return new GenericDatumReader<Object>(writtenSchema, schema);
  }
}
 
Example 21
Source Project: Flink-CEPplus   Source File: AvroRecordInputFormatTest.java    License: Apache License 2.0
/**
 * This test validates proper serialization with specific (generated POJO) types.
 */
@Test
public void testDeserializeToSpecificType() throws IOException {

	DatumReader<User> datumReader = new SpecificDatumReader<>(userSchema);

	try (FileReader<User> dataFileReader = DataFileReader.openReader(testFile, datumReader)) {
		User rec = dataFileReader.next();

		// check if record has been read correctly
		assertNotNull(rec);
		assertEquals("name not equal", TEST_NAME, rec.get("name").toString());
		assertEquals("enum not equal", TEST_ENUM_COLOR.toString(), rec.get("type_enum").toString());

		// now serialize it with our framework:
		ExecutionConfig ec = new ExecutionConfig();
		TypeInformation<User> te = TypeExtractor.createTypeInfo(User.class);

		assertEquals(AvroTypeInfo.class, te.getClass());
		TypeSerializer<User> tser = te.createSerializer(ec);

		ByteArrayOutputStream out = new ByteArrayOutputStream();
		try (DataOutputViewStreamWrapper outView = new DataOutputViewStreamWrapper(out)) {
			tser.serialize(rec, outView);
		}

		User newRec;
		try (DataInputViewStreamWrapper inView = new DataInputViewStreamWrapper(
				new ByteArrayInputStream(out.toByteArray()))) {
			newRec = tser.deserialize(inView);
		}

		// check if it is still the same
		assertNotNull(newRec);
		assertEquals("name not equal", TEST_NAME, newRec.getName().toString());
		assertEquals("enum not equal", TEST_ENUM_COLOR.toString(), newRec.getTypeEnum().toString());
	}
}
 
Example 22
Source Project: flink   Source File: AvroFactory.java    License: Apache License 2.0
private AvroFactory(
	GenericData avroData,
	Schema schema,
	DatumReader<T> reader,
	DatumWriter<T> writer) {

	this.avroData = checkNotNull(avroData);
	this.schema = checkNotNull(schema);
	this.writer = checkNotNull(writer);
	this.reader = checkNotNull(reader);
}
 
Example 23
Source Project: datafu   Source File: PartitionCollapsingTests.java    License: Apache License 2.0
private HashMap<Long,Long> loadOutputCounts(String timestamp) throws IOException
{
  HashMap<Long,Long> counts = new HashMap<Long,Long>();
  FileSystem fs = getFileSystem();
  Assert.assertTrue(fs.exists(new Path(_outputPath, timestamp)));
  for (FileStatus stat : fs.globStatus(new Path(_outputPath,timestamp + "/*.avro")))
  {
    _log.info(String.format("found: %s (%d bytes)",stat.getPath(),stat.getLen()));
    FSDataInputStream is = fs.open(stat.getPath());
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(is, reader);
    
    try
    {
      while (dataFileStream.hasNext())
      {
        GenericRecord r = dataFileStream.next();
        Long memberId = (Long)((GenericRecord)r.get("key")).get("id");
        Long count = (Long)((GenericRecord)r.get("value")).get("count");        
        Assert.assertFalse(counts.containsKey(memberId));
        counts.put(memberId, count);
      }
    }
    finally
    {
      dataFileStream.close();
    }
  }
  return counts;
}
 
Example 24
Source Project: iceberg   Source File: AvroIterable.java    License: Apache License 2.0
AvroIterable(InputFile file, DatumReader<D> reader,
             Long start, Long length, boolean reuseContainers) {
  this.file = file;
  this.reader = reader;
  this.start = start;
  this.end = start != null ? start + length : null;
  this.reuseContainers = reuseContainers;
}
 
Example 25
Source Project: iceberg   Source File: ProjectionDatumReader.java    License: Apache License 2.0
public ProjectionDatumReader(Function<Schema, DatumReader<?>> getReader,
                             org.apache.iceberg.Schema expectedSchema,
                             Map<String, String> renames,
                             NameMapping nameMapping) {
  this.getReader = getReader;
  this.expectedSchema = expectedSchema;
  this.renames = renames;
  this.nameMapping = nameMapping;
}
 
Example 26
Source Project: localization_nifi   Source File: TestJdbcCommon.java    License: Apache License 2.0
@Test
public void testConvertToBytes() throws ClassNotFoundException, SQLException, IOException {
    final Statement st = con.createStatement();
    st.executeUpdate("insert into restaurants values (1, 'Irifunes', 'San Mateo')");
    st.executeUpdate("insert into restaurants values (2, 'Estradas', 'Daly City')");
    st.executeUpdate("insert into restaurants values (3, 'Prime Rib House', 'San Francisco')");

    final ResultSet resultSet = st.executeQuery("select R.*, ROW_NUMBER() OVER () as rownr from restaurants R");

    final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
    JdbcCommon.convertToAvroStream(resultSet, outStream, false);

    final byte[] serializedBytes = outStream.toByteArray();
    assertNotNull(serializedBytes);
    System.out.println("Avro serialized result size in bytes: " + serializedBytes.length);

    st.close();

    // Deserialize bytes to records
    final InputStream instream = new ByteArrayInputStream(serializedBytes);

    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (final DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(instream, datumReader)) {
        GenericRecord record = null;
        while (dataFileReader.hasNext()) {
            // Reuse record object by passing it to next(). This saves us from
            // allocating and garbage collecting many objects for files with
            // many items.
            record = dataFileReader.next(record);
            System.out.println(record);
        }
    }
}
 
Example 27
Source Project: kite   Source File: DataModelUtil.java    License: Apache License 2.0
/**
 * Get the DatumReader for the given type.
 *
 * @param <E> The entity type
 * @param type The Java class of the entity type
 * @param writerSchema The {@link Schema} for entities
 * @return The DatumReader for the given type
 */
@SuppressWarnings("unchecked")
public static <E> DatumReader<E> getDatumReaderForType(Class<E> type, Schema writerSchema) {
  Schema readerSchema = getReaderSchema(type, writerSchema);
  GenericData dataModel = getDataModelForType(type);
  if (dataModel instanceof ReflectData) {
    return new ReflectDatumReader<E>(writerSchema, readerSchema, (ReflectData)dataModel);
  } else if (dataModel instanceof SpecificData) {
    return new SpecificDatumReader<E>(writerSchema, readerSchema, (SpecificData)dataModel);
  } else {
    return new GenericDatumReader<E>(writerSchema, readerSchema, dataModel);
  }
}
 
Example 28
Source Project: datacollector   Source File: ClusterHdfsSource.java    License: Apache License 2.0
private List<Map.Entry> previewAvroBatch(FileStatus fileStatus, int batchSize) throws IOException {
  int previewCount = previewBuffer.size();
  Path filePath = fileStatus.getPath();
  SeekableInput input = new FsInput(filePath, hadoopConf);
  DatumReader<GenericRecord> reader = new GenericDatumReader<>();
  List<Map.Entry> batch = new ArrayList<>();
  try (FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader)) {
    int count = 0;
    while (fileReader.hasNext() && batch.size() < batchSize && previewCount < batchSize) {
      GenericRecord datum = fileReader.next();
      ByteArrayOutputStream out = new ByteArrayOutputStream();
      DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>
          (datum.getSchema()));
      try {
        dataFileWriter.create(datum.getSchema(), out);
        dataFileWriter.append(datum);
      } finally {
        dataFileWriter.close();
        out.close();
      }
      batch.add(new Pair(filePath.toUri().getPath() + "::" + count, out.toByteArray()));
      count++;
      previewCount++;
    }
  }
  return batch;
}