org.apache.avro.io.DatumReader Java Examples

The following examples show how to use org.apache.avro.io.DatumReader. They are drawn from open-source projects; the source project, author, file, and license are listed above each example.
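Before the project-specific examples, here is a minimal, self-contained sketch of the pattern most of them share: pair a DatumReader with an Avro container-file reader and iterate over the records. The file name data.avro and the class name DatumReaderSketch are illustrative assumptions, not taken from any project below.

import java.io.File;
import java.io.IOException;

import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;

public class DatumReaderSketch {
  public static void main(String[] args) throws IOException {
    // A GenericDatumReader resolves records against the schema stored in the file itself.
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    // DataFileReader pairs the DatumReader with an Avro container file on disk.
    try (DataFileReader<GenericRecord> fileReader =
             new DataFileReader<>(new File("data.avro"), datumReader)) {
      GenericRecord record = null;
      while (fileReader.hasNext()) {
        // Reuse the record instance to cut allocation, as several examples below do.
        record = fileReader.next(record);
        System.out.println(record);
      }
    }
  }
}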
Example #1
Source Project: kite   Author: kite-sdk   File: AvroEntitySerDe.java    License: Apache License 2.0
private void initKACRecordDatumMaps(String fieldName, Schema fieldSchema,
    Schema writtenFieldSchema) {
  Map<String, DatumReader<Object>> recordFieldReaderMap = new HashMap<String, DatumReader<Object>>();
  Map<String, DatumWriter<Object>> recordFieldWriterMap = new HashMap<String, DatumWriter<Object>>();
  kacRecordDatumReaders.put(fieldName, recordFieldReaderMap);
  kacRecordDatumWriters.put(fieldName, recordFieldWriterMap);
  for (Field recordField : fieldSchema.getFields()) {
    Field writtenRecordField = writtenFieldSchema
        .getField(recordField.name());
    if (writtenRecordField == null) {
      continue;
    }
    recordFieldReaderMap.put(recordField.name(),
        buildDatumReader(recordField.schema(), writtenRecordField.schema()));
    recordFieldWriterMap.put(recordField.name(),
        buildDatumWriter(recordField.schema()));
  }
}
 
Example #2
Source Project: kite   Author: kite-sdk   File: AvroEntitySerDe.java    License: Apache License 2.0
@Override
public Object deserializeColumnValueFromBytes(String fieldName, byte[] bytes) {
  Field field = avroSchema.getAvroSchema().getField(fieldName);
  DatumReader<Object> datumReader = fieldDatumReaders.get(fieldName);
  if (field == null) {
    throw new ValidationException("Invalid field name " + fieldName
        + " for schema " + avroSchema.toString());
  }
  if (datumReader == null) {
    throw new ValidationException("No datum reader for field name: "
        + fieldName);
  }

  ByteArrayInputStream byteIn = new ByteArrayInputStream(bytes);
  Decoder decoder = getColumnDecoder(field.schema(), byteIn);
  return AvroUtils.readAvroEntity(decoder, datumReader);
}
 
Example #3
Source Project: flink   Author: flink-tpc-ds   File: AvroInputFormat.java    License: Apache License 2.0
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
	DatumReader<E> datumReader;

	if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
		datumReader = new GenericDatumReader<E>();
	} else {
		datumReader = org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)
			? new SpecificDatumReader<E>(avroValueType) : new ReflectDatumReader<E>(avroValueType);
	}
	if (LOG.isInfoEnabled()) {
		LOG.info("Opening split {}", split);
	}

	SeekableInput in = new FSDataInputStreamWrapper(stream, split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen());
	DataFileReader<E> dataFileReader = (DataFileReader) DataFileReader.openReader(in, datumReader);

	if (LOG.isDebugEnabled()) {
		LOG.debug("Loaded SCHEMA: {}", dataFileReader.getSchema());
	}

	end = split.getStart() + split.getLength();
	recordsReadSinceLastSync = 0;
	return dataFileReader;
}
 
Example #4
Source Project: flink   Author: apache   File: AvroInputFormat.java    License: Apache License 2.0
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
	DatumReader<E> datumReader;

	if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
		datumReader = new GenericDatumReader<E>();
	} else {
		datumReader = org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)
			? new SpecificDatumReader<E>(avroValueType) : new ReflectDatumReader<E>(avroValueType);
	}
	if (LOG.isInfoEnabled()) {
		LOG.info("Opening split {}", split);
	}

	SeekableInput in = new FSDataInputStreamWrapper(stream, split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen());
	DataFileReader<E> dataFileReader = (DataFileReader) DataFileReader.openReader(in, datumReader);

	if (LOG.isDebugEnabled()) {
		LOG.debug("Loaded SCHEMA: {}", dataFileReader.getSchema());
	}

	end = split.getStart() + split.getLength();
	recordsReadSinceLastSync = 0;
	return dataFileReader;
}
 
Example #5
Source Project: pentaho-hadoop-shims   Author: pentaho   File: PentahoAvroInputFormat.java    License: Apache License 2.0
private DataFileStream<GenericRecord> createDataFileStream() throws Exception {
  DatumReader<GenericRecord> datumReader;
  if ( useFieldAsInputStream ) {
    datumReader = new GenericDatumReader<GenericRecord>();
    inputStream.reset();
    return new DataFileStream<GenericRecord>( inputStream, datumReader );
  }
  if ( schemaFileName != null && schemaFileName.length() > 0 ) {
    Schema schema = new Schema.Parser().parse( KettleVFS.getInputStream( schemaFileName, variableSpace ) );
    datumReader = new GenericDatumReader<GenericRecord>( schema );
  } else {
    datumReader = new GenericDatumReader<GenericRecord>();
  }
  FileObject fileObject = KettleVFS.getFileObject( fileName, variableSpace );
  if ( fileObject.isFile() ) {
    this.inputStream = fileObject.getContent().getInputStream();
    return new DataFileStream<>( inputStream, datumReader );
  } else {
    FileObject[] avroFiles = fileObject.findFiles( new FileExtensionSelector( "avro" ) );
    if ( !Utils.isEmpty( avroFiles ) ) {
      this.inputStream = avroFiles[ 0 ].getContent().getInputStream();
      return new DataFileStream<>( inputStream, datumReader );
    }
    return null;
  }
}
 
Example #6
Source Project: flink   Author: apache   File: AvroStreamingFileSinkITCase.java    License: Apache License 2.0
private static <T> void validateResults(File folder, DatumReader<T> datumReader, List<T> expected) throws Exception {
	File[] buckets = folder.listFiles();
	assertNotNull(buckets);
	assertEquals(1, buckets.length);

	File[] partFiles = buckets[0].listFiles();
	assertNotNull(partFiles);
	assertEquals(2, partFiles.length);

	for (File partFile : partFiles) {
		assertTrue(partFile.length() > 0);

		final List<T> fileContent = readAvroFile(partFile, datumReader);
		assertEquals(expected, fileContent);
	}
}
 
Example #7
Source Project: nifi   Author: apache   File: QueryDatabaseTableTest.java    License: Apache License 2.0
private long getNumberOfRecordsFromStream(InputStream in) throws IOException {
    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(in, datumReader)) {
        GenericRecord record = null;
        long recordsFromStream = 0;
        while (dataFileReader.hasNext()) {
            // Reuse record object by passing it to next(). This saves us from
            // allocating and garbage collecting many objects for files with
            // many items.
            record = dataFileReader.next(record);
            recordsFromStream += 1;
        }

        return recordsFromStream;
    }
}
 
Example #8
Source Project: HiveKa   Author: HiveKa   File: KafkaAvroMessageDecoder.java    License: Apache License 2.0
public AvroGenericRecordWritable decode(byte[] payload) {
  try {
    MessageDecoderHelper helper = new MessageDecoderHelper(registry,
        topicName, payload).invoke();
    DatumReader<Record> reader = new GenericDatumReader<Record>(helper.getTargetSchema());

    log.debug("Trying to read kafka payload");
    log.debug("buffer: " + helper.getBuffer());
    log.debug("start: " + helper.getStart());
    log.debug("length: " + helper.getLength());
    log.debug("target schema: " + helper.getTargetSchema());
    log.debug("schema: " + helper.getSchema());
    GenericRecord record = reader.read(null, decoderFactory.binaryDecoder(helper.getBuffer().array(),
        helper.getStart(), helper.getLength(), null));
    log.debug("Read kafka payload as " + record);

    AvroGenericRecordWritable grw = new AvroGenericRecordWritable(record);
    grw.setFileSchema(latestSchema);

    return grw;
  } catch (IOException e) {
    throw new MessageDecoderException(e);
  }
}
 
Example #9
Source Project: mt-flume   Author: javachen   File: TestFlumeEventAvroEventSerializer.java    License: Apache License 2.0
public void validateAvroFile(File file) throws IOException {
  // read the events back using GenericRecord
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileReader<GenericRecord> fileReader =
      new DataFileReader<GenericRecord>(file, reader);
  GenericRecord record = new GenericData.Record(fileReader.getSchema());
  int numEvents = 0;
  while (fileReader.hasNext()) {
    fileReader.next(record);
    ByteBuffer body = (ByteBuffer) record.get("body");
    CharsetDecoder decoder = Charsets.UTF_8.newDecoder();
    String bodyStr = decoder.decode(body).toString();
    System.out.println(bodyStr);
    numEvents++;
  }
  fileReader.close();
  Assert.assertEquals("Should have found a total of 3 events", 3, numEvents);
}
 
Example #10
Source Project: Cubert   Author: linkedin   File: Purge.java    License: Apache License 2.0
private DataFileReader<GenericRecord> createDataFileReader(String filename,
                                                           boolean localFS) throws IOException
{
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> dataFileReader;

    if (localFS)
    {
        dataFileReader =
                new DataFileReader<GenericRecord>(new File(filename), datumReader);
    }
    else
    {
        Path path = new Path(filename);
        SeekableInput input = new FsInput(path, conf);
        dataFileReader = new DataFileReader<GenericRecord>(input, datumReader);
    }

    return dataFileReader;
}
 
Example #11
Source Project: big-data-lite   Author: oracle   File: ReadActivityFile.java    License: MIT License
/**
 * Reads the Avro file
 * @throws IOException
 */
private void readFile() throws IOException {
    // Deserialize Activities from disk
    
    File file = new File(filename);
            
    DatumReader<Activity> activityDatumReader = new SpecificDatumReader<Activity>(Activity.class);
    DataFileReader<Activity> dataFileReader = new DataFileReader<Activity>(file, activityDatumReader);

    Activity activity = null;
    int i = 0;
    
    while (dataFileReader.hasNext() && i < numrecs) {
        i++;
        activity = dataFileReader.next(activity);
        System.out.println(activity);
    }
}
 
Example #12
Source Project: xml-avro   Author: elodina   File: Converter.java    License: Apache License 2.0
public static void avroToXml(File avroFile, File xmlFile) throws IOException {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(protocol.getType("Element"));
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(avroFile, datumReader);

    GenericRecord record = dataFileReader.next();

    Document doc;
    try {
        doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
    } catch (ParserConfigurationException e) {
        throw new RuntimeException(e);
    }

    Element el = unwrapElement(record, doc);
    doc.appendChild(el);

    saveDocument(doc, xmlFile);
}
 
Example #13
Source Project: nifi   Author: apache   File: TestSelectHive3QL.java    License: Apache License 2.0
private long getNumberOfRecordsFromStream(InputStream in) throws IOException {
    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(in, datumReader)) {
        GenericRecord record = null;
        long recordsFromStream = 0;
        while (dataFileReader.hasNext()) {
            // Reuse record object by passing it to next(). This saves us from
            // allocating and garbage collecting many objects for files with
            // many items.
            record = dataFileReader.next(record);
            recordsFromStream += 1;
        }

        return recordsFromStream;
    }
}
 
Example #14
Source Project: apicurio-registry   Author: Apicurio   File: ReflectAvroDatumProvider.java    License: Apache License 2.0
@Override
public DatumReader<T> createDatumReader(Schema schema) {
    if (readerSchema == null) {
        return new ReflectDatumReader<>(schema);
    } else {
        return new ReflectDatumReader<>(schema, readerSchema);
    }
}
 
Example #15
Source Project: incubator-gobblin   Author: apache   File: TestAvroExtractor.java    License: Apache License 2.0
public static List<GenericRecord> getRecordFromFile(String path)
    throws IOException {
  Configuration config = new Configuration();
  SeekableInput input = new FsInput(new Path(path), config);
  DatumReader<GenericRecord> reader1 = new GenericDatumReader<>();
  FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader1);
  List<GenericRecord> records = new ArrayList<>();
  for (GenericRecord datum : fileReader) {
    records.add(datum);
  }
  fileReader.close();
  return records;
}
 
Example #16
Source Project: kite   Author: kite-sdk   File: TestDataModelUtil.java    License: Apache License 2.0
@Test
public void testGetDatumReaderForGenericType() {
  Class<GenericData.Record> type = GenericData.Record.class;
  Schema writerSchema = StandardEvent.getClassSchema();
  DatumReader result = DataModelUtil.getDatumReaderForType(type, writerSchema);
  assertEquals(GenericDatumReader.class, result.getClass());
}
 
Example #17
Source Project: datafu   Author: apache   File: TestAvroJob.java    License: Apache License 2.0
private HashMap<Long,Long> loadOutputCounts(String timestamp) throws IOException
{
  HashMap<Long,Long> counts = new HashMap<Long,Long>();
  FileSystem fs = getFileSystem();
  Assert.assertTrue(fs.exists(new Path(_outputPath, timestamp)));
  for (FileStatus stat : fs.globStatus(new Path(_outputPath,timestamp + "/*.avro")))
  {
    _log.info(String.format("found: %s (%d bytes)",stat.getPath(),stat.getLen()));
    FSDataInputStream is = fs.open(stat.getPath());
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(is, reader);
    
    try
    {
      while (dataFileStream.hasNext())
      {
        GenericRecord r = dataFileStream.next();
        Long memberId = (Long)r.get("id");
        Long count = (Long)r.get("count");   
        Assert.assertFalse(counts.containsKey(memberId));
        counts.put(memberId, count);
      }
    }
    finally
    {
      dataFileStream.close();
    }
  }
  return counts;
}
 
Example #18
Source Project: nifi   Author: apache   File: TestJdbcCommon.java    License: Apache License 2.0
@Test
public void testConvertToAvroStreamForUnsignedIntegerWithPrecision1ReturnedAsLong_NIFI5612() throws SQLException, IOException {
    final String mockColumnName = "t_int";
    final ResultSetMetaData metadata = mock(ResultSetMetaData.class);
    when(metadata.getColumnCount()).thenReturn(1);
    when(metadata.getColumnType(1)).thenReturn(Types.INTEGER);
    when(metadata.isSigned(1)).thenReturn(false);
    when(metadata.getPrecision(1)).thenReturn(1);
    when(metadata.getColumnName(1)).thenReturn(mockColumnName);
    when(metadata.getTableName(1)).thenReturn("table");

    final ResultSet rs = JdbcCommonTestUtils.resultSetReturningMetadata(metadata);

    final Long ret = 0L;
    when(rs.getObject(Mockito.anyInt())).thenReturn(ret);

    final InputStream instream = JdbcCommonTestUtils.convertResultSetToAvroInputStream(rs);

    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (final DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(instream, datumReader)) {
        GenericRecord record = null;
        while (dataFileReader.hasNext()) {
            record = dataFileReader.next(record);
            assertEquals(Long.toString(ret), record.get(mockColumnName).toString());
        }
    }
}
 
Example #19
Source Project: nifi   Author: apache   File: TestJdbcCommon.java    License: Apache License 2.0
@Test
public void testConvertToAvroStreamForUnsignedIntegerWithPrecision10() throws SQLException, IOException {
    final String mockColumnName = "t_int";
    final ResultSetMetaData metadata = mock(ResultSetMetaData.class);
    when(metadata.getColumnCount()).thenReturn(1);
    when(metadata.getColumnType(1)).thenReturn(Types.INTEGER);
    when(metadata.isSigned(1)).thenReturn(false);
    when(metadata.getPrecision(1)).thenReturn(10);
    when(metadata.getColumnName(1)).thenReturn(mockColumnName);
    when(metadata.getTableName(1)).thenReturn("table");

    final ResultSet rs = JdbcCommonTestUtils.resultSetReturningMetadata(metadata);

    final Long ret = 0L;
    when(rs.getObject(Mockito.anyInt())).thenReturn(ret);

    final InputStream instream = JdbcCommonTestUtils.convertResultSetToAvroInputStream(rs);

    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (final DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(instream, datumReader)) {
        GenericRecord record = null;
        while (dataFileReader.hasNext()) {
            record = dataFileReader.next(record);
            assertEquals(Long.toString(ret), record.get(mockColumnName).toString());
        }
    }
}
 
Example #20
Source Project: spork   Author: sigmoidanalytics   File: AvroArrayReader.java    License: Apache License 2.0
@Override
public void initialize(final InputSplit isplit, final TaskAttemptContext tc)
    throws IOException, InterruptedException {

  FileSplit fsplit = (FileSplit) isplit;
  start  = fsplit.getStart();
  end    = fsplit.getStart() + fsplit.getLength();
  DatumReader<GenericData.Array<Object>> datumReader
    = new GenericDatumReader<GenericData.Array<Object>>(schema);
  reader = DataFileReader.openReader(
      new FsInput(fsplit.getPath(), tc.getConfiguration()),
      datumReader);
  reader.sync(start);
}
 
Example #21
Source Project: kite   Author: kite-sdk   File: AvroEntitySerDe.java    License: Apache License 2.0
private DatumReader<Object> buildDatumReader(Schema schema,
    Schema writtenSchema) {
  if (specific) {
    return new SpecificDatumReader<Object>(writtenSchema, schema);
  } else {
    return new GenericDatumReader<Object>(writtenSchema, schema);
  }
}
 
Example #22
Source Project: Flink-CEPplus   Author: ljygz   File: AvroRecordInputFormatTest.java    License: Apache License 2.0
/**
 * This test validates proper serialization with specific (generated POJO) types.
 */
@Test
public void testDeserializeToSpecificType() throws IOException {

	DatumReader<User> datumReader = new SpecificDatumReader<>(userSchema);

	try (FileReader<User> dataFileReader = DataFileReader.openReader(testFile, datumReader)) {
		User rec = dataFileReader.next();

		// check if record has been read correctly
		assertNotNull(rec);
		assertEquals("name not equal", TEST_NAME, rec.get("name").toString());
		assertEquals("enum not equal", TEST_ENUM_COLOR.toString(), rec.get("type_enum").toString());

		// now serialize it with our framework:
		ExecutionConfig ec = new ExecutionConfig();
		TypeInformation<User> te = TypeExtractor.createTypeInfo(User.class);

		assertEquals(AvroTypeInfo.class, te.getClass());
		TypeSerializer<User> tser = te.createSerializer(ec);

		ByteArrayOutputStream out = new ByteArrayOutputStream();
		try (DataOutputViewStreamWrapper outView = new DataOutputViewStreamWrapper(out)) {
			tser.serialize(rec, outView);
		}

		User newRec;
		try (DataInputViewStreamWrapper inView = new DataInputViewStreamWrapper(
				new ByteArrayInputStream(out.toByteArray()))) {
			newRec = tser.deserialize(inView);
		}

		// check if it is still the same
		assertNotNull(newRec);
		assertEquals("name not equal", TEST_NAME, newRec.getName().toString());
		assertEquals("enum not equal", TEST_ENUM_COLOR.toString(), newRec.getTypeEnum().toString());
	}
}
 
Example #23
Source Project: flink   Author: flink-tpc-ds   File: AvroFactory.java    License: Apache License 2.0
private AvroFactory(
	GenericData avroData,
	Schema schema,
	DatumReader<T> reader,
	DatumWriter<T> writer) {

	this.avroData = checkNotNull(avroData);
	this.schema = checkNotNull(schema);
	this.writer = checkNotNull(writer);
	this.reader = checkNotNull(reader);
}
 
Example #24
Source Project: datafu   Author: apache   File: PartitionCollapsingTests.java    License: Apache License 2.0
private HashMap<Long,Long> loadOutputCounts(String timestamp) throws IOException
{
  HashMap<Long,Long> counts = new HashMap<Long,Long>();
  FileSystem fs = getFileSystem();
  Assert.assertTrue(fs.exists(new Path(_outputPath, timestamp)));
  for (FileStatus stat : fs.globStatus(new Path(_outputPath,timestamp + "/*.avro")))
  {
    _log.info(String.format("found: %s (%d bytes)",stat.getPath(),stat.getLen()));
    FSDataInputStream is = fs.open(stat.getPath());
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(is, reader);
    
    try
    {
      while (dataFileStream.hasNext())
      {
        GenericRecord r = dataFileStream.next();
        Long memberId = (Long)((GenericRecord)r.get("key")).get("id");
        Long count = (Long)((GenericRecord)r.get("value")).get("count");        
        Assert.assertFalse(counts.containsKey(memberId));
        counts.put(memberId, count);
      }
    }
    finally
    {
      dataFileStream.close();
    }
  }
  return counts;
}
 
Example #25
Source Project: flink   Author: flink-tpc-ds   File: AvroRecordInputFormatTest.java    License: Apache License 2.0
/**
 * This test validates proper serialization with specific (generated POJO) types.
 */
@Test
public void testDeserializeToSpecificType() throws IOException {

	DatumReader<User> datumReader = new SpecificDatumReader<>(userSchema);

	try (FileReader<User> dataFileReader = DataFileReader.openReader(testFile, datumReader)) {
		User rec = dataFileReader.next();

		// check if record has been read correctly
		assertNotNull(rec);
		assertEquals("name not equal", TEST_NAME, rec.get("name").toString());
		assertEquals("enum not equal", TEST_ENUM_COLOR.toString(), rec.get("type_enum").toString());

		// now serialize it with our framework:
		ExecutionConfig ec = new ExecutionConfig();
		TypeInformation<User> te = TypeExtractor.createTypeInfo(User.class);

		assertEquals(AvroTypeInfo.class, te.getClass());
		TypeSerializer<User> tser = te.createSerializer(ec);

		ByteArrayOutputStream out = new ByteArrayOutputStream();
		try (DataOutputViewStreamWrapper outView = new DataOutputViewStreamWrapper(out)) {
			tser.serialize(rec, outView);
		}

		User newRec;
		try (DataInputViewStreamWrapper inView = new DataInputViewStreamWrapper(
				new ByteArrayInputStream(out.toByteArray()))) {
			newRec = tser.deserialize(inView);
		}

		// check if it is still the same
		assertNotNull(newRec);
		assertEquals("name not equal", TEST_NAME, newRec.getName().toString());
		assertEquals("enum not equal", TEST_ENUM_COLOR.toString(), newRec.getTypeEnum().toString());
	}
}
 
Example #26
Source Project: iceberg   Author: apache   File: AvroIterable.java    License: Apache License 2.0
AvroIterable(InputFile file, DatumReader<D> reader,
             Long start, Long length, boolean reuseContainers) {
  this.file = file;
  this.reader = reader;
  this.start = start;
  this.end = start != null ? start + length : null;
  this.reuseContainers = reuseContainers;
}
 
Example #27
Source Project: iceberg   Author: apache   File: ProjectionDatumReader.java    License: Apache License 2.0
public ProjectionDatumReader(Function<Schema, DatumReader<?>> getReader,
                             org.apache.iceberg.Schema expectedSchema,
                             Map<String, String> renames,
                             NameMapping nameMapping) {
  this.getReader = getReader;
  this.expectedSchema = expectedSchema;
  this.renames = renames;
  this.nameMapping = nameMapping;
}
 
Example #28
Source Project: localization_nifi   Author: wangrenlei   File: TestJdbcCommon.java    License: Apache License 2.0
@Test
public void testConvertToBytes() throws ClassNotFoundException, SQLException, IOException {
    final Statement st = con.createStatement();
    st.executeUpdate("insert into restaurants values (1, 'Irifunes', 'San Mateo')");
    st.executeUpdate("insert into restaurants values (2, 'Estradas', 'Daly City')");
    st.executeUpdate("insert into restaurants values (3, 'Prime Rib House', 'San Francisco')");

    final ResultSet resultSet = st.executeQuery("select R.*, ROW_NUMBER() OVER () as rownr from restaurants R");

    final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
    JdbcCommon.convertToAvroStream(resultSet, outStream, false);

    final byte[] serializedBytes = outStream.toByteArray();
    assertNotNull(serializedBytes);
    System.out.println("Avro serialized result size in bytes: " + serializedBytes.length);

    st.close();

    // Deserialize bytes to records
    final InputStream instream = new ByteArrayInputStream(serializedBytes);

    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (final DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(instream, datumReader)) {
        GenericRecord record = null;
        while (dataFileReader.hasNext()) {
            // Reuse record object by passing it to next(). This saves us from
            // allocating and garbage collecting many objects for files with
            // many items.
            record = dataFileReader.next(record);
            System.out.println(record);
        }
    }
}
 
Example #29
Source Project: kite   Author: kite-sdk   File: DataModelUtil.java    License: Apache License 2.0
/**
 * Get the DatumReader for the given type.
 *
 * @param <E> The entity type
 * @param type The Java class of the entity type
 * @param writerSchema The {@link Schema} for entities
 * @return The DatumReader for the given type
 */
@SuppressWarnings("unchecked")
public static <E> DatumReader<E> getDatumReaderForType(Class<E> type, Schema writerSchema) {
  Schema readerSchema = getReaderSchema(type, writerSchema);
  GenericData dataModel = getDataModelForType(type);
  if (dataModel instanceof ReflectData) {
    return new ReflectDatumReader<E>(writerSchema, readerSchema, (ReflectData)dataModel);
  } else if (dataModel instanceof SpecificData) {
    return new SpecificDatumReader<E>(writerSchema, readerSchema, (SpecificData)dataModel);
  } else {
    return new GenericDatumReader<E>(writerSchema, readerSchema, dataModel);
  }
}
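
The Javadoc above explains how getDatumReaderForType picks a reader implementation based on the data model. As a hedged usage sketch, a caller might combine it with the generated StandardEvent class seen in the TestDataModelUtil example earlier (assuming that class is on the classpath):

// Hypothetical call site; StandardEvent is the generated specific class from the
// kite test example above and is only an illustrative assumption here.
Schema writerSchema = StandardEvent.getClassSchema();
DatumReader<StandardEvent> reader =
    DataModelUtil.getDatumReaderForType(StandardEvent.class, writerSchema);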
 
Example #30
Source Project: datacollector   Author: streamsets   File: ClusterHdfsSource.java    License: Apache License 2.0
private List<Map.Entry> previewAvroBatch(FileStatus fileStatus, int batchSize) throws IOException {
  int previewCount = previewBuffer.size();
  Path filePath = fileStatus.getPath();
  SeekableInput input = new FsInput(filePath, hadoopConf);
  DatumReader<GenericRecord> reader = new GenericDatumReader<>();
  List<Map.Entry> batch = new ArrayList<>();
  try (FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader)) {
    int count = 0;
    while (fileReader.hasNext() && batch.size() < batchSize && previewCount < batchSize) {
      GenericRecord datum = fileReader.next();
      ByteArrayOutputStream out = new ByteArrayOutputStream();
      DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>
          (datum.getSchema()));
      try {
        dataFileWriter.create(datum.getSchema(), out);
        dataFileWriter.append(datum);
      } finally {
        dataFileWriter.close();
        out.close();
      }
      batch.add(new Pair(filePath.toUri().getPath() + "::" + count, out.toByteArray()));
      count++;
      previewCount++;
    }
  }
  return batch;
}