org.apache.arrow.vector.ipc.SeekableReadChannel Java Examples

The following examples show how to use org.apache.arrow.vector.ipc.SeekableReadChannel. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ArrowUtils.java    From konduit-serving with Apache License 2.0 6 votes vote down vote up
/**
 * Loads one record batch from an Arrow file and wraps it as a DataVec batch.
 *
 * <p>Only a single {@code loadNextBatch()} call is made, so at most one batch
 * is read. The allocator and the reader created here are not closed by this
 * method.
 *
 * @param input the stream whose file channel is read as an Arrow file
 * @return a pair of the converted DataVec schema and the wrapped record batch
 * @throws IOException if the underlying reader calls fail
 */
public static Pair<Schema, ArrowWritableRecordBatch> readFromFile(FileInputStream input) throws IOException {
    final BufferAllocator rootAllocator = new RootAllocator(Long.MAX_VALUE);
    final ArrowFileReader arrowReader =
            new ArrowFileReader(new SeekableReadChannel(input.getChannel()), rootAllocator);
    arrowReader.loadNextBatch();

    final Schema datavecSchema = toDatavecSchema(arrowReader.getVectorSchemaRoot().getSchema());

    // Unload the batch from the reader's root, then load it straight back into the same root.
    final VectorUnloader batchUnloader = new VectorUnloader(arrowReader.getVectorSchemaRoot());
    final VectorLoader batchLoader = new VectorLoader(arrowReader.getVectorSchemaRoot());
    final ArrowRecordBatch unloadedBatch = batchUnloader.getRecordBatch();
    batchLoader.load(unloadedBatch);

    final ArrowWritableRecordBatch writableBatch =
            asDataVecBatch(unloadedBatch, datavecSchema, arrowReader.getVectorSchemaRoot());
    writableBatch.setUnloader(batchUnloader);
    return Pair.of(datavecSchema, writableBatch);
}
 
Example #2
Source File: ArrowUtils.java    From konduit-serving with Apache License 2.0 6 votes vote down vote up
/**
 * Loads one record batch from an in-memory Arrow payload and wraps it as a
 * DataVec batch.
 *
 * <p>Only a single {@code loadNextBatch()} call is made, so at most one batch
 * is read. The allocator and the reader created here are not closed by this
 * method.
 *
 * @param input the bytes to read through an {@code ArrowFileReader}
 * @return a pair of the converted DataVec schema and the wrapped record batch
 * @throws IOException if the underlying reader calls fail
 */
public static Pair<Schema, ArrowWritableRecordBatch> readFromBytes(byte[] input) throws IOException {
    final BufferAllocator rootAllocator = new RootAllocator(Long.MAX_VALUE);
    final SeekableReadChannel byteChannel =
            new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(input));
    final ArrowFileReader arrowReader = new ArrowFileReader(byteChannel, rootAllocator);
    arrowReader.loadNextBatch();

    final Schema datavecSchema = toDatavecSchema(arrowReader.getVectorSchemaRoot().getSchema());

    // Unload the batch from the reader's root, then load it straight back into the same root.
    final VectorUnloader batchUnloader = new VectorUnloader(arrowReader.getVectorSchemaRoot());
    final VectorLoader batchLoader = new VectorLoader(arrowReader.getVectorSchemaRoot());
    final ArrowRecordBatch unloadedBatch = batchUnloader.getRecordBatch();
    batchLoader.load(unloadedBatch);

    final ArrowWritableRecordBatch writableBatch =
            asDataVecBatch(unloadedBatch, datavecSchema, arrowReader.getVectorSchemaRoot());
    writableBatch.setUnloader(batchUnloader);
    return Pair.of(datavecSchema, writableBatch);
}
 
Example #3
Source File: Twister2ArrowFileReader.java    From twister2 with Apache License 2.0 6 votes vote down vote up
/**
 * Opens the configured Arrow input file and initialises the reader state.
 *
 * <p>Populates {@code fileSystem}, {@code fsDataInputStream},
 * {@code fileInputStream}, {@code arrowFileReader}, {@code root} and
 * {@code arrowBlocks}, then logs a summary of the opened file.
 *
 * @throws Twister2RuntimeException if the file cannot be found or any other
 *         exception is raised while opening or reading the file metadata
 */
public void initInputFile() {
  try {
    LOG.info("arrow schema:" + Schema.fromJSON(arrowSchema));
    Path path = new Path(arrowInputFile);
    this.fileSystem = FileSystemUtils.get(path);
    this.fsDataInputStream = fileSystem.open(path);
    this.fileInputStream = new FileInputStream(arrowInputFile);
    this.arrowFileReader = new ArrowFileReader(new SeekableReadChannel(
        fileInputStream.getChannel()), rootAllocator);
    this.root = arrowFileReader.getVectorSchemaRoot();
    arrowBlocks = arrowFileReader.getRecordBlocks();
    // Report the real on-disk size via the open channel. arrowInputFile is used
    // as a path string above, so arrowInputFile.length() would only be the
    // number of characters in the path, not the file size.
    LOG.info("\nReading the arrow file : " + arrowInputFile
        + "\tFile size:" + fileInputStream.getChannel().size()
        + "\tschema:" + root.getSchema().toString()
        + "\tArrow Blocks Size: " + arrowBlocks.size());
  } catch (FileNotFoundException e) {
    throw new Twister2RuntimeException("File Not Found", e);
  } catch (Exception ioe) {
    throw new Twister2RuntimeException("IOException Occured", ioe);
  }
}
 
Example #4
Source File: ArrowConverter.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Loads a DataVec schema and a record batch from an Arrow file.
 *
 * <p>A single {@code loadNextBatch()} call is made on the reader; the
 * allocator and reader created here are left open.
 *
 * @param input the input to read
 * @return the associated datavec schema and record
 * @throws IOException if the underlying reader calls fail
 */
public static Pair<Schema,ArrowWritableRecordBatch> readFromFile(FileInputStream input) throws IOException {
    final BufferAllocator bufferAllocator = new RootAllocator(Long.MAX_VALUE);
    final ArrowFileReader fileReader = new ArrowFileReader(
            new SeekableReadChannel(input.getChannel()),
            bufferAllocator);
    fileReader.loadNextBatch();

    final Schema convertedSchema = toDatavecSchema(fileReader.getVectorSchemaRoot().getSchema());

    // Round-trip the batch: unload it from the root and load it back in.
    final VectorUnloader rootUnloader = new VectorUnloader(fileReader.getVectorSchemaRoot());
    final VectorLoader rootLoader = new VectorLoader(fileReader.getVectorSchemaRoot());
    final ArrowRecordBatch batch = rootUnloader.getRecordBatch();
    rootLoader.load(batch);

    final ArrowWritableRecordBatch result =
            asDataVecBatch(batch, convertedSchema, fileReader.getVectorSchemaRoot());
    result.setUnloader(rootUnloader);
    return Pair.of(convertedSchema, result);
}
 
Example #5
Source File: ArrowConverter.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Loads a DataVec schema and a record batch from the given bytes
 * (usually expected to be an Arrow format file).
 *
 * <p>A single {@code loadNextBatch()} call is made on the reader; the
 * allocator and reader created here are left open.
 *
 * @param input the input to read
 * @return the associated datavec schema and record
 * @throws IOException if the underlying reader calls fail
 */
public static Pair<Schema,ArrowWritableRecordBatch> readFromBytes(byte[] input) throws IOException {
    final BufferAllocator bufferAllocator = new RootAllocator(Long.MAX_VALUE);
    final ArrowFileReader bytesReader = new ArrowFileReader(
            new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(input)),
            bufferAllocator);
    bytesReader.loadNextBatch();

    final Schema convertedSchema = toDatavecSchema(bytesReader.getVectorSchemaRoot().getSchema());

    // Round-trip the batch: unload it from the root and load it back in.
    final VectorUnloader rootUnloader = new VectorUnloader(bytesReader.getVectorSchemaRoot());
    final VectorLoader rootLoader = new VectorLoader(bytesReader.getVectorSchemaRoot());
    final ArrowRecordBatch batch = rootUnloader.getRecordBatch();
    rootLoader.load(batch);

    final ArrowWritableRecordBatch result =
            asDataVecBatch(batch, convertedSchema, bytesReader.getVectorSchemaRoot());
    result.setUnloader(rootUnloader);
    return Pair.of(convertedSchema, result);
}
 
Example #6
Source File: ArrowConverter.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Reads a DataVec schema together with a record batch out of an Arrow file.
 *
 * <p>The reader is advanced by exactly one {@code loadNextBatch()} call.
 * Neither the reader nor the allocator is closed before returning.
 *
 * @param input the input to read
 * @return the associated datavec schema and record
 * @throws IOException if the underlying reader calls fail
 */
public static Pair<Schema,ArrowWritableRecordBatch> readFromFile(FileInputStream input) throws IOException {
    BufferAllocator alloc = new RootAllocator(Long.MAX_VALUE);
    SeekableReadChannel source = new SeekableReadChannel(input.getChannel());
    ArrowFileReader arrow = new ArrowFileReader(source, alloc);
    arrow.loadNextBatch();

    Schema schema = toDatavecSchema(arrow.getVectorSchemaRoot().getSchema());

    // Unload the freshly read batch, then load it back into the same root.
    VectorUnloader unloadStep = new VectorUnloader(arrow.getVectorSchemaRoot());
    VectorLoader loadStep = new VectorLoader(arrow.getVectorSchemaRoot());
    ArrowRecordBatch loadedBatch = unloadStep.getRecordBatch();
    loadStep.load(loadedBatch);

    ArrowWritableRecordBatch records = asDataVecBatch(loadedBatch, schema, arrow.getVectorSchemaRoot());
    records.setUnloader(unloadStep);

    return Pair.of(schema, records);
}
 
Example #7
Source File: ArrowConverter.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Reads a DataVec schema together with a record batch out of the given bytes
 * (usually expected to be an Arrow format file).
 *
 * <p>The reader is advanced by exactly one {@code loadNextBatch()} call.
 * Neither the reader nor the allocator is closed before returning.
 *
 * @param input the input to read
 * @return the associated datavec schema and record
 * @throws IOException if the underlying reader calls fail
 */
public static Pair<Schema,ArrowWritableRecordBatch> readFromBytes(byte[] input) throws IOException {
    BufferAllocator alloc = new RootAllocator(Long.MAX_VALUE);
    SeekableReadChannel source = new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(input));
    ArrowFileReader arrow = new ArrowFileReader(source, alloc);
    arrow.loadNextBatch();

    Schema schema = toDatavecSchema(arrow.getVectorSchemaRoot().getSchema());

    // Unload the freshly read batch, then load it back into the same root.
    VectorUnloader unloadStep = new VectorUnloader(arrow.getVectorSchemaRoot());
    VectorLoader loadStep = new VectorLoader(arrow.getVectorSchemaRoot());
    ArrowRecordBatch loadedBatch = unloadStep.getRecordBatch();
    loadStep.load(loadedBatch);

    ArrowWritableRecordBatch records = asDataVecBatch(loadedBatch, schema, arrow.getVectorSchemaRoot());
    records.setUnloader(unloadStep);

    return Pair.of(schema, records);
}
 
Example #8
Source File: ArrowRead.java    From ArrowExample with Apache License 2.0 4 votes vote down vote up
/**
 * Reads every record batch of an Arrow file, prints a summary of each block
 * and hands each column to the matching {@code show*Accessor} method.
 *
 * <p>The file stream and the Arrow reader are managed by try-with-resources,
 * so they are released even when a batch fails to load or a column has an
 * unsupported type. After the reader is closed, the accumulated checksum
 * fields are printed and compared against {@code checkSumx}.
 *
 * @param filename name of the Arrow file, passed to {@code validateFile}
 * @throws IOException if a listed record batch cannot be loaded
 * @throws Exception if a column has a minor type other than INT, BIGINT,
 *         VARBINARY, FLOAT4 or FLOAT8, or if any underlying call fails
 */
public void makeRead(String filename) throws Exception {
    File arrowFile = validateFile(filename, true);

    try (FileInputStream fileInputStream = new FileInputStream(arrowFile);
         ArrowFileReader arrowFileReader = new ArrowFileReader(
                 new SeekableReadChannel(fileInputStream.getChannel()), this.ra)) {
        System.out.println("\nReading the arrow file : " + filename);
        VectorSchemaRoot root = arrowFileReader.getVectorSchemaRoot();
        System.out.println("File size : " + arrowFile.length() +
                " schema is "  + root.getSchema().toString());

        List<ArrowBlock> arrowBlocks = arrowFileReader.getRecordBlocks();
        System.out.println("Number of arrow blocks are " + arrowBlocks.size());
        for (int i = 0; i < arrowBlocks.size(); i++) {
            ArrowBlock rbBlock = arrowBlocks.get(i);
            if (!arrowFileReader.loadRecordBatch(rbBlock)) {
                throw new IOException("Expected to read record batch");
            }
            System.out.println("\t["+i+"] ArrowBlock, offset: " + rbBlock.getOffset() +
                    ", metadataLength: " + rbBlock.getMetadataLength() +
                    ", bodyLength " + rbBlock.getBodyLength());
            /* we can now process this block, it is now loaded */
            System.out.println("\t["+i+"] row count for this block is " + root.getRowCount());
            List<FieldVector> fieldVector = root.getFieldVectors();
            System.out.println("\t["+i+"] number of fieldVectors (corresponding to columns) : " + fieldVector.size());
            for (FieldVector vector : fieldVector) {
                Types.MinorType mt = vector.getMinorType();
                switch (mt) {
                    case INT: showIntAccessor(vector); break;
                    case BIGINT: showBigIntAccessor(vector); break;
                    case VARBINARY: showVarBinaryAccessor(vector); break;
                    case FLOAT4: showFloat4Accessor(vector); break;
                    case FLOAT8: showFloat8Accessor(vector); break;
                    default: throw new Exception(" MinorType " + mt);
                }
            }
        }
        System.out.println("Done processing the file");
    }

    // Reader and stream are closed at this point; report the checksums.
    long s1 = this.intCsum + this.longCsum + this.arrCsum + this.floatCsum;
    System.out.println("intSum " + intCsum + " longSum " + longCsum + " arrSum " + arrCsum + " floatSum " + floatCsum + " = " + s1);
    System.err.println("Colsum Checksum > " + this.checkSumx + " , difference " + (s1 - this.checkSumx));
}