org.apache.arrow.vector.ipc.ArrowFileReader Java Examples

The following examples show how to use org.apache.arrow.vector.ipc.ArrowFileReader. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: YosegiArrowWriter.java    From yosegi with Apache License 2.0 6 votes vote down vote up
/**
 * Append from arrow byte array.
 *
 * <p>The buffer is opened as an Arrow file through an in-memory seekable channel.
 * Every record block listed by the reader is loaded into the reader's vector schema
 * root, and that root is then passed to the root-based {@code append} overload.</p>
 *
 * <p>The reader is closed when this method returns or throws, so the buffers it
 * loaded are released. NOTE(review): this assumes the root-based {@code append}
 * consumes the root synchronously and keeps no reference to its vectors - confirm.
 * The per-call {@code RootAllocator} is not closed here, as in the original.</p>
 *
 * @param buffer bytes expected to be in the Arrow file format
 * @throws IOException if the reader fails to read the blocks or a record batch
 */
public void append( final byte[] buffer ) throws IOException {
  // try-with-resources guarantees the reader (and the vectors it loaded) is released
  // even when loading or appending a batch fails part-way through.
  try ( ArrowFileReader arrowReader = new ArrowFileReader(
      new SeekableInMemoryByteChannel( buffer ) , new RootAllocator( Integer.MAX_VALUE ) ) ) {
    List<ArrowBlock> blockList = arrowReader.getRecordBlocks();
    for ( ArrowBlock block : blockList ) {
      // The reader refills this root each time a record batch is loaded.
      VectorSchemaRoot root = arrowReader.getVectorSchemaRoot();
      arrowReader.loadRecordBatch( block );
      append( root );
    }
  }
}
 
Example #2
Source File: ArrowUtils.java    From konduit-serving with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the first record batch of an Arrow file and returns it together with the
 * converted schema.
 *
 * <p>NOTE(review): neither the reader nor the allocator is closed in this method.
 * The reader's root is handed to {@code asDataVecBatch}, so the returned batch
 * presumably still depends on them - confirm who owns their lifetime.</p>
 *
 * @param input stream over the Arrow file; its channel is used for seeking
 * @return pair of the converted schema and the wrapped record batch
 * @throws IOException if the reader fails to load the batch or the schema root
 */
public static Pair<Schema, ArrowWritableRecordBatch> readFromFile(FileInputStream input) throws IOException {
    final BufferAllocator bufferAllocator = new RootAllocator(Long.MAX_VALUE);
    final ArrowFileReader fileReader =
            new ArrowFileReader(new SeekableReadChannel(input.getChannel()), bufferAllocator);

    // Only the first batch of the file is loaded.
    fileReader.loadNextBatch();
    final VectorSchemaRoot schemaRoot = fileReader.getVectorSchemaRoot();
    final Schema convertedSchema = toDatavecSchema(schemaRoot.getSchema());

    // Unload the root into a record batch, then load that batch back into the same root.
    final VectorUnloader rootUnloader = new VectorUnloader(schemaRoot);
    final VectorLoader rootLoader = new VectorLoader(schemaRoot);
    final ArrowRecordBatch batch = rootUnloader.getRecordBatch();
    rootLoader.load(batch);

    final ArrowWritableRecordBatch wrapped = asDataVecBatch(batch, convertedSchema, schemaRoot);
    wrapped.setUnloader(rootUnloader);
    return Pair.of(convertedSchema, wrapped);
}
 
Example #3
Source File: ArrowUtils.java    From konduit-serving with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the first record batch from Arrow-file bytes held in memory and returns it
 * together with the converted schema.
 *
 * <p>NOTE(review): the reader and allocator created here are never closed in this
 * method. The reader's root is passed on to {@code asDataVecBatch}, so the result
 * presumably keeps using them - confirm ownership.</p>
 *
 * @param input bytes to read, wrapped in a seekable in-memory channel
 * @return pair of the converted schema and the wrapped record batch
 * @throws IOException if the reader fails to load the batch or the schema root
 */
public static Pair<Schema, ArrowWritableRecordBatch> readFromBytes(byte[] input) throws IOException {
    BufferAllocator memory = new RootAllocator(Long.MAX_VALUE);
    ArrowFileReader arrowReader = new ArrowFileReader(
            new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(input)), memory);

    // A single call: only the first batch is consumed.
    arrowReader.loadNextBatch();
    VectorSchemaRoot vectors = arrowReader.getVectorSchemaRoot();
    Schema datavecSchema = toDatavecSchema(vectors.getSchema());

    VectorUnloader unloadStep = new VectorUnloader(vectors);
    VectorLoader loadStep = new VectorLoader(vectors);
    ArrowRecordBatch firstBatch = unloadStep.getRecordBatch();
    // Put the unloaded batch back into the root before wrapping it.
    loadStep.load(firstBatch);

    ArrowWritableRecordBatch result = asDataVecBatch(firstBatch, datavecSchema, vectors);
    result.setUnloader(unloadStep);
    return Pair.of(datavecSchema, result);
}
 
Example #4
Source File: Twister2ArrowFileReader.java    From twister2 with Apache License 2.0 6 votes vote down vote up
/**
 * Opens the configured Arrow input file and prepares this object for reading it.
 *
 * <p>Steps, in order: logs the schema parsed from {@code arrowSchema}; resolves the
 * file system for {@code arrowInputFile} and opens it into {@code fsDataInputStream};
 * opens a local {@code FileInputStream} on the same path; builds the
 * {@code ArrowFileReader} over that stream's channel; then caches the reader's vector
 * schema root and record block list and logs a summary.</p>
 *
 * <p>The summary reports the size of the opened file in bytes. The original logged
 * {@code arrowInputFile.length()}, which for a path string is the number of
 * characters in the path, not the file size.</p>
 *
 * @throws Twister2RuntimeException if the file is missing or any other failure occurs
 *         while opening or reading it
 */
public void initInputFile() {
  try {
    LOG.info("arrow schema:" + Schema.fromJSON(arrowSchema));
    Path path = new Path(arrowInputFile);
    this.fileSystem = FileSystemUtils.get(path);
    this.fsDataInputStream = fileSystem.open(path);
    this.fileInputStream = new FileInputStream(arrowInputFile);
    this.arrowFileReader = new ArrowFileReader(new SeekableReadChannel(
        fileInputStream.getChannel()), rootAllocator);
    this.root = arrowFileReader.getVectorSchemaRoot();
    arrowBlocks = arrowFileReader.getRecordBlocks();
    // Ask the open channel for the real on-disk size rather than measuring the path text.
    LOG.info("\nReading the arrow file : " + arrowInputFile
        + "\tFile size:" + fileInputStream.getChannel().size()
        + "\tschema:" + root.getSchema().toString()
        + "\tArrow Blocks Size: " + arrowBlocks.size());
  } catch (FileNotFoundException e) {
    throw new Twister2RuntimeException("File Not Found", e);
  } catch (Exception ioe) {
    // Broad catch kept from the original: every other failure is reported the same way.
    throw new Twister2RuntimeException("IOException Occured", ioe);
  }
}
 
Example #5
Source File: ArrowConverter.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Loads the first record batch of an Arrow file and converts it to the DataVec
 * representation.
 *
 * <p>NOTE(review): the reader and allocator are left open by this method; the
 * reader's root is passed into {@code asDataVecBatch}, so the returned batch
 * presumably relies on them - confirm.</p>
 *
 * @param input the Arrow file stream to read; its channel is used for seeking
 * @return the DataVec schema paired with the wrapped record batch
 * @throws IOException if the reader fails to load the batch or the schema root
 */
public static Pair<Schema,ArrowWritableRecordBatch> readFromFile(FileInputStream input) throws IOException {
    final BufferAllocator rootAllocator = new RootAllocator(Long.MAX_VALUE);
    final SeekableReadChannel seekable = new SeekableReadChannel(input.getChannel());
    final ArrowFileReader arrowFile = new ArrowFileReader(seekable, rootAllocator);

    // Consume only the first batch.
    arrowFile.loadNextBatch();
    final VectorSchemaRoot loadedRoot = arrowFile.getVectorSchemaRoot();
    final Schema schema = toDatavecSchema(loadedRoot.getSchema());

    // Round-trip the root: unload it into a batch and load that batch back.
    final VectorUnloader unloader = new VectorUnloader(loadedRoot);
    final VectorLoader loader = new VectorLoader(loadedRoot);
    final ArrowRecordBatch unloaded = unloader.getRecordBatch();
    loader.load(unloaded);

    final ArrowWritableRecordBatch records = asDataVecBatch(unloaded, schema, loadedRoot);
    records.setUnloader(unloader);
    return Pair.of(schema, records);
}
 
Example #6
Source File: ArrowConverter.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Loads the first record batch from in-memory bytes (usually expected to be an
 * Arrow format file) and converts it to the DataVec representation.
 *
 * <p>NOTE(review): nothing created here is closed; the reader's root is passed into
 * {@code asDataVecBatch}, so the result presumably keeps depending on it - confirm.</p>
 *
 * @param input the bytes to read
 * @return the DataVec schema paired with the wrapped record batch
 * @throws IOException if the reader fails to load the batch or the schema root
 */
public static Pair<Schema,ArrowWritableRecordBatch> readFromBytes(byte[] input) throws IOException {
    final BufferAllocator rootAllocator = new RootAllocator(Long.MAX_VALUE);
    final SeekableReadChannel seekable =
            new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(input));
    final ArrowFileReader arrowBytes = new ArrowFileReader(seekable, rootAllocator);

    // Consume only the first batch.
    arrowBytes.loadNextBatch();
    final VectorSchemaRoot loadedRoot = arrowBytes.getVectorSchemaRoot();
    final Schema schema = toDatavecSchema(loadedRoot.getSchema());

    // Round-trip the root: unload it into a batch and load that batch back.
    final VectorUnloader unloader = new VectorUnloader(loadedRoot);
    final VectorLoader loader = new VectorLoader(loadedRoot);
    final ArrowRecordBatch unloaded = unloader.getRecordBatch();
    loader.load(unloaded);

    final ArrowWritableRecordBatch records = asDataVecBatch(unloaded, schema, loadedRoot);
    records.setUnloader(unloader);
    return Pair.of(schema, records);
}
 
Example #7
Source File: ArrowConverter.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Produces a DataVec schema and record batch from the first batch stored in the
 * given Arrow file.
 *
 * <p>NOTE(review): the reader and its allocator stay open after this call. The
 * reader's root is forwarded to {@code asDataVecBatch}, so closing them here would
 * presumably invalidate the result - confirm.</p>
 *
 * @param input the Arrow file stream to read
 * @return the associated DataVec schema and record batch
 * @throws IOException if the reader fails to load the batch or the schema root
 */
public static Pair<Schema,ArrowWritableRecordBatch> readFromFile(FileInputStream input) throws IOException {
    ArrowFileReader source = new ArrowFileReader(
            new SeekableReadChannel(input.getChannel()),
            new RootAllocator(Long.MAX_VALUE));
    source.loadNextBatch();

    VectorSchemaRoot root = source.getVectorSchemaRoot();
    Schema dataVecSchema = toDatavecSchema(root.getSchema());

    // Unload the current contents, then load the same batch back into the root.
    VectorUnloader toBatch = new VectorUnloader(root);
    VectorLoader fromBatch = new VectorLoader(root);
    ArrowRecordBatch arrowBatch = toBatch.getRecordBatch();
    fromBatch.load(arrowBatch);

    ArrowWritableRecordBatch writable = asDataVecBatch(arrowBatch, dataVecSchema, root);
    writable.setUnloader(toBatch);
    return Pair.of(dataVecSchema, writable);
}
 
Example #8
Source File: ArrowConverter.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Produces a DataVec schema and record batch from the first batch found in the
 * given bytes (usually expected to be an Arrow format file).
 *
 * <p>NOTE(review): the reader and its allocator stay open after this call. The
 * reader's root is forwarded to {@code asDataVecBatch}, so closing them here would
 * presumably invalidate the result - confirm.</p>
 *
 * @param input the bytes to read
 * @return the associated DataVec schema and record batch
 * @throws IOException if the reader fails to load the batch or the schema root
 */
public static Pair<Schema,ArrowWritableRecordBatch> readFromBytes(byte[] input) throws IOException {
    ArrowFileReader source = new ArrowFileReader(
            new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(input)),
            new RootAllocator(Long.MAX_VALUE));
    source.loadNextBatch();

    VectorSchemaRoot root = source.getVectorSchemaRoot();
    Schema dataVecSchema = toDatavecSchema(root.getSchema());

    // Unload the current contents, then load the same batch back into the root.
    VectorUnloader toBatch = new VectorUnloader(root);
    VectorLoader fromBatch = new VectorLoader(root);
    ArrowRecordBatch arrowBatch = toBatch.getRecordBatch();
    fromBatch.load(arrowBatch);

    ArrowWritableRecordBatch writable = asDataVecBatch(arrowBatch, dataVecSchema, root);
    writable.setUnloader(toBatch);
    return Pair.of(dataVecSchema, writable);
}
 
Example #9
Source File: ConvertArrowFormatToMDS.java    From multiple-dimension-spread with Apache License 2.0 5 votes vote down vote up
/**
 * Command-line entry point: converts an Arrow file into an MDS file.
 *
 * <p>Parses {@code args}; on a parse error prints help and rethrows the error wrapped
 * in an {@code IOException}. With the {@code help} option it prints help and returns.
 * Otherwise it reads every record block of the {@code input} Arrow file, turns each
 * loaded batch into a {@code Spread} and appends it to an {@code MDSWriter} that
 * writes to {@code output}.</p>
 *
 * <p>The reader and writer are now released on failure as well as on success. The
 * writer is closed before the reader, so the Arrow vectors are still valid while the
 * writer finishes.</p>
 *
 * @param args command-line arguments
 * @return {@code 0} both after printing help and after a successful conversion
 * @throws IOException if the arguments cannot be parsed or reading/writing fails
 */
public static int run( final String[] args ) throws IOException{
  CommandLine cl;
  try{
    CommandLineParser clParser = new GnuParser();
    cl = clParser.parse( createOptions( args ) , args );
  }catch( ParseException e ){
    printHelp( args );
    throw new IOException( e );
  }

  if( cl.hasOption( "help" ) ){
    printHelp( args );
    return 0;
  }

  String input = cl.getOptionValue( "input" , null );
  String output = cl.getOptionValue( "output" , null );

  Configuration config = new Configuration();

  // Closing the reader also closes the file channel it was given.
  try( ArrowFileReader arrowReader = new ArrowFileReader( new FileInputStream( input ).getChannel() , new RootAllocator( Integer.MAX_VALUE ) ) ){
    OutputStream out = FileUtil.create( output );
    MDSWriter writer = new MDSWriter( out , config );
    try{
      List<ArrowBlock> blockList = arrowReader.getRecordBlocks();
      for( ArrowBlock block : blockList ){
        VectorSchemaRoot root = arrowReader.getVectorSchemaRoot();
        arrowReader.loadRecordBatch(block);
        List<FieldVector> fieldVectorList = root.getFieldVectors();
        Spread spread = ArrowSpreadUtil.toSpread( root.getRowCount() , fieldVectorList );
        writer.append( spread );
      }
    }finally{
      // Plain finally (not try-with-resources): only close() is known to exist on the writer.
      writer.close();
    }
  }

  return 0;
}
 
Example #10
Source File: ArrowRead.java    From ArrowExample with Apache License 2.0 5 votes vote down vote up
/**
 * Walks every record block reported by the reader, loads it, and prints the block.
 *
 * @param arrowFileReader reader whose record blocks are loaded one by one
 * @throws IOException if the reader reports that a record batch could not be loaded,
 *         or if the reader itself fails
 */
public void someDeadCode(ArrowFileReader arrowFileReader) throws IOException {
    for (ArrowBlock block : arrowFileReader.getRecordBlocks()) {
        final boolean loaded = arrowFileReader.loadRecordBatch(block);
        if (loaded) {
            System.out.println(" \t\t " + block);
            continue;
        }
        // A listed block that fails to load is treated as an error.
        throw new IOException("Expected to read record batch");
    }
}
 
Example #11
Source File: TestConvertArrowFormatTool.java    From multiple-dimension-spread with Apache License 2.0 4 votes vote down vote up
/**
 * Converts generated MDS test data to an Arrow file on disk, reads the file back with
 * {@code ArrowFileReader}, and checks the first record batch: columns {@code col1},
 * {@code col2} and {@code col3} exist and hold the expected values and nulls.
 *
 * <p>Resource handling: the output stream is closed before the file is read back,
 * the reader and input stream are closed even when an assertion fails, and the
 * temporary file is deleted last, after every handle on it has been released.</p>
 */
@Test
public void T_convert_1() throws IOException{
  byte[] mdsFile = createTestData();
  InputStream in = new ByteArrayInputStream( mdsFile );
  MDSReader reader = new MDSReader();
  Configuration config = new Configuration();
  reader.setNewStream( in , mdsFile.length , config );
  MDSArrowReader arrowReader = new MDSArrowReader( reader , config );
  File testFile = new File( "target/TestConvertArrowFormatTool_T_convert_1.mds" );
  if( testFile.exists() ){
    testFile.delete();
  }

  try{
    // Close the output before reading so the file is complete and no write handle remains.
    try( FileOutputStream out = new FileOutputStream( testFile ) ){
      ConvertArrowFormatTool.convert( arrowReader , out , config );
    }

    try( FileInputStream arrowIn = new FileInputStream( testFile );
        ArrowFileReader ar = new ArrowFileReader( arrowIn.getChannel() , new RootAllocator( Integer.MAX_VALUE ) ) ){
      VectorSchemaRoot root  = ar.getVectorSchemaRoot();
      ArrowBlock rbBlock = ar.getRecordBlocks().get(0);
      ar.loadRecordBatch(rbBlock);
      List<FieldVector> fieldVectorList = root.getFieldVectors();
      Map<String,FieldVector> vectorMap = new HashMap<String,FieldVector>();
      for( FieldVector v : fieldVectorList ){
        vectorMap.put( v.getField().getName() , v );
      }

      assertTrue( vectorMap.containsKey( "col1" ) );
      assertTrue( vectorMap.containsKey( "col2" ) );
      assertTrue( vectorMap.containsKey( "col3" ) );

      BigIntVector col1 = (BigIntVector)( vectorMap.get( "col1" ) );
      VarCharVector col2 = (VarCharVector)( vectorMap.get( "col2" ) );
      VarCharVector col3 = (VarCharVector)( vectorMap.get( "col3" ) );

      assertEquals( col1.get(0) , 100L );
      assertEquals( col1.get(1) , 200L );
      assertEquals( col1.get(2) , 300L );

      assertEquals( col2.getObject(0).toString() , "aaa" );
      assertTrue( col2.isNull(1) );
      assertTrue( col2.isNull(2) );

      assertTrue( col3.isNull(0) );
      assertEquals( col3.getObject(1).toString() , "BBB" );
      assertEquals( col3.getObject(2).toString() , "CCC" );
    }
  }finally{
    // Runs after the reader is closed, and also when an assertion above fails.
    testFile.delete();
  }
}
 
Example #12
Source File: ArrowRead.java    From ArrowExample with Apache License 2.0 4 votes vote down vote up
/**
 * Reads an Arrow file block by block and prints a description of its contents.
 *
 * <p>For every record block the method loads the batch, prints the block's offset,
 * metadata length and body length, the row count and the number of field vectors,
 * then dispatches each vector to the {@code show*Accessor} helper matching its minor
 * type. Finally it prints the running checksum fields and their difference from
 * {@code checkSumx}. NOTE(review): the checksum fields are presumably updated by the
 * {@code show*Accessor} helpers - confirm.</p>
 *
 * <p>The file stream and reader are closed on every exit path, including the
 * unsupported-type exception thrown from inside the loop.</p>
 *
 * @param filename path of the Arrow file, passed through {@code validateFile}
 * @throws IOException if a listed record block cannot be loaded
 * @throws Exception if a vector has a minor type other than INT, BIGINT, VARBINARY,
 *         FLOAT4 or FLOAT8, or if validation or reading fails
 */
public void makeRead(String filename) throws Exception {
    File arrowFile = validateFile(filename, true);

    try (FileInputStream fileInputStream = new FileInputStream(arrowFile);
         ArrowFileReader arrowFileReader = new ArrowFileReader(
                 new SeekableReadChannel(fileInputStream.getChannel()), this.ra)) {
        System.out.println("\nReading the arrow file : " + filename);
        VectorSchemaRoot root  = arrowFileReader.getVectorSchemaRoot();
        System.out.println("File size : " + arrowFile.length() +
                " schema is "  + root.getSchema().toString());

        List<ArrowBlock> arrowBlocks = arrowFileReader.getRecordBlocks();
        System.out.println("Number of arrow blocks are " + arrowBlocks.size());
        for (int i = 0; i < arrowBlocks.size(); i++) {
            ArrowBlock rbBlock = arrowBlocks.get(i);
            if (!arrowFileReader.loadRecordBatch(rbBlock)) {
                throw new IOException("Expected to read record batch");
            }
            System.out.println("\t["+i+"] ArrowBlock, offset: " + rbBlock.getOffset() +
                    ", metadataLength: " + rbBlock.getMetadataLength() +
                    ", bodyLength " + rbBlock.getBodyLength());
            /* we can now process this block, it is now loaded */
            System.out.println("\t["+i+"] row count for this block is " + root.getRowCount());
            List<FieldVector> fieldVector = root.getFieldVectors();
            System.out.println("\t["+i+"] number of fieldVectors (corresponding to columns) : " + fieldVector.size());
            for (FieldVector vector : fieldVector) {
                Types.MinorType mt = vector.getMinorType();
                switch(mt){
                    case INT: showIntAccessor(vector); break;
                    case BIGINT: showBigIntAccessor(vector); break;
                    case VARBINARY: showVarBinaryAccessor(vector); break;
                    case FLOAT4: showFloat4Accessor(vector); break;
                    case FLOAT8: showFloat8Accessor(vector); break;
                    default: throw new Exception(" MinorType " + mt);
                }
            }
        }
        System.out.println("Done processing the file");
    }

    // Reported after the reader is closed, as in the original.
    long s1 = this.intCsum + this.longCsum + this.arrCsum + this.floatCsum;
    System.out.println("intSum " + intCsum + " longSum " + longCsum + " arrSum " + arrCsum + " floatSum " + floatCsum + " = " + s1);
    System.err.println("Colsum Checksum > " + this.checkSumx + " , difference " + (s1 - this.checkSumx));
}