org.apache.arrow.vector.ipc.message.ArrowBlock Java Examples

The following examples show how to use org.apache.arrow.vector.ipc.message.ArrowBlock. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: YosegiArrowWriter.java    From yosegi with Apache License 2.0 6 votes vote down vote up
/**
 * Append from arrow byte array.
 *
 * <p>The buffer is opened as an Arrow file through an in-memory channel. Each record
 * block listed by the reader is loaded and the reader's vector schema root is then
 * handed to the {@code append} overload that accepts it.
 *
 * <p>The reader and the allocator created here are released before this method
 * returns, including when loading or appending fails.
 *
 * @param buffer bytes of an Arrow file
 * @throws IOException if the Arrow data cannot be read or appending fails
 */
public void append( final byte[] buffer ) throws IOException {
  // Resources close in reverse order: the reader first (releasing its buffers),
  // then the allocator that backed them.
  try ( RootAllocator allocator = new RootAllocator( Integer.MAX_VALUE );
      ArrowFileReader arrowReader = new ArrowFileReader(
          new SeekableInMemoryByteChannel( buffer ) , allocator ) ) {
    // The reader exposes a single root that loadRecordBatch refills, so fetch it once.
    VectorSchemaRoot root = arrowReader.getVectorSchemaRoot();
    for ( ArrowBlock block : arrowReader.getRecordBlocks() ) {
      arrowReader.loadRecordBatch( block );
      // NOTE(review): assumes append( root ) finishes using the vectors before it
      // returns, since they are freed when the reader closes — confirm.
      append( root );
    }
  }
}
 
Example #2
Source File: ConvertArrowFormatToMDS.java    From multiple-dimension-spread with Apache License 2.0 5 votes vote down vote up
/**
 * Converts the Arrow file named by the "input" option into the MDS output
 * named by the "output" option.
 *
 * <p>When the "help" option is present, the help is printed and nothing is converted.
 * Otherwise every record block of the Arrow file is loaded, turned into a
 * {@code Spread} and appended to the writer. All streams, the Arrow reader and its
 * allocator are closed before returning, also when the conversion fails part-way.
 *
 * @param args command line arguments
 * @return 0 on success or after printing the help
 * @throws IOException if the arguments cannot be parsed (the {@code ParseException}
 *     is kept as the cause) or if reading or writing fails
 */
public static int run( final String[] args ) throws IOException{
  CommandLine cl;
  try{
    CommandLineParser clParser = new GnuParser();
    cl = clParser.parse( createOptions( args ) , args );
  }catch( ParseException e ){
    printHelp( args );
    throw new IOException( e );
  }

  if( cl.hasOption( "help" ) ){
    printHelp( args );
    return 0;
  }

  String input = cl.getOptionValue( "input" , null );
  String output = cl.getOptionValue( "output" , null );

  Configuration config = new Configuration();

  // Resources close in reverse order: reader, then input stream, then allocator.
  try( RootAllocator allocator = new RootAllocator( Integer.MAX_VALUE );
      FileInputStream in = new FileInputStream( input );
      ArrowFileReader arrowReader = new ArrowFileReader( in.getChannel() , allocator ) ){
    OutputStream out = FileUtil.create( output );
    MDSWriter writer = new MDSWriter( out , config );
    try{
      // The reader exposes a single root that loadRecordBatch refills, so fetch it once.
      VectorSchemaRoot root = arrowReader.getVectorSchemaRoot();
      for( ArrowBlock block : arrowReader.getRecordBlocks() ){
        arrowReader.loadRecordBatch( block );
        List<FieldVector> fieldVectorList = root.getFieldVectors();
        Spread spread = ArrowSpreadUtil.toSpread( root.getRowCount() , fieldVectorList );
        writer.append( spread );
      }
    }finally{
      // Closed in finally rather than as a try resource: whether MDSWriter is
      // AutoCloseable is not visible from here.
      writer.close();
    }
  }

  return 0;
}
 
Example #3
Source File: ArrowRead.java    From ArrowExample with Apache License 2.0 5 votes vote down vote up
/**
 * Loads every record block of the given reader in order and prints each block
 * to standard output after it has been loaded.
 *
 * @param arrowFileReader reader whose record blocks are loaded; not closed here
 * @throws IOException if a record batch cannot be loaded
 */
public void someDeadCode(ArrowFileReader arrowFileReader) throws IOException {
    for (ArrowBlock block : arrowFileReader.getRecordBlocks()) {
        final boolean loaded = arrowFileReader.loadRecordBatch(block);
        if (loaded) {
            System.out.println(" \t\t " + block);
            continue;
        }
        // A listed block that does not load is treated as a read failure.
        throw new IOException("Expected to read record batch");
    }
}
 
Example #4
Source File: TestConvertArrowFormatTool.java    From multiple-dimension-spread with Apache License 2.0 4 votes vote down vote up
/**
 * Converts the MDS test data to an Arrow file, reads the first record batch back
 * and checks the three columns "col1", "col2" and "col3".
 *
 * <p>The output stream is closed before the file is re-read. The Arrow reader, its
 * input stream and its allocator are closed before the temporary file is deleted,
 * and the file is deleted even when an assertion fails.
 */
@Test
public void T_convert_1() throws IOException{
  byte[] mdsFile = createTestData();
  InputStream in = new ByteArrayInputStream( mdsFile );
  MDSReader reader = new MDSReader();
  Configuration config = new Configuration();
  reader.setNewStream( in , mdsFile.length , config );
  MDSArrowReader arrowReader = new MDSArrowReader( reader , config );
  File testFile = new File( "target/TestConvertArrowFormatTool_T_convert_1.mds" );
  if( testFile.exists() ){
    testFile.delete();
  }

  try{
    // Close the output before reading the file back so nothing is left open on it.
    try( FileOutputStream out = new FileOutputStream( testFile ) ){
      ConvertArrowFormatTool.convert( arrowReader , out , config );
    }

    // Resources close in reverse order: reader, then input stream, then allocator.
    try( RootAllocator allocator = new RootAllocator( Integer.MAX_VALUE );
        FileInputStream arrowIn = new FileInputStream( testFile );
        ArrowFileReader ar = new ArrowFileReader( arrowIn.getChannel() , allocator ) ){
      VectorSchemaRoot root  = ar.getVectorSchemaRoot();
      ArrowBlock rbBlock = ar.getRecordBlocks().get(0);
      ar.loadRecordBatch(rbBlock);
      List<FieldVector> fieldVectorList = root.getFieldVectors();
      Map<String,FieldVector> vectorMap = new HashMap<String,FieldVector>();
      for( FieldVector v : fieldVectorList ){
        vectorMap.put( v.getField().getName() , v );
      }

      assertTrue( vectorMap.containsKey( "col1" ) );
      assertTrue( vectorMap.containsKey( "col2" ) );
      assertTrue( vectorMap.containsKey( "col3" ) );

      BigIntVector col1 = (BigIntVector)( vectorMap.get( "col1" ) );
      VarCharVector col2 = (VarCharVector)( vectorMap.get( "col2" ) );
      VarCharVector col3 = (VarCharVector)( vectorMap.get( "col3" ) );

      assertEquals( col1.get(0) , 100L );
      assertEquals( col1.get(1) , 200L );
      assertEquals( col1.get(2) , 300L );

      assertEquals( col2.getObject(0).toString() , "aaa" );
      assertTrue( col2.isNull(1) );
      assertTrue( col2.isNull(2) );

      assertTrue( col3.isNull(0) );
      assertEquals( col3.getObject(1).toString() , "BBB" );
      assertEquals( col3.getObject(2).toString() , "CCC" );
    }
  }finally{
    // Delete only after every handle on the file has been closed.
    testFile.delete();
  }
}
 
Example #5
Source File: ArrowRead.java    From ArrowExample with Apache License 2.0 4 votes vote down vote up
/**
 * Reads an Arrow file block by block, prints a summary of each block and passes
 * every column to the matching {@code show*Accessor} method.
 *
 * <p>After the last block the checksum fields of this object are printed together
 * with their difference from {@code checkSumx}. The file stream and the Arrow
 * reader are closed before the checksums are printed, and also when reading fails.
 * The allocator {@code this.ra} is not closed here.
 *
 * @param filename path of the Arrow file to read
 * @throws IOException if a record batch cannot be loaded
 * @throws Exception if a column has a minor type other than INT, BIGINT,
 *     VARBINARY, FLOAT4 or FLOAT8
 */
public void makeRead(String filename) throws Exception {
    File arrowFile = validateFile(filename, true);

    try (FileInputStream fileInputStream = new FileInputStream(arrowFile);
         ArrowFileReader arrowFileReader = new ArrowFileReader(
                 new SeekableReadChannel(fileInputStream.getChannel()), this.ra)) {
        System.out.println("\nReading the arrow file : " + filename);
        VectorSchemaRoot root  = arrowFileReader.getVectorSchemaRoot();
        System.out.println("File size : " + arrowFile.length() +
                " schema is "  + root.getSchema().toString());

        List<ArrowBlock> arrowBlocks = arrowFileReader.getRecordBlocks();
        System.out.println("Number of arrow blocks are " + arrowBlocks.size());
        for (int i = 0; i < arrowBlocks.size(); i++) {
            ArrowBlock rbBlock = arrowBlocks.get(i);
            if (!arrowFileReader.loadRecordBatch(rbBlock)) {
                throw new IOException("Expected to read record batch");
            }
            System.out.println("\t["+i+"] ArrowBlock, offset: " + rbBlock.getOffset() +
                    ", metadataLength: " + rbBlock.getMetadataLength() +
                    ", bodyLength " + rbBlock.getBodyLength());
            /* we can now process this block, it is now loaded */
            System.out.println("\t["+i+"] row count for this block is " + root.getRowCount());
            List<FieldVector> fieldVector = root.getFieldVectors();
            System.out.println("\t["+i+"] number of fieldVectors (corresponding to columns) : " + fieldVector.size());
            for (int j = 0; j < fieldVector.size(); j++) {
                FieldVector vector = fieldVector.get(j);
                Types.MinorType mt = vector.getMinorType();
                switch (mt) {
                    case INT: showIntAccessor(vector); break;
                    case BIGINT: showBigIntAccessor(vector); break;
                    case VARBINARY: showVarBinaryAccessor(vector); break;
                    case FLOAT4: showFloat4Accessor(vector); break;
                    case FLOAT8: showFloat8Accessor(vector); break;
                    default: throw new Exception(" MinorType " + mt);
                }
            }
        }
        // Last statement of the try block so it still prints before the reader closes.
        System.out.println("Done processing the file");
    }

    long s1 = this.intCsum + this.longCsum + this.arrCsum + this.floatCsum;
    System.out.println("intSum " + intCsum + " longSum " + longCsum + " arrSum " + arrCsum + " floatSum " + floatCsum + " = " + s1);
    System.err.println("Colsum Checksum > " + this.checkSumx + " , difference " + (s1 - this.checkSumx));
}