org.apache.arrow.memory.RootAllocator Java Examples

The following examples show how to use org.apache.arrow.memory.RootAllocator. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ArrowConverter.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Deserializes Arrow file-format bytes into a DataVec schema and a record batch.
 *
 * <p>Only a single batch is loaded (one call to {@code loadNextBatch()}). The reader and its
 * allocator are left open here; the returned batch is built from the reader's vector schema
 * root.
 *
 * @param input the serialized content (usually an Arrow format file)
 * @return a pair of the converted DataVec schema and the loaded record batch
 * @throws IOException declared by the underlying Arrow reader calls
 */
public static Pair<Schema,ArrowWritableRecordBatch> readFromBytes(byte[] input) throws IOException {
    BufferAllocator rootAllocator = new RootAllocator(Long.MAX_VALUE);
    ArrowFileReader fileReader = new ArrowFileReader(
            new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(input)),
            rootAllocator);
    fileReader.loadNextBatch();

    Schema datavecSchema = toDatavecSchema(fileReader.getVectorSchemaRoot().getSchema());

    // Unload the batch that was just read, then load it back into the same root.
    VectorUnloader batchUnloader = new VectorUnloader(fileReader.getVectorSchemaRoot());
    VectorLoader batchLoader = new VectorLoader(fileReader.getVectorSchemaRoot());
    ArrowRecordBatch loadedBatch = batchUnloader.getRecordBatch();
    batchLoader.load(loadedBatch);

    ArrowWritableRecordBatch records =
            asDataVecBatch(loadedBatch, datavecSchema, fileReader.getVectorSchemaRoot());
    records.setUnloader(batchUnloader);
    return Pair.of(datavecSchema, records);
}
 
Example #2
Source File: YosegiArrowWriter.java    From yosegi with Apache License 2.0 6 votes vote down vote up
/**
 * Append from arrow byte array.
 *
 * <p>Each record block of the Arrow file is loaded into the reader's vector schema root and
 * passed to the {@code append} overload that accepts the root. The reader and the allocator
 * backing it are closed before this method returns, including when an exception is thrown.
 *
 * <p>NOTE(review): this assumes the {@code append} overload does not keep references to the
 * root's buffers after it returns; confirm against that overload.
 *
 * @param buffer the Arrow file content
 * @throws IOException declared by the Arrow reader calls and by closing the reader
 */
public void append( final byte[] buffer ) throws IOException {
  // The allocator is declared first so that it is closed last, after the reader has
  // released the buffers it allocated.
  try ( RootAllocator allocator = new RootAllocator( Integer.MAX_VALUE ) ;
      ArrowFileReader arrowReader = new ArrowFileReader(
          new SeekableInMemoryByteChannel( buffer ) , allocator ) ) {
    for ( ArrowBlock block : arrowReader.getRecordBlocks() ) {
      VectorSchemaRoot root = arrowReader.getVectorSchemaRoot();
      arrowReader.loadRecordBatch(block);
      append( root );
    }
  }
}
 
Example #3
Source File: GlobalDictionaryBuilder.java    From dremio-oss with Apache License 2.0 6 votes vote down vote up
/**
 * Command-line entry point: creates global dictionaries for the table directory named by the
 * first argument, then reads back and prints the dictionary of every column returned.
 *
 * <p>Nothing is done when the path does not exist or is not a directory. An
 * {@link IOException} is logged rather than propagated.
 *
 * @param args command-line arguments; {@code args[0]} is the table directory
 */
public static void main(String []args) {
  try (final BufferAllocator allocator = new RootAllocator(VM.getMaxDirectMemory())) {
    final Path tablePath = Path.of(args[0]);
    final Configuration hadoopConf = new Configuration();
    final CompressionCodecFactory codecs = CodecFactory.createDirectCodecFactory(
        hadoopConf, new ParquetDirectByteBufferAllocator(allocator), 0);
    final FileSystem fileSystem = HadoopFileSystem.get(tablePath, hadoopConf);

    // Only an existing directory can be processed.
    if (!fileSystem.exists(tablePath) || !fileSystem.isDirectory(tablePath)) {
      return;
    }

    Map<ColumnDescriptor, Path> columnsToFiles =
        createGlobalDictionaries(codecs, fileSystem, tablePath, allocator).getColumnsToDictionaryFiles();
    long dictionaryVersion = getDictionaryVersion(fileSystem, tablePath);
    Path versionedRoot = getDictionaryVersionedRootPath(fileSystem, tablePath, dictionaryVersion);

    for (ColumnDescriptor column : columnsToFiles.keySet()) {
      final VectorContainer dictionary = readDictionary(fileSystem, versionedRoot, column, allocator);
      System.out.println("Dictionary for column [" + column.toString() + " size " + dictionary.getRecordCount());
      BatchPrinter.printBatch(dictionary);
      dictionary.clear();
    }
  } catch (IOException ioe) {
    logger.error("Failed ", ioe);
  }
}
 
Example #4
Source File: ArrowUtils.java    From konduit-serving with Apache License 2.0 6 votes vote down vote up
/**
 * Deserializes Arrow file-format bytes into a DataVec schema and a record batch.
 *
 * <p>Only a single batch is loaded. The reader and allocator are left open here; the returned
 * batch is built from the reader's vector schema root.
 *
 * @param input the serialized Arrow content
 * @return a pair of the converted schema and the loaded record batch
 * @throws IOException declared by the underlying Arrow reader calls
 */
public static Pair<Schema, ArrowWritableRecordBatch> readFromBytes(byte[] input) throws IOException {
    BufferAllocator rootAllocator = new RootAllocator(Long.MAX_VALUE);
    ArrowFileReader fileReader = new ArrowFileReader(
            new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(input)),
            rootAllocator);
    fileReader.loadNextBatch();

    Schema datavecSchema = toDatavecSchema(fileReader.getVectorSchemaRoot().getSchema());

    // Unload the batch that was just read, then load it back into the same root.
    VectorUnloader batchUnloader = new VectorUnloader(fileReader.getVectorSchemaRoot());
    VectorLoader batchLoader = new VectorLoader(fileReader.getVectorSchemaRoot());
    ArrowRecordBatch loadedBatch = batchUnloader.getRecordBatch();
    batchLoader.load(loadedBatch);

    ArrowWritableRecordBatch records =
            asDataVecBatch(loadedBatch, datavecSchema, fileReader.getVectorSchemaRoot());
    records.setUnloader(batchUnloader);
    return Pair.of(datavecSchema, records);
}
 
Example #5
Source File: ArrowUtils.java    From konduit-serving with Apache License 2.0 6 votes vote down vote up
/**
 * Reads a DataVec schema and a record batch from an Arrow file stream.
 *
 * <p>Only a single batch is loaded. Neither the stream, the reader nor the allocator is closed
 * here; the returned batch is built from the reader's vector schema root.
 *
 * @param input the open file stream whose channel is read
 * @return a pair of the converted schema and the loaded record batch
 * @throws IOException declared by the underlying Arrow reader calls
 */
public static Pair<Schema, ArrowWritableRecordBatch> readFromFile(FileInputStream input) throws IOException {
    BufferAllocator rootAllocator = new RootAllocator(Long.MAX_VALUE);
    ArrowFileReader fileReader = new ArrowFileReader(
            new SeekableReadChannel(input.getChannel()), rootAllocator);
    fileReader.loadNextBatch();

    Schema datavecSchema = toDatavecSchema(fileReader.getVectorSchemaRoot().getSchema());

    // Unload the batch that was just read, then load it back into the same root.
    VectorUnloader batchUnloader = new VectorUnloader(fileReader.getVectorSchemaRoot());
    VectorLoader batchLoader = new VectorLoader(fileReader.getVectorSchemaRoot());
    ArrowRecordBatch loadedBatch = batchUnloader.getRecordBatch();
    batchLoader.load(loadedBatch);

    ArrowWritableRecordBatch records =
            asDataVecBatch(loadedBatch, datavecSchema, fileReader.getVectorSchemaRoot());
    records.setUnloader(batchUnloader);
    return Pair.of(datavecSchema, records);
}
 
Example #6
Source File: ArrowConverter.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Deserializes Arrow file-format bytes into a DataVec schema and a record batch.
 *
 * <p>Only a single batch is loaded (one call to {@code loadNextBatch()}). The reader and its
 * allocator are left open here; the returned batch is built from the reader's vector schema
 * root.
 *
 * @param input the serialized content (usually an Arrow format file)
 * @return a pair of the converted DataVec schema and the loaded record batch
 * @throws IOException declared by the underlying Arrow reader calls
 */
public static Pair<Schema,ArrowWritableRecordBatch> readFromBytes(byte[] input) throws IOException {
    BufferAllocator rootAllocator = new RootAllocator(Long.MAX_VALUE);
    ArrowFileReader fileReader = new ArrowFileReader(
            new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(input)),
            rootAllocator);
    fileReader.loadNextBatch();

    Schema datavecSchema = toDatavecSchema(fileReader.getVectorSchemaRoot().getSchema());

    // Unload the batch that was just read, then load it back into the same root.
    VectorUnloader batchUnloader = new VectorUnloader(fileReader.getVectorSchemaRoot());
    VectorLoader batchLoader = new VectorLoader(fileReader.getVectorSchemaRoot());
    ArrowRecordBatch loadedBatch = batchUnloader.getRecordBatch();
    batchLoader.load(loadedBatch);

    ArrowWritableRecordBatch records =
            asDataVecBatch(loadedBatch, datavecSchema, fileReader.getVectorSchemaRoot());
    records.setUnloader(batchUnloader);
    return Pair.of(datavecSchema, records);
}
 
Example #7
Source File: ArrowConverter.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Reads a DataVec schema and a record batch from the given Arrow file.
 *
 * <p>Only a single batch is loaded. Neither the stream, the reader nor the allocator is closed
 * here; the returned batch is built from the reader's vector schema root.
 *
 * @param input the open file stream whose channel is read
 * @return a pair of the converted DataVec schema and the loaded record batch
 * @throws IOException declared by the underlying Arrow reader calls
 */
public static Pair<Schema,ArrowWritableRecordBatch> readFromFile(FileInputStream input) throws IOException {
    BufferAllocator rootAllocator = new RootAllocator(Long.MAX_VALUE);
    ArrowFileReader fileReader = new ArrowFileReader(
            new SeekableReadChannel(input.getChannel()), rootAllocator);
    fileReader.loadNextBatch();

    Schema datavecSchema = toDatavecSchema(fileReader.getVectorSchemaRoot().getSchema());

    // Unload the batch that was just read, then load it back into the same root.
    VectorUnloader batchUnloader = new VectorUnloader(fileReader.getVectorSchemaRoot());
    VectorLoader batchLoader = new VectorLoader(fileReader.getVectorSchemaRoot());
    ArrowRecordBatch loadedBatch = batchUnloader.getRecordBatch();
    batchLoader.load(loadedBatch);

    ArrowWritableRecordBatch records =
            asDataVecBatch(loadedBatch, datavecSchema, fileReader.getVectorSchemaRoot());
    records.setUnloader(batchUnloader);
    return Pair.of(datavecSchema, records);
}
 
Example #8
Source File: FlightDataReader.java    From flight-spark-source with Apache License 2.0 6 votes vote down vote up
/**
 * Sets up the allocator, Flight client and stream for this reader on first use; does nothing
 * when the allocator has already been created.
 *
 * <p>When the options request parallel mode, a {@code "create"} action is issued for the
 * ticket and its results are drained.
 */
private void start() {
  if (allocator == null) {
    FlightDataSourceReader.FactoryOptions factoryOptions = this.options.getValue();
    this.parallel = factoryOptions.isParallel();
    this.allocator = new RootAllocator();
    logger.warn("setting up a data reader at host {} and port {} with ticket {}",
        factoryOptions.getHost(), factoryOptions.getPort(), new String(ticket.getBytes()));

    clientFactory = new FlightClientFactory(
        location, factoryOptions.getUsername(), factoryOptions.getPassword(), parallel);
    client = clientFactory.apply();
    stream = client.getStream(ticket);

    if (parallel) {
      logger.debug("doing create action for ticket {}", new String(ticket.getBytes()));
      // Iterating the results is what drives the action to completion here.
      client.doAction(new Action("create", ticket.getBytes())).forEachRemaining(Object::toString);
      logger.debug("completed create action for ticket {}", new String(ticket.getBytes()));
    }
  }
}
 
Example #9
Source File: ArrowConverter.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Reads a DataVec schema and a record batch from the given Arrow file.
 *
 * <p>Only a single batch is loaded. Neither the stream, the reader nor the allocator is closed
 * here; the returned batch is built from the reader's vector schema root.
 *
 * @param input the open file stream whose channel is read
 * @return a pair of the converted DataVec schema and the loaded record batch
 * @throws IOException declared by the underlying Arrow reader calls
 */
public static Pair<Schema,ArrowWritableRecordBatch> readFromFile(FileInputStream input) throws IOException {
    BufferAllocator rootAllocator = new RootAllocator(Long.MAX_VALUE);
    ArrowFileReader fileReader = new ArrowFileReader(
            new SeekableReadChannel(input.getChannel()), rootAllocator);
    fileReader.loadNextBatch();

    Schema datavecSchema = toDatavecSchema(fileReader.getVectorSchemaRoot().getSchema());

    // Unload the batch that was just read, then load it back into the same root.
    VectorUnloader batchUnloader = new VectorUnloader(fileReader.getVectorSchemaRoot());
    VectorLoader batchLoader = new VectorLoader(fileReader.getVectorSchemaRoot());
    ArrowRecordBatch loadedBatch = batchUnloader.getRecordBatch();
    batchLoader.load(loadedBatch);

    ArrowWritableRecordBatch records =
            asDataVecBatch(loadedBatch, datavecSchema, fileReader.getVectorSchemaRoot());
    records.setUnloader(batchUnloader);
    return Pair.of(datavecSchema, records);
}
 
Example #10
Source File: TestArrowBooleanMemoryAllocator.java    From multiple-dimension-spread with Apache License 2.0 5 votes vote down vote up
/**
 * Writes booleans at indexes 0, 1, 5 and 1000 through the memory allocator and checks that the
 * struct reader returns them, with {@code null} for every index from 6 to 999.
 */
@Test
public void T_setBoolean_1() throws IOException{
  BufferAllocator rootAllocator = new RootAllocator( 1024 * 1024 * 10 );
  SchemaChangeCallBack schemaCallBack = new SchemaChangeCallBack();
  StructVector rootVector = new StructVector(
      "root", rootAllocator, new FieldType(false, Struct.INSTANCE, null, null), schemaCallBack);
  rootVector.allocateNew();
  IMemoryAllocator target = ArrowMemoryAllocatorFactory.getFromStructVector(
      ColumnType.BOOLEAN , "target" , rootAllocator , rootVector , 4 );

  // Populate a few sparse positions.
  target.setBoolean( 0 , true );
  target.setBoolean( 1 , false );
  target.setBoolean( 5 , true );
  target.setBoolean( 1000 , true );

  FieldReader fieldReader = rootVector.getReader().reader( "target" );

  fieldReader.setPosition( 0 );
  assertEquals( fieldReader.readBoolean().booleanValue() , true );
  fieldReader.setPosition( 1 );
  assertEquals( fieldReader.readBoolean().booleanValue() , false );
  fieldReader.setPosition( 5 );
  assertEquals( fieldReader.readBoolean().booleanValue() , true );

  // Positions that were never written must read back as null.
  for( int position = 6 ; position < 1000 ; position++ ){
    fieldReader.setPosition( position );
    assertEquals( fieldReader.readBoolean() , null );
  }

  fieldReader.setPosition( 1000 );
  assertEquals( fieldReader.readBoolean().booleanValue() , true );
}
 
Example #11
Source File: LocalDictionariesReader.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Command-line entry point: reads the dictionaries of the file named by the first argument
 * from the local file system and prints them, followed by the set of binary columns that are
 * not dictionary encoded.
 *
 * <p>An {@link IOException} is logged rather than propagated.
 *
 * @param args command-line arguments; {@code args[0]} is the file path
 */
public static void main(String[] args) {
  try (final BufferAllocator allocator = new RootAllocator(VM.getMaxDirectMemory())) {
    final Configuration localConf = new Configuration();
    final FileSystem localFs = HadoopFileSystem.getLocal(localConf);
    final Path inputPath = Path.of(args[0]);
    final CompressionCodecFactory codecs = CodecFactory.createDirectCodecFactory(
        localConf, new ParquetDirectByteBufferAllocator(allocator), 0);

    final Pair<Map<ColumnDescriptor, Dictionary>, Set<ColumnDescriptor>> result =
        readDictionaries(localFs, inputPath, codecs);

    // Left side: dictionaries per column; right side: columns without a dictionary.
    for (Map.Entry<ColumnDescriptor, Dictionary> columnDictionary : result.getLeft().entrySet()) {
      printDictionary(columnDictionary.getKey(), columnDictionary.getValue());
    }
    System.out.println("Binary columns which are not dictionary encoded: " + result.getRight());
  } catch (IOException ioe) {
    logger.error("Failed ", ioe);
  }
}
 
Example #12
Source File: TestArrowBooleanMemoryAllocator.java    From multiple-dimension-spread with Apache License 2.0 5 votes vote down vote up
/**
 * Encodes a boolean column with values at indexes 0, 1 and 5 to a column binary, loads it into
 * an Arrow-backed memory allocator, and checks that the struct reader returns the values with
 * {@code null} at indexes 2, 3 and 4.
 */
@Test
public void T_setBoolean_2() throws IOException{
  IColumn sourceColumn = new PrimitiveColumn( ColumnType.BOOLEAN , "boolean" );
  sourceColumn.add( ColumnType.BOOLEAN , new BooleanObj( true ) , 0 );
  sourceColumn.add( ColumnType.BOOLEAN , new BooleanObj( false ) , 1 );
  sourceColumn.add( ColumnType.BOOLEAN , new BooleanObj( true ) , 5 );

  ColumnBinaryMakerConfig makerConfig = new ColumnBinaryMakerConfig();
  ColumnBinaryMakerCustomConfigNode customNode = new ColumnBinaryMakerCustomConfigNode( "root" , makerConfig );

  // Serialize the column.
  IColumnBinaryMaker binaryMaker = new DumpBooleanColumnBinaryMaker();
  ColumnBinary encoded = binaryMaker.toBinary( makerConfig , null , sourceColumn );

  // Deserialize it into an Arrow struct vector.
  BufferAllocator rootAllocator = new RootAllocator( 1024 * 1024 * 10 );
  SchemaChangeCallBack schemaCallBack = new SchemaChangeCallBack();
  StructVector rootVector = new StructVector(
      "root", rootAllocator, new FieldType(false, Struct.INSTANCE, null, null), schemaCallBack);
  rootVector.allocateNew();
  IMemoryAllocator target = ArrowMemoryAllocatorFactory.getFromStructVector(
      ColumnType.BOOLEAN , "target" , rootAllocator , rootVector , 3 );
  binaryMaker.loadInMemoryStorage( encoded , target );

  FieldReader fieldReader = rootVector.getReader().reader( "target" );

  fieldReader.setPosition( 0 );
  assertEquals( fieldReader.readBoolean().booleanValue() , true );
  fieldReader.setPosition( 1 );
  assertEquals( fieldReader.readBoolean().booleanValue() , false );
  fieldReader.setPosition( 5 );
  assertEquals( fieldReader.readBoolean().booleanValue() , true );

  // The gaps between the written indexes read back as null.
  fieldReader.setPosition( 2 );
  assertEquals( fieldReader.readBoolean() , null );
  fieldReader.setPosition( 3 );
  assertEquals( fieldReader.readBoolean() , null );
  fieldReader.setPosition( 4 );
  assertEquals( fieldReader.readBoolean() , null );
}
 
Example #13
Source File: ConvertArrowFormatToMDS.java    From multiple-dimension-spread with Apache License 2.0 5 votes vote down vote up
/**
 * Converts the Arrow file named by the {@code input} option into an MDS file written to the
 * {@code output} option.
 *
 * <p>Every record block of the Arrow file is loaded, converted to a spread and appended to the
 * writer. The input stream, the Arrow reader with its allocator, and the writer are all closed
 * before this method returns, including when the conversion fails part-way.
 *
 * @param args command-line arguments
 * @return {@code 0} after a successful conversion or after printing help
 * @throws IOException if the arguments cannot be parsed (wrapping the {@code ParseException})
 *     or an I/O call made during the conversion fails
 */
public static int run( final String[] args ) throws IOException{
  CommandLine cl;
  try{
    CommandLineParser clParser = new GnuParser();
    cl = clParser.parse( createOptions( args ) , args );
  }catch( ParseException e ){
    printHelp( args );
    throw new IOException( e );
  }

  if( cl.hasOption( "help" ) ){
    printHelp( args );
    return 0;
  }

  String input = cl.getOptionValue( "input" , null );
  String output = cl.getOptionValue( "output" , null );

  Configuration config = new Configuration();

  // Resources are closed in reverse order: reader first, then its allocator, then the stream.
  try( FileInputStream in = new FileInputStream( input );
      RootAllocator allocator = new RootAllocator( Integer.MAX_VALUE );
      ArrowFileReader arrowReader = new ArrowFileReader( in.getChannel() , allocator ) ){
    OutputStream out = FileUtil.create( output );
    MDSWriter writer = new MDSWriter( out , config );
    try{
      for( ArrowBlock block : arrowReader.getRecordBlocks() ){
        VectorSchemaRoot root = arrowReader.getVectorSchemaRoot();
        arrowReader.loadRecordBatch(block);
        List<FieldVector> fieldVectorList = root.getFieldVectors();
        Spread spread = ArrowSpreadUtil.toSpread( root.getRowCount() , fieldVectorList );
        writer.append( spread );
      }
    }finally{
      // Close the writer even when a block fails to convert, so the output is not left open.
      writer.close();
    }
  }

  return 0;
}
 
Example #14
Source File: TestArrowShortMemoryAllocator.java    From multiple-dimension-spread with Apache License 2.0 5 votes vote down vote up
/**
 * Encodes a short column with values at indexes 0, 1 and 5 to a column binary, loads it into
 * an Arrow-backed memory allocator, and checks that the struct reader returns the values with
 * {@code null} at indexes 2, 3 and 4.
 */
@Test
public void T_setShort_2() throws IOException{
  IColumn sourceColumn = new PrimitiveColumn( ColumnType.SHORT , "boolean" );
  sourceColumn.add( ColumnType.SHORT , new ShortObj( (short)100 ) , 0 );
  sourceColumn.add( ColumnType.SHORT , new ShortObj( (short)200 ) , 1 );
  sourceColumn.add( ColumnType.SHORT , new ShortObj( (short)255 ) , 5 );

  ColumnBinaryMakerConfig makerConfig = new ColumnBinaryMakerConfig();
  ColumnBinaryMakerCustomConfigNode customNode = new ColumnBinaryMakerCustomConfigNode( "root" , makerConfig );

  // Serialize the column.
  IColumnBinaryMaker binaryMaker = new OptimizeLongColumnBinaryMaker();
  ColumnBinary encoded = binaryMaker.toBinary( makerConfig , null , sourceColumn );

  // Deserialize it into an Arrow struct vector.
  BufferAllocator rootAllocator = new RootAllocator( 1024 * 1024 * 10 );
  SchemaChangeCallBack schemaCallBack = new SchemaChangeCallBack();
  StructVector rootVector = new StructVector(
      "root", rootAllocator, new FieldType(false, Struct.INSTANCE, null, null), schemaCallBack);
  rootVector.allocateNew();
  IMemoryAllocator target = ArrowMemoryAllocatorFactory.getFromStructVector(
      ColumnType.SHORT , "target" , rootAllocator , rootVector , 3 );
  binaryMaker.loadInMemoryStorage( encoded , target );

  FieldReader fieldReader = rootVector.getReader().reader( "target" );

  fieldReader.setPosition( 0 );
  assertEquals( (short)( fieldReader.readShort() ) , (short)100 );
  fieldReader.setPosition( 1 );
  assertEquals( (short)( fieldReader.readShort() ) , (short)200 );
  fieldReader.setPosition( 5 );
  assertEquals( (short)( fieldReader.readShort() ) , (short)255 );

  // The gaps between the written indexes read back as null.
  fieldReader.setPosition( 2 );
  assertEquals( fieldReader.readShort() , null );
  fieldReader.setPosition( 3 );
  assertEquals( fieldReader.readShort() , null );
  fieldReader.setPosition( 4 );
  assertEquals( fieldReader.readShort() , null );
}
 
Example #15
Source File: TestArrowByteMemoryAllocator.java    From multiple-dimension-spread with Apache License 2.0 5 votes vote down vote up
/**
 * Writes bytes at indexes 0, 1, 5 and 1000 through the memory allocator and checks that the
 * struct reader returns them, with {@code null} for every index from 6 to 999.
 */
@Test
public void T_setByte_1() throws IOException{
  BufferAllocator rootAllocator = new RootAllocator( 1024 * 1024 * 10 );
  SchemaChangeCallBack schemaCallBack = new SchemaChangeCallBack();
  StructVector rootVector = new StructVector(
      "root", rootAllocator, new FieldType(false, Struct.INSTANCE, null, null), schemaCallBack);
  rootVector.allocateNew();
  IMemoryAllocator target = ArrowMemoryAllocatorFactory.getFromStructVector(
      ColumnType.BYTE , "target" , rootAllocator , rootVector , 1001 );

  // Populate a few sparse positions.
  target.setByte( 0 , (byte)100 );
  target.setByte( 1 , (byte)200 );
  target.setByte( 5 , (byte)255 );
  target.setByte( 1000 , (byte)10 );

  FieldReader fieldReader = rootVector.getReader().reader( "target" );

  fieldReader.setPosition( 0 );
  assertEquals( fieldReader.readByte().byteValue() , (byte)100 );
  fieldReader.setPosition( 1 );
  assertEquals( fieldReader.readByte().byteValue() , (byte)200 );
  fieldReader.setPosition( 5 );
  assertEquals( fieldReader.readByte().byteValue() , (byte)255 );

  // Positions that were never written must read back as null.
  for( int position = 6 ; position < 1000 ; position++ ){
    fieldReader.setPosition( position );
    assertEquals( fieldReader.readByte() , null );
  }

  fieldReader.setPosition( 1000 );
  assertEquals( fieldReader.readByte().byteValue() , (byte)10 );
}
 
Example #16
Source File: SFArrowResultSet.java    From snowflake-jdbc with Apache License 2.0 5 votes vote down vote up
/**
 * Closes the given root allocator once it reports no allocated memory.
 *
 * <p>If memory is still allocated, the allocated amount is re-checked up to three times with
 * a 10 ms sleep before each check. The allocator is closed only when the amount reaches zero;
 * otherwise it is left open. Exceptions are logged at debug level and not propagated. If the
 * thread is interrupted while waiting, its interrupt status is restored.
 *
 * @param rootAllocator the allocator to close
 */
public static void closeRootAllocator(RootAllocator rootAllocator)
{
  long rest = rootAllocator.getAllocatedMemory();
  int count = 3;
  try
  {
    while (rest > 0 && count-- > 0)
    {
      // this case should only happen when the resultSet is closed before consuming all chunks
      // otherwise, the memory usage for each chunk will be cleared right after it has been fully consumed

      // The reason is that it is possible that one downloading thread is pending to close when the main thread
      // reaches here. A retry is to wait for the downloading thread to finish closing incoming streams and arrow
      // resources.

      Thread.sleep(10);
      rest = rootAllocator.getAllocatedMemory();
    }
    if (rest == 0)
    {
      rootAllocator.close();
    }
  }
  catch (InterruptedException ie)
  {
    // Restore the interrupt status so code further up the stack can still observe it.
    Thread.currentThread().interrupt();
    logger.debug("interrupted during closing root allocator");
  }
  catch (Exception e)
  {
    // The placeholder is required; without it the message argument is never rendered.
    logger.debug("Exception happened when closing rootAllocator: {}",
                 e.getLocalizedMessage());
  }
}
 
Example #17
Source File: ArrowRead.java    From ArrowExample with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an instance with a new root allocator limited to {@code Integer.MAX_VALUE} and with
 * every numeric tally field set to zero.
 */
public ArrowRead(){
    ra = new RootAllocator(Integer.MAX_VALUE);

    // Start every tally from zero.
    nullEntries = 0;
    intCsum = 0;
    longCsum = 0;
    floatCsum = 0;
    arrCsum = 0;
    checkSumx = 0;
}
 
Example #18
Source File: Twister2ArrowFileWriter.java    From twister2 with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a writer for the given Arrow file with a batch size of 1000 and a new root allocator
 * limited to {@code Integer.MAX_VALUE}.
 *
 * @param arrowfile   stored as the Arrow file field
 * @param flag        stored as-is; its meaning is not visible from this constructor
 * @param schema      stored as the Arrow schema field
 * @param parallelism stored as the parallel field
 */
public Twister2ArrowFileWriter(String arrowfile, boolean flag, String schema,
                               int parallelism) {
  // Values supplied by the caller.
  this.arrowFile = arrowfile;
  this.arrowSchema = schema;
  this.flag = flag;
  this.parallel = parallelism;

  // Fixed defaults.
  this.batchSize = 1000;
  this.rootAllocator = new RootAllocator(Integer.MAX_VALUE);
}
 
Example #19
Source File: TestArrowShortMemoryAllocator.java    From multiple-dimension-spread with Apache License 2.0 5 votes vote down vote up
/**
 * Writes shorts at indexes 0, 1, 5 and 1000 through the memory allocator and checks that the
 * struct reader returns them, with {@code null} for every index from 6 to 999.
 */
@Test
public void T_setShort_1() throws IOException{
  BufferAllocator rootAllocator = new RootAllocator( 1024 * 1024 * 10 );
  SchemaChangeCallBack schemaCallBack = new SchemaChangeCallBack();
  StructVector rootVector = new StructVector(
      "root", rootAllocator, new FieldType(false, Struct.INSTANCE, null, null), schemaCallBack);
  rootVector.allocateNew();
  IMemoryAllocator target = ArrowMemoryAllocatorFactory.getFromStructVector(
      ColumnType.SHORT , "target" , rootAllocator , rootVector , 1001 );

  // Populate a few sparse positions.
  target.setShort( 0 , (short)100 );
  target.setShort( 1 , (short)200 );
  target.setShort( 5 , (short)255 );
  target.setShort( 1000 , (short)10 );

  FieldReader fieldReader = rootVector.getReader().reader( "target" );

  fieldReader.setPosition( 0 );
  assertEquals( (short)( fieldReader.readShort() ) , (short)100 );
  fieldReader.setPosition( 1 );
  assertEquals( (short)( fieldReader.readShort() ) , (short)200 );
  fieldReader.setPosition( 5 );
  assertEquals( (short)( fieldReader.readShort() ) , (short)255 );

  // Positions that were never written must read back as null.
  for( int position = 6 ; position < 1000 ; position++ ){
    fieldReader.setPosition( position );
    assertEquals( fieldReader.readShort() , null );
  }

  fieldReader.setPosition( 1000 );
  assertEquals( (short)( fieldReader.readShort() ) , (short)10 );
}
 
Example #20
Source File: TestTimeStampMilliAccessor.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a three-slot timestamp vector (two values followed by a null) and the accessor under
 * test, which is created with the time zone of {@code UTC_CALENDAR}.
 */
@Before
public void setUp() {
  RootAllocator rootAllocator = new RootAllocator();
  valueVector = new TimeStampMilliVector("t", rootAllocator);
  valueVector.allocateNew(3);

  // Slots 0 and 1 hold values; slot 2 is null.
  valueVector.set(0, NON_NULL_VALUE);
  valueVector.set(1, DST_VALUE);
  valueVector.setNull(2);

  accessor = new TimeStampMilliAccessor(valueVector, UTC_CALENDAR.getTimeZone());
}
 
Example #21
Source File: TestArrowDoubleMemoryAllocator.java    From multiple-dimension-spread with Apache License 2.0 5 votes vote down vote up
/**
 * Encodes a double column with values at indexes 0, 1 and 5 to a column binary, loads it into
 * an Arrow-backed memory allocator, and checks that the struct reader returns the values with
 * {@code null} at indexes 2, 3 and 4.
 */
@Test
public void T_setDouble_2() throws IOException{
  IColumn sourceColumn = new PrimitiveColumn( ColumnType.DOUBLE , "boolean" );
  sourceColumn.add( ColumnType.DOUBLE , new DoubleObj( (double)100 ) , 0 );
  sourceColumn.add( ColumnType.DOUBLE , new DoubleObj( (double)200 ) , 1 );
  sourceColumn.add( ColumnType.DOUBLE , new DoubleObj( (double)255 ) , 5 );

  ColumnBinaryMakerConfig makerConfig = new ColumnBinaryMakerConfig();
  ColumnBinaryMakerCustomConfigNode customNode = new ColumnBinaryMakerCustomConfigNode( "root" , makerConfig );

  // Serialize the column.
  IColumnBinaryMaker binaryMaker = new OptimizeDoubleColumnBinaryMaker();
  ColumnBinary encoded = binaryMaker.toBinary( makerConfig , null , sourceColumn );

  // Deserialize it into an Arrow struct vector.
  BufferAllocator rootAllocator = new RootAllocator( 1024 * 1024 * 10 );
  SchemaChangeCallBack schemaCallBack = new SchemaChangeCallBack();
  StructVector rootVector = new StructVector(
      "root", rootAllocator, new FieldType(false, Struct.INSTANCE, null, null), schemaCallBack);
  rootVector.allocateNew();
  IMemoryAllocator target = ArrowMemoryAllocatorFactory.getFromStructVector(
      ColumnType.DOUBLE , "target" , rootAllocator , rootVector , 3 );
  binaryMaker.loadInMemoryStorage( encoded , target );

  FieldReader fieldReader = rootVector.getReader().reader( "target" );

  fieldReader.setPosition( 0 );
  assertEquals( fieldReader.readDouble().doubleValue() , (double)100 );
  fieldReader.setPosition( 1 );
  assertEquals( fieldReader.readDouble().doubleValue() , (double)200 );
  fieldReader.setPosition( 5 );
  assertEquals( fieldReader.readDouble().doubleValue() , (double)255 );

  // The gaps between the written indexes read back as null.
  fieldReader.setPosition( 2 );
  assertEquals( fieldReader.readDouble() , null );
  fieldReader.setPosition( 3 );
  assertEquals( fieldReader.readDouble() , null );
  fieldReader.setPosition( 4 );
  assertEquals( fieldReader.readDouble() , null );
}
 
Example #22
Source File: TestArrowDoubleMemoryAllocator.java    From multiple-dimension-spread with Apache License 2.0 5 votes vote down vote up
/**
 * Writes doubles at indexes 0, 1, 5 and 1000 through the memory allocator and checks that the
 * struct reader returns them, with {@code null} for every index from 6 to 999.
 */
@Test
public void T_setDouble_1() throws IOException{
  BufferAllocator allocator = new RootAllocator( 1024 * 1024 * 10 );
  SchemaChangeCallBack callBack = new SchemaChangeCallBack();
  StructVector parent = new StructVector("root", allocator, new FieldType(false, Struct.INSTANCE, null, null), callBack);
  parent.allocateNew();
  IMemoryAllocator memoryAllocator = ArrowMemoryAllocatorFactory.getFromStructVector( ColumnType.DOUBLE , "target" , allocator , parent , 4 );

  memoryAllocator.setDouble( 0 , (double)0.1 );
  memoryAllocator.setDouble( 1 , (double)0.2 );
  memoryAllocator.setDouble( 5 , (double)0.255 );
  memoryAllocator.setDouble( 1000 , (double)0.1 );

  StructReader rootReader = parent.getReader();
  FieldReader reader = rootReader.reader( "target" );
  reader.setPosition( 0 );
  // The expected value is 0.1. Writing it as "0,1" passes 0 and 1 as two separate arguments,
  // which selects the three-argument overload and no longer checks for 0.1.
  assertEquals( reader.readDouble().doubleValue() , (double)0.1 );
  reader.setPosition( 1 );
  assertEquals( reader.readDouble().doubleValue() , (double)0.2 );
  reader.setPosition( 5 );
  assertEquals( reader.readDouble().doubleValue() , (double)0.255 );
  // Positions that were never written must read back as null.
  for( int i = 6 ; i < 1000 ; i++ ){
    reader.setPosition( i );
    assertEquals( reader.readDouble() , null );
  }
  reader.setPosition( 1000 );
  assertEquals( reader.readDouble().doubleValue() , (double)0.1 );
}
 
Example #23
Source File: TestArrowLongMemoryAllocator.java    From multiple-dimension-spread with Apache License 2.0 5 votes vote down vote up
/**
 * Writes longs at indexes 0, 1, 5 and 1000 through the memory allocator and checks that the
 * struct reader returns them, with {@code null} for every index from 6 to 999.
 */
@Test
public void T_setLong_1() throws IOException{
  BufferAllocator rootAllocator = new RootAllocator( 1024 * 1024 * 10 );
  SchemaChangeCallBack schemaCallBack = new SchemaChangeCallBack();
  StructVector rootVector = new StructVector(
      "root", rootAllocator, new FieldType(false, Struct.INSTANCE, null, null), schemaCallBack);
  rootVector.allocateNew();
  IMemoryAllocator target = ArrowMemoryAllocatorFactory.getFromStructVector(
      ColumnType.LONG , "target" , rootAllocator , rootVector , 1001 );

  // Populate a few sparse positions.
  target.setLong( 0 , (long)100 );
  target.setLong( 1 , (long)200 );
  target.setLong( 5 , (long)255 );
  target.setLong( 1000 , (long)10 );

  FieldReader fieldReader = rootVector.getReader().reader( "target" );

  fieldReader.setPosition( 0 );
  assertEquals( fieldReader.readLong().longValue() , (long)100 );
  fieldReader.setPosition( 1 );
  assertEquals( fieldReader.readLong().longValue() , (long)200 );
  fieldReader.setPosition( 5 );
  assertEquals( fieldReader.readLong().longValue() , (long)255 );

  // Positions that were never written must read back as null.
  for( int position = 6 ; position < 1000 ; position++ ){
    fieldReader.setPosition( position );
    assertEquals( fieldReader.readLong() , null );
  }

  fieldReader.setPosition( 1000 );
  assertEquals( fieldReader.readLong().longValue() , (long)10 );
}
 
Example #24
Source File: TestArrowLongConnector.java    From multiple-dimension-spread with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an eight-slot {@code BigIntVector} (nulls at indexes 3 and 7) into a column and
 * checks the column's name, size, type and every row.
 */
@Test
public void T_convert_1() throws IOException{
  BufferAllocator rootAllocator = new RootAllocator( 1024 * 1024 * 10 );
  BigIntVector source = new BigIntVector( "test" , rootAllocator );
  source.allocateNew();

  // Input data: 0, 1, 0, null, 1, 1, 1, null
  source.setSafe( 0 , (long)0 );
  source.setSafe( 1 , (long)1 );
  source.setSafe( 2 , (long)0 );
  source.setNull( 3 );
  source.setSafe( 4 , (long)1 );
  source.setSafe( 5 , (long)1 );
  source.setSafe( 6 , (long)1 );
  source.setNull( 7 );
  source.setValueCount( 8 );

  IColumn converted = ArrowColumnFactory.convert( "test" , source );

  assertEquals( converted.getColumnName() , "test" );
  assertEquals( converted.size() , 8 );
  assertTrue( converted.getColumnType() == ColumnType.LONG );

  assertEquals( ( (PrimitiveObject)( converted.get(0).getRow() ) ).getLong() , (long)0 );
  assertEquals( ( (PrimitiveObject)( converted.get(1).getRow() ) ).getLong() , (long)1 );
  assertEquals( ( (PrimitiveObject)( converted.get(2).getRow() ) ).getLong() , (long)0 );
  assertEquals( converted.get(3).getRow() , null );
  assertEquals( ( (PrimitiveObject)( converted.get(4).getRow() ) ).getLong() , (long)1 );
  assertEquals( ( (PrimitiveObject)( converted.get(5).getRow() ) ).getLong() , (long)1 );
  assertEquals( ( (PrimitiveObject)( converted.get(6).getRow() ) ).getLong() , (long)1 );
  assertEquals( converted.get(7).getRow() , null );
}
 
Example #25
Source File: ArrowConverterTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a timestamp vector built from a single {@link Date} stores that date's epoch
 * milliseconds at index 0.
 */
@Test
public void testDates() {
    Date current = new Date();
    BufferAllocator rootAllocator = new RootAllocator(Long.MAX_VALUE);
    Date[] dates = new Date[]{current};

    TimeStampMilliVector dateVector = ArrowConverter.vectorFor(rootAllocator, "col1", dates);

    long expectedMillis = current.getTime();
    assertEquals(expectedMillis, dateVector.get(0));
}
 
Example #26
Source File: TestArrowByteConnector.java    From multiple-dimension-spread with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an eight-slot {@code TinyIntVector} (nulls at indexes 3 and 7) into a column and
 * checks the column's name, size, type and every row.
 */
@Test
public void T_convert_1() throws IOException{
  BufferAllocator rootAllocator = new RootAllocator( 1024 * 1024 * 10 );
  TinyIntVector source = new TinyIntVector( "test" , rootAllocator );
  source.allocateNew();

  // Input data: 0, 1, 0, null, 1, 1, 1, null
  source.setSafe( 0 , (byte)0 );
  source.setSafe( 1 , (byte)1 );
  source.setSafe( 2 , (byte)0 );
  source.setNull( 3 );
  source.setSafe( 4 , (byte)1 );
  source.setSafe( 5 , (byte)1 );
  source.setSafe( 6 , (byte)1 );
  source.setNull( 7 );
  source.setValueCount( 8 );

  IColumn converted = ArrowColumnFactory.convert( "test" , source );

  assertEquals( converted.getColumnName() , "test" );
  assertEquals( converted.size() , 8 );
  assertTrue( converted.getColumnType() == ColumnType.BYTE );

  assertEquals( ( (PrimitiveObject)( converted.get(0).getRow() ) ).getByte() , (byte)0 );
  assertEquals( ( (PrimitiveObject)( converted.get(1).getRow() ) ).getByte() , (byte)1 );
  assertEquals( ( (PrimitiveObject)( converted.get(2).getRow() ) ).getByte() , (byte)0 );
  assertEquals( converted.get(3).getRow() , null );
  assertEquals( ( (PrimitiveObject)( converted.get(4).getRow() ) ).getByte() , (byte)1 );
  assertEquals( ( (PrimitiveObject)( converted.get(5).getRow() ) ).getByte() , (byte)1 );
  assertEquals( ( (PrimitiveObject)( converted.get(6).getRow() ) ).getByte() , (byte)1 );
  assertEquals( converted.get(7).getRow() , null );
}
 
Example #27
Source File: TestArrowBooleanConnector.java    From multiple-dimension-spread with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an eight-slot {@code BitVector} (nulls at indexes 3 and 7) into a column and checks
 * the column's name, size, type and every row.
 */
@Test
public void T_convert_1() throws IOException{
  BufferAllocator rootAllocator = new RootAllocator( 1024 * 1024 * 10 );
  BitVector source = new BitVector( "test" , rootAllocator );
  source.allocateNew();

  // Input bits: 0, 1, 0, null, 1, 1, 1, null
  source.setSafe( 0 , 0 );
  source.setSafe( 1 , 1 );
  source.setSafe( 2 , 0 );
  source.setNull( 3 );
  source.setSafe( 4 , 1 );
  source.setSafe( 5 , 1 );
  source.setSafe( 6 , 1 );
  source.setNull( 7 );
  source.setValueCount( 8 );

  IColumn converted = ArrowColumnFactory.convert( "test" , source );

  assertEquals( converted.getColumnName() , "test" );
  assertEquals( converted.size() , 8 );
  assertTrue( converted.getColumnType() == ColumnType.BOOLEAN );

  assertEquals( ( (PrimitiveObject)( converted.get(0).getRow() ) ).getBoolean() , false );
  assertEquals( ( (PrimitiveObject)( converted.get(1).getRow() ) ).getBoolean() , true );
  assertEquals( ( (PrimitiveObject)( converted.get(2).getRow() ) ).getBoolean() , false );
  assertEquals( converted.get(3).getRow() , null );
  assertEquals( ( (PrimitiveObject)( converted.get(4).getRow() ) ).getBoolean() , true );
  assertEquals( ( (PrimitiveObject)( converted.get(5).getRow() ) ).getBoolean() , true );
  assertEquals( ( (PrimitiveObject)( converted.get(6).getRow() ) ).getBoolean() , true );
  assertEquals( converted.get(7).getRow() , null );
}
 
Example #28
Source File: ArrowBinaryIterator.java    From spark-bigquery-connector with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an iterator over Arrow-serialized rows.
 *
 * <p>The schema bytes and the row bytes are concatenated, schema first, into a single
 * stream. That stream is read by an {@code ArrowStreamReader} backed by a child
 * allocator named "ArrowBinaryIterator", which is created from a new root allocator.
 *
 * @param columnsInOrder the column list, stored on the instance as given
 * @param schema serialized schema; its bytes are placed first in the stream
 * @param rowsInBytes serialized rows; their bytes follow the schema in the stream
 */
public ArrowBinaryIterator(List<String> columnsInOrder, ByteString schema, ByteString rowsInBytes) {
    RootAllocator root = new RootAllocator(maxAllocation);
    BufferAllocator childAllocator = root.newChildAllocator("ArrowBinaryIterator", 0, maxAllocation);

    // Schema bytes must precede the row bytes in the combined stream.
    ByteArrayInputStream schemaBytes = new ByteArrayInputStream(schema.toByteArray());
    ByteArrayInputStream rowBytes = new ByteArrayInputStream(rowsInBytes.toByteArray());
    SequenceInputStream schemaThenRows = new SequenceInputStream(schemaBytes, rowBytes);

    arrowReaderIterator = new ArrowReaderIterator(new ArrowStreamReader(schemaThenRows, childAllocator));
    // Start with an empty row iterator.
    currentIterator = ImmutableList.<InternalRow>of().iterator();
    this.columnsInOrder = columnsInOrder;
}
 
Example #29
Source File: ArrowConverterTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a two-column, three-row batch in the Arrow file format to an in-memory buffer
 * and checks that {@code ArrowConverter.readFromBytes} recovers the schema and records,
 * and that {@code ArrowConverter.toArrowColumns} yields one vector per column.
 *
 * @throws Exception if writing or reading the Arrow bytes fails
 */
@Test
public void testReadSchemaAndRecordsFromByteArray() throws Exception {
    // NOTE(review): this allocator is never closed; ownership of the vectors returned by
    // toArrowColumns is not visible from this test - confirm before adding a close().
    BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);

    int valueCount = 3;
    List<Field> fields = new ArrayList<>();
    fields.add(ArrowConverter.field("field1",new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)));
    fields.add(ArrowConverter.intField("field2"));

    List<FieldVector> fieldVectors = new ArrayList<>();
    fieldVectors.add(ArrowConverter.vectorFor(allocator,"field1",new float[] {1,2,3}));
    fieldVectors.add(ArrowConverter.vectorFor(allocator,"field2",new int[] {1,2,3}));

    org.apache.arrow.vector.types.pojo.Schema schema = new org.apache.arrow.vector.types.pojo.Schema(fields);

    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    // The writer closes first (finishing the file), then the root releases its vectors.
    // An IOException is no longer logged and swallowed: a failed write now fails the test
    // at its cause instead of surfacing later as a confusing read error on truncated bytes.
    try (VectorSchemaRoot schemaRoot1 = new VectorSchemaRoot(schema, fieldVectors, valueCount);
         ArrowFileWriter arrowFileWriter = new ArrowFileWriter(schemaRoot1,null,newChannel(byteArrayOutputStream))) {
        arrowFileWriter.writeBatch();
    }

    byte[] arr = byteArrayOutputStream.toByteArray();
    val arr2 = ArrowConverter.readFromBytes(arr);
    assertEquals(2,arr2.getFirst().numColumns());
    assertEquals(3,arr2.getRight().size());

    val arrowCols = ArrowConverter.toArrowColumns(allocator,arr2.getFirst(),arr2.getRight());
    assertEquals(2,arrowCols.size());
    assertEquals(valueCount,arrowCols.get(0).getValueCount());
}
 
Example #30
Source File: HashAggMemoryEstimator.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@code HashAggMemoryEstimator} from group-by and aggregate expressions.
 *
 * <p>A temporary allocator and a vector container loaded with {@code childSchema} are
 * opened for the duration of the call and closed on exit. The expressions are
 * materialized against {@code childSchema}, and the resulting pivot info and accumulator
 * types are passed to the overload that takes them directly.
 *
 * @param groupByExpressions group-by expressions used to derive the pivot info
 * @param aggregateExpressions aggregate expressions used to derive the accumulator types
 * @param schema schema passed to {@code computeHashTableSize}
 * @param childSchema schema added to the temporary container and used for materialization
 * @param functionLookupContext lookup context passed to expression materialization
 * @param options options passed to {@code computeHashTableSize} and to the delegate overload
 * @return the estimator produced by the delegate {@code create} overload
 */
public static HashAggMemoryEstimator create(
  final List<NamedExpression> groupByExpressions,
  final List<NamedExpression> aggregateExpressions,
  final BatchSchema schema,
  final BatchSchema childSchema,
  final FunctionLookupContext functionLookupContext,
  final OptionManager options) {

  try (BaseAllocator scratchAllocator = new RootAllocator();
    VectorContainer scratchContainer = new VectorContainer(scratchAllocator)) {
    scratchContainer.addSchema(childSchema);

    final int batchSize = computeHashTableSize(options, schema);

    // Pivot info comes from the materialized group-by expressions.
    final PivotInfo pivot = getPivotInfo(
      materializeExprs(groupByExpressions, childSchema, functionLookupContext),
      scratchContainer);

    // Accumulator types come from the materialized aggregate expressions.
    final List<LogicalExpression> aggExprs =
      materializeExprs(aggregateExpressions, childSchema, functionLookupContext);
    final MaterializedAggExpressionsResult accumulators =
      AccumulatorBuilder.getAccumulatorTypesFromMaterializedExpressions(
        aggregateExpressions, aggExprs, scratchContainer);

    return create(pivot, accumulators, batchSize, options);
  }
}