Java Code Examples for org.apache.arrow.vector.BigIntVector#allocateNew()

The following examples show how to use org.apache.arrow.vector.BigIntVector#allocateNew(). They are drawn from open-source projects; the source file, project, and license are noted above each example.
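allocateNew() comes in two overloads on BigIntVector: the no-argument form allocates buffers at the vector's default (or previously set) initial capacity, while allocateNew(int valueCount) sizes the buffers for a known number of values. The minimal sketch below is not taken from any of the projects that follow (the class name and field name are illustrative); it shows the sized overload together with the usual write-then-setValueCount sequence.

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BigIntVector;

public class AllocateNewSketch {
  public static void main(String[] args) {
    try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
         BigIntVector vector = new BigIntVector("values", allocator)) {
      vector.allocateNew(3);      // reserve buffers for three 64-bit values
      vector.set(0, 1L);          // set(...) assumes capacity already exists
      vector.setNull(1);          // clear the validity bit for index 1
      vector.set(2, 3L);
      vector.setValueCount(3);    // fix the number of readable slots
      System.out.println(vector); // with recent Arrow versions prints the contents, e.g. [1, null, 3]
    } // try-with-resources closes the vector, then the allocator, releasing the buffers
  }
}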
Example 1
Source File: TestData.java    From dremio-oss with Apache License 2.0
private Pair<BigIntVector, ResultVerifier> testBigIntVector() {
  final String colName = "colBigInt";
  final List<Long> values = asList(null, 50L, -2000L, 327345234234L, 0L);

  BigIntVector valuesVector = new BigIntVector(colName, allocator);
  valuesVector.allocateNew(values.size());
  for (int i = 0; i < values.size(); i++) {
    if (values.get(i) == null) {
      valuesVector.setNull(i);
    } else {
      valuesVector.set(i, values.get(i));
    }
  }

  ResultVerifier verifier = new ResultVerifier() {
    @Override
    public void verify(DataPOJO output) {
      verifyIntValues(values, output, colName);
    }
  };

  return Pair.of(valuesVector, verifier);
}
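Unlike the setSafe(...) calls in the later examples, this test writes with set(...), which does not grow the buffers; that is safe here only because allocateNew(values.size()) reserved capacity for every index up front.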
 
Example 2
Source File: TestArrowLongConnector.java    From yosegi with Apache License 2.0
@Test
public void T_convert_1() throws IOException{
  BufferAllocator allocator = new RootAllocator( 1024 * 1024 * 10 );
  BigIntVector vector = new BigIntVector( "test" , allocator );
  vector.allocateNew();
  vector.setSafe( 0 , (long)0 );
  vector.setSafe( 1 , (long)1 );
  vector.setSafe( 2 , (long)0 );
  vector.setNull( 3 );
  vector.setSafe( 4 , (long)1 );
  vector.setSafe( 5 , (long)1 );
  vector.setSafe( 6 , (long)1 );
  vector.setNull( 7 );
  vector.setValueCount( 8 );

  IColumn column = ArrowColumnFactory.convert( "test" , vector );
  assertEquals( column.getColumnName() , "test" );
  assertEquals( column.size() , 8 );
  assertTrue( ( column.getColumnType() == ColumnType.LONG ) );
  assertEquals( ( (PrimitiveObject)( column.get(0).getRow() ) ).getLong() , (long)0  );
  assertEquals( ( (PrimitiveObject)( column.get(1).getRow() ) ).getLong() , (long)1  );
  assertEquals( ( (PrimitiveObject)( column.get(2).getRow() ) ).getLong() , (long)0  );
  assertEquals( column.get(3).getRow() , null  );
  assertEquals( ( (PrimitiveObject)( column.get(4).getRow() ) ).getLong() , (long)1 );
  assertEquals( ( (PrimitiveObject)( column.get(5).getRow() ) ).getLong() , (long)1 );
  assertEquals( ( (PrimitiveObject)( column.get(6).getRow() ) ).getLong() , (long)1 );
  assertEquals( column.get(7).getRow() , null  );
}
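Here allocateNew() is called with no argument, so the buffers are sized at the vector's default initial capacity; the setSafe(...) writes would transparently re-allocate if an index ever exceeded it, and setValueCount(8) then fixes the number of readable slots, including the two explicit nulls.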
 
Example 3
Source File: TestArrowLongConnector.java    From multiple-dimension-spread with Apache License 2.0
@Test
public void T_convert_1() throws IOException{
  BufferAllocator allocator = new RootAllocator( 1024 * 1024 * 10 );
  BigIntVector vector = new BigIntVector( "test" , allocator );
  vector.allocateNew();
  vector.setSafe( 0 , (long)0 );
  vector.setSafe( 1 , (long)1 );
  vector.setSafe( 2 , (long)0 );
  vector.setNull( 3 );
  vector.setSafe( 4 , (long)1 );
  vector.setSafe( 5 , (long)1 );
  vector.setSafe( 6 , (long)1 );
  vector.setNull( 7 );
  vector.setValueCount( 8 );

  IColumn column = ArrowColumnFactory.convert( "test" , vector );
  assertEquals( column.getColumnName() , "test" );
  assertEquals( column.size() , 8 );
  assertTrue( ( column.getColumnType() == ColumnType.LONG ) );
  assertEquals( ( (PrimitiveObject)( column.get(0).getRow() ) ).getLong() , (long)0  );
  assertEquals( ( (PrimitiveObject)( column.get(1).getRow() ) ).getLong() , (long)1  );
  assertEquals( ( (PrimitiveObject)( column.get(2).getRow() ) ).getLong() , (long)0  );
  assertEquals( column.get(3).getRow() , null  );
  assertEquals( ( (PrimitiveObject)( column.get(4).getRow() ) ).getLong() , (long)1 );
  assertEquals( ( (PrimitiveObject)( column.get(5).getRow() ) ).getLong() , (long)1 );
  assertEquals( ( (PrimitiveObject)( column.get(6).getRow() ) ).getLong() , (long)1 );
  assertEquals( column.get(7).getRow() , null  );
}
 
Example 4
Source File: Twister2ArrowFileWriter.java    From twister2 with Apache License 2.0
@Override
public <T extends FieldVector> void generate(T bigIntVector1, int from, int items, int isSet) {
  BigIntVector bigIntVector = (BigIntVector) bigIntVector1;
  bigIntVector.setInitialCapacity(items);
  bigIntVector.allocateNew();
  for (int i = 0; i < items; i++) {
    Long l = Long.valueOf(dataList.get(from + i).toString());
    bigIntVector.setSafe(i, isSet, l);
  }
  bigIntVector.setValueCount(items);
}
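setInitialCapacity(items) followed by the no-argument allocateNew() is equivalent to requesting items slots directly, since allocateNew() honors a previously set initial capacity. The three-argument setSafe(i, isSet, l) writes the value and marks the slot valid when isSet is non-zero, or null when it is zero.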
 
Example 5
Source File: GlobalDictionaryBuilder.java    From dremio-oss with Apache License 2.0
private static VectorContainer buildLongGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
  final Field field = new Field(SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath(), true, new ArrowType.Int(64, true), null);
  final VectorContainer input = new VectorContainer(bufferAllocator);
  final BigIntVector longVector = input.addOrGet(field);
  longVector.allocateNew();
  SortedSet<Long> values = Sets.newTreeSet();
  for (Dictionary dictionary : dictionaries) {
    for (int i = 0; i <= dictionary.getMaxId(); ++i) {
      values.add(dictionary.decodeToLong(i));
    }
  }
  if (existingDict != null) {
    final BigIntVector existingDictValues = existingDict.getValueAccessorById(BigIntVector.class, 0).getValueVector();
    for (int i = 0; i < existingDict.getRecordCount(); ++i) {
      values.add(existingDictValues.get(i));
    }
  }
  final Iterator<Long> iter = values.iterator();
  int recordCount = 0;
  while (iter.hasNext()) {
    longVector.setSafe(recordCount++, iter.next());
  }
  longVector.setValueCount(recordCount);
  input.setRecordCount(recordCount);
  input.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  return input;
}
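Because the number of distinct dictionary values is not known until the sorted set has been built, the vector is allocated at its default capacity and grown implicitly by setSafe(...) as values are appended; the final size is only fixed afterwards via setValueCount(recordCount).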
 
Example 6
Source File: TestVectorizedHashAggPartitionSerializable.java    From dremio-oss with Apache License 2.0
private void populateBigInt(BigIntVector vector, Long[] data) {
  vector.allocateNew();
  Random r = new Random();
  for (int i = 0; i < data.length; i++) {
    Long val = data[i];
    if (val != null) {
      vector.setSafe(i, val);
    } else {
      vector.setSafe(i, 0, r.nextLong());
    }
  }
  vector.setValueCount(data.length);
}
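For null inputs the test still calls setSafe(i, 0, r.nextLong()): a random payload lands in the data buffer, but the validity bit stays clear, so readers observe a null at that index regardless of the value written.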
 
Example 7
Source File: TestBoundedPivots.java    From dremio-oss with Apache License 2.0
static Long[] populate8ByteValues(BigIntVector vector, int size){
  vector.allocateNew();
  Long[] values = new Long[size];
  for(int i = 0; i < values.length; i++){
    if (RAND.nextBoolean()) {
      values[i] = RAND.nextLong();
      vector.setSafe(i, values[i]);
    }
  }
  vector.setValueCount(values.length);
  return values;
}
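Roughly half of the indices are never written here; because allocateNew() zero-fills the validity buffer, every untouched slot reads back as null, matching the null entries left in the returned Long[].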
 
Example 8
Source File: TestBoundedPivots.java    From dremio-oss with Apache License 2.0
static Long[] populate8ByteValuesWithoutNull(BigIntVector vector, int size){
  vector.allocateNew();
  Long[] values = new Long[size];
  for(int i = 0; i < values.length; i++){
    values[i] = RAND.nextLong();
    vector.setSafe(i, values[i]);
  }
  vector.setValueCount(values.length);
  return values;
}
 
Example 9
Source File: ArrowLongMemoryAllocator.java    From yosegi with Apache License 2.0
public ArrowLongMemoryAllocator( final BigIntVector vector , final int rowCount ) {
  vector.allocateNew( rowCount );
  this.vector = vector;
}
 
Example 10
Source File: ArrowLongMemoryAllocator.java    From multiple-dimension-spread with Apache License 2.0
public ArrowLongMemoryAllocator( final BigIntVector vector , final int rowCount ){
  vector.allocateNew( rowCount );
  this.vector = vector;
}
 
Example 11
Source File: TestParquetWriter.java    From dremio-oss with Apache License 2.0
@Test
public void testFileSize() throws Exception {
  final Path tmpSchemaPath = new Path(getDfsTestTmpSchemaLocation());
  final Path targetPath = new Path(tmpSchemaPath, "testFileSize");

  final Configuration hadoopConf = new Configuration();
  final FileSystem newFs = targetPath.getFileSystem(hadoopConf);
  assertTrue(newFs.mkdirs(targetPath));

  final BufferAllocator ALLOCATOR = allocatorRule.newAllocator("test-parquet-writer", 0, Long.MAX_VALUE);

  OptionManager optionManager = mock(OptionManager.class);
  when(optionManager.getOption(ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE_VALIDATOR)).thenReturn("none"); //compression shouldn't matter
  when(optionManager.getOption(ExecConstants.PARQUET_PAGE_SIZE_VALIDATOR)).thenReturn(256L);
  when(optionManager.getOption(ExecConstants.PARQUET_MAXIMUM_PARTITIONS_VALIDATOR)).thenReturn(1L);
  when(optionManager.getOption(ExecConstants.PARQUET_DICT_PAGE_SIZE_VALIDATOR)).thenReturn(4096L);

  OperatorStats operatorStats = mock(OperatorStats.class);

  OperatorContext opContext = mock(OperatorContext.class);
  when(opContext.getFragmentHandle()).thenReturn(ExecProtos.FragmentHandle.newBuilder().setMajorFragmentId(2323).setMinorFragmentId(234234).build());
  when(opContext.getAllocator()).thenReturn(ALLOCATOR);
  when(opContext.getOptions()).thenReturn(optionManager);
  when(opContext.getStats()).thenReturn(operatorStats);

  ParquetWriter writerConf = mock(ParquetWriter.class);
  when(writerConf.getLocation()).thenReturn(targetPath.toUri().toString());
  OpProps props = mock(OpProps.class);
  when(writerConf.getProps()).thenReturn(props);
  when(writerConf.getProps().getUserName()).thenReturn("testuser");

  ParquetFormatPlugin formatPlugin = mock(ParquetFormatPlugin.class);
  FileSystemPlugin fsPlugin = mock(FileSystemPlugin.class);
  when(fsPlugin.createFS((String) notNull(), (OperatorContext) notNull())).thenReturn(HadoopFileSystem.getLocal(hadoopConf));
  when(writerConf.getFormatPlugin()).thenReturn(formatPlugin);
  when(formatPlugin.getFsPlugin()).thenReturn(fsPlugin);

  ParquetRecordWriter writer = new ParquetRecordWriter(opContext, writerConf, new ParquetFormatConfig());

  RecordWriter.OutputEntryListener outputEntryListener = mock(RecordWriter.OutputEntryListener.class);
  RecordWriter.WriteStatsListener writeStatsListener = mock(RecordWriter.WriteStatsListener.class);
  ArgumentCaptor<Long> recordWrittenCaptor = ArgumentCaptor.forClass(long.class);
  ArgumentCaptor<Long> fileSizeCaptor = ArgumentCaptor.forClass(long.class);
  ArgumentCaptor<String> pathCaptor = ArgumentCaptor.forClass(String.class);
  ArgumentCaptor<byte[]> metadataCaptor = ArgumentCaptor.forClass(byte[].class);
  ArgumentCaptor<Integer> partitionCaptor = ArgumentCaptor.forClass(Integer.class);
  ArgumentCaptor<byte[]> icebergMetadataCaptor = ArgumentCaptor.forClass(byte[].class);

  BigIntVector bigIntVector = new BigIntVector("key", ALLOCATOR);
  bigIntVector.allocateNew(2);
  bigIntVector.set(0, 52459253098448904L);
  bigIntVector.set(1, 1116675951L);

  VectorContainer container = new VectorContainer();
  container.add(bigIntVector);
  container.setRecordCount(2);
  container.buildSchema(BatchSchema.SelectionVectorMode.NONE);

  writer.setup(container, outputEntryListener, writeStatsListener);
  writer.startPartition(WritePartition.NONE);
  writer.writeBatch(0, container.getRecordCount());

  container.clear();
  writer.close();

  verify(outputEntryListener, times(1)).recordsWritten(recordWrittenCaptor.capture(),
    fileSizeCaptor.capture(), pathCaptor.capture(), metadataCaptor.capture(),
    partitionCaptor.capture(), icebergMetadataCaptor.capture());

  for (FileStatus file : newFs.listStatus(targetPath)) {
    if (file.getPath().toString().endsWith(".parquet")) { //complex243_json is in here for some reason?
      assertEquals(Long.valueOf(fileSizeCaptor.getValue()), Long.valueOf(file.getLen()));
      break;
    }
  }

  container.close();
  ALLOCATOR.close();
}
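Note the lifecycle at the end of the test: the container, and with it the BigIntVector sized via allocateNew(2), is cleared and closed before ALLOCATOR.close(); an Arrow allocator throws on close if any buffers are still outstanding, which is how the test would surface a leak.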
 
Example 12
Source File: TestParquetWriter.java    From dremio-oss with Apache License 2.0
@Test
public void testOutOfMemory() throws Exception {
  final Path tmpSchemaPath = new Path(getDfsTestTmpSchemaLocation());
  final Path targetPath = new Path(tmpSchemaPath, "testOutOfMemory");

  final Configuration hadoopConf = new Configuration();
  final FileSystem newFs = targetPath.getFileSystem(hadoopConf);
  assertTrue(newFs.mkdirs(targetPath));

  final BufferAllocator ALLOCATOR = allocatorRule.newAllocator("test-parquet-writer", 0, 128);

  OptionManager optionManager = mock(OptionManager.class);
  when(optionManager.getOption(ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE_VALIDATOR)).thenReturn("none"); //compression shouldn't matter
  when(optionManager.getOption(ExecConstants.PARQUET_PAGE_SIZE_VALIDATOR)).thenReturn(256L);
  when(optionManager.getOption(ExecConstants.PARQUET_MAXIMUM_PARTITIONS_VALIDATOR)).thenReturn(1L);
  when(optionManager.getOption(ExecConstants.PARQUET_DICT_PAGE_SIZE_VALIDATOR)).thenReturn(4096L);

  OperatorStats operatorStats = mock(OperatorStats.class);

  OperatorContext opContext = mock(OperatorContext.class);
  when(opContext.getFragmentHandle()).thenReturn(ExecProtos.FragmentHandle.newBuilder().setMajorFragmentId(2323).setMinorFragmentId(234235).build());
  when(opContext.getAllocator()).thenReturn(ALLOCATOR);
  when(opContext.getOptions()).thenReturn(optionManager);
  when(opContext.getStats()).thenReturn(operatorStats);

  ParquetWriter writerConf = mock(ParquetWriter.class);
  when(writerConf.getLocation()).thenReturn(targetPath.toUri().toString());
  OpProps props = mock(OpProps.class);
  when(writerConf.getProps()).thenReturn(props);
  when(writerConf.getProps().getUserName()).thenReturn("testuser");

  ParquetFormatPlugin formatPlugin = mock(ParquetFormatPlugin.class);
  FileSystemPlugin fsPlugin = mock(FileSystemPlugin.class);
  when(writerConf.getFormatPlugin()).thenReturn(formatPlugin);
  when(fsPlugin.createFS((String) notNull(), (OperatorContext) notNull())).thenReturn(HadoopFileSystem.getLocal(hadoopConf));
  when(formatPlugin.getFsPlugin()).thenReturn(fsPlugin);

  ParquetRecordWriter writer = new ParquetRecordWriter(opContext, writerConf, new ParquetFormatConfig());

  RecordWriter.OutputEntryListener outputEntryListener = mock(RecordWriter.OutputEntryListener.class);
  RecordWriter.WriteStatsListener writeStatsListener = mock(RecordWriter.WriteStatsListener.class);

  BigIntVector bigIntVector = new BigIntVector("key", ALLOCATOR);
  bigIntVector.allocateNew(2);
  bigIntVector.set(0, 52459253098448904L);
  bigIntVector.set(1, 1116675951L);

  VectorContainer container = new VectorContainer();
  container.add(bigIntVector);
  container.setRecordCount(2);
  container.buildSchema(BatchSchema.SelectionVectorMode.NONE);

  writer.setup(container, outputEntryListener, writeStatsListener);
  writer.startPartition(WritePartition.NONE);
  writer.writeBatch(0, container.getRecordCount());

  container.clear();
  try {
    writer.close();
  } catch (Exception e) {
    // ignore any exception in close(), but all the buffers should be released.
  }

  container.close();
  ALLOCATOR.close();
}
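This variant caps the allocator at 128 bytes, which is ample for the small allocateNew(2) vector but forces the Parquet writer to run out of memory later; the try/catch tolerates the exception from writer.close(), and the final ALLOCATOR.close() still verifies that every buffer was released on the failure path.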
 
Example 13
Source File: TestDictionaryLookup.java    From dremio-oss with Apache License 2.0
@Test
public void testDictionaryLookup() throws Throwable {


  try (final VectorContainer dict1 = new VectorContainer(getTestAllocator());
       final VectorContainer dict2 = new VectorContainer(getTestAllocator());
       final VectorContainer dict3 = new VectorContainer(getTestAllocator())) {

    final Map<String, GlobalDictionaryFieldInfo> dictionaryFieldInfoMap = Maps.newHashMap();
    final Field field1 = new Field(SchemaPath.getSimplePath("c0").getAsUnescapedPath(), true, new ArrowType.Int(64, true), null);
    final BigIntVector longVector = dict1.addOrGet(field1);
    longVector.allocateNew();
    longVector.setSafe(0, 10L);
    longVector.setSafe(1, 20L);
    longVector.setSafe(2, 30L);
    longVector.setSafe(3, 40L);
    longVector.setSafe(4, 50L);
    longVector.setValueCount(5);
    dict1.setRecordCount(5);
    dict1.buildSchema(BatchSchema.SelectionVectorMode.NONE);


    dictionaryFieldInfoMap.put("c0", new GlobalDictionaryFieldInfo(0, "c0", null, field1.getType(), "local"));

    final Field field2 = new Field(SchemaPath.getSimplePath("c1").getAsUnescapedPath(), true, new ArrowType.Binary(), null);
    final VarBinaryVector binaryVector = dict2.addOrGet(field2);
    binaryVector.allocateNew();
    binaryVector.setSafe(0, "abc".getBytes(UTF8), 0, 3);
    binaryVector.setSafe(1, "bcd".getBytes(UTF8), 0, 3);
    binaryVector.setSafe(2, "cde".getBytes(UTF8), 0, 3);
    binaryVector.setSafe(3, "def".getBytes(UTF8), 0, 3);
    binaryVector.setSafe(4, "efg".getBytes(UTF8), 0, 3);
    binaryVector.setValueCount(5);
    dict2.setRecordCount(5);
    dict2.buildSchema(BatchSchema.SelectionVectorMode.NONE);
    dictionaryFieldInfoMap.put("c1", new GlobalDictionaryFieldInfo(0, "c1", null, field2.getType(), "local"));

    final Field field3 = new Field(SchemaPath.getSimplePath("c2").getAsUnescapedPath(), true, new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), null);
    final Float8Vector doubleVector = dict3.addOrGet(field3);
    doubleVector.allocateNew();
    doubleVector.setSafe(0, 100.1);
    doubleVector.setSafe(1, 200.2);
    doubleVector.setSafe(2, 300.3);
    doubleVector.setSafe(3, 400.4);
    doubleVector.setSafe(4, 500.5);
    doubleVector.setValueCount(5);
    dict3.setRecordCount(5);
    dict3.buildSchema(BatchSchema.SelectionVectorMode.NONE);
    dictionaryFieldInfoMap.put("c2", new GlobalDictionaryFieldInfo(0, "c2", null, field3.getType(), "local"));

    OperatorCreatorRegistry registry = Mockito.mock(OperatorCreatorRegistry.class);
    Mockito.when(registry.getSingleInputOperator(Matchers.any(OperatorContext.class), Matchers.any(PhysicalOperator.class)))
      .thenAnswer(new Answer<SingleInputOperator>() {
        public SingleInputOperator answer(InvocationOnMock invocation) throws Exception {
          Object[] args = invocation.getArguments();
          DictionaryLookupOperator dictionaryLookupOperator = Mockito.spy(new DictionaryLookupOperator(
            (OperatorContext)args[0], (DictionaryLookupPOP)args[1]));

          Mockito.doReturn(dict1).when(dictionaryLookupOperator).loadDictionary(Matchers.eq("c0"));
          Mockito.doReturn(dict2).when(dictionaryLookupOperator).loadDictionary(Matchers.eq("c1"));
          Mockito.doReturn(dict3).when(dictionaryLookupOperator).loadDictionary(Matchers.eq("c2"));
          return dictionaryLookupOperator;
        }
      });

    BaseTestOperator.testContext.setRegistry(registry);

    DictionaryLookupPOP lookup = new DictionaryLookupPOP(null, PROPS, null, dictionaryFieldInfoMap);
    Table input = t(
      th("c0", "c1", "c2"),
      tr(0, 1, 2),
      tr(1, 2, 0),
      tr(2, 0, 1)
    );

    Table output = t(
      th("c0", "c1", "c2"),
      tr(10L, "bcd".getBytes(UTF8), 300.3),
      tr(20L, "cde".getBytes(UTF8), 100.1),
      tr(30L, "abc".getBytes(UTF8), 200.2)
    );

    validateSingle(lookup, DictionaryLookupOperator.class, input, output);
  }
}
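In this test the BigIntVector populated after allocateNew() serves as the decoded-value side of a dictionary: the input table holds integer ids (0 through 4), and the lookup operator replaces each id in column c0 with the corresponding long value (10L through 50L) loaded from dict1.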