Java Code Examples for org.apache.arrow.vector.VarBinaryVector#setSafe()

The following examples show how to use org.apache.arrow.vector.VarBinaryVector#setSafe(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BaseNdvAccumulatorNoSpill.java    From dremio-oss with Apache License 2.0 6 votes vote down vote up
/**
 * Serializes every HLL sketch held by the accumulator at {@code batchIndex}
 * into the output vector, writing one compact byte array per sketch at the
 * sketch's own array position.
 *
 * @param batchIndex index into {@code accumulators} selecting the holder to emit
 */
@Override
public void output(int batchIndex) {
  final HllSketch[] sketches = accumulators[batchIndex].getAccums();

  // First pass: add up the compact serialized sizes so the data buffer
  // can be allocated in one go.
  int totalBytes = 0;
  for (HllSketch sketch : sketches) {
    totalBytes += sketch.getCompactSerializationBytes();
  }

  ((VariableWidthVector) output).allocateNew(totalBytes, LBlockHashTableNoSpill.MAX_VALUES_PER_BATCH);
  final VarBinaryVector outVec = (VarBinaryVector) output;

  // Second pass: write each sketch's compact bytes at its position.
  int slot = 0;
  for (HllSketch sketch : sketches) {
    final byte[] serialized = sketch.toCompactByteArray();
    outVec.setSafe(slot, serialized, 0, serialized.length);
    slot++;
  }
}
 
Example 2
Source File: BaseNdvUnionAccumulatorNoSpill.java    From dremio-oss with Apache License 2.0 6 votes vote down vote up
/**
 * Serializes every HLL union held by the accumulator at {@code batchIndex}
 * into the output vector, writing one compact byte array per union at the
 * union's own array position.
 *
 * @param batchIndex index into {@code accumulators} selecting the holder to emit
 */
@Override
public void output(int batchIndex) {
  final Union[] unions = accumulators[batchIndex].getAccums();

  // First pass: add up the compact serialized sizes so the data buffer
  // can be allocated in one go.
  int totalBytes = 0;
  for (Union union : unions) {
    totalBytes += union.getCompactSerializationBytes();
  }

  ((VariableWidthVector) output).allocateNew(totalBytes, LBlockHashTableNoSpill.MAX_VALUES_PER_BATCH);
  final VarBinaryVector outVec = (VarBinaryVector) output;

  // Second pass: write each union's compact bytes at its position.
  int slot = 0;
  for (Union union : unions) {
    final byte[] serialized = union.toCompactByteArray();
    outVec.setSafe(slot, serialized, 0, serialized.length);
    slot++;
  }
}
 
Example 3
Source File: TestVarBinaryPivot.java    From dremio-oss with Apache License 2.0 6 votes vote down vote up
/**
 * Fills {@code vector} from {@code values}, one entry per array position.
 * Non-null elements are written as-is; a null element becomes a null slot
 * that first has random bytes written to it.
 *
 * @param vector the vector to allocate and populate
 * @param values the source values; {@code null} elements mark null slots
 */
static void populate(VarBinaryVector vector, byte[][] values){
  vector.allocateNew();
  final Random noiseSource = new Random();
  int index = 0;
  for (byte[] value : values) {
    if (value == null) {
      // Add noise before nulling the slot: this confirms that after pivot,
      // the noise is gone.
      final byte[] noise = new byte[noiseSource.nextInt(15)];
      noiseSource.nextBytes(noise);
      vector.setSafe(index, noise, 0, noise.length);
      vector.setNull(index);
    } else {
      vector.setSafe(index, value, 0, value.length);
    }
    index++;
  }
  vector.setValueCount(values.length);
}
 
Example 4
Source File: VectorizedParquetDefinitionLevelReader.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Reads {@code typeWidth} bytes from {@code valuesReader} and stores them in
 * {@code vector} at {@code bufferIdx}, then marks that index as not-null in
 * {@code nullabilityHolder}.
 *
 * @param vector            destination vector
 * @param typeWidth         number of bytes requested from the reader
 * @param valuesReader      source of the value bytes
 * @param bufferIdx         index to write in both the vector and the nullability holder
 * @param nullabilityHolder holder updated to record that the slot is set
 */
private static void setBinaryInVector(
    VarBinaryVector vector,
    int typeWidth,
    ValuesAsBytesReader valuesReader,
    int bufferIdx, NullabilityHolder nullabilityHolder) {
  final ByteBuffer valueBytes = valuesReader.getBuffer(typeWidth);
  final byte[] backing = valueBytes.array();
  // The readable region of the backing array begins at arrayOffset + position
  // and spans the bytes remaining up to the buffer's limit.
  final int start = valueBytes.position() + valueBytes.arrayOffset();
  final int length = valueBytes.remaining();
  vector.setSafe(bufferIdx, backing, start, length);
  nullabilityHolder.setNotNull(bufferIdx);
}
 
Example 5
Source File: GlobalDictionaryBuilder.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a single-column container holding the sorted, de-duplicated union of
 * all binary values found in {@code dictionaries} and, when present, in the
 * first column of {@code existingDict}.
 *
 * @param dictionaries     dictionaries whose ids {@code 0..getMaxId()} are decoded
 * @param existingDict     previously built dictionary to merge in, or {@code null}
 * @param columnDescriptor supplies the column path used to name the field
 * @param bufferAllocator  allocator backing the returned container
 * @return a container with one nullable binary vector and its schema built
 */
private static VectorContainer buildBinaryGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
  final String fieldName = SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath();
  final Field field = new Field(fieldName, true, new ArrowType.Binary(), null);
  final VectorContainer input = new VectorContainer(bufferAllocator);
  final VarBinaryVector binaryVector = input.addOrGet(field);
  binaryVector.allocateNew();

  // A TreeSet both removes duplicates and yields the values in sorted order.
  final SortedSet<Binary> sortedValues = new TreeSet<>();
  for (Dictionary dictionary : dictionaries) {
    for (int id = 0; id <= dictionary.getMaxId(); ++id) {
      sortedValues.add(dictionary.decodeToBinary(id));
    }
  }

  if (existingDict != null) {
    final VarBinaryVector existingDictValues = existingDict.getValueAccessorById(VarBinaryVector.class, 0).getValueVector();
    for (int row = 0; row < existingDict.getRecordCount(); ++row) {
      sortedValues.add(Binary.fromConstantByteArray(existingDictValues.get(row)));
    }
  }

  // Write the merged values out in iteration (sorted) order.
  int recordCount = 0;
  for (Binary value : sortedValues) {
    final byte[] data = value.getBytes();
    binaryVector.setSafe(recordCount, data, 0, data.length);
    recordCount++;
  }

  binaryVector.setValueCount(recordCount);
  input.setRecordCount(recordCount);
  input.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  return input;
}
 
Example 6
Source File: TestDictionaryLookup.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Builds three five-entry in-memory dictionaries (a BigInt column "c0", a
 * binary column "c1" and a double column "c2"), wires a mocked operator
 * registry so the lookup operator loads them instead of real dictionaries,
 * and checks that input rows of dictionary ids come out as the corresponding
 * dictionary values.
 */
@Test
public void testDictionaryLookup() throws Throwable {


  // The three containers are closed automatically when the test finishes.
  try (final VectorContainer dict1 = new VectorContainer(getTestAllocator());
       final VectorContainer dict2 = new VectorContainer(getTestAllocator());
       final VectorContainer dict3 = new VectorContainer(getTestAllocator())) {

    final Map<String, GlobalDictionaryFieldInfo> dictionaryFieldInfoMap = Maps.newHashMap();
    // Dictionary for "c0": ids 0..4 -> 10, 20, 30, 40, 50.
    final Field field1 = new Field(SchemaPath.getSimplePath("c0").getAsUnescapedPath(), true, new ArrowType.Int(64, true), null);
    final BigIntVector longVector = dict1.addOrGet(field1);
    longVector.allocateNew();
    longVector.setSafe(0, 10L);
    longVector.setSafe(1, 20L);
    longVector.setSafe(2, 30L);
    longVector.setSafe(3, 40L);
    longVector.setSafe(4, 50L);
    longVector.setValueCount(5);
    dict1.setRecordCount(5);
    dict1.buildSchema(BatchSchema.SelectionVectorMode.NONE);


    dictionaryFieldInfoMap.put("c0", new GlobalDictionaryFieldInfo(0, "c0", null, field1.getType(), "local"));

    // Dictionary for "c1": ids 0..4 -> "abc", "bcd", "cde", "def", "efg" (3 bytes each).
    final Field field2 = new Field(SchemaPath.getSimplePath("c1").getAsUnescapedPath(), true, new ArrowType.Binary(), null);
    final VarBinaryVector binaryVector = dict2.addOrGet(field2);
    binaryVector.allocateNew();
    binaryVector.setSafe(0, "abc".getBytes(UTF8), 0, 3);
    binaryVector.setSafe(1, "bcd".getBytes(UTF8), 0, 3);
    binaryVector.setSafe(2, "cde".getBytes(UTF8), 0, 3);
    binaryVector.setSafe(3, "def".getBytes(UTF8), 0, 3);
    binaryVector.setSafe(4, "efg".getBytes(UTF8), 0, 3);
    binaryVector.setValueCount(5);
    dict2.setRecordCount(5);
    dict2.buildSchema(BatchSchema.SelectionVectorMode.NONE);
    dictionaryFieldInfoMap.put("c1", new GlobalDictionaryFieldInfo(0, "c1", null, field2.getType(), "local"));

    // Dictionary for "c2": ids 0..4 -> 100.1, 200.2, 300.3, 400.4, 500.5.
    final Field field3 = new Field(SchemaPath.getSimplePath("c2").getAsUnescapedPath(), true, new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), null);
    final Float8Vector doubleVector = dict3.addOrGet(field3);
    doubleVector.allocateNew();
    doubleVector.setSafe(0, 100.1);
    doubleVector.setSafe(1, 200.2);
    doubleVector.setSafe(2, 300.3);
    doubleVector.setSafe(3, 400.4);
    doubleVector.setSafe(4, 500.5);
    doubleVector.setValueCount(5);
    dict3.setRecordCount(5);
    dict3.buildSchema(BatchSchema.SelectionVectorMode.NONE);
    dictionaryFieldInfoMap.put("c2", new GlobalDictionaryFieldInfo(0, "c2", null, field3.getType(), "local"));

    // Mock the registry so that it hands back a spied DictionaryLookupOperator
    // whose loadDictionary calls return the in-memory containers built above.
    OperatorCreatorRegistry registry = Mockito.mock(OperatorCreatorRegistry.class);
    Mockito.when(registry.getSingleInputOperator(Matchers.any(OperatorContext.class), Matchers.any(PhysicalOperator.class)))
      .thenAnswer(new Answer<SingleInputOperator>() {
        public SingleInputOperator answer(InvocationOnMock invocation) throws Exception {
          // Build the real operator from the arguments of the intercepted call, wrapped in a spy.
          Object[] args = invocation.getArguments();
          DictionaryLookupOperator dictionaryLookupOperator = Mockito.spy(new DictionaryLookupOperator(
            (OperatorContext)args[0], (DictionaryLookupPOP)args[1]));

          Mockito.doReturn(dict1).when(dictionaryLookupOperator).loadDictionary(Matchers.eq("c0"));
          Mockito.doReturn(dict2).when(dictionaryLookupOperator).loadDictionary(Matchers.eq("c1"));
          Mockito.doReturn(dict3).when(dictionaryLookupOperator).loadDictionary(Matchers.eq("c2"));
          return dictionaryLookupOperator;
        }
      });

    BaseTestOperator.testContext.setRegistry(registry);

    DictionaryLookupPOP lookup = new DictionaryLookupPOP(null, PROPS, null, dictionaryFieldInfoMap);
    // Each input cell is a dictionary id for its column.
    Table input = t(
      th("c0", "c1", "c2"),
      tr(0, 1, 2),
      tr(1, 2, 0),
      tr(2, 0, 1)
    );

    // Expected rows: every id replaced by the value stored at that position
    // in the matching dictionary (e.g. c1 id 1 -> "bcd", c2 id 2 -> 300.3).
    Table output = t(
      th("c0", "c1", "c2"),
      tr(10L, "bcd".getBytes(UTF8), 300.3),
      tr(20L, "cde".getBytes(UTF8), 100.1),
      tr(30L, "abc".getBytes(UTF8), 200.2)
    );

    validateSingle(lookup, DictionaryLookupOperator.class, input, output);
  }
}
 
Example 7
Source File: VarBinaryToBinaryConverterTest.java    From snowflake-jdbc with Apache License 2.0 4 votes vote down vote up
/**
 * Fills a {@link VarBinaryVector} with 1000 slots that are randomly either
 * null or a random byte array, then checks the values returned by
 * {@code VarBinaryToBinaryConverter} for every slot:
 * null slots yield {@code null} for string/object/bytes and {@code false} for
 * boolean; non-null slots yield the Base64 string, the original bytes, and an
 * invalid-conversion {@link SFException} for boolean.
 *
 * <p>The vector is cleared in a {@code finally} block so that its buffers are
 * released even when an assertion fails part-way through.
 *
 * @throws SFException if a conversion fails unexpectedly
 */
@Test
public void testConvertToString() throws SFException
{
  final int rowCount = 1000;
  List<byte[]> expectedValues = new ArrayList<>();
  Set<Integer> nullValIndex = new HashSet<>();
  for (int i = 0; i < rowCount; i++)
  {
    expectedValues.add(RandomStringUtils.random(20).getBytes());
  }

  Map<String, String> customFieldMeta = new HashMap<>();
  customFieldMeta.put("logicalType", "BINARY");

  FieldType fieldType = new FieldType(true,
                                      Types.MinorType.VARBINARY.getType(),
                                      null, customFieldMeta);

  VarBinaryVector vector = new VarBinaryVector("col_one", fieldType,
                                               allocator);
  try
  {
    // Randomly choose which rows are null and remember them for the checks below.
    for (int i = 0; i < rowCount; i++)
    {
      boolean isNull = random.nextBoolean();
      if (isNull)
      {
        vector.setNull(i);
        nullValIndex.add(i);
      }
      else
      {
        vector.setSafe(i, expectedValues.get(i));
      }
    }

    ArrowVectorConverter converter = new VarBinaryToBinaryConverter(
        vector, 0, this);

    for (int i = 0; i < rowCount; i++)
    {
      String stringVal = converter.toString(i);
      Object objectVal = converter.toObject(i);
      byte[] bytesVal = converter.toBytes(i);

      if (nullValIndex.contains(i))
      {
        assertThat(stringVal, is(nullValue()));
        assertThat(objectVal, is(nullValue()));
        assertThat(bytesVal, is(nullValue()));
        // Actual value first, matcher second, so a failure message reads correctly.
        assertThat(converter.toBoolean(i), is(false));
      }
      else
      {
        String base64Expected =
            Base64.getEncoder().encodeToString(expectedValues.get(i));
        assertThat(stringVal, is(base64Expected));
        assertThat(bytesVal, is(expectedValues.get(i)));
        assertThat(objectVal, is(expectedValues.get(i)));
        // Lambdas may only capture effectively-final locals, hence the copy.
        final int x = i;
        TestUtil.assertSFException(invalidConversionErrorCode,
                                   () -> converter.toBoolean(x));
      }
    }
  }
  finally
  {
    // Release the vector's buffers on both the success and the failure path.
    vector.clear();
  }
}