Java Code Examples for org.apache.spark.sql.vectorized.ColumnarBatch#setNumRows()

The following examples show how to use org.apache.spark.sql.vectorized.ColumnarBatch#setNumRows() . These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: iceberg   File: ColumnarBatchReader.java    License: Apache License 2.0 6 votes vote down vote up
@Override
public final ColumnarBatch read(ColumnarBatch reuse, int numRowsToRead) {
  Preconditions.checkArgument(numRowsToRead > 0, "Invalid number of rows to read: %s", numRowsToRead);
  ColumnVector[] arrowColumnVectors = new ColumnVector[readers.length];

  if (reuse == null) {
    closeVectors();
  }

  for (int i = 0; i < readers.length; i += 1) {
    vectorHolders[i] = readers[i].read(vectorHolders[i], numRowsToRead);
    int numRowsInVector = vectorHolders[i].numValues();
    Preconditions.checkState(
        numRowsInVector == numRowsToRead,
        "Number of rows in the vector %s didn't match expected %s ", numRowsInVector,
        numRowsToRead);
    arrowColumnVectors[i] =
        IcebergArrowColumnVector.forHolder(vectorHolders[i], numRowsInVector);
  }
  ColumnarBatch batch = new ColumnarBatch(arrowColumnVectors);
  batch.setNumRows(numRowsToRead);
  return batch;
}
 
Example 2
@Override public ColumnarBatch get() {
  //Spark asks you to convert one column at a time so that different
  //column types can be handled differently.
  //NumOfCols << NumOfRows so this is negligible
  List<FieldVector> fieldVectors = wrapperWritable.getVectorSchemaRoot().getFieldVectors();
  if(columnVectors == null) {
    //Lazy create ColumnarBatch/ColumnVector[] instance
    columnVectors = new ColumnVector[fieldVectors.size()];
    columnarBatch = new ColumnarBatch(columnVectors);
  }
  Iterator<FieldVector> iterator = fieldVectors.iterator();
  int rowCount = -1;
  for (int i = 0; i < columnVectors.length; i++) {
    FieldVector fieldVector = iterator.next();
    columnVectors[i] = new ArrowColumnVector(fieldVector);
    if (rowCount == -1) {
      //All column vectors have same length so we can get rowCount from any column
      rowCount = fieldVector.getValueCount();
    }
  }
  columnarBatch.setNumRows(rowCount);
  return columnarBatch;
}
 
Example 3
private Iterator<InternalRow> toArrowRows(VectorSchemaRoot root, List<String> namesInOrder) {
    ColumnVector[] columns = namesInOrder.stream()
            .map(name -> root.getVector(name))
            .map(vector -> new ArrowSchemaConverter(vector))
            .collect(Collectors.toList()).toArray(new ColumnVector[0]);

    ColumnarBatch batch = new ColumnarBatch(columns);
    batch.setNumRows(root.getRowCount());
    return batch.rowIterator();
}
 
Example 4
@Override
public ColumnarBatch get() {
  start();
  ColumnarBatch batch = new ColumnarBatch(
    stream.getRoot().getFieldVectors()
      .stream()
      .map(FlightArrowColumnVector::new)
      .toArray(ColumnVector[]::new)
  );
  batch.setNumRows(stream.getRoot().getRowCount());
  return batch;
}
 
Example 5
Source Project: spark-llap   File: CountDataReader.java    License: Apache License 2.0 5 votes vote down vote up
@Override public ColumnarBatch get() {
  int size = (numRows >= 1000) ? 1000 : (int) numRows;
  OnHeapColumnVector vector = new OnHeapColumnVector(size, DataTypes.LongType);
  for(int i = 0; i < size; i++) {
    vector.putLong(0, numRows);
  }
  numRows -= size;
  ColumnarBatch batch = new ColumnarBatch(new ColumnVector[] {vector});
  batch.setNumRows(size);
  return batch;
}