org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch Java Examples

The following examples show how to use org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch. They are drawn from a range of open-source projects; the source file, project, and license are noted above each example.
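Before diving into the individual examples, here is a minimal sketch of the typical write path: a VectorizedRowBatch is created from an ORC TypeDescription, its column vectors are filled one row at a time, and full batches are handed to a Writer (compare Example #12 below). The schema, output path, and values are illustrative only.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;

public class VectorizedRowBatchWriteSketch {
  public static void main(String[] args) throws Exception {
    TypeDescription schema = TypeDescription.fromString("struct<msg:int>"); // illustrative schema
    Writer writer = OrcFile.createWriter(new Path("/tmp/example.orc"),      // illustrative path
        OrcFile.writerOptions(new Configuration()).setSchema(schema));

    VectorizedRowBatch batch = schema.createRowBatch(); // default capacity is VectorizedRowBatch.DEFAULT_SIZE rows
    LongColumnVector msg = (LongColumnVector) batch.cols[0];

    for (int value = 0; value < 10_000; value++) {
      int row = batch.size++;                   // claim the next row slot in the batch
      msg.vector[row] = value;
      if (batch.size == batch.getMaxSize()) {   // batch is full: flush it and reuse
        writer.addRowBatch(batch);
        batch.reset();
      }
    }
    if (batch.size != 0) {                      // flush the final partial batch
      writer.addRowBatch(batch);
      batch.reset();
    }
    writer.close();
  }
}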
Example #1
Source File: HiveORCVectorizedReader.java    From dremio-oss with Apache License 2.0 6 votes
/**
 * Helper method that creates {@link VectorizedRowBatch}. For each selected column an input vector is created in the
 * batch. For unselected columns the vector entry will be null. The order of input vectors in the batch should
 * match the order of the columns in the ORC file.
 *
 * @param rowOI Used to find the ordinal of the selected column.
 * @param isOriginal Whether the file uses the original (non-transactional) layout; when false, the data vectors
 *                   are wrapped in transactional metadata vectors.
 * @return the populated {@link VectorizedRowBatch}
 */
private VectorizedRowBatch createVectorizedRowBatch(StructObjectInspector rowOI, boolean isOriginal) {
  final List<? extends StructField> fieldRefs = rowOI.getAllStructFieldRefs();
  final List<ColumnVector> vectors = getVectors(rowOI);

  final VectorizedRowBatch result = new VectorizedRowBatch(fieldRefs.size());

  ColumnVector[] vectorArray =  vectors.toArray(new ColumnVector[0]);

  if (!isOriginal) {
    vectorArray = createTransactionalVectors(vectorArray);
  }

  result.cols = vectorArray;
  result.numCols = fieldRefs.size();
  result.reset();
  return result;
}
 
Example #2
Source File: OrcFlowFileWriter.java    From localization_nifi with Apache License 2.0 6 votes
public void addRowBatch(VectorizedRowBatch batch) throws IOException {
    if (buildIndex) {
        // Batch the writes up to the rowIndexStride so that we can get the
        // right size indexes.
        int posn = 0;
        while (posn < batch.size) {
            int chunkSize = Math.min(batch.size - posn,
                    rowIndexStride - rowsInIndex);
            treeWriter.writeRootBatch(batch, posn, chunkSize);
            posn += chunkSize;
            rowsInIndex += chunkSize;
            rowsInStripe += chunkSize;
            if (rowsInIndex >= rowIndexStride) {
                createRowIndexEntry();
            }
        }
    } else {
        rowsInStripe += batch.size;
        treeWriter.writeRootBatch(batch, 0, batch.size);
    }
    memoryManager.addedRow();
}
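The chunking loop above splits each incoming batch on row-index-stride boundaries so that every index entry covers exactly rowIndexStride rows. For example, with rowIndexStride = 10,000, rowsInIndex = 9,500 and an incoming batch of 1,024 rows, the first chunk is min(1024, 10000 - 9500) = 500 rows, a row index entry is then created, and the remaining 524 rows start the next index group.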
 
Example #3
Source File: OrcFlowFileWriter.java    From nifi with Apache License 2.0 6 votes
public void addRowBatch(VectorizedRowBatch batch) throws IOException {
    if (buildIndex) {
        // Batch the writes up to the rowIndexStride so that we can get the
        // right size indexes.
        int posn = 0;
        while (posn < batch.size) {
            int chunkSize = Math.min(batch.size - posn,
                    rowIndexStride - rowsInIndex);
            treeWriter.writeRootBatch(batch, posn, chunkSize);
            posn += chunkSize;
            rowsInIndex += chunkSize;
            rowsInStripe += chunkSize;
            if (rowsInIndex >= rowIndexStride) {
                createRowIndexEntry();
            }
        }
    } else {
        rowsInStripe += batch.size;
        treeWriter.writeRootBatch(batch, 0, batch.size);
    }
    memoryManager.addedRow();
}
 
Example #4
Source File: OrcBulkWriterTestUtil.java    From flink with Apache License 2.0 6 votes
private static List<Record> getResults(Reader reader) throws IOException {
	List<Record> results = new ArrayList<>();

	RecordReader recordReader = reader.rows();
	VectorizedRowBatch batch = reader.getSchema().createRowBatch();

	while (recordReader.nextBatch(batch)) {
		BytesColumnVector stringVector = (BytesColumnVector)  batch.cols[0];
		LongColumnVector intVector = (LongColumnVector) batch.cols[1];
		for (int r = 0; r < batch.size; r++) {
			String name = new String(stringVector.vector[r], stringVector.start[r], stringVector.length[r]);
			int age = (int) intVector.vector[r];

			results.add(new Record(name, age));
		}
	}
	// Close the reader only after all batches have been consumed.
	recordReader.close();

	return results;
}
 
Example #5
Source File: HiveORCVectorizedReader.java    From dremio-oss with Apache License 2.0 6 votes
/**
 * Helper method that creates {@link VectorizedRowBatch}. For each selected column an input vector is created in the
 * batch. For unselected columns the vector entry will be null. The order of input vectors in the batch should
 * match the order of the columns in the ORC file.
 *
 * @param rowOI Used to find the ordinal of the selected column.
 * @param isOriginal Whether the file uses the original (non-transactional) layout; when false, the data vectors
 *                   are wrapped in transactional metadata vectors.
 * @return the populated {@link VectorizedRowBatch}
 */
private VectorizedRowBatch createVectorizedRowBatch(StructObjectInspector rowOI, boolean isOriginal) {
  final List<? extends StructField> fieldRefs = rowOI.getAllStructFieldRefs();
  final List<ColumnVector> vectors = getVectors(rowOI);

  final VectorizedRowBatch result = new VectorizedRowBatch(fieldRefs.size());

  ColumnVector[] vectorArray =  vectors.toArray(new ColumnVector[0]);

  if (!isOriginal) {
    vectorArray = createTransactionalVectors(vectorArray);
  }

  result.cols = vectorArray;
  result.numCols = fieldRefs.size();
  result.reset();
  return result;
}
 
Example #6
Source File: OrcShimV200.java    From flink with Apache License 2.0 6 votes
@Override
public boolean nextBatch(RecordReader reader, VectorizedRowBatch rowBatch) throws IOException {
	try {
		if (hasNextMethod == null) {
			hasNextMethod = Class.forName("org.apache.hadoop.hive.ql.io.orc.RecordReader")
					.getMethod("hasNext");
			hasNextMethod.setAccessible(true);
		}
		if (nextBatchMethod == null) {
			nextBatchMethod = RecordReader.class.getMethod("nextBatch", VectorizedRowBatch.class);
			nextBatchMethod.setAccessible(true);
		}
		boolean hasNext = (boolean) hasNextMethod.invoke(reader);
		if (hasNext) {
			nextBatchMethod.invoke(reader, rowBatch);
			return true;
		} else {
			return false;
		}
	} catch (IllegalAccessException |
			InvocationTargetException |
			NoSuchMethodException |
			ClassNotFoundException e) {
		throw new IOException(e);
	}
}
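The reflection above lets the shim work against the Hive 2.0-era RecordReader, whose hasNext()/nextBatch() combination differs from the current org.apache.orc API. On current ORC versions the same loop needs no reflection, since nextBatch() itself reports whether any rows were read. A minimal sketch, assuming a Reader obtained via OrcFile.createReader as in Example #4:

import java.io.IOException;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;

public class DirectNextBatchSketch {
  // On the modern org.apache.orc API, nextBatch() advances the reader and returns
  // false once no more rows could be read into the batch.
  static void readAllBatches(Reader reader) throws IOException {
    VectorizedRowBatch batch = reader.getSchema().createRowBatch();
    RecordReader rows = reader.rows();
    try {
      while (rows.nextBatch(batch)) {
        // process batch.size rows here
      }
    } finally {
      rows.close();
    }
  }
}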
 
Example #7
Source File: AvroToOrcRecordConverter.java    From datacollector with Apache License 2.0 6 votes
public static void addAvroRecord(
    VectorizedRowBatch batch,
    GenericRecord record,
    TypeDescription orcSchema,
    int orcBatchSize,
    Writer writer
) throws IOException {

  for (int c = 0; c < batch.numCols; c++) {
    ColumnVector colVector = batch.cols[c];
    final String thisField = orcSchema.getFieldNames().get(c);
    final TypeDescription type = orcSchema.getChildren().get(c);

    Object fieldValue = record.get(thisField);
    Schema.Field avroField = record.getSchema().getField(thisField);
    addToVector(type, colVector, avroField.schema(), fieldValue, batch.size);
  }

  batch.size++;

  if (batch.size % orcBatchSize == 0 || batch.size == batch.getMaxSize()) {
    writer.addRowBatch(batch);
    batch.reset();
    batch.size = 0;
  }
}
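Note that addAvroRecord only hands the batch to the Writer when it fills up or crosses the orcBatchSize boundary; as Example #14 below shows, the caller is still responsible for a final writer.addRowBatch(batch) covering any remaining rows before closing the writer.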
 
Example #8
Source File: HiveORCVectorizedReader.java    From dremio-oss with Apache License 2.0 5 votes
private ColumnVector getPrimitiveColumnVector(PrimitiveObjectInspector poi) {
    switch (poi.getPrimitiveCategory()) {
    case BOOLEAN:
    case BYTE:
    case SHORT:
    case INT:
    case LONG:
    case DATE:
      return new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    case TIMESTAMP:
      return new TimestampColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    case FLOAT:
    case DOUBLE:
      return new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    case BINARY:
    case STRING:
    case CHAR:
    case VARCHAR:
      return new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    case DECIMAL:
      DecimalTypeInfo tInfo = (DecimalTypeInfo) poi.getTypeInfo();
      return new DecimalColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
        tInfo.precision(), tInfo.scale()
      );
    default:
      throw UserException.unsupportedError()
        .message("Vectorized ORC reader is not supported for datatype: %s", poi.getPrimitiveCategory())
        .build(logger);
    }
}
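All of these vectors are allocated with VectorizedRowBatch.DEFAULT_SIZE (1,024 rows), the same capacity that TypeDescription.createRowBatch() uses when no explicit size is given.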
 
Example #9
Source File: HiveORCCopiers.java    From dremio-oss with Apache License 2.0 5 votes
/**
 * Helper method to create {@link ORCCopier}s based on the given input and output vector types and the projected column ordinals.
 *
 * @param projectedColOrdinals ordinals of the columns that we are interested in reading from the file.
 * @param output the value vectors to populate.
 * @param input the ORC batch to copy from.
 * @return one copier per output vector; columns that are not projected get a no-op copier.
 */
public static ORCCopier[] createCopiers(final HiveColumnVectorData columnVectorData,
                                        final List<Integer> projectedColOrdinals,
                                        int[] ordinalIdsFromOrcFile,
                                        final ValueVector[] output,
                                        final VectorizedRowBatch input,
                                        boolean isOriginal,
                                        HiveOperatorContextOptions operatorContextOptions) {
  final int numColumns = output.length;
  final ORCCopier[] copiers = new ORCCopier[numColumns];
  final ColumnVector[] cols = isOriginal ? input.cols : ((StructColumnVector) input.cols[HiveORCVectorizedReader.TRANS_ROW_COLUMN_INDEX]).fields;
  for (int i = 0; i < numColumns; i++) {
    boolean copierCreated = false;
    if (i < projectedColOrdinals.size()) {
      int projectedColOrdinal = projectedColOrdinals.get(i);
      if (projectedColOrdinal < ordinalIdsFromOrcFile.length && projectedColOrdinal < cols.length) {
        int ordinalId = ordinalIdsFromOrcFile[ projectedColOrdinal ];
        copiers[i] = createCopier(columnVectorData, ordinalId, output[i], cols[projectedColOrdinal], operatorContextOptions);
        copierCreated = true;
      }
    }
    if (!copierCreated) {
      copiers[i] = new NoOpCopier(null, null);
    }
  }
  return copiers;
}
 
Example #10
Source File: HiveORCVectorizedReader.java    From dremio-oss with Apache License 2.0 5 votes
private ColumnVector[] createTransactionalVectors(ColumnVector[] dataVectors) {
  ColumnVector[] transVectors = new ColumnVector[6];

  transVectors[0] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
  transVectors[1] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
  transVectors[2] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
  transVectors[3] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
  transVectors[4] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);

  transVectors[5] = new StructColumnVector(dataVectors.length, dataVectors);

  return transVectors;
}
 
Example #11
Source File: OrcBatchReader.java    From Flink-CEPplus with Apache License 2.0 5 votes
/**
 * Fills an ORC batch into an array of Row.
 *
 * @param rows The array of rows to fill.
 * @param schema The schema of the ORC data.
 * @param batch The ORC data.
 * @param selectedFields The indexes of the selected ORC fields.
 * @return The number of rows that were filled.
 */
static int fillRows(Row[] rows, TypeDescription schema, VectorizedRowBatch batch, int[] selectedFields) {

	int rowsToRead = Math.min((int) batch.count(), rows.length);

	List<TypeDescription> fieldTypes = schema.getChildren();
	// read each selected field
	for (int fieldIdx = 0; fieldIdx < selectedFields.length; fieldIdx++) {
		int orcIdx = selectedFields[fieldIdx];
		readField(rows, fieldIdx, fieldTypes.get(orcIdx), batch.cols[orcIdx], rowsToRead);
	}
	return rowsToRead;
}
 
Example #12
Source File: SqlInterpreterTest.java    From zeppelin with Apache License 2.0 5 votes
public File createORCFile(int[] values) throws IOException {
  File file = File.createTempFile("zeppelin-flink-input", ".orc");
  file.delete();
  Path path = new Path(file.getAbsolutePath());
  Configuration conf = new Configuration();
  conf.set("orc.compress", "snappy");
  TypeDescription schema = TypeDescription.fromString("struct<msg:int>");
  Writer writer = OrcFile.createWriter(path,
          OrcFile.writerOptions(conf)
                  .setSchema(schema));
  VectorizedRowBatch batch = schema.createRowBatch();
  LongColumnVector x = (LongColumnVector) batch.cols[0];
  for (int i = 0; i < values.length; ++i) {
    int row = batch.size++;
    x.vector[row] = values[i];
    // If the batch is full, write it out and start over.
    if (batch.size == batch.getMaxSize()) {
      writer.addRowBatch(batch);
      batch.reset();
    }
  }
  if (batch.size != 0) {
    writer.addRowBatch(batch);
    batch.reset();
  }
  writer.close();
  return file;
}
 
Example #13
Source File: OrcToSdcRecordConverter.java    From datacollector with Apache License 2.0 5 votes
private static void populateRecordFromRow(
    Record record,
    TypeDescription schema,
    VectorizedRowBatch batch,
    int rowNum
) {
  record.getHeader().setAttribute(ORC_SCHEMA_RECORD_ATTRIBUTE, schema.toString());
  record.set(Field.create(new LinkedHashMap<>()));
  for (int c = 0; c < batch.numCols; c++) {
    populateRecordFromRow(record, "/" + schema.getFieldNames().get(c), schema.getChildren().get(c), batch.cols[c], rowNum);
  }
}
 
Example #14
Source File: TestAvroToOrcRecordConverter.java    From datacollector with Apache License 2.0 5 votes
@Test
public void recordConversion() throws IOException {
  Path outputFilePath = new Path(createTempFile());

  Schema.Parser schemaParser = new Schema.Parser();
  Schema schema = schemaParser.parse(
      "{\"type\": \"record\", \"name\": \"MyRecord\", \"fields\": [{\"name\": \"first\", \"type\": \"int\"},{" +
          "\"name\": \"second\", \"type\": {\"type\": \"record\", \"name\": \"MySubRecord\", \"fields\":" +
          " [{\"name\": \"sub1\", \"type\": \"string\"}, {\"name\": \"sub2\", \"type\": \"int\"}] } }, {\"name\":" +
          " \"somedate\", \"type\": { \"type\" : \"int\", \"logicalType\": \"date\"} } ]}"
  );

  TypeDescription orcSchema = AvroToOrcSchemaConverter.getOrcSchema(schema);

  Writer orcWriter = AvroToOrcRecordConverter.createOrcWriter(
      new Properties(),
      new Configuration(),
      outputFilePath,
      orcSchema
  );

  GenericRecord avroRecord = new GenericData.Record(schema);
  avroRecord.put("first", 1);
  avroRecord.put("somedate", 17535);

  GenericData.Record subRecord = new GenericData.Record(schema.getField("second").schema());
  subRecord.put("sub1", new Utf8("value1"));
  subRecord.put("sub2", 42);

  avroRecord.put("second", subRecord);

  VectorizedRowBatch batch = orcSchema.createRowBatch();

  AvroToOrcRecordConverter.addAvroRecord(batch, avroRecord, orcSchema, 1000, orcWriter);
  orcWriter.addRowBatch(batch);
  batch.reset();
  orcWriter.close();

  // TODO: add code to read the ORC file and validate the contents
}
 
Example #15
Source File: OrcBatchReader.java    From flink with Apache License 2.0 5 votes
/**
 * Fills an ORC batch into an array of Row.
 *
 * @param rows The array of rows to fill.
 * @param schema The schema of the ORC data.
 * @param batch The ORC data.
 * @param selectedFields The indexes of the selected ORC fields.
 * @return The number of rows that were filled.
 */
static int fillRows(Row[] rows, TypeDescription schema, VectorizedRowBatch batch, int[] selectedFields) {

	int rowsToRead = Math.min((int) batch.count(), rows.length);

	List<TypeDescription> fieldTypes = schema.getChildren();
	// read each selected field
	for (int fieldIdx = 0; fieldIdx < selectedFields.length; fieldIdx++) {
		int orcIdx = selectedFields[fieldIdx];
		readField(rows, fieldIdx, fieldTypes.get(orcIdx), batch.cols[orcIdx], rowsToRead);
	}
	return rowsToRead;
}
 
Example #16
Source File: OrcShim.java    From flink with Apache License 2.0 5 votes
/**
 * Create shim from hive version.
 */
static OrcShim<VectorizedRowBatch> createShim(String hiveVersion) {
	if (hiveVersion.startsWith("2.0")) {
		return new OrcShimV200();
	} else if (hiveVersion.startsWith("2.1")) {
		return new OrcShimV210();
	} else if (hiveVersion.startsWith("2.2") ||
			hiveVersion.startsWith("2.3") ||
			hiveVersion.startsWith("3.")) {
		return new OrcShimV230();
	} else {
		throw new UnsupportedOperationException(
				"Unsupported hive version for orc shim: " + hiveVersion);
	}
}
 
Example #17
Source File: RowDataVectorizer.java    From flink with Apache License 2.0 5 votes
@Override
public void vectorize(RowData row, VectorizedRowBatch batch) {
	int rowId = batch.size++;
	for (int i = 0; i < row.getArity(); ++i) {
		setColumn(rowId, batch.cols[i], fieldTypes[i], row, i);
	}
}
 
Example #18
Source File: RecordVectorizer.java    From flink with Apache License 2.0 5 votes
@Override
public void vectorize(Record element, VectorizedRowBatch batch) throws IOException {
	BytesColumnVector stringVector = (BytesColumnVector) batch.cols[0];
	LongColumnVector intColVector = (LongColumnVector) batch.cols[1];

	int row = batch.size++;

	stringVector.setVal(row, element.getName().getBytes(StandardCharsets.UTF_8));
	intColVector.vector[row] = element.getAge();

	this.addUserMetadata(OrcBulkWriterTestUtil.USER_METADATA_KEY, OrcBulkWriterTestUtil.USER_METADATA_VALUE);
}
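A Vectorizer like this is what bridges application records and the VectorizedRowBatch managed by Flink's bulk ORC writer. The wiring below is only a sketch: it assumes the test's RecordVectorizer(String schema) constructor and the Record type from this example, and the schema string and writer properties are illustrative.

import java.util.Properties;
import org.apache.flink.orc.writer.OrcBulkWriterFactory;
import org.apache.hadoop.conf.Configuration;

class OrcBulkWriterWiringSketch {
  static OrcBulkWriterFactory<Record> createFactory() {
    return new OrcBulkWriterFactory<>(
        new RecordVectorizer("struct<name:string,age:int>"), // assumed test constructor; schema is illustrative
        new Properties(),                                     // ORC writer properties
        new Configuration());                                 // Hadoop configuration
  }
  // The factory is then handed to a FileSink / StreamingFileSink builder elsewhere.
}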
 
Example #19
Source File: JsonFieldFiller.java    From secor with Apache License 2.0 5 votes
public static void processRow(JSONWriter writer, VectorizedRowBatch batch,
        TypeDescription schema, int row) throws JSONException {
    if (schema.getCategory() == TypeDescription.Category.STRUCT) {
        List<TypeDescription> fieldTypes = schema.getChildren();
        List<String> fieldNames = schema.getFieldNames();
        writer.object();
        for (int c = 0; c < batch.cols.length; ++c) {
            writer.key(fieldNames.get(c));
            setValue(writer, batch.cols[c], fieldTypes.get(c), row);
        }
        writer.endObject();
    } else {
        setValue(writer, batch.cols[0], schema, row);
    }
}
 
Example #20
Source File: VectorColumnFiller.java    From secor with Apache License 2.0 5 votes
public static void fillRow(int rowIndex, JsonConverter[] converters,
        TypeDescription schema, VectorizedRowBatch batch, JsonObject data) {
    List<String> fieldNames = schema.getFieldNames();
    for (int c = 0; c < converters.length; ++c) {
        JsonElement field = data.get(fieldNames.get(c));
        if (field == null) {
            batch.cols[c].noNulls = false;
            batch.cols[c].isNull[rowIndex] = true;
        } else {
            converters[c].convert(field, batch.cols[c], rowIndex);
        }
    }
}
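The null branch above relies on the two flags every ColumnVector carries: noNulls must be cleared on the vector as a whole, and isNull must be set for the specific row. A minimal sketch of the same pattern on a standalone LongColumnVector (values are illustrative):

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

class NullMarkingSketch {
  static LongColumnVector example() {
    LongColumnVector col = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    col.vector[0] = 42L;  // row 0 holds a value
    col.noNulls = false;  // the vector now contains at least one null
    col.isNull[1] = true; // row 1 is null; readers ignore col.vector[1]
    return col;
  }
}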
 
Example #21
Source File: OrcConverter.java    From pentaho-hadoop-shims with Apache License 2.0 5 votes
public RowMetaAndData convertFromOrc( VectorizedRowBatch batch, int currentBatchRow,
                                      List<? extends IOrcInputField> dialogInputFields,
                                      TypeDescription typeDescription,
                                      Map<String, Integer> schemaToOrcSubcripts,
                                      List<? extends IOrcInputField> orcInputFields ) {
  return convertFromOrc( new RowMetaAndData(), batch, currentBatchRow, dialogInputFields, typeDescription,
    schemaToOrcSubcripts, orcInputFields );
}
 
Example #22
Source File: OrcConverter.java    From pentaho-hadoop-shims with Apache License 2.0 5 votes
@VisibleForTesting
RowMetaAndData convertFromOrc( RowMetaAndData rowMetaAndData, VectorizedRowBatch batch, int currentBatchRow,
                               List<? extends IOrcInputField> dialogInputFields, TypeDescription typeDescription,
                               Map<String, Integer> schemaToOrcSubcripts,
                               List<? extends IOrcInputField> orcInputFields ) {

  int orcColumn;
  for ( IOrcInputField inputField : dialogInputFields ) {
    IOrcInputField orcField = getFormatField( inputField.getFormatFieldName(), orcInputFields );
    // Guard against dialog fields that have no matching ORC input field (inputField itself can never be null here).
    if ( orcField != null ) {
      ColumnVector columnVector = batch.cols[ schemaToOrcSubcripts.get( inputField.getPentahoFieldName() ) ];
      Object orcToPentahoValue =
        convertFromSourceToTargetDataType( columnVector, currentBatchRow, orcField.getPentahoType() );

      Object convertToSchemaValue = null;
      try {
        String dateFormatStr = inputField.getStringFormat();
        if ( ( dateFormatStr == null ) || ( dateFormatStr.trim().length() == 0 ) ) {
          dateFormatStr = ValueMetaBase.DEFAULT_DATE_FORMAT_MASK;
        }
        valueMetaConverter.setDatePattern( new SimpleDateFormat( dateFormatStr ) );
        convertToSchemaValue = valueMetaConverter
          .convertFromSourceToTargetDataType( orcField.getPentahoType(), inputField.getPentahoType(),
            orcToPentahoValue );
      } catch ( ValueMetaConversionException e ) {
        logger.error( e );
      }
      rowMetaAndData.addValue( inputField.getPentahoFieldName(), inputField.getPentahoType(), convertToSchemaValue );
      String stringFormat = inputField.getStringFormat();
      if ( ( stringFormat != null ) && ( stringFormat.trim().length() > 0 ) ) {
        rowMetaAndData.getValueMeta( rowMetaAndData.size() - 1 ).setConversionMask( stringFormat );
      }
    }
  }

  return rowMetaAndData;
}
 
Example #23
Source File: HiveORCVectorizedReader.java    From dremio-oss with Apache License 2.0 5 votes
private ColumnVector getStructColumnVector(StructObjectInspector soi) {
  ArrayList<ColumnVector> vectors = new ArrayList<>();
  List<? extends StructField> members = soi.getAllStructFieldRefs();
  for (StructField structField: members) {
    vectors.add(getColumnVector(structField.getFieldObjectInspector()));
  }
  ColumnVector[] columnVectors = vectors.toArray(new ColumnVector[0]);
  return new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE, columnVectors);
}
 
Example #24
Source File: MDSHiveDirectVectorizedReader.java    From multiple-dimension-spread with Apache License 2.0 5 votes
@Override
public boolean next( final NullWritable key, final VectorizedRowBatch outputBatch ) throws IOException {
  outputBatch.reset();
  setting.setPartitionValues( outputBatch );

  if( indexSize <= currentIndex ){
    if( ! currentReader.hasNext() ){
      updateCounter( currentReader.getReadStats() );
      outputBatch.endOfFile = true;
      isEnd = true;
      return false;
    }
    while( ! setSpread() ){
      if( ! currentReader.hasNext() ){
        updateCounter( currentReader.getReadStats() );
        outputBatch.endOfFile = true;
        isEnd = true;
        return false;
      }
    }
  }
  int maxSize = outputBatch.getMaxSize();
  if( indexSize < currentIndex + maxSize ){
    maxSize = indexSize - currentIndex;
  }

  for( int colIndex : needColumnIds ){
    assignors[colIndex].setColumnVector( outputBatch.cols[colIndex] , currentIndexList , currentIndex , maxSize );
  }
  outputBatch.size = maxSize;

  currentIndex += maxSize;
  if( indexSize <= currentIndex && ! currentReader.hasNext() ){
    outputBatch.endOfFile = true;
  }

  return outputBatch.size > 0;
}
 
Example #25
Source File: HiveORCVectorizedReader.java    From dremio-oss with Apache License 2.0 5 votes
private ColumnVector getUnionColumnVector(UnionObjectInspector uoi) {
  ArrayList<ColumnVector> vectors = new ArrayList<>();
  List<? extends ObjectInspector> members = uoi.getObjectInspectors();
  for (ObjectInspector unionField: members) {
    vectors.add(getColumnVector(unionField));
  }
  ColumnVector[] columnVectors = vectors.toArray(new ColumnVector[0]);
  return new UnionColumnVector(VectorizedRowBatch.DEFAULT_SIZE, columnVectors);
}
 
Example #26
Source File: HiveORCVectorizedReader.java    From dremio-oss with Apache License 2.0 5 votes
private ColumnVector getStructColumnVector(StructObjectInspector soi) {
  ArrayList<ColumnVector> vectors = new ArrayList<>();
  List<? extends StructField> members = soi.getAllStructFieldRefs();
  for (StructField structField: members) {
    vectors.add(getColumnVector(structField.getFieldObjectInspector()));
  }
  ColumnVector[] columnVectors = vectors.toArray(new ColumnVector[0]);
  return new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE, columnVectors);
}
 
Example #27
Source File: HiveORCVectorizedReader.java    From dremio-oss with Apache License 2.0 5 votes
private ColumnVector getPrimitiveColumnVector(PrimitiveObjectInspector poi) {
    switch (poi.getPrimitiveCategory()) {
    case BOOLEAN:
    case BYTE:
    case SHORT:
    case INT:
    case LONG:
    case DATE:
      return new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    case TIMESTAMP:
      return new TimestampColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    case FLOAT:
    case DOUBLE:
      return new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    case BINARY:
    case STRING:
    case CHAR:
    case VARCHAR:
      return new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    case DECIMAL:
      DecimalTypeInfo tInfo = (DecimalTypeInfo) poi.getTypeInfo();
      return new DecimalColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
        tInfo.precision(), tInfo.scale()
      );
    default:
      throw UserException.unsupportedError()
        .message("Vectorized ORC reader is not supported for datatype: %s", poi.getPrimitiveCategory())
        .build(logger);
    }
}
 
Example #28
Source File: HiveORCVectorizedResolver.java    From pxf with Apache License 2.0 5 votes
@Override
public List<List<OneField>> getFieldsForBatch(OneRow batch) {

    VectorizedRowBatch vectorizedBatch = (VectorizedRowBatch) batch.getData();

    /* Allocate empty result set */
    int columnsNumber = context.getColumns();
    resolvedBatch = new ArrayList<>(vectorizedBatch.size);

    /* Create empty template row */
    ArrayList<OneField> templateRow = new ArrayList<OneField>(columnsNumber);
    ArrayList<OneField> currentRow;
    for (int j = 0; j < context.getColumns(); j++) {
        templateRow.add(null);
    }
    /* Replicate template row*/
    for (int i = 0; i < vectorizedBatch.size; i++) {
        currentRow = new ArrayList<>(templateRow);
        resolvedBatch.add(currentRow);
    }

    /* process all columns*/
    List<? extends StructField> allStructFieldRefs = soi.getAllStructFieldRefs();
    for (int columnIndex = 0; columnIndex < vectorizedBatch.numCols; columnIndex++) {
        ObjectInspector oi = allStructFieldRefs.get(columnIndex).getFieldObjectInspector();
        if (oi.getCategory() == Category.PRIMITIVE) {
            resolvePrimitiveColumn(columnIndex, oi, vectorizedBatch);
        } else {
            throw new UnsupportedTypeException("Unable to resolve column index:" + columnIndex
                    + ". Only primitive types are supported.");
        }
    }

    return resolvedBatch;
}
 
Example #29
Source File: HiveORCVectorizedReader.java    From dremio-oss with Apache License 2.0 5 votes
private ColumnVector[] createTransactionalVectors(ColumnVector[] dataVectors) {
  ColumnVector[] transVectors = new ColumnVector[6];

  transVectors[0] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
  transVectors[1] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
  transVectors[2] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
  transVectors[3] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
  transVectors[4] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);

  transVectors[5] = new StructColumnVector(dataVectors.length, dataVectors);

  return transVectors;
}
 
Example #30
Source File: HiveORCCopiers.java    From dremio-oss with Apache License 2.0 5 votes
/**
 * Helper method to create {@link ORCCopier}s based on the given input and output vector types and the projected column ordinals.
 *
 * @param projectedColOrdinals ordinals of the columns that we are interested in reading from the file.
 * @param output the value vectors to populate.
 * @param input the ORC batch to copy from.
 * @return one copier per output vector; columns that are not projected get a no-op copier.
 */
public static ORCCopier[] createCopiers(final HiveColumnVectorData columnVectorData,
                                        final List<Integer> projectedColOrdinals,
                                        int[] ordinalIdsFromOrcFile,
                                        final ValueVector[] output,
                                        final VectorizedRowBatch input,
                                        boolean isOriginal,
                                        HiveOperatorContextOptions operatorContextOptions) {
  final int numColumns = output.length;
  final ORCCopier[] copiers = new ORCCopier[numColumns];
  final ColumnVector[] cols = isOriginal ? input.cols : ((StructColumnVector) input.cols[HiveORCVectorizedReader.TRANS_ROW_COLUMN_INDEX]).fields;
  for (int i = 0; i < numColumns; i++) {
    boolean copierCreated = false;
    if (i < projectedColOrdinals.size()) {
      int projectedColOrdinal = projectedColOrdinals.get(i);
      if (projectedColOrdinal < ordinalIdsFromOrcFile.length && projectedColOrdinal < cols.length) {
        int ordinalId = ordinalIdsFromOrcFile[ projectedColOrdinal ];
        copiers[i] = createCopier(columnVectorData, ordinalId, output[i], cols[projectedColOrdinal], operatorContextOptions);
        copierCreated = true;
      }
    }
    if (!copierCreated) {
      copiers[i] = new NoOpCopier(null, null);
    }
  }
  return copiers;
}