Java Code Examples for org.apache.hadoop.hive.ql.exec.vector.ColumnVector

The following examples show how to use org.apache.hadoop.hive.ql.exec.vector.ColumnVector. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: dremio-oss   Source File: HiveORCVectorizedReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the column vector matching the category reported by the given inspector.
 *
 * <p>The inspector is cast to the category-specific inspector type and handed to the
 * corresponding {@code get*ColumnVector} helper.
 *
 * @param oi inspector describing the column type
 * @return the vector produced by the helper for the inspector's category
 * @throws UserException (unsupported error) when the category is none of
 *         PRIMITIVE, LIST, STRUCT, MAP or UNION
 */
private ColumnVector getColumnVector(ObjectInspector oi) {
  final Category kind = oi.getCategory();
  final ColumnVector created;
  switch (kind) {
    case PRIMITIVE:
      created = getPrimitiveColumnVector((PrimitiveObjectInspector) oi);
      break;
    case LIST:
      created = getListColumnVector((ListObjectInspector) oi);
      break;
    case STRUCT:
      created = getStructColumnVector((StructObjectInspector) oi);
      break;
    case MAP:
      created = getMapColumnVector((MapObjectInspector) oi);
      break;
    case UNION:
      created = getUnionColumnVector((UnionObjectInspector) oi);
      break;
    default:
      throw UserException.unsupportedError()
        .message("Vectorized ORC reader is not supported for datatype: %s", kind)
        .build(logger);
  }
  return created;
}
 
Example 2
Source Project: dremio-oss   Source File: HiveORCVectorizedReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Helper method that creates a {@link VectorizedRowBatch} for the given row inspector.
 *
 * <p>The batch is constructed with one slot per field of {@code rowOI}. The vectors returned by
 * {@code getVectors(rowOI)} become the batch columns directly when {@code isOriginal} is true;
 * otherwise they are first passed through {@code createTransactionalVectors}. The batch is reset
 * before it is returned.
 *
 * <p>NOTE(review): {@code numCols} is always set to the field count of {@code rowOI}, even when
 * the columns were replaced by the transactional vectors - confirm this is intended.
 *
 * @param rowOI inspector whose struct fields define the columns of the batch
 * @param isOriginal when false, the data vectors are wrapped via {@code createTransactionalVectors}
 * @return the freshly created and reset batch
 */
private VectorizedRowBatch createVectorizedRowBatch(StructObjectInspector rowOI, boolean isOriginal) {
  final int fieldCount = rowOI.getAllStructFieldRefs().size();
  final ColumnVector[] dataVectors = getVectors(rowOI).toArray(new ColumnVector[0]);

  final VectorizedRowBatch batch = new VectorizedRowBatch(fieldCount);
  batch.cols = isOriginal ? dataVectors : createTransactionalVectors(dataVectors);
  batch.numCols = fieldCount;
  batch.reset();
  return batch;
}
 
Example 3
Source Project: dremio-oss   Source File: HiveORCCopiers.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds one child copier per child of the output union vector.
 *
 * <p>Children whose index also exists in {@code inputVector.fields} get a copier from
 * {@code createCopier}; any remaining output children get a {@link NoOpCopier}.
 *
 * @param columnVectorData supplies the total vector count used to advance the child position
 * @param ordinalId position of the union vector itself; its first child is at {@code ordinalId + 1}
 * @param inputVector Hive union vector to read from
 * @param outputVector Arrow union vector to write to
 * @param operatorContextOptions forwarded unchanged to {@code createCopier}
 */
UnionCopier(HiveColumnVectorData columnVectorData,
            int ordinalId,
            UnionColumnVector inputVector,
            UnionVector outputVector,
            HiveOperatorContextOptions operatorContextOptions) {
  this.inputVector = inputVector;
  this.outputVector = outputVector;
  // Relies on getChildrenFromFields() returning the children in the same order
  // in which they were supplied when the UnionVector was built.
  final List<FieldVector> arrowChildren = outputVector.getChildrenFromFields();
  int nextChildPos = ordinalId + 1; // first field sits right after the union vector itself
  for (int i = 0; i < arrowChildren.size(); i++) {
    if (i >= inputVector.fields.length) {
      // No Hive-side field for this output child.
      fieldCopiers.add(new NoOpCopier(null, null));
      continue;
    }
    final ColumnVector hiveChild = inputVector.fields[i];
    final ValueVector arrowChild = arrowChildren.get(i);
    arrowFieldVectors.add(arrowChild);
    fieldCopiers.add(
      createCopier(columnVectorData, nextChildPos, arrowChild, hiveChild, operatorContextOptions));
    nextChildPos += columnVectorData.getTotalVectorCount(nextChildPos);
  }
}
 
Example 4
Source Project: dremio-oss   Source File: HiveORCCopiers.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds one child copier per field of the Hive struct vector.
 *
 * <p>Fields that {@code columnVectorData} reports as included are paired with the next output
 * child (looked up by a running id) and get a copier from {@code createCopier}; all other fields
 * get a {@link NoOpCopier}. The child position advances for every field, included or not.
 *
 * @param columnVectorData answers inclusion and total-vector-count queries per position
 * @param ordinalId position of the struct vector itself; its first child is at {@code ordinalId + 1}
 * @param inputVector Hive struct vector to read from
 * @param outputVector Arrow struct vector to write to
 * @param operatorContextOptions forwarded unchanged to {@code createCopier}
 */
StructCopier(HiveColumnVectorData columnVectorData,
             int ordinalId,
             StructColumnVector inputVector,
             StructVector outputVector, HiveOperatorContextOptions operatorContextOptions) {
  this.inputVector = inputVector;
  this.outputVector = outputVector;

  int nextArrowId = 0;
  int nextChildPos = ordinalId + 1; // first child sits right after the struct vector itself

  for (final ColumnVector hiveField : inputVector.fields) {
    if (!columnVectorData.isColumnVectorIncluded(nextChildPos)) {
      fieldCopiers.add(new NoOpCopier(null, null));
    } else {
      final ValueVector arrowField = outputVector.getVectorById(nextArrowId);
      fieldCopiers.add(
        createCopier(columnVectorData, nextChildPos, arrowField, hiveField, operatorContextOptions));
      nextArrowId++;
    }
    nextChildPos += columnVectorData.getTotalVectorCount(nextChildPos);
  }
}
 
Example 5
Source Project: dremio-oss   Source File: HiveORCVectorizedReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the column vector matching the category reported by the given inspector.
 *
 * <p>The inspector is cast to the category-specific inspector type and handed to the
 * corresponding {@code get*ColumnVector} helper.
 *
 * @param oi inspector describing the column type
 * @return the vector produced by the helper for the inspector's category
 * @throws UserException (unsupported error) when the category is none of
 *         PRIMITIVE, LIST, STRUCT, MAP or UNION
 */
private ColumnVector getColumnVector(ObjectInspector oi) {
  final Category kind = oi.getCategory();
  final ColumnVector created;
  switch (kind) {
    case PRIMITIVE:
      created = getPrimitiveColumnVector((PrimitiveObjectInspector) oi);
      break;
    case LIST:
      created = getListColumnVector((ListObjectInspector) oi);
      break;
    case STRUCT:
      created = getStructColumnVector((StructObjectInspector) oi);
      break;
    case MAP:
      created = getMapColumnVector((MapObjectInspector) oi);
      break;
    case UNION:
      created = getUnionColumnVector((UnionObjectInspector) oi);
      break;
    default:
      throw UserException.unsupportedError()
        .message("Vectorized ORC reader is not supported for datatype: %s", kind)
        .build(logger);
  }
  return created;
}
 
Example 6
Source Project: dremio-oss   Source File: HiveORCVectorizedReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Helper method that creates a {@link VectorizedRowBatch} for the given row inspector.
 *
 * <p>The batch is constructed with one slot per field of {@code rowOI}. The vectors returned by
 * {@code getVectors(rowOI)} become the batch columns directly when {@code isOriginal} is true;
 * otherwise they are first passed through {@code createTransactionalVectors}. The batch is reset
 * before it is returned.
 *
 * <p>NOTE(review): {@code numCols} is always set to the field count of {@code rowOI}, even when
 * the columns were replaced by the transactional vectors - confirm this is intended.
 *
 * @param rowOI inspector whose struct fields define the columns of the batch
 * @param isOriginal when false, the data vectors are wrapped via {@code createTransactionalVectors}
 * @return the freshly created and reset batch
 */
private VectorizedRowBatch createVectorizedRowBatch(StructObjectInspector rowOI, boolean isOriginal) {
  final int fieldCount = rowOI.getAllStructFieldRefs().size();
  final ColumnVector[] dataVectors = getVectors(rowOI).toArray(new ColumnVector[0]);

  final VectorizedRowBatch batch = new VectorizedRowBatch(fieldCount);
  batch.cols = isOriginal ? dataVectors : createTransactionalVectors(dataVectors);
  batch.numCols = fieldCount;
  batch.reset();
  return batch;
}
 
Example 7
Source Project: dremio-oss   Source File: HiveORCCopiers.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds one child copier per child of the output union vector.
 *
 * <p>Children whose index also exists in {@code inputVector.fields} get a copier from
 * {@code createCopier}; any remaining output children get a {@link NoOpCopier}.
 *
 * @param columnVectorData supplies the total vector count used to advance the child position
 * @param ordinalId position of the union vector itself; its first child is at {@code ordinalId + 1}
 * @param inputVector Hive union vector to read from
 * @param outputVector Arrow union vector to write to
 * @param operatorContextOptions forwarded unchanged to {@code createCopier}
 */
UnionCopier(HiveColumnVectorData columnVectorData,
            int ordinalId,
            UnionColumnVector inputVector,
            UnionVector outputVector,
            HiveOperatorContextOptions operatorContextOptions) {
  this.inputVector = inputVector;
  this.outputVector = outputVector;
  // Relies on getChildrenFromFields() returning the children in the same order
  // in which they were supplied when the UnionVector was built.
  final List<FieldVector> arrowChildren = outputVector.getChildrenFromFields();
  int nextChildPos = ordinalId + 1; // first field sits right after the union vector itself
  for (int i = 0; i < arrowChildren.size(); i++) {
    if (i >= inputVector.fields.length) {
      // No Hive-side field for this output child.
      fieldCopiers.add(new NoOpCopier(null, null));
      continue;
    }
    final ColumnVector hiveChild = inputVector.fields[i];
    final ValueVector arrowChild = arrowChildren.get(i);
    arrowFieldVectors.add(arrowChild);
    fieldCopiers.add(
      createCopier(columnVectorData, nextChildPos, arrowChild, hiveChild, operatorContextOptions));
    nextChildPos += columnVectorData.getTotalVectorCount(nextChildPos);
  }
}
 
Example 8
Source Project: dremio-oss   Source File: HiveORCCopiers.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds one child copier per field of the Hive struct vector.
 *
 * <p>Fields that {@code columnVectorData} reports as included are paired with the next output
 * child (looked up by a running id) and get a copier from {@code createCopier}; all other fields
 * get a {@link NoOpCopier}. The child position advances for every field, included or not.
 *
 * @param columnVectorData answers inclusion and total-vector-count queries per position
 * @param ordinalId position of the struct vector itself; its first child is at {@code ordinalId + 1}
 * @param inputVector Hive struct vector to read from
 * @param outputVector Arrow struct vector to write to
 * @param operatorContextOptions forwarded unchanged to {@code createCopier}
 */
StructCopier(HiveColumnVectorData columnVectorData,
             int ordinalId,
             StructColumnVector inputVector,
             StructVector outputVector, HiveOperatorContextOptions operatorContextOptions) {
  this.inputVector = inputVector;
  this.outputVector = outputVector;

  int nextArrowId = 0;
  int nextChildPos = ordinalId + 1; // first child sits right after the struct vector itself

  for (final ColumnVector hiveField : inputVector.fields) {
    if (!columnVectorData.isColumnVectorIncluded(nextChildPos)) {
      fieldCopiers.add(new NoOpCopier(null, null));
    } else {
      final ValueVector arrowField = outputVector.getVectorById(nextArrowId);
      fieldCopiers.add(
        createCopier(columnVectorData, nextChildPos, arrowField, hiveField, operatorContextOptions));
      nextArrowId++;
    }
    nextChildPos += columnVectorData.getTotalVectorCount(nextChildPos);
  }
}
 
Example 9
Source Project: datacollector   Source File: AvroToOrcRecordConverter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Appends one Avro record to the batch as a new row and flushes the batch when it is due.
 *
 * <p>For every batch column, the ORC field name at the same index is used to look up both the
 * value and the Avro field schema in {@code record}; these are passed to {@code addToVector}
 * together with the current row index ({@code batch.size}). After the row is added, the batch is
 * written and reset when its size is a multiple of {@code orcBatchSize} or equals
 * {@code batch.getMaxSize()}.
 *
 * @param batch batch receiving the row; its {@code size} is incremented
 * @param record Avro record providing the values
 * @param orcSchema ORC schema whose field names and child types are indexed by column position
 * @param orcBatchSize number of rows after which the batch is flushed
 * @param writer destination for full batches
 * @throws IOException if writing the batch fails
 */
public static void addAvroRecord(
    VectorizedRowBatch batch,
    GenericRecord record,
    TypeDescription orcSchema,
    int orcBatchSize,
    Writer writer
) throws IOException {

  // None of these depend on the column index, so resolve them once rather than per column.
  final java.util.List<String> orcFieldNames = orcSchema.getFieldNames();
  final java.util.List<TypeDescription> orcFieldTypes = orcSchema.getChildren();
  final Schema avroSchema = record.getSchema();

  for (int c = 0; c < batch.numCols; c++) {
    ColumnVector colVector = batch.cols[c];
    final String thisField = orcFieldNames.get(c);
    final TypeDescription type = orcFieldTypes.get(c);

    Object fieldValue = record.get(thisField);
    Schema.Field avroField = avroSchema.getField(thisField);
    addToVector(type, colVector, avroField.schema(), fieldValue, batch.size);
  }

  batch.size++;

  if (batch.size % orcBatchSize == 0 || batch.size == batch.getMaxSize()) {
    writer.addRowBatch(batch);
    batch.reset();
    batch.size = 0;
  }
}
 
Example 10
Source Project: secor   Source File: VectorColumnFiller.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes a JSON object into row {@code row} of a {@link MapColumnVector}.
 *
 * <p>A null or JSON-null value marks the row as null. Otherwise each entry of the JSON object is
 * appended to the vector's key/value child vectors via {@code childConverters[0]} (keys) and
 * {@code childConverters[1]} (values).
 *
 * <p>The start offset is taken from {@code childCount} (the running number of child entries)
 * rather than from the previous row's offset and length: the null branch never writes
 * {@code offsets}/{@code lengths}, so after a null row those slots may hold stale values and a
 * derived offset could overlap entries that were already written.
 *
 * @param value JSON element to convert; expected to be a JSON object when not null
 * @param vect target vector; must be a {@link MapColumnVector} for non-null values
 * @param row row index to populate
 */
public void convert(JsonElement value, ColumnVector vect, int row) {
    if (value == null || value.isJsonNull()) {
        vect.noNulls = false;
        vect.isNull[row] = true;
    } else {
        MapColumnVector vector = (MapColumnVector) vect;
        JsonObject obj = value.getAsJsonObject();
        int entryCount = obj.size();
        int firstChild = vector.childCount;

        vector.offsets[row] = firstChild;
        vector.lengths[row] = entryCount;
        vector.childCount += entryCount;

        // Ensure enough space is available to store the keys and the values
        vector.keys.ensureSize(vector.childCount, true);
        vector.values.ensureSize(vector.childCount, true);

        int child = firstChild;
        for (String key : obj.keySet()) {
            childConverters[0].convert(new JsonPrimitive(key), vector.keys, child);
            childConverters[1].convert(obj.get(key), vector.values, child);
            child++;
        }
    }
}
 
Example 11
Source Project: secor   Source File: VectorColumnFiller.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes a JSON primitive into row {@code row} of a {@link UnionColumnVector}.
 *
 * <p>A null or JSON-null value marks the row as null. For a primitive, the converter registered
 * for its inferred JSON type decides which union member receives the value; the member index is
 * stored in {@code tags[row]}.
 *
 * @param value JSON element to convert
 * @param vect target vector; must be a {@link UnionColumnVector} for primitive values
 * @param row row index to populate
 * @throws IllegalArgumentException if no converter is registered for the primitive's JSON type
 * @throws UnsupportedOperationException if the value is neither null nor a primitive
 */
public void convert(JsonElement value, ColumnVector vect, int row) {
    if (value == null || value.isJsonNull()) {
        vect.noNulls = false;
        vect.isNull[row] = true;
        return;
    }

    if (!value.isJsonPrimitive()) {
        // It would be great to support non-primitive types in union type.
        // Let's leave this for another PR in the future.
        throw new UnsupportedOperationException();
    }

    UnionColumnVector unionVector = (UnionColumnVector) vect;
    JsonPrimitive jsonPrimitive = value.getAsJsonPrimitive();

    ConverterInfo memberInfo = childConverters.get(getJsonType(jsonPrimitive));
    if (memberInfo == null) {
        throw new IllegalArgumentException(
                String.format("Unable to infer type for '%s'", jsonPrimitive));
    }

    int memberIndex = memberInfo.getVectorIndex();
    JsonConverter memberConverter = memberInfo.getConverter();
    unionVector.tags[row] = memberIndex;
    memberConverter.convert(value, unionVector.fields[memberIndex], row);
}
 
Example 12
Source Project: secor   Source File: VectorColumnFiller.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes a JSON array into row {@code row} of a {@link ListColumnVector}.
 *
 * <p>A null or JSON-null value marks the row as null. Otherwise the array's elements are appended
 * to the child vector starting at the current {@code childCount}, which is advanced by the number
 * of elements.
 *
 * @param value JSON element to convert; expected to be a JSON array when not null
 * @param vect target vector; must be a {@link ListColumnVector} for non-null values
 * @param row row index to populate
 */
public void convert(JsonElement value, ColumnVector vect, int row) {
    if (value == null || value.isJsonNull()) {
        vect.noNulls = false;
        vect.isNull[row] = true;
        return;
    }

    ListColumnVector listVector = (ListColumnVector) vect;
    JsonArray elements = value.getAsJsonArray();
    int elementCount = elements.size();

    listVector.lengths[row] = elementCount;
    listVector.offsets[row] = listVector.childCount;
    listVector.childCount += elementCount;
    listVector.child.ensureSize(listVector.childCount, true);

    int firstChild = (int) listVector.offsets[row];
    for (int e = 0; e < elementCount; e++) {
        childrenConverter.convert(elements.get(e), listVector.child, firstChild + e);
    }
}
 
Example 13
/**
 * Fills the first {@code length} rows of a {@link LongColumnVector} from this column.
 *
 * <p>Values come from {@code column.getPrimitiveObjectArray}; a null entry marks the row as null,
 * every other entry is written through {@code setter}.
 *
 * @param vector target vector; must be a {@link LongColumnVector}
 * @param indexList forwarded to {@code getPrimitiveObjectArray}
 * @param start forwarded to {@code getPrimitiveObjectArray}
 * @param length number of rows to populate
 * @throws IOException declared by the interface; propagated from the calls made here
 */
@Override
public void setColumnVector( final ColumnVector vector , final IExpressionIndex indexList , final int start , final int length ) throws IOException{
  final LongColumnVector longVector = (LongColumnVector)vector;
  final PrimitiveObject[] values = column.getPrimitiveObjectArray( indexList , start , length );
  for( int row = 0 ; row < length ; row++ ){
    if( values[row] != null ){
      setter.set( values , longVector , row );
      continue;
    }
    VectorizedBatchUtil.setNullColIsNullValue( longVector , row );
  }
}
 
Example 14
/**
 * Fills the first {@code length} rows of a {@link DoubleColumnVector} from this column.
 *
 * <p>Values come from {@code column.getPrimitiveObjectArray}; a null entry marks the row as null,
 * every other entry is written through {@code setter}.
 *
 * @param vector target vector; must be a {@link DoubleColumnVector}
 * @param indexList forwarded to {@code getPrimitiveObjectArray}
 * @param start forwarded to {@code getPrimitiveObjectArray}
 * @param length number of rows to populate
 * @throws IOException declared by the interface; propagated from the calls made here
 */
@Override
public void setColumnVector( final ColumnVector vector , final IExpressionIndex indexList , final int start , final int length ) throws IOException{
  final DoubleColumnVector doubleVector = (DoubleColumnVector)vector;
  final PrimitiveObject[] values = column.getPrimitiveObjectArray( indexList , start , length );
  for( int row = 0 ; row < length ; row++ ){
    if( values[row] != null ){
      setter.set( values , doubleVector , row );
      continue;
    }
    VectorizedBatchUtil.setNullColIsNullValue( doubleVector , row );
  }
}
 
Example 15
/**
 * Fills the first {@code length} rows of a {@link BytesColumnVector} from this column.
 *
 * <p>Values come from {@code column.getPrimitiveObjectArray}. A null entry, or an entry whose
 * {@code getBytes()} is null, marks the row as null. An {@link IBytesLink} entry is stored by
 * reference using its own bytes, start and length; any other entry stores the result of
 * {@code getBytes()} from offset 0 over its full length.
 *
 * @param vector target vector; must be a {@link BytesColumnVector}
 * @param indexList forwarded to {@code getPrimitiveObjectArray}
 * @param start forwarded to {@code getPrimitiveObjectArray}
 * @param length number of rows to populate
 * @throws IOException declared by the interface; propagated from the calls made here
 */
@Override
public void setColumnVector( final ColumnVector vector , final IExpressionIndex indexList , final int start , final int length ) throws IOException{
  final BytesColumnVector bytesVector = (BytesColumnVector)vector;

  final PrimitiveObject[] values = column.getPrimitiveObjectArray( indexList , start , length );
  for( int row = 0 ; row < length ; row++ ){
    if( values[row] == null ){
      VectorizedBatchUtil.setNullColIsNullValue( bytesVector , row );
      continue;
    }

    if( values[row] instanceof IBytesLink ){
      final IBytesLink link = (IBytesLink)values[row];
      bytesVector.vector[row] = link.getLinkBytes();
      bytesVector.start[row] = link.getStart();
      bytesVector.length[row] = link.getLength();
      continue;
    }

    final byte[] rawBytes = values[row].getBytes();
    if( rawBytes == null ){
      VectorizedBatchUtil.setNullColIsNullValue( bytesVector , row );
      continue;
    }
    bytesVector.vector[row] = rawBytes;
    bytesVector.start[row] = 0;
    bytesVector.length[row] = rawBytes.length;
  }
}
 
Example 16
Source Project: dremio-oss   Source File: HiveORCVectorizedReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates one column vector per struct field of the given row inspector, in field order.
 *
 * @param rowOI inspector whose struct fields are converted
 * @return list of vectors, one per field
 * @throws UserException (unsupported error) when a field's category fails {@code isSupportedType}
 */
private List<ColumnVector> getVectors(StructObjectInspector rowOI) {
  final Function<StructField, ColumnVector> toVector = field -> {
    final Category fieldCategory = field.getFieldObjectInspector().getCategory();
    if (isSupportedType(fieldCategory)) {
      return getColumnVector(field.getFieldObjectInspector());
    }
    throw UserException.unsupportedError()
      .message("Vectorized ORC reader is not supported for datatype: %s", fieldCategory)
      .build(logger);
  };

  return rowOI.getAllStructFieldRefs()
    .stream()
    .map(toVector)
    .collect(Collectors.toList());
}
 
Example 17
Source Project: dremio-oss   Source File: HiveORCVectorizedReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link UnionColumnVector} with one child vector per member inspector of the union.
 *
 * @param uoi inspector describing the union's member types
 * @return union vector of {@code VectorizedRowBatch.DEFAULT_SIZE} wrapping the member vectors
 */
private ColumnVector getUnionColumnVector(UnionObjectInspector uoi) {
  final ColumnVector[] memberVectors = uoi.getObjectInspectors()
    .stream()
    .map(memberInspector -> getColumnVector(memberInspector))
    .toArray(ColumnVector[]::new);
  return new UnionColumnVector(VectorizedRowBatch.DEFAULT_SIZE, memberVectors);
}
 
Example 18
Source Project: dremio-oss   Source File: HiveORCVectorizedReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link StructColumnVector} with one child vector per field of the struct.
 *
 * @param soi inspector describing the struct's fields
 * @return struct vector of {@code VectorizedRowBatch.DEFAULT_SIZE} wrapping the field vectors
 */
private ColumnVector getStructColumnVector(StructObjectInspector soi) {
  final List<? extends StructField> fields = soi.getAllStructFieldRefs();
  final ColumnVector[] fieldVectors = new ColumnVector[fields.size()];
  int slot = 0;
  for (StructField field : fields) {
    fieldVectors[slot++] = getColumnVector(field.getFieldObjectInspector());
  }
  return new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE, fieldVectors);
}
 
Example 19
Source Project: dremio-oss   Source File: HiveORCVectorizedReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the column vector used for the primitive category of the given inspector.
 *
 * <p>Integer-like categories and DATE use a {@link LongColumnVector}, TIMESTAMP a
 * {@link TimestampColumnVector}, FLOAT/DOUBLE a {@link DoubleColumnVector}, binary and string
 * categories a {@link BytesColumnVector}, and DECIMAL a {@link DecimalColumnVector} built with
 * the precision and scale from the inspector's type info.
 *
 * @param poi inspector describing the primitive type
 * @return a new vector of {@code VectorizedRowBatch.DEFAULT_SIZE}
 * @throws UserException (unsupported error) for any other primitive category
 */
private ColumnVector getPrimitiveColumnVector(PrimitiveObjectInspector poi) {
    final int batchSize = VectorizedRowBatch.DEFAULT_SIZE;
    switch (poi.getPrimitiveCategory()) {
    case BOOLEAN:
    case BYTE:
    case SHORT:
    case INT:
    case LONG:
    case DATE:
      return new LongColumnVector(batchSize);
    case TIMESTAMP:
      return new TimestampColumnVector(batchSize);
    case FLOAT:
    case DOUBLE:
      return new DoubleColumnVector(batchSize);
    case BINARY:
    case STRING:
    case CHAR:
    case VARCHAR:
      return new BytesColumnVector(batchSize);
    case DECIMAL: {
      final DecimalTypeInfo decimalInfo = (DecimalTypeInfo) poi.getTypeInfo();
      return new DecimalColumnVector(batchSize, decimalInfo.precision(), decimalInfo.scale());
    }
    default:
      throw UserException.unsupportedError()
        .message("Vectorized ORC reader is not supported for datatype: %s", poi.getPrimitiveCategory())
        .build(logger);
    }
}
 
Example 20
Source Project: dremio-oss   Source File: HiveORCVectorizedReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Wraps the given data vectors in the six-column layout used for non-original files: five
 * {@link LongColumnVector}s followed by a {@link StructColumnVector} holding the data vectors.
 *
 * <p>The struct vector is sized with {@code VectorizedRowBatch.DEFAULT_SIZE}, like every other
 * vector created here. The first constructor argument of {@link StructColumnVector} is the
 * vector length in rows, not the number of fields, so passing {@code dataVectors.length} would
 * size it by the number of columns.
 *
 * <p>NOTE(review): the five long columns are presumably the ACID metadata columns - confirm.
 *
 * @param dataVectors vectors of the actual row columns
 * @return array of six vectors; index 5 is the struct wrapping {@code dataVectors}
 */
private ColumnVector[] createTransactionalVectors(ColumnVector[] dataVectors) {
  final int leadingLongColumns = 5;
  final ColumnVector[] transVectors = new ColumnVector[leadingLongColumns + 1];

  for (int i = 0; i < leadingLongColumns; i++) {
    transVectors[i] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
  }

  transVectors[leadingLongColumns] = new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE, dataVectors);

  return transVectors;
}
 
Example 21
Source Project: dremio-oss   Source File: HiveORCCopiers.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Helper method to create one {@link ORCCopier} per output vector.
 *
 * <p>For output index {@code i}, the projected ordinal at position {@code i} selects both the
 * ordinal id from {@code ordinalIdsFromOrcFile} and the input column. When there is no projected
 * ordinal for {@code i}, or the ordinal is beyond either array, a {@link NoOpCopier} is used.
 *
 * @param columnVectorData forwarded unchanged to {@code createCopier}
 * @param projectedColOrdinals ordinals of the columns that we are interested in reading from the file
 * @param ordinalIdsFromOrcFile ordinal ids indexed by projected column ordinal
 * @param output output vectors; the result has one copier per entry
 * @param input batch to read from
 * @param isOriginal when true the batch columns are used directly; otherwise the fields of the
 *        struct column at {@code HiveORCVectorizedReader.TRANS_ROW_COLUMN_INDEX} are used
 * @param operatorContextOptions forwarded unchanged to {@code createCopier}
 * @return array of copiers, parallel to {@code output}
 */
public static ORCCopier[] createCopiers(final HiveColumnVectorData columnVectorData,
                                        final List<Integer> projectedColOrdinals,
                                        int[] ordinalIdsFromOrcFile,
                                        final ValueVector[] output,
                                        final VectorizedRowBatch input,
                                        boolean isOriginal,
                                        HiveOperatorContextOptions operatorContextOptions) {
  final ColumnVector[] sourceCols;
  if (isOriginal) {
    sourceCols = input.cols;
  } else {
    sourceCols = ((StructColumnVector) input.cols[HiveORCVectorizedReader.TRANS_ROW_COLUMN_INDEX]).fields;
  }

  final ORCCopier[] result = new ORCCopier[output.length];
  for (int outIdx = 0; outIdx < result.length; outIdx++) {
    if (outIdx < projectedColOrdinals.size()) {
      final int ordinal = projectedColOrdinals.get(outIdx);
      if (ordinal < ordinalIdsFromOrcFile.length && ordinal < sourceCols.length) {
        result[outIdx] = createCopier(columnVectorData, ordinalIdsFromOrcFile[ordinal],
          output[outIdx], sourceCols[ordinal], operatorContextOptions);
        continue;
      }
    }
    // Nothing to read for this output vector.
    result[outIdx] = new NoOpCopier(null, null);
  }
  return result;
}
 
Example 22
Source Project: dremio-oss   Source File: HiveORCVectorizedReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates one column vector per struct field of the given row inspector, in field order.
 *
 * @param rowOI inspector whose struct fields are converted
 * @return list of vectors, one per field
 * @throws UserException (unsupported error) when a field's category fails {@code isSupportedType}
 */
private List<ColumnVector> getVectors(StructObjectInspector rowOI) {
  final Function<StructField, ColumnVector> toVector = field -> {
    final Category fieldCategory = field.getFieldObjectInspector().getCategory();
    if (isSupportedType(fieldCategory)) {
      return getColumnVector(field.getFieldObjectInspector());
    }
    throw UserException.unsupportedError()
      .message("Vectorized ORC reader is not supported for datatype: %s", fieldCategory)
      .build(logger);
  };

  return rowOI.getAllStructFieldRefs()
    .stream()
    .map(toVector)
    .collect(Collectors.toList());
}
 
Example 23
Source Project: dremio-oss   Source File: HiveORCVectorizedReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link UnionColumnVector} with one child vector per member inspector of the union.
 *
 * @param uoi inspector describing the union's member types
 * @return union vector of {@code VectorizedRowBatch.DEFAULT_SIZE} wrapping the member vectors
 */
private ColumnVector getUnionColumnVector(UnionObjectInspector uoi) {
  final ColumnVector[] memberVectors = uoi.getObjectInspectors()
    .stream()
    .map(memberInspector -> getColumnVector(memberInspector))
    .toArray(ColumnVector[]::new);
  return new UnionColumnVector(VectorizedRowBatch.DEFAULT_SIZE, memberVectors);
}
 
Example 24
Source Project: dremio-oss   Source File: HiveORCVectorizedReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link StructColumnVector} with one child vector per field of the struct.
 *
 * @param soi inspector describing the struct's fields
 * @return struct vector of {@code VectorizedRowBatch.DEFAULT_SIZE} wrapping the field vectors
 */
private ColumnVector getStructColumnVector(StructObjectInspector soi) {
  final List<? extends StructField> fields = soi.getAllStructFieldRefs();
  final ColumnVector[] fieldVectors = new ColumnVector[fields.size()];
  int slot = 0;
  for (StructField field : fields) {
    fieldVectors[slot++] = getColumnVector(field.getFieldObjectInspector());
  }
  return new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE, fieldVectors);
}
 
Example 25
Source Project: dremio-oss   Source File: HiveORCVectorizedReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the column vector used for the primitive category of the given inspector.
 *
 * <p>Integer-like categories and DATE use a {@link LongColumnVector}, TIMESTAMP a
 * {@link TimestampColumnVector}, FLOAT/DOUBLE a {@link DoubleColumnVector}, binary and string
 * categories a {@link BytesColumnVector}, and DECIMAL a {@link DecimalColumnVector} built with
 * the precision and scale from the inspector's type info.
 *
 * @param poi inspector describing the primitive type
 * @return a new vector of {@code VectorizedRowBatch.DEFAULT_SIZE}
 * @throws UserException (unsupported error) for any other primitive category
 */
private ColumnVector getPrimitiveColumnVector(PrimitiveObjectInspector poi) {
    final int batchSize = VectorizedRowBatch.DEFAULT_SIZE;
    switch (poi.getPrimitiveCategory()) {
    case BOOLEAN:
    case BYTE:
    case SHORT:
    case INT:
    case LONG:
    case DATE:
      return new LongColumnVector(batchSize);
    case TIMESTAMP:
      return new TimestampColumnVector(batchSize);
    case FLOAT:
    case DOUBLE:
      return new DoubleColumnVector(batchSize);
    case BINARY:
    case STRING:
    case CHAR:
    case VARCHAR:
      return new BytesColumnVector(batchSize);
    case DECIMAL: {
      final DecimalTypeInfo decimalInfo = (DecimalTypeInfo) poi.getTypeInfo();
      return new DecimalColumnVector(batchSize, decimalInfo.precision(), decimalInfo.scale());
    }
    default:
      throw UserException.unsupportedError()
        .message("Vectorized ORC reader is not supported for datatype: %s", poi.getPrimitiveCategory())
        .build(logger);
    }
}
 
Example 26
Source Project: dremio-oss   Source File: HiveORCVectorizedReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Wraps the given data vectors in the six-column layout used for non-original files: five
 * {@link LongColumnVector}s followed by a {@link StructColumnVector} holding the data vectors.
 *
 * <p>The struct vector is sized with {@code VectorizedRowBatch.DEFAULT_SIZE}, like every other
 * vector created here. The first constructor argument of {@link StructColumnVector} is the
 * vector length in rows, not the number of fields, so passing {@code dataVectors.length} would
 * size it by the number of columns.
 *
 * <p>NOTE(review): the five long columns are presumably the ACID metadata columns - confirm.
 *
 * @param dataVectors vectors of the actual row columns
 * @return array of six vectors; index 5 is the struct wrapping {@code dataVectors}
 */
private ColumnVector[] createTransactionalVectors(ColumnVector[] dataVectors) {
  final int leadingLongColumns = 5;
  final ColumnVector[] transVectors = new ColumnVector[leadingLongColumns + 1];

  for (int i = 0; i < leadingLongColumns; i++) {
    transVectors[i] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
  }

  transVectors[leadingLongColumns] = new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE, dataVectors);

  return transVectors;
}
 
Example 27
Source Project: dremio-oss   Source File: HiveORCCopiers.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Helper method to create one {@link ORCCopier} per output vector.
 *
 * <p>For output index {@code i}, the projected ordinal at position {@code i} selects both the
 * ordinal id from {@code ordinalIdsFromOrcFile} and the input column. When there is no projected
 * ordinal for {@code i}, or the ordinal is beyond either array, a {@link NoOpCopier} is used.
 *
 * @param columnVectorData forwarded unchanged to {@code createCopier}
 * @param projectedColOrdinals ordinals of the columns that we are interested in reading from the file
 * @param ordinalIdsFromOrcFile ordinal ids indexed by projected column ordinal
 * @param output output vectors; the result has one copier per entry
 * @param input batch to read from
 * @param isOriginal when true the batch columns are used directly; otherwise the fields of the
 *        struct column at {@code HiveORCVectorizedReader.TRANS_ROW_COLUMN_INDEX} are used
 * @param operatorContextOptions forwarded unchanged to {@code createCopier}
 * @return array of copiers, parallel to {@code output}
 */
public static ORCCopier[] createCopiers(final HiveColumnVectorData columnVectorData,
                                        final List<Integer> projectedColOrdinals,
                                        int[] ordinalIdsFromOrcFile,
                                        final ValueVector[] output,
                                        final VectorizedRowBatch input,
                                        boolean isOriginal,
                                        HiveOperatorContextOptions operatorContextOptions) {
  final ColumnVector[] sourceCols;
  if (isOriginal) {
    sourceCols = input.cols;
  } else {
    sourceCols = ((StructColumnVector) input.cols[HiveORCVectorizedReader.TRANS_ROW_COLUMN_INDEX]).fields;
  }

  final ORCCopier[] result = new ORCCopier[output.length];
  for (int outIdx = 0; outIdx < result.length; outIdx++) {
    if (outIdx < projectedColOrdinals.size()) {
      final int ordinal = projectedColOrdinals.get(outIdx);
      if (ordinal < ordinalIdsFromOrcFile.length && ordinal < sourceCols.length) {
        result[outIdx] = createCopier(columnVectorData, ordinalIdsFromOrcFile[ordinal],
          output[outIdx], sourceCols[ordinal], operatorContextOptions);
        continue;
      }
    }
    // Nothing to read for this output vector.
    result[outIdx] = new NoOpCopier(null, null);
  }
  return result;
}
 
Example 28
Source Project: flink   Source File: AbstractOrcColumnVector.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates an ORC vector from a partition spec value.
 * See hive {@code VectorizedRowBatchCtx#addPartitionColsToBatch}.
 *
 * <p>A {@code null} value is forwarded to the type-specific {@code create*Vector} helper in
 * every branch. BOOLEAN and DATE check for null explicitly, because unboxing or converting a
 * null there would fail before the helper is reached.
 * NOTE(review): assumes the helpers turn a null value into a null constant vector - confirm.
 *
 * @param type logical type of the constant
 * @param value the constant; may be null
 * @param batchSize size passed to the created vector
 * @return vector produced by the helper for the given type
 * @throws UnsupportedOperationException if the type root is not handled
 */
private static ColumnVector createHiveVectorFromConstant(
		LogicalType type, Object value, int batchSize) {
	switch (type.getTypeRoot()) {
		case CHAR:
		case VARCHAR:
		case BINARY:
		case VARBINARY:
			return createBytesVector(batchSize, value);
		case BOOLEAN:
			if (value == null) {
				// Unboxing a null Boolean would throw; pass null through like the numeric branch.
				return createLongVector(batchSize, (Object) null);
			}
			return createLongVector(batchSize, (Boolean) value ? 1 : 0);
		case TINYINT:
		case SMALLINT:
		case INTEGER:
		case BIGINT:
			return createLongVector(batchSize, value);
		case DECIMAL:
			DecimalType decimalType = (DecimalType) type;
			return createDecimalVector(
					batchSize, decimalType.getPrecision(), decimalType.getScale(), value);
		case FLOAT:
		case DOUBLE:
			return createDoubleVector(batchSize, value);
		case DATE:
			if (value == null) {
				// Skip the date conversion for a null constant; pass null through instead.
				return createLongVector(batchSize, (Object) null);
			}
			if (value instanceof LocalDate) {
				value = Date.valueOf((LocalDate) value);
			}
			return createLongVector(batchSize, dateToInternal((Date) value));
		case TIMESTAMP_WITHOUT_TIME_ZONE:
			return createTimestampVector(batchSize, value);
		default:
			throw new UnsupportedOperationException("Unsupported type: " + type);
	}
}
 
Example 29
Source Project: secor   Source File: JsonFieldFiller.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes the value of a single union row through the given JSON writer.
 *
 * <p>The tag stored for the row selects both the member vector and the matching child schema,
 * which are then handed to {@code setValue}.
 *
 * @param writer JSON writer receiving the value
 * @param vector union vector to read from
 * @param schema union schema whose children are indexed by tag
 * @param row row index to write
 * @throws JSONException propagated from {@code setValue}
 */
private static void setUnion(JSONWriter writer, UnionColumnVector vector,
                             TypeDescription schema, int row) throws JSONException {
    final int activeTag = vector.tags[row];
    final List<TypeDescription> memberSchemas = schema.getChildren();
    setValue(writer, vector.fields[activeTag], memberSchemas.get(activeTag), row);
}
 
Example 30
Source Project: secor   Source File: VectorColumnFiller.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes a JSON boolean into row {@code row} of a {@link LongColumnVector} as 1 (true) or 0 (false).
 *
 * <p>A null or JSON-null value marks the row as null instead.
 *
 * @param value JSON element to convert
 * @param vect target vector; must be a {@link LongColumnVector} for non-null values
 * @param row row index to populate
 */
public void convert(JsonElement value, ColumnVector vect, int row) {
    if (value == null || value.isJsonNull()) {
        vect.noNulls = false;
        vect.isNull[row] = true;
        return;
    }
    ((LongColumnVector) vect).vector[row] = value.getAsBoolean() ? 1 : 0;
}