org.apache.hadoop.hive.ql.util.JavaDataModel Java Examples

The following examples show how to use org.apache.hadoop.hive.ql.util.JavaDataModel. Each example comes from an open-source project; the source file and license are noted above the code.
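For context, JavaDataModel is Hive's helper for estimating the in-memory footprint of Java objects, with variants for 32-bit and 64-bit JVMs. The following is a minimal sketch of the accessors used throughout the examples below (all of these calls appear in the examples themselves); the printed values depend on the JVM the code runs on:

import org.apache.hadoop.hive.ql.util.JavaDataModel;

public class JavaDataModelDemo {
    public static void main(String[] args) {
        // Selects the model (JAVA32 or JAVA64) matching the running JVM.
        JavaDataModel model = JavaDataModel.get();

        // Width of narrow primitives (boolean, byte, short, int, float) ...
        System.out.println("primitive1 = " + model.primitive1());
        // ... and of wide primitives (long, double).
        System.out.println("primitive2 = " + model.primitive2());

        // Estimated footprint of a java.lang.String of a given length,
        // including object and backing-array overhead.
        System.out.println("10-char string = " + model.lengthForStringOfLength(10));
    }
}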
Example #1
Source File: OrcFlowFileWriter.java    From localization_nifi with Apache License 2.0
private long getRawDataSizeFromPrimitives(TreeWriter child, ObjectInspector oi) {
    long result = 0;
    long numVals = child.fileStatistics.getNumberOfValues();
    switch (((PrimitiveObjectInspector) oi).getPrimitiveCategory()) {
        case BOOLEAN:
        case BYTE:
        case SHORT:
        case INT:
        case FLOAT:
            return numVals * JavaDataModel.get().primitive1();
        case LONG:
        case DOUBLE:
            return numVals * JavaDataModel.get().primitive2();
        case STRING:
        case VARCHAR:
        case CHAR:
            // ORC strings are materialized as java.lang.String, so use
            // JavaDataModel to estimate their overall in-memory size
            child = (StringTreeWriter) child; // cast only asserts the runtime type
            StringColumnStatistics scs = (StringColumnStatistics) child.fileStatistics;
            numVals = numVals == 0 ? 1 : numVals; // guard against division by zero
            int avgStringLen = (int) (scs.getSum() / numVals);
            return numVals * JavaDataModel.get().lengthForStringOfLength(avgStringLen);
        case DECIMAL:
            return numVals * JavaDataModel.get().lengthOfDecimal();
        case DATE:
            return numVals * JavaDataModel.get().lengthOfDate();
        case BINARY:
            // get total length of binary blob
            BinaryColumnStatistics bcs = (BinaryColumnStatistics) child.fileStatistics;
            return bcs.getSum();
        case TIMESTAMP:
            return numVals * JavaDataModel.get().lengthOfTimestamp();
        default:
            LOG.debug("Unknown primitive category.");
            break;
    }

    return result;
}
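The STRING/VARCHAR/CHAR branch above is the only one that needs column statistics: it derives an average string length from the total character count and multiplies by the per-string footprint. Below is a standalone sketch of just that arithmetic, with a hypothetical helper and made-up values standing in for StringColumnStatistics:

import org.apache.hadoop.hive.ql.util.JavaDataModel;

public class StringColumnSizeSketch {
    // totalChars stands in for scs.getSum(), rowCount for getNumberOfValues().
    static long estimateStringColumn(long totalChars, long rowCount) {
        long numVals = rowCount == 0 ? 1 : rowCount;     // guard against division by zero
        int avgStringLen = (int) (totalChars / numVals); // integer division, as in the example
        return numVals * JavaDataModel.get().lengthForStringOfLength(avgStringLen);
    }

    public static void main(String[] args) {
        // 10,000 rows totalling 1,000,000 characters -> average length 100.
        System.out.println(estimateStringColumn(1_000_000L, 10_000L));
    }
}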
 
Example #2
Source File: OrcFlowFileWriter.java    From nifi with Apache License 2.0

The method body is character-for-character identical to Example #1, so it is not repeated here.
 
Example #3
Source File: FMeasureUDAF.java    From incubator-hivemall with Apache License 2.0
@Override
public int estimate() {
    JavaDataModel model = JavaDataModel.get();
    return model.primitive2() * 4 + model.lengthFor(average);
}
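This estimate() is the hook Hive's map-side hash aggregation uses to track per-buffer memory: primitive2() * 4 accounts for four 8-byte fields and lengthFor(average) for a String member. A minimal sketch of such an estimable buffer, assuming Hive's GenericUDAFEvaluator API; the field names are illustrative, not Hivemall's actual ones:

import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AbstractAggregationBuffer;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationType;
import org.apache.hadoop.hive.ql.util.JavaDataModel;

@AggregationType(estimable = true)
public class FMeasureBufferSketch extends AbstractAggregationBuffer {
    long tp, totalActual, totalPredicted; // three 8-byte counters (illustrative names)
    double beta;                          // fourth 8-byte field
    String average;                       // variable-length member, e.g. "micro"

    @Override
    public int estimate() {
        JavaDataModel model = JavaDataModel.get();
        // Four long/double fields plus the String's estimated footprint.
        return model.primitive2() * 4 + model.lengthFor(average);
    }
}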
 
Example #4
Source File: KPAPredictUDAF.java    From incubator-hivemall with Apache License 2.0
@Override
public int estimate() {
    return JavaDataModel.PRIMITIVES2;
}
 
Example #5
Source File: MinByUDAF.java    From incubator-hivemall with Apache License 2.0
@Override
public int estimate() {
    return JavaDataModel.PRIMITIVES2 * 2; // rough estimate
}
 
Example #6
Source File: MaxByUDAF.java    From incubator-hivemall with Apache License 2.0
@Override
public int estimate() {
    return JavaDataModel.PRIMITIVES2 * 2; // rough estimate
}
 
Example #7
Source File: MajorityVoteUDAF.java    From incubator-hivemall with Apache License 2.0
@Override
public int estimate() {
    int size = partial.size();
    return JavaDataModel.PRIMITIVES2 * size * 2; // rough estimate
}
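Examples #4 through #7 use the static constant JavaDataModel.PRIMITIVES2 instead of going through an instance. In Hive's current source, primitive2() returns that same constant for both JAVA32 and JAVA64 (primitive widths do not depend on pointer size), so the two styles should agree; a small sketch contrasting them, with that equivalence treated as an observation about the implementation rather than an API guarantee:

import org.apache.hadoop.hive.ql.util.JavaDataModel;

public class EstimateStyles {
    // Instance style, as in Example #3: resolves the model for the running JVM.
    static int viaInstance() {
        return JavaDataModel.get().primitive2() * 2;
    }

    // Static style, as in Examples #4-#7: references the shared constant directly.
    static int viaConstant() {
        return JavaDataModel.PRIMITIVES2 * 2;
    }

    public static void main(String[] args) {
        System.out.println(viaInstance() + " == " + viaConstant());
    }
}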
 
Example #8
Source File: WriterImpl.java    From tajo with Apache License 2.0
private long getRawDataSize(TreeWriter child,
                            TypeDescription schema) {
  long total = 0;
  long numVals = child.fileStatistics.getNumberOfValues();
  switch (schema.getCategory()) {
    case BOOLEAN:
    case BYTE:
    case SHORT:
    case INT:
    case FLOAT:
      return numVals * JavaDataModel.get().primitive1();
    case LONG:
    case DOUBLE:
      return numVals * JavaDataModel.get().primitive2();
    case STRING:
    case VARCHAR:
    case CHAR:
      // ORC strings are materialized as java.lang.String, so use
      // JavaDataModel to estimate their overall in-memory size
      StringColumnStatistics scs = (StringColumnStatistics) child.fileStatistics;
      numVals = numVals == 0 ? 1 : numVals; // guard against division by zero
      int avgStringLen = (int) (scs.getSum() / numVals);
      return numVals * JavaDataModel.get().lengthForStringOfLength(avgStringLen);
    case DECIMAL:
      return numVals * JavaDataModel.get().lengthOfDecimal();
    case DATE:
      return numVals * JavaDataModel.get().lengthOfDate();
    case BINARY:
      // get total length of binary blob
      BinaryColumnStatistics bcs = (BinaryColumnStatistics) child.fileStatistics;
      return bcs.getSum();
    case TIMESTAMP:
      return numVals * JavaDataModel.get().lengthOfTimestamp();
    case LIST:
    case MAP:
    case UNION:
    case STRUCT: {
      TreeWriter[] childWriters = child.getChildrenWriters();
      List<TypeDescription> childTypes = schema.getChildren();
      for (int i=0; i < childWriters.length; ++i) {
        total += getRawDataSize(childWriters[i], childTypes.get(i));
      }
      break;
    }
    default:
      LOG.debug("Unknown object inspector category.");
      break;
  }
  return total;
}
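Unlike Examples #1 and #2, this version also recurses into compound types (LIST, MAP, UNION, STRUCT), summing the raw sizes of all child writers; note that it adds no overhead for the container itself. The following is a hypothetical standalone variant of the same recursion driven purely by an orc-core TypeDescription, with a fixed assumed average string length since no column statistics are available here:

import java.util.List;
import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.orc.TypeDescription;

public class SchemaSizeSketch {
    // Hypothetical helper: estimated per-row raw size for a schema.
    static long perRowSize(TypeDescription schema, int assumedAvgStringLen) {
        JavaDataModel model = JavaDataModel.get();
        switch (schema.getCategory()) {
            case BOOLEAN: case BYTE: case SHORT: case INT: case FLOAT:
                return model.primitive1();
            case LONG: case DOUBLE:
                return model.primitive2();
            case STRING: case VARCHAR: case CHAR:
                return model.lengthForStringOfLength(assumedAvgStringLen);
            case DECIMAL:
                return model.lengthOfDecimal();
            case DATE:
                return model.lengthOfDate();
            case TIMESTAMP:
                return model.lengthOfTimestamp();
            case BINARY:
                // Real code uses BinaryColumnStatistics.getSum(); without stats
                // there is no meaningful per-row constant, so skip it here.
                return 0;
            case LIST: case MAP: case UNION: case STRUCT: {
                // Sum the children, as the Tajo example does for child writers.
                long total = 0;
                List<TypeDescription> children = schema.getChildren();
                for (TypeDescription child : children) {
                    total += perRowSize(child, assumedAvgStringLen);
                }
                return total;
            }
            default:
                return 0;
        }
    }

    public static void main(String[] args) {
        TypeDescription schema =
            TypeDescription.fromString("struct<id:bigint,name:string,score:double>");
        System.out.println(perRowSize(schema, 16)); // assumed 16-char average strings
    }
}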