Java Code Examples for org.apache.kylin.common.KylinConfig#getJobCuboidSizeTopNRatio()

The following examples show how to use org.apache.kylin.common.KylinConfig#getJobCuboidSizeTopNRatio(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: CubeStatsReader.java (from the kylin-on-parquet-v2 project, Apache License 2.0; 4 votes)
/**
 * Estimate the cuboid's size.
 *
 * <p>The per-row byte estimate is built in two steps:
 * <ol>
 *   <li>Row key: the segment's row key preamble size plus, for every bit of
 *       {@code cuboidId} that is set, the matching entry of {@code rowKeyColumnLength}.
 *       Bits are visited from the highest set bit of {@code baseCuboidId} downwards,
 *       so index 0 of the list corresponds to that highest bit.</li>
 *   <li>Measures: each measure's return type contributes its storage estimate to one of
 *       four buckets (count distinct, percentile, top-N, everything else). The
 *       "everything else" bucket also carries the row key length.</li>
 * </ol>
 * Each bucket is multiplied by {@code rowCount}; the normal, count-distinct and top-N
 * buckets are additionally scaled by their ratio from {@link KylinConfig}. The percentile
 * bucket is not scaled by any ratio.
 *
 * @param cubeSegment        segment supplying the config, the row key preamble size and the measures
 * @param cuboidId           bitmask of the cuboid being estimated
 * @param rowCount           row count of this cuboid; when 0 no measure is sized and the result is 0
 *                           for finite ratios
 * @param baseCuboidId       bitmask of the base cuboid; its highest set bit fixes how many bits are scanned
 * @param baseCuboidCount    row count of the base cuboid, used only for percentile measures
 * @param rowKeyColumnLength per-bit row key column lengths, ordered from the highest bit of
 *                           {@code baseCuboidId} to the lowest; must have at least as many
 *                           entries as the highest set bit position of {@code cuboidId} requires
 * @param sourceRowCount     source row count, used to derive the per-row cardinality passed to
 *                           count-distinct and top-N estimates
 * @return the cuboid size in M bytes
 */
private static double estimateCuboidStorageSize(CubeSegment cubeSegment, long cuboidId, long rowCount,
        long baseCuboidId, long baseCuboidCount, List<Integer> rowKeyColumnLength, long sourceRowCount) {

    int rowkeyLength = cubeSegment.getRowKeyPreambleSize();
    KylinConfig kylinConf = cubeSegment.getConfig();

    // Number of significant bits in the base cuboid id, i.e. how many positions to scan.
    int significantBits = Long.SIZE - Long.numberOfLeadingZeros(baseCuboidId);
    long mask = Long.highestOneBit(baseCuboidId);
    for (int i = 0; i < significantBits; i++) {
        // Compare against zero rather than "> 0": if the mask ever is the sign bit,
        // the AND result is negative and a "> 0" test would wrongly skip the column.
        if ((mask & cuboidId) != 0) {
            rowkeyLength += rowKeyColumnLength.get(i);
        }
        // Logical shift keeps the mask a single bit; an arithmetic shift would
        // sign-extend a mask that started at bit 63.
        mask >>>= 1;
    }

    // add the measure length
    int normalSpace = rowkeyLength;
    int countDistinctSpace = 0;
    double percentileSpace = 0;
    int topNSpace = 0;
    for (MeasureDesc measureDesc : cubeSegment.getCubeDesc().getMeasures()) {
        // The estimates below divide by rowCount; an empty cuboid contributes nothing anyway.
        if (rowCount == 0) {
            break;
        }
        FunctionDesc function = measureDesc.getFunction();
        String expression = function.getExpression();
        DataType returnType = function.getReturnDataType();
        if (expression.equals(FunctionDesc.FUNC_COUNT_DISTINCT)) {
            // Integer division on purpose; clamp to at least one value per row.
            long estimateDistinctCount = sourceRowCount / rowCount;
            if (estimateDistinctCount == 0) {
                estimateDistinctCount = 1L;
            }
            countDistinctSpace += returnType.getStorageBytesEstimate(estimateDistinctCount);
        } else if (expression.equals(FunctionDesc.FUNC_PERCENTILE)) {
            percentileSpace += returnType.getStorageBytesEstimate(baseCuboidCount * 1.0 / rowCount);
        } else if (expression.equals(TopNMeasureType.FUNC_TOP_N)) {
            long estimateTopNCount = sourceRowCount / rowCount;
            if (estimateTopNCount == 0) {
                estimateTopNCount = 1L;
            }
            topNSpace += returnType.getStorageBytesEstimate(estimateTopNCount);
        } else {
            normalSpace += returnType.getStorageBytesEstimate();
        }
    }

    double cuboidSizeRatio = kylinConf.getJobCuboidSizeRatio();
    double cuboidSizeMemHungryRatio = kylinConf.getJobCuboidSizeCountDistinctRatio();
    double cuboidSizeTopNRatio = kylinConf.getJobCuboidSizeTopNRatio();

    // The leading 1.0 forces double arithmetic before multiplying by rowCount,
    // so large row counts cannot overflow integer multiplication.
    double normalBytes = 1.0 * normalSpace * rowCount * cuboidSizeRatio;
    double countDistinctBytes = 1.0 * countDistinctSpace * rowCount * cuboidSizeMemHungryRatio;
    double percentileBytes = 1.0 * percentileSpace * rowCount;
    double topNBytes = 1.0 * topNSpace * rowCount * cuboidSizeTopNRatio;

    // Convert bytes to megabytes.
    return (normalBytes + countDistinctBytes + percentileBytes + topNBytes) / (1024L * 1024L);
}
 
Example 2
Source File: CubeStatsReader.java (from the kylin project, Apache License 2.0; 4 votes)
/**
 * Estimate the cuboid's size.
 *
 * <p>The per-row byte estimate is built in two steps:
 * <ol>
 *   <li>Row key: the segment's row key preamble size plus, for every bit of
 *       {@code cuboidId} that is set, the matching entry of {@code rowKeyColumnLength}.
 *       Bits are visited from the highest set bit of {@code baseCuboidId} downwards,
 *       so index 0 of the list corresponds to that highest bit.</li>
 *   <li>Measures: each measure's return type contributes its storage estimate to one of
 *       four buckets (count distinct, percentile, top-N, everything else). The
 *       "everything else" bucket also carries the row key length.</li>
 * </ol>
 * Each bucket is multiplied by {@code rowCount}; the normal, count-distinct and top-N
 * buckets are additionally scaled by their ratio from {@link KylinConfig}. The percentile
 * bucket is not scaled by any ratio.
 *
 * @param cubeSegment        segment supplying the config, the row key preamble size and the measures
 * @param cuboidId           bitmask of the cuboid being estimated
 * @param rowCount           row count of this cuboid; when 0 no measure is sized and the result is 0
 *                           for finite ratios
 * @param baseCuboidId       bitmask of the base cuboid; its highest set bit fixes how many bits are scanned
 * @param baseCuboidCount    row count of the base cuboid, used only for percentile measures
 * @param rowKeyColumnLength per-bit row key column lengths, ordered from the highest bit of
 *                           {@code baseCuboidId} to the lowest; must have at least as many
 *                           entries as the highest set bit position of {@code cuboidId} requires
 * @param sourceRowCount     source row count, used to derive the per-row cardinality passed to
 *                           count-distinct and top-N estimates
 * @return the cuboid size in M bytes
 */
private static double estimateCuboidStorageSize(CubeSegment cubeSegment, long cuboidId, long rowCount,
        long baseCuboidId, long baseCuboidCount, List<Integer> rowKeyColumnLength, long sourceRowCount) {

    int rowkeyLength = cubeSegment.getRowKeyPreambleSize();
    KylinConfig kylinConf = cubeSegment.getConfig();

    // Number of significant bits in the base cuboid id, i.e. how many positions to scan.
    int significantBits = Long.SIZE - Long.numberOfLeadingZeros(baseCuboidId);
    long mask = Long.highestOneBit(baseCuboidId);
    for (int i = 0; i < significantBits; i++) {
        // Compare against zero rather than "> 0": if the mask ever is the sign bit,
        // the AND result is negative and a "> 0" test would wrongly skip the column.
        if ((mask & cuboidId) != 0) {
            rowkeyLength += rowKeyColumnLength.get(i);
        }
        // Logical shift keeps the mask a single bit; an arithmetic shift would
        // sign-extend a mask that started at bit 63.
        mask >>>= 1;
    }

    // add the measure length
    int normalSpace = rowkeyLength;
    int countDistinctSpace = 0;
    double percentileSpace = 0;
    int topNSpace = 0;
    for (MeasureDesc measureDesc : cubeSegment.getCubeDesc().getMeasures()) {
        // The estimates below divide by rowCount; an empty cuboid contributes nothing anyway.
        if (rowCount == 0) {
            break;
        }
        FunctionDesc function = measureDesc.getFunction();
        String expression = function.getExpression();
        DataType returnType = function.getReturnDataType();
        if (expression.equals(FunctionDesc.FUNC_COUNT_DISTINCT)) {
            // Integer division on purpose; clamp to at least one value per row.
            long estimateDistinctCount = sourceRowCount / rowCount;
            if (estimateDistinctCount == 0) {
                estimateDistinctCount = 1L;
            }
            countDistinctSpace += returnType.getStorageBytesEstimate(estimateDistinctCount);
        } else if (expression.equals(FunctionDesc.FUNC_PERCENTILE)) {
            percentileSpace += returnType.getStorageBytesEstimate(baseCuboidCount * 1.0 / rowCount);
        } else if (expression.equals(TopNMeasureType.FUNC_TOP_N)) {
            long estimateTopNCount = sourceRowCount / rowCount;
            if (estimateTopNCount == 0) {
                estimateTopNCount = 1L;
            }
            topNSpace += returnType.getStorageBytesEstimate(estimateTopNCount);
        } else {
            normalSpace += returnType.getStorageBytesEstimate();
        }
    }

    double cuboidSizeRatio = kylinConf.getJobCuboidSizeRatio();
    double cuboidSizeMemHungryRatio = kylinConf.getJobCuboidSizeCountDistinctRatio();
    double cuboidSizeTopNRatio = kylinConf.getJobCuboidSizeTopNRatio();

    // The leading 1.0 forces double arithmetic before multiplying by rowCount,
    // so large row counts cannot overflow integer multiplication.
    double normalBytes = 1.0 * normalSpace * rowCount * cuboidSizeRatio;
    double countDistinctBytes = 1.0 * countDistinctSpace * rowCount * cuboidSizeMemHungryRatio;
    double percentileBytes = 1.0 * percentileSpace * rowCount;
    double topNBytes = 1.0 * topNSpace * rowCount * cuboidSizeTopNRatio;

    // Convert bytes to megabytes.
    return (normalBytes + countDistinctBytes + percentileBytes + topNBytes) / (1024L * 1024L);
}