Java Code Examples for org.apache.kylin.cube.CubeSegment#getCubeDesc()

The following examples show how to use org.apache.kylin.cube.CubeSegment#getCubeDesc(), drawn from open-source projects. Each example notes its source file, the project it comes from, and its license.
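Before diving into the examples, a quick orientation: CubeSegment#getCubeDesc() returns the CubeDesc the segment was built against, i.e. the cube's metadata (row key layout, dimensions, measures, and config). The minimal sketch below shows one way to reach that call; it assumes a configured Kylin environment, and the cube name "sample_cube" is a hypothetical placeholder rather than part of the examples that follow.

import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.cube.model.CubeDesc;

public class GetCubeDescSketch {
    public static void main(String[] args) {
        // Assumes KYLIN_HOME / kylin.properties point at a working metadata store.
        KylinConfig config = KylinConfig.getInstanceFromEnv();
        CubeManager cubeManager = CubeManager.getInstance(config);

        // "sample_cube" is a hypothetical cube name; substitute your own.
        CubeInstance cube = cubeManager.getCube("sample_cube");
        CubeSegment segment = cube.getLatestReadySegment();

        // The call this page documents: the descriptor shared by all of the cube's segments.
        CubeDesc cubeDesc = segment.getCubeDesc();
        System.out.println("Row key columns: " + cubeDesc.getRowkey().getRowKeyColumns().length);
    }
}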
Example 1
Source File: CubeStatsReader.java    From kylin-on-parquet-v2 with Apache License 2.0
private static Map<Long, Double> getCuboidSizeMapFromRowCount(CubeSegment cubeSegment, Map<Long, Long> rowCountMap,
                                                              long sourceRowCount, boolean origin) {
    final CubeDesc cubeDesc = cubeSegment.getCubeDesc();
    final List<Integer> rowkeyColumnSize = Lists.newArrayList();
    final Cuboid baseCuboid = Cuboid.getBaseCuboid(cubeDesc);
    final List<TblColRef> columnList = baseCuboid.getColumns();
    final CubeDimEncMap dimEncMap = cubeSegment.getDimensionEncodingMap();
    final Long baseCuboidRowCount = rowCountMap.get(baseCuboid.getId());

    for (int i = 0; i < columnList.size(); i++) {
        rowkeyColumnSize.add(dimEncMap.get(columnList.get(i)).getLengthOfEncoding());
    }

    Map<Long, Double> sizeMap = Maps.newHashMap();
    for (Map.Entry<Long, Long> entry : rowCountMap.entrySet()) {
        sizeMap.put(entry.getKey(), estimateCuboidStorageSize(cubeSegment, entry.getKey(), entry.getValue(),
                baseCuboid.getId(), baseCuboidRowCount, rowkeyColumnSize, sourceRowCount));
    }

    if (!origin && cubeSegment.getConfig().enableJobCuboidSizeOptimize()) {
        optimizeSizeMap(sizeMap, cubeSegment);
    }

    return sizeMap;
}
 
Example 2
Source File: CubeStatsReader.java    From kylin with Apache License 2.0
private static Map<Long, Double> getCuboidSizeMapFromRowCount(CubeSegment cubeSegment, Map<Long, Long> rowCountMap,
                                                              long sourceRowCount, boolean origin) {
    final CubeDesc cubeDesc = cubeSegment.getCubeDesc();
    final List<Integer> rowkeyColumnSize = Lists.newArrayList();
    final Cuboid baseCuboid = Cuboid.getBaseCuboid(cubeDesc);
    final List<TblColRef> columnList = baseCuboid.getColumns();
    final CubeDimEncMap dimEncMap = cubeSegment.getDimensionEncodingMap();
    final Long baseCuboidRowCount = rowCountMap.get(baseCuboid.getId());

    for (int i = 0; i < columnList.size(); i++) {
        rowkeyColumnSize.add(dimEncMap.get(columnList.get(i)).getLengthOfEncoding());
    }

    Map<Long, Double> sizeMap = Maps.newHashMap();
    for (Map.Entry<Long, Long> entry : rowCountMap.entrySet()) {
        sizeMap.put(entry.getKey(), estimateCuboidStorageSize(cubeSegment, entry.getKey(), entry.getValue(),
                baseCuboid.getId(), baseCuboidRowCount, rowkeyColumnSize, sourceRowCount));
    }

    if (!origin && cubeSegment.getConfig().enableJobCuboidSizeOptimize()) {
        optimizeSizeMap(sizeMap, cubeSegment);
    }

    return sizeMap;
}
 
Example 3
Source File: CubingJobBuilder.java    From Kylin with Apache License 2.0
Pair<AbstractExecutable, AbstractExecutable> addCubingSteps(CubeSegment seg, String cuboidRootPath, CubingJob result) {
    final int groupRowkeyColumnsCount = seg.getCubeDesc().getRowkey().getNCuboidBuildLevels();
    final int totalRowkeyColumnsCount = seg.getCubeDesc().getRowkey().getRowKeyColumns().length;

    final String jobId = result.getId();
    final CubeJoinedFlatTableDesc intermediateTableDesc = new CubeJoinedFlatTableDesc(seg.getCubeDesc(), seg);
    final String intermediateHiveTableName = getIntermediateHiveTableName(intermediateTableDesc, jobId);
    final String intermediateHiveTableLocation = getIntermediateHiveTableLocation(intermediateTableDesc, jobId);
    final String factDistinctColumnsPath = getFactDistinctColumnsPath(seg, jobId);
    final String[] cuboidOutputTempPath = getCuboidOutputPaths(cuboidRootPath, totalRowkeyColumnsCount, groupRowkeyColumnsCount);

    final AbstractExecutable intermediateHiveTableStep = createIntermediateHiveTableStep(intermediateTableDesc, jobId);
    result.addTask(intermediateHiveTableStep);

    result.addTask(createFactDistinctColumnsStep(seg, intermediateHiveTableName, jobId));

    result.addTask(createBuildDictionaryStep(seg, factDistinctColumnsPath));

    // base cuboid step
    final MapReduceExecutable baseCuboidStep = createBaseCuboidStep(seg, intermediateHiveTableLocation, cuboidOutputTempPath);
    result.addTask(baseCuboidStep);

    // n dim cuboid steps
    for (int i = 1; i <= groupRowkeyColumnsCount; i++) {
        int dimNum = totalRowkeyColumnsCount - i;
        result.addTask(createNDimensionCuboidStep(seg, cuboidOutputTempPath, dimNum, totalRowkeyColumnsCount));
    }

    return new Pair<AbstractExecutable, AbstractExecutable>(intermediateHiveTableStep, baseCuboidStep);
}
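A quick sanity check on the loop above: dimNum counts down from the base cuboid, so with, say, 10 row key columns and 3 build levels it schedules 9-, 8-, and 7-dimension cuboid steps after the base cuboid step, each layer consuming the previous layer's output path.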
 
Example 4
Source File: RowKeySplitter.java    From kylin with Apache License 2.0
public RowKeySplitter(CubeSegment cubeSeg, int splitLen, int bytesLen) {
    this.enableSharding = cubeSeg.isEnableSharding();
    this.cubeDesc = cubeSeg.getCubeDesc();
    IDimensionEncodingMap dimEncoding = new CubeDimEncMap(cubeSeg);

    // Touch each row key column once so its dimension encoding is initialized eagerly.
    for (RowKeyColDesc rowKeyColDesc : cubeDesc.getRowkey().getRowKeyColumns()) {
        dimEncoding.get(rowKeyColDesc.getColRef());
    }

    this.colIO = new RowKeyColumnIO(dimEncoding);

    this.splitBuffers = new ByteArray[splitLen];
    this.splitOffsets = new int[splitLen];
    this.bufferSize = 0;
}
 
Example 5
Source File: RowKeySplitter.java    From kylin-on-parquet-v2 with Apache License 2.0
public RowKeySplitter(CubeSegment cubeSeg, int splitLen, int bytesLen) {
    this.enableSharding = cubeSeg.isEnableSharding();
    this.cubeDesc = cubeSeg.getCubeDesc();
    IDimensionEncodingMap dimEncoding = new CubeDimEncMap(cubeSeg);

    // Touch each row key column once so its dimension encoding is initialized eagerly.
    for (RowKeyColDesc rowKeyColDesc : cubeDesc.getRowkey().getRowKeyColumns()) {
        dimEncoding.get(rowKeyColDesc.getColRef());
    }

    this.colIO = new RowKeyColumnIO(dimEncoding);

    this.splitBuffers = new ByteArray[splitLen];
    this.splitOffsets = new int[splitLen];
    this.bufferSize = 0;
}
 
Example 6
Source File: KafkaInputBase.java    From kylin-on-parquet-v2 with Apache License 2.0
public BaseBatchCubingInputSide(CubeSegment seg, IJoinedFlatTableDesc flatDesc) {
    this.conf = new JobEngineConfig(KylinConfig.getInstanceFromEnv());
    this.config = seg.getConfig();
    this.flatDesc = flatDesc;
    this.hiveTableDatabase = config.getHiveDatabaseForIntermediateTable();
    this.seg = seg;
    this.cubeDesc = seg.getCubeDesc();
    this.cubeName = seg.getCubeInstance().getName();
}
 
Example 7
Source File: CuboidSchedulerUtil.java    From kylin with Apache License 2.0
public static CuboidScheduler getCuboidScheduler(CubeSegment segment, Set<Long> cuboidSet) {
    try {
        Map<Long, Long> cuboidsWithRowCnt = CuboidStatsReaderUtil.readCuboidStatsFromSegment(cuboidSet, segment);
        Comparator<Long> comparator = cuboidsWithRowCnt == null ? Cuboid.cuboidSelectComparator
                : new TreeCuboidScheduler.CuboidCostComparator(cuboidsWithRowCnt);
        return new TreeCuboidScheduler(segment.getCubeDesc(), Lists.newArrayList(cuboidSet), comparator);
    } catch (IOException e) {
        throw new RuntimeException("Fail to cube stats for segment" + segment + " due to " + e);
    }
}
 
Example 8
Source File: RowKeyDecoder.java    From Kylin with Apache License 2.0
public RowKeyDecoder(CubeSegment cubeSegment) {
    this.cubeDesc = cubeSegment.getCubeDesc();
    this.rowKeySplitter = new RowKeySplitter(cubeSegment, 65, 255); // up to 65 row key parts of at most 255 bytes each
    this.colIO = new RowKeyColumnIO(cubeSegment);
    this.values = new ArrayList<String>();
}
 
Example 9
Source File: CubeDimEncMap.java    From kylin with Apache License 2.0
public CubeDimEncMap(CubeSegment seg) {
    this.cubeDesc = seg.getCubeDesc();
    this.seg = seg;
    this.dictionaryMap = null;
}
 
Example 10
Source File: RowKeyDecoder.java    From kylin with Apache License 2.0
public RowKeyDecoder(CubeSegment cubeSegment) {
    this.cubeDesc = cubeSegment.getCubeDesc();
    this.rowKeySplitter = new RowKeySplitter(cubeSegment);
    this.colIO = new RowKeyColumnIO(cubeSegment.getDimensionEncodingMap());
    this.values = new ArrayList<String>();
}
 
Example 11
Source File: CubeJoinedFlatTableDesc.java    From kylin with Apache License 2.0
public CubeJoinedFlatTableDesc(CubeSegment cubeSegment, boolean includingDerived) {
    this(cubeSegment.getCubeDesc(), cubeSegment, includingDerived);
}
 
Example 12
Source File: CubeJoinedFlatTableDesc.java    From kylin with Apache License 2.0
public CubeJoinedFlatTableDesc(CubeSegment cubeSegment) {
    this(cubeSegment.getCubeDesc(), cubeSegment, false);
}
 
Example 13
Source File: CreateHTableJob.java    From kylin with Apache License 2.0
public static byte[][] getRegionSplitsFromCuboidStatistics(final Map<Long, Double> cubeSizeMap,
        final KylinConfig kylinConfig, final CubeSegment cubeSegment, final Path hfileSplitsOutputFolder)
        throws IOException {

    final CubeDesc cubeDesc = cubeSegment.getCubeDesc();
    float cut = cubeDesc.getConfig().getKylinHBaseRegionCut();

    logger.info("Cut for HBase region is {} GB", cut);

    double totalSizeInM = 0;
    for (Double cuboidSize : cubeSizeMap.values()) {
        totalSizeInM += cuboidSize;
    }

    List<Long> allCuboids = Lists.newArrayList();
    allCuboids.addAll(cubeSizeMap.keySet());
    Collections.sort(allCuboids);

    int nRegion = Math.round((float) (totalSizeInM / (cut * 1024L)));
    nRegion = Math.max(kylinConfig.getHBaseRegionCountMin(), nRegion);
    nRegion = Math.min(kylinConfig.getHBaseRegionCountMax(), nRegion);

    if (cubeSegment.isEnableSharding()) {
        // clamp the region count to a range that sharding can address
        int original = nRegion;
        if (nRegion == 0) {
            nRegion = 1;
        }

        if (nRegion > Short.MAX_VALUE) {
            logger.info("Too many regions! reduce to {}", Short.MAX_VALUE);
            nRegion = Short.MAX_VALUE;
        }

        if (nRegion != original) {
            logger.info("Region count is adjusted from {} to {} to help random sharding", original, nRegion);
        }
    }

    int mbPerRegion = (int) (totalSizeInM / nRegion);
    mbPerRegion = Math.max(1, mbPerRegion);

    logger.info("Total size {} M (estimated)", totalSizeInM);
    logger.info("Expecting {} regions.", nRegion);
    logger.info("Expecting {} MB per region.", mbPerRegion);

    if (cubeSegment.isEnableSharding()) {
        // each cuboid will be split into a different number of shards
        HashMap<Long, Short> cuboidShards = Maps.newHashMap();

        // each shard/region may be split into multiple HFiles; list index = region ID, map key = cuboid ID, value = that cuboid's size within the region
        List<HashMap<Long, Double>> innerRegionSplits = Lists.newArrayList();
        for (int i = 0; i < nRegion; i++) {
            innerRegionSplits.add(new HashMap<Long, Double>());
        }

        double[] regionSizes = new double[nRegion];
        for (long cuboidId : allCuboids) {
            double estimatedSize = cubeSizeMap.get(cuboidId);
            double magic = 23; // empirical factor that fans large cuboids out across more shards
            int shardNum = (int) (estimatedSize * magic / mbPerRegion + 1);
            if (shardNum < 1) {
                shardNum = 1;
            }

            if (shardNum > nRegion) {
                logger.debug(String.format(Locale.ROOT,
                        "Cuboid %d 's estimated size %.2f MB will generate %d regions, " + "reduce to %d", cuboidId,
                        estimatedSize, shardNum, nRegion));
                shardNum = nRegion;
            } else {
                logger.debug(
                        String.format(Locale.ROOT, "Cuboid %d 's estimated size %.2f MB will generate %d regions",
                                cuboidId, estimatedSize, shardNum));
            }

            cuboidShards.put(cuboidId, (short) shardNum);
            short startShard = ShardingHash.getShard(cuboidId, nRegion);
            for (short i = startShard; i < startShard + shardNum; ++i) {
                short j = (short) (i % nRegion);
                regionSizes[j] = regionSizes[j] + estimatedSize / shardNum;
                innerRegionSplits.get(j).put(cuboidId, estimatedSize / shardNum);
            }
        }

        for (int i = 0; i < nRegion; ++i) {
            logger.debug("Region {}'s estimated size is {} MB, accounting for {} percent", i, regionSizes[i],
                    100.0 * regionSizes[i] / totalSizeInM);
        }

        CuboidShardUtil.saveCuboidShards(cubeSegment, cuboidShards, nRegion);
        saveHFileSplits(innerRegionSplits, mbPerRegion, hfileSplitsOutputFolder, kylinConfig);
        return getSplitsByRegionCount(nRegion);
    } else {
        throw new IllegalStateException("Not supported");
    }
}
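For a concrete feel for the sizing math above: with cut = 5 GB and totalSizeInM = 51200 MB, nRegion = round(51200 / (5 * 1024)) = 10, before the configured min/max bounds and the Short.MAX_VALUE cap apply. Each cuboid is then fanned out over shardNum ≈ estimatedSize * 23 / mbPerRegion consecutive regions starting at its hash shard, which keeps any single region from being dominated by one large cuboid.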
 
Example 14
Source File: MapReduceUtil.java    From kylin with Apache License 2.0
/**
 * @param cuboidScheduler a caller-supplied scheduler gives more flexibility than the cube's default
 */
public static int getLayeredCubingReduceTaskNum(CubeSegment cubeSegment, CuboidScheduler cuboidScheduler,
        double totalMapInputMB, int level)
        throws ClassNotFoundException, IOException, InterruptedException, JobException {
    CubeDesc cubeDesc = cubeSegment.getCubeDesc();
    KylinConfig kylinConfig = cubeDesc.getConfig();

    double perReduceInputMB = kylinConfig.getDefaultHadoopJobReducerInputMB();
    double reduceCountRatio = kylinConfig.getDefaultHadoopJobReducerCountRatio();
    logger.info("Having per reduce MB " + perReduceInputMB + ", reduce count ratio " + reduceCountRatio + ", level "
            + level);

    CubeStatsReader cubeStatsReader = new CubeStatsReader(cubeSegment, cuboidScheduler, kylinConfig);

    double parentLayerSizeEst, currentLayerSizeEst, adjustedCurrentLayerSizeEst;

    if (level == -1) {
        //merge case
        double estimatedSize = cubeStatsReader.estimateCubeSize();
        adjustedCurrentLayerSizeEst = estimatedSize > totalMapInputMB ? totalMapInputMB : estimatedSize;
        logger.debug("estimated size {}, input size {}, adjustedCurrentLayerSizeEst: {}", estimatedSize,
                totalMapInputMB, adjustedCurrentLayerSizeEst);
    } else if (level == 0) {
        //base cuboid case TODO: the estimation could be very WRONG because it has no correction
        adjustedCurrentLayerSizeEst = cubeStatsReader.estimateLayerSize(0);
        logger.debug("adjustedCurrentLayerSizeEst: {}", adjustedCurrentLayerSizeEst);
    } else {
        parentLayerSizeEst = cubeStatsReader.estimateLayerSize(level - 1);
        currentLayerSizeEst = cubeStatsReader.estimateLayerSize(level);
        adjustedCurrentLayerSizeEst = totalMapInputMB / parentLayerSizeEst * currentLayerSizeEst;
        logger.debug(
                "totalMapInputMB: {}, parentLayerSizeEst: {}, currentLayerSizeEst: {}, adjustedCurrentLayerSizeEst: {}",
                totalMapInputMB, parentLayerSizeEst, currentLayerSizeEst, adjustedCurrentLayerSizeEst);
    }

    // number of reduce tasks
    int numReduceTasks = (int) Math.round(adjustedCurrentLayerSizeEst / perReduceInputMB * reduceCountRatio + 0.99);

    // adjust reducer number for cube which has DISTINCT_COUNT measures for better performance
    if (cubeDesc.hasMemoryHungryMeasures()) {
        logger.debug("Multiply reducer num by 4 to boost performance for memory hungry measures");
        numReduceTasks = numReduceTasks * 4;
    }

    // at least 1 reducer by default
    numReduceTasks = Math.max(kylinConfig.getHadoopJobMinReducerNumber(), numReduceTasks);
    // no more than 500 reducers by default
    numReduceTasks = Math.min(kylinConfig.getHadoopJobMaxReducerNumber(), numReduceTasks);

    return numReduceTasks;
}
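The + 0.99 inside Math.round effectively rounds the reducer count upward: for example, adjustedCurrentLayerSizeEst = 1000 MB with perReduceInputMB = 500 and reduceCountRatio = 1.0 yields round(1000 / 500 * 1.0 + 0.99) = 3 reducers, quadrupled if the cube has memory-hungry measures and finally clamped to the configured minimum and maximum.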
 
Example 15
Source File: CubeDimEncMap.java    From kylin-on-parquet-v2 with Apache License 2.0
public CubeDimEncMap(CubeSegment seg) {
    this.cubeDesc = seg.getCubeDesc();
    this.seg = seg;
    this.dictionaryMap = null;
}
 
Example 16
Source File: RowKeyDecoder.java    From kylin-on-parquet-v2 with Apache License 2.0
public RowKeyDecoder(CubeSegment cubeSegment) {
    this.cubeDesc = cubeSegment.getCubeDesc();
    this.rowKeySplitter = new RowKeySplitter(cubeSegment);
    this.colIO = new RowKeyColumnIO(cubeSegment.getDimensionEncodingMap());
    this.values = new ArrayList<String>();
}
 
Example 17
Source File: CubeJoinedFlatTableDesc.java    From kylin-on-parquet-v2 with Apache License 2.0
public CubeJoinedFlatTableDesc(CubeSegment cubeSegment, boolean includingDerived) {
    this(cubeSegment.getCubeDesc(), cubeSegment, includingDerived);
}
 
Example 18
Source File: CubeJoinedFlatTableDesc.java    From kylin-on-parquet-v2 with Apache License 2.0
public CubeJoinedFlatTableDesc(CubeSegment cubeSegment) {
    this(cubeSegment.getCubeDesc(), cubeSegment, false);
}
 
Example 19
Source File: CreateHTableJob.java    From kylin-on-parquet-v2 with Apache License 2.0
public static byte[][] getRegionSplitsFromCuboidStatistics(final Map<Long, Double> cubeSizeMap,
        final KylinConfig kylinConfig, final CubeSegment cubeSegment, final Path hfileSplitsOutputFolder)
        throws IOException {

    final CubeDesc cubeDesc = cubeSegment.getCubeDesc();
    float cut = cubeDesc.getConfig().getKylinHBaseRegionCut();

    logger.info("Cut for HBase region is {} GB", cut);

    double totalSizeInM = 0;
    for (Double cuboidSize : cubeSizeMap.values()) {
        totalSizeInM += cuboidSize;
    }

    List<Long> allCuboids = Lists.newArrayList();
    allCuboids.addAll(cubeSizeMap.keySet());
    Collections.sort(allCuboids);

    int nRegion = Math.round((float) (totalSizeInM / (cut * 1024L)));
    nRegion = Math.max(kylinConfig.getHBaseRegionCountMin(), nRegion);
    nRegion = Math.min(kylinConfig.getHBaseRegionCountMax(), nRegion);

    if (cubeSegment.isEnableSharding()) {
        // clamp the region count to a range that sharding can address
        int original = nRegion;
        if (nRegion == 0) {
            nRegion = 1;
        }

        if (nRegion > Short.MAX_VALUE) {
            logger.info("Too many regions! reduce to {}", Short.MAX_VALUE);
            nRegion = Short.MAX_VALUE;
        }

        if (nRegion != original) {
            logger.info("Region count is adjusted from {} to {} to help random sharding", original, nRegion);
        }
    }

    int mbPerRegion = (int) (totalSizeInM / nRegion);
    mbPerRegion = Math.max(1, mbPerRegion);

    logger.info("Total size {} M (estimated)", totalSizeInM);
    logger.info("Expecting {} regions.", nRegion);
    logger.info("Expecting {} MB per region.", mbPerRegion);

    if (cubeSegment.isEnableSharding()) {
        // each cuboid will be split into a different number of shards
        HashMap<Long, Short> cuboidShards = Maps.newHashMap();

        // each shard/region may be split into multiple HFiles; list index = region ID, map key = cuboid ID, value = that cuboid's size within the region
        List<HashMap<Long, Double>> innerRegionSplits = Lists.newArrayList();
        for (int i = 0; i < nRegion; i++) {
            innerRegionSplits.add(new HashMap<Long, Double>());
        }

        double[] regionSizes = new double[nRegion];
        for (long cuboidId : allCuboids) {
            double estimatedSize = cubeSizeMap.get(cuboidId);
            double magic = 23; // empirical factor that fans large cuboids out across more shards
            int shardNum = (int) (estimatedSize * magic / mbPerRegion + 1);
            if (shardNum < 1) {
                shardNum = 1;
            }

            if (shardNum > nRegion) {
                logger.debug(String.format(Locale.ROOT,
                        "Cuboid %d 's estimated size %.2f MB will generate %d regions, " + "reduce to %d", cuboidId,
                        estimatedSize, shardNum, nRegion));
                shardNum = nRegion;
            } else {
                logger.debug(
                        String.format(Locale.ROOT, "Cuboid %d 's estimated size %.2f MB will generate %d regions",
                                cuboidId, estimatedSize, shardNum));
            }

            cuboidShards.put(cuboidId, (short) shardNum);
            short startShard = ShardingHash.getShard(cuboidId, nRegion);
            for (short i = startShard; i < startShard + shardNum; ++i) {
                short j = (short) (i % nRegion);
                regionSizes[j] = regionSizes[j] + estimatedSize / shardNum;
                innerRegionSplits.get(j).put(cuboidId, estimatedSize / shardNum);
            }
        }

        for (int i = 0; i < nRegion; ++i) {
            logger.debug("Region {}'s estimated size is {} MB, accounting for {} percent", i, regionSizes[i],
                    100.0 * regionSizes[i] / totalSizeInM);
        }

        CuboidShardUtil.saveCuboidShards(cubeSegment, cuboidShards, nRegion);
        saveHFileSplits(innerRegionSplits, mbPerRegion, hfileSplitsOutputFolder, kylinConfig);
        return getSplitsByRegionCount(nRegion);
    } else {
        throw new IllegalStateException("Not supported");
    }
}
 
Example 20
Source File: MapReduceUtil.java    From kylin-on-parquet-v2 with Apache License 2.0
/**
 * @param cuboidScheduler a caller-supplied scheduler gives more flexibility than the cube's default
 */
public static int getLayeredCubingReduceTaskNum(CubeSegment cubeSegment, CuboidScheduler cuboidScheduler,
        double totalMapInputMB, int level)
        throws ClassNotFoundException, IOException, InterruptedException, JobException {
    CubeDesc cubeDesc = cubeSegment.getCubeDesc();
    KylinConfig kylinConfig = cubeDesc.getConfig();

    double perReduceInputMB = kylinConfig.getDefaultHadoopJobReducerInputMB();
    double reduceCountRatio = kylinConfig.getDefaultHadoopJobReducerCountRatio();
    logger.info("Having per reduce MB " + perReduceInputMB + ", reduce count ratio " + reduceCountRatio + ", level "
            + level);

    CubeStatsReader cubeStatsReader = new CubeStatsReader(cubeSegment, cuboidScheduler, kylinConfig);

    double parentLayerSizeEst, currentLayerSizeEst, adjustedCurrentLayerSizeEst;

    if (level == -1) {
        //merge case
        double estimatedSize = cubeStatsReader.estimateCubeSize();
        adjustedCurrentLayerSizeEst = estimatedSize > totalMapInputMB ? totalMapInputMB : estimatedSize;
        logger.debug("estimated size {}, input size {}, adjustedCurrentLayerSizeEst: {}", estimatedSize,
                totalMapInputMB, adjustedCurrentLayerSizeEst);
    } else if (level == 0) {
        //base cuboid case TODO: the estimation could be very WRONG because it has no correction
        adjustedCurrentLayerSizeEst = cubeStatsReader.estimateLayerSize(0);
        logger.debug("adjustedCurrentLayerSizeEst: {}", adjustedCurrentLayerSizeEst);
    } else {
        parentLayerSizeEst = cubeStatsReader.estimateLayerSize(level - 1);
        currentLayerSizeEst = cubeStatsReader.estimateLayerSize(level);
        adjustedCurrentLayerSizeEst = totalMapInputMB / parentLayerSizeEst * currentLayerSizeEst;
        logger.debug(
                "totalMapInputMB: {}, parentLayerSizeEst: {}, currentLayerSizeEst: {}, adjustedCurrentLayerSizeEst: {}",
                totalMapInputMB, parentLayerSizeEst, currentLayerSizeEst, adjustedCurrentLayerSizeEst);
    }

    // number of reduce tasks
    int numReduceTasks = (int) Math.round(adjustedCurrentLayerSizeEst / perReduceInputMB * reduceCountRatio + 0.99);

    // adjust reducer number for cube which has DISTINCT_COUNT measures for better performance
    if (cubeDesc.hasMemoryHungryMeasures()) {
        logger.debug("Multiply reducer num by 4 to boost performance for memory hungry measures");
        numReduceTasks = numReduceTasks * 4;
    }

    // at least 1 reducer by default
    numReduceTasks = Math.max(kylinConfig.getHadoopJobMinReducerNumber(), numReduceTasks);
    // no more than 500 reducers by default
    numReduceTasks = Math.min(kylinConfig.getHadoopJobMaxReducerNumber(), numReduceTasks);

    return numReduceTasks;
}