Java Code Examples for org.apache.kylin.common.KylinConfig#isAutoInmemToOptimize()

The following examples show how to use org.apache.kylin.common.KylinConfig#isAutoInmemToOptimize(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: StatisticsDecisionUtil.java From kylin-on-parquet-v2 with Apache License 2.0

4 votes

/**
 * Chooses the cubing algorithm for a segment and stores the choice on the job.
 *
 * <p>The rules are tried in order and the first match wins:
 * <ol>
 *   <li>overlap ratio is exactly 0 and {@code isAutoInmemToOptimize()} is enabled: INMEM;</li>
 *   <li>configured preference is "INMEM" (case-insensitive): INMEM, unless the cube is a
 *       streaming cube and the job type is BUILD, in which case LAYER;</li>
 *   <li>configured preference is "LAYER" (case-insensitive): LAYER;</li>
 *   <li>at least one measure type reports itself as memory-hungry: LAYER;</li>
 *   <li>configured preference is "random": INMEM or LAYER by coin flip;</li>
 *   <li>otherwise: INMEM only if both the mapper number and the overlap ratio are within
 *       the limits read from the configuration, else LAYER.</li>
 * </ol>
 *
 * @param cubingJob          job whose type is consulted and on which the algorithm is set
 * @param seg                segment providing the configuration and the cube description
 * @param mapperOverlapRatio overlap ratio compared against the configured auto threshold
 * @param mapperNumber       mapper count compared against the configured auto mapper limit
 * @throws IOException declared by the signature; propagated from the calls made here, if any throw it
 */
public static void decideCubingAlgorithm(CubingJob cubingJob, CubeSegment seg, double mapperOverlapRatio,
        int mapperNumber) throws IOException {
    final KylinConfig config = seg.getConfig();
    final String preferred = config.getCubeAlgorithm();
    final CubingJob.AlgorithmEnum chosen;

    if (mapperOverlapRatio == 0 && config.isAutoInmemToOptimize()) {
        // no source records
        chosen = CubingJob.AlgorithmEnum.INMEM;
    } else if (CubingJob.AlgorithmEnum.INMEM.name().equalsIgnoreCase(preferred)) {
        // An explicit in-mem preference is overridden for BUILD jobs on streaming cubes.
        final boolean streamingBuild = seg.getCubeDesc().isStreamingCube()
                && CubingJob.CubingJobTypeEnum.getByName(cubingJob.getJobType()) == CubingJob.CubingJobTypeEnum.BUILD;
        chosen = streamingBuild ? CubingJob.AlgorithmEnum.LAYER : CubingJob.AlgorithmEnum.INMEM;
    } else if (CubingJob.AlgorithmEnum.LAYER.name().equalsIgnoreCase(preferred)) {
        chosen = CubingJob.AlgorithmEnum.LAYER;
    } else {
        // Walk every measure (no early exit) so that each memory-hungry one gets logged.
        boolean hasMemoryHungryMeasure = false;
        for (MeasureDesc measureDesc : seg.getCubeDesc().getMeasures()) {
            if (!measureDesc.getFunction().getMeasureType().isMemoryHungry()) {
                continue;
            }
            logger.info("This cube has memory-hungry measure " + measureDesc.getFunction().getExpression());
            hasMemoryHungryMeasure = true;
        }

        if (hasMemoryHungryMeasure) {
            chosen = CubingJob.AlgorithmEnum.LAYER;
        } else if ("random".equalsIgnoreCase(preferred)) {
            // for testing
            if (new Random().nextBoolean()) {
                chosen = CubingJob.AlgorithmEnum.INMEM;
            } else {
                chosen = CubingJob.AlgorithmEnum.LAYER;
            }
        } else {
            // the default
            final int maxMappers = config.getCubeAlgorithmAutoMapperLimit();
            final double maxOverlap = config.getCubeAlgorithmAutoThreshold();
            logger.info("mapperNumber for " + seg + " is " + mapperNumber + " and threshold is " + maxMappers);
            logger.info("mapperOverlapRatio for " + seg + " is " + mapperOverlapRatio + " and threshold is "
                    + maxOverlap);

            // in-mem cubing is good when
            // 1) the cluster has enough mapper slots to run in parallel
            // 2) the mapper overlap ratio is small, meaning the shuffle of in-mem MR has advantage
            final boolean enoughMapperSlots = mapperNumber <= maxMappers;
            final boolean smallOverlap = mapperOverlapRatio <= maxOverlap;
            if (enoughMapperSlots && smallOverlap) {
                chosen = CubingJob.AlgorithmEnum.INMEM;
            } else {
                chosen = CubingJob.AlgorithmEnum.LAYER;
            }
        }
    }

    logger.info("The cube algorithm for " + seg + " is " + chosen);
    cubingJob.setAlgorithm(chosen);
}

Example 2

Source File: StatisticsDecisionUtil.java From kylin with Apache License 2.0

4 votes

/**
 * Decides which cubing algorithm the job should use for the segment, logs the decision and
 * sets it on the job.
 *
 * @param cubingJob          job whose type may influence the decision and which receives the result
 * @param seg                segment whose configuration and cube description drive the decision
 * @param mapperOverlapRatio overlap ratio checked against the configured auto threshold
 * @param mapperNumber       mapper count checked against the configured auto mapper limit
 * @throws IOException declared by the signature; propagated from the calls made here, if any throw it
 */
public static void decideCubingAlgorithm(CubingJob cubingJob, CubeSegment seg, double mapperOverlapRatio,
        int mapperNumber) throws IOException {
    CubingJob.AlgorithmEnum decision = selectCubingAlgorithm(cubingJob, seg, mapperOverlapRatio, mapperNumber);
    logger.info("The cube algorithm for " + seg + " is " + decision);

    cubingJob.setAlgorithm(decision);
}

/**
 * Evaluates the selection rules top to bottom and returns as soon as one applies.
 */
private static CubingJob.AlgorithmEnum selectCubingAlgorithm(CubingJob cubingJob, CubeSegment seg,
        double mapperOverlapRatio, int mapperNumber) throws IOException {
    KylinConfig conf = seg.getConfig();
    String preference = conf.getCubeAlgorithm();

    // no source records
    if (mapperOverlapRatio == 0 && conf.isAutoInmemToOptimize()) {
        return CubingJob.AlgorithmEnum.INMEM;
    }

    // Explicit in-mem preference; a BUILD job on a streaming cube still gets LAYER.
    if (CubingJob.AlgorithmEnum.INMEM.name().equalsIgnoreCase(preference)) {
        if (seg.getCubeDesc().isStreamingCube() && CubingJob.CubingJobTypeEnum
                .getByName(cubingJob.getJobType()) == CubingJob.CubingJobTypeEnum.BUILD) {
            return CubingJob.AlgorithmEnum.LAYER;
        }
        return CubingJob.AlgorithmEnum.INMEM;
    }

    // Explicit layer preference.
    if (CubingJob.AlgorithmEnum.LAYER.name().equalsIgnoreCase(preference)) {
        return CubingJob.AlgorithmEnum.LAYER;
    }

    // Any memory-hungry measure forces LAYER; all measures are visited so each hit is logged.
    boolean memoryHungry = false;
    for (MeasureDesc m : seg.getCubeDesc().getMeasures()) {
        if (m.getFunction().getMeasureType().isMemoryHungry()) {
            logger.info("This cube has memory-hungry measure " + m.getFunction().getExpression());
            memoryHungry = true;
        }
    }
    if (memoryHungry) {
        return CubingJob.AlgorithmEnum.LAYER;
    }

    // for testing
    if ("random".equalsIgnoreCase(preference)) {
        return new Random().nextBoolean() ? CubingJob.AlgorithmEnum.INMEM : CubingJob.AlgorithmEnum.LAYER;
    }

    // the default
    int mapperLimit = conf.getCubeAlgorithmAutoMapperLimit();
    double overlapLimit = conf.getCubeAlgorithmAutoThreshold();
    logger.info("mapperNumber for " + seg + " is " + mapperNumber + " and threshold is " + mapperLimit);
    logger.info("mapperOverlapRatio for " + seg + " is " + mapperOverlapRatio + " and threshold is "
            + overlapLimit);

    // in-mem cubing is good when
    // 1) the cluster has enough mapper slots to run in parallel
    // 2) the mapper overlap ratio is small, meaning the shuffle of in-mem MR has advantage
    boolean inMemFavoured = mapperNumber <= mapperLimit && mapperOverlapRatio <= overlapLimit;
    return inMemFavoured ? CubingJob.AlgorithmEnum.INMEM : CubingJob.AlgorithmEnum.LAYER;
}