Java Code Examples for org.apache.kylin.cube.CubeSegment#getCubeInstance()

The following examples show how to use org.apache.kylin.cube.CubeSegment#getCubeInstance(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: StatisticsDecisionUtil.java    From kylin with Apache License 2.0 6 votes vote down vote up
/**
 * Decides whether the cube planner may optimize the cubing plan of the cube that owns {@code segment}.
 *
 * <p>The answer is {@code false} when the cube planner is disabled in the cube's config, or when the
 * cube has at least one READY_PENDING segment. Otherwise the answer is {@code true} only if the cube
 * has at most one NEW segment and either has no READY segment, or (with the planner enabled for
 * existing cubes) has exactly one READY segment whose range equals the range of {@code segment}.
 *
 * @param segment the segment whose owning cube is inspected
 * @return {@code true} if the cubing plan may be optimized
 */
public static boolean isAbleToOptimizeCubingPlan(CubeSegment segment) {
    CubeInstance cube = segment.getCubeInstance();
    if (!cube.getConfig().isCubePlannerEnabled()) {
        return false;
    }

    if (!cube.getSegments(SegmentStatusEnum.READY_PENDING).isEmpty()) {
        logger.info("Has read pending segments and will not enable cube planner.");
        return false;
    }

    List<CubeSegment> ready = cube.getSegments(SegmentStatusEnum.READY);
    List<CubeSegment> pendingNew = cube.getSegments(SegmentStatusEnum.NEW);

    // More than one NEW segment always rules the planner out.
    if (pendingNew.size() > 1) {
        return false;
    }
    // A cube without any READY segment can be planned freely.
    if (ready.isEmpty()) {
        return true;
    }
    // Otherwise the single READY segment must cover exactly the range being built.
    return cube.getConfig().isCubePlannerEnabledForExistingCube() //
            && ready.size() == 1 //
            && ready.get(0).getSegRange().equals(segment.getSegRange());
}
 
Example 2
Source File: StatisticsDecisionUtil.java    From kylin-on-parquet-v2 with Apache License 2.0 6 votes vote down vote up
/**
 * Asks the cuboid recommender for a cuboid list for {@code segment} and, when it yields a
 * non-empty result, stores that result on the owning cube through {@code CubeManager.updateCube}.
 *
 * <p>Nothing happens when {@link #isAbleToOptimizeCubingPlan(CubeSegment)} returns {@code false}
 * or when the recommender returns {@code null} or an empty map.
 *
 * @param segment the segment used as the source for the recommendation
 * @throws IOException declared by the recommender and cube-update calls
 */
public static void optimizeCubingPlan(CubeSegment segment) throws IOException {
    if (!isAbleToOptimizeCubingPlan(segment)) {
        return;
    }
    logger.info("It's able to trigger cuboid planner algorithm.");

    Map<Long, Long> recommended = CuboidRecommenderUtil.getRecommendCuboidList(segment);
    boolean nothingRecommended = recommended == null || recommended.isEmpty();
    if (nothingRecommended) {
        return;
    }

    CubeInstance cube = segment.getCubeInstance();
    // Apply the recommendation on a fresh writable copy of the cube.
    CubeUpdate cubeUpdate = new CubeUpdate(cube.latestCopyForWrite());
    cubeUpdate.setCuboids(recommended);
    CubeManager.getInstance(cube.getConfig()).updateCube(cubeUpdate);
}
 
Example 3
Source File: StatisticsDecisionUtil.java    From kylin-on-parquet-v2 with Apache License 2.0 6 votes vote down vote up
/**
 * Decides whether the cube planner may optimize the cubing plan of the cube that owns {@code segment}.
 *
 * <p>The answer is {@code false} when the cube planner is disabled in the cube's config, or when the
 * cube has at least one READY_PENDING segment. Otherwise the answer is {@code true} only if the cube
 * has at most one NEW segment and either has no READY segment, or (with the planner enabled for
 * existing cubes) has exactly one READY segment whose range equals the range of {@code segment}.
 *
 * @param segment the segment whose owning cube is inspected
 * @return {@code true} if the cubing plan may be optimized
 */
public static boolean isAbleToOptimizeCubingPlan(CubeSegment segment) {
    CubeInstance cube = segment.getCubeInstance();
    if (!cube.getConfig().isCubePlannerEnabled()) {
        return false;
    }

    if (!cube.getSegments(SegmentStatusEnum.READY_PENDING).isEmpty()) {
        logger.info("Has read pending segments and will not enable cube planner.");
        return false;
    }

    List<CubeSegment> ready = cube.getSegments(SegmentStatusEnum.READY);
    List<CubeSegment> pendingNew = cube.getSegments(SegmentStatusEnum.NEW);

    // More than one NEW segment always rules the planner out.
    if (pendingNew.size() > 1) {
        return false;
    }
    // A cube without any READY segment can be planned freely.
    if (ready.isEmpty()) {
        return true;
    }
    // Otherwise the single READY segment must cover exactly the range being built.
    return cube.getConfig().isCubePlannerEnabledForExistingCube() //
            && ready.size() == 1 //
            && ready.get(0).getSegRange().equals(segment.getSegRange());
}
 
Example 4
Source File: StatisticsDecisionUtil.java    From kylin with Apache License 2.0 6 votes vote down vote up
/**
 * Asks the cuboid recommender for a cuboid list for {@code segment} and, when it yields a
 * non-empty result, stores that result on the owning cube through {@code CubeManager.updateCube}.
 *
 * <p>Nothing happens when {@link #isAbleToOptimizeCubingPlan(CubeSegment)} returns {@code false}
 * or when the recommender returns {@code null} or an empty map.
 *
 * @param segment the segment used as the source for the recommendation
 * @throws IOException declared by the recommender and cube-update calls
 */
public static void optimizeCubingPlan(CubeSegment segment) throws IOException {
    if (!isAbleToOptimizeCubingPlan(segment)) {
        return;
    }
    logger.info("It's able to trigger cuboid planner algorithm.");

    Map<Long, Long> recommended = CuboidRecommenderUtil.getRecommendCuboidList(segment);
    boolean nothingRecommended = recommended == null || recommended.isEmpty();
    if (nothingRecommended) {
        return;
    }

    CubeInstance cube = segment.getCubeInstance();
    // Apply the recommendation on a fresh writable copy of the cube.
    CubeUpdate cubeUpdate = new CubeUpdate(cube.latestCopyForWrite());
    cubeUpdate.setCuboids(recommended);
    CubeManager.getInstance(cube.getConfig()).updateCube(cubeUpdate);
}
 
Example 5
Source File: CubeStatsReader.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
/**
 * Rescales, in place, the estimated size of every scheduled cuboid by the rating of its build level.
 *
 * <p>The per-level ratings come from {@code getHistoricalRating}; when that returns {@code null}
 * the map is left untouched. A cuboid that has no entry (or a {@code null} entry) in
 * {@code sizeMap} is treated as size {@code 0.0} and is written back as {@code 0.0 * rate}.
 *
 * @param sizeMap     cuboid id to estimated size; modified in place
 * @param cubeSegment the segment whose cube supplies the cuboid layers
 */
private static void optimizeSizeMap(Map<Long, Double> sizeMap, CubeSegment cubeSegment) {
    CubeInstance cube = cubeSegment.getCubeInstance();
    int maxLevel = cube.getCuboidScheduler().getBuildLevel();
    List<List<Long>> cuboidsByLayer = cube.getCuboidScheduler().getCuboidsByLayer();

    logger.info("cube size is {} before optimize", SumHelper.sumDouble(sizeMap.values()));

    List<Double> ratings = getHistoricalRating(cubeSegment, cube, maxLevel);
    if (ratings == null) {
        logger.info("Fail to optimize, use origin.");
        return;
    }

    // Levels run from 0 up to and including the build level.
    for (int level = 0; level <= maxLevel; level++) {
        Double rate = ratings.get(level);

        for (Long cuboidId : cuboidsByLayer.get(level)) {
            Double current = sizeMap.get(cuboidId);
            double base = (current == null) ? 0.0 : current;
            sizeMap.put(cuboidId, base * rate);
        }
    }

    logger.info("cube size is {} after optimize", SumHelper.sumDouble(sizeMap.values()));
}
 
Example 6
Source File: CubingJob.java    From kylin with Apache License 2.0 5 votes vote down vote up
/**
 * Computes, for every build level, the ratio between the real size of that level and its
 * estimated size.
 *
 * <p>The estimated size of a level is the sum of the estimated sizes of its cuboids; cuboids
 * missing from the estimated size map contribute {@code 0.0}. The real size comes from
 * {@code getRealSizeByLevel}. When either size is {@code 0.0} the ratio for that level is
 * recorded as {@code -1.0}.
 *
 * @param seg    the segment to inspect
 * @param config the config handed to the statistics reader and the cuboid path lookup
 * @return one ratio per level (index = level, from 0 to the build level inclusive), or
 *         {@code null} when the estimated size map cannot be read
 */
public List<Double> findEstimateRatio(CubeSegment seg, KylinConfig config) {
    CuboidScheduler cuboidScheduler = seg.getCubeInstance().getCuboidScheduler();
    List<List<Long>> layeredCuboids = cuboidScheduler.getCuboidsByLayer();
    int totalLevels = cuboidScheduler.getBuildLevel();

    String cuboidRootPath = getCuboidRootPath(seg, config);

    Map<Long, Double> estimatedSizeMap;
    try {
        estimatedSizeMap = new CubeStatsReader(seg, config).getCuboidSizeMap(true);
    } catch (IOException e) {
        // Pass the exception as the trailing argument so its cause and stack trace are logged
        // instead of being silently dropped.
        logger.warn("Cannot get segment {} estimated size map", seg.getName(), e);
        return null;
    }

    List<Double> result = Lists.newArrayList();
    for (int level = 0; level <= totalLevels; level++) {
        double levelEstimatedSize = 0;
        for (Long cuboidId : layeredCuboids.get(level)) {
            // Single lookup; a missing estimate counts as zero.
            Double estimated = estimatedSizeMap.get(cuboidId);
            if (estimated != null) {
                levelEstimatedSize += estimated;
            }
        }

        double levelRealSize = getRealSizeByLevel(cuboidRootPath, level);

        // -1.0 marks a level for which no meaningful ratio exists.
        if (levelEstimatedSize == 0.0 || levelRealSize == 0.0) {
            result.add(-1.0);
        } else {
            result.add(levelRealSize / levelEstimatedSize);
        }
    }

    return result;
}
 
Example 7
Source File: CubingJob.java    From kylin with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code CubingJob} for {@code seg} and fills in its name, project, job type,
 * submitter, notify list and the cube/segment parameters.
 *
 * <p>The cube must be found in at least one project. If it is found in two or more projects,
 * this is only tolerated (with a warning) when the config allows a cube to appear in multiple
 * projects; the first project found is then used as the job's project.
 *
 * @param seg       the segment the job works on
 * @param jobType   job type label, also used as the prefix of the job name
 * @param submitter the user submitting the job
 * @param config    job engine config supplying the Kylin config and the time zone
 * @return the initialized job
 * @throws RuntimeException if no project contains the cube, or several do and that is not allowed
 */
private static CubingJob initCubingJob(CubeSegment seg, String jobType, String submitter, JobEngineConfig config) {
    KylinConfig kylinConfig = config.getConfig();
    CubeInstance cube = seg.getCubeInstance();

    List<ProjectInstance> projects = ProjectManager.getInstance(kylinConfig).findProjects(cube.getType(),
            cube.getName());
    if (projects == null || projects.isEmpty()) {
        throw new RuntimeException("Cannot find the project containing the cube " + cube.getName() + "!!!");
    }
    if (projects.size() >= 2) {
        String msg = "Find more than one project containing the cube " + cube.getName()
                + ". It does't meet the uniqueness requirement!!! ";
        if (kylinConfig.allowCubeAppearInMultipleProjects()) {
            logger.warn(msg);
        } else {
            throw new RuntimeException(msg);
        }
    }

    CubingJob job = new CubingJob();
    SimpleDateFormat timestampFormat = new SimpleDateFormat("z yyyy-MM-dd HH:mm:ss", Locale.ROOT);
    timestampFormat.setTimeZone(TimeZone.getTimeZone(config.getTimeZone()));

    job.setDeployEnvName(kylinConfig.getDeployEnv());
    job.setProjectName(projects.get(0).getName());
    job.setJobType(jobType);

    CubingExecutableUtil.setCubeName(cube.getName(), job.getParams());
    CubingExecutableUtil.setSegmentId(seg.getUuid(), job.getParams());
    CubingExecutableUtil.setSegmentName(seg.getName(), job.getParams());

    // Job name: "<jobType> CUBE - <cube display name> - <segment name> - <timestamp>"
    String timestamp = timestampFormat.format(new Date(System.currentTimeMillis()));
    String jobName = jobType + " CUBE - " + cube.getDisplayName() + " - " + seg.getName() + " - " + timestamp;
    job.setName(jobName);

    job.setSubmitter(submitter);
    job.setNotifyList(cube.getDescriptor().getNotifyList());
    return job;
}
 
Example 8
Source File: CuboidRecommenderUtil.java    From kylin with Apache License 2.0 5 votes vote down vote up
/**
 * Segment-level cuboid recommendation (intended for future use).
 *
 * <p>Returns {@code null} when {@code segment} is {@code null}, when the segment has no cuboid
 * row estimates, or when the row estimate of the base cuboid is missing or zero.
 *
 * @param segment                 the segment whose statistics feed the recommendation
 * @param hitFrequencyMap         passed to the stats builder as the hit frequency map
 * @param rollingUpCountSourceMap passed to the stats builder as the rolling-up count source map
 * @param ifForceRecommend        forwarded unchanged to the recommender
 * @return the recommender's result, or {@code null} as described above
 * @throws IOException declared by the statistics reader
 */
public static Map<Long, Long> getRecommendCuboidList(CubeSegment segment, Map<Long, Long> hitFrequencyMap,
        Map<Long, Map<Long, Pair<Long, Long>>> rollingUpCountSourceMap, boolean ifForceRecommend)
        throws IOException {
    if (segment == null) {
        return null;
    }

    CubeStatsReader statsReader = new CubeStatsReader(segment, null, segment.getConfig());
    Map<Long, Long> rowEstimates = statsReader.getCuboidRowEstimatesHLL();
    if (rowEstimates == null || rowEstimates.isEmpty()) {
        logger.info("Cuboid Statistics is not enabled.");
        return null;
    }

    CubeInstance cube = segment.getCubeInstance();
    long baseCuboid = cube.getCuboidScheduler().getBaseCuboidId();
    Long baseCuboidRows = rowEstimates.get(baseCuboid);
    if (baseCuboidRows == null || baseCuboidRows == 0L) {
        logger.info(BASE_CUBOID_COUNT_IN_CUBOID_STATISTICS_IS_ZERO);
        return null;
    }

    // The stats key is "<cube name>-<segment name>".
    String statsKey = cube.getName() + "-" + segment.getName();
    CuboidStats cuboidStats = new CuboidStats.Builder(statsKey, baseCuboid, rowEstimates,
            statsReader.getCuboidSizeMap()) //
                    .setHitFrequencyMap(hitFrequencyMap) //
                    .setRollingUpCountSourceMap(rollingUpCountSourceMap) //
                    .build();
    return CuboidRecommender.getInstance().getRecommendCuboidList(cuboidStats, segment.getConfig(),
            ifForceRecommend);
}
 
Example 9
Source File: CuboidRecommenderUtil.java    From kylin with Apache License 2.0 5 votes vote down vote up
/**
 * Triggers cube planner phase one for the given segment.
 *
 * <p>Returns {@code null} when {@code segment} is {@code null}, when the segment has no cuboid
 * row estimates, or when the row estimate of the base cuboid is missing or zero. The
 * recommendation is forced whenever the cube description lists mandatory cuboids.
 *
 * @param segment the segment whose statistics feed the recommendation
 * @return the recommender's result, or {@code null} as described above
 * @throws IOException declared by the statistics reader
 */
public static Map<Long, Long> getRecommendCuboidList(CubeSegment segment) throws IOException {
    if (segment == null) {
        return null;
    }

    CubeStatsReader statsReader = new CubeStatsReader(segment, null, segment.getConfig());
    Map<Long, Long> rowEstimates = statsReader.getCuboidRowEstimatesHLL();
    if (rowEstimates == null || rowEstimates.isEmpty()) {
        logger.info("Cuboid Statistics is not enabled.");
        return null;
    }

    CubeInstance cube = segment.getCubeInstance();
    long baseCuboid = cube.getCuboidScheduler().getBaseCuboidId();
    Long baseCuboidRows = rowEstimates.get(baseCuboid);
    if (baseCuboidRows == null || baseCuboidRows == 0L) {
        logger.info(BASE_CUBOID_COUNT_IN_CUBOID_STATISTICS_IS_ZERO);
        return null;
    }

    Set<Long> mandatoryCuboids = segment.getCubeDesc().getMandatoryCuboids();

    // Here the stats key is the cube name alone.
    String statsKey = cube.getName();
    CuboidStats cuboidStats = new CuboidStats.Builder(statsKey, baseCuboid, rowEstimates,
            statsReader.getCuboidSizeMap()) //
                    .setMandatoryCuboids(mandatoryCuboids) //
                    .setBPUSMinBenefitRatio(segment.getConfig().getCubePlannerBPUSMinBenefitRatio()) //
                    .build();
    return CuboidRecommender.getInstance().getRecommendCuboidList(cuboidStats, segment.getConfig(),
            !mandatoryCuboids.isEmpty());
}
 
Example 10
Source File: CubeStatsReader.java    From kylin with Apache License 2.0 5 votes vote down vote up
/**
 * Rescales, in place, the estimated size of every scheduled cuboid by the rating of its build level.
 *
 * <p>The per-level ratings come from {@code getHistoricalRating}; when that returns {@code null}
 * the map is left untouched. A cuboid that has no entry (or a {@code null} entry) in
 * {@code sizeMap} is treated as size {@code 0.0} and is written back as {@code 0.0 * rate}.
 *
 * @param sizeMap     cuboid id to estimated size; modified in place
 * @param cubeSegment the segment whose cube supplies the cuboid layers
 */
private static void optimizeSizeMap(Map<Long, Double> sizeMap, CubeSegment cubeSegment) {
    CubeInstance cube = cubeSegment.getCubeInstance();
    int maxLevel = cube.getCuboidScheduler().getBuildLevel();
    List<List<Long>> cuboidsByLayer = cube.getCuboidScheduler().getCuboidsByLayer();

    logger.info("cube size is {} before optimize", SumHelper.sumDouble(sizeMap.values()));

    List<Double> ratings = getHistoricalRating(cubeSegment, cube, maxLevel);
    if (ratings == null) {
        logger.info("Fail to optimize, use origin.");
        return;
    }

    // Levels run from 0 up to and including the build level.
    for (int level = 0; level <= maxLevel; level++) {
        Double rate = ratings.get(level);

        for (Long cuboidId : cuboidsByLayer.get(level)) {
            Double current = sizeMap.get(cuboidId);
            double base = (current == null) ? 0.0 : current;
            sizeMap.put(cuboidId, base * rate);
        }
    }

    logger.info("cube size is {} after optimize", SumHelper.sumDouble(sizeMap.values()));
}
 
Example 11
Source File: FactDistinctColumnsJob.java    From kylin with Apache License 2.0 5 votes vote down vote up
/**
 * Configures the reduce phase of this job: reducer and partitioner classes, the number of
 * reduce tasks, the four named outputs and the output path.
 *
 * <p>The reducer count is taken from a {@code FactDistinctColumnsReducerMapping} built for the
 * segment's cube and must not exceed 250.
 *
 * @param output  the job output path
 * @param cubeSeg the segment whose cube determines the reducer mapping
 * @throws IOException declared by the output setup calls
 * @throws IllegalArgumentException if more than 250 reducers would be required
 */
private void setupReducer(Path output, CubeSegment cubeSeg)
        throws IOException {
    int reducerCount = new FactDistinctColumnsReducerMapping(cubeSeg.getCubeInstance()).getTotalReducerNum();
    logger.info("{} has reducers {}.", this.getClass().getName(), reducerCount);
    if (reducerCount > 250) {
        String message = "The max reducer number for FactDistinctColumnsJob is 250, but now it is "
                + reducerCount
                + ", decrease 'kylin.engine.mr.uhc-reducer-count'";
        throw new IllegalArgumentException(message);
    }

    job.setReducerClass(FactDistinctColumnsReducer.class);
    job.setPartitionerClass(FactDistinctColumnPartitioner.class);
    job.setNumReduceTasks(reducerCount);

    // One named output per kind of result, so each kind is written separately.
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_COLUMN,
            SequenceFileOutputFormat.class, NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT,
            SequenceFileOutputFormat.class, NullWritable.class, ArrayPrimitiveWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_STATISTICS,
            SequenceFileOutputFormat.class, LongWritable.class, BytesWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_PARTITION,
            TextOutputFormat.class, NullWritable.class, LongWritable.class);

    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

    // Lazy output format: avoids creating a zero-sized default output.
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    deletePath(job.getConfiguration(), output);
}
 
Example 12
Source File: NSparkMergingJob.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the Spark merging job for the given merged segment.
 *
 * <p>The job is named {@code "<jobType> CUBE - <cube display name> - <segment name> - <timestamp>"},
 * gets its parameters from the segment and its cube, and is given three steps in order:
 * resource detection, merging, and clean-up after merge.
 *
 * @param mergedSegment the new segment to merge into; expected to cover a couple of ready segments
 * @param submitter     the user submitting the job
 * @param jobType       the job type set on the job and used as the job name prefix
 * @param jobId         the id assigned to the job
 * @return the configured merging job
 */
public static NSparkMergingJob merge(CubeSegment mergedSegment, String submitter, JobTypeEnum jobType, String jobId) {
    CubeInstance cube = mergedSegment.getCubeInstance();

    NSparkMergingJob job = new NSparkMergingJob();
    SimpleDateFormat timestampFormat = new SimpleDateFormat("z yyyy-MM-dd HH:mm:ss", Locale.ROOT);
    timestampFormat.setTimeZone(TimeZone.getTimeZone(cube.getConfig().getTimeZone()));

    String jobName = jobType + " CUBE - " + cube.getDisplayName() + " - " + mergedSegment.getName() + " - "
            + timestampFormat.format(new Date(System.currentTimeMillis()));
    job.setName(jobName);
    job.setId(jobId);
    job.setTargetSubject(mergedSegment.getModel().getUuid());
    job.setTargetSegments(Lists.newArrayList(String.valueOf(mergedSegment.getUuid())));
    job.setProject(mergedSegment.getProject());
    job.setJobType(jobType);
    job.setSubmitter(submitter);

    // Identity of the job and of the cube/model it targets.
    job.setParam(MetadataConstants.P_JOB_ID, jobId);
    job.setParam(MetadataConstants.P_PROJECT_NAME, cube.getProject());
    job.setParam(MetadataConstants.P_TARGET_MODEL, job.getTargetSubject());
    job.setParam(MetadataConstants.P_CUBE_ID, cube.getId());
    job.setParam(MetadataConstants.P_CUBE_NAME, cube.getName());

    // Segment being produced and the data range it covers.
    job.setParam(MetadataConstants.P_SEGMENT_IDS, String.join(",", job.getTargetSegments()));
    job.setParam(CubingExecutableUtil.SEGMENT_ID, mergedSegment.getUuid());
    job.setParam(MetadataConstants.P_DATA_RANGE_START, mergedSegment.getSegRange().start.toString());
    job.setParam(MetadataConstants.P_DATA_RANGE_END, mergedSegment.getSegRange().end.toString());

    job.setParam(MetadataConstants.P_OUTPUT_META_URL, cube.getConfig().getMetadataUrl().toString());
    // The job-type parameter is always INDEX_MERGE, independent of the jobType argument.
    job.setParam(MetadataConstants.P_JOB_TYPE, String.valueOf(JobTypeEnum.INDEX_MERGE));

    JobStepFactory.addStep(job, JobStepType.RESOURCE_DETECT, cube);
    JobStepFactory.addStep(job, JobStepType.MERGING, cube);
    JobStepFactory.addStep(job, JobStepType.CLEAN_UP_AFTER_MERGE, cube);

    return job;
}
 
Example 13
Source File: CubingJob.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
/**
 * Computes, for every build level, the ratio between the real size of that level and its
 * estimated size.
 *
 * <p>The estimated size of a level is the sum of the estimated sizes of its cuboids; cuboids
 * missing from the estimated size map contribute {@code 0.0}. The real size comes from
 * {@code getRealSizeByLevel}. When either size is {@code 0.0} the ratio for that level is
 * recorded as {@code -1.0}.
 *
 * @param seg    the segment to inspect
 * @param config the config handed to the statistics reader and the cuboid path lookup
 * @return one ratio per level (index = level, from 0 to the build level inclusive), or
 *         {@code null} when the estimated size map cannot be read
 */
public List<Double> findEstimateRatio(CubeSegment seg, KylinConfig config) {
    CuboidScheduler cuboidScheduler = seg.getCubeInstance().getCuboidScheduler();
    List<List<Long>> layeredCuboids = cuboidScheduler.getCuboidsByLayer();
    int totalLevels = cuboidScheduler.getBuildLevel();

    String cuboidRootPath = getCuboidRootPath(seg, config);

    Map<Long, Double> estimatedSizeMap;
    try {
        estimatedSizeMap = new CubeStatsReader(seg, config).getCuboidSizeMap(true);
    } catch (IOException e) {
        // Pass the exception as the trailing argument so its cause and stack trace are logged
        // instead of being silently dropped.
        logger.warn("Cannot get segment {} estimated size map", seg.getName(), e);
        return null;
    }

    List<Double> result = Lists.newArrayList();
    for (int level = 0; level <= totalLevels; level++) {
        double levelEstimatedSize = 0;
        for (Long cuboidId : layeredCuboids.get(level)) {
            // Single lookup; a missing estimate counts as zero.
            Double estimated = estimatedSizeMap.get(cuboidId);
            if (estimated != null) {
                levelEstimatedSize += estimated;
            }
        }

        double levelRealSize = getRealSizeByLevel(cuboidRootPath, level);

        // -1.0 marks a level for which no meaningful ratio exists.
        if (levelEstimatedSize == 0.0 || levelRealSize == 0.0) {
            result.add(-1.0);
        } else {
            result.add(levelRealSize / levelEstimatedSize);
        }
    }

    return result;
}
 
Example 14
Source File: CubingJob.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code CubingJob} for {@code seg} and fills in its name, project, job type,
 * submitter, notify list and the cube/segment parameters.
 *
 * <p>The cube must be found in at least one project. If it is found in two or more projects,
 * this is only tolerated (with a warning) when the config allows a cube to appear in multiple
 * projects; the first project found is then used as the job's project.
 *
 * @param seg       the segment the job works on
 * @param jobType   job type label, also used as the prefix of the job name
 * @param submitter the user submitting the job
 * @param config    job engine config supplying the Kylin config and the time zone
 * @return the initialized job
 * @throws RuntimeException if no project contains the cube, or several do and that is not allowed
 */
private static CubingJob initCubingJob(CubeSegment seg, String jobType, String submitter, JobEngineConfig config) {
    KylinConfig kylinConfig = config.getConfig();
    CubeInstance cube = seg.getCubeInstance();

    List<ProjectInstance> projects = ProjectManager.getInstance(kylinConfig).findProjects(cube.getType(),
            cube.getName());
    if (projects == null || projects.isEmpty()) {
        throw new RuntimeException("Cannot find the project containing the cube " + cube.getName() + "!!!");
    }
    if (projects.size() >= 2) {
        String msg = "Find more than one project containing the cube " + cube.getName()
                + ". It does't meet the uniqueness requirement!!! ";
        if (kylinConfig.allowCubeAppearInMultipleProjects()) {
            logger.warn(msg);
        } else {
            throw new RuntimeException(msg);
        }
    }

    CubingJob job = new CubingJob();
    SimpleDateFormat timestampFormat = new SimpleDateFormat("z yyyy-MM-dd HH:mm:ss", Locale.ROOT);
    timestampFormat.setTimeZone(TimeZone.getTimeZone(config.getTimeZone()));

    job.setDeployEnvName(kylinConfig.getDeployEnv());
    job.setProjectName(projects.get(0).getName());
    job.setJobType(jobType);

    CubingExecutableUtil.setCubeName(cube.getName(), job.getParams());
    CubingExecutableUtil.setSegmentId(seg.getUuid(), job.getParams());
    CubingExecutableUtil.setSegmentName(seg.getName(), job.getParams());

    // Job name: "<jobType> CUBE - <cube display name> - <segment name> - <timestamp>"
    String timestamp = timestampFormat.format(new Date(System.currentTimeMillis()));
    String jobName = jobType + " CUBE - " + cube.getDisplayName() + " - " + seg.getName() + " - " + timestamp;
    job.setName(jobName);

    job.setSubmitter(submitter);
    job.setNotifyList(cube.getDescriptor().getNotifyList());
    return job;
}
 
Example 15
Source File: CuboidRecommenderUtil.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
/**
 * Segment-level cuboid recommendation (intended for future use).
 *
 * <p>Returns {@code null} when {@code segment} is {@code null}, when the segment has no cuboid
 * row estimates, or when the row estimate of the base cuboid is missing or zero.
 *
 * @param segment                 the segment whose statistics feed the recommendation
 * @param hitFrequencyMap         passed to the stats builder as the hit frequency map
 * @param rollingUpCountSourceMap passed to the stats builder as the rolling-up count source map
 * @param ifForceRecommend        forwarded unchanged to the recommender
 * @return the recommender's result, or {@code null} as described above
 * @throws IOException declared by the statistics reader
 */
public static Map<Long, Long> getRecommendCuboidList(CubeSegment segment, Map<Long, Long> hitFrequencyMap,
        Map<Long, Map<Long, Pair<Long, Long>>> rollingUpCountSourceMap, boolean ifForceRecommend)
        throws IOException {
    if (segment == null) {
        return null;
    }

    CubeStatsReader statsReader = new CubeStatsReader(segment, null, segment.getConfig());
    Map<Long, Long> rowEstimates = statsReader.getCuboidRowEstimatesHLL();
    if (rowEstimates == null || rowEstimates.isEmpty()) {
        logger.info("Cuboid Statistics is not enabled.");
        return null;
    }

    CubeInstance cube = segment.getCubeInstance();
    long baseCuboid = cube.getCuboidScheduler().getBaseCuboidId();
    Long baseCuboidRows = rowEstimates.get(baseCuboid);
    if (baseCuboidRows == null || baseCuboidRows == 0L) {
        logger.info(BASE_CUBOID_COUNT_IN_CUBOID_STATISTICS_IS_ZERO);
        return null;
    }

    // The stats key is "<cube name>-<segment name>".
    String statsKey = cube.getName() + "-" + segment.getName();
    CuboidStats cuboidStats = new CuboidStats.Builder(statsKey, baseCuboid, rowEstimates,
            statsReader.getCuboidSizeMap()) //
                    .setHitFrequencyMap(hitFrequencyMap) //
                    .setRollingUpCountSourceMap(rollingUpCountSourceMap) //
                    .build();
    return CuboidRecommender.getInstance().getRecommendCuboidList(cuboidStats, segment.getConfig(),
            ifForceRecommend);
}
 
Example 16
Source File: CuboidRecommenderUtil.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
/**
 * Triggers cube planner phase one for the given segment.
 *
 * <p>Returns {@code null} when {@code segment} is {@code null}, when the segment has no cuboid
 * row estimates, or when the row estimate of the base cuboid is missing or zero. The
 * recommendation is forced whenever the cube description lists mandatory cuboids.
 *
 * @param segment the segment whose statistics feed the recommendation
 * @return the recommender's result, or {@code null} as described above
 * @throws IOException declared by the statistics reader
 */
public static Map<Long, Long> getRecommendCuboidList(CubeSegment segment) throws IOException {
    if (segment == null) {
        return null;
    }

    CubeStatsReader statsReader = new CubeStatsReader(segment, null, segment.getConfig());
    Map<Long, Long> rowEstimates = statsReader.getCuboidRowEstimatesHLL();
    if (rowEstimates == null || rowEstimates.isEmpty()) {
        logger.info("Cuboid Statistics is not enabled.");
        return null;
    }

    CubeInstance cube = segment.getCubeInstance();
    long baseCuboid = cube.getCuboidScheduler().getBaseCuboidId();
    Long baseCuboidRows = rowEstimates.get(baseCuboid);
    if (baseCuboidRows == null || baseCuboidRows == 0L) {
        logger.info(BASE_CUBOID_COUNT_IN_CUBOID_STATISTICS_IS_ZERO);
        return null;
    }

    Set<Long> mandatoryCuboids = segment.getCubeDesc().getMandatoryCuboids();

    // Here the stats key is the cube name alone.
    String statsKey = cube.getName();
    CuboidStats cuboidStats = new CuboidStats.Builder(statsKey, baseCuboid, rowEstimates,
            statsReader.getCuboidSizeMap()) //
                    .setMandatoryCuboids(mandatoryCuboids) //
                    .setBPUSMinBenefitRatio(segment.getConfig().getCubePlannerBPUSMinBenefitRatio()) //
                    .build();
    return CuboidRecommender.getInstance().getRecommendCuboidList(cuboidStats, segment.getConfig(),
            !mandatoryCuboids.isEmpty());
}
 
Example 17
Source File: FactDistinctColumnsJob.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
/**
 * Configures the reduce phase of this job: reducer and partitioner classes, the number of
 * reduce tasks, the four named outputs and the output path.
 *
 * <p>The reducer count is taken from a {@code FactDistinctColumnsReducerMapping} built for the
 * segment's cube and must not exceed 250.
 *
 * @param output  the job output path
 * @param cubeSeg the segment whose cube determines the reducer mapping
 * @throws IOException declared by the output setup calls
 * @throws IllegalArgumentException if more than 250 reducers would be required
 */
private void setupReducer(Path output, CubeSegment cubeSeg)
        throws IOException {
    int reducerCount = new FactDistinctColumnsReducerMapping(cubeSeg.getCubeInstance()).getTotalReducerNum();
    logger.info("{} has reducers {}.", this.getClass().getName(), reducerCount);
    if (reducerCount > 250) {
        String message = "The max reducer number for FactDistinctColumnsJob is 250, but now it is "
                + reducerCount
                + ", decrease 'kylin.engine.mr.uhc-reducer-count'";
        throw new IllegalArgumentException(message);
    }

    job.setReducerClass(FactDistinctColumnsReducer.class);
    job.setPartitionerClass(FactDistinctColumnPartitioner.class);
    job.setNumReduceTasks(reducerCount);

    // One named output per kind of result, so each kind is written separately.
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_COLUMN,
            SequenceFileOutputFormat.class, NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT,
            SequenceFileOutputFormat.class, NullWritable.class, ArrayPrimitiveWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_STATISTICS,
            SequenceFileOutputFormat.class, LongWritable.class, BytesWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_PARTITION,
            TextOutputFormat.class, NullWritable.class, LongWritable.class);

    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

    // Lazy output format: avoids creating a zero-sized default output.
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    deletePath(job.getConfiguration(), output);
}
 
Example 18
Source File: JobService.java    From kylin-on-parquet-v2 with Apache License 2.0 4 votes vote down vote up
/**
 * Recreates the optimize job for {@code segment} and swaps it into the cube's checkpoint job in
 * place of the existing cubing sub-task for that segment.
 *
 * <p>Steps: look up the single checkpoint job of the cube (status NEW, PENDING or ERROR), find
 * its {@code CubingJob} sub-task whose segment name equals that of {@code segment}, append a new
 * segment over the same TS range, create and register a batch optimize job for it, and finally
 * replace the old sub-task with the new job in the checkpoint job.
 *
 * @param segment   the segment whose optimize job is to be recovered
 * @param submitter the user submitting the new optimize job
 * @return the job instance built from the newly created optimize job
 * @throws IllegalStateException if the cube has zero or more than one matching checkpoint job,
 *         if the segment referenced by the matching sub-task still exists in the cube, or if no
 *         cubing sub-task matches the segment name
 * @throws IOException declared by the calls this method makes
 * @throws JobException declared by the calls this method makes
 */
public JobInstance submitRecoverSegmentOptimizeJob(CubeSegment segment, String submitter)
        throws IOException, JobException {
    CubeInstance cubeInstance = segment.getCubeInstance();

    // NOTE(review): presumably validates the cube descriptor signature before any job is touched — confirm.
    checkCubeDescSignature(cubeInstance);

    String cubeName = cubeInstance.getName();
    // Exactly one checkpoint job in NEW, PENDING or ERROR status is expected for the cube.
    List<JobInstance> jobInstanceList = searchJobsByCubeName(cubeName, null,
            Lists.newArrayList(JobStatusEnum.NEW, JobStatusEnum.PENDING, JobStatusEnum.ERROR),
            JobTimeFilterEnum.ALL, JobSearchMode.CHECKPOINT_ONLY);
    if (jobInstanceList.size() > 1) {
        throw new IllegalStateException("Exist more than one CheckpointExecutable for cube " + cubeName);
    } else if (jobInstanceList.size() == 0) {
        throw new IllegalStateException("There's no CheckpointExecutable for cube " + cubeName);
    }
    CheckpointExecutable checkpointExecutable = (CheckpointExecutable) getExecutableManager()
            .getJob(jobInstanceList.get(0).getId());

    // Find the cubing sub-task that was built for a segment with the same name.
    AbstractExecutable toBeReplaced = null;
    for (AbstractExecutable taskForCheck : checkpointExecutable.getSubTasksForCheck()) {
        if (taskForCheck instanceof CubingJob) {
            CubingJob subCubingJob = (CubingJob) taskForCheck;
            String segmentName = CubingExecutableUtil.getSegmentName(subCubingJob.getParams());
            if (segmentName != null && segmentName.equals(segment.getName())) {
                String segmentID = CubingExecutableUtil.getSegmentId(subCubingJob.getParams());
                CubeSegment beingOptimizedSegment = cubeInstance.getSegmentById(segmentID);
                if (beingOptimizedSegment != null) { // beingOptimizedSegment exists & should not be recovered
                    throw new IllegalStateException("Segment " + beingOptimizedSegment.getName() + "-"
                            + beingOptimizedSegment.getUuid()
                            + " still exists. Please delete it or discard the related optimize job first!!!");
                }
                toBeReplaced = taskForCheck;
                break;
            }
        }
    }
    if (toBeReplaced == null) {
        throw new IllegalStateException("There's no CubingJob for segment " + segment.getName()
                + " in CheckpointExecutable " + checkpointExecutable.getName());
    }

    /** Add CubingJob for the related segment **/
    CubeSegment optimizeSegment = getCubeManager().appendSegment(cubeInstance, segment.getTSRange());

    DefaultChainedExecutable optimizeJob = EngineFactory.createBatchOptimizeJob(optimizeSegment, submitter);

    getExecutableManager().addJob(optimizeJob);

    JobInstance optimizeJobInstance = getSingleJobInstance(optimizeJob);

    /** Update the checkpoint job */
    // The old sub-task is replaced at its own index, so the sub-task order is kept.
    checkpointExecutable.getSubTasksForCheck().set(checkpointExecutable.getSubTasksForCheck().indexOf(toBeReplaced),
            optimizeJob);

    getExecutableManager().updateCheckpointJob(checkpointExecutable.getId(),
            checkpointExecutable.getSubTasksForCheck());

    return optimizeJobInstance;
}
 
Example 19
Source File: CubeHTableUtil.java    From kylin-on-parquet-v2 with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the HBase table that stores the given cube segment.
 *
 * <p>The table is named after the segment's storage location identifier, is tagged with
 * metadata values (metadata URL prefix, creation time, optional owner and git commit info,
 * cube owner, segment), gets one column family per family in the cube's HBase mapping, and is
 * created with the supplied split keys. Creation fails if a table with that name already exists.
 *
 * @param cubeSegment the segment to create the table for
 * @param splitKeys   the split keys passed to table creation
 * @throws IOException declared by the HBase calls
 * @throws RuntimeException if the table already exists
 * @throws IllegalArgumentException if the table is reported as not available after creation
 */
public static void createHTable(CubeSegment cubeSegment, byte[][] splitKeys) throws IOException {
    String tableName = cubeSegment.getStorageLocationIdentifier();
    CubeInstance cubeInstance = cubeSegment.getCubeInstance();
    CubeDesc cubeDesc = cubeInstance.getDescriptor();
    KylinConfig kylinConfig = cubeDesc.getConfig();

    TableName htableName = TableName.valueOf(tableName);
    HTableDescriptor tableDesc = new HTableDescriptor(htableName);
    tableDesc.setValue(HTableDescriptor.SPLIT_POLICY, DisabledRegionSplitPolicy.class.getName());
    tableDesc.setValue(IRealizationConstants.HTableTag, kylinConfig.getMetadataUrlPrefix());
    tableDesc.setValue(IRealizationConstants.HTableCreationTime, String.valueOf(System.currentTimeMillis()));

    // HTableOwner is the team that provides the kylin service.
    String kylinOwner = kylinConfig.getKylinOwner();
    if (!StringUtils.isEmpty(kylinOwner)) {
        tableDesc.setValue(IRealizationConstants.HTableOwner, kylinOwner);
    }

    String commitInfo = KylinVersion.getGitCommitInfo();
    if (!StringUtils.isEmpty(commitInfo)) {
        tableDesc.setValue(IRealizationConstants.HTableGitTag, commitInfo);
    }

    // HTableUser is the cube owner, which will be the "user".
    tableDesc.setValue(IRealizationConstants.HTableUser, cubeInstance.getOwner());
    tableDesc.setValue(IRealizationConstants.HTableSegmentTag, cubeSegment.toString());

    Configuration conf = HBaseConnection.getCurrentHBaseConfiguration();
    Connection conn = HBaseConnection.get(kylinConfig.getStorageUrl());
    Admin admin = conn.getAdmin();

    try {
        if (User.isHBaseSecurityEnabled(conf)) {
            // Secure clusters need this coprocessor for bulk load.
            tableDesc.addCoprocessor("org.apache.hadoop.hbase.security.access.SecureBulkLoadEndpoint");
        }

        for (HBaseColumnFamilyDesc familyDesc : cubeDesc.getHbaseMapping().getColumnFamily()) {
            tableDesc.addFamily(createColumnFamily(kylinConfig, familyDesc.getName(), familyDesc.isMemoryHungry()));
        }

        // An existing table is never dropped here; it is reported as an error instead.
        if (admin.tableExists(htableName)) {
            throw new RuntimeException("HBase table " + tableName + " exists!");
        }

        DeployCoprocessorCLI.deployCoprocessor(tableDesc);

        admin.createTable(tableDesc, splitKeys);
        Preconditions.checkArgument(admin.isTableAvailable(htableName),
                "table " + tableName + " created, but is not available due to some reasons");
        logger.info("create hbase table " + tableName + " done.");
    } finally {
        IOUtils.closeQuietly(admin);
    }
}
 
Example 20
Source File: JobService.java    From kylin with Apache License 2.0 4 votes vote down vote up
/**
 * Recreates the optimize job for {@code segment} and swaps it into the cube's checkpoint job in
 * place of the existing cubing sub-task for that segment.
 *
 * <p>Steps: look up the single checkpoint job of the cube (status NEW, PENDING or ERROR), find
 * its {@code CubingJob} sub-task whose segment name equals that of {@code segment}, append a new
 * segment over the same TS range, create and register a batch optimize job for it, and finally
 * replace the old sub-task with the new job in the checkpoint job.
 *
 * @param segment   the segment whose optimize job is to be recovered
 * @param submitter the user submitting the new optimize job
 * @return the job instance built from the newly created optimize job
 * @throws IllegalStateException if the cube has zero or more than one matching checkpoint job,
 *         if the segment referenced by the matching sub-task still exists in the cube, or if no
 *         cubing sub-task matches the segment name
 * @throws IOException declared by the calls this method makes
 * @throws JobException declared by the calls this method makes
 */
public JobInstance submitRecoverSegmentOptimizeJob(CubeSegment segment, String submitter)
        throws IOException, JobException {
    CubeInstance cubeInstance = segment.getCubeInstance();

    // NOTE(review): presumably validates the cube descriptor signature before any job is touched — confirm.
    checkCubeDescSignature(cubeInstance);

    String cubeName = cubeInstance.getName();
    // Exactly one checkpoint job in NEW, PENDING or ERROR status is expected for the cube.
    List<JobInstance> jobInstanceList = searchJobsByCubeName(cubeName, null,
            Lists.newArrayList(JobStatusEnum.NEW, JobStatusEnum.PENDING, JobStatusEnum.ERROR),
            JobTimeFilterEnum.ALL, JobSearchMode.CHECKPOINT_ONLY);
    if (jobInstanceList.size() > 1) {
        throw new IllegalStateException("Exist more than one CheckpointExecutable for cube " + cubeName);
    } else if (jobInstanceList.size() == 0) {
        throw new IllegalStateException("There's no CheckpointExecutable for cube " + cubeName);
    }
    CheckpointExecutable checkpointExecutable = (CheckpointExecutable) getExecutableManager()
            .getJob(jobInstanceList.get(0).getId());

    // Find the cubing sub-task that was built for a segment with the same name.
    AbstractExecutable toBeReplaced = null;
    for (AbstractExecutable taskForCheck : checkpointExecutable.getSubTasksForCheck()) {
        if (taskForCheck instanceof CubingJob) {
            CubingJob subCubingJob = (CubingJob) taskForCheck;
            String segmentName = CubingExecutableUtil.getSegmentName(subCubingJob.getParams());
            if (segmentName != null && segmentName.equals(segment.getName())) {
                String segmentID = CubingExecutableUtil.getSegmentId(subCubingJob.getParams());
                CubeSegment beingOptimizedSegment = cubeInstance.getSegmentById(segmentID);
                if (beingOptimizedSegment != null) { // beingOptimizedSegment exists & should not be recovered
                    throw new IllegalStateException("Segment " + beingOptimizedSegment.getName() + "-"
                            + beingOptimizedSegment.getUuid()
                            + " still exists. Please delete it or discard the related optimize job first!!!");
                }
                toBeReplaced = taskForCheck;
                break;
            }
        }
    }
    if (toBeReplaced == null) {
        throw new IllegalStateException("There's no CubingJob for segment " + segment.getName()
                + " in CheckpointExecutable " + checkpointExecutable.getName());
    }

    /** Add CubingJob for the related segment **/
    CubeSegment optimizeSegment = getCubeManager().appendSegment(cubeInstance, segment.getTSRange());

    DefaultChainedExecutable optimizeJob = EngineFactory.createBatchOptimizeJob(optimizeSegment, submitter);

    getExecutableManager().addJob(optimizeJob);

    JobInstance optimizeJobInstance = getSingleJobInstance(optimizeJob);

    /** Update the checkpoint job */
    // The old sub-task is replaced at its own index, so the sub-task order is kept.
    checkpointExecutable.getSubTasksForCheck().set(checkpointExecutable.getSubTasksForCheck().indexOf(toBeReplaced),
            optimizeJob);

    getExecutableManager().updateCheckpointJob(checkpointExecutable.getId(),
            checkpointExecutable.getSubTasksForCheck());

    return optimizeJobInstance;
}