Java Code Examples for org.apache.kylin.cube.CubeInstance#getSegment()

The following examples show how to use org.apache.kylin.cube.CubeInstance#getSegment(). You can go to the original project or source file by following the links above each example.
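Across these examples, getSegment(segmentName, status) looks up a cube segment by name, optionally filtered by build status: the code below passes SegmentStatusEnum.NEW while a segment is still being built, SegmentStatusEnum.READY when operating on a built segment, and null to match a segment in any status. The method returns null when no segment matches, so callers null-check the result. A minimal sketch of the common lookup pattern follows; the cube and segment names are hypothetical placeholders.

import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;

CubeManager cubeManager = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
CubeInstance cube = cubeManager.getCube("SAMPLE_CUBE"); // hypothetical cube name
// Passing null for the status matches a segment in any state; pass e.g.
// SegmentStatusEnum.READY to restrict the lookup to built segments.
CubeSegment segment = cube.getSegment("20120101000000_20120201000000", null); // hypothetical segment name
if (segment == null) {
    // getSegment returns null when nothing matches, so always null-check.
    throw new IllegalStateException("Cannot find segment");
}
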
Example 1
Source File: CubeController.java    From kylin-on-parquet-v2 with Apache License 2.0
/**
 * Get SQL of a Cube segment
 *
 * @param cubeName    Cube Name
 * @param segmentName Segment Name
 * @return a GeneralResponse carrying the generated flat-table SQL
 */
@RequestMapping(value = "/{cubeName}/segs/{segmentName}/sql", method = { RequestMethod.GET }, produces = {
        "application/json" })
@ResponseBody
public GeneralResponse getSql(@PathVariable String cubeName, @PathVariable String segmentName) {

    checkCubeExists(cubeName);
    CubeInstance cube = cubeService.getCubeManager().getCube(cubeName);

    CubeSegment segment = cube.getSegment(segmentName, null);
    if (segment == null) {
        throw new NotFoundException("Cannot find segment " + segmentName);
    }

    IJoinedFlatTableDesc flatTableDesc = new CubeJoinedFlatTableDesc(segment, true);
    String sql = JoinedFlatTable.generateSelectDataStatement(flatTableDesc);

    GeneralResponse response = new GeneralResponse();
    response.setProperty("sql", sql);

    return response;
}
 
Example 2
Source File: CubeController.java    From kylin-on-parquet-v2 with Apache License 2.0
/**
 * Delete a cube segment
 */
@RequestMapping(value = "/{cubeName}/segs/{segmentName}", method = { RequestMethod.DELETE }, produces = {
        "application/json" })
@ResponseBody
public CubeInstance deleteSegment(@PathVariable String cubeName, @PathVariable String segmentName) {
    checkCubeExists(cubeName);
    CubeInstance cube = cubeService.getCubeManager().getCube(cubeName);

    CubeSegment segment = cube.getSegment(segmentName, null);
    if (segment == null) {
        throw new NotFoundException("Cannot find segment '" + segmentName + "'");
    }

    try {
        return cubeService.deleteSegment(cube, segmentName);
    } catch (Exception e) {
        logger.error(e.getLocalizedMessage(), e);
        throw new InternalErrorException(e.getLocalizedMessage(), e);
    }
}
 
Example 3
Source File: CubeController.java    From kylin with Apache License 2.0
/**
 * Delete a cube segment
 */
@RequestMapping(value = "/{cubeName}/segs/{segmentName}", method = { RequestMethod.DELETE }, produces = {
        "application/json" })
@ResponseBody
public CubeInstance deleteSegment(@PathVariable String cubeName, @PathVariable String segmentName) {
    checkCubeExists(cubeName);
    CubeInstance cube = cubeService.getCubeManager().getCube(cubeName);

    CubeSegment segment = cube.getSegment(segmentName, null);
    if (segment == null) {
        throw new NotFoundException("Cannot find segment '" + segmentName + "'");
    }

    try {
        return cubeService.deleteSegment(cube, segmentName);
    } catch (Exception e) {
        logger.error(e.getLocalizedMessage(), e);
        throw new InternalErrorException(e.getLocalizedMessage(), e);
    }
}
 
Example 4
Source File: CubeController.java    From kylin with Apache License 2.0
/**
 * Get SQL of a Cube segment
 *
 * @param cubeName    Cube Name
 * @param segmentName Segment Name
 * @return a GeneralResponse carrying the generated flat-table SQL
 */
@RequestMapping(value = "/{cubeName}/segs/{segmentName}/sql", method = { RequestMethod.GET }, produces = {
        "application/json" })
@ResponseBody
public GeneralResponse getSql(@PathVariable String cubeName, @PathVariable String segmentName) {

    checkCubeExists(cubeName);
    CubeInstance cube = cubeService.getCubeManager().getCube(cubeName);

    CubeSegment segment = cube.getSegment(segmentName, null);
    if (segment == null) {
        throw new NotFoundException("Cannot find segment " + segmentName);
    }

    IJoinedFlatTableDesc flatTableDesc = new CubeJoinedFlatTableDesc(segment, true);
    String sql = JoinedFlatTable.generateSelectDataStatement(flatTableDesc);

    GeneralResponse response = new GeneralResponse();
    response.setProperty("sql", sql);

    return response;
}
 
Example 5
Source File: CubeService.java    From Kylin with Apache License 2.0
public CubeInstance rebuildLookupSnapshot(String cubeName, String segmentName, String lookupTable) throws IOException {
    CubeManager cubeMgr = getCubeManager();
    CubeInstance cube = cubeMgr.getCube(cubeName);
    CubeSegment seg = cube.getSegment(segmentName, SegmentStatusEnum.READY);
    cubeMgr.buildSnapshotTable(seg, lookupTable);

    return cube;
}
 
Example 6
Source File: CubeController.java    From Kylin with Apache License 2.0
/**
 * Get the hive SQL of a cube segment
 *
 * @param cubeName    Cube Name
 * @param segmentName Segment Name
 * @return a GeneralResponse carrying the generated SQL
 */
@RequestMapping(value = "/{cubeName}/segs/{segmentName}/sql", method = {RequestMethod.GET})
@ResponseBody
public GeneralResponse getSql(@PathVariable String cubeName, @PathVariable String segmentName) {
    CubeInstance cube = cubeService.getCubeManager().getCube(cubeName);
    CubeDesc cubeDesc = cube.getDescriptor();
    CubeSegment cubeSegment = cube.getSegment(segmentName, SegmentStatusEnum.READY);
    CubeJoinedFlatTableDesc flatTableDesc = new CubeJoinedFlatTableDesc(cubeDesc, cubeSegment);
    String sql = JoinedFlatTable.generateSelectDataStatement(flatTableDesc);

    GeneralResponse response = new GeneralResponse();
    response.setProperty("sql", sql);

    return response;
}
 
Example 7
Source File: CubeService.java    From kylin-on-parquet-v2 with Apache License 2.0
public CubeInstance rebuildLookupSnapshot(CubeInstance cube, String segmentName, String lookupTable)
        throws IOException {
    aclEvaluate.checkProjectOperationPermission(cube);
    Message msg = MsgPicker.getMsg();
    TableDesc tableDesc = getTableManager().getTableDesc(lookupTable, cube.getProject());
    if (tableDesc.isView()) {
        throw new BadRequestException(
                String.format(Locale.ROOT, msg.getREBUILD_SNAPSHOT_OF_VIEW(), tableDesc.getName()));
    }
    CubeSegment seg = cube.getSegment(segmentName, SegmentStatusEnum.READY);
    getCubeManager().buildSnapshotTable(seg, lookupTable, null);

    return cube;
}
 
Example 8
Source File: CubeService.java    From kylin with Apache License 2.0
public CubeInstance rebuildLookupSnapshot(CubeInstance cube, String segmentName, String lookupTable)
        throws IOException {
    aclEvaluate.checkProjectOperationPermission(cube);
    Message msg = MsgPicker.getMsg();
    TableDesc tableDesc = getTableManager().getTableDesc(lookupTable, cube.getProject());
    if (tableDesc.isView()) {
        throw new BadRequestException(
                String.format(Locale.ROOT, msg.getREBUILD_SNAPSHOT_OF_VIEW(), tableDesc.getName()));
    }
    CubeSegment seg = cube.getSegment(segmentName, SegmentStatusEnum.READY);
    getCubeManager().buildSnapshotTable(seg, lookupTable, null);

    return cube;
}
 
Example 9
Source File: BuildJobSubmitter.java    From kylin with Apache License 2.0
/**
 * Check a segment which is in building state
 *
 * @return true if we need to resubmit a new build job, else false
 */
boolean checkSegmentBuildingJob(SegmentBuildState segmentState, String cubeName, CubeInstance cubeInstance) {
    String jobId = segmentState.getState().getJobId();
    logger.debug("There is segment in building, cube:{} segment:{} jobId:{}", cubeName,
            segmentState.getSegmentName(), jobId);
    long buildStartTime = segmentState.getState().getBuildStartTime();
    if (buildStartTime != 0 && jobId != null) {
        long buildDuration = System.currentTimeMillis() - buildStartTime;

        // Check build state after 15 minutes
        if (buildDuration < 15 * 60 * 1000) {
            return false;
        }
        CubingJob cubingJob = (CubingJob) coordinator.getExecutableManager().getJob(jobId);
        if (cubingJob == null) {
            // Cubing job was dropped manually, or metadata is broken.
            logger.warn("Looks like the cubing job was dropped manually; a new one will be submitted.");
            return true;
        }
        ExecutableState jobState = cubingJob.getStatus();

        // If the job already succeeded and the HBase segment is in ready state, remove the build state
        if (ExecutableState.SUCCEED.equals(jobState)) {
            CubeSegment cubeSegment = cubeInstance.getSegment(segmentState.getSegmentName(), null);
            if (cubeSegment != null && SegmentStatusEnum.READY == cubeSegment.getStatus()) {
                logger.info("Job:{} is already succeed, and segment:{} is ready, remove segment build state", jobId,
                        segmentState.getSegmentName());
                coordinator.getStreamMetadataStore().removeSegmentBuildState(cubeName,
                        segmentState.getSegmentName());
            }
            return false;
        }

        // If a job is in error state, just retry it
        if (ExecutableState.ERROR.equals(jobState)) {
            logger.info("Job:{} is error, resume the job.", jobId);
            coordinator.getExecutableManager().resumeJob(jobId);
            return false;
        }

        // If a job is discarded, we will try to resubmit it later.
        if (ExecutableState.DISCARDED.equals(jobState)) {
            if (KylinConfig.getInstanceFromEnv().isAutoResubmitDiscardJob()) {
                logger.debug("Job:{} is discard, resubmit it later.", jobId);
                return true;
            } else {
                logger.debug("Job:{} is discard, please resubmit yourself.", jobId);
                return false;
            }
        } else {
            logger.info("Job:{} is in running, job state: {}.", jobId, jobState);
        }
    } else {
        logger.info("Unknown state {}", segmentState);
    }
    return false;
}
 
Example 10
Source File: DictionaryGeneratorCLI.java    From Kylin with Apache License 2.0
public static void processSegment(KylinConfig config, String cubeName, String segmentName, String factColumnsPath) throws IOException {
    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    CubeSegment segment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);

    processSegment(config, segment, factColumnsPath);
}
 
Example 11
Source File: NDCuboidMapper.java    From Kylin with Apache License 2.0
@Override
protected void setup(Context context) throws IOException {
    super.publishConfiguration(context.getConfiguration());


    cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase();
    segmentName = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_NAME).toUpperCase();

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(context.getConfiguration());

    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    CubeSegment cubeSegment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);
    cubeDesc = cube.getDescriptor();

    // initialize CuboidScheduler
    cuboidScheduler = new CuboidScheduler(cubeDesc);

    rowKeySplitter = new RowKeySplitter(cubeSegment, 65, 256);
}
 
Example 12
Source File: ColumnToRowJob.java    From kylin with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);

        parseOptions(options, args);
        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
        String segmentName = getOptionValue(OPTION_SEGMENT_NAME);

        KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
        CubeManager cubeMgr = CubeManager.getInstance(kylinConfig);
        CubeInstance cube = cubeMgr.getCube(cubeName);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        setJobClasspath(job, cube.getConfig());
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);

        job.setMapperClass(ColumnToRowMapper.class);
        job.setInputFormatClass(ColumnarSplitDataInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(ColumnToRowReducer.class);
        job.setNumReduceTasks(calReducerNum(input));
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.getConfiguration().set("dfs.block.size", cube.getConfig().getStreamingBasicCuboidJobDFSBlockSize());
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);

        CubeSegment segment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);
        attachSegmentMetadataWithDict(segment, job.getConfiguration());
        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } catch (Exception e) {
        logger.error("error in CuboidJob", e);
        printUsage(options);
        throw e;
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}
 
Example 13
Source File: MergeDictJob.java    From kylin with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        parseOptions(options, args);

        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String jobName = getOptionValue(OPTION_JOB_NAME);
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
        String segmentName = getOptionValue(OPTION_SEGMENT_NAME);

        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        CubeSegment segment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        setJobClasspath(job, cube.getConfig());
        job.setJobName(jobName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);

        logger.info("MergeDictReducer output path: {}", output);

        // Mapper
        job.setMapperClass(MergeDictMapper.class);
        job.setInputFormatClass(ColumnarSplitDictInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        //Reducer
        job.setReducerClass(MergeDictReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        attachCubeMetadata(cube, job.getConfiguration());

        deletePath(job.getConfiguration(), output);
        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        logger.error("job {} failed. ", job.getJobName(), e);
        throw e;
    }
}
 
Example 14
Source File: Coordinator.java    From kylin-on-parquet-v2 with Apache License 2.0
private List<String> findSegmentsCanBuild(String cubeName) {
    List<String> result = Lists.newArrayList();
    CubeInstance cubeInstance = CubeManager.getInstance(KylinConfig.getInstanceFromEnv()).getCube(cubeName);
    // in optimization
    if (isInOptimize(cubeInstance)) {
        return result;
    }
    int allowMaxBuildingSegments = cubeInstance.getConfig().getMaxBuildingSegments();
    CubeSegment latestHistoryReadySegment = cubeInstance.getLatestReadySegment();
    long minSegmentStart = -1;
    if (latestHistoryReadySegment != null) {
        minSegmentStart = latestHistoryReadySegment.getTSRange().end.v;
    } else {
        // there is no ready segment; to make the cube planner work, only 1 segment can build
        logger.info("there are no ready segments for cube:{}, so only 1 segment is allowed to build concurrently", cubeName);
        allowMaxBuildingSegments = 1;
    }

    CubeAssignment assignments = streamMetadataStore.getAssignmentsByCube(cubeName);
    Set<Integer> cubeAssignedReplicaSets = assignments.getReplicaSetIDs();
    List<SegmentBuildState> segmentStates = streamMetadataStore.getSegmentBuildStates(cubeName);
    Collections.sort(segmentStates);
    // TODO need to check whether it is in optimization
    int inBuildingSegments = cubeInstance.getBuildingSegments().size();
    int leftQuota = allowMaxBuildingSegments - inBuildingSegments;

    for (int i = 0; i < segmentStates.size(); i++) {
        SegmentBuildState segmentState = segmentStates.get(i);
        Pair<Long, Long> segmentRange = CubeSegment.parseSegmentName(segmentState.getSegmentName());
        if (segmentRange.getFirst() < minSegmentStart) {
            logger.warn("the cube segment state is not clear correctly, cube:{} segment:{}, clear it", cubeName,
                    segmentState.getSegmentName());
            streamMetadataStore.removeSegmentBuildState(cubeName, segmentState.getSegmentName());
            continue;
        }

        if (segmentState.isInBuilding()) {
            inBuildingSegments++;
            String jobId = segmentState.getState().getJobId();
            logger.info("there is segment in building, cube:{} segment:{} jobId:{}", cubeName,
                    segmentState.getSegmentName(), jobId);
            long buildStartTime = segmentState.getState().getBuildStartTime();
            if (buildStartTime != 0 && jobId != null) {
                long buildDuration = System.currentTimeMillis() - buildStartTime;
                if (buildDuration < 40 * 60 * 1000) { // only check the job status after 40 minutes of building
                    continue;
                }
                CubingJob cubingJob = (CubingJob) getExecutableManager().getJob(jobId);
                ExecutableState jobState = cubingJob.getStatus();
                if (ExecutableState.SUCCEED.equals(jobState)) { // job already succeeded, remove the build state
                    CubeSegment cubeSegment = cubeInstance.getSegment(segmentState.getSegmentName(), null);
                    if (cubeSegment != null && SegmentStatusEnum.READY == cubeSegment.getStatus()) {
                        logger.info(
                                "job:{} is already succeed, and segment:{} is ready, remove segment build state",
                                jobId, segmentState.getSegmentName());
                        streamMetadataStore.removeSegmentBuildState(cubeName, segmentState.getSegmentName());
                    }
                    continue;
                } else if (ExecutableState.ERROR.equals(jobState)) {
                    logger.info("job:{} is error, resume the job", jobId);
                    getExecutableManager().resumeJob(jobId);
                    continue;
                } else if (ExecutableState.DISCARDED.equals(jobState)) {
                    // if the job has been discarded manually, treat the segment as not in building
                    logger.info("job:{} is discarded, reset the job state in metaStore", jobId);
                    SegmentBuildState.BuildState state = new SegmentBuildState.BuildState();
                    state.setBuildStartTime(0);
                    state.setState(SegmentBuildState.BuildState.State.WAIT);
                    state.setJobId(cubingJob.getId());
                    streamMetadataStore.updateSegmentBuildState(cubeName, segmentState.getSegmentName(), state);
                    segmentState.setState(state);
                    logger.info("segment:{} is discard", segmentState.getSegmentName());
                    continue;
                } else {
                    logger.info("job:{} is in running, job state: {}", jobId, jobState);
                    continue;
                }
            }
        }
        if (leftQuota <= 0) {
            logger.info("No left quota to build segments for cube:{}", cubeName);
            return result;
        }
        if (!checkSegmentIsReadyToBuild(segmentStates, i, cubeAssignedReplicaSets)) {
            break;
        }
        result.add(segmentState.getSegmentName());
        leftQuota--;
    }
    return result;
}
 
Example 15
Source File: BuildJobSubmitter.java    From kylin with Apache License 2.0
/**
 * <pre>
 * Trace the state of the build job for the earliest (NOT ALL) segment of each streaming cube, and
 *  1. try to promote it into a ready HBase segment if the job's state is SUCCEED
 *  2. try to resume the build job if the job's state is ERROR
 * </pre>
 *
 * @return all succeeded building jobs
 */
@NonSideEffect
List<SegmentJobBuildInfo> traceEarliestSegmentBuildJob() {
    List<SegmentJobBuildInfo> successJobs = Lists.newArrayList();
    for (Map.Entry<String, ConcurrentSkipListSet<SegmentJobBuildInfo>> entry :
            segmentBuildJobCheckList.entrySet()) {
        ConcurrentSkipListSet<SegmentJobBuildInfo> buildInfos = entry.getValue();
        if (buildInfos.isEmpty()) {
            logger.trace("Skip {}", entry.getKey());
            continue;
        }

        // find the earliest segment build job and try to promote
        SegmentJobBuildInfo segmentBuildJob = buildInfos.first();
        logger.debug("Check the cube:{} segment:{} build status.", segmentBuildJob.cubeName,
                segmentBuildJob.segmentName);
        try {
            CubingJob cubingJob = (CubingJob) coordinator.getExecutableManager().getJob(segmentBuildJob.jobID);
            if (cubingJob == null) {
                logger.error("Cannot find metadata of current job.");
                continue;
            }
            ExecutableState jobState = cubingJob.getStatus();
            logger.debug("Current job state {}", jobState);
            if (ExecutableState.SUCCEED.equals(jobState)) {
                CubeManager cubeManager = coordinator.getCubeManager();
                CubeInstance cubeInstance = cubeManager.getCube(segmentBuildJob.cubeName).latestCopyForWrite();
                CubeSegment cubeSegment = cubeInstance.getSegment(segmentBuildJob.segmentName, null);
                logger.info("The cube:{} segment:{} is ready to be promoted.", segmentBuildJob.cubeName,
                        segmentBuildJob.segmentName);
                coordinator.getClusterManager().segmentBuildComplete(cubingJob, cubeInstance, cubeSegment,
                        segmentBuildJob);
                addToCheckList(cubeInstance.getName());
                successJobs.add(segmentBuildJob);
            } else if (ExecutableState.ERROR.equals(jobState)) {
                if (segmentBuildJob.retryCnt < 5) {
                    logger.info("Job:{} is error, resume the job.", segmentBuildJob);
                    coordinator.getExecutableManager().resumeJob(segmentBuildJob.jobID);
                    segmentBuildJob.retryCnt++;
                } else {
                    logger.warn("Job:{} is error, exceed max retry. Kylin admin could resume it or discard it"
                            + "(to let new building job be sumbitted) .", segmentBuildJob);
                }
            }
        } catch (StoreException storeEx) {
            logger.error("Error when check streaming segment job build state:" + segmentBuildJob, storeEx);
            throw storeEx;
        }
    }
    return successJobs;
}
 
Example 16
Source File: Coordinator.java    From kylin with Apache License 2.0
private List<String> findSegmentsCanBuild(String cubeName) {
    List<String> result = Lists.newArrayList();
    CubeInstance cubeInstance = CubeManager.getInstance(KylinConfig.getInstanceFromEnv()).getCube(cubeName);
    // in optimization
    if (isInOptimize(cubeInstance)) {
        return result;
    }
    int allowMaxBuildingSegments = cubeInstance.getConfig().getMaxBuildingSegments();
    CubeSegment latestHistoryReadySegment = cubeInstance.getLatestReadySegment();
    long minSegmentStart = -1;
    if (latestHistoryReadySegment != null) {
        minSegmentStart = latestHistoryReadySegment.getTSRange().end.v;
    } else {
        // there is no ready segment; to make the cube planner work, only 1 segment can build
        logger.info("there are no ready segments for cube:{}, so only 1 segment is allowed to build concurrently", cubeName);
        allowMaxBuildingSegments = 1;
    }

    CubeAssignment assignments = streamMetadataStore.getAssignmentsByCube(cubeName);
    Set<Integer> cubeAssignedReplicaSets = assignments.getReplicaSetIDs();
    List<SegmentBuildState> segmentStates = streamMetadataStore.getSegmentBuildStates(cubeName);
    Collections.sort(segmentStates);
    // TODO need to check whether it is in optimization
    int inBuildingSegments = cubeInstance.getBuildingSegments().size();
    int leftQuota = allowMaxBuildingSegments - inBuildingSegments;

    for (int i = 0; i < segmentStates.size(); i++) {
        SegmentBuildState segmentState = segmentStates.get(i);
        Pair<Long, Long> segmentRange = CubeSegment.parseSegmentName(segmentState.getSegmentName());
        if (segmentRange.getFirst() < minSegmentStart) {
            logger.warn("the cube segment state is not clear correctly, cube:{} segment:{}, clear it", cubeName,
                    segmentState.getSegmentName());
            streamMetadataStore.removeSegmentBuildState(cubeName, segmentState.getSegmentName());
            continue;
        }

        if (segmentState.isInBuilding()) {
            inBuildingSegments++;
            String jobId = segmentState.getState().getJobId();
            logger.info("there is segment in building, cube:{} segment:{} jobId:{}", cubeName,
                    segmentState.getSegmentName(), jobId);
            long buildStartTime = segmentState.getState().getBuildStartTime();
            if (buildStartTime != 0 && jobId != null) {
                long buildDuration = System.currentTimeMillis() - buildStartTime;
                if (buildDuration < 40 * 60 * 1000) { // only check the job status after 40 minutes of building
                    continue;
                }
                CubingJob cubingJob = (CubingJob) getExecutableManager().getJob(jobId);
                ExecutableState jobState = cubingJob.getStatus();
                if (ExecutableState.SUCCEED.equals(jobState)) { // job already succeeded, remove the build state
                    CubeSegment cubeSegment = cubeInstance.getSegment(segmentState.getSegmentName(), null);
                    if (cubeSegment != null && SegmentStatusEnum.READY == cubeSegment.getStatus()) {
                        logger.info(
                                "job:{} is already succeed, and segment:{} is ready, remove segment build state",
                                jobId, segmentState.getSegmentName());
                        streamMetadataStore.removeSegmentBuildState(cubeName, segmentState.getSegmentName());
                    }
                    continue;
                } else if (ExecutableState.ERROR.equals(jobState)) {
                    logger.info("job:{} is error, resume the job", jobId);
                    getExecutableManager().resumeJob(jobId);
                    continue;
                } else if (ExecutableState.DISCARDED.equals(jobState)) {
                    // if the job has been discarded manually, treat the segment as not in building
                    logger.info("job:{} is discarded, reset the job state in metaStore", jobId);
                    SegmentBuildState.BuildState state = new SegmentBuildState.BuildState();
                    state.setBuildStartTime(0);
                    state.setState(SegmentBuildState.BuildState.State.WAIT);
                    state.setJobId(cubingJob.getId());
                    streamMetadataStore.updateSegmentBuildState(cubeName, segmentState.getSegmentName(), state);
                    segmentState.setState(state);
                    logger.info("segment:{} is discard", segmentState.getSegmentName());
                    continue;
                } else {
                    logger.info("job:{} is in running, job state: {}", jobId, jobState);
                    continue;
                }
            }
        }
        if (leftQuota <= 0) {
            logger.info("No left quota to build segments for cube:{}", cubeName);
            return result;
        }
        if (!checkSegmentIsReadyToBuild(segmentStates, i, cubeAssignedReplicaSets)) {
            break;
        }
        result.add(segmentState.getSegmentName());
        leftQuota--;
    }
    return result;
}
 
Example 17
Source File: ColumnToRowJob.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);

        parseOptions(options, args);
        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
        String segmentName = getOptionValue(OPTION_SEGMENT_NAME);

        KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
        CubeManager cubeMgr = CubeManager.getInstance(kylinConfig);
        CubeInstance cube = cubeMgr.getCube(cubeName);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        setJobClasspath(job, cube.getConfig());
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);

        job.setMapperClass(ColumnToRowMapper.class);
        job.setInputFormatClass(ColumnarSplitDataInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(ColumnToRowReducer.class);
        job.setNumReduceTasks(calReducerNum(input));
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.getConfiguration().set("dfs.block.size", cube.getConfig().getStreamingBasicCuboidJobDFSBlockSize());
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);

        CubeSegment segment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);
        attachSegmentMetadataWithDict(segment, job.getConfiguration());
        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } catch (Exception e) {
        logger.error("error in CuboidJob", e);
        printUsage(options);
        throw e;
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}
 
Example 18
Source File: MergeDictJob.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        parseOptions(options, args);

        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String jobName = getOptionValue(OPTION_JOB_NAME);
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
        String segmentName = getOptionValue(OPTION_SEGMENT_NAME);

        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        CubeSegment segment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        setJobClasspath(job, cube.getConfig());
        job.setJobName(jobName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);

        logger.info("MergeDictReducer output path: {}", output);

        // Mapper
        job.setMapperClass(MergeDictMapper.class);
        job.setInputFormatClass(ColumnarSplitDictInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        //Reducer
        job.setReducerClass(MergeDictReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        attachCubeMetadata(cube, job.getConfiguration());

        deletePath(job.getConfiguration(), output);
        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        logger.error("job {} failed. ", job.getJobName(), e);
        throw e;
    }
}
 
Example 19
Source File: BuildJobSubmitter.java    From kylin-on-parquet-v2 with Apache License 2.0
/**
 * Check a segment which is in building state
 *
 * @return true if we need to resubmit a new build job, else false
 */
boolean checkSegmentBuildingJob(SegmentBuildState segmentState, String cubeName, CubeInstance cubeInstance) {
    String jobId = segmentState.getState().getJobId();
    logger.debug("There is segment in building, cube:{} segment:{} jobId:{}", cubeName,
            segmentState.getSegmentName(), jobId);
    long buildStartTime = segmentState.getState().getBuildStartTime();
    if (buildStartTime != 0 && jobId != null) {
        long buildDuration = System.currentTimeMillis() - buildStartTime;

        // Check build state after 15 minutes
        if (buildDuration < 15 * 60 * 1000) {
            return false;
        }
        CubingJob cubingJob = (CubingJob) coordinator.getExecutableManager().getJob(jobId);
        if (cubingJob == null) {
            // Cubing job was dropped manually, or metadata is broken.
            logger.warn("Looks like the cubing job was dropped manually; a new one will be submitted.");
            return true;
        }
        ExecutableState jobState = cubingJob.getStatus();

        // If the job already succeeded and the HBase segment is in ready state, remove the build state
        if (ExecutableState.SUCCEED.equals(jobState)) {
            CubeSegment cubeSegment = cubeInstance.getSegment(segmentState.getSegmentName(), null);
            if (cubeSegment != null && SegmentStatusEnum.READY == cubeSegment.getStatus()) {
                logger.info("Job:{} is already succeed, and segment:{} is ready, remove segment build state", jobId,
                        segmentState.getSegmentName());
                coordinator.getStreamMetadataStore().removeSegmentBuildState(cubeName,
                        segmentState.getSegmentName());
            }
            return false;
        }

        // If a job is in error state, just retry it
        if (ExecutableState.ERROR.equals(jobState)) {
            logger.info("Job:{} is error, resume the job.", jobId);
            coordinator.getExecutableManager().resumeJob(jobId);
            return false;
        }

        // If a job is discarded, we will try to resubmit it later.
        if (ExecutableState.DISCARDED.equals(jobState)) {
            if (KylinConfig.getInstanceFromEnv().isAutoResubmitDiscardJob()) {
                logger.debug("Job:{} is discard, resubmit it later.", jobId);
                return true;
            } else {
                logger.debug("Job:{} is discard, please resubmit yourself.", jobId);
                return false;
            }
        } else {
            logger.info("Job:{} is in running, job state: {}.", jobId, jobState);
        }
    } else {
        logger.info("Unknown state {}", segmentState);
    }
    return false;
}
 
Example 20
Source File: BuildJobSubmitter.java    From kylin-on-parquet-v2 with Apache License 2.0
/**
 * <pre>
 * Trace the state of the build job for the earliest (NOT ALL) segment of each streaming cube, and
 *  1. try to promote it into a ready HBase segment if the job's state is SUCCEED
 *  2. try to resume the build job if the job's state is ERROR
 * </pre>
 *
 * @return all succeeded building jobs
 */
@NonSideEffect
List<SegmentJobBuildInfo> traceEarliestSegmentBuildJob() {
    List<SegmentJobBuildInfo> successJobs = Lists.newArrayList();
    for (Map.Entry<String, ConcurrentSkipListSet<SegmentJobBuildInfo>> entry :
            segmentBuildJobCheckList.entrySet()) {
        ConcurrentSkipListSet<SegmentJobBuildInfo> buildInfos = entry.getValue();
        if (buildInfos.isEmpty()) {
            logger.trace("Skip {}", entry.getKey());
            continue;
        }

        // find the earliest segment build job and try to promote
        SegmentJobBuildInfo segmentBuildJob = buildInfos.first();
        logger.debug("Check the cube:{} segment:{} build status.", segmentBuildJob.cubeName,
                segmentBuildJob.segmentName);
        try {
            CubingJob cubingJob = (CubingJob) coordinator.getExecutableManager().getJob(segmentBuildJob.jobID);
            if (cubingJob == null) {
                logger.error("Cannot find metadata of current job.");
                continue;
            }
            ExecutableState jobState = cubingJob.getStatus();
            logger.debug("Current job state {}", jobState);
            if (ExecutableState.SUCCEED.equals(jobState)) {
                CubeManager cubeManager = coordinator.getCubeManager();
                CubeInstance cubeInstance = cubeManager.getCube(segmentBuildJob.cubeName).latestCopyForWrite();
                CubeSegment cubeSegment = cubeInstance.getSegment(segmentBuildJob.segmentName, null);
                logger.info("The cube:{} segment:{} is ready to be promoted.", segmentBuildJob.cubeName,
                        segmentBuildJob.segmentName);
                coordinator.getClusterManager().segmentBuildComplete(cubingJob, cubeInstance, cubeSegment,
                        segmentBuildJob);
                addToCheckList(cubeInstance.getName());
                successJobs.add(segmentBuildJob);
            } else if (ExecutableState.ERROR.equals(jobState)) {
                if (segmentBuildJob.retryCnt < 5) {
                    logger.info("Job:{} is error, resume the job.", segmentBuildJob);
                    coordinator.getExecutableManager().resumeJob(segmentBuildJob.jobID);
                    segmentBuildJob.retryCnt++;
                } else {
                    logger.warn("Job:{} is error, exceed max retry. Kylin admin could resume it or discard it"
                            + "(to let new building job be sumbitted) .", segmentBuildJob);
                }
            }
        } catch (StoreException storeEx) {
            logger.error("Error when check streaming segment job build state:" + segmentBuildJob, storeEx);
            throw storeEx;
        }
    }
    return successJobs;
}