Java Code Examples for org.apache.kylin.cube.CubeInstance#getLatestReadySegment()

The following examples show how to use org.apache.kylin.cube.CubeInstance#getLatestReadySegment(). The source file, project, and license are noted above each example.
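As a quick orientation before the examples, the sketch below shows the core pattern they all share. It is an illustrative snippet only: the cube name "sample_cube" and the wrapper class are placeholders, not taken from any project above. The point to note, visible in every example, is that getLatestReadySegment() returns null when the cube has no segment in READY state, so the result must always be null-checked.

import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;

public class LatestReadySegmentSketch {
    public static void main(String[] args) {
        KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
        // "sample_cube" is a placeholder cube name
        CubeInstance cube = CubeManager.getInstance(kylinConfig).getCube("sample_cube");
        CubeSegment latest = cube.getLatestReadySegment();
        if (latest == null) {
            // no build has completed yet, so there is nothing to read
            System.out.println("cube has no READY segment");
        } else {
            // the end of the segment's time range marks how far the cube has been built
            System.out.println("latest READY segment " + latest.getName()
                    + " ends at " + latest.getTSRange().end.v);
        }
    }
}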
Example 1
Source File: StreamingServer.java    From kylin-on-parquet-v2 with Apache License 2.0
private StreamingConsumerChannel createNewConsumer(String cubeName, List<Partition> partitions, ConsumerStartProtocol startProtocol)
        throws IOException {
    KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
    CubeInstance cube = CubeManager.getInstance(kylinConfig).getCube(cubeName);
    StreamingSegmentManager segmentManager = getStreamingSegmentManager(cubeName);

    IStreamingSource streamingSource = StreamingSourceFactory.getStreamingSource(cube);
    IStreamingConnector streamingConnector = streamingSource.createStreamingConnector(cubeName, partitions,
            startProtocol, segmentManager);
    StreamingConsumerChannel consumer = new StreamingConsumerChannel(cubeName, streamingConnector, segmentManager,
            IStopConsumptionCondition.NEVER_STOP);
    long minAcceptEventTime = cube.getDescriptor().getPartitionDateStart();
    CubeSegment latestRemoteSegment = cube.getLatestReadySegment();
    if (latestRemoteSegment != null) {
        minAcceptEventTime = latestRemoteSegment.getTSRange().end.v;
    }
    if (minAcceptEventTime > 0) {
        consumer.setMinAcceptEventTime(minAcceptEventTime);
    }
    StreamingCubeConsumeState consumeState = streamMetadataStore.getStreamingCubeConsumeState(cubeName);
    if (consumeState != null && consumeState == StreamingCubeConsumeState.PAUSED) {
        consumer.pause(false);
    }
    cubeConsumerMap.put(cubeName, consumer);
    return consumer;
}
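In this example getLatestReadySegment() supplies the lower bound for event acceptance: the end of the latest READY segment's time range marks data that has already been built, so the new consumer channel refuses events older than that timestamp rather than re-ingesting them. When no READY segment exists, the cube's configured partition start date is used as the bound instead.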
 
Example 2
Source File: StreamingServer.java    From kylin with Apache License 2.0
private StreamingConsumerChannel createNewConsumer(String cubeName, List<Partition> partitions, ConsumerStartProtocol startProtocol)
        throws IOException {
    KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
    CubeInstance cube = CubeManager.getInstance(kylinConfig).getCube(cubeName);
    StreamingSegmentManager segmentManager = getStreamingSegmentManager(cubeName);

    IStreamingSource streamingSource = StreamingSourceFactory.getStreamingSource(cube);
    IStreamingConnector streamingConnector = streamingSource.createStreamingConnector(cubeName, partitions,
            startProtocol, segmentManager);
    StreamingConsumerChannel consumer = new StreamingConsumerChannel(cubeName, streamingConnector, segmentManager,
            IStopConsumptionCondition.NEVER_STOP);
    long minAcceptEventTime = cube.getDescriptor().getPartitionDateStart();
    CubeSegment latestRemoteSegment = cube.getLatestReadySegment();
    if (latestRemoteSegment != null) {
        minAcceptEventTime = latestRemoteSegment.getTSRange().end.v;
    }
    if (minAcceptEventTime > 0 && minAcceptEventTime < System.currentTimeMillis()) {
        consumer.setMinAcceptEventTime(minAcceptEventTime);
    }
    StreamingCubeConsumeState consumeState = streamMetadataStore.getStreamingCubeConsumeState(cubeName);
    if (consumeState != null && consumeState == StreamingCubeConsumeState.PAUSED) {
        consumer.pause(false);
    }
    cubeConsumerMap.put(cubeName, consumer);
    return consumer;
}
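This variant from the main kylin project adds one guard over Example 1: the minimum accept event time is applied only when it also lies in the past (minAcceptEventTime < System.currentTimeMillis()). That protects against a READY segment whose time range ends in the future, which would otherwise make the consumer reject every incoming event.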
 
Example 3
Source File: Coordinator.java    From kylin-on-parquet-v2 and kylin with Apache License 2.0
private List<String> findSegmentsCanBuild(String cubeName) {
    List<String> result = Lists.newArrayList();
    CubeInstance cubeInstance = CubeManager.getInstance(KylinConfig.getInstanceFromEnv()).getCube(cubeName);
    // in optimization
    if (isInOptimize(cubeInstance)) {
        return result;
    }
    int allowMaxBuildingSegments = cubeInstance.getConfig().getMaxBuildingSegments();
    CubeSegment latestHistoryReadySegment = cubeInstance.getLatestReadySegment();
    long minSegmentStart = -1;
    if (latestHistoryReadySegment != null) {
        minSegmentStart = latestHistoryReadySegment.getTSRange().end.v;
    } else {
        // no READY segment exists yet; to let the cube planner work, allow only one segment to build at a time
        logger.info("there are no READY segments for cube:{}, so only allow 1 segment to build concurrently", cubeName);
        allowMaxBuildingSegments = 1;
    }

    CubeAssignment assignments = streamMetadataStore.getAssignmentsByCube(cubeName);
    Set<Integer> cubeAssignedReplicaSets = assignments.getReplicaSetIDs();
    List<SegmentBuildState> segmentStates = streamMetadataStore.getSegmentBuildStates(cubeName);
    Collections.sort(segmentStates);
    // TODO need to check whether it is in optimization
    int inBuildingSegments = cubeInstance.getBuildingSegments().size();
    int leftQuota = allowMaxBuildingSegments - inBuildingSegments;

    for (int i = 0; i < segmentStates.size(); i++) {
        SegmentBuildState segmentState = segmentStates.get(i);
        Pair<Long, Long> segmentRange = CubeSegment.parseSegmentName(segmentState.getSegmentName());
        if (segmentRange.getFirst() < minSegmentStart) {
            logger.warn("the cube segment state is not clear correctly, cube:{} segment:{}, clear it", cubeName,
                    segmentState.getSegmentName());
            streamMetadataStore.removeSegmentBuildState(cubeName, segmentState.getSegmentName());
            continue;
        }

        if (segmentState.isInBuilding()) {
            inBuildingSegments++;
            String jobId = segmentState.getState().getJobId();
            logger.info("there is segment in building, cube:{} segment:{} jobId:{}", cubeName,
                    segmentState.getSegmentName(), jobId);
            long buildStartTime = segmentState.getState().getBuildStartTime();
            if (buildStartTime != 0 && jobId != null) {
                long buildDuration = System.currentTimeMillis() - buildStartTime;
                if (buildDuration < 40 * 60 * 1000) { // check the job status only after the build has run for more than 40 minutes
                    continue;
                }
                CubingJob cubingJob = (CubingJob) getExecutableManager().getJob(jobId);
                ExecutableState jobState = cubingJob.getStatus();
                if (ExecutableState.SUCCEED.equals(jobState)) { // the job already succeeded, remove the build state
                    CubeSegment cubeSegment = cubeInstance.getSegment(segmentState.getSegmentName(), null);
                    if (cubeSegment != null && SegmentStatusEnum.READY == cubeSegment.getStatus()) {
                        logger.info(
                                "job:{} is already succeed, and segment:{} is ready, remove segment build state",
                                jobId, segmentState.getSegmentName());
                        streamMetadataStore.removeSegmentBuildState(cubeName, segmentState.getSegmentName());
                    }
                    continue;
                } else if (ExecutableState.ERROR.equals(jobState)) {
                    logger.info("job:{} is in ERROR state, resuming it", jobId);
                    getExecutableManager().resumeJob(jobId);
                    continue;
                } else if (ExecutableState.DISCARDED.equals(jobState)) {
                    // if the job was discarded manually, treat the segment as no longer building
                    logger.info("job:{} was discarded, resetting the job state in metaStore", jobId);
                    SegmentBuildState.BuildState state = new SegmentBuildState.BuildState();
                    state.setBuildStartTime(0);
                    state.setState(SegmentBuildState.BuildState.State.WAIT);
                    state.setJobId(cubingJob.getId());
                    streamMetadataStore.updateSegmentBuildState(cubeName, segmentState.getSegmentName(), state);
                    segmentState.setState(state);
                    logger.info("segment:{} is discard", segmentState.getSegmentName());
                    continue;
                } else {
                    logger.info("job:{} is in running, job state: {}", jobId, jobState);
                    continue;
                }
            }
        }
        if (leftQuota <= 0) {
            logger.info("No left quota to build segments for cube:{}", cubeName);
            return result;
        }
        if (!checkSegmentIsReadyToBuild(segmentStates, i, cubeAssignedReplicaSets)) {
            break;
        }
        result.add(segmentState.getSegmentName());
        leftQuota--;
    }
    return result;
}
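Here the end of the latest READY segment acts as a watermark (minSegmentStart): any pending segment whose start precedes it overlaps data that has already been built, so its build state is removed instead of being submitted. When no READY segment exists, build concurrency is capped at one so the cube planner can learn from the first completed build.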
 
Example 4
Source File: BuildJobSubmitter.java    From kylin-on-parquet-v2 and kylin with Apache License 2.0
/**
 * @return the list of segments for which a build job could be submitted
 */
@NonSideEffect
List<String> checkSegmentBuildJobFromMetadata(String cubeName) {
    List<String> result = Lists.newArrayList();
    CubeInstance cubeInstance = coordinator.getCubeManager().getCube(cubeName);
    // in optimization
    if (isInOptimize(cubeInstance)) {
        return result;
    }
    int allowMaxBuildingSegments = cubeInstance.getConfig().getMaxBuildingSegments();
    CubeSegment latestHistoryReadySegment = cubeInstance.getLatestReadySegment();
    long minSegmentStart = -1;
    if (latestHistoryReadySegment != null) {
        minSegmentStart = latestHistoryReadySegment.getTSRange().end.v;
    } else {
        // no READY segment exists yet; to let the cube planner work, allow only one segment to build at a time
        logger.info("there are no READY segments for cube:{}, so only allow 1 segment to build concurrently", cubeName);
        allowMaxBuildingSegments = 1;
    }

    CubeAssignment assignments = coordinator.getStreamMetadataStore().getAssignmentsByCube(cubeName);
    Set<Integer> cubeAssignedReplicaSets = assignments.getReplicaSetIDs();

    List<SegmentBuildState> segmentStates = coordinator.getStreamMetadataStore().getSegmentBuildStates(cubeName);
    int inBuildingSegments = cubeInstance.getBuildingSegments().size();
    int leftQuota = allowMaxBuildingSegments - inBuildingSegments;
    boolean stillQuotaForNewSegment = true;

    // Sort so that we iterate segments from earliest to newest
    Collections.sort(segmentStates);

    for (int i = 0; i < segmentStates.size(); i++) {
        boolean needRebuild = false;
        if (leftQuota <= 0) {
            logger.info("No left quota to build segments for cube:{} at {}", cubeName, leftQuota);
            stillQuotaForNewSegment = false;
        }

        SegmentBuildState segmentState = segmentStates.get(i);
        Pair<Long, Long> segmentRange = CubeSegment.parseSegmentName(segmentState.getSegmentName());

        // If an existing historical segment covers this range, a new real-time segment must not overwrite it;
        // delete the entry so the segment that should not exist is ignored
        if (segmentRange.getFirst() < minSegmentStart) {
            logger.warn(
                    "The cube segment state is not correct because it belongs to historcial part, cube:{} segment:{}, clear it.",
                    cubeName, segmentState.getSegmentName());
            coordinator.getStreamMetadataStore().removeSegmentBuildState(cubeName, segmentState.getSegmentName());
            continue;
        }

        // We already have a building job for current segment
        if (segmentState.isInBuilding()) {
            needRebuild = checkSegmentBuildingJob(segmentState, cubeName, cubeInstance);
            if (!needRebuild)
                continue;
        } else if (segmentState.isInWaiting()) {
            // The data may already be fully uploaded to remote storage, or the job was discarded;
            // in both cases a build job should be submitted, so fall through
        }

        boolean readyToBuild = checkSegmentIsReadyToBuild(segmentStates, i, cubeAssignedReplicaSets);
        if (!readyToBuild) {
            logger.debug("Segment {} {} is not ready to submit a building job.", cubeName, segmentState);
        } else if (stillQuotaForNewSegment || needRebuild) {
            result.add(segmentState.getSegmentName());
            leftQuota--;
        }
    }
    if (logger.isDebugEnabled() && !result.isEmpty()) {
        logger.debug("{} Candidate segment list to be built : {}.", cubeName, String.join(", ", result));
    }
    return result;
}
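This is a refactoring of the loop in Example 3. The latest READY segment again sets the watermark, but the per-job status handling is factored out into checkSegmentBuildingJob(), and the needRebuild flag allows a failed segment to be resubmitted even after the quota for brand-new segments (stillQuotaForNewSegment) is exhausted.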
 
Example 5
Source File: KafkaSource.java    From kylin-on-parquet-v2 and kylin with Apache License 2.0
private void setupConnectorFromCheckpoint(KafkaConnector connector, List<Partition> assignedPartitions, IStreamingSource streamingSource, StreamingSegmentManager cubeDataStore) {
    CubeInstance cubeInstance = cubeDataStore.getCubeInstance();
    CubeSegment latestReadySegment = cubeInstance.getLatestReadySegment();
    String localCheckpointConsumePos = cubeDataStore.getCheckPointSourcePosition();
    String remoteCheckpointConsumePos = null;
    if (latestReadySegment != null) {
        remoteCheckpointConsumePos = latestReadySegment.getStreamSourceCheckpoint();
    }
    logger.info("localConsumeStats from local checkpoint {}, remoteConsumeStats from remote checkpoint {} ",
            localCheckpointConsumePos, remoteCheckpointConsumePos);
    KafkaPosition localCPPosition = null;
    KafkaPosition remoteCPPosition = null;
    if (localCheckpointConsumePos != null) {
        localCPPosition = (KafkaPosition) streamingSource.getSourcePositionHandler().parsePosition(localCheckpointConsumePos);
    }

    if (remoteCheckpointConsumePos != null) {
        remoteCPPosition = (KafkaPosition) streamingSource.getSourcePositionHandler().parsePosition(remoteCheckpointConsumePos);
    }

    // merge the local and remote consume stats
    if (isEmptyPosition(localCPPosition) && isEmptyPosition(remoteCPPosition)) {
        // no segment exists in the cube and the cube is configured to consume from the latest offsets
        if (cubeInstance.getSegments().isEmpty() && cubeInstance.getConfig().isStreamingConsumeFromLatestOffsets()) {
            logger.info("start kafka connector from latest");
            connector.setStartPartition(assignedPartitions, ConsumerStartMode.LATEST, null);
        } else {
            logger.info("start kafka connector from earliest");
            connector.setStartPartition(assignedPartitions, ConsumerStartMode.EARLIEST, null);
        }
        return;
    }

    KafkaPosition consumerStartPos;

    if (isEmptyPosition(localCPPosition) && !isEmptyPosition(remoteCPPosition)) {
        consumerStartPos = remoteCPPosition;
    } else if (isEmptyPosition(remoteCPPosition) && !isEmptyPosition(localCPPosition)) {
        consumerStartPos = (KafkaPosition)localCPPosition.advance();
    } else {
        Map<Integer, Long> mergedStartOffsets = Maps.newHashMap();
        MapDifference<Integer, Long> statsDiff = Maps.difference(localCPPosition.getPartitionOffsets(), remoteCPPosition.getPartitionOffsets());
        mergedStartOffsets.putAll(statsDiff.entriesInCommon());
        mergedStartOffsets.putAll(statsDiff.entriesOnlyOnLeft());
        mergedStartOffsets.putAll(statsDiff.entriesOnlyOnRight());
        mergedStartOffsets.putAll(Maps.transformValues(statsDiff.entriesDiffering(),
                new Function<MapDifference.ValueDifference<Long>, Long>() {
                    @Nullable
                    @Override
                    public Long apply(@Nullable MapDifference.ValueDifference<Long> input) {
                        return input.leftValue() > input.rightValue() ? input.leftValue() : input.rightValue();
                    }
                }));
        consumerStartPos = new KafkaPosition(mergedStartOffsets);
    }
    logger.info("start kafka connector from specified position:{}", consumerStartPos);
    connector.setStartPartition(assignedPartitions, ConsumerStartMode.SPECIFIC_POSITION, consumerStartPos.getPartitionOffsets());
}
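In this example the latest READY segment carries the remote checkpoint: the Kafka source position that was persisted when the segment was built (getStreamSourceCheckpoint()). Local and remote checkpoints are then merged by taking, per partition, the larger offset, so consumption resumes without re-reading data that a READY segment already covers. As a stylistic aside (assuming Java 8 and a Guava version whose Function is a functional interface; this is not what the project ships), the anonymous Function in the merge step could be written as a lambda:

mergedStartOffsets.putAll(Maps.transformValues(statsDiff.entriesDiffering(),
        diff -> Math.max(diff.leftValue(), diff.rightValue())));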
 
Example 6
Source File: BuildCubeWithStreamV2.java    From kylin-on-parquet-v2 and kylin with Apache License 2.0
private boolean isSegmentBuildSuccess() {
    CubeInstance cubeInstance = CubeManager.getInstance(KylinConfig.getInstanceFromEnv()).getCube(CUBE_NAME);
    return cubeInstance.getLatestReadySegment() != null;
}
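This integration test uses getLatestReadySegment() as a simple success probe: the streaming build is deemed successful once at least one segment of the cube has reached READY state.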
 
Example 7
Source File: CubeDescTiretreeGlobalDomainDictUtil.java    From kylin-on-parquet-v2 and kylin with Apache License 2.0
/**
 * Get the resource path of a reusable tiretree global domain dictionary.
 * @param config the Kylin config to use
 * @param tblColRef the column whose dictionary may be reused
 * @param cubeDesc the cube descriptor that declares the domain dictionaries
 * @return the dictionary resource path, or null if the model or column cannot be resolved
 */
public static String globalReuseDictPath(KylinConfig config, TblColRef tblColRef, CubeDesc cubeDesc) {
    String globalResumeDictPath = null;
    List<GlobalDict> globalDicts = cubeDesc.listDomainDict();
    DataModelManager metadataManager = DataModelManager.getInstance(config);
    CubeManager cubeManager = CubeManager.getInstance(config);
    for (GlobalDict dict : globalDicts) {
        if (dict.getSrc().getIdentity().equalsIgnoreCase(tblColRef.getIdentity())) {
            String model = dict.getModel();
            String cube = dict.getCube();
            logger.info("cube:{} column:{} tiretree global domain dic reuse model:{} cube{} column:{} ",
                    cubeDesc.getName(), tblColRef.getName(), model, cube, dict.getDesc());

            DataModelDesc dataModel = metadataManager.getDataModelDesc(model);
            if (Objects.isNull(dataModel)) {
                logger.error("get cube:{} column:{} tiretree global domain dic reuse DataModelDesc error",
                        cubeDesc.getName(), tblColRef.getName());
                return null;
            }

            CubeInstance cubeInstance = cubeManager.getCube(cube);
            // note: getLatestReadySegment() returns null when the cube has no READY segment;
            // this code assumes the reused cube has at least one
            CubeSegment cubeSegment = cubeInstance.getLatestReadySegment();

            TblColRef colRef = dataModel.findColumn(dict.getDesc());
            if (Objects.isNull(colRef)) {
                logger.error("get cube:{} column:{} tiretree global domain dic TblColRef error");
                return null;
            }

            globalResumeDictPath = cubeSegment.getDictResPath(colRef);

            if (StringUtils.isBlank(globalResumeDictPath)) {
                logger.error("get cube:{} column:{} tiretree global domain dic resume dict path error",
                        cubeDesc.getName(), tblColRef.getName());
            }
            logger.info("get cube:{} column:{} tiretree global domain dic resume dict path is {}",
                    cubeDesc.getName(), tblColRef.getName(), globalResumeDictPath);
            break;
        }
    }
    return globalResumeDictPath;
}
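Here the latest READY segment of the reused cube is the carrier of the dictionary: its getDictResPath(colRef) yields the resource path of the most recently built global domain dictionary, which the current cube can then reuse instead of rebuilding it.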
 
Example 8
Source File: CubeDescTiretreeGlobalDomainDictUtil.java    From kylin-on-parquet-v2 and kylin with Apache License 2.0
/**
 * Add the reusable tiretree global domain dictionaries to the dump list for the base cuboid job.
 * @param cubeDesc the cube descriptor that declares the domain dictionaries
 * @param dumpList the set of metadata resource paths to dump
 */
public static void cuboidJob(CubeDesc cubeDesc, Set<String> dumpList) {
    logger.info("cube {} start to add global domain dic", cubeDesc.getName());
    CubeManager cubeManager = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
    DataModelManager metadataManager = DataModelManager.getInstance(KylinConfig.getInstanceFromEnv());

    cubeManager.getCube(cubeDesc.getName());
    List<GlobalDict> globalDicts = cubeDesc.listDomainDict();

    for (GlobalDict dict : globalDicts) {
        String cube = dict.getCube();
        String model = dict.getModel();
        logger.debug("cube {} column {} start to add global domain dic ,reuse {}.{}.{}", cubeDesc.getName(),
                dict.getSrc(), model, cube, dict.getDesc());
        CubeInstance instance = cubeManager.getCube(cube);
        logger.debug("cube {} column {} start to add global domain dic ,reuse cube{} dict", cubeDesc.getName(),
                dict.getSrc(), instance.getName());

        // cube, model_desc, cube_desc, table
        dumpList.add(instance.getResourcePath());
        dumpList.add(instance.getDescriptor().getModel().getResourcePath());
        dumpList.add(instance.getDescriptor().getResourcePath());
        dumpList.add(instance.getProjectInstance().getResourcePath());

        for (TableRef tableRef : instance.getDescriptor().getModel().getAllTables()) {
            TableDesc table = tableRef.getTableDesc();
            dumpList.add(table.getResourcePath());
            dumpList.addAll(SourceManager.getMRDependentResources(table));
        }

        DataModelDesc dataModelDesc = metadataManager.getDataModelDesc(model);
        logger.debug("cube {} column {} start to add global domain dic ,reuse model{} dict", cubeDesc.getName(),
                dict.getSrc(), dataModelDesc.getName());
        TblColRef tblColRef = dataModelDesc.findColumn(dict.getDesc());
        // note: getLatestReadySegment() returns null when the reused cube has no READY segment
        CubeSegment segment = instance.getLatestReadySegment();
        logger.debug(
                "cube {} column {} start to add global domain dic, reuse model:{} cube:{} segment:{} dict, tblColRef:{}",
                cubeDesc.getName(), dict.getSrc(), dataModelDesc.getName(), cube, segment.getName(),
                tblColRef.getIdentity());
        if (segment.getDictResPath(tblColRef) != null) {
            dumpList.addAll(ImmutableList.of(segment.getDictResPath(tblColRef)));
        }
    }
}
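The same lookup appears in this job-preparation path: the dictionary resource path from the latest READY segment is added to dumpList alongside the cube, model, descriptor, and table metadata, so the build job ships with every resource it depends on.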
 