Java Code Examples for org.apache.kylin.cube.CubeSegment#getInputRecords()

The following examples show how to use org.apache.kylin.cube.CubeSegment#getInputRecords(). Each example is taken from an open-source project; the source file and license are noted above each listing.
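In Kylin, getInputRecords() returns the number of source records a segment was built from, so a value of 0 identifies an empty segment. The most common use, as the examples below show, is to skip such segments at query time. Here is a minimal sketch of that pattern; the helper class and method names are hypothetical, and only the CubeSegment calls come from the examples below.

import org.apache.kylin.cube.CubeSegment;

// Illustrative helper, not part of the Kylin API: decides whether a
// segment can be skipped because it was built from zero input records.
public class EmptySegmentCheck {

    public static boolean shouldSkip(CubeSegment seg) {
        // A segment with no input records holds no data; skip it when
        // the cube configuration allows skipping empty segments.
        return seg.getInputRecords() == 0
                && seg.getConfig().isSkippingEmptySegments();
    }
}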
Example 1
Source File: SegmentPruner.java    From kylin-on-parquet-v2 and kylin with Apache License 2.0
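The SegmentPruner consults getInputRecords() before scanning: a segment built from zero input records is pruned, unless the cube is configured to scan empty segments anyway.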
public boolean check(CubeSegment seg) {

    // A segment with zero input records contains no data. Whether it is
    // pruned or still scanned depends on the cube configuration.
    if (seg.getInputRecords() == 0) {
        if (seg.getConfig().isSkippingEmptySegments()) {
            logger.debug("Prune segment {} due to 0 input record", seg);
            return false;
        } else {
            logger.debug("Insist scan of segment {} having 0 input record", seg);
        }
    }

    // Prune segments whose ranges cannot satisfy the query filter.
    if (!node.checkSeg(seg)) {
        logger.debug("Prune segment {} due to given filter", seg);
        return false;
    }

    logger.debug("Pruner passed on segment {}", seg);
    return true;
}
 
Example 2
Source File: UpdateCubeInfoAfterOptimizeStep.java    From kylin-on-parquet-v2 and kylin with Apache License 2.0
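After an optimization job, the rebuilt segment covers the same source data as the segment it replaces, so this step copies the input-record statistics from the original segment before promoting the new one: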
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeManager = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = cubeManager.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment segment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));

    // The optimized segment is built from the same source data as the
    // segment it replaces, so the source statistics carry over unchanged.
    CubeSegment originalSegment = cube.getOriginalSegmentToOptimize(segment);
    long sourceCount = originalSegment.getInputRecords();
    long sourceSizeBytes = originalSegment.getInputRecordsSize();

    CubingJob cubingJob = (CubingJob) getManager().getJob(CubingExecutableUtil.getCubingJobId(this.getParams()));
    long cubeSizeBytes = cubingJob.findCubeSizeBytes();

    segment.setLastBuildJobID(CubingExecutableUtil.getCubingJobId(this.getParams()));
    segment.setLastBuildTime(System.currentTimeMillis());
    segment.setSizeKB(cubeSizeBytes / 1024);
    segment.setInputRecords(sourceCount);
    segment.setInputRecordsSize(sourceSizeBytes);
    segment.setDimensionRangeInfoMap(originalSegment.getDimensionRangeInfoMap());

    try {
        cubeManager.promoteNewlyOptimizeSegments(cube, segment);
        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to update cube after build", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 3
Source File: UpdateCubeInfoAfterMergeStep.java    From Kylin with Apache License 2.0
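When segments are merged, the merged segment's input-record count and size are the sums of getInputRecords() and getInputRecordsSize() over all segments being merged: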
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeInstance cube = cubeManager.getCube(getCubeName());
    
    CubeSegment mergedSegment = cube.getSegmentById(getSegmentId());
    if (mergedSegment == null) {
        return new ExecuteResult(ExecuteResult.State.FAILED, "there is no segment with id:" + getSegmentId());
    }
    String cubeSizeString = executableManager.getOutput(getConvertToHfileStepId()).getExtra().get(ExecutableConstants.HDFS_BYTES_WRITTEN);
    Preconditions.checkState(StringUtils.isNotEmpty(cubeSizeString), "Can't get cube segment size.");
    long cubeSize = Long.parseLong(cubeSizeString) / 1024; // HDFS bytes written, converted to KB

    // collect source statistics
    List<String> mergingSegmentIds = getMergingSegmentIds();
    if (mergingSegmentIds.isEmpty()) {
        return new ExecuteResult(ExecuteResult.State.FAILED, "there are no merging segments");
    }
    long sourceCount = 0L;
    long sourceSize = 0L;
    for (String id : mergingSegmentIds) {
        CubeSegment segment = cube.getSegmentById(id);
        sourceCount += segment.getInputRecords();
        sourceSize += segment.getInputRecordsSize();
    }
    
    // update segment info
    mergedSegment.setSizeKB(cubeSize);
    mergedSegment.setInputRecords(sourceCount);
    mergedSegment.setInputRecordsSize(sourceSize);
    mergedSegment.setLastBuildJobID(getCubingJobId());
    mergedSegment.setLastBuildTime(System.currentTimeMillis());
    
    try {
        cubeManager.promoteNewlyBuiltSegments(cube, mergedSegment);
        return new ExecuteResult(ExecuteResult.State.SUCCEED);
    } catch (IOException e) {
        logger.error("fail to update cube after merge", e);
        return new ExecuteResult(ExecuteResult.State.ERROR, e.getLocalizedMessage());
    }
}
 
Example 4
Source File: StreamStorageQuery.java    From kylin-on-parquet-v2 with Apache License 2.0
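This streaming storage query builds scanners only for historical segments worth reading: it skips segments with zero input records (when configured to do so) and segments ruled out by the query filter, then combines the historical result with real-time data: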
@Override
public ITupleIterator search(StorageContext context, SQLDigest sqlDigest, TupleInfo returnTupleInfo) {
    GTCubeStorageQueryRequest request = getStorageQueryRequest(context, sqlDigest, returnTupleInfo);

    List<CubeSegmentScanner> scanners = Lists.newArrayList();
    long maxHistorySegmentTime = -1;
    StreamingDataQueryPlanner segmentsPlanner = new StreamingDataQueryPlanner(cubeInstance.getDescriptor(),
            request.getFilter());
    for (CubeSegment cubeSeg : cubeInstance.getSegments(SegmentStatusEnum.READY)) {
        TSRange segmentRange = cubeSeg.getTSRange();
        if (segmentRange.end.v > maxHistorySegmentTime) {
            maxHistorySegmentTime = cubeSeg.getTSRange().end.v;
        }
        CubeSegmentScanner scanner;

        if (cubeDesc.getConfig().isSkippingEmptySegments() && cubeSeg.getInputRecords() == 0) {
            logger.info("Skip cube segment {} because its input record is 0", cubeSeg);
            continue;
        }

        if (segmentsPlanner.canSkip(segmentRange.start.v, segmentRange.end.v)) {
            logger.info("Skip cube segment {} because of not satisfy filter:{}", cubeSeg, request.getFilter());
            continue;
        }

        scanner = new CubeSegmentScanner(cubeSeg, request.getCuboid(), request.getDimensions(),
                request.getGroups(), request.getDynGroups(), request.getDynGroupExprs(), request.getMetrics(),
                request.getDynFuncs(), request.getFilter(), request.getHavingFilter(), request.getContext());
        if (!scanner.isSegmentSkipped())
            scanners.add(scanner);
    }

    ITupleIterator historyResult;
    if (scanners.isEmpty()) {
        historyResult = ITupleIterator.EMPTY_TUPLE_ITERATOR;
    } else {
        historyResult = new SequentialCubeTupleIterator(scanners, request.getCuboid(), request.getDimensions(),
                request.getDynGroups(), request.getGroups(), request.getMetrics(), returnTupleInfo, context, sqlDigest);
    }
    Set<TblColRef> dimensionsD = request.getDimensions();
    if (dimensionsD.isEmpty()) {
        dimensionsD = Sets.newHashSet(request.getCuboid().getColumns()); // temporary fix for query like: select count(1) from TABLE
    }

    ITupleIterator realTimeResult;
    if (segmentsPlanner.canSkip(maxHistorySegmentTime, Long.MAX_VALUE)) {
        logger.info("Skip scan realTime data, {}", maxHistorySegmentTime);
        realTimeResult = ITupleIterator.EMPTY_TUPLE_ITERATOR;
    } else {
        boolean isSelectAllQuery = isSelectAllQuery(request.getCuboid(), request.getGroups(), request.getFilter());
        int limitPushDown = isSelectAllQuery ? context.getFinalPushDownLimit() : Integer.MAX_VALUE;
        realTimeResult = realTimeSearchClient.search(maxHistorySegmentTime, cubeInstance, returnTupleInfo,
                request.getFilter(), dimensionsD, request.getGroups(), request.getMetrics(), limitPushDown,
                !isSelectAllQuery);
    }
    return new CompoundTupleIterator(Arrays.asList(historyResult, realTimeResult));
}
 
Example 5
Source File: StreamStorageQuery.java    From kylin with Apache License 2.0
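This variant of the previous example adds one guard: a READY segment whose time range ends in the future does not advance maxHistorySegmentTime, since such segments are not expected in a normal streaming setup: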
@Override
public ITupleIterator search(StorageContext context, SQLDigest sqlDigest, TupleInfo returnTupleInfo) {
    GTCubeStorageQueryRequest request = getStorageQueryRequest(context, sqlDigest, returnTupleInfo);

    List<CubeSegmentScanner> scanners = Lists.newArrayList();
    long maxHistorySegmentTime = -1;
    StreamingDataQueryPlanner segmentsPlanner = new StreamingDataQueryPlanner(cubeInstance.getDescriptor(),
            request.getFilter());
    long current = System.currentTimeMillis();
    for (CubeSegment cubeSeg : cubeInstance.getSegments(SegmentStatusEnum.READY)) {
        TSRange segmentRange = cubeSeg.getTSRange();
        if (segmentRange.end.v > maxHistorySegmentTime) {
            if (cubeSeg.getTSRange().end.v < current) {
                // Normally a segment covering a future time range should not occur in the streaming case
                maxHistorySegmentTime = cubeSeg.getTSRange().end.v;
            }
        }
        CubeSegmentScanner scanner;

        if (cubeDesc.getConfig().isSkippingEmptySegments() && cubeSeg.getInputRecords() == 0) {
            logger.info("Skip cube segment {} because its input record is 0", cubeSeg);
            continue;
        }

        if (segmentsPlanner.canSkip(segmentRange.start.v, segmentRange.end.v)) {
            logger.info("Skip cube segment {} because of not satisfy filter:{}", cubeSeg, request.getFilter());
            continue;
        }

        scanner = new CubeSegmentScanner(cubeSeg, request.getCuboid(), request.getDimensions(),
                request.getGroups(), request.getDynGroups(), request.getDynGroupExprs(), request.getMetrics(),
                request.getDynFuncs(), request.getFilter(), request.getHavingFilter(), request.getContext());
        if (!scanner.isSegmentSkipped())
            scanners.add(scanner);
    }

    ITupleIterator historyResult;
    if (scanners.isEmpty()) {
        historyResult = ITupleIterator.EMPTY_TUPLE_ITERATOR;
    } else {
        historyResult = new SequentialCubeTupleIterator(scanners, request.getCuboid(), request.getDimensions(),
                request.getDynGroups(), request.getGroups(), request.getMetrics(), returnTupleInfo, context, sqlDigest);
    }
    Set<TblColRef> dimensionsD = request.getDimensions();
    if (dimensionsD.isEmpty()) {
        dimensionsD = Sets.newHashSet(request.getCuboid().getColumns()); // temporary fix for query like: select count(1) from TABLE
    }

    ITupleIterator realTimeResult;
    if (segmentsPlanner.canSkip(maxHistorySegmentTime, Long.MAX_VALUE)) {
        logger.info("Skip scan realTime data, {}", maxHistorySegmentTime);
        realTimeResult = ITupleIterator.EMPTY_TUPLE_ITERATOR;
    } else {
        boolean isSelectAllQuery = isSelectAllQuery(request.getCuboid(), request.getGroups(), request.getFilter());
        int limitPushDown = isSelectAllQuery ? context.getFinalPushDownLimit() : Integer.MAX_VALUE;
        realTimeResult = realTimeSearchClient.search(maxHistorySegmentTime, cubeInstance, returnTupleInfo,
                request.getFilter(), dimensionsD, request.getGroups(), request.getMetrics(), limitPushDown,
                !isSelectAllQuery);
    }
    return new CompoundTupleIterator(Arrays.asList(historyResult, realTimeResult));
}