Java Code Examples for org.apache.kylin.cube.CubeSegment#setInputRecords()

The following examples show how to use org.apache.kylin.cube.CubeSegment#setInputRecords(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CubeBuildJob.java    From kylin-on-parquet-v2 with Apache License 2.0 6 votes vote down vote up
/**
 * Persists build statistics (size, input record count, snapshots, cuboid shard numbers)
 * onto the just-built segment and writes the change back through the CubeManager.
 *
 * @param cubeId         UUID of the cube that owns the segment
 * @param segmentInfo    build output describing the segment (id, layout sizes, snapshots)
 * @param sourceRowCount number of source rows consumed by the build
 * @throws IOException if persisting the cube update fails
 */
private void updateSegmentInfo(String cubeId, SegmentInfo segmentInfo, long sourceRowCount) throws IOException {
    CubeInstance cubeInstance = cubeManager.getCubeByUuid(cubeId);
    // Mutate a copy-for-write; the cached CubeInstance must stay untouched.
    CubeInstance cubeCopy = cubeInstance.latestCopyForWrite();
    CubeUpdate update = new CubeUpdate(cubeCopy);

    CubeSegment segment = cubeCopy.getSegmentById(segmentInfo.id());
    segment.setSizeKB(segmentInfo.getAllLayoutSize() / 1024);
    segment.setLastBuildTime(System.currentTimeMillis());
    segment.setLastBuildJobID(getParam(MetadataConstants.P_JOB_ID));
    segment.setInputRecords(sourceRowCount);
    segment.setSnapshots(new ConcurrentHashMap<>(segmentInfo.getSnapShot2JavaMap()));
    segment.setCuboidShardNums(cuboidShardNum);
    Map<String, String> additionalInfo = segment.getAdditionalInfo();
    additionalInfo.put("storageType", "" + IStorageAware.ID_PARQUET);
    segment.setAdditionalInfo(additionalInfo);
    // Exactly one segment is updated; pass it directly instead of building a
    // temporary List only to convert it to an array.
    update.setToUpdateSegs(new CubeSegment[] { segment });
    cubeManager.updateCube(update);
}
 
Example 2
Source File: Coordinator.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
/**
 * Promotes a freshly built streaming segment: stamps build statistics and the merged
 * source checkpoint on the segment, then asks the CubeManager to promote it.
 *
 * @param cubingJob    the finished build job, source of the statistics
 * @param cubeInstance cube owning the segment
 * @param cubeSegment  segment to promote
 * @throws IOException if the metadata update fails
 */
private void promoteNewSegment(CubingJob cubingJob, CubeInstance cubeInstance, CubeSegment cubeSegment)
        throws IOException {
    long sourceCount = cubingJob.findSourceRecordCount();
    long sourceSizeBytes = cubingJob.findSourceSizeBytes();
    long cubeSizeBytes = cubingJob.findCubeSizeBytes();
    Map<Integer, String> sourceCheckpoint = streamMetadataStore.getSourceCheckpoint(cubeInstance.getName(),
            cubeSegment.getName());

    ISourcePositionHandler positionOperator = StreamingSourceFactory.getStreamingSource(cubeInstance)
            .getSourcePositionHandler();
    // Method reference replaces the verbose anonymous Function class; Guava's
    // Function is a functional interface, so this is a drop-in equivalent.
    Collection<ISourcePosition> sourcePositions = Collections2.transform(sourceCheckpoint.values(),
            positionOperator::parsePosition);
    // KEEP_SMALL presumably picks the most conservative position across partitions — TODO confirm.
    ISourcePosition sourcePosition = positionOperator.mergePositions(sourcePositions, MergeStrategy.KEEP_SMALL);
    cubeSegment.setLastBuildJobID(cubingJob.getId());
    cubeSegment.setLastBuildTime(System.currentTimeMillis());
    cubeSegment.setSizeKB(cubeSizeBytes / 1024);
    cubeSegment.setInputRecords(sourceCount);
    cubeSegment.setInputRecordsSize(sourceSizeBytes);
    cubeSegment.setStreamSourceCheckpoint(positionOperator.serializePosition(sourcePosition));
    getCubeManager().promoteNewlyBuiltSegments(cubeInstance, cubeSegment);
}
 
Example 3
Source File: ReceiverClusterManager.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
/**
 * Promote a segment from realtime part into historical part.
 *
 * <p>Stamps build statistics and the merged source checkpoint on the segment,
 * then asks the coordinator's CubeManager to promote it.
 *
 * @param cubingJob    the finished build job, source of the statistics
 * @param cubeInstance cube owning the segment
 * @param cubeSegment  segment to promote
 * @throws IOException if the metadata update fails
 */
void promoteNewSegment(CubingJob cubingJob, CubeInstance cubeInstance, CubeSegment cubeSegment) throws IOException {
    logger.debug("Try transfer segment's {} state to ready.", cubeSegment.getName());
    long sourceCount = cubingJob.findSourceRecordCount();
    long sourceSizeBytes = cubingJob.findSourceSizeBytes();
    long cubeSizeBytes = cubingJob.findCubeSizeBytes();
    Map<Integer, String> sourceCheckpoint = getCoordinator().getStreamMetadataStore()
            .getSourceCheckpoint(cubeInstance.getName(), cubeSegment.getName());

    ISourcePositionHandler positionOperator = StreamingSourceFactory.getStreamingSource(cubeInstance)
            .getSourcePositionHandler();
    // Method reference replaces the verbose anonymous Function class; Guava's
    // Function is a functional interface, so this is a drop-in equivalent.
    Collection<ISourcePosition> sourcePositions = Collections2.transform(sourceCheckpoint.values(),
            positionOperator::parsePosition);
    // KEEP_SMALL presumably picks the most conservative position across partitions — TODO confirm.
    ISourcePosition sourcePosition = positionOperator.mergePositions(sourcePositions,
            ISourcePositionHandler.MergeStrategy.KEEP_SMALL);
    cubeSegment.setLastBuildJobID(cubingJob.getId());
    cubeSegment.setLastBuildTime(System.currentTimeMillis());
    cubeSegment.setSizeKB(cubeSizeBytes / 1024);
    cubeSegment.setInputRecords(sourceCount);
    cubeSegment.setInputRecordsSize(sourceSizeBytes);
    cubeSegment.setStreamSourceCheckpoint(positionOperator.serializePosition(sourcePosition));
    getCoordinator().getCubeManager().promoteNewlyBuiltSegments(cubeInstance, cubeSegment);
}
 
Example 4
Source File: UpdateCubeInfoAfterOptimizeStep.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
/**
 * After an optimize job completes: copies input statistics from the original segment
 * (the optimized segment covers the same source data), stamps build metadata, and
 * promotes the optimized segment.
 */
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager mgr = CubeManager.getInstance(context.getConfig());
    final CubeInstance cubeInstance = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment optimizedSegment = cubeInstance
            .getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final String jobId = CubingExecutableUtil.getCubingJobId(this.getParams());

    // Input statistics come from the segment being replaced, not from the job.
    CubeSegment sourceSegment = cubeInstance.getOriginalSegmentToOptimize(optimizedSegment);
    CubingJob job = (CubingJob) getManager().getJob(jobId);

    optimizedSegment.setLastBuildJobID(jobId);
    optimizedSegment.setLastBuildTime(System.currentTimeMillis());
    optimizedSegment.setSizeKB(job.findCubeSizeBytes() / 1024);
    optimizedSegment.setInputRecords(sourceSegment.getInputRecords());
    optimizedSegment.setInputRecordsSize(sourceSegment.getInputRecordsSize());
    optimizedSegment.setDimensionRangeInfoMap(sourceSegment.getDimensionRangeInfoMap());

    try {
        mgr.promoteNewlyOptimizeSegments(cubeInstance, optimizedSegment);
        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to update cube after build", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 5
Source File: UpdateCubeInfoAfterBuildStep.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
/**
 * After a build job completes: stamps build statistics (size, input records, estimate
 * ratio) on the new segment, saves external snapshots if needed, then promotes it.
 */
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager mgr = CubeManager.getInstance(context.getConfig());
    // Work on a copy-for-write so the cached cube instance is never mutated directly.
    final CubeInstance cubeCopy = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()))
            .latestCopyForWrite();
    final CubeSegment builtSegment = cubeCopy.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final String jobId = CubingExecutableUtil.getCubingJobId(this.getParams());

    CubingJob job = (CubingJob) getManager().getJob(jobId);

    KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
    List<Double> estimateRatio = job.findEstimateRatio(builtSegment, kylinConfig);

    builtSegment.setLastBuildJobID(jobId);
    builtSegment.setLastBuildTime(System.currentTimeMillis());
    builtSegment.setSizeKB(job.findCubeSizeBytes() / 1024);
    builtSegment.setInputRecords(job.findSourceRecordCount());
    builtSegment.setInputRecordsSize(job.findSourceSizeBytes());
    builtSegment.setEstimateRatio(estimateRatio);

    try {
        saveExtSnapshotIfNeeded(mgr, cubeCopy, builtSegment);
        updateSegment(builtSegment);

        mgr.promoteNewlyBuiltSegments(cubeCopy, builtSegment);
        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to update cube after build", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 6
Source File: SegmentPrunerTest.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
/** A segment whose input-record count is zero must be pruned even when the filter matches. */
@Test
public void testEmptySegment() {
    CubeSegment firstSegment = cube.getFirstSegment();
    TblColRef nationCol = cube.getModel().findColumn("CUSTOMER.C_NATION");

    // An ordinary equality filter matches the populated segment.
    TupleFilter eqFilter = compare(nationCol, FilterOperatorEnum.EQ, "CHINA");
    SegmentPruner pruner = new SegmentPruner(eqFilter);
    Assert.assertTrue(pruner.check(firstSegment));

    // Once the segment reports zero input records, the pruner must reject it.
    firstSegment.setInputRecords(0);
    Assert.assertFalse(pruner.check(firstSegment));
}
 
Example 7
Source File: Coordinator.java    From kylin with Apache License 2.0 5 votes vote down vote up
private void promoteNewSegment(CubingJob cubingJob, CubeInstance cubeInstance, CubeSegment cubeSegment)
        throws IOException {
    long sourceCount = cubingJob.findSourceRecordCount();
    long sourceSizeBytes = cubingJob.findSourceSizeBytes();
    long cubeSizeBytes = cubingJob.findCubeSizeBytes();
    Map<Integer, String> sourceCheckpoint = streamMetadataStore.getSourceCheckpoint(cubeInstance.getName(),
            cubeSegment.getName());

    ISourcePositionHandler positionOperator = StreamingSourceFactory.getStreamingSource(cubeInstance)
            .getSourcePositionHandler();
    Collection<ISourcePosition> sourcePositions = Collections2.transform(sourceCheckpoint.values(),
            new Function<String, ISourcePosition>() {
                @Nullable
                @Override
                public ISourcePosition apply(@Nullable String input) {
                    return positionOperator.parsePosition(input);
                }
            });
    ISourcePosition sourcePosition = positionOperator.mergePositions(sourcePositions, MergeStrategy.KEEP_SMALL);
    cubeSegment.setLastBuildJobID(cubingJob.getId());
    cubeSegment.setLastBuildTime(System.currentTimeMillis());
    cubeSegment.setSizeKB(cubeSizeBytes / 1024);
    cubeSegment.setInputRecords(sourceCount);
    cubeSegment.setInputRecordsSize(sourceSizeBytes);
    cubeSegment.setStreamSourceCheckpoint(positionOperator.serializePosition(sourcePosition));
    getCubeManager().promoteNewlyBuiltSegments(cubeInstance, cubeSegment);
}
 
Example 8
Source File: ReceiverClusterManager.java    From kylin with Apache License 2.0 5 votes vote down vote up
/**
 * Promote a segment from realtime part into historical part.
 *
 * <p>Stamps build statistics and the merged source checkpoint on the segment,
 * then asks the coordinator's CubeManager to promote it.
 *
 * @param cubingJob    the finished build job, source of the statistics
 * @param cubeInstance cube owning the segment
 * @param cubeSegment  segment to promote
 * @throws IOException if the metadata update fails
 */
void promoteNewSegment(CubingJob cubingJob, CubeInstance cubeInstance, CubeSegment cubeSegment) throws IOException {
    logger.debug("Try transfer segment's {} state to ready.", cubeSegment.getName());
    long sourceCount = cubingJob.findSourceRecordCount();
    long sourceSizeBytes = cubingJob.findSourceSizeBytes();
    long cubeSizeBytes = cubingJob.findCubeSizeBytes();
    Map<Integer, String> sourceCheckpoint = getCoordinator().getStreamMetadataStore()
            .getSourceCheckpoint(cubeInstance.getName(), cubeSegment.getName());

    ISourcePositionHandler positionOperator = StreamingSourceFactory.getStreamingSource(cubeInstance)
            .getSourcePositionHandler();
    // Method reference replaces the verbose anonymous Function class; Guava's
    // Function is a functional interface, so this is a drop-in equivalent.
    Collection<ISourcePosition> sourcePositions = Collections2.transform(sourceCheckpoint.values(),
            positionOperator::parsePosition);
    // KEEP_SMALL presumably picks the most conservative position across partitions — TODO confirm.
    ISourcePosition sourcePosition = positionOperator.mergePositions(sourcePositions,
            ISourcePositionHandler.MergeStrategy.KEEP_SMALL);
    cubeSegment.setLastBuildJobID(cubingJob.getId());
    cubeSegment.setLastBuildTime(System.currentTimeMillis());
    cubeSegment.setSizeKB(cubeSizeBytes / 1024);
    cubeSegment.setInputRecords(sourceCount);
    cubeSegment.setInputRecordsSize(sourceSizeBytes);
    cubeSegment.setStreamSourceCheckpoint(positionOperator.serializePosition(sourcePosition));
    getCoordinator().getCubeManager().promoteNewlyBuiltSegments(cubeInstance, cubeSegment);
}
 
Example 9
Source File: UpdateCubeInfoAfterOptimizeStep.java    From kylin with Apache License 2.0 5 votes vote down vote up
/**
 * After an optimize job completes: copies input statistics from the original segment
 * (the optimized segment covers the same source data), stamps build metadata, and
 * promotes the optimized segment.
 */
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager mgr = CubeManager.getInstance(context.getConfig());
    final CubeInstance cubeInstance = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment optimizedSegment = cubeInstance
            .getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final String jobId = CubingExecutableUtil.getCubingJobId(this.getParams());

    // Input statistics come from the segment being replaced, not from the job.
    CubeSegment sourceSegment = cubeInstance.getOriginalSegmentToOptimize(optimizedSegment);
    CubingJob job = (CubingJob) getManager().getJob(jobId);

    optimizedSegment.setLastBuildJobID(jobId);
    optimizedSegment.setLastBuildTime(System.currentTimeMillis());
    optimizedSegment.setSizeKB(job.findCubeSizeBytes() / 1024);
    optimizedSegment.setInputRecords(sourceSegment.getInputRecords());
    optimizedSegment.setInputRecordsSize(sourceSegment.getInputRecordsSize());
    optimizedSegment.setDimensionRangeInfoMap(sourceSegment.getDimensionRangeInfoMap());

    try {
        mgr.promoteNewlyOptimizeSegments(cubeInstance, optimizedSegment);
        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to update cube after build", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 10
Source File: UpdateCubeInfoAfterBuildStep.java    From kylin with Apache License 2.0 5 votes vote down vote up
/**
 * After a build job completes: stamps build statistics (size, input records, estimate
 * ratio) on the new segment, performs dictionary/snapshot housekeeping, then promotes it.
 */
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager mgr = CubeManager.getInstance(context.getConfig());
    // Work on a copy-for-write so the cached cube instance is never mutated directly.
    final CubeInstance cubeCopy = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()))
            .latestCopyForWrite();
    final CubeSegment builtSegment = cubeCopy.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final String jobId = CubingExecutableUtil.getCubingJobId(this.getParams());

    CubingJob job = (CubingJob) getManager().getJob(jobId);

    KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
    List<Double> estimateRatio = job.findEstimateRatio(builtSegment, kylinConfig);

    builtSegment.setLastBuildJobID(jobId);
    builtSegment.setLastBuildTime(System.currentTimeMillis());
    builtSegment.setSizeKB(job.findCubeSizeBytes() / 1024);
    builtSegment.setInputRecords(job.findSourceRecordCount());
    builtSegment.setInputRecordsSize(job.findSourceSizeBytes());
    builtSegment.setEstimateRatio(estimateRatio);

    try {
        deleteDictionaryIfNeeded(builtSegment);
        saveExtSnapshotIfNeeded(mgr, cubeCopy, builtSegment);
        updateSegment(builtSegment);

        mgr.promoteNewlyBuiltSegments(cubeCopy, builtSegment);
        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to update cube after build", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 11
Source File: SegmentPrunerTest.java    From kylin with Apache License 2.0 5 votes vote down vote up
/** A segment whose input-record count is zero must be pruned even when the filter matches. */
@Test
public void testEmptySegment() {
    CubeSegment firstSegment = cube.getFirstSegment();
    TblColRef nationCol = cube.getModel().findColumn("CUSTOMER.C_NATION");

    // An ordinary equality filter matches the populated segment.
    TupleFilter eqFilter = compare(nationCol, FilterOperatorEnum.EQ, "CHINA");
    SegmentPruner pruner = new SegmentPruner(eqFilter);
    Assert.assertTrue(pruner.check(firstSegment));

    // Once the segment reports zero input records, the pruner must reject it.
    firstSegment.setInputRecords(0);
    Assert.assertFalse(pruner.check(firstSegment));
}
 
Example 12
Source File: UpdateCubeInfoAfterMergeStep.java    From Kylin with Apache License 2.0 5 votes vote down vote up
/**
 * After a merge job completes: aggregates input statistics over the merged-away
 * segments, stamps them plus the HFile size on the merged segment, and promotes it.
 */
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeInstance cube = cubeManager.getCube(getCubeName());

    CubeSegment mergedSegment = cube.getSegmentById(getSegmentId());
    if (mergedSegment == null) {
        return new ExecuteResult(ExecuteResult.State.FAILED, "there is no segment with id:" + getSegmentId());
    }
    // Segment size (KB) is read from the convert-to-HFile step's recorded byte count.
    String cubeSizeString = executableManager.getOutput(getConvertToHfileStepId()).getExtra()
            .get(ExecutableConstants.HDFS_BYTES_WRITTEN);
    Preconditions.checkState(StringUtils.isNotEmpty(cubeSizeString), "Can't get cube segment size.");
    long sizeKB = Long.parseLong(cubeSizeString) / 1024;

    // Sum input statistics over the segments being merged.
    List<String> mergingSegmentIds = getMergingSegmentIds();
    if (mergingSegmentIds.isEmpty()) {
        return new ExecuteResult(ExecuteResult.State.FAILED, "there are no merging segments");
    }
    long totalInputRecords = 0L;
    long totalInputBytes = 0L;
    for (String mergingId : mergingSegmentIds) {
        CubeSegment mergingSegment = cube.getSegmentById(mergingId);
        totalInputRecords += mergingSegment.getInputRecords();
        totalInputBytes += mergingSegment.getInputRecordsSize();
    }

    // Stamp the merged segment with the aggregated statistics and job bookkeeping.
    mergedSegment.setSizeKB(sizeKB);
    mergedSegment.setInputRecords(totalInputRecords);
    mergedSegment.setInputRecordsSize(totalInputBytes);
    mergedSegment.setLastBuildJobID(getCubingJobId());
    mergedSegment.setLastBuildTime(System.currentTimeMillis());

    try {
        cubeManager.promoteNewlyBuiltSegments(cube, mergedSegment);
        return new ExecuteResult(ExecuteResult.State.SUCCEED);
    } catch (IOException e) {
        logger.error("fail to update cube after merge", e);
        return new ExecuteResult(ExecuteResult.State.ERROR, e.getLocalizedMessage());
    }
}
 
Example 13
Source File: AfterMergeOrRefreshResourceMerger.java    From kylin-on-parquet-v2 with Apache License 2.0 4 votes vote down vote up
@Override
public void merge(String cubeId, String segmentId, ResourceStore remoteResourceStore, String jobType) {

    CubeManager cubeManager = CubeManager.getInstance(getConfig());
    CubeInstance cubeInstance = cubeManager.getCubeByUuid(cubeId);
    CubeUpdate update = new CubeUpdate(cubeInstance.latestCopyForWrite());

    CubeManager distManager = CubeManager.getInstance(remoteResourceStore.getConfig());
    CubeInstance distCube = distManager.getCubeByUuid(cubeId).latestCopyForWrite();

    List<CubeSegment> toUpdateSegments = Lists.newArrayList();

    CubeSegment mergedSegment = distCube.getSegmentById(segmentId);
    mergedSegment.setStatus(SegmentStatusEnum.READY);
    Map<String, String> additionalInfo = mergedSegment.getAdditionalInfo();
    additionalInfo.put("storageType", "" + IStorageAware.ID_PARQUET);
    mergedSegment.setAdditionalInfo(additionalInfo);
    toUpdateSegments.add(mergedSegment);

    List<CubeSegment> toRemoveSegments = getToRemoveSegs(distCube, mergedSegment);
    Collections.sort(toRemoveSegments);
    makeSnapshotForNewSegment(mergedSegment, toRemoveSegments);

    if (String.valueOf(JobTypeEnum.INDEX_MERGE).equals(jobType)) {
        Optional<Long> reduce = toRemoveSegments.stream().map(CubeSegment::getSizeKB).filter(size -> size != -1)
                .reduce(Long::sum);
        Optional<Long> inputRecords = toRemoveSegments.stream().map(CubeSegment::getInputRecords).filter(records -> records != -1)
                .reduce(Long::sum);
        if (reduce.isPresent()) {
            long totalSourceSize = reduce.get();
            mergedSegment.setSizeKB(totalSourceSize);
            mergedSegment.setInputRecords(inputRecords.get());
            mergedSegment.setLastBuildTime(System.currentTimeMillis());
        }
    }

    update.setToRemoveSegs(toRemoveSegments.toArray(new CubeSegment[0]));
    update.setToUpdateSegs(toUpdateSegments.toArray(new CubeSegment[0]));

    try {
        cubeManager.updateCube(update);
    } catch (IOException e) {
        e.printStackTrace();
    }

}
 
Example 14
Source File: UpdateCubeInfoAfterBuildStep.java    From Kylin with Apache License 2.0 4 votes vote down vote up
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeManager = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = cubeManager.getCube(getCubeName());
    final CubeSegment segment = cube.getSegmentById(getSegmentId());

    Output baseCuboidOutput = executableManager.getOutput(getBaseCuboidStepId());
    String sourceRecordsCount = baseCuboidOutput.getExtra().get(ExecutableConstants.SOURCE_RECORDS_COUNT);
    Preconditions.checkState(StringUtils.isNotEmpty(sourceRecordsCount), "Can't get cube source record count.");
    long sourceCount = Long.parseLong(sourceRecordsCount);

    String sourceRecordsSize = baseCuboidOutput.getExtra().get(ExecutableConstants.SOURCE_RECORDS_SIZE);
    Preconditions.checkState(StringUtils.isNotEmpty(sourceRecordsSize), "Can't get cube source record size.");
    long sourceSize = Long.parseLong(sourceRecordsSize);

    long size = 0;
    boolean segmentReady = true;
    if (!StringUtils.isBlank(getConvertToHfileStepId())) {
        String cubeSizeString = executableManager.getOutput(getConvertToHfileStepId()).getExtra().get(ExecutableConstants.HDFS_BYTES_WRITTEN);
        Preconditions.checkState(StringUtils.isNotEmpty(cubeSizeString), "Can't get cube segment size.");
        size = Long.parseLong(cubeSizeString) / 1024;
    } else {
        // for the increment & merge case, the increment segment is only built to be merged, won't serve query by itself
        segmentReady = false;
    }

    segment.setLastBuildJobID(getCubingJobId());
    segment.setLastBuildTime(System.currentTimeMillis());
    segment.setSizeKB(size);
    segment.setInputRecords(sourceCount);
    segment.setInputRecordsSize(sourceSize);

    try {
        if (segmentReady) {
            cubeManager.promoteNewlyBuiltSegments(cube, segment);
        } else {
            cubeManager.updateCube(cube);
        }
        return new ExecuteResult(ExecuteResult.State.SUCCEED, "succeed");
    } catch (IOException e) {
        logger.error("fail to update cube after build", e);
        return new ExecuteResult(ExecuteResult.State.ERROR, e.getLocalizedMessage());
    }
}