Java Code Examples for org.apache.kylin.cube.CubeInstance#getSegmentById()

The following examples show how to use org.apache.kylin.cube.CubeInstance#getSegmentById(). You can go to the original project or source file by following the links above each example.
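
Before the project examples, here is a minimal sketch of the typical usage pattern: look the segment up by UUID on the cached CubeInstance, then switch to a writable copy via latestCopyForWrite() before mutating and persisting it. This is a sketch only; the class name GetSegmentByIdSketch, the cube name, the segment id, and the setLastBuildTime mutation are placeholders for illustration, while the API calls themselves mirror the examples below.

import java.io.IOException;

import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.cube.CubeUpdate;

public class GetSegmentByIdSketch {

    public static void touchSegment(String cubeName, String segmentId) throws IOException {
        // Resolve the cube through the CubeManager bound to the current Kylin environment.
        KylinConfig config = KylinConfig.getInstanceFromEnv();
        CubeManager cubeManager = CubeManager.getInstance(config);
        CubeInstance cube = cubeManager.getCube(cubeName);

        // getSegmentById() looks a segment up by its UUID; it returns null when the id is unknown
        // (the JobService examples below guard against exactly that).
        CubeSegment segment = cube.getSegmentById(segmentId);
        if (segment == null) {
            throw new IllegalArgumentException("No segment " + segmentId + " in cube " + cubeName);
        }

        // To change segment metadata, work on a writable copy instead of the cached instance,
        // then persist the change through a CubeUpdate (the same pattern as Examples 2, 10 and 15).
        CubeInstance cubeCopy = cube.latestCopyForWrite();
        CubeSegment segCopy = cubeCopy.getSegmentById(segment.getUuid());
        segCopy.setLastBuildTime(System.currentTimeMillis()); // placeholder mutation for illustration

        CubeUpdate update = new CubeUpdate(cubeCopy);
        update.setToUpdateSegs(segCopy);
        cubeManager.updateCube(update);
    }
}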
Example 1
Source File: JobService.java    From Kylin with Apache License 2.0
@PreAuthorize(Constant.ACCESS_HAS_ROLE_ADMIN + " or hasPermission(#job, 'ADMINISTRATION') or hasPermission(#job, 'OPERATION') or hasPermission(#job, 'MANAGEMENT')")
public JobInstance cancelJob(String jobId) throws IOException, JobException {
    //        CubeInstance cube = this.getCubeManager().getCube(job.getRelatedCube());
    //        for (BuildCubeJob cubeJob: listAllCubingJobs(cube.getName(), null, EnumSet.of(ExecutableState.READY, ExecutableState.RUNNING))) {
    //            getExecutableManager().stopJob(cubeJob.getId());
    //        }
    final JobInstance jobInstance = getJobInstance(jobId);
    final String segmentId = jobInstance.getRelatedSegment();
    CubeInstance cubeInstance = getCubeManager().getCube(jobInstance.getRelatedCube());
    final CubeSegment segment = cubeInstance.getSegmentById(segmentId);
    if (segment.getStatus() == SegmentStatusEnum.NEW) {
        cubeInstance.getSegments().remove(segment);
        getCubeManager().updateCube(cubeInstance);
    }
    getExecutableManager().discardJob(jobId);
    return jobInstance;
}
 
Example 2
Source File: CuboidShardUtil.java    From kylin-on-parquet-v2 with Apache License 2.0
public static void saveCuboidShards(CubeSegment segment, Map<Long, Short> cuboidShards, int totalShards) throws IOException {
    CubeManager cubeManager = CubeManager.getInstance(segment.getConfig());

    Map<Long, Short> filtered = Maps.newHashMap();
    for (Map.Entry<Long, Short> entry : cuboidShards.entrySet()) {
        if (entry.getValue() > 1) {
            filtered.put(entry.getKey(), entry.getValue());
        }
    }
    
    // work on copy instead of cached objects
    CubeInstance cubeCopy = segment.getCubeInstance().latestCopyForWrite();
    CubeSegment segCopy = cubeCopy.getSegmentById(segment.getUuid());

    segCopy.setCuboidShardNums(filtered);
    segCopy.setTotalShards(totalShards);

    CubeUpdate update = new CubeUpdate(cubeCopy);
    update.setToUpdateSegs(segCopy);
    cubeManager.updateCube(update);
}
 
Example 3
Source File: JobService.java    From kylin-on-parquet-v2 with Apache License 2.0
private void cancelCubingJobInner(CubingJob cubingJob) throws IOException {
    CubeInstance cubeInstance = getCubeManager().getCube(CubingExecutableUtil.getCubeName(cubingJob.getParams()));
    // might not be a cube job
    final String segmentIds = CubingExecutableUtil.getSegmentId(cubingJob.getParams());
    if (!StringUtils.isEmpty(segmentIds)) {
        for (String segmentId : StringUtils.split(segmentIds)) {
            final CubeSegment segment = cubeInstance.getSegmentById(segmentId);
            if (segment != null
                    && (segment.getStatus() == SegmentStatusEnum.NEW || segment.getTSRange().end.v == 0)) {
                // Remove this segment
                getCubeManager().updateCubeDropSegments(cubeInstance, segment);
            }
        }
    }
    getExecutableManager().discardJob(cubingJob.getId());
}
 
Example 4
Source File: CopyDictionaryStep.java    From kylin with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager mgr = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams())).latestCopyForWrite();
    final CubeSegment optimizeSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));

    CubeSegment oldSegment = optimizeSegment.getCubeInstance().getOriginalSegmentToOptimize(optimizeSegment);
    Preconditions.checkNotNull(oldSegment,
            "cannot find the original segment to be optimized by " + optimizeSegment);

    // --- Copy dictionary
    optimizeSegment.getDictionaries().putAll(oldSegment.getDictionaries());
    optimizeSegment.getSnapshots().putAll(oldSegment.getSnapshots());
    optimizeSegment.getRowkeyStats().addAll(oldSegment.getRowkeyStats());

    try {
        CubeUpdate cubeBuilder = new CubeUpdate(cube);
        cubeBuilder.setToUpdateSegs(optimizeSegment);
        mgr.updateCube(cubeBuilder);
    } catch (IOException e) {
        logger.error("fail to merge dictionary or lookup snapshots", e);
        return ExecuteResult.createError(e);
    }

    return new ExecuteResult();
}
 
Example 5
Source File: JobService.java    From kylin with Apache License 2.0
private void cancelCubingJobInner(CubingJob cubingJob) throws IOException {
    CubeInstance cubeInstance = getCubeManager().getCube(CubingExecutableUtil.getCubeName(cubingJob.getParams()));
    // might not be a cube job
    final String segmentIds = CubingExecutableUtil.getSegmentId(cubingJob.getParams());
    if (!StringUtils.isEmpty(segmentIds)) {
        for (String segmentId : StringUtils.split(segmentIds)) {
            final CubeSegment segment = cubeInstance.getSegmentById(segmentId);
            if (segment != null
                    && (segment.getStatus() == SegmentStatusEnum.NEW || segment.getTSRange().end.v == 0)) {
                // Remove this segment
                getCubeManager().updateCubeDropSegments(cubeInstance, segment);
            }
        }
    }
    getExecutableManager().discardJob(cubingJob.getId());
}
 
Example 6
Source File: SparkExecutable.java    From kylin with Apache License 2.0
private void updateSparkDimensionDicMetadata(KylinConfig config, CubeInstance cube, String segmentId)
        throws IOException {
    KylinConfig hdfsConfig = AbstractHadoopJob
            .loadKylinConfigFromHdfs(this.getParam(SparkBuildDictionary.OPTION_META_URL.getOpt()));
    CubeInstance cubeInstance = CubeManager.getInstance(hdfsConfig).reloadCube(cube.getName());
    CubeSegment segment = cubeInstance.getSegmentById(segmentId);

    CubeSegment oldSeg = cube.getSegmentById(segmentId);
    oldSeg.setDictionaries((ConcurrentHashMap<String, String>) segment.getDictionaries());
    oldSeg.setSnapshots((ConcurrentHashMap) segment.getSnapshots());
    oldSeg.getRowkeyStats().addAll(segment.getRowkeyStats());
    CubeInstance cubeCopy = cube.latestCopyForWrite();
    CubeUpdate update = new CubeUpdate(cubeCopy);
    update.setToUpdateSegs(oldSeg);
    CubeManager.getInstance(config).updateCube(update);

    Set<String> dumpList = new LinkedHashSet<>();
    dumpList.addAll(segment.getDictionaryPaths());
    dumpList.addAll(segment.getSnapshotPaths());

    JobRelatedMetaUtil.dumpAndUploadKylinPropsAndMetadata(dumpList, (KylinConfigExt) segment.getConfig(),
            config.getMetadataUrl().toString());
}
 
Example 7
Source File: NDCuboidMapper.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected void doSetup(Context context) throws IOException {
    super.bindCurrentConfiguration(context.getConfiguration());

    cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME);
    segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID);
    String cuboidModeName = context.getConfiguration().get(BatchConstants.CFG_CUBOID_MODE);

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();

    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    cubeDesc = cube.getDescriptor();
    cubeSegment = cube.getSegmentById(segmentID);
    ndCuboidBuilder = new NDCuboidBuilder(cubeSegment);
    // initialize CuboidScheduler
    cuboidScheduler = CuboidSchedulerUtil.getCuboidSchedulerByMode(cubeSegment, cuboidModeName);
    rowKeySplitter = new RowKeySplitter(cubeSegment);
}
 
Example 8
Source File: FlinkCubingByLayer.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
public void open(Configuration parameters) throws Exception {
    KylinConfig kConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
    try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
            .setAndUnsetThreadLocalConfig(kConfig)) {
        CubeInstance cubeInstance = CubeManager.getInstance(kConfig).getCube(cubeName);
        CubeDesc cubeDesc = cubeInstance.getDescriptor();
        CubeSegment cubeSegment = cubeInstance.getSegmentById(segmentId);
        CubeJoinedFlatTableEnrich interDesc = new CubeJoinedFlatTableEnrich(
                EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
        long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
        Cuboid baseCuboid = Cuboid.findForMandatory(cubeDesc, baseCuboidId);
        baseCuboidBuilder = new BaseCuboidBuilder(kConfig, cubeDesc, cubeSegment, interDesc,
                AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid),
                MeasureIngester.create(cubeDesc.getMeasures()), cubeSegment.buildDictionaryMap());
    }
}
 
Example 9
Source File: DictionaryGeneratorCLI.java    From kylin with Apache License 2.0
public static void processSegment(KylinConfig config, String cubeName, String segmentID, String uuid,
        DistinctColumnValuesProvider factTableValueProvider, DictionaryProvider dictProvider) throws IOException {
    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    CubeSegment segment = cube.getSegmentById(segmentID);

    int retryTime = 0;
    while (retryTime < 3) {
        if (retryTime > 0) {
            logger.info("Rebuild dictionary and snapshot for Cube: {}, Segment: {}, {} times.", cubeName, segmentID,
                    retryTime);
        }

        processSegment(config, segment, uuid, factTableValueProvider, dictProvider);

        if (isAllDictsAndSnapshotsReady(config, cubeName, segmentID)) {
            break;
        }
        retryTime++;
    }

    if (retryTime >= 3) {
        logger.error("Not all dictionaries and snapshots ready for cube segment: {}", segmentID);
    } else {
        logger.info("Succeed to build all dictionaries and snapshots for cube segment: {}", segmentID);
    }
}
 
Example 10
Source File: CubeBuildJob.java    From kylin-on-parquet-v2 with Apache License 2.0
private void updateSegmentSourceBytesSize(String cubeId, Map<String, Object> toUpdateSegmentSourceSize)
        throws IOException {
    CubeInstance cubeInstance = cubeManager.getCubeByUuid(cubeId);
    CubeInstance cubeCopy = cubeInstance.latestCopyForWrite();
    CubeUpdate update = new CubeUpdate(cubeCopy);
    List<CubeSegment> cubeSegments = Lists.newArrayList();
    for (Map.Entry<String, Object> entry : toUpdateSegmentSourceSize.entrySet()) {
        CubeSegment segment = cubeCopy.getSegmentById(entry.getKey());
        segment.setInputRecordsSize((Long) entry.getValue());
        segment.setLastBuildTime(System.currentTimeMillis());
        cubeSegments.add(segment);
    }
    update.setToUpdateSegs(cubeSegments.toArray(new CubeSegment[0]));
    cubeManager.updateCube(update);
}
 
Example 11
Source File: SparkCubingByLayer.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
public Tuple2<ByteArray, Object[]> call(String[] rowArray) throws Exception {
    if (initialized == false) {
        synchronized (SparkCubingByLayer.class) {
            if (initialized == false) {
                KylinConfig kConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
                try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
                        .setAndUnsetThreadLocalConfig(kConfig)) {
                    CubeInstance cubeInstance = CubeManager.getInstance(kConfig).getCube(cubeName);
                    CubeDesc cubeDesc = cubeInstance.getDescriptor();
                    CubeSegment cubeSegment = cubeInstance.getSegmentById(segmentId);
                    CubeJoinedFlatTableEnrich interDesc = new CubeJoinedFlatTableEnrich(
                            EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
                    long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
                    Cuboid baseCuboid = Cuboid.findForMandatory(cubeDesc, baseCuboidId);
                    baseCuboidBuilder = new BaseCuboidBuilder(kConfig, cubeDesc, cubeSegment, interDesc,
                            AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid),
                            MeasureIngester.create(cubeDesc.getMeasures()), cubeSegment.buildDictionaryMap());
                    initialized = true;
                }
            }
        }
    }
    baseCuboidBuilder.resetAggrs();
    byte[] rowKey = baseCuboidBuilder.buildKey(rowArray);
    Object[] result = baseCuboidBuilder.buildValueObjects(rowArray);
    return new Tuple2<>(new ByteArray(rowKey), result);
}
 
Example 12
Source File: UpdateCubeInfoAfterBuildStep.java    From kylin with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeManager = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = cubeManager.getCube(CubingExecutableUtil.getCubeName(this.getParams()))
            .latestCopyForWrite();
    final CubeSegment segment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));

    CubingJob cubingJob = (CubingJob) getManager().getJob(CubingExecutableUtil.getCubingJobId(this.getParams()));
    long sourceCount = cubingJob.findSourceRecordCount();
    long sourceSizeBytes = cubingJob.findSourceSizeBytes();
    long cubeSizeBytes = cubingJob.findCubeSizeBytes();

    KylinConfig config = KylinConfig.getInstanceFromEnv();
    List<Double> cuboidEstimateRatio = cubingJob.findEstimateRatio(segment, config);

    segment.setLastBuildJobID(CubingExecutableUtil.getCubingJobId(this.getParams()));
    segment.setLastBuildTime(System.currentTimeMillis());
    segment.setSizeKB(cubeSizeBytes / 1024);
    segment.setInputRecords(sourceCount);
    segment.setInputRecordsSize(sourceSizeBytes);
    segment.setEstimateRatio(cuboidEstimateRatio);

    try {
        deleteDictionaryIfNeeded(segment);
        saveExtSnapshotIfNeeded(cubeManager, cube, segment);
        updateSegment(segment);

        cubeManager.promoteNewlyBuiltSegments(cube, segment);
        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to update cube after build", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 13
Source File: SparkCubingMerge.java    From kylin-on-parquet-v2 with Apache License 2.0
private void init() {
    this.kylinConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
    final CubeInstance cube = CubeManager.getInstance(kylinConfig).getCube(cubeName);
    final CubeDesc cubeDesc = CubeDescManager.getInstance(kylinConfig).getCubeDesc(cube.getDescName());
    final CubeSegment sourceSeg = cube.getSegmentById(sourceSegmentId);
    final CubeSegment mergedSeg = cube.getSegmentById(mergedSegmentId);
    this.segmentReEncoder = new SegmentReEncoder(cubeDesc, sourceSeg, mergedSeg, kylinConfig);
}
 
Example 14
Source File: CubeMergeJob.java    From kylin-on-parquet-v2 with Apache License 2.0
private void mergeSegments(String cubeId, String segmentId) throws IOException {
    CubeManager mgr = CubeManager.getInstance(config);
    CubeInstance cube = mgr.getCubeByUuid(cubeId);
    CubeSegment mergedSeg = cube.getSegmentById(segmentId);
    SegmentInfo mergedSegInfo = ManagerHub.getSegmentInfo(config, getParam(MetadataConstants.P_CUBE_ID), mergedSeg.getUuid());

    Map<Long, DFLayoutMergeAssist> mergeCuboidsAssist = generateMergeAssist(mergingSegInfos, ss);
    for (DFLayoutMergeAssist assist : mergeCuboidsAssist.values()) {
        SpanningTree spanningTree = new ForestSpanningTree(JavaConversions.asJavaCollection(mergedSegInfo.toBuildLayouts()));
        Dataset<Row> afterMerge = assist.merge(config, cube.getName());
        LayoutEntity layout = assist.getLayout();

        Dataset<Row> afterSort;
        if (layout.isTableIndex()) {
            afterSort = afterMerge.sortWithinPartitions(NSparkCubingUtil.getColumns(layout.getOrderedDimensions().keySet()));
        } else {
            Column[] dimsCols = NSparkCubingUtil.getColumns(layout.getOrderedDimensions().keySet());
            Dataset<Row> afterAgg = CuboidAggregator.agg(ss, afterMerge, layout.getOrderedDimensions().keySet(),
                    layout.getOrderedMeasures(), spanningTree, false);
            afterSort = afterAgg.sortWithinPartitions(dimsCols);
        }
        buildLayoutWithUpdate.submit(new BuildLayoutWithUpdate.JobEntity() {
            @Override
            public String getName() {
                return "merge-layout-" + layout.getId();
            }

            @Override
            public LayoutEntity build() throws IOException {
                return saveAndUpdateCuboid(afterSort, mergedSegInfo, layout, assist);
            }
        }, config);

        buildLayoutWithUpdate.updateLayout(mergedSegInfo, config);
    }
}
 
Example 15
Source File: SparkBuildDictionary.java    From kylin with Apache License 2.0
private String buildSnapshotTable(KylinConfig config, CubeSegment cubeSeg, String lookupTable, String uuid) throws IOException{
    CubeInstance cubeCopy = cubeSeg.getCubeInstance().latestCopyForWrite(); // get a latest copy
    CubeSegment segCopy = cubeCopy.getSegmentById(cubeSeg.getUuid());

    TableMetadataManager metaMgr = TableMetadataManager.getInstance(config);
    SnapshotManager snapshotMgr = SnapshotManager.getInstance(config);

    TableDesc tableDesc = new TableDesc(metaMgr.getTableDesc(lookupTable, segCopy.getProject()));
    IReadableTable hiveTable = SourceManager.createReadableTable(tableDesc, uuid);
    SnapshotTable snapshot = snapshotMgr.buildSnapshot(hiveTable, tableDesc, cubeSeg.getConfig());
    return snapshot.getResourcePath();
}
 
Example 16
Source File: JobStepFactory.java    From kylin-on-parquet-v2 with Apache License 2.0
public static NSparkExecutable addStep(DefaultChainedExecutable parent, JobStepType type,
        CubeInstance cube) {
    NSparkExecutable step;
    KylinConfig config = cube.getConfig();
    switch (type) {
    case RESOURCE_DETECT:
        step = new NResourceDetectStep(parent);
        break;
    case CUBING:
        step = new NSparkCubingStep(config.getSparkBuildClassName());
        break;
    case MERGING:
        step = new NSparkMergingStep(config.getSparkMergeClassName());
        break;
    case CLEAN_UP_AFTER_MERGE:
        step = new NSparkUpdateMetaAndCleanupAfterMergeStep();
        break;
    default:
        throw new IllegalArgumentException();
    }

    step.setParams(parent.getParams());
    step.setProject(parent.getProject());
    step.setTargetSubject(parent.getTargetSubject());
    if (step instanceof NSparkUpdateMetaAndCleanupAfterMergeStep) {
        CubeSegment mergeSegment = cube.getSegmentById(parent.getTargetSegments().iterator().next());
        final Segments<CubeSegment> mergingSegments = cube.getMergingSegments(mergeSegment);
        step.setParam(MetadataConstants.P_SEGMENT_NAMES,
                String.join(",", NSparkCubingUtil.toSegmentNames(mergingSegments)));
        step.setParam(CubingExecutableUtil.SEGMENT_ID, parent.getParam(CubingExecutableUtil.SEGMENT_ID));
        step.setParam(MetadataConstants.P_JOB_TYPE, parent.getParam(MetadataConstants.P_JOB_TYPE));
        step.setParam(MetadataConstants.P_OUTPUT_META_URL, parent.getParam(MetadataConstants.P_OUTPUT_META_URL));
    }
    parent.addTask(step);
    //after addTask, step's id is changed
    step.setDistMetaUrl(config.getJobTmpMetaStoreUrl(parent.getProject(), step.getId()));
    return step;
}
 
Example 17
Source File: CalculateStatsFromBaseCuboidMapper.java    From kylin with Apache License 2.0
@Override
protected void doSetup(Context context) throws IOException {
    Configuration conf = context.getConfiguration();
    HadoopUtil.setCurrentConfiguration(conf);
    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();

    String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    CubeDesc cubeDesc = cube.getDescriptor();
    CubeSegment cubeSegment = cube.getSegmentById(conf.get(BatchConstants.CFG_CUBE_SEGMENT_ID));

    baseCuboidId = cube.getCuboidScheduler().getBaseCuboidId();
    nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;

    String cuboidModeName = conf.get(BatchConstants.CFG_CUBOID_MODE);
    Set<Long> cuboidIdSet = cube.getCuboidsByMode(cuboidModeName);

    cuboidIds = cuboidIdSet.toArray(new Long[cuboidIdSet.size()]);
    allCuboidsBitSet = CuboidUtil.getCuboidBitSet(cuboidIds, nRowKey);

    samplingPercentage = Integer
            .parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT));

    allCuboidsHLL = new HLLCounter[cuboidIds.length];
    for (int i = 0; i < cuboidIds.length; i++) {
        allCuboidsHLL[i] = new HLLCounter(cubeDesc.getConfig().getCubeStatsHLLPrecision());
    }

    //for KYLIN-2518 backward compatibility
    if (KylinVersion.isBefore200(cubeDesc.getVersion())) {
        isUsePutRowKeyToHllNewAlgorithm = false;
        hf = Hashing.murmur3_32();
        logger.info("Found KylinVersion : {}. Use old algorithm for cuboid sampling.", cubeDesc.getVersion());
    } else {
        isUsePutRowKeyToHllNewAlgorithm = true;
        rowHashCodesLong = new long[nRowKey];
        hf = Hashing.murmur3_128();
        logger.info(
                "Found KylinVersion : {}. Use new algorithm for cuboid sampling. About the details of the new algorithm, please refer to KYLIN-2518",
                cubeDesc.getVersion());
    }

    rowKeyDecoder = new RowKeyDecoder(cubeSegment);
}
 
Example 18
Source File: JobService.java    From kylin with Apache License 2.0
public JobInstance submitRecoverSegmentOptimizeJob(CubeSegment segment, String submitter)
        throws IOException, JobException {
    CubeInstance cubeInstance = segment.getCubeInstance();

    checkCubeDescSignature(cubeInstance);

    String cubeName = cubeInstance.getName();
    List<JobInstance> jobInstanceList = searchJobsByCubeName(cubeName, null,
            Lists.newArrayList(JobStatusEnum.NEW, JobStatusEnum.PENDING, JobStatusEnum.ERROR),
            JobTimeFilterEnum.ALL, JobSearchMode.CHECKPOINT_ONLY);
    if (jobInstanceList.size() > 1) {
        throw new IllegalStateException("Exist more than one CheckpointExecutable for cube " + cubeName);
    } else if (jobInstanceList.size() == 0) {
        throw new IllegalStateException("There's no CheckpointExecutable for cube " + cubeName);
    }
    CheckpointExecutable checkpointExecutable = (CheckpointExecutable) getExecutableManager()
            .getJob(jobInstanceList.get(0).getId());

    AbstractExecutable toBeReplaced = null;
    for (AbstractExecutable taskForCheck : checkpointExecutable.getSubTasksForCheck()) {
        if (taskForCheck instanceof CubingJob) {
            CubingJob subCubingJob = (CubingJob) taskForCheck;
            String segmentName = CubingExecutableUtil.getSegmentName(subCubingJob.getParams());
            if (segmentName != null && segmentName.equals(segment.getName())) {
                String segmentID = CubingExecutableUtil.getSegmentId(subCubingJob.getParams());
                CubeSegment beingOptimizedSegment = cubeInstance.getSegmentById(segmentID);
                if (beingOptimizedSegment != null) { // beingOptimizedSegment exists & should not be recovered
                    throw new IllegalStateException("Segment " + beingOptimizedSegment.getName() + "-"
                            + beingOptimizedSegment.getUuid()
                            + " still exists. Please delete it or discard the related optimize job first!!!");
                }
                toBeReplaced = taskForCheck;
                break;
            }
        }
    }
    if (toBeReplaced == null) {
        throw new IllegalStateException("There's no CubingJob for segment " + segment.getName()
                + " in CheckpointExecutable " + checkpointExecutable.getName());
    }

    /** Add CubingJob for the related segment **/
    CubeSegment optimizeSegment = getCubeManager().appendSegment(cubeInstance, segment.getTSRange());

    DefaultChainedExecutable optimizeJob = EngineFactory.createBatchOptimizeJob(optimizeSegment, submitter);

    getExecutableManager().addJob(optimizeJob);

    JobInstance optimizeJobInstance = getSingleJobInstance(optimizeJob);

    /** Update the checkpoint job */
    checkpointExecutable.getSubTasksForCheck().set(checkpointExecutable.getSubTasksForCheck().indexOf(toBeReplaced),
            optimizeJob);

    getExecutableManager().updateCheckpointJob(checkpointExecutable.getId(),
            checkpointExecutable.getSubTasksForCheck());

    return optimizeJobInstance;
}
 
Example 19
Source File: CalculateStatsFromBaseCuboidJob.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_ID);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_STATISTICS_SAMPLING_PERCENT);
        options.addOption(OPTION_CUBOID_MODE);
        parseOptions(options, args);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        String cubeName = getOptionValue(OPTION_CUBE_NAME);
        String segmentID = getOptionValue(OPTION_SEGMENT_ID);
        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String statistics_sampling_percent = getOptionValue(OPTION_STATISTICS_SAMPLING_PERCENT);
        String cuboidMode = getOptionValue(OPTION_CUBOID_MODE);

        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        CubeSegment cubeSegment = cube.getSegmentById(segmentID);

        job.getConfiguration().set(BatchConstants.CFG_CUBOID_MODE, cuboidMode);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_ID, segmentID);
        job.getConfiguration().set(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT, statistics_sampling_percent);
        logger.info("Starting: " + job.getJobName());

        setJobClasspath(job, cube.getConfig());

        setupMapper(input);
        setupReducer(output, cubeSegment);

        attachSegmentMetadataWithDict(cubeSegment, job.getConfiguration());

        return waitForCompletion(job);

    } catch (Exception e) {
        logger.error("error in CalculateStatsFromBaseCuboidJob", e);
        printUsage(options);
        throw e;
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}
 
Example 20
Source File: CuboidJob.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    if (this.mapperClass == null)
        throw new Exception("Mapper class is not set!");

    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_ID);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_NCUBOID_LEVEL);
        options.addOption(OPTION_CUBING_JOB_ID);
        options.addOption(OPTION_CUBOID_MODE);
        options.addOption(OPTION_DICTIONARY_SHRUNKEN_PATH);
        parseOptions(options, args);

        String output = getOptionValue(OPTION_OUTPUT_PATH);
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
        int nCuboidLevel = Integer.parseInt(getOptionValue(OPTION_NCUBOID_LEVEL));
        String segmentID = getOptionValue(OPTION_SEGMENT_ID);
        String cubingJobId = getOptionValue(OPTION_CUBING_JOB_ID);
        String cuboidModeName = getOptionValue(OPTION_CUBOID_MODE);
        if (cuboidModeName == null) {
            cuboidModeName = CuboidModeEnum.CURRENT.toString();
        }

        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        CubeSegment segment = cube.getSegmentById(segmentID);

        cuboidScheduler = CuboidSchedulerUtil.getCuboidSchedulerByMode(segment, cuboidModeName);

        if (checkSkip(cubingJobId, nCuboidLevel)) {
            logger.info(
                    "Skip job " + getOptionValue(OPTION_JOB_NAME) + " for " + segmentID + "[" + segmentID + "]");
            return 0;
        }

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        job.getConfiguration().set(BatchConstants.ARG_CUBING_JOB_ID, cubingJobId);
        String shrunkenDictPath = getOptionValue(OPTION_DICTIONARY_SHRUNKEN_PATH);
        if (shrunkenDictPath != null) {
            job.getConfiguration().set(BatchConstants.ARG_SHRUNKEN_DICT_PATH, shrunkenDictPath);
        }
        logger.info("Starting: " + job.getJobName());

        setJobClasspath(job, cube.getConfig());

        // add metadata to distributed cache
        attachSegmentMetadataWithAll(segment, job.getConfiguration());

        // Mapper
        job.setMapperClass(this.mapperClass);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setCombinerClass(CuboidReducer.class); // for base cuboid shuffle skew, some rowkey aggregates far more records than others

        // Reducer
        job.setReducerClass(CuboidReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // set input
        configureMapperInputFormat(segment);

        // set output
        IMROutput2.IMROutputFormat outputFormat = MRUtil.getBatchCubingOutputSide2(segment).getOutputFormat();
        outputFormat.configureJobOutput(job, output, segment, cuboidScheduler, nCuboidLevel);

        // set job configuration
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_ID, segmentID);
        job.getConfiguration().setInt(BatchConstants.CFG_CUBE_CUBOID_LEVEL, nCuboidLevel);
        job.getConfiguration().set(BatchConstants.CFG_CUBOID_MODE, cuboidModeName);

        return waitForCompletion(job);
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}