Java Code Examples for org.apache.kylin.cube.CubeInstance#latestCopyForWrite()

The following examples show how to use org.apache.kylin.cube.CubeInstance#latestCopyForWrite(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: StatisticsDecisionUtil.java    From kylin-on-parquet-v2 with Apache License 2.0 6 votes vote down vote up
/**
 * Asks {@code CuboidRecommenderUtil} for a recommended cuboid list for the
 * given segment and, when that list is non-empty, stores it on a writable
 * copy of the segment's cube through {@code CubeManager.updateCube}.
 * Does nothing when {@code isAbleToOptimizeCubingPlan} rejects the segment.
 *
 * @param segment the segment the recommendation is computed for
 * @throws IOException declared by this method; propagated from the calls it makes
 */
public static void optimizeCubingPlan(CubeSegment segment) throws IOException {
    if (!isAbleToOptimizeCubingPlan(segment)) {
        return;
    }
    logger.info("It's able to trigger cuboid planner algorithm.");

    Map<Long, Long> recommended = CuboidRecommenderUtil.getRecommendCuboidList(segment);
    boolean nothingRecommended = recommended == null || recommended.isEmpty();
    if (nothingRecommended) {
        return;
    }

    CubeInstance owningCube = segment.getCubeInstance();
    CubeInstance writableCopy = owningCube.latestCopyForWrite();
    CubeUpdate cuboidUpdate = new CubeUpdate(writableCopy);
    cuboidUpdate.setCuboids(recommended);

    CubeManager cubeManager = CubeManager.getInstance(owningCube.getConfig());
    cubeManager.updateCube(cuboidUpdate);
}
 
Example 2
Source File: SparkExecutable.java    From kylin-on-parquet-v2 with Apache License 2.0 6 votes vote down vote up
/**
 * Copies the dictionaries, snapshots and rowkey stats of one segment from the
 * cube reloaded through the HDFS-side config onto the same segment of
 * {@code cube}, submits that segment as a cube update through {@code config},
 * and finally dumps and uploads the segment's dictionary and snapshot paths.
 *
 * @param config    config used for the cube update and as the upload target
 * @param cube      cube whose segment receives the copied metadata
 * @param segmentId id of the segment to synchronise
 * @throws IOException declared by this method; propagated from the calls it makes
 */
private void updateSparkDimensionDicMetadata(KylinConfig config, CubeInstance cube, String segmentId)
        throws IOException {
    String hdfsMetaUrl = this.getParam(SparkBuildDictionary.OPTION_META_URL.getOpt());
    KylinConfig hdfsConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(hdfsMetaUrl);
    CubeInstance reloadedCube = CubeManager.getInstance(hdfsConfig).reloadCube(cube.getName());
    CubeSegment sourceSeg = reloadedCube.getSegmentById(segmentId);

    // Transfer the freshly built metadata onto the caller's segment.
    CubeSegment targetSeg = cube.getSegmentById(segmentId);
    targetSeg.setDictionaries((ConcurrentHashMap<String, String>) sourceSeg.getDictionaries());
    targetSeg.setSnapshots((ConcurrentHashMap) sourceSeg.getSnapshots());
    targetSeg.getRowkeyStats().addAll(sourceSeg.getRowkeyStats());

    CubeUpdate segUpdate = new CubeUpdate(cube.latestCopyForWrite());
    segUpdate.setToUpdateSegs(targetSeg);
    CubeManager.getInstance(config).updateCube(segUpdate);

    // Insertion-ordered: dictionary paths first, snapshot paths after.
    Set<String> resourcesToDump = new LinkedHashSet<>();
    resourcesToDump.addAll(sourceSeg.getDictionaryPaths());
    resourcesToDump.addAll(sourceSeg.getSnapshotPaths());

    KylinConfigExt segConfig = (KylinConfigExt) sourceSeg.getConfig();
    String targetMetaUrl = config.getMetadataUrl().toString();
    JobRelatedMetaUtil.dumpAndUploadKylinPropsAndMetadata(resourcesToDump, segConfig, targetMetaUrl);
}
 
Example 3
Source File: CubeBuildJob.java    From kylin-on-parquet-v2 with Apache License 2.0 6 votes vote down vote up
/**
 * Records build results on the segment identified by {@code segmentInfo.id()}
 * in a writable copy of the cube, then submits that segment as a cube update.
 *
 * @param cubeId         uuid of the cube that owns the segment
 * @param segmentInfo    source of the segment id, layout size and snapshots
 * @param sourceRowCount value stored as the segment's input record count
 * @throws IOException declared by this method; propagated from the calls it makes
 */
private void updateSegmentInfo(String cubeId, SegmentInfo segmentInfo, long sourceRowCount) throws IOException {
    CubeInstance writableCube = cubeManager.getCubeByUuid(cubeId).latestCopyForWrite();
    CubeUpdate segUpdate = new CubeUpdate(writableCube);

    CubeSegment builtSeg = writableCube.getSegmentById(segmentInfo.id());
    builtSeg.setSizeKB(segmentInfo.getAllLayoutSize() / 1024);
    builtSeg.setLastBuildTime(System.currentTimeMillis());
    builtSeg.setLastBuildJobID(getParam(MetadataConstants.P_JOB_ID));
    builtSeg.setInputRecords(sourceRowCount);
    builtSeg.setSnapshots(new ConcurrentHashMap<>(segmentInfo.getSnapShot2JavaMap()));
    builtSeg.setCuboidShardNums(cuboidShardNum);

    // Tag the segment with the parquet storage type id.
    Map<String, String> segInfo = builtSeg.getAdditionalInfo();
    segInfo.put("storageType", "" + IStorageAware.ID_PARQUET);
    builtSeg.setAdditionalInfo(segInfo);

    segUpdate.setToUpdateSegs(new CubeSegment[] { builtSeg });
    cubeManager.updateCube(segUpdate);
}
 
Example 4
Source File: StatisticsDecisionUtil.java    From kylin with Apache License 2.0 6 votes vote down vote up
/**
 * Asks {@code CuboidRecommenderUtil} for a recommended cuboid list for the
 * given segment and, when that list is non-empty, stores it on a writable
 * copy of the segment's cube through {@code CubeManager.updateCube}.
 * Does nothing when {@code isAbleToOptimizeCubingPlan} rejects the segment.
 *
 * @param segment the segment the recommendation is computed for
 * @throws IOException declared by this method; propagated from the calls it makes
 */
public static void optimizeCubingPlan(CubeSegment segment) throws IOException {
    if (!isAbleToOptimizeCubingPlan(segment)) {
        return;
    }
    logger.info("It's able to trigger cuboid planner algorithm.");

    Map<Long, Long> recommended = CuboidRecommenderUtil.getRecommendCuboidList(segment);
    boolean nothingRecommended = recommended == null || recommended.isEmpty();
    if (nothingRecommended) {
        return;
    }

    CubeInstance owningCube = segment.getCubeInstance();
    CubeInstance writableCopy = owningCube.latestCopyForWrite();
    CubeUpdate cuboidUpdate = new CubeUpdate(writableCopy);
    cuboidUpdate.setCuboids(recommended);

    CubeManager cubeManager = CubeManager.getInstance(owningCube.getConfig());
    cubeManager.updateCube(cuboidUpdate);
}
 
Example 5
Source File: SparkExecutable.java    From kylin with Apache License 2.0 6 votes vote down vote up
/**
 * Copies the dictionaries, snapshots and rowkey stats of one segment from the
 * cube reloaded through the HDFS-side config onto the same segment of
 * {@code cube}, submits that segment as a cube update through {@code config},
 * and finally dumps and uploads the segment's dictionary and snapshot paths.
 *
 * @param config    config used for the cube update and as the upload target
 * @param cube      cube whose segment receives the copied metadata
 * @param segmentId id of the segment to synchronise
 * @throws IOException declared by this method; propagated from the calls it makes
 */
private void updateSparkDimensionDicMetadata(KylinConfig config, CubeInstance cube, String segmentId)
        throws IOException {
    String hdfsMetaUrl = this.getParam(SparkBuildDictionary.OPTION_META_URL.getOpt());
    KylinConfig hdfsConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(hdfsMetaUrl);
    CubeInstance reloadedCube = CubeManager.getInstance(hdfsConfig).reloadCube(cube.getName());
    CubeSegment sourceSeg = reloadedCube.getSegmentById(segmentId);

    // Transfer the freshly built metadata onto the caller's segment.
    CubeSegment targetSeg = cube.getSegmentById(segmentId);
    targetSeg.setDictionaries((ConcurrentHashMap<String, String>) sourceSeg.getDictionaries());
    targetSeg.setSnapshots((ConcurrentHashMap) sourceSeg.getSnapshots());
    targetSeg.getRowkeyStats().addAll(sourceSeg.getRowkeyStats());

    CubeUpdate segUpdate = new CubeUpdate(cube.latestCopyForWrite());
    segUpdate.setToUpdateSegs(targetSeg);
    CubeManager.getInstance(config).updateCube(segUpdate);

    // Insertion-ordered: dictionary paths first, snapshot paths after.
    Set<String> resourcesToDump = new LinkedHashSet<>();
    resourcesToDump.addAll(sourceSeg.getDictionaryPaths());
    resourcesToDump.addAll(sourceSeg.getSnapshotPaths());

    KylinConfigExt segConfig = (KylinConfigExt) sourceSeg.getConfig();
    String targetMetaUrl = config.getMetadataUrl().toString();
    JobRelatedMetaUtil.dumpAndUploadKylinPropsAndMetadata(resourcesToDump, segConfig, targetMetaUrl);
}
 
Example 6
Source File: MergeDictionaryStep.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the dictionaries and snapshots of the new (merged) segment from the
 * segments being merged and submits the segment as a cube update.
 *
 * @return a succeed result, or an error result wrapping the {@code IOException}
 *         raised while merging
 */
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeMgr = CubeManager.getInstance(context.getConfig());
    final String cubeName = CubingExecutableUtil.getCubeName(this.getParams());
    final CubeInstance cachedCube = cubeMgr.getCube(cubeName);
    final String newSegmentId = CubingExecutableUtil.getSegmentId(this.getParams());
    final CubeSegment cachedNewSeg = cachedCube.getSegmentById(newSegmentId);
    final List<CubeSegment> toMerge = getMergingSegments(cachedCube);
    final KylinConfig cubeConfig = cachedCube.getConfig();

    Collections.sort(toMerge);

    try {
        checkLookupSnapshotsMustIncremental(toMerge);

        // Mutate a writable copy, never the cached cube/segment objects.
        CubeInstance writableCube = cachedCube.latestCopyForWrite();
        CubeSegment writableNewSeg = writableCube.getSegmentById(cachedNewSeg.getUuid());

        makeDictForNewSegment(cubeConfig, writableCube, writableNewSeg, toMerge);
        makeSnapshotForNewSegment(writableCube, writableNewSeg, toMerge);

        CubeUpdate segUpdate = new CubeUpdate(writableCube);
        segUpdate.setToUpdateSegs(writableNewSeg);
        cubeMgr.updateCube(segUpdate);
    } catch (IOException e) {
        logger.error("fail to merge dictionary or lookup snapshots", e);
        return ExecuteResult.createError(e);
    }
    return ExecuteResult.createSucceed();
}
 
Example 7
Source File: CheckpointExecutable.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
/**
 * After the parent hook has run, records the job's end time as the cube's
 * last cuboid-optimization time. This happens only when the job is not
 * discarded, the result succeeded and every task reports
 * {@code ExecutableState.SUCCEED}.
 */
@Override
protected void onExecuteFinished(ExecuteResult result, ExecutableContext executableContext) {
    super.onExecuteFinished(result, executableContext);
    if (isDiscarded() || !result.succeed()) {
        return;
    }

    // Read every task's status; one non-SUCCEED status vetoes the update.
    List<? extends Executable> subTasks = getTasks();
    boolean everyTaskSucceeded = true;
    for (Executable subTask : subTasks) {
        final ExecutableState taskState = subTask.getStatus();
        everyTaskSucceeded &= (taskState == ExecutableState.SUCCEED);
    }
    if (!everyTaskSucceeded) {
        return;
    }

    // Add last optimization time
    CubeManager cubeManager = CubeManager.getInstance(executableContext.getConfig());
    CubeInstance writableCube = cubeManager.getCube(getCubeName()).latestCopyForWrite();
    try {
        writableCube.setCuboidLastOptimized(getEndTime());
        cubeManager.updateCube(new CubeUpdate(writableCube));
    } catch (IOException e) {
        logger.error("Failed to update last optimized for " + getCubeName(), e);
    }
}
 
Example 8
Source File: SignatureCalculatorTest.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a cube named {@code name} from the descriptor and owner of
 * {@code cube}, adds all of {@code cube}'s segments to it, sets its status to
 * READY, and returns the result of the cube update.
 */
private CubeInstance cloneCubeInstance(CubeManager cubeManager, CubeInstance cube, String name) throws IOException {
    CubeInstance created = cubeManager.createCube(name, projectName, cube.getDescriptor(), cube.getOwner());
    CubeInstance writableClone = created.latestCopyForWrite();
    CubeUpdate cloneUpdate = new CubeUpdate(writableClone);

    CubeSegment[] segmentBuffer = new CubeSegment[cube.getSegments().size()];
    cloneUpdate.setToAddSegs(cube.getSegments().toArray(segmentBuffer));
    cloneUpdate.setStatus(RealizationStatusEnum.READY);

    return cubeManager.updateCube(cloneUpdate);
}
 
Example 9
Source File: CubeBuildJob.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
/**
 * For each entry of {@code toUpdateSegmentSourceSize} (segment id mapped to a
 * {@code Long} size), sets the input-records size and last build time on the
 * matching segment of a writable cube copy, then submits all of those
 * segments in one cube update.
 *
 * @param cubeId                    uuid of the cube that owns the segments
 * @param toUpdateSegmentSourceSize segment id to source size; values are cast to {@code Long}
 * @throws IOException declared by this method; propagated from the calls it makes
 */
private void updateSegmentSourceBytesSize(String cubeId, Map<String, Object> toUpdateSegmentSourceSize)
        throws IOException {
    CubeInstance writableCube = cubeManager.getCubeByUuid(cubeId).latestCopyForWrite();
    CubeUpdate sizeUpdate = new CubeUpdate(writableCube);

    CubeSegment[] touchedSegs = new CubeSegment[toUpdateSegmentSourceSize.size()];
    int next = 0;
    for (Map.Entry<String, Object> sizeBySegId : toUpdateSegmentSourceSize.entrySet()) {
        CubeSegment seg = writableCube.getSegmentById(sizeBySegId.getKey());
        seg.setInputRecordsSize((Long) sizeBySegId.getValue());
        seg.setLastBuildTime(System.currentTimeMillis());
        touchedSegs[next++] = seg;
    }

    sizeUpdate.setToUpdateSegs(touchedSegs);
    cubeManager.updateCube(sizeUpdate);
}
 
Example 10
Source File: MergeDictionaryStep.java    From kylin with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the dictionaries and snapshots of the new (merged) segment from the
 * segments being merged and submits the segment as a cube update.
 *
 * @return a succeed result, or an error result wrapping the {@code IOException}
 *         raised while merging
 */
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeMgr = CubeManager.getInstance(context.getConfig());
    final String cubeName = CubingExecutableUtil.getCubeName(this.getParams());
    final CubeInstance cachedCube = cubeMgr.getCube(cubeName);
    final String newSegmentId = CubingExecutableUtil.getSegmentId(this.getParams());
    final CubeSegment cachedNewSeg = cachedCube.getSegmentById(newSegmentId);
    final List<CubeSegment> toMerge = getMergingSegments(cachedCube);
    final KylinConfig cubeConfig = cachedCube.getConfig();

    Collections.sort(toMerge);

    try {
        checkLookupSnapshotsMustIncremental(toMerge);

        // Mutate a writable copy, never the cached cube/segment objects.
        CubeInstance writableCube = cachedCube.latestCopyForWrite();
        CubeSegment writableNewSeg = writableCube.getSegmentById(cachedNewSeg.getUuid());

        makeDictForNewSegment(cubeConfig, writableCube, writableNewSeg, toMerge);
        makeSnapshotForNewSegment(writableCube, writableNewSeg, toMerge);

        CubeUpdate segUpdate = new CubeUpdate(writableCube);
        segUpdate.setToUpdateSegs(writableNewSeg);
        cubeMgr.updateCube(segUpdate);
    } catch (IOException e) {
        logger.error("fail to merge dictionary or lookup snapshots", e);
        return ExecuteResult.createError(e);
    }
    return ExecuteResult.createSucceed();
}
 
Example 11
Source File: CheckpointExecutable.java    From kylin with Apache License 2.0 5 votes vote down vote up
/**
 * After the parent hook has run, records the job's end time as the cube's
 * last cuboid-optimization time. This happens only when the job is not
 * discarded, the result succeeded and every task reports
 * {@code ExecutableState.SUCCEED}.
 */
@Override
protected void onExecuteFinished(ExecuteResult result, ExecutableContext executableContext) {
    super.onExecuteFinished(result, executableContext);
    if (isDiscarded() || !result.succeed()) {
        return;
    }

    // Read every task's status; one non-SUCCEED status vetoes the update.
    List<? extends Executable> subTasks = getTasks();
    boolean everyTaskSucceeded = true;
    for (Executable subTask : subTasks) {
        final ExecutableState taskState = subTask.getStatus();
        everyTaskSucceeded &= (taskState == ExecutableState.SUCCEED);
    }
    if (!everyTaskSucceeded) {
        return;
    }

    // Add last optimization time
    CubeManager cubeManager = CubeManager.getInstance(executableContext.getConfig());
    CubeInstance writableCube = cubeManager.getCube(getCubeName()).latestCopyForWrite();
    try {
        writableCube.setCuboidLastOptimized(getEndTime());
        cubeManager.updateCube(new CubeUpdate(writableCube));
    } catch (IOException e) {
        logger.error("Failed to update last optimized for " + getCubeName(), e);
    }
}
 
Example 12
Source File: SignatureCalculatorTest.java    From kylin with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a cube named {@code name} from the descriptor and owner of
 * {@code cube}, adds all of {@code cube}'s segments to it, sets its status to
 * READY, and returns the result of the cube update.
 */
private CubeInstance cloneCubeInstance(CubeManager cubeManager, CubeInstance cube, String name) throws IOException {
    CubeInstance created = cubeManager.createCube(name, projectName, cube.getDescriptor(), cube.getOwner());
    CubeInstance writableClone = created.latestCopyForWrite();
    CubeUpdate cloneUpdate = new CubeUpdate(writableClone);

    CubeSegment[] segmentBuffer = new CubeSegment[cube.getSegments().size()];
    cloneUpdate.setToAddSegs(cube.getSegments().toArray(segmentBuffer));
    cloneUpdate.setStatus(RealizationStatusEnum.READY);

    return cubeManager.updateCube(cloneUpdate);
}
 
Example 13
Source File: UpdateDictionaryStep.java    From kylin-on-parquet-v2 with Apache License 2.0 4 votes vote down vote up
/**
 * Publishes the metadata of a newly merged segment.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Lists the files under the {@code ARG_DICT_PATH} directory whose names start with
 *       {@code "part"} or {@code "tmp"} and reads each one as a sequence file of
 *       {@code Text} key/value pairs. The key is split on {@code ":"} into the two arguments
 *       of {@code findColumnRef}; a non-empty value is used as a dictionary resource path.</li>
 *   <li>Loads each dictionary through the manager built on the config loaded from
 *       {@code ARG_META_URL}, saves it through the manager built on the cube's config, and,
 *       when the save returns a non-null info, registers its resource path on the writable
 *       copy of the new segment.</li>
 *   <li>Copies every snapshot path of the last merging segment (after sorting) onto the
 *       new segment copy.</li>
 *   <li>Copies the new segment's statistics resource from the HDFS-config store to the
 *       cube-config store.</li>
 *   <li>Submits the new segment copy as a cube update.</li>
 * </ol>
 *
 * @param context supplies the config used to obtain the {@code CubeManager}
 * @return a succeed result, or an error result wrapping the {@code IOException} raised
 *         by any of the steps inside the try block
 * @throws ExecuteException declared by the overridden method
 */
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeMgr = CubeManager.getInstance(context.getConfig());
    final DictionaryManager dictMgrHdfs;
    final DictionaryManager dictMgrHbase;
    final CubeInstance cube = cubeMgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final List<CubeSegment> mergingSegments = getMergingSegments(cube);
    final String dictInfoPath = this.getParams().get(BatchConstants.ARG_DICT_PATH);
    final String metadataUrl = this.getParams().get(BatchConstants.ARG_META_URL);

    final KylinConfig kylinConfHbase = cube.getConfig();
    final KylinConfig kylinConfHdfs = AbstractHadoopJob.loadKylinConfigFromHdfs(metadataUrl);

    Collections.sort(mergingSegments);

    try {
        Configuration conf = HadoopUtil.getCurrentConfiguration();
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        ResourceStore hbaseRS = ResourceStore.getStore(kylinConfHbase);
        ResourceStore hdfsRS = ResourceStore.getStore(kylinConfHdfs);
        dictMgrHdfs = DictionaryManager.getInstance(kylinConfHdfs);
        dictMgrHbase = DictionaryManager.getInstance(kylinConfHbase);

        // work on copy instead of cached objects
        CubeInstance cubeCopy = cube.latestCopyForWrite();
        CubeSegment newSegCopy = cubeCopy.getSegmentById(newSegment.getUuid());

        // update cube segment dictionary

        FileStatus[] fileStatuss = fs.listStatus(new Path(dictInfoPath), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part") || path.getName().startsWith("tmp");
            }
        });

        for (FileStatus fileStatus : fileStatuss) {
            Path filePath = fileStatus.getPath();

            SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
            try {
                Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
                Text value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf);

                while (reader.next(key, value)) {
                    String tblCol = key.toString();
                    String dictInfoResource = value.toString();

                    if (StringUtils.isNotEmpty(dictInfoResource)) {
                        logger.info(dictInfoResource);
                        // put dictionary file to metadata store
                        DictionaryInfo dictInfoHdfs = dictMgrHdfs.getDictionaryInfo(dictInfoResource);
                        DictionaryInfo dicInfoHbase = dictMgrHbase.trySaveNewDict(dictInfoHdfs.getDictionaryObject(), dictInfoHdfs);

                        if (dicInfoHbase != null){
                            // Key parts [0] and [1] are the two arguments of findColumnRef.
                            String[] tblColParts = tblCol.split(":");
                            TblColRef tblColRef = cube.getDescriptor().findColumnRef(tblColParts[0], tblColParts[1]);
                            newSegCopy.putDictResPath(tblColRef, dicInfoHbase.getResourcePath());
                        }
                    }
                }
            } finally {
                // Close on every path: a failure while reading or saving a dictionary
                // must not leave the reader (and its underlying stream) open.
                IOUtils.closeStream(reader);
            }
        }

        CubeSegment lastSeg = mergingSegments.get(mergingSegments.size() - 1);
        for (Map.Entry<String, String> entry : lastSeg.getSnapshots().entrySet()) {
            newSegCopy.putSnapshotResPath(entry.getKey(), entry.getValue());
        }

        // update statistics
        // put the statistics to metadata store
        String statisticsFileName = newSegment.getStatisticsResourcePath();
        hbaseRS.putResource(statisticsFileName, hdfsRS.getResource(statisticsFileName).content(), System.currentTimeMillis());

        CubeUpdate update = new CubeUpdate(cubeCopy);
        update.setToUpdateSegs(newSegCopy);
        cubeMgr.updateCube(update);

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to merge dictionary", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 14
Source File: AfterMergeOrRefreshResourceMerger.java    From kylin-on-parquet-v2 with Apache License 2.0 4 votes vote down vote up
/**
 * Applies the outcome of a merge/refresh job, read from the remote resource store, to the
 * local cube metadata.
 *
 * <p>The merged segment is taken from the remote store's copy of the cube, marked READY and
 * tagged with the parquet storage type. The segments returned by {@code getToRemoveSegs}
 * are sorted, used to build the merged segment's snapshots, and scheduled for removal.
 * For {@code INDEX_MERGE} jobs the merged segment's size and input-record count are set to
 * the sums over the removed segments, ignoring entries equal to {@code -1}.
 *
 * @param cubeId              uuid of the cube to update
 * @param segmentId           id of the merged segment inside the remote cube
 * @param remoteResourceStore store whose config is used to read the job's cube metadata
 * @param jobType             compared against {@code String.valueOf(JobTypeEnum.INDEX_MERGE)}
 */
@Override
public void merge(String cubeId, String segmentId, ResourceStore remoteResourceStore, String jobType) {

    CubeManager cubeManager = CubeManager.getInstance(getConfig());
    CubeInstance cubeInstance = cubeManager.getCubeByUuid(cubeId);
    CubeUpdate update = new CubeUpdate(cubeInstance.latestCopyForWrite());

    CubeManager distManager = CubeManager.getInstance(remoteResourceStore.getConfig());
    CubeInstance distCube = distManager.getCubeByUuid(cubeId).latestCopyForWrite();

    List<CubeSegment> toUpdateSegments = Lists.newArrayList();

    CubeSegment mergedSegment = distCube.getSegmentById(segmentId);
    mergedSegment.setStatus(SegmentStatusEnum.READY);
    Map<String, String> additionalInfo = mergedSegment.getAdditionalInfo();
    additionalInfo.put("storageType", "" + IStorageAware.ID_PARQUET);
    mergedSegment.setAdditionalInfo(additionalInfo);
    toUpdateSegments.add(mergedSegment);

    List<CubeSegment> toRemoveSegments = getToRemoveSegs(distCube, mergedSegment);
    Collections.sort(toRemoveSegments);
    makeSnapshotForNewSegment(mergedSegment, toRemoveSegments);

    if (String.valueOf(JobTypeEnum.INDEX_MERGE).equals(jobType)) {
        Optional<Long> totalSizeKB = toRemoveSegments.stream().map(CubeSegment::getSizeKB).filter(size -> size != -1)
                .reduce(Long::sum);
        Optional<Long> totalInputRecords = toRemoveSegments.stream().map(CubeSegment::getInputRecords).filter(records -> records != -1)
                .reduce(Long::sum);
        if (totalSizeKB.isPresent()) {
            mergedSegment.setSizeKB(totalSizeKB.get());
            // The two sums are filtered independently, so a size total can exist while no
            // segment reports an input-record count; only set the count when there is one.
            if (totalInputRecords.isPresent()) {
                mergedSegment.setInputRecords(totalInputRecords.get());
            }
            mergedSegment.setLastBuildTime(System.currentTimeMillis());
        }
    }

    update.setToRemoveSegs(toRemoveSegments.toArray(new CubeSegment[0]));
    update.setToUpdateSegs(toUpdateSegments.toArray(new CubeSegment[0]));

    try {
        cubeManager.updateCube(update);
    } catch (IOException e) {
        // NOTE(review): a failed metadata update is only printed here and the caller is not
        // told; consider logging through the class logger or propagating the failure.
        e.printStackTrace();
    }

}
 
Example 15
Source File: JobStepFactoryTest.java    From kylin-on-parquet-v2 with Apache License 2.0 4 votes vote down vote up
/**
 * Appends two READY segments to the test cube, merges them, and verifies the
 * resource-detect, merge and cleanup steps of the resulting merging job:
 * submit class name, step name, forwarded job params and dist meta url.
 */
@Test
public void testAddStepInMerging() throws Exception {
    CubeManager manager = CubeManager.getInstance(config);
    CubeInstance cube = manager.getCube(CUBE_NAME);

    cleanupSegments(CUBE_NAME);

    // Round 1: add two segments and persist them as READY.
    SegmentRange.TSRange firstRange = new SegmentRange.TSRange(dateToLong("2010-01-01"), dateToLong("2013-01-01"));
    CubeSegment firstSeg = manager.appendSegment(cube, firstRange);
    SegmentRange.TSRange secondRange = new SegmentRange.TSRange(dateToLong("2013-01-01"), dateToLong("2015-01-01"));
    CubeSegment secondSeg = manager.appendSegment(cube, secondRange);
    firstSeg.setStatus(SegmentStatusEnum.READY);
    secondSeg.setStatus(SegmentStatusEnum.READY);

    CubeInstance writableCube = cube.latestCopyForWrite();
    Segments readySegments = new Segments();
    readySegments.add(firstSeg);
    readySegments.add(secondSeg);
    writableCube.setSegments(readySegments);
    manager.updateCube(new CubeUpdate(writableCube));

    // Round 2: merge the two segments.
    CubeInstance reloadedCube = manager.reloadCube(CUBE_NAME);
    CubeSegment mergedSegment = manager.mergeSegments(reloadedCube,
            new SegmentRange.TSRange(dateToLong("2010-01-01"), dateToLong("2015-01-01")), null, true);
    NSparkMergingJob job = NSparkMergingJob.merge(mergedSegment, "ADMIN");
    Assert.assertEquals(CUBE_NAME, job.getParam(MetadataConstants.P_CUBE_NAME));

    // Resource detect step.
    NSparkExecutable detectStep = job.getResourceDetectStep();
    Assert.assertEquals(ResourceDetectBeforeMergingJob.class.getName(), detectStep.getSparkSubmitClassName());
    Assert.assertEquals(ExecutableConstants.STEP_NAME_DETECT_RESOURCE, detectStep.getName());
    job.getParams().forEach(
            (paramName, paramValue) -> Assert.assertEquals(paramValue, detectStep.getParam(paramName)));
    Assert.assertEquals(config.getJobTmpMetaStoreUrl(getProject(), detectStep.getId()).toString(),
            detectStep.getDistMetaUrl());

    // Spark merging step.
    NSparkExecutable mergeStep = job.getSparkMergingStep();
    Assert.assertEquals(config.getSparkMergeClassName(), mergeStep.getSparkSubmitClassName());
    Assert.assertEquals(ExecutableConstants.STEP_NAME_MERGER_SPARK_SEGMENT, mergeStep.getName());
    job.getParams().forEach(
            (paramName, paramValue) -> Assert.assertEquals(paramValue, mergeStep.getParam(paramName)));
    Assert.assertEquals(config.getJobTmpMetaStoreUrl(getProject(), mergeStep.getId()).toString(),
            mergeStep.getDistMetaUrl());

    // Cleanup step.
    CubeInstance cubeAfterMerge = manager.reloadCube(CUBE_NAME);
    NSparkUpdateMetaAndCleanupAfterMergeStep cleanStep = job.getCleanUpAfterMergeStep();
    job.getParams().forEach((paramName, paramValue) -> {
        if (paramName.equalsIgnoreCase(MetadataConstants.P_SEGMENT_IDS)) {
            final List<String> expectedSegmentNames = cubeAfterMerge.getMergingSegments(mergedSegment).stream()
                    .map(CubeSegment::getName).collect(Collectors.toList());
            Assert.assertEquals(expectedSegmentNames,
                    Arrays.asList(cleanStep.getParam(MetadataConstants.P_SEGMENT_NAMES).split(",")));
        } else {
            // NOTE(review): this compares against mergeStep, not cleanStep — possibly a
            // copy-paste slip; confirm which step was meant to be checked here.
            Assert.assertEquals(paramValue, mergeStep.getParam(paramName));
        }
    });
    Assert.assertEquals(config.getJobTmpMetaStoreUrl(getProject(), cleanStep.getId()).toString(),
            cleanStep.getDistMetaUrl());
}
 
Example 16
Source File: UpdateDictionaryStep.java    From kylin with Apache License 2.0 4 votes vote down vote up
/**
 * Publishes the metadata of a newly merged segment.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Lists the files under the {@code ARG_DICT_PATH} directory whose names start with
 *       {@code "part"} or {@code "tmp"} and reads each one as a sequence file of
 *       {@code Text} key/value pairs. The key is split on {@code ":"} into the two arguments
 *       of {@code findColumnRef}; a non-empty value is used as a dictionary resource path.</li>
 *   <li>Loads each dictionary through the manager built on the config loaded from
 *       {@code ARG_META_URL}, saves it through the manager built on the cube's config, and,
 *       when the save returns a non-null info, registers its resource path on the writable
 *       copy of the new segment.</li>
 *   <li>Copies every snapshot path of the last merging segment (after sorting) onto the
 *       new segment copy.</li>
 *   <li>Copies the new segment's statistics resource from the HDFS-config store to the
 *       cube-config store.</li>
 *   <li>Submits the new segment copy as a cube update.</li>
 * </ol>
 *
 * @param context supplies the config used to obtain the {@code CubeManager}
 * @return a succeed result, or an error result wrapping the {@code IOException} raised
 *         by any of the steps inside the try block
 * @throws ExecuteException declared by the overridden method
 */
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeMgr = CubeManager.getInstance(context.getConfig());
    final DictionaryManager dictMgrHdfs;
    final DictionaryManager dictMgrHbase;
    final CubeInstance cube = cubeMgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final List<CubeSegment> mergingSegments = getMergingSegments(cube);
    final String dictInfoPath = this.getParams().get(BatchConstants.ARG_DICT_PATH);
    final String metadataUrl = this.getParams().get(BatchConstants.ARG_META_URL);

    final KylinConfig kylinConfHbase = cube.getConfig();
    final KylinConfig kylinConfHdfs = AbstractHadoopJob.loadKylinConfigFromHdfs(metadataUrl);

    Collections.sort(mergingSegments);

    try {
        Configuration conf = HadoopUtil.getCurrentConfiguration();
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        ResourceStore hbaseRS = ResourceStore.getStore(kylinConfHbase);
        ResourceStore hdfsRS = ResourceStore.getStore(kylinConfHdfs);
        dictMgrHdfs = DictionaryManager.getInstance(kylinConfHdfs);
        dictMgrHbase = DictionaryManager.getInstance(kylinConfHbase);

        // work on copy instead of cached objects
        CubeInstance cubeCopy = cube.latestCopyForWrite();
        CubeSegment newSegCopy = cubeCopy.getSegmentById(newSegment.getUuid());

        // update cube segment dictionary

        FileStatus[] fileStatuss = fs.listStatus(new Path(dictInfoPath), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part") || path.getName().startsWith("tmp");
            }
        });

        for (FileStatus fileStatus : fileStatuss) {
            Path filePath = fileStatus.getPath();

            SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
            try {
                Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
                Text value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf);

                while (reader.next(key, value)) {
                    String tblCol = key.toString();
                    String dictInfoResource = value.toString();

                    if (StringUtils.isNotEmpty(dictInfoResource)) {
                        logger.info(dictInfoResource);
                        // put dictionary file to metadata store
                        DictionaryInfo dictInfoHdfs = dictMgrHdfs.getDictionaryInfo(dictInfoResource);
                        DictionaryInfo dicInfoHbase = dictMgrHbase.trySaveNewDict(dictInfoHdfs.getDictionaryObject(), dictInfoHdfs);

                        if (dicInfoHbase != null){
                            // Key parts [0] and [1] are the two arguments of findColumnRef.
                            String[] tblColParts = tblCol.split(":");
                            TblColRef tblColRef = cube.getDescriptor().findColumnRef(tblColParts[0], tblColParts[1]);
                            newSegCopy.putDictResPath(tblColRef, dicInfoHbase.getResourcePath());
                        }
                    }
                }
            } finally {
                // Close on every path: a failure while reading or saving a dictionary
                // must not leave the reader (and its underlying stream) open.
                IOUtils.closeStream(reader);
            }
        }

        CubeSegment lastSeg = mergingSegments.get(mergingSegments.size() - 1);
        for (Map.Entry<String, String> entry : lastSeg.getSnapshots().entrySet()) {
            newSegCopy.putSnapshotResPath(entry.getKey(), entry.getValue());
        }

        // update statistics
        // put the statistics to metadata store
        String statisticsFileName = newSegment.getStatisticsResourcePath();
        hbaseRS.putResource(statisticsFileName, hdfsRS.getResource(statisticsFileName).content(), System.currentTimeMillis());

        CubeUpdate update = new CubeUpdate(cubeCopy);
        update.setToUpdateSegs(newSegCopy);
        cubeMgr.updateCube(update);

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to merge dictionary", e);
        return ExecuteResult.createError(e);
    }
}