Java Code Examples for org.apache.kylin.dict.DictionaryManager#getDictionaryInfo()

The following examples show how to use org.apache.kylin.dict.DictionaryManager#getDictionaryInfo() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MergeDictionaryStep.java    From kylin-on-parquet-v2 with Apache License 2.0 6 votes vote down vote up
/**
 * For the new segment, we need to create new dimension dictionaries by merging underlying
 * dictionaries. (https://issues.apache.org/jira/browse/KYLIN-2457, https://issues.apache.org/jira/browse/KYLIN-2800)
 * @param cube
 * @param newSeg
 * @throws IOException
 */
private void makeDictForNewSegment(KylinConfig conf, CubeInstance cube, CubeSegment newSeg, List<CubeSegment> mergingSegments) throws IOException {
    DictionaryManager dictMgr = DictionaryManager.getInstance(conf);
    CubeDesc cubeDesc = cube.getDescriptor();

    for (TblColRef col : cubeDesc.getAllColumnsNeedDictionaryBuilt()) {
        logger.info("Merging fact table dictionary on : " + col);
        List<DictionaryInfo> dictInfos = new ArrayList<DictionaryInfo>();
        for (CubeSegment segment : mergingSegments) {
            logger.info("Including fact table dictionary of segment : " + segment);
            if (segment.getDictResPath(col) != null) {
                DictionaryInfo dictInfo = dictMgr.getDictionaryInfo(segment.getDictResPath(col));
                if (dictInfo != null && !dictInfos.contains(dictInfo)) {
                    dictInfos.add(dictInfo);
                } else {
                    logger.warn("Failed to load DictionaryInfo from " + segment.getDictResPath(col));
                }
            }
        }
        mergeDictionaries(dictMgr, newSeg, dictInfos, col);
    }
}
 
Example 2
Source File: MergeDictionaryStep.java    From kylin with Apache License 2.0 6 votes vote down vote up
/**
 * For the new segment, we need to create new dimension dictionaries by merging underlying
 * dictionaries. (https://issues.apache.org/jira/browse/KYLIN-2457, https://issues.apache.org/jira/browse/KYLIN-2800)
 * @param cube
 * @param newSeg
 * @throws IOException
 */
private void makeDictForNewSegment(KylinConfig conf, CubeInstance cube, CubeSegment newSeg, List<CubeSegment> mergingSegments) throws IOException {
    DictionaryManager dictMgr = DictionaryManager.getInstance(conf);
    CubeDesc cubeDesc = cube.getDescriptor();

    for (TblColRef col : cubeDesc.getAllColumnsNeedDictionaryBuilt()) {
        logger.info("Merging fact table dictionary on : " + col);
        List<DictionaryInfo> dictInfos = new ArrayList<DictionaryInfo>();
        for (CubeSegment segment : mergingSegments) {
            logger.info("Including fact table dictionary of segment : " + segment);
            if (segment.getDictResPath(col) != null) {
                DictionaryInfo dictInfo = dictMgr.getDictionaryInfo(segment.getDictResPath(col));
                if (dictInfo != null && !dictInfos.contains(dictInfo)) {
                    dictInfos.add(dictInfo);
                } else {
                    logger.warn("Failed to load DictionaryInfo from " + segment.getDictResPath(col));
                }
            }
        }
        mergeDictionaries(dictMgr, newSeg, dictInfos, col);
    }
}
 
Example 3
Source File: CubeManager.java    From Kylin with Apache License 2.0 6 votes vote down vote up
/**
 * return null if no dictionary for given column
 */
public Dictionary<?> getDictionary(CubeSegment cubeSeg, TblColRef col) {
    DictionaryInfo info = null;
    try {
        DictionaryManager dictMgr = getDictionaryManager();
        // logger.info("Using metadata url " + metadataUrl +
        // " for DictionaryManager");
        String dictResPath = cubeSeg.getDictResPath(col);
        if (dictResPath == null)
            return null;

        info = dictMgr.getDictionaryInfo(dictResPath);
        if (info == null)
            throw new IllegalStateException("No dictionary found by " + dictResPath + ", invalid cube state; cube segment" + cubeSeg + ", col " + col);
    } catch (IOException e) {
        throw new IllegalStateException("Failed to get dictionary for cube segment" + cubeSeg + ", col" + col, e);
    }

    return info.getDictionaryObject();
}
 
Example 4
Source File: IIManager.java    From Kylin with Apache License 2.0 6 votes vote down vote up
/**
 * return null if no dictionary for given column
 */
public Dictionary<?> getDictionary(IISegment iiSeg, TblColRef col) {
    DictionaryInfo info = null;
    try {
        DictionaryManager dictMgr = getDictionaryManager();
        // logger.info("Using metadata url " + metadataUrl +
        // " for DictionaryManager");
        String dictResPath = iiSeg.getDictResPath(col);
        if (dictResPath == null)
            return null;

        info = dictMgr.getDictionaryInfo(dictResPath);
        if (info == null)
            throw new IllegalStateException("No dictionary found by " + dictResPath + ", invalid II state; II segment" + iiSeg + ", col " + col);
    } catch (IOException e) {
        throw new IllegalStateException("Failed to get dictionary for II segment" + iiSeg + ", col" + col, e);
    }

    return info.getDictionaryObject();
}
 
Example 5
Source File: CubeManager.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
/**
 * return null if no dictionary for given column
 */
@SuppressWarnings("unchecked")
public Dictionary<String> getDictionary(CubeSegment cubeSeg, TblColRef col) {
    DictionaryInfo info = null;
    String dictResPath = null;
    try {
        DictionaryManager dictMgr = getDictionaryManager();

        //tiretree global domain dic
        List<CubeDescTiretreeGlobalDomainDictUtil.GlobalDict> globalDicts = cubeSeg.getCubeDesc().listDomainDict();
        if (!globalDicts.isEmpty()) {
            dictResPath = CubeDescTiretreeGlobalDomainDictUtil.globalReuseDictPath(cubeSeg.getConfig(), col, cubeSeg.getCubeDesc());
        }

        if (Objects.isNull(dictResPath)){
            dictResPath = cubeSeg.getDictResPath(col);
        }

        if (dictResPath == null)
            return null;

        info = dictMgr.getDictionaryInfo(dictResPath);
        if (info == null)
            throw new IllegalStateException("No dictionary found by " + dictResPath
                    + ", invalid cube state; cube segment" + cubeSeg + ", col " + col);
    } catch (IOException e) {
        throw new IllegalStateException("Failed to get dictionary for cube segment" + cubeSeg + ", col" + col,
                e);
    }
    return (Dictionary<String>) info.getDictionaryObject();
}
 
Example 6
Source File: CubeManager.java    From kylin with Apache License 2.0 5 votes vote down vote up
/**
 * return null if no dictionary for given column
 */
@SuppressWarnings("unchecked")
public Dictionary<String> getDictionary(CubeSegment cubeSeg, TblColRef col) {
    DictionaryInfo info = null;
    String dictResPath = null;
    try {
        DictionaryManager dictMgr = getDictionaryManager();

        //tiretree global domain dic
        List<CubeDescTiretreeGlobalDomainDictUtil.GlobalDict> globalDicts = cubeSeg.getCubeDesc().listDomainDict();
        if (!globalDicts.isEmpty()) {
            dictResPath = CubeDescTiretreeGlobalDomainDictUtil.globalReuseDictPath(cubeSeg.getConfig(), col, cubeSeg.getCubeDesc());
        }

        if (Objects.isNull(dictResPath)){
            dictResPath = cubeSeg.getDictResPath(col);
        }

        if (dictResPath == null)
            return null;

        info = dictMgr.getDictionaryInfo(dictResPath);
        if (info == null)
            throw new IllegalStateException("No dictionary found by " + dictResPath
                    + ", invalid cube state; cube segment" + cubeSeg + ", col " + col);
    } catch (IOException e) {
        throw new IllegalStateException("Failed to get dictionary for cube segment" + cubeSeg + ", col" + col,
                e);
    }
    return info.getDictionaryObject();
}
 
Example 7
Source File: UpdateDictionaryStep.java    From kylin-on-parquet-v2 with Apache License 2.0 4 votes vote down vote up
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeMgr = CubeManager.getInstance(context.getConfig());
    final DictionaryManager dictMgrHdfs;
    final DictionaryManager dictMgrHbase;
    final CubeInstance cube = cubeMgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final List<CubeSegment> mergingSegments = getMergingSegments(cube);
    final String dictInfoPath = this.getParams().get(BatchConstants.ARG_DICT_PATH);
    final String metadataUrl = this.getParams().get(BatchConstants.ARG_META_URL);

    final KylinConfig kylinConfHbase = cube.getConfig();
    final KylinConfig kylinConfHdfs = AbstractHadoopJob.loadKylinConfigFromHdfs(metadataUrl);

    Collections.sort(mergingSegments);

    try {
        Configuration conf = HadoopUtil.getCurrentConfiguration();
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        ResourceStore hbaseRS = ResourceStore.getStore(kylinConfHbase);
        ResourceStore hdfsRS = ResourceStore.getStore(kylinConfHdfs);
        dictMgrHdfs = DictionaryManager.getInstance(kylinConfHdfs);
        dictMgrHbase = DictionaryManager.getInstance(kylinConfHbase);

        // work on copy instead of cached objects
        CubeInstance cubeCopy = cube.latestCopyForWrite();
        CubeSegment newSegCopy = cubeCopy.getSegmentById(newSegment.getUuid());

        // update cube segment dictionary

        FileStatus[] fileStatuss = fs.listStatus(new Path(dictInfoPath), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part") || path.getName().startsWith("tmp");
            }
        });

        for (FileStatus fileStatus : fileStatuss) {
            Path filePath = fileStatus.getPath();

            SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
            Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Text value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf);

            while (reader.next(key, value)) {
                String tblCol = key.toString();
                String dictInfoResource = value.toString();

                if (StringUtils.isNotEmpty(dictInfoResource)) {
                    logger.info(dictInfoResource);
                    // put dictionary file to metadata store
                    DictionaryInfo dictInfoHdfs = dictMgrHdfs.getDictionaryInfo(dictInfoResource);
                    DictionaryInfo dicInfoHbase = dictMgrHbase.trySaveNewDict(dictInfoHdfs.getDictionaryObject(), dictInfoHdfs);

                    if (dicInfoHbase != null){
                        TblColRef tblColRef = cube.getDescriptor().findColumnRef(tblCol.split(":")[0], tblCol.split(":")[1]);
                        newSegCopy.putDictResPath(tblColRef, dicInfoHbase.getResourcePath());
                    }
                }
            }

            IOUtils.closeStream(reader);
        }

        CubeSegment lastSeg = mergingSegments.get(mergingSegments.size() - 1);
        for (Map.Entry<String, String> entry : lastSeg.getSnapshots().entrySet()) {
            newSegCopy.putSnapshotResPath(entry.getKey(), entry.getValue());
        }

        // update statistics
        // put the statistics to metadata store
        String statisticsFileName = newSegment.getStatisticsResourcePath();
        hbaseRS.putResource(statisticsFileName, hdfsRS.getResource(newSegment.getStatisticsResourcePath()).content(), System.currentTimeMillis());

        CubeUpdate update = new CubeUpdate(cubeCopy);
        update.setToUpdateSegs(newSegCopy);
        cubeMgr.updateCube(update);

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to merge dictionary", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 8
Source File: UpdateDictionaryStep.java    From kylin with Apache License 2.0 4 votes vote down vote up
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeMgr = CubeManager.getInstance(context.getConfig());
    final DictionaryManager dictMgrHdfs;
    final DictionaryManager dictMgrHbase;
    final CubeInstance cube = cubeMgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final List<CubeSegment> mergingSegments = getMergingSegments(cube);
    final String dictInfoPath = this.getParams().get(BatchConstants.ARG_DICT_PATH);
    final String metadataUrl = this.getParams().get(BatchConstants.ARG_META_URL);

    final KylinConfig kylinConfHbase = cube.getConfig();
    final KylinConfig kylinConfHdfs = AbstractHadoopJob.loadKylinConfigFromHdfs(metadataUrl);

    Collections.sort(mergingSegments);

    try {
        Configuration conf = HadoopUtil.getCurrentConfiguration();
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        ResourceStore hbaseRS = ResourceStore.getStore(kylinConfHbase);
        ResourceStore hdfsRS = ResourceStore.getStore(kylinConfHdfs);
        dictMgrHdfs = DictionaryManager.getInstance(kylinConfHdfs);
        dictMgrHbase = DictionaryManager.getInstance(kylinConfHbase);

        // work on copy instead of cached objects
        CubeInstance cubeCopy = cube.latestCopyForWrite();
        CubeSegment newSegCopy = cubeCopy.getSegmentById(newSegment.getUuid());

        // update cube segment dictionary

        FileStatus[] fileStatuss = fs.listStatus(new Path(dictInfoPath), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part") || path.getName().startsWith("tmp");
            }
        });

        for (FileStatus fileStatus : fileStatuss) {
            Path filePath = fileStatus.getPath();

            SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
            Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Text value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf);

            while (reader.next(key, value)) {
                String tblCol = key.toString();
                String dictInfoResource = value.toString();

                if (StringUtils.isNotEmpty(dictInfoResource)) {
                    logger.info(dictInfoResource);
                    // put dictionary file to metadata store
                    DictionaryInfo dictInfoHdfs = dictMgrHdfs.getDictionaryInfo(dictInfoResource);
                    DictionaryInfo dicInfoHbase = dictMgrHbase.trySaveNewDict(dictInfoHdfs.getDictionaryObject(), dictInfoHdfs);

                    if (dicInfoHbase != null){
                        TblColRef tblColRef = cube.getDescriptor().findColumnRef(tblCol.split(":")[0], tblCol.split(":")[1]);
                        newSegCopy.putDictResPath(tblColRef, dicInfoHbase.getResourcePath());
                    }
                }
            }

            IOUtils.closeStream(reader);
        }

        CubeSegment lastSeg = mergingSegments.get(mergingSegments.size() - 1);
        for (Map.Entry<String, String> entry : lastSeg.getSnapshots().entrySet()) {
            newSegCopy.putSnapshotResPath(entry.getKey(), entry.getValue());
        }

        // update statistics
        // put the statistics to metadata store
        String statisticsFileName = newSegment.getStatisticsResourcePath();
        hbaseRS.putResource(statisticsFileName, hdfsRS.getResource(newSegment.getStatisticsResourcePath()).content(), System.currentTimeMillis());

        CubeUpdate update = new CubeUpdate(cubeCopy);
        update.setToUpdateSegs(newSegCopy);
        cubeMgr.updateCube(update);

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to merge dictionary", e);
        return ExecuteResult.createError(e);
    }
}