Java Code Examples for org.apache.kylin.cube.model.CubeDesc#getAllColumnsNeedDictionaryBuilt()

The following examples show how to use org.apache.kylin.cube.model.CubeDesc#getAllColumnsNeedDictionaryBuilt() . You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may check out the related API usage in the sidebar.
Example 1
Source File: MergeDictionaryStep.java    From kylin-on-parquet-v2 with Apache License 2.0 6 votes vote down vote up
/**
 * For the new (merged) segment, creates the dimension dictionaries by merging the
 * underlying dictionaries of the segments being merged.
 * (https://issues.apache.org/jira/browse/KYLIN-2457, https://issues.apache.org/jira/browse/KYLIN-2800)
 *
 * @param conf            Kylin configuration used to obtain the {@code DictionaryManager}
 * @param cube            the cube whose segments are being merged
 * @param newSeg          the merged segment that receives the combined dictionaries
 * @param mergingSegments the source segments whose dictionaries are collected and merged
 * @throws IOException if reading a segment dictionary or merging dictionaries fails
 */
private void makeDictForNewSegment(KylinConfig conf, CubeInstance cube, CubeSegment newSeg, List<CubeSegment> mergingSegments) throws IOException {
    DictionaryManager dictMgr = DictionaryManager.getInstance(conf);
    CubeDesc cubeDesc = cube.getDescriptor();

    for (TblColRef col : cubeDesc.getAllColumnsNeedDictionaryBuilt()) {
        logger.info("Merging fact table dictionary on : " + col);
        List<DictionaryInfo> dictInfos = new ArrayList<DictionaryInfo>();
        for (CubeSegment segment : mergingSegments) {
            logger.info("Including fact table dictionary of segment : " + segment);
            String dictResPath = segment.getDictResPath(col);
            if (dictResPath != null) {
                DictionaryInfo dictInfo = dictMgr.getDictionaryInfo(dictResPath);
                if (dictInfo == null) {
                    // Only an actual load failure is a problem worth warning about.
                    logger.warn("Failed to load DictionaryInfo from " + dictResPath);
                } else if (!dictInfos.contains(dictInfo)) {
                    // A duplicate dictionary (shared by several segments) is expected
                    // and benign — skip it silently instead of logging a false warning.
                    dictInfos.add(dictInfo);
                }
            }
        }
        mergeDictionaries(dictMgr, newSeg, dictInfos, col);
    }
}
 
Example 2
Source File: MergeDictionaryStep.java    From kylin with Apache License 2.0 6 votes vote down vote up
/**
 * For the new (merged) segment, creates the dimension dictionaries by merging the
 * underlying dictionaries of the segments being merged.
 * (https://issues.apache.org/jira/browse/KYLIN-2457, https://issues.apache.org/jira/browse/KYLIN-2800)
 *
 * @param conf            Kylin configuration used to obtain the {@code DictionaryManager}
 * @param cube            the cube whose segments are being merged
 * @param newSeg          the merged segment that receives the combined dictionaries
 * @param mergingSegments the source segments whose dictionaries are collected and merged
 * @throws IOException if reading a segment dictionary or merging dictionaries fails
 */
private void makeDictForNewSegment(KylinConfig conf, CubeInstance cube, CubeSegment newSeg, List<CubeSegment> mergingSegments) throws IOException {
    DictionaryManager dictMgr = DictionaryManager.getInstance(conf);
    CubeDesc cubeDesc = cube.getDescriptor();

    for (TblColRef col : cubeDesc.getAllColumnsNeedDictionaryBuilt()) {
        logger.info("Merging fact table dictionary on : " + col);
        List<DictionaryInfo> dictInfos = new ArrayList<DictionaryInfo>();
        for (CubeSegment segment : mergingSegments) {
            logger.info("Including fact table dictionary of segment : " + segment);
            String dictResPath = segment.getDictResPath(col);
            if (dictResPath != null) {
                DictionaryInfo dictInfo = dictMgr.getDictionaryInfo(dictResPath);
                if (dictInfo == null) {
                    // Only an actual load failure is a problem worth warning about.
                    logger.warn("Failed to load DictionaryInfo from " + dictResPath);
                } else if (!dictInfos.contains(dictInfo)) {
                    // A duplicate dictionary (shared by several segments) is expected
                    // and benign — skip it silently instead of logging a false warning.
                    dictInfos.add(dictInfo);
                }
            }
        }
        mergeDictionaries(dictMgr, newSeg, dictInfos, col);
    }
}
 
Example 3
Source File: SparkFactDistinct.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
private void initDictColDeduper(CubeDesc cubeDesc) {
    // Create the de-duplicator and flag every column position that requires a dictionary.
    dictColDeduper = new DictColDeduper();
    Set<TblColRef> dictionaryColumns = cubeDesc.getAllColumnsNeedDictionaryBuilt();
    int position = 0;
    for (TblColRef column : allCols) {
        if (dictionaryColumns.contains(column)) {
            dictColDeduper.setIsDictCol(position);
        }
        position++;
    }
}
 
Example 4
Source File: SparkFactDistinct.java    From kylin with Apache License 2.0 5 votes vote down vote up
private void initDictColDeduper(CubeDesc cubeDesc) {
    // Create the de-duplicator and flag every column position that requires a dictionary.
    dictColDeduper = new DictColDeduper();
    Set<TblColRef> dictionaryColumns = cubeDesc.getAllColumnsNeedDictionaryBuilt();
    int position = 0;
    for (TblColRef column : allCols) {
        if (dictionaryColumns.contains(column)) {
            dictColDeduper.setIsDictCol(position);
        }
        position++;
    }
}
 
Example 5
Source File: FactDistinctColumnsReducerMapping.java    From kylin-on-parquet-v2 with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the mapping between columns and the reducers assigned to them.
 * UHC (ultra-high-cardinality) columns get {@code uhcReducerCount} reducers each,
 * every other mapped column gets exactly one, and the trailing reducers are
 * reserved for cuboid HLL row counting.
 *
 * @param cube                      the cube the mapping is built for
 * @param cuboidRowCounterReducerNum number of cuboid row-counter reducers;
 *                                   0 means compute it via {@code MapReduceUtil}
 */
private FactDistinctColumnsReducerMapping(CubeInstance cube, int cuboidRowCounterReducerNum) {
    CubeDesc desc = cube.getDescriptor();
    Set<TblColRef> allCols = cube.getAllColumns();
    Set<TblColRef> dictCols = desc.getAllColumnsNeedDictionaryBuilt();
    List<TblColRef> dimCols = desc.listDimensionColumnsExcludingDerived(true);
    // A column participates in the mapping if it needs a dictionary or is a
    // non-derived dimension column; both cases are handled identically, so the
    // original if / else-if (with O(n) indexOf) collapses into one condition.
    for (TblColRef colRef : allCols) {
        if (dictCols.contains(colRef) || dimCols.contains(colRef)) {
            allDimDictCols.add(colRef);
        }
    }

    colIdToReducerBeginId = new int[allDimDictCols.size() + 1];

    // Each column starts at reducer `counter`; UHC columns reserve a span of
    // uhcReducerCount reducers, all others exactly one.
    int uhcReducerCount = cube.getConfig().getUHCReducerCount();
    List<TblColRef> uhcList = desc.getAllUHCColumns();
    int counter = 0;
    for (int i = 0; i < allDimDictCols.size(); i++) {
        colIdToReducerBeginId[i] = counter;
        boolean isUHC = uhcList.contains(allDimDictCols.get(i));
        counter += (isUHC) ? uhcReducerCount : 1;
    }
    // Sentinel entry marks the end of the last column's reducer span.
    colIdToReducerBeginId[allDimDictCols.size()] = counter;
    nDimReducers = counter;

    nCuboidRowCounters = cuboidRowCounterReducerNum == 0 ? //
            MapReduceUtil.getCuboidHLLCounterReducerNum(cube) : cuboidRowCounterReducerNum;
    nTotalReducers = nDimReducers + nCuboidRowCounters;

    // Assign each reducer slot its role: a column id for dimension/dictionary
    // reducers, or the HLL-counter marker for the trailing slots.
    reducerRolePlay = new int[nTotalReducers];
    for (int i = 0, dictId = 0; i < nTotalReducers; i++) {
        if (i >= nDimReducers) {
            // cuboid HLL counter reducer
            reducerRolePlay[i] = MARK_FOR_HLL_COUNTER;
        } else {
            if (i == colIdToReducerBeginId[dictId + 1])
                dictId++;

            reducerRolePlay[i] = dictId;
        }
    }
}
 
Example 6
Source File: FactDistinctColumnsReducerMapping.java    From kylin with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the mapping between columns and the reducers assigned to them.
 * UHC (ultra-high-cardinality) columns get {@code uhcReducerCount} reducers each,
 * every other mapped column gets exactly one, and the trailing reducers are
 * reserved for cuboid HLL row counting.
 *
 * @param cube                      the cube the mapping is built for
 * @param cuboidRowCounterReducerNum number of cuboid row-counter reducers;
 *                                   0 means compute it via {@code MapReduceUtil}
 */
private FactDistinctColumnsReducerMapping(CubeInstance cube, int cuboidRowCounterReducerNum) {
    CubeDesc desc = cube.getDescriptor();
    Set<TblColRef> allCols = cube.getAllColumns();
    Set<TblColRef> dictCols = desc.getAllColumnsNeedDictionaryBuilt();
    List<TblColRef> dimCols = desc.listDimensionColumnsExcludingDerived(true);
    // A column participates in the mapping if it needs a dictionary or is a
    // non-derived dimension column; both cases are handled identically, so the
    // original if / else-if (with O(n) indexOf) collapses into one condition.
    for (TblColRef colRef : allCols) {
        if (dictCols.contains(colRef) || dimCols.contains(colRef)) {
            allDimDictCols.add(colRef);
        }
    }

    colIdToReducerBeginId = new int[allDimDictCols.size() + 1];

    // Each column starts at reducer `counter`; UHC columns reserve a span of
    // uhcReducerCount reducers, all others exactly one.
    int uhcReducerCount = cube.getConfig().getUHCReducerCount();
    List<TblColRef> uhcList = desc.getAllUHCColumns();
    int counter = 0;
    for (int i = 0; i < allDimDictCols.size(); i++) {
        colIdToReducerBeginId[i] = counter;
        boolean isUHC = uhcList.contains(allDimDictCols.get(i));
        counter += (isUHC) ? uhcReducerCount : 1;
    }
    // Sentinel entry marks the end of the last column's reducer span.
    colIdToReducerBeginId[allDimDictCols.size()] = counter;
    nDimReducers = counter;

    nCuboidRowCounters = cuboidRowCounterReducerNum == 0 ? //
            MapReduceUtil.getCuboidHLLCounterReducerNum(cube) : cuboidRowCounterReducerNum;
    nTotalReducers = nDimReducers + nCuboidRowCounters;

    // Assign each reducer slot its role: a column id for dimension/dictionary
    // reducers, or the HLL-counter marker for the trailing slots.
    reducerRolePlay = new int[nTotalReducers];
    for (int i = 0, dictId = 0; i < nTotalReducers; i++) {
        if (i >= nDimReducers) {
            // cuboid HLL counter reducer
            reducerRolePlay[i] = MARK_FOR_HLL_COUNTER;
        } else {
            if (i == colIdToReducerBeginId[dictId + 1])
                dictId++;

            reducerRolePlay[i] = dictId;
        }
    }
}