Java Code Examples for org.apache.kylin.dict.DictionaryInfo#getDictionaryObject()

The following examples show how to use org.apache.kylin.dict.DictionaryInfo#getDictionaryObject(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source File: CubeManager.java    From kylin-on-parquet-v2 with Apache License 2.0 6 votes vote down vote up
/**
 * Records a column's dictionary on a writable copy of the segment's cube and
 * submits that copy through {@code updateCube}.
 *
 * @param cubeSeg  segment the dictionary belongs to
 * @param col      column the dictionary was built for
 * @param dictInfo dictionary info to record; when {@code null} the method does nothing
 * @throws IOException declared by this method; propagated from the calls it makes
 */
private void saveDictionaryInfo(CubeSegment cubeSeg, TblColRef col, DictionaryInfo dictInfo)
        throws IOException {
    if (dictInfo == null) {
        return;
    }

    // Operate on a fresh writable copy rather than on the cached objects.
    CubeInstance writableCube = cubeSeg.getCubeInstance().latestCopyForWrite();
    CubeSegment writableSeg = writableCube.getSegmentById(cubeSeg.getUuid());

    Dictionary<?> dictionary = dictInfo.getDictionaryObject();
    writableSeg.putDictResPath(col, dictInfo.getResourcePath());

    // One stats row per column: identity, dictionary size, size of an id.
    Object[] statsRow = { col.getIdentity(), dictionary.getSize(), dictionary.getSizeOfId() };
    writableSeg.getRowkeyStats().add(statsRow);

    CubeUpdate pending = new CubeUpdate(writableCube);
    pending.setToUpdateSegs(writableSeg);
    updateCube(pending);
}
 
Example 2
Source File: CubeManager.java    From kylin with Apache License 2.0 6 votes vote down vote up
/**
 * Stores the resource path and row-key statistics of a column dictionary on a
 * writable copy of the owning cube, then hands the copy to {@code updateCube}.
 *
 * @param cubeSeg  segment the dictionary belongs to
 * @param col      column the dictionary was built for
 * @param dictInfo dictionary info to record; a {@code null} value makes this a no-op
 * @throws IOException declared by this method; propagated from the calls it makes
 */
private void saveDictionaryInfo(CubeSegment cubeSeg, TblColRef col, DictionaryInfo dictInfo)
        throws IOException {
    if (dictInfo == null) {
        return;
    }

    // Take the latest copy-for-write so cached objects are left untouched.
    CubeInstance latestCube = cubeSeg.getCubeInstance().latestCopyForWrite();
    CubeSegment targetSeg = latestCube.getSegmentById(cubeSeg.getUuid());

    Dictionary<?> builtDict = dictInfo.getDictionaryObject();
    targetSeg.putDictResPath(col, dictInfo.getResourcePath());

    // Stats row layout: column identity, dictionary size, size of an id.
    Object[] rowkeyStat = { col.getIdentity(), builtDict.getSize(), builtDict.getSizeOfId() };
    targetSeg.getRowkeyStats().add(rowkeyStat);

    CubeUpdate cubeUpdate = new CubeUpdate(latestCube);
    cubeUpdate.setToUpdateSegs(targetSeg);
    updateCube(cubeUpdate);
}
 
Example 3
Source File: CubeManager.java    From Kylin with Apache License 2.0 6 votes vote down vote up
/**
 * Looks up the dictionary recorded for a column of a cube segment.
 *
 * @param cubeSeg cube segment whose dictionary resource path is consulted
 * @param col     column the dictionary is requested for
 * @return the dictionary object, or {@code null} when the segment records no
 *         dictionary resource path for the column
 * @throws IllegalStateException if a path is recorded but no dictionary info is
 *                               found for it, or if the lookup raises an
 *                               {@code IOException}
 */
public Dictionary<?> getDictionary(CubeSegment cubeSeg, TblColRef col) {
    try {
        DictionaryManager manager = getDictionaryManager();
        String resPath = cubeSeg.getDictResPath(col);
        if (resPath == null) {
            return null;
        }

        DictionaryInfo found = manager.getDictionaryInfo(resPath);
        if (found != null) {
            return found.getDictionaryObject();
        }
        // A recorded path without a matching dictionary means the metadata is inconsistent.
        throw new IllegalStateException("No dictionary found by " + resPath + ", invalid cube state; cube segment" + cubeSeg + ", col " + col);
    } catch (IOException e) {
        throw new IllegalStateException("Failed to get dictionary for cube segment" + cubeSeg + ", col" + col, e);
    }
}
 
Example 4
Source File: IIManager.java    From Kylin with Apache License 2.0 6 votes vote down vote up
/**
 * Looks up the dictionary recorded for a column of an II segment.
 *
 * @param iiSeg II segment whose dictionary resource path is consulted
 * @param col   column the dictionary is requested for
 * @return the dictionary object, or {@code null} when the segment records no
 *         dictionary resource path for the column
 * @throws IllegalStateException if a path is recorded but no dictionary info is
 *                               found for it, or if the lookup raises an
 *                               {@code IOException}
 */
public Dictionary<?> getDictionary(IISegment iiSeg, TblColRef col) {
    try {
        DictionaryManager manager = getDictionaryManager();
        String resPath = iiSeg.getDictResPath(col);
        if (resPath == null) {
            return null;
        }

        DictionaryInfo found = manager.getDictionaryInfo(resPath);
        if (found != null) {
            return found.getDictionaryObject();
        }
        // A recorded path without a matching dictionary means the metadata is inconsistent.
        throw new IllegalStateException("No dictionary found by " + resPath + ", invalid II state; II segment" + iiSeg + ", col " + col);
    } catch (IOException e) {
        throw new IllegalStateException("Failed to get dictionary for II segment" + iiSeg + ", col" + col, e);
    }
}
 
Example 5
Source File: CubeManager.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the string dictionary for a column of a cube segment.
 *
 * <p>If the cube description lists any domain dictionary, the path returned by
 * {@code CubeDescTiretreeGlobalDomainDictUtil.globalReuseDictPath} is tried first;
 * when that yields nothing, the segment's own dictionary resource path is used.
 *
 * @param cubeSeg cube segment to resolve the dictionary for
 * @param col     column the dictionary is requested for
 * @return the dictionary, or {@code null} when no resource path is available for the column
 * @throws IllegalStateException if a path is available but no dictionary info is
 *                               found for it, or if the lookup raises an
 *                               {@code IOException}
 */
@SuppressWarnings("unchecked")
public Dictionary<String> getDictionary(CubeSegment cubeSeg, TblColRef col) {
    try {
        DictionaryManager manager = getDictionaryManager();

        // Global domain dictionary: only consulted when the cube description lists at least one.
        String resPath = null;
        if (!cubeSeg.getCubeDesc().listDomainDict().isEmpty()) {
            resPath = CubeDescTiretreeGlobalDomainDictUtil.globalReuseDictPath(cubeSeg.getConfig(), col,
                    cubeSeg.getCubeDesc());
        }

        // Fall back to the segment's own path; give up quietly if that is missing too.
        if (resPath == null) {
            resPath = cubeSeg.getDictResPath(col);
            if (resPath == null) {
                return null;
            }
        }

        DictionaryInfo found = manager.getDictionaryInfo(resPath);
        if (found == null) {
            throw new IllegalStateException("No dictionary found by " + resPath
                    + ", invalid cube state; cube segment" + cubeSeg + ", col " + col);
        }
        return (Dictionary<String>) found.getDictionaryObject();
    } catch (IOException e) {
        throw new IllegalStateException("Failed to get dictionary for cube segment" + cubeSeg + ", col" + col,
                e);
    }
}
 
Example 6
Source File: SparkBuildDictionary.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
/**
 * Produces the dictionary for one column and reports where it is stored.
 *
 * <p>A dictionary returned by {@code getDictionary} is saved as-is; otherwise one is
 * built from the column's distinct values with the builder class named by the cube
 * description.
 *
 * @param tblColRef column to produce the dictionary for
 * @return pair of the column identity and a triple of (dictionary resource path,
 *         dictionary size, size of an id)
 * @throws Exception declared by the overridden method; propagated from the calls made here
 */
@Override
public Tuple2<String, Tuple3<String, Integer, Integer>> call(TblColRef tblColRef) throws Exception {
    // Run init() at most once: check the flag, then re-check it under the class lock.
    if (!initialized) {
        synchronized (SparkBuildDictionary.class) {
            if (!initialized) {
                init();
            }
        }
    }

    logger.info("Building dictionary for column {}", tblColRef);
    IReadableTable distinctValues = getDistinctValuesFor(tblColRef);

    Dictionary<String> dictionary;
    DictionaryInfo savedInfo;
    // The thread-local config is set for the duration of this block and unset on exit.
    try (KylinConfig.SetAndUnsetThreadLocalConfig ignored = KylinConfig
            .setAndUnsetThreadLocalConfig(config)) {
        dictionary = getDictionary(tblColRef);

        if (dictionary == null) {
            logger.info("Dict for '{}' not pre-built, build it from {}", tblColRef.getName(), distinctValues);
            String builderClass = cubeSegment.getCubeDesc().getDictionaryBuilderClass(tblColRef);
            savedInfo = dictManager.buildDictionary(tblColRef, distinctValues, builderClass);
            dictionary = savedInfo.getDictionaryObject();
        } else {
            logger.info("Dict for '{}' has already been built, save it", tblColRef.getName());
            savedInfo = dictManager.saveDictionary(tblColRef, distinctValues, dictionary);
        }
    }

    Tuple3<String, Integer, Integer> dictStats = new Tuple3<>(savedInfo.getResourcePath(),
            dictionary.getSize(), dictionary.getSizeOfId());
    return new Tuple2<>(tblColRef.getIdentity(), dictStats);
}
 
Example 7
Source File: CubeManager.java    From kylin with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the string dictionary for a column of a cube segment.
 *
 * <p>If the cube description lists any domain dictionary, the path returned by
 * {@code CubeDescTiretreeGlobalDomainDictUtil.globalReuseDictPath} is tried first;
 * when that yields nothing, the segment's own dictionary resource path is used.
 *
 * @param cubeSeg cube segment to resolve the dictionary for
 * @param col     column the dictionary is requested for
 * @return the dictionary, or {@code null} when no resource path is available for the column
 * @throws IllegalStateException if a path is available but no dictionary info is
 *                               found for it, or if the lookup raises an
 *                               {@code IOException}
 */
@SuppressWarnings("unchecked")
public Dictionary<String> getDictionary(CubeSegment cubeSeg, TblColRef col) {
    try {
        DictionaryManager manager = getDictionaryManager();

        // Global domain dictionary: only consulted when the cube description lists at least one.
        String resPath = null;
        if (!cubeSeg.getCubeDesc().listDomainDict().isEmpty()) {
            resPath = CubeDescTiretreeGlobalDomainDictUtil.globalReuseDictPath(cubeSeg.getConfig(), col,
                    cubeSeg.getCubeDesc());
        }

        // Fall back to the segment's own path; give up quietly if that is missing too.
        if (resPath == null) {
            resPath = cubeSeg.getDictResPath(col);
            if (resPath == null) {
                return null;
            }
        }

        DictionaryInfo found = manager.getDictionaryInfo(resPath);
        if (found == null) {
            throw new IllegalStateException("No dictionary found by " + resPath
                    + ", invalid cube state; cube segment" + cubeSeg + ", col " + col);
        }
        return found.getDictionaryObject();
    } catch (IOException e) {
        throw new IllegalStateException("Failed to get dictionary for cube segment" + cubeSeg + ", col" + col,
                e);
    }
}
 
Example 8
Source File: SparkBuildDictionary.java    From kylin with Apache License 2.0 5 votes vote down vote up
/**
 * Produces the dictionary for one column and reports where it is stored.
 *
 * <p>A dictionary returned by {@code getDictionary} is saved as-is; otherwise one is
 * built from the column's distinct values with the builder class named by the cube
 * description.
 *
 * @param tblColRef column to produce the dictionary for
 * @return pair of the column identity and a triple of (dictionary resource path,
 *         dictionary size, size of an id)
 * @throws Exception declared by the overridden method; propagated from the calls made here
 */
@Override
public Tuple2<String, Tuple3<String, Integer, Integer>> call(TblColRef tblColRef) throws Exception {
    // Run init() at most once: check the flag, then re-check it under the class lock.
    if (!initialized) {
        synchronized (SparkBuildDictionary.class) {
            if (!initialized) {
                init();
            }
        }
    }

    logger.info("Building dictionary for column {}", tblColRef);
    IReadableTable sourceValues = getDistinctValuesFor(tblColRef);

    Dictionary<String> columnDict;
    DictionaryInfo resultInfo;
    // The thread-local config is set for the duration of this block and unset on exit.
    try (KylinConfig.SetAndUnsetThreadLocalConfig ignored = KylinConfig
            .setAndUnsetThreadLocalConfig(config)) {
        columnDict = getDictionary(tblColRef);

        if (columnDict == null) {
            logger.info("Dict for '{}' not pre-built, build it from {}", tblColRef.getName(), sourceValues);
            String builderClass = cubeSegment.getCubeDesc().getDictionaryBuilderClass(tblColRef);
            resultInfo = dictManager.buildDictionary(tblColRef, sourceValues, builderClass);
            columnDict = resultInfo.getDictionaryObject();
        } else {
            logger.info("Dict for '{}' has already been built, save it", tblColRef.getName());
            resultInfo = dictManager.saveDictionary(tblColRef, sourceValues, columnDict);
        }
    }

    Tuple3<String, Integer, Integer> pathAndSizes = new Tuple3<>(resultInfo.getResourcePath(),
            columnDict.getSize(), columnDict.getSizeOfId());
    return new Tuple2<>(tblColRef.getIdentity(), pathAndSizes);
}
 
Example 9
Source File: DictionaryManagerTest.java    From Kylin with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the value behind every id in {@code [0, cardinality)} and asserts that the
 * number of distinct values equals the cardinality reported by the dictionary info.
 *
 * @param info1 dictionary info whose dictionary object is walked
 */
@SuppressWarnings("unchecked")
private void touchDictValues(DictionaryInfo info1) {
    Dictionary<String> dictionary = (Dictionary<String>) info1.getDictionaryObject();

    HashSet<String> distinctValues = new HashSet<String>();
    final int cardinality = info1.getCardinality();
    int id = 0;
    while (id < cardinality) {
        String value = dictionary.getValueFromId(id);
        distinctValues.add(value);
        id++;
    }
    assertEquals(info1.getCardinality(), distinctValues.size());
}
 
Example 10
Source File: ITDictionaryManagerTest.java    From kylin-on-parquet-v2 with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a dictionary twice from the same distinct values and checks that the second
 * build is treated as a duplicate of the first (same uuid, resource path and
 * dictionary object, different last-modified time), then verifies the id/value
 * mapping and the behaviour of an empty dictionary.
 */
@Test
public void basic() throws Exception {
    dictMgr = DictionaryManager.getInstance(getTestConfig());
    CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig())
            .getCubeDesc("test_kylin_cube_without_slr_desc");
    TblColRef col = cubeDesc.findColumnRef("DEFAULT.TEST_KYLIN_FACT", "LSTG_FORMAT_NAME");

    MockDistinctColumnValuesProvider mockupData = new MockDistinctColumnValuesProvider("A", "B", "C");

    DictionaryInfo info1 = dictMgr.buildDictionary(col, mockupData.getDistinctValuesFor(col));
    System.out.println(JsonUtil.writeValueAsIndentString(info1));

    // Pause so the second build gets a different last-modified timestamp (asserted below).
    Thread.sleep(1000);

    DictionaryInfo info2 = dictMgr.buildDictionary(col, mockupData.getDistinctValuesFor(col));
    System.out.println(JsonUtil.writeValueAsIndentString(info2));

    // test check duplicate
    assertEquals(info1.getUuid(), info2.getUuid());
    // Compare against info2: comparing info1 with itself could never fail.
    assertEquals(info1.getResourcePath(), info2.getResourcePath());
    assertNotEquals(info1.getLastModified(), info2.getLastModified());
    assertNotEquals(info1, info2);
    assertEquals(info1.getDictionaryObject(), info2.getDictionaryObject());

    // verify dictionary entries: ids are expected to follow the iteration order of mockupData.set
    @SuppressWarnings("unchecked")
    Dictionary<String> dict = (Dictionary<String>) info1.getDictionaryObject();
    int id = 0;
    for (String v : mockupData.set) {
        assertEquals(id, dict.getIdFromValue(v, 0));
        assertEquals(v, dict.getValueFromId(id));
        id++;
    }

    // test empty dictionary
    MockDistinctColumnValuesProvider mockupEmpty = new MockDistinctColumnValuesProvider();
    DictionaryInfo info3 = dictMgr.buildDictionary(col, mockupEmpty.getDistinctValuesFor(col));
    System.out.println(JsonUtil.writeValueAsIndentString(info3));
    assertEquals(0, info3.getCardinality());
    assertEquals(0, info3.getDictionaryObject().getSize());
    // The id bounds of an empty dictionary are only printed, not asserted.
    System.out.println(info3.getDictionaryObject().getMaxId());
    System.out.println(info3.getDictionaryObject().getMinId());
    System.out.println(info3.getDictionaryObject().getSizeOfId());
}
 
Example 11
Source File: ITDictionaryManagerTest.java    From kylin with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a dictionary twice from the same distinct values and checks that the second
 * build is treated as a duplicate of the first (same uuid, resource path and
 * dictionary object, different last-modified time), then verifies the id/value
 * mapping and the behaviour of an empty dictionary.
 */
@Test
public void basic() throws Exception {
    dictMgr = DictionaryManager.getInstance(getTestConfig());
    CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig())
            .getCubeDesc("test_kylin_cube_without_slr_desc");
    TblColRef col = cubeDesc.findColumnRef("DEFAULT.TEST_KYLIN_FACT", "LSTG_FORMAT_NAME");

    MockDistinctColumnValuesProvider mockupData = new MockDistinctColumnValuesProvider("A", "B", "C");

    DictionaryInfo info1 = dictMgr.buildDictionary(col, mockupData.getDistinctValuesFor(col));
    System.out.println(JsonUtil.writeValueAsIndentString(info1));

    // Pause so the second build gets a different last-modified timestamp (asserted below).
    Thread.sleep(1000);

    DictionaryInfo info2 = dictMgr.buildDictionary(col, mockupData.getDistinctValuesFor(col));
    System.out.println(JsonUtil.writeValueAsIndentString(info2));

    // test check duplicate
    assertEquals(info1.getUuid(), info2.getUuid());
    // Compare against info2: comparing info1 with itself could never fail.
    assertEquals(info1.getResourcePath(), info2.getResourcePath());
    assertNotEquals(info1.getLastModified(), info2.getLastModified());
    assertNotEquals(info1, info2);
    assertEquals(info1.getDictionaryObject(), info2.getDictionaryObject());

    // verify dictionary entries: ids are expected to follow the iteration order of mockupData.set
    @SuppressWarnings("unchecked")
    Dictionary<String> dict = (Dictionary<String>) info1.getDictionaryObject();
    int id = 0;
    for (String v : mockupData.set) {
        assertEquals(id, dict.getIdFromValue(v, 0));
        assertEquals(v, dict.getValueFromId(id));
        id++;
    }

    // test empty dictionary
    MockDistinctColumnValuesProvider mockupEmpty = new MockDistinctColumnValuesProvider();
    DictionaryInfo info3 = dictMgr.buildDictionary(col, mockupEmpty.getDistinctValuesFor(col));
    System.out.println(JsonUtil.writeValueAsIndentString(info3));
    assertEquals(0, info3.getCardinality());
    assertEquals(0, info3.getDictionaryObject().getSize());
    // The id bounds of an empty dictionary are only printed, not asserted.
    System.out.println(info3.getDictionaryObject().getMaxId());
    System.out.println(info3.getDictionaryObject().getMinId());
    System.out.println(info3.getDictionaryObject().getSizeOfId());
}