Java Code Examples for org.apache.kylin.cube.CubeSegment#putDictResPath()

The following examples show how to use org.apache.kylin.cube.CubeSegment#putDictResPath(). Each example notes the project and source file it comes from.
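putDictResPath(TblColRef, String) records, on a segment, the metadata-store resource path of the dictionary built for a given column; the dictionary object itself is persisted separately by DictionaryManager. The sketch below shows the save-then-record pattern the examples share. It is illustrative only: seg, col, dict and dictInfo are assumed to be prepared by the surrounding cubing code, and import paths can differ between Kylin versions.

import java.io.IOException;

import org.apache.kylin.common.util.Dictionary;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.dict.DictionaryInfo;
import org.apache.kylin.dict.DictionaryManager;
import org.apache.kylin.metadata.model.TblColRef;

public class PutDictResPathSketch {

    // Minimal sketch, not a complete job step: save (or reuse) the
    // dictionary, then record its resource path on the segment.
    public static void recordDictionary(CubeSegment seg, TblColRef col,
            Dictionary<String> dict, DictionaryInfo dictInfo) throws IOException {
        DictionaryManager dictMgr = DictionaryManager.getInstance(seg.getCubeDesc().getConfig());
        DictionaryInfo saved = dictMgr.trySaveNewDict(dict, dictInfo);
        // The segment stores only the path; the dictionary is resolved
        // back through DictionaryManager when the segment is read.
        seg.putDictResPath(col, saved.getResourcePath());
    }
}

A caller would then persist the modified segment, for example through CubeManager.updateCube(...) as Examples 3 and 5 do.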
Example 1
Source File: CubingUtils.java    From kylin-on-parquet-v2 with Apache License 2.0 (an identical example appears in the kylin project)
@SuppressWarnings("unchecked")
public static Map<TblColRef, Dictionary<String>> writeDictionary(CubeSegment cubeSegment,
        Map<TblColRef, Dictionary<String>> dictionaryMap, long startOffset, long endOffset) {
    Map<TblColRef, Dictionary<String>> realDictMap = Maps.newHashMap();

    for (Map.Entry<TblColRef, Dictionary<String>> entry : dictionaryMap.entrySet()) {
        final TblColRef tblColRef = entry.getKey();
        final Dictionary<String> dictionary = entry.getValue();
        IReadableTable.TableSignature signature = new IReadableTable.TableSignature();
        signature.setLastModifiedTime(System.currentTimeMillis());
        signature.setPath(String.format(Locale.ROOT, "streaming_%s_%s", startOffset, endOffset));
        signature.setSize(endOffset - startOffset);
        DictionaryInfo dictInfo = new DictionaryInfo(tblColRef.getColumnDesc(), tblColRef.getDatatype(), signature);
        logger.info("writing dictionary for TblColRef:" + tblColRef.toString());
        DictionaryManager dictionaryManager = DictionaryManager.getInstance(cubeSegment.getCubeDesc().getConfig());
        try {
            DictionaryInfo realDict = dictionaryManager.trySaveNewDict(dictionary, dictInfo);
            cubeSegment.putDictResPath(tblColRef, realDict.getResourcePath());
            realDictMap.put(tblColRef, (Dictionary<String>) realDict.getDictionaryObject());
        } catch (IOException e) {
            throw new RuntimeException("error save dictionary for column:" + tblColRef, e);
        }
    }

    return realDictMap;
}
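Note that trySaveNewDict may return an existing, content-identical dictionary instead of writing a new one, so the path recorded by putDictResPath can point at a shared resource; the segment itself never stores the dictionary object, only its path.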
 
Example 2
Source File: MergeDictionaryStep.java    From kylin-on-parquet-v2 with Apache License 2.0 (an identical example appears in the kylin project)
private DictionaryInfo mergeDictionaries(DictionaryManager dictMgr, CubeSegment cubeSeg, List<DictionaryInfo> dicts, TblColRef col) throws IOException {
    DictionaryInfo dictInfo = dictMgr.mergeDictionary(dicts);
    if (dictInfo != null)
        cubeSeg.putDictResPath(col, dictInfo.getResourcePath());

    return dictInfo;
}
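In a merge job, a helper like this is typically invoked once per dictionary column of the new segment. A hypothetical caller is sketched below; mergingSegments, newSegment and dictMgr are illustrative names assumed to be set up by the surrounding step:

// Hypothetical caller: collect each column's per-segment dictionaries
// and merge them into a single dictionary for the new segment.
for (TblColRef col : newSegment.getCubeDesc().getAllColumnsHaveDictionary()) {
    List<DictionaryInfo> dicts = Lists.newArrayList();
    for (CubeSegment seg : mergingSegments) {
        String resPath = seg.getDictResPath(col);
        if (resPath != null)
            dicts.add(dictMgr.getDictionaryInfo(resPath));
    }
    mergeDictionaries(dictMgr, newSegment, dicts, col);
}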
 
Example 3
Source File: UpdateDictionaryStep.java    From kylin-on-parquet-v2 with Apache License 2.0 (an identical example appears in the kylin project)
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeMgr = CubeManager.getInstance(context.getConfig());
    final DictionaryManager dictMgrHdfs;
    final DictionaryManager dictMgrHbase;
    final CubeInstance cube = cubeMgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final List<CubeSegment> mergingSegments = getMergingSegments(cube);
    final String dictInfoPath = this.getParams().get(BatchConstants.ARG_DICT_PATH);
    final String metadataUrl = this.getParams().get(BatchConstants.ARG_META_URL);

    final KylinConfig kylinConfHbase = cube.getConfig();
    final KylinConfig kylinConfHdfs = AbstractHadoopJob.loadKylinConfigFromHdfs(metadataUrl);

    Collections.sort(mergingSegments);

    try {
        Configuration conf = HadoopUtil.getCurrentConfiguration();
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        ResourceStore hbaseRS = ResourceStore.getStore(kylinConfHbase);
        ResourceStore hdfsRS = ResourceStore.getStore(kylinConfHdfs);
        dictMgrHdfs = DictionaryManager.getInstance(kylinConfHdfs);
        dictMgrHbase = DictionaryManager.getInstance(kylinConfHbase);

        // work on copy instead of cached objects
        CubeInstance cubeCopy = cube.latestCopyForWrite();
        CubeSegment newSegCopy = cubeCopy.getSegmentById(newSegment.getUuid());

        // update cube segment dictionary

        FileStatus[] fileStatuss = fs.listStatus(new Path(dictInfoPath), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part") || path.getName().startsWith("tmp");
            }
        });

        for (FileStatus fileStatus : fileStatuss) {
            Path filePath = fileStatus.getPath();

            SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
            Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Text value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf);

            while (reader.next(key, value)) {
                String tblCol = key.toString();
                String dictInfoResource = value.toString();

                if (StringUtils.isNotEmpty(dictInfoResource)) {
                    logger.info(dictInfoResource);
                    // put dictionary file to metadata store
                    DictionaryInfo dictInfoHdfs = dictMgrHdfs.getDictionaryInfo(dictInfoResource);
                    DictionaryInfo dicInfoHbase = dictMgrHbase.trySaveNewDict(dictInfoHdfs.getDictionaryObject(), dictInfoHdfs);

                    if (dicInfoHbase != null){
                        TblColRef tblColRef = cube.getDescriptor().findColumnRef(tblCol.split(":")[0], tblCol.split(":")[1]);
                        newSegCopy.putDictResPath(tblColRef, dicInfoHbase.getResourcePath());
                    }
                }
            }

            IOUtils.closeStream(reader);
        }

        CubeSegment lastSeg = mergingSegments.get(mergingSegments.size() - 1);
        for (Map.Entry<String, String> entry : lastSeg.getSnapshots().entrySet()) {
            newSegCopy.putSnapshotResPath(entry.getKey(), entry.getValue());
        }

        // update statistics
        // put the statistics to metadata store
        String statisticsFileName = newSegment.getStatisticsResourcePath();
        hbaseRS.putResource(statisticsFileName, hdfsRS.getResource(newSegment.getStatisticsResourcePath()).content(), System.currentTimeMillis());

        CubeUpdate update = new CubeUpdate(cubeCopy);
        update.setToUpdateSegs(newSegCopy);
        cubeMgr.updateCube(update);

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to merge dictionary", e);
        return ExecuteResult.createError(e);
    }
}
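In outline, this step copies the merged dictionaries from the HDFS metadata store into the job server's metadata store: each tblCol/dictionary-resource pair read from the sequence files is re-saved with trySaveNewDict, the new segment's dictionary paths are rewritten with putDictResPath, the lookup-table snapshot paths of the last merging segment are carried over, the segment statistics are copied between the two resource stores, and the change is committed through a CubeUpdate.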
 
Example 4
Source File: MergeDictionaryStep.java    From Kylin with Apache License 2.0
private DictionaryInfo mergeDictionaries(DictionaryManager dictMgr, CubeSegment cubeSeg, List<DictionaryInfo> dicts, TblColRef col) throws IOException {
    DictionaryInfo dictInfo = dictMgr.mergeDictionary(dicts);
    // guard against a null merge result, as later Kylin versions do (see Example 2)
    if (dictInfo != null)
        cubeSeg.putDictResPath(col, dictInfo.getResourcePath());

    return dictInfo;
}
 
Example 5
Source File: MergeCuboidMapperTest.java    From Kylin with Apache License 2.0
@Before
public void setUp() throws Exception {

    createTestMetadata();

    logger.info("The metadataUrl is : " + getTestConfig());

    MetadataManager.clearCache();
    CubeManager.clearCache();
    ProjectManager.clearCache();
    DictionaryManager.clearCache();

    // hack for distributed cache
    // CubeManager.removeInstance(KylinConfig.createInstanceFromUri("../job/meta"));//to
    // make sure the following mapper could get latest CubeManger
    FileUtils.deleteDirectory(new File("../job/meta"));

    MergeCuboidMapper mapper = new MergeCuboidMapper();
    mapDriver = MapDriver.newMapDriver(mapper);

    cubeManager = CubeManager.getInstance(getTestConfig());
    cube = cubeManager.getCube("test_kylin_cube_without_slr_left_join_ready_2_segments");
    dictionaryManager = DictionaryManager.getInstance(getTestConfig());
    lfn = cube.getDescriptor().findColumnRef("DEFAULT.TEST_KYLIN_FACT", "LSTG_FORMAT_NAME");
    lsi = cube.getDescriptor().findColumnRef("DEFAULT.TEST_KYLIN_FACT", "CAL_DT");
    ssc = cube.getDescriptor().findColumnRef("DEFAULT.TEST_CATEGORY_GROUPINGS", "META_CATEG_NAME");

    DictionaryInfo sharedDict = makeSharedDict();

    boolean isFirstSegment = true;
    for (CubeSegment segment : cube.getSegments()) {

        TableSignature signature = new TableSignature();
        signature.setSize(100);
        signature.setLastModifiedTime(System.currentTimeMillis());
        signature.setPath("fake_dict_for" + lfn.getName() + segment.getName());

        DictionaryInfo newDictInfo = new DictionaryInfo(lfn.getTable(), lfn.getColumn().getName(), lfn.getColumn().getZeroBasedIndex(), "string", signature, "");

        List<byte[]> values = new ArrayList<byte[]>();
        values.add(new byte[] { 97, 97, 97 }); // "aaa"
        if (isFirstSegment)
            values.add(new byte[] { 99, 99, 99 }); // "ccc"
        else
            values.add(new byte[] { 98, 98, 98 }); // "bbb"
        Dictionary<?> dict = DictionaryGenerator.buildDictionaryFromValueList(newDictInfo, values);
        dictionaryManager.trySaveNewDict(dict, newDictInfo);
        ((TrieDictionary) dict).dump(System.out);

        segment.putDictResPath(lfn, newDictInfo.getResourcePath());
        segment.putDictResPath(lsi, sharedDict.getResourcePath());
        segment.putDictResPath(ssc, sharedDict.getResourcePath());

        // cubeManager.saveResource(segment.getCubeInstance());
        // cubeManager.afterCubeUpdated(segment.getCubeInstance());
        cubeManager.updateCube(cube);

        isFirstSegment = false;
    }

}
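This setup gives each segment its own dictionary for LSTG_FORMAT_NAME (the first segment's value list differs from the later ones) while CAL_DT and META_CATEG_NAME share a single dictionary, so it exercises the dictionary-ID remapping that MergeCuboidMapper performs during a merge.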