Java Code Examples for org.apache.kylin.cube.CubeSegment#buildDictionaryMap()

The following examples show how to use org.apache.kylin.cube.CubeSegment#buildDictionaryMap() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: FlinkCubingByLayer.java    From kylin-on-parquet-v2 with Apache License 2.0 6 votes vote down vote up
@Override
public void open(Configuration parameters) throws Exception {
    KylinConfig kConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
    try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
            .setAndUnsetThreadLocalConfig(kConfig)) {
        CubeInstance cubeInstance = CubeManager.getInstance(kConfig).getCube(cubeName);
        CubeDesc cubeDesc = cubeInstance.getDescriptor();
        CubeSegment cubeSegment = cubeInstance.getSegmentById(segmentId);
        CubeJoinedFlatTableEnrich interDesc = new CubeJoinedFlatTableEnrich(
                EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
        long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
        Cuboid baseCuboid = Cuboid.findForMandatory(cubeDesc, baseCuboidId);
        baseCuboidBuilder = new BaseCuboidBuilder(kConfig, cubeDesc, cubeSegment, interDesc,
                AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid),
                MeasureIngester.create(cubeDesc.getMeasures()), cubeSegment.buildDictionaryMap());
    }
}
 
Example 2
Source File: FlinkCubingByLayer.java    From kylin-on-parquet-v2 with Apache License 2.0 6 votes vote down vote up
@Override
public void open(Configuration parameters) throws Exception {
    KylinConfig kConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
    try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
            .setAndUnsetThreadLocalConfig(kConfig)) {
        CubeInstance cubeInstance = CubeManager.getInstance(kConfig).getCube(cubeName);
        CubeDesc cubeDesc = cubeInstance.getDescriptor();
        CubeSegment cubeSegment = cubeInstance.getSegmentById(segmentId);
        CubeJoinedFlatTableEnrich interDesc = new CubeJoinedFlatTableEnrich(
                EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
        long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
        Cuboid baseCuboid = Cuboid.findForMandatory(cubeDesc, baseCuboidId);
        baseCuboidBuilder = new BaseCuboidBuilder(kConfig, cubeDesc, cubeSegment, interDesc,
                AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid),
                MeasureIngester.create(cubeDesc.getMeasures()), cubeSegment.buildDictionaryMap());
    }
}
 
Example 3
Source File: FlinkCubingByLayer.java    From kylin with Apache License 2.0 6 votes vote down vote up
@Override
public void open(Configuration parameters) throws Exception {
    KylinConfig kConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
    try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
            .setAndUnsetThreadLocalConfig(kConfig)) {
        CubeInstance cubeInstance = CubeManager.getInstance(kConfig).getCube(cubeName);
        CubeDesc cubeDesc = cubeInstance.getDescriptor();
        CubeSegment cubeSegment = cubeInstance.getSegmentById(segmentId);
        CubeJoinedFlatTableEnrich interDesc = new CubeJoinedFlatTableEnrich(
                EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
        long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
        Cuboid baseCuboid = Cuboid.findForMandatory(cubeDesc, baseCuboidId);
        baseCuboidBuilder = new BaseCuboidBuilder(kConfig, cubeDesc, cubeSegment, interDesc,
                AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid),
                MeasureIngester.create(cubeDesc.getMeasures()), cubeSegment.buildDictionaryMap());
    }
}
 
Example 4
Source File: FlinkCubingByLayer.java    From kylin with Apache License 2.0 6 votes vote down vote up
@Override
public void open(Configuration parameters) throws Exception {
    KylinConfig kConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
    try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
            .setAndUnsetThreadLocalConfig(kConfig)) {
        CubeInstance cubeInstance = CubeManager.getInstance(kConfig).getCube(cubeName);
        CubeDesc cubeDesc = cubeInstance.getDescriptor();
        CubeSegment cubeSegment = cubeInstance.getSegmentById(segmentId);
        CubeJoinedFlatTableEnrich interDesc = new CubeJoinedFlatTableEnrich(
                EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
        long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
        Cuboid baseCuboid = Cuboid.findForMandatory(cubeDesc, baseCuboidId);
        baseCuboidBuilder = new BaseCuboidBuilder(kConfig, cubeDesc, cubeSegment, interDesc,
                AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid),
                MeasureIngester.create(cubeDesc.getMeasures()), cubeSegment.buildDictionaryMap());
    }
}
 
Example 5
Source File: DictionaryGetterUtil.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
public static Map<TblColRef, Dictionary<String>> getDictionaryMap(CubeSegment cubeSegment, InputSplit inputSplit,
                                                                  Configuration configuration) throws IOException {
    Map<TblColRef, Dictionary<String>> dictionaryMap = cubeSegment.buildDictionaryMap();

    String shrunkenDictPath = configuration.get(BatchConstants.ARG_SHRUNKEN_DICT_PATH);
    if (shrunkenDictPath == null) {
        return dictionaryMap;
    }

    // replace global dictionary with shrunken dictionary if possible
    String inputSplitSignature = getInputSplitSignature(cubeSegment, inputSplit);
    FileSystem fs = FileSystem.get(configuration);
    ShrunkenDictionary.StringValueSerializer valueSerializer = new ShrunkenDictionary.StringValueSerializer();
    for (TblColRef colRef : cubeSegment.getCubeDesc().getAllGlobalDictColumns()) {
        Path colShrunkenDictDir = new Path(shrunkenDictPath, colRef.getIdentity());
        Path colShrunkenDictPath = new Path(colShrunkenDictDir, inputSplitSignature);
        if (!fs.exists(colShrunkenDictPath)) {
            logger.warn("Shrunken dictionary for column " + colRef.getIdentity() + " in split "
                    + inputSplitSignature + " does not exist!!!");
            continue;
        }
        try (DataInputStream dis = fs.open(colShrunkenDictPath)) {
            Dictionary<String> shrunkenDict = new ShrunkenDictionary(valueSerializer);
            shrunkenDict.readFields(dis);

            dictionaryMap.put(colRef, shrunkenDict);
        }
    }

    return dictionaryMap;
}
 
Example 6
Source File: SparkCubingByLayer.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
@Override
public Tuple2<ByteArray, Object[]> call(String[] rowArray) throws Exception {
    if (initialized == false) {
        synchronized (SparkCubingByLayer.class) {
            if (initialized == false) {
                KylinConfig kConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
                try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
                        .setAndUnsetThreadLocalConfig(kConfig)) {
                    CubeInstance cubeInstance = CubeManager.getInstance(kConfig).getCube(cubeName);
                    CubeDesc cubeDesc = cubeInstance.getDescriptor();
                    CubeSegment cubeSegment = cubeInstance.getSegmentById(segmentId);
                    CubeJoinedFlatTableEnrich interDesc = new CubeJoinedFlatTableEnrich(
                            EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
                    long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
                    Cuboid baseCuboid = Cuboid.findForMandatory(cubeDesc, baseCuboidId);
                    baseCuboidBuilder = new BaseCuboidBuilder(kConfig, cubeDesc, cubeSegment, interDesc,
                            AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid),
                            MeasureIngester.create(cubeDesc.getMeasures()), cubeSegment.buildDictionaryMap());
                    initialized = true;
                }
            }
        }
    }
    baseCuboidBuilder.resetAggrs();
    byte[] rowKey = baseCuboidBuilder.buildKey(rowArray);
    Object[] result = baseCuboidBuilder.buildValueObjects(rowArray);
    return new Tuple2<>(new ByteArray(rowKey), result);
}
 
Example 7
Source File: DictionaryGetterUtil.java    From kylin with Apache License 2.0 5 votes vote down vote up
public static Map<TblColRef, Dictionary<String>> getDictionaryMap(CubeSegment cubeSegment, InputSplit inputSplit,
                                                                  Configuration configuration) throws IOException {
    Map<TblColRef, Dictionary<String>> dictionaryMap = cubeSegment.buildDictionaryMap();

    String shrunkenDictPath = configuration.get(BatchConstants.ARG_SHRUNKEN_DICT_PATH);
    if (shrunkenDictPath == null) {
        return dictionaryMap;
    }

    // replace global dictionary with shrunken dictionary if possible
    String inputSplitSignature = getInputSplitSignature(cubeSegment, inputSplit);
    FileSystem fs = FileSystem.get(configuration);
    ShrunkenDictionary.StringValueSerializer valueSerializer = new ShrunkenDictionary.StringValueSerializer();
    for (TblColRef colRef : cubeSegment.getCubeDesc().getAllGlobalDictColumns()) {
        Path colShrunkenDictDir = new Path(shrunkenDictPath, colRef.getIdentity());
        Path colShrunkenDictPath = new Path(colShrunkenDictDir, inputSplitSignature);
        if (!fs.exists(colShrunkenDictPath)) {
            logger.warn("Shrunken dictionary for column " + colRef.getIdentity() + " in split "
                    + inputSplitSignature + " does not exist!!!");
            continue;
        }
        try (DataInputStream dis = fs.open(colShrunkenDictPath)) {
            Dictionary<String> shrunkenDict = new ShrunkenDictionary(valueSerializer);
            shrunkenDict.readFields(dis);

            dictionaryMap.put(colRef, shrunkenDict);
        }
    }

    return dictionaryMap;
}
 
Example 8
Source File: SparkCubingByLayer.java    From kylin with Apache License 2.0 5 votes vote down vote up
@Override
public Tuple2<ByteArray, Object[]> call(String[] rowArray) throws Exception {
    if (initialized == false) {
        synchronized (SparkCubingByLayer.class) {
            if (initialized == false) {
                KylinConfig kConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
                try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
                        .setAndUnsetThreadLocalConfig(kConfig)) {
                    CubeInstance cubeInstance = CubeManager.getInstance(kConfig).getCube(cubeName);
                    CubeDesc cubeDesc = cubeInstance.getDescriptor();
                    CubeSegment cubeSegment = cubeInstance.getSegmentById(segmentId);
                    CubeJoinedFlatTableEnrich interDesc = new CubeJoinedFlatTableEnrich(
                            EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
                    long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
                    Cuboid baseCuboid = Cuboid.findForMandatory(cubeDesc, baseCuboidId);
                    baseCuboidBuilder = new BaseCuboidBuilder(kConfig, cubeDesc, cubeSegment, interDesc,
                            AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid),
                            MeasureIngester.create(cubeDesc.getMeasures()), cubeSegment.buildDictionaryMap());
                    initialized = true;
                }
            }
        }
    }
    baseCuboidBuilder.resetAggrs();
    byte[] rowKey = baseCuboidBuilder.buildKey(rowArray);
    Object[] result = baseCuboidBuilder.buildValueObjects(rowArray);
    return new Tuple2<>(new ByteArray(rowKey), result);
}