Java Code Examples for org.apache.kylin.cube.CubeSegment#isEnableSharding()

The following examples show how to use org.apache.kylin.cube.CubeSegment#isEnableSharding(). They are drawn from open source projects; the source file and project for each example are noted in its header.
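Before looking at the examples, here is a minimal sketch of reading the flag yourself. The cube name and segment ID below are hypothetical placeholders; getInstanceFromEnv(), getCube() and getSegmentById() are real Kylin calls that also appear in the examples that follow. When sharding is enabled, Kylin prefixes each rowkey with a shard ID so that a cuboid's rows spread across multiple HBase regions.

KylinConfig config = KylinConfig.getInstanceFromEnv();
CubeInstance cube = CubeManager.getInstance(config).getCube("sample_cube"); // hypothetical cube name
CubeSegment segment = cube.getSegmentById(segmentId);                       // segmentId assumed in scope

if (segment.isEnableSharding()) {
    // rowkeys carry a shard-ID prefix; readers and writers must account for it
} else {
    // rowkeys start directly with the cuboid ID
}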
Example 1
Source File: FilterRecommendCuboidDataMapper.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected void doSetup(Context context) throws IOException {
    super.bindCurrentConfiguration(context.getConfiguration());

    String cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME);
    String segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID);

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();

    CubeManager cubeManager = CubeManager.getInstance(config);
    CubeInstance cube = cubeManager.getCube(cubeName);
    CubeSegment optSegment = cube.getSegmentById(segmentID);
    CubeSegment originalSegment = cube.getOriginalSegmentToOptimize(optSegment);

    enableSharding = originalSegment.isEnableSharding();
    baseCuboid = cube.getCuboidScheduler().getBaseCuboidId();

    recommendCuboids = cube.getCuboidsRecommend();
    Preconditions.checkNotNull(recommendCuboids, "The recommend cuboid map could not be null");
}
 
Example 2
Source File: ConvergeCuboidDataReducer.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected void doSetup(Context context) throws IOException {
    super.bindCurrentConfiguration(context.getConfiguration());
    mos = new MultipleOutputs(context);

    String cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME);
    String segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID);

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();

    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    CubeSegment cubeSegment = cube.getSegmentById(segmentID);
    CubeSegment oldSegment = cube.getOriginalSegmentToOptimize(cubeSegment);

    this.enableSharding = oldSegment.isEnableSharding();
    this.baseCuboid = cube.getCuboidScheduler().getBaseCuboidId();
}
 
Example 3
Source File: ConvergeCuboidDataUtil.java    From kylin-on-parquet-v2 with Apache License 2.0
public static void setupReducer(Job job, CubeSegment cubeSegment, Path output) throws IOException {
    // Output
    //// prevent to create zero-sized default output
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, output);

    // Reducer
    job.setReducerClass(ConvergeCuboidDataReducer.class);
    job.setPartitionerClass(ConvergeCuboidDataPartitioner.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    Pair<Integer, Integer> numReduceTasks = MapReduceUtil.getConvergeCuboidDataReduceTaskNums(cubeSegment);
    job.setNumReduceTasks(numReduceTasks.getFirst());

    int nBaseReduceTasks = numReduceTasks.getSecond();
    boolean enableSharding = cubeSegment.isEnableSharding();
    long baseCuboidId = cubeSegment.getCuboidScheduler().getBaseCuboidId();
    String partiParams = enableSharding + "," + baseCuboidId + "," + nBaseReduceTasks;
    job.getConfiguration().set(BatchConstants.CFG_CONVERGE_CUBOID_PARTITION_PARAM, partiParams);
}
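The partitioner on the consuming side has to unpack that comma-separated string again. The following is a sketch of the parsing step implied by the format written above, not the actual ConvergeCuboidDataPartitioner source; conf is assumed to be the job's Configuration.

// sketch: parsing CFG_CONVERGE_CUBOID_PARTITION_PARAM (assumed; mirrors the format written above)
String[] parts = conf.get(BatchConstants.CFG_CONVERGE_CUBOID_PARTITION_PARAM).split(",");
boolean enableSharding = Boolean.parseBoolean(parts[0]);
long baseCuboidId = Long.parseLong(parts[1]);
int nBaseReduceTasks = Integer.parseInt(parts[2]);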
 
Example 4
Source File: RowKeyEncoder.java    From kylin-on-parquet-v2 with Apache License 2.0
public RowKeyEncoder(CubeSegment cubeSeg, Cuboid cuboid) {
    super(cubeSeg, cuboid);
    enableSharding = cubeSeg.isEnableSharding();
    headerLength = cubeSeg.getRowKeyPreambleSize();
    Set<TblColRef> shardByColumns = cubeSeg.getCubeDesc().getShardByColumns();
    if (shardByColumns.size() > 1) {
        throw new IllegalStateException("Does not support multiple UHC now");
    }
    colIO = new RowKeyColumnIO(cubeSeg.getDimensionEncodingMap());
    for (TblColRef column : cuboid.getColumns()) {
        if (shardByColumns.contains(column)) {
            uhcOffset = bodyLength;
            uhcLength = colIO.getColumnLength(column);
        }
        bodyLength += colIO.getColumnLength(column);
    }
}
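The headerLength read here depends directly on the sharding flag: with sharding on, the rowkey preamble is a 2-byte shard ID followed by the 8-byte cuboid ID; with sharding off, the shard prefix is absent. A sketch of that relationship follows; the byte lengths match Kylin's RowConstants to the best of my knowledge, so treat them as an assumption.

// sketch: preamble size as a function of the sharding flag
final int SHARD_LEN = 2;   // short shard ID (length assumed, per RowConstants)
final int CUBOID_LEN = 8;  // long cuboid ID

int preambleSize = cubeSeg.isEnableSharding()
        ? SHARD_LEN + CUBOID_LEN   // 10 bytes
        : CUBOID_LEN;              // 8 bytes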
 
Example 5
Source File: FilterRecommendCuboidDataMapper.java    From kylin with Apache License 2.0
@Override
protected void doSetup(Context context) throws IOException {
    super.bindCurrentConfiguration(context.getConfiguration());

    String cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME);
    String segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID);

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();

    CubeManager cubeManager = CubeManager.getInstance(config);
    CubeInstance cube = cubeManager.getCube(cubeName);
    CubeSegment optSegment = cube.getSegmentById(segmentID);
    CubeSegment originalSegment = cube.getOriginalSegmentToOptimize(optSegment);

    enableSharding = originalSegment.isEnableSharding();
    baseCuboid = cube.getCuboidScheduler().getBaseCuboidId();

    recommendCuboids = cube.getCuboidsRecommend();
    Preconditions.checkNotNull(recommendCuboids, "The recommend cuboid map could not be null");
}
 
Example 6
Source File: ConvergeCuboidDataReducer.java    From kylin with Apache License 2.0
@Override
protected void doSetup(Context context) throws IOException {
    super.bindCurrentConfiguration(context.getConfiguration());
    mos = new MultipleOutputs(context);

    String cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME);
    String segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID);

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();

    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    CubeSegment cubeSegment = cube.getSegmentById(segmentID);
    CubeSegment oldSegment = cube.getOriginalSegmentToOptimize(cubeSegment);

    this.enableSharding = oldSegment.isEnableSharding();
    this.baseCuboid = cube.getCuboidScheduler().getBaseCuboidId();
}
 
Example 7
Source File: ConvergeCuboidDataUtil.java    From kylin with Apache License 2.0
public static void setupReducer(Job job, CubeSegment cubeSegment, Path output) throws IOException {
    // Output
    //// prevent to create zero-sized default output
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, output);

    // Reducer
    job.setReducerClass(ConvergeCuboidDataReducer.class);
    job.setPartitionerClass(ConvergeCuboidDataPartitioner.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    Pair<Integer, Integer> numReduceTasks = MapReduceUtil.getConvergeCuboidDataReduceTaskNums(cubeSegment);
    job.setNumReduceTasks(numReduceTasks.getFirst());

    int nBaseReduceTasks = numReduceTasks.getSecond();
    boolean enableSharding = cubeSegment.isEnableSharding();
    long baseCuboidId = cubeSegment.getCuboidScheduler().getBaseCuboidId();
    String partiParams = enableSharding + "," + baseCuboidId + "," + nBaseReduceTasks;
    job.getConfiguration().set(BatchConstants.CFG_CONVERGE_CUBOID_PARTITION_PARAM, partiParams);
}
 
Example 8
Source File: RowKeyEncoder.java    From kylin with Apache License 2.0
public RowKeyEncoder(CubeSegment cubeSeg, Cuboid cuboid) {
    super(cubeSeg, cuboid);
    enableSharding = cubeSeg.isEnableSharding();
    headerLength = cubeSeg.getRowKeyPreambleSize();
    Set<TblColRef> shardByColumns = cubeSeg.getCubeDesc().getShardByColumns();
    if (shardByColumns.size() > 1) {
        throw new IllegalStateException("Does not support multiple UHC now");
    }
    colIO = new RowKeyColumnIO(cubeSeg.getDimensionEncodingMap());
    for (TblColRef column : cuboid.getColumns()) {
        if (shardByColumns.contains(column)) {
            uhcOffset = bodyLength;
            uhcLength = colIO.getColumnLength(column);
        }
        bodyLength += colIO.getColumnLength(column);
    }
}
 
Example 9
Source File: RowKeySplitter.java    From kylin-on-parquet-v2 with Apache License 2.0
public RowKeySplitter(CubeSegment cubeSeg, int splitLen, int bytesLen) {
    this.enableSharding = cubeSeg.isEnableSharding();
    this.cubeDesc = cubeSeg.getCubeDesc();
    IDimensionEncodingMap dimEncoding = new CubeDimEncMap(cubeSeg);

    for (RowKeyColDesc rowKeyColDesc : cubeDesc.getRowkey().getRowKeyColumns()) {
        dimEncoding.get(rowKeyColDesc.getColRef());
    }

    this.colIO = new RowKeyColumnIO(dimEncoding);

    this.splitBuffers = new ByteArray[splitLen];
    this.splitOffsets = new int[splitLen];
    this.bufferSize = 0;
}
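The enableSharding flag captured in this constructor matters when the splitter later walks a rowkey: with sharding on, the first two bytes are the shard ID and must be skipped before the cuboid ID can be read. Below is a simplified sketch of that offset handling, not the full split() implementation; rowkey is assumed to be the raw byte[].

// sketch: skipping the preamble before decoding columns (simplified)
int offset = 0;
if (enableSharding) {
    offset += 2; // 2-byte shard ID prefix (length assumed, per RowConstants)
}
long cuboidId = java.nio.ByteBuffer.wrap(rowkey, offset, 8).getLong(); // 8-byte cuboid ID
offset += 8;
// remaining bytes are the dimension columns, decoded one by one via colIO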
 
Example 10
Source File: RowKeySplitter.java    From kylin with Apache License 2.0
public RowKeySplitter(CubeSegment cubeSeg, int splitLen, int bytesLen) {
    this.enableSharding = cubeSeg.isEnableSharding();
    this.cubeDesc = cubeSeg.getCubeDesc();
    IDimensionEncodingMap dimEncoding = new CubeDimEncMap(cubeSeg);

    for (RowKeyColDesc rowKeyColDesc : cubeDesc.getRowkey().getRowKeyColumns()) {
        dimEncoding.get(rowKeyColDesc.getColRef());
    }

    this.colIO = new RowKeyColumnIO(dimEncoding);

    this.splitBuffers = new ByteArray[splitLen];
    this.splitOffsets = new int[splitLen];
    this.bufferSize = 0;
}
 
Example 11
Source File: CreateHTableJob.java    From kylin-on-parquet-v2 with Apache License 2.0
public static byte[][] getRegionSplitsFromCuboidStatistics(final Map<Long, Double> cubeSizeMap,
        final KylinConfig kylinConfig, final CubeSegment cubeSegment, final Path hfileSplitsOutputFolder)
        throws IOException {

    final CubeDesc cubeDesc = cubeSegment.getCubeDesc();
    float cut = cubeDesc.getConfig().getKylinHBaseRegionCut();

    logger.info("Cut for HBase region is {} GB", cut);

    double totalSizeInM = 0;
    for (Double cuboidSize : cubeSizeMap.values()) {
        totalSizeInM += cuboidSize;
    }

    List<Long> allCuboids = Lists.newArrayList();
    allCuboids.addAll(cubeSizeMap.keySet());
    Collections.sort(allCuboids);

    int nRegion = Math.round((float) (totalSizeInM / (cut * 1024L)));
    nRegion = Math.max(kylinConfig.getHBaseRegionCountMin(), nRegion);
    nRegion = Math.min(kylinConfig.getHBaseRegionCountMax(), nRegion);

    if (cubeSegment.isEnableSharding()) {
        //use prime nRegions to help random sharding
        int original = nRegion;
        if (nRegion == 0) {
            nRegion = 1;
        }

        if (nRegion > Short.MAX_VALUE) {
            logger.info("Too many regions! reduce to {}", Short.MAX_VALUE);
            nRegion = Short.MAX_VALUE;
        }

        if (nRegion != original) {
            logger.info("Region count is adjusted from {} to {} to help random sharding", original, nRegion);
        }
    }

    int mbPerRegion = (int) (totalSizeInM / nRegion);
    mbPerRegion = Math.max(1, mbPerRegion);

    logger.info("Total size {} M (estimated)", totalSizeInM);
    logger.info("Expecting {} regions.", nRegion);
    logger.info("Expecting {} MB per region.", mbPerRegion);

    if (cubeSegment.isEnableSharding()) {
        //each cuboid will be split into different number of shards
        HashMap<Long, Short> cuboidShards = Maps.newHashMap();

        //each shard/region may be split into multiple hfiles; array index: region ID, Map: key: cuboidID, value cuboid size in the region
        List<HashMap<Long, Double>> innerRegionSplits = Lists.newArrayList();
        for (int i = 0; i < nRegion; i++) {
            innerRegionSplits.add(new HashMap<Long, Double>());
        }

        double[] regionSizes = new double[nRegion];
        for (long cuboidId : allCuboids) {
            double estimatedSize = cubeSizeMap.get(cuboidId);
            double magic = 23;
            int shardNum = (int) (estimatedSize * magic / mbPerRegion + 1);
            if (shardNum < 1) {
                shardNum = 1;
            }

            if (shardNum > nRegion) {
                logger.debug(String.format(Locale.ROOT,
                        "Cuboid %d 's estimated size %.2f MB will generate %d regions, " + "reduce to %d", cuboidId,
                        estimatedSize, shardNum, nRegion));
                shardNum = nRegion;
            } else {
                logger.debug(
                        String.format(Locale.ROOT, "Cuboid %d 's estimated size %.2f MB will generate %d regions",
                                cuboidId, estimatedSize, shardNum));
            }

            cuboidShards.put(cuboidId, (short) shardNum);
            short startShard = ShardingHash.getShard(cuboidId, nRegion);
            for (short i = startShard; i < startShard + shardNum; ++i) {
                short j = (short) (i % nRegion);
                regionSizes[j] = regionSizes[j] + estimatedSize / shardNum;
                innerRegionSplits.get(j).put(cuboidId, estimatedSize / shardNum);
            }
        }

        for (int i = 0; i < nRegion; ++i) {
            logger.debug("Region {}'s estimated size is {} MB, accounting for {} percent", i, regionSizes[i],
                    100.0 * regionSizes[i] / totalSizeInM);
        }

        CuboidShardUtil.saveCuboidShards(cubeSegment, cuboidShards, nRegion);
        saveHFileSplits(innerRegionSplits, mbPerRegion, hfileSplitsOutputFolder, kylinConfig);
        return getSplitsByRegionCount(nRegion);
    } else {
        throw new IllegalStateException("Not supported");
    }
}
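To make the sizing arithmetic concrete, here is a worked example with illustrative numbers plugged into the formulas above:

// worked example (illustrative numbers only)
double totalSizeInM = 51200;  // 50 GB of cuboid data, summed from cubeSizeMap
float cut = 5f;               // region cut of 5 GB
int nRegion = Math.round((float) (totalSizeInM / (cut * 1024L)));  // = 10
int mbPerRegion = (int) (totalSizeInM / nRegion);                  // = 5120

double estimatedSize = 500;   // one 500 MB cuboid
int shardNum = (int) (estimatedSize * 23 / mbPerRegion + 1);       // = 3 shards across 3 of the 10 regions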
 
Example 12
Source File: CreateHTableJob.java    From kylin with Apache License 2.0
public static byte[][] getRegionSplitsFromCuboidStatistics(final Map<Long, Double> cubeSizeMap,
        final KylinConfig kylinConfig, final CubeSegment cubeSegment, final Path hfileSplitsOutputFolder)
        throws IOException {

    final CubeDesc cubeDesc = cubeSegment.getCubeDesc();
    float cut = cubeDesc.getConfig().getKylinHBaseRegionCut();

    logger.info("Cut for HBase region is {} GB", cut);

    double totalSizeInM = 0;
    for (Double cuboidSize : cubeSizeMap.values()) {
        totalSizeInM += cuboidSize;
    }

    List<Long> allCuboids = Lists.newArrayList();
    allCuboids.addAll(cubeSizeMap.keySet());
    Collections.sort(allCuboids);

    int nRegion = Math.round((float) (totalSizeInM / (cut * 1024L)));
    nRegion = Math.max(kylinConfig.getHBaseRegionCountMin(), nRegion);
    nRegion = Math.min(kylinConfig.getHBaseRegionCountMax(), nRegion);

    if (cubeSegment.isEnableSharding()) {
        //use prime nRegions to help random sharding
        int original = nRegion;
        if (nRegion == 0) {
            nRegion = 1;
        }

        if (nRegion > Short.MAX_VALUE) {
            logger.info("Too many regions! reduce to {}", Short.MAX_VALUE);
            nRegion = Short.MAX_VALUE;
        }

        if (nRegion != original) {
            logger.info("Region count is adjusted from {} to {} to help random sharding", original, nRegion);
        }
    }

    int mbPerRegion = (int) (totalSizeInM / nRegion);
    mbPerRegion = Math.max(1, mbPerRegion);

    logger.info("Total size {} M (estimated)", totalSizeInM);
    logger.info("Expecting {} regions.", nRegion);
    logger.info("Expecting {} MB per region.", mbPerRegion);

    if (cubeSegment.isEnableSharding()) {
        //each cuboid will be split into different number of shards
        HashMap<Long, Short> cuboidShards = Maps.newHashMap();

        //each shard/region may be split into multiple hfiles; array index: region ID, Map: key: cuboidID, value cuboid size in the region
        List<HashMap<Long, Double>> innerRegionSplits = Lists.newArrayList();
        for (int i = 0; i < nRegion; i++) {
            innerRegionSplits.add(new HashMap<Long, Double>());
        }

        double[] regionSizes = new double[nRegion];
        for (long cuboidId : allCuboids) {
            double estimatedSize = cubeSizeMap.get(cuboidId);
            double magic = 23;
            int shardNum = (int) (estimatedSize * magic / mbPerRegion + 1);
            if (shardNum < 1) {
                shardNum = 1;
            }

            if (shardNum > nRegion) {
                logger.debug(String.format(Locale.ROOT,
                        "Cuboid %d 's estimated size %.2f MB will generate %d regions, " + "reduce to %d", cuboidId,
                        estimatedSize, shardNum, nRegion));
                shardNum = nRegion;
            } else {
                logger.debug(
                        String.format(Locale.ROOT, "Cuboid %d 's estimated size %.2f MB will generate %d regions",
                                cuboidId, estimatedSize, shardNum));
            }

            cuboidShards.put(cuboidId, (short) shardNum);
            short startShard = ShardingHash.getShard(cuboidId, nRegion);
            for (short i = startShard; i < startShard + shardNum; ++i) {
                short j = (short) (i % nRegion);
                regionSizes[j] = regionSizes[j] + estimatedSize / shardNum;
                innerRegionSplits.get(j).put(cuboidId, estimatedSize / shardNum);
            }
        }

        for (int i = 0; i < nRegion; ++i) {
            logger.debug("Region {}'s estimated size is {} MB, accounting for {} percent", i, regionSizes[i],
                    100.0 * regionSizes[i] / totalSizeInM);
        }

        CuboidShardUtil.saveCuboidShards(cubeSegment, cuboidShards, nRegion);
        saveHFileSplits(innerRegionSplits, mbPerRegion, hfileSplitsOutputFolder, kylinConfig);
        return getSplitsByRegionCount(nRegion);
    } else {
        throw new IllegalStateException("Not supported");
    }
}