Java Code Examples for org.apache.kylin.cube.CubeSegment

The following examples show how to use org.apache.kylin.cube.CubeSegment. They are extracted from open source projects; the project, source file, and license are noted above each example.
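
As a quick orientation before the examples, here is a minimal sketch of how a CubeSegment is usually obtained from a CubeManager. It only reuses calls that also appear in the examples below; the cube name "sample_cube" is a placeholder.

// Minimal sketch, assuming a cube named "sample_cube" exists in the metadata store.
KylinConfig config = KylinConfig.getInstanceFromEnv();
CubeManager cubeMgr = CubeManager.getInstance(config);
CubeInstance cube = cubeMgr.getCube("sample_cube");

// Iterate the READY segments and print basic segment information.
for (CubeSegment segment : cube.getSegments(SegmentStatusEnum.READY)) {
    System.out.println(segment.getName() + " " + segment.getSegRange() + " uuid=" + segment.getUuid());
}
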
Example 1
Source Project: kylin   Source File: KafkaFlatTableJob.java    License: Apache License 2.0
private void setupMapper(CubeSegment cubeSeg) throws IOException {
    // set the segment's offset info to job conf
    Map<Integer, Long> offsetStart = cubeSeg.getSourcePartitionOffsetStart();
    Map<Integer, Long> offsetEnd = cubeSeg.getSourcePartitionOffsetEnd();

    Integer minPartition = Collections.min(offsetStart.keySet());
    Integer maxPartition = Collections.max(offsetStart.keySet());
    job.getConfiguration().set(CONFIG_KAFKA_PARITION_MIN, minPartition.toString());
    job.getConfiguration().set(CONFIG_KAFKA_PARITION_MAX, maxPartition.toString());

    for(Integer partition: offsetStart.keySet()) {
        job.getConfiguration().set(CONFIG_KAFKA_PARITION_START + partition, offsetStart.get(partition).toString());
        job.getConfiguration().set(CONFIG_KAFKA_PARITION_END + partition, offsetEnd.get(partition).toString());
    }

    job.setMapperClass(KafkaFlatTableMapper.class);
    job.setInputFormatClass(KafkaInputFormat.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
}
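
The mapper side can read the same keys back from the job Configuration. The helper below is a hypothetical sketch (it is not part of KafkaFlatTableMapper) that rebuilds the per-partition offset ranges written by setupMapper above, assuming partition ids are contiguous between the recorded min and max.

// Hypothetical helper (not in Kylin): rebuild the offset ranges written by setupMapper().
// Assumes partition ids are contiguous between CONFIG_KAFKA_PARITION_MIN and CONFIG_KAFKA_PARITION_MAX.
private Map<Integer, Pair<Long, Long>> readOffsetRanges(Configuration conf) {
    int minPartition = Integer.parseInt(conf.get(CONFIG_KAFKA_PARITION_MIN));
    int maxPartition = Integer.parseInt(conf.get(CONFIG_KAFKA_PARITION_MAX));
    Map<Integer, Pair<Long, Long>> ranges = new HashMap<>();
    for (int partition = minPartition; partition <= maxPartition; partition++) {
        long start = Long.parseLong(conf.get(CONFIG_KAFKA_PARITION_START + partition));
        long end = Long.parseLong(conf.get(CONFIG_KAFKA_PARITION_END + partition));
        ranges.put(partition, new Pair<>(start, end));
    }
    return ranges;
}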
 
Example 2
Source Project: Kylin   Source File: JobService.java    License: Apache License 2.0
@PreAuthorize(Constant.ACCESS_HAS_ROLE_ADMIN + " or hasPermission(#job, 'ADMINISTRATION') or hasPermission(#job, 'OPERATION') or hasPermission(#job, 'MANAGEMENT')")
public JobInstance cancelJob(String jobId) throws IOException, JobException {
    //        CubeInstance cube = this.getCubeManager().getCube(job.getRelatedCube());
    //        for (BuildCubeJob cubeJob: listAllCubingJobs(cube.getName(), null, EnumSet.of(ExecutableState.READY, ExecutableState.RUNNING))) {
    //            getExecutableManager().stopJob(cubeJob.getId());
    //        }
    final JobInstance jobInstance = getJobInstance(jobId);
    final String segmentId = jobInstance.getRelatedSegment();
    CubeInstance cubeInstance = getCubeManager().getCube(jobInstance.getRelatedCube());
    final CubeSegment segment = cubeInstance.getSegmentById(segmentId);
    if (segment.getStatus() == SegmentStatusEnum.NEW) {
        cubeInstance.getSegments().remove(segment);
        getCubeManager().updateCube(cubeInstance);
    }
    getExecutableManager().discardJob(jobId);
    return jobInstance;
}
 
Example 3
public void configureFlinkJob(final CubeSegment seg, final FlinkExecutable flinkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_PATH.getOpt(),
            tablePath);
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    flinkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_FLINK_CUBE);
}
 
Example 4
Source Project: kylin   Source File: CuboidStatsReaderUtil.java    License: Apache License 2.0
public static Pair<Map<Long, Long>, Long> readCuboidStatsWithSourceFromSegment(Set<Long> cuboidIds,
        CubeSegment cubeSegment) throws IOException {
    if (cubeSegment == null) {
        logger.warn("The cube segment can not be " + null);
        return null;
    }

    CubeStatsReader cubeStatsReader = new CubeStatsReader(cubeSegment, null, cubeSegment.getConfig());
    if (cubeStatsReader.getCuboidRowEstimatesHLL() == null
            || cubeStatsReader.getCuboidRowEstimatesHLL().isEmpty()) {
        logger.info("Cuboid Statistics is not enabled.");
        return null;
    }

    Map<Long, Long> cuboidsWithStatsAll = cubeStatsReader.getCuboidRowEstimatesHLL();
    Map<Long, Long> cuboidsWithStats = Maps.newHashMapWithExpectedSize(cuboidIds.size());
    for (Long cuboid : cuboidIds) {
        Long rowEstimate = cuboidsWithStatsAll.get(cuboid);
        if (rowEstimate == null) {
            logger.warn("Cannot get the row count stats for cuboid " + cuboid);
        } else {
            cuboidsWithStats.put(cuboid, rowEstimate);
        }
    }
    return new Pair<>(cuboidsWithStats, cubeStatsReader.sourceRowCount);
}
 
Example 5
public ExecutableState buildCuboid(String cubeName, SegmentRange.TSRange tsRange) throws Exception {
    KylinConfig config = KylinConfig.getInstanceFromEnv();
    CubeManager cubeMgr = CubeManager.getInstance(config);
    CubeInstance cube = cubeMgr.getCube(cubeName);
    ExecutableManager execMgr = ExecutableManager.getInstance(config);
    DataModelManager.getInstance(config).getModels();
    // ready cube, segment, cuboid layout
    CubeSegment oneSeg = cubeMgr.appendSegment(cube, tsRange);
    NSparkCubingJob job = NSparkCubingJob.create(Sets.newHashSet(oneSeg), "ADMIN");
    NSparkCubingStep sparkStep = job.getSparkCubingStep();
    StorageURL distMetaUrl = StorageURL.valueOf(sparkStep.getDistMetaUrl());
    Assert.assertEquals("hdfs", distMetaUrl.getScheme());
    Assert.assertTrue(distMetaUrl.getParameter("path").startsWith(config.getHdfsWorkingDirectory()));

    // launch the job
    execMgr.addJob(job);

    return wait(job);
}
 
Example 6
Source Project: kylin-on-parquet-v2   Source File: Coordinator.java    License: Apache License 2.0
private boolean isInOptimize(CubeInstance cube) {
    Segments<CubeSegment> readyPendingSegments = cube.getSegments(SegmentStatusEnum.READY_PENDING);
    if (readyPendingSegments.size() > 0) {
        logger.info("The cube {} has READY_PENDING segments {}. It's not allowed for building", cube.getName(),
                readyPendingSegments);
        return true;
    }
    Segments<CubeSegment> newSegments = cube.getSegments(SegmentStatusEnum.NEW);
    for (CubeSegment newSegment : newSegments) {
        String jobId = newSegment.getLastBuildJobID();
        if (jobId == null) {
            continue;
        }
        AbstractExecutable job = getExecutableManager().getJob(jobId);
        if (job != null && job instanceof CubingJob) {
            CubingJob cubingJob = (CubingJob) job;
            if (CubingJob.CubingJobTypeEnum.OPTIMIZE.toString().equals(cubingJob.getJobType())) {
                logger.info(
                        "The cube {} is in optimization. It's not allowed to build new segments during optimization.",
                        cube.getName());
                return true;
            }
        }
    }
    return false;
}
 
Example 7
Source Project: kylin   Source File: StatisticsDecisionUtil.java    License: Apache License 2.0
public static boolean isAbleToOptimizeCubingPlan(CubeSegment segment) {
    CubeInstance cube = segment.getCubeInstance();
    if (!cube.getConfig().isCubePlannerEnabled())
        return false;

    if (cube.getSegments(SegmentStatusEnum.READY_PENDING).size() > 0) {
        logger.info("Has read pending segments and will not enable cube planner.");
        return false;
    }
    List<CubeSegment> readySegments = cube.getSegments(SegmentStatusEnum.READY);
    List<CubeSegment> newSegments = cube.getSegments(SegmentStatusEnum.NEW);
    if (newSegments.size() <= 1 && //
            (readySegments.size() == 0 || //
                    (cube.getConfig().isCubePlannerEnabledForExistingCube() && readySegments.size() == 1
                            && readySegments.get(0).getSegRange().equals(segment.getSegRange())))) {
        return true;
    } else {
        return false;
    }
}
 
Example 8
public SparkExecutable createMergeDictionaryStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    sparkExecutable.setClassName(SparkMergingDictionary.class.getName());

    sparkExecutable.setParam(SparkMergingDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobID));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_MERGE_SEGMENT_IDS.getOpt(), StringUtil.join(mergingSegmentIds, ","));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));

    sparkExecutable.setJobId(jobID);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY + ":" + seg.toString());
    sparkExecutable.setSparkConfigName(ExecutableConstants.SPARK_SPECIFIC_CONFIG_NAME_MERGE_DICTIONARY);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
 
Example 9
Source Project: kylin-on-parquet-v2   Source File: SparkExecutable.java    License: Apache License 2.0
private void attachSegmentsMetadataWithDict(List<CubeSegment> segments) throws IOException {
    Set<String> dumpList = new LinkedHashSet<>(
            JobRelatedMetaUtil.collectCubeMetadata(segments.get(0).getCubeInstance()));
    ResourceStore rs = ResourceStore.getStore(segments.get(0).getConfig());
    for (CubeSegment segment : segments) {
        dumpList.addAll(segment.getDictionaryPaths());
        if (rs.exists(segment.getStatisticsResourcePath())) {
            // cube statistics is not available for new segment
            dumpList.add(segment.getStatisticsResourcePath());
        }
        //tiretree global domain dic
        CubeDescTiretreeGlobalDomainDictUtil.cuboidJob(segment.getCubeDesc(), dumpList);
    }
    JobRelatedMetaUtil.dumpAndUploadKylinPropsAndMetadata(dumpList, (KylinConfigExt) segments.get(0).getConfig(),
            this.getParam(SparkCubingByLayer.OPTION_META_URL.getOpt()));
}
 
Example 10
Source Project: Kylin   Source File: SerializedHBaseTupleIterator.java    License: Apache License 2.0
public SerializedHBaseTupleIterator(HConnection conn, List<HBaseKeyRange> segmentKeyRanges, CubeInstance cube, Collection<TblColRef> dimensions, TupleFilter filter, Collection<TblColRef> groupBy, Collection<RowValueDecoder> rowValueDecoders, StorageContext context) {

    this.context = context;
    int limit = context.getLimit();
    this.partialResultLimit = Math.max(limit, PARTIAL_DEFAULT_LIMIT);

    this.segmentIteratorList = new ArrayList<CubeSegmentTupleIterator>(segmentKeyRanges.size());
    Map<CubeSegment, List<HBaseKeyRange>> rangesMap = makeRangesMap(segmentKeyRanges);
    for (Map.Entry<CubeSegment, List<HBaseKeyRange>> entry : rangesMap.entrySet()) {
        CubeSegmentTupleIterator segIter = new CubeSegmentTupleIterator(entry.getKey(), entry.getValue(), conn, dimensions, filter, groupBy, rowValueDecoders, context);
        this.segmentIteratorList.add(segIter);
    }

    this.segmentIteratorIterator = this.segmentIteratorList.iterator();
    if (this.segmentIteratorIterator.hasNext()) {
        this.segmentIterator = this.segmentIteratorIterator.next();
    } else {
        this.segmentIterator = ITupleIterator.EMPTY_TUPLE_ITERATOR;
    }
}
 
Example 11
Source Project: kylin   Source File: CubeController.java    License: Apache License 2.0
/**
 * Get the flat-table SQL of a cube segment.
 *
 * @param cubeName    Cube Name
 * @param segmentName Segment Name
 * @return a GeneralResponse whose "sql" property holds the segment's flat-table SELECT statement
 */
@RequestMapping(value = "/{cubeName}/segs/{segmentName}/sql", method = { RequestMethod.GET }, produces = {
        "application/json" })
@ResponseBody
public GeneralResponse getSql(@PathVariable String cubeName, @PathVariable String segmentName) {

    checkCubeExists(cubeName);
    CubeInstance cube = cubeService.getCubeManager().getCube(cubeName);

    CubeSegment segment = cube.getSegment(segmentName, null);
    if (segment == null) {
        throw new NotFoundException("Cannot find segment " + segmentName);
    }

    IJoinedFlatTableDesc flatTableDesc = new CubeJoinedFlatTableDesc(segment, true);
    String sql = JoinedFlatTable.generateSelectDataStatement(flatTableDesc);

    GeneralResponse response = new GeneralResponse();
    response.setProperty("sql", sql);

    return response;
}
 
Example 12
Source Project: kylin-on-parquet-v2   Source File: JoinedFlatTable.java    License: Apache License 2.0
public static String generateInsertDataStatement(IJoinedFlatTableDesc flatDesc) {
    CubeSegment segment = ((CubeSegment) flatDesc.getSegment());
    KylinConfig kylinConfig;
    if (null == segment) {
        kylinConfig = KylinConfig.getInstanceFromEnv();
    } else {
        kylinConfig = (flatDesc.getSegment()).getConfig();
    }

    if (kylinConfig.isAdvancedFlatTableUsed()) {
        try {
            Class<?> advancedFlatTable = Class.forName(kylinConfig.getAdvancedFlatTableClass());
            // the reflective lookup must match the single-argument invocation below
            Method method = advancedFlatTable.getMethod("generateInsertDataStatement", IJoinedFlatTableDesc.class);
            return (String) method.invoke(null, flatDesc);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    return "INSERT OVERWRITE TABLE " + quoteIdentifier(flatDesc.getTableName(), null) + " " + generateSelectDataStatement(flatDesc)
            + ";\n";
}
 
Example 13
Source Project: kylin   Source File: HybridCubeCLI.java    License: Apache License 2.0
private void checkSegmentOffset(List<RealizationEntry> realizationEntries) {
    List<SegmentRange> segmentRanges = Lists.newArrayList();

    for (RealizationEntry entry : realizationEntries) {
        if (entry.getType() != RealizationType.CUBE) {
            throw new IllegalArgumentException("Wrong realization type: " + entry.getType() + ", only cube supported. ");
        }

        CubeInstance cubeInstance = cubeManager.getCube(entry.getRealization());
        Segments<CubeSegment> segments = cubeInstance.getSegments();

        for (CubeSegment segment : segments) {
            segmentRanges.add(segment.getSegRange());
        }
    }

    if (segmentRanges.size() >= 2) {
        Collections.sort(segmentRanges);

        for (int i = 0; i < segmentRanges.size() - 1; i++) {
            if (segmentRanges.get(i).overlaps(segmentRanges.get(i + 1))) {
                throw new IllegalArgumentException("Segments has overlap, could not hybrid. First Segment Range: [" + segmentRanges.get(i).start.v + "," + segmentRanges.get(i).end.v + "], Second Segment Range: [" + segmentRanges.get(i + 1).start.v + "," + segmentRanges.get(i + 1).end.v + "]");
            }
        }
    }
}
 
Example 14
Source Project: kylin-on-parquet-v2   Source File: CubeService.java    License: Apache License 2.0
private void cleanSegmentStorage(List<CubeSegment> toRemoveSegs) throws IOException {
    if (!KylinConfig.getInstanceFromEnv().cleanStorageAfterDelOperation()) {
        return;
    }

    if (toRemoveSegs != null && !toRemoveSegs.isEmpty()) {
        List<String> toDropHTables = Lists.newArrayListWithCapacity(toRemoveSegs.size());
        List<String> toDelHDFSPaths = Lists.newArrayListWithCapacity(toRemoveSegs.size());
        for (CubeSegment seg : toRemoveSegs) {
            toDropHTables.add(seg.getStorageLocationIdentifier());
            toDelHDFSPaths.add(JobBuilderSupport.getJobWorkingDir(seg.getConfig().getHdfsWorkingDirectory(),
                    seg.getLastBuildJobID()));
        }

        StorageCleanUtil.dropHTables(new HBaseAdmin(HBaseConnection.getCurrentHBaseConfiguration()), toDropHTables);
        StorageCleanUtil.deleteHDFSPath(HadoopUtil.getWorkingFileSystem(), toDelHDFSPaths);
    }
}
 
Example 15
public static void optimizeCubingPlan(CubeSegment segment) throws IOException {
    if (isAbleToOptimizeCubingPlan(segment)) {
        logger.info("It's able to trigger cuboid planner algorithm.");
    } else {
        return;
    }

    Map<Long, Long> recommendCuboidsWithStats = CuboidRecommenderUtil.getRecommendCuboidList(segment);
    if (recommendCuboidsWithStats == null || recommendCuboidsWithStats.isEmpty()) {
        return;
    }

    CubeInstance cube = segment.getCubeInstance();
    CubeUpdate update = new CubeUpdate(cube.latestCopyForWrite());
    update.setCuboids(recommendCuboidsWithStats);
    CubeManager.getInstance(cube.getConfig()).updateCube(update);
}
 
Example 16
Source Project: Kylin   Source File: CubingJobBuilder.java    License: Apache License 2.0
private MapReduceExecutable createRangeRowkeyDistributionStep(CubeSegment seg, String inputPath) {
    MapReduceExecutable rowkeyDistributionStep = new MapReduceExecutable();
    rowkeyDistributionStep.setName(ExecutableConstants.STEP_NAME_GET_CUBOID_KEY_DISTRIBUTION);
    StringBuilder cmd = new StringBuilder();

    appendMapReduceParameters(cmd, seg);
    appendExecCmdParameters(cmd, "input", inputPath);
    appendExecCmdParameters(cmd, "output", getRowkeyDistributionOutputPath(seg));
    appendExecCmdParameters(cmd, "cubename", seg.getCubeInstance().getName());
    appendExecCmdParameters(cmd, "jobname", "Kylin_Region_Splits_Calculator_" + seg.getCubeInstance().getName() + "_Step");

    rowkeyDistributionStep.setMapReduceParams(cmd.toString());
    rowkeyDistributionStep.setMapReduceJobClass(RangeKeyDistributionJob.class);
    return rowkeyDistributionStep;
}
 
Example 17
Source Project: kylin   Source File: CubeStatsReader.java    License: Apache License 2.0
/**
 * Read statistics from {@code path} rather than from {@code cubeSegment}.
 * Since the statistics are read from {@code path}, the cuboid scheduler should be provided by the caller.
 */
public CubeStatsReader(CubeSegment cubeSegment, CuboidScheduler cuboidScheduler, KylinConfig kylinConfig, Path path)
        throws IOException {
    CubeStatsResult cubeStatsResult = new CubeStatsResult(path, kylinConfig.getCubeStatsHLLPrecision());

    this.seg = cubeSegment;
    this.cuboidScheduler = cuboidScheduler;
    this.samplingPercentage = cubeStatsResult.getPercentage();
    this.mapperNumberOfFirstBuild = cubeStatsResult.getMapperNumber();
    this.mapperOverlapRatioOfFirstBuild = cubeStatsResult.getMapperOverlapRatio();
    this.cuboidRowEstimatesHLL = cubeStatsResult.getCounterMap();
    this.sourceRowCount = cubeStatsResult.getSourceRecordCount();
}
 
Example 18
Source Project: kylin   Source File: BatchMergeJobBuilder2.java    License: Apache License 2.0
public CubingJob build() {
    logger.info("MR_V2 new job to MERGE segment " + seg);

    final CubeSegment cubeSegment = seg;
    final CubingJob result = CubingJob.createMergeJob(cubeSegment, submitter, config);
    final String jobId = result.getId();

    final List<CubeSegment> mergingSegments = cubeSegment.getCubeInstance().getMergingSegments(cubeSegment);
    Preconditions.checkState(mergingSegments.size() > 1, "there should be at least 2 segments to merge, target segment " + cubeSegment);
    final List<String> mergingSegmentIds = Lists.newArrayList();
    for (CubeSegment merging : mergingSegments) {
        mergingSegmentIds.add(merging.getUuid());
    }

    // Phase 1: Merge Dictionary
    inputSide.addStepPhase1_MergeDictionary(result);
    result.addTask(createMergeDictionaryStep(cubeSegment, jobId, mergingSegmentIds));
    result.addTask(createUpdateDictionaryStep(cubeSegment, jobId, mergingSegmentIds));
    outputSide.addStepPhase1_MergeDictionary(result);

    // Phase 2: Merge Cube Files
    outputSide.addStepPhase2_BuildCube(seg, mergingSegments, result);

    // Phase 3: Update Metadata & Cleanup
    result.addTask(createUpdateCubeInfoAfterMergeStep(mergingSegmentIds, jobId));
    outputSide.addStepPhase3_Cleanup(result);

    return result;
}
 
Example 19
Source Project: Kylin   Source File: CubingJobBuilder.java    License: Apache License 2.0
AbstractExecutable addHTableSteps(CubeSegment seg, String cuboidRootPath, CubingJob result) {
    final String jobId = result.getId();
    final String cuboidPath = cuboidRootPath + "*";
    
    result.addTask(createRangeRowkeyDistributionStep(seg, cuboidPath));
    // create htable step
    result.addTask(createCreateHTableStep(seg));
    // generate hfiles step
    final MapReduceExecutable convertCuboidToHfileStep = createConvertCuboidToHfileStep(seg, cuboidPath, jobId);
    result.addTask(convertCuboidToHfileStep);
    // bulk load step
    result.addTask(createBulkLoadStep(seg, jobId));
    
    return convertCuboidToHfileStep;
}
 
Example 20
Source Project: kylin-on-parquet-v2   Source File: AbstractHadoopJob.java    License: Apache License 2.0
protected void attachCubeMetadataWithDict(CubeInstance cube, Configuration conf) throws IOException {
    Set<String> dumpList = new LinkedHashSet<>(collectCubeMetadata(cube));
    for (CubeSegment segment : cube.getSegments()) {
        dumpList.addAll(segment.getDictionaryPaths());
    }
    dumpKylinPropsAndMetadata(cube.getProject(), dumpList, cube.getConfig(), conf);
}
 
Example 21
Source Project: kylin   Source File: BuildCubeWithEngine.java    License: Apache License 2.0
@SuppressWarnings("unused")
private void checkHFilesInHBase(CubeSegment segment) throws IOException {
    try (Connection conn = HBaseConnection.get(KylinConfig.getInstanceFromEnv().getStorageUrl())) {
        String tableName = segment.getStorageLocationIdentifier();

        HBaseRegionSizeCalculator cal = new HBaseRegionSizeCalculator(tableName, conn);
        Map<byte[], Long> sizeMap = cal.getRegionSizeMap();
        long totalSize = 0;
        for (Long size : sizeMap.values()) {
            totalSize += size;
        }
        if (totalSize == 0) {
            return;
        }
        Map<byte[], Pair<Integer, Integer>> countMap = cal.getRegionHFileCountMap();
        // check if any region contains more than one hfile, which means the hfile size config takes effect
        boolean hasMultiHFileRegions = false;
        for (Pair<Integer, Integer> count : countMap.values()) {
            // check if hfile count is greater than store count
            if (count.getSecond() > count.getFirst()) {
                hasMultiHFileRegions = true;
                break;
            }
        }
        if (KylinConfig.getInstanceFromEnv().getHBaseHFileSizeGB() == 0 && hasMultiHFileRegions) {
            throw new IOException("hfile size is set to 0, but found a region containing more than one hfile");
        } else if (KylinConfig.getInstanceFromEnv().getHBaseHFileSizeGB() > 0 && !hasMultiHFileRegions) {
            throw new IOException("hfile size is set greater than 0, but all regions still have only one hfile");
        }
        }
    }
}
 
Example 22
Source Project: Kylin   Source File: MergeCuboidMapperTest.java    License: Apache License 2.0
@Test
public void test() throws IOException, ParseException {

    String cubeName = "test_kylin_cube_without_slr_left_join_ready_2_segments";

    CubeSegment newSeg = cubeManager.mergeSegments(cube, 0L, 1386835200000L);
    String segmentName = newSeg.getName();

    final Dictionary<?> dictionary = cubeManager.getDictionary(newSeg, lfn);
    assertTrue(dictionary == null);
    // ((TrieDictionary) dictionary).dump(System.out);

    // hack for distributed cache
    // File metaDir = new File("../job/meta");
    // FileUtils.copyDirectory(new File(getTestConfig().getMetadataUrl()), metaDir);
    //
    // mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
    // mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
    // // mapDriver.getConfiguration().set(KylinConfig.KYLIN_METADATA_URL,
    // // "../job/meta");
    //
    // byte[] key = new byte[] { 0, 0, 0, 0, 0, 0, 0, -92, 1, 1, 1 };
    // byte[] value = new byte[] { 1, 2, 3 };
    // byte[] newkey = new byte[] { 0, 0, 0, 0, 0, 0, 0, -92, 1, 1, 2 };
    // byte[] newvalue = new byte[] { 1, 2, 3 };
    //
    // mapDriver.withInput(new Text(key), new Text(value));
    // mapDriver.withOutput(new Text(newkey), new Text(newvalue));
    // mapDriver.setMapInputPath(new Path("/apps/hdmi-prod/b_kylin/prod/kylin-f24668f6-dcff-4cb6-a89b-77f1119df8fa/vac_sw_cube_v4/cuboid/15d_cuboid"));
    //
    // mapDriver.runTest();
}
 
Example 23
Source Project: kylin   Source File: CoprocessorRowType.java    License: Apache License 2.0
public static CoprocessorRowType fromCuboid(CubeSegment seg, Cuboid cuboid) {
    List<TblColRef> colList = cuboid.getColumns();
    TblColRef[] cols = colList.toArray(new TblColRef[colList.size()]);
    RowKeyColumnIO colIO = new RowKeyColumnIO(seg.getDimensionEncodingMap());
    int[] colSizes = new int[cols.length];
    for (int i = 0; i < cols.length; i++) {
        colSizes[i] = colIO.getColumnLength(cols[i]);
    }
    return new CoprocessorRowType(cols, colSizes, seg.getRowKeyPreambleSize());
}
 
Example 24
public void purgeSegment(String segmentName) {
    Pair<Long, Long> segmentRange = CubeSegment.parseSegmentName(segmentName);
    StreamingCubeSegment segment = activeSegments.remove(segmentRange.getFirst());
    if (segment == null) {
        segment = immutableSegments.remove(segmentRange.getFirst());
    }
    segmentSourceStartPositions.remove(segmentRange.getFirst());
    if (segment != null) {
        segment.purge();
    }
}
 
Example 25
Source Project: kylin-on-parquet-v2   Source File: FlinkExecutable.java    License: Apache License 2.0
private void attachSegmentsMetadataWithDict(List<CubeSegment> segments) throws IOException {
    Set<String> dumpList = new LinkedHashSet<>();
    dumpList.addAll(JobRelatedMetaUtil.collectCubeMetadata(segments.get(0).getCubeInstance()));
    ResourceStore rs = ResourceStore.getStore(segments.get(0).getConfig());
    for (CubeSegment segment : segments) {
        dumpList.addAll(segment.getDictionaryPaths());
        if (rs.exists(segment.getStatisticsResourcePath())) {
            // cube statistics is not available for new segment
            dumpList.add(segment.getStatisticsResourcePath());
        }
    }

    JobRelatedMetaUtil.dumpAndUploadKylinPropsAndMetadata(dumpList, (KylinConfigExt) segments.get(0).getConfig(),
            this.getParam(FlinkCubingByLayer.OPTION_META_URL.getOpt()));
}
 
Example 26
Source Project: kylin-on-parquet-v2   Source File: Coordinator.java    License: Apache License 2.0
private boolean triggerSegmentBuild(String cubeName, String segmentName) {
    CubeManager cubeManager = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
    CubeInstance cubeInstance = cubeManager.getCube(cubeName);
    try {
        Pair<Long, Long> segmentRange = CubeSegment.parseSegmentName(segmentName);
        logger.info("submit streaming segment build, cube:{} segment:{}", cubeName, segmentName);
        CubeSegment newSeg = getCubeManager().appendSegment(cubeInstance,
                new TSRange(segmentRange.getFirst(), segmentRange.getSecond()));
        DefaultChainedExecutable executable = new StreamingCubingEngine().createStreamingCubingJob(newSeg,
                "SYSTEM");
        getExecutableManager().addJob(executable);
        CubingJob cubingJob = (CubingJob) executable;
        newSeg.setLastBuildJobID(cubingJob.getId());

        SegmentJobBuildInfo segmentJobBuildInfo = new SegmentJobBuildInfo(cubeName, segmentName, cubingJob.getId());
        jobStatusChecker.addSegmentBuildJob(segmentJobBuildInfo);
        SegmentBuildState.BuildState state = new SegmentBuildState.BuildState();
        state.setBuildStartTime(System.currentTimeMillis());
        state.setState(SegmentBuildState.BuildState.State.BUILDING);
        state.setJobId(cubingJob.getId());
        streamMetadataStore.updateSegmentBuildState(cubeName, segmentName, state);
        return true;
    } catch (Exception e) {
        logger.error("streaming job submit fail, cubeName:" + cubeName + " segment:" + segmentName, e);
        return false;
    }
}
 
Example 27
Source Project: kylin-on-parquet-v2   Source File: SegmentPruner.java    License: Apache License 2.0
public List<CubeSegment> listSegmentsForQuery(CubeInstance cube) {
    List<CubeSegment> r = new ArrayList<>();
    for (CubeSegment seg : cube.getSegments(SegmentStatusEnum.READY)) {
        if (check(seg))
            r.add(seg);
    }
    return r;
}
 
Example 28
Source Project: kylin   Source File: FlinkMergingDictionary.java    License: Apache License 2.0
private List<CubeSegment> getMergingSegments(CubeInstance cube, String[] segmentIds) {
    List<CubeSegment> result = Lists.newArrayListWithCapacity(segmentIds.length);
    for (String id : segmentIds) {
        result.add(cube.getSegmentById(id));
    }
    return result;
}
 
Example 29
Source Project: kylin   Source File: CubeHBaseRPC.java    License: Apache License 2.0
public CubeHBaseRPC(ISegment segment, Cuboid cuboid, GTInfo fullGTInfo, StorageContext context) {
    Preconditions.checkArgument(segment instanceof CubeSegment, "segment must be CubeSegment");
    
    this.cubeSeg = (CubeSegment) segment;
    this.cuboid = cuboid;
    this.fullGTInfo = fullGTInfo;
    this.queryContext = QueryContextFacade.current();
    this.storageContext = context;

    this.fuzzyKeyEncoder = new FuzzyKeyEncoder(cubeSeg, cuboid);
    this.fuzzyMaskEncoder = new FuzzyMaskEncoder(cubeSeg, cuboid);
}
 
Example 30
Source Project: Kylin   Source File: CubingJobBuilder.java    License: Apache License 2.0
public CubingJob mergeJob(CubeSegment seg) {
    checkPreconditions(seg);
    
    CubingJob result = initialJob(seg, "MERGE");
    final String jobId = result.getId();
    final String mergedCuboidPath = getJobWorkingDir(jobId) + "/" + seg.getCubeInstance().getName() + "/cuboid/";
    
    List<CubeSegment> mergingSegments = seg.getCubeInstance().getMergingSegments(seg);
    Preconditions.checkState(mergingSegments.size() > 1, "there should be at least 2 segments to merge");
    List<String> mergingSegmentIds = Lists.newArrayList();
    List<String> mergingCuboidPaths = Lists.newArrayList();
    for (CubeSegment merging : mergingSegments) {
        mergingSegmentIds.add(merging.getUuid());
        mergingCuboidPaths.add(getPathToMerge(merging));
    }

    // merge cuboid
    addMergeSteps(seg, mergingSegmentIds, mergingCuboidPaths, mergedCuboidPath, result);
    
    // convert htable
    AbstractExecutable convertCuboidToHfileStep = addHTableSteps(seg, mergedCuboidPath, result);

    // update cube info
    result.addTask(createUpdateCubeInfoAfterMergeStep(seg, mergingSegmentIds, convertCuboidToHfileStep.getId(), jobId));

    return result;
}