org.apache.kylin.cube.CubeSegment Java Examples

The following examples show how to use org.apache.kylin.cube.CubeSegment. They are taken from open source projects; the source file and originating project are listed above each example.
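As a primer, here is a minimal sketch of one common way to obtain CubeSegment instances before calling APIs like those below; the cube name is a placeholder, and the import paths assume a Kylin 3.x layout.

import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.metadata.model.SegmentStatusEnum;

public class CubeSegmentPrimer {
    public static void main(String[] args) {
        KylinConfig config = KylinConfig.getInstanceFromEnv();
        CubeManager cubeMgr = CubeManager.getInstance(config);
        CubeInstance cube = cubeMgr.getCube("my_cube"); // placeholder cube name
        // List the segments that are ready to serve queries.
        for (CubeSegment seg : cube.getSegments(SegmentStatusEnum.READY)) {
            System.out.println(seg.getUuid() + " -> " + seg.getSegRange());
        }
    }
}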
Example #1
Source File: CubeService.java    From kylin-on-parquet-v2 with Apache License 2.0
private void cleanSegmentStorage(List<CubeSegment> toRemoveSegs) throws IOException {
    if (!KylinConfig.getInstanceFromEnv().cleanStorageAfterDelOperation()) {
        return;
    }

    if (toRemoveSegs != null && !toRemoveSegs.isEmpty()) {
        List<String> toDropHTables = Lists.newArrayListWithCapacity(toRemoveSegs.size());
        List<String> toDelHDFSPaths = Lists.newArrayListWithCapacity(toRemoveSegs.size());
        for (CubeSegment seg : toRemoveSegs) {
            toDropHTables.add(seg.getStorageLocationIdentifier());
            toDelHDFSPaths.add(JobBuilderSupport.getJobWorkingDir(seg.getConfig().getHdfsWorkingDirectory(),
                    seg.getLastBuildJobID()));
        }

        StorageCleanUtil.dropHTables(new HBaseAdmin(HBaseConnection.getCurrentHBaseConfiguration()), toDropHTables);
        StorageCleanUtil.deleteHDFSPath(HadoopUtil.getWorkingFileSystem(), toDelHDFSPaths);
    }
}
 
Example #2
Source File: Coordinator.java    From kylin-on-parquet-v2 with Apache License 2.0
private boolean isInOptimize(CubeInstance cube) {
    Segments<CubeSegment> readyPendingSegments = cube.getSegments(SegmentStatusEnum.READY_PENDING);
    if (readyPendingSegments.size() > 0) {
        logger.info("The cube {} has READY_PENDING segments {}. It's not allowed for building", cube.getName(),
                readyPendingSegments);
        return true;
    }
    Segments<CubeSegment> newSegments = cube.getSegments(SegmentStatusEnum.NEW);
    for (CubeSegment newSegment : newSegments) {
        String jobId = newSegment.getLastBuildJobID();
        if (jobId == null) {
            continue;
        }
        AbstractExecutable job = getExecutableManager().getJob(jobId);
        if (job instanceof CubingJob) {
            CubingJob cubingJob = (CubingJob) job;
            if (CubingJob.CubingJobTypeEnum.OPTIMIZE.toString().equals(cubingJob.getJobType())) {
                logger.info(
                        "The cube {} is in optimization. It's not allowed to build new segments during optimization.",
                        cube.getName());
                return true;
            }
        }
    }
    return false;
}
 
Example #3
Source File: StatisticsDecisionUtil.java    From kylin-on-parquet-v2 with Apache License 2.0
public static void optimizeCubingPlan(CubeSegment segment) throws IOException {
    if (!isAbleToOptimizeCubingPlan(segment)) {
        return;
    }
    logger.info("It's able to trigger cuboid planner algorithm.");

    Map<Long, Long> recommendCuboidsWithStats = CuboidRecommenderUtil.getRecommendCuboidList(segment);
    if (recommendCuboidsWithStats == null || recommendCuboidsWithStats.isEmpty()) {
        return;
    }

    CubeInstance cube = segment.getCubeInstance();
    CubeUpdate update = new CubeUpdate(cube.latestCopyForWrite());
    update.setCuboids(recommendCuboidsWithStats);
    CubeManager.getInstance(cube.getConfig()).updateCube(update);
}
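Since optimizeCubingPlan bundles the eligibility check (Example #5 below) with the recommender call (Example #29 below), a post-build step only needs a single call. A hedged usage sketch, where segment is a freshly built CubeSegment:

// segment is a freshly built CubeSegment; the method declares IOException.
StatisticsDecisionUtil.optimizeCubingPlan(segment);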
 
Example #4
Source File: CuboidStatsReaderUtil.java    From kylin with Apache License 2.0
public static Pair<Map<Long, Long>, Long> readCuboidStatsWithSourceFromSegment(Set<Long> cuboidIds,
        CubeSegment cubeSegment) throws IOException {
    if (cubeSegment == null) {
        logger.warn("The cube segment can not be " + null);
        return null;
    }

    CubeStatsReader cubeStatsReader = new CubeStatsReader(cubeSegment, null, cubeSegment.getConfig());
    if (cubeStatsReader.getCuboidRowEstimatesHLL() == null
            || cubeStatsReader.getCuboidRowEstimatesHLL().isEmpty()) {
        logger.info("Cuboid Statistics is not enabled.");
        return null;
    }

    Map<Long, Long> cuboidsWithStatsAll = cubeStatsReader.getCuboidRowEstimatesHLL();
    Map<Long, Long> cuboidsWithStats = Maps.newHashMapWithExpectedSize(cuboidIds.size());
    for (Long cuboid : cuboidIds) {
        Long rowEstimate = cuboidsWithStatsAll.get(cuboid);
        if (rowEstimate == null) {
            logger.warn("Cannot get the row count stats for cuboid " + cuboid);
        } else {
            cuboidsWithStats.put(cuboid, rowEstimate);
        }
    }
    return new Pair<>(cuboidsWithStats, cubeStatsReader.sourceRowCount);
}
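A hedged caller-side sketch of the method above; the cuboid IDs are placeholders, the null return (statistics unavailable) must be handled, and Kylin's Pair accessors getFirst()/getSecond() are assumed:

Set<Long> cuboidIds = Sets.newHashSet(255L, 511L); // placeholder cuboid IDs
Pair<Map<Long, Long>, Long> statsWithSource =
        CuboidStatsReaderUtil.readCuboidStatsWithSourceFromSegment(cuboidIds, cubeSegment);
if (statsWithSource != null) {
    Map<Long, Long> rowEstimates = statsWithSource.getFirst(); // cuboid id -> row estimate
    Long sourceRowCount = statsWithSource.getSecond();
}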
 
Example #5
Source File: StatisticsDecisionUtil.java    From kylin with Apache License 2.0
public static boolean isAbleToOptimizeCubingPlan(CubeSegment segment) {
    CubeInstance cube = segment.getCubeInstance();
    if (!cube.getConfig().isCubePlannerEnabled())
        return false;

    if (cube.getSegments(SegmentStatusEnum.READY_PENDING).size() > 0) {
        logger.info("Has READY_PENDING segments and will not enable cube planner.");
        return false;
    }
    List<CubeSegment> readySegments = cube.getSegments(SegmentStatusEnum.READY);
    List<CubeSegment> newSegments = cube.getSegments(SegmentStatusEnum.NEW);
    // The planner runs only when at most one NEW segment exists and either the cube
    // has no READY segment yet, or planner-for-existing-cube is enabled and the single
    // READY segment covers the same range as the segment being optimized.
    return newSegments.size() <= 1
            && (readySegments.isEmpty()
                    || (cube.getConfig().isCubePlannerEnabledForExistingCube() && readySegments.size() == 1
                            && readySegments.get(0).getSegRange().equals(segment.getSegRange())));
}
 
Example #6
Source File: FlinkBatchCubingJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public void configureFlinkJob(final CubeSegment seg, final FlinkExecutable flinkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_PATH.getOpt(), tablePath);
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    flinkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_FLINK_CUBE);
}
 
Example #7
Source File: JobService.java    From Kylin with Apache License 2.0
@PreAuthorize(Constant.ACCESS_HAS_ROLE_ADMIN + " or hasPermission(#job, 'ADMINISTRATION') or hasPermission(#job, 'OPERATION') or hasPermission(#job, 'MANAGEMENT')")
public JobInstance cancelJob(String jobId) throws IOException, JobException {
    //        CubeInstance cube = this.getCubeManager().getCube(job.getRelatedCube());
    //        for (BuildCubeJob cubeJob: listAllCubingJobs(cube.getName(), null, EnumSet.of(ExecutableState.READY, ExecutableState.RUNNING))) {
    //            getExecutableManager().stopJob(cubeJob.getId());
    //        }
    final JobInstance jobInstance = getJobInstance(jobId);
    final String segmentId = jobInstance.getRelatedSegment();
    CubeInstance cubeInstance = getCubeManager().getCube(jobInstance.getRelatedCube());
    final CubeSegment segment = cubeInstance.getSegmentById(segmentId);
    if (segment.getStatus() == SegmentStatusEnum.NEW) {
        cubeInstance.getSegments().remove(segment);
        getCubeManager().updateCube(cubeInstance);
    }
    getExecutableManager().discardJob(jobId);
    return jobInstance;
}
 
Example #8
Source File: HybridCubeCLI.java    From kylin with Apache License 2.0
private void checkSegmentOffset(List<RealizationEntry> realizationEntries) {
    List<SegmentRange> segmentRanges = Lists.newArrayList();

    for (RealizationEntry entry : realizationEntries) {
        if (entry.getType() != RealizationType.CUBE) {
            throw new IllegalArgumentException("Wrong realization type: " + entry.getType() + ", only cube supported. ");
        }

        CubeInstance cubeInstance = cubeManager.getCube(entry.getRealization());
        Segments<CubeSegment> segments = cubeInstance.getSegments();

        for (CubeSegment segment : segments) {
            segmentRanges.add(segment.getSegRange());
        }
    }

    if (segmentRanges.size() >= 2) {
        Collections.sort(segmentRanges);

        for (int i = 0; i < segmentRanges.size() - 1; i++) {
            if (segmentRanges.get(i).overlaps(segmentRanges.get(i + 1))) {
                throw new IllegalArgumentException("Segments overlap, cannot create hybrid. First segment range: ["
                        + segmentRanges.get(i).start.v + "," + segmentRanges.get(i).end.v
                        + "], second segment range: [" + segmentRanges.get(i + 1).start.v + ","
                        + segmentRanges.get(i + 1).end.v + "]");
            }
        }
    }
}
 
Example #9
Source File: SparkBatchMergeJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public SparkExecutable createMergeDictionaryStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    sparkExecutable.setClassName(SparkMergingDictionary.class.getName());

    sparkExecutable.setParam(SparkMergingDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobID));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_MERGE_SEGMENT_IDS.getOpt(), StringUtil.join(mergingSegmentIds, ","));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));

    sparkExecutable.setJobId(jobID);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY + ":" + seg.toString());
    sparkExecutable.setSparkConfigName(ExecutableConstants.SPARK_SPECIFIC_CONFIG_NAME_MERGE_DICTIONARY);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
 
Example #10
Source File: SparkExecutable.java    From kylin-on-parquet-v2 with Apache License 2.0
private void attachSegmentsMetadataWithDict(List<CubeSegment> segments) throws IOException {
    Set<String> dumpList = new LinkedHashSet<>(
            JobRelatedMetaUtil.collectCubeMetadata(segments.get(0).getCubeInstance()));
    ResourceStore rs = ResourceStore.getStore(segments.get(0).getConfig());
    for (CubeSegment segment : segments) {
        dumpList.addAll(segment.getDictionaryPaths());
        if (rs.exists(segment.getStatisticsResourcePath())) {
            // cube statistics is not available for new segment
            dumpList.add(segment.getStatisticsResourcePath());
        }
        // tiretree global domain dictionary
        CubeDescTiretreeGlobalDomainDictUtil.cuboidJob(segment.getCubeDesc(), dumpList);
    }
    JobRelatedMetaUtil.dumpAndUploadKylinPropsAndMetadata(dumpList, (KylinConfigExt) segments.get(0).getConfig(),
            this.getParam(SparkCubingByLayer.OPTION_META_URL.getOpt()));
}
 
Example #11
Source File: KafkaFlatTableJob.java    From kylin with Apache License 2.0
private void setupMapper(CubeSegment cubeSeg) throws IOException {
    // set the segment's offset info to job conf
    Map<Integer, Long> offsetStart = cubeSeg.getSourcePartitionOffsetStart();
    Map<Integer, Long> offsetEnd = cubeSeg.getSourcePartitionOffsetEnd();

    Integer minPartition = Collections.min(offsetStart.keySet());
    Integer maxPartition = Collections.max(offsetStart.keySet());
    job.getConfiguration().set(CONFIG_KAFKA_PARITION_MIN, minPartition.toString());
    job.getConfiguration().set(CONFIG_KAFKA_PARITION_MAX, maxPartition.toString());

    for (Integer partition : offsetStart.keySet()) {
        job.getConfiguration().set(CONFIG_KAFKA_PARITION_START + partition, offsetStart.get(partition).toString());
        job.getConfiguration().set(CONFIG_KAFKA_PARITION_END + partition, offsetEnd.get(partition).toString());
    }

    job.setMapperClass(KafkaFlatTableMapper.class);
    job.setInputFormatClass(KafkaInputFormat.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
}
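On the mapper side, those values can be read back out of the job configuration. A minimal sketch, assuming the CONFIG_KAFKA_PARITION_* key prefixes from this job class are visible to the mapper:

// Recover one partition's offset range from the job conf (hedged sketch).
static long[] readOffsetRange(Configuration conf, int partition) {
    long start = Long.parseLong(conf.get(CONFIG_KAFKA_PARITION_START + partition));
    long end = Long.parseLong(conf.get(CONFIG_KAFKA_PARITION_END + partition));
    return new long[] { start, end };
}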
 
Example #12
Source File: SerializedHBaseTupleIterator.java    From Kylin with Apache License 2.0
public SerializedHBaseTupleIterator(HConnection conn, List<HBaseKeyRange> segmentKeyRanges, CubeInstance cube,
        Collection<TblColRef> dimensions, TupleFilter filter, Collection<TblColRef> groupBy,
        Collection<RowValueDecoder> rowValueDecoders, StorageContext context) {

    this.context = context;
    int limit = context.getLimit();
    this.partialResultLimit = Math.max(limit, PARTIAL_DEFAULT_LIMIT);

    this.segmentIteratorList = new ArrayList<CubeSegmentTupleIterator>(segmentKeyRanges.size());
    Map<CubeSegment, List<HBaseKeyRange>> rangesMap = makeRangesMap(segmentKeyRanges);
    for (Map.Entry<CubeSegment, List<HBaseKeyRange>> entry : rangesMap.entrySet()) {
        CubeSegmentTupleIterator segIter = new CubeSegmentTupleIterator(entry.getKey(), entry.getValue(), conn,
                dimensions, filter, groupBy, rowValueDecoders, context);
        this.segmentIteratorList.add(segIter);
    }

    this.segmentIteratorIterator = this.segmentIteratorList.iterator();
    if (this.segmentIteratorIterator.hasNext()) {
        this.segmentIterator = this.segmentIteratorIterator.next();
    } else {
        this.segmentIterator = ITupleIterator.EMPTY_TUPLE_ITERATOR;
    }
}
 
Example #13
Source File: LocalWithSparkSessionTest.java    From kylin-on-parquet-v2 with Apache License 2.0
public ExecutableState buildCuboid(String cubeName, SegmentRange.TSRange tsRange) throws Exception {
    KylinConfig config = KylinConfig.getInstanceFromEnv();
    CubeManager cubeMgr = CubeManager.getInstance(config);
    CubeInstance cube = cubeMgr.getCube(cubeName);
    ExecutableManager execMgr = ExecutableManager.getInstance(config);
    DataModelManager.getInstance(config).getModels();
    // ready cube, segment, cuboid layout
    CubeSegment oneSeg = cubeMgr.appendSegment(cube, tsRange);
    NSparkCubingJob job = NSparkCubingJob.create(Sets.newHashSet(oneSeg), "ADMIN");
    NSparkCubingStep sparkStep = job.getSparkCubingStep();
    StorageURL distMetaUrl = StorageURL.valueOf(sparkStep.getDistMetaUrl());
    Assert.assertEquals("hdfs", distMetaUrl.getScheme());
    Assert.assertTrue(distMetaUrl.getParameter("path").startsWith(config.getHdfsWorkingDirectory()));

    // launch the job
    execMgr.addJob(job);

    return wait(job);
}
 
Example #14
Source File: CubeController.java    From kylin with Apache License 2.0
/**
 * Get the flat-table SQL of a cube segment.
 *
 * @param cubeName    Cube Name
 * @param segmentName Segment Name
 * @return a GeneralResponse whose "sql" property holds the generated SELECT statement
 */
@RequestMapping(value = "/{cubeName}/segs/{segmentName}/sql", method = { RequestMethod.GET }, produces = {
        "application/json" })
@ResponseBody
public GeneralResponse getSql(@PathVariable String cubeName, @PathVariable String segmentName) {

    checkCubeExists(cubeName);
    CubeInstance cube = cubeService.getCubeManager().getCube(cubeName);

    CubeSegment segment = cube.getSegment(segmentName, null);
    if (segment == null) {
        throw new NotFoundException("Cannot find segment " + segmentName);
    }

    IJoinedFlatTableDesc flatTableDesc = new CubeJoinedFlatTableDesc(segment, true);
    String sql = JoinedFlatTable.generateSelectDataStatement(flatTableDesc);

    GeneralResponse response = new GeneralResponse();
    response.setProperty("sql", sql);

    return response;
}
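Client-side, the endpoint can be exercised with plain Java (11+). A hedged sketch: the host, port, and /kylin/api/cubes context path are assumptions about a typical Kylin deployment, and authentication is omitted.

// URL pieces are placeholders; uses only the JDK HTTP client.
static String fetchSegmentSql() throws Exception {
    java.net.http.HttpRequest request = java.net.http.HttpRequest.newBuilder(
            java.net.URI.create("http://localhost:7070/kylin/api/cubes/my_cube/segs/my_segment/sql"))
            .GET().build();
    return java.net.http.HttpClient.newHttpClient()
            .send(request, java.net.http.HttpResponse.BodyHandlers.ofString())
            .body(); // JSON containing the "sql" property
}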
 
Example #15
Source File: JoinedFlatTable.java    From kylin-on-parquet-v2 with Apache License 2.0
public static String generateInsertDataStatement(IJoinedFlatTableDesc flatDesc) {
    CubeSegment segment = (CubeSegment) flatDesc.getSegment();
    KylinConfig kylinConfig;
    if (null == segment) {
        kylinConfig = KylinConfig.getInstanceFromEnv();
    } else {
        kylinConfig = segment.getConfig();
    }

    if (kylinConfig.isAdvancedFlatTableUsed()) {
        try {
            Class<?> advancedFlatTable = Class.forName(kylinConfig.getAdvancedFlatTableClass());
            // Look up a single-argument static method so the signature matches the invoke call below.
            Method method = advancedFlatTable.getMethod("generateInsertDataStatement", IJoinedFlatTableDesc.class);
            return (String) method.invoke(null, flatDesc);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    return "INSERT OVERWRITE TABLE " + quoteIdentifier(flatDesc.getTableName(), null) + " " + generateSelectDataStatement(flatDesc)
            + ";\n";
}
 
Example #16
Source File: CubingJobBuilder.java    From Kylin with Apache License 2.0
private UpdateCubeInfoAfterMergeStep createUpdateCubeInfoAfterMergeStep(CubeSegment seg, List<String> mergingSegmentIds, String convertToHFileStepId, String jobId) {
    UpdateCubeInfoAfterMergeStep result = new UpdateCubeInfoAfterMergeStep();
    result.setName(ExecutableConstants.STEP_NAME_UPDATE_CUBE_INFO);
    result.setCubeName(seg.getCubeInstance().getName());
    result.setSegmentId(seg.getUuid());
    result.setMergingSegmentIds(mergingSegmentIds);
    result.setConvertToHFileStepId(convertToHFileStepId);
    result.setCubingJobId(jobId);
    return result;
}
 
Example #17
Source File: BuildCubeWithEngine.java    From kylin-on-parquet-v2 with Apache License 2.0
private Boolean mergeSegment(String cubeName, long startDate, long endDate) throws Exception {
    CubeSegment segment = cubeManager.mergeSegments(cubeManager.getCube(cubeName), new TSRange(startDate, endDate),
            null, true);
    DefaultChainedExecutable job = EngineFactory.createBatchMergeJob(segment, "TEST");
    jobService.addJob(job);
    ExecutableState state = waitForJob(job.getId());
    return ExecutableState.SUCCEED == state;
}
 
Example #18
Source File: FlinkExecutable.java    From kylin-on-parquet-v2 with Apache License 2.0
private void attachSegmentsMetadataWithDict(List<CubeSegment> segments) throws IOException {
    Set<String> dumpList = new LinkedHashSet<>();
    dumpList.addAll(JobRelatedMetaUtil.collectCubeMetadata(segments.get(0).getCubeInstance()));
    ResourceStore rs = ResourceStore.getStore(segments.get(0).getConfig());
    for (CubeSegment segment : segments) {
        dumpList.addAll(segment.getDictionaryPaths());
        if (rs.exists(segment.getStatisticsResourcePath())) {
            // cube statistics is not available for new segment
            dumpList.add(segment.getStatisticsResourcePath());
        }
    }

    JobRelatedMetaUtil.dumpAndUploadKylinPropsAndMetadata(dumpList, (KylinConfigExt) segments.get(0).getConfig(),
            this.getParam(FlinkCubingByLayer.OPTION_META_URL.getOpt()));
}
 
Example #19
Source File: Coordinator.java    From kylin-on-parquet-v2 with Apache License 2.0
private boolean triggerSegmentBuild(String cubeName, String segmentName) {
    CubeManager cubeManager = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
    CubeInstance cubeInstance = cubeManager.getCube(cubeName);
    try {
        Pair<Long, Long> segmentRange = CubeSegment.parseSegmentName(segmentName);
        logger.info("submit streaming segment build, cube:{} segment:{}", cubeName, segmentName);
        CubeSegment newSeg = getCubeManager().appendSegment(cubeInstance,
                new TSRange(segmentRange.getFirst(), segmentRange.getSecond()));
        DefaultChainedExecutable executable = new StreamingCubingEngine().createStreamingCubingJob(newSeg,
                "SYSTEM");
        getExecutableManager().addJob(executable);
        CubingJob cubingJob = (CubingJob) executable;
        newSeg.setLastBuildJobID(cubingJob.getId());

        SegmentJobBuildInfo segmentJobBuildInfo = new SegmentJobBuildInfo(cubeName, segmentName, cubingJob.getId());
        jobStatusChecker.addSegmentBuildJob(segmentJobBuildInfo);
        SegmentBuildState.BuildState state = new SegmentBuildState.BuildState();
        state.setBuildStartTime(System.currentTimeMillis());
        state.setState(SegmentBuildState.BuildState.State.BUILDING);
        state.setJobId(cubingJob.getId());
        streamMetadataStore.updateSegmentBuildState(cubeName, segmentName, state);
        return true;
    } catch (Exception e) {
        logger.error("streaming job submit fail, cubeName:" + cubeName + " segment:" + segmentName, e);
        return false;
    }
}
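The segment name above encodes the segment's time range, which CubeSegment.parseSegmentName splits back into start and end timestamps. A hedged sketch, assuming the usual yyyyMMddHHmmss_yyyyMMddHHmmss naming:

// Segment name format is an assumption; returns (startMs, endMs).
Pair<Long, Long> range = CubeSegment.parseSegmentName("20120101000000_20130101000000");
long startMs = range.getFirst();
long endMs = range.getSecond();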
 
Example #20
Source File: CubingJobBuilder.java    From Kylin with Apache License 2.0
private MapReduceExecutable createRangeRowkeyDistributionStep(CubeSegment seg, String inputPath) {
    MapReduceExecutable rowkeyDistributionStep = new MapReduceExecutable();
    rowkeyDistributionStep.setName(ExecutableConstants.STEP_NAME_GET_CUBOID_KEY_DISTRIBUTION);
    StringBuilder cmd = new StringBuilder();

    appendMapReduceParameters(cmd, seg);
    appendExecCmdParameters(cmd, "input", inputPath);
    appendExecCmdParameters(cmd, "output", getRowkeyDistributionOutputPath(seg));
    appendExecCmdParameters(cmd, "cubename", seg.getCubeInstance().getName());
    appendExecCmdParameters(cmd, "jobname", "Kylin_Region_Splits_Calculator_" + seg.getCubeInstance().getName() + "_Step");

    rowkeyDistributionStep.setMapReduceParams(cmd.toString());
    rowkeyDistributionStep.setMapReduceJobClass(RangeKeyDistributionJob.class);
    return rowkeyDistributionStep;
}
 
Example #21
Source File: UpdateCubeInfoAfterBuildStep.java    From kylin with Apache License 2.0
private void saveExtSnapshotIfNeeded(CubeManager cubeManager, CubeInstance cube, CubeSegment segment)
        throws IOException {
    String extLookupSnapshotStr = this.getParam(BatchConstants.ARG_EXT_LOOKUP_SNAPSHOTS_INFO);
    if (extLookupSnapshotStr == null || extLookupSnapshotStr.isEmpty()) {
        return;
    }
    Map<String, String> extLookupSnapshotMap = LookupMaterializeContext.parseLookupSnapshots(extLookupSnapshotStr);
    logger.info("update ext lookup snapshots:{}", extLookupSnapshotMap);
    List<SnapshotTableDesc> snapshotTableDescList = cube.getDescriptor().getSnapshotTableDescList();
    for (SnapshotTableDesc snapshotTableDesc : snapshotTableDescList) {
        String tableName = snapshotTableDesc.getTableName();
        if (snapshotTableDesc.isExtSnapshotTable()) {
            String newSnapshotResPath = extLookupSnapshotMap.get(tableName);
            if (newSnapshotResPath == null || newSnapshotResPath.isEmpty()) {
                continue;
            }

            if (snapshotTableDesc.isGlobal()) {
                if (!newSnapshotResPath.equals(cube.getSnapshotResPath(tableName))) {
                    cubeManager.updateCubeLookupSnapshot(cube, tableName, newSnapshotResPath);
                }
            } else {
                segment.putSnapshotResPath(tableName, newSnapshotResPath);
            }
        }
    }
}
 
Example #22
Source File: SegmentPruner.java    From kylin-on-parquet-v2 with Apache License 2.0
public List<CubeSegment> listSegmentsForQuery(CubeInstance cube) {
    List<CubeSegment> r = new ArrayList<>();
    for (CubeSegment seg : cube.getSegments(SegmentStatusEnum.READY)) {
        if (check(seg))
            r.add(seg);
    }
    return r;
}
 
Example #23
Source File: FlinkMergingDictionary.java    From kylin with Apache License 2.0
private List<CubeSegment> getMergingSegments(CubeInstance cube, String[] segmentIds) {
    List<CubeSegment> result = Lists.newArrayListWithCapacity(segmentIds.length);
    for (String id : segmentIds) {
        result.add(cube.getSegmentById(id));
    }
    return result;
}
 
Example #24
Source File: CubeHBaseRPC.java    From kylin with Apache License 2.0
public CubeHBaseRPC(ISegment segment, Cuboid cuboid, GTInfo fullGTInfo, StorageContext context) {
    Preconditions.checkArgument(segment instanceof CubeSegment, "segment must be CubeSegment");
    
    this.cubeSeg = (CubeSegment) segment;
    this.cuboid = cuboid;
    this.fullGTInfo = fullGTInfo;
    this.queryContext = QueryContextFacade.current();
    this.storageContext = context;

    this.fuzzyKeyEncoder = new FuzzyKeyEncoder(cubeSeg, cuboid);
    this.fuzzyMaskEncoder = new FuzzyMaskEncoder(cubeSeg, cuboid);
}
 
Example #25
Source File: AbstractHadoopJob.java    From kylin with Apache License 2.0
protected void attachSegmentMetadata(CubeSegment segment, Configuration conf, boolean ifDictIncluded,
        boolean ifStatsIncluded) throws IOException {
    Set<String> dumpList = new LinkedHashSet<>(collectCubeMetadata(segment.getCubeInstance()));
    if (ifDictIncluded) {
        dumpList.addAll(segment.getDictionaryPaths());
    }
    if (ifStatsIncluded) {
        dumpList.add(segment.getStatisticsResourcePath());
    }
    // tiretree global domain dictionary
    CubeDescTiretreeGlobalDomainDictUtil.cuboidJob(segment.getCubeDesc(), dumpList);

    dumpKylinPropsAndMetadata(segment.getProject(), dumpList, segment.getConfig(), conf);
}
 
Example #26
Source File: FactDistinctColumnsJob.java    From kylin with Apache License 2.0
private void setupReducer(Path output, CubeSegment cubeSeg)
        throws IOException {
    FactDistinctColumnsReducerMapping reducerMapping = new FactDistinctColumnsReducerMapping(cubeSeg.getCubeInstance());
    int numberOfReducers = reducerMapping.getTotalReducerNum();
    logger.info("{} has reducers {}.", this.getClass().getName(), numberOfReducers);
    if (numberOfReducers > 250) {
        throw new IllegalArgumentException(
                "The max reducer number for FactDistinctColumnsJob is 250, but now it is "
                        + numberOfReducers
                        + ", decrease 'kylin.engine.mr.uhc-reducer-count'");
    }

    job.setReducerClass(FactDistinctColumnsReducer.class);
    job.setPartitionerClass(FactDistinctColumnPartitioner.class);
    job.setNumReduceTasks(numberOfReducers);

    // make each reducer output to respective dir
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_COLUMN, SequenceFileOutputFormat.class, NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class, NullWritable.class, ArrayPrimitiveWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_STATISTICS, SequenceFileOutputFormat.class, LongWritable.class, BytesWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_PARTITION, TextOutputFormat.class, NullWritable.class, LongWritable.class);

    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

    // prevent creating a zero-sized default output
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    deletePath(job.getConfiguration(), output);
}
 
Example #27
Source File: CubingJobBuilder.java    From Kylin with Apache License 2.0
public CubingJob mergeJob(CubeSegment seg) {
    checkPreconditions(seg);
    
    CubingJob result = initialJob(seg, "MERGE");
    final String jobId = result.getId();
    final String mergedCuboidPath = getJobWorkingDir(jobId) + "/" + seg.getCubeInstance().getName() + "/cuboid/";
    
    List<CubeSegment> mergingSegments = seg.getCubeInstance().getMergingSegments(seg);
    Preconditions.checkState(mergingSegments.size() > 1, "there should be at least 2 segments to merge");
    List<String> mergingSegmentIds = Lists.newArrayList();
    List<String> mergingCuboidPaths = Lists.newArrayList();
    for (CubeSegment merging : mergingSegments) {
        mergingSegmentIds.add(merging.getUuid());
        mergingCuboidPaths.add(getPathToMerge(merging));
    }

    // merge cuboid
    addMergeSteps(seg, mergingSegmentIds, mergingCuboidPaths, mergedCuboidPath, result);
    
    // convert htable
    AbstractExecutable convertCuboidToHfileStep = addHTableSteps(seg, mergedCuboidPath, result);

    // update cube info
    result.addTask(createUpdateCubeInfoAfterMergeStep(seg, mergingSegmentIds, convertCuboidToHfileStep.getId(), jobId));

    return result;
}
 
Example #28
Source File: UpdateCubeInfoAfterOptimizeStep.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeManager = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = cubeManager.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment segment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));

    CubeSegment originalSegment = cube.getOriginalSegmentToOptimize(segment);
    long sourceCount = originalSegment.getInputRecords();
    long sourceSizeBytes = originalSegment.getInputRecordsSize();

    CubingJob cubingJob = (CubingJob) getManager().getJob(CubingExecutableUtil.getCubingJobId(this.getParams()));
    long cubeSizeBytes = cubingJob.findCubeSizeBytes();

    segment.setLastBuildJobID(CubingExecutableUtil.getCubingJobId(this.getParams()));
    segment.setLastBuildTime(System.currentTimeMillis());
    segment.setSizeKB(cubeSizeBytes / 1024);
    segment.setInputRecords(sourceCount);
    segment.setInputRecordsSize(sourceSizeBytes);
    segment.setDimensionRangeInfoMap(originalSegment.getDimensionRangeInfoMap());

    try {
        cubeManager.promoteNewlyOptimizeSegments(cube, segment);
        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to update cube after build", e);
        return ExecuteResult.createError(e);
    }
}
 
Example #29
Source File: CuboidRecommenderUtil.java    From kylin-on-parquet-v2 with Apache License 2.0
/** Trigger cube planner phase one */
public static Map<Long, Long> getRecommendCuboidList(CubeSegment segment) throws IOException {
    if (segment == null) {
        return null;
    }

    CubeStatsReader cubeStatsReader = new CubeStatsReader(segment, null, segment.getConfig());
    if (cubeStatsReader.getCuboidRowEstimatesHLL() == null
            || cubeStatsReader.getCuboidRowEstimatesHLL().isEmpty()) {
        logger.info("Cuboid Statistics is not enabled.");
        return null;
    }
    CubeInstance cube = segment.getCubeInstance();
    long baseCuboid = cube.getCuboidScheduler().getBaseCuboidId();
    if (cubeStatsReader.getCuboidRowEstimatesHLL().get(baseCuboid) == null
            || cubeStatsReader.getCuboidRowEstimatesHLL().get(baseCuboid) == 0L) {
        logger.info(BASE_CUBOID_COUNT_IN_CUBOID_STATISTICS_IS_ZERO);
        return null;
    }

    Set<Long> mandatoryCuboids = segment.getCubeDesc().getMandatoryCuboids();

    String key = cube.getName();
    CuboidStats cuboidStats = new CuboidStats.Builder(key, baseCuboid, cubeStatsReader.getCuboidRowEstimatesHLL(),
            cubeStatsReader.getCuboidSizeMap()).setMandatoryCuboids(mandatoryCuboids)
                    .setBPUSMinBenefitRatio(segment.getConfig().getCubePlannerBPUSMinBenefitRatio()).build();
    return CuboidRecommender.getInstance().getRecommendCuboidList(cuboidStats, segment.getConfig(),
            !mandatoryCuboids.isEmpty());
}
 
Example #30
Source File: CubeInstanceCreator.java    From kylin-on-parquet-v2 with Apache License 2.0
public static CubeInstance generateKylinCubeInstance(String owner, String tableName) {
    CubeInstance cubeInstance = new CubeInstance();
    cubeInstance.setName(tableName.replace('.', '_'));
    cubeInstance.setSegments(new Segments<CubeSegment>());
    cubeInstance.setDescName(tableName.replace('.', '_'));
    cubeInstance.setStatus(RealizationStatusEnum.DISABLED);
    cubeInstance.setOwner(owner);
    cubeInstance.setCreateTimeUTC(0L);
    cubeInstance.updateRandomUuid();

    return cubeInstance;
}
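A usage sketch for the factory above: dots in the table name become underscores, so the cube name and descriptor name both come out as DEFAULT_KYLIN_SALES; owner and table name are placeholders.

CubeInstance cube = CubeInstanceCreator.generateKylinCubeInstance("ADMIN", "DEFAULT.KYLIN_SALES");
System.out.println(cube.getName()); // DEFAULT_KYLIN_SALES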