Java Code Examples for org.apache.kylin.job.execution.ExecuteResult#createError()

The following examples show how to use org.apache.kylin.job.execution.ExecuteResult#createError(). The source file and the originating project are noted above each example.
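All of these snippets follow the same pattern: a job step overrides doWork(ExecutableContext), returns a successful ExecuteResult on the happy path, and returns ExecuteResult.createError(e) from the catch block so the scheduler records the step as failed together with its cause. Below is a minimal sketch of that pattern; the step class and its body are hypothetical, and the base class and import paths are assumptions based on the Kylin job framework.

import org.apache.kylin.job.exception.ExecuteException;
import org.apache.kylin.job.execution.AbstractExecutable;
import org.apache.kylin.job.execution.ExecutableContext;
import org.apache.kylin.job.execution.ExecuteResult;

// Hypothetical step illustrating the success/error pattern used by the examples below.
public class ExampleNoOpStep extends AbstractExecutable {

    @Override
    protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
        try {
            // ... the real work would use context.getConfig() here ...
            return ExecuteResult.createSucceed();
        } catch (Exception e) {
            // createError(e) marks the step as errored and preserves the root cause for the job output
            return ExecuteResult.createError(e);
        }
    }
}
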
Example 1
Source File: UpdateCubeInfoAfterCheckpointStep.java    From kylin with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeManager = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = cubeManager.getCube(CubingExecutableUtil.getCubeName(this.getParams()));

    Set<Long> recommendCuboids = cube.getCuboidsRecommend();
    try {
        List<CubeSegment> newSegments = cube.getSegments(SegmentStatusEnum.READY_PENDING);
        Map<Long, Long> recommendCuboidsWithStats = CuboidStatsReaderUtil
                .readCuboidStatsFromSegments(recommendCuboids, newSegments);
        if (recommendCuboidsWithStats == null) {
            throw new RuntimeException("Fail to get statistics info for recommended cuboids after optimization!!!");
        }
        cubeManager.promoteCheckpointOptimizeSegments(cube, recommendCuboidsWithStats,
                newSegments.toArray(new CubeSegment[newSegments.size()]));
        return new ExecuteResult();
    } catch (Exception e) {
        logger.error("fail to update cube after build", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 2
Source File: UpdateCubeInfoAfterCheckpointStep.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeManager = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = cubeManager.getCube(CubingExecutableUtil.getCubeName(this.getParams()));

    Set<Long> recommendCuboids = cube.getCuboidsRecommend();
    try {
        List<CubeSegment> newSegments = cube.getSegments(SegmentStatusEnum.READY_PENDING);
        Map<Long, Long> recommendCuboidsWithStats = CuboidStatsReaderUtil
                .readCuboidStatsFromSegments(recommendCuboids, newSegments);
        if (recommendCuboidsWithStats == null) {
            throw new RuntimeException("Fail to get statistics info for recommended cuboids after optimization!!!");
        }
        cubeManager.promoteCheckpointOptimizeSegments(cube, recommendCuboidsWithStats,
                newSegments.toArray(new CubeSegment[newSegments.size()]));
        return new ExecuteResult();
    } catch (Exception e) {
        logger.error("fail to update cube after build", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 3
Source File: NSparkExecutable.java    From kylin-on-parquet-v2 with Apache License 2.0
private ExecuteResult runSparkSubmit(KylinConfig config, String hadoopConf, String jars,
        String kylinJobJar, String appArgs, String jobId) {
    PatternedLogger patternedLogger;
    if (config.isJobLogPrintEnabled()) {
        patternedLogger = new PatternedLogger(logger);
    } else {
        patternedLogger = new PatternedLogger(null);
    }

    try {
        String cmd = generateSparkCmd(config, hadoopConf, jars, kylinJobJar, appArgs);

        CliCommandExecutor exec = new CliCommandExecutor();
        Pair<Integer, String> result = exec.execute(cmd, patternedLogger, jobId);

        Map<String, String> extraInfo = makeExtraInfo(patternedLogger.getInfo());
        ExecuteResult ret = ExecuteResult.createSucceed(result.getSecond());
        ret.getExtraInfo().putAll(extraInfo);
        updateMetaAfterBuilding(config);
        return ret;
    } catch (Exception e) {
        return ExecuteResult.createError(e);
    }
}
 
Example 4
Source File: GarbageCollectionStep.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    KylinConfig config = context.getConfig();
    StringBuffer output = new StringBuffer();
    try {
        output.append(cleanUpIntermediateFlatTable(config));
    } catch (IOException e) {
        logger.error("job:" + getId() + " execute finished with exception", e);
        return ExecuteResult.createError(e);
    }

    return new ExecuteResult(ExecuteResult.State.SUCCEED, output.toString());
}
 
Example 5
Source File: LookupSnapshotToMetaStoreStep.java    From kylin with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    KylinConfig kylinConfig = context.getConfig();
    CubeManager cubeManager = CubeManager.getInstance(kylinConfig);
    TableMetadataManager metaMgr = TableMetadataManager.getInstance(kylinConfig);
    SnapshotManager snapshotMgr = SnapshotManager.getInstance(kylinConfig);
    CubeInstance cube = cubeManager.getCube(LookupExecutableUtil.getCubeName(this.getParams()));
    List<String> segmentIDs = LookupExecutableUtil.getSegments(this.getParams());
    String lookupTableName = LookupExecutableUtil.getLookupTableName(this.getParams());
    CubeDesc cubeDesc = cube.getDescriptor();
    try {
        TableDesc tableDesc = metaMgr.getTableDesc(lookupTableName, cube.getProject());
        IReadableTable hiveTable = SourceManager.createReadableTable(tableDesc, null);
        logger.info("take snapshot for table:" + lookupTableName);
        SnapshotTable snapshot = snapshotMgr.buildSnapshot(hiveTable, tableDesc, cube.getConfig());

        logger.info("update snapshot path to cube metadata");
        if (cubeDesc.isGlobalSnapshotTable(lookupTableName)) {
            LookupExecutableUtil.updateSnapshotPathToCube(cubeManager, cube, lookupTableName,
                    snapshot.getResourcePath());
        } else {
            LookupExecutableUtil.updateSnapshotPathToSegments(cubeManager, cube, segmentIDs, lookupTableName,
                    snapshot.getResourcePath());
        }
        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to build snapshot for:" + lookupTableName, e);
        return ExecuteResult.createError(e);
    }
}
 
Example 6
Source File: ShellExecutable.java    From kylin with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    try {
        logger.info("executing:" + getCmd());
        final PatternedLogger patternedLogger = new PatternedLogger(logger);
        final Pair<Integer, String> result = context.getConfig().getCliCommandExecutor().execute(getCmd(), patternedLogger);
        getManager().addJobInfo(getId(), patternedLogger.getInfo());
        return result.getFirst() == 0 ? new ExecuteResult(ExecuteResult.State.SUCCEED, result.getSecond())
                : ExecuteResult.createFailed(new ShellException(result.getSecond()));
    } catch (IOException e) {
        logger.error("job:" + getId() + " execute finished with exception", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 7
Source File: UpdateCubeAfterSnapshotStep.java    From kylin with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    KylinConfig kylinConfig = context.getConfig();
    CubeManager cubeManager = CubeManager.getInstance(kylinConfig);

    CubeInstance cube = cubeManager.getCube(LookupExecutableUtil.getCubeName(this.getParams()));
    List<String> segmentIDs = LookupExecutableUtil.getSegments(this.getParams());
    String lookupTableName = LookupExecutableUtil.getLookupTableName(this.getParams());

    String extLookupSnapshotStr = this.getParam(BatchConstants.ARG_EXT_LOOKUP_SNAPSHOTS_INFO);
    if (extLookupSnapshotStr == null || extLookupSnapshotStr.isEmpty()) {
        return new ExecuteResult();
    }

    Map<String, String> extLookupSnapshotMap = LookupMaterializeContext.parseLookupSnapshots(extLookupSnapshotStr);
    String snapshotResPath = extLookupSnapshotMap.get(lookupTableName);
    if (snapshotResPath == null) {
        logger.info("no snapshot path exist in the context, so no need to update snapshot path");
        return new ExecuteResult();
    }
    CubeDesc cubeDesc = cube.getDescriptor();
    try {
        logger.info("update snapshot path:{} to cube:{}", snapshotResPath, cube.getName());
        if (cubeDesc.isGlobalSnapshotTable(lookupTableName)) {
            if (!snapshotResPath.equals(cube.getSnapshotResPath(lookupTableName))) {
                LookupExecutableUtil.updateSnapshotPathToCube(cubeManager, cube, lookupTableName,
                        snapshotResPath);
            }
        } else {
            LookupExecutableUtil.updateSnapshotPathToSegments(cubeManager, cube, segmentIDs, lookupTableName,
                    snapshotResPath);
        }
        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to save cuboid statistics", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 8
Source File: LookupSnapshotToMetaStoreStep.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    KylinConfig kylinConfig = context.getConfig();
    CubeManager cubeManager = CubeManager.getInstance(kylinConfig);
    TableMetadataManager metaMgr = TableMetadataManager.getInstance(kylinConfig);
    SnapshotManager snapshotMgr = SnapshotManager.getInstance(kylinConfig);
    CubeInstance cube = cubeManager.getCube(LookupExecutableUtil.getCubeName(this.getParams()));
    List<String> segmentIDs = LookupExecutableUtil.getSegments(this.getParams());
    String lookupTableName = LookupExecutableUtil.getLookupTableName(this.getParams());
    CubeDesc cubeDesc = cube.getDescriptor();
    try {
        TableDesc tableDesc = metaMgr.getTableDesc(lookupTableName, cube.getProject());
        IReadableTable hiveTable = SourceManager.createReadableTable(tableDesc, null);
        logger.info("take snapshot for table:" + lookupTableName);
        SnapshotTable snapshot = snapshotMgr.buildSnapshot(hiveTable, tableDesc, cube.getConfig());

        logger.info("update snapshot path to cube metadata");
        if (cubeDesc.isGlobalSnapshotTable(lookupTableName)) {
            LookupExecutableUtil.updateSnapshotPathToCube(cubeManager, cube, lookupTableName,
                    snapshot.getResourcePath());
        } else {
            LookupExecutableUtil.updateSnapshotPathToSegments(cubeManager, cube, segmentIDs, lookupTableName,
                    snapshot.getResourcePath());
        }
        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to build snapshot for:" + lookupTableName, e);
        return ExecuteResult.createError(e);
    }
}
 
Example 9
Source File: UpdateCubeAfterSnapshotStep.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    KylinConfig kylinConfig = context.getConfig();
    CubeManager cubeManager = CubeManager.getInstance(kylinConfig);

    CubeInstance cube = cubeManager.getCube(LookupExecutableUtil.getCubeName(this.getParams()));
    List<String> segmentIDs = LookupExecutableUtil.getSegments(this.getParams());
    String lookupTableName = LookupExecutableUtil.getLookupTableName(this.getParams());

    String extLookupSnapshotStr = this.getParam(BatchConstants.ARG_EXT_LOOKUP_SNAPSHOTS_INFO);
    if (extLookupSnapshotStr == null || extLookupSnapshotStr.isEmpty()) {
        return new ExecuteResult();
    }

    Map<String, String> extLookupSnapshotMap = LookupMaterializeContext.parseLookupSnapshots(extLookupSnapshotStr);
    String snapshotResPath = extLookupSnapshotMap.get(lookupTableName);
    if (snapshotResPath == null) {
        logger.info("no snapshot path exist in the context, so no need to update snapshot path");
        return new ExecuteResult();
    }
    CubeDesc cubeDesc = cube.getDescriptor();
    try {
        logger.info("update snapshot path:{} to cube:{}", snapshotResPath, cube.getName());
        if (cubeDesc.isGlobalSnapshotTable(lookupTableName)) {
            if (!snapshotResPath.equals(cube.getSnapshotResPath(lookupTableName))) {
                LookupExecutableUtil.updateSnapshotPathToCube(cubeManager, cube, lookupTableName,
                        snapshotResPath);
            }
        } else {
            LookupExecutableUtil.updateSnapshotPathToSegments(cubeManager, cube, segmentIDs, lookupTableName,
                    snapshotResPath);
        }
        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to save cuboid statistics", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 10
Source File: UpdateCubeInfoAfterBuildStep.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeManager = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = cubeManager.getCube(CubingExecutableUtil.getCubeName(this.getParams()))
            .latestCopyForWrite();
    final CubeSegment segment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));

    CubingJob cubingJob = (CubingJob) getManager().getJob(CubingExecutableUtil.getCubingJobId(this.getParams()));
    long sourceCount = cubingJob.findSourceRecordCount();
    long sourceSizeBytes = cubingJob.findSourceSizeBytes();
    long cubeSizeBytes = cubingJob.findCubeSizeBytes();

    KylinConfig config = KylinConfig.getInstanceFromEnv();
    List<Double> cuboidEstimateRatio = cubingJob.findEstimateRatio(segment, config);

    segment.setLastBuildJobID(CubingExecutableUtil.getCubingJobId(this.getParams()));
    segment.setLastBuildTime(System.currentTimeMillis());
    segment.setSizeKB(cubeSizeBytes / 1024);
    segment.setInputRecords(sourceCount);
    segment.setInputRecordsSize(sourceSizeBytes);
    segment.setEstimateRatio(cuboidEstimateRatio);

    try {
        saveExtSnapshotIfNeeded(cubeManager, cube, segment);
        updateSegment(segment);

        cubeManager.promoteNewlyBuiltSegments(cube, segment);
        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to update cube after build", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 11
Source File: UpdateCubeInfoAfterOptimizeStep.java    From kylin with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeManager = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = cubeManager.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment segment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));

    CubeSegment originalSegment = cube.getOriginalSegmentToOptimize(segment);
    long sourceCount = originalSegment.getInputRecords();
    long sourceSizeBytes = originalSegment.getInputRecordsSize();

    CubingJob cubingJob = (CubingJob) getManager().getJob(CubingExecutableUtil.getCubingJobId(this.getParams()));
    long cubeSizeBytes = cubingJob.findCubeSizeBytes();

    segment.setLastBuildJobID(CubingExecutableUtil.getCubingJobId(this.getParams()));
    segment.setLastBuildTime(System.currentTimeMillis());
    segment.setSizeKB(cubeSizeBytes / 1024);
    segment.setInputRecords(sourceCount);
    segment.setInputRecordsSize(sourceSizeBytes);
    segment.setDimensionRangeInfoMap(originalSegment.getDimensionRangeInfoMap());

    try {
        cubeManager.promoteNewlyOptimizeSegments(cube, segment);
        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to update cube after build", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 12
Source File: ShellExecutable.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    try {
        logger.info("executing:" + getCmd());
        final PatternedLogger patternedLogger = new PatternedLogger(logger);
        final Pair<Integer, String> result = context.getConfig().getCliCommandExecutor().execute(getCmd(), patternedLogger, null);
        getManager().addJobInfo(getId(), patternedLogger.getInfo());
        return result.getFirst() == 0 ? new ExecuteResult(ExecuteResult.State.SUCCEED, result.getSecond())
                : ExecuteResult.createFailed(new ShellException(result.getSecond()));
    } catch (IOException e) {
        logger.error("job:" + getId() + " execute finished with exception", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 13
Source File: MergeOffsetStep.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeManager = CubeManager.getInstance(context.getConfig());
    final CubeInstance cubeCopy = cubeManager.getCube(CubingExecutableUtil.getCubeName(this.getParams())).latestCopyForWrite();
    final String segmentId = CubingExecutableUtil.getSegmentId(this.getParams());
    final CubeSegment segCopy = cubeCopy.getSegmentById(segmentId);

    Preconditions.checkNotNull(segCopy, "Cube segment '" + segmentId + "' not found.");
    Segments<CubeSegment> mergingSegs = cubeCopy.getMergingSegments(segCopy);

    Preconditions.checkArgument(mergingSegs.size() > 0, "Merging segment not exist.");

    Collections.sort(mergingSegs);
    final CubeSegment first = mergingSegs.get(0);
    final CubeSegment last = mergingSegs.get(mergingSegs.size() - 1);

    segCopy.setSegRange(new SegmentRange(first.getSegRange().start, last.getSegRange().end));
    segCopy.setSourcePartitionOffsetStart(first.getSourcePartitionOffsetStart());
    segCopy.setSourcePartitionOffsetEnd(last.getSourcePartitionOffsetEnd());

    segCopy.setTSRange(new TSRange(mergingSegs.getTSStart(), mergingSegs.getTSEnd()));

    CubeUpdate update = new CubeUpdate(cubeCopy);
    update.setToUpdateSegs(segCopy);
    try {
        cubeManager.updateCube(update);
        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to update cube segment offset", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 14
Source File: NSparkExecutable.java    From kylin-on-parquet-v2 with Apache License 2.0
private ExecuteResult runLocalMode(String appArgs, KylinConfig config) {
    try {
        Class<? extends Object> appClz = ClassUtil.forName(getSparkSubmitClassName(), Object.class);
        appClz.getMethod("main", String[].class).invoke(null, (Object) new String[] { appArgs });
        updateMetaAfterBuilding(config);
        return ExecuteResult.createSucceed();
    } catch (Exception e) {
        return ExecuteResult.createError(e);
    }
}
 
Example 15
Source File: GarbageCollectionStep.java    From kylin with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    KylinConfig config = context.getConfig();
    StringBuffer output = new StringBuffer();
    try {
        output.append(cleanUpIntermediateFlatTable(config));
    } catch (IOException e) {
        logger.error("job:" + getId() + " execute finished with exception", e);
        return ExecuteResult.createError(e);
    }

    return new ExecuteResult(ExecuteResult.State.SUCCEED, output.toString());
}
 
Example 16
Source File: MergeStatisticsWithOldStep.java    From kylin with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager mgr = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment optimizeSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));

    CubeSegment oldSegment = optimizeSegment.getCubeInstance().getOriginalSegmentToOptimize(optimizeSegment);
    Preconditions.checkNotNull(oldSegment,
            "cannot find the original segment to be optimized by " + optimizeSegment);

    KylinConfig kylinConf = cube.getConfig();
    Configuration conf = HadoopUtil.getCurrentConfiguration();
    ResourceStore rs = ResourceStore.getStore(kylinConf);
    int averageSamplingPercentage = 0;

    try {
        //1. Add statistics from optimized segment
        Path statisticsDirPath = new Path(CubingExecutableUtil.getStatisticsPath(this.getParams()));
        FileSystem hdfs = FileSystem.get(conf);
        if (!hdfs.exists(statisticsDirPath)) {
            throw new IOException("StatisticsFilePath " + statisticsDirPath + " does not exists");
        }

        if (!hdfs.isDirectory(statisticsDirPath)) {
            throw new IOException("StatisticsFilePath " + statisticsDirPath + " is not a directory");
        }

        Path[] statisticsFiles = HadoopUtil.getFilteredPath(hdfs, statisticsDirPath,
                BatchConstants.CFG_OUTPUT_STATISTICS);
        if (statisticsFiles == null) {
            throw new IOException("fail to find the statistics file in base dir: " + statisticsDirPath);
        }

        for (Path item : statisticsFiles) {
            CubeStatsReader optimizeSegmentStatsReader = new CubeStatsReader(optimizeSegment, null,
                    optimizeSegment.getConfig(), item);
            averageSamplingPercentage += optimizeSegmentStatsReader.getSamplingPercentage();
            addFromCubeStatsReader(optimizeSegmentStatsReader);
        }

        //2. Add statistics from old segment
        CubeStatsReader oldSegmentStatsReader = new CubeStatsReader(oldSegment, null, oldSegment.getConfig());
        averageSamplingPercentage += oldSegmentStatsReader.getSamplingPercentage();
        addFromCubeStatsReader(oldSegmentStatsReader);

        logger.info("Cuboid set with stats info: " + cuboidHLLMap.keySet().toString());
        //3. Store merged statistics for recommend cuboids
        averageSamplingPercentage = averageSamplingPercentage / 2;
        Set<Long> cuboidsRecommend = cube.getCuboidsRecommend();

        Map<Long, HLLCounter> resultCuboidHLLMap = Maps.newHashMapWithExpectedSize(cuboidsRecommend.size());
        for (Long cuboid : cuboidsRecommend) {
            HLLCounter hll = cuboidHLLMap.get(cuboid);
            if (hll == null) {
                logger.warn("Cannot get the row count stats for cuboid " + cuboid);
            } else {
                resultCuboidHLLMap.put(cuboid, hll);
            }
        }

        String resultDir = CubingExecutableUtil.getMergedStatisticsPath(this.getParams());
        CubeStatsWriter.writeCuboidStatistics(conf, new Path(resultDir), resultCuboidHLLMap,
                averageSamplingPercentage, oldSegmentStatsReader.getSourceRowCount());

        try (FSDataInputStream mergedStats = hdfs
                .open(new Path(resultDir, BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME))) {
            // put the statistics to metadata store
            String statisticsFileName = optimizeSegment.getStatisticsResourcePath();
            rs.putResource(statisticsFileName, mergedStats, System.currentTimeMillis());
        }

        //By default, the cube optimization will use in-memory cubing
        CubingJob cubingJob = (CubingJob) getManager()
                .getJob(CubingExecutableUtil.getCubingJobId(this.getParams()));
        StatisticsDecisionUtil.decideCubingAlgorithm(cubingJob, optimizeSegment);

        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to merge cuboid statistics", e);
        return ExecuteResult.createError(e);
    }

}
 
Example 17
Source File: UpdateDictionaryStep.java    From kylin with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeMgr = CubeManager.getInstance(context.getConfig());
    final DictionaryManager dictMgrHdfs;
    final DictionaryManager dictMgrHbase;
    final CubeInstance cube = cubeMgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final List<CubeSegment> mergingSegments = getMergingSegments(cube);
    final String dictInfoPath = this.getParams().get(BatchConstants.ARG_DICT_PATH);
    final String metadataUrl = this.getParams().get(BatchConstants.ARG_META_URL);

    final KylinConfig kylinConfHbase = cube.getConfig();
    final KylinConfig kylinConfHdfs = AbstractHadoopJob.loadKylinConfigFromHdfs(metadataUrl);

    Collections.sort(mergingSegments);

    try {
        Configuration conf = HadoopUtil.getCurrentConfiguration();
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        ResourceStore hbaseRS = ResourceStore.getStore(kylinConfHbase);
        ResourceStore hdfsRS = ResourceStore.getStore(kylinConfHdfs);
        dictMgrHdfs = DictionaryManager.getInstance(kylinConfHdfs);
        dictMgrHbase = DictionaryManager.getInstance(kylinConfHbase);

        // work on copy instead of cached objects
        CubeInstance cubeCopy = cube.latestCopyForWrite();
        CubeSegment newSegCopy = cubeCopy.getSegmentById(newSegment.getUuid());

        // update cube segment dictionary

        FileStatus[] fileStatuss = fs.listStatus(new Path(dictInfoPath), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part") || path.getName().startsWith("tmp");
            }
        });

        for (FileStatus fileStatus : fileStatuss) {
            Path filePath = fileStatus.getPath();

            SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
            Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Text value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf);

            while (reader.next(key, value)) {
                String tblCol = key.toString();
                String dictInfoResource = value.toString();

                if (StringUtils.isNotEmpty(dictInfoResource)) {
                    logger.info(dictInfoResource);
                    // put dictionary file to metadata store
                    DictionaryInfo dictInfoHdfs = dictMgrHdfs.getDictionaryInfo(dictInfoResource);
                    DictionaryInfo dicInfoHbase = dictMgrHbase.trySaveNewDict(dictInfoHdfs.getDictionaryObject(), dictInfoHdfs);

                    if (dicInfoHbase != null){
                        TblColRef tblColRef = cube.getDescriptor().findColumnRef(tblCol.split(":")[0], tblCol.split(":")[1]);
                        newSegCopy.putDictResPath(tblColRef, dicInfoHbase.getResourcePath());
                    }
                }
            }

            IOUtils.closeStream(reader);
        }

        CubeSegment lastSeg = mergingSegments.get(mergingSegments.size() - 1);
        for (Map.Entry<String, String> entry : lastSeg.getSnapshots().entrySet()) {
            newSegCopy.putSnapshotResPath(entry.getKey(), entry.getValue());
        }

        // update statistics
        // put the statistics to metadata store
        String statisticsFileName = newSegment.getStatisticsResourcePath();
        hbaseRS.putResource(statisticsFileName, hdfsRS.getResource(newSegment.getStatisticsResourcePath()).content(), System.currentTimeMillis());

        CubeUpdate update = new CubeUpdate(cubeCopy);
        update.setToUpdateSegs(newSegCopy);
        cubeMgr.updateCube(update);

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to merge dictionary", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 18
Source File: SaveStatisticsStep.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    CubeSegment newSegment = CubingExecutableUtil.findSegment(context,
            CubingExecutableUtil.getCubeName(this.getParams()),
            CubingExecutableUtil.getSegmentId(this.getParams()));
    KylinConfig kylinConf = newSegment.getConfig();

    ResourceStore rs = ResourceStore.getStore(kylinConf);
    try {

        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        Configuration hadoopConf = HadoopUtil.getCurrentConfiguration();
        Path statisticsDir = new Path(CubingExecutableUtil.getStatisticsPath(this.getParams()));
        Path[] statisticsFiles = HadoopUtil.getFilteredPath(fs, statisticsDir,
                BatchConstants.CFG_OUTPUT_STATISTICS);
        if (statisticsFiles == null) {
            throw new IOException("fail to find the statistics file in base dir: " + statisticsDir);
        }

        Map<Long, HLLCounter> cuboidHLLMap = Maps.newHashMap();
        long totalRowsBeforeMerge = 0;
        long grantTotal = 0;
        int samplingPercentage = -1;
        int mapperNumber = -1;
        for (Path item : statisticsFiles) {
            CubeStatsReader.CubeStatsResult cubeStatsResult = new CubeStatsReader.CubeStatsResult(item,
                    kylinConf.getCubeStatsHLLPrecision());
            cuboidHLLMap.putAll(cubeStatsResult.getCounterMap());
            long pGrantTotal = 0L;
            for (HLLCounter hll : cubeStatsResult.getCounterMap().values()) {
                pGrantTotal += hll.getCountEstimate();
            }
            totalRowsBeforeMerge += pGrantTotal * cubeStatsResult.getMapperOverlapRatio();
            grantTotal += pGrantTotal;
            int pMapperNumber = cubeStatsResult.getMapperNumber();
            if (pMapperNumber > 0) {
                if (mapperNumber < 0) {
                    mapperNumber = pMapperNumber;
                } else {
                    throw new RuntimeException(
                            "Base cuboid has been distributed to multiple reducers at step FactDistinctColumnsReducer!!!");
                }
            }
            int pSamplingPercentage = cubeStatsResult.getPercentage();
            if (samplingPercentage < 0) {
                samplingPercentage = pSamplingPercentage;
            } else if (samplingPercentage != pSamplingPercentage) {
                throw new RuntimeException(
                        "The sampling percentage should be same among all of the reducer of FactDistinctColumnsReducer!!!");
            }
        }
        if (samplingPercentage < 0) {
            logger.warn("The sampling percentage should be set!!!");
        }
        if (mapperNumber < 0) {
            logger.warn("The mapper number should be set!!!");
        }

        if (logger.isDebugEnabled()) {
            logMapperAndCuboidStatistics(cuboidHLLMap, samplingPercentage, mapperNumber, grantTotal,
                    totalRowsBeforeMerge);
        }
        double mapperOverlapRatio = grantTotal == 0 ? 0 : (double) totalRowsBeforeMerge / grantTotal;
        CubingJob cubingJob = (CubingJob) getManager()
                .getJob(CubingExecutableUtil.getCubingJobId(this.getParams()));
        long sourceRecordCount = cubingJob.findSourceRecordCount();
        CubeStatsWriter.writeCuboidStatistics(hadoopConf, statisticsDir, cuboidHLLMap, samplingPercentage,
                mapperNumber, mapperOverlapRatio, sourceRecordCount);

        Path statisticsFile = new Path(statisticsDir, BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME);
        logger.info("{} stats saved to hdfs {}", newSegment, statisticsFile);

        FSDataInputStream is = fs.open(statisticsFile);
        try {
            // put the statistics to metadata store
            String resPath = newSegment.getStatisticsResourcePath();
            rs.putResource(resPath, is, System.currentTimeMillis());
            logger.info("{} stats saved to resource {}", newSegment, resPath);

            StatisticsDecisionUtil.decideCubingAlgorithm(cubingJob, newSegment);
            StatisticsDecisionUtil.optimizeCubingPlan(newSegment);
        } finally {
            IOUtils.closeStream(is);
        }

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to save cuboid statistics", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 19
Source File: SaveStatisticsStep.java    From kylin with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    CubeSegment newSegment = CubingExecutableUtil.findSegment(context,
            CubingExecutableUtil.getCubeName(this.getParams()),
            CubingExecutableUtil.getSegmentId(this.getParams()));
    KylinConfig kylinConf = newSegment.getConfig();

    ResourceStore rs = ResourceStore.getStore(kylinConf);
    try {

        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        Configuration hadoopConf = HadoopUtil.getCurrentConfiguration();
        Path statisticsDir = new Path(CubingExecutableUtil.getStatisticsPath(this.getParams()));
        Path[] statisticsFiles = HadoopUtil.getFilteredPath(fs, statisticsDir,
                BatchConstants.CFG_OUTPUT_STATISTICS);
        if (statisticsFiles == null) {
            throw new IOException("fail to find the statistics file in base dir: " + statisticsDir);
        }

        Map<Long, HLLCounter> cuboidHLLMap = Maps.newHashMap();
        long totalRowsBeforeMerge = 0;
        long grantTotal = 0;
        int samplingPercentage = -1;
        int mapperNumber = -1;
        for (Path item : statisticsFiles) {
            CubeStatsReader.CubeStatsResult cubeStatsResult = new CubeStatsReader.CubeStatsResult(item,
                    kylinConf.getCubeStatsHLLPrecision());
            cuboidHLLMap.putAll(cubeStatsResult.getCounterMap());
            long pGrantTotal = 0L;
            for (HLLCounter hll : cubeStatsResult.getCounterMap().values()) {
                pGrantTotal += hll.getCountEstimate();
            }
            totalRowsBeforeMerge += pGrantTotal * cubeStatsResult.getMapperOverlapRatio();
            grantTotal += pGrantTotal;
            int pMapperNumber = cubeStatsResult.getMapperNumber();
            if (pMapperNumber > 0) {
                if (mapperNumber < 0) {
                    mapperNumber = pMapperNumber;
                } else {
                    throw new RuntimeException(
                            "Base cuboid has been distributed to multiple reducers at step FactDistinctColumnsReducer!!!");
                }
            }
            int pSamplingPercentage = cubeStatsResult.getPercentage();
            if (samplingPercentage < 0) {
                samplingPercentage = pSamplingPercentage;
            } else if (samplingPercentage != pSamplingPercentage) {
                throw new RuntimeException(
                        "The sampling percentage should be same among all of the reducer of FactDistinctColumnsReducer!!!");
            }
        }
        if (samplingPercentage < 0) {
            logger.warn("The sampling percentage should be set!!!");
        }
        if (mapperNumber < 0) {
            logger.warn("The mapper number should be set!!!");
        }

        if (logger.isDebugEnabled()) {
            logMapperAndCuboidStatistics(cuboidHLLMap, samplingPercentage, mapperNumber, grantTotal,
                    totalRowsBeforeMerge);
        }
        double mapperOverlapRatio = grantTotal == 0 ? 0 : (double) totalRowsBeforeMerge / grantTotal;
        CubingJob cubingJob = (CubingJob) getManager()
                .getJob(CubingExecutableUtil.getCubingJobId(this.getParams()));
        long sourceRecordCount = cubingJob.findSourceRecordCount();
        CubeStatsWriter.writeCuboidStatistics(hadoopConf, statisticsDir, cuboidHLLMap, samplingPercentage,
                mapperNumber, mapperOverlapRatio, sourceRecordCount);

        Path statisticsFile = new Path(statisticsDir, BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME);
        logger.info("{} stats saved to hdfs {}", newSegment, statisticsFile);

        FSDataInputStream is = fs.open(statisticsFile);
        try {
            // put the statistics to metadata store
            String resPath = newSegment.getStatisticsResourcePath();
            rs.putResource(resPath, is, System.currentTimeMillis());
            logger.info("{} stats saved to resource {}", newSegment, resPath);

            StatisticsDecisionUtil.decideCubingAlgorithm(cubingJob, newSegment);
            StatisticsDecisionUtil.optimizeCubingPlan(newSegment);
        } finally {
            IOUtils.closeStream(is);
        }

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to save cuboid statistics", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 20
Source File: UpdateDictionaryStep.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeMgr = CubeManager.getInstance(context.getConfig());
    final DictionaryManager dictMgrHdfs;
    final DictionaryManager dictMgrHbase;
    final CubeInstance cube = cubeMgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final List<CubeSegment> mergingSegments = getMergingSegments(cube);
    final String dictInfoPath = this.getParams().get(BatchConstants.ARG_DICT_PATH);
    final String metadataUrl = this.getParams().get(BatchConstants.ARG_META_URL);

    final KylinConfig kylinConfHbase = cube.getConfig();
    final KylinConfig kylinConfHdfs = AbstractHadoopJob.loadKylinConfigFromHdfs(metadataUrl);

    Collections.sort(mergingSegments);

    try {
        Configuration conf = HadoopUtil.getCurrentConfiguration();
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        ResourceStore hbaseRS = ResourceStore.getStore(kylinConfHbase);
        ResourceStore hdfsRS = ResourceStore.getStore(kylinConfHdfs);
        dictMgrHdfs = DictionaryManager.getInstance(kylinConfHdfs);
        dictMgrHbase = DictionaryManager.getInstance(kylinConfHbase);

        // work on copy instead of cached objects
        CubeInstance cubeCopy = cube.latestCopyForWrite();
        CubeSegment newSegCopy = cubeCopy.getSegmentById(newSegment.getUuid());

        // update cube segment dictionary

        FileStatus[] fileStatuss = fs.listStatus(new Path(dictInfoPath), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part") || path.getName().startsWith("tmp");
            }
        });

        for (FileStatus fileStatus : fileStatuss) {
            Path filePath = fileStatus.getPath();

            SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
            Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Text value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf);

            while (reader.next(key, value)) {
                String tblCol = key.toString();
                String dictInfoResource = value.toString();

                if (StringUtils.isNotEmpty(dictInfoResource)) {
                    logger.info(dictInfoResource);
                    // put dictionary file to metadata store
                    DictionaryInfo dictInfoHdfs = dictMgrHdfs.getDictionaryInfo(dictInfoResource);
                    DictionaryInfo dicInfoHbase = dictMgrHbase.trySaveNewDict(dictInfoHdfs.getDictionaryObject(), dictInfoHdfs);

                    if (dicInfoHbase != null){
                        TblColRef tblColRef = cube.getDescriptor().findColumnRef(tblCol.split(":")[0], tblCol.split(":")[1]);
                        newSegCopy.putDictResPath(tblColRef, dicInfoHbase.getResourcePath());
                    }
                }
            }

            IOUtils.closeStream(reader);
        }

        CubeSegment lastSeg = mergingSegments.get(mergingSegments.size() - 1);
        for (Map.Entry<String, String> entry : lastSeg.getSnapshots().entrySet()) {
            newSegCopy.putSnapshotResPath(entry.getKey(), entry.getValue());
        }

        // update statistics
        // put the statistics to metadata store
        String statisticsFileName = newSegment.getStatisticsResourcePath();
        hbaseRS.putResource(statisticsFileName, hdfsRS.getResource(newSegment.getStatisticsResourcePath()).content(), System.currentTimeMillis());

        CubeUpdate update = new CubeUpdate(cubeCopy);
        update.setToUpdateSegs(newSegCopy);
        cubeMgr.updateCube(update);

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to merge dictionary", e);
        return ExecuteResult.createError(e);
    }
}