Java Code Examples for org.apache.kylin.cube.CubeManager#getCube()

The following examples show how to use org.apache.kylin.cube.CubeManager#getCube(). They are taken from open-source Apache Kylin projects; the originating project, source file, and license are noted above each example.
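Before the full examples, here is a minimal sketch of the pattern they all share: obtain the CubeManager for a KylinConfig, then look up a CubeInstance by name with getCube() (which typically returns null when no cube with that name exists). The class name GetCubeExample and the cube name "sample_cube" below are placeholders, not names from the examples.

import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;

public class GetCubeExample {
    public static void main(String[] args) {
        // Load the Kylin configuration from the environment.
        KylinConfig config = KylinConfig.getInstanceFromEnv();

        // CubeManager is obtained per KylinConfig and caches cube metadata.
        CubeManager cubeManager = CubeManager.getInstance(config);

        // Look up a cube by name; "sample_cube" is a placeholder for an existing cube.
        CubeInstance cube = cubeManager.getCube("sample_cube");
        if (cube == null) {
            System.out.println("No cube named sample_cube");
            return;
        }
        System.out.println("Cube " + cube.getName() + " has " + cube.getSegments().size() + " segment(s)");
    }
}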
Example 1
Source File: HybridCubeCLITest.java    From kylin-on-parquet-v2 with Apache License 2.0
@Test
public void testSegmentOverlap() throws IOException {
    thrown.expect(RuntimeException.class);
    thrown.expectMessage("Segments has overlap");

    HybridManager hybridManager = HybridManager.getInstance(KylinConfig.getInstanceFromEnv());
    Assert.assertNull(hybridManager.getHybridInstance("ssb_hybrid"));
    HybridCubeCLI.main(new String[] { "-name", "ssb_hybrid", "-project", "default", "-model", "ssb", "-cubes", "ssb_cube1,ssb_cube2", "-action", "create" });

    CubeManager cubeManager = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
    CubeInstance cube1 = cubeManager.getCube("ssb_cube1");
    CubeInstance cube2 = cubeManager.getCube("ssb_cube2");

    // 2012-01-01,2012-01-03
    cubeManager.appendSegment(cube1, new SegmentRange.TSRange(1325376000000L, 1325548800000L));
    // 2012-01-02,2012-01-04
    cubeManager.appendSegment(cube2, new SegmentRange.TSRange(1325462400000L, 1325635200000L));

    HybridCubeCLI.main(new String[] { "-name", "ssb_hybrid", "-project", "default", "-model", "ssb", "-cubes", "ssb_cube1,ssb_cube2", "-action", "update" });
}
 
Example 2
Source File: ColumnarSplitReader.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    if (!(split instanceof FileSplit)) {
        throw new IllegalArgumentException("Only compatible with FileSplits.");
    } else {
        logger.debug("CFG_Cube_Name: " + BatchConstants.CFG_CUBE_NAME);
        cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase(Locale.ROOT);
        segmentName = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_NAME).toUpperCase(Locale.ROOT);

        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
        CubeManager cubeManager = CubeManager.getInstance(config);
        cube = cubeManager.getCube(cubeName);
        cubeDesc = cube.getDescriptor();
        cubeSegment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);
    }
}
 
Example 3
Source File: DeployCoprocessorCLI.java    From kylin with Apache License 2.0
private static List<String> filterByProjects(List<String> allTableNames, List<String> projectNames) {
    ProjectManager projectManager = ProjectManager.getInstance(KylinConfig.getInstanceFromEnv());
    CubeManager cubeManager = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());

    List<String> result = Lists.newArrayList();
    for (String p : projectNames) {
        p = p.trim();
        if (p.endsWith(",")) {
            p = p.substring(0, p.length() - 1);
        }

        ProjectInstance projectInstance = projectManager.getProject(p);
        List<RealizationEntry> cubeList = projectInstance.getRealizationEntries(RealizationType.CUBE);
        for (RealizationEntry cube : cubeList) {
            CubeInstance cubeInstance = cubeManager.getCube(cube.getRealization());
            for (CubeSegment segment : cubeInstance.getSegments()) {
                String tableName = segment.getStorageLocationIdentifier();
                if (allTableNames.contains(tableName)) {
                    result.add(tableName);
                }
            }
        }
    }
    return result;
}
 
Example 4
Source File: MergeCuboidMapper.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected void doSetup(Context context) throws IOException, InterruptedException {
    super.bindCurrentConfiguration(context.getConfiguration());

    String cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME);
    String segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID);

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();

    CubeManager cubeManager = CubeManager.getInstance(config);
    CubeInstance cube = cubeManager.getCube(cubeName);
    CubeDesc cubeDesc = cube.getDescriptor();
    CubeSegment mergedCubeSegment = cube.getSegmentById(segmentID);

    // decide which source segment
    FileSplit fileSplit = (FileSplit) context.getInputSplit();
    IMROutput2.IMRMergeOutputFormat outputFormat = MRUtil.getBatchMergeOutputSide2(mergedCubeSegment)
            .getOutputFormat();
    CubeSegment sourceCubeSegment = outputFormat.findSourceSegment(fileSplit, cube);
    reEncoder = new SegmentReEncoder(cubeDesc, sourceCubeSegment, mergedCubeSegment, config);
}
 
Example 5
Source File: UpdateCubeInfoAfterCheckpointStep.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeManager = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = cubeManager.getCube(CubingExecutableUtil.getCubeName(this.getParams()));

    Set<Long> recommendCuboids = cube.getCuboidsRecommend();
    try {
        List<CubeSegment> newSegments = cube.getSegments(SegmentStatusEnum.READY_PENDING);
        Map<Long, Long> recommendCuboidsWithStats = CuboidStatsReaderUtil
                .readCuboidStatsFromSegments(recommendCuboids, newSegments);
        if (recommendCuboidsWithStats == null) {
            throw new RuntimeException("Fail to get statistics info for recommended cuboids after optimization!!!");
        }
        cubeManager.promoteCheckpointOptimizeSegments(cube, recommendCuboidsWithStats,
                newSegments.toArray(new CubeSegment[newSegments.size()]));
        return new ExecuteResult();
    } catch (Exception e) {
        logger.error("fail to update cube after build", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 6
Source File: BaseCuboidMapperTest.java    From Kylin with Apache License 2.0
@Test
public void testMapperWithNull() throws Exception {
    String cubeName = "test_kylin_cube_with_slr_1_new_segment";
    String segmentName = "20130331080000_20131212080000";
    mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
    mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
    // mapDriver.getConfiguration().set(BatchConstants.CFG_METADATA_URL,
    // metadata);
    mapDriver.withInput(new Text("key"), new Text("2012-12-15118480Health & BeautyFragrances\\NAuction15123456789\\N"));
    List<Pair<Text, Text>> result = mapDriver.run();

    CubeManager cubeMgr = CubeManager.getInstance(getTestConfig());
    CubeInstance cube = cubeMgr.getCube(cubeName);

    assertEquals(1, result.size());
    Text rowkey = result.get(0).getFirst();
    byte[] key = rowkey.getBytes();
    byte[] header = Bytes.head(key, 26);
    byte[] sellerId = Bytes.tail(header, 18);
    byte[] cuboidId = Bytes.head(header, 8);
    byte[] restKey = Bytes.tail(key, rowkey.getLength() - 26);

    RowKeyDecoder decoder = new RowKeyDecoder(cube.getFirstSegment());
    decoder.decode(key);
    assertEquals("[123456789, 2012-12-15, 11848, Health & Beauty, Fragrances, null, Auction, 0, 15]", decoder.getValues().toString());

    assertTrue(Bytes.toString(sellerId).startsWith("123456789"));
    assertEquals(511, Bytes.toLong(cuboidId));
    assertEquals(22, restKey.length);

    verifyMeasures(cube.getDescriptor().getMeasures(), result.get(0).getSecond(), "0", "0", "0");
}
 
Example 7
Source File: ProjectManagerTest.java    From kylin-on-parquet-v2 with Apache License 2.0
@Test
public void testProjectsDrop() throws IOException {
    ProjectManager prjMgr = ProjectManager.getInstance(getTestConfig());
    CubeManager cubeMgr = CubeManager.getInstance(getTestConfig());

    CubeInstance cube = cubeMgr.getCube("ci_left_join_cube");
    assertTrue(prjMgr.getRealizationsByTable("default", "default.test_kylin_fact").contains(cube));
    assertTrue(prjMgr.listAllRealizations("default").contains(cube));

    cubeMgr.dropCube(cube.getName(), false);

    assertTrue(!prjMgr.getRealizationsByTable("default", "default.test_kylin_fact").contains(cube));
    assertTrue(!prjMgr.listAllRealizations("default").contains(cube));
}
 
Example 8
Source File: ColumnToRowJob.java    From kylin with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);

        parseOptions(options, args);
        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
        String segmentName = getOptionValue(OPTION_SEGMENT_NAME);

        KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
        CubeManager cubeMgr = CubeManager.getInstance(kylinConfig);
        CubeInstance cube = cubeMgr.getCube(cubeName);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        setJobClasspath(job, cube.getConfig());
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);

        job.setMapperClass(ColumnToRowMapper.class);
        job.setInputFormatClass(ColumnarSplitDataInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(ColumnToRowReducer.class);
        job.setNumReduceTasks(calReducerNum(input));
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.getConfiguration().set("dfs.block.size", cube.getConfig().getStreamingBasicCuboidJobDFSBlockSize());
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);

        CubeSegment segment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);
        attachSegmentMetadataWithDict(segment, job.getConfiguration());
        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } catch (Exception e) {
        logger.error("error in CuboidJob", e);
        printUsage(options);
        throw e;
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}
 
Example 9
Source File: LocalWithSparkSessionTest.java    From kylin-on-parquet-v2 with Apache License 2.0
protected void cleanupSegments(String cubeName) throws IOException {
    KylinConfig config = KylinConfig.getInstanceFromEnv();
    CubeManager cubeMgr = CubeManager.getInstance(config);
    CubeInstance cube = cubeMgr.getCube(cubeName);
    cubeMgr.updateCubeDropSegments(cube, cube.getSegments());
}
 
Example 10
Source File: SignatureCalculatorTest.java    From kylin-on-parquet-v2 with Apache License 2.0
@Test
public void testRealizationSetCalculator() throws IOException {
    KylinConfig config = KylinConfig.createKylinConfig(getTestConfig());
    Map<String, String> overrides = Maps.newHashMap();
    overrides.put("kylin.query.signature-class", "org.apache.kylin.rest.signature.RealizationSetCalculator");

    ProjectInstance projectInstance = ProjectManager.getInstance(config).getProject(projectName);
    projectInstance.setConfig(KylinConfigExt.createInstance(config, overrides));

    HybridManager hybridManager = HybridManager.getInstance(config);
    HybridInstance hybrid1 = hybridManager.getHybridInstance("test_kylin_hybrid_ready");

    CubeManager cubeManager = CubeManager.getInstance(config);
    CubeInstance cube1 = cubeManager.getCube("test_kylin_cube_with_slr_ready_2_segments");
    CubeInstance cube2 = cubeManager.getCube("test_kylin_cube_without_slr_ready");
    CubeInstance cube2Clone = cloneCubeInstance(cubeManager, cube2, cube2.getName() + "_clone");

    //Related cubes:
    // - test_kylin_cube_with_slr_ready
    // - test_kylin_cube_with_slr_ready_2_segments
    // - test_kylin_cube_without_slr_ready
    String cubes = hybrid1.getCanonicalName() + "," + cube2Clone.getCanonicalName();

    SQLResponse sqlResponse = new SQLResponse();
    sqlResponse.setCube(cubes);

    String signature = SQLResponseSignatureUtil.createSignature(config, sqlResponse, projectName);
    sqlResponse.setSignature(signature);

    Assert.assertTrue(SQLResponseSignatureUtil.checkSignature(config, sqlResponse, projectName));

    { //Test the influence of a related cube's status change
        cube1 = cubeManager.updateCubeStatus(cube1, RealizationStatusEnum.DISABLED);
        Assert.assertFalse(SQLResponseSignatureUtil.checkSignature(config, sqlResponse, projectName));

        cube1 = cubeManager.updateCubeStatus(cube1, RealizationStatusEnum.READY);
        Assert.assertTrue(SQLResponseSignatureUtil.checkSignature(config, sqlResponse, projectName));
    }

    {//Test the influence of segment changes
        cube2Clone = cubeManager.updateCubeDropSegments(cube2Clone, cube2Clone.getSegments().get(0));
        Assert.assertFalse(SQLResponseSignatureUtil.checkSignature(config, sqlResponse, projectName));
    }
}
 
Example 11
Source File: BulkLoadJob.java    From Kylin with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_HTABLE_NAME);
        options.addOption(OPTION_CUBE_NAME);
        parseOptions(options, args);

        String tableName = getOptionValue(OPTION_HTABLE_NAME).toUpperCase();
        // e.g. /tmp/kylin-3f150b00-3332-41ca-9d3d-652f67f044d7/test_kylin_cube_with_slr_ready_2_segments/hfile/
        // note: the path ends with "/"
        String input = getOptionValue(OPTION_INPUT_PATH);

        Configuration conf = HBaseConfiguration.create(getConf());
        FileSystem fs = FileSystem.get(conf);

        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase();
        KylinConfig config = KylinConfig.getInstanceFromEnv();
        CubeManager cubeMgr = CubeManager.getInstance(config);
        CubeInstance cube = cubeMgr.getCube(cubeName);
        CubeDesc cubeDesc = cube.getDescriptor();
        FsPermission permission = new FsPermission((short) 0777);
        for (HBaseColumnFamilyDesc cf : cubeDesc.getHBaseMapping().getColumnFamily()) {
            String cfName = cf.getName();
            fs.setPermission(new Path(input + cfName), permission);
        }

        String[] newArgs = new String[2];
        newArgs[0] = input;
        newArgs[1] = tableName;

        log.debug("Start to run LoadIncrementalHFiles");
        int ret = ToolRunner.run(new LoadIncrementalHFiles(conf), newArgs);
        log.debug("End to run LoadIncrementalHFiles");
        return ret;
    } catch (Exception e) {
        printUsage(options);
        throw e;
    }
}
 
Example 12
Source File: MergeStatisticsWithOldStep.java    From kylin with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager mgr = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment optimizeSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));

    CubeSegment oldSegment = optimizeSegment.getCubeInstance().getOriginalSegmentToOptimize(optimizeSegment);
    Preconditions.checkNotNull(oldSegment,
            "cannot find the original segment to be optimized by " + optimizeSegment);

    KylinConfig kylinConf = cube.getConfig();
    Configuration conf = HadoopUtil.getCurrentConfiguration();
    ResourceStore rs = ResourceStore.getStore(kylinConf);
    int averageSamplingPercentage = 0;

    try {
        //1. Add statistics from optimized segment
        Path statisticsDirPath = new Path(CubingExecutableUtil.getStatisticsPath(this.getParams()));
        FileSystem hdfs = FileSystem.get(conf);
        if (!hdfs.exists(statisticsDirPath)) {
            throw new IOException("StatisticsFilePath " + statisticsDirPath + " does not exists");
        }

        if (!hdfs.isDirectory(statisticsDirPath)) {
            throw new IOException("StatisticsFilePath " + statisticsDirPath + " is not a directory");
        }

        Path[] statisticsFiles = HadoopUtil.getFilteredPath(hdfs, statisticsDirPath,
                BatchConstants.CFG_OUTPUT_STATISTICS);
        if (statisticsFiles == null) {
            throw new IOException("fail to find the statistics file in base dir: " + statisticsDirPath);
        }

        for (Path item : statisticsFiles) {
            CubeStatsReader optimizeSegmentStatsReader = new CubeStatsReader(optimizeSegment, null,
                    optimizeSegment.getConfig(), item);
            averageSamplingPercentage += optimizeSegmentStatsReader.getSamplingPercentage();
            addFromCubeStatsReader(optimizeSegmentStatsReader);
        }

        //2. Add statistics from old segment
        CubeStatsReader oldSegmentStatsReader = new CubeStatsReader(oldSegment, null, oldSegment.getConfig());
        averageSamplingPercentage += oldSegmentStatsReader.getSamplingPercentage();
        addFromCubeStatsReader(oldSegmentStatsReader);

        logger.info("Cuboid set with stats info: " + cuboidHLLMap.keySet().toString());
        //3. Store merged statistics for recommend cuboids
        averageSamplingPercentage = averageSamplingPercentage / 2;
        Set<Long> cuboidsRecommend = cube.getCuboidsRecommend();

        Map<Long, HLLCounter> resultCuboidHLLMap = Maps.newHashMapWithExpectedSize(cuboidsRecommend.size());
        for (Long cuboid : cuboidsRecommend) {
            HLLCounter hll = cuboidHLLMap.get(cuboid);
            if (hll == null) {
                logger.warn("Cannot get the row count stats for cuboid " + cuboid);
            } else {
                resultCuboidHLLMap.put(cuboid, hll);
            }
        }

        String resultDir = CubingExecutableUtil.getMergedStatisticsPath(this.getParams());
        CubeStatsWriter.writeCuboidStatistics(conf, new Path(resultDir), resultCuboidHLLMap,
                averageSamplingPercentage, oldSegmentStatsReader.getSourceRowCount());

        try (FSDataInputStream mergedStats = hdfs
                .open(new Path(resultDir, BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME))) {
            // put the statistics to metadata store
            String statisticsFileName = optimizeSegment.getStatisticsResourcePath();
            rs.putResource(statisticsFileName, mergedStats, System.currentTimeMillis());
        }

        //By default, the cube optimization will use in-memory cubing
        CubingJob cubingJob = (CubingJob) getManager()
                .getJob(CubingExecutableUtil.getCubingJobId(this.getParams()));
        StatisticsDecisionUtil.decideCubingAlgorithm(cubingJob, optimizeSegment);

        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to merge cuboid statistics", e);
        return ExecuteResult.createError(e);
    }

}
 
Example 13
Source File: FilterRecommendCuboidDataJob.java    From kylin with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_ID);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        parseOptions(options, args);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
        String segmentID = getOptionValue(OPTION_SEGMENT_ID);
        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));

        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        CubeSegment optSegment = cube.getSegmentById(segmentID);
        CubeSegment originalSegment = cube.getOriginalSegmentToOptimize(optSegment);

        logger.info("Starting: " + job.getJobName());

        setJobClasspath(job, cube.getConfig());

        // Mapper
        job.setMapperClass(FilterRecommendCuboidDataMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // Input
        job.setInputFormatClass(SequenceFileInputFormat.class);
        FileInputFormat.setInputPaths(job, input);

        // Reducer
        ConvergeCuboidDataUtil.setupReducer(job, originalSegment, output);

        // set job configuration
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_ID, segmentID);
        // add metadata to distributed cache
        attachSegmentMetadata(originalSegment, job.getConfiguration(), false, false);

        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } catch (Exception e) {
        logger.error("error in CuboidJob", e);
        printUsage(options);
        throw e;
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}
 
Example 14
Source File: UpdateDictionaryStep.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeMgr = CubeManager.getInstance(context.getConfig());
    final DictionaryManager dictMgrHdfs;
    final DictionaryManager dictMgrHbase;
    final CubeInstance cube = cubeMgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final List<CubeSegment> mergingSegments = getMergingSegments(cube);
    final String dictInfoPath = this.getParams().get(BatchConstants.ARG_DICT_PATH);
    final String metadataUrl = this.getParams().get(BatchConstants.ARG_META_URL);

    final KylinConfig kylinConfHbase = cube.getConfig();
    final KylinConfig kylinConfHdfs = AbstractHadoopJob.loadKylinConfigFromHdfs(metadataUrl);

    Collections.sort(mergingSegments);

    try {
        Configuration conf = HadoopUtil.getCurrentConfiguration();
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        ResourceStore hbaseRS = ResourceStore.getStore(kylinConfHbase);
        ResourceStore hdfsRS = ResourceStore.getStore(kylinConfHdfs);
        dictMgrHdfs = DictionaryManager.getInstance(kylinConfHdfs);
        dictMgrHbase = DictionaryManager.getInstance(kylinConfHbase);

        // work on copy instead of cached objects
        CubeInstance cubeCopy = cube.latestCopyForWrite();
        CubeSegment newSegCopy = cubeCopy.getSegmentById(newSegment.getUuid());

        // update cube segment dictionary

        FileStatus[] fileStatuss = fs.listStatus(new Path(dictInfoPath), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part") || path.getName().startsWith("tmp");
            }
        });

        for (FileStatus fileStatus : fileStatuss) {
            Path filePath = fileStatus.getPath();

            SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
            Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Text value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf);

            while (reader.next(key, value)) {
                String tblCol = key.toString();
                String dictInfoResource = value.toString();

                if (StringUtils.isNotEmpty(dictInfoResource)) {
                    logger.info(dictInfoResource);
                    // put dictionary file to metadata store
                    DictionaryInfo dictInfoHdfs = dictMgrHdfs.getDictionaryInfo(dictInfoResource);
                    DictionaryInfo dicInfoHbase = dictMgrHbase.trySaveNewDict(dictInfoHdfs.getDictionaryObject(), dictInfoHdfs);

                    if (dicInfoHbase != null){
                        TblColRef tblColRef = cube.getDescriptor().findColumnRef(tblCol.split(":")[0], tblCol.split(":")[1]);
                        newSegCopy.putDictResPath(tblColRef, dicInfoHbase.getResourcePath());
                    }
                }
            }

            IOUtils.closeStream(reader);
        }

        CubeSegment lastSeg = mergingSegments.get(mergingSegments.size() - 1);
        for (Map.Entry<String, String> entry : lastSeg.getSnapshots().entrySet()) {
            newSegCopy.putSnapshotResPath(entry.getKey(), entry.getValue());
        }

        // update statistics
        // put the statistics to metadata store
        String statisticsFileName = newSegment.getStatisticsResourcePath();
        hbaseRS.putResource(statisticsFileName, hdfsRS.getResource(newSegment.getStatisticsResourcePath()).content(), System.currentTimeMillis());

        CubeUpdate update = new CubeUpdate(cubeCopy);
        update.setToUpdateSegs(newSegCopy);
        cubeMgr.updateCube(update);

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to merge dictionary", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 15
Source File: CubeHFileJob.java    From kylin-on-parquet-v2 with Apache License 2.0
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_PARTITION_FILE_PATH);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_HTABLE_NAME);
        parseOptions(options, args);

        Path partitionFilePath = new Path(getOptionValue(OPTION_PARTITION_FILE_PATH));

        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String cubeName = getOptionValue(OPTION_CUBE_NAME);

        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());

        CubeInstance cube = cubeMgr.getCube(cubeName);

        // use current hbase configuration
        Configuration configuration = new Configuration(HBaseConnection.getCurrentHBaseConfiguration());
        String[] allServices = getAllServices(configuration);
        merge(configuration, getConf());
        configuration.setStrings(DFSConfigKeys.DFS_NAMESERVICES, allServices);

        job = Job.getInstance(configuration, getOptionValue(OPTION_JOB_NAME));

        setJobClasspath(job, cube.getConfig());

        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);
        FileOutputFormat.setOutputPath(job, output);

        // set job configuration
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        // add metadata to distributed cache
        attachCubeMetadata(cube, job.getConfiguration());

        HTable htable = new HTable(configuration, getOptionValue(OPTION_HTABLE_NAME));

        // Automatic config !
        HFileOutputFormat3.configureIncrementalLoad(job, htable);
        reconfigurePartitions(configuration, partitionFilePath);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(CubeHFileMapper.class);
        job.setReducerClass(KeyValueReducer.class);
        job.setMapOutputKeyClass(RowKeyWritable.class);
        job.setMapOutputValueClass(KeyValue.class);
        job.setSortComparatorClass(RowKeyWritable.RowKeyComparator.class);

        // set block replication to 3 for hfiles
        configuration.set(DFSConfigKeys.DFS_REPLICATION_KEY, "3");

        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}
 
Example 16
Source File: MergeDictJob.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        parseOptions(options, args);

        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String jobName = getOptionValue(OPTION_JOB_NAME);
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
        String segmentName = getOptionValue(OPTION_SEGMENT_NAME);

        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        CubeSegment segment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        setJobClasspath(job, cube.getConfig());
        job.setJobName(jobName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);

        logger.info("MergeDictReducer output path: {}", output);

        // Mapper
        job.setMapperClass(MergeDictMapper.class);
        job.setInputFormatClass(ColumnarSplitDictInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        //Reducer
        job.setReducerClass(MergeDictReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        attachCubeMetadata(cube, job.getConfiguration());

        deletePath(job.getConfiguration(), output);
        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        logger.error("job {} failed. ", job.getJobName(), e);
        throw e;
    }
}
 
Example 17
Source File: UHCDictionaryJob.java    From kylin with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_CUBING_JOB_ID);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_INPUT_PATH);
        parseOptions(options, args);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        String job_id = getOptionValue(OPTION_CUBING_JOB_ID);
        String cubeName = getOptionValue(OPTION_CUBE_NAME);
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));

        //add metadata to distributed cache
        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        attachCubeMetadata(cube, job.getConfiguration());

        List<TblColRef> uhcColumns = cube.getDescriptor().getAllUHCColumns();
        int reducerCount = uhcColumns.size();

        //Note: handle the case where no UHC column has any input data
        boolean hasUHCValue = false;
        for (TblColRef tblColRef : uhcColumns) {
            Path path = new Path(input.toString() + "/" + tblColRef.getIdentity());
            if (HadoopUtil.getFileSystem(path).exists(path)) {
                FileInputFormat.addInputPath(job, path);
                FileInputFormat.setInputPathFilter(job, UHCDictPathFilter.class);
                hasUHCValue = true;
            }
        }

        if (!hasUHCValue) {
            isSkipped = true;
            return 0;
        }

        setJobClasspath(job, cube.getConfig());
        setupMapper();
        setupReducer(output, reducerCount);

        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.ARG_CUBING_JOB_ID, job_id);
        job.getConfiguration().set(BatchConstants.CFG_GLOBAL_DICT_BASE_DIR, KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory());
        job.getConfiguration().set(BatchConstants.CFG_MAPRED_OUTPUT_COMPRESS, "false");

        //8 GB of memory is enough for all global dictionaries, because the input is sequential and each dictionary is handled slice by slice
        job.getConfiguration().set("mapreduce.reduce.memory.mb", "8500");
        job.getConfiguration().set("mapred.reduce.child.java.opts", "-Xmx8g");
        //Copying the global dict to the working dir in GlobalDictHDFSStore may take a long time (this could be improved),
        //and waiting for the global dict lock may also take a long time,
        //so set the task timeout to 8 hours
        job.getConfiguration().set("mapreduce.task.timeout", "28800000");

        //allow users to override configuration specifically for the UHC step
        for (Map.Entry<String, String> entry : cube.getConfig().getUHCMRConfigOverride().entrySet()) {
            job.getConfiguration().set(entry.getKey(), entry.getValue());
        }

        return waitForCompletion(job);
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}
 
Example 18
Source File: RangeKeyDistributionJob.java    From Kylin with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);

        parseOptions(options, args);

        // start job
        String jobName = getOptionValue(OPTION_JOB_NAME);
        job = Job.getInstance(getConf(), jobName);

        setJobClasspath(job);

        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);

        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        FileOutputFormat.setOutputPath(job, output);
        // job.getConfiguration().set("dfs.block.size", "67108864");

        // Mapper
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(RangeKeyDistributionMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // Reducer - only one
        job.setReducerClass(RangeKeyDistributionReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        job.setNumReduceTasks(1);

        this.deletePath(job.getConfiguration(), output);

        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase();
        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        RealizationCapacity realizationCapacity = cube.getDescriptor().getModel().getCapacity();
        job.getConfiguration().set(BatchConstants.CUBE_CAPACITY, realizationCapacity.toString());

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        throw e;
    }
}
 
Example 19
Source File: AbstractExecutable.java    From kylin with Apache License 2.0
public KylinConfig getCubeSpecificConfig() {
    String cubeName = getCubeName();
    CubeManager manager = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
    CubeInstance cube = manager.getCube(cubeName);
    return cube.getConfig();
}
 
Example 20
Source File: KafkaFlatTableJob.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_SEGMENT_ID);
        parseOptions(options, args);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        String cubeName = getOptionValue(OPTION_CUBE_NAME);
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));

        String segmentId = getOptionValue(OPTION_SEGMENT_ID);

        // ----------------------------------------------------------------------------
        // add metadata to distributed cache
        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);

        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_ID, segmentId);
        logger.info("Starting: " + job.getJobName());

        setJobClasspath(job, cube.getConfig());

        KafkaConfigManager kafkaConfigManager = KafkaConfigManager.getInstance(KylinConfig.getInstanceFromEnv());
        KafkaConfig kafkaConfig = kafkaConfigManager.getKafkaConfig(cube.getRootFactTable());
        String brokers = KafkaClient.getKafkaBrokers(kafkaConfig);
        String topic = kafkaConfig.getTopic();

        if (brokers == null || brokers.length() == 0 || topic == null) {
            throw new IllegalArgumentException("Invalid Kafka information, brokers " + brokers + ", topic " + topic);
        }

        JobEngineConfig jobEngineConfig = new JobEngineConfig(KylinConfig.getInstanceFromEnv());
        job.getConfiguration().addResource(new Path(jobEngineConfig.getHadoopJobConfFilePath(null)));
        KafkaConsumerProperties kafkaConsumerProperties = KafkaConsumerProperties.getInstanceFromEnv();
        job.getConfiguration().addResource(new Path(kafkaConsumerProperties.getKafkaConsumerHadoopJobConf()));
        job.getConfiguration().set(CONFIG_KAFKA_BROKERS, brokers);
        job.getConfiguration().set(CONFIG_KAFKA_TOPIC, topic);
        job.getConfiguration().set(CONFIG_KAFKA_TIMEOUT, String.valueOf(kafkaConfig.getTimeout()));
        job.getConfiguration().set(CONFIG_KAFKA_INPUT_FORMAT, "json");
        job.getConfiguration().set(CONFIG_KAFKA_PARSER_NAME, kafkaConfig.getParserName());
        job.getConfiguration().set(CONFIG_KAFKA_SPLIT_ROWS, String.valueOf(kafkaConfig.getSplitRows()));
        job.getConfiguration().set(CONFIG_KAFKA_CONSUMER_GROUP, cubeName); // use cubeName as consumer group name
        appendKafkaOverrideProperties(cube.getConfig(), job.getConfiguration());
        setupMapper(cube.getSegmentById(segmentId));
        job.setNumReduceTasks(0);
        FileOutputFormat.setOutputPath(job, output);
        FileOutputFormat.setCompressOutput(job, true);
        org.apache.log4j.Logger.getRootLogger().info("Output hdfs location: " + output);
        org.apache.log4j.Logger.getRootLogger().info("Output hdfs compression: " + true);
        job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

        attachCubeMetadata(cube, job.getConfiguration());
        deletePath(job.getConfiguration(), output);
        return waitForCompletion(job);

    } catch (Exception e) {
        logger.error("error in KafkaFlatTableJob", e);
        printUsage(options);
        throw e;
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }

}