Java Code Examples for org.apache.kylin.cube.CubeManager#getCube()

The following examples show how to use org.apache.kylin.cube.CubeManager#getCube(). They are taken from open-source Apache Kylin projects; the originating project, source file, and license are noted above each example.
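Before the full examples, here is a minimal sketch of the pattern they all share: obtain the CubeManager for a KylinConfig, then look up a CubeInstance by name with getCube() (which typically returns null when no cube with that name exists). The class name GetCubeExample and the cube name "sample_cube" below are placeholders, not names from the examples.

import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;

public class GetCubeExample {
    public static void main(String[] args) {
        // Load the Kylin configuration from the environment.
        KylinConfig config = KylinConfig.getInstanceFromEnv();

        // CubeManager is obtained per KylinConfig and caches cube metadata.
        CubeManager cubeManager = CubeManager.getInstance(config);

        // Look up a cube by name; "sample_cube" is a placeholder for an existing cube.
        CubeInstance cube = cubeManager.getCube("sample_cube");
        if (cube == null) {
            System.out.println("No cube named sample_cube");
            return;
        }
        System.out.println("Cube " + cube.getName() + " has " + cube.getSegments().size() + " segment(s)");
    }
}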
Example 1
Source File: HybridCubeCLITest.java    From kylin-on-parquet-v2 with Apache License 2.0
@Test
public void testSegmentOverlap() throws IOException {
    thrown.expect(RuntimeException.class);
    thrown.expectMessage("Segments has overlap");

    HybridManager hybridManager = HybridManager.getInstance(KylinConfig.getInstanceFromEnv());
    Assert.assertNull(hybridManager.getHybridInstance("ssb_hybrid"));
    HybridCubeCLI.main(new String[] { "-name", "ssb_hybrid", "-project", "default", "-model", "ssb", "-cubes", "ssb_cube1,ssb_cube2", "-action", "create" });

    CubeManager cubeManager = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
    CubeInstance cube1 = cubeManager.getCube("ssb_cube1");
    CubeInstance cube2 = cubeManager.getCube("ssb_cube2");

    // 2012-01-01,2012-01-03
    cubeManager.appendSegment(cube1, new SegmentRange.TSRange(1325376000000L, 1325548800000L));
    // 2012-01-02,2012-01-04
    cubeManager.appendSegment(cube2, new SegmentRange.TSRange(1325462400000L, 1325635200000L));

    HybridCubeCLI.main(new String[] { "-name", "ssb_hybrid", "-project", "default", "-model", "ssb", "-cubes", "ssb_cube1,ssb_cube2", "-action", "update" });
}
 
Example 2
Source File: ColumnarSplitReader.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    if (!(split instanceof FileSplit)) {
        throw new IllegalArgumentException("Only compatible with FileSplits.");
    } else {
        logger.debug("CFG_Cube_Name: " + BatchConstants.CFG_CUBE_NAME);
        cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase(Locale.ROOT);
        segmentName = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_NAME).toUpperCase(Locale.ROOT);

        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
        CubeManager cubeManager = CubeManager.getInstance(config);
        cube = cubeManager.getCube(cubeName);
        cubeDesc = cube.getDescriptor();
        cubeSegment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);
    }
}
 
Example 3
Source File: DeployCoprocessorCLI.java    From kylin with Apache License 2.0
private static List<String> filterByProjects(List<String> allTableNames, List<String> projectNames) {
    ProjectManager projectManager = ProjectManager.getInstance(KylinConfig.getInstanceFromEnv());
    CubeManager cubeManager = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());

    List<String> result = Lists.newArrayList();
    for (String p : projectNames) {
        p = p.trim();
        if (p.endsWith(",")) {
            p = p.substring(0, p.length() - 1);
        }

        ProjectInstance projectInstance = projectManager.getProject(p);
        List<RealizationEntry> cubeList = projectInstance.getRealizationEntries(RealizationType.CUBE);
        for (RealizationEntry cube : cubeList) {
            CubeInstance cubeInstance = cubeManager.getCube(cube.getRealization());
            for (CubeSegment segment : cubeInstance.getSegments()) {
                String tableName = segment.getStorageLocationIdentifier();
                if (allTableNames.contains(tableName)) {
                    result.add(tableName);
                }
            }
        }
    }
    return result;
}
 
Example 4
Source File: MergeCuboidMapper.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected void doSetup(Context context) throws IOException, InterruptedException {
    super.bindCurrentConfiguration(context.getConfiguration());

    String cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME);
    String segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID);

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();

    CubeManager cubeManager = CubeManager.getInstance(config);
    CubeInstance cube = cubeManager.getCube(cubeName);
    CubeDesc cubeDesc = cube.getDescriptor();
    CubeSegment mergedCubeSegment = cube.getSegmentById(segmentID);

    // decide which source segment
    FileSplit fileSplit = (FileSplit) context.getInputSplit();
    IMROutput2.IMRMergeOutputFormat outputFormat = MRUtil.getBatchMergeOutputSide2(mergedCubeSegment)
            .getOutputFormat();
    CubeSegment sourceCubeSegment = outputFormat.findSourceSegment(fileSplit, cube);
    reEncoder = new SegmentReEncoder(cubeDesc, sourceCubeSegment, mergedCubeSegment, config);
}
 
Example 5
Source File: UpdateCubeInfoAfterCheckpointStep.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeManager = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = cubeManager.getCube(CubingExecutableUtil.getCubeName(this.getParams()));

    Set<Long> recommendCuboids = cube.getCuboidsRecommend();
    try {
        List<CubeSegment> newSegments = cube.getSegments(SegmentStatusEnum.READY_PENDING);
        Map<Long, Long> recommendCuboidsWithStats = CuboidStatsReaderUtil
                .readCuboidStatsFromSegments(recommendCuboids, newSegments);
        if (recommendCuboidsWithStats == null) {
            throw new RuntimeException("Fail to get statistics info for recommended cuboids after optimization!!!");
        }
        cubeManager.promoteCheckpointOptimizeSegments(cube, recommendCuboidsWithStats,
                newSegments.toArray(new CubeSegment[newSegments.size()]));
        return new ExecuteResult();
    } catch (Exception e) {
        logger.error("fail to update cube after build", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 6
Source File: BaseCuboidMapperTest.java    From Kylin with Apache License 2.0
@Test
public void testMapperWithNull() throws Exception {
    String cubeName = "test_kylin_cube_with_slr_1_new_segment";
    String segmentName = "20130331080000_20131212080000";
    mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
    mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
    // mapDriver.getConfiguration().set(BatchConstants.CFG_METADATA_URL,
    // metadata);
    mapDriver.withInput(new Text("key"), new Text("2012-12-15118480Health & BeautyFragrances\\NAuction15123456789\\N"));
    List<Pair<Text, Text>> result = mapDriver.run();

    CubeManager cubeMgr = CubeManager.getInstance(getTestConfig());
    CubeInstance cube = cubeMgr.getCube(cubeName);

    assertEquals(1, result.size());
    Text rowkey = result.get(0).getFirst();
    byte[] key = rowkey.getBytes();
    byte[] header = Bytes.head(key, 26);
    byte[] sellerId = Bytes.tail(header, 18);
    byte[] cuboidId = Bytes.head(header, 8);
    byte[] restKey = Bytes.tail(key, rowkey.getLength() - 26);

    RowKeyDecoder decoder = new RowKeyDecoder(cube.getFirstSegment());
    decoder.decode(key);
    assertEquals("[123456789, 2012-12-15, 11848, Health & Beauty, Fragrances, null, Auction, 0, 15]", decoder.getValues().toString());

    assertTrue(Bytes.toString(sellerId).startsWith("123456789"));
    assertEquals(511, Bytes.toLong(cuboidId));
    assertEquals(22, restKey.length);

    verifyMeasures(cube.getDescriptor().getMeasures(), result.get(0).getSecond(), "0", "0", "0");
}
 
Example 7
Source File: ProjectManagerTest.java    From kylin-on-parquet-v2 with Apache License 2.0
@Test
public void testProjectsDrop() throws IOException {
    ProjectManager prjMgr = ProjectManager.getInstance(getTestConfig());
    CubeManager cubeMgr = CubeManager.getInstance(getTestConfig());

    CubeInstance cube = cubeMgr.getCube("ci_left_join_cube");
    assertTrue(prjMgr.getRealizationsByTable("default", "default.test_kylin_fact").contains(cube));
    assertTrue(prjMgr.listAllRealizations("default").contains(cube));

    cubeMgr.dropCube(cube.getName(), false);

    assertTrue(!prjMgr.getRealizationsByTable("default", "default.test_kylin_fact").contains(cube));
    assertTrue(!prjMgr.listAllRealizations("default").contains(cube));
}
 
Example 8
Source File: ColumnToRowJob.java    From kylin with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);

        parseOptions(options, args);
        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
        String segmentName = getOptionValue(OPTION_SEGMENT_NAME);

        KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
        CubeManager cubeMgr = CubeManager.getInstance(kylinConfig);
        CubeInstance cube = cubeMgr.getCube(cubeName);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        setJobClasspath(job, cube.getConfig());
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);

        job.setMapperClass(ColumnToRowMapper.class);
        job.setInputFormatClass(ColumnarSplitDataInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(ColumnToRowReducer.class);
        job.setNumReduceTasks(calReducerNum(input));
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.getConfiguration().set("dfs.block.size", cube.getConfig().getStreamingBasicCuboidJobDFSBlockSize());
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);

        CubeSegment segment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);
        attachSegmentMetadataWithDict(segment, job.getConfiguration());
        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } catch (Exception e) {
        logger.error("error in CuboidJob", e);
        printUsage(options);
        throw e;
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}
 
Example 9
Source File: LocalWithSparkSessionTest.java    From kylin-on-parquet-v2 with Apache License 2.0
protected void cleanupSegments(String cubeName) throws IOException {
    KylinConfig config = KylinConfig.getInstanceFromEnv();
    CubeManager cubeMgr = CubeManager.getInstance(config);
    CubeInstance cube = cubeMgr.getCube(cubeName);
    cubeMgr.updateCubeDropSegments(cube, cube.getSegments());
}
 
Example 10
Source File: SignatureCalculatorTest.java    From kylin-on-parquet-v2 with Apache License 2.0
@Test
public void testRealizationSetCalculator() throws IOException {
    KylinConfig config = KylinConfig.createKylinConfig(getTestConfig());
    Map<String, String> overrides = Maps.newHashMap();
    overrides.put("kylin.query.signature-class", "org.apache.kylin.rest.signature.RealizationSetCalculator");

    ProjectInstance projectInstance = ProjectManager.getInstance(config).getProject(projectName);
    projectInstance.setConfig(KylinConfigExt.createInstance(config, overrides));

    HybridManager hybridManager = HybridManager.getInstance(config);
    HybridInstance hybrid1 = hybridManager.getHybridInstance("test_kylin_hybrid_ready");

    CubeManager cubeManager = CubeManager.getInstance(config);
    CubeInstance cube1 = cubeManager.getCube("test_kylin_cube_with_slr_ready_2_segments");
    CubeInstance cube2 = cubeManager.getCube("test_kylin_cube_without_slr_ready");
    CubeInstance cube2Clone = cloneCubeInstance(cubeManager, cube2, cube2.getName() + "_clone");

    //Related cubes:
    // - test_kylin_cube_with_slr_ready
    // - test_kylin_cube_with_slr_ready_2_segments
    // - test_kylin_cube_without_slr_ready
    String cubes = hybrid1.getCanonicalName() + "," + cube2Clone.getCanonicalName();

    SQLResponse sqlResponse = new SQLResponse();
    sqlResponse.setCube(cubes);

    String signature = SQLResponseSignatureUtil.createSignature(config, sqlResponse, projectName);
    sqlResponse.setSignature(signature);

    Assert.assertTrue(SQLResponseSignatureUtil.checkSignature(config, sqlResponse, projectName));

    { //Test the influence of a related cube's status change
        cube1 = cubeManager.updateCubeStatus(cube1, RealizationStatusEnum.DISABLED);
        Assert.assertFalse(SQLResponseSignatureUtil.checkSignature(config, sqlResponse, projectName));

        cube1 = cubeManager.updateCubeStatus(cube1, RealizationStatusEnum.READY);
        Assert.assertTrue(SQLResponseSignatureUtil.checkSignature(config, sqlResponse, projectName));
    }

    {//Test the influence of segment changes
        cube2Clone = cubeManager.updateCubeDropSegments(cube2Clone, cube2Clone.getSegments().get(0));
        Assert.assertFalse(SQLResponseSignatureUtil.checkSignature(config, sqlResponse, projectName));
    }
}
 
Example 11
Source File: BulkLoadJob.java    From Kylin with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_HTABLE_NAME);
        options.addOption(OPTION_CUBE_NAME);
        parseOptions(options, args);

        String tableName = getOptionValue(OPTION_HTABLE_NAME).toUpperCase();
        // e.g. /tmp/kylin-3f150b00-3332-41ca-9d3d-652f67f044d7/test_kylin_cube_with_slr_ready_2_segments/hfile/
        // note: the path ends with "/"
        String input = getOptionValue(OPTION_INPUT_PATH);

        Configuration conf = HBaseConfiguration.create(getConf());
        FileSystem fs = FileSystem.get(conf);

        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase();
        KylinConfig config = KylinConfig.getInstanceFromEnv();
        CubeManager cubeMgr = CubeManager.getInstance(config);
        CubeInstance cube = cubeMgr.getCube(cubeName);
        CubeDesc cubeDesc = cube.getDescriptor();
        FsPermission permission = new FsPermission((short) 0777);
        for (HBaseColumnFamilyDesc cf : cubeDesc.getHBaseMapping().getColumnFamily()) {
            String cfName = cf.getName();
            fs.setPermission(new Path(input + cfName), permission);
        }

        String[] newArgs = new String[2];
        newArgs[0] = input;
        newArgs[1] = tableName;

        log.debug("Start to run LoadIncrementalHFiles");
        int ret = ToolRunner.run(new LoadIncrementalHFiles(conf), newArgs);
        log.debug("End to run LoadIncrementalHFiles");
        return ret;
    } catch (Exception e) {
        printUsage(options);
        throw e;
    }
}
 
Example 12
Source File: MergeStatisticsWithOldStep.java    From kylin with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager mgr = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment optimizeSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));

    CubeSegment oldSegment = optimizeSegment.getCubeInstance().getOriginalSegmentToOptimize(optimizeSegment);
    Preconditions.checkNotNull(oldSegment,
            "cannot find the original segment to be optimized by " + optimizeSegment);

    KylinConfig kylinConf = cube.getConfig();
    Configuration conf = HadoopUtil.getCurrentConfiguration();
    ResourceStore rs = ResourceStore.getStore(kylinConf);
    int averageSamplingPercentage = 0;

    try {
        //1. Add statistics from optimized segment
        Path statisticsDirPath = new Path(CubingExecutableUtil.getStatisticsPath(this.getParams()));
        FileSystem hdfs = FileSystem.get(conf);
        if (!hdfs.exists(statisticsDirPath)) {
            throw new IOException("StatisticsFilePath " + statisticsDirPath + " does not exists");
        }

        if (!hdfs.isDirectory(statisticsDirPath)) {
            throw new IOException("StatisticsFilePath " + statisticsDirPath + " is not a directory");
        }

        Path[] statisticsFiles = HadoopUtil.getFilteredPath(hdfs, statisticsDirPath,
                BatchConstants.CFG_OUTPUT_STATISTICS);
        if (statisticsFiles == null) {
            throw new IOException("fail to find the statistics file in base dir: " + statisticsDirPath);
        }

        for (Path item : statisticsFiles) {
            CubeStatsReader optimizeSegmentStatsReader = new CubeStatsReader(optimizeSegment, null,
                    optimizeSegment.getConfig(), item);
            averageSamplingPercentage += optimizeSegmentStatsReader.getSamplingPercentage();
            addFromCubeStatsReader(optimizeSegmentStatsReader);
        }

        //2. Add statistics from old segment
        CubeStatsReader oldSegmentStatsReader = new CubeStatsReader(oldSegment, null, oldSegment.getConfig());
        averageSamplingPercentage += oldSegmentStatsReader.getSamplingPercentage();
        addFromCubeStatsReader(oldSegmentStatsReader);

        logger.info("Cuboid set with stats info: " + cuboidHLLMap.keySet().toString());
        //3. Store merged statistics for recommend cuboids
        averageSamplingPercentage = averageSamplingPercentage / 2;
        Set<Long> cuboidsRecommend = cube.getCuboidsRecommend();

        Map<Long, HLLCounter> resultCuboidHLLMap = Maps.newHashMapWithExpectedSize(cuboidsRecommend.size());
        for (Long cuboid : cuboidsRecommend) {
            HLLCounter hll = cuboidHLLMap.get(cuboid);
            if (hll == null) {
                logger.warn("Cannot get the row count stats for cuboid " + cuboid);
            } else {
                resultCuboidHLLMap.put(cuboid, hll);
            }
        }

        String resultDir = CubingExecutableUtil.getMergedStatisticsPath(this.getParams());
        CubeStatsWriter.writeCuboidStatistics(conf, new Path(resultDir), resultCuboidHLLMap,
                averageSamplingPercentage, oldSegmentStatsReader.getSourceRowCount());

        try (FSDataInputStream mergedStats = hdfs
                .open(new Path(resultDir, BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME))) {
            // put the statistics to metadata store
            String statisticsFileName = optimizeSegment.getStatisticsResourcePath();
            rs.putResource(statisticsFileName, mergedStats, System.currentTimeMillis());
        }

        //By default, the cube optimization will use in-memory cubing
        CubingJob cubingJob = (CubingJob) getManager()
                .getJob(CubingExecutableUtil.getCubingJobId(this.getParams()));
        StatisticsDecisionUtil.decideCubingAlgorithm(cubingJob, optimizeSegment);

        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to merge cuboid statistics", e);
        return ExecuteResult.createError(e);
    }

}
 
Example 13
Source File: FilterRecommendCuboidDataJob.java    From kylin with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_ID);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        parseOptions(options, args);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
        String segmentID = getOptionValue(OPTION_SEGMENT_ID);
        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));

        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        CubeSegment optSegment = cube.getSegmentById(segmentID);
        CubeSegment originalSegment = cube.getOriginalSegmentToOptimize(optSegment);

        logger.info("Starting: " + job.getJobName());

        setJobClasspath(job, cube.getConfig());

        // Mapper
        job.setMapperClass(FilterRecommendCuboidDataMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // Input
        job.setInputFormatClass(SequenceFileInputFormat.class);
        FileInputFormat.setInputPaths(job, input);

        // Reducer
        ConvergeCuboidDataUtil.setupReducer(job, originalSegment, output);

        // set job configuration
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_ID, segmentID);
        // add metadata to distributed cache
        attachSegmentMetadata(originalSegment, job.getConfiguration(), false, false);

        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } catch (Exception e) {
        logger.error("error in CuboidJob", e);
        printUsage(options);
        throw e;
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}
 
Example 14
Source File: UpdateDictionaryStep.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeMgr = CubeManager.getInstance(context.getConfig());
    final DictionaryManager dictMgrHdfs;
    final DictionaryManager dictMgrHbase;
    final CubeInstance cube = cubeMgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final List<CubeSegment> mergingSegments = getMergingSegments(cube);
    final String dictInfoPath = this.getParams().get(BatchConstants.ARG_DICT_PATH);
    final String metadataUrl = this.getParams().get(BatchConstants.ARG_META_URL);

    final KylinConfig kylinConfHbase = cube.getConfig();
    final KylinConfig kylinConfHdfs = AbstractHadoopJob.loadKylinConfigFromHdfs(metadataUrl);

    Collections.sort(mergingSegments);

    try {
        Configuration conf = HadoopUtil.getCurrentConfiguration();
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        ResourceStore hbaseRS = ResourceStore.getStore(kylinConfHbase);
        ResourceStore hdfsRS = ResourceStore.getStore(kylinConfHdfs);
        dictMgrHdfs = DictionaryManager.getInstance(kylinConfHdfs);
        dictMgrHbase = DictionaryManager.getInstance(kylinConfHbase);

        // work on copy instead of cached objects
        CubeInstance cubeCopy = cube.latestCopyForWrite();
        CubeSegment newSegCopy = cubeCopy.getSegmentById(newSegment.getUuid());

        // update cube segment dictionary

        FileStatus[] fileStatuss = fs.listStatus(new Path(dictInfoPath), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part") || path.getName().startsWith("tmp");
            }
        });

        for (FileStatus fileStatus : fileStatuss) {
            Path filePath = fileStatus.getPath();

            SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
            Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Text value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf);

            while (reader.next(key, value)) {
                String tblCol = key.toString();
                String dictInfoResource = value.toString();

                if (StringUtils.isNotEmpty(dictInfoResource)) {
                    logger.info(dictInfoResource);
                    // put dictionary file to metadata store
                    DictionaryInfo dictInfoHdfs = dictMgrHdfs.getDictionaryInfo(dictInfoResource);
                    DictionaryInfo dicInfoHbase = dictMgrHbase.trySaveNewDict(dictInfoHdfs.getDictionaryObject(), dictInfoHdfs);

                    if (dicInfoHbase != null){
                        TblColRef tblColRef = cube.getDescriptor().findColumnRef(tblCol.split(":")[0], tblCol.split(":")[1]);
                        newSegCopy.putDictResPath(tblColRef, dicInfoHbase.getResourcePath());
                    }
                }
            }

            IOUtils.closeStream(reader);
        }

        CubeSegment lastSeg = mergingSegments.get(mergingSegments.size() - 1);
        for (Map.Entry<String, String> entry : lastSeg.getSnapshots().entrySet()) {
            newSegCopy.putSnapshotResPath(entry.getKey(), entry.getValue());
        }

        // update statistics
        // put the statistics to metadata store
        String statisticsFileName = newSegment.getStatisticsResourcePath();
        hbaseRS.putResource(statisticsFileName, hdfsRS.getResource(newSegment.getStatisticsResourcePath()).content(), System.currentTimeMillis());

        CubeUpdate update = new CubeUpdate(cubeCopy);
        update.setToUpdateSegs(newSegCopy);
        cubeMgr.updateCube(update);

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to merge dictionary", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 15
Source File: CubeHFileJob.java    From kylin-on-parquet-v2 with Apache License 2.0
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_PARTITION_FILE_PATH);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_HTABLE_NAME);
        parseOptions(options, args);

        Path partitionFilePath = new Path(getOptionValue(OPTION_PARTITION_FILE_PATH));

        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String cubeName = getOptionValue(OPTION_CUBE_NAME);

        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());

        CubeInstance cube = cubeMgr.getCube(cubeName);

        // use current hbase configuration
        Configuration configuration = new Configuration(HBaseConnection.getCurrentHBaseConfiguration());
        String[] allServices = getAllServices(configuration);
        merge(configuration, getConf());
        configuration.setStrings(DFSConfigKeys.DFS_NAMESERVICES, allServices);

        job = Job.getInstance(configuration, getOptionValue(OPTION_JOB_NAME));

        setJobClasspath(job, cube.getConfig());

        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);
        FileOutputFormat.setOutputPath(job, output);

        // set job configuration
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        // add metadata to distributed cache
        attachCubeMetadata(cube, job.getConfiguration());

        HTable htable = new HTable(configuration, getOptionValue(OPTION_HTABLE_NAME));

        // Automatic config !
        HFileOutputFormat3.configureIncrementalLoad(job, htable);
        reconfigurePartitions(configuration, partitionFilePath);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(CubeHFileMapper.class);
        job.setReducerClass(KeyValueReducer.class);
        job.setMapOutputKeyClass(RowKeyWritable.class);
        job.setMapOutputValueClass(KeyValue.class);
        job.setSortComparatorClass(RowKeyWritable.RowKeyComparator.class);

        // set block replication to 3 for hfiles
        configuration.set(DFSConfigKeys.DFS_REPLICATION_KEY, "3");

        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}
 
Example 16
Source File: MergeDictJob.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        parseOptions(options, args);

        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String jobName = getOptionValue(OPTION_JOB_NAME);
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
        String segmentName = getOptionValue(OPTION_SEGMENT_NAME);

        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        CubeSegment segment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        setJobClasspath(job, cube.getConfig());
        job.setJobName(jobName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);

        logger.info("MergeDictReducer output path: {}", output);

        // Mapper
        job.setMapperClass(MergeDictMapper.class);
        job.setInputFormatClass(ColumnarSplitDictInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        //Reducer
        job.setReducerClass(MergeDictReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        attachCubeMetadata(cube, job.getConfiguration());

        deletePath(job.getConfiguration(), output);
        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        logger.error("job {} failed. ", job.getJobName(), e);
        throw e;
    }
}
 
Example 17
Source File: UHCDictionaryJob.java    From kylin with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_CUBING_JOB_ID);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_INPUT_PATH);
        parseOptions(options, args);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        String job_id = getOptionValue(OPTION_CUBING_JOB_ID);
        String cubeName = getOptionValue(OPTION_CUBE_NAME);
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));

        //add metadata to distributed cache
        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        attachCubeMetadata(cube, job.getConfiguration());

        List<TblColRef> uhcColumns = cube.getDescriptor().getAllUHCColumns();
        int reducerCount = uhcColumns.size();

        //Note: handle the case where no UHC column has any input data
        boolean hasUHCValue = false;
        for (TblColRef tblColRef : uhcColumns) {
            Path path = new Path(input.toString() + "/" + tblColRef.getIdentity());
            if (HadoopUtil.getFileSystem(path).exists(path)) {
                FileInputFormat.addInputPath(job, path);
                FileInputFormat.setInputPathFilter(job, UHCDictPathFilter.class);
                hasUHCValue = true;
            }
        }

        if (!hasUHCValue) {
            isSkipped = true;
            return 0;
        }

        setJobClasspath(job, cube.getConfig());
        setupMapper();
        setupReducer(output, reducerCount);

        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.ARG_CUBING_JOB_ID, job_id);
        job.getConfiguration().set(BatchConstants.CFG_GLOBAL_DICT_BASE_DIR, KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory());
        job.getConfiguration().set(BatchConstants.CFG_MAPRED_OUTPUT_COMPRESS, "false");

        //8 GB of memory is enough for all global dictionaries, because the input is sequential and each dictionary is handled slice by slice
        job.getConfiguration().set("mapreduce.reduce.memory.mb", "8500");
        job.getConfiguration().set("mapred.reduce.child.java.opts", "-Xmx8g");
        //Copying the global dict to the working dir in GlobalDictHDFSStore may take a long time (this could be improved),
        //and waiting for the global dict lock may also take a long time,
        //so set the task timeout to 8 hours
        job.getConfiguration().set("mapreduce.task.timeout", "28800000");

        //allow users to override configuration specifically for the UHC step
        for (Map.Entry<String, String> entry : cube.getConfig().getUHCMRConfigOverride().entrySet()) {
            job.getConfiguration().set(entry.getKey(), entry.getValue());
        }

        return waitForCompletion(job);
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}
 
Example 18
Source File: RangeKeyDistributionJob.java    From Kylin with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);

        parseOptions(options, args);

        // start job
        String jobName = getOptionValue(OPTION_JOB_NAME);
        job = Job.getInstance(getConf(), jobName);

        setJobClasspath(job);

        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);

        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        FileOutputFormat.setOutputPath(job, output);
        // job.getConfiguration().set("dfs.block.size", "67108864");

        // Mapper
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(RangeKeyDistributionMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // Reducer - only one
        job.setReducerClass(RangeKeyDistributionReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        job.setNumReduceTasks(1);

        this.deletePath(job.getConfiguration(), output);

        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase();
        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        RealizationCapacity realizationCapacity = cube.getDescriptor().getModel().getCapacity();
        job.getConfiguration().set(BatchConstants.CUBE_CAPACITY, realizationCapacity.toString());

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        throw e;
    }
}
 
Example 19
Source File: AbstractExecutable.java    From kylin with Apache License 2.0
public KylinConfig getCubeSpecificConfig() {
    String cubeName = getCubeName();
    CubeManager manager = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
    CubeInstance cube = manager.getCube(cubeName);
    return cube.getConfig();
}
 
Example 20
Source File: KafkaFlatTableJob.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_SEGMENT_ID);
        parseOptions(options, args);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        String cubeName = getOptionValue(OPTION_CUBE_NAME);
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));

        String segmentId = getOptionValue(OPTION_SEGMENT_ID);

        // ----------------------------------------------------------------------------
        // add metadata to distributed cache
        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);

        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_ID, segmentId);
        logger.info("Starting: " + job.getJobName());

        setJobClasspath(job, cube.getConfig());

        KafkaConfigManager kafkaConfigManager = KafkaConfigManager.getInstance(KylinConfig.getInstanceFromEnv());
        KafkaConfig kafkaConfig = kafkaConfigManager.getKafkaConfig(cube.getRootFactTable());
        String brokers = KafkaClient.getKafkaBrokers(kafkaConfig);
        String topic = kafkaConfig.getTopic();

        if (brokers == null || brokers.length() == 0 || topic == null) {
            throw new IllegalArgumentException("Invalid Kafka information, brokers " + brokers + ", topic " + topic);
        }

        JobEngineConfig jobEngineConfig = new JobEngineConfig(KylinConfig.getInstanceFromEnv());
        job.getConfiguration().addResource(new Path(jobEngineConfig.getHadoopJobConfFilePath(null)));
        KafkaConsumerProperties kafkaConsumerProperties = KafkaConsumerProperties.getInstanceFromEnv();
        job.getConfiguration().addResource(new Path(kafkaConsumerProperties.getKafkaConsumerHadoopJobConf()));
        job.getConfiguration().set(CONFIG_KAFKA_BROKERS, brokers);
        job.getConfiguration().set(CONFIG_KAFKA_TOPIC, topic);
        job.getConfiguration().set(CONFIG_KAFKA_TIMEOUT, String.valueOf(kafkaConfig.getTimeout()));
        job.getConfiguration().set(CONFIG_KAFKA_INPUT_FORMAT, "json");
        job.getConfiguration().set(CONFIG_KAFKA_PARSER_NAME, kafkaConfig.getParserName());
        job.getConfiguration().set(CONFIG_KAFKA_SPLIT_ROWS, String.valueOf(kafkaConfig.getSplitRows()));
        job.getConfiguration().set(CONFIG_KAFKA_CONSUMER_GROUP, cubeName); // use cubeName as consumer group name
        appendKafkaOverrideProperties(cube.getConfig(), job.getConfiguration());
        setupMapper(cube.getSegmentById(segmentId));
        job.setNumReduceTasks(0);
        FileOutputFormat.setOutputPath(job, output);
        FileOutputFormat.setCompressOutput(job, true);
        org.apache.log4j.Logger.getRootLogger().info("Output hdfs location: " + output);
        org.apache.log4j.Logger.getRootLogger().info("Output hdfs compression: " + true);
        job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

        attachCubeMetadata(cube, job.getConfiguration());
        deletePath(job.getConfiguration(), output);
        return waitForCompletion(job);

    } catch (Exception e) {
        logger.error("error in KafkaFlatTableJob", e);
        printUsage(options);
        throw e;
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }

}