Java Code Examples for org.apache.kylin.common.util.HadoopUtil#getCurrentConfiguration()

The following examples show how to use org.apache.kylin.common.util.HadoopUtil#getCurrentConfiguration(). The source file and originating project are noted above each example.
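
Before the individual examples, here is a minimal sketch of the pattern they share: obtain the current Hadoop Configuration from Kylin, optionally override properties (as the test setups below do for local runs), and hand the configuration to HDFS or SequenceFile APIs. The class name and temp path are illustrative placeholders, not part of the Kylin API.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.kylin.common.util.HadoopUtil;

public class GetCurrentConfigurationSketch {
    public static void main(String[] args) throws Exception {
        // Configuration loaded from the Hadoop environment Kylin is currently running in
        Configuration conf = HadoopUtil.getCurrentConfiguration();

        // Several tests below override these to force a local file system and local MR framework
        conf.set("fs.defaultFS", "file:///");
        conf.set("mapreduce.framework.name", "local");

        // The configuration is typically passed on to FileSystem or SequenceFile readers/writers
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/kylin-demo"); // hypothetical path, for illustration only
        System.out.println(path + " exists: " + fs.exists(path));
    }
}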
Example 1
Source File: HiveToBaseCuboidMapperPerformanceTest.java    From kylin with Apache License 2.0
@Ignore("convenient trial tool for dev")
@Test
public void test() throws IOException, InterruptedException {
    Configuration hconf = HadoopUtil.getCurrentConfiguration();
    HiveToBaseCuboidMapper mapper = new HiveToBaseCuboidMapper();
    Context context = MockupMapContext.create(hconf, metadataUrl, cubeName, null);

    mapper.doSetup(context);

    Reader reader = new Reader(hconf, SequenceFile.Reader.file(srcPath));
    Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), hconf);
    Text value = new Text();

    while (reader.next(key, value)) {
        mapper.map(key, value, context);
    }

    reader.close();
}
 
Example 2
Source File: CubeStatsWriterTest.java    From kylin with Apache License 2.0
@Test
public void testWrite() throws IOException {
    Configuration conf = HadoopUtil.getCurrentConfiguration();
    conf.set("fs.defaultFS", "file:///");
    conf.set("mapreduce.framework.name", "local");
    conf.set("mapreduce.application.framework.path", "");
    conf.set("fs.file.impl.disable.cache", "true");

    final Path outputPath = new Path(getTmpFolderPath(), segmentId);

    System.out.println(outputPath);
    Map<Long, HLLCounter> cuboidHLLMap = Maps.newHashMap();

    Set<Long> allCuboids = cube.getDescriptor().getAllCuboids();
    for (Long cuboid : allCuboids) {
        cuboidHLLMap.put(cuboid, createMockHLLCounter());
    }
    CubeStatsWriter.writeCuboidStatistics(conf, outputPath, cuboidHLLMap, 100);
    assertTrue(new File(outputPath.toString(), BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME).exists());
}
 
Example 3
Source File: HBaseResourceStore.java    From Kylin with Apache License 2.0
@Override
protected InputStream getResourceImpl(String resPath) throws IOException {
    Result r = getByScan(resPath, B_FAMILY, B_COLUMN);
    if (r == null)
        return null;

    byte[] value = r.getValue(B_FAMILY, B_COLUMN);
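    // an empty cell value means the actual content was too large for HBase and was redirected to HDFS (see bigCellHDFSPath)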
    if (value.length == 0) {
        Path redirectPath = bigCellHDFSPath(resPath);
        Configuration hconf = HadoopUtil.getCurrentConfiguration();
        FileSystem fileSystem = FileSystem.get(hconf);

        return fileSystem.open(redirectPath);
    } else {
        return new ByteArrayInputStream(value);
    }
}
 
Example 4
Source File: CubeStatsReader.java    From kylin-on-parquet-v2 with Apache License 2.0
public CubeStatsResult(Path path, int precision) throws IOException {
    Configuration hadoopConf = HadoopUtil.getCurrentConfiguration();
    Option seqInput = SequenceFile.Reader.file(path);
    try (Reader reader = new SequenceFile.Reader(hadoopConf, seqInput)) {
        LongWritable key = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), hadoopConf);
        BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), hadoopConf);
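        // reserved keys (0, -1, -2, -3) carry summary statistics; positive keys are cuboid IDs mapped to HLL counters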
        while (reader.next(key, value)) {
            if (key.get() == 0L) {
                percentage = Bytes.toInt(value.getBytes());
            } else if (key.get() == -1) {
                mapperOverlapRatio = Bytes.toDouble(value.getBytes());
            } else if (key.get() == -2) {
                mapperNumber = Bytes.toInt(value.getBytes());
            } else if (key.get() == -3) {
                sourceRecordCount = Bytes.toLong(value.getBytes());
            } else if (key.get() > 0) {
                HLLCounter hll = new HLLCounter(precision);
                ByteArray byteArray = new ByteArray(value.getBytes());
                hll.readRegisters(byteArray.asBuffer());
                counterMap.put(key.get(), hll);
            }
        }
    }
}
 
Example 5
Source File: FactDistinctColumnsReducerTest.java    From kylin-on-parquet-v2 with Apache License 2.0
@Test
public void testWriteCuboidStatistics() throws IOException {

    final Configuration conf = HadoopUtil.getCurrentConfiguration();
    File tmp = File.createTempFile("cuboidstatistics", "");
    final Path outputPath = new Path(tmp.getParent().toString() + File.separator + RandomUtil.randomUUID().toString());
    if (!FileSystem.getLocal(conf).exists(outputPath)) {
        // FileSystem.getLocal(conf).create(outputPath);
    }

    System.out.println(outputPath);
    Map<Long, HLLCounter> cuboidHLLMap = Maps.newHashMap();
    CubeStatsWriter.writeCuboidStatistics(conf, outputPath, cuboidHLLMap, 100);
    FileSystem.getLocal(conf).delete(outputPath, true);

}
 
Example 6
Source File: IICLI.java    From Kylin with Apache License 2.0
public static void main(String[] args) throws IOException {
	Configuration hconf = HadoopUtil.getCurrentConfiguration();
	IIManager mgr = IIManager.getInstance(KylinConfig.getInstanceFromEnv());

	String iiName = args[0];
	IIInstance ii = mgr.getII(iiName);

	String path = args[1];
	System.out.println("Reading from " + path + " ...");

	TableRecordInfo info = new TableRecordInfo(ii.getFirstSegment());
	IIKeyValueCodec codec = new IIKeyValueCodec(info.getDigest());
	int count = 0;
	for (Slice slice : codec.decodeKeyValue(readSequenceKVs(hconf, path))) {
		for (RawTableRecord rec : slice) {
			System.out.println(new TableRecord(rec, info).toString());
			count++;
		}
	}
	System.out.println("Total " + count + " records");
}
 
Example 7
Source File: HBaseResourceStore.java    From Kylin with Apache License 2.0
private Path writeLargeCellToHdfs(String resPath, byte[] largeColumn, HTableInterface table) throws IOException {
    Path redirectPath = bigCellHDFSPath(resPath);
    Configuration hconf = HadoopUtil.getCurrentConfiguration();
    FileSystem fileSystem = FileSystem.get(hconf);

    if (fileSystem.exists(redirectPath)) {
        fileSystem.delete(redirectPath, true);
    }

    FSDataOutputStream out = fileSystem.create(redirectPath);

    try {
        out.write(largeColumn);
    } finally {
        IOUtils.closeQuietly(out);
    }

    return redirectPath;
}
 
Example 8
Source File: AppendTrieDictionaryTest.java    From kylin with Apache License 2.0
private void convertIndexToOldFormat(String baseDir) throws IOException {
    Path basePath = new Path(baseDir);
    FileSystem fs = HadoopUtil.getFileSystem(basePath);

    GlobalDictHDFSStore store = new GlobalDictHDFSStore(baseDir);
    Long[] versions = store.listAllVersions();
    GlobalDictMetadata metadata = store.getMetadata(versions[versions.length - 1]);

    //convert v2 index to v1 index
    Path versionPath = store.getVersionDir(versions[versions.length - 1]);
    Path v2IndexFile = new Path(versionPath, V2_INDEX_NAME);

    fs.delete(v2IndexFile, true);
    GlobalDictHDFSStore.IndexFormat indexFormatV1 = new GlobalDictHDFSStore.IndexFormatV1(fs,
            HadoopUtil.getCurrentConfiguration());
    indexFormatV1.writeIndexFile(versionPath, metadata);

    //convert v2 fileName format to v1 fileName format
    for (Map.Entry<AppendDictSliceKey, String> entry : metadata.sliceFileMap.entrySet()) {
        fs.rename(new Path(versionPath, entry.getValue()), new Path(versionPath, "cached_" + entry.getKey()));
    }
}
 
Example 9
Source File: NDCuboidJobTest.java    From kylin with Apache License 2.0
@Before
public void setup() throws Exception {
    conf = HadoopUtil.getCurrentConfiguration();
    conf.set("fs.default.name", "file:///");
    conf.set("mapreduce.framework.name", "local");
    conf.set("mapreduce.application.framework.path", "");

    // for local runner out-of-memory issue
    conf.set("mapreduce.task.io.sort.mb", "10");

    createTestMetadata();
}
 
Example 10
Source File: DeployCoprocessorCLI.java    From Kylin with Apache License 2.0
private static void initHTableCoprocessor(HTableDescriptor desc) throws IOException {
    KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
    Configuration hconf = HadoopUtil.getCurrentConfiguration();
    FileSystem fileSystem = FileSystem.get(hconf);

    String localCoprocessorJar = kylinConfig.getCoprocessorLocalJar();
    Path hdfsCoprocessorJar = DeployCoprocessorCLI.uploadCoprocessorJar(localCoprocessorJar, fileSystem, null);

    DeployCoprocessorCLI.addCoprocessorOnHTable(desc, hdfsCoprocessorJar);
}
 
Example 11
Source File: NDCuboidJobTest.java    From kylin-on-parquet-v2 with Apache License 2.0
@Before
public void setup() throws Exception {
    conf = HadoopUtil.getCurrentConfiguration();
    conf.set("fs.default.name", "file:///");
    conf.set("mapreduce.framework.name", "local");
    conf.set("mapreduce.application.framework.path", "");

    // for local runner out-of-memory issue
    conf.set("mapreduce.task.io.sort.mb", "10");

    createTestMetadata();
}
 
Example 12
Source File: MergeCuboidJobTest.java    From kylin-on-parquet-v2 with Apache License 2.0
@Before
public void setup() throws Exception {
    conf = HadoopUtil.getCurrentConfiguration();
    conf.set("fs.default.name", "file:///");
    conf.set("mapreduce.framework.name", "local");
    conf.set("mapreduce.application.framework.path", "");

    // for local runner out-of-memory issue
    conf.set("mapreduce.task.io.sort.mb", "10");
    createTestMetadata();
}
 
Example 13
Source File: HBaseResourceStoreTest.java    From Kylin with Apache License 2.0
@Test
public void testHBaseStoreWithLargeCell() throws Exception {
    String path = "/cube/_test_large_cell.json";
    String largeContent = "THIS_IS_A_LARGE_CELL";
    StringEntity content = new StringEntity(largeContent);
    KylinConfig config = KylinConfig.getInstanceFromEnv();
    int origSize = config.getHBaseKeyValueSize();
    ResourceStore store = ResourceStore.getStore(KylinConfig.getInstanceFromEnv());

    try {
        config.setProperty("kylin.hbase.client.keyvalue.maxsize", String.valueOf(largeContent.length() - 1));

        store.deleteResource(path);

        store.putResource(path, content, StringEntity.serializer);
        assertTrue(store.exists(path));
        StringEntity t = store.getResource(path, StringEntity.class, StringEntity.serializer);
        assertEquals(content, t);

        Path redirectPath = ((HBaseResourceStore) store).bigCellHDFSPath(path);
        Configuration hconf = HadoopUtil.getCurrentConfiguration();
        FileSystem fileSystem = FileSystem.get(hconf);
        assertTrue(fileSystem.exists(redirectPath));

        FSDataInputStream in = fileSystem.open(redirectPath);
        assertEquals(largeContent, in.readUTF());
        in.close();

        store.deleteResource(path);
    } finally {
        config.setProperty("kylin.hbase.client.keyvalue.maxsize", "" + origSize);
        store.deleteResource(path);
    }
}
 
Example 14
Source File: AbstractHadoopJob.java    From kylin with Apache License 2.0
public AbstractHadoopJob() {
    super(HadoopUtil.getCurrentConfiguration());
}
 
Example 15
Source File: MrJobInfoExtractor.java    From kylin-on-parquet-v2 with Apache License 2.0
private void extractRestCheckUrl() {
    Configuration conf = HadoopUtil.getCurrentConfiguration();
    yarnMasterUrlBase = HadoopConfExtractor.extractYarnMasterUrl(conf);
    jobHistoryUrlBase = HadoopConfExtractor.extractJobHistoryUrl(yarnMasterUrlBase, conf);
    logger.info("job history url base: " + jobHistoryUrlBase);
}
 
Example 16
Source File: MergeStatisticsWithOldStep.java    From kylin with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager mgr = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment optimizeSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));

    CubeSegment oldSegment = optimizeSegment.getCubeInstance().getOriginalSegmentToOptimize(optimizeSegment);
    Preconditions.checkNotNull(oldSegment,
            "cannot find the original segment to be optimized by " + optimizeSegment);

    KylinConfig kylinConf = cube.getConfig();
    Configuration conf = HadoopUtil.getCurrentConfiguration();
    ResourceStore rs = ResourceStore.getStore(kylinConf);
    int averageSamplingPercentage = 0;

    try {
        //1. Add statistics from optimized segment
        Path statisticsDirPath = new Path(CubingExecutableUtil.getStatisticsPath(this.getParams()));
        FileSystem hdfs = FileSystem.get(conf);
        if (!hdfs.exists(statisticsDirPath)) {
            throw new IOException("StatisticsFilePath " + statisticsDirPath + " does not exists");
        }

        if (!hdfs.isDirectory(statisticsDirPath)) {
            throw new IOException("StatisticsFilePath " + statisticsDirPath + " is not a directory");
        }

        Path[] statisticsFiles = HadoopUtil.getFilteredPath(hdfs, statisticsDirPath,
                BatchConstants.CFG_OUTPUT_STATISTICS);
        if (statisticsFiles == null) {
            throw new IOException("fail to find the statistics file in base dir: " + statisticsDirPath);
        }

        for (Path item : statisticsFiles) {
            CubeStatsReader optimizeSegmentStatsReader = new CubeStatsReader(optimizeSegment, null,
                    optimizeSegment.getConfig(), item);
            averageSamplingPercentage += optimizeSegmentStatsReader.getSamplingPercentage();
            addFromCubeStatsReader(optimizeSegmentStatsReader);
        }

        //2. Add statistics from old segment
        CubeStatsReader oldSegmentStatsReader = new CubeStatsReader(oldSegment, null, oldSegment.getConfig());
        averageSamplingPercentage += oldSegmentStatsReader.getSamplingPercentage();
        addFromCubeStatsReader(oldSegmentStatsReader);

        logger.info("Cuboid set with stats info: " + cuboidHLLMap.keySet().toString());
        //3. Store merged statistics for recommend cuboids
        averageSamplingPercentage = averageSamplingPercentage / 2;
        Set<Long> cuboidsRecommend = cube.getCuboidsRecommend();

        Map<Long, HLLCounter> resultCuboidHLLMap = Maps.newHashMapWithExpectedSize(cuboidsRecommend.size());
        for (Long cuboid : cuboidsRecommend) {
            HLLCounter hll = cuboidHLLMap.get(cuboid);
            if (hll == null) {
                logger.warn("Cannot get the row count stats for cuboid " + cuboid);
            } else {
                resultCuboidHLLMap.put(cuboid, hll);
            }
        }

        String resultDir = CubingExecutableUtil.getMergedStatisticsPath(this.getParams());
        CubeStatsWriter.writeCuboidStatistics(conf, new Path(resultDir), resultCuboidHLLMap,
                averageSamplingPercentage, oldSegmentStatsReader.getSourceRowCount());

        try (FSDataInputStream mergedStats = hdfs
                .open(new Path(resultDir, BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME))) {
            // put the statistics to metadata store
            String statisticsFileName = optimizeSegment.getStatisticsResourcePath();
            rs.putResource(statisticsFileName, mergedStats, System.currentTimeMillis());
        }

        //By default, the cube optimization will use in-memory cubing
        CubingJob cubingJob = (CubingJob) getManager()
                .getJob(CubingExecutableUtil.getCubingJobId(this.getParams()));
        StatisticsDecisionUtil.decideCubingAlgorithm(cubingJob, optimizeSegment);

        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to merge cuboid statistics", e);
        return ExecuteResult.createError(e);
    }

}
 
Example 17
Source File: UpdateDictionaryStep.java    From kylin with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeMgr = CubeManager.getInstance(context.getConfig());
    final DictionaryManager dictMgrHdfs;
    final DictionaryManager dictMgrHbase;
    final CubeInstance cube = cubeMgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final List<CubeSegment> mergingSegments = getMergingSegments(cube);
    final String dictInfoPath = this.getParams().get(BatchConstants.ARG_DICT_PATH);
    final String metadataUrl = this.getParams().get(BatchConstants.ARG_META_URL);

    final KylinConfig kylinConfHbase = cube.getConfig();
    final KylinConfig kylinConfHdfs = AbstractHadoopJob.loadKylinConfigFromHdfs(metadataUrl);

    Collections.sort(mergingSegments);

    try {
        Configuration conf = HadoopUtil.getCurrentConfiguration();
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        ResourceStore hbaseRS = ResourceStore.getStore(kylinConfHbase);
        ResourceStore hdfsRS = ResourceStore.getStore(kylinConfHdfs);
        dictMgrHdfs = DictionaryManager.getInstance(kylinConfHdfs);
        dictMgrHbase = DictionaryManager.getInstance(kylinConfHbase);

        // work on copy instead of cached objects
        CubeInstance cubeCopy = cube.latestCopyForWrite();
        CubeSegment newSegCopy = cubeCopy.getSegmentById(newSegment.getUuid());

        // update cube segment dictionary

        FileStatus[] fileStatuss = fs.listStatus(new Path(dictInfoPath), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part") || path.getName().startsWith("tmp");
            }
        });

        for (FileStatus fileStatus : fileStatuss) {
            Path filePath = fileStatus.getPath();

            SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
            Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Text value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf);

            while (reader.next(key, value)) {
                String tblCol = key.toString();
                String dictInfoResource = value.toString();

                if (StringUtils.isNotEmpty(dictInfoResource)) {
                    logger.info(dictInfoResource);
                    // put dictionary file to metadata store
                    DictionaryInfo dictInfoHdfs = dictMgrHdfs.getDictionaryInfo(dictInfoResource);
                    DictionaryInfo dicInfoHbase = dictMgrHbase.trySaveNewDict(dictInfoHdfs.getDictionaryObject(), dictInfoHdfs);

                    if (dicInfoHbase != null) {
                        TblColRef tblColRef = cube.getDescriptor().findColumnRef(tblCol.split(":")[0], tblCol.split(":")[1]);
                        newSegCopy.putDictResPath(tblColRef, dicInfoHbase.getResourcePath());
                    }
                }
            }

            IOUtils.closeStream(reader);
        }

        CubeSegment lastSeg = mergingSegments.get(mergingSegments.size() - 1);
        for (Map.Entry<String, String> entry : lastSeg.getSnapshots().entrySet()) {
            newSegCopy.putSnapshotResPath(entry.getKey(), entry.getValue());
        }

        // update statistics
        // put the statistics to metadata store
        String statisticsFileName = newSegment.getStatisticsResourcePath();
        hbaseRS.putResource(statisticsFileName, hdfsRS.getResource(newSegment.getStatisticsResourcePath()).content(), System.currentTimeMillis());

        CubeUpdate update = new CubeUpdate(cubeCopy);
        update.setToUpdateSegs(newSegCopy);
        cubeMgr.updateCube(update);

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to merge dictionary", e);
        return ExecuteResult.createError(e);
    }
}
 
Example 18
Source File: CubeHFileMapper2Test.java    From kylin-on-parquet-v2 with Apache License 2.0
@Test
public void testBasic() throws Exception {

    Configuration hconf = HadoopUtil.getCurrentConfiguration();
    Context context = MockupMapContext.create(hconf, cubeName, outKV);

    CubeHFileMapper mapper = new CubeHFileMapper();
    mapper.doSetup(context);

    Text key = new Text("not important");
    Text value = new Text(new byte[] { 2, 2, 51, -79, 1 });

    mapper.map(key, value, context);

    KeyValue outValue = (KeyValue) outKV[1];

    assertTrue(Bytes.compareTo(value.getBytes(), 0, value.getLength(), outValue.getValueArray(), outValue.getValueOffset(), outValue.getValueLength()) == 0);
}
 
Example 19
Source File: CubeHFileMapper2Test.java    From kylin with Apache License 2.0
@Test
public void testBasic() throws Exception {

    Configuration hconf = HadoopUtil.getCurrentConfiguration();
    Context context = MockupMapContext.create(hconf, cubeName, outKV);

    CubeHFileMapper mapper = new CubeHFileMapper();
    mapper.doSetup(context);

    Text key = new Text("not important");
    Text value = new Text(new byte[] { 2, 2, 51, -79, 1 });

    mapper.map(key, value, context);

    KeyValue outValue = (KeyValue) outKV[1];

    assertTrue(Bytes.compareTo(value.getBytes(), 0, value.getLength(), outValue.getValueArray(), outValue.getValueOffset(), outValue.getValueLength()) == 0);
}
 
Example 20
Source File: GlobalDictHDFSStore.java    From kylin-on-parquet-v2 with Apache License 2.0
public GlobalDictHDFSStore(String baseDir) throws IOException {
    super(baseDir);
    this.basePath = new Path(baseDir);
    this.conf = HadoopUtil.getCurrentConfiguration();
    this.fileSystem = HadoopUtil.getFileSystem(baseDir);
}