Java Code Examples for org.apache.kylin.common.util.HadoopUtil

The following examples show how to use org.apache.kylin.common.util.HadoopUtil. They are extracted from open source projects; the source project, source file, and license are listed above each example where available.
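Most of the examples below use one of three entry points: HadoopUtil.getCurrentConfiguration() to obtain the active Hadoop Configuration, HadoopUtil.getWorkingFileSystem() for the FileSystem behind Kylin's working directory, and HadoopUtil.getFileSystem(path) for the FileSystem that owns a specific path. The minimal sketch below is not taken from any of the listed projects; it assumes a Kylin environment (kylin.properties reachable by KylinConfig) and combines these calls with the standard Hadoop FileSystem API. The class name and path are illustrative only.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.kylin.common.util.HadoopUtil;

public class HadoopUtilSketch {

    // Deletes a directory on Kylin's working file system if it exists.
    // Returns true when the path was present and has been removed.
    public static boolean deleteIfExists(String dir) throws IOException {
        Configuration conf = HadoopUtil.getCurrentConfiguration(); // current Hadoop configuration
        FileSystem fs = HadoopUtil.getWorkingFileSystem(conf);     // file system of Kylin's working directory
        Path path = new Path(dir);
        if (fs.exists(path)) {
            return fs.delete(path, true); // recursive delete
        }
        return false;
    }

    public static void main(String[] args) throws IOException {
        // Illustrative path only; any HDFS or local path works the same way.
        System.out.println(deleteIfExists("/tmp/kylin_hadooputil_sketch"));
    }
}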
Example 1
Source Project: kylin   Source File: CubeStatsWriterTest.java    License: Apache License 2.0
@Test
public void testWrite() throws IOException {
    Configuration conf = HadoopUtil.getCurrentConfiguration();
    conf.set("fs.defaultFS", "file:///");
    conf.set("mapreduce.framework.name", "local");
    conf.set("mapreduce.application.framework.path", "");
    conf.set("fs.file.impl.disable.cache", "true");

    final Path outputPath = new Path(getTmpFolderPath(), segmentId);

    System.out.println(outputPath);
    Map<Long, HLLCounter> cuboidHLLMap = Maps.newHashMap();

    Set<Long> allCuboids = cube.getDescriptor().getAllCuboids();
    for (Long cuboid : allCuboids) {
        cuboidHLLMap.put(cuboid, createMockHLLCounter());
    }
    CubeStatsWriter.writeCuboidStatistics(conf, outputPath, cuboidHLLMap, 100);
    assertTrue(new File(outputPath.toString(), BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME).exists());
}
 
Example 2
Source Project: kylin-on-parquet-v2   Source File: StreamingServer.java    License: Apache License 2.0
public void flushToHDFS() throws IOException {
    logger.info("start to flush cube:{} segment:{} to hdfs:{}", segment.getCubeName(),
            segment.getSegmentName(), hdfsPath);
    final FileSystem fs = HadoopUtil.getFileSystem(hdfsPath);
    final String localPath = segment.getDataSegmentFolder().getPath();
    final Path remotePath = new Path(hdfsPath);
    if (fs.exists(remotePath)) {
        logger.info("the remote path:{} is already exist, skip copy data to remote", remotePath);
        return;
    }
    final Path remoteTempPath = new Path(hdfsPath + ".tmp");
    if (fs.exists(remoteTempPath)) {
        FileStatus sdst = fs.getFileStatus(remoteTempPath);
        if (sdst.isDirectory()) {
            logger.warn("target temp path: {} is an existed directory, try to delete it.", remoteTempPath);
            fs.delete(remoteTempPath, true);
            logger.warn("target temp path: {} is deleted.", remoteTempPath);
        }
    }
    fs.copyFromLocalFile(new Path(localPath), remoteTempPath);
    logger.info("data copy to remote temp path:{}", remoteTempPath);
    boolean renamed = fs.rename(remoteTempPath, remotePath);
    if (renamed) {
        logger.info("successfully rename the temp path to:{}", remotePath);
    }
}
 
Example 3
Source Project: kylin   Source File: ITMassInQueryTest.java    License: Apache License 2.0
@Before
public void setup() throws Exception {

    ITKylinQueryTest.clean();
    ITKylinQueryTest.joinType = "left";
    ITKylinQueryTest.setupAll();

    fileSystem = HadoopUtil.getWorkingFileSystem();

    int sellerCount = 200;
    Random r = new Random();
    vipSellers = Sets.newHashSet();
    for (int i = 0; i < sellerCount; i++) {
        vipSellers.add(10000000L + r.nextInt(1500));
    }

    Path path = new Path("/tmp/vip_customers.txt");
    fileSystem.delete(path, false);
    FSDataOutputStream outputStream = fileSystem.create(path);
    org.apache.commons.io.IOUtils.write(StringUtils.join(vipSellers, "\n"), outputStream, Charset.defaultCharset());
    outputStream.close();

    System.out.println("The filter is " + vipSellers);
}
 
Example 4
Source Project: Kylin   Source File: IICLI.java    License: Apache License 2.0
public static void main(String[] args) throws IOException {
	Configuration hconf = HadoopUtil.getCurrentConfiguration();
	IIManager mgr = IIManager.getInstance(KylinConfig.getInstanceFromEnv());

	String iiName = args[0];
	IIInstance ii = mgr.getII(iiName);

	String path = args[1];
	System.out.println("Reading from " + path + " ...");

	TableRecordInfo info = new TableRecordInfo(ii.getFirstSegment());
	IIKeyValueCodec codec = new IIKeyValueCodec(info.getDigest());
	int count = 0;
	for (Slice slice : codec.decodeKeyValue(readSequenceKVs(hconf, path))) {
		for (RawTableRecord rec : slice) {
			System.out.println(new TableRecord(rec, info).toString());
			count++;
		}
	}
	System.out.println("Total " + count + " records");
}
 
Example 5
Source Project: kylin-on-parquet-v2   Source File: CubeStatsReader.java    License: Apache License 2.0
/**
 * @param cuboidScheduler if it is null, some of this reader's functions will not be supported
 */
public CubeStatsReader(CubeSegment cubeSegment, CuboidScheduler cuboidScheduler, KylinConfig kylinConfig)
        throws IOException {
    ResourceStore store = ResourceStore.getStore(kylinConfig);
    String statsKey = cubeSegment.getStatisticsResourcePath();
    RawResource resource = store.getResource(statsKey);
    if (resource == null)
        throw new IllegalStateException("Missing resource at " + statsKey);

    File tmpSeqFile = writeTmpSeqFile(resource.content());
    Path path = new Path(HadoopUtil.fixWindowsPath("file://" + tmpSeqFile.getAbsolutePath()));

    CubeStatsResult cubeStatsResult = new CubeStatsResult(path, kylinConfig.getCubeStatsHLLPrecision());
    tmpSeqFile.delete();

    this.seg = cubeSegment;
    this.cuboidScheduler = cuboidScheduler;
    this.samplingPercentage = cubeStatsResult.getPercentage();
    this.mapperNumberOfFirstBuild = cubeStatsResult.getMapperNumber();
    this.mapperOverlapRatioOfFirstBuild = cubeStatsResult.getMapperOverlapRatio();
    this.cuboidRowEstimatesHLL = cubeStatsResult.getCounterMap();
    this.sourceRowCount = cubeStatsResult.getSourceRecordCount();
}
 
Example 6
Source Project: kylin-on-parquet-v2   Source File: CubeStatsReader.java    License: Apache License 2.0
public CubeStatsResult(Path path, int precision) throws IOException {
    Configuration hadoopConf = HadoopUtil.getCurrentConfiguration();
    Option seqInput = SequenceFile.Reader.file(path);
    try (Reader reader = new SequenceFile.Reader(hadoopConf, seqInput)) {
        LongWritable key = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), hadoopConf);
        BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), hadoopConf);
        while (reader.next(key, value)) {
            if (key.get() == 0L) {
                percentage = Bytes.toInt(value.getBytes());
            } else if (key.get() == -1) {
                mapperOverlapRatio = Bytes.toDouble(value.getBytes());
            } else if (key.get() == -2) {
                mapperNumber = Bytes.toInt(value.getBytes());
            } else if (key.get() == -3) {
                sourceRecordCount = Bytes.toLong(value.getBytes());
            } else if (key.get() > 0) {
                HLLCounter hll = new HLLCounter(precision);
                ByteArray byteArray = new ByteArray(value.getBytes());
                hll.readRegisters(byteArray.asBuffer());
                counterMap.put(key.get(), hll);
            }
        }
    }
}
 
Example 7
Source Project: kylin   Source File: StreamingServer.java    License: Apache License 2.0
public void flushToHDFS() throws IOException {
    logger.info("start to flush cube:{} segment:{} to hdfs:{}", segment.getCubeName(),
            segment.getSegmentName(), hdfsPath);
    final FileSystem fs = HadoopUtil.getFileSystem(hdfsPath);
    final String localPath = segment.getDataSegmentFolder().getPath();
    final Path remotePath = new Path(hdfsPath);
    if (fs.exists(remotePath)) {
        logger.info("the remote path:{} is already exist, skip copy data to remote", remotePath);
        return;
    }
    final Path remoteTempPath = new Path(hdfsPath + ".tmp");
    if (fs.exists(remoteTempPath)) {
        FileStatus sdst = fs.getFileStatus(remoteTempPath);
        if (sdst.isDirectory()) {
            logger.warn("target temp path: {} is an existed directory, try to delete it.", remoteTempPath);
            fs.delete(remoteTempPath, true);
            logger.warn("target temp path: {} is deleted.", remoteTempPath);
        }
    }
    fs.copyFromLocalFile(new Path(localPath), remoteTempPath);
    logger.info("data copy to remote temp path:{}", remoteTempPath);
    boolean renamed = fs.rename(remoteTempPath, remotePath);
    if (renamed) {
        logger.info("successfully rename the temp path to:{}", remotePath);
    }
}
 
Example 8
Source Project: kylin   Source File: HDFSResourceStore.java    License: Apache License 2.0
public HDFSResourceStore(KylinConfig kylinConfig, StorageURL metadataUrl) throws Exception {
    super(kylinConfig);
    Preconditions.checkState(HDFS_SCHEME.equals(metadataUrl.getScheme()));

    String path = metadataUrl.getParameter("path");
    if (path == null) {
        // missing path is not expected, but don't fail it
        path = kylinConfig.getHdfsWorkingDirectory(null) + "tmp_metadata";
        logger.warn("Missing path, fall back to {}. ", path);
    }

    fs = HadoopUtil.getFileSystem(path);
    Path metadataPath = new Path(path);
    if (!fs.exists(metadataPath)) {
        logger.warn("Path not exist in HDFS, create it: {}. ", path);
        createMetaFolder(metadataPath);
    }

    hdfsMetaPath = metadataPath;
    logger.info("hdfs meta path : {}", hdfsMetaPath);

}
 
Example 9
Source Project: kylin-on-parquet-v2   Source File: KylinHealthCheckJob.java    License: Apache License 2.0
private void checkSegmentHDFSPath(List<CubeInstance> cubes) throws IOException {
    reporter.log("## Fix missing HDFS path of segments");
    FileSystem defaultFs = HadoopUtil.getWorkingFileSystem();
    for (CubeInstance cube : cubes) {
        for (CubeSegment segment : cube.getSegments()) {
            String jobUuid = segment.getLastBuildJobID();
            if (jobUuid != null && !jobUuid.isEmpty()) {
                String path = JobBuilderSupport.getJobWorkingDir(config.getHdfsWorkingDirectory(), jobUuid);
                if (!defaultFs.exists(new Path(path))) {
                    reporter.log(
                            "Project: {} cube: {} segment: {} cube id data: {} don't exist and need to rebuild it",
                            cube.getProject(), cube.getName(), segment, path);
                    reporter.log(
                            "The rebuild url: -d '{\"startTime\":{}, \"endTime\":{}, \"buildType\":\"REFRESH\"}' /kylin/api/cubes/{}/build",
                            segment.getTSRange().start, segment.getTSRange().end, cube.getName());
                }
            }
        }
    }
}
 
Example 10
Source Project: Kylin   Source File: HiveTable.java    License: Apache License 2.0
private String computeHDFSLocation(boolean needFilePath) throws IOException {

    String override = KylinConfig.getInstanceFromEnv().getOverrideHiveTableLocation(hiveTable);
    if (override != null) {
        logger.debug("Override hive table location " + hiveTable + " -- " + override);
        return override;
    }

    String hdfsDir = null;
    try {
        HiveClient hiveClient = new HiveClient();
        hdfsDir = hiveClient.getHiveTableLocation(database, hiveTable);
    } catch (Exception e) {
        e.printStackTrace();
        throw new IOException(e);
    }

    if (needFilePath) {
        FileSystem fs = HadoopUtil.getFileSystem(hdfsDir);
        FileStatus file = findOnlyFile(hdfsDir, fs);
        return file.getPath().toString();
    } else {
        return hdfsDir;
    }
}
 
Example 11
Source Project: kylin-on-parquet-v2   Source File: CubeService.java    License: Apache License 2.0
private void cleanSegmentStorage(List<CubeSegment> toRemoveSegs) throws IOException {
    if (!KylinConfig.getInstanceFromEnv().cleanStorageAfterDelOperation()) {
        return;
    }

    if (toRemoveSegs != null && !toRemoveSegs.isEmpty()) {
        List<String> toDropHTables = Lists.newArrayListWithCapacity(toRemoveSegs.size());
        List<String> toDelHDFSPaths = Lists.newArrayListWithCapacity(toRemoveSegs.size());
        for (CubeSegment seg : toRemoveSegs) {
            toDropHTables.add(seg.getStorageLocationIdentifier());
            toDelHDFSPaths.add(JobBuilderSupport.getJobWorkingDir(seg.getConfig().getHdfsWorkingDirectory(),
                    seg.getLastBuildJobID()));
        }

        StorageCleanUtil.dropHTables(new HBaseAdmin(HBaseConnection.getCurrentHBaseConfiguration()), toDropHTables);
        StorageCleanUtil.deleteHDFSPath(HadoopUtil.getWorkingFileSystem(), toDelHDFSPaths);
    }
}
 
Example 12
Source Project: kylin   Source File: HDFSPathGarbageCollectionStep.java    License: Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    try {
        config = new JobEngineConfig(context.getConfig());
        List<String> toDeletePaths = getDeletePaths();
        dropHdfsPathOnCluster(toDeletePaths, HadoopUtil.getWorkingFileSystem());

        if (StringUtils.isNotEmpty(context.getConfig().getHBaseClusterFs())) {
            dropHdfsPathOnCluster(toDeletePaths, FileSystem.get(HBaseConnection.getCurrentHBaseConfiguration()));
        }
    } catch (IOException e) {
        logger.error("job:" + getId() + " execute finished with exception", e);
        output.append("\n").append(e.getLocalizedMessage());
    }

    return new ExecuteResult(ExecuteResult.State.SUCCEED, output.toString());
}
 
Example 13
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    String cubeId = getParam(MetadataConstants.P_CUBE_ID);
    String[] segments = StringUtils.split(getParam(MetadataConstants.P_SEGMENT_NAMES), ",");
    KylinConfig config = KylinConfig.getInstanceFromEnv();
    CubeInstance cube = CubeManager.getInstance(config).getCubeByUuid(cubeId);

    updateMetadataAfterMerge(cubeId);

    for (String segmentName : segments) {
        String path = config.getHdfsWorkingDirectory() + cube.getProject() + "/parquet/" + cube.getName() + "/" + segmentName;
        try {
            HadoopUtil.deletePath(HadoopUtil.getCurrentConfiguration(), new Path(path));
        } catch (IOException e) {
            throw new ExecuteException("Can not delete segment: " + segmentName + ", in cube: " + cube.getName());
        }
    }

    return ExecuteResult.createSucceed();
}
 
Example 14
Source Project: kylin-on-parquet-v2   Source File: GarbageCollectionStep.java    License: Apache License 2.0
private void rmdirOnHDFS(List<String> paths) throws IOException {
    for (String path : paths) {
        Path externalDataPath = new Path(path);
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        if (fs.exists(externalDataPath)) {
            fs.delete(externalDataPath, true);
        }
    }
}
 
Example 15
Source Project: Kylin   Source File: HBaseResourceStoreTest.java    License: Apache License 2.0
@Test
public void testHBaseStoreWithLargeCell() throws Exception {
    String path = "/cube/_test_large_cell.json";
    String largeContent = "THIS_IS_A_LARGE_CELL";
    StringEntity content = new StringEntity(largeContent);
    KylinConfig config = KylinConfig.getInstanceFromEnv();
    int origSize = config.getHBaseKeyValueSize();
    ResourceStore store = ResourceStore.getStore(KylinConfig.getInstanceFromEnv());

    try {
        config.setProperty("kylin.hbase.client.keyvalue.maxsize", String.valueOf(largeContent.length() - 1));

        store.deleteResource(path);

        store.putResource(path, content, StringEntity.serializer);
        assertTrue(store.exists(path));
        StringEntity t = store.getResource(path, StringEntity.class, StringEntity.serializer);
        assertEquals(content, t);

        Path redirectPath = ((HBaseResourceStore) store).bigCellHDFSPath(path);
        Configuration hconf = HadoopUtil.getCurrentConfiguration();
        FileSystem fileSystem = FileSystem.get(hconf);
        assertTrue(fileSystem.exists(redirectPath));

        FSDataInputStream in = fileSystem.open(redirectPath);
        assertEquals(largeContent, in.readUTF());
        in.close();

        store.deleteResource(path);
    } finally {
        config.setProperty("kylin.hbase.client.keyvalue.maxsize", "" + origSize);
        store.deleteResource(path);
    }
}
 
Example 16
private static List<String> readLines(Path location, Configuration conf) throws Exception {
    FileSystem fileSystem = HadoopUtil.getWorkingFileSystem();
    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    FileStatus[] items = fileSystem.listStatus(location);
    if (items == null)
        return new ArrayList<String>();
    List<String> results = new ArrayList<String>();
    for (FileStatus item : items) {

        // ignoring files like _SUCCESS
        if (item.getPath().getName().startsWith("_")) {
            continue;
        }

        CompressionCodec codec = factory.getCodec(item.getPath());
        InputStream stream = null;

        // check if we have a compression codec we need to use
        if (codec != null) {
            stream = codec.createInputStream(fileSystem.open(item.getPath()));
        } else {
            stream = fileSystem.open(item.getPath());
        }

        StringWriter writer = new StringWriter();
        IOUtils.copy(stream, writer, "UTF-8");
        String raw = writer.toString();
        for (String str : StringUtil.split(raw, "\n")) {
            results.add(str);
        }
    }
    return results;
}
 
Example 17
Source Project: kylin   Source File: GarbageCollectionStep.java    License: Apache License 2.0
private void rmdirOnHDFS(List<String> paths) throws IOException {
    for (String path : paths) {
        Path externalDataPath = new Path(path);
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        if (fs.exists(externalDataPath)) {
            fs.delete(externalDataPath, true);
        }
    }
}
 
Example 18
Source Project: kylin-on-parquet-v2   Source File: Coordinator.java    License: Apache License 2.0
public void removeCubeHDFSFiles(String cubeName) {
    String segmentHDFSPath = HDFSUtil.getStreamingCubeFilePath(cubeName);
    try {
        FileSystem fs = HadoopUtil.getFileSystem(segmentHDFSPath);
        fs.delete(new Path(segmentHDFSPath), true);
    } catch (Exception e) {
        logger.error("error when remove hdfs file, hdfs path:{}", segmentHDFSPath);
    }
}
 
Example 19
Source Project: kylin-on-parquet-v2   Source File: StreamingCoordinator.java    License: Apache License 2.0
public void removeCubeHDFSFiles(String cubeName) {
    String segmentHDFSPath = HDFSUtil.getStreamingCubeFilePath(cubeName);
    try {
        FileSystem fs = HadoopUtil.getFileSystem(segmentHDFSPath);
        fs.delete(new Path(segmentHDFSPath), true);
    } catch (Exception e) {
        logger.error("Error when remove hdfs file, hdfs path:{}", segmentHDFSPath);
    }
}
 
Example 20
Source Project: kylin   Source File: ITGlobalDictionaryBuilderTest.java    License: Apache License 2.0
private void cleanup() {
    String BASE_DIR = KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory() + "/resources/GlobalDict"
            + dictionaryInfo.getResourceDir() + "/";
    Path basePath = new Path(BASE_DIR);
    try {
        HadoopUtil.getFileSystem(basePath).delete(basePath, true);
    } catch (IOException e) {
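        // best-effort cleanup: failures to delete the global dictionary directory are deliberately ignored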
    }
}
 
Example 21
private void cleanup() {
    String BASE_DIR = KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory() + "/resources/GlobalDict"
            + dictionaryInfo.getResourceDir() + "/";
    Path basePath = new Path(BASE_DIR);
    try {
        HadoopUtil.getFileSystem(basePath).delete(basePath, true);
    } catch (IOException e) {
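        // best-effort cleanup: failures to delete the global dictionary directory are deliberately ignored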
    }
}
 
Example 22
Source Project: kylin   Source File: RemoteDictionaryStore.java    License: Apache License 2.0
static Connection getConnection() {
    Configuration conf = HBaseConfiguration.create(HadoopUtil.getCurrentConfiguration());
    try {
        return ConnectionFactory.createConnection(conf);
    } catch (IOException ioe) {
        throw new IllegalStateException("Cannot connect to HBase.", ioe);
    }
}
 
Example 23
Source Project: kylin   Source File: ReceiverClusterManager.java    License: Apache License 2.0
private void removeHDFSFiles(String cubeName, String segmentName) {
    String segmentHDFSPath = HDFSUtil.getStreamingSegmentFilePath(cubeName, segmentName);
    try {
        FileSystem fs = HadoopUtil.getFileSystem(segmentHDFSPath);
        logger.info("Deleting segment data in HDFS {}", segmentHDFSPath);
        fs.delete(new Path(segmentHDFSPath), true);
    } catch (Exception e) {
        logger.error("error when remove hdfs file, hdfs path:{}", segmentHDFSPath);
    }
}
 
Example 24
Source Project: kylin   Source File: HBaseConnection.java    License: Apache License 2.0
public static String makeQualifiedPathInHBaseCluster(String inPath) {
    Path path = new Path(inPath);
    path = Path.getPathWithoutSchemeAndAuthority(path);

    FileSystem fs = HadoopUtil.getFileSystem(path, getCurrentHBaseConfiguration()); // Must be HBase's FS, not working FS
    return fs.makeQualified(path).toString();
}
 
Example 25
private void initFs() {
    DebugFilesystem.clearOpenStreams();
    Configuration conf = new Configuration();
    conf.set("fs.file.impl", DebugFilesystem.class.getCanonicalName());
    conf.set("fs.file.impl.disable.cache", "true");
    HadoopUtil.setCurrentConfiguration(conf);
}
 
Example 26
Source Project: Kylin   Source File: InvertedIndexJob.java    License: Apache License 2.0
private void setupMapper(String intermediateTable) throws IOException {

    String[] dbTableNames = HadoopUtil.parseHiveTableName(intermediateTable);
    HCatInputFormat.setInput(job, dbTableNames[0], dbTableNames[1]);

    job.setInputFormatClass(HCatInputFormat.class);

    job.setMapperClass(InvertedIndexMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(ImmutableBytesWritable.class);
    job.setPartitionerClass(InvertedIndexPartitioner.class);
}
 
Example 27
Source Project: kylin-on-parquet-v2   Source File: AbstractHadoopJob.java    License: Apache License 2.0
public static int addInputDirs(String[] inputs, Job job) throws IOException {
    int ret = 0;//return number of added folders
    for (String inp : inputs) {
        inp = inp.trim();
        if (inp.endsWith("/*")) {
            inp = inp.substring(0, inp.length() - 2);
            FileSystem fs = HadoopUtil.getWorkingFileSystem(job.getConfiguration());
            Path path = new Path(inp);

            if (!exists(fs, path)) {
                logger.warn("Path not exist:" + path.toString());
                continue;
            }

            FileStatus[] fileStatuses = fs.listStatus(path);
            boolean hasDir = false;
            for (FileStatus stat : fileStatuses) {
                if (stat.isDirectory() && !stat.getPath().getName().startsWith("_")) {
                    hasDir = true;
                    ret += addInputDirs(new String[] { stat.getPath().toString() }, job);
                }
            }
            if (fileStatuses.length > 0 && !hasDir) {
                ret += addInputDirs(new String[] { path.toString() }, job);
            }
        } else {
            logger.trace("Add input " + inp);
            FileInputFormat.addInputPath(job, new Path(inp));
            ret++;
        }
    }
    return ret;
}
 
Example 28
Source Project: kylin   Source File: AbstractHadoopJob.java    License: Apache License 2.0
public static int addInputDirs(String[] inputs, Job job) throws IOException {
    int ret = 0;//return number of added folders
    for (String inp : inputs) {
        inp = inp.trim();
        if (inp.endsWith("/*")) {
            inp = inp.substring(0, inp.length() - 2);
            FileSystem fs = HadoopUtil.getWorkingFileSystem(job.getConfiguration());
            Path path = new Path(inp);

            if (!exists(fs, path)) {
                logger.warn("Path not exist:" + path.toString());
                continue;
            }

            FileStatus[] fileStatuses = fs.listStatus(path);
            boolean hasDir = false;
            for (FileStatus stat : fileStatuses) {
                if (stat.isDirectory() && !stat.getPath().getName().startsWith("_")) {
                    hasDir = true;
                    ret += addInputDirs(new String[] { stat.getPath().toString() }, job);
                }
            }
            if (fileStatuses.length > 0 && !hasDir) {
                ret += addInputDirs(new String[] { path.toString() }, job);
            }
        } else {
            logger.trace("Add input " + inp);
            FileInputFormat.addInputPath(job, new Path(inp));
            ret++;
        }
    }
    return ret;
}
 
Example 29
Source Project: Kylin   Source File: CubeService.java    License: Apache License 2.0
/**
 * Generates cardinality for a table. This triggers a Hadoop job, and
 * the result is merged into the table's exd info.
 *
 * @param tableName the table to compute cardinality for
 * @param submitter the user submitting the job
 */
public void calculateCardinality(String tableName, String submitter) {
    String[] dbTableName = HadoopUtil.parseHiveTableName(tableName);
    tableName = dbTableName[0] + "." + dbTableName[1];
    TableDesc table = getMetadataManager().getTableDesc(tableName);
    final Map<String, String> tableExd = getMetadataManager().getTableDescExd(tableName);
    if (tableExd == null || table == null) {
        IllegalArgumentException e = new IllegalArgumentException("Cannot find table descriptor " + tableName);
        logger.error("Cannot find table descriptor " + tableName, e);
        throw e;
    }

    DefaultChainedExecutable job = new DefaultChainedExecutable();
    job.setName("Hive Column Cardinality calculation for table '" + tableName + "'");
    job.setSubmitter(submitter);

    String outPath = HiveColumnCardinalityJob.OUTPUT_PATH + "/" + tableName;
    String param = "-table " + tableName + " -output " + outPath;

    HadoopShellExecutable step1 = new HadoopShellExecutable();

    step1.setJobClass(HiveColumnCardinalityJob.class);
    step1.setJobParams(param);

    job.addTask(step1);

    HadoopShellExecutable step2 = new HadoopShellExecutable();

    step2.setJobClass(HiveColumnCardinalityUpdateJob.class);
    step2.setJobParams(param);
    job.addTask(step2);

    getExecutableManager().addJob(job);
}
 
Example 30
Source Project: kylin-on-parquet-v2   Source File: CubingJob.java    License: Apache License 2.0
private double getRealSizeByLevel(String rootPath, int level) {
    try {
        String levelPath = JobBuilderSupport.getCuboidOutputPathsByLevel(rootPath, level);
        FileSystem fs = HadoopUtil.getFileSystem(levelPath);
        return fs.getContentSummary(new Path(levelPath)).getLength() / (1024L * 1024L);
    } catch (Exception e) {
        logger.warn("get level real size failed." + e);
        return 0L;
    }
}