Java Code Examples for org.apache.hadoop.fs.ContentSummary#getLength()

The following examples show how to use org.apache.hadoop.fs.ContentSummary#getLength(). Each example is taken from a real open-source project; the source file, project, and license are noted above it.
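All of the examples below follow the same basic pattern: resolve the FileSystem that owns a Path, ask it for a ContentSummary, and read the aggregate size in bytes with getLength(). The following minimal sketch illustrates that pattern in isolation; it is not taken from any of the projects below, and the class name and path are placeholders.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ContentSummaryLengthDemo {
    public static void main(String[] args) throws IOException {
        // Placeholder path; point this at a real file or directory.
        Path path = new Path("/tmp/data");
        // Resolve the FileSystem that owns the path (HDFS, local, ...).
        FileSystem fs = path.getFileSystem(new Configuration());
        // For a directory, the summary aggregates the whole tree.
        ContentSummary summary = fs.getContentSummary(path);
        // getLength() is the total number of bytes, not counting replication.
        System.out.println("Total bytes: " + summary.getLength());
    }
}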
Example 1
Source File: ColumnToRowJob.java    From kylin-on-parquet-v2 with Apache License 2.0
private int calReducerNum(Path input) {
    try {
        long bytesPerReducer = DEFAULT_SIZE_PER_REDUCER;
        FileSystem fs = FileSystem.get(job.getConfiguration());
        ContentSummary cs = fs.getContentSummary(input);
        long totalInputFileSize = cs.getLength();

        int reducers = (int) ((totalInputFileSize + bytesPerReducer - 1) / bytesPerReducer);
        reducers = Math.max(1, reducers);
        reducers = Math.min(MAX_REDUCERS, reducers);
        logger.info("BytesPerReducer={}, maxReducers={}, totalInputFileSize={}, setReducers={}", bytesPerReducer,
                MAX_REDUCERS, totalInputFileSize, reducers);
        return reducers;
    } catch (IOException e) {
        logger.error("error when calculate reducer number", e);
    }
    return 1;
}
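The expression (totalInputFileSize + bytesPerReducer - 1) / bytesPerReducer is integer ceiling division: any remainder of input bytes still gets its own reducer, and the result is then clamped to the range [1, MAX_REDUCERS].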
 
Example 2
Source File: ColumnToRowJob.java    From kylin with Apache License 2.0
private int calReducerNum(Path input) {
    try {
        long bytesPerReducer = DEFAULT_SIZE_PER_REDUCER;
        FileSystem fs = FileSystem.get(job.getConfiguration());
        ContentSummary cs = fs.getContentSummary(input);
        long totalInputFileSize = cs.getLength();

        int reducers = (int) ((totalInputFileSize + bytesPerReducer - 1) / bytesPerReducer);
        reducers = Math.max(1, reducers);
        reducers = Math.min(MAX_REDUCERS, reducers);
        logger.info("BytesPerReducer={}, maxReducers={}, totalInputFileSize={}, setReducers={}", bytesPerReducer,
                MAX_REDUCERS, totalInputFileSize, reducers);
        return reducers;
    } catch (IOException e) {
        logger.error("error when calculate reducer number", e);
    }
    return 1;
}
 
Example 3
Source File: DataValidationInputFormat.java    From jumbune with GNU Lesser General Public License v3.0
/**
 * Recursively finds files inside directories and generates splits for the non-empty ones.
 * @param job refers to the JobContext used to read the configuration of the job that ran
 * @param minSize refers to the minimum file block size
 * @param maxSize refers to the maximum file block size
 * @param splits refers to the list of splits being generated
 * @param fileStatusList list of FileStatus
 * @throws IOException Signals that an I/O exception has occurred.
 */
public void setData(JobContext job, long minSize, long maxSize,
        List<InputSplit> splits, List<FileStatus> fileStatusList) throws IOException {
    for (FileStatus file : fileStatusList) {
        if (file.isDirectory()) {
            Path dirPath = file.getPath();
            FileStatus[] fileArray = dirPath.getFileSystem(job.getConfiguration()).listStatus(dirPath);
            setData(job, minSize, maxSize, splits, Arrays.asList(fileArray));
        } else {
            // Checking whether the file is empty or not
            Path path = file.getPath();
            FileSystem fs = path.getFileSystem(job.getConfiguration());
            ContentSummary cs = fs.getContentSummary(path);
            if (cs.getLength() > 0) {
                generateSplits(job, minSize, maxSize, splits, file);
            }
        }
    }
}
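Since getContentSummary() is invoked here on individual files, getLength() simply equals the file length, so the call serves as an emptiness check; FileStatus#getLen() would yield the same value without an extra filesystem call.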
 
Example 4
Source File: LogicalPlanner.java    From incubator-tajo with Apache License 2.0
private void updatePhysicalInfo(TableDesc desc) {
  if (desc.getPath() != null) {
    try {
      FileSystem fs = desc.getPath().getFileSystem(new Configuration());
      FileStatus status = fs.getFileStatus(desc.getPath());
      if (desc.getStats() != null && (status.isDirectory() || status.isFile())) {
        ContentSummary summary = fs.getContentSummary(desc.getPath());
        if (summary != null) {
          long volume = summary.getLength();
          desc.getStats().setNumBytes(volume);
        }
      }
    } catch (Throwable t) {
      LOG.warn(t);
    }
  }
}
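ContentSummary is the right tool here because the table path may be a directory: FileStatus#getLen() does not report the total size of a directory's contents, whereas ContentSummary#getLength() aggregates the entire tree.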
 
Example 5
Source File: MRHiveDictUtil.java    From kylin-on-parquet-v2 with Apache License 2.0
private static long getFileSize(String hdfsUrl) throws IOException {
    Configuration configuration = new Configuration();
    Path path = new Path(hdfsUrl);
    FileSystem fs = path.getFileSystem(configuration);
    ContentSummary contentSummary = fs.getContentSummary(path);
    return contentSummary.getLength();
}
 
Example 6
Source File: CreateFlatHiveTableStep.java    From kylin-on-parquet-v2 with Apache License 2.0
private long getFileSize(String hdfsUrl) throws IOException {
    Configuration configuration = new Configuration();
    Path path = new Path(hdfsUrl);
    FileSystem fs = path.getFileSystem(configuration);
    ContentSummary contentSummary = fs.getContentSummary(path);
    return contentSummary.getLength();
}
 
Example 7
Source File: StorageCleanupJob.java    From kylin-on-parquet-v2 with Apache License 2.0
private void cleanUnusedHdfsFiles() throws IOException {

    UnusedHdfsFileCollector collector = new UnusedHdfsFileCollector();
    collectUnusedHdfsFiles(collector);

    if (collector.list.isEmpty()) {
        logger.info("No HDFS files to clean up");
        return;
    }

    long garbageBytes = 0;
    List<String> garbageList = new ArrayList<>();

    for (Pair<FileSystem, String> entry : collector.list) {
        FileSystem fs = entry.getKey();
        String path = entry.getValue();
        try {
            garbageList.add(path);
            ContentSummary sum = fs.getContentSummary(new Path(path));
            if (sum != null)
                garbageBytes += sum.getLength();

            if (delete) {
                logger.info("Deleting HDFS path " + path);
                fs.delete(new Path(path), true);
            } else {
                logger.info("Dry run, pending delete HDFS path " + path);
            }
        } catch (IOException e) {
            logger.error("Error dealing with unused HDFS path " + path, e);
        }
    }

    hdfsGarbageFileBytes = garbageBytes;
    hdfsGarbageFiles = garbageList;
}
 
Example 8
Source File: MRHiveDictUtil.java    From kylin with Apache License 2.0
private static long getFileSize(String hdfsUrl) throws IOException {
    Configuration configuration = new Configuration();
    Path path = new Path(hdfsUrl);
    FileSystem fs = path.getFileSystem(configuration);
    ContentSummary contentSummary = fs.getContentSummary(path);
    return contentSummary.getLength();
}
 
Example 9
Source File: CreateFlatHiveTableStep.java    From kylin with Apache License 2.0
private long getFileSize(String hdfsUrl) throws IOException {
    Configuration configuration = new Configuration();
    Path path = new Path(hdfsUrl);
    FileSystem fs = path.getFileSystem(configuration);
    ContentSummary contentSummary = fs.getContentSummary(path);
    return contentSummary.getLength();
}
 
Example 10
Source File: StorageCleanupJob.java    From kylin with Apache License 2.0
private void cleanUnusedHdfsFiles() throws IOException {

    UnusedHdfsFileCollector collector = new UnusedHdfsFileCollector();
    collectUnusedHdfsFiles(collector);

    if (collector.list.isEmpty()) {
        logger.info("No HDFS files to clean up");
        return;
    }

    long garbageBytes = 0;
    List<String> garbageList = new ArrayList<>();

    for (Pair<FileSystem, String> entry : collector.list) {
        FileSystem fs = entry.getKey();
        String path = entry.getValue();
        try {
            garbageList.add(path);
            ContentSummary sum = fs.getContentSummary(new Path(path));
            if (sum != null)
                garbageBytes += sum.getLength();

            if (delete) {
                logger.info("Deleting HDFS path " + path);
                fs.delete(new Path(path), true);
            } else {
                logger.info("Dry run, pending delete HDFS path " + path);
            }
        } catch (IOException e) {
            logger.error("Error dealing with unused HDFS path " + path, e);
        }
    }

    hdfsGarbageFileBytes = garbageBytes;
    hdfsGarbageFiles = garbageList;
}
 
Example 11
Source File: BlurIndexSimpleWriter.java    From incubator-retired-blur with Apache License 2.0
@Override
public long getOnDiskSize() throws IOException {
  Path hdfsDirPath = _shardContext.getHdfsDirPath();
  Configuration configuration = _tableContext.getConfiguration();
  FileSystem fileSystem = hdfsDirPath.getFileSystem(configuration);
  ContentSummary contentSummary = fileSystem.getContentSummary(hdfsDirPath);
  return contentSummary.getLength();
}
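One caveat that applies across these examples: on HDFS, getContentSummary() is answered by the NameNode in a single call, but the generic FileSystem base class computes it with a client-side recursive walk, so summarizing large directory trees can be slow on other filesystem implementations.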
 
Example 12
Source File: Query.java    From tajo with Apache License 2.0
public static long getTableVolume(TajoConf systemConf, Path tablePath) throws IOException {
  FileSystem fs = tablePath.getFileSystem(systemConf);
  ContentSummary directorySummary = fs.getContentSummary(tablePath);
  return directorySummary.getLength();
}
 
Example 13
Source File: Query.java    From incubator-tajo with Apache License 2.0
private long getTableVolume(TajoConf systemConf, Path tablePath) throws IOException {
  FileSystem fs = tablePath.getFileSystem(systemConf);
  ContentSummary directorySummary = fs.getContentSummary(tablePath);
  return directorySummary.getLength();
}