Java Code Examples for org.apache.hadoop.fs.FileSystem#getContentSummary()

The following examples show how to use org.apache.hadoop.fs.FileSystem#getContentSummary(). They are taken from a range of open source projects; the original source file, project, and license are noted above each example.
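Before the project examples, here is a minimal, self-contained sketch of the call itself. It is only an orientation and is not taken from any of the projects below: the class name ContentSummaryDemo and the default path "/tmp" are illustrative, and the Configuration is assumed to resolve whichever file system you actually want to query.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ContentSummaryDemo {
  public static void main(String[] args) throws Exception {
    // Picks up core-site.xml / hdfs-site.xml from the classpath, if present.
    Configuration conf = new Configuration();
    // Path to summarize; "/tmp" is a placeholder default.
    Path path = new Path(args.length > 0 ? args[0] : "/tmp");
    FileSystem fs = path.getFileSystem(conf);
    ContentSummary summary = fs.getContentSummary(path);
    System.out.println("length (bytes)  : " + summary.getLength());
    System.out.println("file count      : " + summary.getFileCount());
    System.out.println("directory count : " + summary.getDirectoryCount());
    // On HDFS, space consumed is roughly length multiplied by the replication factor.
    System.out.println("space consumed  : " + summary.getSpaceConsumed());
  }
}
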
Example 1
Source File: TestUnionQuery.java    From tajo with Apache License 2.0
private void verifyResultStats(Optional<TajoResultSetBase[]> existing, long numRows) throws Exception {
  assertTrue(existing.isPresent());

  // Get TableStats using TajoResultSetBase.
  TajoResultSetBase[] resultSet = existing.get();
  QueryId qid = resultSet[0].getQueryId();
  QueryInfo queryInfo = testingCluster.getMaster().getContext().getQueryJobManager().getFinishedQuery(qid);
  TableDesc desc = queryInfo.getResultDesc();
  TableStats stats = desc.getStats();

  // Compare specified number of rows to the number of rows in TableStats.
  assertEquals(numRows, stats.getNumRows().longValue());

  // Compare the on-disk volume of the result data to the number of bytes in TableStats.
  FileSystem fs = FileSystem.get(conf);
  Path path = new Path(desc.getUri());
  assertTrue(fs.exists(path));
  ContentSummary summary = fs.getContentSummary(path);
  assertEquals(summary.getLength(), stats.getNumBytes().longValue());

  closeResultSets(resultSet);
}
 
Example 2
Source File: BaseTestHttpFSWith.java    From hadoop with Apache License 2.0
private void testContentSummary() throws Exception {
  FileSystem fs = FileSystem.get(getProxiedFSConf());
  Path path = new Path(getProxiedFSTestDir(), "foo.txt");
  OutputStream os = fs.create(path);
  os.write(1);
  os.close();
  ContentSummary hdfsContentSummary = fs.getContentSummary(path);
  fs.close();
  fs = getHttpFSFileSystem();
  ContentSummary httpContentSummary = fs.getContentSummary(path);
  fs.close();
  Assert.assertEquals(httpContentSummary.getDirectoryCount(), hdfsContentSummary.getDirectoryCount());
  Assert.assertEquals(httpContentSummary.getFileCount(), hdfsContentSummary.getFileCount());
  Assert.assertEquals(httpContentSummary.getLength(), hdfsContentSummary.getLength());
  Assert.assertEquals(httpContentSummary.getQuota(), hdfsContentSummary.getQuota());
  Assert.assertEquals(httpContentSummary.getSpaceConsumed(), hdfsContentSummary.getSpaceConsumed());
  Assert.assertEquals(httpContentSummary.getSpaceQuota(), hdfsContentSummary.getSpaceQuota());
}
 
Example 3
Source File: DistributedCacheTestUtil.java    From pentaho-hadoop-shims with Apache License 2.0
/**
 * Utility to attempt to stage a file to HDFS for use with Distributed Cache.
 *
 * @param ch                Distributed Cache Helper
 * @param source            File or directory to stage
 * @param fs                FileSystem to stage to
 * @param root              Root directory to clean up when this test is complete
 * @param dest              Destination path to stage to
 * @param expectedFileCount Expected number of files to exist in the destination once staged
 * @param expectedDirCount  Expected number of directories to exist in the destination once staged
 * @throws Exception
 */
static void stageForCacheTester( DistributedCacheUtilImpl ch, FileObject source, FileSystem fs, Path root, Path dest,
                                 int expectedFileCount, int expectedDirCount ) throws Exception {
  try {
    ch.stageForCache( source, fs, dest, true );

    assertTrue( fs.exists( dest ) );
    ContentSummary cs = fs.getContentSummary( dest );
    assertEquals( expectedFileCount, cs.getFileCount() );
    assertEquals( expectedDirCount, cs.getDirectoryCount() );
    assertEquals( FsPermission.createImmutable( (short) 0755 ), fs.getFileStatus( dest ).getPermission() );
  } finally {
    // Clean up after ourselves
    if ( !fs.delete( root, true ) ) {
      log.logError( "error deleting FileSystem temp dir " + root );
    }
  }
}
 
Example 4
Source File: ColumnToRowJob.java    From kylin with Apache License 2.0
private int calReducerNum(Path input) {
    try {
        long bytesPerReducer = DEFAULT_SIZE_PER_REDUCER;
        FileSystem fs = FileSystem.get(job.getConfiguration());
        ContentSummary cs = fs.getContentSummary(input);
        long totalInputFileSize = cs.getLength();

        // Ceiling division: one reducer per bytesPerReducer bytes of input;
        // the result is then clamped to the range [1, MAX_REDUCERS] below.
        int reducers = (int) ((totalInputFileSize + bytesPerReducer - 1) / bytesPerReducer);
        reducers = Math.max(1, reducers);
        reducers = Math.min(MAX_REDUCERS, reducers);
        logger.info("BytesPerReducer={}, maxReducers={}, totalInputFileSize={}, setReducers={}", bytesPerReducer,
                MAX_REDUCERS, totalInputFileSize, reducers);
        return reducers;
    } catch (IOException e) {
        logger.error("error when calculating reducer number", e);
    }
    return 1;
}
 
Example 5
Source File: DataValidationInputFormat.java    From jumbune with GNU Lesser General Public License v3.0
/**
 *  Recursively walks the directories in fileStatusList and generates input splits for the non-empty files found.
 * @param job refers to the JobContext that is used to read the configuration of the job that ran
 * @param minSize refers to the minimum file block size.
 * @param maxSize refers to the maximum file block size.
 * @param splits refers to the list of splits being generated.
 * @param fileStatusList list of FileStatus
 * @throws IOException Signals that an I/O exception has occurred.
 */
public void setData(JobContext job, long minSize, long maxSize,
		List<InputSplit> splits, List<FileStatus> fileStatusList) throws IOException {
	for(FileStatus file:fileStatusList) {
		if (file.isDirectory()) {
			Path dirPath = file.getPath();
			FileStatus [] fileArray = dirPath.getFileSystem(job.getConfiguration()).listStatus(dirPath);
			setData(job, minSize, maxSize, splits, Arrays.asList(fileArray));
		} else {
			//Checking whether file is empty or not
			Path path  = file.getPath();
			FileSystem fs = path.getFileSystem(job.getConfiguration());
			ContentSummary cs = fs.getContentSummary(path);
			if (cs.getLength() > 0) {
				generateSplits(job, minSize, maxSize, splits, file);
			}
		}
	}
}
 
Example 6
Source File: BaseTestHttpFSWith.java    From big-c with Apache License 2.0
private void testContentSummary() throws Exception {
  FileSystem fs = FileSystem.get(getProxiedFSConf());
  Path path = new Path(getProxiedFSTestDir(), "foo.txt");
  OutputStream os = fs.create(path);
  os.write(1);
  os.close();
  ContentSummary hdfsContentSummary = fs.getContentSummary(path);
  fs.close();
  fs = getHttpFSFileSystem();
  ContentSummary httpContentSummary = fs.getContentSummary(path);
  fs.close();
  Assert.assertEquals(httpContentSummary.getDirectoryCount(), hdfsContentSummary.getDirectoryCount());
  Assert.assertEquals(httpContentSummary.getFileCount(), hdfsContentSummary.getFileCount());
  Assert.assertEquals(httpContentSummary.getLength(), hdfsContentSummary.getLength());
  Assert.assertEquals(httpContentSummary.getQuota(), hdfsContentSummary.getQuota());
  Assert.assertEquals(httpContentSummary.getSpaceConsumed(), hdfsContentSummary.getSpaceConsumed());
  Assert.assertEquals(httpContentSummary.getSpaceQuota(), hdfsContentSummary.getSpaceQuota());
}
 
Example 7
Source File: DistributedCacheUtilImplOSDependentTest.java    From pentaho-hadoop-shims with Apache License 2.0
@Test
public void stagePluginsForCache() throws Exception {
  DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl();

  Configuration conf = new Configuration();
  FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem( conf );

  Path pluginsDir = new Path( "bin/test/plugins-installation-dir" );

  FileObject pluginDir = DistributedCacheTestUtil.createTestFolderWithContent();

  try {
    ch.stagePluginsForCache( fs, pluginsDir, "bin/test/sample-folder" );
    Path pluginInstallPath = new Path( pluginsDir, "bin/test/sample-folder" );
    assertTrue( fs.exists( pluginInstallPath ) );
    ContentSummary summary = fs.getContentSummary( pluginInstallPath );
    assertEquals( 6, summary.getFileCount() );
    assertEquals( 9, summary.getDirectoryCount() );
  } finally {
    pluginDir.delete( new AllFileSelector() );
    fs.delete( pluginsDir, true );
  }
}
 
Example 8
Source File: LogicalPlanner.java    From incubator-tajo with Apache License 2.0
private void updatePhysicalInfo(TableDesc desc) {
  if (desc.getPath() != null) {
    try {
      FileSystem fs = desc.getPath().getFileSystem(new Configuration());
      FileStatus status = fs.getFileStatus(desc.getPath());
      if (desc.getStats() != null && (status.isDirectory() || status.isFile())) {
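        // getContentSummary() aggregates the size of every file under the table path.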
        ContentSummary summary = fs.getContentSummary(desc.getPath());
        if (summary != null) {
          long volume = summary.getLength();
          desc.getStats().setNumBytes(volume);
        }
      }
    } catch (Throwable t) {
      LOG.warn(t);
    }
  }
}
 
Example 9
Source File: MRHiveDictUtil.java    From kylin-on-parquet-v2 with Apache License 2.0
private static long getFileSize(String hdfsUrl) throws IOException {
    Configuration configuration = new Configuration();
    Path path = new Path(hdfsUrl);
    FileSystem fs = path.getFileSystem(configuration);
    ContentSummary contentSummary = fs.getContentSummary(path);
    return contentSummary.getLength();
}
 
Example 10
Source File: CreateFlatHiveTableStep.java    From kylin-on-parquet-v2 with Apache License 2.0
private long getFileSize(String hdfsUrl) throws IOException {
    Configuration configuration = new Configuration();
    Path path = new Path(hdfsUrl);
    FileSystem fs = path.getFileSystem(configuration);
    ContentSummary contentSummary = fs.getContentSummary(path);
    long length = contentSummary.getLength();
    return length;
}
 
Example 11
Source File: StorageCleanupJob.java    From kylin-on-parquet-v2 with Apache License 2.0
private void cleanUnusedHdfsFiles() throws IOException {

        UnusedHdfsFileCollector collector = new UnusedHdfsFileCollector();
        collectUnusedHdfsFiles(collector);

        if (collector.list.isEmpty()) {
            logger.info("No HDFS files to clean up");
            return;
        }

        long garbageBytes = 0;
        List<String> garbageList = new ArrayList<>();

        for (Pair<FileSystem, String> entry : collector.list) {
            FileSystem fs = entry.getKey();
            String path = entry.getValue();
            try {
                garbageList.add(path);
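                // Record the path and accumulate its total size before (optionally) deleting it.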
                ContentSummary sum = fs.getContentSummary(new Path(path));
                if (sum != null)
                    garbageBytes += sum.getLength();

                if (delete) {
                    logger.info("Deleting HDFS path " + path);
                    fs.delete(new Path(path), true);
                } else {
                    logger.info("Dry run, pending delete HDFS path " + path);
                }
            } catch (IOException e) {
                logger.error("Error dealing with unused HDFS path " + path, e);
            }
        }

        hdfsGarbageFileBytes = garbageBytes;
        hdfsGarbageFiles = garbageList;
    }
 
Example 12
Source File: HdfsUtil.java    From spring-boot-tutorial with Creative Commons Attribution Share Alike 4.0 International
public ContentSummary getContentSummary(String path) throws Exception {
    if (!exists(path)) {
        throw new IOException(path + " not exists in hdfs");
    }

    FileSystem fileSystem = null;
    try {
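        // Borrow a pooled FileSystem client; the finally block returns it to the pool.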
        fileSystem = this.hdfsPool.borrowObject();
        return fileSystem.getContentSummary(new Path(path));
    } finally {
        if (fileSystem != null) { this.hdfsPool.returnObject(fileSystem); }
    }
}
 
Example 13
Source File: TestTargetDir.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
/** test target-dir contains imported files. */
public void testTargetDir() throws IOException {

  try {
    String targetDir = getWarehouseDir() + "/tempTargetDir";

    ArrayList args = getOutputArgv(true);
    args.add("--target-dir");
    args.add(targetDir);

    // delete target-dir if exists and recreate it
    FileSystem fs = FileSystem.get(getConf());
    Path outputPath = new Path(targetDir);
    if (fs.exists(outputPath)) {
      fs.delete(outputPath, true);
    }

    String[] argv = (String[]) args.toArray(new String[0]);
    runImport(argv);

    ContentSummary summ = fs.getContentSummary(outputPath);

    assertTrue("There are no new imported files in target-dir",
        summ.getFileCount() > 0);

  } catch (Exception e) {
    LOG.error("Got Exception: " + StringUtils.stringifyException(e));
    fail(e.toString());
  }
}
 
Example 14
Source File: StorageCleanupJob.java    From kylin with Apache License 2.0
private void cleanUnusedHdfsFiles() throws IOException {

        UnusedHdfsFileCollector collector = new UnusedHdfsFileCollector();
        collectUnusedHdfsFiles(collector);

        if (collector.list.isEmpty()) {
            logger.info("No HDFS files to clean up");
            return;
        }

        long garbageBytes = 0;
        List<String> garbageList = new ArrayList<>();

        for (Pair<FileSystem, String> entry : collector.list) {
            FileSystem fs = entry.getKey();
            String path = entry.getValue();
            try {
                garbageList.add(path);
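                // Record the path and accumulate its total size before (optionally) deleting it.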
                ContentSummary sum = fs.getContentSummary(new Path(path));
                if (sum != null)
                    garbageBytes += sum.getLength();

                if (delete) {
                    logger.info("Deleting HDFS path " + path);
                    fs.delete(new Path(path), true);
                } else {
                    logger.info("Dry run, pending delete HDFS path " + path);
                }
            } catch (IOException e) {
                logger.error("Error dealing with unused HDFS path " + path, e);
            }
        }

        hdfsGarbageFileBytes = garbageBytes;
        hdfsGarbageFiles = garbageList;
    }
 
Example 15
Source File: BlurIndexSimpleWriter.java    From incubator-retired-blur with Apache License 2.0
@Override
public long getOnDiskSize() throws IOException {
  Path hdfsDirPath = _shardContext.getHdfsDirPath();
  Configuration configuration = _tableContext.getConfiguration();
  FileSystem fileSystem = hdfsDirPath.getFileSystem(configuration);
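  // The shard's on-disk size is the total length of everything under its HDFS directory.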
  ContentSummary contentSummary = fileSystem.getContentSummary(hdfsDirPath);
  return contentSummary.getLength();
}
 
Example 16
Source File: Query.java    From tajo with Apache License 2.0
public static long getTableVolume(TajoConf systemConf, Path tablePath) throws IOException {
  FileSystem fs = tablePath.getFileSystem(systemConf);
  ContentSummary directorySummary = fs.getContentSummary(tablePath);
  return directorySummary.getLength();
}
 
Example 17
Source File: TestQuota.java    From big-c with Apache License 2.0
/**
  * Violate a space quota using files of size < 1 block. Test that block
  * allocation conservatively assumes that for quota checking the entire
  * space of the block is used.
  */
 @Test
 public void testBlockAllocationAdjustsUsageConservatively() 
     throws Exception {
   Configuration conf = new HdfsConfiguration();
   final int BLOCK_SIZE = 6 * 1024;
   conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
   conf.setBoolean(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY, true);
   MiniDFSCluster cluster = 
     new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
   cluster.waitActive();
   FileSystem fs = cluster.getFileSystem();
   DFSAdmin admin = new DFSAdmin(conf);

   final String nnAddr = conf.get(DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY);
   final String webhdfsuri = WebHdfsFileSystem.SCHEME  + "://" + nnAddr;
   System.out.println("webhdfsuri=" + webhdfsuri);
   final FileSystem webhdfs = new Path(webhdfsuri).getFileSystem(conf);

   try {
     Path dir = new Path("/test");
     Path file1 = new Path("/test/test1");
     Path file2 = new Path("/test/test2");
     boolean exceededQuota = false;
     final int QUOTA_SIZE = 3 * BLOCK_SIZE; // total space usage including
                                            // repl.
     final int FILE_SIZE = BLOCK_SIZE / 2;
     ContentSummary c;
     
     // Create the directory and set the quota
     assertTrue(fs.mkdirs(dir));
     runCommand(admin, false, "-setSpaceQuota", Integer.toString(QUOTA_SIZE),
         dir.toString());

     // Creating a file should use half the quota
     DFSTestUtil.createFile(fs, file1, FILE_SIZE, (short) 3, 1L);
     DFSTestUtil.waitReplication(fs, file1, (short) 3);
     c = fs.getContentSummary(dir);
     checkContentSummary(c, webhdfs.getContentSummary(dir));
     assertEquals("Quota is half consumed", QUOTA_SIZE / 2,
                  c.getSpaceConsumed());

     // We cannot create the 2nd file because, even though the total space
     // used by the two files (2 * 3 * BLOCK_SIZE/2) would fit within the
     // quota (3 * BLOCK_SIZE), when a block for a file is created the space
     // used is adjusted conservatively (3 * BLOCK_SIZE, i.e. it assumes a
     // full block is written), which violates the quota since we have
     // already used half of it for the first file.
     try {
       DFSTestUtil.createFile(fs, file2, FILE_SIZE, (short) 3, 1L);
     } catch (QuotaExceededException e) {
       exceededQuota = true;
     }
     assertTrue("Quota not exceeded", exceededQuota);
   } finally {
     cluster.shutdown();
   }
}
 
Example 18
Source File: TestQuota.java    From hadoop with Apache License 2.0
/**
  * Violate a space quota using files of size < 1 block. Test that block
  * allocation conservatively assumes that for quota checking the entire
  * space of the block is used.
  */
 @Test
 public void testBlockAllocationAdjustsUsageConservatively() 
     throws Exception {
   Configuration conf = new HdfsConfiguration();
   final int BLOCK_SIZE = 6 * 1024;
   conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
   conf.setBoolean(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY, true);
   MiniDFSCluster cluster = 
     new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
   cluster.waitActive();
   FileSystem fs = cluster.getFileSystem();
   DFSAdmin admin = new DFSAdmin(conf);

   final String nnAddr = conf.get(DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY);
   final String webhdfsuri = WebHdfsFileSystem.SCHEME  + "://" + nnAddr;
   System.out.println("webhdfsuri=" + webhdfsuri);
   final FileSystem webhdfs = new Path(webhdfsuri).getFileSystem(conf);

   try {
     Path dir = new Path("/test");
     Path file1 = new Path("/test/test1");
     Path file2 = new Path("/test/test2");
     boolean exceededQuota = false;
     final int QUOTA_SIZE = 3 * BLOCK_SIZE; // total space usage including
                                            // repl.
     final int FILE_SIZE = BLOCK_SIZE / 2;
     ContentSummary c;
     
     // Create the directory and set the quota
     assertTrue(fs.mkdirs(dir));
     runCommand(admin, false, "-setSpaceQuota", Integer.toString(QUOTA_SIZE),
         dir.toString());

     // Creating a file should use half the quota
     DFSTestUtil.createFile(fs, file1, FILE_SIZE, (short) 3, 1L);
     DFSTestUtil.waitReplication(fs, file1, (short) 3);
     c = fs.getContentSummary(dir);
     checkContentSummary(c, webhdfs.getContentSummary(dir));
     assertEquals("Quota is half consumed", QUOTA_SIZE / 2,
                  c.getSpaceConsumed());

     // We cannot create the 2nd file because, even though the total space
     // used by the two files (2 * 3 * BLOCK_SIZE/2) would fit within the
     // quota (3 * BLOCK_SIZE), when a block for a file is created the space
     // used is adjusted conservatively (3 * BLOCK_SIZE, i.e. it assumes a
     // full block is written), which violates the quota since we have
     // already used half of it for the first file.
     try {
       DFSTestUtil.createFile(fs, file2, FILE_SIZE, (short) 3, 1L);
     } catch (QuotaExceededException e) {
       exceededQuota = true;
     }
     assertTrue("Quota not exceeded", exceededQuota);
   } finally {
     cluster.shutdown();
   }
}
 
Example 19
Source File: HdfsFileSummaryService.java    From Hue-Ctrip-DI with MIT License
@Scheduled(fixedDelay = 4 * 3600 * 1000)
public void start() throws IOException {
	logger.info("Start initializing HDFS file summary: "
			+ System.currentTimeMillis());
	synchronized (summaryList) {
		URL configUrl = HdfsFileSummaryService.class.getClassLoader()
				.getResource(hdfsConfig);
		Configuration conf = new Configuration();
		conf.addResource(configUrl);

		System.setProperty("HADOOP_USER_NAME", hadoopUserName);
		FileSystem fs = FileSystem.get(conf);

		System.setProperty("user.name","hdfs");
		List<HdfsDirSummary> summaryListTemp = new ArrayList<HdfsDirSummary>();
		for (FileStatus fileStatus : fs.listStatus(new Path("/user"))) {
			if (fileStatus.isDirectory()) {
				Path filePath = fileStatus.getPath();
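				// One content summary per user home directory; this aggregates the whole
				// subtree, so it can be slow for large directory trees.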
				ContentSummary summary = fs.getContentSummary(filePath);
				HdfsDirSummary hdfsDirSummay = new HdfsDirSummary();
				hdfsDirSummay.setUser(filePath.getName());
				hdfsDirSummay
						.setDirectoryCount(summary.getDirectoryCount());
				hdfsDirSummay.setFileCount(summary.getFileCount());
				hdfsDirSummay.setLength(summary.getLength());
				hdfsDirSummay.setQuota(summary.getQuota());
				hdfsDirSummay.setSpaceConsumed(summary.getSpaceConsumed());
				hdfsDirSummay.setSpaceQuota(summary.getSpaceQuota());

				summaryListTemp.add(hdfsDirSummay);

			}
		}

		if (summaryListTemp.size() > 0) {
			summaryList = summaryListTemp;
			summaryListTemp = null;
		}
	}

	logger.info("Finished initializing HDFS file summary: "
			+ System.currentTimeMillis());
}
 
Example 20
Source File: DefaultStorageProvider.java    From kylin-on-parquet-v2 with Apache License 2.0
/**
 * Warning: different cloud providers may not return a full ContentSummary;
 * currently only the file length and file count may be populated.
 * @param fileSystem the file system to query
 * @param path the path to summarize
 * @return the content summary of the given path
 * @throws IOException if the underlying file system call fails
 */
@Override
public ContentSummary getContentSummary(FileSystem fileSystem, Path path) throws IOException {
    return fileSystem.getContentSummary(path);
}