Java Code Examples for org.apache.hadoop.fs.FileSystem.listStatus()

The following are Java code examples showing how to use the listStatus() method of the org.apache.hadoop.fs.FileSystem class.
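
A minimal, self-contained sketch of a typical listStatus() call is shown below (the namenode URI and directory path are placeholders, not taken from any of the examples):

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // Placeholder cluster URI; use FileSystem.getLocal(conf) to try this against the local filesystem.
    FileSystem fs = FileSystem.get(URI.create("hdfs://namenode:8020"), conf);
    // listStatus() returns the immediate children of a directory (or the entry itself for a file).
    FileStatus[] entries = fs.listStatus(new Path("/tmp"));
    for (FileStatus entry : entries) {
      System.out.println((entry.isDirectory() ? "dir  " : "file ") + entry.getPath());
    }
  }
}
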
Example 1
Project: hadoop   File: TestFileOutputCommitter.java
private void validateMapFileOutputContent(
    FileSystem fs, Path dir) throws IOException {
  // map output is a directory with index and data files
  Path expectedMapDir = new Path(dir, partFile);
  assert(fs.getFileStatus(expectedMapDir).isDirectory());    
  FileStatus[] files = fs.listStatus(expectedMapDir);
  int fileCount = 0;
  boolean dataFileFound = false; 
  boolean indexFileFound = false; 
  for (FileStatus f : files) {
    if (f.isFile()) {
      ++fileCount;
      if (f.getPath().getName().equals(MapFile.INDEX_FILE_NAME)) {
        indexFileFound = true;
      }
      else if (f.getPath().getName().equals(MapFile.DATA_FILE_NAME)) {
        dataFileFound = true;
      }
    }
  }
  assert(fileCount > 0);
  assert(dataFileFound && indexFileFound);
}
 
Example 2
Project: rainbow   File: ParquetMetadataStat.java
/**
 *
 * @param nameNode the hostname of the HDFS namenode
 * @param hdfsPort the port of the HDFS namenode, usually 9000 or 8020
 * @param dirPath the path of the directory that contains the Parquet files, beginning with /, e.g. /msra/column/order/parquet/
 * @throws IOException
 * @throws MetadataException
 */
public ParquetMetadataStat(String nameNode, int hdfsPort, String dirPath) throws IOException, MetadataException
{
    Configuration conf = new Configuration();
    FileSystem fileSystem = FileSystem.get(URI.create("hdfs://" + nameNode + ":" + hdfsPort), conf);
    Path hdfsDirPath = new Path(dirPath);
    if (! fileSystem.isFile(hdfsDirPath))
    {
        FileStatus[] fileStatuses = fileSystem.listStatus(hdfsDirPath);
        for (FileStatus status : fileStatuses)
        {
            // compatibility for HDFS 1.x
            if (! status.isDir())
            {
                //System.out.println(status.getPath().toString());
                this.fileMetaDataList.add(new ParquetFileMetadata(conf, status.getPath()));
            }
        }
    }
    if (this.fileMetaDataList.size() == 0)
    {
        throw new MetadataException("fileMetaDataList is empty, path is not a dir.");
    }
    this.fields = this.fileMetaDataList.get(0).getFileMetaData().getSchema().getFields();
    this.columnCount = this.fileMetaDataList.get(0).getFileMetaData().getSchema().getFieldCount();
}
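
For reference, a hypothetical call to the constructor above might look like the following (host and port are illustrative placeholders; the directory reuses the example path from the Javadoc):

// Hypothetical usage of the constructor shown above; values are placeholders.
ParquetMetadataStat stat = new ParquetMetadataStat("namenode-host", 9000, "/msra/column/order/parquet/");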
 
Example 3
Project: ditb   File: FSTableDescriptorMigrationToSubdir.java
/**
 * Migrates all snapshots, user tables and system tables that require migration.
 * First migrates snapshots.
 * Then migrates each user table in order,
 * then attempts ROOT (should be gone).
 * Migrates hbase:meta last to indicate migration is complete.
 */
private static void migrateFsTableDescriptors(FileSystem fs, Path rootDir) throws IOException {
  // First migrate snapshots - will migrate any snapshot dir that contains a table info file
  Path snapshotsDir = SnapshotDescriptionUtils.getSnapshotsDir(rootDir);
  if (fs.exists(snapshotsDir)) {
    LOG.info("Migrating snapshots");
    FileStatus[] snapshots = fs.listStatus(snapshotsDir,
        new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs));
    for (FileStatus snapshot : snapshots) {
      migrateTable(fs, snapshot.getPath());
    }
  }
  
  LOG.info("Migrating user tables");
  List<Path> userTableDirs = FSUtils.getTableDirs(fs, rootDir);
  for (Path userTableDir : userTableDirs) {
    migrateTable(fs, userTableDir);
  }
  
  LOG.info("Migrating system tables");
  // migrate meta last because that's what we check to see if migration is complete
  migrateTableIfExists(fs, rootDir, TableName.META_TABLE_NAME);
}
 
Example 4
Project: alluxio   File: HdfsAndAlluxioUtils_update.java
/**
 * Lists the file statuses under the given path.
 *
 * @param fileSystemInfo
 *            the file system information
 * @param path
 *            the file path
 * @return the file statuses under the path
 */
public static List<FileStatus> listStatus(FileSystemInfo fileSystemInfo, String path) {
	List<FileStatus> info = new ArrayList<FileStatus>();
	FileSystem fs = getFileSystem(fileSystemInfo);
	Path uri = new Path(path);
	try {
		FileStatus[] list = fs.listStatus(uri);
		for (FileStatus f : list) {
			info.add(f);
		}
	} catch (IOException e) {
		e.printStackTrace();
	} finally {
		closeFileSystem(fs);
	}
	return info;
}
 
Example 5
Project: hadoop   File: SwiftTestUtils.java
/**
 * Assert that a FileSystem.listStatus on a dir finds the subdir/child entry
 * @param fs filesystem
 * @param dir directory to scan
 * @param subdir full path to look for
 * @throws IOException IO problems
 */
public static void assertListStatusFinds(FileSystem fs,
                                         Path dir,
                                         Path subdir) throws IOException {
  FileStatus[] stats = fs.listStatus(dir);
  boolean found = false;
  StringBuilder builder = new StringBuilder();
  for (FileStatus stat : stats) {
    builder.append(stat.toString()).append('\n');
    if (stat.getPath().equals(subdir)) {
      found = true;
    }
  }
  assertTrue("Path " + subdir
                    + " not found in directory " + dir + ":" + builder,
                    found);
}
 
Example 6
Project: scheduling-connector-for-hadoop   File: FSDownload.java
private void changePermissions(FileSystem fs, final Path path)
    throws IOException, InterruptedException {
  File f = new File(path.toUri());
  if (FileUtils.isSymlink(f)) {
    // avoid following symlinks when changing permissions
    return;
  }
  boolean isDir = f.isDirectory();
  FsPermission perm = cachePerms;
  // set public perms as 755 or 555 based on dir or file
  if (resource.getVisibility() == LocalResourceVisibility.PUBLIC) {
    perm = isDir ? PUBLIC_DIR_PERMS : PUBLIC_FILE_PERMS;
  }
  // set private perms as 700 or 500
  else {
    // PRIVATE:
    // APPLICATION:
    perm = isDir ? PRIVATE_DIR_PERMS : PRIVATE_FILE_PERMS;
  }
  LOG.debug("Changing permissions for path " + path + " to perm " + perm);
  final FsPermission fPerm = perm;
  if (null == userUgi) {
    files.setPermission(path, perm);
  } else {
    userUgi.doAs(new PrivilegedExceptionAction<Void>() {
      public Void run() throws Exception {
        files.setPermission(path, fPerm);
        return null;
      }
    });
  }
  if (isDir) {
    FileStatus[] statuses = fs.listStatus(path);
    for (FileStatus status : statuses) {
      changePermissions(fs, status.getPath());
    }
  }
}
 
Example 7
Project: embulk-input-parquet_hadoop   File: ParquetHadoopInputPlugin.java
private List<FileStatus> listRecursive(FileSystem fs, FileStatus status) throws IOException
{
    List<FileStatus> statusList = Lists.newArrayList();
    if (status.isDirectory()) {
        FileStatus[] entries = fs.listStatus(status.getPath(), HiddenFileFilter.INSTANCE);
        for (FileStatus entry : entries) {
            statusList.addAll(listRecursive(fs, entry));
        }
    }
    else {
        statusList.add(status);
    }
    return statusList;
}
 
Example 8
Project: ditb   File: ExportSnapshot.java
/**
 * Set path permission.
 */
private void setPermission(final FileSystem fs, final Path path, final short filesMode,
    final boolean recursive) throws IOException {
  if (filesMode > 0) {
    FsPermission perm = new FsPermission(filesMode);
    if (recursive && fs.isDirectory(path)) {
      for (FileStatus child : fs.listStatus(path)) {
        setPermission(fs, child.getPath(), filesMode, recursive);
      }
    }
    fs.setPermission(path, perm);
  }
}
 
Example 9
Project: ditb   File: TestHFileOutputFormat.java
/**
 * Run small MR job.
 */
@Test
public void testWritingPEData() throws Exception {
  Configuration conf = util.getConfiguration();
  Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
  FileSystem fs = testDir.getFileSystem(conf);

  // Set down this value or we OOME in eclipse.
  conf.setInt("mapreduce.task.io.sort.mb", 20);
  // Write a few files.
  conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);

  Job job = new Job(conf, "testWritingPEData");
  setupRandomGeneratorMapper(job);
  // This partitioner doesn't work well for number keys but using it anyways
  // just to demonstrate how to configure it.
  byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
  byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];

  Arrays.fill(startKey, (byte)0);
  Arrays.fill(endKey, (byte)0xff);

  job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
  // Set start and end rows for partitioner.
  SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
  SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
  job.setReducerClass(KeyValueSortReducer.class);
  job.setOutputFormatClass(HFileOutputFormat.class);
  job.setNumReduceTasks(4);
  job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName(),
      KeyValueSerialization.class.getName());

  FileOutputFormat.setOutputPath(job, testDir);
  assertTrue(job.waitForCompletion(false));
  FileStatus [] files = fs.listStatus(testDir);
  assertTrue(files.length > 0);
}
 
Example 10
Project: ditb   File: FSUtils.java
/**
 * Calls fs.listStatus() and treats FileNotFoundException as non-fatal.
 * This accommodates differences between Hadoop versions: Hadoop 1 does not
 * throw a FileNotFoundException and instead returns an empty FileStatus[],
 * while Hadoop 2 throws a FileNotFoundException.
 *
 * @param fs file system
 * @param dir directory
 * @param filter path filter
 * @return null if dir is empty or doesn't exist, otherwise FileStatus array
 */
public static FileStatus [] listStatus(final FileSystem fs,
    final Path dir, final PathFilter filter) throws IOException {
  FileStatus [] status = null;
  try {
    status = filter == null ? fs.listStatus(dir) : fs.listStatus(dir, filter);
  } catch (FileNotFoundException fnfe) {
    // if directory doesn't exist, return null
    if (LOG.isTraceEnabled()) {
      LOG.trace(dir + " doesn't exist");
    }
  }
  if (status == null || status.length < 1) return null;
  return status;
}
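
Because this helper returns null both when the directory is missing and when it is empty, callers typically null-check the result. A minimal caller-side sketch, with a placeholder path and a placeholder processing method:

// Hypothetical caller of the FSUtils.listStatus helper above; the path is a placeholder.
FileStatus[] children = FSUtils.listStatus(fs, new Path("/hbase/archive"), null);
if (children == null) {
  // directory is absent or empty; nothing to process
} else {
  for (FileStatus child : children) {
    process(child.getPath()); // process(...) stands in for application logic
  }
}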
 
Example 11
Project: QDrill   File: FooterGatherer.java
public static List<Footer> getFooters(final Configuration conf, List<FileStatus> statuses, int parallelism) throws IOException {
  final List<TimedRunnable<Footer>> readers = Lists.newArrayList();
  List<Footer> foundFooters = Lists.newArrayList();
  for(FileStatus status : statuses){


    if(status.isDirectory()){
      // first we check for summary file.
      FileSystem fs = status.getPath().getFileSystem(conf);

      final Path summaryPath = new Path(status.getPath(), ParquetFileWriter.PARQUET_METADATA_FILE);
      if (fs.exists(summaryPath)){
        FileStatus summaryStatus = fs.getFileStatus(summaryPath);
        foundFooters.addAll(ParquetFileReader.readSummaryFile(conf, summaryStatus));
        continue;
      }

      // else we handle as normal file.
      for(FileStatus inStatus : fs.listStatus(status.getPath(), new DrillPathFilter())){
        readers.add(new FooterReader(conf, inStatus));
      }
    }else{
      readers.add(new FooterReader(conf, status));
    }

  }
  if(!readers.isEmpty()){
    foundFooters.addAll(TimedRunnable.run("Fetch Parquet Footers", logger, readers, parallelism));
  }

  return foundFooters;
}
 
Example 12
Project: hadoop   File: TestFSDownload.java
private void verifyPermsRecursively(FileSystem fs,
    FileContext files, Path p,
    LocalResourceVisibility vis) throws IOException {
  FileStatus status = files.getFileStatus(p);
  if (status.isDirectory()) {
    if (vis == LocalResourceVisibility.PUBLIC) {
      Assert.assertTrue(status.getPermission().toShort() ==
        FSDownload.PUBLIC_DIR_PERMS.toShort());
    }
    else {
      Assert.assertTrue(status.getPermission().toShort() ==
        FSDownload.PRIVATE_DIR_PERMS.toShort());
    }
    if (!status.isSymlink()) {
      FileStatus[] statuses = fs.listStatus(p);
      for (FileStatus stat : statuses) {
        verifyPermsRecursively(fs, files, stat.getPath(), vis);
      }
    }
  }
  else {
    if (vis == LocalResourceVisibility.PUBLIC) {
      Assert.assertTrue(status.getPermission().toShort() ==
        FSDownload.PUBLIC_FILE_PERMS.toShort());
    }
    else {
      Assert.assertTrue(status.getPermission().toShort() ==
        FSDownload.PRIVATE_FILE_PERMS.toShort());
    }
  }      
}
 
Example 13
Project: hadoop   File: TestNameNodeMXBean.java
@Test(timeout=120000)
@SuppressWarnings("unchecked")
public void testTopUsers() throws Exception {
  final Configuration conf = new Configuration();
  MiniDFSCluster cluster = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build();
    cluster.waitActive();
    MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
    ObjectName mxbeanNameFsns = new ObjectName(
        "Hadoop:service=NameNode,name=FSNamesystemState");
    FileSystem fs = cluster.getFileSystem();
    final Path path = new Path("/");
    final int NUM_OPS = 10;
    for (int i=0; i< NUM_OPS; i++) {
      fs.listStatus(path);
      fs.setTimes(path, 0, 1);
    }
    String topUsers =
        (String) (mbs.getAttribute(mxbeanNameFsns, "TopUserOpCounts"));
    ObjectMapper mapper = new ObjectMapper();
    Map<String, Object> map = mapper.readValue(topUsers, Map.class);
    assertTrue("Could not find map key timestamp", 
        map.containsKey("timestamp"));
    assertTrue("Could not find map key windows", map.containsKey("windows"));
    List<Map<String, List<Map<String, Object>>>> windows =
        (List<Map<String, List<Map<String, Object>>>>) map.get("windows");
    assertEquals("Unexpected num windows", 3, windows.size());
    for (Map<String, List<Map<String, Object>>> window : windows) {
      final List<Map<String, Object>> ops = window.get("ops");
      assertEquals("Unexpected num ops", 3, ops.size());
      for (Map<String, Object> op: ops) {
        final long count = Long.parseLong(op.get("totalCount").toString());
        final String opType = op.get("opType").toString();
        final int expected;
        if (opType.equals(TopConf.ALL_CMDS)) {
          expected = 2*NUM_OPS;
        } else {
          expected = NUM_OPS;
        }
        assertEquals("Unexpected total count", expected, count);
      }
    }
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 14
Project: hadoop   File: TestCompressionEmulationUtils.java
/**
 * Test {@link RandomTextDataMapper} via {@link CompressionEmulationUtil}.
 */
@Test
public void testRandomCompressedTextDataGenerator() throws Exception {
  int wordSize = 10;
  int listSize = 20;
  long dataSize = 10*1024*1024;
  
  Configuration conf = new Configuration();
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);
  
  // configure the RandomTextDataGenerator to generate desired sized data
  conf.setInt(RandomTextDataGenerator.GRIDMIX_DATAGEN_RANDOMTEXT_LISTSIZE, 
              listSize);
  conf.setInt(RandomTextDataGenerator.GRIDMIX_DATAGEN_RANDOMTEXT_WORDSIZE, 
              wordSize);
  conf.setLong(GenerateData.GRIDMIX_GEN_BYTES, dataSize);
  conf.set("mapreduce.job.hdfs-servers", "");
  
  FileSystem lfs = FileSystem.getLocal(conf);
  
  // define the test's root temp directory
  Path rootTempDir =
      new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(
          lfs.getUri(), lfs.getWorkingDirectory());

  Path tempDir = new Path(rootTempDir, "TestRandomCompressedTextDataGenr");
  lfs.delete(tempDir, true);
  
  runDataGenJob(conf, tempDir);
  
  // validate the output data
  FileStatus[] files = 
    lfs.listStatus(tempDir, new Utils.OutputFileUtils.OutputFilesFilter());
  long size = 0;
  long maxLineSize = 0;
  
  for (FileStatus status : files) {
    InputStream in = 
      CompressionEmulationUtil
        .getPossiblyDecompressedInputStream(status.getPath(), conf, 0);
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    String line = reader.readLine();
    if (line != null) {
      long lineSize = line.getBytes().length;
      if (lineSize > maxLineSize) {
        maxLineSize = lineSize;
      }
      while (line != null) {
        for (String word : line.split("\\s")) {
          size += word.getBytes().length;
        }
        line = reader.readLine();
      }
    }
    reader.close();
  }

  assertTrue(size >= dataSize);
  assertTrue(size <= dataSize + maxLineSize);
}
 
Example 15
Project: hadoop   File: TestDFSRemove.java
void list(FileSystem fs, String name) throws IOException {
  FileSystem.LOG.info("\n\n" + name);
  for(FileStatus s : fs.listStatus(dir)) {
    FileSystem.LOG.info("" + s.getPath());
  }
}
 
Example 16
Project: aliyun-maxcompute-data-collectors   File: TestIncrementalImport.java
/**
 * Assert that a directory contains a file with exactly one line
 * in it, containing the prescribed number 'val'.
 */
public void assertFirstSpecificNumber(String tableName, int val) {
  try {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    Path warehouse = new Path(BaseSqoopTestCase.LOCAL_WAREHOUSE_DIR);
    Path tableDir = new Path(warehouse, tableName);
    FileStatus [] stats = fs.listStatus(tableDir);
    String [] filePaths = new String[stats.length];
    for (int i = 0; i < stats.length; i++) {
      filePaths[i] = stats[i].getPath().toString();
    }

    // Read the first file that is not a hidden file.
    boolean foundVal = false;
    for (String filePath : filePaths) {
      String fileName = new Path(filePath).getName();
      if (fileName.startsWith("_") || fileName.startsWith(".")) {
        continue;
      }

      if (foundVal) {
        // Make sure we don't have two or more "real" files in the dir.
        fail("Got an extra data-containing file in this directory.");
      }

      BufferedReader r = new BufferedReader(
          new InputStreamReader(fs.open(new Path(filePath))));
      try {
        String s = r.readLine();
        if (null == s) {
          fail("Unexpected empty file " + filePath + ".");
        }
        assertEquals(val, (int) Integer.valueOf(s.trim()));

        String nextLine = r.readLine();
        if (nextLine != null) {
          fail("Expected only one result, but got another line: " + nextLine);
        }

        // Successfully got the value we were looking for.
        foundVal = true;
      } finally {
        r.close();
      }
    }
  } catch (IOException e) {
    fail("Got unexpected exception: " + StringUtils.stringifyException(e));
  }
}
 
Example 17
Project: ViraPipe   File: InterleaveMulti.java
public static void main(String[] args) throws IOException {
  SparkConf conf = new SparkConf().setAppName("DecompressInterleave");
  //conf.set("spark.scheduler.mode", "FAIR");
  //conf.set("spark.scheduler.allocation.file", "/opt/cloudera/parcels/CDH-5.10.0-1.cdh5.10.0.p0.41/etc/hadoop/conf.dist/pools.xml");
  JavaSparkContext sc = new JavaSparkContext(conf);
  //sc.setLocalProperty("spark.scheduler.pool", "production");

  Options options = new Options();

  Option splitDirOpt = new Option( "out", true, "Path to output directory in hdfs." );
  Option numsplitsOpt = new Option( "splitsize", true, "Number of reads in split, depends on the size of read file, number of cores and available memory." );
  options.addOption( new Option( "decompress", "" ) );
  options.addOption( new Option( "temp", true, "" ) );
  options.addOption( new Option( "in", true, "" ) );
  options.addOption( new Option( "remtemp", "" ) );
  options.addOption( new Option( "merge", "" ) );

  options.addOption( numsplitsOpt );
  options.addOption( splitDirOpt );
  options.addOption(new Option( "help", "print this message" ));

  HelpFormatter formatter = new HelpFormatter();
  formatter.printHelp( "spark-submit <spark specific args>", options, true );

  CommandLineParser parser = new BasicParser();
  CommandLine cmd = null;
  try {
    // parse the command line arguments
    cmd = parser.parse( options, args );

  }
  catch( ParseException exp ) {
    // oops, something went wrong
    System.err.println( "Parsing failed.  Reason: " + exp.getMessage() );
  }
  String input = (cmd.hasOption("in")==true)? cmd.getOptionValue("in"):null;
  int splitsize = (cmd.hasOption("splitsize")==true)? Integer.parseInt(cmd.getOptionValue("splitsize")):0;
  boolean merge = cmd.hasOption("merge");
  String outpath = cmd.getOptionValue("out");

  FileSystem fs = FileSystem.get(new Configuration());
  int splitlen = splitsize * 4; // a FASTQ read is expressed by 4 lines
  FileStatus[] dirs = fs.listStatus(new Path(input));

  Arrays.asList(dirs).forEach(dir -> {
    if (dir.isDirectory()) {
      try {
        FileStatus fst = fs.getFileStatus(new Path(input + "/" + dir.getPath().getName() + "/1.fq"));
        FileStatus fst2 = fs.getFileStatus(new Path(input + "/" + dir.getPath().getName() + "/2.fq"));

        if (merge)
          interleaveSplitFastq(fst, fst2, outpath, splitlen, sc);
        else // saves separate HDFS directories
          interleaveSplitFastq(fst, fst2, outpath + "/" + dir.getPath().getName(), splitlen, sc);
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
  });

  sc.stop();

}
 
Example 18
Project: hadoop   File: TestCompressionEmulationUtils.java
/**
 * Test if {@link RandomTextDataGenerator} can generate random text data 
 * with the desired compression ratio. This involves
 *   - using {@link CompressionEmulationUtil} to configure the MR job for 
 *     generating the random text data with the desired compression ratio
 *   - running the MR job
 *   - test {@link RandomTextDataGenerator}'s output and match the output size
 *     (compressed) with the expected compression ratio.
 */
private void testCompressionRatioConfigure(float ratio)
throws Exception {
  long dataSize = 10*1024*1024;
  
  Configuration conf = new Configuration();
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);
  
  conf.setLong(GenerateData.GRIDMIX_GEN_BYTES, dataSize);
  conf.set("mapreduce.job.hdfs-servers", "");
  
  float expectedRatio = CompressionEmulationUtil.DEFAULT_COMPRESSION_RATIO;
  if (ratio > 0) {
    // set the compression ratio in the conf
    CompressionEmulationUtil.setMapInputCompressionEmulationRatio(conf, ratio);
    expectedRatio = 
      CompressionEmulationUtil.standardizeCompressionRatio(ratio);
  }
  
  // invoke the utility to map from ratio to word-size
  CompressionEmulationUtil.setupDataGeneratorConfig(conf);
  
  FileSystem lfs = FileSystem.getLocal(conf);
  
  // define the test's root temp directory
  Path rootTempDir =
      new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(
          lfs.getUri(), lfs.getWorkingDirectory());

  Path tempDir = 
    new Path(rootTempDir, "TestCustomRandomCompressedTextDataGenr");
  lfs.delete(tempDir, true);
  
  runDataGenJob(conf, tempDir);
  
  // validate the output data
  FileStatus[] files = 
    lfs.listStatus(tempDir, new Utils.OutputFileUtils.OutputFilesFilter());
  long size = 0;
  
  for (FileStatus status : files) {
    size += status.getLen();
  }

  float compressionRatio = ((float)size)/dataSize;
  float stdRatio = 
    CompressionEmulationUtil.standardizeCompressionRatio(compressionRatio);
  
  assertEquals(expectedRatio, stdRatio, 0.0D);
}
 
Example 19
Project: circus-train   File: SimpleCopyListing.java
private static FileStatus[] getChildren(FileSystem fileSystem, FileStatus parent) throws IOException {
  return fileSystem.listStatus(parent.getPath());
}
 
Example 20
Project: hadoop   File: SimpleCopyListing.java
/**
 * Collect the list of 
 *   {@literal <sourceRelativePath, sourceFileStatus>}
 * to be copied and write to the sequence file. In essence, any file or
 * directory that need to be copied or sync-ed is written as an entry to the
 * sequence file, with the possible exception of the source root:
 *     when either the -update (sync) or the -overwrite switch is specified, and if
 *     the source root is a directory, then the source root entry is not
 *     written to the sequence file, because only the contents of the source
 *     directory need to be copied in this case.
 * See {@link org.apache.hadoop.tools.util.DistCpUtils#getRelativePath} for
 *     how relative path is computed.
 * See computeSourceRootPath method for how the root path of the source is
 *     computed.
 * @param fileListWriter
 * @param options
 * @throws IOException
 */
@VisibleForTesting
public void doBuildListing(SequenceFile.Writer fileListWriter,
    DistCpOptions options) throws IOException {
  try {
    for (Path path: options.getSourcePaths()) {
      FileSystem sourceFS = path.getFileSystem(getConf());
      final boolean preserveAcls = options.shouldPreserve(FileAttribute.ACL);
      final boolean preserveXAttrs = options.shouldPreserve(FileAttribute.XATTR);
      final boolean preserveRawXAttrs = options.shouldPreserveRawXattrs();
      path = makeQualified(path);

      FileStatus rootStatus = sourceFS.getFileStatus(path);
      Path sourcePathRoot = computeSourceRootPath(rootStatus, options);

      FileStatus[] sourceFiles = sourceFS.listStatus(path);
      boolean explore = (sourceFiles != null && sourceFiles.length > 0);
      if (!explore || rootStatus.isDirectory()) {
        CopyListingFileStatus rootCopyListingStatus =
          DistCpUtils.toCopyListingFileStatus(sourceFS, rootStatus,
              preserveAcls, preserveXAttrs, preserveRawXAttrs);
        writeToFileListingRoot(fileListWriter, rootCopyListingStatus,
            sourcePathRoot, options);
      }
      if (explore) {
        for (FileStatus sourceStatus: sourceFiles) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Recording source-path: " + sourceStatus.getPath() + " for copy.");
          }
          CopyListingFileStatus sourceCopyListingStatus =
            DistCpUtils.toCopyListingFileStatus(sourceFS, sourceStatus,
                preserveAcls && sourceStatus.isDirectory(),
                preserveXAttrs && sourceStatus.isDirectory(),
                preserveRawXAttrs && sourceStatus.isDirectory());
          writeToFileListing(fileListWriter, sourceCopyListingStatus,
              sourcePathRoot, options);

          if (isDirectoryAndNotEmpty(sourceFS, sourceStatus)) {
            if (LOG.isDebugEnabled()) {
              LOG.debug("Traversing non-empty source dir: " + sourceStatus.getPath());
            }
            traverseNonEmptyDirectory(fileListWriter, sourceStatus, sourcePathRoot,
                options);
          }
        }
      }
    }
    fileListWriter.close();
    fileListWriter = null;
  } finally {
    IOUtils.cleanup(LOG, fileListWriter);
  }
}