Java Code Examples for org.apache.hadoop.fs.FileSystem.getFileStatus()

The following are Java code examples showing how to use the getFileStatus() method of the org.apache.hadoop.fs.FileSystem class. You can vote up the examples you find most useful; votes help surface the best examples.
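Before the project examples, here is a minimal, self-contained sketch of the call itself: it obtains a FileSystem from the default configuration, asks for the FileStatus of a path, and prints the metadata fields that most of the examples below inspect. The class name and the path /tmp/example.txt are placeholders for illustration only, not taken from any project listed here.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusDemo {
  public static void main(String[] args) throws IOException {
    // Obtain the FileSystem backing fs.defaultFS (HDFS, local, S3A, ...).
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // Placeholder path; getFileStatus() throws FileNotFoundException if it does not exist.
    Path path = new Path("/tmp/example.txt");
    FileStatus status = fs.getFileStatus(path);

    // FileStatus exposes the entry's metadata.
    System.out.println("length:      " + status.getLen());
    System.out.println("isDirectory: " + status.isDirectory());
    System.out.println("replication: " + status.getReplication());
    System.out.println("block size:  " + status.getBlockSize());
    System.out.println("owner/group: " + status.getOwner() + "/" + status.getGroup());
    System.out.println("permission:  " + status.getPermission());
    System.out.println("modified:    " + status.getModificationTime());
  }
}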
Example 1
Project: hadoop   File: TestS3ABlocksize.java   (8 votes)
@Test
@SuppressWarnings("deprecation")
public void testBlockSize() throws Exception {
  FileSystem fs = getFileSystem();
  long defaultBlockSize = fs.getDefaultBlockSize();
  assertEquals("incorrect blocksize",
      S3AFileSystem.DEFAULT_BLOCKSIZE, defaultBlockSize);
  long newBlockSize = defaultBlockSize * 2;
  fs.getConf().setLong(Constants.FS_S3A_BLOCK_SIZE, newBlockSize);

  Path dir = path("testBlockSize");
  Path file = new Path(dir, "file");
  createFile(fs, file, true, dataset(1024, 'a', 'z' - 'a'));
  FileStatus fileStatus = fs.getFileStatus(file);
  assertEquals("Double default block size in stat(): " + fileStatus,
      newBlockSize,
      fileStatus.getBlockSize());

  // check the listing  & assert that the block size is picked up by
  // this route too.
  boolean found = false;
  FileStatus[] listing = fs.listStatus(dir);
  for (FileStatus stat : listing) {
    LOG.info("entry: {}", stat);
    if (file.equals(stat.getPath())) {
      found = true;
      assertEquals("Double default block size in ls(): " + stat,
          newBlockSize,
          stat.getBlockSize());
    }
  }
  assertTrue("Did not find " + fileStatsToString(listing, ", "), found);
}
 
Example 2
Project: angel   File: ClientDistributedCacheManager.java   (6 votes)
public void addResource(FileSystem fs, Configuration conf, Path destPath,
    Map<String, LocalResource> localResources, LocalResourceType resourceType, String link,
    Map<URI, FileStatus> statCache, boolean appMasterOnly) throws IOException {

  FileStatus destStatus = fs.getFileStatus(destPath);
  LocalResource amJarRsrc = Records.newRecord(LocalResource.class);
  amJarRsrc.setType(resourceType);

  LocalResourceVisibility visibility = getVisibility(conf, destPath.toUri(), statCache);
  amJarRsrc.setVisibility(visibility);
  amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(destPath));
  amJarRsrc.setTimestamp(destStatus.getModificationTime());
  amJarRsrc.setSize(destStatus.getLen());

  if (link == null || link.isEmpty())
    throw new IOException("You must specify a valid link name");

  localResources.put(link, amJarRsrc);
}
 
Example 3
Project: hadoop   File: TestNativeAzureFileSystemBlockLocations.java   (6 votes)
private static BlockLocation[] getBlockLocationsOutput(int fileSize,
    int blockSize, long start, long len, String blockLocationHost)
    throws Exception {
  Configuration conf = new Configuration();
  conf.set(NativeAzureFileSystem.AZURE_BLOCK_SIZE_PROPERTY_NAME, ""
      + blockSize);
  if (blockLocationHost != null) {
    conf.set(NativeAzureFileSystem.AZURE_BLOCK_LOCATION_HOST_PROPERTY_NAME,
        blockLocationHost);
  }
  AzureBlobStorageTestAccount testAccount = AzureBlobStorageTestAccount
      .createMock(conf);
  FileSystem fs = testAccount.getFileSystem();
  Path testFile = createTestFile(fs, fileSize);
  FileStatus stat = fs.getFileStatus(testFile);
  BlockLocation[] locations = fs.getFileBlockLocations(stat, start, len);
  testAccount.cleanup();
  return locations;
}
 
Example 4
Project: hadoop   File: BaseTestHttpFSWith.java   (6 votes)
private void testCreate(Path path, boolean override) throws Exception {
  FileSystem fs = getHttpFSFileSystem();
  FsPermission permission = new FsPermission(FsAction.READ_WRITE, FsAction.NONE, FsAction.NONE);
  OutputStream os = fs.create(new Path(path.toUri().getPath()), permission, override, 1024,
                              (short) 2, 100 * 1024 * 1024, null);
  os.write(1);
  os.close();
  fs.close();

  fs = FileSystem.get(getProxiedFSConf());
  FileStatus status = fs.getFileStatus(path);
  if (!isLocalFS()) {
    Assert.assertEquals(status.getReplication(), 2);
    Assert.assertEquals(status.getBlockSize(), 100 * 1024 * 1024);
  }
  Assert.assertEquals(status.getPermission(), permission);
  InputStream is = fs.open(path);
  Assert.assertEquals(is.read(), 1);
  is.close();
  fs.close();
}
 
Example 5
Project: hadoop   File: GenerateDistCacheData.java   (6 votes)
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobConf jobConf = new JobConf(jobCtxt.getConfiguration());
  final JobClient client = new JobClient(jobConf);
  ClusterStatus stat = client.getClusterStatus(true);
  int numTrackers = stat.getTaskTrackers();
  final int fileCount = jobConf.getInt(GRIDMIX_DISTCACHE_FILE_COUNT, -1);

  // Total size of distributed cache files to be generated
  final long totalSize = jobConf.getLong(GRIDMIX_DISTCACHE_BYTE_COUNT, -1);
  // Get the path of the special file
  String distCacheFileList = jobConf.get(GRIDMIX_DISTCACHE_FILE_LIST);
  if (fileCount < 0 || totalSize < 0 || distCacheFileList == null) {
    throw new RuntimeException("Invalid metadata: #files (" + fileCount
        + "), total_size (" + totalSize + "), filelisturi ("
        + distCacheFileList + ")");
  }

  Path sequenceFile = new Path(distCacheFileList);
  FileSystem fs = sequenceFile.getFileSystem(jobConf);
  FileStatus srcst = fs.getFileStatus(sequenceFile);
  // Consider the number of TTs * mapSlotsPerTracker as number of mappers.
  int numMapSlotsPerTracker = jobConf.getInt(TTConfig.TT_MAP_SLOTS, 2);
  int numSplits = numTrackers * numMapSlotsPerTracker;

  List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
  LongWritable key = new LongWritable();
  BytesWritable value = new BytesWritable();

  // Average size of data to be generated by each map task
  final long targetSize = Math.max(totalSize / numSplits,
                            DistributedCacheEmulator.AVG_BYTES_PER_MAP);
  long splitStartPosition = 0L;
  long splitEndPosition = 0L;
  long acc = 0L;
  long bytesRemaining = srcst.getLen();
  SequenceFile.Reader reader = null;
  try {
    reader = new SequenceFile.Reader(fs, sequenceFile, jobConf);
    while (reader.next(key, value)) {

      // If adding this file would put this split past the target size,
      // cut the last split and put this file in the next split.
      if (acc + key.get() > targetSize && acc != 0) {
        long splitSize = splitEndPosition - splitStartPosition;
        splits.add(new FileSplit(
            sequenceFile, splitStartPosition, splitSize, (String[])null));
        bytesRemaining -= splitSize;
        splitStartPosition = splitEndPosition;
        acc = 0L;
      }
      acc += key.get();
      splitEndPosition = reader.getPosition();
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }
  if (bytesRemaining != 0) {
    splits.add(new FileSplit(
        sequenceFile, splitStartPosition, bytesRemaining, (String[])null));
  }

  return splits;
}
 
Example 6
Project: hadoop   File: FSDownload.java   (6 votes)
private Path copy(Path sCopy, Path dstdir) throws IOException {
  FileSystem sourceFs = sCopy.getFileSystem(conf);
  Path dCopy = new Path(dstdir, "tmp_"+sCopy.getName());
  FileStatus sStat = sourceFs.getFileStatus(sCopy);
  if (sStat.getModificationTime() != resource.getTimestamp()) {
    throw new IOException("Resource " + sCopy +
        " changed on src filesystem (expected " + resource.getTimestamp() +
        ", was " + sStat.getModificationTime());
  }
  if (resource.getVisibility() == LocalResourceVisibility.PUBLIC) {
    if (!isPublic(sourceFs, sCopy, sStat, statCache)) {
      throw new IOException("Resource " + sCopy +
          " is not publicly accessable and as such cannot be part of the" +
          " public cache.");
    }
  }

  FileUtil.copy(sourceFs, sStat, FileSystem.getLocal(conf), dCopy, false,
      true, conf);
  return dCopy;
}
 
Example 7
Project: hadoop   File: TestCopyMapper.java   (6 votes)
private void verifyCopy(FileSystem fs, boolean preserveChecksum)
    throws Exception {
  for (Path path : pathList) {
    final Path targetPath = new Path(path.toString().replaceAll(SOURCE_PATH,
        TARGET_PATH));
    Assert.assertTrue(fs.exists(targetPath));
    Assert.assertTrue(fs.isFile(targetPath) == fs.isFile(path));
    FileStatus sourceStatus = fs.getFileStatus(path);
    FileStatus targetStatus = fs.getFileStatus(targetPath);
    Assert.assertEquals(sourceStatus.getReplication(),
        targetStatus.getReplication());
    if (preserveChecksum) {
      Assert.assertEquals(sourceStatus.getBlockSize(),
          targetStatus.getBlockSize());
    }
    Assert.assertTrue(!fs.isFile(targetPath)
        || fs.getFileChecksum(targetPath).equals(fs.getFileChecksum(path)));
  }
}
 
Example 8
Project: hadoop   File: JsonSerDeser.java   (5 votes)
/**
 * Load from a Hadoop filesystem
 * @param fs filesystem
 * @param path path
 * @return a loaded CD
 * @throws IOException IO problems
 * @throws EOFException if not enough bytes were read in
 * @throws JsonParseException parse problems
 * @throws JsonMappingException O/J mapping problems
 */
public T load(FileSystem fs, Path path)
    throws IOException, JsonParseException, JsonMappingException {
  FileStatus status = fs.getFileStatus(path);
  long len = status.getLen();
  byte[] b = new byte[(int) len];
  FSDataInputStream dataInputStream = fs.open(path);
  int count = dataInputStream.read(b);
  if (count != len) {
    throw new EOFException(path.toString() + ": read finished prematurely");
  }
  return fromBytes(path.toString(), b);
}
 
Example 9
Project: hadoop   File: TestDistCpUtils.java   (5 votes)
@Test
public void testPreserveGroupOnFile() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.GROUP);

  Path dst = new Path("/tmp/dest2");
  Path src = new Path("/tmp/src2");

  createFile(fs, src);
  createFile(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");
  fs.setTimes(src, 0, 0);
  fs.setReplication(src, (short) 1);

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");
  fs.setTimes(dst, 100, 100);
  fs.setReplication(dst, (short) 2);

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertTrue(srcStatus.getGroup().equals(dstStatus.getGroup()));
  Assert.assertFalse(srcStatus.getAccessTime() == dstStatus.getAccessTime());
  Assert.assertFalse(srcStatus.getModificationTime() == dstStatus.getModificationTime());
  Assert.assertFalse(srcStatus.getReplication() == dstStatus.getReplication());
}
 
Example 10
Project: dremio-oss   File: TestRemoteNodeFileSystem.java   (5 votes)
@Test(expected = FileNotFoundException.class)
public void testGetFileStatusWithInvalidPath() throws Exception {
  {
    setupRPC(
        DFS.RpcType.GET_FILE_STATUS_REQUEST, DFS.GetFileStatusRequest.newBuilder().setPath("/foo/bar").build(),
        DFS.GetFileStatusResponse.class, newRPCException(LOCAL_ENDPOINT, new FileNotFoundException("File not found")));
  }

  FileSystem fs = newRemoteNodeFileSystem();

  Path path = new Path("/foo/bar");
  fs.getFileStatus(path);
}
 
Example 11
Project: hadoop   File: TestCopyMapper.java   (5 votes)
private static void touchFile(String path, boolean createMultipleBlocks,
    ChecksumOpt checksumOpt) throws Exception {
  FileSystem fs;
  DataOutputStream outputStream = null;
  try {
    fs = cluster.getFileSystem();
    final Path qualifiedPath = new Path(path).makeQualified(fs.getUri(),
        fs.getWorkingDirectory());
    final long blockSize = createMultipleBlocks ? NON_DEFAULT_BLOCK_SIZE : fs
        .getDefaultBlockSize(qualifiedPath) * 2;
    FsPermission permission = FsPermission.getFileDefault().applyUMask(
        FsPermission.getUMask(fs.getConf()));
    outputStream = fs.create(qualifiedPath, permission,
        EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE), 0,
        (short) (fs.getDefaultReplication(qualifiedPath) * 2), blockSize,
        null, checksumOpt);
    byte[] bytes = new byte[DEFAULT_FILE_SIZE];
    outputStream.write(bytes);
    long fileSize = DEFAULT_FILE_SIZE;
    if (createMultipleBlocks) {
      while (fileSize < 2*blockSize) {
        outputStream.write(bytes);
        outputStream.flush();
        fileSize += DEFAULT_FILE_SIZE;
      }
    }
    pathList.add(qualifiedPath);
    ++nFiles;

    FileStatus fileStatus = fs.getFileStatus(qualifiedPath);
    System.out.println(fileStatus.getBlockSize());
    System.out.println(fileStatus.getReplication());
  }
  finally {
    IOUtils.cleanup(null, outputStream);
  }
}
 
Example 12
Project: hadoop   File: LogAggregationService.java   (5 votes)
private boolean checkExists(FileSystem fs, Path path, FsPermission fsPerm)
    throws IOException {
  boolean exists = true;
  try {
    FileStatus appDirStatus = fs.getFileStatus(path);
    if (!APP_DIR_PERMISSIONS.equals(appDirStatus.getPermission())) {
      fs.setPermission(path, APP_DIR_PERMISSIONS);
    }
  } catch (FileNotFoundException fnfe) {
    exists = false;
  }
  return exists;
}
 
Example 13
Project: hadoop   File: BaseTestHttpFSWith.java   (5 votes)
private void testListStatus() throws Exception {
  FileSystem fs = FileSystem.get(getProxiedFSConf());
  Path path = new Path(getProxiedFSTestDir(), "foo.txt");
  OutputStream os = fs.create(path);
  os.write(1);
  os.close();
  FileStatus status1 = fs.getFileStatus(path);
  fs.close();

  fs = getHttpFSFileSystem();
  FileStatus status2 = fs.getFileStatus(new Path(path.toUri().getPath()));
  fs.close();

  Assert.assertEquals(status2.getPermission(), status1.getPermission());
  Assert.assertEquals(status2.getPath().toUri().getPath(), status1.getPath().toUri().getPath());
  Assert.assertEquals(status2.getReplication(), status1.getReplication());
  Assert.assertEquals(status2.getBlockSize(), status1.getBlockSize());
  Assert.assertEquals(status2.getAccessTime(), status1.getAccessTime());
  Assert.assertEquals(status2.getModificationTime(), status1.getModificationTime());
  Assert.assertEquals(status2.getOwner(), status1.getOwner());
  Assert.assertEquals(status2.getGroup(), status1.getGroup());
  Assert.assertEquals(status2.getLen(), status1.getLen());

  FileStatus[] stati = fs.listStatus(path.getParent());
  Assert.assertEquals(stati.length, 1);
  Assert.assertEquals(stati[0].getPath().getName(), path.getName());
}
 
Example 14
Project: hadoop   File: TestDistCpUtils.java   (5 votes)
@Test
public void testPreserveReplicationOnFile() throws IOException {
  FileSystem fs = FileSystem.get(config);
  EnumSet<FileAttribute> attributes = EnumSet.of(FileAttribute.REPLICATION);

  Path dst = new Path("/tmp/dest2");
  Path src = new Path("/tmp/src2");

  createFile(fs, src);
  createFile(fs, dst);

  fs.setPermission(src, fullPerm);
  fs.setOwner(src, "somebody", "somebody-group");
  fs.setTimes(src, 0, 0);
  fs.setReplication(src, (short) 1);

  fs.setPermission(dst, noPerm);
  fs.setOwner(dst, "nobody", "nobody-group");
  fs.setTimes(dst, 100, 100);
  fs.setReplication(dst, (short) 2);

  CopyListingFileStatus srcStatus = new CopyListingFileStatus(fs.getFileStatus(src));

  DistCpUtils.preserve(fs, dst, srcStatus, attributes, false);

  CopyListingFileStatus dstStatus = new CopyListingFileStatus(fs.getFileStatus(dst));

  // FileStatus.equals only compares path field, must explicitly compare all fields
  Assert.assertFalse(srcStatus.getPermission().equals(dstStatus.getPermission()));
  Assert.assertFalse(srcStatus.getOwner().equals(dstStatus.getOwner()));
  Assert.assertFalse(srcStatus.getGroup().equals(dstStatus.getGroup()));
  Assert.assertFalse(srcStatus.getAccessTime() == dstStatus.getAccessTime());
  Assert.assertFalse(srcStatus.getModificationTime() == dstStatus.getModificationTime());
  Assert.assertTrue(srcStatus.getReplication() == dstStatus.getReplication());
}
 
Example 15
Project: hadoop   File: TestLeveldbTimelineStore.java   (5 votes)
@Test
public void testRootDirPermission() throws IOException {
  FileSystem fs = FileSystem.getLocal(new YarnConfiguration());
  FileStatus file = fs.getFileStatus(
      new Path(fsPath.getAbsolutePath(), LeveldbTimelineStore.FILENAME));
  assertNotNull(file);
  assertEquals(LeveldbTimelineStore.LEVELDB_DIR_UMASK, file.getPermission());
}
 
Example 16
Project: hadoop   File: JobSubmissionFiles.java   (5 votes)
/**
 * Initializes the staging directory and returns the path. It also
 * keeps track of all necessary ownership and permissions
 * @param cluster
 * @param conf
 */
public static Path getStagingDir(Cluster cluster, Configuration conf) 
throws IOException,InterruptedException {
  Path stagingArea = cluster.getStagingAreaDir();
  FileSystem fs = stagingArea.getFileSystem(conf);
  String realUser;
  String currentUser;
  UserGroupInformation ugi = UserGroupInformation.getLoginUser();
  realUser = ugi.getShortUserName();
  currentUser = UserGroupInformation.getCurrentUser().getShortUserName();
  if (fs.exists(stagingArea)) {
    FileStatus fsStatus = fs.getFileStatus(stagingArea);
    String owner = fsStatus.getOwner();
    if (!(owner.equals(currentUser) || owner.equals(realUser))) {
       throw new IOException("The ownership on the staging directory " +
                    stagingArea + " is not as expected. " +
                    "It is owned by " + owner + ". The directory must " +
                    "be owned by the submitter " + currentUser + " or " +
                    "by " + realUser);
    }
    if (!fsStatus.getPermission().equals(JOB_DIR_PERMISSION)) {
      LOG.info("Permissions on staging directory " + stagingArea + " are " +
        "incorrect: " + fsStatus.getPermission() + ". Fixing permissions " +
        "to correct value " + JOB_DIR_PERMISSION);
      fs.setPermission(stagingArea, JOB_DIR_PERMISSION);
    }
  } else {
    fs.mkdirs(stagingArea, 
        new FsPermission(JOB_DIR_PERMISSION));
  }
  return stagingArea;
}
 
Example 17
Project: ViraPipe   File: Interleave.java   (4 votes)
public static void main(String[] args) throws IOException {
  SparkConf conf = new SparkConf().setAppName("Interleave");
  //conf.set("spark.scheduler.mode", "FAIR");
  //conf.set("spark.scheduler.allocation.file", "/opt/cloudera/parcels/CDH-5.10.0-1.cdh5.10.0.p0.41/etc/hadoop/conf.dist/pools.xml");
  JavaSparkContext sc = new JavaSparkContext(conf);
  //sc.setLocalProperty("spark.scheduler.pool", "production");

  Options options = new Options();
  Option pairedOpt = new Option( "paired", "Split paired end reads to separate folders, does not interleave." );
  Option intOpt = new Option( "singlesplit", "" );
  options.addOption( new Option( "decompress", "" ) );

  options.addOption( pairedOpt );
  options.addOption( intOpt );
  options.addOption(new Option( "help", "print this message" ));

  HelpFormatter formatter = new HelpFormatter();
  formatter.printHelp( "spark-submit <spark specific args>", options, true );

  CommandLineParser parser = new BasicParser();
  CommandLine cmd = null;
  try {
    // parse the command line arguments
    cmd = parser.parse( options, args );

  }
  catch( ParseException exp ) {
    // oops, something went wrong
    System.err.println( "Parsing failed.  Reason: " + exp.getMessage() );
    System.exit(1); // cmd would remain null below; abort instead of risking an NPE
  }

  String fastq = args[0];
  String fastq2 = args[1];
  String outdir = args[2];
  int splitsize = Integer.valueOf(args[3]);
  boolean paired = cmd.hasOption("paired");
  boolean singlesplit = cmd.hasOption("singlesplit");
  boolean decompress = cmd.hasOption("decompress");

  String outdir2 = null;
  if(paired)
    outdir2 = outdir+"2";

  FileSystem fs = FileSystem.get(new Configuration());
  if(decompress){
    decompress(fs, fastq, "temp1.fq");
    decompress(fs, fastq2, "temp2.fq");

    fastq = "temp1.fq";
    fastq2 = "temp2.fq";

  }

  //Count split positions
  int splitlen = splitsize*4; //FASTQ read is expressed by 4 lines

  if(singlesplit){
    FileStatus fstatus = fs.getFileStatus(new Path(fastq));
    splitFastq(fstatus, fastq, outdir, splitlen, sc);
    if(paired){
      FileStatus fstatus2 = fs.getFileStatus(new Path(fastq2));
      splitFastq(fstatus2, fastq2, outdir2, splitlen, sc);
    }
  }else{
    FileStatus fst = fs.getFileStatus(new Path(fastq));
    FileStatus fst2 = fs.getFileStatus(new Path(fastq2));

    interleaveSplitFastq(fst, fst2, outdir, splitlen, sc);
  }

  if(decompress){
    fs.delete(new Path("temp1.fq"), false);
    fs.delete(new Path("temp2.fq"), false);
  }

  sc.stop();

}
 
Example 18
Project: hadoop   File: SimpleCopyListing.java   (4 votes)
/**
 * Collect the list of 
 *   {@literal <sourceRelativePath, sourceFileStatus>}
 * to be copied and write to the sequence file. In essence, any file or
 * directory that need to be copied or sync-ed is written as an entry to the
 * sequence file, with the possible exception of the source root:
 *     when either -update (sync) or -overwrite switch is specified, and if
 *     the source root is a directory, then the source root entry is not 
 *     written to the sequence file, because only the contents of the source
 *     directory need to be copied in this case.
 * See {@link org.apache.hadoop.tools.util.DistCpUtils#getRelativePath} for
 *     how relative path is computed.
 * See computeSourceRootPath method for how the root path of the source is
 *     computed.
 * @param fileListWriter
 * @param options
 * @throws IOException
 */
@VisibleForTesting
public void doBuildListing(SequenceFile.Writer fileListWriter,
    DistCpOptions options) throws IOException {
  try {
    for (Path path: options.getSourcePaths()) {
      FileSystem sourceFS = path.getFileSystem(getConf());
      final boolean preserveAcls = options.shouldPreserve(FileAttribute.ACL);
      final boolean preserveXAttrs = options.shouldPreserve(FileAttribute.XATTR);
      final boolean preserveRawXAttrs = options.shouldPreserveRawXattrs();
      path = makeQualified(path);

      FileStatus rootStatus = sourceFS.getFileStatus(path);
      Path sourcePathRoot = computeSourceRootPath(rootStatus, options);

      FileStatus[] sourceFiles = sourceFS.listStatus(path);
      boolean explore = (sourceFiles != null && sourceFiles.length > 0);
      if (!explore || rootStatus.isDirectory()) {
        CopyListingFileStatus rootCopyListingStatus =
          DistCpUtils.toCopyListingFileStatus(sourceFS, rootStatus,
              preserveAcls, preserveXAttrs, preserveRawXAttrs);
        writeToFileListingRoot(fileListWriter, rootCopyListingStatus,
            sourcePathRoot, options);
      }
      if (explore) {
        for (FileStatus sourceStatus: sourceFiles) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Recording source-path: " + sourceStatus.getPath() + " for copy.");
          }
          CopyListingFileStatus sourceCopyListingStatus =
            DistCpUtils.toCopyListingFileStatus(sourceFS, sourceStatus,
                preserveAcls && sourceStatus.isDirectory(),
                preserveXAttrs && sourceStatus.isDirectory(),
                preserveRawXAttrs && sourceStatus.isDirectory());
          writeToFileListing(fileListWriter, sourceCopyListingStatus,
              sourcePathRoot, options);

          if (isDirectoryAndNotEmpty(sourceFS, sourceStatus)) {
            if (LOG.isDebugEnabled()) {
              LOG.debug("Traversing non-empty source dir: " + sourceStatus.getPath());
            }
            traverseNonEmptyDirectory(fileListWriter, sourceStatus, sourcePathRoot,
                options);
          }
        }
      }
    }
    fileListWriter.close();
    fileListWriter = null;
  } finally {
    IOUtils.cleanup(LOG, fileListWriter);
  }
}
 
Example 19
Project: hadoop   File: TestCopyMapper.java   (4 votes)
/**
 * If a single file is being copied to a location where the file (of the same
 * name) already exists, then the file shouldn't be skipped.
 */
@Test(timeout=40000)
public void testSingleFileCopy() {
  try {
    deleteState();
    touchFile(SOURCE_PATH + "/1");
    Path sourceFilePath = pathList.get(0);
    Path targetFilePath = new Path(sourceFilePath.toString().replaceAll(
            SOURCE_PATH, TARGET_PATH));
    touchFile(targetFilePath.toString());

    FileSystem fs = cluster.getFileSystem();
    CopyMapper copyMapper = new CopyMapper();
    StubContext stubContext = new StubContext(getConfiguration(), null, 0);
    Mapper<Text, CopyListingFileStatus, Text, Text>.Context context
            = stubContext.getContext();

    context.getConfiguration().set(
            DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH,
            targetFilePath.getParent().toString()); // Parent directory.
    copyMapper.setup(context);

    final CopyListingFileStatus sourceFileStatus = new CopyListingFileStatus(
      fs.getFileStatus(sourceFilePath));

    long before = fs.getFileStatus(targetFilePath).getModificationTime();
    copyMapper.map(new Text(DistCpUtils.getRelativePath(
            new Path(SOURCE_PATH), sourceFilePath)), sourceFileStatus, context);
    long after = fs.getFileStatus(targetFilePath).getModificationTime();

    Assert.assertTrue("File should have been skipped", before == after);

    context.getConfiguration().set(
            DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH,
            targetFilePath.toString()); // Specify the file path.
    copyMapper.setup(context);

    before = fs.getFileStatus(targetFilePath).getModificationTime();
    try { Thread.sleep(2); } catch (Throwable ignore) {}
    copyMapper.map(new Text(DistCpUtils.getRelativePath(
            new Path(SOURCE_PATH), sourceFilePath)), sourceFileStatus, context);
    after = fs.getFileStatus(targetFilePath).getModificationTime();

    Assert.assertTrue("File should have been overwritten.", before < after);

  } catch (Exception exception) {
    Assert.fail("Unexpected exception: " + exception.getMessage());
    exception.printStackTrace();
  }
}
 
Example 20
Project: aliyun-maxcompute-data-collectors   File: HdfsOdpsImportJob.java   (4 votes)
/**
 * @return the type of the file represented by p (or the files in p, if a
 * directory)
 */
public static FileType getFileType(Configuration conf, Path p)
    throws IOException {
  FileSystem fs = p.getFileSystem(conf);

  try {
    FileStatus stat = fs.getFileStatus(p);

    if (null == stat) {
      // Couldn't get the item.
      LOG.warn("Input path " + p + " does not exist");
      return FileType.UNKNOWN;
    }

    if (stat.isDir()) {
      FileStatus [] subitems = fs.listStatus(p);
      if (subitems == null || subitems.length == 0) {
        LOG.warn("Input path " + p + " contains no files");
        return FileType.UNKNOWN; // empty dir.
      }

      // Pick a child entry to examine instead.
      boolean foundChild = false;
      for (int i = 0; i < subitems.length; i++) {
        stat = subitems[i];
        if (!stat.isDir() && !stat.getPath().getName().startsWith("_")) {
          foundChild = true;
          break; // This item is a visible file. Check it.
        }
      }

      if (!foundChild) {
        stat = null; // Couldn't find a reasonable candidate.
      }
    }

    if (null == stat) {
      LOG.warn("null FileStatus object in isSequenceFiles(); "
          + "assuming false.");
      return FileType.UNKNOWN;
    }

    Path target = stat.getPath();
    return fromMagicNumber(target, conf);
  } catch (FileNotFoundException fnfe) {
    LOG.warn("Input path " + p + " does not exist");
    return FileType.UNKNOWN; // doesn't exist!
  }
}