Java Code Examples for org.apache.hadoop.fs.FileStatus#isFile()

The following examples show how to use org.apache.hadoop.fs.FileStatus#isFile(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: HistoryFileManager.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the file entries listed under {@code path} whose paths are accepted by the filter.
 *
 * <p>The path is qualified through {@code fc} before it is listed. A
 * {@link FileNotFoundException} raised while listing is logged and the entries gathered so
 * far (possibly none) are returned.
 *
 * @param path directory to scan
 * @param fc file context used to qualify and list the path
 * @param pathFilter filter applied to the path of every file entry
 * @return statuses of the accepted files, never {@code null}
 * @throws IOException on any listing failure other than {@link FileNotFoundException}
 */
@VisibleForTesting
protected static List<FileStatus> scanDirectory(Path path, FileContext fc,
    PathFilter pathFilter) throws IOException {
  final Path qualified = fc.makeQualified(path);
  final List<FileStatus> accepted = new ArrayList<FileStatus>();
  try {
    for (RemoteIterator<FileStatus> it = fc.listStatus(qualified); it.hasNext();) {
      final FileStatus candidate = it.next();
      if (!candidate.isFile()) {
        continue;
      }
      if (pathFilter.accept(candidate.getPath())) {
        accepted.add(candidate);
      }
    }
  } catch (FileNotFoundException fe) {
    LOG.error("Error while scanning directory " + qualified, fe);
  }
  return accepted;
}
 
Example 2
Source File: HadoopSegmentPreprocessingJob.java    From incubator-pinot with Apache License 2.0 6 votes vote down vote up
/**
 * Looks for the first avro file in the input folder and returns the schema read for it.
 *
 * @param inputPathDir Path to input directory
 * @return the schema reported by the avro reader, or {@code null} when the folder contains no
 *     file whose name ends with {@code .avro}
 * @throws IOException exception when accessing to IO
 */
private Schema getSchema(Path inputPathDir)
    throws IOException {
  FileSystem fileSystem = FileSystem.get(new Configuration());
  for (FileStatus candidate : fileSystem.listStatus(inputPathDir)) {
    Path candidatePath = candidate.getPath();
    boolean isAvroFile = candidate.isFile() && candidatePath.getName().endsWith(".avro");
    if (!isAvroFile) {
      continue;
    }
    _logger.info("Extracting schema from " + candidatePath);
    // NOTE(review): the reader is opened on inputPathDir, not on the matched file path that
    // was just logged -- confirm against getAvroReader whether a directory is really expected.
    try (DataFileStream<GenericRecord> reader = getAvroReader(inputPathDir)) {
      // Only the first matching file is consulted.
      return reader.getSchema();
    }
  }
  return null;
}
 
Example 3
Source File: ParquetReader.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the reader from the configured options.
 *
 * <p>When no {@code path} is set, the reader wraps the configured {@code file}. When
 * {@code path} refers to a file, the reader wraps that single file. Otherwise the entries
 * listed under {@code path} (filtered by {@code HiddenFileFilter.INSTANCE}) are all handed to
 * the reader.
 *
 * @return a new reader over the resolved input file(s)
 * @throws IOException if the file system cannot be obtained or queried
 */
public ParquetReader<T> build() throws IOException {
  ParquetReadOptions readOptions = optionsBuilder.build();

  // No Hadoop path configured: fall back to the directly supplied input file.
  if (path == null) {
    return new ParquetReader<>(Collections.singletonList(file), readOptions, getReadSupport());
  }

  FileSystem fileSystem = path.getFileSystem(conf);
  FileStatus pathStatus = fileSystem.getFileStatus(path);

  if (!pathStatus.isFile()) {
    List<InputFile> inputs = new ArrayList<>();
    for (FileStatus child : fileSystem.listStatus(path, HiddenFileFilter.INSTANCE)) {
      inputs.add(HadoopInputFile.fromStatus(child, conf));
    }
    return new ParquetReader<T>(inputs, readOptions, getReadSupport());
  }

  InputFile single = HadoopInputFile.fromStatus(pathStatus, conf);
  return new ParquetReader<>(Collections.singletonList(single), readOptions, getReadSupport());
}
 
Example 4
Source File: FSFactory.java    From paraflow with Apache License 2.0 6 votes vote down vote up
/**
 * Lists the non-empty files found under {@code dirPath}.
 *
 * @param dirPath directory to list
 * @return paths of the entries that are files with a length greater than zero; an empty
 *     list when no file system is configured or the listing is {@code null}
 * @throws PrestoException with {@code PARAFLOW_HDFS_FILE_ERROR} when listing fails with an
 *     {@link IOException}
 */
public List<Path> listFiles(Path dirPath)
{
    if (this.fileSystem == null) {
        return ImmutableList.of();
    }

    List<Path> nonEmptyFiles = new ArrayList<>();
    try {
        FileStatus[] entries = this.fileSystem.listStatus(dirPath);
        if (entries == null) {
            return nonEmptyFiles;
        }
        for (FileStatus entry : entries) {
            // skip directories and zero-length files
            if (!entry.isFile() || entry.getLen() <= 0) {
                continue;
            }
            nonEmptyFiles.add(entry.getPath());
        }
    }
    catch (IOException e) {
        log.error(e);
        throw new PrestoException(PARAFLOW_HDFS_FILE_ERROR, e);
    }

    return nonEmptyFiles;
}
 
Example 5
Source File: TestFileOutputCommitter.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Asserts that the map output under {@code dir} is a directory holding at least one file,
 * including both the map-file index and the map-file data file.
 *
 * @param fs file system holding the output
 * @param dir parent directory of the expected map output
 * @throws IOException if the file system cannot be queried
 */
private void validateMapFileOutputContent(
    FileSystem fs, Path dir) throws IOException {
  // map output is a directory with index and data files
  Path mapDir = new Path(dir, partFile);
  assert(fs.getFileStatus(mapDir).isDirectory());
  int regularFiles = 0;
  boolean sawData = false;
  boolean sawIndex = false;
  for (FileStatus entry : fs.listStatus(mapDir)) {
    if (!entry.isFile()) {
      continue;
    }
    regularFiles++;
    String name = entry.getPath().getName();
    if (name.equals(MapFile.INDEX_FILE_NAME)) {
      sawIndex = true;
    } else if (name.equals(MapFile.DATA_FILE_NAME)) {
      sawData = true;
    }
  }
  assert(regularFiles > 0);
  assert(sawData && sawIndex);
}
 
Example 6
Source File: HistoryFileManager.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the file entries listed under the given path that pass {@code pathFilter}.
 *
 * <p>The path is first qualified via {@code fc}. If listing throws a
 * {@link FileNotFoundException}, the error is logged and the entries collected up to that
 * point are returned.
 *
 * @param path directory to scan
 * @param fc file context used for qualification and listing
 * @param pathFilter filter each file's path must satisfy
 * @return the matching file statuses, never {@code null}
 * @throws IOException if listing fails for a reason other than a missing path
 */
@VisibleForTesting
protected static List<FileStatus> scanDirectory(Path path, FileContext fc,
    PathFilter pathFilter) throws IOException {
  Path scanRoot = fc.makeQualified(path);
  List<FileStatus> matches = new ArrayList<FileStatus>();
  try {
    RemoteIterator<FileStatus> entries = fc.listStatus(scanRoot);
    while (entries.hasNext()) {
      FileStatus entry = entries.next();
      boolean wanted = entry.isFile() && pathFilter.accept(entry.getPath());
      if (wanted) {
        matches.add(entry);
      }
    }
  } catch (FileNotFoundException fe) {
    LOG.error("Error while scanning directory " + scanRoot, fe);
  }
  return matches;
}
 
Example 7
Source File: Configuration.java    From laser with Apache License 2.0 6 votes vote down vote up
/**
 * Loads every {@code *.properties} file listed under {@code path} into {@code mapper}.
 *
 * <p>Each matching file is deserialized into a {@code Collection}, which is then named after
 * the file (file name with the {@code .properties} suffix removed) and registered in
 * {@code mapper} under that name.
 *
 * @param path directory containing the property files
 * @param fs file system used to list and open the files
 * @throws IOException if listing, opening, reading or closing a file fails
 */
public synchronized void load(Path path, FileSystem fs) throws IOException {
	final ObjectMapper objectMapper = new ObjectMapper();
	FileStatus[] fileStatus = fs.listStatus(path, new GlobFilter(
			"*.properties"));
	for (FileStatus file : fileStatus) {
		if (!file.isFile()) {
			continue;
		}
		Path p = file.getPath();
		Collection configuration;
		// Close the stream deterministically, including when deserialization fails.
		try (FSDataInputStream in = fs.open(p)) {
			configuration = objectMapper.readValue(in, Collection.class);
		}
		String fileName = p.getName();
		String collection = fileName.substring(0,
				fileName.lastIndexOf(".properties"));
		configuration.setCollecion(collection);
		mapper.put(collection, configuration);
	}
}
 
Example 8
Source File: AbstractViolationPolicyEnforcement.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the length of a single file on the given filesystem.
 *
 * <p>A failure to obtain the file's status is reported as a {@link SpaceLimitingException},
 * since the size cannot be verified. A path that does not reference a file results in an
 * {@link IllegalArgumentException}.
 *
 * @param fs The FileSystem on which the path is resolved
 * @param path The path on the {@code fs} to the file whose size is being checked; must not be
 *     {@code null}
 * @return The size in bytes of the file
 * @throws SpaceLimitingException if the file status cannot be read from {@code fs}
 */
long getFileSize(FileSystem fs, String path) throws SpaceLimitingException {
  final Path filePath = new Path(Objects.requireNonNull(path));
  final FileStatus fileStatus;
  try {
    fileStatus = fs.getFileStatus(filePath);
  } catch (IOException e) {
    throw new SpaceLimitingException(
        getPolicyName(), "Could not verify length of file to bulk load: " + path, e);
  }
  if (fileStatus.isFile()) {
    return fileStatus.getLen();
  }
  throw new IllegalArgumentException(path + " is not a file.");
}
 
Example 9
Source File: SafeFileOutputCommitter.java    From datawave with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a lazy iterator over the paths of all files found at or below {@code path}.
 *
 * <p>{@code fs.listFiles(path, true)} was deliberately not used: it yields LocatedFileStatus
 * entries, which carry block locations in addition to the file status. Here only the plain
 * FileStatus of each entry is requested, since all that matters is whether a path is a file
 * or a directory.
 *
 * <p>Entries are kept on a stack: the most recently added entry is examined first, and a
 * non-file entry is replaced by the entries listed under it.
 *
 * @param fs
 *            file system to query
 * @param path
 *            root path to start from
 * @return A remote iterator of paths for file only
 */
protected RemoteIterator<Path> listFiles(final FileSystem fs, final Path path) {
    return new RemoteIterator<Path>() {
        private final ArrayDeque<FileStatus> pending = new ArrayDeque<>();
        private Path nextFile = null;
        private boolean seeded = false;
        
        // Pushes the status of the root path the first time the iterator is used.
        private void seedIfNeeded() throws IOException {
            if (seeded) {
                return;
            }
            pending.add(fs.getFileStatus(path));
            seeded = true;
        }
        
        @Override
        public boolean hasNext() throws FileNotFoundException, IOException {
            seedIfNeeded();
            while (nextFile == null && !pending.isEmpty()) {
                FileStatus candidate = pending.removeLast();
                if (!candidate.isFile()) {
                    // expand the entry in place of returning it
                    Collections.addAll(pending, fs.listStatus(candidate.getPath()));
                    continue;
                }
                nextFile = candidate.getPath();
            }
            return nextFile != null;
        }
        
        @Override
        public Path next() throws FileNotFoundException, IOException {
            if (!hasNext()) {
                throw new java.util.NoSuchElementException("No more files under " + path);
            }
            Path result = nextFile;
            nextFile = null;
            return result;
        }
    };
}
 
Example 10
Source File: CephFileSystem.java    From cephfs-hadoop with GNU Lesser General Public License v2.1 5 votes vote down vote up
/**
 * Deletes the file or directory at {@code path}.
 *
 * <p>A file is unlinked directly. A directory has each of its listed children deleted through
 * this same method before the directory itself is removed.
 *
 * @param path path to delete; made absolute before use
 * @param recursive whether a non-empty directory may be deleted together with its contents
 * @return {@code true} if the path was removed; {@code false} if it does not exist, if the
 *     directory listing is {@code null}, or if deleting any child returned {@code false}
 * @throws IOException if {@code recursive} is {@code false} and the directory is not empty,
 *     or if an underlying operation fails
 */
public boolean delete(Path path, boolean recursive) throws IOException {
  path = makeAbsolute(path);

  /* path exists? */
  FileStatus status;
  try {
    status = getFileStatus(path);
  } catch (FileNotFoundException e) {
    return false;
  }

  /* we're done if its a file */
  if (status.isFile()) {
    ceph.unlink(path);
    return true;
  }

  /* get directory contents */
  FileStatus[] dirlist = listStatus(path);
  if (dirlist == null) {
    return false;
  }

  if (!recursive && dirlist.length > 0) {
    throw new IOException("Directory " + path.toString() + " is not empty.");
  }

  for (FileStatus child : dirlist) {
    if (!delete(child.getPath(), recursive)) {
      return false;
    }
  }

  ceph.rmdir(path);
  return true;
}
 
Example 11
Source File: FragmentMerger.java    From indexr with Apache License 2.0 5 votes vote down vote up
/**
 * Merges the segment files directly under {@code path}, then recurses into its sub folders.
 *
 * <p>Entries whose path is rejected by {@code SegmentHelper.checkSegmentByPath} are ignored.
 * Files are merged only when more than one was found.
 *
 * @param path folder to process
 * @throws IOException if listing or merging fails
 */
private void mergeFolder(Path path) throws IOException {
    logger.debug("mergeFolder: {}", path);

    List<FileStatus> segmentFiles = new ArrayList<>();
    List<FileStatus> subFolders = new ArrayList<>();
    for (FileStatus entry : fileSystem.listStatus(path)) {
        if (SegmentHelper.checkSegmentByPath(entry.getPath())) {
            if (entry.isFile()) {
                segmentFiles.add(entry);
            } else if (entry.isDirectory()) {
                subFolders.add(entry);
            }
        }
    }

    // Merge files under current folder
    if (segmentFiles.size() > 1) {
        mergeFiles(path, segmentFiles);
    }

    // Merge sub folders
    for (FileStatus subFolder : subFolders) {
        mergeFolder(subFolder.getPath());
    }
}
 
Example 12
Source File: BasicFormatMatcher.java    From Bats with Apache License 2.0 5 votes vote down vote up
/**
 * Checks whether the file behind {@code status} starts a byte range with one of the
 * configured magic sequences.
 *
 * <p>Directories never match, and neither does anything when no ranges are configured.
 * Symlinks are followed until a non-symlink entry is reached; that entry must be a file.
 *
 * @param fs file system used to resolve symlinks and to read the file
 * @param status status of the candidate entry
 * @return {@code true} if the bytes of any configured range equal one of that range's magics
 * @throws IOException if resolving a symlink or reading the file fails
 */
public boolean matches(DrillFileSystem fs, FileStatus status) throws IOException{
  if (ranges.isEmpty() || status.isDirectory()) {
    return false;
  }
  // walk all the way down in the symlinks until a hard entry is reached
  FileStatus current = status;
  while (current.isSymlink()) {
    // Resolve the link of the entry reached so far; always resolving the original status
    // would never advance along a chain of links and could loop forever.
    current = fs.getFileStatus(current.getSymlink());
  }
  // if hard entry is not a file nor can it be a symlink then it is not readable simply deny matching.
  if (!current.isFile()) {
    return false;
  }

  // NOTE(review): length and path are still taken from the original status, not from the
  // resolved entry -- confirm that is intended when status is a symlink.
  final Range<Long> fileRange = Range.closedOpen( 0L, status.getLen());

  try (FSDataInputStream is = fs.open(status.getPath())) {
    for(RangeMagics rMagic : ranges) {
      Range<Long> r = rMagic.range;
      // a range that does not fit inside the file cannot match
      if (!fileRange.encloses(r)) {
        continue;
      }
      int len = (int) (r.upperEndpoint() - r.lowerEndpoint());
      byte[] bytes = new byte[len];
      is.readFully(r.lowerEndpoint(), bytes);
      for (byte[] magic : rMagic.magics) {
        if (Arrays.equals(magic, bytes)) {
          return true;
        }
      }
    }
  }
  return false;
}
 
Example 13
Source File: FileSystemUtil.java    From Bats with Apache License 2.0 5 votes vote down vote up
/**
 * Checks if file status is applicable based on file system object {@link Scope}.
 *
 * @param status file status
 * @param scope file system objects scope
 * @return true if status is applicable, false otherwise
 */
private static boolean isStatusApplicable(FileStatus status, Scope scope) {
  final boolean applicable;
  switch (scope) {
    case DIRECTORIES:
      applicable = status.isDirectory();
      break;
    case FILES:
      applicable = status.isFile();
      break;
    case ALL:
      applicable = true;
      break;
    default:
      // any other scope matches nothing
      applicable = false;
      break;
  }
  return applicable;
}
 
Example 14
Source File: PseudoDistributedFileSystem.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the task that fetches the status of {@code path} from the delegate file system
 * for the given address.
 *
 * @param address address identifying the delegate file system to query
 * @return a task yielding the status of {@code path}; the task fails with
 *     {@link FileNotFoundException} when the path is a file
 * @throws IOException declared by the overridden method
 */
@Override
protected Callable<FileStatus> newMapTask(final String address) throws IOException {
  return new Callable<FileStatus>() {
    @Override
    public FileStatus call() throws Exception {
      FileStatus delegateStatus = getDelegateFileSystem(address).getFileStatus(path);
      // Only directories should be removed with a fork/join task
      if (!delegateStatus.isFile()) {
        return delegateStatus;
      }
      throw new FileNotFoundException("Directory not found: " + path);
    }
  };
}
 
Example 15
Source File: FileStatusTreeTraverser.java    From circus-train with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the entries listed directly under {@code root}.
 *
 * @param root entry whose children are requested
 * @return the listed children, or an empty list when {@code root} is a file or the listing
 *     is {@code null} or empty
 * @throws CircusTrainException if listing the path fails with an {@link IOException}
 */
@Override
public Iterable<FileStatus> children(FileStatus root) {
  if (root.isFile()) {
    return ImmutableList.of();
  }
  try {
    FileStatus[] listStatus = fileSystem.listStatus(root.getPath());
    if (listStatus == null || listStatus.length == 0) {
      return ImmutableList.of();
    }
    return ImmutableList.copyOf(listStatus);
  } catch (IOException e) {
    // Keep the IOException as the cause so the underlying failure is not lost.
    throw new CircusTrainException("Unable to list children for path: " + root.getPath(), e);
  }
}
 
Example 16
Source File: HttpFSFileSystem.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Maps a file status to its {@code FILE_TYPE}.
 *
 * <p>The checks run in the order file, directory, symlink; the first one that holds wins.
 *
 * @param fileStatus status to classify
 * @return {@code FILE}, {@code DIRECTORY} or {@code SYMLINK}
 * @throws IllegalArgumentException if the status is none of the three
 */
public static FILE_TYPE getType(FileStatus fileStatus) {
  final FILE_TYPE type;
  if (fileStatus.isFile()) {
    type = FILE;
  } else if (fileStatus.isDirectory()) {
    type = DIRECTORY;
  } else if (fileStatus.isSymlink()) {
    type = SYMLINK;
  } else {
    throw new IllegalArgumentException("Could not determine filetype for: " +
                                       fileStatus.getPath());
  }
  return type;
}
 
Example 17
Source File: Type.java    From examples with Apache License 2.0 4 votes vote down vote up
/**
 * Tells whether the given status describes a file.
 *
 * @param stat status to test
 * @return the result of {@code stat.isFile()}
 */
public boolean matches(FileStatus stat) {
  final boolean isRegularFile = stat.isFile();
  return isRegularFile;
}
 
Example 18
Source File: SemiTransactionalHiveMetastore.java    From presto with Apache License 2.0 4 votes vote down vote up
/**
 * Recursively deletes, under {@code directory}, the files that belong to the given queries.
 *
 * <p>A file is eligible for deletion when its name does not start with {@code .presto} and
 * starts or ends with one of {@code queryIds}. Directories whose name starts with
 * {@code .presto} are left untouched. The directory itself is deleted only when
 * {@code deleteEmptyDirectories} is set and every entry below it was deleted.
 *
 * @param fileSystem file system to operate on
 * @param directory directory to clean
 * @param queryIds query ids matched against the start and end of file names
 * @param deleteEmptyDirectories whether directories left without entries are removed too
 * @return whether the directory no longer exists, together with the eligible items that
 *     could not be deleted
 */
private static RecursiveDeleteResult doRecursiveDeleteFiles(FileSystem fileSystem, Path directory, Set<String> queryIds, boolean deleteEmptyDirectories)
{
    // don't delete hidden presto directories
    if (directory.getName().startsWith(".presto")) {
        return new RecursiveDeleteResult(false, ImmutableList.of());
    }

    FileStatus[] entries;
    try {
        entries = fileSystem.listStatus(directory);
    }
    catch (IOException e) {
        // listing failed: report everything below the directory as not deleted
        return new RecursiveDeleteResult(false, ImmutableList.of(directory.toString() + "/**"));
    }

    boolean everythingGone = true;
    ImmutableList.Builder<String> leftovers = ImmutableList.builder();
    for (FileStatus entry : entries) {
        Path entryPath = entry.getPath();
        if (entry.isFile()) {
            String fileName = entryPath.getName();
            // never delete presto dot files
            boolean eligible = !fileName.startsWith(".presto")
                    && queryIds.stream().anyMatch(id -> fileName.startsWith(id) || fileName.endsWith(id));
            if (!eligible) {
                everythingGone = false;
            }
            else if (!deleteIfExists(fileSystem, entryPath, false)) {
                everythingGone = false;
                leftovers.add(entryPath.toString());
            }
        }
        else if (entry.isDirectory()) {
            RecursiveDeleteResult nested = doRecursiveDeleteFiles(fileSystem, entryPath, queryIds, deleteEmptyDirectories);
            if (!nested.isDirectoryNoLongerExists()) {
                everythingGone = false;
            }
            if (!nested.getNotDeletedEligibleItems().isEmpty()) {
                leftovers.addAll(nested.getNotDeletedEligibleItems());
            }
        }
        else {
            // neither file nor directory: cannot be handled here, so report it
            everythingGone = false;
            leftovers.add(entryPath.toString());
        }
    }

    if (!(allowedToRemoveDirectory(everythingGone, deleteEmptyDirectories))) {
        return new RecursiveDeleteResult(false, leftovers.build());
    }
    verify(leftovers.build().isEmpty());
    if (deleteIfExists(fileSystem, directory, false)) {
        return new RecursiveDeleteResult(true, ImmutableList.of());
    }
    return new RecursiveDeleteResult(false, ImmutableList.of(directory.toString() + "/"));
}

// The directory may be removed only when all descendants are gone and removal was requested.
private static boolean allowedToRemoveDirectory(boolean everythingGone, boolean deleteEmptyDirectories)
{
    return everythingGone && deleteEmptyDirectories;
}
 
Example 19
Source File: COSAPIClient.java    From stocator with Apache License 2.0 4 votes vote down vote up
/**
 * Renames a file by copying it to the destination key and deleting the source.
 *
 * <p>When the destination exists and is a directory, the file is copied into it under its
 * own name. When the source is a directory nothing is copied; only a debug message is logged.
 * If the source and destination parents differ, the source parent directory is re-created
 * when necessary.
 *
 * @param hostName host name passed through to the status, delete and directory helpers
 * @param srcPath path of the entry to rename
 * @param dstPath path to rename it to
 * @return always {@code true} when no exception is thrown
 * @throws IOException if either path maps to the root, if both paths map to the same key,
 *     if the destination is an existing file, or if an underlying operation fails
 *     (including the source not being found)
 */
@Override
public boolean rename(String hostName, String srcPath, String dstPath) throws IOException {
  LOG.debug("Rename path {} to {}", srcPath, dstPath);
  Path src = new Path(srcPath);
  Path dst = new Path(dstPath);
  String srcKey = pathToKey(src);
  String dstKey = pathToKey(dst);

  if (srcKey.isEmpty()) {
    throw new IOException("Rename failed " + srcPath + " to " + dstPath
        + " source is root directory");
  }
  if (dstKey.isEmpty()) {
    throw new IOException("Rename failed " + srcPath + " to " + dstPath
        + " dest is root directory");
  }

  // get the source file status; this raises a FNFE if there is no source
  // file.
  FileStatus srcStatus = getFileStatus(hostName, src, "rename");

  if (srcKey.equals(dstKey)) {
    LOG.debug("rename: src and dest refer to the same file or directory: {}",
        dstPath);
    throw new IOException("source " + srcPath + " and dest " + dstPath
        + " refer to the same file or directory");
  }

  FileStatus dstStatus = null;
  try {
    dstStatus = getFileStatus(hostName, dst, "rename");
    // if there is no destination entry, an exception is raised.
    // hence this code sequence can assume that there is something
    // at the end of the path; the only detail being what it is and
    // whether or not it can be the destination of the rename.
    if (srcStatus.isDirectory()) {
      if (dstStatus.isFile()) {
        throw new IOException("source " + srcPath + " and dest " + dstPath
            + ": source is a directory and dest is a file");
      }
      // at this point the destination is a directory
    } else {
      // source is a file. The destination must be a directory,
      // empty or not
      if (dstStatus.isFile()) {
        throw new IOException("source " + srcPath + " and dest " + dstPath
            + ": cannot rename onto an existing file");
      }
    }

  } catch (FileNotFoundException e) {
    LOG.debug("rename: destination path {} not found", dstPath);
  }

  if (srcStatus.isFile()) {
    LOG.debug("rename: renaming file {} to {}", src, dst);
    long length = srcStatus.getLen();
    if (dstStatus != null && dstStatus.isDirectory()) {
      // destination is a directory: place the file inside it under its own name
      String newDstKey = dstKey;
      if (!newDstKey.endsWith("/")) {
        newDstKey = newDstKey + "/";
      }
      String filename =
          srcKey.substring(pathToKey(src.getParent()).length() + 1);
      newDstKey = newDstKey + filename;
      copyFile(srcKey, newDstKey, length);
    } else {
      copyFile(srcKey, dstKey, length);
    }
    delete(hostName, src, false);
  } else {
    LOG.debug("rename: renaming file {} to {} failed. Source file is directory", src, dst);
  }

  if (!(src.getParent().equals(dst.getParent()))) {
    LOG.debug("{} is not equal to {}. Going to create directory {}",src.getParent(),
        dst.getParent(), src.getParent());
    createDirectoryIfNecessary(hostName, src.getParent());
  }
  return true;
}
 
Example 20
Source File: FileOutputCommitter.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Merges {@code from} into {@code to}. Whatever is in {@code from} ends up under {@code to};
 * on a name conflict the entry coming from {@code from} wins.
 *
 * <ul>
 *   <li>A source file replaces whatever exists at {@code to}.</li>
 *   <li>A source directory is merged child by child when {@code to} is a directory.</li>
 *   <li>Otherwise anything existing at {@code to} is deleted and the source directory is
 *       handed to {@code renameOrMerge}.</li>
 * </ul>
 *
 * @param fs the File System to use
 * @param from the path data is coming from.
 * @param to the path data is going to.
 * @throws IOException on any error
 */
private void mergePaths(FileSystem fs, final FileStatus from,
    final Path to) throws IOException {
  if (LOG.isDebugEnabled()) {
    LOG.debug("Merging data from " + from + " to " + to);
  }

  // A missing destination is represented by a null status.
  FileStatus destination;
  try {
    destination = fs.getFileStatus(to);
  } catch (FileNotFoundException fnfe) {
    destination = null;
  }
  final boolean destinationExists = destination != null;

  if (from.isFile()) {
    if (destinationExists && !fs.delete(to, true)) {
      throw new IOException("Failed to delete " + to);
    }
    if (!fs.rename(from.getPath(), to)) {
      throw new IOException("Failed to rename " + from + " to " + to);
    }
    return;
  }

  if (!from.isDirectory()) {
    return;
  }

  if (destinationExists && destination.isDirectory()) {
    //It is a directory so merge everything in the directories
    for (FileStatus child : fs.listStatus(from.getPath())) {
      mergePaths(fs, child, new Path(to, child.getPath().getName()));
    }
    return;
  }

  // Destination is absent or not a directory: clear it if present, then move the source over.
  if (destinationExists && !fs.delete(to, true)) {
    throw new IOException("Failed to delete " + to);
  }
  renameOrMerge(fs, from, to);
}