Java Code Examples for org.apache.hadoop.fs.FileSystem#listLocatedStatus()

The following examples show how to use org.apache.hadoop.fs.FileSystem#listLocatedStatus(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: FileInputFormat.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the files found under {@code path} into {@code result},
 * descending into every accepted sub-directory.
 * <p>
 * An entry whose path is rejected by {@code inputFilter} is skipped; a
 * rejected directory is therefore never descended into.
 *
 * @param result
 *          list that receives the accepted files
 * @param fs
 *          file system used to list {@code path}
 * @param path
 *          directory whose entries are listed
 * @param inputFilter
 *          filter applied to the path of each listed entry
 * @throws IOException
 *           propagated from the file system listing
 */
protected void addInputPathRecursively(List<FileStatus> result,
    FileSystem fs, Path path, PathFilter inputFilter)
    throws IOException {
  for (RemoteIterator<LocatedFileStatus> entries = fs.listLocatedStatus(path);
      entries.hasNext();) {
    LocatedFileStatus entry = entries.next();
    Path entryPath = entry.getPath();
    if (!inputFilter.accept(entryPath)) {
      continue;
    }
    if (entry.isDirectory()) {
      addInputPathRecursively(result, fs, entryPath, inputFilter);
    } else {
      result.add(entry);
    }
  }
}
 
Example 2
Source File: FileInputFormat.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Recursively gathers the files below {@code path} into {@code result}.
 * <p>
 * Each listed entry is first checked against {@code inputFilter}. Accepted
 * directories are walked recursively, accepted files are appended to
 * {@code result}, and rejected entries are ignored.
 *
 * @param result
 *          destination list for the files that are found
 * @param fs
 *          the file system that is queried
 * @param path
 *          the directory to list
 * @param inputFilter
 *          decides, by path, which files and directories are considered
 * @throws IOException
 *           propagated from the file system listing
 */
protected void addInputPathRecursively(List<FileStatus> result,
    FileSystem fs, Path path, PathFilter inputFilter)
    throws IOException {
  RemoteIterator<LocatedFileStatus> listing = fs.listLocatedStatus(path);
  while (listing.hasNext()) {
    LocatedFileStatus candidate = listing.next();
    boolean accepted = inputFilter.accept(candidate.getPath());
    if (accepted && candidate.isDirectory()) {
      addInputPathRecursively(result, fs, candidate.getPath(), inputFilter);
    } else if (accepted) {
      result.add(candidate);
    }
  }
}
 
Example 3
Source File: FileInputFormat.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Adds every accepted file beneath {@code path} to {@code result}, walking
 * accepted sub-directories depth-first.
 *
 * @param result
 *          accumulator for the files that pass the filter
 * @param fs
 *          file system on which {@code path} is listed
 * @param path
 *          directory to scan
 * @param inputFilter
 *          path filter consulted for every listed file and directory;
 *          rejected entries are skipped
 * @throws IOException
 *           propagated from the file system listing
 */
protected void addInputPathRecursively(List<FileStatus> result,
    FileSystem fs, Path path, PathFilter inputFilter)
    throws IOException {
  RemoteIterator<LocatedFileStatus> children = fs.listLocatedStatus(path);
  while (children.hasNext()) {
    LocatedFileStatus child = children.next();
    if (!inputFilter.accept(child.getPath())) {
      // Filtered out: neither recorded nor descended into.
      continue;
    }
    if (!child.isDirectory()) {
      result.add(child);
      continue;
    }
    addInputPathRecursively(result, fs, child.getPath(), inputFilter);
  }
}
 
Example 4
Source File: FileInputFormat.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Scans {@code path} and stores the files it contains, including those in
 * nested directories, in {@code result}.
 * <p>
 * Only entries whose path is accepted by {@code inputFilter} are processed:
 * accepted directories trigger a recursive scan, accepted files are stored.
 *
 * @param result
 *          the list the discovered files are appended to
 * @param fs
 *          the file system to list
 * @param path
 *          the directory to start from
 * @param inputFilter
 *          the filter used to select files and directories
 * @throws IOException
 *           propagated from the file system listing
 */
protected void addInputPathRecursively(List<FileStatus> result,
    FileSystem fs, Path path, PathFilter inputFilter)
    throws IOException {
  for (RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(path);
      it.hasNext();) {
    final LocatedFileStatus current = it.next();
    final boolean rejected = !inputFilter.accept(current.getPath());
    if (rejected) {
      continue;
    }
    if (current.isDirectory()) {
      addInputPathRecursively(result, fs, current.getPath(), inputFilter);
      continue;
    }
    result.add(current);
  }
}
 
Example 5
Source File: IncrementalCheckpointManagerTest.java    From attic-apex-malhar with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the {@code bucket_data} directory under the application path
 * contains entries after {@code testTransferWindowFiles()} has run, and is
 * empty once {@code deleteTimeBucketsLessThanEqualTo(200)} has been invoked.
 *
 * @throws IOException if a file system operation fails
 * @throws InterruptedException declared for the helper calls made by this test
 */
@Test
public void testPurge() throws IOException, InterruptedException
{
  // FileSystem is Closeable and this instance is created solely for the
  // test, so release it deterministically instead of leaking it.
  try (FileSystem fileSystem = FileSystem.newInstance(new Configuration())) {
    testTransferWindowFiles();

    final Path bucketDataPath = new Path(testMeta.applicationPath + "/bucket_data");
    RemoteIterator<LocatedFileStatus> iterator = fileSystem.listLocatedStatus(bucketDataPath);
    Assert.assertTrue(iterator.hasNext());

    testMeta.managedStateContext.getBucketsFileSystem().deleteTimeBucketsLessThanEqualTo(200);

    // List again: nothing may remain after the purge.
    iterator = fileSystem.listLocatedStatus(bucketDataPath);
    if (iterator.hasNext()) {
      Assert.fail("All buckets should be deleted");
    }
  }
}
 
Example 6
Source File: RaidNode.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Lists the entries directly under {@code p} whose length is at least the
 * configured minimum raidable file size.
 *
 * @param conf
 *            configuration from which the minimum file size is read
 *            (key {@code MINIMUM_RAIDABLE_FILESIZE_KEY}, default
 *            {@code MINIMUM_RAIDABLE_FILESIZE})
 * @param srcFs
 *            file system on which {@code p} is listed
 * @param p
 *            directory to list
 * @return the entries that are large enough, or {@code null} if any listed
 *         entry is a directory or if no entry qualifies
 * @throws IOException
 *             propagated from the file system listing
 */
public static List<LocatedFileStatus> listDirectoryRaidLocatedFileStatus(
		Configuration conf, FileSystem srcFs, Path p) throws IOException {
	final long sizeThreshold = conf.getLong(MINIMUM_RAIDABLE_FILESIZE_KEY,
			MINIMUM_RAIDABLE_FILESIZE);
	List<LocatedFileStatus> raidable = new ArrayList<LocatedFileStatus>();
	for (RemoteIterator<LocatedFileStatus> entries = srcFs.listLocatedStatus(p);
			entries.hasNext();) {
		LocatedFileStatus entry = entries.next();
		if (entry.isDir()) {
			// A nested directory disqualifies the whole listing.
			return null;
		}
		// Files below the threshold are too small to raid and are left out.
		if (entry.getLen() >= sizeThreshold) {
			raidable.add(entry);
		}
	}
	return raidable.isEmpty() ? null : raidable;
}
 
Example 7
Source File: CachingDirectoryLister.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Lists {@code path}, answering from the cache when an entry is present.
 * <p>
 * On a cache miss the file system is listed. The listing is wrapped by
 * {@code cachingRemoteIterator} only when the table's schema-table name
 * matches one of {@code tablePrefixes}; otherwise the raw iterator is
 * returned.
 *
 * @param fs file system to list on a cache miss
 * @param table table whose schema-table name is matched against the prefixes
 * @param path directory to list; also used as the cache key
 * @return an iterator over the located file statuses of {@code path}
 * @throws IOException propagated from the file system listing
 */
@Override
public RemoteIterator<LocatedFileStatus> list(FileSystem fs, Table table, Path path)
        throws IOException
{
    List<LocatedFileStatus> cachedFiles = cache.getIfPresent(path);
    if (cachedFiles != null) {
        return simpleRemoteIterator(cachedFiles);
    }

    RemoteIterator<LocatedFileStatus> listing = fs.listLocatedStatus(path);

    boolean cachingEnabledForTable = tablePrefixes.stream()
            .anyMatch(prefix -> prefix.matches(table.getSchemaTableName()));
    if (cachingEnabledForTable) {
        return cachingRemoteIterator(listing, path);
    }
    return listing;
}
 
Example 8
Source File: FileInputFormat.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Expands each input path (which may be a glob) into file statuses, one
 * path after another.
 * <p>
 * A glob match that is a file is added as is. A match that is a directory
 * is listed one level deep, and each child accepted by {@code inputFilter}
 * is added; when {@code recursive} is set, accepted child directories are
 * walked with {@code addInputPathRecursively} instead of being added.
 *
 * @param job job configuration used to resolve each path's file system
 * @param dirs input paths or glob patterns
 * @param inputFilter filter applied to glob matches and directory children
 * @param recursive whether nested directories are descended into
 * @return the collected file statuses
 * @throws IOException an {@code InvalidInputException} carrying all
 *           collected errors if any path does not exist or matches nothing,
 *           or an error propagated from the file system
 */
private List<FileStatus> singleThreadedListStatus(JobConf job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  List<FileStatus> collected = new ArrayList<FileStatus>();
  List<IOException> problems = new ArrayList<IOException>();
  for (Path p : dirs) {
    FileSystem fs = p.getFileSystem(job);
    FileStatus[] globbed = fs.globStatus(p, inputFilter);
    if (globbed == null) {
      problems.add(new IOException("Input path does not exist: " + p));
      continue;
    }
    if (globbed.length == 0) {
      problems.add(new IOException("Input Pattern " + p + " matches 0 files"));
      continue;
    }
    for (FileStatus match : globbed) {
      if (!match.isDirectory()) {
        collected.add(match);
        continue;
      }
      for (RemoteIterator<LocatedFileStatus> children =
          fs.listLocatedStatus(match.getPath()); children.hasNext();) {
        LocatedFileStatus child = children.next();
        if (!inputFilter.accept(child.getPath())) {
          continue;
        }
        if (recursive && child.isDirectory()) {
          addInputPathRecursively(collected, fs, child.getPath(),
              inputFilter);
        } else {
          collected.add(child);
        }
      }
    }
  }
  // Report every bad input path at once rather than failing on the first.
  if (!problems.isEmpty()) {
    throw new InvalidInputException(problems);
  }
  return collected;
}
 
Example 9
Source File: FileInputFormat.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the given input paths to file statuses sequentially.
 * <p>
 * Every path is glob-expanded with {@code inputFilter}. File matches are
 * taken directly; directory matches are listed and their accepted children
 * are taken, with accepted sub-directories walked through
 * {@code addInputPathRecursively} when {@code recursive} is true.
 *
 * @param job job context whose configuration resolves each file system
 * @param dirs input paths or glob patterns
 * @param inputFilter filter applied to glob matches and directory children
 * @param recursive whether nested directories are descended into
 * @return the collected file statuses
 * @throws IOException an {@code InvalidInputException} carrying all
 *           collected errors if any path does not exist or matches nothing,
 *           or an error propagated from the file system
 */
private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  final List<FileStatus> found = new ArrayList<FileStatus>();
  final List<IOException> failures = new ArrayList<IOException>();

  for (Path p : dirs) {
    FileSystem fs = p.getFileSystem(job.getConfiguration());
    FileStatus[] hits = fs.globStatus(p, inputFilter);
    if (hits == null) {
      failures.add(new IOException("Input path does not exist: " + p));
    } else if (hits.length == 0) {
      failures.add(new IOException("Input Pattern " + p + " matches 0 files"));
    } else {
      for (FileStatus hit : hits) {
        if (!hit.isDirectory()) {
          found.add(hit);
          continue;
        }
        RemoteIterator<LocatedFileStatus> listing =
            fs.listLocatedStatus(hit.getPath());
        while (listing.hasNext()) {
          LocatedFileStatus entry = listing.next();
          boolean accepted = inputFilter.accept(entry.getPath());
          if (!accepted) {
            continue;
          }
          boolean descend = recursive && entry.isDirectory();
          if (descend) {
            addInputPathRecursively(found, fs, entry.getPath(),
                inputFilter);
          } else {
            found.add(entry);
          }
        }
      }
    }
  }

  if (failures.isEmpty()) {
    return found;
  }
  throw new InvalidInputException(failures);
}
 
Example 10
Source File: SpillServiceImpl.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Deletes, recursively, every entry directly under {@code spillDir} whose
 * modification time is at or before {@code targetTime}.
 * <p>
 * Any {@link IOException} raised while listing or deleting ends the sweep
 * and is ignored.
 *
 * @param spillDir path of the spill directory to clean
 * @param targetTime modification-time cut-off; entries at or before it are
 *          removed
 */
private void sweep(String spillDir, long targetTime) {
  try {
    final Path sweepRoot = new Path(spillDir);
    final FileSystem fs = sweepRoot.getFileSystem(SPILLING_CONFIG);
    for (RemoteIterator<LocatedFileStatus> entries = fs.listLocatedStatus(sweepRoot);
        entries.hasNext();) {
      final LocatedFileStatus entry = entries.next();
      final boolean expired = entry.getModificationTime() <= targetTime;
      if (expired) {
        fs.delete(entry.getPath(), true);
      }
    }
  } catch (IOException ignored) {
    // exception silently ignored. Directory will be revisited at the next sweep
  }
}
 
Example 11
Source File: FileInputFormat.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Lists the input files for the given paths on the calling thread.
 * <p>
 * Each path is treated as a glob. Matching files are returned directly; for
 * matching directories the accepted children are returned, and accepted
 * child directories are expanded via {@code addInputPathRecursively} when
 * {@code recursive} is enabled.
 *
 * @param job job configuration used to resolve each path's file system
 * @param dirs input paths or glob patterns
 * @param inputFilter filter applied to glob matches and directory children
 * @param recursive whether nested directories are descended into
 * @return the collected file statuses
 * @throws IOException an {@code InvalidInputException} carrying all
 *           collected errors if any path does not exist or matches nothing,
 *           or an error propagated from the file system
 */
private List<FileStatus> singleThreadedListStatus(JobConf job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  List<FileStatus> statuses = new ArrayList<FileStatus>();
  List<IOException> inputErrors = new ArrayList<IOException>();
  for (int i = 0; i < dirs.length; i++) {
    final Path p = dirs[i];
    final FileSystem fs = p.getFileSystem(job);
    final FileStatus[] globResults = fs.globStatus(p, inputFilter);
    if (globResults == null) {
      inputErrors.add(new IOException("Input path does not exist: " + p));
      continue;
    }
    if (globResults.length == 0) {
      inputErrors.add(new IOException("Input Pattern " + p + " matches 0 files"));
      continue;
    }
    for (FileStatus globResult : globResults) {
      if (globResult.isDirectory()) {
        RemoteIterator<LocatedFileStatus> contents =
            fs.listLocatedStatus(globResult.getPath());
        while (contents.hasNext()) {
          LocatedFileStatus item = contents.next();
          if (!inputFilter.accept(item.getPath())) {
            continue;
          }
          if (recursive && item.isDirectory()) {
            addInputPathRecursively(statuses, fs, item.getPath(),
                inputFilter);
            continue;
          }
          statuses.add(item);
        }
      } else {
        statuses.add(globResult);
      }
    }
  }
  if (!inputErrors.isEmpty()) {
    throw new InvalidInputException(inputErrors);
  }
  return statuses;
}
 
Example 12
Source File: FileInputFormat.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Single-threaded expansion of the job's input paths into file statuses.
 * <p>
 * For every path the glob is evaluated with {@code inputFilter}. A missing
 * path or an empty match is recorded as an error and reported after all
 * paths have been processed. Directory matches contribute their accepted
 * children; with {@code recursive} set, accepted child directories are
 * expanded by {@code addInputPathRecursively}.
 *
 * @param job job context whose configuration resolves each file system
 * @param dirs input paths or glob patterns
 * @param inputFilter filter applied to glob matches and directory children
 * @param recursive whether nested directories are descended into
 * @return the collected file statuses
 * @throws IOException an {@code InvalidInputException} carrying all
 *           collected errors if any path does not exist or matches nothing,
 *           or an error propagated from the file system
 */
private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  List<FileStatus> output = new ArrayList<FileStatus>();
  List<IOException> collectedErrors = new ArrayList<IOException>();

  for (Path p : dirs) {
    FileSystem fs = p.getFileSystem(job.getConfiguration());
    FileStatus[] expanded = fs.globStatus(p, inputFilter);
    if (expanded == null) {
      collectedErrors.add(new IOException("Input path does not exist: " + p));
      continue;
    }
    if (expanded.length == 0) {
      collectedErrors.add(
          new IOException("Input Pattern " + p + " matches 0 files"));
      continue;
    }
    for (FileStatus top : expanded) {
      if (!top.isDirectory()) {
        output.add(top);
        continue;
      }
      for (RemoteIterator<LocatedFileStatus> inner =
          fs.listLocatedStatus(top.getPath()); inner.hasNext();) {
        LocatedFileStatus member = inner.next();
        if (inputFilter.accept(member.getPath())) {
          if (recursive && member.isDirectory()) {
            addInputPathRecursively(output, fs, member.getPath(),
                inputFilter);
          } else {
            output.add(member);
          }
        }
      }
    }
  }

  if (collectedErrors.isEmpty()) {
    return output;
  }
  throw new InvalidInputException(collectedErrors);
}
 
Example 13
Source File: WALInputFormat.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Recursively collects the files under {@code dir} whose path ends in a
 * numeric suffix (the text after the last '.') that is less than or equal
 * to {@code endTime}.
 * <p>
 * Files whose path has no numeric suffix are skipped with a warning.
 * {@code startTime} is not used for filtering here; it is only passed on to
 * the recursive calls.
 *
 * @param fs file system to list
 * @param dir directory to scan
 * @param startTime passed through to recursive calls unchanged
 * @param endTime upper bound (inclusive) for the numeric file suffix
 * @return the matching files; an immutable empty list when {@code dir} has
 *         no entries at all
 * @throws IOException propagated from the file system listing
 */
private List<FileStatus> getFiles(FileSystem fs, Path dir, long startTime, long endTime)
    throws IOException {
  List<FileStatus> result = new ArrayList<>();
  LOG.debug("Scanning " + dir.toString() + " for WAL files");

  RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(dir);
  if (!iter.hasNext()) {
    return Collections.emptyList();
  }
  while (iter.hasNext()) {
    LocatedFileStatus file = iter.next();
    if (file.isDirectory()) {
      // recurse into sub directories
      result.addAll(getFiles(fs, file.getPath(), startTime, endTime));
      continue;
    }
    String name = file.getPath().toString();
    int idx = name.lastIndexOf('.');
    // Track recognition explicitly: lastIndexOf yields -1 when there is no
    // '.', which an "idx == 0" sentinel would let pass without a warning.
    boolean hasNumericSuffix = false;
    if (idx > 0) {
      try {
        long fileStartTime = Long.parseLong(name.substring(idx + 1));
        hasNumericSuffix = true;
        if (fileStartTime <= endTime) {
          LOG.info("Found: " + file);
          result.add(file);
        }
      } catch (NumberFormatException x) {
        // Suffix is not a number; reported by the warning below.
      }
    }
    if (!hasNumericSuffix) {
      LOG.warn("File " + name + " does not appear to be an WAL file. Skipping...");
    }
  }
  return result;
}
 
Example 14
Source File: FileInputFormat.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Recursively collects the located file statuses under {@code path} into
 * {@code result}.
 * <p>
 * The listing itself is filtered with {@code inputFilter}; returned
 * directories are descended into and returned files are appended.
 *
 * @param result
 *          list that receives the files together with their block locations
 * @param fs
 *          file system on which {@code path} is listed
 * @param path
 *          directory to scan
 * @param inputFilter
 *          filter handed to the file system listing at every level
 * @throws IOException
 *           propagated from the file system listing
 */
protected void addLocatedInputPathRecursively(List<LocatedFileStatus> result,
    FileSystem fs, Path path, PathFilter inputFilter)
    throws IOException {
  RemoteIterator<LocatedFileStatus> entries =
      fs.listLocatedStatus(path, inputFilter);
  while (entries.hasNext()) {
    LocatedFileStatus entry = entries.next();
    if (!entry.isDir()) {
      result.add(entry);
      continue;
    }
    addLocatedInputPathRecursively(result, fs, entry.getPath(), inputFilter);
  }
}
 
Example 15
Source File: PlacementMonitor.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the located status of {@code p}, using the map obtained from
 * {@code locatedFileStatusCache} to avoid listing the same parent
 * directory repeatedly.
 * <p>
 * On a miss the parent of {@code p} is listed and every child is cached
 * under its URI path. The parent's own path is then stored with a
 * {@code null} value as a marker that the directory has been listed, so a
 * later miss for another child of the same parent returns {@code null}
 * without listing again.
 *
 * @param fs file system used to list the parent directory
 * @param p path whose status is wanted
 * @return the status of {@code p}, or {@code null} if the parent listing
 *         does not contain it
 * @throws IOException propagated from the file system listing
 */
LocatedFileStatus getLocatedFileStatus(
    FileSystem fs, Path p) throws IOException {
  HashMap<String, LocatedFileStatus> statusByPath =
      locatedFileStatusCache.get();
  final String targetKey = p.toUri().getPath();
  LocatedFileStatus cached = statusByPath.get(targetKey);
  if (cached != null) {
    return cached;
  }

  Path parent = p.getParent();
  final String parentKey = parent.toUri().getPath();
  // A key that is present with a null value means the parent was already
  // listed and p was not in it, so there is no point in listing again.
  boolean parentAlreadyListed =
      statusByPath.containsKey(parentKey) && statusByPath.get(parentKey) == null;
  if (parentAlreadyListed) {
    return null;
  }

  for (RemoteIterator<LocatedFileStatus> siblings = fs.listLocatedStatus(parent);
      siblings.hasNext();) {
    LocatedFileStatus sibling = siblings.next();
    statusByPath.put(sibling.getPath().toUri().getPath(), sibling);
  }
  // Mark the parent as listed by mapping its path to null.
  statusByPath.put(parentKey, null);

  // Still null when p is absent from the parent's listing.
  return statusByPath.get(targetKey);
}
 
Example 16
Source File: FileInputFormat.java    From RDFS with Apache License 2.0 4 votes vote down vote up
/** List the located statuses of the job's input.
 * Subclasses may override to, e.g., select only files matching a regular
 * expression.
 * <p>
 * Each input path is glob-expanded, and every match is listed with a filter
 * that combines {@code hiddenFileFilter} with the job's own input path
 * filter (if one is set).
 *
 * @param job the job to list input paths for
 * @return list of LocatedFileStatus objects
 * @throws IOException if the job has no input paths; an
 *         {@code InvalidInputException} if any input path does not exist
 *         or matches no files
 */
protected List<LocatedFileStatus> listLocatedStatus(JobContext job
                                      ) throws IOException {
  Path[] inputDirs = getInputPaths(job);
  if (inputDirs.length == 0) {
    throw new IOException("No input paths specified in job");
  }

  List<IOException> problems = new ArrayList<IOException>();

  // Chain the hidden-file filter with the user-supplied one, when present.
  List<PathFilter> filterChain = new ArrayList<PathFilter>();
  filterChain.add(hiddenFileFilter);
  PathFilter userFilter = getInputPathFilter(job);
  if (userFilter != null) {
    filterChain.add(userFilter);
  }
  PathFilter inputFilter = new MultiPathFilter(filterChain);

  List<LocatedFileStatus> located = new ArrayList<LocatedFileStatus>();
  for (Path p : inputDirs) {
    FileSystem fs = p.getFileSystem(job.getConfiguration());
    FileStatus[] globbed = fs.globStatus(p, inputFilter);
    if (globbed == null) {
      problems.add(new IOException("Input path does not exist: " + p));
      continue;
    }
    if (globbed.length == 0) {
      problems.add(new IOException("Input Pattern " + p + " matches 0 files"));
      continue;
    }
    for (FileStatus match : globbed) {
      RemoteIterator<LocatedFileStatus> children =
          fs.listLocatedStatus(match.getPath(), inputFilter);
      while (children.hasNext()) {
        located.add(children.next());
      }
    }
  }

  if (!problems.isEmpty()) {
    throw new InvalidInputException(problems);
  }
  LOG.info("Total input paths to process : " + located.size());
  return located;
}