Java Code Examples for org.apache.hadoop.fs.FileSystem#isFile()

The following examples show how to use org.apache.hadoop.fs.FileSystem#isFile(). They are drawn from a range of open-source projects; the original project and source file are noted above each example.
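Recent Hadoop releases deprecate FileSystem#isFile(Path) in favour of getFileStatus(path).isFile(), but the semantics are the same either way: it returns true only when the path exists and refers to a regular file, and false for directories and missing paths alike. A minimal usage sketch before the examples (the class name and fallback path are placeholders):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsFileDemo {
  public static void main(String[] args) throws Exception {
    Path path = new Path(args.length > 0 ? args[0] : "/tmp/example.txt"); // placeholder path
    FileSystem fs = path.getFileSystem(new Configuration());
    if (fs.isFile(path)) {
      System.out.println(path + " is a regular file");
    } else {
      System.out.println(path + " is a directory or does not exist");
    }
  }
}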
Example 1
Source File: DownloadHdfsFileBuilder.java    From kite with Apache License 2.0
private void download(List<String> uris, Configuration conf, File dstRootDir) throws IOException {
  synchronized (DONE) { 
    for (String uri : uris) {
      Path path = new Path(uri);
      File dst = new File(dstRootDir, path.getName()).getCanonicalFile();
      if (!DONE.contains(dst.getPath())) {
        if (dst.isDirectory()) {
          LOG.debug("Deleting dir {}", dst);
          FileUtils.deleteDirectory(dst);
        }
        FileSystem fs = path.getFileSystem(conf);
        if (fs.isFile(path)) {
          dst.getParentFile().mkdirs();
        }
        LOG.debug("Downloading {} to {}", uri, dst);
        if (!FileUtil.copy(fs, path, dst, false, conf)) {
          throw new IOException("Cannot download URI " + uri + " to " + dst);
        }
        DONE.add(dst.getPath());
        LOG.debug("Succeeded downloading {} to {}", uri, dst);
      }
    }
  }
}
 
Example 2
Source File: SimpleCopyListing.java    From hadoop with Apache License 2.0
private Path computeSourceRootPath(FileStatus sourceStatus,
                                   DistCpOptions options) throws IOException {

  Path target = options.getTargetPath();
  FileSystem targetFS = target.getFileSystem(getConf());
  final boolean targetPathExists = options.getTargetPathExists();

  boolean solitaryFile = options.getSourcePaths().size() == 1
                                              && !sourceStatus.isDirectory();

  if (solitaryFile) {
    if (targetFS.isFile(target) || !targetPathExists) {
      return sourceStatus.getPath();
    } else {
      return sourceStatus.getPath().getParent();
    }
  } else {
    boolean specialHandling = (options.getSourcePaths().size() == 1 && !targetPathExists) ||
        options.shouldSyncFolder() || options.shouldOverwrite();

    return specialHandling && sourceStatus.isDirectory() ? sourceStatus.getPath() :
        sourceStatus.getPath().getParent();
  }
}
 
Example 3
Source File: HBaseFsck.java    From hbase with Apache License 2.0
/**
 * ls -r for debugging purposes
 */
public static void debugLsr(Configuration conf,
    Path p, HbckErrorReporter errors) throws IOException {
  if (!LOG.isDebugEnabled() || p == null) {
    return;
  }
  FileSystem fs = p.getFileSystem(conf);

  if (!fs.exists(p)) {
    // nothing
    return;
  }
  errors.print(p.toString());

  if (fs.isFile(p)) {
    return;
  }

  if (fs.getFileStatus(p).isDirectory()) {
    FileStatus[] fss = fs.listStatus(p);
    for (FileStatus status : fss) {
      debugLsr(conf, status.getPath(), errors);
    }
  }
}
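isFile(p) alone cannot distinguish a directory from a missing path, which is why the example above tests fs.exists(p) first. The same disambiguation can be written against the non-deprecated API in a single getFileStatus call; a sketch, assuming a Hadoop 2.x or later client:

import java.io.FileNotFoundException;
import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public final class PathKind {
  // Distinguishes file / directory / missing with one getFileStatus call.
  public static String kindOf(FileSystem fs, Path p) throws IOException {
    try {
      FileStatus status = fs.getFileStatus(p);
      return status.isFile() ? "file" : "directory";
    } catch (FileNotFoundException e) {
      return "missing";
    }
  }
}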
 
Example 4
Source File: HDFSSequenceFile.java    From Transwarp-Sample-Code with MIT License
protected void open(Path dstPath, CompressionCodec codeC,
    CompressionType compType, Configuration conf, FileSystem hdfs)
        throws IOException {
  if(useRawLocalFileSystem) {
    if(hdfs instanceof LocalFileSystem) {
      hdfs = ((LocalFileSystem)hdfs).getRaw();
    } else {
      logger.warn("useRawLocalFileSystem is set to true but file system " +
          "is not of type LocalFileSystem: " + hdfs.getClass().getName());
    }
  }
  if (conf.getBoolean("hdfs.append.support", false) && hdfs.isFile(dstPath)) {
    outStream = hdfs.append(dstPath);
  } else {
    outStream = hdfs.create(dstPath);
  }
  writer = SequenceFile.createWriter(conf, outStream,
      serializer.getKeyClass(), serializer.getValueClass(), compType, codeC);

  registerCurrentStream(outStream, hdfs, dstPath);
}
 
Example 5
Source File: ProcedureWALPrettyPrinter.java    From hbase with Apache License 2.0
/**
 * Reads a log file and outputs its contents.
 *
 * @param conf   HBase configuration relevant to this log file
 * @param p       path of the log file to be read
 * @throws IOException  IOException
 */
public void processFile(final Configuration conf, final Path p)
    throws IOException {

  FileSystem fs = p.getFileSystem(conf);
  if (!fs.exists(p)) {
    System.err.println("ERROR, file doesnt exist: " + p);
    return;
  }
  if (!fs.isFile(p)) {
    System.err.println(p + " is not a file");
    return;
  }

  FileStatus logFile = fs.getFileStatus(p);
  if (logFile.getLen() == 0) {
    out.println("Zero length file: " + p);
    return;
  }

  out.println("Opening procedure state-log: " + p);
  ProcedureWALFile log = new ProcedureWALFile(fs, logFile);
  processProcedureWALFile(log);
}
 
Example 6
Source File: FileSystemUtil.java    From kite with Apache License 2.0
private static <T> T visit(PathVisitor<T> visitor, FileSystem fs, Path path,
                    List<Path> followedLinks) throws IOException {
  if (fs.isFile(path)) {
    return visitor.file(fs, path);
  } else if (IS_SYMLINK != null &&
      IS_SYMLINK.<Boolean>invoke(fs.getFileStatus(path))) {
    // IS_SYMLINK is resolved reflectively, presumably so this helper also
    // loads on Hadoop versions where FileStatus#isSymlink() is unavailable.
    Preconditions.checkArgument(!followedLinks.contains(path),
        "Encountered recursive path structure at link: " + path);
    followedLinks.add(path); // no need to remove
    return visit(visitor, fs, fs.getLinkTarget(path), followedLinks);
  }

  List<T> children = Lists.newArrayList();

  FileStatus[] statuses = fs.listStatus(path, PathFilters.notHidden());
  for (FileStatus stat : statuses) {
    // pass followedLinks through so the call matches the signature above
    // and link cycles remain detectable in subtrees
    children.add(visit(visitor, fs, stat.getPath(), followedLinks));
  }

  return visitor.directory(fs, path, children);
}
 
Example 7
Source File: SimpleCopyListing.java    From big-c with Apache License 2.0
private Path computeSourceRootPath(FileStatus sourceStatus,
                                   DistCpOptions options) throws IOException {

  Path target = options.getTargetPath();
  FileSystem targetFS = target.getFileSystem(getConf());
  final boolean targetPathExists = options.getTargetPathExists();

  boolean solitaryFile = options.getSourcePaths().size() == 1
                                              && !sourceStatus.isDirectory();

  if (solitaryFile) {
    if (targetFS.isFile(target) || !targetPathExists) {
      return sourceStatus.getPath();
    } else {
      return sourceStatus.getPath().getParent();
    }
  } else {
    boolean specialHandling = (options.getSourcePaths().size() == 1 && !targetPathExists) ||
        options.shouldSyncFolder() || options.shouldOverwrite();

    return specialHandling && sourceStatus.isDirectory() ? sourceStatus.getPath() :
        sourceStatus.getPath().getParent();
  }
}
 
Example 8
Source File: HalvadeConf.java    From halvade with GNU General Public License v3.0
public static void setKnownSitesOnHDFS(Configuration conf, String[] val) throws IOException, URISyntaxException {
    conf.setInt(numberOfSites, val.length);
    FileSystem fs;
    for (int i = 0; i < val.length; i++) {
        // check if dir add all files!
        fs = FileSystem.get(new URI(val[i]), conf);
        if(fs.isFile(new Path(val[i]))) {
            conf.set(sitesOnHDFSName + i, val[i]);
        } else {
            FileStatus[] files = fs.listStatus(new Path(val[i]));
            for(FileStatus file : files) {
                if (!file.isDir()) {
                    conf.set(sitesOnHDFSName + i, file.getPath().toString());
                }
            }
        }
    }
}
 
Example 9
Source File: IndexImporter.java    From incubator-retired-blur with Apache License 2.0
private Map<Path, Path> toMap(FileSystem fileSystem, Set<Path> inuseDirs) throws IOException {
  Map<Path, Path> result = new TreeMap<Path, Path>();
  for (Path p : inuseDirs) {
    if (!fileSystem.isFile(p)) {
      FileStatus[] listStatus = fileSystem.listStatus(p);
      for (FileStatus status : listStatus) {
        result.put(status.getPath(), p);
      }
    }
  }
  return result;
}
 
Example 10
Source File: PathPartitioner.java    From spork with Apache License 2.0
/**
 * Searches for the key=value pairs in the path pointed to by the location
 * parameter.
 *
 * @param location
 *            String root path in hdfs e.g. /user/hive/warehouse or
 *            /logs/repo
 * @param conf
 *            Configuration
 * @return Set of String. The order is maintained as per the directory tree.
 *         i.e. if /logs/repo/year=2010/month=2010 exists the first item in
 *         the set will be year and the second month.
 * @throws IOException
 */
public Set<String> getPartitionKeys(String location, Configuration conf)
    throws IOException {

  // find the hive type partition key=value pairs from the path.
  // first parse the string alone.
  Path path = new Path(location);
  FileSystem fs = path.getFileSystem(conf);

  FileStatus[] fileStatusArr = null;

  // use LinkedHashSet because order is important here.
  Set<String> partitionKeys = new LinkedHashSet<String>();

  parseAndPutKeyValue(location, partitionKeys);

  while (!((fileStatusArr = fs.listStatus(path)) == null
      || fs.isFile(path) || fileStatusArr.length == 0)) {
    for (FileStatus fileStatus : fileStatusArr) {

      path = fileStatus.getPath();

      // ignore hidden directories
      if (fileStatus.getPath().getName().startsWith("_")
          || !fileStatus.isDir()) {
        continue;
      }

      parseAndPutKeyValue(path.getName(), partitionKeys);
      // at the first directory found stop the for loop after parsing
      // for key value pairs
      break;
    }
  }

  return partitionKeys;
}
 
Example 11
Source File: ConsistentListingAspect.java    From s3mper with Apache License 2.0
private List<Path> recursiveList(FileSystem fs, Path path) throws IOException {
    List<Path> result = new ArrayList<Path>();
    
    try {
        result.add(path);
        
        if (!fs.isFile(path)) {
            FileStatus[] children = fs.listStatus(path);
            
            if (children == null) {
                return result;
            }
            
            for (FileStatus child : children) {
                if (child.isDir()) {
                    result.addAll(recursiveList(fs, child.getPath()));
                } else {
                    result.add(child.getPath());
                }
            }
        }
    } catch (Exception e) {
        log.info("A problem occurred recursively deleting path: " + path + " " + e.getMessage());
    }
    
    return result;
}
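Where FileSystem#listFiles is available, the same traversal can be delegated to the framework; a sketch (unlike recursiveList above, it yields files only, not the directories themselves):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public final class RecursiveListing {
  // Collects all file paths under root, recursing into subdirectories.
  public static List<Path> listFilesRecursively(FileSystem fs, Path root)
      throws IOException {
    List<Path> result = new ArrayList<>();
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(root, true);
    while (it.hasNext()) {
      result.add(it.next().getPath());
    }
    return result;
  }
}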
 
Example 12
Source File: MiniDfsResource.java    From components with Apache License 2.0
/**
 * Tests that a file on the HDFS cluster contains the given Parquet records.
 *
 * @param fs the FileSystem of the HDFS cluster
 * @param path the name of the file on the HDFS cluster
 * @param expected the expected Avro records in the file
 * @param part true when recursing into a single part-file of a directory, so
 *        the completeness check is deferred to the directory-level call
 */
public static void assertReadParquetFile(FileSystem fs, String path, Set<IndexedRecord> expected, boolean part) throws IOException {
    Path p = new Path(path);
    if (fs.isFile(p)) {
        try (AvroParquetReader<GenericRecord> reader = new AvroParquetReader<GenericRecord>(fs.getConf(), new Path(path))) {
            IndexedRecord record = null;
            while (null != (record = reader.read())){
                IndexedRecord eqRecord = null;
                for (IndexedRecord indexedRecord : expected) {
                    if(indexedRecord.equals(record)){
                        eqRecord = indexedRecord;
                        break;
                    }
                }
                expected.remove(eqRecord);
            }
        }
        // Check before asserting for the message.
        if (!part && expected.size() != 0)
            assertThat("Not all avro records found: " + expected.iterator().next(), expected, hasSize(0));
    } else if (fs.isDirectory(p)) {
        for (FileStatus fstatus : FileSystemUtil.listSubFiles(fs, p)) {
            assertReadParquetFile(fs, fstatus.getPath().toString(), expected, true);
        }
        // Check before asserting for the message.
        if (expected.size() != 0)
            assertThat("Not all avro records found: " + expected.iterator().next(), expected, hasSize(0));
    } else {
        fail("No such path: " + path);
    }
}
 
Example 13
Source File: MiniDfsResource.java    From components with Apache License 2.0
/**
 * Tests that a file on the HDFS cluster contains the given Avro records.
 *
 * @param fs the FileSystem of the HDFS cluster
 * @param path the name of the file on the HDFS cluster
 * @param expected the expected Avro records in the file
 * @param part true when recursing into a single part-file of a directory, so
 *        the completeness check is deferred to the directory-level call
 */
public static void assertReadAvroFile(FileSystem fs, String path, Set<IndexedRecord> expected, boolean part) throws IOException {
    Path p = new Path(path);
    if (fs.isFile(p)) {
        try (DataFileStream<GenericRecord> reader = new DataFileStream<GenericRecord>(
                new BufferedInputStream(fs.open(new Path(path))), new GenericDatumReader<GenericRecord>())) {
            IndexedRecord record = null;
            while (reader.hasNext()){
                record = reader.iterator().next();
                IndexedRecord eqRecord = null;
                for (IndexedRecord indexedRecord : expected) {
                    if(indexedRecord.equals(record)){
                        eqRecord = indexedRecord;
                        break;
                    }
                }
                expected.remove(eqRecord);
            }
        }
        // Check before asserting for the message.
        if (!part && expected.size() != 0)
            assertThat("Not all avro records found: " + expected.iterator().next(), expected, hasSize(0));
    } else if (fs.isDirectory(p)) {
        for (FileStatus fstatus : FileSystemUtil.listSubFiles(fs, p)) {
            assertReadAvroFile(fs, fstatus.getPath().toString(), expected, true);
        }
        // Check before asserting for the message.
        if (expected.size() != 0)
            assertThat("Not all avro records found: " + expected.iterator().next(), expected, hasSize(0));
    } else {
        fail("No such path: " + path);
    }
}
 
Example 14
Source File: HDFSUtil.java    From Scribengin with GNU Affero General Public License v3.0
static void dump(FileSystem fs, Path path, String indentation) throws IOException {
  System.out.println(indentation + " " + path.getName());
  if(fs.isFile(path)) return;
  FileStatus[] status = fs.listStatus(path);
  String nextIndentation = indentation + "  ";
  for(int i = 0; i < status.length; i++) {
    dump(fs, status[i].getPath(), nextIndentation);
  }
}
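A hypothetical driver for the helper above. Since dump has package-private access, the driver would need to live in the same package; the default root path is an assumption:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DumpDriver {
  public static void main(String[] args) throws Exception {
    Path root = new Path(args.length > 0 ? args[0] : "/tmp"); // assumed root
    FileSystem fs = root.getFileSystem(new Configuration());
    HDFSUtil.dump(fs, root, "");
  }
}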
 
Example 15
Source File: Utils.java    From spork with Apache License 2.0
/**
 * Finds a valid path for a file from a FileStatus object.
 * @param fileStatus FileStatus object corresponding to a file,
 * or a directory.
 * @param fileSystem FileSystem in which the file should be found
 * @return The first file found
 * @throws IOException
 */
public static Path depthFirstSearchForFile(final FileStatus fileStatus,
    final FileSystem fileSystem) throws IOException {
  if (fileSystem.isFile(fileStatus.getPath())) {
    return fileStatus.getPath();
  } else {
    return depthFirstSearchForFile(
        fileSystem.listStatus(fileStatus.getPath(), VISIBLE_FILES),
        fileSystem);
  }
}
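The recursive call targets a companion overload taking a FileStatus[], which this listing omits. A plausible sketch of that overload, paired with the method above (the null-on-empty convention is an assumption, and the real spork code may differ):

// Hypothetical companion overload; returns the first file found beneath
// any of the given statuses, or null when nothing is found.
public static Path depthFirstSearchForFile(final FileStatus[] statuses,
    final FileSystem fileSystem) throws IOException {
  for (FileStatus status : statuses) {
    Path found = depthFirstSearchForFile(status, fileSystem);
    if (found != null) {
      return found;
    }
  }
  return null;
}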
 
Example 16
Source File: WasbFsck.java    From hadoop with Apache License 2.0
/**
 * Recursively check if a given path and its child paths have colons in their
 * names. It returns true if none of them has a colon or this path does not
 * exist, and false otherwise.
 */
private boolean recursiveCheckChildPathName(FileSystem fs, Path p)
    throws IOException {
  if (p == null) {
    return true;
  }
  if (!fs.exists(p)) {
    System.out.println("Path " + p + " does not exist!");
    return true;
  }

  if (fs.isFile(p)) {
    if (containsColon(p)) {
      System.out.println("Warning: file " + p + " has a colon in its name.");
      return false;
    } else {
      return true;
    }
  } else {
    boolean flag;
    if (containsColon(p)) {
      System.out.println("Warning: directory " + p
          + " has a colon in its name.");
      flag = false;
    } else {
      flag = true;
    }
    FileStatus[] listed = fs.listStatus(p);
    for (FileStatus l : listed) {
      if (!recursiveCheckChildPathName(fs, l.getPath())) {
        flag = false;
      }
    }
    return flag;
  }
}
 
Example 17
Source File: BlurOutputCommitter.java    From incubator-retired-blur with Apache License 2.0
@Override
public void commitTask(TaskAttemptContext context) throws IOException {
  LOG.info("Running commit task.");
  Conf conf = setup(context);
  FileSystem fileSystem = conf._newIndex.getFileSystem(conf._configuration);
  if (fileSystem.exists(conf._newIndex) && !fileSystem.isFile(conf._newIndex)) {
    Path dst = new Path(conf._indexPath, conf._taskAttemptID.toString() + ".task_complete");
    LOG.info("Committing [{0}] to [{1}]", conf._newIndex, dst);
    fileSystem.rename(conf._newIndex, dst);
  } else {
    throw new IOException("Path [" + conf._newIndex + "] does not exist, can not commit.");
  }
}
 
Example 18
Source File: HDFSCompressedDataStream.java    From mt-flume with Apache License 2.0
@Override
public void open(String filePath, CompressionCodec codec,
    CompressionType cType) throws IOException {
  Configuration conf = new Configuration();
  Path dstPath = new Path(filePath);
  FileSystem hdfs = dstPath.getFileSystem(conf);
  if(useRawLocalFileSystem) {
    if(hdfs instanceof LocalFileSystem) {
      hdfs = ((LocalFileSystem)hdfs).getRaw();
    } else {
      logger.warn("useRawLocalFileSystem is set to true but file system " +
          "is not of type LocalFileSystem: " + hdfs.getClass().getName());
    }
  }

  boolean appending = false;
  if (conf.getBoolean("hdfs.append.support", false) && hdfs.isFile(dstPath)) {
    fsOut = hdfs.append(dstPath);
    appending = true;
  } else {
    fsOut = hdfs.create(dstPath);
  }
  cmpOut = codec.createOutputStream(fsOut);
  serializer = EventSerializerFactory.getInstance(serializerType,
      serializerContext, cmpOut);
  if (appending && !serializer.supportsReopen()) {
    cmpOut.close();
    serializer = null;
    throw new IOException("serializer (" + serializerType
        + ") does not support append");
  }

  registerCurrentStream(fsOut, hdfs, dstPath);

  if (appending) {
    serializer.afterReopen();
  } else {
    serializer.afterCreate();
  }
  isFinished = false;
}
 
Example 19
Source File: HDFSCompressedDataStream.java    From Transwarp-Sample-Code with MIT License
@Override
public void open(String filePath, CompressionCodec codec,
    CompressionType cType) throws IOException {
  Configuration conf = new Configuration();
  Path dstPath = new Path(filePath);
  FileSystem hdfs = dstPath.getFileSystem(conf);
  if(useRawLocalFileSystem) {
    if(hdfs instanceof LocalFileSystem) {
      hdfs = ((LocalFileSystem)hdfs).getRaw();
    } else {
      logger.warn("useRawLocalFileSystem is set to true but file system " +
          "is not of type LocalFileSystem: " + hdfs.getClass().getName());
    }
  }
  boolean appending = false;
  if (conf.getBoolean("hdfs.append.support", false) && hdfs.isFile(dstPath)) {
    fsOut = hdfs.append(dstPath);
    appending = true;
  } else {
    fsOut = hdfs.create(dstPath);
  }
  if(compressor == null) {
    compressor = CodecPool.getCompressor(codec, conf);
  }
  cmpOut = codec.createOutputStream(fsOut, compressor);
  serializer = EventSerializerFactory.getInstance(serializerType,
      serializerContext, cmpOut);
  if (appending && !serializer.supportsReopen()) {
    cmpOut.close();
    serializer = null;
    throw new IOException("serializer (" + serializerType
        + ") does not support append");
  }

  registerCurrentStream(fsOut, hdfs, dstPath);

  if (appending) {
    serializer.afterReopen();
  } else {
    serializer.afterCreate();
  }
  isFinished = false;
}
 
Example 20
Source File: HDFSDataStream.java    From mt-flume with Apache License 2.0
@Override
public void open(String filePath) throws IOException {
  Configuration conf = new Configuration();
  Path dstPath = new Path(filePath);
  FileSystem hdfs = dstPath.getFileSystem(conf);
  if(useRawLocalFileSystem) {
    if(hdfs instanceof LocalFileSystem) {
      hdfs = ((LocalFileSystem)hdfs).getRaw();
    } else {
      logger.warn("useRawLocalFileSystem is set to true but file system " +
          "is not of type LocalFileSystem: " + hdfs.getClass().getName());
    }
  }

  boolean appending = false;
  if (conf.getBoolean("hdfs.append.support", false) && hdfs.isFile(dstPath)) {
    outStream = hdfs.append(dstPath);
    appending = true;
  } else {
    outStream = hdfs.create(dstPath);
  }

  serializer = EventSerializerFactory.getInstance(
      serializerType, serializerContext, outStream);
  if (appending && !serializer.supportsReopen()) {
    outStream.close();
    serializer = null;
    throw new IOException("serializer (" + serializerType +
        ") does not support append");
  }

  // must call superclass to check for replication issues
  registerCurrentStream(outStream, hdfs, dstPath);

  if (appending) {
    serializer.afterReopen();
  } else {
    serializer.afterCreate();
  }
}
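Examples 4, 18, 19, and 20 all make the same append-or-create decision before writing. Factored out as a standalone sketch (the "hdfs.append.support" key follows the Flume convention those examples use):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public final class AppendOrCreate {
  // Append only when the deployment advertises append support AND the
  // destination already exists as a regular file; otherwise create anew.
  public static FSDataOutputStream open(FileSystem fs, Path dst, Configuration conf)
      throws IOException {
    if (conf.getBoolean("hdfs.append.support", false) && fs.isFile(dst)) {
      return fs.append(dst);
    }
    return fs.create(dst);
  }
}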