Java Code Examples for org.apache.hadoop.hdfs.DistributedFileSystem#isFileClosed()

The following examples show how to use org.apache.hadoop.hdfs.DistributedFileSystem#isFileClosed(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: HadoopRecoverableFsDataOutputStream.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Called when resuming execution after a failure and waits until the lease
 * of the file we are resuming is free.
 *
 * <p>The lease of the file we are resuming writing/committing to may still
 * belong to the process that failed previously and whose state we are
 * recovering.
 *
 * <p>Lease recovery is requested once, then the file is polled every 500 ms
 * until it is reported closed or {@code LEASE_TIMEOUT} milliseconds have passed.
 *
 * @param fs The file system holding the file; the precondition check requires
 *           it to be a {@link DistributedFileSystem}.
 * @param path The path to the file we want to resume writing to.
 * @return {@code true} if the file was reported closed before the deadline
 *         expired, {@code false} if the deadline ran out first.
 * @throws IOException if a file system call fails, or if the calling thread is
 *                     interrupted while waiting (the interrupt status is
 *                     restored before the exception is thrown).
 */
private static boolean waitUntilLeaseIsRevoked(final FileSystem fs, final Path path) throws IOException {
	Preconditions.checkState(fs instanceof DistributedFileSystem);

	final DistributedFileSystem dfs = (DistributedFileSystem) fs;
	dfs.recoverLease(path);

	final Deadline deadline = Deadline.now().plus(Duration.ofMillis(LEASE_TIMEOUT));

	boolean isClosed = dfs.isFileClosed(path);
	while (!isClosed && deadline.hasTimeLeft()) {
		try {
			Thread.sleep(500L);
		} catch (InterruptedException e1) {
			// Thread.sleep cleared the interrupt flag; set it again so callers
			// further up the stack can still observe the interruption.
			Thread.currentThread().interrupt();
			throw new IOException("Recovering the lease failed: ", e1);
		}
		isClosed = dfs.isFileClosed(path);
	}
	return isClosed;
}
 
Example 2
Source File: HadoopRecoverableFsDataOutputStream.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Called when resuming execution after a failure and waits until the lease
 * of the file we are resuming is free.
 *
 * <p>The lease of the file we are resuming writing/committing to may still
 * belong to the process that failed previously and whose state we are
 * recovering.
 *
 * <p>Lease recovery is requested once, then the file is polled every 500 ms
 * until it is reported closed or {@code LEASE_TIMEOUT} milliseconds have passed.
 *
 * @param fs The file system holding the file; the precondition check requires
 *           it to be a {@link DistributedFileSystem}.
 * @param path The path to the file we want to resume writing to.
 * @return {@code true} if the file was reported closed before the deadline
 *         expired, {@code false} if the deadline ran out first.
 * @throws IOException if a file system call fails, or if the calling thread is
 *                     interrupted while waiting (the interrupt status is
 *                     restored before the exception is thrown).
 */
private static boolean waitUntilLeaseIsRevoked(final FileSystem fs, final Path path) throws IOException {
	Preconditions.checkState(fs instanceof DistributedFileSystem);

	final DistributedFileSystem dfs = (DistributedFileSystem) fs;
	dfs.recoverLease(path);

	final Deadline deadline = Deadline.now().plus(Duration.ofMillis(LEASE_TIMEOUT));

	boolean isClosed = dfs.isFileClosed(path);
	while (!isClosed && deadline.hasTimeLeft()) {
		try {
			Thread.sleep(500L);
		} catch (InterruptedException e1) {
			// Thread.sleep cleared the interrupt flag; set it again so callers
			// further up the stack can still observe the interruption.
			Thread.currentThread().interrupt();
			throw new IOException("Recovering the lease failed: ", e1);
		}
		isClosed = dfs.isFileClosed(path);
	}
	return isClosed;
}
 
Example 3
Source File: HadoopRecoverableFsDataOutputStream.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Called when resuming execution after a failure and waits until the lease
 * of the file we are resuming is free.
 *
 * <p>The lease of the file we are resuming writing/committing to may still
 * belong to the process that failed previously and whose state we are
 * recovering.
 *
 * <p>Lease recovery is requested once, then the file is polled every 500 ms
 * until it is reported closed or {@code LEASE_TIMEOUT} milliseconds have passed.
 *
 * @param fs The file system holding the file; the precondition check requires
 *           it to be a {@link DistributedFileSystem}.
 * @param path The path to the file we want to resume writing to.
 * @return {@code true} if the file was reported closed before the deadline
 *         expired, {@code false} if the deadline ran out first.
 * @throws IOException if a file system call fails, or if the calling thread is
 *                     interrupted while waiting (the interrupt status is
 *                     restored before the exception is thrown).
 */
private static boolean waitUntilLeaseIsRevoked(final FileSystem fs, final Path path) throws IOException {
	Preconditions.checkState(fs instanceof DistributedFileSystem);

	final DistributedFileSystem dfs = (DistributedFileSystem) fs;
	dfs.recoverLease(path);

	final Deadline deadline = Deadline.now().plus(Duration.ofMillis(LEASE_TIMEOUT));

	boolean isClosed = dfs.isFileClosed(path);
	while (!isClosed && deadline.hasTimeLeft()) {
		try {
			Thread.sleep(500L);
		} catch (InterruptedException e1) {
			// Thread.sleep cleared the interrupt flag; set it again so callers
			// further up the stack can still observe the interruption.
			Thread.currentThread().interrupt();
			throw new IOException("Recovering the lease failed: ", e1);
		}
		isClosed = dfs.isFileClosed(path);
	}
	return isClosed;
}
 
Example 4
Source File: FSHDFSUtils.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Tries to recover the lease on an HDFS file, retrying until the lease is
 * recovered, the overall timeout check reports expiry, or the caller is closed.
 *
 * <p>Returns immediately with {@code true} if the file is already closed.
 * Otherwise each attempt calls {@code recoverLease}; after the first failed
 * attempt it sleeps for the short "first pause", and after later failed
 * attempts it polls {@code isFileClosed} until the "subsequent pause" elapses.
 *
 * @param dfs the distributed file system holding the file
 * @param p the file whose lease should be recovered
 * @param conf source of the {@code solr.hdfs.lease.recovery.*} settings
 * @param callerInfo consulted to stop early once the caller has been closed
 * @return {@code true} if the file was closed or its lease was recovered,
 *         {@code false} if the attempt loop ended without success
 * @throws IOException if a file system call fails; an
 *         {@link InterruptedIOException} is thrown if the thread is
 *         interrupted while sleeping
 */
static boolean recoverDFSFileLease(final DistributedFileSystem dfs, final Path p, final Configuration conf, CallerInfo callerInfo)
    throws IOException {
  log.info("Recovering lease on dfs file {}", p);
  final long startWaiting = System.nanoTime();

  // Absolute deadline in nanoTime units. The default of 15 minutes is huge, but
  // on a major issue HDFS usually needs 10 minutes before marking nodes as dead,
  // so we stay beyond that limit 'to be safe'.
  final long timeoutMillis = conf.getInt("solr.hdfs.lease.recovery.timeout", 900000);
  final long recoveryTimeout = TimeUnit.MILLISECONDS.toNanos(timeoutMillis) + startWaiting;

  // In milliseconds; should be a little bit above the cluster's dfs heartbeat.
  final long firstPause = conf.getInt("solr.hdfs.lease.recovery.first.pause", 4000);

  // In nanoseconds; how long it takes us to time out against a dead primary
  // datanode. 61 seconds is 1 second more than the default READ_TIMEOUT in HDFS,
  // the default value for DFS_CLIENT_SOCKET_TIMEOUT_KEY.
  final long dfsTimeoutMillis = conf.getInt("solr.hdfs.lease.recovery.dfs.timeout", 61 * 1000);
  final long subsequentPause = TimeUnit.MILLISECONDS.toNanos(dfsTimeoutMillis);

  if (dfs.isFileClosed(p)) {
    return true;
  }

  boolean leaseRecovered = false;
  int attempt = 0;
  // Leave the loop on successful recovery, on timeout, on caller close, or by exception.
  while (true) {
    if (recoverLease(dfs, attempt, p, startWaiting)) {
      leaseRecovered = true;
      break;
    }
    if (checkIfTimedout(conf, recoveryTimeout, attempt, p, startWaiting)) {
      break;
    }
    if (callerInfo.isCallerClosed()) {
      break;
    }

    try {
      if (attempt == 0) {
        // Only the very first retry uses the short pause.
        Thread.sleep(firstPause);
      } else {
        // Poll isFileClosed until subsequentPause has elapsed or the caller is closed.
        final long pollStart = System.nanoTime();
        while (!leaseRecovered) {
          if ((System.nanoTime() - pollStart) >= subsequentPause) {
            break;
          }
          if (callerInfo.isCallerClosed()) {
            break;
          }
          Thread.sleep(conf.getInt("solr.hdfs.lease.recovery.pause", 1000));
          leaseRecovered = dfs.isFileClosed(p);
        }
      }
    } catch (InterruptedException ie) {
      throw (InterruptedIOException) new InterruptedIOException().initCause(ie);
    }

    if (leaseRecovered) {
      break;
    }
    attempt++;
  }

  if (leaseRecovered) {
    RECOVER_LEASE_SUCCESS_COUNT.incrementAndGet();
  }
  return leaseRecovered;
}