Java Code Examples for org.apache.hadoop.hdfs.DistributedFileSystem#recoverLease()

The following examples show how to use org.apache.hadoop.hdfs.DistributedFileSystem#recoverLease(). Each example is drawn from an open-source project; the source file, originating project, and license are noted above each snippet.
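Before looking at the examples, note the call's semantics: DistributedFileSystem#recoverLease(Path) asks the NameNode to begin lease recovery for a file that a failed client left open, and it returns true only if the file is already closed. Recovery itself completes asynchronously, which is why most of the examples below poll. Here is a minimal sketch of the shared pattern; the helper name and timeout handling are illustrative and not taken from any of the projects:

// Minimal sketch of the common recoverLease() pattern (helper name is hypothetical).
// recoverLease() only initiates recovery; callers poll isFileClosed() to detect completion.
static boolean waitForLeaseRecovery(DistributedFileSystem dfs, Path path, long timeoutMs)
		throws IOException {
	boolean closed = dfs.recoverLease(path); // true if the file is already closed
	long deadline = System.currentTimeMillis() + timeoutMs;
	while (!closed && System.currentTimeMillis() < deadline) {
		try {
			Thread.sleep(500L); // give the NameNode time to complete recovery
		} catch (InterruptedException e) {
			Thread.currentThread().interrupt();
			throw new IOException("Interrupted while waiting for lease recovery", e);
		}
		closed = dfs.isFileClosed(path);
	}
	return closed;
}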
Example 1
Source File: HadoopRecoverableFsDataOutputStream.java    From Flink-CEPplus with Apache License 2.0
/**
 * Called when resuming execution after a failure and waits until the lease
 * of the file we are resuming is free.
 *
 * <p>The lease of the file we are resuming writing/committing to may still
 * belong to the process that failed previously and whose state we are
 * recovering.
 *
 * @param path The path to the file we want to resume writing to.
 */
private static boolean waitUntilLeaseIsRevoked(final FileSystem fs, final Path path) throws IOException {
	Preconditions.checkState(fs instanceof DistributedFileSystem);

	final DistributedFileSystem dfs = (DistributedFileSystem) fs;
	dfs.recoverLease(path);

	final Deadline deadline = Deadline.now().plus(Duration.ofMillis(LEASE_TIMEOUT));

	boolean isClosed = dfs.isFileClosed(path);
	while (!isClosed && deadline.hasTimeLeft()) {
		try {
			Thread.sleep(500L);
		} catch (InterruptedException e1) {
			throw new IOException("Recovering the lease failed: ", e1);
		}
		isClosed = dfs.isFileClosed(path);
	}
	return isClosed;
}
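The Deadline bounds how long resumption will wait: since recoverLease() merely asks the NameNode to revoke the previous holder's lease, the method polls isFileClosed() every 500 ms and returns false if the file is still open once LEASE_TIMEOUT elapses, letting the caller fail the resume instead of blocking indefinitely.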
 
Example 2
Source File: HadoopRecoverableFsDataOutputStream.java    From flink with Apache License 2.0
/**
 * Called when resuming execution after a failure and waits until the lease
 * of the file we are resuming is free.
 *
 * <p>The lease of the file we are resuming writing/committing to may still
 * belong to the process that failed previously and whose state we are
 * recovering.
 *
 * @param path The path to the file we want to resume writing to.
 */
private static boolean waitUntilLeaseIsRevoked(final FileSystem fs, final Path path) throws IOException {
	Preconditions.checkState(fs instanceof DistributedFileSystem);

	final DistributedFileSystem dfs = (DistributedFileSystem) fs;
	dfs.recoverLease(path);

	final Deadline deadline = Deadline.now().plus(Duration.ofMillis(LEASE_TIMEOUT));

	boolean isClosed = dfs.isFileClosed(path);
	while (!isClosed && deadline.hasTimeLeft()) {
		try {
			Thread.sleep(500L);
		} catch (InterruptedException e1) {
			throw new IOException("Recovering the lease failed: ", e1);
		}
		isClosed = dfs.isFileClosed(path);
	}
	return isClosed;
}
 
Example 3
Source File: HiveProducer.java    From kylin-on-parquet-v2 with Apache License 2.0
private void closeFout() {
    if (fout != null) {
        try {
            fout.close();
        } catch (Exception e) {
            logger.error("Close the path: " + curPartitionContentPath + " failed", e);
            if (fs instanceof DistributedFileSystem) {
                DistributedFileSystem hdfs = (DistributedFileSystem) fs;
                try {
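                    // best-effort cleanup: ask the NameNode to release the lease so a later writer can reopen the file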
                    boolean recovered = hdfs.recoverLease(curPartitionContentPath);
                    logger.warn("Lease recovery for path: {} returned {}", curPartitionContentPath, recovered);
                } catch (Exception e1) {
                    logger.error("Recover lease for path: " + curPartitionContentPath + " failed", e1);
                }
            }
        }
    }
    fout = null;
}
 
Example 4
Source File: HDFSUtil.java    From phoenix-tephra with Apache License 2.0
/**
 * Try to recover the lease.
 * @param dfs The filesystem instance.
 * @param nbAttempt The number of this attempt.
 * @param p Path of the file to recover.
 * @param startWaiting Timestamp of when we started attempting to recover the file lease.
 * @return True if dfs#recoverLease returned true.
 * @throws java.io.FileNotFoundException if the file no longer exists.
 */
boolean recoverLease(final DistributedFileSystem dfs, final int nbAttempt, final Path p,
                     final long startWaiting)
  throws FileNotFoundException {
  boolean recovered = false;
  try {
    recovered = dfs.recoverLease(p);
    LOG.info("recoverLease=" + recovered + ", " +
               getLogMessageDetail(nbAttempt, p, startWaiting));
  } catch (IOException e) {
    if (e instanceof LeaseExpiredException && e.getMessage().contains("File does not exist")) {
      // This exception comes out instead of FNFE, fix it
      throw new FileNotFoundException("The given file wasn't found at " + p);
    } else if (e instanceof FileNotFoundException) {
      throw (FileNotFoundException) e;
    }
    LOG.warn(getLogMessageDetail(nbAttempt, p, startWaiting), e);
  }
  return recovered;
}
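On some HDFS versions a missing file surfaces as a LeaseExpiredException whose message contains "File does not exist" rather than as a FileNotFoundException; the catch block above normalizes both cases so that callers only ever see FileNotFoundException for a missing file.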
 
Example 5
Source File: FSHDFSUtils.java    From lucene-solr with Apache License 2.0
/**
 * Try to recover the lease.
 * @return True if dfs#recoverLease returned true.
 */
static boolean recoverLease(final DistributedFileSystem dfs, final int nbAttempt, final Path p, final long startWaiting)
  throws FileNotFoundException {
  boolean recovered = false;
  try {
    recovered = dfs.recoverLease(p);
    if (log.isInfoEnabled()) {
      log.info("recoverLease={}, {}", recovered, getLogMessageDetail(nbAttempt, p, startWaiting));
    }
  } catch (IOException e) {
    if (e.getMessage() != null && e.getMessage().contains("File does not exist")) {
      // This exception comes out instead of FNFE, fix it
      throw new FileNotFoundException("The given transactionlog file wasn't found at " + p);
    } else if (e instanceof FileNotFoundException) {
      throw (FileNotFoundException)e;
    }
    log.warn(getLogMessageDetail(nbAttempt, p, startWaiting), e);
  }
  return recovered;
}
 
Example 6
Source File: FSUtils.java    From hudi with Apache License 2.0
/**
 * When a file was opened and the task died without closing the stream, another task executor cannot open it because
 * the existing lease is still active. We try to recover the lease from HDFS. If a data node went down, it takes
 * about 10 minutes for the lease to be recovered; if only the client died, recovery should be near-instant.
 */
public static boolean recoverDFSFileLease(final DistributedFileSystem dfs, final Path p)
    throws IOException, InterruptedException {
  LOG.info("Recover lease on dfs file " + p);
  // initiate the recovery
  boolean recovered = false;
  for (int nbAttempt = 0; nbAttempt < MAX_ATTEMPTS_RECOVER_LEASE; nbAttempt++) {
    LOG.info("Attempt " + nbAttempt + " to recover lease on dfs file " + p);
    recovered = dfs.recoverLease(p);
    if (recovered) {
      break;
    }
    // Sleep for 1 second before trying again. Typically it takes about 2-3 seconds to recover
    // under default settings
    Thread.sleep(1000);
  }
  return recovered;
}
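As a usage illustration, a writer that wants to resume appending to a file would typically recover the lease first. This is a sketch rather than hudi code; the configuration, path, and append call are illustrative:

// Hypothetical caller (assumes the default filesystem is HDFS).
DistributedFileSystem dfs = (DistributedFileSystem) FileSystem.get(conf);
Path logPath = new Path("/tmp/example.log"); // illustrative path
if (FSUtils.recoverDFSFileLease(dfs, logPath)) {
    try (FSDataOutputStream out = dfs.append(logPath)) {
        out.writeBytes("resumed\n"); // safe to write once the lease is free
    }
} else {
    throw new IOException("Could not recover lease on " + logPath);
}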
 
Example 7
Source File: HiveProducer.java    From kylin with Apache License 2.0
private void closeFout() {
    if (fout != null) {
        try {
            logger.debug("Flush output stream {}.", curPartitionContentPath);
            fout.close();
        } catch (Exception e) {
            logger.error("Close the path: " + curPartitionContentPath + " failed", e);
            if (fs instanceof DistributedFileSystem) {
                DistributedFileSystem hdfs = (DistributedFileSystem) fs;
                try {
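                    // best-effort cleanup: ask the NameNode to release the lease so a later writer can reopen the file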
                    boolean recovered = hdfs.recoverLease(curPartitionContentPath);
                    logger.warn("Lease recovery for path: {} returned {}", curPartitionContentPath, recovered);
                } catch (Exception e1) {
                    logger.error("Recover lease for path: " + curPartitionContentPath + " failed", e1);
                }
            }
        }
    }
    fout = null;
}
 
Example 8
Source File: HadoopRecoverableFsDataOutputStream.java    From flink with Apache License 2.0
/**
 * Called when resuming execution after a failure and waits until the lease
 * of the file we are resuming is free.
 *
 * <p>The lease of the file we are resuming writing/committing to may still
 * belong to the process that failed previously and whose state we are
 * recovering.
 *
 * @param path The path to the file we want to resume writing to.
 */
private static boolean waitUntilLeaseIsRevoked(final FileSystem fs, final Path path) throws IOException {
	Preconditions.checkState(fs instanceof DistributedFileSystem);

	final DistributedFileSystem dfs = (DistributedFileSystem) fs;
	dfs.recoverLease(path);

	final Deadline deadline = Deadline.now().plus(Duration.ofMillis(LEASE_TIMEOUT));

	boolean isClosed = dfs.isFileClosed(path);
	while (!isClosed && deadline.hasTimeLeft()) {
		try {
			Thread.sleep(500L);
		} catch (InterruptedException e1) {
			throw new IOException("Recovering the lease failed: ", e1);
		}
		isClosed = dfs.isFileClosed(path);
	}
	return isClosed;
}
 
Example 9
Source File: RecoverLeaseFSUtils.java    From hbase with Apache License 2.0
/**
 * Try to recover the lease.
 * @return True if dfs#recoverLease returned true.
 */
private static boolean recoverLease(final DistributedFileSystem dfs, final int nbAttempt,
  final Path p, final long startWaiting) throws FileNotFoundException {
  boolean recovered = false;
  try {
    recovered = dfs.recoverLease(p);
    LOG.info((recovered ? "Recovered lease, " : "Failed to recover lease, ") +
      getLogMessageDetail(nbAttempt, p, startWaiting));
  } catch (IOException e) {
    if (e instanceof LeaseExpiredException && e.getMessage().contains("File does not exist")) {
      // This exception comes out instead of FNFE, fix it
      throw new FileNotFoundException("The given WAL wasn't found at " + p);
    } else if (e instanceof FileNotFoundException) {
      throw (FileNotFoundException) e;
    }
    LOG.warn(getLogMessageDetail(nbAttempt, p, startWaiting), e);
  }
  return recovered;
}
 
Example 10
Source File: TestHASafeMode.java    From hadoop with Apache License 2.0
/** Test NN crash and client crash/stuck immediately after block allocation */
@Test(timeout = 100000)
public void testOpenFileWhenNNAndClientCrashAfterAddBlock() throws Exception {
  cluster.getConfiguration(0).set(
      DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY, "1.0f");
  String testData = "testData";
  // to make sure we write the full block before creating dummy block at NN.
  cluster.getConfiguration(0).setInt("io.bytes.per.checksum",
      testData.length());
  cluster.restartNameNode(0);
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    cluster.transitionToStandby(1);
    DistributedFileSystem dfs = cluster.getFileSystem(0);
    String pathString = "/tmp1.txt";
    Path filePath = new Path(pathString);
    FSDataOutputStream create = dfs.create(filePath,
        FsPermission.getDefault(), true, 1024, (short) 3, testData.length(),
        null);
    create.write(testData.getBytes());
    create.hflush();
    long fileId = ((DFSOutputStream)create.
        getWrappedStream()).getFileId();
    FileStatus fileStatus = dfs.getFileStatus(filePath);
    DFSClient client = DFSClientAdapter.getClient(dfs);
    // add one dummy block at NN, but not write to DataNode
    ExtendedBlock previousBlock =
        DFSClientAdapter.getPreviousBlock(client, fileId);
    DFSClientAdapter.getNamenode(client).addBlock(
        pathString,
        client.getClientName(),
        new ExtendedBlock(previousBlock),
        new DatanodeInfo[0],
        DFSClientAdapter.getFileId((DFSOutputStream) create
            .getWrappedStream()), null);
    cluster.restartNameNode(0, true);
    cluster.restartDataNode(0);
    cluster.transitionToActive(0);
    // let the block reports be processed.
    Thread.sleep(2000);
    FSDataInputStream is = dfs.open(filePath);
    is.close();
    dfs.recoverLease(filePath); // initiate recovery
    assertTrue("Recovery should also succeed", dfs.recoverLease(filePath));
  } finally {
    cluster.shutdown();
  }
}
 
Example 11
Source File: TestHASafeMode.java    From big-c with Apache License 2.0
/** Test NN crash and client crash/stuck immediately after block allocation */
@Test(timeout = 100000)
public void testOpenFileWhenNNAndClientCrashAfterAddBlock() throws Exception {
  cluster.getConfiguration(0).set(
      DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY, "1.0f");
  String testData = "testData";
  // to make sure we write the full block before creating dummy block at NN.
  cluster.getConfiguration(0).setInt("io.bytes.per.checksum",
      testData.length());
  cluster.restartNameNode(0);
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    cluster.transitionToStandby(1);
    DistributedFileSystem dfs = cluster.getFileSystem(0);
    String pathString = "/tmp1.txt";
    Path filePath = new Path(pathString);
    FSDataOutputStream create = dfs.create(filePath,
        FsPermission.getDefault(), true, 1024, (short) 3, testData.length(),
        null);
    create.write(testData.getBytes());
    create.hflush();
    long fileId = ((DFSOutputStream)create.
        getWrappedStream()).getFileId();
    FileStatus fileStatus = dfs.getFileStatus(filePath);
    DFSClient client = DFSClientAdapter.getClient(dfs);
    // add one dummy block at NN, but not write to DataNode
    ExtendedBlock previousBlock =
        DFSClientAdapter.getPreviousBlock(client, fileId);
    DFSClientAdapter.getNamenode(client).addBlock(
        pathString,
        client.getClientName(),
        new ExtendedBlock(previousBlock),
        new DatanodeInfo[0],
        DFSClientAdapter.getFileId((DFSOutputStream) create
            .getWrappedStream()), null);
    cluster.restartNameNode(0, true);
    cluster.restartDataNode(0);
    cluster.transitionToActive(0);
    // let the block reports be processed.
    Thread.sleep(2000);
    FSDataInputStream is = dfs.open(filePath);
    is.close();
    dfs.recoverLease(filePath); // initiate recovery
    assertTrue("Recovery should also succeed", dfs.recoverLease(filePath));
  } finally {
    cluster.shutdown();
  }
}