Java Code Examples for org.apache.hadoop.hdfs.DFSUtil#getCorruptFiles()

The following examples show how to use org.apache.hadoop.hdfs.DFSUtil#getCorruptFiles(). You can go to the original project or source file by following the links above each example.
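All of the examples below share the same basic call pattern: obtain a DistributedFileSystem handle and pass it to DFSUtil.getCorruptFiles(), which asks the namenode for the paths of files with corrupt blocks. As a minimal, self-contained sketch (assuming a reachable HDFS namenode on the default Configuration; the class name and the "/" path are illustrative, not taken from the examples):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;

public class ListCorruptFiles {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = new Path("/").getFileSystem(conf);
    if (!(fs instanceof DistributedFileSystem)) {
      throw new IOException("getCorruptFiles() requires a DistributedFileSystem");
    }
    // Ask the namenode for the paths of files with corrupt blocks.
    String[] corrupt = DFSUtil.getCorruptFiles((DistributedFileSystem) fs);
    for (String f : corrupt) {
      System.out.println(f);
    }
  }
}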
Example 1
Source File: LocalBlockIntegrityMonitor.java    From RDFS with Apache License 2.0
/**
 * @return A list of corrupt files as obtained from the namenode
 */
List<String> getCorruptFiles() throws IOException {
  DistributedFileSystem dfs = helper.getDFS(new Path("/"));

  String[] files = DFSUtil.getCorruptFiles(dfs);
  List<String> corruptFiles = new LinkedList<String>();
  for (String f: files) {
    corruptFiles.add(f);
  }
  RaidUtils.filterTrash(getConf(), corruptFiles);
  return corruptFiles;
}
 
Example 2
Source File: TestRaidShellFsck.java    From RDFS with Apache License 2.0
/**
 * Sleeps for up to 20s until the number of corrupt files
 * in the file system equals the number specified.
 */
static public void waitUntilCorruptFileCount(DistributedFileSystem dfs,
                                       int corruptFiles)
  throws IOException, InterruptedException {
  long waitStart = System.currentTimeMillis();
  while (DFSUtil.getCorruptFiles(dfs).length != corruptFiles &&
      System.currentTimeMillis() < waitStart + 20000L) {
    Thread.sleep(1000);
  }
  assertEquals("expected " + corruptFiles + " corrupt files", 
      corruptFiles, DFSUtil.getCorruptFiles(dfs).length);
}
 
Example 3
Source File: TestRaidShellFsck_CorruptCounter.java    From RDFS with Apache License 2.0
/**
 * Sleeps for up to 20s until the number of corrupt files
 * in the file system equals the number specified.
 */
private void waitUntilCorruptFileCount(DistributedFileSystem dfs,
                                       int corruptFiles)
  throws IOException {
  long waitStart = System.currentTimeMillis();
  while (DFSUtil.getCorruptFiles(dfs).length != corruptFiles) {
    try {
      Thread.sleep(1000);
    } catch (InterruptedException ignore) {
      // keep polling; the timeout check below bounds the total wait
    }

    if (System.currentTimeMillis() > waitStart + 20000L) {
      break;
    }
  }

  int corruptFilesFound = DFSUtil.getCorruptFiles(dfs).length;
  if (corruptFilesFound != corruptFiles) {
    throw new IOException("expected " + corruptFiles +
                          " corrupt files but got " +
                          corruptFilesFound);
  }
}
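Examples 2 and 3 implement the same poll-until-count loop with a hard-coded 20-second limit. A generalized variant with a caller-supplied timeout might look like the sketch below (hypothetical code, not from the RDFS sources):

/**
 * Hypothetical helper: polls the namenode until the corrupt-file count
 * matches the expected value, or the caller-supplied timeout elapses.
 */
static void waitUntilCorruptFileCount(DistributedFileSystem dfs,
                                      int expected, long timeoutMs)
    throws IOException, InterruptedException {
  long deadline = System.currentTimeMillis() + timeoutMs;
  while (DFSUtil.getCorruptFiles(dfs).length != expected &&
         System.currentTimeMillis() < deadline) {
    Thread.sleep(1000);
  }
  int found = DFSUtil.getCorruptFiles(dfs).length;
  if (found != expected) {
    throw new IOException("expected " + expected +
                          " corrupt files but got " + found);
  }
}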
 
Example 4
Source File: FileFixer.java    From RDFS with Apache License 2.0
/**
 * @return A list of corrupt files as obtained from the namenode.
 * If the namenode is down, returns an empty list.
 */
List<Path> getCorruptFilesFromNamenode(FileSystem fs) throws IOException {
  if (!(fs instanceof DistributedFileSystem)) {
    throw new IOException("Only DistributedFileSystem can be handled " +
                          "by HighTide.");
  }

  DistributedFileSystem dfs = (DistributedFileSystem) fs;
  List<Path> corruptFiles = new LinkedList<Path>();

  try {
    LOG.info("Checking filesystem: " + dfs.getUri());
    String[] files = DFSUtil.getCorruptFiles(dfs);
    for (String f: files) {
      Path p = new Path(f).makeQualified(fs);
      corruptFiles.add(p);
    }
    return corruptFiles;
  } catch (Exception e) {
    // if a single namenode is down, log it and ignore. Continue to
    // fix other namenodes.
    LOG.warn("getCorruptFilesFromNamenode: Unable to contact filesystem: " + fs.getUri() +
             " ignoring..." + e);
    e.printStackTrace();
    return corruptFiles;
  }
}
 
Example 5
Source File: TestFileCorruption.java    From RDFS with Apache License 2.0
/**
 * check if listCorruptFileBlocks() returns the right number of
 * corrupt files if there are two corrupt files with the same name
 * in different directories
 */
public void test2CorruptFilesWithSameName() throws Exception {
  MiniDFSCluster cluster = null;
  Random random = new Random();
  
  try {
    Configuration conf = new Configuration();
    // datanode scans directories
    conf.setInt("dfs.datanode.directoryscan.interval", 1);
    // datanode sends block reports 
    conf.setInt("dfs.blockreport.intervalMsec", 3 * 1000);
    conf.setBoolean("dfs.permissions", false);
    cluster = new MiniDFSCluster(conf, 1, true, null);
    FileSystem fs = cluster.getFileSystem();
    
    assertTrue("fs is not a DFS", fs instanceof DistributedFileSystem);
    DistributedFileSystem dfs = (DistributedFileSystem) fs;

    Path file1 = new Path("/srcdat12/test2file.test");
    Path file2 = new Path("/srcdat13/test2file.test");
    // create two files with the same name
    DFSTestUtil.createFile(fs, file1, 1L, (short)1, 1L);
    DFSTestUtil.createFile(fs, file2, 1L, (short)1, 1L);

    // fetch bad file list from namenode. There should be none.
    ClientProtocol namenode = DFSClient.createNamenode(conf);
    String[] badFiles = DFSUtil.getCorruptFiles(dfs);
    assertTrue("Namenode has " + badFiles.length +
               " corrupt files. Expecting None.",
        badFiles.length == 0);

    // Now deliberately corrupt one block in each file
    Path[] files = {file1, file2};
    for (Path file: files) {
      LocatedBlocks fileBlocks = 
        namenode.getBlockLocations(file.toString(), 0, 1L);
      LocatedBlock block = fileBlocks.get(0);
      // Each datanode gets two data directories; node 0 uses data1 and data2.
      File dir1 = cluster.getBlockDirectory("data" + (2 * 0 + 1));
      File dir2 = cluster.getBlockDirectory("data" + (2 * 0 + 2));
      if (!(dir1.isDirectory() && dir2.isDirectory())) {
        throw new IOException("data directories not found for data node 0: " +
                              dir1.toString() + " " + dir2.toString());
      }

      File[] dirs = new File[2];
      dirs[0] = dir1; 
      dirs[1] = dir2;
      for (File dir: dirs) {
        File[] blockFiles = dir.listFiles();
        if ((blockFiles == null) || (blockFiles.length == 0)) {
          throw new IOException("no blocks found in data node's data directory");
        }

        for (File blockFile: blockFiles) {
          if ((blockFile.getName().
               startsWith("blk_" + block.getBlock().getBlockId())) &&
              (!blockFile.getName().endsWith(".meta"))) {
            blockFile.delete();
          }
        }
      }
      LocatedBlock[] toReport = { block };
      namenode.reportBadBlocks(toReport);
    }

    // fetch bad file list from namenode. There should be 2.
    badFiles = DFSUtil.getCorruptFiles(dfs);
    assertTrue("Namenode has " + badFiles.length + " bad files. Expecting 2.",
        badFiles.length == 2);
  } finally {
    if (cluster != null) {
      cluster.shutdown(); 
    }
  }
}
 
Example 6
Source File: TestDirectoryBlockFixer.java    From RDFS with Apache License 2.0
/**
 * Creates a directory of test files, corrupts blocks in several of them,
 * and waits for the files to be fixed.
 */
private void implDirBlockFix(boolean local) throws Exception {
  LOG.info("Test testDirBlockFix started.");
  int stripeLength = 3;
  mySetup(stripeLength);
  long[] crcs = new long[3];
  int[] seeds = new int[3];
  Path dirPath = new Path("/user/dhruba/raidtestrs");
  Path[] files = TestRaidDfs.createTestFiles(dirPath,
      fileSizes, blockSizes, crcs, seeds, fileSys, (short)1);
  Path destPath = new Path("/destraidrs/user/dhruba");
  LOG.info("Test testDirBlockFix created test files");
  Configuration localConf = this.getRaidNodeConfig(conf, local);

  try {
    cnode = RaidNode.createRaidNode(null, localConf);
    TestRaidDfs.waitForDirRaided(LOG, fileSys, dirPath, destPath);
    cnode.stop(); cnode.join();
    
    DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
    String[] corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("no corrupt files expected", 0, corruptFiles.length);
    assertEquals("filesFixed() should return 0 before fixing files",
                 0, cnode.blockIntegrityMonitor.getNumFilesFixed());
    this.corruptFiles(dirPath, crcs, rsCorruptFileIdx1, dfs, files, 
        rsNumCorruptBlocksInFiles1);
    cnode = RaidNode.createRaidNode(null, localConf);
    long start = System.currentTimeMillis();
    while (cnode.blockIntegrityMonitor.getNumFilesFixed() < 3 &&
           System.currentTimeMillis() - start < 120000) {
      LOG.info("Test testDirBlockFix waiting for files to be fixed.");
      Thread.sleep(1000);
    }
    assertEquals("file not fixed", 3,
        cnode.blockIntegrityMonitor.getNumFilesFixed());
    
    dfs = getDFS(conf, dfs);
    for (int i = 0; i < fileSizes.length; i++) {
      assertTrue("file " + files[i] + " not fixed",
          TestRaidDfs.validateFile(dfs, files[i], fileSizes[i],
            crcs[i]));
    }
  } catch (Exception e) {
    LOG.info("Test testDirBlockFix Exception " + e, e);
    throw e;
  } finally {
    myTearDown();
  }
  LOG.info("Test testDirBlockFix completed.");
}
 
Example 7
Source File: TestDirectoryBlockFixer.java    From RDFS with Apache License 2.0
/**
 * Tests integrity of generated block.
 * Create a file and delete a block entirely. Wait for the block to be
 * regenerated. Now stop RaidNode and corrupt the generated block.
 * Test that corruption in the generated block can be detected by clients.
 */
private void generatedBlockTestCommon(String testName, int blockToCorrupt,
                                      boolean local) throws Exception {
  LOG.info("Test " + testName + " started.");
  int stripeLength = 3;
  mySetup(stripeLength);
  long[] crcs = new long[3];
  int[] seeds = new int[3];
  Path dirPath = new Path("/user/dhruba/raidtest");
  Path[] files = TestRaidDfs.createTestFiles(dirPath,
      fileSizes, blockSizes, crcs, seeds, fileSys, (short)1);
  Path destPath = new Path("/destraid/user/dhruba");
  LOG.info("Test " + testName + " created test files");
  Configuration localConf = this.getRaidNodeConfig(conf, local);

  try {
    cnode = RaidNode.createRaidNode(null, localConf);
    TestRaidDfs.waitForDirRaided(LOG, fileSys, dirPath, destPath);
    cnode.stop(); cnode.join();
    
    DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
    String[] corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("no corrupt files expected", 0, corruptFiles.length);
    assertEquals("filesFixed() should return 0 before fixing files",
                 0, cnode.blockIntegrityMonitor.getNumFilesFixed());
    
    Integer[] corruptBlockIdxs = new Integer[]{blockToCorrupt};
    TestDirectoryRaidDfs.corruptBlocksInDirectory(conf, dirPath,
        crcs, corruptBlockIdxs, fileSys, dfsCluster, false, true);
    
    corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("files not corrupted", corruptBlockIdxs.length,
        corruptFiles.length);
    int corruptFileIdx = -1;
    for (int i = 0; i < files.length; i++) {
      if (files[i].toUri().getPath().equals(corruptFiles[0])) {
        corruptFileIdx = i;
        break;
      }
    }
    assertNotSame("Wrong corrupt file", -1, corruptFileIdx);
    cnode = RaidNode.createRaidNode(null, localConf);
    long start = System.currentTimeMillis();
    while (cnode.blockIntegrityMonitor.getNumFilesFixed() < 1 &&
           System.currentTimeMillis() - start < 120000) {
      LOG.info("Test testDirBlockFix waiting for files to be fixed.");
      Thread.sleep(1000);
    }
    assertEquals("file not fixed", 1,
        cnode.blockIntegrityMonitor.getNumFilesFixed());
    // Stop RaidNode
    cnode.stop(); cnode.join(); cnode = null;

    // The block has successfully been reconstructed.
    dfs = getDFS(conf, dfs);
    assertTrue("file not fixed",
               TestRaidDfs.validateFile(dfs, files[corruptFileIdx], 
                   fileSizes[corruptFileIdx], crcs[corruptFileIdx]));

    // Now corrupt the generated block.
    TestDirectoryRaidDfs.corruptBlocksInDirectory(conf, dirPath,
        crcs, corruptBlockIdxs, dfs, dfsCluster, false, false);
    try {
      TestRaidDfs.validateFile(dfs, files[corruptFileIdx], 
          fileSizes[corruptFileIdx], crcs[corruptFileIdx]);
      fail("Expected exception not thrown");
    } catch (org.apache.hadoop.fs.ChecksumException ce) {
      // expected: the client detects the corrupted block
    } catch (org.apache.hadoop.fs.BlockMissingException bme) {
      // expected: the client detects the missing block
    }
  } catch (Exception e) {
    LOG.info("Test " + testName + " Exception " + e, e);
    throw e;
  } finally {
    myTearDown();
  }
  LOG.info("Test " + testName + " completed.");
}
 
Example 8
Source File: TestDirectoryBlockFixer.java    From RDFS with Apache License 2.0
/**
 * Corrupt a parity file and wait for it to get fixed.
 */
private void implParityBlockFix(String testName, boolean local)
  throws Exception {
  LOG.info("Test " + testName + " started.");
  int stripeLength = 3;
  mySetup(stripeLength); 
  long[] crcs = new long[3];
  int[] seeds = new int[3];
  Path dirPath = new Path("/user/dhruba/raidtest");
  Path[] files = TestRaidDfs.createTestFiles(dirPath,
      fileSizes, blockSizes, crcs, seeds, fileSys, (short)1);
  Path destPath = new Path("/destraid/user/dhruba");
  Path parityFile = new Path("/destraid/user/dhruba/raidtest");
  LOG.info("Test " + testName + " created test files");
  Configuration localConf = this.getRaidNodeConfig(conf, local);

  try {
    cnode = RaidNode.createRaidNode(null, localConf);
    TestRaidDfs.waitForDirRaided(LOG, fileSys, dirPath, destPath);
    cnode.stop(); cnode.join();

    long parityCRC = RaidDFSUtil.getCRC(fileSys, parityFile);

    FileStatus parityStat = fileSys.getFileStatus(parityFile);
    DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
    LocatedBlocks locs = RaidDFSUtil.getBlockLocations(
      dfs, parityFile.toUri().getPath(), 0, parityStat.getLen());
    String[] corruptFiles = DFSUtil.getCorruptFiles(dfs);
    
    assertEquals("no corrupt files expected", 0, corruptFiles.length);
    assertEquals("filesFixed() should return 0 before fixing files",
                 0, cnode.blockIntegrityMonitor.getNumFilesFixed());

    // Corrupt parity blocks for different stripes.
    int[] corruptBlockIdxs = new int[]{0, 1, 2};
    for (int idx: corruptBlockIdxs)
      corruptBlock(locs.get(idx).getBlock().getBlockName(), dfsCluster);
    RaidDFSUtil.reportCorruptBlocks(dfs, parityFile, corruptBlockIdxs,
        2*blockSize);

    corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("file not corrupted",
                 1, corruptFiles.length);
    assertEquals("wrong file corrupted",
                 corruptFiles[0], parityFile.toUri().getPath());

    cnode = RaidNode.createRaidNode(null, localConf);
    long start = System.currentTimeMillis();
    while (cnode.blockIntegrityMonitor.getNumFilesFixed() < 1 &&
           System.currentTimeMillis() - start < 120000) {
      LOG.info("Test " + testName + " waiting for files to be fixed.");
      Thread.sleep(3000);
    }
    assertEquals("file not fixed",
                 1, cnode.blockIntegrityMonitor.getNumFilesFixed());

    long checkCRC = RaidDFSUtil.getCRC(fileSys, parityFile);

    assertEquals("file not fixed",
                 parityCRC, checkCRC);

  } catch (Exception e) {
    LOG.info("Test " + testName + " Exception " + e +
             StringUtils.stringifyException(e));
    throw e;
  } finally {
    myTearDown();
  }
  LOG.info("Test " + testName + " completed.");
}
 
Example 9
Source File: TestDirectoryBlockFixer.java    From RDFS with Apache License 2.0
/**
 * Tests that we can have two concurrent jobs fixing files
 * (distributed block fixer).
 */
@Test
public void testConcurrentJobs() throws Exception {
  LOG.info("Test testConcurrentJobs started.");
  int stripeLength = 3;
  mySetup(stripeLength); 
  long[] crcs1 = new long[3];
  int[] seeds1 = new int[3];
  long[] crcs2 = new long[3];
  int[] seeds2 = new int[3];
  Path dirPath1 = new Path("/user/dhruba/raidtestrs/1");
  Path[] files1 = TestRaidDfs.createTestFiles(dirPath1,
      fileSizes, blockSizes, crcs1, seeds1, fileSys, (short)1);
  Path dirPath2 = new Path("/user/dhruba/raidtestrs/2");
  Path[] files2 = TestRaidDfs.createTestFiles(dirPath2,
      fileSizes, blockSizes, crcs2, seeds2, fileSys, (short)1);
  Path destPath = new Path("/destraidrs/user/dhruba/raidtestrs");
  
  LOG.info("Test testConcurrentJobs created test files");
  Configuration localConf = this.getRaidNodeConfig(conf, false);
  try {
    cnode = RaidNode.createRaidNode(null, localConf);
    TestRaidDfs.waitForDirRaided(LOG, fileSys, dirPath1, destPath);
    TestRaidDfs.waitForDirRaided(LOG, fileSys, dirPath2, destPath);
    cnode.stop(); cnode.join();
    
    DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
    String[] corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("no corrupt files expected", 0, corruptFiles.length);
    assertEquals("filesFixed() should return 0 before fixing files",
                 0, cnode.blockIntegrityMonitor.getNumFilesFixed());
    //corrupt directory 1
    this.corruptFiles(dirPath1, crcs1, rsCorruptFileIdx1, dfs, files1, 
        rsNumCorruptBlocksInFiles1);

    cnode = RaidNode.createRaidNode(null, localConf);
    DistBlockIntegrityMonitor blockFixer =
        (DistBlockIntegrityMonitor) cnode.blockIntegrityMonitor;
    long start = System.currentTimeMillis();

    // All files are HIGH-PRI corrupt files
    while (blockFixer.jobsRunning() < 1 &&
           System.currentTimeMillis() - start < 60000) {
      LOG.info("Test testDirBlockFix waiting for fixing job 1 to start");
      Thread.sleep(1000);
    }
    assertEquals("job 1 not running", 1, blockFixer.jobsRunning());
    
    //Corrupt directory 2
    this.corruptFiles(dirPath2, crcs2, rsCorruptFileIdx2, dfs, files2,
        rsNumCorruptBlocksInFiles2);
    
    // 1 LOW-PRI file and 2 HIGH-PRI files 
    while (blockFixer.jobsRunning() < 3 &&
           System.currentTimeMillis() - start < 60000) {
      LOG.info("Test testDirBlockFix waiting for fixing job 2 and 3 to start");
      Thread.sleep(1000);
    }
    assertEquals("3 jobs are running", 3, blockFixer.jobsRunning());

    while (blockFixer.getNumFilesFixed() < 6 &&
           System.currentTimeMillis() - start < 240000) {
      LOG.info("Test testDirBlockFix waiting for files to be fixed.");
      Thread.sleep(1000);
    }
    assertEquals("files not fixed", 6, blockFixer.getNumFilesFixed());
    dfs = getDFS(conf, dfs);
    for (int i = 0; i < fileSizes.length; i++) {
      assertTrue("file " + files1[i] + " not fixed",
               TestRaidDfs.validateFile(dfs, files1[i], fileSizes[i], crcs1[i]));
    }
    for (int i = 0; i < fileSizes.length; i++) {
      assertTrue("file " + files2[i] + " not fixed",
               TestRaidDfs.validateFile(dfs, files2[i], fileSizes[i], crcs2[i]));
    }
  } catch (Exception e) {
    LOG.info("Test testConcurrentJobs exception " + e, e);
    throw e;
  } finally {
    myTearDown();
  }
}
 
Example 10
Source File: TestDirectoryBlockFixer.java    From RDFS with Apache License 2.0
/**
 * Tests that the distributed block fixer obeys the limit on
 * how many jobs to submit simultaneously.
 */
@Test
public void testMaxPendingJobs() throws Exception {
  LOG.info("Test testMaxPendingJobs started.");
  int stripeLength = 3;
  mySetup(stripeLength); 
  long[] crcs1 = new long[3];
  int[] seeds1 = new int[3];
  long[] crcs2 = new long[3];
  int[] seeds2 = new int[3];
  Path dirPath1 = new Path("/user/dhruba/raidtestrs/1");
  Path[] files1 = TestRaidDfs.createTestFiles(dirPath1,
      fileSizes, blockSizes, crcs1, seeds1, fileSys, (short)1);
  Path dirPath2 = new Path("/user/dhruba/raidtestrs/2");
  Path[] files2 = TestRaidDfs.createTestFiles(dirPath2,
      fileSizes, blockSizes, crcs2, seeds2, fileSys, (short)1);
  Path destPath = new Path("/destraidrs/user/dhruba/raidtestrs");
  LOG.info("Test testMaxPendingJobs created test files");
  Configuration localConf = this.getRaidNodeConfig(conf, false);
  localConf.setLong("raid.blockfix.maxpendingjobs", 1L);

  try {
    cnode = RaidNode.createRaidNode(null, localConf);
    TestRaidDfs.waitForDirRaided(LOG, fileSys, dirPath1, destPath);
    TestRaidDfs.waitForDirRaided(LOG, fileSys, dirPath2, destPath);
    cnode.stop(); cnode.join();

    DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
    String[] corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("no corrupt files expected", 0, corruptFiles.length);
    assertEquals("filesFixed() should return 0 before fixing files",
                 0, cnode.blockIntegrityMonitor.getNumFilesFixed());
    //corrupt directory 1
    this.corruptFiles(dirPath1, crcs1, rsCorruptFileIdx1, dfs, files1, 
        rsNumCorruptBlocksInFiles1);

    cnode = RaidNode.createRaidNode(null, localConf);
    DistBlockIntegrityMonitor blockFixer = (DistBlockIntegrityMonitor) cnode.blockIntegrityMonitor;
    long start = System.currentTimeMillis();

    while (blockFixer.jobsRunning() < 1 &&
           System.currentTimeMillis() - start < 60000) {
      LOG.info("Test testDirBlockFix waiting for fixing job 1 to start");
      Thread.sleep(1000);
    }
    assertEquals("job not running", 1, blockFixer.jobsRunning());

    //corrupt directory 2
    this.corruptFiles(dirPath2, crcs2, rsCorruptFileIdx2, dfs, files2,
        rsNumCorruptBlocksInFiles2);
    
    // wait until the files in both directories are fixed
    while (blockFixer.getNumFilesFixed() < 6 &&
           System.currentTimeMillis() - start < 120000) {
      // make sure the block fixer does not start a second job while
      // the first one is still running
      assertTrue("too many jobs running", blockFixer.jobsRunning() <= 1);
      Thread.sleep(1000);
    }
    assertEquals("files not fixed", 6, blockFixer.getNumFilesFixed());
    dfs = getDFS(conf, dfs);
    for (int i = 0; i < fileSizes.length; i++) {
      assertTrue("file " + files1[i] + " not fixed",
               TestRaidDfs.validateFile(dfs, files1[i], fileSizes[i], crcs1[i]));
    }
    for (int i = 0; i < fileSizes.length; i++) {
      assertTrue("file " + files2[i] + " not fixed",
               TestRaidDfs.validateFile(dfs, files2[i], fileSizes[i], crcs2[i]));
    }
  } catch (Exception e) {
    LOG.info("Test testMaxPendingJobs exception " + e +
             StringUtils.stringifyException(e));
    throw e;
  } finally {
    myTearDown();
  }
}
 
Example 11
Source File: TestBlockFixer.java    From RDFS with Apache License 2.0
/**
 * Creates a file with three stripes, corrupts a block in each stripe,
 * and waits for the file to be fixed.
 */
private void implBlockFix(boolean local) throws Exception {
  LOG.info("Test testBlockFix started.");
  long blockSize = 8192L;
  int stripeLength = 3;
  mySetup(stripeLength, -1); // never har
  Path file1 = new Path("/user/dhruba/raidtest/file1");
  Path destPath = new Path("/destraid/user/dhruba/raidtest");
  long crc1 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
                                                        1, 7, blockSize);
  long file1Len = fileSys.getFileStatus(file1).getLen();
  LOG.info("Test testBlockFix created test files");

  // create an instance of the RaidNode
  Configuration localConf = new Configuration(conf);
  localConf.setInt("raid.blockfix.interval", 1000);
  if (local) {
    localConf.set("raid.blockfix.classname",
                  "org.apache.hadoop.raid.LocalBlockIntegrityMonitor");
  } else {
    localConf.set("raid.blockfix.classname",
                  "org.apache.hadoop.raid.DistBlockIntegrityMonitor");
  }
  localConf.setLong("raid.blockfix.filespertask", 2L);

  try {
    cnode = RaidNode.createRaidNode(null, localConf);
    TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
    cnode.stop(); cnode.join();
    
    FileStatus srcStat = fileSys.getFileStatus(file1);
    DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
    LocatedBlocks locs = RaidDFSUtil.getBlockLocations(
      dfs, file1.toUri().getPath(), 0, srcStat.getLen());

    String[] corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("no corrupt files expected", 0, corruptFiles.length);
    assertEquals("filesFixed() should return 0 before fixing files",
                 0, cnode.blockIntegrityMonitor.getNumFilesFixed());
    
    // Corrupt one block in each of the three stripes. We can fix them.
    int[] corruptBlockIdxs = new int[]{0, 4, 6};
    for (int idx: corruptBlockIdxs)
      corruptBlock(locs.get(idx).getBlock().getBlockName(), dfsCluster);
    RaidDFSUtil.reportCorruptBlocks(dfs, file1, corruptBlockIdxs, blockSize);
    
    corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("file not corrupted", 1, corruptFiles.length);
    assertEquals("wrong file corrupted",
                 corruptFiles[0], file1.toUri().getPath());
    assertEquals("wrong number of corrupt blocks", 3,
      RaidDFSUtil.corruptBlocksInFile(dfs, file1.toUri().getPath(), 0,
        srcStat.getLen()).size());

    cnode = RaidNode.createRaidNode(null, localConf);
    long start = System.currentTimeMillis();
    while (cnode.blockIntegrityMonitor.getNumFilesFixed() < 1 &&
           System.currentTimeMillis() - start < 120000) {
      LOG.info("Test testBlockFix waiting for files to be fixed.");
      Thread.sleep(1000);
    }
    assertEquals("file not fixed", 1, cnode.blockIntegrityMonitor.getNumFilesFixed());
    
    dfs = getDFS(conf, dfs);
    assertTrue("file not fixed",
               TestRaidDfs.validateFile(dfs, file1, file1Len, crc1));

  } catch (Exception e) {
    LOG.info("Test testBlockFix Exception " + e +
             StringUtils.stringifyException(e));
    throw e;
  } finally {
    myTearDown();
  }
  LOG.info("Test testBlockFix completed.");
}
 
Example 12
Source File: TestBlockFixer.java    From RDFS with Apache License 2.0
/**
 * Tests integrity of generated block.
 * Create a file and delete a block entirely. Wait for the block to be
 * regenerated. Now stop RaidNode and corrupt the generated block.
 * Test that corruption in the generated block can be detected by clients.
 */
private void generatedBlockTestCommon(String testName, int blockToCorrupt,
                                      boolean local) throws Exception {
  LOG.info("Test " + testName + " started.");
  long blockSize = 8192L;
  int stripeLength = 3;
  mySetup(stripeLength, -1); // never har
  Path file1 = new Path("/user/dhruba/raidtest/file1");
  Path destPath = new Path("/destraid/user/dhruba/raidtest");
  long crc1 = TestRaidDfs.createTestFile(fileSys, file1, 1, 7, blockSize);
  long file1Len = fileSys.getFileStatus(file1).getLen();
  LOG.info("Test " + testName + " created test files");

  // create an instance of the RaidNode
  Configuration localConf = new Configuration(conf);
  localConf.setInt("raid.blockfix.interval", 1000);
  if (local) {
    localConf.set("raid.blockfix.classname",
                  "org.apache.hadoop.raid.LocalBlockIntegrityMonitor");
  } else {
    localConf.set("raid.blockfix.classname",
                  "org.apache.hadoop.raid.DistBlockIntegrityMonitor");
  }
  localConf.setLong("raid.blockfix.filespertask", 2L);
  try {
    cnode = RaidNode.createRaidNode(null, localConf);
    TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
    cnode.stop(); cnode.join();
    
    FileStatus srcStat = fileSys.getFileStatus(file1);
    DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
    LocatedBlocks locs = RaidDFSUtil.getBlockLocations(
      dfs, file1.toUri().getPath(), 0, srcStat.getLen());

    String[] corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("no corrupt files expected", 0, corruptFiles.length);
    assertEquals("filesFixed() should return 0 before fixing files",
                 0, cnode.blockIntegrityMonitor.getNumFilesFixed());
    
    corruptBlock(locs.get(0).getBlock().getBlockName(), dfsCluster);
    RaidDFSUtil.reportCorruptBlocks(dfs, file1, new int[]{0}, blockSize);
    
    corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("file not corrupted",
                 1, corruptFiles.length);
    assertEquals("wrong file corrupted",
                 corruptFiles[0], file1.toUri().getPath());
    
    cnode = RaidNode.createRaidNode(null, localConf);
    long start = System.currentTimeMillis();
    while (cnode.blockIntegrityMonitor.getNumFilesFixed() < 1 &&
           System.currentTimeMillis() - start < 120000) {
      LOG.info("Test " + testName + " waiting for files to be fixed.");
      Thread.sleep(1000);
    }
    assertEquals("file not fixed",
                 1, cnode.blockIntegrityMonitor.getNumFilesFixed());
    
    // Stop RaidNode
    cnode.stop(); cnode.join(); cnode = null;

    // The block has successfully been reconstructed.
    dfs = getDFS(conf, dfs);
    assertTrue("file not fixed",
               TestRaidDfs.validateFile(dfs, file1, file1Len, crc1));

    // Now corrupt the generated block.
    locs = RaidDFSUtil.getBlockLocations(
      dfs, file1.toUri().getPath(), 0, srcStat.getLen());
    corruptBlock(locs.get(0).getBlock().getBlockName(), dfsCluster);
    RaidDFSUtil.reportCorruptBlocks(dfs, file1, new int[]{0}, blockSize);

    try {
      Thread.sleep(5*1000);
    } catch (InterruptedException ignore) {
    }
    try {
      TestRaidDfs.validateFile(dfs, file1, file1Len, crc1);
      fail("Expected exception not thrown");
    } catch (org.apache.hadoop.fs.ChecksumException ce) {
      // expected: the client detects the corrupted block
    } catch (org.apache.hadoop.fs.BlockMissingException bme) {
      // expected: the client detects the missing block
    }
  } catch (Exception e) {
    LOG.info("Test " + testName + " Exception " + e +
             StringUtils.stringifyException(e));
    throw e;
  } finally {
    myTearDown();
  }
  LOG.info("Test " + testName + " completed.");
}
 
Example 13
Source File: TestBlockFixer.java    From RDFS with Apache License 2.0
/**
 * Corrupt a parity file and wait for it to get fixed.
 */
private void implParityBlockFix(String testName, boolean local)
  throws Exception {
  LOG.info("Test " + testName + " started.");
  long blockSize = 8192L;
  int stripeLength = 3;
  mySetup(stripeLength, -1); // never har
  Path file1 = new Path("/user/dhruba/raidtest/file1");
  Path destPath = new Path("/destraid/user/dhruba/raidtest");
  Path parityFile = new Path("/destraid/user/dhruba/raidtest/file1");
  TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
                                                        1, 7, blockSize);
  long file1Len = fileSys.getFileStatus(file1).getLen();
  LOG.info("Test " + testName + " created test files");

  // create an instance of the RaidNode
  Configuration localConf = new Configuration(conf);
  localConf.setInt("raid.blockfix.interval", 1000);
  if (local) {
    localConf.set("raid.blockfix.classname",
                  "org.apache.hadoop.raid.LocalBlockIntegrityMonitor");
  } else {
    localConf.set("raid.blockfix.classname",
                  "org.apache.hadoop.raid.DistBlockIntegrityMonitor");
  }
  localConf.setLong("raid.blockfix.filespertask", 2L);

  try {
    cnode = RaidNode.createRaidNode(null, localConf);
    TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
    cnode.stop(); cnode.join();

    long parityCRC = RaidDFSUtil.getCRC(fileSys, parityFile);

    FileStatus parityStat = fileSys.getFileStatus(parityFile);
    DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
    LocatedBlocks locs = RaidDFSUtil.getBlockLocations(
      dfs, parityFile.toUri().getPath(), 0, parityStat.getLen());

    String[] corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("no corrupt files expected", 0, corruptFiles.length);
    assertEquals("filesFixed() should return 0 before fixing files",
                 0, cnode.blockIntegrityMonitor.getNumFilesFixed());

    // Corrupt parity blocks for different stripes.
    int[] corruptBlockIdxs = new int[]{0, 1, 2};
    for (int idx: corruptBlockIdxs)
      corruptBlock(locs.get(idx).getBlock().getBlockName(), dfsCluster);
    RaidDFSUtil.reportCorruptBlocks(dfs, parityFile, corruptBlockIdxs, blockSize);

    corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("file not corrupted",
                 1, corruptFiles.length);
    assertEquals("wrong file corrupted",
                 corruptFiles[0], parityFile.toUri().getPath());

    cnode = RaidNode.createRaidNode(null, localConf);
    long start = System.currentTimeMillis();
    while (cnode.blockIntegrityMonitor.getNumFilesFixed() < 1 &&
           System.currentTimeMillis() - start < 120000) {
      LOG.info("Test " + testName + " waiting for files to be fixed.");
      Thread.sleep(1000);
    }
    assertEquals("file not fixed",
                 1, cnode.blockIntegrityMonitor.getNumFilesFixed());

    long checkCRC = RaidDFSUtil.getCRC(fileSys, parityFile);

    assertEquals("file not fixed",
                 parityCRC, checkCRC);

  } catch (Exception e) {
    LOG.info("Test " + testName + " Exception " + e +
             StringUtils.stringifyException(e));
    throw e;
  } finally {
    myTearDown();
  }
  LOG.info("Test " + testName + " completed.");
}
 
Example 14
Source File: TestBlockFixer.java    From RDFS with Apache License 2.0
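/**
 * Corrupts blocks in the HAR'ed parity file and waits for them to be fixed.
 */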
private void implParityHarBlockFix(String testName, boolean local)
  throws Exception {
  LOG.info("Test " + testName + " started.");
  long blockSize = 8192L;
  int stripeLength = 3;
  mySetup(stripeLength, 0); // Time before har = 0 days.
  Path file1 = new Path("/user/dhruba/raidtest/file1");
  Path destPath = new Path("/destraid/user/dhruba/raidtest");
  // Parity file will have 7 blocks.
  TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
                                             1, 20, blockSize);
  long file1Len = fileSys.getFileStatus(file1).getLen();
  LOG.info("Test " + testName + " created test files");

  // create an instance of the RaidNode
  Configuration localConf = new Configuration(conf);
  localConf.setInt("raid.blockfix.interval", 1000);
  localConf.setInt(RaidNode.RAID_PARITY_HAR_THRESHOLD_DAYS_KEY, 0);
  if (local) {
    localConf.set("raid.blockfix.classname",
                  "org.apache.hadoop.raid.LocalBlockIntegrityMonitor");
  } else {
    localConf.set("raid.blockfix.classname",
                  "org.apache.hadoop.raid.DistBlockIntegrityMonitor");
  }
  localConf.setLong("raid.blockfix.filespertask", 2L);

  try {
    cnode = RaidNode.createRaidNode(null, localConf);
    Path harDirectory =
      new Path("/destraid/user/dhruba/raidtest/raidtest" +
               RaidNode.HAR_SUFFIX);
    long start = System.currentTimeMillis();
    while (System.currentTimeMillis() - start < 1000 * 120) {
      if (fileSys.exists(harDirectory)) {
        break;
      }
      LOG.info("Test " + testName + " waiting for har");
      Thread.sleep(1000);
    }
    assertEquals(true, fileSys.exists(harDirectory));

    Path partFile = new Path(harDirectory, "part-0");
    long partCRC = RaidDFSUtil.getCRC(fileSys, partFile);
    FileStatus partStat = fileSys.getFileStatus(partFile);
    DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
    LocatedBlocks locs = RaidDFSUtil.getBlockLocations(
      dfs, partFile.toUri().getPath(), 0, partStat.getLen());
    assertEquals("wrong number of har blocks",
                 7, locs.getLocatedBlocks().size());
    cnode.stop(); cnode.join();

    String[] corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("no corrupt files expected", 0, corruptFiles.length);
    assertEquals("filesFixed() should return 0 before fixing files",
                 0, cnode.blockIntegrityMonitor.getNumFilesFixed());

    // Corrupt parity blocks for different stripes.
    int[] corruptBlockIdxs = new int[]{0, 1, 6};
    for (int idx: corruptBlockIdxs)
      corruptBlock(locs.get(idx).getBlock().getBlockName(), dfsCluster);
    RaidDFSUtil.reportCorruptBlocks(dfs, partFile, corruptBlockIdxs,
      partStat.getBlockSize());

    corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals("file not corrupted", 1, corruptFiles.length);
    assertEquals("wrong file corrupted",
                 corruptFiles[0], partFile.toUri().getPath());

    cnode = RaidNode.createRaidNode(null, localConf);
    start = System.currentTimeMillis();
    while (cnode.blockIntegrityMonitor.getNumFilesFixed() < 1 &&
           System.currentTimeMillis() - start < 120000) {
      LOG.info("Test " + testName + " waiting for files to be fixed.");
      Thread.sleep(1000);
    }
    assertEquals("file not fixed",
                 1, cnode.blockIntegrityMonitor.getNumFilesFixed());

    long checkCRC = RaidDFSUtil.getCRC(fileSys, partFile);

    assertEquals("file not fixed",
                 partCRC, checkCRC);
  } catch (Exception e) {
    LOG.info("Test " + testName + " Exception " + e +
             StringUtils.stringifyException(e));
    throw e;
  } finally {
    myTearDown();
  }
  LOG.info("Test " + testName + " completed.");
}
 
Example 15
Source File: TestRaidShell.java    From RDFS with Apache License 2.0
/**
 * Creates a file with three stripes, corrupts a block in each stripe,
 * and waits for the file to be fixed.
 */
public void testBlockFix() throws Exception {
  LOG.info("Test testBlockFix started.");
  long blockSize = 8192L;
  int stripeLength = 3;
  mySetup(stripeLength, -1);
  Path file1 = new Path(RAID_SRC_PATH, "file1");
  Path destPath = new Path("/raid"+RAID_SRC_PATH);
  long crc1 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
                                                        1, 7, blockSize);
  long file1Len = fileSys.getFileStatus(file1).getLen();
  LOG.info("Test testBlockFix created test files");

  // create an instance of the RaidNode
  Configuration localConf = new Configuration(conf);
  localConf.setInt("raid.blockfix.interval", 1000);
  localConf.set("raid.blockfix.classname",
                "org.apache.hadoop.raid.LocalBlockIntegrityMonitor");
  // the RaidNode does the raiding inline (instead of submitting to map/reduce)
  conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
  // use local block fixer
  conf.set("raid.blockfix.classname", 
           "org.apache.hadoop.raid.LocalBlockIntegrityMonitor");

  cnode = RaidNode.createRaidNode(null, localConf);

  try {
    TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
    cnode.stop();
    cnode.join();
    cnode = null;

    FileStatus srcStat = fileSys.getFileStatus(file1);
    LocatedBlocks locations = getBlockLocations(file1, srcStat.getLen());

    DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
    ClientProtocol namenode = dfs.getClient().namenode;

    String[] corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals(0, corruptFiles.length);

    // Corrupt one block in each of the three stripes. We can fix them.
    TestRaidDfs.corruptBlock(file1, locations.get(0).getBlock(),
             NUM_DATANODES, true, dfsCluster); // delete block
    TestRaidDfs.corruptBlock(file1, locations.get(4).getBlock(),
             NUM_DATANODES, false, dfsCluster); // corrupt block
    TestRaidDfs.corruptBlock(file1, locations.get(6).getBlock(),
             NUM_DATANODES, true, dfsCluster); // delete last (partial) block
    LocatedBlock[] toReport = new LocatedBlock[3];
    toReport[0] = locations.get(0);
    toReport[1] = locations.get(4);
    toReport[2] = locations.get(6);
    namenode.reportBadBlocks(toReport);

    corruptFiles = DFSUtil.getCorruptFiles(dfs);
    assertEquals(1, corruptFiles.length);
    assertEquals(file1.toString(), corruptFiles[0]);

    // Create RaidShell and fix the file.
    RaidShell shell = new RaidShell(conf);
    String[] args = new String[2];
    args[0] = "-recoverBlocks";
    args[1] = file1.toUri().getPath();
    ToolRunner.run(shell, args);

    long start = System.currentTimeMillis();
    do {
      LOG.info("Test testBlockFix waiting for files to be fixed.");
      Thread.sleep(1000);
      corruptFiles = DFSUtil.getCorruptFiles(dfs);
    } while (corruptFiles.length != 0 &&
           System.currentTimeMillis() - start < 120000);

    assertEquals(0, corruptFiles.length);

    dfs = getDFS(conf, dfs);
    assertTrue(TestRaidDfs.validateFile(dfs, file1, file1Len, crc1));

  } catch (Exception e) {
    LOG.info("Test testBlockFix Exception " + e + StringUtils.stringifyException(e));
    throw e;
  } finally {
    myTearDown();
  }
  LOG.info("Test testBlockFix completed.");
}