Java Code Examples for org.apache.hadoop.fs.FileUtil#unTar()

The following examples show how to use org.apache.hadoop.fs.FileUtil#unTar(). They are drawn from several open-source projects; the source file, project, and license are noted above each example.
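Before the project examples, here is a minimal sketch of the call itself. The archive path and target directory are hypothetical, chosen only for illustration; the two-argument signature FileUtil.unTar(File, File) is the one used by every example below, and the .tgz/.tar.gz archives passed in those examples suggest the method accepts gzip-compressed tarballs as well as plain ones.

import java.io.File;
import java.io.IOException;

import org.apache.hadoop.fs.FileUtil;

public class UnTarExample {
    public static void main(String[] args) throws IOException {
        // Hypothetical paths, for illustration only.
        File archive = new File("/tmp/example-archive.tar.gz");
        File targetDir = new File("/tmp/example-untarred");
        targetDir.mkdirs();

        // Extract the archive into the target directory. The examples below
        // pass both plain .tar files and gzip-compressed .tgz/.tar.gz archives
        // to this same call.
        FileUtil.unTar(archive, targetDir);
    }
}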
Example 1
Source File: OozieShareLibUtil.java    From hadoop-mini-clusters with Apache License 2.0
public String extractOozieShareLibTarFileToTempDir(File fullOozieShareLibTarFilePath) throws IOException {
    File tempDir = File.createTempFile(SHARE_LIB_LOCAL_TEMP_PREFIX, "");
    tempDir.delete();
    tempDir.mkdir();
    tempDir.deleteOnExit();

    FileUtil.unTar(fullOozieShareLibTarFilePath, tempDir);

    // Remove spark to try to get the CP down.
    if (oozieShareLibFrameworks != null && !oozieShareLibFrameworks.isEmpty()) {
        for (Framework framework : Framework.values()) {
            if (!oozieShareLibFrameworks.contains(framework)) {
                LOG.info("OOZIE: Excluding framework " + framework.getValue() + " from shared lib.");
                File removeShareLibDir = new File(tempDir.getAbsolutePath() + "/share/lib/" + framework.getValue());
                if (removeShareLibDir.isDirectory()) {
                    FileUtils.deleteDirectory(removeShareLibDir);
                }
            }
        }
    }
    return tempDir.getAbsolutePath();
}
 
Example 2
Source File: TestFSImage.java    From hadoop with Apache License 2.0
/**
 * In this test case, an image has been created with a file having
 * preferredBlockSize = 0. We try to read this image: a file with
 * preferredBlockSize = 0 was allowed before the 2.1.0-beta release, and a
 * NameNode after version 2.6 would not be able to read this particular
 * file without the fix. See HDFS-7788 for more information.
 * @throws Exception
 */
@Test
public void testZeroBlockSize() throws Exception {
  final Configuration conf = new HdfsConfiguration();
  String tarFile = System.getProperty("test.cache.data", "build/test/cache")
    + "/" + HADOOP_2_7_ZER0_BLOCK_SIZE_TGZ;
  String testDir = PathUtils.getTestDirName(getClass());
  File dfsDir = new File(testDir, "image-with-zero-block-size");
  if (dfsDir.exists() && !FileUtil.fullyDelete(dfsDir)) {
    throw new IOException("Could not delete dfs directory '" + dfsDir + "'");
  }
  FileUtil.unTar(new File(tarFile), new File(testDir));
  File nameDir = new File(dfsDir, "name");
  GenericTestUtils.assertExists(nameDir);
  conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, 
      nameDir.getAbsolutePath());
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1)
      .format(false)
      .manageDataDfsDirs(false)
      .manageNameDfsDirs(false)
      .waitSafeMode(false)
      .startupOption(StartupOption.UPGRADE)
      .build();
  try {
    FileSystem fs = cluster.getFileSystem();
    Path testPath = new Path("/tmp/zeroBlockFile");
    assertTrue("File /tmp/zeroBlockFile doesn't exist ", fs.exists(testPath));
    assertTrue("Name node didn't come up", cluster.isNameNodeUp(0));
  } finally {
    cluster.shutdown();
    //Clean up
    FileUtil.fullyDelete(dfsDir);
  }
}
 
Example 3
Source File: TestFileAppendRestart.java    From hadoop with Apache License 2.0
/**
 * Earlier versions of HDFS had a bug (HDFS-2991) which caused
 * append(), when called exactly at a block boundary,
 * to not log an OP_ADD. This test ensures that we can read from
 * such buggy versions correctly, by loading a namesystem image
 * created with 0.23.1-rc2 that exhibits the issue.
 */
@Test
public void testLoadLogsFromBuggyEarlierVersions() throws IOException {
  final Configuration conf = new HdfsConfiguration();

  String tarFile = System.getProperty("test.cache.data", "build/test/cache")
    + "/" + HADOOP_23_BROKEN_APPEND_TGZ;
  String testDir = PathUtils.getTestDirName(getClass());
  File dfsDir = new File(testDir, "image-with-buggy-append");
  if (dfsDir.exists() && !FileUtil.fullyDelete(dfsDir)) {
    throw new IOException("Could not delete dfs directory '" + dfsDir + "'");
  }
  FileUtil.unTar(new File(tarFile), new File(testDir));

  File nameDir = new File(dfsDir, "name");
  GenericTestUtils.assertExists(nameDir);

  conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, nameDir.getAbsolutePath());

  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
    .format(false)
    .manageDataDfsDirs(false)
    .manageNameDfsDirs(false)
    .waitSafeMode(false)
    .startupOption(StartupOption.UPGRADE)
    .build();
  try {
    FileSystem fs = cluster.getFileSystem();
    Path testPath = new Path("/tmp/io_data/test_io_0");
    assertEquals(2*1024*1024, fs.getFileStatus(testPath).getLen());
  } finally {
    cluster.shutdown();
  }
}
 
Example 4
Source File: TestFSImage.java    From big-c with Apache License 2.0
/**
 * In this test case, an image has been created with a file having
 * preferredBlockSize = 0. We try to read this image: a file with
 * preferredBlockSize = 0 was allowed before the 2.1.0-beta release, and a
 * NameNode after version 2.6 would not be able to read this particular
 * file without the fix. See HDFS-7788 for more information.
 * @throws Exception
 */
@Test
public void testZeroBlockSize() throws Exception {
  final Configuration conf = new HdfsConfiguration();
  String tarFile = System.getProperty("test.cache.data", "build/test/cache")
    + "/" + HADOOP_2_7_ZER0_BLOCK_SIZE_TGZ;
  String testDir = PathUtils.getTestDirName(getClass());
  File dfsDir = new File(testDir, "image-with-zero-block-size");
  if (dfsDir.exists() && !FileUtil.fullyDelete(dfsDir)) {
    throw new IOException("Could not delete dfs directory '" + dfsDir + "'");
  }
  FileUtil.unTar(new File(tarFile), new File(testDir));
  File nameDir = new File(dfsDir, "name");
  GenericTestUtils.assertExists(nameDir);
  conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, 
      nameDir.getAbsolutePath());
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1)
      .format(false)
      .manageDataDfsDirs(false)
      .manageNameDfsDirs(false)
      .waitSafeMode(false)
      .startupOption(StartupOption.UPGRADE)
      .build();
  try {
    FileSystem fs = cluster.getFileSystem();
    Path testPath = new Path("/tmp/zeroBlockFile");
    assertTrue("File /tmp/zeroBlockFile doesn't exist ", fs.exists(testPath));
    assertTrue("Name node didn't come up", cluster.isNameNodeUp(0));
  } finally {
    cluster.shutdown();
    //Clean up
    FileUtil.fullyDelete(dfsDir);
  }
}
 
Example 5
Source File: TestFileAppendRestart.java    From big-c with Apache License 2.0
/**
 * Earlier versions of HDFS had a bug (HDFS-2991) which caused
 * append(), when called exactly at a block boundary,
 * to not log an OP_ADD. This test ensures that we can read from
 * such buggy versions correctly, by loading a namesystem image
 * created with 0.23.1-rc2 that exhibits the issue.
 */
@Test
public void testLoadLogsFromBuggyEarlierVersions() throws IOException {
  final Configuration conf = new HdfsConfiguration();

  String tarFile = System.getProperty("test.cache.data", "build/test/cache")
    + "/" + HADOOP_23_BROKEN_APPEND_TGZ;
  String testDir = PathUtils.getTestDirName(getClass());
  File dfsDir = new File(testDir, "image-with-buggy-append");
  if (dfsDir.exists() && !FileUtil.fullyDelete(dfsDir)) {
    throw new IOException("Could not delete dfs directory '" + dfsDir + "'");
  }
  FileUtil.unTar(new File(tarFile), new File(testDir));

  File nameDir = new File(dfsDir, "name");
  GenericTestUtils.assertExists(nameDir);

  conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, nameDir.getAbsolutePath());

  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
    .format(false)
    .manageDataDfsDirs(false)
    .manageNameDfsDirs(false)
    .waitSafeMode(false)
    .startupOption(StartupOption.UPGRADE)
    .build();
  try {
    FileSystem fs = cluster.getFileSystem();
    Path testPath = new Path("/tmp/io_data/test_io_0");
    assertEquals(2*1024*1024, fs.getFileStatus(testPath).getLen());
  } finally {
    cluster.shutdown();
  }
}
 
Example 6
Source File: OozieShareLibUtil.java    From hadoop-mini-clusters with Apache License 2.0
public String extractOozieTarFileToTempDir(File fullOozieTarFilePath) throws IOException {
    File tempDir = File.createTempFile(SHARE_LIB_LOCAL_TEMP_PREFIX, "");
    tempDir.delete();
    tempDir.mkdir();
    tempDir.deleteOnExit();

    FileUtil.unTar(fullOozieTarFilePath, tempDir);

    return tempDir.getAbsolutePath();
}
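Both Oozie helpers above (Examples 1 and 6) build their temporary extraction directory by creating a temp file, deleting it, and re-creating it as a directory. The sketch below shows the same extraction using java.nio.file.Files.createTempDirectory, which creates the directory in a single step; the prefix string is an assumption standing in for SHARE_LIB_LOCAL_TEMP_PREFIX.

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;

import org.apache.hadoop.fs.FileUtil;

public class UnTarToTempDir {

    // Assumed stand-in for the SHARE_LIB_LOCAL_TEMP_PREFIX constant above.
    private static final String TEMP_PREFIX = "oozie_share_lib_tmp";

    public static String extractToTempDir(File tarFile) throws IOException {
        // Files.createTempDirectory creates the directory directly, avoiding
        // the createTempFile/delete/mkdir round trip used in the examples.
        File tempDir = Files.createTempDirectory(TEMP_PREFIX).toFile();
        tempDir.deleteOnExit();

        FileUtil.unTar(tarFile, tempDir);

        return tempDir.getAbsolutePath();
    }
}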
 
Example 7
Source File: TestDFSUpgradeFromImage.java    From RDFS with Apache License 2.0
public void unpackStorage() throws IOException {
  String tarFile = System.getProperty("test.cache.data", "build/test/cache") +
                   "/hadoop-26-dfs-dir.tgz";
  String dataDir = System.getProperty("test.build.data", "build/test/data");
  File dfsDir = new File(dataDir, "dfs");
  if ( dfsDir.exists() && !FileUtil.fullyDelete(dfsDir) ) {
    throw new IOException("Could not delete dfs directory '" + dfsDir + "'");
  }
  FileUtil.unTar(new File(tarFile), new File(dataDir));
  //Now read the reference info
  
  BufferedReader reader = new BufferedReader( 
                      new FileReader(System.getProperty("test.cache.data", "build/test/cache") +
                                     "/hadoop-dfs-dir.txt"));
  String line;
  while ( (line = reader.readLine()) != null ) {
    
    line = line.trim();
    if (line.length() <= 0 || line.startsWith("#")) {
      continue;
    }
    String[] arr = line.split("\\s+\t\\s+");
    if (arr.length < 1) {
      continue;
    }
    if (arr[0].equals("printChecksums")) {
      printChecksum = true;
      break;
    }
    if (arr.length < 2) {
      continue;
    }
    ReferenceFileInfo info = new ReferenceFileInfo();
    info.path = arr[0];
    info.checksum = Long.parseLong(arr[1]);
    refList.add(info);
  }
  reader.close();
}
 
Example 8
Source File: TestDFSUpgradeFromImage.java    From hadoop-gpu with Apache License 2.0
public void unpackStorage() throws IOException {
  String tarFile = System.getProperty("test.cache.data", "build/test/cache") +
                   "/hadoop-14-dfs-dir.tgz";
  String dataDir = System.getProperty("test.build.data", "build/test/data");
  File dfsDir = new File(dataDir, "dfs");
  if ( dfsDir.exists() && !FileUtil.fullyDelete(dfsDir) ) {
    throw new IOException("Could not delete dfs directory '" + dfsDir + "'");
  }
  FileUtil.unTar(new File(tarFile), new File(dataDir));
  //Now read the reference info
  
  BufferedReader reader = new BufferedReader( 
                      new FileReader(System.getProperty("test.cache.data", "build/test/cache") +
                                     "/hadoop-dfs-dir.txt"));
  String line;
  while ( (line = reader.readLine()) != null ) {
    
    line = line.trim();
    if (line.length() <= 0 || line.startsWith("#")) {
      continue;
    }
    String[] arr = line.split("\\s+\t\\s+");
    if (arr.length < 1) {
      continue;
    }
    if (arr[0].equals("printChecksums")) {
      printChecksum = true;
      break;
    }
    if (arr.length < 2) {
      continue;
    }
    ReferenceFileInfo info = new ReferenceFileInfo();
    info.path = arr[0];
    info.checksum = Long.parseLong(arr[1]);
    refList.add(info);
  }
  reader.close();
}
 
Example 9
Source File: OzoneManagerSnapshotProvider.java    From hadoop-ozone with Apache License 2.0
/**
 * Download the latest checkpoint from OM Leader via HTTP.
 * @param leaderOMNodeID leader OM Node ID.
 * @return the DB checkpoint (including the ratis snapshot index)
 */
public DBCheckpoint getOzoneManagerDBSnapshot(String leaderOMNodeID)
    throws IOException {
  String snapshotTime = Long.toString(System.currentTimeMillis());
  String snapshotFileName = Paths.get(omSnapshotDir.getAbsolutePath(),
      snapshotTime, OM_DB_NAME).toFile().getAbsolutePath();
  File targetFile = new File(snapshotFileName + ".tar.gz");

  String omCheckpointUrl = peerNodesMap.get(leaderOMNodeID)
      .getOMDBCheckpointEnpointUrl(httpPolicy);

  LOG.info("Downloading latest checkpoint from Leader OM {}. Checkpoint " +
      "URL: {}", leaderOMNodeID, omCheckpointUrl);
  SecurityUtil.doAsCurrentUser(() -> {
    HttpURLConnection httpURLConnection = (HttpURLConnection)
        connectionFactory.openConnection(new URL(omCheckpointUrl),
            spnegoEnabled);
    httpURLConnection.connect();
    int errorCode = httpURLConnection.getResponseCode();
    if ((errorCode != HTTP_OK) && (errorCode != HTTP_CREATED)) {
      throw new IOException("Unexpected exception when trying to reach " +
          "OM to download latest checkpoint. Checkpoint URL: " +
          omCheckpointUrl + ". ErrorCode: " + errorCode);
    }

    try (InputStream inputStream = httpURLConnection.getInputStream()) {
      FileUtils.copyInputStreamToFile(inputStream, targetFile);
    }
    return null;
  });

  // Untar the checkpoint file.
  Path untarredDbDir = Paths.get(snapshotFileName);
  FileUtil.unTar(targetFile, untarredDbDir.toFile());
  FileUtils.deleteQuietly(targetFile);

  LOG.info("Sucessfully downloaded latest checkpoint from leader OM: {}",
      leaderOMNodeID);

  RocksDBCheckpoint omCheckpoint = new RocksDBCheckpoint(untarredDbDir);
  return omCheckpoint;
}
 
Example 10
Source File: TestPersistBlocks.java    From hadoop with Apache License 2.0
/**
 * Earlier versions of HDFS didn't persist block allocation to the edit log.
 * This makes sure that we can still load an edit log when the OP_CLOSE
 * is the opcode which adds all of the blocks. This is a regression
 * test for HDFS-2773.
 * This test uses a tarred pseudo-distributed cluster from Hadoop 1.0
 * which has a multi-block file. This is similar to the tests in
 * {@link TestDFSUpgradeFromImage} but none of those images include
 * a multi-block file.
 */
@Test
public void testEarlierVersionEditLog() throws Exception {
  final Configuration conf = new HdfsConfiguration();
      
  String tarFile = System.getProperty("test.cache.data", "build/test/cache")
    + "/" + HADOOP_1_0_MULTIBLOCK_TGZ;
  String testDir = PathUtils.getTestDirName(getClass());
  File dfsDir = new File(testDir, "image-1.0");
  if (dfsDir.exists() && !FileUtil.fullyDelete(dfsDir)) {
    throw new IOException("Could not delete dfs directory '" + dfsDir + "'");
  }
  FileUtil.unTar(new File(tarFile), new File(testDir));

  File nameDir = new File(dfsDir, "name");
  GenericTestUtils.assertExists(nameDir);
  File dataDir = new File(dfsDir, "data");
  GenericTestUtils.assertExists(dataDir);
  
  conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, nameDir.getAbsolutePath());
  conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, dataDir.getAbsolutePath());
  
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .format(false)
    .manageDataDfsDirs(false)
    .manageNameDfsDirs(false)
    .numDataNodes(1)
    .startupOption(StartupOption.UPGRADE)
    .build();
  try {
    FileSystem fs = cluster.getFileSystem();
    Path testPath = new Path("/user/todd/4blocks");
    // Read it without caring about the actual data within - we just need
    // to make sure that the block states and locations are OK.
    DFSTestUtil.readFile(fs, testPath);
    
    // Ensure that we can append to it - if the blocks were in some funny
    // state we'd get some kind of issue here. 
    FSDataOutputStream stm = fs.append(testPath);
    try {
      stm.write(1);
    } finally {
      IOUtils.closeStream(stm);
    }
  } finally {
    cluster.shutdown();
  }
}
 
Example 11
Source File: TestPersistBlocks.java    From big-c with Apache License 2.0
/**
 * Earlier versions of HDFS didn't persist block allocation to the edit log.
 * This makes sure that we can still load an edit log when the OP_CLOSE
 * is the opcode which adds all of the blocks. This is a regression
 * test for HDFS-2773.
 * This test uses a tarred pseudo-distributed cluster from Hadoop 1.0
 * which has a multi-block file. This is similar to the tests in
 * {@link TestDFSUpgradeFromImage} but none of those images include
 * a multi-block file.
 */
@Test
public void testEarlierVersionEditLog() throws Exception {
  final Configuration conf = new HdfsConfiguration();
      
  String tarFile = System.getProperty("test.cache.data", "build/test/cache")
    + "/" + HADOOP_1_0_MULTIBLOCK_TGZ;
  String testDir = PathUtils.getTestDirName(getClass());
  File dfsDir = new File(testDir, "image-1.0");
  if (dfsDir.exists() && !FileUtil.fullyDelete(dfsDir)) {
    throw new IOException("Could not delete dfs directory '" + dfsDir + "'");
  }
  FileUtil.unTar(new File(tarFile), new File(testDir));

  File nameDir = new File(dfsDir, "name");
  GenericTestUtils.assertExists(nameDir);
  File dataDir = new File(dfsDir, "data");
  GenericTestUtils.assertExists(dataDir);
  
  conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, nameDir.getAbsolutePath());
  conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, dataDir.getAbsolutePath());
  
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .format(false)
    .manageDataDfsDirs(false)
    .manageNameDfsDirs(false)
    .numDataNodes(1)
    .startupOption(StartupOption.UPGRADE)
    .build();
  try {
    FileSystem fs = cluster.getFileSystem();
    Path testPath = new Path("/user/todd/4blocks");
    // Read it without caring about the actual data within - we just need
    // to make sure that the block states and locations are OK.
    DFSTestUtil.readFile(fs, testPath);
    
    // Ensure that we can append to it - if the blocks were in some funny
    // state we'd get some kind of issue here. 
    FSDataOutputStream stm = fs.append(testPath);
    try {
      stm.write(1);
    } finally {
      IOUtils.closeStream(stm);
    }
  } finally {
    cluster.shutdown();
  }
}
 
Example 12
Source File: HbaseImporter.java    From Kylin with Apache License 2.0
public static void importHBaseData(String hbaseTarLocation, Configuration conf) throws IOException, ClassNotFoundException, InterruptedException {

        if (System.getenv("JAVA_HOME") == null) {
            logger.error("Didn't find $JAVA_HOME, this will cause HBase data import failed. Please set $JAVA_HOME.");
            logger.error("Skipping table import...");
            return;
        }

        File exportFile = new File(hbaseTarLocation);
        if (!exportFile.exists()) {
            logger.error("Didn't find the export achieve file on " + exportFile.getAbsolutePath());
            return;
        }

        File folder = new File("/tmp/hbase-export/");
        if (folder.exists()) {
            FileUtils.deleteDirectory(folder);
        }
        folder.mkdirs();
        folder.deleteOnExit();

        //TarGZUtil.uncompressTarGZ(exportFile, folder);
        FileUtil.unTar(exportFile, folder);
        String[] child = folder.list();
        Preconditions.checkState(child.length == 1);
        String backupFolderName = child[0];
        File backupFolder = new File(folder, backupFolderName);
        String[] tableNames = backupFolder.list();

        for (String table : tableNames) {

            if (!(table.equalsIgnoreCase(HBaseMiniclusterHelper.TEST_METADATA_TABLE) || table.startsWith(HBaseMiniclusterHelper.SHARED_STORAGE_PREFIX))) {
                continue;
            }

            // create the htable; otherwise the import will fail.
            if (table.startsWith(HBaseMiniclusterHelper.II_STORAGE_PREFIX)) {
                HBaseConnection.createHTableIfNeeded(KylinConfig.getInstanceFromEnv().getStorageUrl(), table, "f");
            } else if (table.startsWith(HBaseMiniclusterHelper.CUBE_STORAGE_PREFIX)) {
                HBaseConnection.createHTableIfNeeded(KylinConfig.getInstanceFromEnv().getStorageUrl(), table, "F1", "F2");
            }

            // directly import from local fs, no need to copy to hdfs
            String importLocation = "file://" + backupFolder.getAbsolutePath() + "/" + table;
            String[] args = new String[] { table, importLocation };
            boolean result = runImport(args, conf);
            logger.info("importing table '" + table + "' with result:" + result);

            if (!result)
                break;
        }

    }