Java Code Examples for org.apache.hadoop.fs.FsShell#run()
The following examples show how to use org.apache.hadoop.fs.FsShell#run(). Each example comes from an open-source project; the source file, project, and license are noted above the code.
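All of the examples share the same basic pattern: construct an FsShell (either with a Hadoop Configuration or via setConf()), pass it shell-style arguments through run(), and check the integer exit code. The minimal sketch below illustrates that pattern in isolation; the default Configuration and the "-ls /" arguments are placeholders for illustration only and are not taken from any of the projects listed here.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FsShell;

public class FsShellRunSketch {
    public static void main(String[] args) throws Exception {
        // A default Configuration; real callers usually pass the cluster's
        // Configuration, as the project examples below do.
        Configuration conf = new Configuration();
        FsShell shell = new FsShell(conf);
        // Arguments mirror the "hdfs dfs" command line; run() returns 0 on
        // success and a non-zero value when the command fails.
        int exitCode = shell.run(new String[] {"-ls", "/"});
        System.out.println("FsShell exit code: " + exitCode);
    }
}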
Example 1
Source File: Pig.java From spork with Apache License 2.0
/**
 * Run a filesystem command. Any output from this command is written to
 * stdout or stderr as appropriate.
 * @param cmd Filesystem command to run along with its arguments as one
 * string.
 * @throws IOException
 */
public static int fs(String cmd) throws IOException {
    ScriptPigContext ctx = getScriptContext();
    FsShell shell = new FsShell(ConfigurationUtil.toConfiguration(ctx
            .getPigContext().getProperties()));
    int code = -1;
    if (cmd != null) {
        String[] cmdTokens = cmd.split("\\s+");
        if (!cmdTokens[0].startsWith("-"))
            cmdTokens[0] = "-" + cmdTokens[0];
        try {
            code = shell.run(cmdTokens);
        } catch (Exception e) {
            throw new IOException("Run filesystem command failed", e);
        }
    }
    return code;
}
Example 2
Source File: TestSnapshotRename.java From hadoop with Apache License 2.0
@Test
public void testRenameSnapshotCommandWithIllegalArguments() throws Exception {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    PrintStream psOut = new PrintStream(out);
    System.setOut(psOut);
    System.setErr(psOut);
    FsShell shell = new FsShell();
    shell.setConf(conf);

    String[] argv1 = {"-renameSnapshot", "/tmp", "s1"};
    int val = shell.run(argv1);
    assertTrue(val == -1);
    assertTrue(out.toString().contains(
        argv1[0] + ": Incorrect number of arguments."));
    out.reset();

    String[] argv2 = {"-renameSnapshot", "/tmp", "s1", "s2", "s3"};
    val = shell.run(argv2);
    assertTrue(val == -1);
    assertTrue(out.toString().contains(
        argv2[0] + ": Incorrect number of arguments."));

    psOut.close();
    out.close();
}
Example 3
Source File: TestSnapshotRename.java From big-c with Apache License 2.0
@Test
public void testRenameSnapshotCommandWithIllegalArguments() throws Exception {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    PrintStream psOut = new PrintStream(out);
    System.setOut(psOut);
    System.setErr(psOut);
    FsShell shell = new FsShell();
    shell.setConf(conf);

    String[] argv1 = {"-renameSnapshot", "/tmp", "s1"};
    int val = shell.run(argv1);
    assertTrue(val == -1);
    assertTrue(out.toString().contains(
        argv1[0] + ": Incorrect number of arguments."));
    out.reset();

    String[] argv2 = {"-renameSnapshot", "/tmp", "s1", "s2", "s3"};
    val = shell.run(argv2);
    assertTrue(val == -1);
    assertTrue(out.toString().contains(
        argv2[0] + ": Incorrect number of arguments."));

    psOut.close();
    out.close();
}
Example 4
Source File: TestFsShellPermission.java From hadoop with Apache License 2.0
static String execCmd(FsShell shell, final String[] args) throws Exception {
    ByteArrayOutputStream baout = new ByteArrayOutputStream();
    PrintStream out = new PrintStream(baout, true);
    PrintStream old = System.out;
    System.setOut(out);
    int ret = shell.run(args);
    out.close();
    System.setOut(old);
    return String.valueOf(ret);
}
Example 5
Source File: TestTrash.java From RDFS with Apache License 2.0
protected int cmdUsingShell(String cmd, FsShell shell, Path myFile) {
    // Delete the file to trash
    String[] args = new String[2];
    args[0] = cmd;
    args[1] = myFile.toString();
    try {
        return shell.run(args);
    } catch (Exception e) {
        System.err.println("Exception raised from Trash.run " +
            e.getLocalizedMessage());
    }
    return -1;
}
Example 6
Source File: TestCopyFiles.java From hadoop with Apache License 2.0
static String execCmd(FsShell shell, String... args) throws Exception {
    ByteArrayOutputStream baout = new ByteArrayOutputStream();
    PrintStream out = new PrintStream(baout, true);
    PrintStream old = System.out;
    System.setOut(out);
    shell.run(args);
    out.close();
    System.setOut(old);
    return baout.toString();
}
Example 7
Source File: Gridmix.java From hadoop with Apache License 2.0
/**
 * Write random bytes at the path <inputDir> if needed.
 * @see org.apache.hadoop.mapred.gridmix.GenerateData
 * @return exit status
 */
@SuppressWarnings("deprecation")
protected int writeInputData(long genbytes, Path inputDir)
    throws IOException, InterruptedException {
  if (genbytes > 0) {
    final Configuration conf = getConf();

    if (inputDir.getFileSystem(conf).exists(inputDir)) {
      LOG.error("Gridmix input data directory " + inputDir
                + " already exists when -generate option is used.\n");
      return STARTUP_FAILED_ERROR;
    }

    // configure the compression ratio if needed
    CompressionEmulationUtil.setupDataGeneratorConfig(conf);

    final GenerateData genData = new GenerateData(conf, inputDir, genbytes);
    LOG.info("Generating " + StringUtils.humanReadableInt(genbytes)
             + " of test data...");
    launchGridmixJob(genData);

    FsShell shell = new FsShell(conf);
    try {
      LOG.info("Changing the permissions for inputPath " + inputDir.toString());
      shell.run(new String[] {"-chmod", "-R", "777", inputDir.toString()});
    } catch (Exception e) {
      LOG.error("Couldnt change the file permissions ", e);
      throw new IOException(e);
    }
    LOG.info("Input data generation successful.");
  }

  return 0;
}
Example 8
Source File: TestFsShellPermission.java From big-c with Apache License 2.0
static String execCmd(FsShell shell, final String[] args) throws Exception {
    ByteArrayOutputStream baout = new ByteArrayOutputStream();
    PrintStream out = new PrintStream(baout, true);
    PrintStream old = System.out;
    System.setOut(out);
    int ret = shell.run(args);
    out.close();
    System.setOut(old);
    return String.valueOf(ret);
}
Example 9
Source File: TestCopyFiles.java From big-c with Apache License 2.0
static String execCmd(FsShell shell, String... args) throws Exception {
    ByteArrayOutputStream baout = new ByteArrayOutputStream();
    PrintStream out = new PrintStream(baout, true);
    PrintStream old = System.out;
    System.setOut(out);
    shell.run(args);
    out.close();
    System.setOut(old);
    return baout.toString();
}
Example 10
Source File: Gridmix.java From big-c with Apache License 2.0
/**
 * Write random bytes at the path <inputDir> if needed.
 * @see org.apache.hadoop.mapred.gridmix.GenerateData
 * @return exit status
 */
@SuppressWarnings("deprecation")
protected int writeInputData(long genbytes, Path inputDir)
    throws IOException, InterruptedException {
  if (genbytes > 0) {
    final Configuration conf = getConf();

    if (inputDir.getFileSystem(conf).exists(inputDir)) {
      LOG.error("Gridmix input data directory " + inputDir
                + " already exists when -generate option is used.\n");
      return STARTUP_FAILED_ERROR;
    }

    // configure the compression ratio if needed
    CompressionEmulationUtil.setupDataGeneratorConfig(conf);

    final GenerateData genData = new GenerateData(conf, inputDir, genbytes);
    LOG.info("Generating " + StringUtils.humanReadableInt(genbytes)
             + " of test data...");
    launchGridmixJob(genData);

    FsShell shell = new FsShell(conf);
    try {
      LOG.info("Changing the permissions for inputPath " + inputDir.toString());
      shell.run(new String[] {"-chmod", "-R", "777", inputDir.toString()});
    } catch (Exception e) {
      LOG.error("Couldnt change the file permissions ", e);
      throw new IOException(e);
    }
    LOG.info("Input data generation successful.");
  }

  return 0;
}
Example 11
Source File: DistCp.java From RDFS with Apache License 2.0
/** Delete the dst files/dirs which do not exist in src */
static private void deleteNonexisting(
    FileSystem dstfs, FileStatus dstroot, Path dstsorted,
    FileSystem jobfs, Path jobdir, JobConf jobconf, Configuration conf
    ) throws IOException {
  if (!dstroot.isDir()) {
    throw new IOException("dst must be a directory when option "
        + Options.DELETE.cmd + " is set, but dst (= " + dstroot.getPath()
        + ") is not a directory.");
  }

  //write dst lsr results
  final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr");
  final SequenceFile.Writer writer = SequenceFile.createWriter(jobfs, jobconf,
      dstlsr, Text.class, FileStatus.class,
      SequenceFile.CompressionType.NONE);
  try {
    //do lsr to get all file statuses in dstroot
    final Stack<FileStatus> lsrstack = new Stack<FileStatus>();
    for(lsrstack.push(dstroot); !lsrstack.isEmpty(); ) {
      final FileStatus status = lsrstack.pop();
      if (status.isDir()) {
        for(FileStatus child : dstfs.listStatus(status.getPath())) {
          String relative = makeRelative(dstroot.getPath(), child.getPath());
          writer.append(new Text(relative), child);
          lsrstack.push(child);
        }
      }
    }
  } finally {
    checkAndClose(writer);
  }

  //sort lsr results
  final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted");
  SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs,
      new Text.Comparator(), Text.class, FileStatus.class, jobconf);
  sorter.sort(dstlsr, sortedlsr);

  //compare lsr list and dst list
  SequenceFile.Reader lsrin = null;
  SequenceFile.Reader dstin = null;
  try {
    lsrin = new SequenceFile.Reader(jobfs, sortedlsr, jobconf);
    dstin = new SequenceFile.Reader(jobfs, dstsorted, jobconf);

    //compare sorted lsr list and sorted dst list
    final Text lsrpath = new Text();
    final FileStatus lsrstatus = new FileStatus();
    final Text dstpath = new Text();
    final Text dstfrom = new Text();
    final FsShell shell = new FsShell(conf);
    final String[] shellargs = {"-rmr", null};

    boolean hasnext = dstin.next(dstpath, dstfrom);
    for(; lsrin.next(lsrpath, lsrstatus); ) {
      int dst_cmp_lsr = dstpath.compareTo(lsrpath);
      for(; hasnext && dst_cmp_lsr < 0; ) {
        hasnext = dstin.next(dstpath, dstfrom);
        dst_cmp_lsr = dstpath.compareTo(lsrpath);
      }

      if (dst_cmp_lsr == 0) {
        //lsrpath exists in dst, skip it
        hasnext = dstin.next(dstpath, dstfrom);
      } else {
        //lsrpath does not exist, delete it
        String s = new Path(dstroot.getPath(), lsrpath.toString()).toString();
        if (shellargs[1] == null || !isAncestorPath(shellargs[1], s)) {
          shellargs[1] = s;
          int r = 0;
          try {
            r = shell.run(shellargs);
          } catch(Exception e) {
            throw new IOException("Exception from shell.", e);
          }
          if (r != 0) {
            throw new IOException("\"" + shellargs[0] + " " + shellargs[1]
                + "\" returns non-zero value " + r);
          }
        }
      }
    }
  } finally {
    checkAndClose(lsrin);
    checkAndClose(dstin);
  }
}
Example 12
Source File: BulkLoadJob.java From kylin-on-parquet-v2 with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OPTION_INPUT_PATH);
    options.addOption(OPTION_HTABLE_NAME);
    options.addOption(OPTION_CUBE_NAME);
    parseOptions(options, args);

    String tableName = getOptionValue(OPTION_HTABLE_NAME);
    // e.g
    // /tmp/kylin-3f150b00-3332-41ca-9d3d-652f67f044d7/test_kylin_cube_with_slr_ready_2_segments/hfile/
    // end with "/"
    String input = getOptionValue(OPTION_INPUT_PATH);

    Configuration conf = HBaseConnection.getCurrentHBaseConfiguration();
    FsShell shell = new FsShell(conf);

    int exitCode = -1;
    int retryCount = 10;
    while (exitCode != 0 && retryCount >= 1) {
        exitCode = shell.run(new String[] { "-chmod", "-R", "777", input });
        retryCount--;
        Thread.sleep(5000);
    }

    if (exitCode != 0) {
        logger.error("Failed to change the file permissions: " + input);
        throw new IOException("Failed to change the file permissions: " + input);
    }

    String[] newArgs = new String[2];
    newArgs[0] = input;
    newArgs[1] = tableName;

    int count = 0;
    Path inputPath = new Path(input);
    FileSystem fs = HadoopUtil.getFileSystem(inputPath);
    FileStatus[] fileStatuses = fs.listStatus(inputPath);

    for (FileStatus fileStatus : fileStatuses) {
        if (fileStatus.isDirectory()) {
            Path path = fileStatus.getPath();
            if (path.getName().equals(FileOutputCommitter.TEMP_DIR_NAME)) {
                logger.info("Delete temporary path: " + path);
                fs.delete(path, true);
            } else {
                count++;
            }
        }
    }

    int ret = 0;
    if (count > 0) {
        logger.debug("Start to run LoadIncrementalHFiles");
        ret = MRUtil.runMRJob(new LoadIncrementalHFiles(conf), newArgs);
        logger.debug("End to run LoadIncrementalHFiles");
        return ret;
    } else {
        logger.debug("Nothing to load, cube is empty");
        return ret;
    }
}
Example 13
Source File: LookupTableHFilesBulkLoadJob.java From kylin with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OPTION_INPUT_PATH);
    options.addOption(OPTION_TABLE_NAME);
    options.addOption(OPTION_CUBING_JOB_ID);
    options.addOption(OPTION_LOOKUP_SNAPSHOT_ID);
    parseOptions(options, args);

    String tableName = getOptionValue(OPTION_TABLE_NAME);
    String cubingJobID = getOptionValue(OPTION_CUBING_JOB_ID);
    String snapshotID = getOptionValue(OPTION_LOOKUP_SNAPSHOT_ID);

    KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
    ExecutableManager execMgr = ExecutableManager.getInstance(kylinConfig);
    DefaultChainedExecutable job = (DefaultChainedExecutable) execMgr.getJob(cubingJobID);

    ExtTableSnapshotInfoManager extTableSnapshotInfoManager = ExtTableSnapshotInfoManager.getInstance(kylinConfig);
    ExtTableSnapshotInfo snapshot = extTableSnapshotInfoManager.getSnapshot(tableName, snapshotID);
    long srcTableRowCnt = Long.parseLong(job.findExtraInfoBackward(
        BatchConstants.LOOKUP_EXT_SNAPSHOT_SRC_RECORD_CNT_PFX + tableName, "-1"));
    logger.info("update table:{} snapshot row count:{}", tableName, srcTableRowCnt);
    snapshot.setRowCnt(srcTableRowCnt);
    snapshot.setLastBuildTime(System.currentTimeMillis());
    extTableSnapshotInfoManager.updateSnapshot(snapshot);

    String hTableName = snapshot.getStorageLocationIdentifier();
    // e.g
    // /tmp/kylin-3f150b00-3332-41ca-9d3d-652f67f044d7/test_kylin_cube_with_slr_ready_2_segments/hfile/
    // end with "/"
    String input = getOptionValue(OPTION_INPUT_PATH);

    Configuration conf = HBaseConnection.getCurrentHBaseConfiguration();
    FsShell shell = new FsShell(conf);

    int exitCode = -1;
    int retryCount = 10;
    while (exitCode != 0 && retryCount >= 1) {
        exitCode = shell.run(new String[] { "-chmod", "-R", "777", input });
        retryCount--;
        Thread.sleep(5000);
    }

    if (exitCode != 0) {
        logger.error("Failed to change the file permissions: {}", input);
        throw new IOException("Failed to change the file permissions: " + input);
    }

    String[] newArgs = new String[2];
    newArgs[0] = input;
    newArgs[1] = hTableName;

    logger.debug("Start to run LoadIncrementalHFiles");
    int ret = MRUtil.runMRJob(new LoadIncrementalHFiles(conf), newArgs);
    logger.debug("End to run LoadIncrementalHFiles");
    return ret;
}
Example 14
Source File: BulkLoadJob.java From kylin with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OPTION_INPUT_PATH);
    options.addOption(OPTION_HTABLE_NAME);
    options.addOption(OPTION_CUBE_NAME);
    parseOptions(options, args);

    String tableName = getOptionValue(OPTION_HTABLE_NAME);
    // e.g
    // /tmp/kylin-3f150b00-3332-41ca-9d3d-652f67f044d7/test_kylin_cube_with_slr_ready_2_segments/hfile/
    // end with "/"
    String input = getOptionValue(OPTION_INPUT_PATH);

    Configuration conf = HBaseConnection.getCurrentHBaseConfiguration();
    FsShell shell = new FsShell(conf);

    int exitCode = -1;
    int retryCount = 10;
    while (exitCode != 0 && retryCount >= 1) {
        exitCode = shell.run(new String[] { "-chmod", "-R", "777", input });
        retryCount--;
        Thread.sleep(5000);
    }

    if (exitCode != 0) {
        logger.error("Failed to change the file permissions: " + input);
        throw new IOException("Failed to change the file permissions: " + input);
    }

    String[] newArgs = new String[2];
    newArgs[0] = input;
    newArgs[1] = tableName;

    int count = 0;
    Path inputPath = new Path(input);
    FileSystem fs = HadoopUtil.getFileSystem(inputPath);
    FileStatus[] fileStatuses = fs.listStatus(inputPath);

    for (FileStatus fileStatus : fileStatuses) {
        if (fileStatus.isDirectory()) {
            Path path = fileStatus.getPath();
            if (path.getName().equals(FileOutputCommitter.TEMP_DIR_NAME)) {
                logger.info("Delete temporary path: " + path);
                fs.delete(path, true);
            } else {
                count++;
            }
        }
    }

    int ret = 0;
    if (count > 0) {
        logger.debug("Start to run LoadIncrementalHFiles");
        ret = MRUtil.runMRJob(new LoadIncrementalHFiles(conf), newArgs);
        logger.debug("End to run LoadIncrementalHFiles");
        return ret;
    } else {
        logger.debug("Nothing to load, cube is empty");
        return ret;
    }
}
Example 15
Source File: BulkIngestExample.java From accumulo-examples with Apache License 2.0
public static void main(String[] args) throws Exception {
  ClientOpts opts = new ClientOpts();
  opts.parseArgs(BulkIngestExample.class.getName(), args);

  Job job = Job.getInstance(opts.getHadoopConfig());
  job.setJobName(BulkIngestExample.class.getSimpleName());
  job.setJarByClass(BulkIngestExample.class);

  job.setInputFormatClass(TextInputFormat.class);

  job.setMapperClass(MapClass.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);

  job.setReducerClass(ReduceClass.class);
  job.setOutputFormatClass(AccumuloFileOutputFormat.class);

  TextInputFormat.setInputPaths(job, new Path(inputDir));
  AccumuloFileOutputFormat.configure().outputPath(new Path(workDir + "/files")).store(job);

  FileSystem fs = FileSystem.get(opts.getHadoopConfig());
  try (AccumuloClient client = opts.createAccumuloClient()) {

    try (PrintStream out = new PrintStream(
        new BufferedOutputStream(fs.create(new Path(workDir + "/splits.txt"))))) {
      Collection<Text> splits = client.tableOperations().listSplits(SetupTable.tableName, 100);
      for (Text split : splits)
        out.println(Base64.getEncoder().encodeToString(split.copyBytes()));
      job.setNumReduceTasks(splits.size() + 1);
    }

    job.setPartitionerClass(RangePartitioner.class);
    RangePartitioner.setSplitFile(job, workDir + "/splits.txt");

    job.waitForCompletion(true);
    Path failures = new Path(workDir, "failures");
    fs.delete(failures, true);
    fs.mkdirs(new Path(workDir, "failures"));

    // With HDFS permissions on, we need to make sure the Accumulo user can read/move the rfiles
    FsShell fsShell = new FsShell(opts.getHadoopConfig());
    fsShell.run(new String[] {"-chmod", "-R", "777", workDir});

    client.tableOperations().importDirectory(workDir + "/files").to(SetupTable.tableName).load();
  }
  System.exit(job.isSuccessful() ? 0 : 1);
}
Example 16
Source File: TestCopyFiles.java From big-c with Apache License 2.0
/** test -delete */
public void testDelete() throws Exception {
  final Configuration conf = new Configuration();
  conf.setInt("fs.trash.interval", 60);
  MiniDFSCluster cluster = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
    final URI nnURI = FileSystem.getDefaultUri(conf);
    final String nnUri = nnURI.toString();
    final FileSystem fs = FileSystem.get(URI.create(nnUri), conf);

    final DistCpV1 distcp = new DistCpV1(conf);
    final FsShell shell = new FsShell(conf);

    final String srcrootdir = "/src_root";
    final String dstrootdir = "/dst_root";

    {
      //create source files
      createFiles(nnURI, srcrootdir);
      String srcresults = execCmd(shell, "-lsr", srcrootdir);
      srcresults = removePrefix(srcresults, srcrootdir);
      System.out.println("srcresults=" + srcresults);

      //create some files in dst
      createFiles(nnURI, dstrootdir);
      System.out.println("dstrootdir=" + dstrootdir);
      shell.run(new String[]{"-lsr", dstrootdir});

      //run distcp
      ToolRunner.run(distcp,
          new String[]{"-delete", "-update", "-log", "/log",
                       nnUri+srcrootdir, nnUri+dstrootdir});

      //make sure src and dst contains the same files
      String dstresults = execCmd(shell, "-lsr", dstrootdir);
      dstresults = removePrefix(dstresults, dstrootdir);
      System.out.println("first dstresults=" + dstresults);
      assertEquals(srcresults, dstresults);

      //create additional file in dst
      create(fs, new Path(dstrootdir, "foo"));
      create(fs, new Path(dstrootdir, "foobar"));

      //run distcp again
      ToolRunner.run(distcp,
          new String[]{"-delete", "-update", "-log", "/log2",
                       nnUri+srcrootdir, nnUri+dstrootdir});

      //make sure src and dst contains the same files
      dstresults = execCmd(shell, "-lsr", dstrootdir);
      dstresults = removePrefix(dstresults, dstrootdir);
      System.out.println("second dstresults=" + dstresults);
      assertEquals(srcresults, dstresults);
      // verify that files removed in -delete were moved to the trash
      // regrettably, this test will break if Trash changes incompatibly
      assertTrue(fs.exists(new Path(fs.getHomeDirectory(),
          ".Trash/Current" + dstrootdir + "/foo")));
      assertTrue(fs.exists(new Path(fs.getHomeDirectory(),
          ".Trash/Current" + dstrootdir + "/foobar")));

      //cleanup
      deldir(fs, dstrootdir);
      deldir(fs, srcrootdir);
    }
  } finally {
    if (cluster != null) { cluster.shutdown(); }
  }
}
Example 17
Source File: TestTrash.java From RDFS with Apache License 2.0
/**
 * @param fs
 * @param conf
 * @throws Exception
 */
protected void trashPatternEmptier(FileSystem fs, Configuration conf) throws Exception {
  // Trash with 12 second deletes and 6 seconds checkpoints
  conf.set("fs.trash.interval", "0.2"); // 12 seconds
  conf.set("fs.trash.checkpoint.interval", "0.1"); // 6 seconds
  conf.setClass("fs.trash.classname", TrashPolicyPattern.class, TrashPolicy.class);
  conf.set("fs.trash.base.paths", TEST_DIR + "/my_root/*/");
  conf.set("fs.trash.unmatched.paths", TEST_DIR + "/unmatched/");

  Trash trash = new Trash(conf);
  // clean up trash can
  fs.delete(new Path(TEST_DIR + "/my_root/*/"), true);
  fs.delete(new Path(TEST_DIR + "/my_root_not/*/"), true);

  FsShell shell = new FsShell();
  shell.setConf(conf);
  shell.init();

  // First create a new directory with mkdirs
  deleteAndCheckTrash(fs, shell, "my_root/sub_dir1/sub_dir1_1/myFile",
      "my_root/sub_dir1/.Trash/Current/" + TEST_DIR + "/my_root/sub_dir1/sub_dir1_1");
  deleteAndCheckTrash(fs, shell, "my_root/sub_dir2/sub_dir2_1/myFile",
      "my_root/sub_dir2/.Trash/Current/" + TEST_DIR + "/my_root/sub_dir2/sub_dir2_1");
  deleteAndCheckTrash(fs, shell, "my_root_not/",
      "unmatched/.Trash/Current" + TEST_DIR + "/my_root_not");
  deleteAndCheckTrash(fs, shell, "my_root/file",
      "unmatched/.Trash/Current" + TEST_DIR + "/my_root/file");

  Path currentTrash = new Path(TEST_DIR, "my_root/sub_dir1/.Trash/Current/");
  fs.mkdirs(currentTrash);
  cmdUsingShell("-rmr", shell, currentTrash);
  TestCase.assertTrue(!fs.exists(currentTrash));

  cmdUsingShell("-rmr", shell, new Path(TEST_DIR, "my_root"));
  TestCase.assertTrue(fs.exists(new Path(TEST_DIR,
      "unmatched/.Trash/Current/" + TEST_DIR + "/my_root")));

  // Test Emplier
  // Start Emptier in background
  Runnable emptier = trash.getEmptier();
  Thread emptierThread = new Thread(emptier);
  emptierThread.start();

  int fileIndex = 0;
  Set<String> checkpoints = new HashSet<String>();
  while (true) {
    // Create a file with a new name
    Path myFile = new Path(TEST_DIR, "my_root/sub_dir1/sub_dir2/myFile" + fileIndex++);
    writeFile(fs, myFile);

    // Delete the file to trash
    String[] args = new String[2];
    args[0] = "-rm";
    args[1] = myFile.toString();
    int val = -1;
    try {
      val = shell.run(args);
    } catch (Exception e) {
      System.err.println("Exception raised from Trash.run " + e.getLocalizedMessage());
    }
    assertTrue(val == 0);

    Path trashDir = new Path(TEST_DIR, "my_root/sub_dir1/.Trash/Current/");
    FileStatus files[] = fs.listStatus(trashDir.getParent());
    // Scan files in .Trash and add them to set of checkpoints
    for (FileStatus file : files) {
      String fileName = file.getPath().getName();
      checkpoints.add(fileName);
    }
    // If checkpoints has 5 objects it is Current + 4 checkpoint directories
    if (checkpoints.size() == 5) {
      // The actual contents should be smaller since the last checkpoint
      // should've been deleted and Current might not have been recreated yet
      assertTrue(5 > files.length);
      break;
    }
    Thread.sleep(5000);
  }
  emptierThread.interrupt();
  emptierThread.join();
}
Example 18
Source File: TestCopyFiles.java From hadoop with Apache License 2.0
/** test -delete */
public void testDelete() throws Exception {
  final Configuration conf = new Configuration();
  conf.setInt("fs.trash.interval", 60);
  MiniDFSCluster cluster = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
    final URI nnURI = FileSystem.getDefaultUri(conf);
    final String nnUri = nnURI.toString();
    final FileSystem fs = FileSystem.get(URI.create(nnUri), conf);

    final DistCpV1 distcp = new DistCpV1(conf);
    final FsShell shell = new FsShell(conf);

    final String srcrootdir = "/src_root";
    final String dstrootdir = "/dst_root";

    {
      //create source files
      createFiles(nnURI, srcrootdir);
      String srcresults = execCmd(shell, "-lsr", srcrootdir);
      srcresults = removePrefix(srcresults, srcrootdir);
      System.out.println("srcresults=" + srcresults);

      //create some files in dst
      createFiles(nnURI, dstrootdir);
      System.out.println("dstrootdir=" + dstrootdir);
      shell.run(new String[]{"-lsr", dstrootdir});

      //run distcp
      ToolRunner.run(distcp,
          new String[]{"-delete", "-update", "-log", "/log",
                       nnUri+srcrootdir, nnUri+dstrootdir});

      //make sure src and dst contains the same files
      String dstresults = execCmd(shell, "-lsr", dstrootdir);
      dstresults = removePrefix(dstresults, dstrootdir);
      System.out.println("first dstresults=" + dstresults);
      assertEquals(srcresults, dstresults);

      //create additional file in dst
      create(fs, new Path(dstrootdir, "foo"));
      create(fs, new Path(dstrootdir, "foobar"));

      //run distcp again
      ToolRunner.run(distcp,
          new String[]{"-delete", "-update", "-log", "/log2",
                       nnUri+srcrootdir, nnUri+dstrootdir});

      //make sure src and dst contains the same files
      dstresults = execCmd(shell, "-lsr", dstrootdir);
      dstresults = removePrefix(dstresults, dstrootdir);
      System.out.println("second dstresults=" + dstresults);
      assertEquals(srcresults, dstresults);
      // verify that files removed in -delete were moved to the trash
      // regrettably, this test will break if Trash changes incompatibly
      assertTrue(fs.exists(new Path(fs.getHomeDirectory(),
          ".Trash/Current" + dstrootdir + "/foo")));
      assertTrue(fs.exists(new Path(fs.getHomeDirectory(),
          ".Trash/Current" + dstrootdir + "/foobar")));

      //cleanup
      deldir(fs, dstrootdir);
      deldir(fs, srcrootdir);
    }
  } finally {
    if (cluster != null) { cluster.shutdown(); }
  }
}
Example 19
Source File: DistCp.java From hadoop-gpu with Apache License 2.0
/** Delete the dst files/dirs which do not exist in src */
static private void deleteNonexisting(
    FileSystem dstfs, FileStatus dstroot, Path dstsorted,
    FileSystem jobfs, Path jobdir, JobConf jobconf, Configuration conf
    ) throws IOException {
  if (!dstroot.isDir()) {
    throw new IOException("dst must be a directory when option "
        + Options.DELETE.cmd + " is set, but dst (= " + dstroot.getPath()
        + ") is not a directory.");
  }

  //write dst lsr results
  final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr");
  final SequenceFile.Writer writer = SequenceFile.createWriter(jobfs, jobconf,
      dstlsr, Text.class, FileStatus.class,
      SequenceFile.CompressionType.NONE);
  try {
    //do lsr to get all file statuses in dstroot
    final Stack<FileStatus> lsrstack = new Stack<FileStatus>();
    for(lsrstack.push(dstroot); !lsrstack.isEmpty(); ) {
      final FileStatus status = lsrstack.pop();
      if (status.isDir()) {
        for(FileStatus child : dstfs.listStatus(status.getPath())) {
          String relative = makeRelative(dstroot.getPath(), child.getPath());
          writer.append(new Text(relative), child);
          lsrstack.push(child);
        }
      }
    }
  } finally {
    checkAndClose(writer);
  }

  //sort lsr results
  final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted");
  SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs,
      new Text.Comparator(), Text.class, FileStatus.class, jobconf);
  sorter.sort(dstlsr, sortedlsr);

  //compare lsr list and dst list
  SequenceFile.Reader lsrin = null;
  SequenceFile.Reader dstin = null;
  try {
    lsrin = new SequenceFile.Reader(jobfs, sortedlsr, jobconf);
    dstin = new SequenceFile.Reader(jobfs, dstsorted, jobconf);

    //compare sorted lsr list and sorted dst list
    final Text lsrpath = new Text();
    final FileStatus lsrstatus = new FileStatus();
    final Text dstpath = new Text();
    final Text dstfrom = new Text();
    final FsShell shell = new FsShell(conf);
    final String[] shellargs = {"-rmr", null};

    boolean hasnext = dstin.next(dstpath, dstfrom);
    for(; lsrin.next(lsrpath, lsrstatus); ) {
      int dst_cmp_lsr = dstpath.compareTo(lsrpath);
      for(; hasnext && dst_cmp_lsr < 0; ) {
        hasnext = dstin.next(dstpath, dstfrom);
        dst_cmp_lsr = dstpath.compareTo(lsrpath);
      }

      if (dst_cmp_lsr == 0) {
        //lsrpath exists in dst, skip it
        hasnext = dstin.next(dstpath, dstfrom);
      } else {
        //lsrpath does not exist, delete it
        String s = new Path(dstroot.getPath(), lsrpath.toString()).toString();
        if (shellargs[1] == null || !isAncestorPath(shellargs[1], s)) {
          shellargs[1] = s;
          int r = 0;
          try {
            r = shell.run(shellargs);
          } catch(Exception e) {
            throw new IOException("Exception from shell.", e);
          }
          if (r != 0) {
            throw new IOException("\"" + shellargs[0] + " " + shellargs[1]
                + "\" returns non-zero value " + r);
          }
        }
      }
    }
  } finally {
    checkAndClose(lsrin);
    checkAndClose(dstin);
  }
}
Example 20
Source File: LookupTableHFilesBulkLoadJob.java From kylin-on-parquet-v2 with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OPTION_INPUT_PATH);
    options.addOption(OPTION_TABLE_NAME);
    options.addOption(OPTION_CUBING_JOB_ID);
    options.addOption(OPTION_LOOKUP_SNAPSHOT_ID);
    parseOptions(options, args);

    String tableName = getOptionValue(OPTION_TABLE_NAME);
    String cubingJobID = getOptionValue(OPTION_CUBING_JOB_ID);
    String snapshotID = getOptionValue(OPTION_LOOKUP_SNAPSHOT_ID);

    KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
    ExecutableManager execMgr = ExecutableManager.getInstance(kylinConfig);
    DefaultChainedExecutable job = (DefaultChainedExecutable) execMgr.getJob(cubingJobID);

    ExtTableSnapshotInfoManager extTableSnapshotInfoManager = ExtTableSnapshotInfoManager.getInstance(kylinConfig);
    ExtTableSnapshotInfo snapshot = extTableSnapshotInfoManager.getSnapshot(tableName, snapshotID);
    long srcTableRowCnt = Long.parseLong(job.findExtraInfoBackward(
        BatchConstants.LOOKUP_EXT_SNAPSHOT_SRC_RECORD_CNT_PFX + tableName, "-1"));
    logger.info("update table:{} snapshot row count:{}", tableName, srcTableRowCnt);
    snapshot.setRowCnt(srcTableRowCnt);
    snapshot.setLastBuildTime(System.currentTimeMillis());
    extTableSnapshotInfoManager.updateSnapshot(snapshot);

    String hTableName = snapshot.getStorageLocationIdentifier();
    // e.g
    // /tmp/kylin-3f150b00-3332-41ca-9d3d-652f67f044d7/test_kylin_cube_with_slr_ready_2_segments/hfile/
    // end with "/"
    String input = getOptionValue(OPTION_INPUT_PATH);

    Configuration conf = HBaseConnection.getCurrentHBaseConfiguration();
    FsShell shell = new FsShell(conf);

    int exitCode = -1;
    int retryCount = 10;
    while (exitCode != 0 && retryCount >= 1) {
        exitCode = shell.run(new String[] { "-chmod", "-R", "777", input });
        retryCount--;
        Thread.sleep(5000);
    }

    if (exitCode != 0) {
        logger.error("Failed to change the file permissions: {}", input);
        throw new IOException("Failed to change the file permissions: " + input);
    }

    String[] newArgs = new String[2];
    newArgs[0] = input;
    newArgs[1] = hTableName;

    logger.debug("Start to run LoadIncrementalHFiles");
    int ret = MRUtil.runMRJob(new LoadIncrementalHFiles(conf), newArgs);
    logger.debug("End to run LoadIncrementalHFiles");
    return ret;
}