org.apache.hadoop.fs.Trash Java Examples

The following examples show how to use org.apache.hadoop.fs.Trash. Each example is taken from an open-source project; the source file, project, and license are noted above each snippet.
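As a starting point, the sketch below shows the typical call sequence in one self-contained program: build a Configuration with trash enabled, wrap a FileSystem in a Trash instance, move a path to trash, then checkpoint and expunge. This is a minimal sketch, not taken from any project below; the local FileSystem and the /tmp/trash-demo path are illustrative assumptions.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.Trash;

public class TrashQuickStart {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Trash is disabled when fs.trash.interval is 0; a value of 10 keeps
    // deleted files for 10 minutes before the emptier may remove them.
    conf.set("fs.trash.interval", "10");

    FileSystem fs = FileSystem.getLocal(conf);          // illustrative: local FS
    Path victim = new Path("/tmp/trash-demo/file.txt"); // hypothetical path; must exist

    Trash trash = new Trash(fs, conf);
    if (trash.moveToTrash(victim)) {
      System.out.println("Moved to " + trash.getCurrentTrashDir());
    }

    // Roll the Current directory into a timestamped checkpoint, then
    // delete checkpoints older than fs.trash.interval.
    trash.checkpoint();
    trash.expunge();
  }
}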
Example #1
Source File: NameNode.java    From hadoop with Apache License 2.0
private void startTrashEmptier(final Configuration conf) throws IOException {
  long trashInterval =
      conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT);
  if (trashInterval == 0) {
    return;
  } else if (trashInterval < 0) {
    throw new IOException("Cannot start trash emptier with negative interval."
        + " Set " + FS_TRASH_INTERVAL_KEY + " to a positive value.");
  }
  
  // This may be called from the transitionToActive code path, in which
  // case the current user is the administrator, not the NN. The trash
  // emptier needs to run as the NN. See HDFS-3972.
  FileSystem fs = SecurityUtil.doAsLoginUser(
      new PrivilegedExceptionAction<FileSystem>() {
        @Override
        public FileSystem run() throws IOException {
          return FileSystem.get(conf);
        }
      });
  this.emptier = new Thread(new Trash(fs, conf).getEmptier(), "Trash Emptier");
  this.emptier.setDaemon(true);
  this.emptier.start();
}
 
Example #2
Source File: Delete.java    From hadoop with Apache License 2.0
private boolean moveToTrash(PathData item) throws IOException {
  boolean success = false;
  if (!skipTrash) {
    try {
      success = Trash.moveToAppropriateTrash(item.fs, item.path, getConf());
    } catch (FileNotFoundException fnfe) {
      throw fnfe;
    } catch (IOException ioe) {
      String msg = ioe.getMessage();
      if (ioe.getCause() != null) {
        msg += ": " + ioe.getCause().getMessage();
      }
      throw new IOException(msg + ". Consider using -skipTrash option", ioe);
    }
  }
  return success;
}
 
Example #3
Source File: NameNode.java    From big-c with Apache License 2.0
private void startTrashEmptier(final Configuration conf) throws IOException {
  long trashInterval =
      conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT);
  if (trashInterval == 0) {
    return;
  } else if (trashInterval < 0) {
    throw new IOException("Cannot start trash emptier with negative interval."
        + " Set " + FS_TRASH_INTERVAL_KEY + " to a positive value.");
  }
  
  // This may be called from the transitionToActive code path, in which
  // case the current user is the administrator, not the NN. The trash
  // emptier needs to run as the NN. See HDFS-3972.
  FileSystem fs = SecurityUtil.doAsLoginUser(
      new PrivilegedExceptionAction<FileSystem>() {
        @Override
        public FileSystem run() throws IOException {
          return FileSystem.get(conf);
        }
      });
  this.emptier = new Thread(new Trash(fs, conf).getEmptier(), "Trash Emptier");
  this.emptier.setDaemon(true);
  this.emptier.start();
}
 
Example #4
Source File: Delete.java    From big-c with Apache License 2.0
private boolean moveToTrash(PathData item) throws IOException {
  boolean success = false;
  if (!skipTrash) {
    try {
      success = Trash.moveToAppropriateTrash(item.fs, item.path, getConf());
    } catch (FileNotFoundException fnfe) {
      throw fnfe;
    } catch (IOException ioe) {
      String msg = ioe.getMessage();
      if (ioe.getCause() != null) {
        msg += ": " + ioe.getCause().getMessage();
      }
      throw new IOException(msg + ". Consider using -skipTrash option", ioe);
    }
  }
  return success;
}
 
Example #5
Source File: TestTrash.java    From hadoop-gpu with Apache License 2.0
public static void trashNonDefaultFS(Configuration conf) throws IOException {
  conf.set("fs.trash.interval", "10"); // 10 minute
  // attempt non-default FileSystem trash
  {
    final FileSystem lfs = FileSystem.getLocal(conf);
    Path p = TEST_DIR;
    Path f = new Path(p, "foo/bar");
    if (lfs.exists(p)) {
      lfs.delete(p, true);
    }
    try {
      f = writeFile(lfs, f);

      FileSystem.closeAll();
      FileSystem localFs = FileSystem.get(URI.create("file:///"), conf);
      Trash lTrash = new Trash(localFs, conf);
      lTrash.moveToTrash(f.getParent());
      checkTrash(localFs, lTrash.getCurrentTrashDir(), f);
    } finally {
      if (lfs.exists(p)) {
        lfs.delete(p, true);
      }
    }
  }
}
 
Example #6
Source File: HadoopUtilsTest.java    From incubator-gobblin with Apache License 2.0
@Test
public void testMoveToTrash() throws IOException {
  Path hadoopUtilsTestDir = new Path(Files.createTempDir().getAbsolutePath(), "HadoopUtilsTestDir");
  Configuration conf = new Configuration();
  // Set the time to keep it in trash to 10 minutes.
  // 0 means object will be deleted instantly.
  conf.set("fs.trash.interval", "10");
  FileSystem fs = FileSystem.getLocal(conf);
  Trash trash = new Trash(fs, conf);
  TrashPolicy trashPolicy = TrashPolicy.getInstance(conf, fs, fs.getHomeDirectory());
  Path trashPath = trashPolicy.getCurrentTrashDir();

  fs.mkdirs(hadoopUtilsTestDir);
  Assert.assertTrue(fs.exists(hadoopUtilsTestDir));
  trash.moveToTrash(hadoopUtilsTestDir.getParent());
  Assert.assertFalse(fs.exists(hadoopUtilsTestDir));
  Assert.assertTrue(fs.exists(trashPath));
}
 
Example #7
Source File: TestTrash.java    From RDFS with Apache License 2.0
public static void trashNonDefaultFS(Configuration conf) throws IOException {
  conf.set("fs.trash.interval", "10"); // 10 minute
  // attempt non-default FileSystem trash
  {
    final FileSystem lfs = FileSystem.getLocal(conf);
    Path p = TEST_DIR;
    Path f = new Path(p, "foo/bar");
    if (lfs.exists(p)) {
      lfs.delete(p, true);
    }
    try {
      f = writeFile(lfs, f);

      FileSystem.closeAll();
      FileSystem localFs = FileSystem.get(URI.create("file:///"), conf);
      Trash lTrash = new Trash(localFs, conf);
      lTrash.moveToTrash(f.getParent());
      checkTrash(localFs, lTrash.getCurrentTrashDir(), f);
    } finally {
      if (lfs.exists(p)) {
        lfs.delete(p, true);
      }
    }
  }
}
 
Example #8
Source File: Delete.java    From hadoop with Apache License 2.0
@Override
protected void processArguments(LinkedList<PathData> args)
throws IOException {
  Trash trash = new Trash(getConf());
  trash.expunge();
  trash.checkpoint();    
}
 
Example #9
Source File: ShardWriter.java    From linden with Apache License 2.0
public static void moveToTrash(Configuration conf, Path path) throws IOException {
  Trash t = new Trash(conf);
  boolean isMoved = t.moveToTrash(path);
  t.expunge();
  if (!isMoved) {
    logger.error("Trash is not enabled or file is already in the trash.");
  }
}
 
Example #10
Source File: Delete.java    From big-c with Apache License 2.0
@Override
protected void processArguments(LinkedList<PathData> args)
throws IOException {
  Trash trash = new Trash(getConf());
  trash.expunge();
  trash.checkpoint();    
}
 
Example #11
Source File: TestTrash.java    From RDFS with Apache License 2.0
public void testPluggableTrash() throws IOException {
  Configuration conf = new Configuration();

  // Test plugged TrashPolicy
  conf.setClass("fs.trash.classname", TestTrashPolicy.class, TrashPolicy.class);
  Trash trash = new Trash(conf);
  assertTrue(trash.getTrashPolicy().getClass().equals(TestTrashPolicy.class));
}
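
The TestTrashPolicy registered above is defined elsewhere in that test file. As a rough illustration of what a pluggable policy involves, here is a minimal no-op sketch; the method set follows the TrashPolicy base class in Hadoop 1.x/2.x, so treat the exact signatures as an assumption and check them against your Hadoop version.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.TrashPolicy;

// A policy that reports trash as disabled and never moves anything.
public class NoOpTrashPolicy extends TrashPolicy {
  @Override
  public void initialize(Configuration conf, FileSystem fs, Path home) {
    this.fs = fs; // protected field inherited from TrashPolicy
  }

  @Override
  public boolean isEnabled() { return false; }

  @Override
  public boolean moveToTrash(Path path) throws IOException { return false; }

  @Override
  public void createCheckpoint() throws IOException { }

  @Override
  public void deleteCheckpoint() throws IOException { }

  @Override
  public Path getCurrentTrashDir() { return null; }

  @Override
  public Runnable getEmptier() throws IOException {
    return new Runnable() {
      @Override
      public void run() { } // nothing to empty
    };
  }
}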
 
Example #12
Source File: NameNode.java    From RDFS with Apache License 2.0
private void startTrashEmptier(Configuration conf) throws IOException {
  if (conf.getInt("fs.trash.interval", 0) == 0) {
    return;
  }
  FileSystem fs = NameNode.getTrashFileSystem(conf);
  this.trash = new Trash(fs, conf);
  this.emptier = new Thread(trash.getEmptier(), "Trash Emptier");
  this.emptier.setDaemon(true);
  this.emptier.start();
}
 
Example #13
Source File: DistCpV1.java    From hadoop with Apache License 2.0
/**
 * Delete the dst files/dirs which do not exist in src
 * 
 * @return total count of files and directories deleted from destination
 * @throws IOException
 */
static private long deleteNonexisting(
    FileSystem dstfs, FileStatus dstroot, Path dstsorted,
    FileSystem jobfs, Path jobdir, JobConf jobconf, Configuration conf
    ) throws IOException {
  if (dstroot.isFile()) {
    throw new IOException("dst must be a directory when option "
        + Options.DELETE.cmd + " is set, but dst (= " + dstroot.getPath()
        + ") is not a directory.");
  }

  //write dst lsr results
  final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr");
  try (final SequenceFile.Writer writer = SequenceFile.createWriter(jobconf,
      Writer.file(dstlsr), Writer.keyClass(Text.class),
      Writer.valueClass(NullWritable.class), Writer.compression(
      SequenceFile.CompressionType.NONE))) {
    //do lsr to get all file statuses in dstroot
    final Stack<FileStatus> lsrstack = new Stack<FileStatus>();
    for(lsrstack.push(dstroot); !lsrstack.isEmpty(); ) {
      final FileStatus status = lsrstack.pop();
      if (status.isDirectory()) {
        for(FileStatus child : dstfs.listStatus(status.getPath())) {
          String relative = makeRelative(dstroot.getPath(), child.getPath());
          writer.append(new Text(relative), NullWritable.get());
          lsrstack.push(child);
        }
      }
    }
  }

  //sort lsr results
  final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted");
  SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs,
      new Text.Comparator(), Text.class, NullWritable.class, jobconf);
  sorter.sort(dstlsr, sortedlsr);

  //compare lsr list and dst list  
  long deletedPathsCount = 0;
  try (SequenceFile.Reader lsrin =
           new SequenceFile.Reader(jobconf, Reader.file(sortedlsr));
       SequenceFile.Reader  dstin =
           new SequenceFile.Reader(jobconf, Reader.file(dstsorted))) {
    //compare sorted lsr list and sorted dst list
    final Text lsrpath = new Text();
    final Text dstpath = new Text();
    final Text dstfrom = new Text();
    final Trash trash = new Trash(dstfs, conf);
    Path lastpath = null;

    boolean hasnext = dstin.next(dstpath, dstfrom);
    while (lsrin.next(lsrpath, NullWritable.get())) {
      int dst_cmp_lsr = dstpath.compareTo(lsrpath);
      while (hasnext && dst_cmp_lsr < 0) {
        hasnext = dstin.next(dstpath, dstfrom);
        dst_cmp_lsr = dstpath.compareTo(lsrpath);
      }
      
      if (dst_cmp_lsr == 0) {
        //lsrpath exists in dst, skip it
        hasnext = dstin.next(dstpath, dstfrom);
      } else {
        //lsrpath does not exist, delete it
        final Path rmpath = new Path(dstroot.getPath(), lsrpath.toString());
        ++deletedPathsCount;
        if ((lastpath == null || !isAncestorPath(lastpath, rmpath))) {
          if (!(trash.moveToTrash(rmpath) || dstfs.delete(rmpath, true))) {
            throw new IOException("Failed to delete " + rmpath);
          }
          lastpath = rmpath;
        }
      }
    }
  }
  return deletedPathsCount;
}
 
Example #14
Source File: FileSystemDatasetRepository.java    From kite with Apache License 2.0
private boolean deleteWithTrash(String namespace, String name, boolean useTrash) {
  Preconditions.checkNotNull(namespace, "Namespace cannot be null");
  Preconditions.checkNotNull(name, "Dataset name cannot be null");

  LOG.debug("Deleting dataset:{}", name);

  DatasetDescriptor descriptor;
  try {
    descriptor = metadataProvider.load(namespace, name);
  } catch (DatasetNotFoundException ex) {
    return false;
  }

  // don't care about the return value here -- if it already doesn't exist
  // we still need to delete the data directory
  boolean changed = useTrash ? metadataProvider.moveToTrash(namespace, name) :
          metadataProvider.delete(namespace, name);

  Path dataLocation = new Path(descriptor.getLocation().toString());
  FileSystem dataFS = fsForPath(dataLocation, conf);

  if (fs.getUri().equals(dataFS.getUri())) {
    // the data location is on the right FS, so cleanlyDelete will work
    changed |= (useTrash ? FileSystemUtil.cleanlyMoveToTrash(fs, rootDirectory, dataLocation)
        : FileSystemUtil.cleanlyDelete(fs, rootDirectory, dataLocation));
  } else {
    try {
      if (dataFS.exists(dataLocation)) {

        changed = (useTrash ? Trash.moveToAppropriateTrash(dataFS, dataLocation, dataFS.getConf())
            : dataFS.delete(dataLocation, true));

        if (!changed){
          throw new IOException(
              "Failed to delete dataset name:" + name +
                  " location:" + dataLocation);
        }
      }
    } catch (IOException e) {
      throw new DatasetIOException(
          "Internal failure when removing location:" + dataLocation, e);
    }
  }
  return changed;
}
 
Example #15
Source File: NameNode.java    From hadoop-gpu with Apache License 2.0
private void startTrashEmptier(Configuration conf) throws IOException {
  this.emptier = new Thread(new Trash(conf).getEmptier(), "Trash Emptier");
  this.emptier.setDaemon(true);
  this.emptier.start();
}
 
Example #16
Source File: TestTrash.java    From RDFS with Apache License 2.0
/**
 * @param fs
 * @param conf
 * @throws Exception
 */
protected void trashPatternEmptier(FileSystem fs, Configuration conf) throws Exception {
  // Trash with 12 second deletes and 6 seconds checkpoints
  conf.set("fs.trash.interval", "0.2"); // 12 seconds
  conf.set("fs.trash.checkpoint.interval", "0.1"); // 6 seconds
  conf.setClass("fs.trash.classname", TrashPolicyPattern.class, TrashPolicy.class);
  conf.set("fs.trash.base.paths", TEST_DIR + "/my_root/*/");
  conf.set("fs.trash.unmatched.paths", TEST_DIR + "/unmatched/");
  Trash trash = new Trash(conf);
  // clean up trash can
  fs.delete(new Path(TEST_DIR + "/my_root/*/"), true);
  fs.delete(new Path(TEST_DIR + "/my_root_not/*/"), true);


  FsShell shell = new FsShell();
  shell.setConf(conf);
  shell.init();
  // First create a new directory with mkdirs
  deleteAndCheckTrash(fs, shell, "my_root/sub_dir1/sub_dir1_1/myFile",
      "my_root/sub_dir1/.Trash/Current/" + TEST_DIR
          + "/my_root/sub_dir1/sub_dir1_1");
  deleteAndCheckTrash(fs, shell, "my_root/sub_dir2/sub_dir2_1/myFile",
      "my_root/sub_dir2/.Trash/Current/" + TEST_DIR
          + "/my_root/sub_dir2/sub_dir2_1");
  deleteAndCheckTrash(fs, shell, "my_root_not/", "unmatched/.Trash/Current"
      + TEST_DIR + "/my_root_not");
  deleteAndCheckTrash(fs, shell, "my_root/file", "unmatched/.Trash/Current"
      + TEST_DIR + "/my_root/file");

  Path currentTrash = new Path(TEST_DIR, "my_root/sub_dir1/.Trash/Current/");
  fs.mkdirs(currentTrash);
  cmdUsingShell("-rmr", shell, currentTrash);
  TestCase.assertTrue(!fs.exists(currentTrash));

  cmdUsingShell("-rmr", shell, new Path(TEST_DIR, "my_root"));
  TestCase.assertTrue(fs.exists(new Path(TEST_DIR,
      "unmatched/.Trash/Current/" + TEST_DIR + "/my_root")));
  
  // Test Emptier
  // Start Emptier in background
  Runnable emptier = trash.getEmptier();
  Thread emptierThread = new Thread(emptier);
  emptierThread.start();

  int fileIndex = 0;
  Set<String> checkpoints = new HashSet<String>();
  while (true)  {
    // Create a file with a new name
    Path myFile = new Path(TEST_DIR, "my_root/sub_dir1/sub_dir2/myFile" + fileIndex++);
    writeFile(fs, myFile);

    // Delete the file to trash
    String[] args = new String[2];
    args[0] = "-rm";
    args[1] = myFile.toString();
    int val = -1;
    try {
      val = shell.run(args);
    } catch (Exception e) {
      System.err.println("Exception raised from Trash.run " +
                         e.getLocalizedMessage());
    }
    assertTrue(val == 0);

    Path trashDir = new Path(TEST_DIR, "my_root/sub_dir1/.Trash/Current/");
    FileStatus files[] = fs.listStatus(trashDir.getParent());
    // Scan files in .Trash and add them to set of checkpoints
    for (FileStatus file : files) {
      String fileName = file.getPath().getName();
      checkpoints.add(fileName);
    }
    // If the checkpoints set has 5 entries, it holds Current plus 4 checkpoint directories
    if (checkpoints.size() == 5) {
      // The actual contents should be smaller since the last checkpoint
      // should've been deleted and Current might not have been recreated yet
      assertTrue(5 > files.length);
      break;
    }
    Thread.sleep(5000);
  }
  emptierThread.interrupt();
  emptierThread.join();
}
 
Example #17
Source File: TestTrash.java    From RDFS with Apache License 2.0
protected void trashEmptier(FileSystem fs, Configuration conf) throws Exception {
  // Trash with 12 second deletes and 6 seconds checkpoints
  conf.set("fs.trash.interval", "0.2"); // 12 seconds
  conf.set("fs.trash.checkpoint.interval", "0.1"); // 6 seconds
  Trash trash = new Trash(conf);
  // clean up trash can
  fs.delete(trash.getCurrentTrashDir().getParent(), true);

  // Start Emptier in background
  Runnable emptier = trash.getEmptier();
  Thread emptierThread = new Thread(emptier);
  emptierThread.start();

  FsShell shell = new FsShell();
  shell.setConf(conf);
  shell.init();
  // First create a new directory with mkdirs
  Path myPath = new Path(TEST_DIR, "test/mkdirs");
  mkdir(fs, myPath);
  int fileIndex = 0;
  Set<String> checkpoints = new HashSet<String>();
  while (true)  {
    // Create a file with a new name
    Path myFile = new Path(TEST_DIR, "test/mkdirs/myFile" + fileIndex++);
    writeFile(fs, myFile);

    // Delete the file to trash
    assertTrue(rmUsingShell(shell, myFile) == 0);

    Path trashDir = shell.getCurrentTrashDir();
    FileStatus files[] = fs.listStatus(trashDir.getParent());
    // Scan files in .Trash and add them to set of checkpoints
    for (FileStatus file : files) {
      String fileName = file.getPath().getName();
      checkpoints.add(fileName);
    }
    // If the checkpoints set has 5 entries, it holds Current plus 4 checkpoint directories
    if (checkpoints.size() == 5) {
      // The actual contents should be smaller since the last checkpoint
      // should've been deleted and Current might not have been recreated yet
      assertTrue(5 > files.length);
      break;
    }
    Thread.sleep(5000);
  }
  emptierThread.interrupt();
  emptierThread.join();
}
 
Example #18
Source File: DistCpV1.java    From big-c with Apache License 2.0
/**
 * Delete the dst files/dirs which do not exist in src
 * 
 * @return total count of files and directories deleted from destination
 * @throws IOException
 */
static private long deleteNonexisting(
    FileSystem dstfs, FileStatus dstroot, Path dstsorted,
    FileSystem jobfs, Path jobdir, JobConf jobconf, Configuration conf
    ) throws IOException {
  if (dstroot.isFile()) {
    throw new IOException("dst must be a directory when option "
        + Options.DELETE.cmd + " is set, but dst (= " + dstroot.getPath()
        + ") is not a directory.");
  }

  //write dst lsr results
  final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr");
  try (final SequenceFile.Writer writer = SequenceFile.createWriter(jobconf,
      Writer.file(dstlsr), Writer.keyClass(Text.class),
      Writer.valueClass(NullWritable.class), Writer.compression(
      SequenceFile.CompressionType.NONE))) {
    //do lsr to get all file statuses in dstroot
    final Stack<FileStatus> lsrstack = new Stack<FileStatus>();
    for(lsrstack.push(dstroot); !lsrstack.isEmpty(); ) {
      final FileStatus status = lsrstack.pop();
      if (status.isDirectory()) {
        for(FileStatus child : dstfs.listStatus(status.getPath())) {
          String relative = makeRelative(dstroot.getPath(), child.getPath());
          writer.append(new Text(relative), NullWritable.get());
          lsrstack.push(child);
        }
      }
    }
  }

  //sort lsr results
  final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted");
  SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs,
      new Text.Comparator(), Text.class, NullWritable.class, jobconf);
  sorter.sort(dstlsr, sortedlsr);

  //compare lsr list and dst list  
  long deletedPathsCount = 0;
  try (SequenceFile.Reader lsrin =
           new SequenceFile.Reader(jobconf, Reader.file(sortedlsr));
       SequenceFile.Reader  dstin =
           new SequenceFile.Reader(jobconf, Reader.file(dstsorted))) {
    //compare sorted lsr list and sorted dst list
    final Text lsrpath = new Text();
    final Text dstpath = new Text();
    final Text dstfrom = new Text();
    final Trash trash = new Trash(dstfs, conf);
    Path lastpath = null;

    boolean hasnext = dstin.next(dstpath, dstfrom);
    while (lsrin.next(lsrpath, NullWritable.get())) {
      int dst_cmp_lsr = dstpath.compareTo(lsrpath);
      while (hasnext && dst_cmp_lsr < 0) {
        hasnext = dstin.next(dstpath, dstfrom);
        dst_cmp_lsr = dstpath.compareTo(lsrpath);
      }
      
      if (dst_cmp_lsr == 0) {
        //lsrpath exists in dst, skip it
        hasnext = dstin.next(dstpath, dstfrom);
      } else {
        //lsrpath does not exist, delete it
        final Path rmpath = new Path(dstroot.getPath(), lsrpath.toString());
        ++deletedPathsCount;
        if ((lastpath == null || !isAncestorPath(lastpath, rmpath))) {
          if (!(trash.moveToTrash(rmpath) || dstfs.delete(rmpath, true))) {
            throw new IOException("Failed to delete " + rmpath);
          }
          lastpath = rmpath;
        }
      }
    }
  }
  return deletedPathsCount;
}
 
Example #19
Source File: LindenJob.java    From linden with Apache License 2.0
@Override
public int run(String[] strings) throws Exception {
  Configuration conf = getConf();
  String dir = conf.get(LindenJobConfig.INPUT_DIR, null);
  logger.info("input dir:" + dir);
  Path inputPath = new Path(StringUtils.unEscapeString(dir));
  Path outputPath = new Path(conf.get(LindenJobConfig.OUTPUT_DIR));
  String indexPath = conf.get(LindenJobConfig.INDEX_PATH);

  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }
  if (fs.exists(new Path(indexPath))) {
    fs.delete(new Path(indexPath), true);
  }

  int numShards = conf.getInt(LindenJobConfig.NUM_SHARDS, 1);
  Shard[] shards = createShards(indexPath, numShards);

  Shard.setIndexShards(conf, shards);

  // empty trash
  (new Trash(conf)).expunge();

  Job job = Job.getInstance(conf, "linden-hadoop-indexing");
  job.setJarByClass(LindenJob.class);
  job.setMapperClass(LindenMapper.class);
  job.setCombinerClass(LindenCombiner.class);
  job.setReducerClass(LindenReducer.class);
  job.setMapOutputKeyClass(Shard.class);
  job.setMapOutputValueClass(IntermediateForm.class);
  job.setOutputKeyClass(Shard.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(IndexUpdateOutputFormat.class);
  job.setReduceSpeculativeExecution(false);
  job.setNumReduceTasks(numShards);

  String lindenSchemaFile = conf.get(LindenJobConfig.SCHEMA_FILE_URL);
  if (lindenSchemaFile == null) {
    throw new IOException("no schema file is found");
  }
  logger.info("Adding schema file: " + lindenSchemaFile);
  job.addCacheFile(new URI(lindenSchemaFile + "#lindenSchema"));
  String lindenPropertiesFile = conf.get(LindenJobConfig.LINDEN_PROPERTIES_FILE_URL);
  if (lindenPropertiesFile == null) {
    throw new IOException("no linden properties file is found");
  }
  logger.info("Adding linden properties file: " + lindenPropertiesFile);
  job.addCacheFile(new URI(lindenPropertiesFile + "#lindenProperties"));

  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  Path[] inputs = FileInputFormat.getInputPaths(job);
  StringBuilder buffer = new StringBuilder(inputs[0].toString());
  for (int i = 1; i < inputs.length; i++) {
    buffer.append(",");
    buffer.append(inputs[i].toString());
  }
  logger.info("mapreduce.input.dir = " + buffer.toString());
  logger.info("mapreduce.output.dir = " + FileOutputFormat.getOutputPath(job).toString());
  logger.info("mapreduce.job.num.reduce.tasks = " + job.getNumReduceTasks());
  logger.info(shards.length + " shards = " + conf.get(LindenJobConfig.INDEX_SHARDS));
  logger.info("mapreduce.input.format.class = " + job.getInputFormatClass());
  logger.info("mapreduce.output.format.class = " + job.getOutputFormatClass());
  logger.info("mapreduce.cluster.temp.dir = " + conf.get(MRJobConfig.TEMP_DIR));

  job.waitForCompletion(true);
  if (!job.isSuccessful()) {
    throw new RuntimeException("Job failed");
  }
  return 0;
}
 
Example #20
Source File: HadoopUtils.java    From incubator-gobblin with Apache License 2.0
/**
 * Moves the object to the filesystem trash according to the file system policy.
 * @param fs FileSystem object
 * @param path Path to the object to be moved to trash.
 * @throws IOException
 */
public static void moveToTrash(FileSystem fs, Path path) throws IOException {
  Trash trash = new Trash(fs, new Configuration());
  trash.moveToTrash(path);
}