Java Code Examples for org.apache.hadoop.io.Text#compareTo()

The following examples show how to use org.apache.hadoop.io.Text#compareTo(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TeraValidate.java    From hadoop-gpu with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the first value of each incoming key is not smaller than the
 * first value seen for the previous key, and reports a violation under the
 * {@code error} key.
 *
 * <p>Records that already arrive under the {@code error} key are passed
 * through unchanged.
 *
 * @param key current key
 * @param values values for {@code key}; only the first one is inspected
 *        unless {@code key} equals {@code error}
 * @param output collector that receives forwarded and newly found errors
 * @param reporter not used by this method
 * @throws IOException if the collector fails
 */
public void reduce(Text key, Iterator<Text> values,
                   OutputCollector<Text, Text> output,
                   Reporter reporter) throws IOException {
  // Error records are simply forwarded.
  if (error.equals(key)) {
    for (; values.hasNext(); ) {
      output.collect(key, values.next());
    }
    return;
  }

  final Text current = values.next();
  if (firstKey) {
    // Nothing to compare the very first record against.
    firstKey = false;
  } else if (current.compareTo(lastValue) < 0) {
    final String report = "misordered keys last: " +
        lastKey + " '" + lastValue +
        "' current: " + key + " '" + current + "'";
    output.collect(error, new Text(report));
  }

  // Remember this record for the comparison on the next call.
  lastKey.set(key);
  lastValue.set(current);
}
 
Example 2
Source File: TestIFile.java    From incubator-tez with Apache License 2.0 6 votes vote down vote up
/**
 * Writes every pair in {@code data} to {@code writer}, closes the writer and
 * logs its raw and compressed lengths.
 *
 * <p>When {@code repeatKeys} is set, a pair whose key compares equal to the
 * previous pair's key is appended under {@code IFile.REPEAT_KEY} instead of
 * its own key.
 *
 * @param writer writer to append to; must not be null
 * @param rle not read by this method
 * @param repeatKeys whether equal consecutive keys are written as
 *        {@code IFile.REPEAT_KEY}
 * @param data pairs to write, in order
 * @param codec not read by this method
 * @return the same (now closed) {@code writer}
 * @throws IOException if appending or closing fails
 */
private Writer writeTestFile(IFile.Writer writer, boolean rle, boolean repeatKeys,
    List<KVPair> data, CompressionCodec codec) throws IOException {
  assertNotNull(writer);

  Text lastKey = null;
  for (KVPair pair : data) {
    final Text currentKey = pair.getKey();
    final boolean writeAsRepeat =
        repeatKeys && lastKey != null && lastKey.compareTo(currentKey) == 0;
    if (writeAsRepeat) {
      // RLE is enabled in IFile when IFile.REPEAT_KEY is set
      writer.append(IFile.REPEAT_KEY, pair.getvalue());
    } else {
      writer.append(currentKey, pair.getvalue());
    }
    lastKey = currentKey;
  }
  writer.close();

  LOG.info("Uncompressed: " + writer.getRawLength());
  LOG.info("CompressedSize: " + writer.getCompressedLength());
  return writer;
}
 
Example 3
Source File: TestIFile.java    From incubator-tez with Apache License 2.0 6 votes vote down vote up
@Test
// Writes generated pairs, using appendValue whenever a key compares equal to
// the previous pair's key, then reads the file back and verifies it.
public void testAppendValue() throws IOException {
  List<KVPair> testData = KVDataGen.generateTestData(false, rnd.nextInt(100));
  IFile.Writer ifileWriter = new IFile.Writer(defaultConf, localFs, outputPath,
      Text.class, IntWritable.class, codec, null, null);

  Text lastKey = null;
  for (KVPair pair : testData) {
    final Text currentKey = pair.getKey();
    final boolean startsNewKey = lastKey == null || lastKey.compareTo(currentKey) != 0;
    if (startsNewKey) {
      ifileWriter.append(currentKey, pair.getvalue());
    } else {
      // Same key as the previous pair: only the value is written.
      ifileWriter.appendValue(pair.getvalue());
    }
    lastKey = currentKey;
  }
  ifileWriter.close();

  final long rawLength = ifileWriter.getRawLength();
  final long compressedLength = ifileWriter.getCompressedLength();
  readAndVerifyData(rawLength, compressedLength, testData, codec);
}
 
Example 4
Source File: TeraValidate.java    From hadoop-book with Apache License 2.0 6 votes vote down vote up
/**
 * Compares the first value of each incoming key with the first value seen
 * for the previous key and emits a record under the {@code error} key when
 * the new value is smaller.
 *
 * <p>Input that is already keyed by {@code error} is forwarded untouched.
 *
 * @param key current key
 * @param values values for {@code key}; only the first one is inspected
 *        unless {@code key} equals {@code error}
 * @param output collector for forwarded and newly detected errors
 * @param reporter not used by this method
 * @throws IOException if the collector fails
 */
public void reduce(Text key, Iterator<Text> values,
                   OutputCollector<Text, Text> output,
                   Reporter reporter) throws IOException {
  final boolean isErrorRecord = error.equals(key);
  if (isErrorRecord) {
    // Pass every existing error straight through.
    while (values.hasNext()) {
      final Text forwarded = values.next();
      output.collect(key, forwarded);
    }
    return;
  }

  final Text head = values.next();
  if (!firstKey) {
    if (head.compareTo(lastValue) < 0) {
      output.collect(error,
          new Text("misordered keys last: " +
              lastKey + " '" + lastValue +
              "' current: " + key + " '" + head + "'"));
    }
  } else {
    // First record seen: there is no predecessor to compare with.
    firstKey = false;
  }

  // Keep a copy of this record for the next invocation.
  lastKey.set(key);
  lastValue.set(head);
}
 
Example 5
Source File: TestIFile.java    From tez with Apache License 2.0 6 votes vote down vote up
@Test(timeout = 5000)
// Exercises appendValue: a pair whose key compares equal to the previous
// pair's key is written value-only; the result is read back and verified.
public void testAppendValue() throws IOException {
  List<KVPair> generated = KVDataGen.generateTestData(false, rnd.nextInt(100));
  IFile.Writer out = new IFile.Writer(defaultConf, localFs, outputPath,
      Text.class, IntWritable.class, codec, null, null);

  Text keyBefore = null;
  for (KVPair entry : generated) {
    final Text keyNow = entry.getKey();
    final boolean sameKeyAsBefore = keyBefore != null && keyBefore.compareTo(keyNow) == 0;
    if (!sameKeyAsBefore) {
      out.append(keyNow, entry.getvalue());
    } else {
      out.appendValue(entry.getvalue());
    }
    keyBefore = keyNow;
  }
  out.close();

  readAndVerifyData(out.getRawLength(), out.getCompressedLength(), generated, codec);
}
 
Example 6
Source File: TestIFile.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Appends all pairs in {@code data} to {@code writer}, closes it and logs the
 * raw and compressed lengths it reports.
 *
 * <p>With {@code repeatKeys} enabled, a pair whose key compares equal to the
 * preceding pair's key is appended under {@code IFile.REPEAT_KEY}.
 *
 * @param writer writer to append to; must not be null
 * @param repeatKeys whether equal consecutive keys are written as
 *        {@code IFile.REPEAT_KEY}
 * @param data pairs to write, in order
 * @return the same (now closed) {@code writer}
 * @throws IOException if appending or closing fails
 */
private Writer writeTestFile(IFile.Writer writer, boolean repeatKeys,
    List<KVPair> data) throws IOException {
  assertNotNull(writer);

  Text keyBefore = null;
  for (KVPair entry : data) {
    final Text keyNow = entry.getKey();
    final boolean repeated =
        repeatKeys && keyBefore != null && keyBefore.compareTo(keyNow) == 0;
    if (!repeated) {
      writer.append(keyNow, entry.getvalue());
    } else {
      // RLE is enabled in IFile when IFile.REPEAT_KEY is set
      writer.append(IFile.REPEAT_KEY, entry.getvalue());
    }
    keyBefore = keyNow;
  }
  writer.close();

  LOG.info("Uncompressed: " + writer.getRawLength());
  LOG.info("CompressedSize: " + writer.getCompressedLength());
  return writer;
}
 
Example 7
Source File: MergeSortRowIdMatcher.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
/**
 * Reports progress, then advances every reader whose current row id compares
 * lower than {@code rowId}.
 *
 * @param rowId row id the readers are compared against
 * @throws IOException if advancing a reader fails
 */
private void advanceReadersIfNeeded(Text rowId) throws IOException {
  _progressable.progress();
  for (MyReader candidate : _readers) {
    final boolean readerIsBehind = rowId.compareTo(candidate.getCurrentRowId()) > 0;
    if (!readerIsBehind) {
      continue;
    }
    advanceReader(candidate, rowId);
  }
}
 
Example 8
Source File: TeraInputFormat.java    From incubator-tez with Apache License 2.0 4 votes vote down vote up
/**
 * Compares the records stored at two positions of {@code records}.
 *
 * @param i index of the left-hand record
 * @param j index of the right-hand record
 * @return the result of {@code records.get(i).compareTo(records.get(j))}
 */
public int compare(int i, int j) {
  return records.get(i).compareTo(records.get(j));
}
 
Example 9
Source File: AccumuloClient.java    From presto with Apache License 2.0 4 votes vote down vote up
/**
 * Gets the TabletServer hostname for where the given key is located in the given table
 *
 * <p>The lookup scans the 'loc' column family of the "accumulo.metadata" table for the
 * rows belonging to the table. The scanner is always closed before this method returns,
 * including when the key is null or the scan fails part-way through.
 *
 * @param table Fully-qualified table name
 * @param key Key to locate, or null to get the location of the first tablet
 * @return The tablet location; the result of {@code getDefaultTabletLocation(table)} if
 * no location was found by the scan; or an empty Optional if an error occurs
 */
private Optional<String> getTabletLocation(String table, Key key)
{
    try {
        // Get the Accumulo table ID so we can scan some fun stuff
        String tableId = connector.tableOperations().tableIdMap().get(table);

        Optional<String> location = Optional.empty();

        // Create our scanner against the metadata table, fetching 'loc' family
        Scanner scanner = connector.createScanner("accumulo.metadata", auths);
        try {
            scanner.fetchColumnFamily(new Text("loc"));

            // Set the scan range to just this table, from the table ID to the default tablet
            // row, which is the last listed tablet
            Key defaultTabletRow = new Key(tableId + '<');
            Key start = new Key(tableId);
            Key end = defaultTabletRow.followingKey(PartialKey.ROW);
            scanner.setRange(new Range(start, end));

            if (key == null) {
                // if the key is null, then it is -inf, so get first tablet location
                Iterator<Entry<Key, Value>> iter = scanner.iterator();
                if (iter.hasNext()) {
                    location = Optional.of(iter.next().getValue().toString());
                }
            }
            else {
                // Else, we will need to scan through the tablet location data and find the location

                // Create some text objects to do comparison for what we are looking for
                Text splitCompareKey = new Text();
                key.getRow(splitCompareKey);
                Text scannedCompareKey = new Text();

                // Scan the table!
                for (Entry<Key, Value> entry : scanner) {
                    // Get the bytes of the key
                    byte[] keyBytes = entry.getKey().getRow().copyBytes();

                    // If the last byte is <, then we have hit the default tablet, so use this location
                    if (keyBytes[keyBytes.length - 1] == '<') {
                        location = Optional.of(entry.getValue().toString());
                        break;
                    }
                    else {
                        // Skip the first three bytes of the row before comparing
                        // (presumably the table-ID prefix and its separator -- TODO confirm)
                        scannedCompareKey.set(keyBytes, 3, keyBytes.length - 3);

                        // Compare the keys, moving along the tablets until the location is found
                        if (scannedCompareKey.getLength() > 0) {
                            int compareTo = splitCompareKey.compareTo(scannedCompareKey);
                            if (compareTo <= 0) {
                                location = Optional.of(entry.getValue().toString());
                            }
                            else {
                                // all future tablets will be greater than this key
                                break;
                            }
                        }
                    }
                }
            }
        }
        finally {
            // Release the scanner on every path: null key, normal scan, and failures
            scanner.close();
        }

        // If we were unable to find the location for some reason, return the default tablet
        // location
        return location.isPresent() ? location : getDefaultTabletLocation(table);
    }
    catch (Exception e) {
        // Swallow this exception so the query does not fail due to being unable
        // to locate the tablet server for the provided Key.
        // This is purely an optimization, but we will want to log the error.
        LOG.error("Failed to get tablet location, returning dummy location", e);
        return Optional.empty();
    }
}
 
Example 10
Source File: DistCp.java    From hadoop-gpu with Apache License 2.0 4 votes vote down vote up
/**
 * Delete the dst files/dirs which do not exist in src.
 *
 * <p>Works in three steps: (1) recursively lists everything under
 * {@code dstroot} into a sequence file keyed by the path relative to
 * {@code dstroot}; (2) sorts that listing by key; (3) walks the sorted
 * listing in step with {@code dstsorted} and removes, by running
 * {@code FsShell} with {@code -rmr}, every listed path whose key has no
 * equal key in {@code dstsorted}.
 *
 * @param dstfs file system that is listed under {@code dstroot}
 * @param dstroot status of the destination root; must be a directory
 * @param dstsorted sequence file of (Text, Text) records that is read in
 *        step with the sorted listing; presumably the sorted destination
 *        paths the copy produces -- TODO confirm against the caller
 * @param jobfs file system holding the temporary listing files
 * @param jobdir directory in which the temporary listing files are created
 * @param jobconf configuration used to write, sort and read the listings
 * @param conf configuration handed to the {@code FsShell} that deletes
 * @throws IOException if {@code dstroot} is not a directory, or if the
 *         shell throws or returns a non-zero value
 */
static private void deleteNonexisting(
    FileSystem dstfs, FileStatus dstroot, Path dstsorted,
    FileSystem jobfs, Path jobdir, JobConf jobconf, Configuration conf
    ) throws IOException {
  if (!dstroot.isDir()) {
    throw new IOException("dst must be a directory when option "
        + Options.DELETE.cmd + " is set, but dst (= " + dstroot.getPath()
        + ") is not a directory.");
  }

  //write dst lsr results
  final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr");
  final SequenceFile.Writer writer = SequenceFile.createWriter(jobfs, jobconf,
      dstlsr, Text.class, FileStatus.class,
      SequenceFile.CompressionType.NONE);
  try {
    //do lsr to get all file statuses in dstroot
    // Explicit stack instead of recursion: directories are popped and their
    // children are both recorded and pushed for later expansion.
    final Stack<FileStatus> lsrstack = new Stack<FileStatus>();
    for(lsrstack.push(dstroot); !lsrstack.isEmpty(); ) {
      final FileStatus status = lsrstack.pop();
      if (status.isDir()) {
        for(FileStatus child : dstfs.listStatus(status.getPath())) {
          String relative = makeRelative(dstroot.getPath(), child.getPath());
          writer.append(new Text(relative), child);
          lsrstack.push(child);
        }
      }
    }
  } finally {
    checkAndClose(writer);
  }

  //sort lsr results
  final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted");
  SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs,
      new Text.Comparator(), Text.class, FileStatus.class, jobconf);
  sorter.sort(dstlsr, sortedlsr);

  //compare lsr list and dst list
  SequenceFile.Reader lsrin = null;
  SequenceFile.Reader dstin = null;
  try {
    lsrin = new SequenceFile.Reader(jobfs, sortedlsr, jobconf);
    dstin = new SequenceFile.Reader(jobfs, dstsorted, jobconf);

    //compare sorted lsr list and sorted dst list
    final Text lsrpath = new Text();
    final FileStatus lsrstatus = new FileStatus();
    final Text dstpath = new Text();
    final Text dstfrom = new Text();
    final FsShell shell = new FsShell(conf);
    // shellargs[1] doubles as "the last path handed to the shell"; null
    // means nothing has been deleted yet.
    final String[] shellargs = {"-rmr", null};

    boolean hasnext = dstin.next(dstpath, dstfrom);
    for(; lsrin.next(lsrpath, lsrstatus); ) {
      int dst_cmp_lsr = dstpath.compareTo(lsrpath);
      // Advance dst while its key is still smaller than the lsr key.
      // NOTE(review): once dstin is exhausted the comparison uses whatever
      // dstpath last held -- confirm that is the intended behavior.
      for(; hasnext && dst_cmp_lsr < 0; ) {
        hasnext = dstin.next(dstpath, dstfrom);
        dst_cmp_lsr = dstpath.compareTo(lsrpath);
      }
      
      if (dst_cmp_lsr == 0) {
        //lsrpath exists in dst, skip it
        hasnext = dstin.next(dstpath, dstfrom);
      }
      else {
        //lsrpath does not exist, delete it
        String s = new Path(dstroot.getPath(), lsrpath.toString()).toString();
        // Skip the shell call when isAncestorPath reports the previously
        // deleted path as an ancestor of s (presumably it was already
        // removed by the recursive delete).
        if (shellargs[1] == null || !isAncestorPath(shellargs[1], s)) {
          shellargs[1] = s;
          int r = 0;
          try {
             r = shell.run(shellargs);
          } catch(Exception e) {
            throw new IOException("Exception from shell.", e);
          }
          if (r != 0) {
            throw new IOException("\"" + shellargs[0] + " " + shellargs[1]
                + "\" returns non-zero value " + r);
          }
        }
      }
    }
  } finally {
    checkAndClose(lsrin);
    checkAndClose(dstin);
  }
}
 
Example 11
Source File: TeraInputFormat.java    From RDFS with Apache License 2.0 4 votes vote down vote up
/**
 * Orders two entries of {@code records} by their {@code compareTo} result.
 *
 * @param i position of the first record
 * @param j position of the second record
 * @return negative, zero or positive as reported by {@code compareTo}
 */
public int compare(int i, int j) {
  final Text first = records.get(i);
  return first.compareTo(records.get(j));
}
 
Example 12
Source File: TeraInputFormat.java    From hadoop-gpu with Apache License 2.0 4 votes vote down vote up
/**
 * Compares the record at index {@code i} with the record at index {@code j}.
 *
 * @param i index into {@code records} of the left operand
 * @param j index into {@code records} of the right operand
 * @return the value of {@code compareTo} for the two records
 */
public int compare(int i, int j) {
  final Text lhs = records.get(i);
  final Text rhs = records.get(j);
  final int order = lhs.compareTo(rhs);
  return order;
}
 
Example 13
Source File: TestTotalOrderPartitioner.java    From hadoop-gpu with Apache License 2.0 4 votes vote down vote up
/**
 * Compares two keys in reverse order by negating {@code a.compareTo(b)}.
 *
 * @param a left-hand key
 * @param b right-hand key
 * @return the negated result of {@code a.compareTo(b)}
 */
public int compare(Text a, Text b) {
  final int naturalOrder = a.compareTo(b);
  return -naturalOrder;
}
 
Example 14
Source File: CopyCommitter.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Removes from the target every entry that is not present in the source
 * listing.
 *
 * <p>The source listing and a freshly built target listing are both sorted,
 * then walked side by side; a target entry whose relative path has no equal
 * source path is deleted recursively (or counted as deleted if it no longer
 * exists).
 *
 * <p>Both listing readers are opened inside the {@code try} block so that a
 * failure while opening the second one still closes the first.
 *
 * @param conf configuration supplying the listing file path and the final
 *        target path
 * @throws IOException if a listing cannot be built, sorted or read, or if a
 *         target entry cannot be deleted
 */
private void deleteMissing(Configuration conf) throws IOException {
  LOG.info("-delete option is enabled. About to remove entries from " +
      "target that are missing in source");

  // Sort the source-file listing alphabetically.
  Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
  FileSystem clusterFS = sourceListing.getFileSystem(conf);
  Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);

  // Similarly, create the listing of target-files. Sort alphabetically.
  Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
  CopyListing target = new GlobbedCopyListing(new Configuration(conf), null);

  List<Path> targets = new ArrayList<Path>(1);
  Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
  targets.add(targetFinalPath);
  DistCpOptions options = new DistCpOptions(targets, new Path("/NONE"));
  //
  // Set up options to be the same from the CopyListing.buildListing's perspective,
  // so to collect similar listings as when doing the copy
  //
  options.setOverwrite(overwrite);
  options.setSyncFolder(syncFolder);
  options.setTargetPathExists(targetPathExists);

  target.buildListing(targetListing, options);
  Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
  long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();

  // Declared outside the try so the finally block can close whichever
  // reader was successfully opened.
  SequenceFile.Reader sourceReader = null;
  SequenceFile.Reader targetReader = null;

  // Walk both source and target file listings.
  // Delete all from target that doesn't also exist on source.
  long deletedEntries = 0;
  try {
    sourceReader = new SequenceFile.Reader(conf,
                                 SequenceFile.Reader.file(sortedSourceListing));
    targetReader = new SequenceFile.Reader(conf,
                                 SequenceFile.Reader.file(sortedTargetListing));

    CopyListingFileStatus srcFileStatus = new CopyListingFileStatus();
    Text srcRelPath = new Text();
    CopyListingFileStatus trgtFileStatus = new CopyListingFileStatus();
    Text trgtRelPath = new Text();

    FileSystem targetFS = targetFinalPath.getFileSystem(conf);
    boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
    while (targetReader.next(trgtRelPath, trgtFileStatus)) {
      // Skip sources that don't exist on target.
      while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
        srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
      }

      if (srcAvailable && trgtRelPath.equals(srcRelPath)) continue;

      // Target doesn't exist at source. Delete.
      // An entry that is already gone counts as successfully deleted.
      boolean result = (!targetFS.exists(trgtFileStatus.getPath()) ||
          targetFS.delete(trgtFileStatus.getPath(), true));
      if (result) {
        LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
        deletedEntries++;
      } else {
        throw new IOException("Unable to delete " + trgtFileStatus.getPath());
      }
      taskAttemptContext.progress();
      taskAttemptContext.setStatus("Deleting missing files from target. [" +
          targetReader.getPosition() * 100 / totalLen + "%]");
    }
  } finally {
    // closeStream tolerates null, so a reader that was never opened is fine.
    IOUtils.closeStream(sourceReader);
    IOUtils.closeStream(targetReader);
  }
  LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
}
 
Example 15
Source File: DistCpV1.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Delete the dst files/dirs which do not exist in src.
 *
 * <p>Works in three steps: (1) recursively lists everything under
 * {@code dstroot} into a sequence file keyed by the path relative to
 * {@code dstroot}; (2) sorts that listing by key; (3) walks the sorted
 * listing in step with {@code dstsorted} and removes every listed path whose
 * key has no equal key in {@code dstsorted}, first trying to move it to the
 * trash and falling back to a recursive delete.
 *
 * @param dstfs file system that is listed and deleted from
 * @param dstroot status of the destination root; must not be a file
 * @param dstsorted sequence file of (Text, Text) records that is read in
 *        step with the sorted listing; presumably the sorted destination
 *        paths the copy produces -- TODO confirm against the caller
 * @param jobfs file system passed to the sorter
 * @param jobdir directory in which the temporary listing files are created
 * @param jobconf configuration used to write, sort and read the listings
 * @param conf configuration handed to the {@code Trash}
 * @return number of listed paths that had no match in {@code dstsorted};
 *         a path is counted even when its removal is skipped because the
 *         previously removed path is reported as its ancestor
 * @throws IOException if {@code dstroot} is a file, or if a path can be
 *         neither moved to the trash nor deleted
 */
static private long deleteNonexisting(
    FileSystem dstfs, FileStatus dstroot, Path dstsorted,
    FileSystem jobfs, Path jobdir, JobConf jobconf, Configuration conf
    ) throws IOException {
  if (dstroot.isFile()) {
    throw new IOException("dst must be a directory when option "
        + Options.DELETE.cmd + " is set, but dst (= " + dstroot.getPath()
        + ") is not a directory.");
  }

  //write dst lsr results
  final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr");
  try (final SequenceFile.Writer writer = SequenceFile.createWriter(jobconf,
      Writer.file(dstlsr), Writer.keyClass(Text.class),
      Writer.valueClass(NullWritable.class), Writer.compression(
      SequenceFile.CompressionType.NONE))) {
    //do lsr to get all file statuses in dstroot
    // Explicit stack instead of recursion: directories are popped and their
    // children are both recorded and pushed for later expansion.
    final Stack<FileStatus> lsrstack = new Stack<FileStatus>();
    for(lsrstack.push(dstroot); !lsrstack.isEmpty(); ) {
      final FileStatus status = lsrstack.pop();
      if (status.isDirectory()) {
        for(FileStatus child : dstfs.listStatus(status.getPath())) {
          String relative = makeRelative(dstroot.getPath(), child.getPath());
          writer.append(new Text(relative), NullWritable.get());
          lsrstack.push(child);
        }
      }
    }
  }

  //sort lsr results
  final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted");
  SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs,
      new Text.Comparator(), Text.class, NullWritable.class, jobconf);
  sorter.sort(dstlsr, sortedlsr);

  //compare lsr list and dst list
  long deletedPathsCount = 0;
  try (SequenceFile.Reader lsrin =
           new SequenceFile.Reader(jobconf, Reader.file(sortedlsr));
       SequenceFile.Reader  dstin =
           new SequenceFile.Reader(jobconf, Reader.file(dstsorted))) {
    //compare sorted lsr list and sorted dst list
    final Text lsrpath = new Text();
    final Text dstpath = new Text();
    final Text dstfrom = new Text();
    final Trash trash = new Trash(dstfs, conf);
    // Last path actually removed; null until the first removal.
    Path lastpath = null;

    boolean hasnext = dstin.next(dstpath, dstfrom);
    while (lsrin.next(lsrpath, NullWritable.get())) {
      int dst_cmp_lsr = dstpath.compareTo(lsrpath);
      // Advance dst while its key is still smaller than the lsr key.
      // NOTE(review): once dstin is exhausted the comparison uses whatever
      // dstpath last held -- confirm that is the intended behavior.
      while (hasnext && dst_cmp_lsr < 0) {
        hasnext = dstin.next(dstpath, dstfrom);
        dst_cmp_lsr = dstpath.compareTo(lsrpath);
      }
      
      if (dst_cmp_lsr == 0) {
        //lsrpath exists in dst, skip it
        hasnext = dstin.next(dstpath, dstfrom);
      } else {
        //lsrpath does not exist, delete it
        final Path rmpath = new Path(dstroot.getPath(), lsrpath.toString());
        // Counted before the ancestor check, so skipped descendants are
        // included in the total.
        ++deletedPathsCount;
        if ((lastpath == null || !isAncestorPath(lastpath, rmpath))) {
          // Prefer the trash; fall back to a recursive delete.
          if (!(trash.moveToTrash(rmpath) || dstfs.delete(rmpath, true))) {
            throw new IOException("Failed to delete " + rmpath);
          }
          lastpath = rmpath;
        }
      }
    }
  }
  return deletedPathsCount;
}
 
Example 16
Source File: TeraInputFormat.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Compares two records identified by their positions in {@code records}.
 *
 * @param i position of the left record
 * @param j position of the right record
 * @return what {@code compareTo} returns for the left record against the right
 */
public int compare(int i, int j) {
  final Text other = records.get(j);
  return records.get(i).compareTo(other);
}
 
Example 17
Source File: TeraInputFormat.java    From pravega-samples with Apache License 2.0 4 votes vote down vote up
/**
 * Looks up the records at the two given indices and compares them.
 *
 * @param i index of the record used as the receiver of {@code compareTo}
 * @param j index of the record passed as the argument of {@code compareTo}
 * @return the {@code compareTo} result
 */
public int compare(int i, int j) {
  final Text receiver = records.get(i);
  final Text argument = records.get(j);
  return receiver.compareTo(argument);
}
 
Example 18
Source File: CopyCommitter.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Removes from the target every entry that is not present in the source
 * listing.
 *
 * <p>The source listing and a freshly built target listing are both sorted,
 * then walked side by side; a target entry whose relative path has no equal
 * source path is deleted recursively (or counted as deleted if it no longer
 * exists).
 *
 * <p>Both listing readers are opened inside the {@code try} block so that a
 * failure while opening the second one still closes the first.
 *
 * @param conf configuration supplying the listing file path and the final
 *        target path
 * @throws IOException if a listing cannot be built, sorted or read, or if a
 *         target entry cannot be deleted
 */
private void deleteMissing(Configuration conf) throws IOException {
  LOG.info("-delete option is enabled. About to remove entries from " +
      "target that are missing in source");

  // Sort the source-file listing alphabetically.
  Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
  FileSystem clusterFS = sourceListing.getFileSystem(conf);
  Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);

  // Similarly, create the listing of target-files. Sort alphabetically.
  Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
  CopyListing target = new GlobbedCopyListing(new Configuration(conf), null);

  List<Path> targets = new ArrayList<Path>(1);
  Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
  targets.add(targetFinalPath);
  DistCpOptions options = new DistCpOptions(targets, new Path("/NONE"));
  //
  // Set up options to be the same from the CopyListing.buildListing's perspective,
  // so to collect similar listings as when doing the copy
  //
  options.setOverwrite(overwrite);
  options.setSyncFolder(syncFolder);
  options.setTargetPathExists(targetPathExists);

  target.buildListing(targetListing, options);
  Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
  long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();

  // Declared outside the try so the finally block can close whichever
  // reader was successfully opened.
  SequenceFile.Reader sourceReader = null;
  SequenceFile.Reader targetReader = null;

  // Walk both source and target file listings.
  // Delete all from target that doesn't also exist on source.
  long deletedEntries = 0;
  try {
    sourceReader = new SequenceFile.Reader(conf,
                                 SequenceFile.Reader.file(sortedSourceListing));
    targetReader = new SequenceFile.Reader(conf,
                                 SequenceFile.Reader.file(sortedTargetListing));

    CopyListingFileStatus srcFileStatus = new CopyListingFileStatus();
    Text srcRelPath = new Text();
    CopyListingFileStatus trgtFileStatus = new CopyListingFileStatus();
    Text trgtRelPath = new Text();

    FileSystem targetFS = targetFinalPath.getFileSystem(conf);
    boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
    while (targetReader.next(trgtRelPath, trgtFileStatus)) {
      // Skip sources that don't exist on target.
      while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
        srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
      }

      if (srcAvailable && trgtRelPath.equals(srcRelPath)) continue;

      // Target doesn't exist at source. Delete.
      // An entry that is already gone counts as successfully deleted.
      boolean result = (!targetFS.exists(trgtFileStatus.getPath()) ||
          targetFS.delete(trgtFileStatus.getPath(), true));
      if (result) {
        LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
        deletedEntries++;
      } else {
        throw new IOException("Unable to delete " + trgtFileStatus.getPath());
      }
      taskAttemptContext.progress();
      taskAttemptContext.setStatus("Deleting missing files from target. [" +
          targetReader.getPosition() * 100 / totalLen + "%]");
    }
  } finally {
    // closeStream tolerates null, so a reader that was never opened is fine.
    IOUtils.closeStream(sourceReader);
    IOUtils.closeStream(targetReader);
  }
  LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
}
 
Example 19
Source File: DistCpV1.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Delete the dst files/dirs which do not exist in src.
 *
 * <p>Works in three steps: (1) recursively lists everything under
 * {@code dstroot} into a sequence file keyed by the path relative to
 * {@code dstroot}; (2) sorts that listing by key; (3) walks the sorted
 * listing in step with {@code dstsorted} and removes every listed path whose
 * key has no equal key in {@code dstsorted}, first trying to move it to the
 * trash and falling back to a recursive delete.
 *
 * @param dstfs file system that is listed and deleted from
 * @param dstroot status of the destination root; must not be a file
 * @param dstsorted sequence file of (Text, Text) records that is read in
 *        step with the sorted listing; presumably the sorted destination
 *        paths the copy produces -- TODO confirm against the caller
 * @param jobfs file system passed to the sorter
 * @param jobdir directory in which the temporary listing files are created
 * @param jobconf configuration used to write, sort and read the listings
 * @param conf configuration handed to the {@code Trash}
 * @return number of listed paths that had no match in {@code dstsorted};
 *         a path is counted even when its removal is skipped because the
 *         previously removed path is reported as its ancestor
 * @throws IOException if {@code dstroot} is a file, or if a path can be
 *         neither moved to the trash nor deleted
 */
static private long deleteNonexisting(
    FileSystem dstfs, FileStatus dstroot, Path dstsorted,
    FileSystem jobfs, Path jobdir, JobConf jobconf, Configuration conf
    ) throws IOException {
  if (dstroot.isFile()) {
    throw new IOException("dst must be a directory when option "
        + Options.DELETE.cmd + " is set, but dst (= " + dstroot.getPath()
        + ") is not a directory.");
  }

  //write dst lsr results
  final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr");
  try (final SequenceFile.Writer writer = SequenceFile.createWriter(jobconf,
      Writer.file(dstlsr), Writer.keyClass(Text.class),
      Writer.valueClass(NullWritable.class), Writer.compression(
      SequenceFile.CompressionType.NONE))) {
    //do lsr to get all file statuses in dstroot
    // Explicit stack instead of recursion: directories are popped and their
    // children are both recorded and pushed for later expansion.
    final Stack<FileStatus> lsrstack = new Stack<FileStatus>();
    for(lsrstack.push(dstroot); !lsrstack.isEmpty(); ) {
      final FileStatus status = lsrstack.pop();
      if (status.isDirectory()) {
        for(FileStatus child : dstfs.listStatus(status.getPath())) {
          String relative = makeRelative(dstroot.getPath(), child.getPath());
          writer.append(new Text(relative), NullWritable.get());
          lsrstack.push(child);
        }
      }
    }
  }

  //sort lsr results
  final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted");
  SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs,
      new Text.Comparator(), Text.class, NullWritable.class, jobconf);
  sorter.sort(dstlsr, sortedlsr);

  //compare lsr list and dst list
  long deletedPathsCount = 0;
  try (SequenceFile.Reader lsrin =
           new SequenceFile.Reader(jobconf, Reader.file(sortedlsr));
       SequenceFile.Reader  dstin =
           new SequenceFile.Reader(jobconf, Reader.file(dstsorted))) {
    //compare sorted lsr list and sorted dst list
    final Text lsrpath = new Text();
    final Text dstpath = new Text();
    final Text dstfrom = new Text();
    final Trash trash = new Trash(dstfs, conf);
    // Last path actually removed; null until the first removal.
    Path lastpath = null;

    boolean hasnext = dstin.next(dstpath, dstfrom);
    while (lsrin.next(lsrpath, NullWritable.get())) {
      int dst_cmp_lsr = dstpath.compareTo(lsrpath);
      // Advance dst while its key is still smaller than the lsr key.
      // NOTE(review): once dstin is exhausted the comparison uses whatever
      // dstpath last held -- confirm that is the intended behavior.
      while (hasnext && dst_cmp_lsr < 0) {
        hasnext = dstin.next(dstpath, dstfrom);
        dst_cmp_lsr = dstpath.compareTo(lsrpath);
      }
      
      if (dst_cmp_lsr == 0) {
        //lsrpath exists in dst, skip it
        hasnext = dstin.next(dstpath, dstfrom);
      } else {
        //lsrpath does not exist, delete it
        final Path rmpath = new Path(dstroot.getPath(), lsrpath.toString());
        // Counted before the ancestor check, so skipped descendants are
        // included in the total.
        ++deletedPathsCount;
        if ((lastpath == null || !isAncestorPath(lastpath, rmpath))) {
          // Prefer the trash; fall back to a recursive delete.
          if (!(trash.moveToTrash(rmpath) || dstfs.delete(rmpath, true))) {
            throw new IOException("Failed to delete " + rmpath);
          }
          lastpath = rmpath;
        }
      }
    }
  }
  return deletedPathsCount;
}
 
Example 20
Source File: TestTotalOrderPartitioner.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Reverse-order comparison: returns the negation of {@code a.compareTo(b)}.
 *
 * @param a first key
 * @param b second key
 * @return {@code -(a.compareTo(b))}
 */
public int compare(Text a, Text b) {
  final int forward = a.compareTo(b);
  return -forward;
}