Java Code Examples for org.apache.hadoop.io.Text#Comparator

The following examples show how to use org.apache.hadoop.io.Text#Comparator. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestValuesIterator.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Picks the raw comparator implementation that matches a test comparator kind.
 *
 * @param comparator the comparator kind requested by the test
 * @return a newly created comparator for that kind, or {@code null} when the
 *         kind has no dedicated case
 */
private RawComparator getComparator(TestWithComparator comparator) {
  final RawComparator selected;
  switch (comparator) {
  case LONG:
    selected = new LongWritable.Comparator();
    break;
  case INT:
    selected = new IntWritable.Comparator();
    break;
  case BYTES:
    selected = new BytesWritable.Comparator();
    break;
  case TEZ_BYTES:
    selected = new TezBytesComparator();
    break;
  case TEXT:
    selected = new Text.Comparator();
    break;
  case CUSTOM:
    selected = new CustomKey.Comparator();
    break;
  default:
    // Kinds without a case fall through to "no comparator".
    selected = null;
    break;
  }
  return selected;
}
 
Example 2
Source File: DistCpV1.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Removes the entries below the destination root that are not listed in the
 * sorted destination file {@code dstsorted}.
 *
 * <p>The method lists everything below {@code dstroot} into a sequence file
 * under {@code jobdir}, sorts that listing by key, and then walks the sorted
 * listing and {@code dstsorted} side by side. A listed path with no equal key
 * in {@code dstsorted} is moved to trash or, if that does not succeed, deleted
 * with the recursive flag set.
 *
 * @return number of listed paths that had no matching key in
 *         {@code dstsorted}; a path is counted even when no separate delete is
 *         issued for it because {@code isAncestorPath} reports the previously
 *         removed path as its ancestor
 * @throws IOException if {@code dstroot} is a file, if a path can be neither
 *         moved to trash nor deleted, or if an underlying file system or
 *         sequence file operation fails
 */
static private long deleteNonexisting(
    FileSystem dstfs, FileStatus dstroot, Path dstsorted,
    FileSystem jobfs, Path jobdir, JobConf jobconf, Configuration conf
    ) throws IOException {
  if (dstroot.isFile()) {
    throw new IOException("dst must be a directory when option "
        + Options.DELETE.cmd + " is set, but dst (= " + dstroot.getPath()
        + ") is not a directory.");
  }

  // Step 1: dump a recursive listing of dstroot (relative paths as keys).
  final Path listingFile = new Path(jobdir, "_distcp_dst_lsr");
  try (final SequenceFile.Writer listingWriter = SequenceFile.createWriter(
      jobconf,
      Writer.file(listingFile),
      Writer.keyClass(Text.class),
      Writer.valueClass(NullWritable.class),
      Writer.compression(SequenceFile.CompressionType.NONE))) {
    final Stack<FileStatus> pending = new Stack<FileStatus>();
    pending.push(dstroot);
    while (!pending.isEmpty()) {
      final FileStatus current = pending.pop();
      if (!current.isDirectory()) {
        continue;
      }
      for (FileStatus entry : dstfs.listStatus(current.getPath())) {
        writer_append:
        {
          listingWriter.append(
              new Text(makeRelative(dstroot.getPath(), entry.getPath())),
              NullWritable.get());
        }
        pending.push(entry);
      }
    }
  }

  // Step 2: sort the listing by key so it can be merged with dstsorted.
  final Path sortedListingFile = new Path(jobdir, "_distcp_dst_lsr_sorted");
  final SequenceFile.Sorter listingSorter = new SequenceFile.Sorter(
      jobfs, new Text.Comparator(), Text.class, NullWritable.class, jobconf);
  listingSorter.sort(listingFile, sortedListingFile);

  // Step 3: merge-walk both sorted files and remove what only the listing has.
  long removedCount = 0;
  try (SequenceFile.Reader listingReader =
           new SequenceFile.Reader(jobconf, Reader.file(sortedListingFile));
       SequenceFile.Reader dstReader =
           new SequenceFile.Reader(jobconf, Reader.file(dstsorted))) {
    final Text listedKey = new Text();
    final Text dstKey = new Text();
    final Text dstValue = new Text();
    final Trash trash = new Trash(dstfs, conf);
    Path lastRemoved = null;

    boolean dstHasMore = dstReader.next(dstKey, dstValue);
    while (listingReader.next(listedKey, NullWritable.get())) {
      // Advance the dst side until its key is no longer behind the listing.
      int order = dstKey.compareTo(listedKey);
      while (dstHasMore && order < 0) {
        dstHasMore = dstReader.next(dstKey, dstValue);
        order = dstKey.compareTo(listedKey);
      }

      if (order == 0) {
        // Key present on both sides: keep the path and move the dst side on.
        dstHasMore = dstReader.next(dstKey, dstValue);
        continue;
      }

      // Key only present in the listing: the path has to go.
      final Path victim = new Path(dstroot.getPath(), listedKey.toString());
      removedCount++;
      if (lastRemoved != null && isAncestorPath(lastRemoved, victim)) {
        // Already covered by the removal of lastRemoved.
        continue;
      }
      if (!trash.moveToTrash(victim) && !dstfs.delete(victim, true)) {
        throw new IOException("Failed to delete " + victim);
      }
      lastRemoved = victim;
    }
  }
  return removedCount;
}
 
Example 3
Source File: DistCpV1.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Removes the entries below the destination root that are not listed in the
 * sorted destination file {@code dstsorted}.
 *
 * <p>The method lists everything below {@code dstroot} into a sequence file
 * under {@code jobdir}, sorts that listing by key, and then walks the sorted
 * listing and {@code dstsorted} side by side. A listed path with no equal key
 * in {@code dstsorted} is moved to trash or, if that does not succeed, deleted
 * with the recursive flag set.
 *
 * @return number of listed paths that had no matching key in
 *         {@code dstsorted}; a path is counted even when no separate delete is
 *         issued for it because {@code isAncestorPath} reports the previously
 *         removed path as its ancestor
 * @throws IOException if {@code dstroot} is a file, if a path can be neither
 *         moved to trash nor deleted, or if an underlying file system or
 *         sequence file operation fails
 */
static private long deleteNonexisting(
    FileSystem dstfs, FileStatus dstroot, Path dstsorted,
    FileSystem jobfs, Path jobdir, JobConf jobconf, Configuration conf
    ) throws IOException {
  if (dstroot.isFile()) {
    throw new IOException("dst must be a directory when option "
        + Options.DELETE.cmd + " is set, but dst (= " + dstroot.getPath()
        + ") is not a directory.");
  }

  // Step 1: dump a recursive listing of dstroot (relative paths as keys).
  final Path listingFile = new Path(jobdir, "_distcp_dst_lsr");
  try (final SequenceFile.Writer listingWriter = SequenceFile.createWriter(
      jobconf,
      Writer.file(listingFile),
      Writer.keyClass(Text.class),
      Writer.valueClass(NullWritable.class),
      Writer.compression(SequenceFile.CompressionType.NONE))) {
    final Stack<FileStatus> pending = new Stack<FileStatus>();
    pending.push(dstroot);
    while (!pending.isEmpty()) {
      final FileStatus current = pending.pop();
      if (!current.isDirectory()) {
        continue;
      }
      for (FileStatus entry : dstfs.listStatus(current.getPath())) {
        final Text relativeKey =
            new Text(makeRelative(dstroot.getPath(), entry.getPath()));
        listingWriter.append(relativeKey, NullWritable.get());
        pending.push(entry);
      }
    }
  }

  // Step 2: sort the listing by key so it can be merged with dstsorted.
  final Path sortedListingFile = new Path(jobdir, "_distcp_dst_lsr_sorted");
  final SequenceFile.Sorter listingSorter = new SequenceFile.Sorter(
      jobfs, new Text.Comparator(), Text.class, NullWritable.class, jobconf);
  listingSorter.sort(listingFile, sortedListingFile);

  // Step 3: merge-walk both sorted files and remove what only the listing has.
  long removedCount = 0;
  try (SequenceFile.Reader listingReader =
           new SequenceFile.Reader(jobconf, Reader.file(sortedListingFile));
       SequenceFile.Reader dstReader =
           new SequenceFile.Reader(jobconf, Reader.file(dstsorted))) {
    final Text listedKey = new Text();
    final Text dstKey = new Text();
    final Text dstValue = new Text();
    final Trash trash = new Trash(dstfs, conf);
    Path lastRemoved = null;

    boolean dstHasMore = dstReader.next(dstKey, dstValue);
    while (listingReader.next(listedKey, NullWritable.get())) {
      // Advance the dst side until its key is no longer behind the listing.
      int order = dstKey.compareTo(listedKey);
      while (dstHasMore && order < 0) {
        dstHasMore = dstReader.next(dstKey, dstValue);
        order = dstKey.compareTo(listedKey);
      }

      if (order == 0) {
        // Key present on both sides: keep the path and move the dst side on.
        dstHasMore = dstReader.next(dstKey, dstValue);
        continue;
      }

      // Key only present in the listing: the path has to go.
      final Path victim = new Path(dstroot.getPath(), listedKey.toString());
      removedCount++;
      if (lastRemoved != null && isAncestorPath(lastRemoved, victim)) {
        // Already covered by the removal of lastRemoved.
        continue;
      }
      if (!trash.moveToTrash(victim) && !dstfs.delete(victim, true)) {
        throw new IOException("Failed to delete " + victim);
      }
      lastRemoved = victim;
    }
  }
  return removedCount;
}
 
Example 4
Source File: PigTextRawComparator.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the comparator: hands {@code NullableText.class} to the superclass
 * constructor and stores a fresh {@code Text.Comparator} in
 * {@code mWrappedComp}.
 */
public PigTextRawComparator() {
    super(NullableText.class);
    this.mWrappedComp = new Text.Comparator();
}
 
Example 5
Source File: DistCp.java    From RDFS with Apache License 2.0 4 votes vote down vote up
/**
 * Removes the entries below the destination root that are not listed in the
 * sorted destination file {@code dstsorted}.
 *
 * <p>The method lists everything below {@code dstroot} into a sequence file
 * under {@code jobdir} (relative path as key, file status as value), sorts
 * that listing by key, and then walks the sorted listing and
 * {@code dstsorted} side by side. For a listed path with no equal key in
 * {@code dstsorted}, the shell command {@code -rmr} is run on it, unless
 * {@code isAncestorPath} reports the previously removed path as its ancestor.
 *
 * @throws IOException if {@code dstroot} is not a directory, if the shell
 *         command throws or returns a non-zero value, or if an underlying
 *         file system or sequence file operation fails
 */
static private void deleteNonexisting(
    FileSystem dstfs, FileStatus dstroot, Path dstsorted,
    FileSystem jobfs, Path jobdir, JobConf jobconf, Configuration conf
    ) throws IOException {
  if (!dstroot.isDir()) {
    throw new IOException("dst must be a directory when option "
        + Options.DELETE.cmd + " is set, but dst (= " + dstroot.getPath()
        + ") is not a directory.");
  }

  // Step 1: dump a recursive listing of dstroot.
  final Path listingFile = new Path(jobdir, "_distcp_dst_lsr");
  final SequenceFile.Writer listingWriter = SequenceFile.createWriter(
      jobfs, jobconf, listingFile, Text.class, FileStatus.class,
      SequenceFile.CompressionType.NONE);
  try {
    final Stack<FileStatus> pending = new Stack<FileStatus>();
    pending.push(dstroot);
    while (!pending.isEmpty()) {
      final FileStatus current = pending.pop();
      if (!current.isDir()) {
        continue;
      }
      for (FileStatus entry : dstfs.listStatus(current.getPath())) {
        final Text relativeKey =
            new Text(makeRelative(dstroot.getPath(), entry.getPath()));
        listingWriter.append(relativeKey, entry);
        pending.push(entry);
      }
    }
  } finally {
    checkAndClose(listingWriter);
  }

  // Step 2: sort the listing by key so it can be merged with dstsorted.
  final Path sortedListingFile = new Path(jobdir, "_distcp_dst_lsr_sorted");
  final SequenceFile.Sorter listingSorter = new SequenceFile.Sorter(
      jobfs, new Text.Comparator(), Text.class, FileStatus.class, jobconf);
  listingSorter.sort(listingFile, sortedListingFile);

  // Step 3: merge-walk both sorted files and remove what only the listing has.
  SequenceFile.Reader listingReader = null;
  SequenceFile.Reader dstReader = null;
  try {
    listingReader = new SequenceFile.Reader(jobfs, sortedListingFile, jobconf);
    dstReader = new SequenceFile.Reader(jobfs, dstsorted, jobconf);

    final Text listedKey = new Text();
    final FileStatus listedStatus = new FileStatus();
    final Text dstKey = new Text();
    final Text dstValue = new Text();
    final FsShell shell = new FsShell(conf);
    // Slot 1 holds the most recently removed path; null until the first removal.
    final String[] rmCommand = {"-rmr", null};

    boolean dstHasMore = dstReader.next(dstKey, dstValue);
    while (listingReader.next(listedKey, listedStatus)) {
      // Advance the dst side until its key is no longer behind the listing.
      int order = dstKey.compareTo(listedKey);
      while (dstHasMore && order < 0) {
        dstHasMore = dstReader.next(dstKey, dstValue);
        order = dstKey.compareTo(listedKey);
      }

      if (order == 0) {
        // Key present on both sides: keep the path and move the dst side on.
        dstHasMore = dstReader.next(dstKey, dstValue);
        continue;
      }

      // Key only present in the listing: the path has to go.
      final String victim =
          new Path(dstroot.getPath(), listedKey.toString()).toString();
      if (rmCommand[1] != null && isAncestorPath(rmCommand[1], victim)) {
        // Already covered by the previous removal.
        continue;
      }
      rmCommand[1] = victim;

      int exitCode;
      try {
        exitCode = shell.run(rmCommand);
      } catch (Exception e) {
        throw new IOException("Exception from shell.", e);
      }
      if (exitCode != 0) {
        throw new IOException("\"" + rmCommand[0] + " " + rmCommand[1]
            + "\" returns non-zero value " + exitCode);
      }
    }
  } finally {
    checkAndClose(listingReader);
    checkAndClose(dstReader);
  }
}
 
Example 6
Source File: DistCp.java    From hadoop-gpu with Apache License 2.0 4 votes vote down vote up
/**
 * Removes the entries below the destination root that are not listed in the
 * sorted destination file {@code dstsorted}.
 *
 * <p>The method lists everything below {@code dstroot} into a sequence file
 * under {@code jobdir} (relative path as key, file status as value), sorts
 * that listing by key, and then walks the sorted listing and
 * {@code dstsorted} side by side. For a listed path with no equal key in
 * {@code dstsorted}, the shell command {@code -rmr} is run on it, unless
 * {@code isAncestorPath} reports the previously removed path as its ancestor.
 *
 * @throws IOException if {@code dstroot} is not a directory, if the shell
 *         command throws or returns a non-zero value, or if an underlying
 *         file system or sequence file operation fails
 */
static private void deleteNonexisting(
    FileSystem dstfs, FileStatus dstroot, Path dstsorted,
    FileSystem jobfs, Path jobdir, JobConf jobconf, Configuration conf
    ) throws IOException {
  if (!dstroot.isDir()) {
    throw new IOException("dst must be a directory when option "
        + Options.DELETE.cmd + " is set, but dst (= " + dstroot.getPath()
        + ") is not a directory.");
  }

  // Step 1: dump a recursive listing of dstroot.
  final Path listingFile = new Path(jobdir, "_distcp_dst_lsr");
  final SequenceFile.Writer listingWriter = SequenceFile.createWriter(
      jobfs, jobconf, listingFile, Text.class, FileStatus.class,
      SequenceFile.CompressionType.NONE);
  try {
    final Stack<FileStatus> pending = new Stack<FileStatus>();
    pending.push(dstroot);
    while (!pending.isEmpty()) {
      final FileStatus current = pending.pop();
      if (!current.isDir()) {
        continue;
      }
      for (FileStatus entry : dstfs.listStatus(current.getPath())) {
        final Text relativeKey =
            new Text(makeRelative(dstroot.getPath(), entry.getPath()));
        listingWriter.append(relativeKey, entry);
        pending.push(entry);
      }
    }
  } finally {
    checkAndClose(listingWriter);
  }

  // Step 2: sort the listing by key so it can be merged with dstsorted.
  final Path sortedListingFile = new Path(jobdir, "_distcp_dst_lsr_sorted");
  final SequenceFile.Sorter listingSorter = new SequenceFile.Sorter(
      jobfs, new Text.Comparator(), Text.class, FileStatus.class, jobconf);
  listingSorter.sort(listingFile, sortedListingFile);

  // Step 3: merge-walk both sorted files and remove what only the listing has.
  SequenceFile.Reader listingReader = null;
  SequenceFile.Reader dstReader = null;
  try {
    listingReader = new SequenceFile.Reader(jobfs, sortedListingFile, jobconf);
    dstReader = new SequenceFile.Reader(jobfs, dstsorted, jobconf);

    final Text listedKey = new Text();
    final FileStatus listedStatus = new FileStatus();
    final Text dstKey = new Text();
    final Text dstValue = new Text();
    final FsShell shell = new FsShell(conf);
    // Slot 1 holds the most recently removed path; null until the first removal.
    final String[] rmCommand = {"-rmr", null};

    boolean dstHasMore = dstReader.next(dstKey, dstValue);
    while (listingReader.next(listedKey, listedStatus)) {
      // Advance the dst side until its key is no longer behind the listing.
      int order = dstKey.compareTo(listedKey);
      while (dstHasMore && order < 0) {
        dstHasMore = dstReader.next(dstKey, dstValue);
        order = dstKey.compareTo(listedKey);
      }

      if (order == 0) {
        // Key present on both sides: keep the path and move the dst side on.
        dstHasMore = dstReader.next(dstKey, dstValue);
        continue;
      }

      // Key only present in the listing: the path has to go.
      final String victim =
          new Path(dstroot.getPath(), listedKey.toString()).toString();
      if (rmCommand[1] != null && isAncestorPath(rmCommand[1], victim)) {
        // Already covered by the previous removal.
        continue;
      }
      rmCommand[1] = victim;

      int exitCode;
      try {
        exitCode = shell.run(rmCommand);
      } catch (Exception e) {
        throw new IOException("Exception from shell.", e);
      }
      if (exitCode != 0) {
        throw new IOException("\"" + rmCommand[0] + " " + rmCommand[1]
            + "\" returns non-zero value " + exitCode);
      }
    }
  } finally {
    checkAndClose(listingReader);
    checkAndClose(dstReader);
  }
}