Java Code Examples for org.apache.hadoop.io.Text#Comparator
The following examples show how to use
org.apache.hadoop.io.Text#Comparator .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestValuesIterator.java From tez with Apache License 2.0 | 6 votes |
private RawComparator getComparator(TestWithComparator comparator) { switch (comparator) { case LONG: return new LongWritable.Comparator(); case INT: return new IntWritable.Comparator(); case BYTES: return new BytesWritable.Comparator(); case TEZ_BYTES: return new TezBytesComparator(); case TEXT: return new Text.Comparator(); case CUSTOM: return new CustomKey.Comparator(); default: return null; } }
Example 2
Source File: DistCpV1.java From hadoop with Apache License 2.0 | 4 votes |
/** * Delete the dst files/dirs which do not exist in src * * @return total count of files and directories deleted from destination * @throws IOException */ static private long deleteNonexisting( FileSystem dstfs, FileStatus dstroot, Path dstsorted, FileSystem jobfs, Path jobdir, JobConf jobconf, Configuration conf ) throws IOException { if (dstroot.isFile()) { throw new IOException("dst must be a directory when option " + Options.DELETE.cmd + " is set, but dst (= " + dstroot.getPath() + ") is not a directory."); } //write dst lsr results final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr"); try (final SequenceFile.Writer writer = SequenceFile.createWriter(jobconf, Writer.file(dstlsr), Writer.keyClass(Text.class), Writer.valueClass(NullWritable.class), Writer.compression( SequenceFile.CompressionType.NONE))) { //do lsr to get all file statuses in dstroot final Stack<FileStatus> lsrstack = new Stack<FileStatus>(); for(lsrstack.push(dstroot); !lsrstack.isEmpty(); ) { final FileStatus status = lsrstack.pop(); if (status.isDirectory()) { for(FileStatus child : dstfs.listStatus(status.getPath())) { String relative = makeRelative(dstroot.getPath(), child.getPath()); writer.append(new Text(relative), NullWritable.get()); lsrstack.push(child); } } } } //sort lsr results final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted"); SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs, new Text.Comparator(), Text.class, NullWritable.class, jobconf); sorter.sort(dstlsr, sortedlsr); //compare lsr list and dst list long deletedPathsCount = 0; try (SequenceFile.Reader lsrin = new SequenceFile.Reader(jobconf, Reader.file(sortedlsr)); SequenceFile.Reader dstin = new SequenceFile.Reader(jobconf, Reader.file(dstsorted))) { //compare sorted lsr list and sorted dst list final Text lsrpath = new Text(); final Text dstpath = new Text(); final Text dstfrom = new Text(); final Trash trash = new Trash(dstfs, conf); Path lastpath = null; boolean hasnext = dstin.next(dstpath, dstfrom); while (lsrin.next(lsrpath, NullWritable.get())) { int dst_cmp_lsr = dstpath.compareTo(lsrpath); while (hasnext && dst_cmp_lsr < 0) { hasnext = dstin.next(dstpath, dstfrom); dst_cmp_lsr = dstpath.compareTo(lsrpath); } if (dst_cmp_lsr == 0) { //lsrpath exists in dst, skip it hasnext = dstin.next(dstpath, dstfrom); } else { //lsrpath does not exist, delete it final Path rmpath = new Path(dstroot.getPath(), lsrpath.toString()); ++deletedPathsCount; if ((lastpath == null || !isAncestorPath(lastpath, rmpath))) { if (!(trash.moveToTrash(rmpath) || dstfs.delete(rmpath, true))) { throw new IOException("Failed to delete " + rmpath); } lastpath = rmpath; } } } } return deletedPathsCount; }
Example 3
Source File: DistCpV1.java From big-c with Apache License 2.0 | 4 votes |
/** * Delete the dst files/dirs which do not exist in src * * @return total count of files and directories deleted from destination * @throws IOException */ static private long deleteNonexisting( FileSystem dstfs, FileStatus dstroot, Path dstsorted, FileSystem jobfs, Path jobdir, JobConf jobconf, Configuration conf ) throws IOException { if (dstroot.isFile()) { throw new IOException("dst must be a directory when option " + Options.DELETE.cmd + " is set, but dst (= " + dstroot.getPath() + ") is not a directory."); } //write dst lsr results final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr"); try (final SequenceFile.Writer writer = SequenceFile.createWriter(jobconf, Writer.file(dstlsr), Writer.keyClass(Text.class), Writer.valueClass(NullWritable.class), Writer.compression( SequenceFile.CompressionType.NONE))) { //do lsr to get all file statuses in dstroot final Stack<FileStatus> lsrstack = new Stack<FileStatus>(); for(lsrstack.push(dstroot); !lsrstack.isEmpty(); ) { final FileStatus status = lsrstack.pop(); if (status.isDirectory()) { for(FileStatus child : dstfs.listStatus(status.getPath())) { String relative = makeRelative(dstroot.getPath(), child.getPath()); writer.append(new Text(relative), NullWritable.get()); lsrstack.push(child); } } } } //sort lsr results final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted"); SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs, new Text.Comparator(), Text.class, NullWritable.class, jobconf); sorter.sort(dstlsr, sortedlsr); //compare lsr list and dst list long deletedPathsCount = 0; try (SequenceFile.Reader lsrin = new SequenceFile.Reader(jobconf, Reader.file(sortedlsr)); SequenceFile.Reader dstin = new SequenceFile.Reader(jobconf, Reader.file(dstsorted))) { //compare sorted lsr list and sorted dst list final Text lsrpath = new Text(); final Text dstpath = new Text(); final Text dstfrom = new Text(); final Trash trash = new Trash(dstfs, conf); Path lastpath = null; boolean hasnext = dstin.next(dstpath, dstfrom); while (lsrin.next(lsrpath, NullWritable.get())) { int dst_cmp_lsr = dstpath.compareTo(lsrpath); while (hasnext && dst_cmp_lsr < 0) { hasnext = dstin.next(dstpath, dstfrom); dst_cmp_lsr = dstpath.compareTo(lsrpath); } if (dst_cmp_lsr == 0) { //lsrpath exists in dst, skip it hasnext = dstin.next(dstpath, dstfrom); } else { //lsrpath does not exist, delete it final Path rmpath = new Path(dstroot.getPath(), lsrpath.toString()); ++deletedPathsCount; if ((lastpath == null || !isAncestorPath(lastpath, rmpath))) { if (!(trash.moveToTrash(rmpath) || dstfs.delete(rmpath, true))) { throw new IOException("Failed to delete " + rmpath); } lastpath = rmpath; } } } } return deletedPathsCount; }
Example 4
Source File: PigTextRawComparator.java From spork with Apache License 2.0 | 4 votes |
public PigTextRawComparator() { super(NullableText.class); mWrappedComp = new Text.Comparator(); }
Example 5
Source File: DistCp.java From RDFS with Apache License 2.0 | 4 votes |
/** Delete the dst files/dirs which do not exist in src */ static private void deleteNonexisting( FileSystem dstfs, FileStatus dstroot, Path dstsorted, FileSystem jobfs, Path jobdir, JobConf jobconf, Configuration conf ) throws IOException { if (!dstroot.isDir()) { throw new IOException("dst must be a directory when option " + Options.DELETE.cmd + " is set, but dst (= " + dstroot.getPath() + ") is not a directory."); } //write dst lsr results final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr"); final SequenceFile.Writer writer = SequenceFile.createWriter(jobfs, jobconf, dstlsr, Text.class, FileStatus.class, SequenceFile.CompressionType.NONE); try { //do lsr to get all file statuses in dstroot final Stack<FileStatus> lsrstack = new Stack<FileStatus>(); for(lsrstack.push(dstroot); !lsrstack.isEmpty(); ) { final FileStatus status = lsrstack.pop(); if (status.isDir()) { for(FileStatus child : dstfs.listStatus(status.getPath())) { String relative = makeRelative(dstroot.getPath(), child.getPath()); writer.append(new Text(relative), child); lsrstack.push(child); } } } } finally { checkAndClose(writer); } //sort lsr results final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted"); SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs, new Text.Comparator(), Text.class, FileStatus.class, jobconf); sorter.sort(dstlsr, sortedlsr); //compare lsr list and dst list SequenceFile.Reader lsrin = null; SequenceFile.Reader dstin = null; try { lsrin = new SequenceFile.Reader(jobfs, sortedlsr, jobconf); dstin = new SequenceFile.Reader(jobfs, dstsorted, jobconf); //compare sorted lsr list and sorted dst list final Text lsrpath = new Text(); final FileStatus lsrstatus = new FileStatus(); final Text dstpath = new Text(); final Text dstfrom = new Text(); final FsShell shell = new FsShell(conf); final String[] shellargs = {"-rmr", null}; boolean hasnext = dstin.next(dstpath, dstfrom); for(; lsrin.next(lsrpath, lsrstatus); ) { int dst_cmp_lsr = dstpath.compareTo(lsrpath); for(; hasnext && dst_cmp_lsr < 0; ) { hasnext = dstin.next(dstpath, dstfrom); dst_cmp_lsr = dstpath.compareTo(lsrpath); } if (dst_cmp_lsr == 0) { //lsrpath exists in dst, skip it hasnext = dstin.next(dstpath, dstfrom); } else { //lsrpath does not exist, delete it String s = new Path(dstroot.getPath(), lsrpath.toString()).toString(); if (shellargs[1] == null || !isAncestorPath(shellargs[1], s)) { shellargs[1] = s; int r = 0; try { r = shell.run(shellargs); } catch(Exception e) { throw new IOException("Exception from shell.", e); } if (r != 0) { throw new IOException("\"" + shellargs[0] + " " + shellargs[1] + "\" returns non-zero value " + r); } } } } } finally { checkAndClose(lsrin); checkAndClose(dstin); } }
Example 6
Source File: DistCp.java From hadoop-gpu with Apache License 2.0 | 4 votes |
/** Delete the dst files/dirs which do not exist in src */ static private void deleteNonexisting( FileSystem dstfs, FileStatus dstroot, Path dstsorted, FileSystem jobfs, Path jobdir, JobConf jobconf, Configuration conf ) throws IOException { if (!dstroot.isDir()) { throw new IOException("dst must be a directory when option " + Options.DELETE.cmd + " is set, but dst (= " + dstroot.getPath() + ") is not a directory."); } //write dst lsr results final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr"); final SequenceFile.Writer writer = SequenceFile.createWriter(jobfs, jobconf, dstlsr, Text.class, FileStatus.class, SequenceFile.CompressionType.NONE); try { //do lsr to get all file statuses in dstroot final Stack<FileStatus> lsrstack = new Stack<FileStatus>(); for(lsrstack.push(dstroot); !lsrstack.isEmpty(); ) { final FileStatus status = lsrstack.pop(); if (status.isDir()) { for(FileStatus child : dstfs.listStatus(status.getPath())) { String relative = makeRelative(dstroot.getPath(), child.getPath()); writer.append(new Text(relative), child); lsrstack.push(child); } } } } finally { checkAndClose(writer); } //sort lsr results final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted"); SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs, new Text.Comparator(), Text.class, FileStatus.class, jobconf); sorter.sort(dstlsr, sortedlsr); //compare lsr list and dst list SequenceFile.Reader lsrin = null; SequenceFile.Reader dstin = null; try { lsrin = new SequenceFile.Reader(jobfs, sortedlsr, jobconf); dstin = new SequenceFile.Reader(jobfs, dstsorted, jobconf); //compare sorted lsr list and sorted dst list final Text lsrpath = new Text(); final FileStatus lsrstatus = new FileStatus(); final Text dstpath = new Text(); final Text dstfrom = new Text(); final FsShell shell = new FsShell(conf); final String[] shellargs = {"-rmr", null}; boolean hasnext = dstin.next(dstpath, dstfrom); for(; lsrin.next(lsrpath, lsrstatus); ) { int dst_cmp_lsr = dstpath.compareTo(lsrpath); for(; hasnext && dst_cmp_lsr < 0; ) { hasnext = dstin.next(dstpath, dstfrom); dst_cmp_lsr = dstpath.compareTo(lsrpath); } if (dst_cmp_lsr == 0) { //lsrpath exists in dst, skip it hasnext = dstin.next(dstpath, dstfrom); } else { //lsrpath does not exist, delete it String s = new Path(dstroot.getPath(), lsrpath.toString()).toString(); if (shellargs[1] == null || !isAncestorPath(shellargs[1], s)) { shellargs[1] = s; int r = 0; try { r = shell.run(shellargs); } catch(Exception e) { throw new IOException("Exception from shell.", e); } if (r != 0) { throw new IOException("\"" + shellargs[0] + " " + shellargs[1] + "\" returns non-zero value " + r); } } } } } finally { checkAndClose(lsrin); checkAndClose(dstin); } }