Java Code Examples for org.apache.hadoop.util.LineReader#readLine()
The following examples show how to use
org.apache.hadoop.util.LineReader#readLine().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MultiFileWordCount.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Opens the {@code index}-th file of a combined split and positions a
 * {@link LineReader} at the first line this reader is responsible for.
 *
 * @param split   combined split holding the path, offset and length of each chunk
 * @param context task context; only its configuration is used here
 * @param index   position of this reader's chunk inside {@code split}
 * @throws IOException if the file cannot be opened, sought or read
 */
public CombineFileLineRecordReader(CombineFileSplit split,
    TaskAttemptContext context, Integer index) throws IOException {
  this.path = split.getPath(index);
  fs = this.path.getFileSystem(context.getConfiguration());
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);

  // open the file
  fileIn = fs.open(path);

  // A chunk that does not begin at offset 0 is entered one byte early so that
  // the line read below ends at the first line terminator at or after the
  // chunk's nominal start.
  final boolean startsMidFile = startOffset != 0;
  if (startsMidFile) {
    startOffset -= 1;
    fileIn.seek(startOffset);
  }

  reader = new LineReader(fileIn);

  if (startsMidFile) {
    // skip first line and re-establish "startOffset".
    final long bytesLeft = end - startOffset;
    final int maxBytes = (int) Math.min((long) Integer.MAX_VALUE, bytesLeft);
    startOffset += reader.readLine(new Text(), 0, maxBytes);
  }
  this.pos = startOffset;
}
Example 2
Source File: TestMRKeyValueTextInputFormat.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Reads a six-line input whose lines are terminated by "\n", "\r" and "\r\n"
 * and checks the length of each line delivered by {@code readLine}, followed
 * by a zero return value once the input is exhausted.
 *
 * @throws Exception if the stream cannot be created or read
 */
@Test
public void testNewLines() throws Exception {
  LineReader in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
  Text out = new Text();
  in.readLine(out);
  assertEquals("line1 length", 1, out.getLength());
  in.readLine(out);
  assertEquals("line2 length", 2, out.getLength());
  in.readLine(out);
  assertEquals("line3 length", 0, out.getLength());
  in.readLine(out);
  assertEquals("line4 length", 3, out.getLength());
  in.readLine(out);
  assertEquals("line5 length", 4, out.getLength());
  in.readLine(out);
  // Sixth line ("eeeee"); labelled distinctly so a failure names the right line.
  assertEquals("line6 length", 5, out.getLength());
  assertEquals("end of file", 0, in.readLine(out));
}
Example 3
Source File: TestTextInputFormat.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Test readLine for correct interpretation of maxLineLength
 * (returned string should be clipped at maxLineLength, and the
 * remaining bytes on the same line should be thrown out).
 * Also check that the sum of the returned values matches the byte length
 * of the whole input.
 * Varies buffer size to stress test.
 *
 * @throws Exception if the stream cannot be created or read
 */
@Test (timeout=5000)
public void testMaxLineLength() throws Exception {
  final String STR = "a\nbb\n\nccc\rdddd\r\neeeee";
  final int STRLENBYTES = STR.getBytes().length;
  Text out = new Text();
  for (int bufsz = 1; bufsz < STRLENBYTES+1; ++bufsz) {
    LineReader in = makeStream(STR, bufsz);
    int c = 0;
    c += in.readLine(out, 1);
    assertEquals("line1 length, bufsz: "+bufsz, 1, out.getLength());
    c += in.readLine(out, 1);
    assertEquals("line2 length, bufsz: "+bufsz, 1, out.getLength());
    c += in.readLine(out, 1);
    assertEquals("line3 length, bufsz: "+bufsz, 0, out.getLength());
    c += in.readLine(out, 3);
    assertEquals("line4 length, bufsz: "+bufsz, 3, out.getLength());
    c += in.readLine(out, 10);
    assertEquals("line5 length, bufsz: "+bufsz, 4, out.getLength());
    c += in.readLine(out, 8);
    // Sixth line; labelled distinctly so a failure names the right line.
    assertEquals("line6 length, bufsz: "+bufsz, 5, out.getLength());
    assertEquals("end of file, bufsz: " +bufsz, 0, in.readLine(out));
    // Expected value first, measured value second, so failure output reads correctly.
    assertEquals("total bytes, bufsz: "+bufsz, STRLENBYTES, c);
  }
}
Example 4
Source File: TestTextInputFormat.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Test readLine for correct interpretation of maxLineLength
 * (returned string should be clipped at maxLineLength, and the
 * remaining bytes on the same line should be thrown out).
 * Also check that the sum of the returned values matches the byte length
 * of the whole input.
 * Varies buffer size to stress test.
 *
 * @throws Exception if the stream cannot be created or read
 */
@Test (timeout=5000)
public void testMaxLineLength() throws Exception {
  final String STR = "a\nbb\n\nccc\rdddd\r\neeeee";
  final int STRLENBYTES = STR.getBytes().length;
  Text out = new Text();
  for (int bufsz = 1; bufsz < STRLENBYTES+1; ++bufsz) {
    LineReader in = makeStream(STR, bufsz);
    int c = 0;
    c += in.readLine(out, 1);
    assertEquals("line1 length, bufsz: "+bufsz, 1, out.getLength());
    c += in.readLine(out, 1);
    assertEquals("line2 length, bufsz: "+bufsz, 1, out.getLength());
    c += in.readLine(out, 1);
    assertEquals("line3 length, bufsz: "+bufsz, 0, out.getLength());
    c += in.readLine(out, 3);
    assertEquals("line4 length, bufsz: "+bufsz, 3, out.getLength());
    c += in.readLine(out, 10);
    assertEquals("line5 length, bufsz: "+bufsz, 4, out.getLength());
    c += in.readLine(out, 8);
    // Sixth line; labelled distinctly so a failure names the right line.
    assertEquals("line6 length, bufsz: "+bufsz, 5, out.getLength());
    assertEquals("end of file, bufsz: " +bufsz, 0, in.readLine(out));
    // Expected value first, measured value second, so failure output reads correctly.
    assertEquals("total bytes, bufsz: "+bufsz, STRLENBYTES, c);
  }
}
Example 5
Source File: TestStreamedMerge.java From hadoop-gpu with Apache License 2.0 | 6 votes |
/**
 * Connects the input stream and copies it line by line into {@code buf_},
 * appending a '\n' after every line. Reading stops the first time
 * {@code readLine} returns a value that is not greater than zero.
 *
 * @throws RuntimeException wrapping any {@link IOException} raised while
 *         connecting, reading or closing
 */
@Override
public void run() {
  try {
    in_ = connectInputStream();
    try {
      LineReader lineReader = new LineReader((InputStream) in_, conf_);
      Text line = new Text();
      while (lineReader.readLine(line) > 0) {
        buf_.append(line.toString());
        buf_.append('\n');
        line.clear();
      }
      lineReader.close();
    } finally {
      // Also runs when readLine fails, so the connected stream is not leaked.
      in_.close();
    }
  } catch (IOException io) {
    throw new RuntimeException(io);
  }
}
Example 6
Source File: TestTextInputFormat.java From RDFS with Apache License 2.0 | 6 votes |
/**
 * Test readLine for correct interpretation of maxLineLength
 * (returned string should be clipped at maxLineLength, and the
 * remaining bytes on the same line should be thrown out).
 * Also check that the sum of the returned values matches the byte length
 * of the whole input.
 * Varies buffer size to stress test.
 *
 * @throws Exception if the stream cannot be created or read
 */
public void testMaxLineLength() throws Exception {
  final String STR = "a\nbb\n\nccc\rdddd\r\neeeee";
  final int STRLENBYTES = STR.getBytes().length;
  Text out = new Text();
  for (int bufsz = 1; bufsz < STRLENBYTES+1; ++bufsz) {
    LineReader in = makeStream(STR, bufsz);
    int c = 0;
    c += in.readLine(out, 1);
    assertEquals("line1 length, bufsz: "+bufsz, 1, out.getLength());
    c += in.readLine(out, 1);
    assertEquals("line2 length, bufsz: "+bufsz, 1, out.getLength());
    c += in.readLine(out, 1);
    assertEquals("line3 length, bufsz: "+bufsz, 0, out.getLength());
    c += in.readLine(out, 3);
    assertEquals("line4 length, bufsz: "+bufsz, 3, out.getLength());
    c += in.readLine(out, 10);
    assertEquals("line5 length, bufsz: "+bufsz, 4, out.getLength());
    c += in.readLine(out, 8);
    // Sixth line; labelled distinctly so a failure names the right line.
    assertEquals("line6 length, bufsz: "+bufsz, 5, out.getLength());
    assertEquals("end of file, bufsz: " +bufsz, 0, in.readLine(out));
    // Expected value first, measured value second, so failure output reads correctly.
    assertEquals("total bytes, bufsz: "+bufsz, STRLENBYTES, c);
  }
}
Example 7
Source File: HarFileSystem.java From hadoop-gpu with Apache License 2.0 | 6 votes |
public int getHarVersion() throws IOException { FSDataInputStream masterIn = fs.open(masterIndex); LineReader lmaster = new LineReader(masterIn, getConf()); Text line = new Text(); lmaster.readLine(line); try { masterIn.close(); } catch(IOException e){ //disregard it. // its a read. } String versionLine = line.toString(); String[] arr = versionLine.split(" "); int version = Integer.parseInt(arr[0]); return version; }
Example 8
Source File: TestKeyValueTextInputFormat.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Checks that {@code readLine} returns multi-byte characters (U+20AC)
 * unchanged and that U+200A inside a line does not end the line.
 * Each reader is closed before the next one is created.
 *
 * @throws Exception if a stream cannot be created, read or closed
 */
public void testUTF8() throws Exception {
  Text line = new Text();

  LineReader in = makeStream("abcd\u20acbdcd\u20ac");
  try {
    in.readLine(line);
    assertEquals("readLine changed utf8 characters",
                 "abcd\u20acbdcd\u20ac", line.toString());
  } finally {
    // Close the first reader here; it is replaced below.
    in.close();
  }

  in = makeStream("abc\u200axyz");
  try {
    in.readLine(line);
    assertEquals("split on fake newline", "abc\u200axyz", line.toString());
  } finally {
    in.close();
  }
}
Example 9
Source File: CompactionTool.java From hbase with Apache License 2.0 | 6 votes |
/**
 * Builds one {@link FileSplit} per line of every input file of the job.
 * Each split points at the input file itself, with the line's byte offset
 * and the byte count returned by {@code readLine} as start and length; its
 * hosts are whatever {@code getStoreDirHosts} returns for that file.
 *
 * @param job job whose input files and configuration are used
 * @return the splits, in file order and then line order
 * @throws IOException if listing, opening or reading an input file fails
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  final List<InputSplit> result = new ArrayList<>();
  final List<FileStatus> inputs = listStatus(job);
  final Text lineBuf = new Text();

  for (FileStatus status : inputs) {
    final Path file = status.getPath();
    final FileSystem fs = file.getFileSystem(job.getConfiguration());
    final LineReader reader = new LineReader(fs.open(file));
    try {
      long offset = 0;
      // A non-positive return value from readLine ends the file.
      for (int len = reader.readLine(lineBuf); len > 0; len = reader.readLine(lineBuf)) {
        result.add(new FileSplit(file, offset, len, getStoreDirHosts(fs, file)));
        offset += len;
      }
    } finally {
      reader.close();
    }
  }
  return result;
}
Example 10
Source File: Hadoop20JHParser.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Decides whether this parser accepts the given stream.
 *
 * A stream is accepted when its first line is exactly
 * "Meta VERSION=\"1\" ." ; a zero return from readLine or an
 * {@link EOFException} while reading yields {@code false}.
 *
 * @param input stream to probe; it is read from but not closed here
 * @return whether this parser can parse the input
 * @throws IOException on read failures other than {@link EOFException}
 */
public static boolean canParse(InputStream input) throws IOException {
  try {
    final LineReader probe = new LineReader(input);
    final Text firstLine = new Text();
    if (probe.readLine(firstLine) == 0) {
      return false;
    }
    final String header = firstLine.toString();
    return header.equals("Meta VERSION=\"1\" .");
  } catch (EOFException e) {
    return false;
  }
}
Example 11
Source File: Hadoop20JHParser.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Decides whether this parser accepts the given stream.
 *
 * A stream is accepted when its first line is exactly
 * "Meta VERSION=\"1\" ." ; a zero return from readLine or an
 * {@link EOFException} while reading yields {@code false}.
 *
 * @param input stream to probe; it is read from but not closed here
 * @return whether this parser can parse the input
 * @throws IOException on read failures other than {@link EOFException}
 */
public static boolean canParse(InputStream input) throws IOException {
  try {
    final LineReader probe = new LineReader(input);
    final Text firstLine = new Text();
    if (probe.readLine(firstLine) == 0) {
      return false;
    }
    final String header = firstLine.toString();
    return header.equals("Meta VERSION=\"1\" .");
  } catch (EOFException e) {
    return false;
  }
}
Example 12
Source File: TestKeyValueTextInputFormat.java From RDFS with Apache License 2.0 | 5 votes |
/**
 * Checks that {@code readLine} returns multi-byte characters (U+20AC)
 * unchanged and that U+200A inside a line does not end the line.
 * Both readers are closed, including when an assertion fails.
 *
 * @throws Exception if a stream cannot be created, read or closed
 */
public void testUTF8() throws Exception {
  Text line = new Text();

  LineReader in = makeStream("abcd\u20acbdcd\u20ac");
  try {
    in.readLine(line);
    assertEquals("readLine changed utf8 characters",
                 "abcd\u20acbdcd\u20ac", line.toString());
  } finally {
    // The variable is reused below, so release this reader first.
    in.close();
  }

  in = makeStream("abc\u200axyz");
  try {
    in.readLine(line);
    assertEquals("split on fake newline", "abc\u200axyz", line.toString());
  } finally {
    in.close();
  }
}
Example 13
Source File: TestTextInputFormat.java From hadoop-gpu with Apache License 2.0 | 5 votes |
/**
 * Splits every command line argument into lines and prints each line.
 * An argument is passed through {@code unquote} before being read.
 *
 * @param args the texts to split
 * @throws Exception if a stream cannot be created, read or closed
 */
public static void main(String[] args) throws Exception {
  for (String arg : args) {
    System.out.println("Working on " + arg);
    LineReader reader = makeStream(unquote(arg));
    Text line = new Text();
    // Stop at the first readLine call that returns a non-positive value.
    for (int size = reader.readLine(line); size > 0; size = reader.readLine(line)) {
      System.out.println("Got: " + line.toString());
    }
    reader.close();
  }
}
Example 14
Source File: TestConcatenatedCompressedInput.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Splits every command line argument into lines and prints each line.
 * An argument is passed through {@code unquote} before being read.
 *
 * @param args the texts to split
 * @throws Exception if a stream cannot be created, read or closed
 */
public static void main(String[] args) throws Exception {
  for (String arg : args) {
    System.out.println("Working on " + arg);
    LineReader reader = makeStream(unquote(arg));
    Text line = new Text();
    // Stop at the first readLine call that returns a non-positive value.
    for (int size = reader.readLine(line); size > 0; size = reader.readLine(line)) {
      System.out.println("Got: " + line.toString());
    }
    reader.close();
  }
}
Example 15
Source File: TestTextInputFormat.java From RDFS with Apache License 2.0 | 5 votes |
/**
 * Checks that {@code readLine} returns multi-byte characters (U+20AC)
 * unchanged and that U+200A inside a line does not end the line.
 * Both readers are closed, including when an assertion fails.
 *
 * @throws Exception if a stream cannot be created, read or closed
 */
public void testUTF8() throws Exception {
  Text line = new Text();

  LineReader in = makeStream("abcd\u20acbdcd\u20ac");
  try {
    in.readLine(line);
    assertEquals("readLine changed utf8 characters",
                 "abcd\u20acbdcd\u20ac", line.toString());
  } finally {
    // The variable is reused below, so release this reader first.
    in.close();
  }

  in = makeStream("abc\u200axyz");
  try {
    in.readLine(line);
    assertEquals("split on fake newline", "abc\u200axyz", line.toString());
  } finally {
    in.close();
  }
}
Example 16
Source File: TestMRKeyValueTextInputFormat.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Checks that {@code readLine} returns multi-byte characters (U+20AC)
 * unchanged and that U+200A inside a line does not end the line.
 * Both readers are closed, including when an assertion fails.
 *
 * @throws Exception if a stream cannot be created, read or closed
 */
@Test
public void testUTF8() throws Exception {
  Text line = new Text();

  LineReader in = makeStream("abcd\u20acbdcd\u20ac");
  try {
    in.readLine(line);
    assertEquals("readLine changed utf8 characters",
                 "abcd\u20acbdcd\u20ac", line.toString());
  } finally {
    // The variable is reused below, so release this reader first.
    in.close();
  }

  in = makeStream("abc\u200axyz");
  try {
    in.readLine(line);
    assertEquals("split on fake newline", "abc\u200axyz", line.toString());
  } finally {
    in.close();
  }
}
Example 17
Source File: TestMRKeyValueTextInputFormat.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Checks that {@code readLine} returns multi-byte characters (U+20AC)
 * unchanged and that U+200A inside a line does not end the line.
 * Both readers are closed, including when an assertion fails.
 *
 * @throws Exception if a stream cannot be created, read or closed
 */
@Test
public void testUTF8() throws Exception {
  Text line = new Text();

  LineReader in = makeStream("abcd\u20acbdcd\u20ac");
  try {
    in.readLine(line);
    assertEquals("readLine changed utf8 characters",
                 "abcd\u20acbdcd\u20ac", line.toString());
  } finally {
    // The variable is reused below, so release this reader first.
    in.close();
  }

  in = makeStream("abc\u200axyz");
  try {
    in.readLine(line);
    assertEquals("split on fake newline", "abc\u200axyz", line.toString());
  } finally {
    in.close();
  }
}
Example 18
Source File: TestCodec.java From big-c with Apache License 2.0 | 4 votes |
private void testSplitableCodec( Class<? extends SplittableCompressionCodec> codecClass) throws IOException { final long DEFLBYTES = 2 * 1024 * 1024; final Configuration conf = new Configuration(); final Random rand = new Random(); final long seed = rand.nextLong(); LOG.info("seed: " + seed); rand.setSeed(seed); SplittableCompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf); final FileSystem fs = FileSystem.getLocal(conf); final FileStatus infile = fs.getFileStatus(writeSplitTestFile(fs, rand, codec, DEFLBYTES)); if (infile.getLen() > Integer.MAX_VALUE) { fail("Unexpected compression: " + DEFLBYTES + " -> " + infile.getLen()); } final int flen = (int) infile.getLen(); final Text line = new Text(); final Decompressor dcmp = CodecPool.getDecompressor(codec); try { for (int pos = 0; pos < infile.getLen(); pos += rand.nextInt(flen / 8)) { // read from random positions, verifying that there exist two sequential // lines as written in writeSplitTestFile final SplitCompressionInputStream in = codec.createInputStream(fs.open(infile.getPath()), dcmp, pos, flen, SplittableCompressionCodec.READ_MODE.BYBLOCK); if (in.getAdjustedStart() >= flen) { break; } LOG.info("SAMPLE " + in.getAdjustedStart() + "," + in.getAdjustedEnd()); final LineReader lreader = new LineReader(in); lreader.readLine(line); // ignore; likely partial if (in.getPos() >= flen) { break; } lreader.readLine(line); final int seq1 = readLeadingInt(line); lreader.readLine(line); if (in.getPos() >= flen) { break; } final int seq2 = readLeadingInt(line); assertEquals("Mismatched lines", seq1 + 1, seq2); } } finally { CodecPool.returnDecompressor(dcmp); } // remove on success fs.delete(infile.getPath().getParent(), true); }
Example 19
Source File: StreamKeyValUtil.java From hadoop with Apache License 2.0 | 2 votes |
/**
 * Empties {@code out} and then fills it with the next line from
 * {@code lineReader}.
 *
 * @param lineReader reader supplying the line
 * @param out        text object that receives the line; cleared first
 * @return the value returned by {@code lineReader.readLine(out)}
 *         (number of bytes read)
 * @throws IOException if the underlying read fails
 */
public static int readLine(LineReader lineReader, Text out)
    throws IOException {
  out.clear();
  final int bytesRead = lineReader.readLine(out);
  return bytesRead;
}
Example 20
Source File: StreamKeyValUtil.java From hadoop-gpu with Apache License 2.0 | 2 votes |
/**
 * Empties {@code out} and then fills it with the next line from
 * {@code lineReader}.
 *
 * @param lineReader reader supplying the line
 * @param out        text object that receives the line; cleared first
 * @return the value returned by {@code lineReader.readLine(out)}
 *         (number of bytes read)
 * @throws IOException if the underlying read fails
 */
public static int readLine(LineReader lineReader, Text out)
    throws IOException {
  out.clear();
  final int bytesRead = lineReader.readLine(out);
  return bytesRead;
}