Java Code Examples for org.apache.hadoop.util.LineReader#readLine()

The following examples show how to use org.apache.hadoop.util.LineReader#readLine(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: MultiFileWordCount.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Opens the file at position {@code index} of the combined split and
 * positions a {@link LineReader} at the first complete line of that chunk.
 *
 * @param split   combined split describing several file chunks
 * @param context task context; supplies the configuration used to resolve
 *                the file system
 * @param index   which chunk of {@code split} this reader serves
 * @throws IOException if the file cannot be opened, seeked or read
 */
public CombineFileLineRecordReader(CombineFileSplit split,
    TaskAttemptContext context, Integer index) throws IOException {

  this.path = split.getPath(index);
  fs = this.path.getFileSystem(context.getConfiguration());
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);

  // Open the file; a chunk that does not begin at byte 0 starts one byte
  // early so the (possibly partial) first line can be read and discarded.
  fileIn = fs.open(path);
  final boolean startsMidFile = (startOffset != 0);
  if (startsMidFile) {
    startOffset -= 1;
    fileIn.seek(startOffset);
  }

  reader = new LineReader(fileIn);
  if (startsMidFile) {
    // Discard the first line and advance startOffset past it.
    final int maxBytesToConsume =
        (int) Math.min((long) Integer.MAX_VALUE, end - startOffset);
    startOffset += reader.readLine(new Text(), 0, maxBytesToConsume);
  }
  this.pos = startOffset;
}
 
Example 2
Source File: TestMRKeyValueTextInputFormat.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code readLine(Text)} splits the sample input into six lines
 * of lengths 1, 2, 0, 3, 4 and 5, and returns 0 once the input is exhausted.
 * The sample input mixes "\n", "\r" and "\r\n" line endings.
 *
 * @throws Exception if building the stream or reading from it fails
 */
@Test
public void testNewLines() throws Exception {
  LineReader in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
  Text out = new Text();
  in.readLine(out);
  assertEquals("line1 length", 1, out.getLength());
  in.readLine(out);
  assertEquals("line2 length", 2, out.getLength());
  in.readLine(out);
  assertEquals("line3 length", 0, out.getLength());
  in.readLine(out);
  assertEquals("line4 length", 3, out.getLength());
  in.readLine(out);
  assertEquals("line5 length", 4, out.getLength());
  in.readLine(out);
  // Sixth line: labelled distinctly so a failure points at the right line.
  assertEquals("line6 length", 5, out.getLength());
  assertEquals("end of file", 0, in.readLine(out));
}
 
Example 3
Source File: TestTextInputFormat.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Test readLine for correct interpretation of maxLineLength
 * (returned string should be clipped at maxLineLength, and the
 * remaining bytes on the same line should be thrown out).
 * Also check that the return values of all readLine calls add up to the
 * byte length of the input.
 * Varies buffer size from 1 to the input length to stress test.
 *
 * @throws Exception if building the stream or reading from it fails
 */
@Test (timeout=5000)
public void testMaxLineLength() throws Exception {
  final String STR = "a\nbb\n\nccc\rdddd\r\neeeee";
  final int STRLENBYTES = STR.getBytes().length;
  Text out = new Text();
  for (int bufsz = 1; bufsz < STRLENBYTES+1; ++bufsz) {
    LineReader in = makeStream(STR, bufsz);
    // Running total of the byte counts returned by readLine.
    int c = 0;
    c += in.readLine(out, 1);
    assertEquals("line1 length, bufsz: "+bufsz, 1, out.getLength());
    c += in.readLine(out, 1);
    // "bb" is clipped to a single byte by maxLineLength = 1.
    assertEquals("line2 length, bufsz: "+bufsz, 1, out.getLength());
    c += in.readLine(out, 1);
    assertEquals("line3 length, bufsz: "+bufsz, 0, out.getLength());
    c += in.readLine(out, 3);
    assertEquals("line4 length, bufsz: "+bufsz, 3, out.getLength());
    c += in.readLine(out, 10);
    assertEquals("line5 length, bufsz: "+bufsz, 4, out.getLength());
    c += in.readLine(out, 8);
    // Sixth line: labelled distinctly so a failure points at the right line.
    assertEquals("line6 length, bufsz: "+bufsz, 5, out.getLength());
    assertEquals("end of file, bufsz: " +bufsz, 0, in.readLine(out));
    // Expected value first, so the failure message reads correctly.
    assertEquals("total bytes, bufsz: "+bufsz, STRLENBYTES, c);
  }
}
 
Example 4
Source File: TestTextInputFormat.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Test readLine for correct interpretation of maxLineLength
 * (returned string should be clipped at maxLineLength, and the
 * remaining bytes on the same line should be thrown out).
 * Also check that the return values of all readLine calls add up to the
 * byte length of the input.
 * Varies buffer size from 1 to the input length to stress test.
 *
 * @throws Exception if building the stream or reading from it fails
 */
@Test (timeout=5000)
public void testMaxLineLength() throws Exception {
  final String STR = "a\nbb\n\nccc\rdddd\r\neeeee";
  final int STRLENBYTES = STR.getBytes().length;
  Text out = new Text();
  for (int bufsz = 1; bufsz < STRLENBYTES+1; ++bufsz) {
    LineReader in = makeStream(STR, bufsz);
    // Running total of the byte counts returned by readLine.
    int c = 0;
    c += in.readLine(out, 1);
    assertEquals("line1 length, bufsz: "+bufsz, 1, out.getLength());
    c += in.readLine(out, 1);
    // "bb" is clipped to a single byte by maxLineLength = 1.
    assertEquals("line2 length, bufsz: "+bufsz, 1, out.getLength());
    c += in.readLine(out, 1);
    assertEquals("line3 length, bufsz: "+bufsz, 0, out.getLength());
    c += in.readLine(out, 3);
    assertEquals("line4 length, bufsz: "+bufsz, 3, out.getLength());
    c += in.readLine(out, 10);
    assertEquals("line5 length, bufsz: "+bufsz, 4, out.getLength());
    c += in.readLine(out, 8);
    // Sixth line: labelled distinctly so a failure points at the right line.
    assertEquals("line6 length, bufsz: "+bufsz, 5, out.getLength());
    assertEquals("end of file, bufsz: " +bufsz, 0, in.readLine(out));
    // Expected value first, so the failure message reads correctly.
    assertEquals("total bytes, bufsz: "+bufsz, STRLENBYTES, c);
  }
}
 
Example 5
Source File: TestStreamedMerge.java    From hadoop-gpu with Apache License 2.0 6 votes vote down vote up
/**
 * Connects the input stream, reads it line by line and appends every line,
 * followed by '\n', to {@code buf_}. Reading stops at the first
 * {@code readLine} call that returns 0 or less.
 *
 * The reader and the stream are closed in {@code finally} blocks so they are
 * released even when reading fails part-way through.
 *
 * @throws RuntimeException wrapping any {@link IOException} raised while
 *         connecting, reading or closing
 */
@Override
public void run() {
  try {
    in_ = connectInputStream();
    try {
      LineReader lineReader = new LineReader((InputStream)in_, conf_);
      try {
        Text line = new Text();
        while (lineReader.readLine(line) > 0) {
          buf_.append(line.toString());
          buf_.append('\n');
          line.clear();
        }
      } finally {
        lineReader.close();
      }
    } finally {
      // Closed explicitly as well, matching the original close sequence.
      in_.close();
    }
  } catch (IOException io) {
    throw new RuntimeException(io);
  }
}
 
Example 6
Source File: TestTextInputFormat.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Test readLine for correct interpretation of maxLineLength
 * (returned string should be clipped at maxLineLength, and the
 * remaining bytes on the same line should be thrown out).
 * Also check that the return values of all readLine calls add up to the
 * byte length of the input.
 * Varies buffer size from 1 to the input length to stress test.
 *
 * @throws Exception if building the stream or reading from it fails
 */
public void testMaxLineLength() throws Exception {
  final String STR = "a\nbb\n\nccc\rdddd\r\neeeee";
  final int STRLENBYTES = STR.getBytes().length;
  Text out = new Text();
  for (int bufsz = 1; bufsz < STRLENBYTES+1; ++bufsz) {
    LineReader in = makeStream(STR, bufsz);
    // Running total of the byte counts returned by readLine.
    int c = 0;
    c += in.readLine(out, 1);
    assertEquals("line1 length, bufsz: "+bufsz, 1, out.getLength());
    c += in.readLine(out, 1);
    // "bb" is clipped to a single byte by maxLineLength = 1.
    assertEquals("line2 length, bufsz: "+bufsz, 1, out.getLength());
    c += in.readLine(out, 1);
    assertEquals("line3 length, bufsz: "+bufsz, 0, out.getLength());
    c += in.readLine(out, 3);
    assertEquals("line4 length, bufsz: "+bufsz, 3, out.getLength());
    c += in.readLine(out, 10);
    assertEquals("line5 length, bufsz: "+bufsz, 4, out.getLength());
    c += in.readLine(out, 8);
    // Sixth line: labelled distinctly so a failure points at the right line.
    assertEquals("line6 length, bufsz: "+bufsz, 5, out.getLength());
    assertEquals("end of file, bufsz: " +bufsz, 0, in.readLine(out));
    // Expected value first, so the failure message reads correctly.
    assertEquals("total bytes, bufsz: "+bufsz, STRLENBYTES, c);
  }
}
 
Example 7
Source File: HarFileSystem.java    From hadoop-gpu with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the first line of the master index and returns the integer found
 * before the first space on that line.
 *
 * The stream is closed in a {@code finally} block so it is released even
 * when reading the line fails.
 *
 * @return the version number parsed from the first line
 * @throws IOException if the master index cannot be opened or read
 */
public int getHarVersion() throws IOException {
  FSDataInputStream masterIn = fs.open(masterIndex);
  Text line = new Text();
  try {
    LineReader lmaster = new LineReader(masterIn, getConf());
    lmaster.readLine(line);
  } finally {
    try {
      masterIn.close();
    } catch (IOException e) {
      // Deliberately ignored: the stream was only read from, so a failure
      // while closing does not affect the result.
    }
  }
  String versionLine = line.toString();
  String[] arr = versionLine.split(" ");
  int version = Integer.parseInt(arr[0]);
  return version;
}
 
Example 8
Source File: TestKeyValueTextInputFormat.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code readLine} returns multi-byte UTF-8 text unchanged and
 * that U+200A is not treated as a line terminator.
 *
 * @throws Exception if building a stream or reading from it fails
 */
public void testUTF8() throws Exception {
  LineReader in = null;

  try {
    in = makeStream("abcd\u20acbdcd\u20ac");
    Text line = new Text();
    in.readLine(line);
    assertEquals("readLine changed utf8 characters",
                 "abcd\u20acbdcd\u20ac", line.toString());
    // Close the first reader before the variable is reused; otherwise only
    // the second reader would be closed by the finally block.
    in.close();
    in = makeStream("abc\u200axyz");
    in.readLine(line);
    assertEquals("split on fake newline", "abc\u200axyz", line.toString());
  } finally {
    if (in != null) {
      in.close();
    }
  }
}
 
Example 9
Source File: CompactionTool.java    From hbase with Apache License 2.0 6 votes vote down vote up
/**
 * Builds one {@link FileSplit} per line of every input file listed for the
 * job. Each split covers the byte range of its line, and its hosts come from
 * {@code getStoreDirHosts} for the input file.
 *
 * @param job job context; supplies the input listing and the configuration
 * @return the splits, in the order the lines were read
 * @throws IOException if an input file cannot be listed, opened or read
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  final List<InputSplit> result = new ArrayList<>();
  final List<FileStatus> inputs = listStatus(job);
  final Text lineBuffer = new Text();

  for (FileStatus status : inputs) {
    final Path inputPath = status.getPath();
    final FileSystem fileSystem = inputPath.getFileSystem(job.getConfiguration());
    final LineReader lines = new LineReader(fileSystem.open(inputPath));
    try {
      long offset = 0;
      for (int consumed = lines.readLine(lineBuffer);
           consumed > 0;
           consumed = lines.readLine(lineBuffer)) {
        final String[] hosts = getStoreDirHosts(fileSystem, inputPath);
        result.add(new FileSplit(inputPath, offset, consumed, hosts));
        offset += consumed;
      }
    } finally {
      lines.close();
    }
  }

  return result;
}
 
Example 10
Source File: Hadoop20JHParser.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Decides whether this parser can handle the given stream.
 *
 * A stream is accepted as a 0.20 job history stream when its first line is
 * exactly "Meta VERSION=\"1\" ." The reader is not closed here, so the
 * caller's stream stays open.
 *
 * @param input stream to probe
 * @return {@code true} if the first line matches; {@code false} if nothing
 *         could be read, the line differs, or an EOFException occurs
 * @throws IOException on any other read failure
 */
public static boolean canParse(InputStream input) throws IOException {
  try {
    final LineReader probe = new LineReader(input);
    final Text firstLine = new Text();

    if (probe.readLine(firstLine) == 0) {
      return false;
    }
    return "Meta VERSION=\"1\" .".equals(firstLine.toString());
  } catch (EOFException e) {
    return false;
  }
}
 
Example 11
Source File: Hadoop20JHParser.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether the input looks like a 0.20 job history stream, i.e. its
 * first line is exactly "Meta VERSION=\"1\" ."
 *
 * @param input stream to probe; it is read from but not closed here
 * @return whether this parser can parse the input; {@code false} when no
 *         line could be read or an EOFException is raised
 * @throws IOException on any other read failure
 */
public static boolean canParse(InputStream input) throws IOException {
  boolean matches;
  try {
    LineReader headerReader = new LineReader(input);
    Text header = new Text();
    int bytesRead = headerReader.readLine(header);
    if (bytesRead != 0) {
      String headerText = header.toString();
      matches = headerText.equals("Meta VERSION=\"1\" .");
    } else {
      matches = false;
    }
  } catch (EOFException e) {
    matches = false;
  }
  return matches;
}
 
Example 12
Source File: TestKeyValueTextInputFormat.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that readLine returns multi-byte UTF-8 text unchanged and does not
 * break a line at U+200A.
 *
 * @throws Exception if building a stream or reading from it fails
 */
public void testUTF8() throws Exception {
  final String euroSample = "abcd\u20acbdcd\u20ac";
  final String fakeNewlineSample = "abc\u200axyz";

  LineReader reader = makeStream(euroSample);
  final Text result = new Text();
  reader.readLine(result);
  assertEquals("readLine changed utf8 characters",
               euroSample, result.toString());

  reader = makeStream(fakeNewlineSample);
  reader.readLine(result);
  assertEquals("split on fake newline", fakeNewlineSample, result.toString());
}
 
Example 13
Source File: TestTextInputFormat.java    From hadoop-gpu with Apache License 2.0 5 votes vote down vote up
/**
 * For each command line argument, unquotes it, splits it into lines with a
 * LineReader and prints every line read.
 *
 * @param args strings to split into lines
 * @throws Exception if building a stream or reading from it fails
 */
public static void main(String[] args) throws Exception {
  for (int i = 0; i < args.length; i++) {
    final String current = args[i];
    System.out.println("Working on " + current);
    final LineReader lines = makeStream(unquote(current));
    final Text buffer = new Text();
    // Stop at the first read that consumes no bytes.
    while (lines.readLine(buffer) > 0) {
      System.out.println("Got: " + buffer.toString());
    }
    lines.close();
  }
}
 
Example 14
Source File: TestConcatenatedCompressedInput.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Splits each command line argument (after unquoting) into lines and prints
 * every line that was read.
 *
 * @param args strings to split into lines
 * @throws Exception if building a stream or reading from it fails
 */
public static void main(String[] args) throws Exception {
  for (String input : args) {
    System.out.println("Working on " + input);
    LineReader lineSource = makeStream(unquote(input));
    Text current = new Text();
    for (int bytes = lineSource.readLine(current);
         bytes > 0;
         bytes = lineSource.readLine(current)) {
      System.out.println("Got: " + current.toString());
    }
    lineSource.close();
  }
}
 
Example 15
Source File: TestTextInputFormat.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that readLine preserves multi-byte UTF-8 characters and keeps
 * text containing U+200A on a single line.
 *
 * @throws Exception if building a stream or reading from it fails
 */
public void testUTF8() throws Exception {
  final String withEuro = "abcd\u20acbdcd\u20ac";
  LineReader source = makeStream(withEuro);
  Text readBack = new Text();
  source.readLine(readBack);
  String actual = readBack.toString();
  assertEquals("readLine changed utf8 characters", withEuro, actual);

  final String withFakeNewline = "abc\u200axyz";
  source = makeStream(withFakeNewline);
  source.readLine(readBack);
  actual = readBack.toString();
  assertEquals("split on fake newline", withFakeNewline, actual);
}
 
Example 16
Source File: TestMRKeyValueTextInputFormat.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that readLine returns multi-byte UTF-8 text unchanged and does not
 * break a line at U+200A.
 *
 * @throws Exception if building a stream or reading from it fails
 */
@Test
public void testUTF8() throws Exception {
  final String euroSample = "abcd\u20acbdcd\u20ac";
  final String fakeNewlineSample = "abc\u200axyz";

  LineReader reader = makeStream(euroSample);
  final Text result = new Text();
  reader.readLine(result);
  assertEquals("readLine changed utf8 characters",
               euroSample, result.toString());

  reader = makeStream(fakeNewlineSample);
  reader.readLine(result);
  assertEquals("split on fake newline", fakeNewlineSample, result.toString());
}
 
Example 17
Source File: TestMRKeyValueTextInputFormat.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that readLine preserves multi-byte UTF-8 characters and keeps
 * text containing U+200A on a single line.
 *
 * @throws Exception if building a stream or reading from it fails
 */
@Test
public void testUTF8() throws Exception {
  final String withEuro = "abcd\u20acbdcd\u20ac";
  LineReader source = makeStream(withEuro);
  Text readBack = new Text();
  source.readLine(readBack);
  String actual = readBack.toString();
  assertEquals("readLine changed utf8 characters", withEuro, actual);

  final String withFakeNewline = "abc\u200axyz";
  source = makeStream(withFakeNewline);
  source.readLine(readBack);
  actual = readBack.toString();
  assertEquals("split on fake newline", withFakeNewline, actual);
}
 
Example 18
Source File: TestCodec.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Exercises a splittable codec: writes a test file through the codec, then
 * opens it in BYBLOCK mode from a series of random start positions and
 * checks that two consecutive lines carry consecutive leading integers.
 * The test file's parent directory is deleted once all samples pass.
 *
 * @param codecClass the splittable codec implementation to instantiate
 * @throws IOException if writing, opening or reading the test file fails
 */
private void testSplitableCodec(
    Class<? extends SplittableCompressionCodec> codecClass)
    throws IOException {
  // Size argument handed to writeSplitTestFile — presumably the
  // uncompressed byte count; confirm against that helper.
  final long DEFLBYTES = 2 * 1024 * 1024;
  final Configuration conf = new Configuration();
  final Random rand = new Random();
  // Log the seed and re-seed with it, so the value in the log identifies
  // the random sequence used by this run.
  final long seed = rand.nextLong();
  LOG.info("seed: " + seed);
  rand.setSeed(seed);
  SplittableCompressionCodec codec =
    ReflectionUtils.newInstance(codecClass, conf);
  final FileSystem fs = FileSystem.getLocal(conf);
  final FileStatus infile =
    fs.getFileStatus(writeSplitTestFile(fs, rand, codec, DEFLBYTES));
  // The length is narrowed to int below, so anything larger cannot be used.
  if (infile.getLen() > Integer.MAX_VALUE) {
    fail("Unexpected compression: " + DEFLBYTES + " -> " + infile.getLen());
  }
  final int flen = (int) infile.getLen();
  final Text line = new Text();
  final Decompressor dcmp = CodecPool.getDecompressor(codec);
  try {
    // NOTE(review): rand.nextInt(flen / 8) can return 0, in which case the
    // same position is sampled again, and it throws IllegalArgumentException
    // when flen < 8 because the bound must be positive.
    for (int pos = 0; pos < infile.getLen(); pos += rand.nextInt(flen / 8)) {
      // read from random positions, verifying that there exist two sequential
      // lines as written in writeSplitTestFile
      // NOTE(review): the stream opened here is never closed inside the
      // loop — confirm whether that is intentional given the shared dcmp.
      final SplitCompressionInputStream in =
        codec.createInputStream(fs.open(infile.getPath()), dcmp,
            pos, flen, SplittableCompressionCodec.READ_MODE.BYBLOCK);
      // Stop once the adjusted start is at or beyond the end of the file.
      if (in.getAdjustedStart() >= flen) {
        break;
      }
      LOG.info("SAMPLE " + in.getAdjustedStart() + "," + in.getAdjustedEnd());
      final LineReader lreader = new LineReader(in);
      lreader.readLine(line); // ignore; likely partial
      if (in.getPos() >= flen) {
        break;
      }
      lreader.readLine(line);
      final int seq1 = readLeadingInt(line);
      lreader.readLine(line);
      // Stop without comparing if this read reached the end of the file.
      if (in.getPos() >= flen) {
        break;
      }
      final int seq2 = readLeadingInt(line);
      assertEquals("Mismatched lines", seq1 + 1, seq2);
    }
  } finally {
    // Always hand the decompressor back to the pool, even on failure.
    CodecPool.returnDecompressor(dcmp);
  }
  // remove on success
  fs.delete(infile.getPath().getParent(), true);
}
 
Example 19
Source File: StreamKeyValUtil.java    From hadoop with Apache License 2.0 2 votes vote down vote up
/**
 * Clears {@code out} and reads the next line from the reader into it.
 *
 * @param lineReader LineReader to read the line from
 * @param out Text that receives the line; its previous content is discarded
 * @return the value returned by {@code lineReader.readLine(out)}
 * @throws IOException if the underlying read fails
 */
public static int readLine(LineReader lineReader, Text out)
throws IOException {
  out.clear();
  final int bytesRead = lineReader.readLine(out);
  return bytesRead;
}
 
Example 20
Source File: StreamKeyValUtil.java    From hadoop-gpu with Apache License 2.0 2 votes vote down vote up
/**
 * Empties {@code out}, then fills it with the next line from the reader.
 *
 * @param lineReader LineReader to read the line from
 * @param out Text to read into; cleared before the read
 * @return the result of {@code lineReader.readLine(out)}
 * @throws IOException if the underlying read fails
 */
public static int readLine(LineReader lineReader, Text out)
throws IOException {
  out.clear();
  int consumed = lineReader.readLine(out);
  return consumed;
}