Java Code Examples for org.apache.hadoop.util.UTF8ByteArrayUtils

The following examples show how to use org.apache.hadoop.util.UTF8ByteArrayUtils. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: hadoop   Source File: TextOutputReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Splits a line of bytes into a key and a value at the
 * {@code numKeyFields}-th occurrence of {@code separator}.
 *
 * <p>If fewer than {@code numKeyFields} separators are found, the first
 * {@code length} bytes of the line become the key and the value is set to the
 * empty string. Otherwise the split is delegated to
 * {@code StreamKeyValUtil.splitKeyVal} with the position and length of the
 * separator that was located.
 *
 * @param line   buffer holding the line
 * @param length number of bytes of {@code line} to consider, counted from offset 0
 * @param key    receives the key portion
 * @param val    receives the value portion
 * @throws IOException if a {@link CharacterCodingException} is raised while
 *         splitting; the original exception is attached as the cause
 */
private void splitKeyVal(byte[] line, int length, Text key, Text val)
  throws IOException {
  // Locate the numKeyFields-th separator; -1 means the line has too few.
  int pos = UTF8ByteArrayUtils.findBytes(line, 0, length, separator);
  for (int k = 1; k < numKeyFields && pos != -1; k++) {
    // Resume the search just past the separator that was found last.
    pos = UTF8ByteArrayUtils.findBytes(line, pos + separator.length,
      length, separator);
  }
  try {
    if (pos == -1) {
      key.set(line, 0, length);
      val.set("");
    } else {
      StreamKeyValUtil.splitKeyVal(line, 0, length, key, val, pos,
        separator.length);
    }
  } catch (CharacterCodingException e) {
    // Message is unchanged (stringified trace); additionally chain the
    // original exception so callers can inspect it via getCause().
    throw new IOException(StringUtils.stringifyException(e), e);
  }
}
 
Example 2
Source Project: big-c   Source File: TextOutputReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Splits a line of bytes into a key and a value at the
 * {@code numKeyFields}-th occurrence of {@code separator}.
 *
 * <p>If fewer than {@code numKeyFields} separators are found, the first
 * {@code length} bytes of the line become the key and the value is set to the
 * empty string. Otherwise the split is delegated to
 * {@code StreamKeyValUtil.splitKeyVal} with the position and length of the
 * separator that was located.
 *
 * @param line   buffer holding the line
 * @param length number of bytes of {@code line} to consider, counted from offset 0
 * @param key    receives the key portion
 * @param val    receives the value portion
 * @throws IOException if a {@link CharacterCodingException} is raised while
 *         splitting; the original exception is attached as the cause
 */
private void splitKeyVal(byte[] line, int length, Text key, Text val)
  throws IOException {
  // Locate the numKeyFields-th separator; -1 means the line has too few.
  int pos = UTF8ByteArrayUtils.findBytes(line, 0, length, separator);
  for (int k = 1; k < numKeyFields && pos != -1; k++) {
    // Resume the search just past the separator that was found last.
    pos = UTF8ByteArrayUtils.findBytes(line, pos + separator.length,
      length, separator);
  }
  try {
    if (pos == -1) {
      key.set(line, 0, length);
      val.set("");
    } else {
      StreamKeyValUtil.splitKeyVal(line, 0, length, key, val, pos,
        separator.length);
    }
  } catch (CharacterCodingException e) {
    // Message is unchanged (stringified trace); additionally chain the
    // original exception so callers can inspect it via getCause().
    throw new IOException(StringUtils.stringifyException(e), e);
  }
}
 
Example 3
Source Project: RDFS   Source File: PipeMapRed.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Splits one line into a key part and a value part.
 *
 * <p>The split point is the separator reached after searching for as many
 * separators as {@code getNumOfKeyFields()} reports. When that separator is
 * not found, the first {@code length} bytes become the key and the value is
 * set to the empty string. A {@link CharacterCodingException} raised during
 * the split is logged as a warning and not rethrown.
 *
 * @param line   byte array of the line, containing UTF-8 bytes
 * @param length number of bytes of {@code line} to examine, from offset 0
 * @param key    receives the key of the record
 * @param val    receives the value of the record
 * @throws IOException declared by the signature
 */
void splitKeyVal(byte[] line, int length, Text key, Text val)
throws IOException {
  final int keyFieldCount = getNumOfKeyFields();
  final byte[] sep = getFieldSeparator();

  // Walk forward separator by separator until enough have been seen
  // or the search comes up empty (-1).
  int splitAt = UTF8ByteArrayUtils.findBytes(line, 0, length, sep);
  int seen = 1;
  while (seen < keyFieldCount && splitAt != -1) {
    splitAt = UTF8ByteArrayUtils.findBytes(line, splitAt + sep.length,
        length, sep);
    seen++;
  }

  try {
    if (splitAt != -1) {
      StreamKeyValUtil.splitKeyVal(line, 0, length, key, val, splitAt, sep.length);
    } else {
      // Not enough separators: the whole line is the key.
      key.set(line, 0, length);
      val.set("");
    }
  } catch (CharacterCodingException e) {
    LOG.warn(StringUtils.stringifyException(e));
  }
}
 
Example 4
Source Project: hadoop-gpu   Source File: PipeMapRed.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Splits one line into a key part and a value part.
 *
 * <p>The split point is the separator reached after searching for as many
 * separators as {@code getNumOfKeyFields()} reports. When that separator is
 * not found, the first {@code length} bytes become the key and the value is
 * set to the empty string. A {@link CharacterCodingException} raised during
 * the split is logged as a warning and not rethrown.
 *
 * @param line   byte array of the line, containing UTF-8 bytes
 * @param length number of bytes of {@code line} to examine, from offset 0
 * @param key    receives the key of the record
 * @param val    receives the value of the record
 * @throws IOException declared by the signature
 */
void splitKeyVal(byte[] line, int length, Text key, Text val)
throws IOException {
  final int keyFieldCount = getNumOfKeyFields();
  final byte[] sep = getFieldSeparator();

  // Walk forward separator by separator until enough have been seen
  // or the search comes up empty (-1).
  int splitAt = UTF8ByteArrayUtils.findBytes(line, 0, length, sep);
  int seen = 1;
  while (seen < keyFieldCount && splitAt != -1) {
    splitAt = UTF8ByteArrayUtils.findBytes(line, splitAt + sep.length,
        length, sep);
    seen++;
  }

  try {
    if (splitAt != -1) {
      StreamKeyValUtil.splitKeyVal(line, 0, length, key, val, splitAt, sep.length);
    } else {
      // Not enough separators: the whole line is the key.
      key.set(line, 0, length);
      val.set("");
    }
  } catch (CharacterCodingException e) {
    LOG.warn(StringUtils.stringifyException(e));
  }
}
 
Example 5
Source Project: hadoop   Source File: KeyFieldHelper.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Computes the byte length of every word in {@code b[start, end)}, where
 * words are delimited by {@code keyFieldSeparator}.
 *
 * <p>Given a string like "hello how are you" and a single-space separator,
 * the returned array begins with {@code [4, 5, 3, 3, 3]}: element 0 is the
 * number of words and elements 1..n are the word lengths. The array may be
 * longer than n + 1; unused trailing slots are zero.
 *
 * <p>If no key spec has been seen, the whole key counts as one word and
 * {@code {1}} is returned without any length entry.
 *
 * @param b     buffer holding the key bytes
 * @param start index of the first byte to examine (inclusive)
 * @param end   index just past the last byte to examine (exclusive)
 * @return an array whose first element is the word count, followed by the
 *         length in bytes of each word
 */
public int[] getWordLengths(byte []b, int start, int end) {
   if (!keySpecSeen) {
     //if there were no key specs, then the whole key is one word
     return new int[] {1};
   }
   // Skip the entire separator after each match so multi-byte separators
   // are not counted as part of the following word. Clamped to 1 so that an
   // empty separator still advances the scan exactly as it did before.
   final int step = Math.max(1, keyFieldSeparator.length);
   int[] lengths = new int[10];
   int idx = 1;
   int pos;
   while ((pos = UTF8ByteArrayUtils.findBytes(b, start, end,
       keyFieldSeparator)) != -1) {
     if (++idx == lengths.length) {
       // Out of room: double the capacity and carry over what we have.
       int[] temp = lengths;
       lengths = new int[temp.length * 2];
       System.arraycopy(temp, 0, lengths, 0, temp.length);
     }
     lengths[idx - 1] = pos - start;
     start = pos + step;
   }

   // Whatever follows the last separator is the final word.
   if (start != end) {
     lengths[idx] = end - start;
   }
   lengths[0] = idx; //number of words is the first element
   return lengths;
}
 
Example 6
Source Project: big-c   Source File: KeyFieldHelper.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Computes the byte length of every word in {@code b[start, end)}, where
 * words are delimited by {@code keyFieldSeparator}.
 *
 * <p>Given a string like "hello how are you" and a single-space separator,
 * the returned array begins with {@code [4, 5, 3, 3, 3]}: element 0 is the
 * number of words and elements 1..n are the word lengths. The array may be
 * longer than n + 1; unused trailing slots are zero.
 *
 * <p>If no key spec has been seen, the whole key counts as one word and
 * {@code {1}} is returned without any length entry.
 *
 * @param b     buffer holding the key bytes
 * @param start index of the first byte to examine (inclusive)
 * @param end   index just past the last byte to examine (exclusive)
 * @return an array whose first element is the word count, followed by the
 *         length in bytes of each word
 */
public int[] getWordLengths(byte []b, int start, int end) {
   if (!keySpecSeen) {
     //if there were no key specs, then the whole key is one word
     return new int[] {1};
   }
   // Skip the entire separator after each match so multi-byte separators
   // are not counted as part of the following word. Clamped to 1 so that an
   // empty separator still advances the scan exactly as it did before.
   final int step = Math.max(1, keyFieldSeparator.length);
   int[] lengths = new int[10];
   int idx = 1;
   int pos;
   while ((pos = UTF8ByteArrayUtils.findBytes(b, start, end,
       keyFieldSeparator)) != -1) {
     if (++idx == lengths.length) {
       // Out of room: double the capacity and carry over what we have.
       int[] temp = lengths;
       lengths = new int[temp.length * 2];
       System.arraycopy(temp, 0, lengths, 0, temp.length);
     }
     lengths[idx - 1] = pos - start;
     start = pos + step;
   }

   // Whatever follows the last separator is the final word.
   if (start != end) {
     lengths[idx] = end - start;
   }
   lengths[0] = idx; //number of words is the first element
   return lengths;
}
 
Example 7
Source Project: RDFS   Source File: KeyFieldHelper.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Computes the byte length of every word in {@code b[start, end)}, where
 * words are delimited by {@code keyFieldSeparator}.
 *
 * <p>Given a string like "hello how are you" and a single-space separator,
 * the returned array begins with {@code [4, 5, 3, 3, 3]}: element 0 is the
 * number of words and elements 1..n are the word lengths. The array may be
 * longer than n + 1; unused trailing slots are zero.
 *
 * <p>If no key spec has been seen, the whole key counts as one word and
 * {@code {1}} is returned without any length entry.
 *
 * @param b     buffer holding the key bytes
 * @param start index of the first byte to examine (inclusive)
 * @param end   index just past the last byte to examine (exclusive)
 * @return an array whose first element is the word count, followed by the
 *         length in bytes of each word
 */
public int[] getWordLengths(byte []b, int start, int end) {
   if (!keySpecSeen) {
     //if there were no key specs, then the whole key is one word
     return new int[] {1};
   }
   // Skip the entire separator after each match so multi-byte separators
   // are not counted as part of the following word. Clamped to 1 so that an
   // empty separator still advances the scan exactly as it did before.
   final int step = Math.max(1, keyFieldSeparator.length);
   int[] lengths = new int[10];
   int idx = 1;
   int pos;
   while ((pos = UTF8ByteArrayUtils.findBytes(b, start, end,
       keyFieldSeparator)) != -1) {
     if (++idx == lengths.length) {
       // Out of room: double the capacity and carry over what we have.
       int[] temp = lengths;
       lengths = new int[temp.length * 2];
       System.arraycopy(temp, 0, lengths, 0, temp.length);
     }
     lengths[idx - 1] = pos - start;
     start = pos + step;
   }

   // Whatever follows the last separator is the final word.
   if (start != end) {
     lengths[idx] = end - start;
   }
   lengths[0] = idx; //number of words is the first element
   return lengths;
}
 
Example 8
Source Project: hadoop-gpu   Source File: KeyFieldHelper.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Computes the byte length of every word in {@code b[start, end)}, where
 * words are delimited by {@code keyFieldSeparator}.
 *
 * <p>Given a string like "hello how are you" and a single-space separator,
 * the returned array begins with {@code [4, 5, 3, 3, 3]}: element 0 is the
 * number of words and elements 1..n are the word lengths. The array may be
 * longer than n + 1; unused trailing slots are zero.
 *
 * <p>If no key spec has been seen, the whole key counts as one word and
 * {@code {1}} is returned without any length entry.
 *
 * @param b     buffer holding the key bytes
 * @param start index of the first byte to examine (inclusive)
 * @param end   index just past the last byte to examine (exclusive)
 * @return an array whose first element is the word count, followed by the
 *         length in bytes of each word
 */
public int[] getWordLengths(byte []b, int start, int end) {
   if (!keySpecSeen) {
     //if there were no key specs, then the whole key is one word
     return new int[] {1};
   }
   // Skip the entire separator after each match so multi-byte separators
   // are not counted as part of the following word. Clamped to 1 so that an
   // empty separator still advances the scan exactly as it did before.
   final int step = Math.max(1, keyFieldSeparator.length);
   int[] lengths = new int[10];
   int idx = 1;
   int pos;
   while ((pos = UTF8ByteArrayUtils.findBytes(b, start, end,
       keyFieldSeparator)) != -1) {
     if (++idx == lengths.length) {
       // Out of room: double the capacity and carry over what we have.
       int[] temp = lengths;
       lengths = new int[temp.length * 2];
       System.arraycopy(temp, 0, lengths, 0, temp.length);
     }
     lengths[idx - 1] = pos - start;
     start = pos + step;
   }

   // Whatever follows the last separator is the final word.
   if (start != end) {
     lengths[idx] = end - start;
   }
   lengths[0] = idx; //number of words is the first element
   return lengths;
}