org.apache.hadoop.util.UTF8ByteArrayUtils Java Examples

The following examples show how to use org.apache.hadoop.util.UTF8ByteArrayUtils. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: TextOutputReader.java From hadoop with Apache License 2.0

6 votes

/**
 * Splits a record into key and value at the {@code numKeyFields}-th
 * occurrence of {@code separator} within the first {@code length} bytes of
 * {@code line}.
 *
 * <p>If fewer than {@code numKeyFields} separators are found, the whole
 * record becomes the key and the value is set to the empty string.
 *
 * @param line   buffer holding the record bytes
 * @param length number of bytes of {@code line}, counted from index 0, that
 *               belong to the record
 * @param key    receives the key portion
 * @param val    receives the value portion
 * @throws IOException if populating {@code key}/{@code val} raises a
 *         {@link CharacterCodingException}; that exception is attached as
 *         the cause
 */
private void splitKeyVal(byte[] line, int length, Text key, Text val)
  throws IOException {
  // Need to find numKeyFields separators
  int pos = UTF8ByteArrayUtils.findBytes(line, 0, length, separator);
  for (int k = 1; k < numKeyFields && pos != -1; k++) {
    // Resume the search immediately after the separator found last time.
    pos = UTF8ByteArrayUtils.findBytes(line, pos + separator.length,
      length, separator);
  }
  try {
    if (pos == -1) {
      // Not enough separators: the entire record is the key.
      key.set(line, 0, length);
      val.set("");
    } else {
      StreamKeyValUtil.splitKeyVal(line, 0, length, key, val, pos,
        separator.length);
    }
  } catch (CharacterCodingException e) {
    // Keep the message callers already see, but also chain the original
    // exception so getCause() is available to them.
    throw new IOException(StringUtils.stringifyException(e), e);
  }
}

Example #2

Source File: TextOutputReader.java From big-c with Apache License 2.0

6 votes

/**
 * Divides the first {@code length} bytes of {@code line} into a key and a
 * value, using the {@code numKeyFields}-th occurrence of {@code separator}
 * as the split point.
 *
 * <p>When the record contains fewer than {@code numKeyFields} separators,
 * {@code key} is set to the whole record and {@code val} to the empty string.
 *
 * @param line   buffer holding the record bytes
 * @param length number of record bytes in {@code line}, starting at index 0
 * @param key    output: the key portion of the record
 * @param val    output: the value portion of the record
 * @throws IOException wrapping any {@link CharacterCodingException} raised
 *         while filling {@code key}/{@code val}; the wrapped exception is
 *         kept as the cause
 */
private void splitKeyVal(byte[] line, int length, Text key, Text val)
  throws IOException {
  // Need to find numKeyFields separators
  int pos = UTF8ByteArrayUtils.findBytes(line, 0, length, separator);
  for (int k = 1; k < numKeyFields && pos != -1; k++) {
    // Continue searching from the byte just past the previous separator.
    pos = UTF8ByteArrayUtils.findBytes(line, pos + separator.length,
      length, separator);
  }
  try {
    if (pos == -1) {
      // Too few separators were found, so there is no value part.
      key.set(line, 0, length);
      val.set("");
    } else {
      StreamKeyValUtil.splitKeyVal(line, 0, length, key, val, pos,
        separator.length);
    }
  } catch (CharacterCodingException e) {
    // Same message as before; the cause is now preserved instead of being
    // flattened into the message text only.
    throw new IOException(StringUtils.stringifyException(e), e);
  }
}

Example #3

Source File: PipeMapRed.java From RDFS with Apache License 2.0

6 votes

/**
 * Split a line into key and value.
 *
 * <p>The split point is the N-th occurrence of the field separator, where N
 * is the value returned by {@code getNumOfKeyFields()}. If the line has fewer
 * separators than that, the whole line is stored in {@code key} and
 * {@code val} is set to the empty string. A {@link CharacterCodingException}
 * raised while storing the result is logged as a warning and not rethrown.
 *
 * @param line   a byte array of line containing UTF-8 bytes
 * @param length number of bytes of {@code line}, from index 0, to process
 * @param key    key of a record
 * @param val    value of a record
 * @throws IOException declared by the signature
 */
void splitKeyVal(byte[] line, int length, Text key, Text val)
throws IOException {
  final int keyFieldCount = getNumOfKeyFields();
  final byte[] sep = getFieldSeparator();

  // Advance from separator to separator until keyFieldCount of them have
  // been located or the search comes up empty.
  int splitAt = UTF8ByteArrayUtils.findBytes(line, 0, length, sep);
  int seen = 1;
  while (seen < keyFieldCount && splitAt != -1) {
    splitAt = UTF8ByteArrayUtils.findBytes(line, splitAt + sep.length,
        length, sep);
    seen++;
  }

  try {
    if (splitAt != -1) {
      StreamKeyValUtil.splitKeyVal(line, 0, length, key, val, splitAt, sep.length);
    } else {
      // No split point: everything is key, value is empty.
      key.set(line, 0, length);
      val.set("");
    }
  } catch (CharacterCodingException e) {
    LOG.warn(StringUtils.stringifyException(e));
  }
}

Example #4

Source File: PipeMapRed.java From hadoop-gpu with Apache License 2.0

6 votes

/**
 * Split a line into key and value.
 *
 * <p>Searches the first {@code length} bytes of {@code line} for as many
 * field separators as {@code getNumOfKeyFields()} reports and splits at the
 * last one found. When the search fails before that many separators have
 * been seen, {@code key} receives the entire line and {@code val} is set to
 * the empty string. Character-coding failures are logged at warn level and
 * otherwise ignored.
 *
 * @param line   a byte array of line containing UTF-8 bytes
 * @param length number of bytes of {@code line}, from index 0, to process
 * @param key    key of a record
 * @param val    value of a record
 * @throws IOException declared by the signature
 */
void splitKeyVal(byte[] line, int length, Text key, Text val)
throws IOException {
  final int fieldsInKey = getNumOfKeyFields();
  final byte[] delimiter = getFieldSeparator();

  // The first separator is always looked up; each further key field needs
  // one more lookup, starting right after the previous hit.
  int cut = UTF8ByteArrayUtils.findBytes(line, 0, length, delimiter);
  for (int field = 1; field < fieldsInKey; field++) {
    if (cut == -1) {
      break;
    }
    int resumeFrom = cut + delimiter.length;
    cut = UTF8ByteArrayUtils.findBytes(line, resumeFrom, length, delimiter);
  }

  try {
    if (cut == -1) {
      // Ran out of separators: the line is all key.
      key.set(line, 0, length);
      val.set("");
    } else {
      StreamKeyValUtil.splitKeyVal(line, 0, length, key, val, cut,
          delimiter.length);
    }
  } catch (CharacterCodingException e) {
    LOG.warn(StringUtils.stringifyException(e));
  }
}

Example #5

Source File: KeyFieldHelper.java From hadoop with Apache License 2.0

5 votes

/**
 * Computes the byte length of each word in {@code b[start, end)}, where
 * words are delimited by {@code keyFieldSeparator}.
 *
 * <p>Given a string like "hello how are you" (separator " "), the returned
 * array begins with [4, 5, 3, 3, 3]: element 0 is the number of words and
 * elements 1..n are the word lengths. The array may be longer than n + 1;
 * the extra slots are left at zero. A separator at the very end of the range
 * counts as introducing one more word whose length slot stays zero.
 *
 * <p>If no key spec has been seen, the whole key is treated as one word and
 * the result is {@code {1}} (word count only, no length entry).
 *
 * @param b     buffer containing the key bytes
 * @param start index of the first byte to examine
 * @param end   index one past the last byte to examine
 * @return array whose first element is the word count, followed by the
 *         length of each word
 */
public int[] getWordLengths(byte []b, int start, int end) {
   if (!keySpecSeen) {
     //if there were no key specs, then the whole key is one word
     return new int[] {1};
   }
   int[] lengths = new int[10];
   int currLenLengths = lengths.length;
   int idx = 1;
   int pos;
   while ((pos = UTF8ByteArrayUtils.findBytes(b, start, end,
       keyFieldSeparator)) != -1) {
     if (++idx == currLenLengths) {
       // Out of room for another word: double the capacity.
       int[] temp = lengths;
       lengths = new int[(currLenLengths = currLenLengths*2)];
       System.arraycopy(temp, 0, lengths, 0, temp.length);
     }
     lengths[idx - 1] = pos - start;
     // Skip the entire separator, not just its first byte; otherwise a
     // multi-byte separator leaks its remaining bytes into the next word.
     start = pos + keyFieldSeparator.length;
   }

   if (start != end) {
     lengths[idx] = end - start;
   }
   lengths[0] = idx; //number of words is the first element
   return lengths;
}

Example #6

Source File: KeyFieldHelper.java From big-c with Apache License 2.0

5 votes

/**
 * Returns the number of separator-delimited words in {@code b[start, end)}
 * together with the byte length of each one.
 *
 * <p>Given a string like "hello how are you" (separator " "), the result
 * starts with [4, 5, 3, 3, 3], where the first element is the number of
 * words. The backing array can be larger than the number of entries used;
 * unused slots are zero. When the range ends with a separator, the word
 * count includes a final word whose length slot is left at zero.
 *
 * <p>Without any key spec the whole key is one word and only the count,
 * {@code {1}}, is returned.
 *
 * @param b     buffer containing the key bytes
 * @param start index of the first byte to examine
 * @param end   index one past the last byte to examine
 * @return word count in element 0, then one length per word
 */
public int[] getWordLengths(byte []b, int start, int end) {
   if (!keySpecSeen) {
     //if there were no key specs, then the whole key is one word
     return new int[] {1};
   }
   int[] lengths = new int[10];
   int currLenLengths = lengths.length;
   int idx = 1;
   int pos;
   while ((pos = UTF8ByteArrayUtils.findBytes(b, start, end,
       keyFieldSeparator)) != -1) {
     if (++idx == currLenLengths) {
       // Grow by doubling so there is always a slot for the next word.
       int[] temp = lengths;
       lengths = new int[(currLenLengths = currLenLengths*2)];
       System.arraycopy(temp, 0, lengths, 0, temp.length);
     }
     lengths[idx - 1] = pos - start;
     // Advance past the full separator. Using pos + 1 here is only right
     // for one-byte separators and corrupts lengths for longer ones.
     start = pos + keyFieldSeparator.length;
   }

   if (start != end) {
     lengths[idx] = end - start;
   }
   lengths[0] = idx; //number of words is the first element
   return lengths;
}

Example #7

Source File: KeyFieldHelper.java From RDFS with Apache License 2.0

5 votes

/**
 * Measures every word found between {@code start} (inclusive) and
 * {@code end} (exclusive) of {@code b}, splitting on
 * {@code keyFieldSeparator}.
 *
 * <p>Given a string like "hello how are you" (separator " "), it returns an
 * array beginning [4, 5, 3, 3, 3], where the first element is the number of
 * words. Slots beyond the last word are unused and hold zero. If the range
 * ends in a separator, one extra word is counted and its length slot
 * remains zero.
 *
 * <p>When no key spec was seen the method short-circuits to {@code {1}}:
 * one word, with no length recorded.
 *
 * @param b     buffer containing the key bytes
 * @param start index of the first byte to examine
 * @param end   index one past the last byte to examine
 * @return array holding the word count followed by the per-word lengths
 */
public int[] getWordLengths(byte []b, int start, int end) {
   if (!keySpecSeen) {
     //if there were no key specs, then the whole key is one word
     return new int[] {1};
   }
   int[] lengths = new int[10];
   int currLenLengths = lengths.length;
   int idx = 1;
   int pos;
   while ((pos = UTF8ByteArrayUtils.findBytes(b, start, end,
       keyFieldSeparator)) != -1) {
     if (++idx == currLenLengths) {
       // Array is full: allocate twice the size and copy what we have.
       int[] temp = lengths;
       lengths = new int[(currLenLengths = currLenLengths*2)];
       System.arraycopy(temp, 0, lengths, 0, temp.length);
     }
     lengths[idx - 1] = pos - start;
     // The next word begins after the whole separator; stepping by a single
     // byte would mis-measure words when the separator is longer than that.
     start = pos + keyFieldSeparator.length;
   }

   if (start != end) {
     lengths[idx] = end - start;
   }
   lengths[0] = idx; //number of words is the first element
   return lengths;
}

Example #8

Source File: KeyFieldHelper.java From hadoop-gpu with Apache License 2.0

5 votes

/**
 * Splits {@code b[start, end)} on {@code keyFieldSeparator} and reports how
 * many words there are and how long each one is, in bytes.
 *
 * <p>Given a string like "hello how are you" (separator " "), the returned
 * array starts [4, 5, 3, 3, 3]; the first element is the number of words.
 * The array may have spare capacity at the end, filled with zeros. A
 * trailing separator adds one more counted word whose length entry is left
 * at zero.
 *
 * <p>If there were no key specs, the whole key is a single word and the
 * method returns {@code {1}} without a length entry.
 *
 * @param b     buffer containing the key bytes
 * @param start index of the first byte to examine
 * @param end   index one past the last byte to examine
 * @return element 0 is the word count; elements 1..count are word lengths
 */
public int[] getWordLengths(byte []b, int start, int end) {
   if (!keySpecSeen) {
     //if there were no key specs, then the whole key is one word
     return new int[] {1};
   }
   int[] lengths = new int[10];
   int currLenLengths = lengths.length;
   int idx = 1;
   int pos;
   while ((pos = UTF8ByteArrayUtils.findBytes(b, start, end,
       keyFieldSeparator)) != -1) {
     if (++idx == currLenLengths) {
       // No free slot left for the upcoming word; double the array.
       int[] temp = lengths;
       lengths = new int[(currLenLengths = currLenLengths*2)];
       System.arraycopy(temp, 0, lengths, 0, temp.length);
     }
     lengths[idx - 1] = pos - start;
     // Move past every byte of the separator. A fixed +1 step is wrong for
     // separators longer than one byte.
     start = pos + keyFieldSeparator.length;
   }

   if (start != end) {
     lengths[idx] = end - start;
   }
   lengths[0] = idx; //number of words is the first element
   return lengths;
}