Java Code Examples for org.apache.hadoop.io.Text#encode()

The following examples show how to use org.apache.hadoop.io.Text#encode(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SequenceFileInputFilter.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Decides whether a record is kept, based on the MD5 hash of its key.
 * <p>
 * {@link Text} and {@link BytesWritable} keys are hashed directly; any other
 * key is hashed from the bytes that {@code Text.encode} produces for its
 * {@code toString()} form.
 *
 * @param key the record key to test
 * @return {@code true} if MD5(key) % frequency == 0, {@code false} otherwise
 * @throws RuntimeException wrapping any exception raised while hashing or
 *         testing the key; the exception is logged at warn level first
 * @see Filter#accept(Object)
 */
public boolean accept(Object key) {
  try {
    final long digest;
    if (key instanceof Text) {
      digest = MD5Hashcode((Text) key);
    } else if (key instanceof BytesWritable) {
      digest = MD5Hashcode((BytesWritable) key);
    } else {
      ByteBuffer encoded = Text.encode(key.toString());
      digest = MD5Hashcode(encoded.array(), 0, encoded.limit());
    }
    // The documented modulo test, spelled as a divide-and-multiply round trip.
    return digest / frequency * frequency == digest;
  } catch (Exception e) {
    LOG.warn(e);
    throw new RuntimeException(e);
  }
}
 
Example 2
Source File: SequenceFileInputFilter.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Frequency filter: accepts a key when MD5(key) % frequency == 0.
 * <p>
 * The hash is taken from the key itself for {@link Text} and
 * {@link BytesWritable}; for every other type the key's {@code toString()}
 * is run through {@code Text.encode} and the resulting bytes are hashed.
 *
 * @param key the key to evaluate
 * @return {@code true} if the key is selected, {@code false} otherwise
 * @throws RuntimeException if anything fails during evaluation; the
 *         original exception is logged (warn) and attached as the cause
 * @see Filter#accept(Object)
 */
public boolean accept(Object key) {
  try {
    long md5;
    if (key instanceof Text) {
      md5 = MD5Hashcode((Text) key);
    } else if (key instanceof BytesWritable) {
      md5 = MD5Hashcode((BytesWritable) key);
    } else {
      ByteBuffer keyBytes = Text.encode(key.toString());
      md5 = MD5Hashcode(keyBytes.array(), 0, keyBytes.limit());
    }
    boolean selected = (md5 / frequency * frequency == md5);
    return selected;
  } catch (Exception ex) {
    LOG.warn(ex);
    throw new RuntimeException(ex);
  }
}
 
Example 3
Source File: SequenceFileInputFilter.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Tests a key against the MD5 frequency rule.
 * <p>
 * A key passes when MD5(key) % frequency == 0. {@link Text} and
 * {@link BytesWritable} keys are handed to {@code MD5Hashcode} as they are;
 * other keys are converted with {@code toString()} and {@code Text.encode}
 * before hashing.
 *
 * @param key the key under test
 * @return {@code true} when the key passes, {@code false} when it does not
 * @throws RuntimeException wrapping whatever exception occurred, after it
 *         has been logged at warn level
 * @see org.apache.hadoop.mapred.SequenceFileInputFilter.Filter#accept(Object)
 */
public boolean accept(Object key) {
  try {
    final long keyHash;
    if (key instanceof Text) {
      keyHash = MD5Hashcode((Text) key);
    } else if (key instanceof BytesWritable) {
      keyHash = MD5Hashcode((BytesWritable) key);
    } else {
      ByteBuffer raw = Text.encode(key.toString());
      keyHash = MD5Hashcode(raw.array(), 0, raw.limit());
    }
    // Divide, then multiply back: the hash survives only if nothing was lost.
    long quotient = keyHash / frequency;
    return quotient * frequency == keyHash;
  } catch (Exception failure) {
    LOG.warn(failure);
    throw new RuntimeException(failure);
  }
}
 
Example 4
Source File: SequenceFileInputFilter.java    From hadoop-gpu with Apache License 2.0 6 votes vote down vote up
/**
 * Accepts or rejects a key according to its MD5 hash.
 * <p>
 * The key is accepted when MD5(key) % frequency == 0. Hashing uses the key
 * directly if it is a {@link Text} or a {@link BytesWritable}; otherwise it
 * uses the bytes obtained from {@code Text.encode(key.toString())}.
 *
 * @param key the candidate key
 * @return {@code true} to accept the key, {@code false} to reject it
 * @throws RuntimeException if hashing or the test itself throws; the
 *         underlying exception is logged at warn level and kept as the cause
 * @see org.apache.hadoop.mapred.SequenceFileInputFilter.Filter#accept(Object)
 */
public boolean accept(Object key) {
  try {
    long hash;
    if (key instanceof Text) {
      hash = MD5Hashcode((Text) key);
    } else if (key instanceof BytesWritable) {
      hash = MD5Hashcode((BytesWritable) key);
    } else {
      ByteBuffer buf = Text.encode(key.toString());
      hash = MD5Hashcode(buf.array(), 0, buf.limit());
    }
    if (hash / frequency * frequency == hash) {
      return true;
    } else {
      return false;
    }
  } catch (Exception cause) {
    LOG.warn(cause);
    throw new RuntimeException(cause);
  }
}
 
Example 5
Source File: TextUtil.java    From datawave with Apache License 2.0 5 votes vote down vote up
/**
 * Encodes a string to UTF-8 via {@code Text.encode} and appends the resulting bytes to a {@link Text}.
 *
 * @param t
 *            the text to append to; it is modified in place
 * @param s
 *            the string whose bytes are appended
 * @param replaceBadChar
 *            passed straight through to {@code Text.encode}
 * @throws IllegalArgumentException
 *             wrapping the {@link CharacterCodingException} if encoding fails
 */
public static void textAppendNoNull(Text t, String s, boolean replaceBadChar) {
    final ByteBuffer encoded;
    try {
        encoded = Text.encode(s, replaceBadChar);
    } catch (CharacterCodingException e) {
        throw new IllegalArgumentException(e);
    }
    // Only the first limit() bytes of the backing array hold encoded data.
    t.append(encoded.array(), 0, encoded.limit());
}
 
Example 6
Source File: TextUtil.java    From datawave with Apache License 2.0 5 votes vote down vote up
/**
 * Converts the given string to its UTF-8 bytes using Hadoop's {@code Text.encode} (reportedly much faster than
 * {@link String#getBytes(String)}; not measured here).
 *
 * @param string
 *            the string to convert
 * @return a freshly allocated array holding the UTF-8 representation of the string
 * @throws IllegalArgumentException
 *             wrapping the {@link CharacterCodingException} if the string cannot be encoded
 */
public static byte[] toUtf8(String string) {
    try {
        ByteBuffer encoded = Text.encode(string, false);
        int length = encoded.limit();
        // Copy out exactly limit() bytes; the backing array may be larger.
        byte[] utf8 = new byte[length];
        System.arraycopy(encoded.array(), 0, utf8, 0, length);
        return utf8;
    } catch (CharacterCodingException e) {
        throw new IllegalArgumentException(e);
    }
}
 
Example 7
Source File: ColumnPrefixes.java    From rya with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a new {@link Text} from {@code prefix} and appends the encoded
 * bytes of {@code str} to it.
 *
 * @param prefix the text used to initialise the result
 * @param str the string to encode (via {@code Text.encode(str, false)}) and append
 * @return the newly built text
 * @throws IllegalArgumentException wrapping the
 *         {@link CharacterCodingException} if {@code str} cannot be encoded
 */
private static Text concat(Text prefix, String str) {
	Text combined = new Text(prefix);

	final ByteBuffer encoded;
	try {
		encoded = Text.encode(str, false);
	} catch (CharacterCodingException e) {
		throw new IllegalArgumentException(e);
	}

	// Only the first limit() bytes of the backing array are valid.
	combined.append(encoded.array(), 0, encoded.limit());
	return combined;
}
 
Example 8
Source File: QseqOutputFormat.java    From Hadoop-BAM with MIT License 4 votes vote down vote up
/**
 * Writes one sequenced fragment to {@code out} as a single delimited line
 * followed by {@code newLine}.
 * <p>
 * Fields are emitted in this order, separated by {@code delim}: instrument,
 * run number, lane, tile, x position, y position, index sequence, read,
 * sequence, quality, filter flag. A {@code null} field is written as an
 * empty string, except that a {@code null} or empty index sequence is
 * written as {@code "0"}. In the index sequence and the sequence every
 * {@code 'N'} is replaced by {@code '.'}. The filter flag is {@code 1} when
 * {@code getFilterPassed()} is {@code null} or true, otherwise {@code 0}.
 * <p>
 * When {@code baseQualityFormat} is Illumina, each quality character is
 * shifted up by 31 before being written; with Sanger it is written as is.
 *
 * @param ignored_key not used
 * @param seq the fragment to write
 * @throws IOException declared by the signature; any I/O failure from
 *         {@code out.write} propagates unchanged
 * @throws RuntimeException if a recoded quality character exceeds 126, if
 *         {@code baseQualityFormat} is neither Sanger nor Illumina, or if
 *         the record cannot be encoded (the encoding exception is attached
 *         as the cause)
 */
public void write(Text ignored_key, SequencedFragment seq) throws IOException
{
	sBuilder.delete(0, sBuilder.length()); // clear

	sBuilder.append( seq.getInstrument() == null ? "" : seq.getInstrument() ).append(delim);
	sBuilder.append( seq.getRunNumber() == null ? "" : seq.getRunNumber().toString() ).append(delim);
	sBuilder.append( seq.getLane() == null ? "" : seq.getLane().toString() ).append(delim);
	sBuilder.append( seq.getTile() == null ? "" : seq.getTile().toString() ).append(delim);
	sBuilder.append( seq.getXpos() == null ? "" : seq.getXpos().toString() ).append(delim);
	sBuilder.append( seq.getYpos() == null ? "" : seq.getYpos().toString() ).append(delim);

	String index;
	if (seq.getIndexSequence() == null || seq.getIndexSequence().isEmpty())
	{
		index = "0";
	}
	else
	{
		index = seq.getIndexSequence().replace('N', '.');
	}
	sBuilder.append( index ).append(delim);

	sBuilder.append( seq.getRead() == null ? "" : seq.getRead().toString() ).append(delim);
	// here we also replace 'N' with '.'
	sBuilder.append( seq.getSequence() == null ? "" : seq.getSequence().toString().replace('N', '.')).append(delim);

	//////// quality may have to be re-coded
	// A null quality contributes nothing, leaving the field empty.
	if (seq.getQuality() != null)
	{
		int startPos = sBuilder.length();
		sBuilder.append(seq.getQuality().toString());
		if (baseQualityFormat == BaseQualityEncoding.Illumina)
		{
			// recode the quality in-place
			for (int i = startPos; i < sBuilder.length(); ++i)
			{
				// cast to avoid warning about possible loss of precision for assigning a char from an int.
				char newValue = (char)(sBuilder.charAt(i) + 31); // 64 - 33 = 31: difference between illumina and sanger encoding
				if (newValue > 126)
				{
					throw new RuntimeException("output quality score over allowed range.  Maybe you meant to write in Sanger format?");
				}
				sBuilder.setCharAt(i, newValue);
			}
		}
		else if (baseQualityFormat != BaseQualityEncoding.Sanger)
		{
			// Sanger needs no recoding; anything else is unexpected.
			throw new RuntimeException("BUG!  Unknown base quality format value " + baseQualityFormat + " in QseqRecordWriter");
		}
	}
	sBuilder.append(delim);
	/////////
	sBuilder.append((seq.getFilterPassed() == null || seq.getFilterPassed() ) ? 1 : 0);

	try {
		ByteBuffer buf = Text.encode(sBuilder.toString());
		out.write(buf.array(), 0, buf.limit());
	} catch (java.nio.charset.CharacterCodingException e) {
		// Keep the original exception as the cause so the failing character is not lost.
		throw new RuntimeException("Error encoding qseq record: " + seq, e);
	}
	out.write(newLine, 0, newLine.length);
}