Java Code Examples for org.apache.lucene.util.UnicodeUtil#UTF16toUTF8

The following examples show how to use org.apache.lucene.util.UnicodeUtil#UTF16toUTF8. You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: LowerFunction.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Lower-cases the string value carried by the first argument.
 *
 * @param args function inputs; only {@code args[0]} is read
 * @return the lower-cased text re-encoded with {@code UnicodeUtil.UTF16toUTF8},
 *         or {@code null} when the input value is {@code null}
 */
@Override
public BytesRef evaluate(Input<Object>... args) {
    final Object raw = args[0].value();
    if (raw == null) {
        return null;
    }

    final BytesRef source = BytesRefs.toBytesRef(raw);

    // Decode to UTF-16; the char buffer is sized to the input's byte count.
    final char[] chars = new char[source.length];
    final int charCount = UnicodeUtil.UTF8toUTF16(source.bytes, source.offset, source.length, chars);
    charUtils.toLowerCase(chars, 0, charCount);

    // Re-encode into a buffer with MAX_UTF8_BYTES_PER_CHAR bytes reserved per char.
    final byte[] encoded = new byte[UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR * charCount];
    final int byteCount = UnicodeUtil.UTF16toUTF8(chars, 0, charCount, encoded);
    return new BytesRef(encoded, 0, byteCount);
}
 
Example 2
Source File: UpperFunction.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Upper-cases the string value carried by the first argument.
 *
 * @param args function inputs; only {@code args[0]} is read
 * @return the upper-cased text re-encoded with {@code UnicodeUtil.UTF16toUTF8},
 *         or {@code null} when the input value is {@code null}
 */
@Override
public BytesRef evaluate(Input<Object>... args) {
    final Object raw = args[0].value();
    if (raw == null) {
        return null;
    }

    final BytesRef source = BytesRefs.toBytesRef(raw);

    // Decode to UTF-16; the char buffer is sized to the input's byte count.
    final char[] chars = new char[source.length];
    final int charCount = UnicodeUtil.UTF8toUTF16(source.bytes, source.offset, source.length, chars);
    charUtils.toUpperCase(chars, 0, charCount);

    // Re-encode into a buffer with MAX_UTF8_BYTES_PER_CHAR bytes reserved per char.
    final byte[] encoded = new byte[UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR * charCount];
    final int byteCount = UnicodeUtil.UTF16toUTF8(chars, 0, charCount, encoded);
    return new BytesRef(encoded, 0, byteCount);
}
 
Example 3
Source File: StringEncoding.java    From spliceengine with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Serializes a string by wrapping Lucene's {@code UnicodeUtil.UTF16toUTF8} and
 * then adding 2 to every encoded byte.
 *
 * <p>A {@code null} value yields {@code Encoding.EMPTY_BYTE_ARRAY}. An empty
 * string yields the single byte {@code 0x01} (bit-flipped when {@code desc}).
 *
 * @param value the string to encode; may be {@code null}
 * @param desc  when {@code true}, every output byte is XOR-ed with {@code 0xff}
 * @return a newly allocated array holding the encoded bytes
 */
public static byte[] toBytes(String value, boolean desc){
    if(value==null){
        return Encoding.EMPTY_BYTE_ARRAY;
    }
    if(value.isEmpty()){
        return new byte[]{ desc ? (byte)(0x01^0xff) : (byte)0x01 };
    }

    // Encode as UTF-8 through Lucene.
    final BytesRef utf8 = new BytesRef();
    UnicodeUtil.UTF16toUTF8(value, 0, value.length(), utf8);

    // XOR with 0xff flips every bit of the byte; XOR with 0 leaves it unchanged.
    final int flip = desc ? 0xff : 0x00;
    final byte[] encoded = new byte[utf8.length];
    int src = utf8.offset;
    for(int dst=0; dst<encoded.length; dst++, src++){
        // NOTE(review): the +2 shift presumably keeps 0x00/0x01 free as markers — confirm.
        encoded[dst] = (byte)((utf8.bytes[src] + 2) ^ flip);
    }
    return encoded;
}
 
Example 4
Source File: PHPSerializedResponseWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a string in the form {@code s:<byteCount>:"<val>";}.
 *
 * @param name          not used by this method
 * @param val           the string to write
 * @param needsEscaping not used by this method; the value is written unescaped
 * @throws IOException if the underlying writer fails
 */
@Override
public void writeStr(String name, String val, boolean needsEscaping) throws IOException {
  // Serialized PHP strings are written unescaped, but the declared size
  // must be the number of bytes rather than the number of chars.
  final int charCount = val.length();
  utf8 = ArrayUtil.grow(utf8, charCount * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR);
  final int byteCount = UnicodeUtil.UTF16toUTF8(val, 0, charCount, utf8);

  writer.write("s:");
  writer.write(String.valueOf(byteCount));
  writer.write(":\"");
  writer.write(val);
  writer.write("\";");
}
 
Example 5
Source File: StringEncoding.java    From spliceengine with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Encodes a string with Lucene's {@code UnicodeUtil.UTF16toUTF8}, adds 2 to
 * every encoded byte, and writes the result into {@code buffer} starting at
 * {@code offset}.
 *
 * @param value  the string to encode; {@code null} or empty writes nothing
 * @param desc   when {@code true}, every output byte is XOR-ed with {@code 0xff}
 * @param buffer destination array; must have room for the encoded bytes
 *               starting at {@code offset}
 * @param offset index in {@code buffer} of the first byte written
 * @return the number of bytes written into {@code buffer} (0 for a
 *         {@code null} or empty value)
 */
public static int toBytes(String value, boolean desc, byte[] buffer, int offset){
    if(value==null || value.isEmpty()) return 0;

    //convert to UTF-8 encoding
    BytesRef result = new BytesRef();
    UnicodeUtil.UTF16toUTF8(value, 0, value.length(), result);
    for(int i=0;i<result.length;i++){
        byte newD = (byte)(result.bytes[i+result.offset] + 2);
        if(desc)
            newD ^= 0xff; //flip every bit of the byte
        buffer[offset+i] = newD;
    }
    // Report the bytes actually written. The UTF-16 char count (value.length())
    // is smaller than the UTF-8 byte count for any non-ASCII input.
    return result.length;
}
 
Example 6
Source File: TestUTF32ToUTF8.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Encodes a single code point through UTF-16 into UTF-8 and runs the
 * automaton over the resulting bytes.
 *
 * @param a    the automaton to run
 * @param code the code point to encode
 * @return the result of {@code a.run} on the encoded bytes
 */
private boolean matches(ByteRunAutomaton a, int code) {
  final char[] utf16 = Character.toChars(code);
  final byte[] utf8 = new byte[UnicodeUtil.maxUTF8Length(utf16.length)];
  return a.run(utf8, 0, UnicodeUtil.UTF16toUTF8(utf16, 0, utf16.length, utf8));
}
 
Example 7
Source File: SerializerUtil.java    From incubator-retired-blur with Apache License 2.0 4 votes vote down vote up
/**
 * Encodes a string with {@code UnicodeUtil.UTF16toUTF8} and hands the
 * resulting bytes to {@code writeBytesRef}.
 *
 * @param s   the string to write
 * @param out the destination passed on to {@code writeBytesRef}
 * @throws IOException declared for failures while writing
 */
public static void writeString(String s, DataOutput out) throws IOException {
  final BytesRef encoded = new BytesRef();
  final int charCount = s.length();
  UnicodeUtil.UTF16toUTF8(s, 0, charCount, encoded);
  writeBytesRef(encoded, out);
}