Java Code Examples for java.nio.charset.StandardCharsets.UTF_16BE

The following are Java code examples showing how to use UTF_16BE of the java.nio.charset.StandardCharsets class. You can vote up the examples you like. Your votes will be used in our system to get more good examples.
Example 1
Project: openjdk-jdk10   File: Source.java   Source Code and License Vote up 6 votes
/**
 * Decodes a byte array into characters, selecting the charset from a leading
 * byte-order mark (BOM) when one is present.
 *
 * <p>Recognised marks: {@code FE FF} (UTF-16BE), {@code FF FE 00 00} (UTF-32LE),
 * {@code FF FE} (UTF-16LE), {@code EF BB BF} (UTF-8) and {@code 00 00 FE FF}
 * (UTF-32BE). The mark itself is not part of the result. Input without a
 * recognised mark is decoded as UTF-8 from the first byte.
 *
 * @param bytes the raw bytes to decode
 * @return the decoded characters, excluding any BOM
 */
private static char[] byteToCharArray(final byte[] bytes) {
    final int length = bytes.length;
    final Charset charset;
    final int bomLength;

    if (length >= 2 && bytes[0] == (byte) 0xFE && bytes[1] == (byte) 0xFF) {
        charset = StandardCharsets.UTF_16BE;
        bomLength = 2;
    } else if (length >= 2 && bytes[0] == (byte) 0xFF && bytes[1] == (byte) 0xFE) {
        // FF FE is a prefix of the UTF-32LE mark, so look two bytes further
        // before settling on UTF-16LE.
        final boolean fourByteMark = length >= 4 && bytes[2] == 0 && bytes[3] == 0;
        charset = fourByteMark ? Charset.forName("UTF-32LE") : StandardCharsets.UTF_16LE;
        bomLength = fourByteMark ? 4 : 2;
    } else if (length >= 3 && bytes[0] == (byte) 0xEF && bytes[1] == (byte) 0xBB && bytes[2] == (byte) 0xBF) {
        charset = StandardCharsets.UTF_8;
        bomLength = 3;
    } else if (length >= 4 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte) 0xFE && bytes[3] == (byte) 0xFF) {
        charset = Charset.forName("UTF-32BE");
        bomLength = 4;
    } else {
        // No recognised mark: treat the whole input as UTF-8.
        charset = StandardCharsets.UTF_8;
        bomLength = 0;
    }

    return new String(bytes, bomLength, length - bomLength, charset).toCharArray();
}
 
Example 2
Project: openjdk-jdk10   File: ParseAPITest.java   Source Code and License Vote up 6 votes
/**
 * Decodes a byte array into characters, selecting the charset from a leading
 * byte-order mark (BOM) when one is present.
 *
 * <p>Recognised marks: {@code FE FF} (UTF-16BE), {@code FF FE 00 00} (UTF-32LE),
 * {@code FF FE} (UTF-16LE), {@code EF BB BF} (UTF-8) and {@code 00 00 FE FF}
 * (UTF-32BE). The mark itself is not part of the result. Input without a
 * recognised mark is decoded as UTF-8 from the first byte.
 *
 * <p>The UTF-32LE mark starts with the UTF-16LE mark, so input beginning
 * {@code FF FE 00 00} is always treated as UTF-32LE, never as UTF-16LE text
 * whose first character is U+0000.
 *
 * @param bytes the raw bytes to decode
 * @return the decoded characters, excluding any BOM
 */
private static char[] byteToCharArray(final byte[] bytes) {
    Charset cs = StandardCharsets.UTF_8;
    int start = 0;
    // BOM detection.
    if (bytes.length > 1 && bytes[0] == (byte) 0xFE && bytes[1] == (byte) 0xFF) {
        start = 2;
        cs = StandardCharsets.UTF_16BE;
    } else if (bytes.length > 1 && bytes[0] == (byte) 0xFF && bytes[1] == (byte) 0xFE) {
        // The 4-byte UTF-32LE mark must be tested before accepting UTF-16LE;
        // as a separate later branch it could never be reached.
        if (bytes.length > 3 && bytes[2] == 0 && bytes[3] == 0) {
            start = 4;
            cs = Charset.forName("UTF-32LE");
        } else {
            start = 2;
            cs = StandardCharsets.UTF_16LE;
        }
    } else if (bytes.length > 2 && bytes[0] == (byte) 0xEF && bytes[1] == (byte) 0xBB && bytes[2] == (byte) 0xBF) {
        start = 3;
        cs = StandardCharsets.UTF_8;
    } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte) 0xFE && bytes[3] == (byte) 0xFF) {
        start = 4;
        cs = Charset.forName("UTF-32BE");
    }

    return new String(bytes, start, bytes.length - start, cs).toCharArray();
}
 
Example 3
Project: OpenJSharp   File: Source.java   Source Code and License Vote up 5 votes
/**
 * Converts raw bytes to a {@code char[]}, using a leading byte-order mark
 * (BOM) to pick the charset and skipping the mark in the output.
 *
 * <p>Marks handled: {@code FE FF} selects UTF-16BE, {@code FF FE 00 00}
 * selects UTF-32LE, a bare {@code FF FE} selects UTF-16LE, {@code EF BB BF}
 * selects UTF-8 and {@code 00 00 FE FF} selects UTF-32BE. When none of these
 * is present, all bytes are decoded as UTF-8.
 *
 * <p>Because {@code FF FE} is a prefix of the UTF-32LE mark, the longer mark
 * wins: input starting {@code FF FE 00 00} is decoded as UTF-32LE.
 *
 * @param bytes the raw bytes to decode
 * @return the decoded characters, excluding any BOM
 */
private static char[] byteToCharArray(final byte[] bytes) {
    Charset cs = StandardCharsets.UTF_8;
    int start = 0;
    // BOM detection.
    if (bytes.length > 1 && bytes[0] == (byte) 0xFE && bytes[1] == (byte) 0xFF) {
        start = 2;
        cs = StandardCharsets.UTF_16BE;
    } else if (bytes.length > 1 && bytes[0] == (byte) 0xFF && bytes[1] == (byte) 0xFE) {
        // Disambiguate here: checked as a later sibling branch, the UTF-32LE
        // mark would always be shadowed by this UTF-16LE test.
        if (bytes.length > 3 && bytes[2] == 0 && bytes[3] == 0) {
            start = 4;
            cs = Charset.forName("UTF-32LE");
        } else {
            start = 2;
            cs = StandardCharsets.UTF_16LE;
        }
    } else if (bytes.length > 2 && bytes[0] == (byte) 0xEF && bytes[1] == (byte) 0xBB && bytes[2] == (byte) 0xBF) {
        start = 3;
        cs = StandardCharsets.UTF_8;
    } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte) 0xFE && bytes[3] == (byte) 0xFF) {
        start = 4;
        cs = Charset.forName("UTF-32BE");
    }

    return new String(bytes, start, bytes.length - start, cs).toCharArray();
}
 
Example 4
Project: datarouter   File: StringByteTool.java   Source Code and License Vote up 4 votes
/**
 * Decodes every single-byte value 0..255 with US-ASCII, ISO-8859-1,
 * windows-1252, UTF-16BE (with a zero high byte) and UTF-8, then checks how
 * the results relate to each other in three ranges: below 0x80, 0x80 up to
 * 159, and 160 up to 255.
 */
@Test
public void testAsciiExtensions(){
	// sanity check on the constant before relying on its companion below
	Assert.assertTrue(unknownCharacterInt.equals(65533));

	for(int value = 0; value < 256; value++){
		byte octet = ByteTool.fromUnsignedInt0To255(value);
		byte[] single = new byte[]{octet};
		byte[] zeroPrefixed = new byte[]{(byte)0, octet};

		String asAscii = new String(single, StandardCharsets.US_ASCII);
		String asLatin1 = new String(single, StandardCharsets.ISO_8859_1);
		String asWindows1252 = new String(single, Charset.forName("windows-1252"));
		String asUtf16be = new String(zeroPrefixed, StandardCharsets.UTF_16BE);
		String asUtf8 = new String(single, StandardCharsets.UTF_8);

		if(value >= 160){
			// upper range: latin1, windows-1252 and utf16be agree again
			Assert.assertEquals(unknownCharacter.toString(), asAscii);// not a valid ascii octet
			Assert.assertTrue(StringTool.notEmpty(asLatin1));
			Assert.assertTrue(StringTool.notEmpty(asWindows1252));
			Assert.assertEquals(asWindows1252, asLatin1);
			Assert.assertEquals(asUtf16be, asWindows1252);
			// a lone byte in this range is not a complete utf8 sequence
			Assert.assertEquals(unknownCharacter.toString(), asUtf8);
		}else if(value >= 0x80){
			// 0x80..159: windows-1252 diverges from latin1 and utf16be
			Assert.assertEquals(unknownCharacter.toString(), asAscii);// not a valid ascii octet
			// latin1 accepts the octet and maps it to the code point of the same value
			Assert.assertEquals(asLatin1.charAt(0), value);
			Assert.assertTrue(StringTool.notEmpty(asWindows1252));
			Assert.assertTrue(asLatin1.equals(asUtf16be));
			Assert.assertTrue(!asWindows1252.equals(asUtf16be));
			// a lone byte in this range is not a complete utf8 sequence
			Assert.assertEquals(unknownCharacter.toString(), asUtf8);
		}else{
			// plain ascii range: all five decodings produce the same string
			Assert.assertEquals(asLatin1, asAscii);
			Assert.assertEquals(asWindows1252, asLatin1);
			Assert.assertEquals(asUtf16be, asWindows1252);
			Assert.assertEquals(asUtf8, asUtf16be);
		}
	}
}
 
Example 5
Project: thjson   File: THJSONTokenizer.java   Source Code and License Vote up 4 votes
/**
 * Reads the four hex digits of a unicode escape and decodes them.
 * <p>
 * Each pair of digits from {@code readHexDigit()} is packed into one byte
 * (first digit in the high nibble). The two bytes are stored in the
 * {@code unicode} buffer, most significant byte first, and the buffer is
 * decoded as UTF-16BE.
 *
 * @return the string decoded from the {@code unicode} buffer
 * @throws IOException presumably propagated from {@code readHexDigit()};
 *         confirm against that method
 */
private String readUnicodeEscape() throws IOException {
	// byte 0 is the high byte and byte 1 the low byte of the 16-bit value
	for (int index = 0; index < 2; index++) {
		unicode[index] = (byte) ((readHexDigit() << 4) | (readHexDigit()));
	}
	return new String(unicode, StandardCharsets.UTF_16BE);
}