Java Code Examples for java.nio.charset.StandardCharsets#UTF_16BE
The following examples show how to use
java.nio.charset.StandardCharsets#UTF_16BE.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: COSString.java From sambox with Apache License 2.0 | 6 votes |
/** * @return the content PDF text string as defined in Chap 7.9 of PDF 32000-1:2008. */ public String getString() { // text string - BOM indicates Unicode if (bytes.length >= 2) { if ((bytes[0] & 0xff) == 0xFE && (bytes[1] & 0xff) == 0xFF) { // UTF-16BE return new String(bytes, 2, bytes.length - 2, StandardCharsets.UTF_16BE); } else if ((bytes[0] & 0xff) == 0xFF && (bytes[1] & 0xff) == 0xFE) { // UTF-16LE - not in the PDF spec! return new String(bytes, 2, bytes.length - 2, StandardCharsets.UTF_16LE); } } // otherwise use PDFDocEncoding return PDFDocEncoding.toString(bytes); }
Example 2
Source File: Source.java From openjdk-8-source with GNU General Public License v2.0 | 6 votes |
private static char[] byteToCharArray(final byte[] bytes) { Charset cs = StandardCharsets.UTF_8; int start = 0; // BOM detection. if (bytes.length > 1 && bytes[0] == (byte)0xFE && bytes[1] == (byte)0xFF) { start = 2; cs = StandardCharsets.UTF_16BE; } else if (bytes.length > 1 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE) { start = 2; cs = StandardCharsets.UTF_16LE; } else if (bytes.length > 2 && bytes[0] == (byte)0xEF && bytes[1] == (byte)0xBB && bytes[2] == (byte)0xBF) { start = 3; cs = StandardCharsets.UTF_8; } else if (bytes.length > 3 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE && bytes[2] == 0 && bytes[3] == 0) { start = 4; cs = Charset.forName("UTF-32LE"); } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte)0xFE && bytes[3] == (byte)0xFF) { start = 4; cs = Charset.forName("UTF-32BE"); } return new String(bytes, start, bytes.length - start, cs).toCharArray(); }
Example 3
Source File: Source.java From openjdk-8 with GNU General Public License v2.0 | 6 votes |
private static char[] byteToCharArray(final byte[] bytes) { Charset cs = StandardCharsets.UTF_8; int start = 0; // BOM detection. if (bytes.length > 1 && bytes[0] == (byte)0xFE && bytes[1] == (byte)0xFF) { start = 2; cs = StandardCharsets.UTF_16BE; } else if (bytes.length > 1 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE) { start = 2; cs = StandardCharsets.UTF_16LE; } else if (bytes.length > 2 && bytes[0] == (byte)0xEF && bytes[1] == (byte)0xBB && bytes[2] == (byte)0xBF) { start = 3; cs = StandardCharsets.UTF_8; } else if (bytes.length > 3 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE && bytes[2] == 0 && bytes[3] == 0) { start = 4; cs = Charset.forName("UTF-32LE"); } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte)0xFE && bytes[3] == (byte)0xFF) { start = 4; cs = Charset.forName("UTF-32BE"); } return new String(bytes, start, bytes.length - start, cs).toCharArray(); }
Example 4
Source File: ID3v2File.java From Audinaut with GNU General Public License v3.0 | 6 votes |
/**
 * Decodes a text payload whose first byte selects the character encoding.
 *
 * <p>Encoding byte {@code 0x00} selects ISO-8859-1 and {@code 0x03} selects UTF-8; in both
 * cases the text starts right after the encoding byte. {@code 0x01} selects UTF-16LE and
 * {@code 0x02} selects UTF-16BE; in both cases the text starts at index 3.
 *
 * <p>This is a best-effort decoder: an empty array, an unknown encoding byte, or a payload
 * too short for the selected encoding yields an empty string instead of an exception.
 *
 * @param raw the encoding byte followed by the encoded text
 * @return the decoded text, or an empty string when nothing can be decoded
 */
private String getDecodedString(byte[] raw) {
    final int ID3_ENC_LATIN = 0x00;
    final int ID3_ENC_UTF16LE = 0x01;
    final int ID3_ENC_UTF16BE = 0x02;
    final int ID3_ENC_UTF8 = 0x03;

    final int len = raw.length;
    if (len == 0) {
        // No encoding byte to inspect; previously this threw ArrayIndexOutOfBoundsException.
        return "";
    }

    switch (raw[0] & 0xFF) {
        case ID3_ENC_LATIN:
            return new String(raw, 1, len - 1, StandardCharsets.ISO_8859_1);
        case ID3_ENC_UTF8:
            return new String(raw, 1, len - 1, StandardCharsets.UTF_8);
        case ID3_ENC_UTF16LE:
            // Skips the encoding byte plus two more bytes.
            // NOTE(review): presumably a byte order mark - confirm against the tag format.
            return len < 3 ? "" : new String(raw, 3, len - 3, StandardCharsets.UTF_16LE);
        case ID3_ENC_UTF16BE:
            // Same three-byte skip as the little-endian case.
            return len < 3 ? "" : new String(raw, 3, len - 3, StandardCharsets.UTF_16BE);
        default:
            // Unknown encoding byte: nothing sensible to decode.
            return "";
    }
}
Example 5
Source File: Source.java From hottub with GNU General Public License v2.0 | 6 votes |
private static char[] byteToCharArray(final byte[] bytes) { Charset cs = StandardCharsets.UTF_8; int start = 0; // BOM detection. if (bytes.length > 1 && bytes[0] == (byte) 0xFE && bytes[1] == (byte) 0xFF) { start = 2; cs = StandardCharsets.UTF_16BE; } else if (bytes.length > 1 && bytes[0] == (byte) 0xFF && bytes[1] == (byte) 0xFE) { if (bytes.length > 3 && bytes[2] == 0 && bytes[3] == 0) { start = 4; cs = Charset.forName("UTF-32LE"); } else { start = 2; cs = StandardCharsets.UTF_16LE; } } else if (bytes.length > 2 && bytes[0] == (byte) 0xEF && bytes[1] == (byte) 0xBB && bytes[2] == (byte) 0xBF) { start = 3; cs = StandardCharsets.UTF_8; } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte) 0xFE && bytes[3] == (byte) 0xFF) { start = 4; cs = Charset.forName("UTF-32BE"); } return new String(bytes, start, bytes.length - start, cs).toCharArray(); }
Example 6
Source File: Source.java From nashorn with GNU General Public License v2.0 | 6 votes |
private static char[] byteToCharArray(final byte[] bytes) { Charset cs = StandardCharsets.UTF_8; int start = 0; // BOM detection. if (bytes.length > 1 && bytes[0] == (byte)0xFE && bytes[1] == (byte)0xFF) { start = 2; cs = StandardCharsets.UTF_16BE; } else if (bytes.length > 1 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE) { start = 2; cs = StandardCharsets.UTF_16LE; } else if (bytes.length > 2 && bytes[0] == (byte)0xEF && bytes[1] == (byte)0xBB && bytes[2] == (byte)0xBF) { start = 3; cs = StandardCharsets.UTF_8; } else if (bytes.length > 3 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE && bytes[2] == 0 && bytes[3] == 0) { start = 4; cs = Charset.forName("UTF-32LE"); } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte)0xFE && bytes[3] == (byte)0xFF) { start = 4; cs = Charset.forName("UTF-32BE"); } return new String(bytes, start, bytes.length - start, cs).toCharArray(); }
Example 7
Source File: Source.java From openjdk-jdk9 with GNU General Public License v2.0 | 6 votes |
private static char[] byteToCharArray(final byte[] bytes) { Charset cs = StandardCharsets.UTF_8; int start = 0; // BOM detection. if (bytes.length > 1 && bytes[0] == (byte) 0xFE && bytes[1] == (byte) 0xFF) { start = 2; cs = StandardCharsets.UTF_16BE; } else if (bytes.length > 1 && bytes[0] == (byte) 0xFF && bytes[1] == (byte) 0xFE) { if (bytes.length > 3 && bytes[2] == 0 && bytes[3] == 0) { start = 4; cs = Charset.forName("UTF-32LE"); } else { start = 2; cs = StandardCharsets.UTF_16LE; } } else if (bytes.length > 2 && bytes[0] == (byte) 0xEF && bytes[1] == (byte) 0xBB && bytes[2] == (byte) 0xBF) { start = 3; cs = StandardCharsets.UTF_8; } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte) 0xFE && bytes[3] == (byte) 0xFF) { start = 4; cs = Charset.forName("UTF-32BE"); } return new String(bytes, start, bytes.length - start, cs).toCharArray(); }
Example 8
Source File: ProgramBuilder.java From ghidra with Apache License 2.0 | 6 votes |
/**
 * Encodes {@code string} with {@code encoding} and writes the bytes at {@code address}.
 *
 * <p>For US-ASCII and UTF-8 the bytes are written (with one trailing zero byte when
 * {@code nullTerminate} is set) and a {@code StringDataType} is applied. For UTF-16BE and
 * UTF-16LE with {@code nullTerminate} set, two trailing zero bytes are added and a
 * {@code TerminatedUnicodeDataType} is applied. In every other case only the bytes are
 * written and no data type is applied.
 *
 * @param address where the bytes are written
 * @param string the text to encode
 * @param encoding the charset used to encode the text
 * @param nullTerminate whether to append a zero terminator
 * @throws Exception if writing the bytes or applying the data type fails
 */
public void createEncodedString(String address, String string, Charset encoding,
        boolean nullTerminate) throws Exception {
    byte[] encoded = string.getBytes(encoding);

    boolean isByteOriented =
        encoding == StandardCharsets.US_ASCII || encoding == StandardCharsets.UTF_8;
    boolean isUtf16 =
        encoding == StandardCharsets.UTF_16BE || encoding == StandardCharsets.UTF_16LE;

    if (isByteOriented) {
        // Arrays.copyOf pads with zeros, which supplies the one-byte terminator
        byte[] data = nullTerminate ? Arrays.copyOf(encoded, encoded.length + 1) : encoded;
        setBytes(address, data);
        applyDataType(address, new StringDataType(), 1);
        return;
    }

    if (isUtf16 && nullTerminate) {
        // two zero bytes terminate a UTF-16 string
        setBytes(address, Arrays.copyOf(encoded, encoded.length + 2));
        applyDataType(address, new TerminatedUnicodeDataType(), 1);
        return;
    }

    // unterminated UTF-16 and any other charset: bytes only
    setBytes(address, encoded);
}
Example 9
Source File: MemSearchAsciiTest.java From ghidra with Apache License 2.0 | 6 votes |
@SuppressWarnings("unchecked") private void setEncoding(Charset encoding) throws Exception { JComboBox<Charset> encodingOptions = (JComboBox<Charset>) findComponentByName(pane, "Encoding Options", false); // Makes encoding UTF_16 in case encoding is UTF_16BE or UTF_16LE // BE and LE are not choices in the combo box. if (encoding == StandardCharsets.UTF_16BE || encoding == StandardCharsets.UTF_16LE) { encoding = StandardCharsets.UTF_16; } for (int i = 0; i < encodingOptions.getItemCount(); i++) { if (encodingOptions.getItemAt(i) == encoding) { int index = i; runSwing(() -> encodingOptions.setSelectedIndex(index)); break; } } }
Example 10
Source File: ChakraTest.java From es6draft with MIT License | 5 votes |
/**
 * Maps the byte order mark reported by the stream to a charset.
 *
 * @param bis the stream whose byte order mark is inspected
 * @return UTF-16LE or UTF-16BE for the corresponding mark; UTF-8 for a UTF-8 mark or any other result
 * @throws IOException if the byte order mark cannot be read
 */
private static Charset charsetFor(BOMInputStream bis) throws IOException {
    final ByteOrderMark detected = bis.getBOM();
    final Charset result;
    if (ByteOrderMark.UTF_8.equals(detected)) {
        result = StandardCharsets.UTF_8;
    } else if (ByteOrderMark.UTF_16LE.equals(detected)) {
        result = StandardCharsets.UTF_16LE;
    } else if (ByteOrderMark.UTF_16BE.equals(detected)) {
        result = StandardCharsets.UTF_16BE;
    } else {
        // any other result defaults to UTF-8
        result = StandardCharsets.UTF_8;
    }
    return result;
}
Example 11
Source File: JsonbHttpMessageConverterTests.java From java-technology-stack with MIT License | 5 votes |
/** Writes a non-ASCII string with a UTF-16BE JSON content type and checks body and header. */
@Test
public void writeUTF16() throws IOException {
    String text = "H\u00e9llo W\u00f6rld";
    MediaType utf16Json = new MediaType("application", "json", StandardCharsets.UTF_16BE);
    MockHttpOutputMessage response = new MockHttpOutputMessage();

    this.converter.write(text, utf16Json, response);

    // the body is expected to come back unchanged when decoded with the same charset
    String written = response.getBodyAsString(StandardCharsets.UTF_16BE);
    assertEquals("Invalid result", text, written);

    MediaType reported = response.getHeaders().getContentType();
    assertEquals("Invalid content-type", utf16Json, reported);
}
Example 12
Source File: MappingJackson2HttpMessageConverterTests.java From java-technology-stack with MIT License | 5 votes |
/** Writes a non-ASCII string with a UTF-16BE JSON content type and checks the quoted body and header. */
@Test
public void writeUTF16() throws IOException {
    String text = "H\u00e9llo W\u00f6rld";
    MediaType utf16Json = new MediaType("application", "json", StandardCharsets.UTF_16BE);
    MockHttpOutputMessage response = new MockHttpOutputMessage();

    converter.write(text, utf16Json, response);

    // the string is expected to be serialized as a quoted JSON string
    String expectedBody = "\"" + text + "\"";
    String written = response.getBodyAsString(StandardCharsets.UTF_16BE);
    assertEquals("Invalid result", expectedBody, written);

    MediaType reported = response.getHeaders().getContentType();
    assertEquals("Invalid content-type", utf16Json, reported);
}
Example 13
Source File: MappingJackson2MessageConverterTests.java From java-technology-stack with MIT License | 5 votes |
/** Converts a string with a UTF-16BE JSON content type while the serialized payload class is String. */
@Test
public void toMessageUtf16String() {
    MappingJackson2MessageConverter jsonConverter = new MappingJackson2MessageConverter();
    jsonConverter.setSerializedPayloadClass(String.class);

    MimeType utf16Json = new MimeType("application", "json", StandardCharsets.UTF_16BE);
    Map<String, Object> headerValues = new HashMap<>();
    headerValues.put(MessageHeaders.CONTENT_TYPE, utf16Json);
    MessageHeaders messageHeaders = new MessageHeaders(headerValues);

    String text = "H\u00e9llo W\u00f6rld";
    Message<?> converted = jsonConverter.toMessage(text, messageHeaders);

    // the payload is expected to be the quoted JSON string itself
    String expectedPayload = "\"" + text + "\"";
    assertEquals(expectedPayload, converted.getPayload());
    assertEquals(utf16Json, converted.getHeaders().get(MessageHeaders.CONTENT_TYPE));
}
Example 14
Source File: MappingJackson2MessageConverterTests.java From java-technology-stack with MIT License | 5 votes |
/** Converts a string with a UTF-16BE JSON content type and decodes the byte payload to verify it. */
@Test
public void toMessageUtf16() {
    MappingJackson2MessageConverter jsonConverter = new MappingJackson2MessageConverter();

    MimeType utf16Json = new MimeType("application", "json", StandardCharsets.UTF_16BE);
    Map<String, Object> headerValues = new HashMap<>();
    headerValues.put(MessageHeaders.CONTENT_TYPE, utf16Json);
    MessageHeaders messageHeaders = new MessageHeaders(headerValues);

    String text = "H\u00e9llo W\u00f6rld";
    Message<?> converted = jsonConverter.toMessage(text, messageHeaders);

    // the payload is cast to byte[] and decoded with the charset from the content type
    String expectedJson = "\"" + text + "\"";
    String decoded = new String((byte[]) converted.getPayload(), StandardCharsets.UTF_16BE);
    assertEquals(expectedJson, decoded);
    assertEquals(utf16Json, converted.getHeaders().get(MessageHeaders.CONTENT_TYPE));
}
Example 15
Source File: SimpleStringSchemaTest.java From flink with Apache License 2.0 | 5 votes |
/** Checks that a schema built with UTF-16BE serializes and deserializes using that charset. */
@Test
public void testSerializationWithAnotherCharset() {
    final Charset utf16 = StandardCharsets.UTF_16BE;
    final String text = "之掃描古籍版實乃姚鼐的";
    final byte[] expectedBytes = text.getBytes(utf16);

    // a fresh schema instance is used for each direction
    final byte[] serialized = new SimpleStringSchema(utf16).serialize(text);
    assertArrayEquals(expectedBytes, serialized);

    final String deserialized = new SimpleStringSchema(utf16).deserialize(expectedBytes);
    assertEquals(text, deserialized);
}
Example 16
Source File: UnicodeBom.java From Strata with Apache License 2.0 | 5 votes |
/**
 * Creates a reader that detects a byte order mark at the start of the stream.
 *
 * <p>Up to three bytes are read. A UTF-8, UTF-16BE or UTF-16LE mark selects that charset
 * and is consumed; otherwise UTF-8 is used and every byte read is pushed back.
 *
 * @param inputStream the stream to read from
 * @throws IOException if reading from or pushing back to the stream fails
 */
BomReader(InputStream inputStream) throws IOException {
    super(inputStream);

    byte[] bom = new byte[MAX_BOM_SIZE];
    // read the first 3 bytes through a pushback stream so that non-BOM bytes can be returned
    PushbackInputStream pushbackStream = new PushbackInputStream(inputStream, MAX_BOM_SIZE);
    int bytesRead = ByteStreams.read(pushbackStream, bom, 0, 3);

    // look for a BOM and adapt, defaulting to UTF-8
    Charset encoding;
    int bomLength;
    if (bytesRead >= 3 && bom[0] == X_EF && bom[1] == X_BB && bom[2] == X_BF) {
        encoding = StandardCharsets.UTF_8;
        bomLength = 3;
    } else if (bytesRead >= 2 && bom[0] == X_FE && bom[1] == X_FF) {
        encoding = StandardCharsets.UTF_16BE;
        bomLength = 2;
    } else if (bytesRead >= 2 && bom[0] == X_FF && bom[1] == X_FE) {
        encoding = StandardCharsets.UTF_16LE;
        bomLength = 2;
    } else {
        encoding = StandardCharsets.UTF_8;
        bomLength = 0;
    }

    // return everything read beyond the BOM so the decoder sees it
    pushbackStream.unread(bom, bomLength, bytesRead - bomLength);

    // use Java standard code now we know the encoding
    this.underlying = new InputStreamReader(pushbackStream, encoding);
}
Example 17
Source File: GsonHttpMessageConverterTests.java From spring-analysis-note with MIT License | 5 votes |
/** Writes a non-ASCII string with a UTF-16BE JSON content type and checks the quoted body and header. */
@Test
public void writeUTF16() throws IOException {
    String text = "H\u00e9llo W\u00f6rld";
    MediaType utf16Json = new MediaType("application", "json", StandardCharsets.UTF_16BE);
    MockHttpOutputMessage response = new MockHttpOutputMessage();

    this.converter.write(text, utf16Json, response);

    // the string is expected to be serialized as a quoted JSON string
    String expectedBody = "\"" + text + "\"";
    String written = response.getBodyAsString(StandardCharsets.UTF_16BE);
    assertEquals("Invalid result", expectedBody, written);

    MediaType reported = response.getHeaders().getContentType();
    assertEquals("Invalid content-type", utf16Json, reported);
}
Example 18
Source File: StringSerializer.java From ProtocolSupportBungee with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Reads a length-prefixed UTF-16BE string from the buffer.
 *
 * <p>The prefix is an unsigned short; twice that many bytes are then read and decoded.
 *
 * @param buf the buffer to read from
 * @return the decoded string
 */
public static String readShortUTF16BEString(ByteBuf buf) {
    // the prefix counts two-byte units, hence the doubling
    int unitCount = buf.readUnsignedShort();
    byte[] encoded = Utils.readBytes(buf, unitCount * 2);
    return new String(encoded, StandardCharsets.UTF_16BE);
}
Example 19
Source File: ServerPinger.java From Launcher with GNU General Public License v3.0 | 4 votes |
/**
 * Reads a length-prefixed UTF-16BE string from the input.
 *
 * <p>The prefix is an unsigned short; it is doubled to obtain the byte count.
 *
 * @param input the input to read from
 * @return the decoded string
 * @throws IOException if reading from the input fails
 */
private static String readUTF16String(HInput input) throws IOException {
    int byteCount = input.readUnsignedShort() * 2;
    // NOTE(review): the negated length is passed as in the original; presumably
    // readByteArray treats a negative value specially - confirm against HInput.
    return new String(input.readByteArray(-byteCount), StandardCharsets.UTF_16BE);
}
Example 20
Source File: XMLCharsetDeterminator.java From ph-commons with Apache License 2.0 | 4 votes |
/** * Determine the XML charset * * @param aBytes * XML byte representation * @return <code>null</code> if no charset was found. In that case you might * wanna try UTF-8 as the fallback. */ @Nullable public static Charset determineXMLCharset (@Nonnull final byte [] aBytes) { ValueEnforcer.notNull (aBytes, "Bytes"); Charset aParseCharset = null; int nSearchOfs = 0; if (aBytes.length > 0) { // Check if a BOM is present // Read at maximum 4 bytes (max BOM bytes) try ( NonBlockingByteArrayInputStream aIS = new NonBlockingByteArrayInputStream (aBytes, 0, Math.min (EUnicodeBOM.getMaximumByteCount (), aBytes.length))) { // Check for BOM first final InputStreamAndCharset aISC = CharsetHelper.getInputStreamAndCharsetFromBOM (aIS); if (aISC.hasBOM ()) { // A BOM was found, but not necessarily a charset could uniquely be // identified - skip the // BOM bytes and continue determination from there nSearchOfs = aISC.getBOM ().getByteCount (); } if (aISC.hasCharset ()) { // A BOM was found, and that BOM also has a unique charset assigned aParseCharset = aISC.getCharset (); } } } // No charset found and enough bytes left? if (aParseCharset == null && aBytes.length - nSearchOfs >= 4) if (_match (aBytes, nSearchOfs, CS_UTF32_BE)) aParseCharset = CHARSET_UTF_32BE; else if (_match (aBytes, nSearchOfs, CS_UTF32_LE)) aParseCharset = CHARSET_UTF_32LE; else if (_match (aBytes, nSearchOfs, CS_UTF16_BE)) aParseCharset = StandardCharsets.UTF_16BE; else if (_match (aBytes, nSearchOfs, CS_UTF16_LE)) aParseCharset = StandardCharsets.UTF_16LE; else if (_match (aBytes, nSearchOfs, CS_UTF8)) aParseCharset = StandardCharsets.UTF_8; else if (_match (aBytes, nSearchOfs, CS_EBCDIC)) aParseCharset = CHARSET_EBCDIC; else if (_match (aBytes, nSearchOfs, CS_IBM290)) aParseCharset = CHARSET_IBM290; if (aParseCharset == null) { // Fallback charset is always UTF-8 aParseCharset = FALLBACK_CHARSET; } // Now read with a reader return _parseXMLEncoding (aBytes, nSearchOfs, aParseCharset); }