Java Code Examples for java.nio.charset.StandardCharsets#UTF_16BE
The following examples show how to use
java.nio.charset.StandardCharsets#UTF_16BE.
These examples are extracted from open source projects.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: sambox File: COSString.java License: Apache License 2.0 | 6 votes |
/** * @return the content PDF text string as defined in Chap 7.9 of PDF 32000-1:2008. */ public String getString() { // text string - BOM indicates Unicode if (bytes.length >= 2) { if ((bytes[0] & 0xff) == 0xFE && (bytes[1] & 0xff) == 0xFF) { // UTF-16BE return new String(bytes, 2, bytes.length - 2, StandardCharsets.UTF_16BE); } else if ((bytes[0] & 0xff) == 0xFF && (bytes[1] & 0xff) == 0xFE) { // UTF-16LE - not in the PDF spec! return new String(bytes, 2, bytes.length - 2, StandardCharsets.UTF_16LE); } } // otherwise use PDFDocEncoding return PDFDocEncoding.toString(bytes); }
Example 2
Source Project: openjdk-8-source File: Source.java License: GNU General Public License v2.0 | 6 votes |
private static char[] byteToCharArray(final byte[] bytes) { Charset cs = StandardCharsets.UTF_8; int start = 0; // BOM detection. if (bytes.length > 1 && bytes[0] == (byte)0xFE && bytes[1] == (byte)0xFF) { start = 2; cs = StandardCharsets.UTF_16BE; } else if (bytes.length > 1 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE) { start = 2; cs = StandardCharsets.UTF_16LE; } else if (bytes.length > 2 && bytes[0] == (byte)0xEF && bytes[1] == (byte)0xBB && bytes[2] == (byte)0xBF) { start = 3; cs = StandardCharsets.UTF_8; } else if (bytes.length > 3 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE && bytes[2] == 0 && bytes[3] == 0) { start = 4; cs = Charset.forName("UTF-32LE"); } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte)0xFE && bytes[3] == (byte)0xFF) { start = 4; cs = Charset.forName("UTF-32BE"); } return new String(bytes, start, bytes.length - start, cs).toCharArray(); }
Example 3
Source Project: openjdk-8 File: Source.java License: GNU General Public License v2.0 | 6 votes |
private static char[] byteToCharArray(final byte[] bytes) { Charset cs = StandardCharsets.UTF_8; int start = 0; // BOM detection. if (bytes.length > 1 && bytes[0] == (byte)0xFE && bytes[1] == (byte)0xFF) { start = 2; cs = StandardCharsets.UTF_16BE; } else if (bytes.length > 1 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE) { start = 2; cs = StandardCharsets.UTF_16LE; } else if (bytes.length > 2 && bytes[0] == (byte)0xEF && bytes[1] == (byte)0xBB && bytes[2] == (byte)0xBF) { start = 3; cs = StandardCharsets.UTF_8; } else if (bytes.length > 3 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE && bytes[2] == 0 && bytes[3] == 0) { start = 4; cs = Charset.forName("UTF-32LE"); } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte)0xFE && bytes[3] == (byte)0xFF) { start = 4; cs = Charset.forName("UTF-32BE"); } return new String(bytes, start, bytes.length - start, cs).toCharArray(); }
Example 4
Source Project: hottub File: Source.java License: GNU General Public License v2.0 | 6 votes |
private static char[] byteToCharArray(final byte[] bytes) { Charset cs = StandardCharsets.UTF_8; int start = 0; // BOM detection. if (bytes.length > 1 && bytes[0] == (byte) 0xFE && bytes[1] == (byte) 0xFF) { start = 2; cs = StandardCharsets.UTF_16BE; } else if (bytes.length > 1 && bytes[0] == (byte) 0xFF && bytes[1] == (byte) 0xFE) { if (bytes.length > 3 && bytes[2] == 0 && bytes[3] == 0) { start = 4; cs = Charset.forName("UTF-32LE"); } else { start = 2; cs = StandardCharsets.UTF_16LE; } } else if (bytes.length > 2 && bytes[0] == (byte) 0xEF && bytes[1] == (byte) 0xBB && bytes[2] == (byte) 0xBF) { start = 3; cs = StandardCharsets.UTF_8; } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte) 0xFE && bytes[3] == (byte) 0xFF) { start = 4; cs = Charset.forName("UTF-32BE"); } return new String(bytes, start, bytes.length - start, cs).toCharArray(); }
Example 5
Source Project: openjdk-jdk9 File: Source.java License: GNU General Public License v2.0 | 6 votes |
private static char[] byteToCharArray(final byte[] bytes) { Charset cs = StandardCharsets.UTF_8; int start = 0; // BOM detection. if (bytes.length > 1 && bytes[0] == (byte) 0xFE && bytes[1] == (byte) 0xFF) { start = 2; cs = StandardCharsets.UTF_16BE; } else if (bytes.length > 1 && bytes[0] == (byte) 0xFF && bytes[1] == (byte) 0xFE) { if (bytes.length > 3 && bytes[2] == 0 && bytes[3] == 0) { start = 4; cs = Charset.forName("UTF-32LE"); } else { start = 2; cs = StandardCharsets.UTF_16LE; } } else if (bytes.length > 2 && bytes[0] == (byte) 0xEF && bytes[1] == (byte) 0xBB && bytes[2] == (byte) 0xBF) { start = 3; cs = StandardCharsets.UTF_8; } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte) 0xFE && bytes[3] == (byte) 0xFF) { start = 4; cs = Charset.forName("UTF-32BE"); } return new String(bytes, start, bytes.length - start, cs).toCharArray(); }
Example 6
Source Project: ghidra File: ProgramBuilder.java License: Apache License 2.0 | 6 votes |
/**
 * Encodes {@code string} with {@code encoding} and writes the bytes at {@code address}.
 *
 * <p>For US-ASCII and UTF-8 the bytes are written (with one trailing zero byte when
 * {@code nullTerminate} is set) and a {@code StringDataType} is applied. For UTF-16BE and
 * UTF-16LE a {@code TerminatedUnicodeDataType} is applied only when {@code nullTerminate} is
 * set, in which case two trailing zero bytes are appended. In every other case the bytes are
 * written without applying a data type.
 *
 * <p>Charsets are compared with {@code equals} rather than by identity, so an equal charset
 * obtained some other way (for example via {@code Charset.forName}) takes the same branch as
 * the {@code StandardCharsets} constant.
 *
 * @param address the address to write to
 * @param string the text to encode
 * @param encoding the charset used to encode the text
 * @param nullTerminate whether to append a zero terminator for the recognised charsets
 * @throws Exception if writing the bytes or applying the data type fails
 */
public void createEncodedString(String address, String string, Charset encoding,
		boolean nullTerminate) throws Exception {
	byte[] bytes = string.getBytes(encoding);

	if (StandardCharsets.US_ASCII.equals(encoding) || StandardCharsets.UTF_8.equals(encoding)) {
		if (nullTerminate) {
			// copyOf pads with zeros, which gives the single-byte terminator
			bytes = Arrays.copyOf(bytes, bytes.length + 1);
		}
		setBytes(address, bytes);
		applyDataType(address, new StringDataType(), 1);
	}
	else if (StandardCharsets.UTF_16BE.equals(encoding) ||
		StandardCharsets.UTF_16LE.equals(encoding)) {
		if (nullTerminate) {
			// two zero bytes form the UTF-16 terminator
			bytes = Arrays.copyOf(bytes, bytes.length + 2);
			setBytes(address, bytes);
			applyDataType(address, new TerminatedUnicodeDataType(), 1);
		}
		else {
			setBytes(address, bytes);
		}
	}
	else {
		setBytes(address, bytes);
	}
}
Example 7
Source Project: ghidra File: MemSearchAsciiTest.java License: Apache License 2.0 | 6 votes |
@SuppressWarnings("unchecked") private void setEncoding(Charset encoding) throws Exception { JComboBox<Charset> encodingOptions = (JComboBox<Charset>) findComponentByName(pane, "Encoding Options", false); // Makes encoding UTF_16 in case encoding is UTF_16BE or UTF_16LE // BE and LE are not choices in the combo box. if (encoding == StandardCharsets.UTF_16BE || encoding == StandardCharsets.UTF_16LE) { encoding = StandardCharsets.UTF_16; } for (int i = 0; i < encodingOptions.getItemCount(); i++) { if (encodingOptions.getItemAt(i) == encoding) { int index = i; runSwing(() -> encodingOptions.setSelectedIndex(index)); break; } } }
Example 8
Source Project: nashorn File: Source.java License: GNU General Public License v2.0 | 6 votes |
private static char[] byteToCharArray(final byte[] bytes) { Charset cs = StandardCharsets.UTF_8; int start = 0; // BOM detection. if (bytes.length > 1 && bytes[0] == (byte)0xFE && bytes[1] == (byte)0xFF) { start = 2; cs = StandardCharsets.UTF_16BE; } else if (bytes.length > 1 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE) { start = 2; cs = StandardCharsets.UTF_16LE; } else if (bytes.length > 2 && bytes[0] == (byte)0xEF && bytes[1] == (byte)0xBB && bytes[2] == (byte)0xBF) { start = 3; cs = StandardCharsets.UTF_8; } else if (bytes.length > 3 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE && bytes[2] == 0 && bytes[3] == 0) { start = 4; cs = Charset.forName("UTF-32LE"); } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte)0xFE && bytes[3] == (byte)0xFF) { start = 4; cs = Charset.forName("UTF-32BE"); } return new String(bytes, start, bytes.length - start, cs).toCharArray(); }
Example 9
Source Project: Audinaut File: ID3v2File.java License: GNU General Public License v3.0 | 6 votes |
/**
 * Decodes an ID3v2 text payload whose first byte is the text-encoding id.
 *
 * <p>Encoding ids follow the ID3v2.4 structure specification:
 * <ul>
 * <li>{@code 0x00} - ISO-8859-1</li>
 * <li>{@code 0x01} - UTF-16 with BOM; the BOM decides the byte order</li>
 * <li>{@code 0x02} - UTF-16BE without BOM</li>
 * <li>{@code 0x03} - UTF-8</li>
 * </ul>
 *
 * @param raw the encoding byte followed by the encoded text; may be {@code null} or empty
 * @return the decoded text, or an empty string for missing input or an unknown encoding id
 */
private String getDecodedString(byte[] raw) {
    if (raw == null || raw.length == 0) {
        // nothing to read the encoding id from
        return "";
    }

    final int encid = raw[0] & 0xFF;
    final int len = raw.length;

    switch (encid) {
        case 0x00: // ISO-8859-1
            return new String(raw, 1, len - 1, StandardCharsets.ISO_8859_1);
        case 0x01: // UTF-16 with BOM
            // The UTF-16 charset consumes the BOM and picks the matching byte order, so both
            // FE FF (big-endian) and FF FE (little-endian) payloads decode correctly.
            return new String(raw, 1, len - 1, StandardCharsets.UTF_16);
        case 0x02: { // UTF-16BE, no BOM expected
            // Text starts right after the encoding byte. Skip a big-endian BOM only if a
            // writer added one anyway, so it does not show up as U+FEFF in the result.
            final boolean hasBom =
                len >= 3 && (raw[1] & 0xFF) == 0xFE && (raw[2] & 0xFF) == 0xFF;
            final int offset = hasBom ? 3 : 1;
            return new String(raw, offset, len - offset, StandardCharsets.UTF_16BE);
        }
        case 0x03: // UTF-8
            return new String(raw, 1, len - 1, StandardCharsets.UTF_8);
        default:
            return "";
    }
}
Example 10
Source Project: es6draft File: ChakraTest.java License: MIT License | 5 votes |
/**
 * Maps the byte order mark reported by {@code bis.getBOM()} to a charset.
 *
 * @param bis the stream to take the byte order mark from
 * @return UTF-8, UTF-16LE or UTF-16BE for the matching mark; UTF-8 for anything else
 * @throws IOException if the byte order mark cannot be obtained
 */
private static Charset charsetFor(BOMInputStream bis) throws IOException {
    final ByteOrderMark detected = bis.getBOM();
    final Charset charset;
    if (ByteOrderMark.UTF_8.equals(detected)) {
        charset = StandardCharsets.UTF_8;
    } else if (ByteOrderMark.UTF_16LE.equals(detected)) {
        charset = StandardCharsets.UTF_16LE;
    } else if (ByteOrderMark.UTF_16BE.equals(detected)) {
        charset = StandardCharsets.UTF_16BE;
    } else {
        // any other result from getBOM(): default to UTF-8
        charset = StandardCharsets.UTF_8;
    }
    return charset;
}
Example 11
Source Project: spring-analysis-note File: GsonHttpMessageConverterTests.java License: MIT License | 5 votes |
/**
 * Writes a string with a UTF-16BE JSON content type and checks that the body, read back as
 * UTF-16BE, is the quoted string and that the content type header matches.
 */
@Test
public void writeUTF16() throws IOException {
	String body = "H\u00e9llo W\u00f6rld";
	String quotedBody = "\"" + body + "\"";
	MockHttpOutputMessage outputMessage = new MockHttpOutputMessage();
	MediaType contentType = new MediaType("application", "json", StandardCharsets.UTF_16BE);

	this.converter.write(body, contentType, outputMessage);

	String written = outputMessage.getBodyAsString(StandardCharsets.UTF_16BE);
	assertEquals("Invalid result", quotedBody, written);
	assertEquals("Invalid content-type", contentType,
			outputMessage.getHeaders().getContentType());
}
Example 12
Source Project: Strata File: UnicodeBom.java License: Apache License 2.0 | 5 votes |
/**
 * Creates a reader that inspects the first bytes of the stream for a byte order mark and
 * then decodes the remaining content with the matching charset, defaulting to UTF-8.
 *
 * @param inputStream the stream to read from
 * @throws IOException if reading or pushing back the leading bytes fails
 */
BomReader(InputStream inputStream) throws IOException {
  super(inputStream);

  // read the first 3 bytes through a pushback stream so non-BOM bytes can be returned to it
  PushbackInputStream pushback = new PushbackInputStream(inputStream, MAX_BOM_SIZE);
  byte[] head = new byte[MAX_BOM_SIZE];
  int count = ByteStreams.read(pushback, head, 0, 3);

  // look for a BOM; with none found, keep UTF-8 and consume nothing
  Charset charset = StandardCharsets.UTF_8;
  int bomLength = 0;
  if (count >= 3 && head[0] == X_EF && head[1] == X_BB && head[2] == X_BF) {
    bomLength = 3;
  } else if (count >= 2 && head[0] == X_FE && head[1] == X_FF) {
    charset = StandardCharsets.UTF_16BE;
    bomLength = 2;
  } else if (count >= 2 && head[0] == X_FF && head[1] == X_FE) {
    charset = StandardCharsets.UTF_16LE;
    bomLength = 2;
  }

  // push back every byte read that is not part of the BOM
  pushback.unread(head, bomLength, count - bomLength);

  // use Java standard code now that the encoding is known
  this.underlying = new InputStreamReader(pushback, charset);
}
Example 13
Source Project: flink File: SimpleStringSchemaTest.java License: Apache License 2.0 | 5 votes |
/**
 * Checks that a {@code SimpleStringSchema} created with UTF-16BE serializes a string to the
 * same bytes as {@code String.getBytes} and deserializes those bytes back to the string.
 */
@Test
public void testSerializationWithAnotherCharset() {
	final Charset utf16 = StandardCharsets.UTF_16BE;
	final String text = "之掃描古籍版實乃姚鼐的";
	final byte[] expectedBytes = text.getBytes(utf16);

	final byte[] serialized = new SimpleStringSchema(utf16).serialize(text);
	assertArrayEquals(expectedBytes, serialized);

	final String roundTripped = new SimpleStringSchema(utf16).deserialize(expectedBytes);
	assertEquals(text, roundTripped);
}
Example 14
Source Project: java-technology-stack File: MappingJackson2MessageConverterTests.java License: MIT License | 5 votes |
/**
 * Converts a string payload with a UTF-16BE JSON content type header and checks that the
 * resulting byte payload decodes (as UTF-16BE) to the quoted string, with the content type
 * header carried over to the message.
 */
@Test
public void toMessageUtf16() {
	MimeType contentType = new MimeType("application", "json", StandardCharsets.UTF_16BE);
	Map<String, Object> headerValues = new HashMap<>();
	headerValues.put(MessageHeaders.CONTENT_TYPE, contentType);
	MessageHeaders headers = new MessageHeaders(headerValues);

	String payload = "H\u00e9llo W\u00f6rld";
	MappingJackson2MessageConverter converter = new MappingJackson2MessageConverter();
	Message<?> message = converter.toMessage(payload, headers);

	byte[] encoded = (byte[]) message.getPayload();
	String decoded = new String(encoded, StandardCharsets.UTF_16BE);
	assertEquals("\"" + payload + "\"", decoded);
	assertEquals(contentType, message.getHeaders().get(MessageHeaders.CONTENT_TYPE));
}
Example 15
Source Project: java-technology-stack File: MappingJackson2MessageConverterTests.java License: MIT License | 5 votes |
/**
 * Same scenario as a UTF-16BE conversion, but with the converter set to serialize to
 * {@code String}: the payload must be the quoted string itself and the content type header
 * must be carried over to the message.
 */
@Test
public void toMessageUtf16String() {
	MimeType contentType = new MimeType("application", "json", StandardCharsets.UTF_16BE);
	Map<String, Object> headerValues = new HashMap<>();
	headerValues.put(MessageHeaders.CONTENT_TYPE, contentType);
	MessageHeaders headers = new MessageHeaders(headerValues);

	MappingJackson2MessageConverter converter = new MappingJackson2MessageConverter();
	converter.setSerializedPayloadClass(String.class);

	String payload = "H\u00e9llo W\u00f6rld";
	Message<?> message = converter.toMessage(payload, headers);

	String expected = "\"" + payload + "\"";
	assertEquals(expected, message.getPayload());
	assertEquals(contentType, message.getHeaders().get(MessageHeaders.CONTENT_TYPE));
}
Example 16
Source Project: java-technology-stack File: MappingJackson2HttpMessageConverterTests.java License: MIT License | 5 votes |
/**
 * Writes a string with a UTF-16BE JSON content type and verifies that the body, decoded as
 * UTF-16BE, is the quoted string and that the content type header is the one requested.
 */
@Test
public void writeUTF16() throws IOException {
	String body = "H\u00e9llo W\u00f6rld";
	String expectedBody = "\"" + body + "\"";
	MediaType contentType = new MediaType("application", "json", StandardCharsets.UTF_16BE);
	MockHttpOutputMessage outputMessage = new MockHttpOutputMessage();

	converter.write(body, contentType, outputMessage);

	String actualBody = outputMessage.getBodyAsString(StandardCharsets.UTF_16BE);
	assertEquals("Invalid result", expectedBody, actualBody);
	assertEquals("Invalid content-type", contentType,
			outputMessage.getHeaders().getContentType());
}
Example 17
Source Project: java-technology-stack File: JsonbHttpMessageConverterTests.java License: MIT License | 5 votes |
/**
 * Writes a string with a UTF-16BE JSON content type and verifies that the body, decoded as
 * UTF-16BE, equals the string as given (no surrounding quotes) and that the content type
 * header is the one requested.
 */
@Test
public void writeUTF16() throws IOException {
	String body = "H\u00e9llo W\u00f6rld";
	MockHttpOutputMessage outputMessage = new MockHttpOutputMessage();
	MediaType contentType = new MediaType("application", "json", StandardCharsets.UTF_16BE);

	this.converter.write(body, contentType, outputMessage);

	String actualBody = outputMessage.getBodyAsString(StandardCharsets.UTF_16BE);
	assertEquals("Invalid result", body, actualBody);
	assertEquals("Invalid content-type", contentType,
			outputMessage.getHeaders().getContentType());
}
Example 18
Source Project: ProtocolSupportBungee File: StringSerializer.java License: GNU Affero General Public License v3.0 | 4 votes |
/**
 * Reads a length-prefixed UTF-16BE string from the buffer: an unsigned short count followed
 * by twice that many bytes of text.
 *
 * @param buf the buffer to read from
 * @return the decoded string
 */
public static String readShortUTF16BEString(ByteBuf buf) {
	// the prefix counts 16-bit units, so the byte length is double
	final int unitCount = buf.readUnsignedShort();
	final byte[] encoded = Utils.readBytes(buf, unitCount * 2);
	return new String(encoded, StandardCharsets.UTF_16BE);
}
Example 19
Source Project: Launcher File: ServerPinger.java License: GNU General Public License v3.0 | 4 votes |
/**
 * Reads a length-prefixed UTF-16BE string: an unsigned short is read and doubled to get the
 * byte length, then the bytes are read and decoded.
 *
 * @param input the source to read from
 * @return the decoded string
 * @throws IOException if reading from {@code input} fails
 */
private static String readUTF16String(HInput input) throws IOException {
    final int byteLength = input.readUnsignedShort() * 2;
    // NOTE(review): the length is passed negated, exactly as before; presumably HInput
    // treats a negative argument as an exact byte count - confirm against HInput.
    final byte[] encoded = input.readByteArray(-byteLength);
    return new String(encoded, StandardCharsets.UTF_16BE);
}
Example 20
Source Project: ph-commons File: XMLCharsetDeterminator.java License: Apache License 2.0 | 4 votes |
/** * Determine the XML charset * * @param aBytes * XML byte representation * @return <code>null</code> if no charset was found. In that case you might * wanna try UTF-8 as the fallback. */ @Nullable public static Charset determineXMLCharset (@Nonnull final byte [] aBytes) { ValueEnforcer.notNull (aBytes, "Bytes"); Charset aParseCharset = null; int nSearchOfs = 0; if (aBytes.length > 0) { // Check if a BOM is present // Read at maximum 4 bytes (max BOM bytes) try ( NonBlockingByteArrayInputStream aIS = new NonBlockingByteArrayInputStream (aBytes, 0, Math.min (EUnicodeBOM.getMaximumByteCount (), aBytes.length))) { // Check for BOM first final InputStreamAndCharset aISC = CharsetHelper.getInputStreamAndCharsetFromBOM (aIS); if (aISC.hasBOM ()) { // A BOM was found, but not necessarily a charset could uniquely be // identified - skip the // BOM bytes and continue determination from there nSearchOfs = aISC.getBOM ().getByteCount (); } if (aISC.hasCharset ()) { // A BOM was found, and that BOM also has a unique charset assigned aParseCharset = aISC.getCharset (); } } } // No charset found and enough bytes left? if (aParseCharset == null && aBytes.length - nSearchOfs >= 4) if (_match (aBytes, nSearchOfs, CS_UTF32_BE)) aParseCharset = CHARSET_UTF_32BE; else if (_match (aBytes, nSearchOfs, CS_UTF32_LE)) aParseCharset = CHARSET_UTF_32LE; else if (_match (aBytes, nSearchOfs, CS_UTF16_BE)) aParseCharset = StandardCharsets.UTF_16BE; else if (_match (aBytes, nSearchOfs, CS_UTF16_LE)) aParseCharset = StandardCharsets.UTF_16LE; else if (_match (aBytes, nSearchOfs, CS_UTF8)) aParseCharset = StandardCharsets.UTF_8; else if (_match (aBytes, nSearchOfs, CS_EBCDIC)) aParseCharset = CHARSET_EBCDIC; else if (_match (aBytes, nSearchOfs, CS_IBM290)) aParseCharset = CHARSET_IBM290; if (aParseCharset == null) { // Fallback charset is always UTF-8 aParseCharset = FALLBACK_CHARSET; } // Now read with a reader return _parseXMLEncoding (aBytes, nSearchOfs, aParseCharset); }