Java Code Examples for java.nio.charset.StandardCharsets#UTF_16BE

The following examples show how to use java.nio.charset.StandardCharsets#UTF_16BE. You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: COSString.java    From sambox with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes the raw bytes of this object into a text string.
 * <p>
 * A leading byte order mark selects UTF-16BE (FE FF) or UTF-16LE (FF FE); the mark itself is not
 * part of the result. Without a mark the bytes are handed to {@code PDFDocEncoding}.
 *
 * @return the content PDF text string as defined in Chap 7.9 of PDF 32000-1:2008.
 */
public String getString()
{
    final int length = bytes.length;

    // a byte order mark needs two bytes, shorter content cannot be Unicode
    if (length >= 2)
    {
        final int first = bytes[0] & 0xff;
        final int second = bytes[1] & 0xff;

        if (first == 0xFE && second == 0xFF)
        {
            // big endian mark: UTF-16BE
            return new String(bytes, 2, length - 2, StandardCharsets.UTF_16BE);
        }
        if (first == 0xFF && second == 0xFE)
        {
            // little endian mark: UTF-16LE, accepted although the PDF spec does not define it
            return new String(bytes, 2, length - 2, StandardCharsets.UTF_16LE);
        }
    }

    // no byte order mark: PDFDocEncoding
    return PDFDocEncoding.toString(bytes);
}
 
Example 2
Source File: Source.java    From openjdk-8-source with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Decodes raw bytes into characters, honoring a leading byte order mark (BOM).
 *
 * <p>Recognized marks are UTF-16BE, UTF-16LE, UTF-8, UTF-32LE and UTF-32BE. The mark selects the
 * charset and is excluded from the result. Without a mark the bytes are decoded as UTF-8.
 *
 * @param bytes the raw bytes to decode
 * @return the decoded characters, without the BOM
 */
private static char[] byteToCharArray(final byte[] bytes) {
    Charset cs = StandardCharsets.UTF_8;
    int start = 0;
    // BOM detection.
    if (bytes.length > 1 && bytes[0] == (byte)0xFE && bytes[1] == (byte)0xFF) {
        start = 2;
        cs = StandardCharsets.UTF_16BE;
    } else if (bytes.length > 1 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE) {
        // The UTF-32LE mark (FF FE 00 00) begins with the UTF-16LE mark (FF FE), so the
        // longer mark must be tested first; otherwise it could never be matched.
        if (bytes.length > 3 && bytes[2] == 0 && bytes[3] == 0) {
            start = 4;
            cs = Charset.forName("UTF-32LE");
        } else {
            start = 2;
            cs = StandardCharsets.UTF_16LE;
        }
    } else if (bytes.length > 2 && bytes[0] == (byte)0xEF && bytes[1] == (byte)0xBB && bytes[2] == (byte)0xBF) {
        start = 3;
        cs = StandardCharsets.UTF_8;
    } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte)0xFE && bytes[3] == (byte)0xFF) {
        start = 4;
        cs = Charset.forName("UTF-32BE");
    }

    return new String(bytes, start, bytes.length - start, cs).toCharArray();
}
 
Example 3
Source File: Source.java    From openjdk-8 with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Converts a byte array to characters, using a leading byte order mark (BOM) to pick the charset.
 *
 * <p>UTF-16BE, UTF-16LE, UTF-8, UTF-32LE and UTF-32BE marks are detected and stripped. Input
 * without a mark is treated as UTF-8.
 *
 * @param bytes the bytes to convert
 * @return the decoded characters, not including the BOM
 */
private static char[] byteToCharArray(final byte[] bytes) {
    Charset cs = StandardCharsets.UTF_8;
    int start = 0;
    // BOM detection.
    if (bytes.length > 1 && bytes[0] == (byte)0xFE && bytes[1] == (byte)0xFF) {
        start = 2;
        cs = StandardCharsets.UTF_16BE;
    } else if (bytes.length > 1 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE) {
        // FF FE is also the prefix of the UTF-32LE mark FF FE 00 00. Checking for the
        // four-byte mark here keeps that case reachable.
        if (bytes.length > 3 && bytes[2] == 0 && bytes[3] == 0) {
            start = 4;
            cs = Charset.forName("UTF-32LE");
        } else {
            start = 2;
            cs = StandardCharsets.UTF_16LE;
        }
    } else if (bytes.length > 2 && bytes[0] == (byte)0xEF && bytes[1] == (byte)0xBB && bytes[2] == (byte)0xBF) {
        start = 3;
        cs = StandardCharsets.UTF_8;
    } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte)0xFE && bytes[3] == (byte)0xFF) {
        start = 4;
        cs = Charset.forName("UTF-32BE");
    }

    return new String(bytes, start, bytes.length - start, cs).toCharArray();
}
 
Example 4
Source File: ID3v2File.java    From Audinaut with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Decodes the text payload of an ID3v2 frame.
 *
 * <p>The first byte of {@code raw} is the ID3v2 text-encoding marker and the remaining bytes are
 * the encoded text:
 * <ul>
 *   <li>{@code 0x00} - ISO-8859-1</li>
 *   <li>{@code 0x01} - UTF-16 whose byte order is given by a leading byte order mark</li>
 *   <li>{@code 0x02} - UTF-16BE without a byte order mark</li>
 *   <li>{@code 0x03} - UTF-8</li>
 * </ul>
 *
 * @param raw the frame payload, starting with the encoding marker
 * @return the decoded text, or an empty string when {@code raw} is {@code null} or empty or the
 *         encoding marker is unknown
 */
private String getDecodedString(byte[] raw) {
    // Without an encoding marker there is nothing to decode.
    if (raw == null || raw.length == 0) {
        return "";
    }

    final int ID3_ENC_LATIN = 0x00;
    final int ID3_ENC_UTF16 = 0x01;
    final int ID3_ENC_UTF16BE = 0x02;
    final int ID3_ENC_UTF8 = 0x03;

    final int encid = raw[0] & 0xFF;
    final int textLen = raw.length - 1;

    if (encid == ID3_ENC_LATIN) {
        return new String(raw, 1, textLen, StandardCharsets.ISO_8859_1);
    }
    if (encid == ID3_ENC_UTF8) {
        return new String(raw, 1, textLen, StandardCharsets.UTF_8);
    }
    if (encid == ID3_ENC_UTF16) {
        // The UTF-16 decoder consumes the byte order mark and honors either byte order,
        // instead of assuming little endian.
        return new String(raw, 1, textLen, StandardCharsets.UTF_16);
    }
    if (encid == ID3_ENC_UTF16BE) {
        // This encoding carries no byte order mark, so the text starts right after the marker.
        return new String(raw, 1, textLen, StandardCharsets.UTF_16BE);
    }
    return "";
}
 
Example 5
Source File: Source.java    From hottub with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Decodes bytes to characters, selecting the charset from a leading byte order mark.
 *
 * <p>The mark (UTF-16BE, UTF-16LE, UTF-32LE, UTF-8 or UTF-32BE) is skipped. Bytes without a
 * mark are decoded as UTF-8.
 *
 * @param bytes the bytes to decode
 * @return the decoded characters, without the byte order mark
 */
private static char[] byteToCharArray(final byte[] bytes) {
    final int len = bytes.length;
    final Charset charset;
    final int skip;

    if (len >= 2 && bytes[0] == (byte) 0xFE && bytes[1] == (byte) 0xFF) {
        charset = StandardCharsets.UTF_16BE;
        skip = 2;
    } else if (len >= 2 && bytes[0] == (byte) 0xFF && bytes[1] == (byte) 0xFE) {
        // FF FE followed by two zero bytes is the UTF-32LE mark, otherwise it is UTF-16LE
        final boolean fourByteMark = len >= 4 && bytes[2] == 0 && bytes[3] == 0;
        charset = fourByteMark ? Charset.forName("UTF-32LE") : StandardCharsets.UTF_16LE;
        skip = fourByteMark ? 4 : 2;
    } else if (len >= 3 && bytes[0] == (byte) 0xEF && bytes[1] == (byte) 0xBB && bytes[2] == (byte) 0xBF) {
        charset = StandardCharsets.UTF_8;
        skip = 3;
    } else if (len >= 4 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte) 0xFE && bytes[3] == (byte) 0xFF) {
        charset = Charset.forName("UTF-32BE");
        skip = 4;
    } else {
        // no byte order mark
        charset = StandardCharsets.UTF_8;
        skip = 0;
    }

    return new String(bytes, skip, len - skip, charset).toCharArray();
}
 
Example 6
Source File: Source.java    From nashorn with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Turns raw bytes into characters, choosing the charset from a leading byte order mark (BOM).
 *
 * <p>Marks for UTF-16BE, UTF-16LE, UTF-8, UTF-32LE and UTF-32BE are recognized and removed from
 * the output. If no mark is present, UTF-8 is used.
 *
 * @param bytes the raw bytes
 * @return the decoded characters with the BOM removed
 */
private static char[] byteToCharArray(final byte[] bytes) {
    Charset cs = StandardCharsets.UTF_8;
    int start = 0;
    // BOM detection.
    if (bytes.length > 1 && bytes[0] == (byte)0xFE && bytes[1] == (byte)0xFF) {
        start = 2;
        cs = StandardCharsets.UTF_16BE;
    } else if (bytes.length > 1 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE) {
        // UTF-32LE (FF FE 00 00) shares its first two bytes with UTF-16LE (FF FE); it has to
        // be distinguished inside this branch because a later branch would never be reached.
        if (bytes.length > 3 && bytes[2] == 0 && bytes[3] == 0) {
            start = 4;
            cs = Charset.forName("UTF-32LE");
        } else {
            start = 2;
            cs = StandardCharsets.UTF_16LE;
        }
    } else if (bytes.length > 2 && bytes[0] == (byte)0xEF && bytes[1] == (byte)0xBB && bytes[2] == (byte)0xBF) {
        start = 3;
        cs = StandardCharsets.UTF_8;
    } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte)0xFE && bytes[3] == (byte)0xFF) {
        start = 4;
        cs = Charset.forName("UTF-32BE");
    }

    return new String(bytes, start, bytes.length - start, cs).toCharArray();
}
 
Example 7
Source File: Source.java    From openjdk-jdk9 with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Decodes bytes into characters after inspecting them for a byte order mark.
 *
 * <p>A UTF-16BE, UTF-16LE, UTF-32LE, UTF-8 or UTF-32BE mark determines the charset and is left
 * out of the result. UTF-8 is the charset when no mark is found.
 *
 * @param bytes the bytes to decode
 * @return the decoded characters, excluding the byte order mark
 */
private static char[] byteToCharArray(final byte[] bytes) {
    final int len = bytes.length;
    int markLength = 0;
    Charset charset = StandardCharsets.UTF_8;

    // every supported mark is at least two bytes long and is identified by its first byte
    if (len > 1) {
        switch (bytes[0]) {
            case (byte) 0xFE:
                if (bytes[1] == (byte) 0xFF) {
                    markLength = 2;
                    charset = StandardCharsets.UTF_16BE;
                }
                break;
            case (byte) 0xFF:
                if (bytes[1] == (byte) 0xFE) {
                    if (len > 3 && bytes[2] == 0 && bytes[3] == 0) {
                        markLength = 4;
                        charset = Charset.forName("UTF-32LE");
                    } else {
                        markLength = 2;
                        charset = StandardCharsets.UTF_16LE;
                    }
                }
                break;
            case (byte) 0xEF:
                if (len > 2 && bytes[1] == (byte) 0xBB && bytes[2] == (byte) 0xBF) {
                    markLength = 3;
                    charset = StandardCharsets.UTF_8;
                }
                break;
            case 0:
                if (len > 3 && bytes[1] == 0 && bytes[2] == (byte) 0xFE && bytes[3] == (byte) 0xFF) {
                    markLength = 4;
                    charset = Charset.forName("UTF-32BE");
                }
                break;
            default:
                break;
        }
    }

    return new String(bytes, markLength, len - markLength, charset).toCharArray();
}
 
Example 8
Source File: ProgramBuilder.java    From ghidra with Apache License 2.0 6 votes vote down vote up
/**
 * Encodes a string with the given charset and stores the resulting bytes at an address.
 * <p>
 * For US-ASCII and UTF-8 the bytes are stored (with one extra zero byte when
 * {@code nullTerminate} is set) and a {@code StringDataType} is applied. For UTF-16BE and
 * UTF-16LE a {@code TerminatedUnicodeDataType} is applied only when {@code nullTerminate} is
 * set, in which case two extra zero bytes are stored. For every other charset only the bytes
 * are stored.
 *
 * @param address where the bytes are written, passed on to {@code setBytes}
 * @param string the text to encode
 * @param encoding the charset used to encode {@code string}
 * @param nullTerminate whether a zero terminator is appended to the encoded bytes
 * @throws Exception propagated from {@code setBytes} or {@code applyDataType}
 */
public void createEncodedString(String address, String string, Charset encoding,
		boolean nullTerminate) throws Exception {
	final byte[] encoded = string.getBytes(encoding);

	final boolean singleByteTerminator =
		encoding == StandardCharsets.US_ASCII || encoding == StandardCharsets.UTF_8;
	final boolean doubleByteTerminator =
		encoding == StandardCharsets.UTF_16BE || encoding == StandardCharsets.UTF_16LE;

	if (singleByteTerminator) {
		// Arrays.copyOf pads with zeros, which provides the terminator
		final byte[] stored =
			nullTerminate ? Arrays.copyOf(encoded, encoded.length + 1) : encoded;
		setBytes(address, stored);
		applyDataType(address, new StringDataType(), 1);
		return;
	}

	if (doubleByteTerminator && nullTerminate) {
		setBytes(address, Arrays.copyOf(encoded, encoded.length + 2));
		applyDataType(address, new TerminatedUnicodeDataType(), 1);
		return;
	}

	// unterminated UTF-16 and all remaining charsets: raw bytes only, no data type
	setBytes(address, encoded);
}
 
Example 9
Source File: MemSearchAsciiTest.java    From ghidra with Apache License 2.0 6 votes vote down vote up
/**
 * Selects the given charset in the combo box named "Encoding Options".
 * <p>
 * Nothing is selected when the combo box holds no matching item.
 *
 * @param encoding the charset to select; UTF_16BE and UTF_16LE are treated as UTF_16
 * @throws Exception declared for the helpers this method calls
 */
@SuppressWarnings("unchecked")
private void setEncoding(Charset encoding) throws Exception {
	JComboBox<Charset> encodingOptions =
		(JComboBox<Charset>) findComponentByName(pane, "Encoding Options", false);

	// BE and LE are not choices in the combo box, so both are looked up as plain UTF_16.
	final boolean byteOrderVariant =
		encoding == StandardCharsets.UTF_16BE || encoding == StandardCharsets.UTF_16LE;
	final Charset wanted = byteOrderVariant ? StandardCharsets.UTF_16 : encoding;

	int position = 0;
	while (position < encodingOptions.getItemCount()) {
		if (encodingOptions.getItemAt(position) == wanted) {
			// the lambda needs an effectively final copy of the position
			final int index = position;
			runSwing(() -> encodingOptions.setSelectedIndex(index));
			return;
		}
		position++;
	}
}
 
Example 10
Source File: ChakraTest.java    From es6draft with MIT License 5 votes vote down vote up
/**
 * Maps the byte order mark reported by the stream to a charset.
 *
 * @param bis the stream whose byte order mark is queried
 * @return UTF-16LE or UTF-16BE for the matching mark, UTF-8 in every other case
 * @throws IOException if querying the byte order mark fails
 */
private static Charset charsetFor(BOMInputStream bis) throws IOException {
    final ByteOrderMark detected = bis.getBOM();

    if (ByteOrderMark.UTF_16LE.equals(detected)) {
        return StandardCharsets.UTF_16LE;
    }
    if (ByteOrderMark.UTF_16BE.equals(detected)) {
        return StandardCharsets.UTF_16BE;
    }
    // a UTF-8 mark and the absence of a recognized mark both lead to UTF-8
    return StandardCharsets.UTF_8;
}
 
Example 11
Source File: JsonbHttpMessageConverterTests.java    From java-technology-stack with MIT License 5 votes vote down vote up
/**
 * Writes a string with a UTF-16BE JSON content type and checks that the body, read back as
 * UTF-16BE, equals the input and that the content type header is the one passed in.
 */
@Test
public void writeUTF16() throws IOException {
	String body = "H\u00e9llo W\u00f6rld";
	MediaType contentType = new MediaType("application", "json", StandardCharsets.UTF_16BE);
	MockHttpOutputMessage outputMessage = new MockHttpOutputMessage();

	this.converter.write(body, contentType, outputMessage);

	String writtenBody = outputMessage.getBodyAsString(StandardCharsets.UTF_16BE);
	assertEquals("Invalid result", body, writtenBody);

	MediaType writtenContentType = outputMessage.getHeaders().getContentType();
	assertEquals("Invalid content-type", contentType, writtenContentType);
}
 
Example 12
Source File: MappingJackson2HttpMessageConverterTests.java    From java-technology-stack with MIT License 5 votes vote down vote up
/**
 * Writes a string with a UTF-16BE JSON content type and checks that the body, read back as
 * UTF-16BE, is the input wrapped in double quotes and that the content type header is kept.
 */
@Test
public void writeUTF16() throws IOException {
	String body = "H\u00e9llo W\u00f6rld";
	MediaType contentType = new MediaType("application", "json", StandardCharsets.UTF_16BE);
	MockHttpOutputMessage outputMessage = new MockHttpOutputMessage();

	converter.write(body, contentType, outputMessage);

	String expectedJson = "\"" + body + "\"";
	String writtenJson = outputMessage.getBodyAsString(StandardCharsets.UTF_16BE);
	assertEquals("Invalid result", expectedJson, writtenJson);

	MediaType writtenContentType = outputMessage.getHeaders().getContentType();
	assertEquals("Invalid content-type", contentType, writtenContentType);
}
 
Example 13
Source File: MappingJackson2MessageConverterTests.java    From java-technology-stack with MIT License 5 votes vote down vote up
/**
 * Converts a string payload with a UTF-16BE JSON content type while the serialized payload
 * class is {@code String}, and checks that the message payload is the input wrapped in double
 * quotes and that the content type header is kept.
 */
@Test
public void toMessageUtf16String() {
	String payload = "H\u00e9llo W\u00f6rld";
	MimeType contentType = new MimeType("application", "json", StandardCharsets.UTF_16BE);

	MappingJackson2MessageConverter converter = new MappingJackson2MessageConverter();
	converter.setSerializedPayloadClass(String.class);

	Map<String, Object> headerValues = new HashMap<>();
	headerValues.put(MessageHeaders.CONTENT_TYPE, contentType);
	MessageHeaders headers = new MessageHeaders(headerValues);

	Message<?> message = converter.toMessage(payload, headers);

	String expectedJson = "\"" + payload + "\"";
	assertEquals(expectedJson, message.getPayload());
	assertEquals(contentType, message.getHeaders().get(MessageHeaders.CONTENT_TYPE));
}
 
Example 14
Source File: MappingJackson2MessageConverterTests.java    From java-technology-stack with MIT License 5 votes vote down vote up
/**
 * Converts a string payload with a UTF-16BE JSON content type and checks that the byte payload,
 * decoded as UTF-16BE, is the input wrapped in double quotes and that the content type header
 * is kept.
 */
@Test
public void toMessageUtf16() {
	String payload = "H\u00e9llo W\u00f6rld";
	MimeType contentType = new MimeType("application", "json", StandardCharsets.UTF_16BE);

	Map<String, Object> headerValues = new HashMap<>();
	headerValues.put(MessageHeaders.CONTENT_TYPE, contentType);
	MessageHeaders headers = new MessageHeaders(headerValues);

	MappingJackson2MessageConverter converter = new MappingJackson2MessageConverter();
	Message<?> message = converter.toMessage(payload, headers);

	byte[] rawPayload = (byte[]) message.getPayload();
	String decodedPayload = new String(rawPayload, StandardCharsets.UTF_16BE);
	assertEquals("\"" + payload + "\"", decodedPayload);
	assertEquals(contentType, message.getHeaders().get(MessageHeaders.CONTENT_TYPE));
}
 
Example 15
Source File: SimpleStringSchemaTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a {@code SimpleStringSchema} created with UTF-16BE serializes a string to the
 * same bytes as {@code String.getBytes} and deserializes those bytes back to the string.
 */
@Test
public void testSerializationWithAnotherCharset() {
	final Charset charset = StandardCharsets.UTF_16BE;
	final String original = "之掃描古籍版實乃姚鼐的";
	final byte[] expectedBytes = original.getBytes(charset);

	final byte[] serialized = new SimpleStringSchema(charset).serialize(original);
	assertArrayEquals(expectedBytes, serialized);

	final String deserialized = new SimpleStringSchema(charset).deserialize(expectedBytes);
	assertEquals(original, deserialized);
}
 
Example 16
Source File: UnicodeBom.java    From Strata with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a reader whose charset is chosen from the byte order mark at the start of the stream.
 * <p>
 * A UTF-8, UTF-16BE or UTF-16LE mark is consumed and selects that charset. Without one of
 * these marks all bytes read so far are pushed back and UTF-8 is used.
 *
 * @param inputStream the stream to read from
 * @throws IOException if reading from or pushing back into the stream fails
 */
BomReader(InputStream inputStream) throws IOException {
  super(inputStream);

  byte[] head = new byte[MAX_BOM_SIZE];

  // read the first 3 bytes through a pushback stream so anything that is not a BOM can be restored
  PushbackInputStream pushbackStream = new PushbackInputStream(inputStream, MAX_BOM_SIZE);
  int headLength = ByteStreams.read(pushbackStream, head, 0, 3);

  // look for a BOM, defaulting to UTF-8 with nothing consumed
  Charset encoding = StandardCharsets.UTF_8;
  int bomLength = 0;
  if (headLength >= 3 && head[0] == X_EF && head[1] == X_BB && head[2] == X_BF) {
    bomLength = 3;
  } else if (headLength >= 2 && head[0] == X_FE && head[1] == X_FF) {
    encoding = StandardCharsets.UTF_16BE;
    bomLength = 2;
  } else if (headLength >= 2 && head[0] == X_FF && head[1] == X_FE) {
    encoding = StandardCharsets.UTF_16LE;
    bomLength = 2;
  }

  // push back every byte that was read but does not belong to the BOM
  pushbackStream.unread(head, bomLength, headLength - bomLength);

  // the standard reader takes over now that the encoding is known
  this.underlying = new InputStreamReader(pushbackStream, encoding);
}
 
Example 17
Source File: GsonHttpMessageConverterTests.java    From spring-analysis-note with MIT License 5 votes vote down vote up
/**
 * Writes a string with a UTF-16BE JSON content type and checks that the body, read back as
 * UTF-16BE, is the input wrapped in double quotes and that the content type header is kept.
 */
@Test
public void writeUTF16() throws IOException {
	String body = "H\u00e9llo W\u00f6rld";
	MockHttpOutputMessage outputMessage = new MockHttpOutputMessage();
	MediaType contentType = new MediaType("application", "json", StandardCharsets.UTF_16BE);

	this.converter.write(body, contentType, outputMessage);

	String quotedBody = "\"" + body + "\"";
	String writtenBody = outputMessage.getBodyAsString(StandardCharsets.UTF_16BE);
	assertEquals("Invalid result", quotedBody, writtenBody);

	MediaType writtenContentType = outputMessage.getHeaders().getContentType();
	assertEquals("Invalid content-type", contentType, writtenContentType);
}
 
Example 18
Source File: StringSerializer.java    From ProtocolSupportBungee with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Reads a UTF-16BE string that is prefixed with an unsigned short length.
 * <p>
 * The length prefix is multiplied by two to get the number of bytes to read, i.e. it counts
 * two-byte units rather than bytes.
 *
 * @param buf the buffer to read from
 * @return the decoded string
 */
public static String readShortUTF16BEString(ByteBuf buf) {
	int unitCount = buf.readUnsignedShort();
	byte[] encoded = Utils.readBytes(buf, unitCount * 2);
	return new String(encoded, StandardCharsets.UTF_16BE);
}
 
Example 19
Source File: ServerPinger.java    From Launcher with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Reads a UTF-16BE string that is prefixed with an unsigned short count of two-byte units.
 *
 * @param input the input to read from
 * @return the decoded string
 * @throws IOException if reading from the input fails
 */
private static String readUTF16String(HInput input) throws IOException {
    int unitCount = input.readUnsignedShort();
    int byteCount = unitCount * 2;
    // NOTE(review): the negated length presumably asks readByteArray for exactly byteCount
    // bytes without a length prefix - confirm against HInput.
    return new String(input.readByteArray(-byteCount), StandardCharsets.UTF_16BE);
}
 
Example 20
Source File: XMLCharsetDeterminator.java    From ph-commons with Apache License 2.0 4 votes vote down vote up
/**
 * Determine the XML charset from the raw bytes of a document.
 * <p>
 * A BOM is evaluated first. If it yields no charset, the bytes after the BOM
 * are compared against the <code>CS_*</code> patterns in the order UTF-32BE,
 * UTF-32LE, UTF-16BE, UTF-16LE, UTF-8, EBCDIC, IBM290. If nothing matches, the
 * fallback charset is used. The charset found this way is only the one handed
 * to <code>_parseXMLEncoding</code>, whose result is returned.
 *
 * @param aBytes
 *        XML byte representation
 * @return The result of <code>_parseXMLEncoding</code>. NOTE(review): the
 *         original documentation states <code>null</code> if no charset was
 *         found, in which case UTF-8 might be tried as the fallback - confirm
 *         against <code>_parseXMLEncoding</code>.
 */
@Nullable
public static Charset determineXMLCharset (@Nonnull final byte [] aBytes)
{
  ValueEnforcer.notNull (aBytes, "Bytes");

  Charset aReadCharset = null;
  int nStartOfs = 0;

  if (aBytes.length > 0)
  {
    // Only the leading bytes matter for the BOM (at maximum the longest BOM)
    final int nHeadLen = Math.min (EUnicodeBOM.getMaximumByteCount (), aBytes.length);
    try (NonBlockingByteArrayInputStream aHeadIS = new NonBlockingByteArrayInputStream (aBytes, 0, nHeadLen))
    {
      final InputStreamAndCharset aBOMResult = CharsetHelper.getInputStreamAndCharsetFromBOM (aHeadIS);

      // A BOM does not necessarily identify a unique charset - in any case
      // the BOM bytes are skipped for the further determination
      if (aBOMResult.hasBOM ())
      {
        nStartOfs = aBOMResult.getBOM ().getByteCount ();
      }

      // The BOM has a unique charset assigned
      if (aBOMResult.hasCharset ())
      {
        aReadCharset = aBOMResult.getCharset ();
      }
    }
  }

  // Pattern matching only without a charset and with at least 4 bytes left
  final boolean bTryPatterns = aReadCharset == null && aBytes.length - nStartOfs >= 4;
  if (bTryPatterns)
  {
    if (_match (aBytes, nStartOfs, CS_UTF32_BE))
    {
      aReadCharset = CHARSET_UTF_32BE;
    }
    else if (_match (aBytes, nStartOfs, CS_UTF32_LE))
    {
      aReadCharset = CHARSET_UTF_32LE;
    }
    else if (_match (aBytes, nStartOfs, CS_UTF16_BE))
    {
      aReadCharset = StandardCharsets.UTF_16BE;
    }
    else if (_match (aBytes, nStartOfs, CS_UTF16_LE))
    {
      aReadCharset = StandardCharsets.UTF_16LE;
    }
    else if (_match (aBytes, nStartOfs, CS_UTF8))
    {
      aReadCharset = StandardCharsets.UTF_8;
    }
    else if (_match (aBytes, nStartOfs, CS_EBCDIC))
    {
      aReadCharset = CHARSET_EBCDIC;
    }
    else if (_match (aBytes, nStartOfs, CS_IBM290))
    {
      aReadCharset = CHARSET_IBM290;
    }
  }

  // Nothing determined so far - use the fallback charset
  if (aReadCharset == null)
  {
    aReadCharset = FALLBACK_CHARSET;
  }

  // Now read with a reader
  return _parseXMLEncoding (aBytes, nStartOfs, aReadCharset);
}