Java Code Examples for java.nio.charset.StandardCharsets#UTF_16BE

The following examples show how to use java.nio.charset.StandardCharsets#UTF_16BE. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may want to check out the right sidebar which shows the related API usage.
Example 1
Source Project: sambox   File: COSString.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Decodes the raw bytes held by this object into a Java string.
 *
 * <p>A leading two-byte byte order mark selects UTF-16 (big endian for {@code FE FF}, little
 * endian for {@code FF FE}) and is not part of the returned text. Without such a mark the
 * bytes are passed to {@code PDFDocEncoding.toString}.
 *
 * @return the content PDF text string as defined in Chap 7.9 of PDF 32000-1:2008.
 */
public String getString()
{
    final int length = bytes.length;
    if (length < 2)
    {
        // too short to carry a byte order mark
        return PDFDocEncoding.toString(bytes);
    }

    // fold the first two bytes into one unsigned 16 bit value so the BOM can be matched at once
    final int marker = ((bytes[0] & 0xff) << 8) | (bytes[1] & 0xff);
    switch (marker)
    {
    case 0xFEFF:
        // UTF-16BE
        return new String(bytes, 2, length - 2, StandardCharsets.UTF_16BE);
    case 0xFFFE:
        // UTF-16LE - not in the PDF spec!
        return new String(bytes, 2, length - 2, StandardCharsets.UTF_16LE);
    default:
        // no Unicode marker: fall back to PDFDocEncoding
        return PDFDocEncoding.toString(bytes);
    }
}
 
Example 2
/**
 * Decodes a byte array into characters, selecting the charset from a leading byte order mark.
 *
 * <p>Recognised marks are UTF-32LE, UTF-32BE, UTF-16BE, UTF-16LE and UTF-8. The mark itself is
 * skipped. Input without a recognised mark is decoded as UTF-8 from the first byte.
 *
 * @param bytes the raw bytes to decode; must not be {@code null}
 * @return the decoded characters, excluding any byte order mark
 */
private static char[] byteToCharArray(final byte[] bytes) {
    Charset cs = StandardCharsets.UTF_8;
    int start = 0;
    // BOM detection. The UTF-32LE mark (FF FE 00 00) starts with the UTF-16LE mark (FF FE),
    // so the four-byte marks have to be tested before the two-byte ones; otherwise the
    // UTF-32LE branch can never be reached.
    if (bytes.length > 3 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE && bytes[2] == 0 && bytes[3] == 0) {
        start = 4;
        cs = Charset.forName("UTF-32LE");
    } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte)0xFE && bytes[3] == (byte)0xFF) {
        start = 4;
        cs = Charset.forName("UTF-32BE");
    } else if (bytes.length > 1 && bytes[0] == (byte)0xFE && bytes[1] == (byte)0xFF) {
        start = 2;
        cs = StandardCharsets.UTF_16BE;
    } else if (bytes.length > 1 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE) {
        start = 2;
        cs = StandardCharsets.UTF_16LE;
    } else if (bytes.length > 2 && bytes[0] == (byte)0xEF && bytes[1] == (byte)0xBB && bytes[2] == (byte)0xBF) {
        start = 3;
        cs = StandardCharsets.UTF_8;
    }

    return new String(bytes, start, bytes.length - start, cs).toCharArray();
}
 
Example 3
Source Project: openjdk-8   File: Source.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Converts raw bytes to a character array, using a leading byte order mark to pick the charset.
 *
 * <p>The byte order mark, when present, is not included in the result. Bytes without a
 * recognised mark are decoded as UTF-8.
 *
 * @param bytes the bytes to decode; must not be {@code null}
 * @return the decoded characters
 */
private static char[] byteToCharArray(final byte[] bytes) {
    Charset cs = StandardCharsets.UTF_8;
    int start = 0;
    // BOM detection.
    if (bytes.length > 1 && bytes[0] == (byte)0xFE && bytes[1] == (byte)0xFF) {
        start = 2;
        cs = StandardCharsets.UTF_16BE;
    } else if (bytes.length > 1 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE) {
        // FF FE is both the UTF-16LE mark and the first half of the UTF-32LE mark
        // (FF FE 00 00), so the longer mark has to be ruled out here; a separate
        // UTF-32LE branch placed after this one would be unreachable.
        if (bytes.length > 3 && bytes[2] == 0 && bytes[3] == 0) {
            start = 4;
            cs = Charset.forName("UTF-32LE");
        } else {
            start = 2;
            cs = StandardCharsets.UTF_16LE;
        }
    } else if (bytes.length > 2 && bytes[0] == (byte)0xEF && bytes[1] == (byte)0xBB && bytes[2] == (byte)0xBF) {
        start = 3;
        cs = StandardCharsets.UTF_8;
    } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte)0xFE && bytes[3] == (byte)0xFF) {
        start = 4;
        cs = Charset.forName("UTF-32BE");
    }

    return new String(bytes, start, bytes.length - start, cs).toCharArray();
}
 
Example 4
Source Project: hottub   File: Source.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Decodes bytes into characters, choosing the charset from a leading byte order mark.
 *
 * <p>UTF-16BE, UTF-16LE, UTF-32LE, UTF-8 and UTF-32BE marks are recognised and skipped;
 * anything else is decoded as UTF-8 starting at the first byte.
 *
 * @param bytes the bytes to decode; must not be {@code null}
 * @return the decoded characters without the byte order mark
 */
private static char[] byteToCharArray(final byte[] bytes) {
    final int length = bytes.length;
    // Unsigned views of the first four bytes; -1 stands for "byte not present" and can
    // never equal a real byte value, which makes explicit length checks unnecessary below.
    final int b0 = length > 0 ? bytes[0] & 0xFF : -1;
    final int b1 = length > 1 ? bytes[1] & 0xFF : -1;
    final int b2 = length > 2 ? bytes[2] & 0xFF : -1;
    final int b3 = length > 3 ? bytes[3] & 0xFF : -1;

    final Charset charset;
    final int offset;
    if (b0 == 0xFE && b1 == 0xFF) {
        charset = StandardCharsets.UTF_16BE;
        offset = 2;
    } else if (b0 == 0xFF && b1 == 0xFE) {
        // FF FE alone is UTF-16LE; followed by 00 00 it is the UTF-32LE mark.
        if (b2 == 0 && b3 == 0) {
            charset = Charset.forName("UTF-32LE");
            offset = 4;
        } else {
            charset = StandardCharsets.UTF_16LE;
            offset = 2;
        }
    } else if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
        charset = StandardCharsets.UTF_8;
        offset = 3;
    } else if (b0 == 0 && b1 == 0 && b2 == 0xFE && b3 == 0xFF) {
        charset = Charset.forName("UTF-32BE");
        offset = 4;
    } else {
        // no byte order mark
        charset = StandardCharsets.UTF_8;
        offset = 0;
    }

    return new String(bytes, offset, length - offset, charset).toCharArray();
}
 
Example 5
/**
 * Turns a byte array into characters, honouring a byte order mark at the start of the data.
 *
 * <p>When a UTF-16BE, UTF-16LE, UTF-32LE, UTF-8 or UTF-32BE mark is found it decides the
 * charset and is skipped. Otherwise the whole array is decoded as UTF-8.
 *
 * @param bytes the bytes to decode; must not be {@code null}
 * @return the decoded characters without the byte order mark
 */
private static char[] byteToCharArray(final byte[] bytes) {
    final int total = bytes.length;
    final boolean atLeast2 = total > 1;
    final boolean atLeast3 = total > 2;
    final boolean atLeast4 = total > 3;

    Charset decoder = StandardCharsets.UTF_8;
    int skip = 0;

    if (atLeast2 && bytes[0] == (byte) 0xFE && bytes[1] == (byte) 0xFF) {
        decoder = StandardCharsets.UTF_16BE;
        skip = 2;
    } else if (atLeast2 && bytes[0] == (byte) 0xFF && bytes[1] == (byte) 0xFE) {
        // Two trailing zero bytes turn the UTF-16LE mark into the UTF-32LE mark.
        final boolean utf32 = atLeast4 && bytes[2] == 0 && bytes[3] == 0;
        decoder = utf32 ? Charset.forName("UTF-32LE") : StandardCharsets.UTF_16LE;
        skip = utf32 ? 4 : 2;
    } else if (atLeast3 && bytes[0] == (byte) 0xEF && bytes[1] == (byte) 0xBB && bytes[2] == (byte) 0xBF) {
        decoder = StandardCharsets.UTF_8;
        skip = 3;
    } else if (atLeast4 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte) 0xFE && bytes[3] == (byte) 0xFF) {
        decoder = Charset.forName("UTF-32BE");
        skip = 4;
    }

    final String text = new String(bytes, skip, total - skip, decoder);
    return text.toCharArray();
}
 
Example 6
Source Project: ghidra   File: ProgramBuilder.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Encodes {@code string} with {@code encoding} and writes the bytes at {@code address}.
 *
 * <p>For US-ASCII and UTF-8 the bytes are written (with one extra zero byte when
 * {@code nullTerminate} is set) and a {@code StringDataType} is applied. For UTF-16BE and
 * UTF-16LE with {@code nullTerminate} set, two extra zero bytes are appended and a
 * {@code TerminatedUnicodeDataType} is applied. In every other case only the bytes are
 * written and no data type is applied.
 *
 * @param address the address passed on to {@code setBytes} and {@code applyDataType}
 * @param string the text to encode
 * @param encoding the charset used to encode the text
 * @param nullTerminate whether to append a zero terminator (honoured only for the four
 *            charsets named above)
 * @throws Exception declared by this method; presumably raised by the helpers it calls
 */
public void createEncodedString(String address, String string, Charset encoding,
		boolean nullTerminate) throws Exception {
	byte[] encoded = string.getBytes(encoding);

	boolean singleByteUnits =
		encoding == StandardCharsets.US_ASCII || encoding == StandardCharsets.UTF_8;
	boolean doubleByteUnits =
		encoding == StandardCharsets.UTF_16BE || encoding == StandardCharsets.UTF_16LE;

	if (singleByteUnits) {
		// Arrays.copyOf pads with zeros, which supplies the terminator
		byte[] data = nullTerminate ? Arrays.copyOf(encoded, encoded.length + 1) : encoded;
		setBytes(address, data);
		applyDataType(address, new StringDataType(), 1);
		return;
	}

	if (doubleByteUnits && nullTerminate) {
		// a UTF-16 terminator is one full code unit, i.e. two zero bytes
		setBytes(address, Arrays.copyOf(encoded, encoded.length + 2));
		applyDataType(address, new TerminatedUnicodeDataType(), 1);
		return;
	}

	// unterminated UTF-16 and all remaining charsets: raw bytes only
	setBytes(address, encoded);
}
 
Example 7
Source Project: ghidra   File: MemSearchAsciiTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Selects the given charset in the combo box named "Encoding Options".
 *
 * <p>UTF-16BE and UTF-16LE are mapped to UTF-16 before the lookup. Items are compared by
 * identity; when no item matches, the selection is left untouched.
 *
 * @param encoding the charset to select
 * @throws Exception declared by this method; presumably raised by the helpers it calls
 */
@SuppressWarnings("unchecked")
private void setEncoding(Charset encoding) throws Exception {
	JComboBox<Charset> combo =
		(JComboBox<Charset>) findComponentByName(pane, "Encoding Options", false);

	// Makes encoding UTF_16 in case encoding is UTF_16BE or UTF_16LE
	// BE and LE are not choices in the combo box.
	boolean endianSpecific =
		encoding == StandardCharsets.UTF_16BE || encoding == StandardCharsets.UTF_16LE;
	Charset wanted = endianSpecific ? StandardCharsets.UTF_16 : encoding;

	// locate the first item that is the wanted charset
	int position = 0;
	while (position < combo.getItemCount() && combo.getItemAt(position) != wanted) {
		position++;
	}

	if (position < combo.getItemCount()) {
		int index = position;
		runSwing(() -> combo.setSelectedIndex(index));
	}
}
 
Example 8
Source Project: nashorn   File: Source.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Decodes bytes into characters, using a leading byte order mark to determine the charset.
 *
 * <p>Recognised marks (UTF-32LE, UTF-32BE, UTF-16BE, UTF-16LE, UTF-8) are skipped. Data
 * without a recognised mark is decoded as UTF-8.
 *
 * @param bytes the bytes to decode; must not be {@code null}
 * @return the decoded characters, excluding any byte order mark
 */
private static char[] byteToCharArray(final byte[] bytes) {
    Charset cs = StandardCharsets.UTF_8;
    int start = 0;
    // BOM detection. Longest marks first: FF FE 00 00 (UTF-32LE) has FF FE (UTF-16LE) as a
    // prefix, so testing UTF-16LE first would make the UTF-32LE branch unreachable.
    if (bytes.length > 3 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE && bytes[2] == 0 && bytes[3] == 0) {
        start = 4;
        cs = Charset.forName("UTF-32LE");
    } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte)0xFE && bytes[3] == (byte)0xFF) {
        start = 4;
        cs = Charset.forName("UTF-32BE");
    } else if (bytes.length > 2 && bytes[0] == (byte)0xEF && bytes[1] == (byte)0xBB && bytes[2] == (byte)0xBF) {
        start = 3;
        cs = StandardCharsets.UTF_8;
    } else if (bytes.length > 1 && bytes[0] == (byte)0xFE && bytes[1] == (byte)0xFF) {
        start = 2;
        cs = StandardCharsets.UTF_16BE;
    } else if (bytes.length > 1 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE) {
        start = 2;
        cs = StandardCharsets.UTF_16LE;
    }

    return new String(bytes, start, bytes.length - start, cs).toCharArray();
}
 
Example 9
Source Project: Audinaut   File: ID3v2File.java    License: GNU General Public License v3.0 6 votes vote down vote up
/**
 * Decodes a text payload whose first byte is an ID3v2 text-encoding id.
 *
 * <p>Encoding ids: {@code 0x00} ISO-8859-1, {@code 0x01} UTF-16 with byte order mark,
 * {@code 0x02} UTF-16BE, {@code 0x03} UTF-8. For the two UTF-16 ids a byte order mark
 * directly after the encoding byte is skipped and decides the byte order. Without a mark,
 * {@code 0x02} data is decoded as big endian starting right after the encoding byte, while
 * {@code 0x01} data (which requires a mark) keeps the historical behaviour of skipping two
 * bytes and decoding as little endian.
 *
 * @param raw the encoding byte followed by the encoded text; may be {@code null} or empty
 * @return the decoded text, or an empty string when there is no input, the encoding id is
 *         unknown, or a {@code 0x01} payload is too short to hold a byte order mark
 */
private String getDecodedString(byte[] raw) {
    // Nothing to decode without at least the encoding byte.
    if (raw == null || raw.length == 0) {
        return "";
    }

    final int ID3_ENC_LATIN = 0x00;
    final int ID3_ENC_UTF16LE = 0x01;
    final int ID3_ENC_UTF16BE = 0x02;
    final int ID3_ENC_UTF8 = 0x03;

    int encid = raw[0] & 0xFF;
    int len = raw.length;

    if (encid == ID3_ENC_LATIN) {
        return new String(raw, 1, len - 1, StandardCharsets.ISO_8859_1);
    }
    if (encid == ID3_ENC_UTF8) {
        return new String(raw, 1, len - 1, StandardCharsets.UTF_8);
    }
    if (encid != ID3_ENC_UTF16LE && encid != ID3_ENC_UTF16BE) {
        // unknown encoding id
        return "";
    }

    // A byte order mark, when present, is authoritative for the byte order.
    boolean bigEndianBom = len >= 3 && (raw[1] & 0xFF) == 0xFE && (raw[2] & 0xFF) == 0xFF;
    boolean littleEndianBom = len >= 3 && (raw[1] & 0xFF) == 0xFF && (raw[2] & 0xFF) == 0xFE;

    boolean bigEndian;
    int start;
    if (bigEndianBom || littleEndianBom) {
        bigEndian = bigEndianBom;
        start = 3;
    } else if (encid == ID3_ENC_UTF16BE) {
        // UTF-16BE text carries no mark: the text starts right after the encoding byte.
        bigEndian = true;
        start = 1;
    } else {
        // Malformed 0x01 payload without a mark: keep the previous lenient behaviour.
        if (len < 3) {
            return "";
        }
        bigEndian = false;
        start = 3;
    }

    return new String(raw, start, len - start,
            bigEndian ? StandardCharsets.UTF_16BE : StandardCharsets.UTF_16LE);
}
 
Example 10
Source Project: es6draft   File: ChakraTest.java    License: MIT License 5 votes vote down vote up
/**
 * Maps the byte order mark reported by the stream to a charset.
 *
 * @param bis the stream whose byte order mark is queried via {@code getBOM()}
 * @return UTF-8, UTF-16LE or UTF-16BE for the matching mark; UTF-8 for anything else
 * @throws IOException if querying the byte order mark fails
 */
private static Charset charsetFor(BOMInputStream bis) throws IOException {
    final ByteOrderMark detected = bis.getBOM();

    final Charset result;
    if (ByteOrderMark.UTF_8.equals(detected)) {
        result = StandardCharsets.UTF_8;
    } else if (ByteOrderMark.UTF_16LE.equals(detected)) {
        result = StandardCharsets.UTF_16LE;
    } else if (ByteOrderMark.UTF_16BE.equals(detected)) {
        result = StandardCharsets.UTF_16BE;
    } else {
        // missing or unhandled mark
        result = StandardCharsets.UTF_8;
    }
    return result;
}
 
Example 11
/**
 * Writes a non-ASCII string with a UTF-16BE JSON content type and checks that the body,
 * read back as UTF-16BE, is the quoted string and that the content type header is kept.
 */
@Test
public void writeUTF16() throws IOException {
	String text = "H\u00e9llo W\u00f6rld";
	MediaType utf16Json = new MediaType("application", "json", StandardCharsets.UTF_16BE);
	MockHttpOutputMessage response = new MockHttpOutputMessage();

	this.converter.write(text, utf16Json, response);

	String written = response.getBodyAsString(StandardCharsets.UTF_16BE);
	assertEquals("Invalid result", "\"" + text + "\"", written);
	assertEquals("Invalid content-type", utf16Json, response.getHeaders().getContentType());
}
 
Example 12
Source Project: Strata   File: UnicodeBom.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a reader that picks its charset from a byte order mark at the start of the stream.
 *
 * <p>Up to three bytes are read ahead. A UTF-8, UTF-16BE or UTF-16LE mark is consumed and
 * selects the charset; any bytes read beyond the mark are pushed back. Without a mark all
 * bytes are pushed back and UTF-8 is used.
 *
 * @param inputStream the stream to read from
 * @throws IOException if reading ahead or pushing bytes back fails
 */
BomReader(InputStream inputStream) throws IOException {
  super(inputStream);

  byte[] head = new byte[MAX_BOM_SIZE];

  // read ahead through a pushback stream so that non-BOM bytes can be returned afterwards
  PushbackInputStream pushback = new PushbackInputStream(inputStream, MAX_BOM_SIZE);
  int headLength = ByteStreams.read(pushback, head, 0, 3);

  // work out how many leading bytes form a BOM and which charset it implies,
  // defaulting to UTF-8 with no bytes consumed
  int bomLength = 0;
  Charset encoding = StandardCharsets.UTF_8;
  if (headLength >= 3 && head[0] == X_EF && head[1] == X_BB && head[2] == X_BF) {
    bomLength = 3;
  } else if (headLength >= 2 && head[0] == X_FE && head[1] == X_FF) {
    bomLength = 2;
    encoding = StandardCharsets.UTF_16BE;
  } else if (headLength >= 2 && head[0] == X_FF && head[1] == X_FE) {
    bomLength = 2;
    encoding = StandardCharsets.UTF_16LE;
  }

  // give back everything that was read past the BOM
  pushback.unread(head, bomLength, headLength - bomLength);

  // use Java standard code now we know the encoding
  this.underlying = new InputStreamReader(pushback, encoding);
}
 
Example 13
Source Project: flink   File: SimpleStringSchemaTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a schema created with UTF-16BE serializes to the same bytes as
 * {@code String.getBytes} and deserializes those bytes back to the original string.
 */
@Test
public void testSerializationWithAnotherCharset() {
	final String text = "之掃描古籍版實乃姚鼐的";
	final Charset utf16 = StandardCharsets.UTF_16BE;
	final byte[] expectedBytes = text.getBytes(utf16);

	// separate schema instances for each direction
	final SimpleStringSchema serializer = new SimpleStringSchema(utf16);
	assertArrayEquals(expectedBytes, serializer.serialize(text));

	final SimpleStringSchema deserializer = new SimpleStringSchema(utf16);
	assertEquals(text, deserializer.deserialize(expectedBytes));
}
 
Example 14
/**
 * Converts a string payload with a UTF-16BE JSON content type header and checks that the
 * payload bytes, decoded as UTF-16BE, are the quoted string and that the header is kept.
 */
@Test
public void toMessageUtf16() {
	String text = "H\u00e9llo W\u00f6rld";
	MimeType utf16Json = new MimeType("application", "json", StandardCharsets.UTF_16BE);

	Map<String, Object> rawHeaders = new HashMap<>();
	rawHeaders.put(MessageHeaders.CONTENT_TYPE, utf16Json);

	MappingJackson2MessageConverter converter = new MappingJackson2MessageConverter();
	Message<?> result = converter.toMessage(text, new MessageHeaders(rawHeaders));

	byte[] encoded = (byte[]) result.getPayload();
	String decoded = new String(encoded, StandardCharsets.UTF_16BE);
	assertEquals("\"" + text + "\"", decoded);
	assertEquals(utf16Json, result.getHeaders().get(MessageHeaders.CONTENT_TYPE));
}
 
Example 15
/**
 * Same scenario as the byte-array variant, but with the converter configured to produce a
 * {@code String} payload: the payload must equal the quoted input and the content type
 * header must be kept.
 */
@Test
public void toMessageUtf16String() {
	String text = "H\u00e9llo W\u00f6rld";
	MimeType utf16Json = new MimeType("application", "json", StandardCharsets.UTF_16BE);

	Map<String, Object> rawHeaders = new HashMap<>();
	rawHeaders.put(MessageHeaders.CONTENT_TYPE, utf16Json);

	MappingJackson2MessageConverter converter = new MappingJackson2MessageConverter();
	converter.setSerializedPayloadClass(String.class);
	Message<?> result = converter.toMessage(text, new MessageHeaders(rawHeaders));

	Object actualPayload = result.getPayload();
	assertEquals("\"" + text + "\"", actualPayload);
	assertEquals(utf16Json, result.getHeaders().get(MessageHeaders.CONTENT_TYPE));
}
 
Example 16
/**
 * Writes a non-ASCII string with a UTF-16BE JSON content type and checks that the body,
 * read back as UTF-16BE, is the quoted string and that the content type header is kept.
 */
@Test
public void writeUTF16() throws IOException {
	String text = "H\u00e9llo W\u00f6rld";
	MediaType utf16Json = new MediaType("application", "json", StandardCharsets.UTF_16BE);
	MockHttpOutputMessage response = new MockHttpOutputMessage();

	converter.write(text, utf16Json, response);

	String written = response.getBodyAsString(StandardCharsets.UTF_16BE);
	String quoted = "\"" + text + "\"";
	assertEquals("Invalid result", quoted, written);
	assertEquals("Invalid content-type", utf16Json, response.getHeaders().getContentType());
}
 
Example 17
/**
 * Writes a non-ASCII string with a UTF-16BE JSON content type and checks that the body,
 * read back as UTF-16BE, equals the input unchanged and that the content type header is kept.
 */
@Test
public void writeUTF16() throws IOException {
	String text = "H\u00e9llo W\u00f6rld";
	MediaType utf16Json = new MediaType("application", "json", StandardCharsets.UTF_16BE);
	MockHttpOutputMessage response = new MockHttpOutputMessage();

	this.converter.write(text, utf16Json, response);

	String written = response.getBodyAsString(StandardCharsets.UTF_16BE);
	assertEquals("Invalid result", text, written);
	assertEquals("Invalid content-type", utf16Json, response.getHeaders().getContentType());
}
 
Example 18
/**
 * Reads a length-prefixed UTF-16BE string from the buffer.
 *
 * <p>The prefix is an unsigned short; twice that many bytes are then read and decoded,
 * i.e. the prefix counts two-byte units.
 *
 * @param buf the buffer to read from
 * @return the decoded string
 */
public static String readShortUTF16BEString(ByteBuf buf) {
	int unitCount = buf.readUnsignedShort();
	byte[] encoded = Utils.readBytes(buf, unitCount * 2);
	return new String(encoded, StandardCharsets.UTF_16BE);
}
 
Example 19
/**
 * Reads an unsigned-short-prefixed UTF-16BE string from the input.
 *
 * @param input the source to read from
 * @return the decoded string
 * @throws IOException if reading from the input fails
 */
private static String readUTF16String(HInput input) throws IOException {
    // the prefix counts two-byte units, so double it to get a byte count
    int byteCount = input.readUnsignedShort() * 2;
    // NOTE(review): the length is passed negated; presumably readByteArray gives a negative
    // argument a special meaning - confirm against HInput before changing this.
    byte[] payload = input.readByteArray(-byteCount);
    return new String(payload, StandardCharsets.UTF_16BE);
}
 
Example 20
/**
 * Determine the XML charset.
 * <p>
 * A byte order mark at the start of the data is evaluated first. If it does not
 * yield a charset and at least four bytes remain after it, the bytes are
 * matched against a fixed list of known byte patterns. If that fails as well,
 * the fallback charset is used. The charset found this way is only the one
 * used for parsing: the returned value is whatever
 * <code>_parseXMLEncoding</code> delivers.
 *
 * @param aBytes
 *        XML byte representation. May not be <code>null</code>.
 * @return The result of <code>_parseXMLEncoding</code>; declared nullable, so
 *         callers should be prepared for <code>null</code>.
 */
@Nullable
public static Charset determineXMLCharset (@Nonnull final byte [] aBytes)
{
  ValueEnforcer.notNull (aBytes, "Bytes");

  Charset aDetected = null;
  int nStartOfs = 0;

  if (aBytes.length > 0)
  {
    // Only the first few bytes (the longest possible BOM) need to be inspected
    try (
        NonBlockingByteArrayInputStream aProbeIS = new NonBlockingByteArrayInputStream (aBytes,
                                                                                        0,
                                                                                        Math.min (EUnicodeBOM.getMaximumByteCount (),
                                                                                                  aBytes.length)))
    {
      final InputStreamAndCharset aBOMInfo = CharsetHelper.getInputStreamAndCharsetFromBOM (aProbeIS);

      // A BOM does not necessarily identify a charset uniquely, but its
      // bytes are skipped in any case
      if (aBOMInfo.hasBOM ())
      {
        nStartOfs = aBOMInfo.getBOM ().getByteCount ();
      }

      // The BOM has a unique charset assigned
      if (aBOMInfo.hasCharset ())
      {
        aDetected = aBOMInfo.getCharset ();
      }
    }
  }

  // No charset from the BOM and enough bytes left for pattern matching?
  if (aDetected == null && aBytes.length - nStartOfs >= 4)
  {
    // The first matching pattern wins
    if (_match (aBytes, nStartOfs, CS_UTF32_BE))
    {
      aDetected = CHARSET_UTF_32BE;
    }
    else if (_match (aBytes, nStartOfs, CS_UTF32_LE))
    {
      aDetected = CHARSET_UTF_32LE;
    }
    else if (_match (aBytes, nStartOfs, CS_UTF16_BE))
    {
      aDetected = StandardCharsets.UTF_16BE;
    }
    else if (_match (aBytes, nStartOfs, CS_UTF16_LE))
    {
      aDetected = StandardCharsets.UTF_16LE;
    }
    else if (_match (aBytes, nStartOfs, CS_UTF8))
    {
      aDetected = StandardCharsets.UTF_8;
    }
    else if (_match (aBytes, nStartOfs, CS_EBCDIC))
    {
      aDetected = CHARSET_EBCDIC;
    }
    else if (_match (aBytes, nStartOfs, CS_IBM290))
    {
      aDetected = CHARSET_IBM290;
    }
  }

  // Nothing detected: parse with the fallback charset
  final Charset aReadCharset = aDetected != null ? aDetected : FALLBACK_CHARSET;

  // Now read with a reader
  return _parseXMLEncoding (aBytes, nStartOfs, aReadCharset);
}