java.nio.charset.StandardCharsets#UTF_16BE

Source File: COSString.java From sambox with Apache License 2.0

6 votes

/**
 * Decodes the raw bytes of this string into text.
 * <p>
 * A leading two-byte byte order mark selects UTF-16 (big endian for {@code FE FF},
 * little endian for {@code FF FE}); the mark itself is not part of the result.
 * Everything else is handed to {@code PDFDocEncoding}.
 *
 * @return the content PDF text string as defined in Chap 7.9 of PDF 32000-1:2008.
 */
public String getString()
{
    final int length = bytes.length;
    if (length >= 2)
    {
        // Fold the first two bytes into one unsigned 16-bit value so the BOM can be switched on.
        final int marker = ((bytes[0] & 0xff) << 8) | (bytes[1] & 0xff);
        switch (marker)
        {
            case 0xFEFF:
                // UTF-16BE
                return new String(bytes, 2, length - 2, StandardCharsets.UTF_16BE);
            case 0xFFFE:
                // UTF-16LE - not in the PDF spec!
                return new String(bytes, 2, length - 2, StandardCharsets.UTF_16LE);
            default:
                break;
        }
    }

    // no BOM: the bytes are PDFDocEncoding
    return PDFDocEncoding.toString(bytes);
}

Source File: Source.java From openjdk-8-source with GNU General Public License v2.0

6 votes

/**
 * Decodes a byte array into characters, using a leading Unicode byte order mark (BOM)
 * to choose the charset. The BOM itself is not included in the result.
 * <p>
 * Recognised marks: UTF-16BE ({@code FE FF}), UTF-16LE ({@code FF FE}),
 * UTF-8 ({@code EF BB BF}), UTF-32LE ({@code FF FE 00 00}) and
 * UTF-32BE ({@code 00 00 FE FF}). Without a mark the bytes are decoded as UTF-8.
 *
 * @param bytes the encoded input; must not be {@code null}
 * @return the decoded characters, without the BOM
 */
private static char[] byteToCharArray(final byte[] bytes) {
    Charset cs = StandardCharsets.UTF_8;
    int start = 0;
    // BOM detection.
    // The UTF-32LE mark (FF FE 00 00) starts with the UTF-16LE mark (FF FE), so the
    // longer mark must be tested first; otherwise the UTF-32LE branch can never be reached.
    if (bytes.length > 3 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE && bytes[2] == 0 && bytes[3] == 0) {
        start = 4;
        cs = Charset.forName("UTF-32LE");
    } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte)0xFE && bytes[3] == (byte)0xFF) {
        start = 4;
        cs = Charset.forName("UTF-32BE");
    } else if (bytes.length > 2 && bytes[0] == (byte)0xEF && bytes[1] == (byte)0xBB && bytes[2] == (byte)0xBF) {
        start = 3;
        cs = StandardCharsets.UTF_8;
    } else if (bytes.length > 1 && bytes[0] == (byte)0xFE && bytes[1] == (byte)0xFF) {
        start = 2;
        cs = StandardCharsets.UTF_16BE;
    } else if (bytes.length > 1 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE) {
        start = 2;
        cs = StandardCharsets.UTF_16LE;
    }

    return new String(bytes, start, bytes.length - start, cs).toCharArray();
}

Source File: Source.java From openjdk-8 with GNU General Public License v2.0

6 votes

/**
 * Converts encoded bytes to a character array, selecting the charset from a leading
 * Unicode byte order mark (BOM) and dropping that mark from the output.
 * <p>
 * UTF-16BE, UTF-16LE, UTF-8, UTF-32LE and UTF-32BE marks are recognised; input without
 * a mark is treated as UTF-8.
 *
 * @param bytes the encoded input; must not be {@code null}
 * @return the decoded characters, without the BOM
 */
private static char[] byteToCharArray(final byte[] bytes) {
    Charset cs = StandardCharsets.UTF_8;
    int start = 0;
    // BOM detection.
    if (bytes.length > 1 && bytes[0] == (byte)0xFE && bytes[1] == (byte)0xFF) {
        start = 2;
        cs = StandardCharsets.UTF_16BE;
    } else if (bytes.length > 1 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE) {
        // FF FE is shared by UTF-16LE and UTF-32LE; two following zero bytes mean UTF-32LE.
        // Testing this inside the FF FE branch keeps the UTF-32LE case reachable.
        if (bytes.length > 3 && bytes[2] == 0 && bytes[3] == 0) {
            start = 4;
            cs = Charset.forName("UTF-32LE");
        } else {
            start = 2;
            cs = StandardCharsets.UTF_16LE;
        }
    } else if (bytes.length > 2 && bytes[0] == (byte)0xEF && bytes[1] == (byte)0xBB && bytes[2] == (byte)0xBF) {
        start = 3;
        cs = StandardCharsets.UTF_8;
    } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte)0xFE && bytes[3] == (byte)0xFF) {
        start = 4;
        cs = Charset.forName("UTF-32BE");
    }

    return new String(bytes, start, bytes.length - start, cs).toCharArray();
}

Source File: ID3v2File.java From Audinaut with GNU General Public License v3.0

6 votes

/**
 * Decodes a text payload whose first byte identifies the text encoding.
 * <p>
 * Encoding bytes handled: {@code 0x00} ISO-8859-1, {@code 0x01} UTF-16 with a byte
 * order mark, {@code 0x02} UTF-16BE without a byte order mark, {@code 0x03} UTF-8.
 * For the two UTF-16 variants a leading {@code FE FF} / {@code FF FE} mark, when
 * present, decides the byte order and is skipped; when absent, {@code 0x01} falls back
 * to little endian and {@code 0x02} to big endian, and no payload bytes are dropped.
 *
 * @param raw encoding byte followed by the encoded text; may be {@code null} or empty
 * @return the decoded text, or an empty string when {@code raw} is {@code null}, empty,
 *         or starts with an unknown encoding byte
 */
private String getDecodedString(byte[] raw) {
    // Nothing to decode: avoids indexing raw[0] on an empty frame.
    if (raw == null || raw.length == 0) {
        return "";
    }

    final int ID3_ENC_LATIN = 0x00;
    final int ID3_ENC_UTF16LE = 0x01;
    final int ID3_ENC_UTF16BE = 0x02;
    final int ID3_ENC_UTF8 = 0x03;

    final int encid = raw[0] & 0xFF;
    final int len = raw.length;
    int start = 1; // text begins right after the encoding byte unless a BOM follows
    Charset cs;

    if (encid == ID3_ENC_LATIN) {
        cs = StandardCharsets.ISO_8859_1;
    } else if (encid == ID3_ENC_UTF8) {
        cs = StandardCharsets.UTF_8;
    } else if (encid == ID3_ENC_UTF16LE || encid == ID3_ENC_UTF16BE) {
        cs = (encid == ID3_ENC_UTF16BE) ? StandardCharsets.UTF_16BE : StandardCharsets.UTF_16LE;
        if (len >= 3) {
            final int b0 = raw[1] & 0xFF;
            final int b1 = raw[2] & 0xFF;
            // Only skip two bytes when they really are a BOM, and let the BOM pick the byte order.
            if (b0 == 0xFE && b1 == 0xFF) {
                cs = StandardCharsets.UTF_16BE;
                start = 3;
            } else if (b0 == 0xFF && b1 == 0xFE) {
                cs = StandardCharsets.UTF_16LE;
                start = 3;
            }
        }
    } else {
        // Unknown encoding byte: keep the historical "empty string" result.
        return "";
    }

    // start <= len always holds here, so the range is valid and no exception needs swallowing.
    return new String(raw, start, len - start, cs);
}

Source File: Source.java From hottub with GNU General Public License v2.0

6 votes

/**
 * Turns encoded bytes into characters. A leading byte order mark picks the charset
 * (UTF-16BE/LE, UTF-32BE/LE or UTF-8) and is stripped; without one, UTF-8 is assumed.
 *
 * @param bytes the encoded input; must not be {@code null}
 * @return the decoded characters, without the byte order mark
 */
private static char[] byteToCharArray(final byte[] bytes) {
    final int length = bytes.length;
    // Unsigned view of the first four bytes; -1 marks "not present" and never matches a BOM byte.
    final int first = length > 0 ? bytes[0] & 0xFF : -1;
    final int second = length > 1 ? bytes[1] & 0xFF : -1;
    final int third = length > 2 ? bytes[2] & 0xFF : -1;
    final int fourth = length > 3 ? bytes[3] & 0xFF : -1;

    Charset charset = StandardCharsets.UTF_8;
    int bomLength = 0;

    if (first == 0xFE && second == 0xFF) {
        charset = StandardCharsets.UTF_16BE;
        bomLength = 2;
    } else if (first == 0xFF && second == 0xFE) {
        // FF FE followed by 00 00 is the UTF-32LE mark, otherwise it is UTF-16LE.
        final boolean utf32 = third == 0 && fourth == 0;
        charset = utf32 ? Charset.forName("UTF-32LE") : StandardCharsets.UTF_16LE;
        bomLength = utf32 ? 4 : 2;
    } else if (first == 0xEF && second == 0xBB && third == 0xBF) {
        bomLength = 3;
    } else if (first == 0 && second == 0 && third == 0xFE && fourth == 0xFF) {
        charset = Charset.forName("UTF-32BE");
        bomLength = 4;
    }

    return new String(bytes, bomLength, length - bomLength, charset).toCharArray();
}

Source File: Source.java From nashorn with GNU General Public License v2.0

6 votes

/**
 * Decodes bytes to characters, picking the charset from a leading Unicode byte order
 * mark (BOM) and omitting the mark from the result. UTF-8 is used when no mark is found.
 * <p>
 * Four-byte marks are tested before two-byte marks because the UTF-32LE mark
 * ({@code FF FE 00 00}) begins with the UTF-16LE mark ({@code FF FE}).
 *
 * @param bytes the encoded input; must not be {@code null}
 * @return the decoded characters, without the BOM
 */
private static char[] byteToCharArray(final byte[] bytes) {
    Charset cs = StandardCharsets.UTF_8;
    int start = 0;
    // BOM detection, longest marks first so a shorter mark cannot shadow a longer one.
    if (bytes.length > 3 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE && bytes[2] == 0 && bytes[3] == 0) {
        start = 4;
        cs = Charset.forName("UTF-32LE");
    } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte)0xFE && bytes[3] == (byte)0xFF) {
        start = 4;
        cs = Charset.forName("UTF-32BE");
    } else if (bytes.length > 2 && bytes[0] == (byte)0xEF && bytes[1] == (byte)0xBB && bytes[2] == (byte)0xBF) {
        start = 3;
        cs = StandardCharsets.UTF_8;
    } else if (bytes.length > 1 && bytes[0] == (byte)0xFE && bytes[1] == (byte)0xFF) {
        start = 2;
        cs = StandardCharsets.UTF_16BE;
    } else if (bytes.length > 1 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE) {
        start = 2;
        cs = StandardCharsets.UTF_16LE;
    }

    return new String(bytes, start, bytes.length - start, cs).toCharArray();
}

Source File: Source.java From openjdk-jdk9 with GNU General Public License v2.0

6 votes

/**
 * Decodes bytes into a character array. The charset is taken from a leading byte
 * order mark, which is removed from the output; UTF-8 is the default when there is none.
 *
 * @param bytes the encoded input; must not be {@code null}
 * @return the decoded characters, without the byte order mark
 */
private static char[] byteToCharArray(final byte[] bytes) {
    final Charset cs;
    final int start;
    if (hasBomPrefix(bytes, 0xFE, 0xFF)) {
        cs = StandardCharsets.UTF_16BE;
        start = 2;
    } else if (hasBomPrefix(bytes, 0xFF, 0xFE, 0x00, 0x00)) {
        // Must be tested before the plain FF FE mark, which is a prefix of this one.
        cs = Charset.forName("UTF-32LE");
        start = 4;
    } else if (hasBomPrefix(bytes, 0xFF, 0xFE)) {
        cs = StandardCharsets.UTF_16LE;
        start = 2;
    } else if (hasBomPrefix(bytes, 0xEF, 0xBB, 0xBF)) {
        cs = StandardCharsets.UTF_8;
        start = 3;
    } else if (hasBomPrefix(bytes, 0x00, 0x00, 0xFE, 0xFF)) {
        cs = Charset.forName("UTF-32BE");
        start = 4;
    } else {
        cs = StandardCharsets.UTF_8;
        start = 0;
    }

    return new String(bytes, start, bytes.length - start, cs).toCharArray();
}

/** Returns whether {@code data} begins with the given unsigned byte values. */
private static boolean hasBomPrefix(final byte[] data, final int... prefix) {
    if (data.length < prefix.length) {
        return false;
    }
    for (int i = 0; i < prefix.length; i++) {
        if ((data[i] & 0xFF) != prefix[i]) {
            return false;
        }
    }
    return true;
}

Source File: ProgramBuilder.java From ghidra with Apache License 2.0

6 votes

/**
 * Encodes {@code string} with {@code encoding}, writes the bytes at {@code address} and,
 * for some encodings, applies a string data type there.
 * <ul>
 * <li>US-ASCII / UTF-8: optionally appends one zero byte; always applies a
 * {@code StringDataType}.</li>
 * <li>UTF-16BE / UTF-16LE: when null terminated, appends two zero bytes and applies a
 * {@code TerminatedUnicodeDataType}; otherwise only the bytes are written.</li>
 * <li>Any other charset: only the bytes are written, without a terminator.</li>
 * </ul>
 *
 * @param address where the bytes are written
 * @param string the text to encode
 * @param encoding the charset used to encode the text
 * @param nullTerminate whether to append a zero terminator (ignored for other charsets)
 * @throws Exception if writing the bytes or applying the data type fails
 */
public void createEncodedString(String address, String string, Charset encoding,
		boolean nullTerminate) throws Exception {
	byte[] encoded = string.getBytes(encoding);

	final boolean byteSized =
		encoding == StandardCharsets.US_ASCII || encoding == StandardCharsets.UTF_8;
	final boolean utf16 =
		encoding == StandardCharsets.UTF_16BE || encoding == StandardCharsets.UTF_16LE;

	// Arrays.copyOf pads with zeros, which serves as the terminator.
	if (nullTerminate && (byteSized || utf16)) {
		int terminatorSize = byteSized ? 1 : 2;
		encoded = Arrays.copyOf(encoded, encoded.length + terminatorSize);
	}

	setBytes(address, encoded);

	if (byteSized) {
		applyDataType(address, new StringDataType(), 1);
	}
	else if (utf16 && nullTerminate) {
		applyDataType(address, new TerminatedUnicodeDataType(), 1);
	}
}

Source File: MemSearchAsciiTest.java From ghidra with Apache License 2.0

6 votes

/**
 * Selects the entry of the "Encoding Options" combo box that is the given charset.
 * UTF-16BE and UTF-16LE are mapped to UTF-16 first, since BE and LE are not choices
 * in the combo box. If no entry matches, the selection is left untouched.
 *
 * @param encoding the charset to select
 * @throws Exception declared by the signature; see the called helpers
 */
@SuppressWarnings("unchecked")
private void setEncoding(Charset encoding) throws Exception {
	JComboBox<Charset> combo =
		(JComboBox<Charset>) findComponentByName(pane, "Encoding Options", false);

	final boolean endianSpecific =
		encoding == StandardCharsets.UTF_16BE || encoding == StandardCharsets.UTF_16LE;
	final Charset wanted = endianSpecific ? StandardCharsets.UTF_16 : encoding;

	int position = 0;
	while (position < combo.getItemCount()) {
		if (combo.getItemAt(position) == wanted) {
			// lambda needs an effectively final copy of the index
			final int match = position;
			runSwing(() -> combo.setSelectedIndex(match));
			return;
		}
		position++;
	}
}

Source File: ChakraTest.java From es6draft with MIT License

5 votes

/**
 * Maps the byte order mark reported by the stream to a charset.
 *
 * @param bis the stream whose byte order mark is inspected
 * @return UTF-16LE or UTF-16BE for the matching marks; UTF-8 for a UTF-8 mark and for
 *         every other case
 * @throws IOException if the byte order mark cannot be read
 */
private static Charset charsetFor(BOMInputStream bis) throws IOException {
    final ByteOrderMark detected = bis.getBOM();
    final Charset result;
    if (ByteOrderMark.UTF_16LE.equals(detected)) {
        result = StandardCharsets.UTF_16LE;
    } else if (ByteOrderMark.UTF_16BE.equals(detected)) {
        result = StandardCharsets.UTF_16BE;
    } else {
        // Covers an explicit UTF-8 mark as well as "no mark" and any unhandled mark.
        result = StandardCharsets.UTF_8;
    }
    return result;
}

Source File: JsonbHttpMessageConverterTests.java From java-technology-stack with MIT License

5 votes

/**
 * Writes a string with non-ASCII characters using a UTF-16BE JSON content type and
 * checks that the body, read back as UTF-16BE, and the content-type header both match.
 */
@Test
public void writeUTF16() throws IOException {
	String expectedBody = "H\u00e9llo W\u00f6rld";
	MediaType utf16Json = new MediaType("application", "json", StandardCharsets.UTF_16BE);
	MockHttpOutputMessage response = new MockHttpOutputMessage();

	this.converter.write(expectedBody, utf16Json, response);

	String actualBody = response.getBodyAsString(StandardCharsets.UTF_16BE);
	assertEquals("Invalid result", expectedBody, actualBody);

	MediaType actualType = response.getHeaders().getContentType();
	assertEquals("Invalid content-type", utf16Json, actualType);
}

Source File: MappingJackson2HttpMessageConverterTests.java From java-technology-stack with MIT License

5 votes

/**
 * Writes a string with non-ASCII characters using a UTF-16BE JSON content type and
 * checks that the body, read back as UTF-16BE, is the quoted string and that the
 * content-type header matches.
 */
@Test
public void writeUTF16() throws IOException {
	String text = "H\u00e9llo W\u00f6rld";
	MediaType utf16Json = new MediaType("application", "json", StandardCharsets.UTF_16BE);
	MockHttpOutputMessage response = new MockHttpOutputMessage();

	converter.write(text, utf16Json, response);

	// the converter is expected to emit the text as a JSON string literal
	String quoted = "\"" + text + "\"";
	String actualBody = response.getBodyAsString(StandardCharsets.UTF_16BE);
	assertEquals("Invalid result", quoted, actualBody);

	MediaType actualType = response.getHeaders().getContentType();
	assertEquals("Invalid content-type", utf16Json, actualType);
}

Source File: MappingJackson2MessageConverterTests.java From java-technology-stack with MIT License

5 votes

/**
 * Converts a string to a message with a UTF-16BE JSON content type while the
 * serialized payload class is {@code String}, and checks that the payload is the
 * quoted string and that the content-type header is preserved.
 */
@Test
public void toMessageUtf16String() {
	String text = "H\u00e9llo W\u00f6rld";
	MimeType utf16Json = new MimeType("application", "json", StandardCharsets.UTF_16BE);

	Map<String, Object> headerValues = new HashMap<>();
	headerValues.put(MessageHeaders.CONTENT_TYPE, utf16Json);
	MessageHeaders messageHeaders = new MessageHeaders(headerValues);

	MappingJackson2MessageConverter jsonConverter = new MappingJackson2MessageConverter();
	jsonConverter.setSerializedPayloadClass(String.class);
	Message<?> result = jsonConverter.toMessage(text, messageHeaders);

	String quoted = "\"" + text + "\"";
	assertEquals(quoted, result.getPayload());
	assertEquals(utf16Json, result.getHeaders().get(MessageHeaders.CONTENT_TYPE));
}

Source File: MappingJackson2MessageConverterTests.java From java-technology-stack with MIT License

5 votes

/**
 * Converts a string to a message with a UTF-16BE JSON content type and checks that the
 * byte payload, decoded as UTF-16BE, is the quoted string and that the content-type
 * header is preserved.
 */
@Test
public void toMessageUtf16() {
	String text = "H\u00e9llo W\u00f6rld";
	MimeType utf16Json = new MimeType("application", "json", StandardCharsets.UTF_16BE);

	Map<String, Object> headerValues = new HashMap<>();
	headerValues.put(MessageHeaders.CONTENT_TYPE, utf16Json);
	MessageHeaders messageHeaders = new MessageHeaders(headerValues);

	MappingJackson2MessageConverter jsonConverter = new MappingJackson2MessageConverter();
	Message<?> result = jsonConverter.toMessage(text, messageHeaders);

	String quoted = "\"" + text + "\"";
	String decoded = new String((byte[]) result.getPayload(), StandardCharsets.UTF_16BE);
	assertEquals(quoted, decoded);
	assertEquals(utf16Json, result.getHeaders().get(MessageHeaders.CONTENT_TYPE));
}

Source File: SimpleStringSchemaTest.java From flink with Apache License 2.0

5 votes

/**
 * Checks that a schema built with UTF-16BE serializes a string to the same bytes as
 * {@code String.getBytes} and deserializes those bytes back to the original string.
 */
@Test
public void testSerializationWithAnotherCharset() {
	final String text = "之掃描古籍版實乃姚鼐的";
	final Charset utf16be = StandardCharsets.UTF_16BE;
	final byte[] expectedBytes = text.getBytes(utf16be);

	final SimpleStringSchema writer = new SimpleStringSchema(utf16be);
	assertArrayEquals(expectedBytes, writer.serialize(text));

	// a separate instance is used for reading, as in the round trip being verified
	final SimpleStringSchema reader = new SimpleStringSchema(utf16be);
	assertEquals(text, reader.deserialize(expectedBytes));
}

Source File: UnicodeBom.java From Strata with Apache License 2.0

5 votes

/**
 * Creates a reader that detects a UTF-8, UTF-16BE or UTF-16LE byte order mark at the
 * start of the stream, consumes it, and decodes the rest with the matching charset.
 * Without a recognised mark nothing is consumed and UTF-8 is used.
 *
 * @param inputStream the stream to read from
 * @throws IOException if reading from or pushing back into the stream fails
 */
BomReader(InputStream inputStream) throws IOException {
  super(inputStream);

  byte[] head = new byte[MAX_BOM_SIZE];

  // read up to 3 bytes through a pushback stream so the non-BOM bytes can be returned
  PushbackInputStream pushback = new PushbackInputStream(inputStream, MAX_BOM_SIZE);
  int available = ByteStreams.read(pushback, head, 0, 3);

  // look for BOM and adapt, defaulting to UTF-8 with no bytes consumed
  Charset charset = StandardCharsets.UTF_8;
  int bomLength = 0;
  if (available >= 3 && head[0] == X_EF && head[1] == X_BB && head[2] == X_BF) {
    bomLength = 3;
  } else if (available >= 2 && head[0] == X_FE && head[1] == X_FF) {
    charset = StandardCharsets.UTF_16BE;
    bomLength = 2;
  } else if (available >= 2 && head[0] == X_FF && head[1] == X_FE) {
    charset = StandardCharsets.UTF_16LE;
    bomLength = 2;
  }

  // give back everything that was read beyond the BOM
  pushback.unread(head, bomLength, available - bomLength);

  // use Java standard code now we know the encoding
  this.underlying = new InputStreamReader(pushback, charset);
}

Source File: GsonHttpMessageConverterTests.java From spring-analysis-note with MIT License

5 votes

@Test
public void writeUTF16() throws IOException {
	MediaType contentType = new MediaType("application", "json", StandardCharsets.UTF_16BE);
	MockHttpOutputMessage outputMessage = new MockHttpOutputMessage();
	String body = "H\u00e9llo W\u00f6rld";
	this.converter.write(body, contentType, outputMessage);
	assertEquals("Invalid result", "\"" + body + "\"", outputMessage.getBodyAsString(StandardCharsets.UTF_16BE));
	assertEquals("Invalid content-type", contentType, outputMessage.getHeaders().getContentType());
}

Source File: StringSerializer.java From ProtocolSupportBungee with GNU Affero General Public License v3.0

4 votes

/**
 * Reads a string stored as an unsigned short count followed by that many two-byte
 * UTF-16BE code units.
 *
 * @param buf the buffer to read from
 * @return the decoded string
 */
public static String readShortUTF16BEString(ByteBuf buf) {
	// the prefix counts 16-bit units, so the byte length is twice that
	int byteCount = buf.readUnsignedShort() * 2;
	byte[] encoded = Utils.readBytes(buf, byteCount);
	return new String(encoded, StandardCharsets.UTF_16BE);
}

Source File: ServerPinger.java From Launcher with GNU General Public License v3.0

4 votes

/**
 * Reads a string stored as an unsigned short count of two-byte units followed by the
 * UTF-16BE encoded data.
 *
 * @param input the input to read from
 * @return the decoded string
 * @throws IOException if reading from the input fails
 */
private static String readUTF16String(HInput input) throws IOException {
    int byteCount = input.readUnsignedShort() * 2;
    // NOTE(review): the length is passed negated on purpose in the original; presumably
    // readByteArray treats a negative value specially - confirm against HInput.
    byte[] data = input.readByteArray(-byteCount);
    return new String(data, StandardCharsets.UTF_16BE);
}

Source File: XMLCharsetDeterminator.java From ph-commons with Apache License 2.0

4 votes

/**
 * Determine the XML charset.
 * <p>
 * The charset used for parsing the XML declaration is chosen in this order: a charset
 * uniquely identified by a byte order mark; otherwise the first matching byte
 * signature (tested after any byte order mark, and only if at least 4 bytes remain);
 * otherwise the fallback charset. The bytes are then handed to the XML encoding parser
 * together with that charset.
 *
 * @param aBytes
 *        XML byte representation. May not be <code>null</code>.
 * @return <code>null</code> if no charset was found. In that case you might
 *         want to try UTF-8 as the fallback.
 */
@Nullable
public static Charset determineXMLCharset (@Nonnull final byte [] aBytes)
{
  ValueEnforcer.notNull (aBytes, "Bytes");

  Charset aParseCharset = null;
  int nSearchOfs = 0;

  if (aBytes.length > 0)
  {
    // Only the first bytes can hold a BOM - limit the stream to the longest possible BOM
    final int nMaxBOMBytes = Math.min (EUnicodeBOM.getMaximumByteCount (), aBytes.length);
    try (NonBlockingByteArrayInputStream aIS = new NonBlockingByteArrayInputStream (aBytes, 0, nMaxBOMBytes))
    {
      final InputStreamAndCharset aISC = CharsetHelper.getInputStreamAndCharsetFromBOM (aIS);

      // A BOM does not necessarily identify a unique charset, but its bytes are always
      // skipped for the rest of the determination
      if (aISC.hasBOM ())
      {
        nSearchOfs = aISC.getBOM ().getByteCount ();
      }

      // The BOM has a unique charset assigned
      if (aISC.hasCharset ())
      {
        aParseCharset = aISC.getCharset ();
      }
    }
  }

  // No charset found and enough bytes left? Then try the known byte signatures in order
  final boolean bEnoughBytesLeft = aBytes.length - nSearchOfs >= 4;
  if (aParseCharset == null && bEnoughBytesLeft)
  {
    if (_match (aBytes, nSearchOfs, CS_UTF32_BE))
    {
      aParseCharset = CHARSET_UTF_32BE;
    }
    else if (_match (aBytes, nSearchOfs, CS_UTF32_LE))
    {
      aParseCharset = CHARSET_UTF_32LE;
    }
    else if (_match (aBytes, nSearchOfs, CS_UTF16_BE))
    {
      aParseCharset = StandardCharsets.UTF_16BE;
    }
    else if (_match (aBytes, nSearchOfs, CS_UTF16_LE))
    {
      aParseCharset = StandardCharsets.UTF_16LE;
    }
    else if (_match (aBytes, nSearchOfs, CS_UTF8))
    {
      aParseCharset = StandardCharsets.UTF_8;
    }
    else if (_match (aBytes, nSearchOfs, CS_EBCDIC))
    {
      aParseCharset = CHARSET_EBCDIC;
    }
    else if (_match (aBytes, nSearchOfs, CS_IBM290))
    {
      aParseCharset = CHARSET_IBM290;
    }
  }

  // Neither BOM nor signature identified a charset - use the fallback charset
  if (aParseCharset == null)
  {
    aParseCharset = FALLBACK_CHARSET;
  }

  // Now read with a reader
  return _parseXMLEncoding (aBytes, nSearchOfs, aParseCharset);
}

Java Code Examples for java.nio.charset.StandardCharsets#UTF_16BE