Java Code Examples for java.nio.charset.StandardCharsets#UTF_16LE

The following examples show how to use java.nio.charset.StandardCharsets#UTF_16LE. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: ServerSentEventHttpMessageWriterTests.java From spring-analysis-note with MIT License

6 votes

/**
 * Writes one POJO as a server-sent event with a UTF-16LE media type and checks
 * that the single emitted buffer, decoded with that charset, holds the complete
 * event text. Both POJO values end in U+1D11E, a character outside the BMP.
 */
@Test // SPR-16516, SPR-16539
public void writePojoWithCustomEncoding() {
	Charset charset = StandardCharsets.UTF_16LE;
	MediaType eventStreamType = new MediaType("text", "event-stream", charset);
	Flux<Pojo> pojos = Flux.just(new Pojo("foo\uD834\uDD1E", "bar\uD834\uDD1E"));
	String expectedEvent = "data:{\"foo\":\"foo\uD834\uDD1E\",\"bar\":\"bar\uD834\uDD1E\"}\n\n";

	testWrite(pojos, eventStreamType, outputMessage, Pojo.class);

	// The requested media type, charset included, must be echoed in the headers.
	assertEquals(eventStreamType, outputMessage.getHeaders().getContentType());

	StepVerifier.create(outputMessage.getBody())
			.consumeNextWith(buffer -> {
				// Decode first, then release the buffer before asserting on its content.
				String decoded = DataBufferTestUtils.dumpString(buffer, charset);
				DataBufferUtils.release(buffer);
				assertEquals(expectedEvent, decoded);
			})
			.expectComplete()
			.verify();
}

Example 2

Source File: Source.java From openjdk-8 with GNU General Public License v2.0

6 votes

/**
 * Decodes raw bytes into characters, honouring a leading Unicode byte order mark.
 *
 * <p>Recognised marks are UTF-16BE (FE FF), UTF-32LE (FF FE 00 00), UTF-16LE (FF FE),
 * UTF-8 (EF BB BF) and UTF-32BE (00 00 FE FF). The mark itself is not part of the
 * result. Input without a recognised mark is decoded as UTF-8.
 *
 * @param bytes the raw content; must not be {@code null}
 * @return the decoded characters, excluding any byte order mark
 */
private static char[] byteToCharArray(final byte[] bytes) {
    Charset cs = StandardCharsets.UTF_8;
    int start = 0;
    // BOM detection.
    if (bytes.length > 1 && bytes[0] == (byte)0xFE && bytes[1] == (byte)0xFF) {
        start = 2;
        cs = StandardCharsets.UTF_16BE;
    } else if (bytes.length > 1 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE) {
        // The UTF-32LE mark (FF FE 00 00) starts with the UTF-16LE mark (FF FE), so the
        // longer mark has to be tested inside this branch; as a separate, later branch
        // it could never be reached.
        if (bytes.length > 3 && bytes[2] == 0 && bytes[3] == 0) {
            start = 4;
            cs = Charset.forName("UTF-32LE");
        } else {
            start = 2;
            cs = StandardCharsets.UTF_16LE;
        }
    } else if (bytes.length > 2 && bytes[0] == (byte)0xEF && bytes[1] == (byte)0xBB && bytes[2] == (byte)0xBF) {
        start = 3;
        cs = StandardCharsets.UTF_8;
    } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte)0xFE && bytes[3] == (byte)0xFF) {
        start = 4;
        cs = Charset.forName("UTF-32BE");
    }

    return new String(bytes, start, bytes.length - start, cs).toCharArray();
}

Example 3

Source File: FileRenameInformation2.java From jcifs with GNU Lesser General Public License v2.1

6 votes

/**
 * {@inheritDoc}
 * <p>
 * Reads the replace-if-exists flag from the first byte, steps over the 16-byte
 * fixed prefix, reads a 4-byte name length and decodes that many bytes as a
 * UTF-16LE file name. The {@code len} argument is not consulted.
 *
 * @return the number of bytes consumed, starting at {@code bufferIndex}
 * @see jcifs.Decodable#decode(byte[], int, int)
 */
@Override
public int decode ( byte[] buffer, int bufferIndex, int len ) throws SMBProtocolDecodingException {
    int pos = bufferIndex;

    // Any non-zero first byte means "replace if exists".
    this.replaceIfExists = buffer[ pos ] != 0;
    // Two 8-byte slots precede the name length; only the first byte is read here.
    // NOTE(review): presumably flag + padding followed by a root directory handle - confirm against the protocol spec.
    pos += 16;

    final int nameLen = SMBUtil.readInt4(buffer, pos);
    pos += 4;

    // nameLen is taken from the buffer as-is; it is not range-checked here.
    final byte[] nameBytes = new byte[nameLen];
    System.arraycopy(buffer, pos, nameBytes, 0, nameLen);
    this.fileName = new String(nameBytes, StandardCharsets.UTF_16LE);

    return ( pos + nameLen ) - bufferIndex;
}

Example 4

Source File: Source.java From openjdk-8-source with GNU General Public License v2.0

6 votes

/**
 * Converts raw bytes to characters, using a leading byte order mark (if any) to pick
 * the charset.
 *
 * <p>Handled marks: FE FF (UTF-16BE), FF FE 00 00 (UTF-32LE), FF FE (UTF-16LE),
 * EF BB BF (UTF-8) and 00 00 FE FF (UTF-32BE). The mark is skipped when decoding.
 * If no mark is present the content is treated as UTF-8.
 *
 * @param bytes the raw content; must not be {@code null}
 * @return the decoded characters without the byte order mark
 */
private static char[] byteToCharArray(final byte[] bytes) {
    Charset cs = StandardCharsets.UTF_8;
    int start = 0;
    // BOM detection.
    if (bytes.length > 1 && bytes[0] == (byte)0xFE && bytes[1] == (byte)0xFF) {
        start = 2;
        cs = StandardCharsets.UTF_16BE;
    } else if (bytes.length > 1 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE) {
        // FF FE is a prefix of the UTF-32LE mark FF FE 00 00. Checking the 4-byte form
        // here keeps it reachable; a later else-if would always lose to this branch.
        if (bytes.length > 3 && bytes[2] == 0 && bytes[3] == 0) {
            start = 4;
            cs = Charset.forName("UTF-32LE");
        } else {
            start = 2;
            cs = StandardCharsets.UTF_16LE;
        }
    } else if (bytes.length > 2 && bytes[0] == (byte)0xEF && bytes[1] == (byte)0xBB && bytes[2] == (byte)0xBF) {
        start = 3;
        cs = StandardCharsets.UTF_8;
    } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte)0xFE && bytes[3] == (byte)0xFF) {
        start = 4;
        cs = Charset.forName("UTF-32BE");
    }

    return new String(bytes, start, bytes.length - start, cs).toCharArray();
}

Example 5

Source File: Source.java From hottub with GNU General Public License v2.0

6 votes

/**
 * Decodes raw bytes into characters, selecting the charset from a leading byte
 * order mark and falling back to UTF-8 when none is present. The mark itself is
 * excluded from the result.
 *
 * @param bytes the raw content; must not be {@code null}
 * @return the decoded characters
 */
private static char[] byteToCharArray(final byte[] bytes) {
    final int len = bytes.length;
    // Unsigned view of the first four bytes; -1 stands for "not present".
    final int b0 = len > 0 ? bytes[0] & 0xFF : -1;
    final int b1 = len > 1 ? bytes[1] & 0xFF : -1;
    final int b2 = len > 2 ? bytes[2] & 0xFF : -1;
    final int b3 = len > 3 ? bytes[3] & 0xFF : -1;

    int bomLength = 0;
    Charset charset = StandardCharsets.UTF_8;

    if (b0 == 0xFE && b1 == 0xFF) {
        bomLength = 2;
        charset = StandardCharsets.UTF_16BE;
    } else if (b0 == 0xFF && b1 == 0xFE) {
        // FF FE alone is UTF-16LE; followed by 00 00 it is the UTF-32LE mark.
        if (b2 == 0 && b3 == 0) {
            bomLength = 4;
            charset = Charset.forName("UTF-32LE");
        } else {
            bomLength = 2;
            charset = StandardCharsets.UTF_16LE;
        }
    } else if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
        bomLength = 3;
    } else if (b0 == 0 && b1 == 0 && b2 == 0xFE && b3 == 0xFF) {
        bomLength = 4;
        charset = Charset.forName("UTF-32BE");
    }

    return new String(bytes, bomLength, len - bomLength, charset).toCharArray();
}

Example 6

Source File: Source.java From openjdk-jdk9 with GNU General Public License v2.0

6 votes

/**
 * Turns a byte array into a char array. A byte order mark at the start of the
 * input decides the charset (UTF-16BE/LE, UTF-32BE/LE or UTF-8) and is dropped
 * from the output; input without a mark is read as UTF-8.
 *
 * @param bytes the raw content; must not be {@code null}
 * @return the decoded characters
 */
private static char[] byteToCharArray(final byte[] bytes) {
    final int len = bytes.length;
    // Leading bytes widened to unsigned ints; -1 marks a position past the end.
    final int b0 = len > 0 ? bytes[0] & 0xFF : -1;
    final int b1 = len > 1 ? bytes[1] & 0xFF : -1;
    final int b2 = len > 2 ? bytes[2] & 0xFF : -1;
    final int b3 = len > 3 ? bytes[3] & 0xFF : -1;

    int skip = 0;
    Charset decodeWith = StandardCharsets.UTF_8;

    if (b0 == 0xFE && b1 == 0xFF) {
        skip = 2;
        decodeWith = StandardCharsets.UTF_16BE;
    } else if (b0 == 0xFF && b1 == 0xFE) {
        // The 4-byte UTF-32LE mark shares its first two bytes with UTF-16LE.
        if (b2 == 0 && b3 == 0) {
            skip = 4;
            decodeWith = Charset.forName("UTF-32LE");
        } else {
            skip = 2;
            decodeWith = StandardCharsets.UTF_16LE;
        }
    } else if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
        skip = 3;
    } else if (b0 == 0 && b1 == 0 && b2 == 0xFE && b3 == 0xFF) {
        skip = 4;
        decodeWith = Charset.forName("UTF-32BE");
    }

    return new String(bytes, skip, len - skip, decodeWith).toCharArray();
}

Example 7

Source File: MappedCharset.java From editorconfig-netbeans with MIT License

6 votes

/**
 * Selects the charset for the given encoding name and, for "UTF-8-BOM",
 * "UTF-16BE" and "UTF-16LE", additionally sets {@code mark} to {@code FILE_MARK}.
 * "UTF-8" and every unrecognised name resolve to UTF-8 and leave {@code mark} untouched.
 *
 * @param name the encoding name; a {@code null} value fails in the switch
 */
private void init(String name) {
  final boolean needsMark;
  switch (name) {
    case "ISO-8859-1":
      charset = StandardCharsets.ISO_8859_1;
      needsMark = false;
      break;
    case "UTF-8-BOM":
      charset = StandardCharsets.UTF_8;
      needsMark = true;
      break;
    case "UTF-16BE":
      charset = StandardCharsets.UTF_16BE;
      needsMark = true;
      break;
    case "UTF-16LE":
      charset = StandardCharsets.UTF_16LE;
      needsMark = true;
      break;
    default:
      // Plain "UTF-8" lands here too: same charset, no mark.
      charset = StandardCharsets.UTF_8;
      needsMark = false;
      break;
  }
  if (needsMark) {
    mark = FILE_MARK;
  }
}

Example 8

Source File: RpcInputStreamTest.java From p4ic4idea with Apache License 2.0

6 votes

/**
 * Extracts a UTF-16LE resource with Windows line endings to a temp file and reads
 * it twice through {@code RpcInputStream}: first with an explicit UTF-16LE charset
 * (5 bytes expected), then with a {@code null} charset (file size minus the
 * 2-byte BOM expected).
 */
@Test
public void testReadUtf16LEWithBomAndWinLineEnding() throws IOException, FileEncoderException {
  final String resourcePath = "com/perforce/p4java/common/io/utf_16LE_win_line_ending.txt";
  final int bomLength = 2;
  final int maxRead = 1000;

  File extracted = tmpDir.newFile("utf_16LE_win_line_ending.txt");
  P4ExtFileUtils.extractResource(this, resourcePath, extracted, true);
  mockFileName = extracted.getAbsolutePath();
  file = new RpcPerforceFile(mockFileName, RpcPerforceFileType.FST_UTF16, ClientLineEnding.FST_L_CRLF);
  byte[] sink = new byte[1001];

  // First pass: explicit UTF-16LE charset.
  rpcInputStream = new RpcInputStream(file, StandardCharsets.UTF_16LE);
  int convertedCount = rpcInputStream.read(sink, 0, maxRead);
  assertThat(convertedCount, is(5));

  // Second pass: same file type, no charset supplied.
  file.setFileType(RpcPerforceFileType.FST_UTF16);
  rpcInputStream = new RpcInputStream(file, null);
  int rawCount = rpcInputStream.read(sink, 0, maxRead);
  int sizeWithoutBom = Files.readAllBytes(file.toPath()).length - bomLength;
  assertThat(rawCount, is(sizeWithoutBom));
}

Example 9

Source File: Source.java From TencentKona-8 with GNU General Public License v2.0

6 votes

/**
 * Produces the characters encoded by {@code bytes}. The charset is taken from a
 * leading byte order mark when one is recognised, otherwise UTF-8 is assumed;
 * the mark bytes are never part of the returned array.
 *
 * @param bytes the raw content; must not be {@code null}
 * @return the decoded characters
 */
private static char[] byteToCharArray(final byte[] bytes) {
    final int total = bytes.length;
    // First four bytes as unsigned values, or -1 where the array is too short.
    final int b0 = total > 0 ? bytes[0] & 0xFF : -1;
    final int b1 = total > 1 ? bytes[1] & 0xFF : -1;
    final int b2 = total > 2 ? bytes[2] & 0xFF : -1;
    final int b3 = total > 3 ? bytes[3] & 0xFF : -1;

    int offset = 0;
    Charset detected = StandardCharsets.UTF_8;

    if (b0 == 0xFE && b1 == 0xFF) {
        offset = 2;
        detected = StandardCharsets.UTF_16BE;
    } else if (b0 == 0xFF && b1 == 0xFE) {
        // Distinguish UTF-32LE (FF FE 00 00) from UTF-16LE (FF FE).
        if (b2 == 0 && b3 == 0) {
            offset = 4;
            detected = Charset.forName("UTF-32LE");
        } else {
            offset = 2;
            detected = StandardCharsets.UTF_16LE;
        }
    } else if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
        offset = 3;
    } else if (b0 == 0 && b1 == 0 && b2 == 0xFE && b3 == 0xFF) {
        offset = 4;
        detected = Charset.forName("UTF-32BE");
    }

    return new String(bytes, offset, total - offset, detected).toCharArray();
}

Example 10

Source File: ProgramBuilder.java From ghidra with Apache License 2.0

6 votes

/**
 * Encodes {@code string} with {@code encoding} and stores the bytes at {@code address}.
 * <p>
 * For US-ASCII and UTF-8 a one-byte terminator is appended on request and a
 * {@code StringDataType} is always applied. For UTF-16BE/LE a two-byte terminator is
 * appended on request, and a {@code TerminatedUnicodeDataType} is applied only in that
 * terminated case. For any other charset the bytes are stored unterminated and no
 * data type is applied.
 *
 * @param address where to store the bytes
 * @param string the text to encode
 * @param encoding the charset used for encoding; compared by identity against the standard charsets
 * @param nullTerminate whether to append a zero terminator (only honoured for the charsets listed above)
 * @throws Exception if storing the bytes or applying the data type fails
 */
public void createEncodedString(String address, String string, Charset encoding,
		boolean nullTerminate) throws Exception {
	byte[] encoded = string.getBytes(encoding);

	boolean singleByteUnit =
		encoding == StandardCharsets.US_ASCII || encoding == StandardCharsets.UTF_8;
	boolean utf16 =
		encoding == StandardCharsets.UTF_16BE || encoding == StandardCharsets.UTF_16LE;

	if (nullTerminate && (singleByteUnit || utf16)) {
		// Arrays.copyOf zero-fills the extra slots, which forms the terminator.
		int terminatorSize = singleByteUnit ? 1 : 2;
		encoded = Arrays.copyOf(encoded, encoded.length + terminatorSize);
	}

	setBytes(address, encoded);

	if (singleByteUnit) {
		applyDataType(address, new StringDataType(), 1);
	}
	else if (utf16 && nullTerminate) {
		applyDataType(address, new TerminatedUnicodeDataType(), 1);
	}
}

Example 11

Source File: FileRenameInformation2.java From jcifs-ng with GNU Lesser General Public License v2.1

6 votes

/**
 * {@inheritDoc}
 * <p>
 * Layout as read here: a flag byte at offset 0, a 4-byte name length at offset 16,
 * then the name itself as UTF-16LE bytes. {@code len} is not used.
 *
 * @return the count of bytes consumed from {@code bufferIndex} onwards
 * @see jcifs.Decodable#decode(byte[], int, int)
 */
@Override
public int decode ( byte[] buffer, int bufferIndex, int len ) throws SMBProtocolDecodingException {
    final int flagOffset = bufferIndex;
    // Skips both 8-byte slots that sit in front of the length field.
    final int lengthOffset = flagOffset + 8 + 8;
    final int nameOffset = lengthOffset + 4;

    this.replaceIfExists = buffer[ flagOffset ] != 0;

    final int nameLen = SMBUtil.readInt4(buffer, lengthOffset);

    // The length comes straight from the buffer and is not validated here.
    final byte[] nameBytes = new byte[nameLen];
    System.arraycopy(buffer, nameOffset, nameBytes, 0, nameLen);
    this.fileName = new String(nameBytes, StandardCharsets.UTF_16LE);

    return ( nameOffset + nameLen ) - flagOffset;
}

Example 12

Source File: ServerSentEventHttpMessageWriterTests.java From java-technology-stack with MIT License

5 votes

/**
 * Writes one POJO as a server-sent event with a UTF-16LE media type and checks
 * the four buffers emitted in order: the {@code data:} prefix, the JSON payload
 * and two newline buffers. Both POJO values end in U+1D11E, a character outside
 * the BMP.
 */
@Test // SPR-16516, SPR-16539
public void writePojoWithCustomEncoding() {
	Charset charset = StandardCharsets.UTF_16LE;
	MediaType eventStreamType = new MediaType("text", "event-stream", charset);
	Flux<Pojo> pojos = Flux.just(new Pojo("foo\uD834\uDD1E", "bar\uD834\uDD1E"));
	String expectedJson = "{\"foo\":\"foo\uD834\uDD1E\",\"bar\":\"bar\uD834\uDD1E\"}";

	testWrite(pojos, eventStreamType, outputMessage, Pojo.class);

	// The requested media type, charset included, must be echoed in the headers.
	assertEquals(eventStreamType, outputMessage.getHeaders().getContentType());

	// Each step decodes the buffer, releases it, then compares the decoded text.
	StepVerifier.create(outputMessage.getBody())
			.consumeNextWith(prefixBuffer -> {
				String prefix = DataBufferTestUtils.dumpString(prefixBuffer, charset);
				DataBufferUtils.release(prefixBuffer);
				assertEquals("data:", prefix);
			})
			.consumeNextWith(payloadBuffer -> {
				String payload = DataBufferTestUtils.dumpString(payloadBuffer, charset);
				DataBufferUtils.release(payloadBuffer);
				assertEquals(expectedJson, payload);
			})
			.consumeNextWith(firstNewlineBuffer -> {
				String firstNewline = DataBufferTestUtils.dumpString(firstNewlineBuffer, charset);
				DataBufferUtils.release(firstNewlineBuffer);
				assertEquals("\n", firstNewline);
			})
			.consumeNextWith(secondNewlineBuffer -> {
				String secondNewline = DataBufferTestUtils.dumpString(secondNewlineBuffer, charset);
				DataBufferUtils.release(secondNewlineBuffer);
				assertEquals("\n", secondNewline);
			})
			.expectComplete()
			.verify();
}

Example 13

Source File: UnicodeBom.java From Strata with Apache License 2.0

5 votes

/**
 * Decodes a {@code byte[]} into a {@code String}, honouring a leading byte order mark.
 * <p>
 * A UTF-8, UTF-16BE or UTF-16LE marker selects the matching charset and is
 * left out of the result. Input without one of these markers is decoded as UTF-8.
 * 
 * @param input  the input byte array
 * @return the equivalent string
 */
public static String toString(byte[] input) {
  int length = input.length;

  // The three-byte UTF-8 marker is tested before the two-byte UTF-16 markers.
  if (length >= 3 && input[0] == X_EF && input[1] == X_BB && input[2] == X_BF) {
    return new String(input, 3, length - 3, StandardCharsets.UTF_8);
  }

  if (length >= 2) {
    byte first = input[0];
    byte second = input[1];
    if (first == X_FE && second == X_FF) {
      return new String(input, 2, length - 2, StandardCharsets.UTF_16BE);
    }
    if (first == X_FF && second == X_FE) {
      return new String(input, 2, length - 2, StandardCharsets.UTF_16LE);
    }
  }

  // No marker recognised: decode everything as UTF-8.
  return new String(input, StandardCharsets.UTF_8);
}

Example 14

Source File: NTLM.java From Bytecoder with Apache License 2.0

5 votes

/**
 * Reads the security buffer at {@code offset} and decodes it as text.
 *
 * @param offset passed through to the byte-returning {@code readSecurityBuffer(int)}
 * @param unicode {@code true} to decode as UTF-16LE, {@code false} for ISO-8859-1
 * @return the decoded text, or {@code null} when the underlying read yields {@code null}
 * @throws NTLMException propagated from the underlying read
 */
String readSecurityBuffer(int offset, boolean unicode)
        throws NTLMException {
    final byte[] raw = readSecurityBuffer(offset);
    if (raw == null) {
        return null;
    }
    if (unicode) {
        return new String(raw, StandardCharsets.UTF_16LE);
    }
    return new String(raw, StandardCharsets.ISO_8859_1);
}

Example 15

Source File: RpcInputStreamTest.java From p4ic4idea with Apache License 2.0

5 votes

/**
 * Reads a classpath UTF-16LE resource (BOM, Unix line endings) through
 * {@code RpcInputStream} with an explicit UTF-16LE charset and expects
 * 343 bytes from a single read.
 */
@Test
public void testReadUtf16LEWithBomAndUnixLineEnding() throws IOException, FileEncoderException {
    final int expectedLengthWithoutBom = 343;
    final byte[] sink = new byte[1001];

    mockFileName = loadFileFromClassPath(
            "com/perforce/p4java/common/io/utf-16le_with_bom_unix_line_ending_ko.txt")
                    .getPath();
    file = new RpcPerforceFile(mockFileName, RpcPerforceFileType.FST_UTF16);
    rpcInputStream = new RpcInputStream(file, StandardCharsets.UTF_16LE);

    // Only the first 1000 slots of the buffer are offered to the stream.
    final int bytesRead = rpcInputStream.read(sink, 0, 1000);
    assertThat(bytesRead, is(expectedLengthWithoutBom));
}

Example 16

Source File: RpcInputStreamTest.java From p4ic4idea with Apache License 2.0

5 votes

/**
 * Extracts a UTF-16LE resource (BOM, Unix line endings) to a temp file, reads it
 * through {@code RpcInputStream} with an explicit UTF-16LE charset and expects
 * 343 bytes from a single read.
 */
@Test
public void testReadUtf16LEWithBomAndUnixLineEnding() throws IOException, FileEncoderException {
  final String resourcePath =
          "com/perforce/p4java/common/io/utf-16le_with_bom_unix_line_ending_ko.txt";
  final int expectedLengthWithoutBom = 343;

  File extracted = tmpDir.newFile("utf-16le_with_bom_unix_line_ending_ko.txt");
  P4ExtFileUtils.extractResource(this, resourcePath, extracted, false);
  mockFileName = extracted.getAbsolutePath();
  file = new RpcPerforceFile(mockFileName, RpcPerforceFileType.FST_UTF16);
  rpcInputStream = new RpcInputStream(file, StandardCharsets.UTF_16LE);

  // Only the first 1000 slots of the buffer are offered to the stream.
  byte[] sink = new byte[1001];
  int bytesRead = rpcInputStream.read(sink, 0, 1000);
  assertThat(bytesRead, is(expectedLengthWithoutBom));
}

Example 17

Source File: SimpleStringSchemaTest.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Copies a UTF-16LE {@code SimpleStringSchema} via
 * {@code CommonTestUtils.createCopySerializable} and checks that the copy
 * reports the same charset as the original.
 */
@Test
public void testSerializability() throws Exception {
	final SimpleStringSchema original = new SimpleStringSchema(StandardCharsets.UTF_16LE);

	final SimpleStringSchema roundTripped = CommonTestUtils.createCopySerializable(original);

	// A non-default charset is used so a copy that lost it would be noticed.
	assertEquals(original.getCharset(), roundTripped.getCharset());
}

Example 18

Source File: SecureStorageWindowsManager.java From snowflake-jdbc with Apache License 2.0

4 votes

/**
 * Looks up the generic credential stored in Windows Credential Manager for the
 * target derived from {@code host} and {@code user}, and returns its blob decoded
 * as UTF-16LE.
 *
 * @param host host part of the lookup target
 * @param user user part of the lookup target
 * @return the stored value, or {@code null} when the read fails, the credential is
 *         not of type CRED_TYPE_GENERIC, or its blob is empty
 */
public String getCredential(String host, String user)
{
  final PointerByReference pCredential = new PointerByReference();
  final String target = SecureStorageManager.convertTarget(host, user);

  try
  {
    final boolean found;
    // All calls into advapi32Lib are serialised on the library object.
    synchronized (advapi32Lib)
    {
      found =
          advapi32Lib.CredReadW(target, SecureStorageWindowsCredentialType.CRED_TYPE_GENERIC.getType(), 0, pCredential);
    }

    if (!found)
    {
      logger.info(String.format("Failed to read target or could not find it in Windows Credential Manager. Error code = %d", Native.getLastError()));
      return null;
    }

    logger.debug("Found the token from Windows Credential Manager and now copying it");

    final SecureStorageWindowsCredential credential = new SecureStorageWindowsCredential(pCredential.getValue());

    final boolean isGeneric =
        SecureStorageWindowsCredentialType.typeOf(credential.Type) == SecureStorageWindowsCredentialType.CRED_TYPE_GENERIC;
    if (!isGeneric)
    {
      logger.info("Wrong type of credential. Expected: CRED_TYPE_GENERIC");
      return null;
    }

    if (credential.CredentialBlobSize == 0)
    {
      logger.info("Returned credential is empty");
      return null;
    }

    // Copy the blob into a Java String before the finally block frees the native credential.
    final byte[] blob = credential.CredentialBlob.getByteArray(0, credential.CredentialBlobSize);
    final String token = new String(blob, StandardCharsets.UTF_16LE);
    logger.debug("Successfully read the token. Will return it as String now");
    return token;
  }
  finally
  {
    // Free whatever CredReadW handed back, on every exit path.
    if (pCredential.getValue() != null)
    {
      synchronized (advapi32Lib)
      {
        advapi32Lib.CredFree(pCredential.getValue());
      }
    }
  }
}

Example 19

Source File: XMLCharsetDeterminator.java From ph-commons with Apache License 2.0

4 votes

/**
 * Determine the XML charset.
 * <p>
 * Works in three stages: (1) look for a BOM in the first bytes and, if it maps
 * to a unique charset, use that; (2) otherwise compare the bytes after any BOM
 * with a list of known byte patterns; (3) otherwise use the fallback charset.
 * The chosen charset is then handed to <code>_parseXMLEncoding</code>, whose
 * result is returned.
 *
 * @param aBytes
 *        XML byte representation. May not be <code>null</code>.
 * @return The result of <code>_parseXMLEncoding</code>; annotated as nullable.
 *         If <code>null</code> is returned you might want to try UTF-8 as the
 *         fallback.
 */
@Nullable
public static Charset determineXMLCharset (@Nonnull final byte [] aBytes)
{
  ValueEnforcer.notNull (aBytes, "Bytes");

  Charset aDetected = null;
  int nContentOfs = 0;

  if (aBytes.length > 0)
  {
    // Only the leading bytes (at most the longest BOM) are inspected for a BOM
    final int nHeadLen = Math.min (EUnicodeBOM.getMaximumByteCount (), aBytes.length);
    try (NonBlockingByteArrayInputStream aIS = new NonBlockingByteArrayInputStream (aBytes, 0, nHeadLen))
    {
      final InputStreamAndCharset aISC = CharsetHelper.getInputStreamAndCharsetFromBOM (aIS);

      // A BOM does not necessarily identify a unique charset - in any case
      // skip its bytes for the remaining checks
      if (aISC.hasBOM ())
        nContentOfs = aISC.getBOM ().getByteCount ();

      // The BOM has a unique charset assigned
      if (aISC.hasCharset ())
        aDetected = aISC.getCharset ();
    }
  }

  // No charset from the BOM and at least 4 bytes left to compare?
  if (aDetected == null && aBytes.length - nContentOfs >= 4)
  {
    // First matching pattern wins
    if (_match (aBytes, nContentOfs, CS_UTF32_BE))
      aDetected = CHARSET_UTF_32BE;
    else if (_match (aBytes, nContentOfs, CS_UTF32_LE))
      aDetected = CHARSET_UTF_32LE;
    else if (_match (aBytes, nContentOfs, CS_UTF16_BE))
      aDetected = StandardCharsets.UTF_16BE;
    else if (_match (aBytes, nContentOfs, CS_UTF16_LE))
      aDetected = StandardCharsets.UTF_16LE;
    else if (_match (aBytes, nContentOfs, CS_UTF8))
      aDetected = StandardCharsets.UTF_8;
    else if (_match (aBytes, nContentOfs, CS_EBCDIC))
      aDetected = CHARSET_EBCDIC;
    else if (_match (aBytes, nContentOfs, CS_IBM290))
      aDetected = CHARSET_IBM290;
  }

  // Nothing identified so far - use the fallback charset for parsing
  final Charset aParseCharset = aDetected != null ? aDetected : FALLBACK_CHARSET;

  // Now read with a reader
  return _parseXMLEncoding (aBytes, nContentOfs, aParseCharset);
}

Example 20

Source File: DefaultServlet.java From Tomcat8-Source-Read with MIT License

4 votes

/**
 * Inspects up to four leading bytes of the stream for a byte order mark.
 *
 * <p>The stream is marked before reading. On every path except the two UTF-32
 * matches, {@code skip(is, n)} is called with the number of BOM bytes to leave
 * consumed (0 when no BOM was recognised). For UTF-32 the four bytes already
 * read are the BOM, so {@code skip} is not called.
 *
 * @param is the stream to inspect; {@code mark} is called on it
 * @return the charset indicated by the BOM, or {@code null} if none was recognised
 * @throws IOException if reading from the stream fails
 */
private static Charset processBom(InputStream is) throws IOException {
    // No BOM handled here is longer than 4 bytes.
    final byte[] head = new byte[4];
    is.mark(head.length);

    final int available = is.read(head);

    // Fewer than two bytes cannot hold any BOM.
    if (available < 2) {
        skip(is, 0);
        return null;
    }

    final int first = head[0] & 0xFF;
    final int second = head[1] & 0xFF;
    final boolean startsWithFffe = first == 0xFF && second == 0xFE;

    if (first == 0xFE && second == 0xFF) {
        skip(is, 2);
        return StandardCharsets.UTF_16BE;
    }
    // FF FE is also the start of the UTF-32LE BOM, so it is only accepted as
    // UTF-16LE right away when exactly two bytes were read.
    if (startsWithFffe && available == 2) {
        skip(is, 2);
        return StandardCharsets.UTF_16LE;
    }

    // Every remaining BOM needs at least three bytes.
    if (available < 3) {
        skip(is, 0);
        return null;
    }

    // UTF-8 is the only three-byte BOM.
    final int third = head[2] & 0xFF;
    if (first == 0xEF && second == 0xBB && third == 0xBF) {
        skip(is, 3);
        return StandardCharsets.UTF_8;
    }

    // NOTE(review): when exactly three bytes were read and they start with FF FE,
    // this returns null rather than UTF-16LE - confirm that is intended.
    if (available < 4) {
        skip(is, 0);
        return null;
    }

    final int fourth = head[3] & 0xFF;
    if (first == 0x00 && second == 0x00 && third == 0xFE && fourth == 0xFF) {
        return Charset.forName("UTF-32BE");
    }
    if (startsWithFffe) {
        if (third == 0x00 && fourth == 0x00) {
            return Charset.forName("UTF-32LE");
        }
        // Deferred UTF-16LE case. This assumes a UTF-16LE file with a BOM never
        // has 0x00 0x00 as its first real data.
        skip(is, 2);
        return StandardCharsets.UTF_16LE;
    }

    skip(is, 0);
    return null;
}