java.nio.charset.CharsetDecoder Java Examples

The following examples show how to use java.nio.charset.CharsetDecoder. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CodeSetConversion.java    From jdk1.8-source-analysis with Apache License 2.0 6 votes vote down vote up
/**
 * Looks up the CharsetDecoder for the given Java code set name.
 * The cache is consulted first; on a miss a new decoder is created
 * from the named charset and stored in the cache before being
 * returned.  An illegal charset name is reported through the
 * wrapper's invalidBtcConverterName exception.
 */
protected CharsetDecoder getConverter(String javaCodeSetName) {
    try {
        CharsetDecoder cached = cache.getByteToCharConverter(javaCodeSetName);
        if (cached != null) {
            return cached;
        }

        // Cache miss: build a decoder and remember it for the next lookup.
        CharsetDecoder created = Charset.forName(javaCodeSetName).newDecoder();
        cache.setConverter(javaCodeSetName, created);
        return created;
    } catch (IllegalCharsetNameException icne) {
        // This can only happen if one of our charset entries has
        // an illegal name.
        throw wrapper.invalidBtcConverterName(icne, javaCodeSetName);
    }
}
 
Example #2
Source File: JapaneseTokenizerFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Loads the user dictionary, if a path was configured. The resource is read
 * through a decoder that reports (rather than replaces) malformed or
 * unmappable input; when no encoding is configured, IOUtils.UTF_8 is used.
 * Without a configured path the user dictionary is cleared.
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (userDictionaryPath == null) {
    userDictionary = null;
    return;
  }

  try (InputStream stream = loader.openResource(userDictionaryPath)) {
    String encoding = (userDictionaryEncoding != null) ? userDictionaryEncoding : IOUtils.UTF_8;
    CharsetDecoder strictDecoder = Charset.forName(encoding).newDecoder();
    strictDecoder.onMalformedInput(CodingErrorAction.REPORT);
    strictDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    userDictionary = UserDictionary.open(new InputStreamReader(stream, strictDecoder));
  }
}
 
Example #3
Source File: ScriptValuesAddedFunctions.java    From pentaho-kettle with Apache License 2.0 6 votes vote down vote up
/**
 * Script function taking exactly two arguments: a value and a code page name.
 * The value is converted to a string, turned into bytes with the platform
 * default charset (String.getBytes()), and those bytes are decoded with a
 * fresh decoder for the named code page.
 *
 * @return null if isNull reports the arguments as null, the undefined value if
 *         isUndefined reports them as undefined, Boolean.TRUE when decoding
 *         succeeds, and Boolean.FALSE when anything inside the check throws
 */
public static Object isCodepage( Context actualContext, Scriptable actualObject, Object[] ArgList,
  Function FunctionContext ) {
  if ( ArgList.length != 2 ) {
    throw Context.reportRuntimeError( "The function call isCodepage requires 2 arguments." );
  }

  try {
    if ( isNull( ArgList, new int[] { 0, 1 } ) ) {
      return null;
    }
    if ( isUndefined( ArgList, new int[] { 0, 1 } ) ) {
      return Context.getUndefinedValue();
    }

    String candidate = Context.toString( ArgList[0] );
    String codePageName = Context.toString( ArgList[1] );
    byte[] rawBytes = candidate.getBytes();
    CharsetDecoder codePageDecoder = Charset.forName( codePageName ).newDecoder();
    codePageDecoder.decode( ByteBuffer.wrap( rawBytes ) ).toString();
    return Boolean.TRUE;
  } catch ( Exception e ) {
    // Any failure (unknown code page, undecodable bytes, ...) means "no".
    return Boolean.FALSE;
  }
}
 
Example #4
Source File: StringType.java    From qpid-proton-j with Apache License 2.0 6 votes vote down vote up
/**
 * Reads a string from the buffer using the decoder's CharsetDecoder.
 * The charset decoder is always reset afterwards, whether or not the
 * read succeeded.
 *
 * @throws IllegalArgumentException wrapping the CharacterCodingException
 *         raised when the buffer content cannot be decoded
 */
@Override
public String decode(DecoderImpl decoder, final ReadableBuffer buffer)
{
    final CharsetDecoder sharedDecoder = decoder.getCharsetDecoder();
    try
    {
        final String text = buffer.readString(sharedDecoder);
        return text;
    }
    catch (CharacterCodingException cce)
    {
        throw new IllegalArgumentException("Cannot parse String", cce);
    }
    finally
    {
        // Leave the decoder in its initial state for the next caller.
        sharedDecoder.reset();
    }
}
 
Example #5
Source File: UnicodeHelper.java    From p4ic4idea with Apache License 2.0 6 votes vote down vote up
/**
 * Heuristically checks whether the first {@code bytesRead} bytes of
 * {@code bytes} are decodable with the given charset.
 *
 * The bytes are run through a decoder that reports malformed and
 * unmappable input. Decoding is done with endOfInput == false, so an
 * incomplete multi-byte sequence at the very end of the data is not
 * treated as an error. A {@code true} result therefore only means
 * "no decoding error was seen", not proof of the encoding.
 *
 * @param bytes the raw data to inspect
 * @param bytesRead number of valid bytes at the start of {@code bytes}
 * @param clientCharset the candidate charset; when null, true is returned
 * @return false if the decoder reported an error, true otherwise
 */
public static boolean inferCharset(byte[] bytes, int bytesRead, Charset clientCharset) {
	ByteBuffer input = ByteBuffer.wrap(bytes, 0, bytesRead);
	CharBuffer output = CharBuffer.allocate(input.capacity() * 2);

	if (clientCharset == null) {
		// No candidate charset to test against.
		return true;
	}

	CharsetDecoder strictDecoder = clientCharset.newDecoder()
			.onMalformedInput(CodingErrorAction.REPORT)
			.onUnmappableCharacter(CodingErrorAction.REPORT);
	CoderResult outcome = strictDecoder.decode(input, output, false);

	// An absent result is treated like success, as is any non-error result.
	return outcome == null || !outcome.isError();
}
 
Example #6
Source File: JSON.java    From uavstack with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes {@code len} bytes of {@code input}, starting at {@code off}, with the
 * supplied decoder and parses the resulting characters into an object of the
 * requested type. The decoder is reset before use, and the char array is sized
 * from the decoder's maxCharsPerByte so it can hold the worst-case output.
 */
@SuppressWarnings("unchecked")
public static <T> T parseObject(byte[] input, //
                                int off, //
                                int len, //
                                CharsetDecoder charsetDecoder, //
                                Type clazz, //
                                Feature... features) {
    charsetDecoder.reset();

    final double maxCharsPerByte = charsetDecoder.maxCharsPerByte();
    final char[] decoded = allocateChars((int) (len * maxCharsPerByte));

    final ByteBuffer source = ByteBuffer.wrap(input, off, len);
    final CharBuffer target = CharBuffer.wrap(decoded);
    IOUtils.decode(charsetDecoder, source, target);

    // The buffer position after decoding is the number of chars produced.
    final int charCount = target.position();
    return (T) parseObject(decoded, charCount, clazz, features);
}
 
Example #7
Source File: ZipCoder.java    From dragonwell8_jdk with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Decodes the first {@code length} bytes of {@code ba} into a String using
 * the decoder returned by decoder().
 *
 * @throws IllegalArgumentException if the bytes cannot be decoded
 */
String toString(byte[] ba, int length) {
    final CharsetDecoder cd = decoder().reset();
    final int capacity = (int)(length * cd.maxCharsPerByte());
    final char[] chars = new char[capacity];
    if (capacity == 0) {
        return new String(chars);
    }

    // UTF-8 only for now. Other ArrayDecoder only handles
    // CodingErrorAction.REPLACE mode. ZipCoder uses
    // REPORT mode.
    if (isUTF8 && cd instanceof ArrayDecoder) {
        final int decoded = ((ArrayDecoder)cd).decode(ba, 0, length, chars);
        if (decoded == -1) {    // malformed
            throw new IllegalArgumentException("MALFORMED");
        }
        return new String(chars, 0, decoded);
    }

    final ByteBuffer in = ByteBuffer.wrap(ba, 0, length);
    final CharBuffer out = CharBuffer.wrap(chars);
    CoderResult cr = cd.decode(in, out, true);
    if (cr.isUnderflow()) {
        // Decoding consumed everything; flush any pending decoder state.
        cr = cd.flush(out);
    }
    if (!cr.isUnderflow()) {
        throw new IllegalArgumentException(cr.toString());
    }
    return new String(chars, 0, out.position());
}
 
Example #8
Source File: ZipCoder.java    From jdk8u-jdk with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Returns the decoder held in decTL, creating and storing one on first use.
 * A newly created decoder reports malformed and unmappable input instead of
 * replacing it.
 */
private CharsetDecoder decoder() {
    CharsetDecoder cached = decTL.get();
    if (cached != null) {
        return cached;
    }

    CharsetDecoder created = cs.newDecoder();
    created.onMalformedInput(CodingErrorAction.REPORT);
    created.onUnmappableCharacter(CodingErrorAction.REPORT);
    decTL.set(created);
    return created;
}
 
Example #9
Source File: ConfigFromImpl.java    From database with Apache License 2.0 5 votes vote down vote up
/**
 * Registers each non-null filename as a property file by delegating to the
 * File-based overload with the same decoder. Null entries are skipped.
 *
 * @return this instance, for chaining
 */
@Override
public ConfigFrom propertyFile(CharsetDecoder decoder, String... filenames) {
  for (int i = 0; i < filenames.length; i++) {
    String name = filenames[i];
    if (name == null) {
      continue;
    }
    propertyFile(decoder, new File(name));
  }
  return this;
}
 
Example #10
Source File: IBM942C.java    From openjdk-jdk9 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Creates a DoubleByte decoder for this charset from the IBM942 double-byte
 * table and this class's b2cSB table.
 * NOTE(review): 0x40 and 0xfc are presumably the bounds of the valid second
 * byte -- confirm against DoubleByte.Decoder.
 */
public CharsetDecoder newDecoder() {
    final CharsetDecoder decoder =
        new DoubleByte.Decoder(this, IBM942.b2c, b2cSB, 0x40, 0xfc);
    return decoder;
}
 
Example #11
Source File: Message.java    From SI with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Builds a short, human-readable rendering of the payload for tracing.
 *
 * Returns "no payload" for a null or empty payload. If every byte compares
 * greater than or equal to ' ' or is a tab, line feed or carriage return,
 * the payload is decoded as UTF-8 into at most 24 chars and returned in
 * quotes (followed by the total byte count when it did not fit). In every
 * other case, including a decoding error, the hex text from
 * Utils.toHexText is returned.
 */
public String getPayloadTracingString() {
	if (payload == null || payload.length == 0) {
		return "no payload";
	}

	boolean printable = true;
	for (byte b : payload) {
		// bytes are signed, so values above 0x7f also compare below ' '
		if (b < ' ' && b != '\t' && b != '\n' && b != '\r') {
			printable = false;
			break;
		}
	}

	if (printable) {
		CharsetDecoder utf8 = CoAP.UTF8_CHARSET.newDecoder();
		utf8.onMalformedInput(CodingErrorAction.REPORT);
		utf8.onUnmappableCharacter(CodingErrorAction.REPORT);
		ByteBuffer source = ByteBuffer.wrap(payload);
		CharBuffer preview = CharBuffer.allocate(24);
		CoderResult outcome = utf8.decode(source, preview, true);
		utf8.flush(preview);
		preview.flip();
		if (outcome == CoderResult.OVERFLOW) {
			// Preview buffer filled up: show the prefix plus the full size.
			return "\"" + preview +  "\".. " + payload.length + " bytes";
		}
		if (!outcome.isError()) {
			return "\"" + preview + "\"" ;
		}
	}
	return Utils.toHexText(payload, 256);
}
 
Example #12
Source File: ZipCoder.java    From openjdk-8-source with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Returns the decoder stored in the dec field, creating it from cs on first
 * use. The decoder reports malformed and unmappable input.
 */
private CharsetDecoder decoder() {
    if (dec != null) {
        return dec;
    }
    dec = cs.newDecoder()
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);
    return dec;
}
 
Example #13
Source File: ZipCoder.java    From jdk8u-jdk with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Lazily initializes the dec field with a decoder for cs that reports
 * malformed and unmappable input, then returns it.
 */
private CharsetDecoder decoder() {
    if (dec == null) {
        CharsetDecoder created = cs.newDecoder();
        created.onMalformedInput(CodingErrorAction.REPORT);
        created.onUnmappableCharacter(CodingErrorAction.REPORT);
        dec = created;
    }
    return dec;
}
 
Example #14
Source File: IBM949C.java    From openjdk-jdk9 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Creates a DoubleByte decoder for this charset from the IBM949 double-byte
 * table and this class's b2cSB table.
 * NOTE(review): 0xa1 and 0xfe are presumably the bounds of the valid second
 * byte -- confirm against DoubleByte.Decoder.
 */
public CharsetDecoder newDecoder() {
    return new DoubleByte.Decoder(this, IBM949.b2c, b2cSB, 0xa1, 0xfe);
}
 
Example #15
Source File: CharsetUtil.java    From getty with Apache License 2.0 5 votes vote down vote up
/**
 * Per-thread decoder cache backing {@link #decoder(Charset)}. Each thread gets
 * its own map, so a cached decoder is never handed to another thread.
 */
private static final ThreadLocal<Map<Charset, CharsetDecoder>> THREAD_LOCAL_DECODERS =
        new ThreadLocal<Map<Charset, CharsetDecoder>>() {
            @Override
            protected Map<Charset, CharsetDecoder> initialValue() {
                return new HashMap<>();
            }
        };

/**
 * Returns a cached thread-local {@link CharsetDecoder} for the specified {@link Charset}.
 * <p>
 * The decoder is created on the calling thread's first request for the charset and
 * reused on later requests from the same thread. A reused decoder is reset and
 * reconfigured to replace malformed and unmappable input before it is returned, so
 * callers must not keep using a decoder obtained from an earlier call for the same
 * charset on the same thread.
 *
 * @param charset The specified charset
 * @return The decoder for the specified {@code charset}
 */
public static CharsetDecoder decoder(Charset charset) {
    checkNotNull(charset, "charset");

    // Look up the per-thread map; a map created per call would never produce a hit.
    Map<Charset, CharsetDecoder> map = THREAD_LOCAL_DECODERS.get();
    CharsetDecoder d = map.get(charset);
    if (d != null) {
        d.reset().onMalformedInput(CodingErrorAction.REPLACE).onUnmappableCharacter(CodingErrorAction.REPLACE);
        return d;
    }

    d = decoder(charset, CodingErrorAction.REPLACE, CodingErrorAction.REPLACE);
    map.put(charset, d);
    return d;
}
 
Example #16
Source File: IBM943C.java    From jdk8u-jdk with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Creates a DoubleByte decoder for this charset from the IBM943 double-byte
 * table and this class's b2cSB table.
 * NOTE(review): 0x40 and 0xfc are presumably the bounds of the valid second
 * byte -- confirm against DoubleByte.Decoder.
 */
public CharsetDecoder newDecoder() {
    final CharsetDecoder doubleByteDecoder = new DoubleByte.Decoder(
        this, IBM943.b2c, b2cSB, 0x40, 0xfc);
    return doubleByteDecoder;
}
 
Example #17
Source File: CodeSetCache.java    From JDKSourceCode1.8 with MIT License 5 votes vote down vote up
/**
 * Puts the given CharsetDecoder into the byte-to-char cache map (the
 * BTC_CACHE_MAP slot of the array held by converterCaches) under the
 * given key, and hands the same converter back to the caller.
 */
CharsetDecoder setConverter(Object key, CharsetDecoder converter) {
    Map[] cacheMaps = (Map[]) converterCaches.get();
    Map byteToCharCache = cacheMaps[BTC_CACHE_MAP];
    byteToCharCache.put(key, converter);
    return converter;
}
 
Example #18
Source File: StringsTest.java    From rxjava-extras with Apache License 2.0 5 votes vote down vote up
/**
 * Feeds the decoder a lone 0xc2 byte followed by an upstream IOException and
 * expects the blocking call to fail with a RuntimeException caused by that
 * IOException.
 */
@Test
public void testPropagateErrorInTheMiddleOfMultibyte() {
    Observable<byte[]> partialSequence = Observable.just(new byte[] { (byte) 0xc2 });
    Observable<byte[]> upstreamFailure = Observable.error(new IOException());
    CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder();
    Observable<byte[]> source = Observable.concat(partialSequence, upstreamFailure);
    try {
        decode(source, utf8Decoder).toList().toBlocking().single();
        fail();
    } catch (RuntimeException expected) {
        assertEquals(IOException.class, expected.getCause().getClass());
    }
}
 
Example #19
Source File: NIOJISAutoDetectTest.java    From jdk8u-jdk with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Decodes the same input once with the x-JISAutoDetect decoder and once with
 * a decoder for the expected charset, and checks that both produce the same
 * text. Both coder results are passed to checkCoderResult.
 */
static void test(String expectedCharset, byte[] input) throws Exception {
    CharsetDecoder autoDetect = Charset.forName("x-JISAutoDetect").newDecoder();
    CharsetDecoder reference = Charset.forName(expectedCharset).newDecoder();

    ByteBuffer source = ByteBuffer.allocate(128);
    source.put(input);
    source.flip();
    // Remember the start so the same bytes can be decoded a second time.
    source.mark();

    CharBuffer detectedChars = CharBuffer.allocate(128);
    CoderResult outcome = autoDetect.decode(source, detectedChars, true);
    checkCoderResult(outcome);
    detectedChars.flip();
    String actual = detectedChars.toString();

    source.reset();

    CharBuffer referenceChars = CharBuffer.allocate(128);
    outcome = reference.decode(source, referenceChars, true);
    checkCoderResult(outcome);
    referenceChars.flip();
    String expected = referenceChars.toString();

    boolean same = actual.equals(expected);
    check(same, String.format("actual=%s expected=%s", actual, expected));
}
 
Example #20
Source File: BigIntUtilities.java    From secretshare with GNU Lesser General Public License v2.1 5 votes vote down vote up
/**
 * Checks whether the given bytes can be decoded with a fresh decoder for the
 * charset named by the UTF8 constant.
 *
 * @param input the bytes to check
 * @return true if decoding succeeds, false if it raises a
 *         CharacterCodingException
 */
public static boolean isValidUTF8(byte[] input)
{
    final CharsetDecoder strictDecoder = Charset.forName(UTF8).newDecoder();

    boolean valid;
    try
    {
        strictDecoder.decode(ByteBuffer.wrap(input));
        valid = true;
    }
    catch (CharacterCodingException notDecodable)
    {
        valid = false;
    }
    return valid;
}
 
Example #21
Source File: CodeSetCache.java    From openjdk-jdk9 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Registers the given CharsetDecoder under the given key in the
 * byte-to-char cache map (the BTC_CACHE_MAP entry of the array
 * held by converterCaches) and returns that same converter.
 */
CharsetDecoder setConverter(Object key, CharsetDecoder converter) {
    ((Map[]) converterCaches.get())[BTC_CACHE_MAP].put(key, converter);
    return converter;
}
 
Example #22
Source File: TextDecoder.java    From yajsync with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates a TextDecoder around a new decoder for the given charset that
 * reports malformed and unmappable input instead of replacing it.
 *
 * @param charset the charset to decode from
 * @return a new TextDecoder using the strict decoder
 */
public static TextDecoder newStrict(Charset charset)
{
    CharsetDecoder strictDecoder = charset.newDecoder();
    strictDecoder.onMalformedInput(CodingErrorAction.REPORT);
    strictDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    return new TextDecoder(strictDecoder);
}
 
Example #23
Source File: AbstractCommand.java    From gemfirexd-oss with Apache License 2.0 4 votes vote down vote up
/** Supplies the initial value: a new decoder obtained from asciiCharset. */
@Override
protected CharsetDecoder initialValue() {
  final CharsetDecoder asciiDecoder = asciiCharset.newDecoder();
  return asciiDecoder;
}
 
Example #24
Source File: JIS_X_0201_OLD.java    From jdk8u-dev-jdk with GNU General Public License v2.0 4 votes vote down vote up
/** Creates a new Decoder, passing this instance to its constructor. */
public CharsetDecoder newDecoder() {
    final CharsetDecoder decoder = new Decoder(this);
    return decoder;
}
 
Example #25
Source File: ISO2022_KR.java    From dragonwell8_jdk with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Returns a freshly constructed Decoder that was given this instance
 * as its constructor argument.
 */
public CharsetDecoder newDecoder() {
    final CharsetDecoder created = new Decoder(this);
    return created;
}
 
Example #26
Source File: IBM943C_OLD.java    From jdk8u-dev-jdk with GNU General Public License v2.0 4 votes vote down vote up
/** Builds a new Decoder instance constructed with this object. */
public CharsetDecoder newDecoder() {
    final CharsetDecoder fresh = new Decoder(this);
    return fresh;
}
 
Example #27
Source File: EUC_JP.java    From jdk8u-jdk with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Constructs and returns a new Decoder; each call yields a separate
 * instance created with this object as the constructor argument.
 */
public CharsetDecoder newDecoder() {
    final CharsetDecoder result = new Decoder(this);
    return result;
}
 
Example #28
Source File: PCK_OLD.java    From hottub with GNU General Public License v2.0 4 votes vote down vote up
/** Returns a new Decoder created from this instance. */
public CharsetDecoder newDecoder() {
    final CharsetDecoder instance = new Decoder(this);
    return instance;
}
 
Example #29
Source File: CharsetUtil.java    From simple-netty-source with Apache License 2.0 4 votes vote down vote up
/**
 * Supplies the initial value: an empty IdentityHashMap, i.e. a map that
 * compares Charset keys by reference rather than by equals().
 */
@Override
protected Map<Charset, CharsetDecoder> initialValue() {
    final Map<Charset, CharsetDecoder> decodersByCharset =
            new IdentityHashMap<Charset, CharsetDecoder>();
    return decodersByCharset;
}
 
Example #30
Source File: BaseFileManager.java    From lua-for-android with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Decodes the bytes remaining in {@code inbuf} into a CharBuffer, using the
 * decoder that getDecoder returns for the current encoding name.
 *
 * If the encoding name is illegal or unsupported, an error is logged and an
 * empty buffer (limit 0) is returned. Malformed or unmappable input does not
 * abort decoding: the offending bytes are consumed, logged as hex, and
 * replaced by U+FFFD in the output.
 *
 * @param inbuf the bytes to decode; its position is advanced as input is consumed
 * @param ignoreEncodingErrors passed through to getDecoder
 * @return a buffer ready for reading (position 0, limit at the end of the
 *         decoded text) whose capacity is greater than its limit
 */
public CharBuffer decode(ByteBuffer inbuf, boolean ignoreEncodingErrors) {
    String encName = getEncodingName();
    CharsetDecoder decoder;
    try {
        decoder = getDecoder(encName, ignoreEncodingErrors);
    } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
        log.error(Errors.UnsupportedEncoding(encName));
        // allocate(1).flip() yields an empty buffer: position 0, limit 0.
        return (CharBuffer) CharBuffer.allocate(1).flip();
    }

    // slightly overestimate the buffer size to avoid reallocation.
    float factor =
        decoder.averageCharsPerByte() * 0.8f +
        decoder.maxCharsPerByte() * 0.2f;
    CharBuffer dest = CharBuffer.
        allocate(10 + (int)(inbuf.remaining()*factor));

    // Keep decoding until the input is exhausted; each pass handles one
    // coder result (done, output full, or a bad input sequence).
    while (true) {
        CoderResult result = decoder.decode(inbuf, dest, true);
        // Switch dest to read mode; the branches below either return it,
        // copy it into a larger buffer, or undo the flip to keep writing.
        dest.flip();

        if (result.isUnderflow()) { // done reading
            // make sure there is at least one extra character
            if (dest.limit() == dest.capacity()) {
                dest = CharBuffer.allocate(dest.capacity()+1).put(dest);
                dest.flip();
            }
            return dest;
        } else if (result.isOverflow()) { // buffer too small; expand
            int newCapacity =
                10 + dest.capacity() +
                (int)(inbuf.remaining()*decoder.maxCharsPerByte());
            // put(dest) copies the decoded chars and leaves the new buffer
            // positioned after them, ready for further writing.
            dest = CharBuffer.allocate(newCapacity).put(dest);
        } else if (result.isMalformed() || result.isUnmappable()) {
            // bad character in input
            StringBuilder unmappable = new StringBuilder();
            int len = result.length();

            // Consume the offending bytes from the input, recording them as hex.
            for (int i = 0; i < len; i++) {
                unmappable.append(String.format("%02X", inbuf.get()));
            }

            String charsetName = charset == null ? encName : charset.name();

            // dest.limit() is the number of chars decoded so far.
            log.error(dest.limit(),
                      Errors.IllegalCharForEncoding(unmappable.toString(), charsetName));

            // undo the flip() to prepare the output buffer
            // for more translation
            dest.position(dest.limit());
            dest.limit(dest.capacity());
            dest.put((char)0xfffd); // backward compatible
        } else {
            throw new AssertionError(result);
        }
    }
    // unreached
}