Java Code Examples for java.nio.charset.CharsetDecoder#reset()

The following examples show how to use java.nio.charset.CharsetDecoder#reset(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SynonymGraphFilterFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link SynonymMap} by running every configured synonyms file through an
 * instance of the {@link SynonymMap.Parser} implementation named by {@code cname}.
 *
 * @param loader   used to look up the parser class and to open each synonyms resource
 * @param cname    name of the parser class to instantiate
 * @param dedup    first constructor argument handed to the parser
 * @param analyzer third constructor argument handed to the parser
 * @return the map built by the parser once all files have been parsed
 * @throws IOException    propagated from opening or parsing a resource
 * @throws ParseException propagated from the parser
 */
protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
  // Strict UTF-8: malformed or unmappable input is reported instead of being replaced.
  CharsetDecoder utf8Decoder = StandardCharsets.UTF_8.newDecoder();
  utf8Decoder.onMalformedInput(CodingErrorAction.REPORT);
  utf8Decoder.onUnmappableCharacter(CodingErrorAction.REPORT);

  Class<? extends SynonymMap.Parser> parserClass = loader.findClass(cname, SynonymMap.Parser.class);
  SynonymMap.Parser synonymParser;
  try {
    // The parser is expected to expose a (boolean, boolean, Analyzer) constructor.
    synonymParser = parserClass
        .getConstructor(boolean.class, boolean.class, Analyzer.class)
        .newInstance(dedup, expand, analyzer);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  for (String fileName : splitFileNames(synonyms)) {
    // The single decoder is shared by all files, so clear its state before each stream.
    utf8Decoder.reset();
    synonymParser.parse(new InputStreamReader(loader.openResource(fileName), utf8Decoder));
  }
  return synonymParser.build();
}
 
Example 2
Source File: CBUtil.java    From stratio-cassandra with Apache License 2.0 6 votes vote down vote up
/*
 * Decodes the remaining bytes of src into a String using the decoder obtained
 * from decoder.get(). Throws CharacterCodingException if either the decode or
 * the flush step ends with anything other than an underflow.
 */
private static String decodeString(ByteBuffer src) throws CharacterCodingException
{
    // decoders carry state between calls, so fetch this caller's copy and clear it first
    CharsetDecoder threadDecoder = decoder.get();
    threadDecoder.reset();

    // size the output for the largest possible expansion of the remaining input
    int capacity = (int) ((double) src.remaining() * threadDecoder.maxCharsPerByte());
    final CharBuffer chars = CharBuffer.allocate(capacity);

    CoderResult outcome = threadDecoder.decode(src, chars, true);
    if (!outcome.isUnderflow())
        outcome.throwException();

    outcome = threadDecoder.flush(chars);
    if (!outcome.isUnderflow())
        outcome.throwException();

    return chars.flip().toString();
}
 
Example 3
Source File: CharsetUtil.java    From netty4.0.27Learn with Apache License 2.0 6 votes vote down vote up
/**
 * Returns a cached thread-local {@link CharsetDecoder} for the specified
 * {@code charset}, creating and caching one on first request.
 * <p>
 * The returned decoder is always configured to replace malformed input and
 * unmappable characters; a decoder taken from the cache is reset first.
 *
 * @param charset the charset to decode with; must not be {@code null}
 * @return a decoder ready for a new decoding operation
 * @throws NullPointerException if {@code charset} is {@code null}
 */
public static CharsetDecoder getDecoder(Charset charset) {
    if (charset == null) {
        throw new NullPointerException("charset");
    }

    Map<Charset, CharsetDecoder> cache = InternalThreadLocalMap.get().charsetDecoderCache();
    CharsetDecoder decoder = cache.get(charset);
    final boolean cacheMiss = decoder == null;

    if (cacheMiss) {
        decoder = charset.newDecoder();
    } else {
        // A reused decoder may still hold state from its previous use.
        decoder.reset();
    }

    decoder.onMalformedInput(CodingErrorAction.REPLACE);
    decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);

    if (cacheMiss) {
        cache.put(charset, decoder);
    }
    return decoder;
}
 
Example 4
Source File: CharsetUtil.java    From simple-netty-source with Apache License 2.0 6 votes vote down vote up
/**
 * Returns a cached thread-local {@link CharsetDecoder} for the specified
 * {@code charset}. A decoder is created and stored the first time a charset
 * is requested; later requests reset and return the stored instance.
 * In both cases malformed input and unmappable characters are set to be replaced.
 *
 * @param charset the charset to decode with; must not be {@code null}
 * @return the decoder for {@code charset}
 * @throws NullPointerException if {@code charset} is {@code null}
 */
public static CharsetDecoder getDecoder(Charset charset) {
    if (charset == null) {
        throw new NullPointerException("charset");
    }

    Map<Charset, CharsetDecoder> perCharset = decoders.get();
    CharsetDecoder found = perCharset.get(charset);
    boolean needsCaching = false;

    if (found != null) {
        // Clear whatever state the previous user left behind.
        found.reset();
    } else {
        found = charset.newDecoder();
        needsCaching = true;
    }

    found.onMalformedInput(CodingErrorAction.REPLACE);
    found.onUnmappableCharacter(CodingErrorAction.REPLACE);

    if (needsCaching) {
        perCharset.put(charset, found);
    }
    return found;
}
 
Example 5
Source File: JsonUtils.java    From BigApp_Discuz_Android with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes {@code len} bytes of {@code input} starting at {@code off} with the
 * given decoder and parses the resulting characters into an object of type {@code clazz}.
 * The decoder is reset before use.
 */
@SuppressWarnings("unchecked")
public static final <T> T parseObject(byte[] input, int off, int len, CharsetDecoder charsetDecoder, Type clazz,
                                      Feature... features) {
    charsetDecoder.reset();

    // Largest number of chars that len bytes can decode to.
    final double charsPerByte = charsetDecoder.maxCharsPerByte();
    final int maxChars = (int) (len * charsPerByte);
    final char[] decoded = ThreadLocalCache.getChars(maxChars);

    final ByteBuffer source = ByteBuffer.wrap(input, off, len);
    final CharBuffer target = CharBuffer.wrap(decoded);
    IOUtils.decode(charsetDecoder, source, target);

    // After decoding, the target's position is the number of chars produced.
    return (T) parseObject(decoded, target.position(), clazz, features);
}
 
Example 6
Source File: CharsetUtil.java    From android-netty with Apache License 2.0 6 votes vote down vote up
/**
 * Returns a cached thread-local {@link CharsetDecoder} for the specified
 * {@code charset}.
 * <p>
 * On a cache hit the stored decoder is reset; on a miss a new decoder is
 * created and stored. Either way the decoder is set to replace malformed
 * input and unmappable characters before it is returned.
 *
 * @param charset the charset to decode with; must not be {@code null}
 * @return the decoder for {@code charset}
 * @throws NullPointerException if {@code charset} is {@code null}
 */
public static CharsetDecoder getDecoder(Charset charset) {
    if (charset == null) {
        throw new NullPointerException("charset");
    }

    final Map<Charset, CharsetDecoder> cache = decoders.get();
    final CharsetDecoder existing = cache.get(charset);

    final CharsetDecoder result;
    if (existing == null) {
        result = charset.newDecoder();
    } else {
        existing.reset();
        result = existing;
    }

    result.onMalformedInput(CodingErrorAction.REPLACE);
    result.onUnmappableCharacter(CodingErrorAction.REPLACE);

    // Only a freshly created decoder has to be added to the cache.
    if (existing == null) {
        cache.put(charset, result);
    }
    return result;
}
 
Example 7
Source File: UTF7CharsetTest.java    From ph-commons with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes two inputs with one decoder, each into an output buffer that is
 * exactly as large as the expected text, resetting the decoder in between.
 */
@Test
public void testDecodeLimitedOutput () throws Exception
{
  final CharsetDecoder aDecoder = tested.newDecoder ();

  // First input: three characters into a three-char buffer, decode plus flush
  final ByteBuffer aFirstIn = CharsetTestHelper.wrap ("+IKwA4QDp-");
  final CharBuffer aFirstOut = CharBuffer.allocate (3);
  assertEquals (CoderResult.UNDERFLOW, aDecoder.decode (aFirstIn, aFirstOut, true));
  assertEquals (CoderResult.UNDERFLOW, aDecoder.flush (aFirstOut));
  aFirstOut.flip ();
  assertEquals ("€áé", aFirstOut.toString ());

  // Reuse the same decoder for an unrelated second input
  aDecoder.reset ();
  final ByteBuffer aSecondIn = CharsetTestHelper.wrap ("A+ImIDkQ.");
  final CharBuffer aSecondOut = CharBuffer.allocate (4);
  assertEquals (CoderResult.UNDERFLOW, aDecoder.decode (aSecondIn, aSecondOut, true));
  aSecondOut.flip ();
  assertEquals ("A\u2262\u0391.", aSecondOut.toString ());
}
 
Example 8
Source File: JSON.java    From uavstack with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes a slice of {@code input} with the supplied decoder (which is reset
 * first) and parses the decoded characters into an object of type {@code clazz}.
 */
@SuppressWarnings("unchecked")
public static <T> T parseObject(byte[] input, //
                                int off, //
                                int len, //
                                CharsetDecoder charsetDecoder, //
                                Type clazz, //
                                Feature... features) {
    charsetDecoder.reset();

    // Reserve room for the largest possible expansion of len bytes.
    final double expansion = charsetDecoder.maxCharsPerByte();
    final char[] decodedChars = allocateChars((int) (len * expansion));

    final ByteBuffer bytesIn = ByteBuffer.wrap(input, off, len);
    final CharBuffer charsOut = CharBuffer.wrap(decodedChars);
    IOUtils.decode(charsetDecoder, bytesIn, charsOut);

    // The output buffer's position after decoding is the decoded length.
    final int decodedLength = charsOut.position();
    return (T) parseObject(decodedChars, decodedLength, clazz, features);
}
 
Example 9
Source File: Encode.java    From keycloak with Apache License 2.0 6 votes vote down vote up
/**
 * Returns a copy of {@code path} in which every region matched by
 * {@code encodedCharsMulti} is replaced by the result of decoding the match's
 * first group; all text between matches is copied through unchanged.
 */
public static String decodePath(String path)
{
   final Matcher encodedRuns = encodedCharsMulti.matcher(path);
   final StringBuilder result = new StringBuilder();
   final CharsetDecoder sharedDecoder = Charset.forName(UTF_8).newDecoder();

   int copiedUpTo = 0;
   while (encodedRuns.find())
   {
      // literal text between the previous match and this one
      result.append(path, copiedUpTo, encodedRuns.start());

      // the decoder is reused for every match, so clear its state first
      sharedDecoder.reset();
      result.append(decodeBytes(encodedRuns.group(1), sharedDecoder));

      copiedUpTo = encodedRuns.end();
   }

   // trailing literal text after the last match
   result.append(path, copiedUpTo, path.length());
   return result.toString();
}
 
Example 10
Source File: GremlinAPI.java    From hugegraph with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the first line of the gremlin script as its name, cut down to at
 * most MAX_NAME_LENGTH bytes in the CHARSET encoding when it is longer.
 */
public String name() {
    // Everything before the first line break is the candidate name
    final String head = this.gremlin.split("\r\n|\r|\n", 2)[0];
    final Charset nameCharset = Charset.forName(CHARSET);
    final byte[] encoded = head.getBytes(nameCharset);

    if (encoded.length > MAX_NAME_LENGTH) {
        /*
         * Reference https://stackoverflow.com/questions/3576754/truncating-strings-by-bytes
         * Malformed input is ignored so a character cut in half by the
         * byte limit is dropped rather than reported.
         */
        final CharsetDecoder lenient = nameCharset.newDecoder();
        lenient.onMalformedInput(CodingErrorAction.IGNORE);
        lenient.reset();

        final ByteBuffer head_bytes = ByteBuffer.wrap(encoded, 0, MAX_NAME_LENGTH);
        try {
            return lenient.decode(head_bytes).toString();
        } catch (CharacterCodingException e) {
            throw new HugeException("Failed to decode truncated bytes of " +
                                    "gremlin first line", e);
        }
    }

    return head;
}
 
Example 11
Source File: UTF7CharsetModifiedTest.java    From ph-commons with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds two inputs that start with "&" and are never closed to the same decoder
 * and checks that decoding underflows, flushing reports one malformed byte,
 * all input is consumed and no output is produced.
 */
@Test
public void testDecodeNoClosing () throws Exception
{
  // Case 1: the input is just "&"
  final ByteBuffer aFirstIn = CharsetTestHelper.wrap ("&");
  final CharBuffer aFirstOut = CharBuffer.allocate (1024);
  final CharsetDecoder aDecoder = tested.newDecoder ();
  final CoderResult aFirstDecode = aDecoder.decode (aFirstIn, aFirstOut, true);
  assertEquals (CoderResult.UNDERFLOW, aFirstDecode);
  final CoderResult aFirstFlush = aDecoder.flush (aFirstOut);
  assertEquals (CoderResult.malformedForLength (1), aFirstFlush);
  assertEquals (1, aFirstIn.position ());
  assertEquals (0, aFirstOut.position ());

  // Case 2: "&" followed by two more bytes, same decoder after a reset
  final ByteBuffer aSecondIn = CharsetTestHelper.wrap ("&AO");
  final CharBuffer aSecondOut = CharBuffer.allocate (1024);
  aDecoder.reset ();
  final CoderResult aSecondDecode = aDecoder.decode (aSecondIn, aSecondOut, true);
  assertEquals (CoderResult.UNDERFLOW, aSecondDecode);
  final CoderResult aSecondFlush = aDecoder.flush (aSecondOut);
  assertEquals (CoderResult.malformedForLength (1), aSecondFlush);
  assertEquals (3, aSecondIn.position ());
  assertEquals (0, aSecondOut.position ());
}
 
Example 12
Source File: IOUtils.java    From apm-agent-java with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes the content of {@code buffer} into {@code charBuffer} as one complete
 * operation and returns the result of the decode step (the flush result is not reported).
 * Whether or not decoding succeeds, {@code buffer} is cleared and the decoder is
 * reset afterwards.
 */
private static CoderResult decode(CharBuffer charBuffer, ByteBuffer buffer) {
    final CharsetDecoder decoder = threadLocalCharsetDecoder.get();
    try {
        // endOfInput = true: this call carries all the input there is
        final CoderResult decodeOutcome = decoder.decode(buffer, charBuffer, true);
        decoder.flush(charBuffer);
        return decodeOutcome;
    } finally {
        // leave both the input buffer and the decoder ready for the next call
        ((Buffer) buffer).clear();
        decoder.reset();
    }
}
 
Example 13
Source File: CharsetDecoderTest.java    From j2objc with Apache License 2.0 5 votes vote down vote up
/**
 * Round-trips "Android" through UTF-16 using an array-backed input buffer that
 * is a slice of a wrap covering the whole array.
 */
public void test_ByteArray_decode_no_offset() throws Exception {
    CharsetDecoder utf16Decoder = Charset.forName("UTF-16").newDecoder();
    byte[] encoded = encode("UTF-16", "Android");

    ByteBuffer source = ByteBuffer.wrap(encoded, 0, encoded.length).slice();
    CharBuffer target = CharBuffer.allocate(encoded.length);

    utf16Decoder.reset();
    CoderResult outcome = utf16Decoder.decode(source, target, true);
    assertFalse(outcome.toString(), outcome.isError());
    utf16Decoder.flush(target);

    target.flip();
    assertEquals("Android", target.toString().trim());
}
 
Example 14
Source File: LuceneUtil.java    From nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Truncate a single field so that it does not exceed Lucene's byte size limit on indexed terms.
 * <p>
 * The length is measured in UTF-8 bytes, because that is the encoding Lucene's
 * term length limit is defined in; measuring in the platform default charset
 * could let an over-long term through on systems whose default is not UTF-8.
 * A character that straddles the cut-off is dropped rather than kept in part.
 *
 * @param field the string to be indexed
 * @return a string that can be indexed which is within Lucene's byte size limit, or null if
 *         {@code field} is null or anything goes wrong
 */
public static String truncateIndexField(String field) {
    if (field == null) {
        return null;
    }

    // Charset.forName keeps this block free of new imports; "UTF-8" is always available.
    final Charset utf8 = Charset.forName("UTF-8");
    final byte[] bytes = field.getBytes(utf8);
    if (bytes.length <= IndexWriter.MAX_TERM_LENGTH) {
        return field;
    }

    // chop the field to maximum allowed byte length
    final ByteBuffer bbuf = ByteBuffer.wrap(bytes, 0, IndexWriter.MAX_TERM_LENGTH);

    try {
        // decode the chopped bytes; IGNORE drops a multi-byte character cut in half at the end
        final CharsetDecoder decoder = utf8.newDecoder();
        decoder.onMalformedInput(CodingErrorAction.IGNORE);
        decoder.reset();
        return decoder.decode(bbuf).toString();
    } catch (CharacterCodingException ignored) {
        // Not expected for bytes we just encoded ourselves; honour the documented
        // "null if anything goes wrong" contract instead of propagating.
        return null;
    }
}
 
Example 15
Source File: AbstractAdaptiveByteBuffer.java    From craft-atom with MIT License 4 votes vote down vote up
/**
 * {@inheritDoc}
 * <p>
 * Reads a NUL-terminated string starting at the current position. For charsets
 * whose name starts with {@code "UTF-16"} the terminator is a two-byte zero code
 * unit, searched for in 2-byte steps from the starting position; for all other
 * charsets it is a single zero byte. When no terminator is found, the rest of the
 * buffer is decoded (rounded down to an even byte count for UTF-16).
 * <p>
 * On success the position is moved past the terminator, or to the end of the
 * decoded bytes when there was none. On a decoding error the position and limit
 * are restored to their values on entry before the exception is thrown.
 *
 * @param decoder the decoder to use; it is reset before decoding
 * @return the decoded string, or an empty string if nothing remains or the
 *         terminator is the very first thing read
 * @throws CharacterCodingException if the decoder reports an error
 */
@Override
public String getString(CharsetDecoder decoder) throws CharacterCodingException {
    if (!hasRemaining()) {
        return "";
    }

    boolean utf16 = decoder.charset().name().startsWith("UTF-16");

    int oldPos = position();
    int oldLimit = limit();
    int end = -1;
    int newPos;

    if (!utf16) {
        // Single-byte terminator; a negative result means no zero byte was found.
        end = indexOf((byte) 0x00);
        if (end < 0) {
            newPos = end = oldLimit;
        } else {
            newPos = end + 1;
        }
    } else {
        // Inspect whole 2-byte code units only. Stepping byte by byte would let a
        // zero low byte followed by a zero high byte of the next unit (for example
        // little-endian "B" followed by the terminator) be mistaken for the
        // terminator and cut a code unit in half.
        for (int i = oldPos; i + 1 < oldLimit; i += 2) {
            if (get(i) == 0 && get(i + 1) == 0) {
                end = i;
                break;
            }
        }

        if (end < 0) {
            // No terminator: decode as many complete code units as are available.
            newPos = end = oldPos + (oldLimit - oldPos & 0xFFFFFFFE);
        } else {
            // The loop bound guarantees both terminator bytes lie before oldLimit.
            newPos = end + 2;
        }
    }

    if (oldPos == end) {
        position(newPos);
        return "";
    }

    // Restrict decoding to the bytes in front of the terminator.
    limit(end);
    decoder.reset();

    int expectedLength = (int) (remaining() * decoder.averageCharsPerByte()) + 1;
    CharBuffer out = CharBuffer.allocate(expectedLength);
    for (;;) {
        CoderResult cr;
        if (hasRemaining()) {
            cr = decoder.decode(buf(), out, true);
        } else {
            cr = decoder.flush(out);
        }

        // NOTE(review): an underflow from decode() ends the loop without a flush();
        // flush() is only reached after a decode() that consumed all input but overflowed.
        if (cr.isUnderflow()) {
            break;
        }

        if (cr.isOverflow()) {
            // Output is full: move what was decoded so far into a larger buffer and retry.
            CharBuffer o = CharBuffer.allocate(out.capacity() + expectedLength);
            out.flip();
            o.put(out);
            out = o;
            continue;
        }

        if (cr.isError()) {
            // Revert the buffer back to the previous state.
            limit(oldLimit);
            position(oldPos);
            cr.throwException();
        }
    }

    limit(oldLimit);
    position(newPos);
    return out.flip().toString();
}
 
Example 16
Source File: AbstractIoBuffer.java    From neoscada with Eclipse Public License 1.0 4 votes vote down vote up
/**
 * {@inheritDoc}
 * <p>
 * Reads a string from a fixed-size field of {@code fieldSize} bytes starting at
 * the current position. Decoding stops at the first zero terminator inside the
 * field (one zero byte, or a zero byte pair for charsets whose name starts with
 * {@code "UTF-16"}) or at the end of the field. On success the position is always
 * advanced by the whole field; on a decoding error position and limit are restored.
 */
@Override
public String getString(int fieldSize, CharsetDecoder decoder) throws CharacterCodingException {
    checkFieldSize(fieldSize);

    if (fieldSize == 0 || !hasRemaining()) {
        return "";
    }

    final boolean twoByteUnits = decoder.charset().name().startsWith("UTF-16");
    if (twoByteUnits && (fieldSize & 1) != 0) {
        throw new IllegalArgumentException("fieldSize is not even.");
    }

    final int startPos = position();
    final int savedLimit = limit();
    final int fieldEnd = startPos + fieldSize;
    if (savedLimit < fieldEnd) {
        throw new BufferUnderflowException();
    }

    // Find where the payload stops: at the terminator, or at the field end if there is none.
    int payloadEnd = startPos;
    if (twoByteUnits) {
        while (payloadEnd < fieldEnd && !(get(payloadEnd) == 0 && get(payloadEnd + 1) == 0)) {
            payloadEnd += 2;
        }
    } else {
        while (payloadEnd < fieldEnd && get(payloadEnd) != 0) {
            payloadEnd++;
        }
    }
    limit(payloadEnd);

    if (!hasRemaining()) {
        // The field starts with a terminator: empty string, but still skip the field.
        limit(savedLimit);
        position(fieldEnd);
        return "";
    }
    decoder.reset();

    final int growBy = (int) (remaining() * decoder.averageCharsPerByte()) + 1;
    CharBuffer decoded = CharBuffer.allocate(growBy);
    while (true) {
        final CoderResult outcome = hasRemaining()
                ? decoder.decode(buf(), decoded, true)
                : decoder.flush(decoded);

        if (outcome.isUnderflow()) {
            break;
        }

        if (outcome.isOverflow()) {
            // Out of room: carry the chars decoded so far over into a bigger buffer.
            CharBuffer bigger = CharBuffer.allocate(decoded.capacity() + growBy);
            decoded.flip();
            bigger.put(decoded);
            decoded = bigger;
            continue;
        }

        if (outcome.isError()) {
            // Put the buffer back the way it was before reporting the failure.
            limit(savedLimit);
            position(startPos);
            outcome.throwException();
        }
    }

    limit(savedLimit);
    position(fieldEnd);
    return decoded.flip().toString();
}
 
Example 17
Source File: AbstractIoBuffer.java    From neoscada with Eclipse Public License 1.0 4 votes vote down vote up
/**
 * {@inheritDoc}
 * <p>
 * Reads a NUL-terminated string starting at the current position. The terminator
 * is a single zero byte, except for charsets whose name starts with
 * {@code "UTF-16"}, where it is a zero byte pair located on a 2-byte boundary
 * relative to the starting position. Without a terminator the remaining bytes are
 * decoded (for UTF-16, a trailing odd byte is left out).
 * <p>
 * After a successful call the position is just behind the terminator, or behind
 * the decoded bytes if no terminator was present. If decoding fails, position and
 * limit are put back to their values on entry before the exception is thrown.
 *
 * @param decoder the decoder to use; it is reset before decoding
 * @return the decoded string; empty if nothing remains or the string is empty
 * @throws CharacterCodingException if the decoder reports an error
 */
@Override
public String getString(CharsetDecoder decoder) throws CharacterCodingException {
    if (!hasRemaining()) {
        return "";
    }

    boolean utf16 = decoder.charset().name().startsWith("UTF-16");

    int oldPos = position();
    int oldLimit = limit();
    int end = -1;
    int newPos;

    if (!utf16) {
        // A negative index means the buffer holds no zero byte.
        end = indexOf((byte) 0x00);
        if (end < 0) {
            newPos = end = oldLimit;
        } else {
            newPos = end + 1;
        }
    } else {
        // Compare aligned byte pairs only. A byte-wise scan can pair the zero byte
        // that ends one code unit with the zero byte that starts the next one and
        // report a terminator in the middle of a character.
        for (int i = oldPos; i + 1 < oldLimit; i += 2) {
            if (get(i) == 0 && get(i + 1) == 0) {
                end = i;
                break;
            }
        }

        if (end < 0) {
            // No terminator found: stop after the last complete code unit.
            newPos = end = oldPos + (oldLimit - oldPos & 0xFFFFFFFE);
        } else {
            // Both terminator bytes are inside the buffer, so skipping them is safe.
            newPos = end + 2;
        }
    }

    if (oldPos == end) {
        position(newPos);
        return "";
    }

    // Only the bytes before the terminator are handed to the decoder.
    limit(end);
    decoder.reset();

    int expectedLength = (int) (remaining() * decoder.averageCharsPerByte()) + 1;
    CharBuffer out = CharBuffer.allocate(expectedLength);
    for (;;) {
        CoderResult cr;
        if (hasRemaining()) {
            cr = decoder.decode(buf(), out, true);
        } else {
            cr = decoder.flush(out);
        }

        // NOTE(review): the loop stops on the first underflow, so flush() only runs
        // when a previous decode() used up the input but ran out of output space.
        if (cr.isUnderflow()) {
            break;
        }

        if (cr.isOverflow()) {
            // Grow the output by another expectedLength chars, keeping what is already decoded.
            CharBuffer o = CharBuffer.allocate(out.capacity() + expectedLength);
            out.flip();
            o.put(out);
            out = o;
            continue;
        }

        if (cr.isError()) {
            // Revert the buffer back to the previous state.
            limit(oldLimit);
            position(oldPos);
            cr.throwException();
        }
    }

    limit(oldLimit);
    position(newPos);
    return out.flip().toString();
}
 
Example 18
Source File: JsonUtils.java    From BigApp_Discuz_Android with Apache License 2.0 4 votes vote down vote up
/**
 * Decodes {@code len} bytes of {@code input} starting at {@code off} with the
 * given decoder (reset first) and parses the decoded characters as JSON using
 * the global parser configuration and the given feature bits.
 */
public static final Object parse(byte[] input, int off, int len, CharsetDecoder charsetDecoder, int features) {
    charsetDecoder.reset();

    // Room for the largest possible expansion of len bytes.
    final double charsPerByte = charsetDecoder.maxCharsPerByte();
    final char[] decoded = ThreadLocalCache.getChars((int) (len * charsPerByte));

    final ByteBuffer source = ByteBuffer.wrap(input, off, len);
    final CharBuffer target = CharBuffer.wrap(decoded);
    IOUtils.decode(charsetDecoder, source, target);

    // target.position() is the number of chars the decode step produced.
    final DefaultJSONParser jsonParser =
            new DefaultJSONParser(decoded, target.position(), ParserConfig.getGlobalInstance(), features);
    final Object parsed = jsonParser.parse();
    handleResovleTask(jsonParser, parsed);
    jsonParser.close();

    return parsed;
}
 
Example 19
Source File: TerminalEmulator.java    From Ansole with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Feeds one input byte into the multi-byte UTF-8 sequence handling.
 *
 * mUTF8ToFollow holds the number of continuation bytes still expected for the
 * sequence currently being collected in mUTF8ByteBuffer (0 when no sequence is
 * in progress).
 *
 * @param b the next input byte
 * @return false if b is a plain ASCII byte and no sequence is in progress, so the
 *         byte was not consumed here; true if the byte was handled by this method
 */
private boolean handleUTF8Sequence(byte b) {
    if (mUTF8ToFollow == 0 && (b & 0x80) == 0) {
        // ASCII character -- we don't need to handle this
        return false;
    }

    if (mUTF8ToFollow > 0) {
        // In the middle of a sequence: b must be a continuation byte (top bits 10).
        if ((b & 0xc0) != 0x80) {
            /* Not a UTF-8 continuation byte (doesn't begin with 0b10)
               Replace the entire sequence with the replacement char */
            mUTF8ToFollow = 0;
            mUTF8ByteBuffer.clear();
            emit(UNICODE_REPLACEMENT_CHAR);

            /* The Unicode standard (section 3.9, definition D93) requires
             * that we now attempt to process this byte as though it were
             * the beginning of another possibly-valid sequence */
            return handleUTF8Sequence(b);
        }

        mUTF8ByteBuffer.put(b);
        if (--mUTF8ToFollow == 0) {
            // Sequence complete -- decode and emit it
            ByteBuffer byteBuf = mUTF8ByteBuffer;
            CharBuffer charBuf = mInputCharBuffer;
            CharsetDecoder decoder = mUTF8Decoder;

            // Read the collected bytes from the start; the decoder is shared, so reset it first.
            byteBuf.rewind();
            decoder.reset();
            decoder.decode(byteBuf, charBuf, true);
            decoder.flush(charBuf);

            // NOTE(review): this is the buffer's whole backing array, not just the
            // chars written by this decode -- confirm emit(char[]) copes with that.
            char[] chars = charBuf.array();
            if (chars[0] >= 0x80 && chars[0] <= 0x9f) {
                /* Sequence decoded to a C1 control character which needs
                   to be sent through process() again */
                process((byte) chars[0], false);
            } else {
                emit(chars);
            }

            // Make both buffers ready for the next sequence.
            byteBuf.clear();
            charBuf.clear();
        }
    } else {
        // Start of a new sequence: the lead byte tells how many continuation bytes follow.
        if ((b & 0xe0) == 0xc0) { // 0b110 -- two-byte sequence
            mUTF8ToFollow = 1;
        } else if ((b & 0xf0) == 0xe0) { // 0b1110 -- three-byte sequence
            mUTF8ToFollow = 2;
        } else if ((b & 0xf8) == 0xf0) { // 0b11110 -- four-byte sequence
            mUTF8ToFollow = 3;
        } else {
            // Not a valid UTF-8 sequence start -- replace this char
            emit(UNICODE_REPLACEMENT_CHAR);
            return true;
        }

        mUTF8ByteBuffer.put(b);
    }

    return true;
}
 
Example 20
Source File: VisorTaskUtils.java    From ignite with Apache License 2.0 3 votes vote down vote up
/**
 * Decode file charset.
 * <p>
 * Reads up to {@code DFLT_BUFFER_SIZE} bytes from the start of the file and returns the
 * first charset whose decoder accepts that sample without error. The platform default
 * charset, US-ASCII, UTF-8, UTF-16BE and UTF-16LE are tried first (when available),
 * followed by all other available charsets.
 *
 * @param f File to process.
 * @return File charset, or the platform default charset if no charset decodes the sample.
 * @throws IOException in case of error.
 */
public static Charset decode(File f) throws IOException {
    SortedMap<String, Charset> charsets = Charset.availableCharsets();

    String[] firstCharsets = {Charset.defaultCharset().name(), "US-ASCII", "UTF-8", "UTF-16BE", "UTF-16LE"};

    // Insertion-ordered set: preferred charsets first, then the rest without duplicates.
    Collection<Charset> orderedCharsets = U.newLinkedHashSet(charsets.size());

    for (String c : firstCharsets)
        if (charsets.containsKey(c))
            orderedCharsets.add(charsets.get(c));

    orderedCharsets.addAll(charsets.values());

    try (RandomAccessFile raf = new RandomAccessFile(f, "r")) {
        FileChannel ch = raf.getChannel();

        ByteBuffer buf = ByteBuffer.allocate(DFLT_BUFFER_SIZE);

        ch.read(buf);

        buf.flip();

        for (Charset charset : orderedCharsets) {
            CharsetDecoder decoder = charset.newDecoder();

            decoder.reset();

            try {
                // decode() advances the position of the buffer it is given, also when it
                // fails part-way. Decode a duplicate so that every candidate charset is
                // checked against the whole sample rather than only the bytes left over
                // from the previous failed attempt.
                decoder.decode(buf.duplicate());

                return charset;
            }
            catch (CharacterCodingException ignored) {
                // The sample is not valid in this charset; try the next candidate.
            }
        }
    }

    return Charset.defaultCharset();
}