Java Code Examples for java.nio.charset.CoderResult#isMalformed()

The following examples show how to use java.nio.charset.CoderResult#isMalformed(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out the related API usage in the sidebar.
Example 1
Source File: BasicURLCanonicalizer.java    From webarchive-commons with Apache License 2.0 6 votes vote down vote up
/**
 * Appends the UTF-8 decoding of the bytes held in {@code bbuf} to
 * {@code sb}. Whenever the decoder reports a malformed run of bytes, the
 * matching {@code %xx} triplets are copied verbatim from {@code inputStr}
 * instead, and decoding resumes right after the bad bytes.
 * <p>
 * On entry {@code bbuf} is expected to be in "write" state: its position
 * marks the end of the collected bytes. The method flips it before
 * decoding. Each byte of {@code bbuf} is assumed to correspond to exactly
 * three characters of {@code inputStr}, starting at {@code seqStart}.
 *
 * @param sb
 *            builder receiving decoded characters and un-decodable
 *            {@code %xx} text
 * @param bbuf
 *            raw bytes obtained from the %-encoded input
 * @param inputStr
 *            the complete input string
 * @param seqStart
 *            inclusive start index, within inputStr, of the %-encoded run
 * @param seqEnd
 *            exclusive end index, within inputStr, of the %-encoded run
 *            (not read by the method body)
 * @param utf8decoder
 *            decoder to use; it is reset before decoding starts
 */
private void appendDecodedPctUtf8(StringBuilder sb, ByteBuffer bbuf,
		String inputStr, int seqStart, int seqEnd,
		CharsetDecoder utf8decoder) {
	utf8decoder.reset();
	// one char per input byte is always enough room for UTF-8 output
	CharBuffer decoded = CharBuffer.allocate(bbuf.position());
	bbuf.flip();
	while (bbuf.hasRemaining()) {
		CoderResult outcome = utf8decoder.decode(bbuf, decoded, true);

		// flush whatever decoded cleanly before looking at the outcome
		decoded.flip();
		sb.append(decoded);
		decoded.clear();

		if (!outcome.isMalformed()) {
			continue;
		}

		// the decoder stopped in front of the bad bytes; copy their
		// original %xx text (three chars per byte) and step over them
		int badByteIndex = bbuf.position();
		int badByteCount = outcome.length();
		int hexStart = seqStart + 3 * badByteIndex;
		sb.append(inputStr.subSequence(hexStart, hexStart + 3 * badByteCount));
		bbuf.position(badByteIndex + badByteCount);
	}
}
 
Example 2
Source File: BinaryTruncator.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the decoder over {@code buffer} to classify its content, then puts
 * the buffer's position back where it was. Decoded characters are thrown
 * away: the scratch output buffer is cleared before every decode call.
 *
 * @param buffer the bytes to examine, from its current position
 * @return {@code VALID} if decoding ends in underflow, {@code MALFORMED}
 *         on a malformed-input result, otherwise {@code UNMAPPABLE}
 */
Validity checkValidity(ByteBuffer buffer) {
  final int savedPosition = buffer.position();
  CoderResult outcome;
  do {
    // overflow only means the scratch buffer filled up; empty it and go on
    dummyBuffer.clear();
    outcome = decoder.decode(buffer, dummyBuffer, true);
  } while (outcome.isOverflow());
  buffer.position(savedPosition);

  if (outcome.isUnderflow()) {
    return Validity.VALID;
  }
  return outcome.isMalformed() ? Validity.MALFORMED : Validity.UNMAPPABLE;
}
 
Example 3
Source File: ResettableFileInputStream.java    From mt-flume with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes and returns the next character from the byte buffer, refilling
 * the buffer first when it is empty.
 *
 * @return the decoded character, or -1 when this call produced none
 * @throws IOException if the decoder reports malformed or unmappable input
 *         (raised through {@code CoderResult.throwException()}), or if
 *         refilling fails
 */
@Override
public synchronized int readChar() throws IOException {
  if (!buf.hasRemaining()) {
    refillBuf();
  }

  final int startPos = buf.position();
  charBuf.clear();

  // tell the decoder this is the last chunk once the tracked position has
  // reached the file size
  final boolean atEndOfInput = position >= fileSize;

  final CoderResult outcome = decoder.decode(buf, charBuf, atEndOfInput);
  if (outcome.isError()) {
    outcome.throwException();
  }

  final int consumed = buf.position() - startPos;

  charBuf.flip();
  if (!charBuf.hasRemaining()) {
    // nothing came out; the decoder may be holding a partial character
    incrPosition(consumed, false);
    return -1;
  }

  final char decoded = charBuf.get();
  // don't increment the persisted location if we are in between a
  // surrogate pair, otherwise we may never recover if we seek() to this
  // location!
  incrPosition(consumed, !Character.isHighSurrogate(decoded));
  return decoded;
}
 
Example 4
Source File: UrlCanonicalizer.java    From outbackcdx with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes the remaining bytes of {@code bb} as UTF-8 and appends the result
 * to {@code out}. Bytes the decoder rejects as malformed are appended as
 * lower-case {@code %xx} escapes instead, and decoding continues after them.
 * <p>
 * Output order follows input order: characters decoded before a malformed
 * run are appended before that run's escapes.
 *
 * @param bb  bytes to decode; fully consumed on return
 * @param out builder receiving decoded characters and {@code %xx} escapes
 */
private static void tryDecodeUtf8(ByteBuffer bb, StringBuilder out) {
    CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder();
    // UTF-8 never yields more chars than input bytes, so this cannot overflow
    CharBuffer cb = CharBuffer.allocate(bb.remaining());
    while (bb.hasRemaining()) {
        CoderResult result = decoder.decode(bb, cb, true);

        // Flush the successfully decoded prefix first; the decoder stopped
        // right in front of any malformed bytes, so they belong after it.
        cb.flip();
        out.append(cb);
        cb.clear();

        if (result.isMalformed()) {
            // re-escape the offending bytes, consuming them from the input
            for (int i = 0; i < result.length(); i++) {
                out.append('%').append(String.format("%02x", bb.get()));
            }
        }
    }
}
 
Example 5
Source File: MboxIterator.java    From sling-samples with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes the next portion of the byte buffer into the mbox char buffer,
 * refreshes the end-of-input flag and flips the char buffer for reading.
 *
 * @throws CharConversionException if the decoder reports malformed input
 *         or an unmappable character
 */
private void decodeNextCharBuffer() throws CharConversionException {
    final CoderResult outcome = DECODER.decode(byteBuffer, mboxCharBuffer, endOfInputFlag);
    updateEndOfInputFlag();
    mboxCharBuffer.flip();

    // the two error kinds are mutually exclusive, so test them one by one
    if (outcome.isMalformed()) {
        throw new CharConversionException("Malformed input!");
    }
    if (outcome.isUnmappable()) {
        throw new CharConversionException("Unmappable character!");
    }
}
 
Example 6
Source File: InputStreamReader.java    From openjdk-jdk9 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Decodes up to {@code length} characters into {@code buf}, beginning at
 * index {@code offset}. Bytes already sitting in this reader's byte buffer
 * are decoded first; the buffer is topped up from the underlying stream
 * whenever the decoder runs out of input.
 *
 * @param buf
 *            destination array for the decoded characters.
 * @param offset
 *            index in {@code buf} at which the first character is stored.
 * @param length
 *            maximum number of characters to store.
 * @return the number of characters stored; 0 if {@code length} is 0;
 *         -1 if no character was stored by this call.
 * @throws IndexOutOfBoundsException
 *             if {@code offset < 0} or {@code length < 0}, or if
 *             {@code offset + length} is greater than the length of
 *             {@code buf}.
 * @throws IOException
 *             if this reader is closed, the input is malformed or
 *             unmappable, or some other I/O error occurs.
 */
@Override
public int read(char[] buf, int offset, int length) throws IOException {
    synchronized (lock) {
        if (!isOpen()) {
            throw new IOException("InputStreamReader is closed.");
        }
        final boolean inBounds =
                offset >= 0 && offset <= buf.length - length && length >= 0;
        if (!inBounds) {
            throw new IndexOutOfBoundsException();
        }
        if (length == 0) {
            return 0;
        }

        final CharBuffer target = CharBuffer.wrap(buf, offset, length);
        CoderResult status = CoderResult.UNDERFLOW;

        // nothing buffered means the stream must be consulted before the
        // decoder can make progress
        boolean refill = !bytes.hasRemaining();

        while (target.hasRemaining()) {
            if (refill) {
                try {
                    if ((in.available() == 0)
                        && (target.position() > offset)) {
                        // some characters are ready and reading more
                        // might block: return what we have
                        break;
                    }
                } catch (IOException ignored) {
                    // available() failed; just attempt the read below
                }

                // append new bytes after the current limit
                final int tail = bytes.limit();
                final int count = in.read(bytes.array(),
                        bytes.arrayOffset() + tail, bytes.capacity() - tail);

                if (count == -1) {
                    endOfInput = true;
                    break;
                }
                if (count == 0) {
                    break;
                }
                bytes.limit(tail + count);
                refill = false;
            }

            status = decoder.decode(bytes, target, false);
            if (!status.isUnderflow()) {
                // overflow or error: stop and let the code below decide
                break;
            }

            // decoder wants more input; when the buffer is full to
            // capacity, slide the unread bytes to the front first
            if (bytes.limit() == bytes.capacity()) {
                bytes.compact();
                bytes.limit(bytes.position());
                bytes.position(0);
            }
            refill = true;
        }

        if (endOfInput && status == CoderResult.UNDERFLOW) {
            // final pass so the decoder can report a truncated sequence
            status = decoder.decode(bytes, target, true);
            decoder.flush(target);
            decoder.reset();
        }
        if (status.isMalformed()) {
            throw new MalformedInputException(status.length());
        }
        if (status.isUnmappable()) {
            throw new UnmappableCharacterException(status.length());
        }

        final int produced = target.position() - offset;
        return produced == 0 ? -1 : produced;
    }
}
 
Example 7
Source File: InputStreamReader.java    From jtransc with Apache License 2.0 4 votes vote down vote up
/**
 * Decodes up to {@code count} characters into {@code buffer}, beginning at
 * index {@code offset}. Bytes already sitting in this reader's byte buffer
 * are decoded first; the buffer is topped up from the underlying stream
 * whenever the decoder runs out of input.
 *
 * @return the number of characters stored; 0 if {@code count} is 0;
 *     -1 if no character was stored by this call.
 * @throws IndexOutOfBoundsException
 *     if {@code offset < 0 || count < 0 || offset + count > buffer.length}.
 * @throws IOException
 *             if this reader is closed, the input is malformed or
 *             unmappable, or some other I/O error occurs.
 */
@Override
public int read(char[] buffer, int offset, int count) throws IOException {
    synchronized (lock) {
        if (!isOpen()) {
            throw new IOException("InputStreamReader is closed");
        }

        JTranscArrays.checkOffsetAndCount(buffer.length, offset, count);
        if (count == 0) {
            return 0;
        }

        final CharBuffer target = CharBuffer.wrap(buffer, offset, count);
        CoderResult status = CoderResult.UNDERFLOW;

        // nothing buffered means the stream must be consulted before the
        // decoder can make progress
        boolean refill = !bytes.hasRemaining();

        while (target.hasRemaining()) {
            if (refill) {
                try {
                    if (in.available() == 0 && target.position() > offset) {
                        // some characters are ready and reading more
                        // might block: return what we have
                        break;
                    }
                } catch (IOException ignored) {
                    // available() failed; just attempt the read below
                }

                // append new bytes after the current limit
                final int tail = bytes.limit();
                final int received = in.read(bytes.array(),
                        bytes.arrayOffset() + tail, bytes.capacity() - tail);

                if (received == -1) {
                    endOfInput = true;
                    break;
                }
                if (received == 0) {
                    break;
                }
                bytes.limit(tail + received);
                refill = false;
            }

            status = decoder.decode(bytes, target, false);
            if (!status.isUnderflow()) {
                // overflow or error: stop and let the code below decide
                break;
            }

            // decoder wants more input; when the buffer is full to
            // capacity, slide the unread bytes to the front first
            if (bytes.limit() == bytes.capacity()) {
                bytes.compact();
                bytes.limit(bytes.position());
                bytes.position(0);
            }
            refill = true;
        }

        if (endOfInput && status == CoderResult.UNDERFLOW) {
            // final pass so the decoder can report a truncated sequence
            status = decoder.decode(bytes, target, true);
            decoder.flush(target);
            decoder.reset();
        }
        if (status.isError()) {
            // malformed or unmappable input
            status.throwException();
        }

        final int produced = target.position() - offset;
        return produced == 0 ? -1 : produced;
    }
}
 
Example 8
Source File: C2BConverter.java    From tomcatsrc with Apache License 2.0 4 votes vote down vote up
/**
 * Encodes the characters of {@code cc} and appends the resulting bytes to
 * {@code bc}. Characters the encoder could not consume in this call are
 * parked in the {@code leftovers} buffer and retried, together with new
 * input, on the next call.
 *
 * @param cc char input
 * @param bc byte output
 * @throws IOException if the encoder reports malformed or unmappable input
 */
public void convert(CharChunk cc, ByteChunk bc)
        throws IOException {
    // Point the cached byte buffer at the free tail of the byte chunk,
    // re-wrapping only when the chunk's backing array has changed
    if ((bb != null) && (bb.array() == bc.getBuffer())) {
        bb.limit(bc.getBuffer().length);
        bb.position(bc.getEnd());
    } else {
        bb = ByteBuffer.wrap(bc.getBuffer(), bc.getEnd(),
                bc.getBuffer().length - bc.getEnd());
    }
    // Same for the char buffer, which views the chunk's pending content
    if ((cb != null) && (cb.array() == cc.getBuffer())) {
        cb.limit(cc.getEnd());
        cb.position(cc.getStart());
    } else {
        cb = CharBuffer.wrap(cc.getBuffer(), cc.getStart(),
                cc.getLength());
    }

    CoderResult result;
    if (leftovers.position() > 0) {
        // Finish the characters held over from the previous call: feed one
        // more char at a time until output appears or the encoder stops
        // reporting underflow
        final int outStart = bb.position();
        do {
            leftovers.put((char) cc.substract());
            leftovers.flip();
            result = encoder.encode(leftovers, bb, false);
            leftovers.position(leftovers.limit());
            leftovers.limit(leftovers.array().length);
        } while (result.isUnderflow() && (bb.position() == outStart));
        if (result.isError()) {
            result.throwException();
        }
        // chars were pulled from the chunk above, so resync the view
        cb.position(cc.getStart());
        leftovers.position(0);
    }

    // Encode the bulk of the input
    result = encoder.encode(cb, bb, false);
    if (result.isError()) {
        result.throwException();
    }

    final boolean underflow = result.isUnderflow();
    if (underflow || result.isOverflow()) {
        // Propagate current positions to the byte chunk and char chunk
        bc.setEnd(bb.position());
        cc.setOffset(cb.position());
    }
    if (underflow && cc.getLength() > 0) {
        // Unconsumed chars remain: stash them for the next call
        leftovers.limit(leftovers.array().length);
        leftovers.position(cc.getLength());
        cc.substract(leftovers.array(), 0, cc.getLength());
    }
}
 
Example 9
Source File: B2CConverter.java    From tomcatsrc with Apache License 2.0 4 votes vote down vote up
/**
 * Decodes the bytes of {@code bc} and appends the resulting characters to
 * {@code cc}. Bytes the decoder could not consume in this call are parked
 * in the {@code leftovers} buffer and retried, together with new input, on
 * the next call.
 *
 * @param bc byte input
 * @param cc char output
 * @param endOfInput    Is this all of the available data
 * @throws IOException if the decoder reports malformed or unmappable input
 */
public void convert(ByteChunk bc, CharChunk cc, boolean endOfInput)
        throws IOException {
    // Point the cached byte buffer at the chunk's pending content,
    // re-wrapping only when the chunk's backing array has changed
    if ((bb != null) && (bb.array() == bc.getBuffer())) {
        bb.limit(bc.getEnd());
        bb.position(bc.getStart());
    } else {
        bb = ByteBuffer.wrap(bc.getBuffer(), bc.getStart(), bc.getLength());
    }
    // Same for the char buffer, which views the free tail of the chunk
    if ((cb != null) && (cb.array() == cc.getBuffer())) {
        cb.limit(cc.getBuffer().length);
        cb.position(cc.getEnd());
    } else {
        cb = CharBuffer.wrap(cc.getBuffer(), cc.getEnd(),
                cc.getBuffer().length - cc.getEnd());
    }

    CoderResult result;
    if (leftovers.position() > 0) {
        // Finish the bytes held over from the previous call: feed one more
        // byte at a time until output appears or the decoder stops
        // reporting underflow
        final int outStart = cb.position();
        do {
            leftovers.put(bc.substractB());
            leftovers.flip();
            result = decoder.decode(leftovers, cb, endOfInput);
            leftovers.position(leftovers.limit());
            leftovers.limit(leftovers.array().length);
        } while (result.isUnderflow() && (cb.position() == outStart));
        if (result.isError()) {
            result.throwException();
        }
        // bytes were pulled from the chunk above, so resync the view
        bb.position(bc.getStart());
        leftovers.position(0);
    }

    // Decode the bulk of the input
    result = decoder.decode(bb, cb, endOfInput);
    if (result.isError()) {
        result.throwException();
    }

    final boolean underflow = result.isUnderflow();
    if (underflow || result.isOverflow()) {
        // Propagate current positions to the byte chunk and char chunk; on
        // overflow, if this continues the char buffer will get resized
        bc.setOffset(bb.position());
        cc.setEnd(cb.position());
    }
    if (underflow && bc.getLength() > 0) {
        // Unconsumed bytes remain: stash them for the next call
        leftovers.limit(leftovers.array().length);
        leftovers.position(bc.getLength());
        bc.substract(leftovers.array(), 0, bc.getLength());
    }
}
 
Example 10
Source File: InputStreamReader.java    From TorrentEngine with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Decodes up to {@code length} characters into {@code buf}, beginning at
 * index {@code offset}. Bytes already sitting in this reader's byte buffer
 * are decoded first; the buffer is topped up from the underlying stream
 * whenever the decoder runs out of input.
 *
 * @param buf
 *            destination array for the decoded characters.
 * @param offset
 *            index in {@code buf} at which the first character is stored.
 * @param length
 *            maximum number of characters to store.
 * @return the number of characters stored; 0 if {@code length} is 0;
 *         -1 if no character was stored by this call.
 * @throws IndexOutOfBoundsException
 *             if {@code offset < 0} or {@code length < 0}, or if
 *             {@code offset + length} is greater than the length of
 *             {@code buf}.
 * @throws IOException
 *             if this reader is closed, the input is malformed or
 *             unmappable, or some other I/O error occurs.
 */
@Override
public int read(char[] buf, int offset, int length) throws IOException {
    synchronized (lock) {
        if (!isOpen()) {
            throw new IOException("InputStreamReader is closed.");
        }
        final boolean inBounds =
                offset >= 0 && offset <= buf.length - length && length >= 0;
        if (!inBounds) {
            throw new IndexOutOfBoundsException();
        }
        if (length == 0) {
            return 0;
        }

        final CharBuffer target = CharBuffer.wrap(buf, offset, length);
        CoderResult status = CoderResult.UNDERFLOW;

        // nothing buffered means the stream must be consulted before the
        // decoder can make progress
        boolean refill = !bytes.hasRemaining();

        while (target.hasRemaining()) {
            if (refill) {
                try {
                    if ((in.available() == 0)
                        && (target.position() > offset)) {
                        // some characters are ready and reading more
                        // might block: return what we have
                        break;
                    }
                } catch (IOException ignored) {
                    // available() failed; just attempt the read below
                }

                // append new bytes after the current limit
                final int tail = bytes.limit();
                final int count = in.read(bytes.array(),
                        bytes.arrayOffset() + tail, bytes.capacity() - tail);

                if (count == -1) {
                    endOfInput = true;
                    break;
                }
                if (count == 0) {
                    break;
                }
                bytes.limit(tail + count);
                refill = false;
            }

            status = decoder.decode(bytes, target, false);
            if (!status.isUnderflow()) {
                // overflow or error: stop and let the code below decide
                break;
            }

            // decoder wants more input; when the buffer is full to
            // capacity, slide the unread bytes to the front first
            if (bytes.limit() == bytes.capacity()) {
                bytes.compact();
                bytes.limit(bytes.position());
                bytes.position(0);
            }
            refill = true;
        }

        if (endOfInput && status == CoderResult.UNDERFLOW) {
            // final pass so the decoder can report a truncated sequence
            status = decoder.decode(bytes, target, true);
            decoder.flush(target);
            decoder.reset();
        }
        if (status.isMalformed()) {
            throw new MalformedInputException(status.length());
        }
        if (status.isUnmappable()) {
            throw new UnmappableCharacterException(status.length());
        }

        final int produced = target.position() - offset;
        return produced == 0 ? -1 : produced;
    }
}
 
Example 11
Source File: B2CConverter.java    From Tomcat8-Source-Read with MIT License 4 votes vote down vote up
/**
 * Decodes the bytes of {@code bc} and appends the resulting characters to
 * {@code cc}. Bytes the decoder could not consume in this call are parked
 * in the {@code leftovers} buffer and retried, together with new input, on
 * the next call.
 *
 * @param bc byte input
 * @param cc char output
 * @param endOfInput    Is this all of the available data
 *
 * @throws IOException If the conversion can not be completed
 */
public void convert(ByteChunk bc, CharChunk cc, boolean endOfInput)
        throws IOException {
    // Point the cached byte buffer at the chunk's pending content,
    // re-wrapping only when the chunk's backing array has changed
    if ((bb != null) && (bb.array() == bc.getBuffer())) {
        bb.limit(bc.getEnd());
        bb.position(bc.getStart());
    } else {
        bb = ByteBuffer.wrap(bc.getBuffer(), bc.getStart(), bc.getLength());
    }
    // Same for the char buffer, which views the free tail of the chunk
    if ((cb != null) && (cb.array() == cc.getBuffer())) {
        cb.limit(cc.getBuffer().length);
        cb.position(cc.getEnd());
    } else {
        cb = CharBuffer.wrap(cc.getBuffer(), cc.getEnd(),
                cc.getBuffer().length - cc.getEnd());
    }

    CoderResult result;
    if (leftovers.position() > 0) {
        // Finish the bytes held over from the previous call: feed one more
        // byte at a time until output appears or the decoder stops
        // reporting underflow
        final int outStart = cb.position();
        do {
            leftovers.put(bc.substractB());
            leftovers.flip();
            result = decoder.decode(leftovers, cb, endOfInput);
            leftovers.position(leftovers.limit());
            leftovers.limit(leftovers.array().length);
        } while (result.isUnderflow() && (cb.position() == outStart));
        if (result.isError()) {
            result.throwException();
        }
        // bytes were pulled from the chunk above, so resync the view
        bb.position(bc.getStart());
        leftovers.position(0);
    }

    // Decode the bulk of the input
    result = decoder.decode(bb, cb, endOfInput);
    if (result.isError()) {
        result.throwException();
    }

    final boolean underflow = result.isUnderflow();
    if (underflow || result.isOverflow()) {
        // Propagate current positions to the byte chunk and char chunk; on
        // overflow, if this continues the char buffer will get resized
        bc.setOffset(bb.position());
        cc.setEnd(cb.position());
    }
    if (underflow && bc.getLength() > 0) {
        // Unconsumed bytes remain: stash them for the next call
        leftovers.limit(leftovers.array().length);
        leftovers.position(bc.getLength());
        bc.substract(leftovers.array(), 0, bc.getLength());
    }
}
 
Example 12
Source File: BaseFileManager.java    From openjdk-jdk9 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Decodes the remaining bytes of {@code inbuf} into characters using the
 * decoder obtained for {@link #getEncodingName()}.
 * <p>
 * Malformed or unmappable input does not abort decoding: the offending
 * bytes are consumed, reported through {@code log.error} as a hex string,
 * and replaced in the output by U+FFFD.
 *
 * @param inbuf the bytes to decode, from its current position
 * @param ignoreEncodingErrors passed through to {@code getDecoder};
 *        presumably selects a lenient decoder - confirm against that method
 * @return a flipped buffer holding the decoded characters, whose capacity
 *         exceeds its limit by at least one; if the encoding name is
 *         illegal or unsupported, an empty buffer (after logging an error)
 */
@SuppressWarnings("cast")
public CharBuffer decode(ByteBuffer inbuf, boolean ignoreEncodingErrors) {
    String encName = getEncodingName();
    CharsetDecoder decoder;
    try {
        decoder = getDecoder(encName, ignoreEncodingErrors);
    } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
        log.error("unsupported.encoding", encName);
        // empty result: limit 0, capacity 1
        return (CharBuffer)CharBuffer.allocate(1).flip();
    }

    // slightly overestimate the buffer size to avoid reallocation.
    float factor =
        decoder.averageCharsPerByte() * 0.8f +
        decoder.maxCharsPerByte() * 0.2f;
    CharBuffer dest = CharBuffer.
        allocate(10 + (int)(inbuf.remaining()*factor));

    // Each pass decodes as far as possible, then flips dest so that its
    // limit marks the end of the characters produced so far.
    while (true) {
        CoderResult result = decoder.decode(inbuf, dest, true);
        dest.flip();

        if (result.isUnderflow()) { // done reading
            // make sure there is at least one extra character
            if (dest.limit() == dest.capacity()) {
                dest = CharBuffer.allocate(dest.capacity()+1).put(dest);
                dest.flip();
            }
            return dest;
        } else if (result.isOverflow()) { // buffer too small; expand
            int newCapacity =
                10 + dest.capacity() +
                (int)(inbuf.remaining()*decoder.maxCharsPerByte());
            // copying leaves the new buffer positioned after the old
            // content, ready for the next decode call
            dest = CharBuffer.allocate(newCapacity).put(dest);
        } else if (result.isMalformed() || result.isUnmappable()) {
            // bad character in input
            StringBuilder unmappable = new StringBuilder();
            int len = result.length();

            // consume the bad bytes, collecting them as upper-case hex
            for (int i = 0; i < len; i++) {
                unmappable.append(String.format("%02X", inbuf.get()));
            }

            String charsetName = charset == null ? encName : charset.name();

            // dest.limit() is the number of chars decoded so far, i.e. the
            // position of the error in the output
            log.error(dest.limit(),
                      Errors.IllegalCharForEncoding(unmappable.toString(), charsetName));

            // undo the flip() to prepare the output buffer
            // for more translation
            dest.position(dest.limit());
            dest.limit(dest.capacity());
            dest.put((char)0xfffd); // backward compatible
        } else {
            throw new AssertionError(result);
        }
    }
    // unreached
}
 
Example 13
Source File: C2BConverter.java    From Tomcat7.0.67 with Apache License 2.0 4 votes vote down vote up
/**
 * Encodes the characters of {@code cc} and appends the resulting bytes to
 * {@code bc}. Characters the encoder could not consume in this call are
 * parked in the {@code leftovers} buffer and retried, together with new
 * input, on the next call.
 *
 * @param cc char input
 * @param bc byte output
 * @throws IOException if the encoder reports malformed or unmappable input
 */
public void convert(CharChunk cc, ByteChunk bc)
        throws IOException {
    // Point the cached byte buffer at the free tail of the byte chunk,
    // re-wrapping only when the chunk's backing array has changed
    if ((bb != null) && (bb.array() == bc.getBuffer())) {
        bb.limit(bc.getBuffer().length);
        bb.position(bc.getEnd());
    } else {
        bb = ByteBuffer.wrap(bc.getBuffer(), bc.getEnd(),
                bc.getBuffer().length - bc.getEnd());
    }
    // Same for the char buffer, which views the chunk's pending content
    if ((cb != null) && (cb.array() == cc.getBuffer())) {
        cb.limit(cc.getEnd());
        cb.position(cc.getStart());
    } else {
        cb = CharBuffer.wrap(cc.getBuffer(), cc.getStart(),
                cc.getLength());
    }

    CoderResult result;
    if (leftovers.position() > 0) {
        // Finish the characters held over from the previous call: feed one
        // more char at a time until output appears or the encoder stops
        // reporting underflow
        final int outStart = bb.position();
        do {
            leftovers.put((char) cc.substract());
            leftovers.flip();
            result = encoder.encode(leftovers, bb, false);
            leftovers.position(leftovers.limit());
            leftovers.limit(leftovers.array().length);
        } while (result.isUnderflow() && (bb.position() == outStart));
        if (result.isError()) {
            result.throwException();
        }
        // chars were pulled from the chunk above, so resync the view
        cb.position(cc.getStart());
        leftovers.position(0);
    }

    // Encode the bulk of the input
    result = encoder.encode(cb, bb, false);
    if (result.isError()) {
        result.throwException();
    }

    final boolean underflow = result.isUnderflow();
    if (underflow || result.isOverflow()) {
        // Propagate current positions to the byte chunk and char chunk
        bc.setEnd(bb.position());
        cc.setOffset(cb.position());
    }
    if (underflow && cc.getLength() > 0) {
        // Unconsumed chars remain: stash them for the next call
        leftovers.limit(leftovers.array().length);
        leftovers.position(cc.getLength());
        cc.substract(leftovers.array(), 0, cc.getLength());
    }
}
 
Example 14
Source File: B2CConverter.java    From Tomcat7.0.67 with Apache License 2.0 4 votes vote down vote up
/**
 * Decodes the bytes of {@code bc} and appends the resulting characters to
 * {@code cc}. Bytes the decoder could not consume in this call are parked
 * in the {@code leftovers} buffer and retried, together with new input, on
 * the next call.
 *
 * @param bc byte input
 * @param cc char output
 * @param endOfInput    Is this all of the available data
 * @throws IOException if the decoder reports malformed or unmappable input
 */
public void convert(ByteChunk bc, CharChunk cc, boolean endOfInput)
        throws IOException {
    // Point the cached byte buffer at the chunk's pending content,
    // re-wrapping only when the chunk's backing array has changed
    if ((bb != null) && (bb.array() == bc.getBuffer())) {
        bb.limit(bc.getEnd());
        bb.position(bc.getStart());
    } else {
        bb = ByteBuffer.wrap(bc.getBuffer(), bc.getStart(), bc.getLength());
    }
    // Same for the char buffer, which views the free tail of the chunk
    if ((cb != null) && (cb.array() == cc.getBuffer())) {
        cb.limit(cc.getBuffer().length);
        cb.position(cc.getEnd());
    } else {
        cb = CharBuffer.wrap(cc.getBuffer(), cc.getEnd(),
                cc.getBuffer().length - cc.getEnd());
    }

    CoderResult result;
    if (leftovers.position() > 0) {
        // Finish the bytes held over from the previous call: feed one more
        // byte at a time until output appears or the decoder stops
        // reporting underflow
        final int outStart = cb.position();
        do {
            leftovers.put(bc.substractB());
            leftovers.flip();
            result = decoder.decode(leftovers, cb, endOfInput);
            leftovers.position(leftovers.limit());
            leftovers.limit(leftovers.array().length);
        } while (result.isUnderflow() && (cb.position() == outStart));
        if (result.isError()) {
            result.throwException();
        }
        // bytes were pulled from the chunk above, so resync the view
        bb.position(bc.getStart());
        leftovers.position(0);
    }

    // Decode the bulk of the input
    result = decoder.decode(bb, cb, endOfInput);
    if (result.isError()) {
        result.throwException();
    }

    final boolean underflow = result.isUnderflow();
    if (underflow || result.isOverflow()) {
        // Propagate current positions to the byte chunk and char chunk; on
        // overflow, if this continues the char buffer will get resized
        bc.setOffset(bb.position());
        cc.setEnd(cb.position());
    }
    if (underflow && bc.getLength() > 0) {
        // Unconsumed bytes remain: stash them for the next call
        leftovers.limit(leftovers.array().length);
        leftovers.position(bc.getLength());
        bc.substract(leftovers.array(), 0, bc.getLength());
    }
}
 
Example 15
Source File: FastMatcher.java    From netbeans with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the character at the given index of the decoded file content.
 * <p>
 * If the index is not covered by the current buffer, the file is decoded
 * chunk by chunk (starting over via {@code reset()} when the index lies
 * before the current buffer window or nothing has been decoded yet) until
 * a buffer containing the index has been filled.
 *
 * @param index index of the requested character
 * @return the character at {@code index}
 * @throws IndexOutOfBoundsException if {@code index} is negative or not
 *         less than {@link #length()}, as required by
 *         {@link CharSequence#charAt(int)}
 * @throws IllegalStateException if the character could not be obtained,
 *         e.g. because reading or decoding the file failed
 */
@Override
public char charAt(int index) {

    // Statistics: count accesses that move backwards
    if (index < lastIndex) {
        returns++;
    }
    lastIndex = index;
    // Reject invalid indexes up front instead of decoding the whole file
    // only to fail with an unrelated exception at the end.
    if (index < 0 || index >= length()) {
        throw new IndexOutOfBoundsException();
    }
    if (isInBuffer(index)) {
        return getFromBuffer(index);
    } else {
        if (index < currentStart || currentStart == -1) {
            reset();
        }
        retrieves++;
        try {
            while (readBytes < fileSize) {
                // Scoped to a single iteration so that a failing map() call
                // can never see (and release again) the previous mapping.
                MappedByteBuffer mappedByteBuffer = null;
                try {
                    mappedByteBuffer = fileChannel.map(
                            FileChannel.MapMode.READ_ONLY,
                            readBytes,
                            Math.min(SIZE_LIMIT, fileSize - readBytes));
                    maps++;
                    CoderResult result;
                    do {
                        // Advance the window start by the size of the
                        // buffer content that is about to be replaced
                        currentStart = currentStart == -1 ? 0
                                : currentStart + currentBuffer.limit();
                        currentBuffer.clear();
                        result = currentDecoder.decode(mappedByteBuffer,
                                currentBuffer,
                                readBytes + SIZE_LIMIT >= fileSize);
                        currentBuffer.flip();
                        int readChars = currentBuffer.limit();
                        if (currentStart + readChars > index) {
                            return getFromBuffer(index);
                        }
                        if (result.isUnmappable() || result.isMalformed()
                                || result.isError()) {
                            throw new IOException("Error decoding file: "
                                    + result.toString() + " ");
                        }
                    } while (result.isOverflow());
                } finally {
                    // Runs for normal completion, early return and errors:
                    // remember how far we got and release the mapping once.
                    if (mappedByteBuffer != null) {
                        int readNow = mappedByteBuffer.position();
                        readBytes += readNow;
                        unmap(mappedByteBuffer);
                    }
                }
            }
            // All bytes consumed; drain whatever the decoder still holds
            boolean repeat;
            do {
                repeat = currentDecoder.flush(currentBuffer).isOverflow();
                int size = currentBuffer.position();
                if (size + currentStart > index) {
                    currentBuffer.flip();
                    return currentBuffer.get(index - currentStart);
                }
                currentBuffer.clear();
                currentStart += size;
            } while (repeat);
        } catch (IOException ex) {
            // The mapping (if any) was already released in the finally block
            Exceptions.printStackTrace(ex);
        }
    }

    throw new IllegalStateException(
            "Cannot get character.");   //NOI18N
}
 
Example 16
Source File: FastMatcher.java    From netbeans with Apache License 2.0 4 votes vote down vote up
/**
 * Compute length of this sequence - quite expensive operation, indeed.
 * <p>
 * The whole file is mapped and decoded chunk by chunk and the decoded
 * characters are counted. The result is cached in {@code length}, so the
 * work is only done on the first call. If reading or decoding fails, the
 * exception is reported and the number of characters counted so far is
 * cached and returned.
 *
 * @return number of characters in the decoded file content
 */
@Override
public int length() {
    if (length != -1) {
        return length;
    }
    long start = System.currentTimeMillis();
    int charactersRead = 0;
    long bytesRead = 0;
    CharBuffer charBuffer = CharBuffer.allocate(SIZE_LIMIT);
    CharsetDecoder decoder = prepareDecoder(charset);
    decoder.onUnmappableCharacter(CodingErrorAction.IGNORE);

    try {
        while (bytesRead < fileSize) {
            // Scoped to one iteration and released in finally, so a mapping
            // is unmapped exactly once even when a later map() call fails.
            MappedByteBuffer mappedByteBuffer = fileChannel.map(
                    FileChannel.MapMode.READ_ONLY, bytesRead,
                    Math.min(SIZE_LIMIT, fileSize - bytesRead));
            try {
                CoderResult result;
                do {
                    charBuffer.clear();
                    result = decoder.decode(
                            mappedByteBuffer, charBuffer,
                            bytesRead + SIZE_LIMIT >= fileSize);
                    if (result.isUnmappable() || result.isMalformed()
                            || result.isError()) {
                        throw new IOException("Error decoding file: "
                                + result.toString() + " ");
                    }
                    if (bytesRead + SIZE_LIMIT >= fileSize) {
                        LOG.info("Coding end");
                    }
                    charactersRead += charBuffer.position();
                } while (result.isOverflow());

                int readNow = mappedByteBuffer.position();
                bytesRead += readNow;
            } finally {
                unmap(mappedByteBuffer);
            }
        }
        // flush() is only legal after decode(..., endOfInput = true) has
        // been invoked, which never happens for an empty file.
        if (fileSize > 0) {
            charBuffer.clear();
            boolean repeat;
            do {
                repeat = decoder.flush(charBuffer).isOverflow();
                charactersRead += charBuffer.position();
                charBuffer.clear();
            } while (repeat);
        }
    } catch (IOException ex) {
        // The mapping (if any) was already released in the finally block
        Exceptions.printStackTrace(ex);
    }
    length = charactersRead;
    LOG.log(Level.INFO, "Length computed in {0} ms.", //NOI18N
            System.currentTimeMillis() - start);
    return length;
}
 
Example 17
Source File: BaseFileManager.java    From lua-for-android with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Decodes the remaining bytes of {@code inbuf} using the decoder obtained
 * for the configured encoding name.
 * <p>
 * Malformed or unmappable input is reported through {@code log}, the
 * offending bytes are skipped and U+FFFD is written in their place. If the
 * encoding is not supported, an error is logged and an empty buffer is
 * returned.
 *
 * @param inbuf                the bytes to decode
 * @param ignoreEncodingErrors passed on to {@code getDecoder}
 * @return a flipped buffer holding the decoded characters, with at least
 *         one unused position between its limit and its capacity
 */
public CharBuffer decode(ByteBuffer inbuf, boolean ignoreEncodingErrors) {
    final String encName = getEncodingName();
    final CharsetDecoder decoder;
    try {
        decoder = getDecoder(encName, ignoreEncodingErrors);
    } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
        log.error(Errors.UnsupportedEncoding(encName));
        return (CharBuffer) CharBuffer.allocate(1).flip();
    }

    // Size the output a little above the average expansion so that most
    // inputs are decoded without having to grow the buffer.
    float charsPerByteEstimate =
        decoder.averageCharsPerByte() * 0.8f +
        decoder.maxCharsPerByte() * 0.2f;
    int initialCapacity = 10 + (int)(inbuf.remaining()*charsPerByteEstimate);
    CharBuffer dest = CharBuffer.allocate(initialCapacity);

    for (;;) {
        CoderResult result = decoder.decode(inbuf, dest, true);
        dest.flip();

        if (result.isUnderflow()) {
            // All input consumed. Callers get one spare position at the end.
            if (dest.limit() != dest.capacity()) {
                return dest;
            }
            CharBuffer padded = CharBuffer.allocate(dest.capacity()+1).put(dest);
            padded.flip();
            return padded;
        }

        if (result.isOverflow()) {
            // Output exhausted: move what we have into a bigger buffer,
            // which is left positioned right after the copied characters.
            int grownCapacity =
                10 + dest.capacity() +
                (int)(inbuf.remaining()*decoder.maxCharsPerByte());
            dest = CharBuffer.allocate(grownCapacity).put(dest);
            continue;
        }

        if (!(result.isMalformed() || result.isUnmappable())) {
            throw new AssertionError(result);
        }

        // Bad input: consume the offending bytes and render them as hex
        // for the diagnostic.
        StringBuilder badBytesHex = new StringBuilder();
        int badLength = result.length();
        int consumed = 0;
        while (consumed < badLength) {
            badBytesHex.append(String.format("%02X", inbuf.get()));
            consumed++;
        }

        String charsetName = charset == null ? encName : charset.name();

        log.error(dest.limit(),
                  Errors.IllegalCharForEncoding(badBytesHex.toString(), charsetName));

        // Revert the flip() so decoding can append after the characters
        // produced so far, then emit the replacement character.
        dest.position(dest.limit());
        dest.limit(dest.capacity());
        dest.put((char)0xfffd); // backward compatible
    }
}
 
Example 18
Source File: C2BConverter.java    From Tomcat8-Source-Read with MIT License 4 votes vote down vote up
/**
 * Encodes the characters remaining in {@code cc} and writes the resulting
 * bytes into the backing array of {@code bc}, starting at {@code bc}'s
 * limit. On return {@code bc}'s limit and {@code cc}'s position reflect
 * the progress made; characters that could not be encoded yet are kept
 * in the internal leftovers buffer for the next call.
 *
 * @param cc char input
 * @param bc byte output
 * @throws IOException An encoding error occurred
 */
public void convert(CharBuffer cc, ByteBuffer bc) throws IOException {
    // Output view: reuse the cached wrapper while the backing array is
    // unchanged, otherwise wrap the free space behind bc's limit.
    if ((bb != null) && (bb.array() == bc.array())) {
        bb.limit(bc.capacity());
        bb.position(bc.limit());
    } else {
        bb = ByteBuffer.wrap(bc.array(), bc.limit(), bc.capacity() - bc.limit());
    }
    // Input view: same reuse-or-wrap handling for the characters of cc.
    if ((cb != null) && (cb.array() == cc.array())) {
        cb.limit(cc.limit());
        cb.position(cc.position());
    } else {
        cb = CharBuffer.wrap(cc.array(), cc.arrayOffset() + cc.position(), cc.remaining());
    }

    CoderResult result;
    if (leftovers.position() > 0) {
        // Characters are pending from an earlier call: feed input one char
        // at a time until something is written or the encoder fails.
        final int outputStart = bb.position();
        do {
            leftovers.put(cc.get());
            leftovers.flip();
            result = encoder.encode(leftovers, bb, false);
            leftovers.position(leftovers.limit());
            leftovers.limit(leftovers.array().length);
        } while (result.isUnderflow() && (bb.position() == outputStart));
        if (result.isError() || result.isMalformed()) {
            result.throwException();
        }
        // Skip the characters that were consumed through the leftovers
        cb.position(cc.position());
        leftovers.position(0);
    }

    // Encode the bulk of the input
    result = encoder.encode(cb, bb, false);
    if (result.isError() || result.isMalformed()) {
        result.throwException();
    } else if (result.isOverflow() || result.isUnderflow()) {
        // Publish the progress made to the caller's buffers
        bc.limit(bb.position());
        cc.position(cb.position());
        if (result.isUnderflow() && (cc.remaining() > 0)) {
            // Stash the unconsumed characters for the next call
            leftovers.limit(leftovers.array().length);
            leftovers.position(cc.remaining());
            cc.get(leftovers.array(), 0, cc.remaining());
        }
    }
}
 
Example 19
Source File: C2BConverter.java    From Tomcat8-Source-Read with MIT License 4 votes vote down vote up
/**
 * Encodes the characters held by {@code cc} and appends the resulting
 * bytes to {@code bc}, starting at {@code bc}'s end. On return
 * {@code bc}'s end and {@code cc}'s offset reflect the progress made;
 * characters that could not be encoded yet are kept in the internal
 * leftovers buffer for the next call.
 *
 * @param cc char input
 * @param bc byte output
 * @throws IOException An encoding error occurred
 */
public void convert(CharChunk cc, ByteChunk bc) throws IOException {
    // Output view: reuse the cached wrapper while the backing array is
    // unchanged, otherwise wrap the free space behind bc's end.
    if ((bb != null) && (bb.array() == bc.getBuffer())) {
        bb.limit(bc.getBuffer().length);
        bb.position(bc.getEnd());
    } else {
        bb = ByteBuffer.wrap(bc.getBuffer(), bc.getEnd(), bc.getBuffer().length - bc.getEnd());
    }
    // Input view: same reuse-or-wrap handling for the characters of cc.
    if ((cb != null) && (cb.array() == cc.getBuffer())) {
        cb.limit(cc.getEnd());
        cb.position(cc.getStart());
    } else {
        cb = CharBuffer.wrap(cc.getBuffer(), cc.getStart(), cc.getLength());
    }

    CoderResult result;
    if (leftovers.position() > 0) {
        // Characters are pending from an earlier call: feed input one char
        // at a time until something is written or the encoder fails.
        final int outputStart = bb.position();
        do {
            leftovers.put((char) cc.substract());
            leftovers.flip();
            result = encoder.encode(leftovers, bb, false);
            leftovers.position(leftovers.limit());
            leftovers.limit(leftovers.array().length);
        } while (result.isUnderflow() && (bb.position() == outputStart));
        if (result.isError() || result.isMalformed()) {
            result.throwException();
        }
        // Skip the characters that were consumed through the leftovers
        cb.position(cc.getStart());
        leftovers.position(0);
    }

    // Encode the bulk of the input
    result = encoder.encode(cb, bb, false);
    if (result.isError() || result.isMalformed()) {
        result.throwException();
    } else if (result.isOverflow() || result.isUnderflow()) {
        // Publish the progress made to the byte chunk and char chunk
        bc.setEnd(bb.position());
        cc.setOffset(cb.position());
        if (result.isUnderflow() && (cc.getLength() > 0)) {
            // Stash the unconsumed characters for the next call
            leftovers.limit(leftovers.array().length);
            leftovers.position(cc.getLength());
            cc.substract(leftovers.array(), 0, cc.getLength());
        }
    }
}
 
Example 20
Source File: Speller.java    From morfologik-stemming with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Recursively walks the FSA arcs leaving {@code node}, decoding arc labels into characters of the current candidate
 * and adding every candidate whose computed distance does not exceed {@code effectEditDistance} to
 * {@code candidates}.
 *
 * <p>For each arc, the arc label byte is appended to {@code prevBytes} and the bytes are decoded. If the decoder
 * reports malformed input, the accumulated bytes are handed down to the arc's end node unchanged in position
 * (presumably an incomplete multi-byte sequence; the code assumes the dictionary holds only valid encodings).
 * Otherwise the decoded character is stored at {@code candidate[candIndex]} and three cases are tried in turn:
 * an "any to two" replacement, an "any to one" replacement, and the general step.
 *
 * @param candidates list that receives the accepted candidates
 * @param depth      index used with {@code hMatrix}, {@code cuted} and {@code ed}
 *                   (NOTE(review): looks like the current depth in the edit-distance matrix; confirm)
 * @param node       FSA node whose outgoing arcs are explored
 * @param prevBytes  bytes collected from preceding arcs that have not yet produced a character; empty when a
 *                   new character starts
 * @param wordIndex  index passed to the match/distance helpers and compared against {@code wordLen}
 *                   (presumably the position in the word being checked; confirm)
 * @param candIndex  index in {@code candidate} at which the decoded character is written
 */
private void findRepl(List<CandidateData> candidates, final int depth, final int node, final byte[] prevBytes, final int wordIndex, final int candIndex) {
  int dist = 0;
  for (int arc = fsa.getFirstArc(node); arc != 0; arc = fsa.getNextArc(arc)) {
    // Rebuild the byte sequence for this arc: previously accumulated bytes followed by the arc's label.
    byteBuffer = BufferUtils.clearAndEnsureCapacity(byteBuffer, prevBytes.length + 1);
    byteBuffer.put(prevBytes);
    byteBuffer.put(fsa.getArcLabel(arc));
    // Remember how many bytes were written; needed to copy them out if decoding fails.
    final int bufPos = byteBuffer.position();
    byteBuffer.flip();
    decoder.reset();
    // FIXME: this isn't correct -- no checks for overflows, no decoder flush. I don't think this should be in here
    // either; the decoder should run once on the accumulated temporary byte buffer (current path) only when there's
    // a potential that this buffer can become a replacement candidate (isEndOfCandidate). Because we assume candidates
    // are valid input strings (this is verified when building the dictionary), it would save a lot of conversions.
    final CoderResult c = decoder.decode(byteBuffer, charBuffer, true);
    if (c.isMalformed()) { // assume that only valid
      // encodings are there
      // Carry the bytes seen so far over to the next arc so they can be decoded together with its label.
      final byte[] prev = new byte[bufPos];
      byteBuffer.position(0);
      byteBuffer.get(prev);
      if (!fsa.isArcTerminal(arc)) {
        findRepl(candidates, depth, fsa.getEndNode(arc), prev, wordIndex, candIndex); // note: depth is not incremented
      }
      byteBuffer.clear();
    } else if (!c.isError()) { // unmappable characters are silently discarded
      // A character was decoded: store it in the candidate and reset both shared buffers before recursing.
      charBuffer.flip();
      candidate[candIndex] = charBuffer.get();
      charBuffer.clear();
      byteBuffer.clear();

      int lengthReplacement;
      // replacement "any to two"
      if ((lengthReplacement = matchAnyToTwo(wordIndex, candIndex)) > 0) {
        // the replacement takes place at the end of the candidate
        if (isEndOfCandidate(arc, wordIndex) && (dist = hMatrix.get(depth - 1, depth - 1)) <= effectEditDistance) {
          if (Math.abs(wordLen - 1 - (wordIndex + lengthReplacement - 2)) > 0) {
            // there are extra letters in the word after the replacement
            dist = dist + Math.abs(wordLen - 1 - (wordIndex + lengthReplacement - 2));
          }
          if (dist <= effectEditDistance) {
            candidates.add(new CandidateData(String.valueOf(candidate, 0, candIndex + 1), dist));
          }
        }
        if (isArcNotTerminal(arc, candIndex)) {
          // Temporarily overwrite the diagonal cell for the recursive call and restore it afterwards.
          int x = hMatrix.get(depth, depth);
          hMatrix.set(depth, depth, hMatrix.get(depth - 1, depth - 1));
          findRepl(candidates, Math.max(0, depth), fsa.getEndNode(arc), new byte[0], wordIndex + lengthReplacement - 1,
              candIndex + 1);
          hMatrix.set(depth, depth, x);
        }
      }
      //replacement "any to one"
      if ((lengthReplacement = matchAnyToOne(wordIndex, candIndex)) > 0) {
        // the replacement takes place at the end of the candidate
        if (isEndOfCandidate(arc, wordIndex) && (dist = hMatrix.get(depth, depth)) <= effectEditDistance) {
          if (Math.abs(wordLen - 1 - (wordIndex + lengthReplacement - 1)) > 0) {
            // there are extra letters in the word after the replacement
            dist = dist + Math.abs(wordLen - 1 - (wordIndex + lengthReplacement - 1));
          }
          if (dist <= effectEditDistance) {
            candidates.add(new CandidateData(String.valueOf(candidate, 0, candIndex + 1), dist));
          }
        }
        if (isArcNotTerminal(arc, candIndex)) {
          findRepl(candidates, depth, fsa.getEndNode(arc), new byte[0], wordIndex + lengthReplacement, candIndex + 1);
        }
      }
      //general
      // Only continue along this arc while the value returned by cuted() stays within the allowed distance.
      if (cuted(depth, wordIndex, candIndex) <= effectEditDistance) {
        if ((isEndOfCandidate(arc, wordIndex))
            && (dist = ed(wordLen - 1 - (wordIndex - depth), depth, wordLen - 1, candIndex)) <= effectEditDistance) {
          candidates.add(new CandidateData(String.valueOf(candidate, 0, candIndex + 1), dist));
        }
        if (isArcNotTerminal(arc, candIndex)) {
          findRepl(candidates, depth + 1, fsa.getEndNode(arc), new byte[0], wordIndex + 1, candIndex + 1);
        }
      }
    }
  }
}