Java Code Examples for java.nio.charset.CharsetEncoder#onMalformedInput()

The following examples show how to use java.nio.charset.CharsetEncoder#onMalformedInput() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Slices.java    From aion with MIT License 6 votes vote down vote up
/** Returns a cached thread-local {@link CharsetEncoder} for the specified <tt>charset</tt>. */
private static CharsetEncoder getEncoder(Charset charset) {
    if (charset == null) {
        throw new NullPointerException("charset");
    }

    Map<Charset, CharsetEncoder> map = encoders.get();
    CharsetEncoder e = map.get(charset);
    if (e != null) {
        e.reset();
        e.onMalformedInput(CodingErrorAction.REPLACE);
        e.onUnmappableCharacter(CodingErrorAction.REPLACE);
        return e;
    }

    e = charset.newEncoder();
    e.onMalformedInput(CodingErrorAction.REPLACE);
    e.onUnmappableCharacter(CodingErrorAction.REPLACE);
    map.put(charset, e);
    return e;
}
 
Example 2
Source File: CharsetUtil.java    From android-netty with Apache License 2.0 6 votes vote down vote up
/**
 * Returns a cached thread-local {@link CharsetEncoder} for the specified
 * <tt>charset</tt>.
 */
public static CharsetEncoder getEncoder(Charset charset) {
    if (charset == null) {
        throw new NullPointerException("charset");
    }

    Map<Charset, CharsetEncoder> map = encoders.get();
    CharsetEncoder e = map.get(charset);
    if (e != null) {
        e.reset();
        e.onMalformedInput(CodingErrorAction.REPLACE);
        e.onUnmappableCharacter(CodingErrorAction.REPLACE);
        return e;
    }

    e = charset.newEncoder();
    e.onMalformedInput(CodingErrorAction.REPLACE);
    e.onUnmappableCharacter(CodingErrorAction.REPLACE);
    map.put(charset, e);
    return e;
}
 
Example 3
Source File: Text.java    From hadoop-gpu with Apache License 2.0 6 votes vote down vote up
/**
 * Converts the provided String to bytes using the
 * UTF-8 encoding. If <code>replace</code> is true, then
 * malformed input is replaced with the
 * substitution character, which is U+FFFD. Otherwise the
 * method throws a MalformedInputException.
 * @return ByteBuffer: bytes stores at ByteBuffer.array() 
 *                     and length is ByteBuffer.limit()
 */
public static ByteBuffer encode(String string, boolean replace)
  throws CharacterCodingException {
  CharsetEncoder encoder = ENCODER_FACTORY.get();
  if (replace) {
    encoder.onMalformedInput(CodingErrorAction.REPLACE);
    encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
  }
  ByteBuffer bytes = 
    encoder.encode(CharBuffer.wrap(string.toCharArray()));
  if (replace) {
    encoder.onMalformedInput(CodingErrorAction.REPORT);
    encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
  }
  return bytes;
}
 
Example 4
Source File: Text.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Converts the provided String to bytes using the
 * UTF-8 encoding. If <code>replace</code> is true, then
 * malformed input is replaced with the
 * substitution character, which is U+FFFD. Otherwise the
 * method throws a MalformedInputException.
 * @return ByteBuffer: bytes stores at ByteBuffer.array() 
 *                     and length is ByteBuffer.limit()
 */
public static ByteBuffer encode(String string, boolean replace)
  throws CharacterCodingException {
  CharsetEncoder encoder = ENCODER_FACTORY.get();
  if (replace) {
    encoder.onMalformedInput(CodingErrorAction.REPLACE);
    encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
  }
  ByteBuffer bytes = 
    encoder.encode(CharBuffer.wrap(string.toCharArray()));
  if (replace) {
    encoder.onMalformedInput(CodingErrorAction.REPORT);
    encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
  }
  return bytes;
}
 
Example 5
Source File: TracingManagedHttpClientConnectionFactory.java    From caravan with Apache License 2.0 6 votes vote down vote up
@Override
public ManagedHttpClientConnection create(final HttpRoute route, final ConnectionConfig config) {
    final ConnectionConfig cconfig = config != null ? config : ConnectionConfig.DEFAULT;
    CharsetDecoder chardecoder = null;
    CharsetEncoder charencoder = null;
    final Charset charset = cconfig.getCharset();
    final CodingErrorAction malformedInputAction = cconfig.getMalformedInputAction() != null ? cconfig.getMalformedInputAction() : CodingErrorAction.REPORT;
    final CodingErrorAction unmappableInputAction = cconfig.getUnmappableInputAction() != null ? cconfig.getUnmappableInputAction()
            : CodingErrorAction.REPORT;
    if (charset != null) {
        chardecoder = charset.newDecoder();
        chardecoder.onMalformedInput(malformedInputAction);
        chardecoder.onUnmappableCharacter(unmappableInputAction);
        charencoder = charset.newEncoder();
        charencoder.onMalformedInput(malformedInputAction);
        charencoder.onUnmappableCharacter(unmappableInputAction);
    }
    final String id = "http-outgoing-" + Long.toString(COUNTER.getAndIncrement());
    return new TracingManagedHttpClientConnection(id, cconfig.getBufferSize(), cconfig.getFragmentSizeHint(), chardecoder, charencoder,
            cconfig.getMessageConstraints(), incomingContentStrategy, outgoingContentStrategy, requestWriterFactory, responseParserFactory, logFunc);
}
 
Example 6
Source File: Text.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Converts the provided String to bytes using the
 * UTF-8 encoding. If <code>replace</code> is true, then
 * malformed input is replaced with the
 * substitution character, which is U+FFFD. Otherwise the
 * method throws a MalformedInputException.
 * @return ByteBuffer: bytes stores at ByteBuffer.array() 
 *                     and length is ByteBuffer.limit()
 */
public static ByteBuffer encode(String string, boolean replace)
  throws CharacterCodingException {
  CharsetEncoder encoder = ENCODER_FACTORY.get();
  if (replace) {
    encoder.onMalformedInput(CodingErrorAction.REPLACE);
    encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
  }
  ByteBuffer bytes = 
    encoder.encode(CharBuffer.wrap(string.toCharArray()));
  if (replace) {
    encoder.onMalformedInput(CodingErrorAction.REPORT);
    encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
  }
  return bytes;
}
 
Example 7
Source File: Text.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Converts the provided String to bytes using the
 * UTF-8 encoding. If <code>replace</code> is true, then
 * malformed input is replaced with the
 * substitution character, which is U+FFFD. Otherwise the
 * method throws a MalformedInputException.
 * @return ByteBuffer: bytes stores at ByteBuffer.array() 
 *                     and length is ByteBuffer.limit()
 */
public static ByteBuffer encode(String string, boolean replace)
  throws CharacterCodingException {
  CharsetEncoder encoder = ENCODER_FACTORY.get();
  if (replace) {
    encoder.onMalformedInput(CodingErrorAction.REPLACE);
    encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
  }
  ByteBuffer bytes = 
    encoder.encode(CharBuffer.wrap(string.toCharArray()));
  if (replace) {
    encoder.onMalformedInput(CodingErrorAction.REPORT);
    encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
  }
  return bytes;
}
 
Example 8
Source File: Text.java    From Canova with Apache License 2.0 6 votes vote down vote up
/**
 * Converts the provided String to bytes using the
 * UTF-8 encoding. If <code>replace</code> is true, then
 * malformed input is replaced with the
 * substitution character, which is U+FFFD. Otherwise the
 * method throws a MalformedInputException.
 * @return ByteBuffer: bytes stores at ByteBuffer.array()
 *                     and length is ByteBuffer.limit()
 */
public static ByteBuffer encode(String string, boolean replace)
        throws CharacterCodingException {
    CharsetEncoder encoder = ENCODER_FACTORY.get();
    if (replace) {
        encoder.onMalformedInput(CodingErrorAction.REPLACE);
        encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    }
    ByteBuffer bytes =
            encoder.encode(CharBuffer.wrap(string.toCharArray()));
    if (replace) {
        encoder.onMalformedInput(CodingErrorAction.REPORT);
        encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    }
    return bytes;
}
 
Example 9
Source File: CharsetUtil.java    From netty4.0.27Learn with Apache License 2.0 6 votes vote down vote up
/**
 * Returns a cached thread-local {@link CharsetEncoder} for the specified
 * <tt>charset</tt>.
 */
public static CharsetEncoder getEncoder(Charset charset) {
    if (charset == null) {
        throw new NullPointerException("charset");
    }

    Map<Charset, CharsetEncoder> map = InternalThreadLocalMap.get().charsetEncoderCache();
    CharsetEncoder e = map.get(charset);
    if (e != null) {
        e.reset();
        e.onMalformedInput(CodingErrorAction.REPLACE);
        e.onUnmappableCharacter(CodingErrorAction.REPLACE);
        return e;
    }

    e = charset.newEncoder();
    e.onMalformedInput(CodingErrorAction.REPLACE);
    e.onUnmappableCharacter(CodingErrorAction.REPLACE);
    map.put(charset, e);
    return e;
}
 
Example 10
Source File: StandardCharsetsEncoderTest.java    From j2objc with Apache License 2.0 6 votes vote down vote up
/**
 * Generates a set of reference data from the current CharsetEncoder behavior into the
 * specified file.
 */
private static void dumpEncodings(CharsetEncoder encoder, String fileName)
        throws IOException {
    encoder.onMalformedInput(CodingErrorAction.IGNORE);
    encoder.onUnmappableCharacter(CodingErrorAction.IGNORE);

    try (BufferedWriter writer = createAsciiWriter(new FileOutputStream(fileName))) {
        writeLine(writer, "# Reference encodings for " + encoder.charset().name()
                + " generated by " + Dumper.class);
        writeLine(writer, "# Encodings are used by " + StandardCharsetsEncoderTest.class);
        writeLine(writer, "# {codepoint}:{canEncode}:{encoding bytes}");

        for (int codePoint = 0; codePoint < 0xfffd; codePoint++) {
            String codePointInfo = createCodePointInfo(encoder, codePoint);
            writeLine(writer, codePointInfo);
        }
    }
}
 
Example 11
Source File: FilenameUtils.java    From MLib with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Convert a filename from Java´s native UTF-16 to OS native character encoding.
 *
 * @param fileName The UTF-16 filename string.
 * @return Natively encoded string for the OS.
 */
private static String convertToNativeEncoding(String fileName, boolean isPath) {
    String ret = fileName;

    ret = removeIllegalCharacters(ret, isPath);

    //convert our filename to OS encoding...
    try {
        final CharsetEncoder charsetEncoder = Charset.defaultCharset().newEncoder();
        charsetEncoder.onMalformedInput(CodingErrorAction.REPLACE); // otherwise breaks on first unconvertable char
        charsetEncoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
        charsetEncoder.replaceWith(new byte[]{'_'});

        final ByteBuffer buf = charsetEncoder.encode(CharBuffer.wrap(ret));
        if (buf.hasArray()) {
            ret = new String(buf.array());
        }

        //remove NUL character from conversion...
        ret = ret.replaceAll("\\u0000", "");
    } catch (CharacterCodingException e) {
        e.printStackTrace();
    }

    return ret;
}
 
Example 12
Source File: CharsetUtil.java    From simple-netty-source with Apache License 2.0 6 votes vote down vote up
/**
 * Returns a cached thread-local {@link CharsetEncoder} for the specified
 * <tt>charset</tt>.
 */
public static CharsetEncoder getEncoder(Charset charset) {
    if (charset == null) {
        throw new NullPointerException("charset");
    }

    Map<Charset, CharsetEncoder> map = encoders.get();
    CharsetEncoder e = map.get(charset);
    if (e != null) {
        e.reset();
        e.onMalformedInput(CodingErrorAction.REPLACE);
        e.onUnmappableCharacter(CodingErrorAction.REPLACE);
        return e;
    }

    e = charset.newEncoder();
    e.onMalformedInput(CodingErrorAction.REPLACE);
    e.onUnmappableCharacter(CodingErrorAction.REPLACE);
    map.put(charset, e);
    return e;
}
 
Example 13
Source File: StringRecord.java    From stratosphere with Apache License 2.0 5 votes vote down vote up
/**
 * Converts the provided String to bytes using the UTF-8 encoding. If <code>replace</code> is true, then malformed
 * input is replaced with the
 * substitution character, which is U+FFFD. Otherwise the method throws a
 * MalformedInputException.
 * 
 * @return ByteBuffer: bytes stores at ByteBuffer.array() and length is
 *         ByteBuffer.limit()
 */
public static ByteBuffer encode(final String string, final boolean replace) throws CharacterCodingException {

	final CharsetEncoder encoder = ENCODER_FACTORY.get();
	if (replace) {
		encoder.onMalformedInput(CodingErrorAction.REPLACE);
		encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
	}
	ByteBuffer bytes = encoder.encode(CharBuffer.wrap(string.toCharArray()));
	if (replace) {
		encoder.onMalformedInput(CodingErrorAction.REPORT);
		encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
	}
	return bytes;
}
 
Example 14
Source File: FilenameUtils.java    From MLib with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Convert a filename from Java´s native UTF-16 to US-ASCII character encoding.
 *
 * @param fileName The UTF-16 filename string.
 * @return US-ASCII encoded string for the OS.
 */
private static String convertToASCIIEncoding(String fileName, boolean isPath) {
    String ret = fileName;

    ret = ret.replace("ä", "ae");
    ret = ret.replace("ö", "oe");
    ret = ret.replace("ü", "ue");
    ret = ret.replace("Ä", "Ae");
    ret = ret.replace("Ö", "Oe");
    ret = ret.replace("Ü", "Ue");
    ret = ret.replace("ß", "ss");

    // ein Versuch zu vereinfachen
    ret = cleanUnicode(ret);

    ret = removeIllegalCharacters(ret, isPath);

    //convert our filename to OS encoding...
    try {
        final CharsetEncoder charsetEncoder = Charset.forName("US-ASCII").newEncoder();
        charsetEncoder.onMalformedInput(CodingErrorAction.REPLACE); // otherwise breaks on first unconvertable char
        charsetEncoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
        charsetEncoder.replaceWith(new byte[]{'_'});

        final ByteBuffer buf = charsetEncoder.encode(CharBuffer.wrap(ret));
        if (buf.hasArray()) {
            ret = new String(buf.array());
        }

        //remove NUL character from conversion...
        ret = ret.replaceAll("\\u0000", "");
    } catch (CharacterCodingException e) {
        e.printStackTrace();
    }

    return ret;
}
 
Example 15
Source File: CharsetEncoderTest.java    From j2objc with Apache License 2.0 5 votes vote down vote up
public void test_replaceWith() throws Exception {
    Charset ascii = Charset.forName("US-ASCII");
    CharsetEncoder e = ascii.newEncoder();
    e.onMalformedInput(CodingErrorAction.REPLACE);
    e.onUnmappableCharacter(CodingErrorAction.REPLACE);
    e.replaceWith("=".getBytes("US-ASCII"));
    String input = "hello\u0666world";
    String output = ascii.decode(e.encode(CharBuffer.wrap(input))).toString();
    assertEquals("hello=world", output);
}
 
Example 16
Source File: XmlSerializer.java    From caja with Apache License 2.0 5 votes vote down vote up
private final static Writer wrap(OutputStream out) {
    Charset charset = Charset.forName("utf-8");
    CharsetEncoder encoder = charset.newEncoder();
    encoder.onMalformedInput(CodingErrorAction.REPLACE);
    encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    try {
        encoder.replaceWith("\uFFFD".getBytes("utf-8"));
    } catch (UnsupportedEncodingException e) {
        throw new RuntimeException(e);
    }
    return new OutputStreamWriter(out, encoder);
}
 
Example 17
Source File: FeedClientImpl.java    From vespa with Apache License 2.0 5 votes vote down vote up
@Override
public void stream(String documentId, String operationId, CharSequence documentData, Object context) {
    CharsetEncoder charsetEncoder = StandardCharsets.UTF_8.newEncoder();
    charsetEncoder.onMalformedInput(CodingErrorAction.REPORT);
    charsetEncoder.onUnmappableCharacter(CodingErrorAction.REPORT);

    Document document = new Document(documentId, operationId, documentData, context);
    operationProcessor.sendDocument(document);
}
 
Example 18
Source File: Text.java    From pxf with Apache License 2.0 5 votes vote down vote up
/**
 * Converts the provided String to bytes using the UTF-8 encoding. If
 * <code>replace</code> is true, then malformed input is replaced with the
 * substitution character, which is U+FFFD. Otherwise the method throws a
 * MalformedInputException.
 *
 * @param string string to encode
 * @param replace whether to replace malformed input with substitution
 *            character
 * @return ByteBuffer: bytes stores at ByteBuffer.array() and length is
 *         ByteBuffer.limit()
 * @throws MalformedInputException if a malformed input is used
 * @throws CharacterCodingException if the conversion failed
 */
public static ByteBuffer encode(String string, boolean replace)
        throws CharacterCodingException {
    CharsetEncoder encoder = ENCODER_FACTORY.get();
    if (replace) {
        encoder.onMalformedInput(CodingErrorAction.REPLACE);
        encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    }
    ByteBuffer bytes = encoder.encode(CharBuffer.wrap(string.toCharArray()));
    if (replace) {
        encoder.onMalformedInput(CodingErrorAction.REPORT);
        encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    }
    return bytes;
}
 
Example 19
Source File: MessageBuilderFactory.java    From firebase-android-sdk with Apache License 2.0 5 votes vote down vote up
@Override
protected CharsetEncoder initialValue() {
  Charset utf8 = Charset.forName("UTF8");
  CharsetEncoder encoder = utf8.newEncoder();
  encoder.onMalformedInput(CodingErrorAction.REPORT);
  encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
  return encoder;
}
 
Example 20
Source File: ConvertCharacterSet.java    From localization_nifi with Apache License 2.0 4 votes vote down vote up
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final ComponentLog logger = getLogger();

    final Charset inputCharset = Charset.forName(context.getProperty(INPUT_CHARSET).evaluateAttributeExpressions(flowFile).getValue());
    final Charset outputCharset = Charset.forName(context.getProperty(OUTPUT_CHARSET).evaluateAttributeExpressions(flowFile).getValue());
    final CharBuffer charBuffer = CharBuffer.allocate(MAX_BUFFER_SIZE);

    final CharsetDecoder decoder = inputCharset.newDecoder();
    decoder.onMalformedInput(CodingErrorAction.REPLACE);
    decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    decoder.replaceWith("?");

    final CharsetEncoder encoder = outputCharset.newEncoder();
    encoder.onMalformedInput(CodingErrorAction.REPLACE);
    encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    encoder.replaceWith("?".getBytes(outputCharset));

    try {
        final StopWatch stopWatch = new StopWatch(true);
        flowFile = session.write(flowFile, new StreamCallback() {
            @Override
            public void process(final InputStream rawIn, final OutputStream rawOut) throws IOException {
                try (final BufferedReader reader = new BufferedReader(new InputStreamReader(rawIn, decoder), MAX_BUFFER_SIZE);
                        final BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(rawOut, encoder), MAX_BUFFER_SIZE)) {
                    int charsRead;
                    while ((charsRead = reader.read(charBuffer)) != -1) {
                        charBuffer.flip();
                        writer.write(charBuffer.array(), 0, charsRead);
                    }

                    writer.flush();
                }
            }
        });

        session.getProvenanceReporter().modifyContent(flowFile, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
        logger.info("successfully converted characters from {} to {} for {}",
                new Object[]{inputCharset, outputCharset, flowFile});
        session.transfer(flowFile, REL_SUCCESS);
    } catch (final Exception e) {
        throw new ProcessException(e);
    }
}