Java Code Examples for java.nio.charset.CharsetEncoder#onUnmappableCharacter()

The following examples show how to use java.nio.charset.CharsetEncoder#onUnmappableCharacter() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Slices.java    From aion with MIT License 6 votes vote down vote up
/** Returns a cached thread-local {@link CharsetEncoder} for the specified <tt>charset</tt>. */
private static CharsetEncoder getEncoder(Charset charset) {
    if (charset == null) {
        throw new NullPointerException("charset");
    }

    Map<Charset, CharsetEncoder> map = encoders.get();
    CharsetEncoder e = map.get(charset);
    if (e != null) {
        e.reset();
        e.onMalformedInput(CodingErrorAction.REPLACE);
        e.onUnmappableCharacter(CodingErrorAction.REPLACE);
        return e;
    }

    e = charset.newEncoder();
    e.onMalformedInput(CodingErrorAction.REPLACE);
    e.onUnmappableCharacter(CodingErrorAction.REPLACE);
    map.put(charset, e);
    return e;
}
 
Example 2
Source File: Text.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Converts the provided String to bytes using the
 * UTF-8 encoding. If <code>replace</code> is true, then
 * malformed input is replaced with the
 * substitution character, which is U+FFFD. Otherwise the
 * method throws a MalformedInputException.
 * @return ByteBuffer: bytes stores at ByteBuffer.array() 
 *                     and length is ByteBuffer.limit()
 */
public static ByteBuffer encode(String string, boolean replace)
  throws CharacterCodingException {
  CharsetEncoder encoder = ENCODER_FACTORY.get();
  if (replace) {
    encoder.onMalformedInput(CodingErrorAction.REPLACE);
    encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
  }
  ByteBuffer bytes = 
    encoder.encode(CharBuffer.wrap(string.toCharArray()));
  if (replace) {
    encoder.onMalformedInput(CodingErrorAction.REPORT);
    encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
  }
  return bytes;
}
 
Example 3
Source File: Text.java    From Canova with Apache License 2.0 6 votes vote down vote up
/**
 * Converts the provided String to bytes using the
 * UTF-8 encoding. If <code>replace</code> is true, then
 * malformed input is replaced with the
 * substitution character, which is U+FFFD. Otherwise the
 * method throws a MalformedInputException.
 * @return ByteBuffer: bytes stores at ByteBuffer.array()
 *                     and length is ByteBuffer.limit()
 */
public static ByteBuffer encode(String string, boolean replace)
        throws CharacterCodingException {
    CharsetEncoder encoder = ENCODER_FACTORY.get();
    if (replace) {
        encoder.onMalformedInput(CodingErrorAction.REPLACE);
        encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    }
    ByteBuffer bytes =
            encoder.encode(CharBuffer.wrap(string.toCharArray()));
    if (replace) {
        encoder.onMalformedInput(CodingErrorAction.REPORT);
        encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    }
    return bytes;
}
 
Example 4
Source File: CharsetUtil.java    From android-netty with Apache License 2.0 6 votes vote down vote up
/**
 * Returns a cached thread-local {@link CharsetEncoder} for the specified
 * <tt>charset</tt>.
 */
public static CharsetEncoder getEncoder(Charset charset) {
    if (charset == null) {
        throw new NullPointerException("charset");
    }

    Map<Charset, CharsetEncoder> map = encoders.get();
    CharsetEncoder e = map.get(charset);
    if (e != null) {
        e.reset();
        e.onMalformedInput(CodingErrorAction.REPLACE);
        e.onUnmappableCharacter(CodingErrorAction.REPLACE);
        return e;
    }

    e = charset.newEncoder();
    e.onMalformedInput(CodingErrorAction.REPLACE);
    e.onUnmappableCharacter(CodingErrorAction.REPLACE);
    map.put(charset, e);
    return e;
}
 
Example 5
Source File: CharsetUtil.java    From simple-netty-source with Apache License 2.0 6 votes vote down vote up
/**
 * Returns a cached thread-local {@link CharsetEncoder} for the specified
 * <tt>charset</tt>.
 */
public static CharsetEncoder getEncoder(Charset charset) {
    if (charset == null) {
        throw new NullPointerException("charset");
    }

    Map<Charset, CharsetEncoder> map = encoders.get();
    CharsetEncoder e = map.get(charset);
    if (e != null) {
        e.reset();
        e.onMalformedInput(CodingErrorAction.REPLACE);
        e.onUnmappableCharacter(CodingErrorAction.REPLACE);
        return e;
    }

    e = charset.newEncoder();
    e.onMalformedInput(CodingErrorAction.REPLACE);
    e.onUnmappableCharacter(CodingErrorAction.REPLACE);
    map.put(charset, e);
    return e;
}
 
Example 6
Source File: CharsetUtil.java    From netty4.0.27Learn with Apache License 2.0 6 votes vote down vote up
/**
 * Returns a cached thread-local {@link CharsetEncoder} for the specified
 * <tt>charset</tt>.
 */
public static CharsetEncoder getEncoder(Charset charset) {
    if (charset == null) {
        throw new NullPointerException("charset");
    }

    Map<Charset, CharsetEncoder> map = InternalThreadLocalMap.get().charsetEncoderCache();
    CharsetEncoder e = map.get(charset);
    if (e != null) {
        e.reset();
        e.onMalformedInput(CodingErrorAction.REPLACE);
        e.onUnmappableCharacter(CodingErrorAction.REPLACE);
        return e;
    }

    e = charset.newEncoder();
    e.onMalformedInput(CodingErrorAction.REPLACE);
    e.onUnmappableCharacter(CodingErrorAction.REPLACE);
    map.put(charset, e);
    return e;
}
 
Example 7
Source File: Text.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Converts the provided String to bytes using the
 * UTF-8 encoding. If <code>replace</code> is true, then
 * malformed input is replaced with the
 * substitution character, which is U+FFFD. Otherwise the
 * method throws a MalformedInputException.
 * @return ByteBuffer: bytes stores at ByteBuffer.array() 
 *                     and length is ByteBuffer.limit()
 */
public static ByteBuffer encode(String string, boolean replace)
  throws CharacterCodingException {
  CharsetEncoder encoder = ENCODER_FACTORY.get();
  if (replace) {
    encoder.onMalformedInput(CodingErrorAction.REPLACE);
    encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
  }
  ByteBuffer bytes = 
    encoder.encode(CharBuffer.wrap(string.toCharArray()));
  if (replace) {
    encoder.onMalformedInput(CodingErrorAction.REPORT);
    encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
  }
  return bytes;
}
 
Example 8
Source File: Text.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Converts the provided String to bytes using the
 * UTF-8 encoding. If <code>replace</code> is true, then
 * malformed input is replaced with the
 * substitution character, which is U+FFFD. Otherwise the
 * method throws a MalformedInputException.
 * @return ByteBuffer: bytes stores at ByteBuffer.array() 
 *                     and length is ByteBuffer.limit()
 */
public static ByteBuffer encode(String string, boolean replace)
  throws CharacterCodingException {
  CharsetEncoder encoder = ENCODER_FACTORY.get();
  if (replace) {
    encoder.onMalformedInput(CodingErrorAction.REPLACE);
    encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
  }
  ByteBuffer bytes = 
    encoder.encode(CharBuffer.wrap(string.toCharArray()));
  if (replace) {
    encoder.onMalformedInput(CodingErrorAction.REPORT);
    encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
  }
  return bytes;
}
 
Example 9
Source File: TracingManagedHttpClientConnectionFactory.java    From caravan with Apache License 2.0 6 votes vote down vote up
@Override
public ManagedHttpClientConnection create(final HttpRoute route, final ConnectionConfig config) {
    final ConnectionConfig cconfig = config != null ? config : ConnectionConfig.DEFAULT;
    CharsetDecoder chardecoder = null;
    CharsetEncoder charencoder = null;
    final Charset charset = cconfig.getCharset();
    final CodingErrorAction malformedInputAction = cconfig.getMalformedInputAction() != null ? cconfig.getMalformedInputAction() : CodingErrorAction.REPORT;
    final CodingErrorAction unmappableInputAction = cconfig.getUnmappableInputAction() != null ? cconfig.getUnmappableInputAction()
            : CodingErrorAction.REPORT;
    if (charset != null) {
        chardecoder = charset.newDecoder();
        chardecoder.onMalformedInput(malformedInputAction);
        chardecoder.onUnmappableCharacter(unmappableInputAction);
        charencoder = charset.newEncoder();
        charencoder.onMalformedInput(malformedInputAction);
        charencoder.onUnmappableCharacter(unmappableInputAction);
    }
    final String id = "http-outgoing-" + Long.toString(COUNTER.getAndIncrement());
    return new TracingManagedHttpClientConnection(id, cconfig.getBufferSize(), cconfig.getFragmentSizeHint(), chardecoder, charencoder,
            cconfig.getMessageConstraints(), incomingContentStrategy, outgoingContentStrategy, requestWriterFactory, responseParserFactory, logFunc);
}
 
Example 10
Source File: IppUtil.java    From cups4j with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * 
 * @param str
 * @param charsetName
 * @return
 * @throws CharacterCodingException
 */
static public String getTranslatedString(String str, String charsetName) throws CharacterCodingException {
  if (charsetName == null) {
    charsetName = DEFAULT_CHARSET;
  }
  Charset charset = Charset.forName(charsetName);
  CharsetDecoder decoder = charset.newDecoder();
  CharsetEncoder encoder = charset.newEncoder();
  decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
  encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
  // Convert a string to charsetName bytes in a ByteBuffer
  // The new ByteBuffer is ready to be read.
  ByteBuffer buf = encoder.encode(CharBuffer.wrap(str));
  // Convert charsetName bytes in a ByteBuffer to a character ByteBuffer
  // and then to a string. The new ByteBuffer is ready to be read.
  CharBuffer cbuf = decoder.decode(buf);
  return cbuf.toString();
}
 
Example 11
Source File: Text.java    From Bats with Apache License 2.0 6 votes vote down vote up
/**
 * Converts the provided String to bytes using the UTF-8 encoding. If <code>replace</code> is true, then malformed
 * input is replaced with the substitution character, which is U+FFFD. Otherwise the method throws a
 * MalformedInputException.
 *
 * @return ByteBuffer: bytes stores at ByteBuffer.array() and length is ByteBuffer.limit()
 */
public static ByteBuffer encode(String string, boolean replace)
    throws CharacterCodingException {
  CharsetEncoder encoder = ENCODER_FACTORY.get();
  if (replace) {
    encoder.onMalformedInput(CodingErrorAction.REPLACE);
    encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
  }
  ByteBuffer bytes =
      encoder.encode(CharBuffer.wrap(string.toCharArray()));
  if (replace) {
    encoder.onMalformedInput(CodingErrorAction.REPORT);
    encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
  }
  return bytes;
}
 
Example 12
Source File: FeedClientImpl.java    From vespa with Apache License 2.0 5 votes vote down vote up
@Override
public void stream(String documentId, String operationId, CharSequence documentData, Object context) {
    CharsetEncoder charsetEncoder = StandardCharsets.UTF_8.newEncoder();
    charsetEncoder.onMalformedInput(CodingErrorAction.REPORT);
    charsetEncoder.onUnmappableCharacter(CodingErrorAction.REPORT);

    Document document = new Document(documentId, operationId, documentData, context);
    operationProcessor.sendDocument(document);
}
 
Example 13
Source File: TextEditorSupport.java    From netbeans with Apache License 2.0 5 votes vote down vote up
/** Store the document in proper encoding.
 */
protected void saveFromKitToStream(StyledDocument doc, EditorKit kit, OutputStream out) throws IOException, BadLocationException {
    // not calling super.
    String enc = EncodingUtil.detectEncoding(doc);
    
    // saved form saveDocument()
    Charset cs = fileEncoding.get();
    // + fallback, if no info is available
    if (cs == null) {
        if (enc != null) {
            cs = Charset.forName(enc);
        } else {
            // fallback to the original encoding, no encoding in document istelf.
            cs = FileEncodingQuery.getEncoding(getDataObject().getPrimaryFile());
        }
    }
    if ( Util.THIS.isLoggable() ) /* then */ Util.THIS.debug("Saving using encoding");//, new RuntimeException (enc)); // NOI18N
    if ( Util.THIS.isLoggable() ) /* then */ Util.THIS.debug("!!! TextEditorSupport::saveFromKitToStream: enc = " + enc);
    if ( Util.THIS.isLoggable() ) /* then */ Util.THIS.debug("!!!                  ::saveFromKitToStream: after first test -> OK");

    FilterOutputStream fos = new FilterOutputStream(out) {
        @Override
        public void close() throws IOException {
            flush();
        }
    };
    CharsetEncoder encoder = cs.newEncoder();
    encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    Writer w = new OutputStreamWriter (fos, encoder);
    if ( Util.THIS.isLoggable() ) /* then */ Util.THIS.debug("!!!                  ::saveFromKitToStream: writer = " + w);
    try {
        kit.write(w, doc, 0, doc.getLength());
    } finally {
        w.close();
    }
}
 
Example 14
Source File: XmlSerializer.java    From caja with Apache License 2.0 5 votes vote down vote up
private final static Writer wrap(OutputStream out) {
    Charset charset = Charset.forName("utf-8");
    CharsetEncoder encoder = charset.newEncoder();
    encoder.onMalformedInput(CodingErrorAction.REPLACE);
    encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    try {
        encoder.replaceWith("\uFFFD".getBytes("utf-8"));
    } catch (UnsupportedEncodingException e) {
        throw new RuntimeException(e);
    }
    return new OutputStreamWriter(out, encoder);
}
 
Example 15
Source File: StringRecord.java    From stratosphere with Apache License 2.0 5 votes vote down vote up
/**
 * Converts the provided String to bytes using the UTF-8 encoding. If <code>replace</code> is true, then malformed
 * input is replaced with the
 * substitution character, which is U+FFFD. Otherwise the method throws a
 * MalformedInputException.
 * 
 * @return ByteBuffer: bytes stores at ByteBuffer.array() and length is
 *         ByteBuffer.limit()
 */
public static ByteBuffer encode(final String string, final boolean replace) throws CharacterCodingException {

	final CharsetEncoder encoder = ENCODER_FACTORY.get();
	if (replace) {
		encoder.onMalformedInput(CodingErrorAction.REPLACE);
		encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
	}
	ByteBuffer bytes = encoder.encode(CharBuffer.wrap(string.toCharArray()));
	if (replace) {
		encoder.onMalformedInput(CodingErrorAction.REPORT);
		encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
	}
	return bytes;
}
 
Example 16
Source File: CharsetEncoderTest.java    From j2objc with Apache License 2.0 5 votes vote down vote up
public void test_replaceWith() throws Exception {
    Charset ascii = Charset.forName("US-ASCII");
    CharsetEncoder e = ascii.newEncoder();
    e.onMalformedInput(CodingErrorAction.REPLACE);
    e.onUnmappableCharacter(CodingErrorAction.REPLACE);
    e.replaceWith("=".getBytes("US-ASCII"));
    String input = "hello\u0666world";
    String output = ascii.decode(e.encode(CharBuffer.wrap(input))).toString();
    assertEquals("hello=world", output);
}
 
Example 17
Source File: Text.java    From pxf with Apache License 2.0 5 votes vote down vote up
/**
 * Converts the provided String to bytes using the UTF-8 encoding. If
 * <code>replace</code> is true, then malformed input is replaced with the
 * substitution character, which is U+FFFD. Otherwise the method throws a
 * MalformedInputException.
 *
 * @param string string to encode
 * @param replace whether to replace malformed input with substitution
 *            character
 * @return ByteBuffer: bytes stores at ByteBuffer.array() and length is
 *         ByteBuffer.limit()
 * @throws MalformedInputException if a malformed input is used
 * @throws CharacterCodingException if the conversion failed
 */
public static ByteBuffer encode(String string, boolean replace)
        throws CharacterCodingException {
    CharsetEncoder encoder = ENCODER_FACTORY.get();
    if (replace) {
        encoder.onMalformedInput(CodingErrorAction.REPLACE);
        encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    }
    ByteBuffer bytes = encoder.encode(CharBuffer.wrap(string.toCharArray()));
    if (replace) {
        encoder.onMalformedInput(CodingErrorAction.REPORT);
        encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    }
    return bytes;
}
 
Example 18
Source File: FilenameUtils.java    From MLib with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Convert a filename from Java´s native UTF-16 to US-ASCII character encoding.
 *
 * @param fileName The UTF-16 filename string.
 * @return US-ASCII encoded string for the OS.
 */
private static String convertToASCIIEncoding(String fileName, boolean isPath) {
    String ret = fileName;

    ret = ret.replace("ä", "ae");
    ret = ret.replace("ö", "oe");
    ret = ret.replace("ü", "ue");
    ret = ret.replace("Ä", "Ae");
    ret = ret.replace("Ö", "Oe");
    ret = ret.replace("Ü", "Ue");
    ret = ret.replace("ß", "ss");

    // ein Versuch zu vereinfachen
    ret = cleanUnicode(ret);

    ret = removeIllegalCharacters(ret, isPath);

    //convert our filename to OS encoding...
    try {
        final CharsetEncoder charsetEncoder = Charset.forName("US-ASCII").newEncoder();
        charsetEncoder.onMalformedInput(CodingErrorAction.REPLACE); // otherwise breaks on first unconvertable char
        charsetEncoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
        charsetEncoder.replaceWith(new byte[]{'_'});

        final ByteBuffer buf = charsetEncoder.encode(CharBuffer.wrap(ret));
        if (buf.hasArray()) {
            ret = new String(buf.array());
        }

        //remove NUL character from conversion...
        ret = ret.replaceAll("\\u0000", "");
    } catch (CharacterCodingException e) {
        e.printStackTrace();
    }

    return ret;
}
 
Example 19
Source File: ConvertCharacterSet.java    From localization_nifi with Apache License 2.0 4 votes vote down vote up
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final ComponentLog logger = getLogger();

    final Charset inputCharset = Charset.forName(context.getProperty(INPUT_CHARSET).evaluateAttributeExpressions(flowFile).getValue());
    final Charset outputCharset = Charset.forName(context.getProperty(OUTPUT_CHARSET).evaluateAttributeExpressions(flowFile).getValue());
    final CharBuffer charBuffer = CharBuffer.allocate(MAX_BUFFER_SIZE);

    final CharsetDecoder decoder = inputCharset.newDecoder();
    decoder.onMalformedInput(CodingErrorAction.REPLACE);
    decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    decoder.replaceWith("?");

    final CharsetEncoder encoder = outputCharset.newEncoder();
    encoder.onMalformedInput(CodingErrorAction.REPLACE);
    encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    encoder.replaceWith("?".getBytes(outputCharset));

    try {
        final StopWatch stopWatch = new StopWatch(true);
        flowFile = session.write(flowFile, new StreamCallback() {
            @Override
            public void process(final InputStream rawIn, final OutputStream rawOut) throws IOException {
                try (final BufferedReader reader = new BufferedReader(new InputStreamReader(rawIn, decoder), MAX_BUFFER_SIZE);
                        final BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(rawOut, encoder), MAX_BUFFER_SIZE)) {
                    int charsRead;
                    while ((charsRead = reader.read(charBuffer)) != -1) {
                        charBuffer.flip();
                        writer.write(charBuffer.array(), 0, charsRead);
                    }

                    writer.flush();
                }
            }
        });

        session.getProvenanceReporter().modifyContent(flowFile, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
        logger.info("successfully converted characters from {} to {} for {}",
                new Object[]{inputCharset, outputCharset, flowFile});
        session.transfer(flowFile, REL_SUCCESS);
    } catch (final Exception e) {
        throw new ProcessException(e);
    }
}
 
Example 20
Source File: ConvertCharacterSet.java    From nifi with Apache License 2.0 4 votes vote down vote up
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final ComponentLog logger = getLogger();

    final Charset inputCharset = Charset.forName(context.getProperty(INPUT_CHARSET).evaluateAttributeExpressions(flowFile).getValue());
    final Charset outputCharset = Charset.forName(context.getProperty(OUTPUT_CHARSET).evaluateAttributeExpressions(flowFile).getValue());
    final CharBuffer charBuffer = CharBuffer.allocate(MAX_BUFFER_SIZE);

    final CharsetDecoder decoder = inputCharset.newDecoder();
    decoder.onMalformedInput(CodingErrorAction.REPLACE);
    decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    decoder.replaceWith("?");

    final CharsetEncoder encoder = outputCharset.newEncoder();
    encoder.onMalformedInput(CodingErrorAction.REPLACE);
    encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    encoder.replaceWith("?".getBytes(outputCharset));

    try {
        final StopWatch stopWatch = new StopWatch(true);
        flowFile = session.write(flowFile, new StreamCallback() {
            @Override
            public void process(final InputStream rawIn, final OutputStream rawOut) throws IOException {
                try (final BufferedReader reader = new BufferedReader(new InputStreamReader(rawIn, decoder), MAX_BUFFER_SIZE);
                        final BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(rawOut, encoder), MAX_BUFFER_SIZE)) {
                    int charsRead;
                    while ((charsRead = reader.read(charBuffer)) != -1) {
                        charBuffer.flip();
                        writer.write(charBuffer.array(), 0, charsRead);
                    }

                    writer.flush();
                }
            }
        });

        session.getProvenanceReporter().modifyContent(flowFile, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
        logger.info("successfully converted characters from {} to {} for {}",
                new Object[]{inputCharset, outputCharset, flowFile});
        session.transfer(flowFile, REL_SUCCESS);
    } catch (final Exception e) {
        throw new ProcessException(e);
    }
}