Java Code Examples for java.nio.charset.CharsetDecoder#onUnmappableCharacter()

The following examples show how to use java.nio.charset.CharsetDecoder#onUnmappableCharacter(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: Text.java    From Bats with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes the bytes of {@code utf8} with the decoder supplied by
 * {@code DECODER_FACTORY}.
 *
 * @param utf8 buffer holding the encoded bytes
 * @param replace when {@code true}, malformed or unmappable input is replaced
 *     with the decoder's replacement string instead of being reported
 * @return the decoded text
 * @throws CharacterCodingException if the decoder reports a coding error
 */
private static String decode(ByteBuffer utf8, boolean replace)
    throws CharacterCodingException {
  CharsetDecoder decoder = DECODER_FACTORY.get();
  if (!replace) {
    return decoder.decode(utf8).toString();
  }
  decoder.onMalformedInput(CodingErrorAction.REPLACE);
  decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
  try {
    return decoder.decode(utf8).toString();
  } finally {
    // Set the decoder back to its default value (REPORT) even when decode()
    // throws, so a later non-replacing call does not silently replace input.
    decoder.onMalformedInput(CodingErrorAction.REPORT);
    decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
  }
}
 
Example 2
Source File: Text.java    From Canova with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes the bytes of {@code utf8} with the decoder supplied by
 * {@code DECODER_FACTORY}.
 *
 * @param utf8 buffer holding the encoded bytes
 * @param replace when {@code true}, malformed or unmappable input is replaced
 *         with the decoder's replacement string instead of being reported
 * @return the decoded text
 * @throws CharacterCodingException if the decoder reports a coding error
 */
private static String decode(ByteBuffer utf8, boolean replace)
        throws CharacterCodingException {
    CharsetDecoder decoder = DECODER_FACTORY.get();
    if (replace) {
        decoder.onMalformedInput(CodingErrorAction.REPLACE);
        decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    }
    try {
        return decoder.decode(utf8).toString();
    } finally {
        // Set the decoder back to its default value (REPORT) on every exit
        // path, including when decode() throws.
        if (replace) {
            decoder.onMalformedInput(CodingErrorAction.REPORT);
            decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
        }
    }
}
 
Example 3
Source File: CharsetUtil.java    From netty4.0.27Learn with Apache License 2.0 6 votes vote down vote up
/**
 * Looks up the {@link CharsetDecoder} cached for {@code charset} in the
 * current {@code InternalThreadLocalMap}, creating and caching one on a miss.
 * The returned decoder always replaces malformed and unmappable input; a
 * decoder taken from the cache is reset before it is handed out.
 */
public static CharsetDecoder getDecoder(Charset charset) {
    if (charset == null) {
        throw new NullPointerException("charset");
    }

    Map<Charset, CharsetDecoder> cache = InternalThreadLocalMap.get().charsetDecoderCache();
    CharsetDecoder decoder = cache.get(charset);
    final boolean cacheHit = decoder != null;

    if (cacheHit) {
        // Discard whatever state the previous user left behind.
        decoder.reset();
    } else {
        decoder = charset.newDecoder();
    }

    decoder.onMalformedInput(CodingErrorAction.REPLACE);
    decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);

    if (!cacheHit) {
        cache.put(charset, decoder);
    }
    return decoder;
}
 
Example 4
Source File: Text.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes the bytes of {@code utf8} with the decoder supplied by
 * {@code DECODER_FACTORY}.
 *
 * @param utf8 buffer holding the encoded bytes
 * @param replace when {@code true}, malformed or unmappable input is replaced
 *     with the decoder's replacement string instead of being reported
 * @return the decoded text
 * @throws CharacterCodingException if the decoder reports a coding error
 */
private static String decode(ByteBuffer utf8, boolean replace)
  throws CharacterCodingException {
  CharsetDecoder decoder = DECODER_FACTORY.get();
  if (!replace) {
    return decoder.decode(utf8).toString();
  }
  decoder.onMalformedInput(CodingErrorAction.REPLACE);
  decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
  try {
    return decoder.decode(utf8).toString();
  } finally {
    // Set the decoder back to its default value (REPORT) even when decode()
    // throws, so a later non-replacing call does not silently replace input.
    decoder.onMalformedInput(CodingErrorAction.REPORT);
    decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
  }
}
 
Example 5
Source File: NetStringUtil.java    From 365browser with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes bytes in the named character set into a Unicode string.  Malformed
 * or unmappable input is replaced with U+FFFD.
 * @param text ByteBuffer containing the bytes to convert.
 * @param charsetName Name of the character set the bytes are encoded in.
 * @return Unicode string on success; null if any exception is raised while
 *         decoding, for example when the character set is not recognized.
 */
@CalledByNative
private static String convertToUnicodeWithSubstitutions(
        ByteBuffer text,
        String charsetName) {
    try {
        // TODO(mmenke):  Investigate if Charset.decode() can be used
        // instead.  The question is whether it uses the proper replace
        // character.  JDK CharsetDecoder docs say U+FFFD is the default,
        // but Charset.decode() docs say it uses the "charset's default
        // replacement byte array".
        CharsetDecoder substitutingDecoder = Charset.forName(charsetName)
                .newDecoder()
                .onMalformedInput(CodingErrorAction.REPLACE)
                .onUnmappableCharacter(CodingErrorAction.REPLACE)
                .replaceWith("\uFFFD");
        return substitutingDecoder.decode(text).toString();
    } catch (Exception e) {
        // Every failure is reported to the caller as null.
        return null;
    }
}
 
Example 6
Source File: Slices.java    From aion with MIT License 6 votes vote down vote up
/**
 * Fetches the {@link CharsetDecoder} cached for {@code charset} in the map held by
 * {@code decoders}, creating and caching a new one when none exists yet. Either way the
 * decoder is configured to replace malformed and unmappable input.
 */
private static CharsetDecoder getDecoder(Charset charset) {
    if (charset == null) {
        throw new NullPointerException("charset");
    }

    Map<Charset, CharsetDecoder> decoderCache = decoders.get();
    CharsetDecoder cached = decoderCache.get(charset);
    if (cached == null) {
        // First request for this charset: build, configure, then remember it.
        CharsetDecoder fresh = charset.newDecoder();
        fresh.onMalformedInput(CodingErrorAction.REPLACE);
        fresh.onUnmappableCharacter(CodingErrorAction.REPLACE);
        decoderCache.put(charset, fresh);
        return fresh;
    }

    // Reuse: clear leftover state and re-apply the replacement policy.
    cached.reset();
    cached.onMalformedInput(CodingErrorAction.REPLACE);
    cached.onUnmappableCharacter(CodingErrorAction.REPLACE);
    return cached;
}
 
Example 7
Source File: MessageBuilderFactory.java    From firebase-android-sdk with Apache License 2.0 5 votes vote down vote up
/** Creates a UTF-8 decoder that reports malformed and unmappable input instead of replacing it. */
@Override
protected CharsetDecoder initialValue() {
  return Charset.forName("UTF8")
      .newDecoder()
      .onMalformedInput(CodingErrorAction.REPORT)
      .onUnmappableCharacter(CodingErrorAction.REPORT);
}
 
Example 8
Source File: Text.java    From pxf with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes the bytes of {@code utf8} with the decoder supplied by
 * {@code DECODER_FACTORY}.
 *
 * @param utf8 buffer holding the encoded bytes
 * @param replace when {@code true}, malformed or unmappable input is replaced
 *         with the decoder's replacement string instead of being reported
 * @return the decoded text
 * @throws CharacterCodingException if the decoder reports a coding error
 */
private static String decode(ByteBuffer utf8, boolean replace)
        throws CharacterCodingException {
    CharsetDecoder decoder = DECODER_FACTORY.get();
    if (replace) {
        decoder.onMalformedInput(CodingErrorAction.REPLACE);
        decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    }
    try {
        return decoder.decode(utf8).toString();
    } finally {
        // Set the decoder back to its default value (REPORT) on every exit
        // path, including when decode() throws.
        if (replace) {
            decoder.onMalformedInput(CodingErrorAction.REPORT);
            decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
        }
    }
}
 
Example 9
Source File: CharsetDecoderTest.java    From j2objc with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes three bytes as UTF-16 with a custom replacement string and expects
 * one decoded character followed by the replacement.
 */
public void test_replaceWith() throws Exception {
    byte[] threeBytes = { 109, 97, 109 };
    CharsetDecoder decoder = Charset.forName("UTF-16")
            .newDecoder()
            .replaceWith("x")
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
    String decoded = decoder.decode(ByteBuffer.wrap(threeBytes)).toString();
    assertEquals("\u6d61x", decoded);
}
 
Example 10
Source File: Charsetfunctions.java    From alipay-sdk-java-all with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes {@code bytes} using the named charset, applying {@code codingErrorAction}
 * to both malformed and unmappable input. On success the buffer is reset to the
 * position it had on entry.
 *
 * @param bytes buffer to decode
 * @param charset name of the charset to decode with
 * @return the decoded text
 * @throws InvalidDataException with {@code CloseFrame.NO_UTF8} if decoding fails
 */
public static String stringCharset(ByteBuffer bytes, String charset) throws InvalidDataException {
    CharsetDecoder decoder = Charset.forName(charset).newDecoder();
    decoder.onMalformedInput(codingErrorAction);
    decoder.onUnmappableCharacter(codingErrorAction);
    try {
        bytes.mark();
        String decoded = decoder.decode(bytes).toString();
        bytes.reset();
        return decoded;
    } catch (CharacterCodingException e) {
        throw new InvalidDataException(CloseFrame.NO_UTF8, e);
    }
}
 
Example 11
Source File: Charsetfunctions.java    From Slyther with MIT License 5 votes vote down vote up
/**
 * Decodes {@code bytes} as UTF-8, applying {@code codingErrorAction} to both
 * malformed and unmappable input. On success the buffer is reset to the
 * position it had on entry.
 *
 * @throws InvalidDataException with {@code CloseFrame.NO_UTF8} if decoding fails
 */
public static String stringUtf8( ByteBuffer bytes ) throws InvalidDataException {
	CharsetDecoder utf8Decoder = Charset.forName( "UTF8" ).newDecoder();
	utf8Decoder.onMalformedInput( codingErrorAction );
	utf8Decoder.onUnmappableCharacter( codingErrorAction );
	// utf8Decoder.replaceWith( "X" );
	try {
		bytes.mark();
		String decoded = utf8Decoder.decode( bytes ).toString();
		bytes.reset();
		return decoded;
	} catch ( CharacterCodingException e ) {
		throw new InvalidDataException( CloseFrame.NO_UTF8, e );
	}
}
 
Example 12
Source File: Charsetfunctions.java    From RipplePower with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes {@code bytes} as UTF-8, applying {@code codingErrorAction} to both
 * malformed and unmappable input. On success the buffer is reset to the
 * position it had on entry.
 *
 * @throws InvalidDataException with {@code CloseFrame.NO_UTF8} if decoding fails
 */
public static String stringUtf8( ByteBuffer bytes ) throws InvalidDataException {
	CharsetDecoder utf8Decoder = Charset.forName( "UTF8" ).newDecoder();
	utf8Decoder.onMalformedInput( codingErrorAction );
	utf8Decoder.onUnmappableCharacter( codingErrorAction );
	// utf8Decoder.replaceWith( "X" );
	final String decoded;
	try {
		bytes.mark();
		decoded = utf8Decoder.decode( bytes ).toString();
		bytes.reset();
	} catch ( CharacterCodingException e ) {
		throw new InvalidDataException( CloseFrame.NO_UTF8, e );
	}
	return decoded;
}
 
Example 13
Source File: AuthenticationServlet.java    From incubator-retired-wave with Apache License 2.0 5 votes vote down vote up
/**
 * Performs a login with the URL-encoded parameters found on the first line of
 * the request body.
 *
 * <p>The line is encoded to UTF-8 and decoded again with a decoder that
 * ignores malformed and unmappable input, then parsed with {@code UrlEncoded}
 * and handed to a {@code HttpRequestBasedCallbackHandler} for the "Wave"
 * login context.
 *
 * @param body reader over the request body
 * @return the login context after {@code login()} succeeded
 * @throws IOException if reading the body fails
 * @throws LoginException if the body is empty, cannot be re-coded as UTF-8,
 *     or authentication fails
 */
@SuppressWarnings("unchecked")
private LoginContext login(BufferedReader body) throws IOException, LoginException {
  try {
    Subject subject = new Subject();

    String parametersLine = body.readLine();
    if (parametersLine == null) {
      // readLine() returns null at end of stream; fail the login explicitly
      // instead of hitting a NullPointerException below.
      throw new LoginException("Empty request body: no login parameters supplied");
    }
    // Throws UnsupportedEncodingException.
    byte[] utf8Bytes = parametersLine.getBytes("UTF-8");

    CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder();
    utf8Decoder.onMalformedInput(CodingErrorAction.IGNORE);
    utf8Decoder.onUnmappableCharacter(CodingErrorAction.IGNORE);

    // Throws CharacterCodingException.
    CharBuffer parsed = utf8Decoder.decode(ByteBuffer.wrap(utf8Bytes));
    parametersLine = parsed.toString();

    MultiMap<String> parameters = new UrlEncoded(parametersLine);
    CallbackHandler callbackHandler = new HttpRequestBasedCallbackHandler(parameters);

    LoginContext context = new LoginContext("Wave", subject, callbackHandler, configuration);

    // If authentication fails, login() will throw a LoginException.
    context.login();
    return context;
  } catch (CharacterCodingException cce) {
    // LoginException has no cause-taking constructor, so attach the cause by hand.
    LoginException failure = new LoginException("Character coding exception (not utf-8): "
        + cce.getLocalizedMessage());
    failure.initCause(cce);
    throw failure;
  } catch (UnsupportedEncodingException uee) {
    LoginException failure = new LoginException(
        "Bad character encoding specification: " + uee.getLocalizedMessage());
    failure.initCause(uee);
    throw failure;
  }
}
 
Example 14
Source File: Charsetfunctions.java    From clevertap-android-sdk with MIT License 5 votes vote down vote up
/**
 * Decodes {@code bytes} as UTF-8, applying {@code codingErrorAction} to both
 * malformed and unmappable input. On success the buffer is reset to the
 * position it had on entry.
 *
 * @throws InvalidDataException with {@code CloseFrame.NO_UTF8} if decoding fails
 */
public static String stringUtf8( ByteBuffer bytes ) throws InvalidDataException {
	CharsetDecoder utf8Decoder = Charset.forName( "UTF8" ).newDecoder();
	utf8Decoder.onMalformedInput( codingErrorAction );
	utf8Decoder.onUnmappableCharacter( codingErrorAction );
	try {
		bytes.mark();
		String decoded = utf8Decoder.decode( bytes ).toString();
		bytes.reset();
		return decoded;
	} catch ( CharacterCodingException e ) {
		throw new InvalidDataException( CloseFrame.NO_UTF8, e );
	}
}
 
Example 15
Source File: CharsetDecoder2Test.java    From j2objc with Apache License 2.0 5 votes vote down vote up
/**
	 * Checks that a UTF-16 decoder in REPORT mode throws
	 * {@link MalformedInputException} for a three-byte input.
	 *
	 * @tests java.nio.charset.CharsetDecoder#decode(java.nio.ByteBuffer)
	 */
	public void test_decode() throws CharacterCodingException {
		// Regression for HARMONY-33 (currently disabled)
//		ByteBuffer bb = ByteBuffer.allocate(1);
//		bb.put(0, (byte) 77);
//		CharsetDecoder decoder = Charset.forName("UTF-16").newDecoder();
//		decoder.onMalformedInput(CodingErrorAction.REPLACE);
//		decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
//		decoder.decode(bb);

		// Regression for HARMONY-67 (currently disabled)
//		byte[] b = new byte[] { (byte) 1 };
//		ByteBuffer buf = ByteBuffer.wrap(b);
//		CharBuffer charbuf = Charset.forName("UTF-16").decode(buf);
//		assertEquals("Assert 0: charset UTF-16", 1, charbuf.length());
//
//		charbuf = Charset.forName("UTF-16BE").decode(buf);
//		assertEquals("Assert 1: charset UTF-16BE", 0, charbuf.length());
//
//		charbuf = Charset.forName("UTF-16LE").decode(buf);
//		assertEquals("Assert 2: charset UTF16LE", 0, charbuf.length());

		// Regression for HARMONY-99
		CharsetDecoder strictDecoder = Charset.forName("UTF-16")
				.newDecoder()
				.onMalformedInput(CodingErrorAction.REPORT)
				.onUnmappableCharacter(CodingErrorAction.REPORT);
		byte[] threeBytes = { 109, 97, 109 };
		try {
			strictDecoder.decode(ByteBuffer.wrap(threeBytes));
			fail("Assert 3: MalformedInputException should have thrown");
		} catch (MalformedInputException expected) {
			// expected
		}
	}
 
Example 16
Source File: SimulatedContext.java    From xyz-hub with Apache License 2.0 5 votes vote down vote up
/**
 * Logs {@code bytes} as text when they decode as strict UTF-8; bytes that fail
 * to decode are dropped without logging anything.
 */
@Override
public void log(byte[] bytes) {
  try {
    final CharsetDecoder strictUtf8 = StandardCharsets.UTF_8
        .newDecoder()
        .onMalformedInput(CodingErrorAction.REPORT)
        .onUnmappableCharacter(CodingErrorAction.REPORT);
    final String message = strictUtf8.decode(ByteBuffer.wrap(bytes)).toString();
    log(message);
  } catch (CharacterCodingException ignored) {
    // Not decodable as UTF-8: nothing is logged.
  }
}
 
Example 17
Source File: JsonReader.java    From jsondb-core with MIT License 5 votes vote down vote up
/**
 * Opens a buffered reader over {@code collectionFile} while holding a file
 * lock on {@code lock/<collection file name>.lock} next to it.
 *
 * <p>The lock directory and lock file are created when missing. Content is
 * decoded with the charset from {@code dbConfig}, and malformed or unmappable
 * input is reported rather than replaced.
 *
 * @param dbConfig configuration supplying the charset of the collection file
 * @param collectionFile the collection file to read
 * @throws IOException if the lock file cannot be created or opened, or the
 *     collection file cannot be opened
 * @throws JsonFileLockException if the file lock cannot be obtained
 */
public JsonReader(JsonDBConfig dbConfig, File collectionFile) throws IOException {
  this.collectionFile = collectionFile;
  this.lockFilesLocation = new File(collectionFile.getParentFile(), "lock");
  this.fileLockLocation = new File(lockFilesLocation, collectionFile.getName() + ".lock");
  
  if(!lockFilesLocation.exists()) {
    lockFilesLocation.mkdirs();
  }
  if(!fileLockLocation.exists()) {
    fileLockLocation.createNewFile();
  }

  CharsetDecoder decoder = dbConfig.getCharset().newDecoder();
  decoder.onMalformedInput(CodingErrorAction.REPORT);
  decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
  
  raf = new RandomAccessFile(fileLockLocation, "rw");
  channel = raf.getChannel();
  try {
    lock = channel.lock();
  } catch (IOException | OverlappingFileLockException e) {
    try {
      channel.close();
      raf.close();
    } catch (IOException e1) {
      // Pass the exception as the last argument so its stack trace is logged too.
      logger.error("Failed while closing RandomAccessFile for collection file {}", collectionFile.getName(), e1);
    }
    throw new JsonFileLockException("JsonReader failed to obtain a file lock for file " + fileLockLocation, e);
  }

  try {
    fis = new FileInputStream(collectionFile);
  } catch (IOException e) {
    // The collection file could not be opened: do not leave the lock file
    // open and locked. Closing the RandomAccessFile also closes its channel,
    // which releases the lock.
    try {
      raf.close();
    } catch (IOException closeFailure) {
      e.addSuppressed(closeFailure);
    }
    throw e;
  }
  isr = new InputStreamReader(fis, decoder);
  reader = new BufferedReader(isr);
}
 
Example 18
Source File: MultiLineMappedMatcherSmall.java    From netbeans with Apache License 2.0 4 votes vote down vote up
/**
 * Memory-maps the file behind {@code fo}, decodes it with the decoder prepared
 * for its encoding (unmappable characters are ignored), and matches the whole
 * content.
 *
 * @return a {@code Def} carrying the matches, or {@code null} when
 *         {@code matchWholeFile} returns {@code null} or any exception occurs
 *         (exceptions are passed to {@code listener.generalError})
 */
@Override
protected Def checkMeasuredInternal(FileObject fo,
        SearchListener listener) {

    MappedByteBuffer mapped = null;
    FileChannel channel = null;
    try {
        listener.fileContentMatchingStarted(fo.getPath());

        // Map the complete file read-only through the input stream's channel.
        File diskFile = FileUtil.toFile(fo);
        FileInputStream stream = new FileInputStream(diskFile);
        channel = stream.getChannel();
        int length = (int) channel.size();
        mapped = channel.map(FileChannel.MapMode.READ_ONLY, 0, length);

        //  if (asciiPattern && !matchesIgnoringEncoding(bb)) {
        //    return null;
        //}

        // Turn the mapped bytes into characters using the file's encoding.
        CharsetDecoder decoder = prepareDecoder(FileEncodingQuery.getEncoding(fo));
        decoder.onUnmappableCharacter(CodingErrorAction.IGNORE);
        CharBuffer content = decoder.decode(mapped);

        List<TextDetail> matches = matchWholeFile(content, fo);
        if (matches != null) {
            return new Def(fo, decoder.charset(), matches);
        }
        return null;
    } catch (Exception e) {
        listener.generalError(e);
        return null;
    } finally {
        if (channel != null) {
            try {
                channel.close();
            } catch (IOException ex) {
                listener.generalError(ex);
            }
        }
        MatcherUtils.unmap(mapped);
    }
}
 
Example 19
Source File: ConvertCharacterSet.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Re-encodes the content of the next FlowFile from the configured input
 * character set to the configured output character set.
 *
 * <p>Malformed or unmappable input is replaced with {@code "?"} on both the
 * decoding and the encoding side. On success the FlowFile is transferred to
 * {@code REL_SUCCESS}; any exception is rethrown wrapped in a
 * {@code ProcessException}.
 *
 * @param context supplies the INPUT_CHARSET and OUTPUT_CHARSET properties
 * @param session session used to fetch, rewrite and transfer the FlowFile
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final ComponentLog logger = getLogger();

    final Charset inputCharset = Charset.forName(context.getProperty(INPUT_CHARSET).evaluateAttributeExpressions(flowFile).getValue());
    final Charset outputCharset = Charset.forName(context.getProperty(OUTPUT_CHARSET).evaluateAttributeExpressions(flowFile).getValue());
    final CharBuffer charBuffer = CharBuffer.allocate(MAX_BUFFER_SIZE);

    final CharsetDecoder decoder = inputCharset.newDecoder();
    decoder.onMalformedInput(CodingErrorAction.REPLACE);
    decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    decoder.replaceWith("?");

    final CharsetEncoder encoder = outputCharset.newEncoder();
    encoder.onMalformedInput(CodingErrorAction.REPLACE);
    encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    encoder.replaceWith("?".getBytes(outputCharset));

    try {
        final StopWatch stopWatch = new StopWatch(true);
        flowFile = session.write(flowFile, new StreamCallback() {
            @Override
            public void process(final InputStream rawIn, final OutputStream rawOut) throws IOException {
                try (final BufferedReader reader = new BufferedReader(new InputStreamReader(rawIn, decoder), MAX_BUFFER_SIZE);
                        final BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(rawOut, encoder), MAX_BUFFER_SIZE)) {
                    int charsRead;
                    while ((charsRead = reader.read(charBuffer)) != -1) {
                        // The chars just read start at index 0 of the backing array.
                        writer.write(charBuffer.array(), 0, charsRead);
                        // Reset position and limit so the next read can use the full
                        // capacity again. Flipping without clearing shrinks the limit
                        // to the smallest read seen so far.
                        charBuffer.clear();
                    }

                    writer.flush();
                }
            }
        });

        session.getProvenanceReporter().modifyContent(flowFile, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
        logger.info("successfully converted characters from {} to {} for {}",
                new Object[]{inputCharset, outputCharset, flowFile});
        session.transfer(flowFile, REL_SUCCESS);
    } catch (final Exception e) {
        throw new ProcessException(e);
    }
}
 
Example 20
Source File: BufferedPositionedInputStream.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Reads bytes through {@code read()} up to the next {@code delimiter} byte or
 * the end of the stream and decodes them with {@code charset}. The delimiter
 * is consumed but not included in the result. Malformed and unmappable input
 * is replaced rather than reported.
 *
 * NOTE(review): presumably {@code bbuff} wraps {@code barray} and
 * {@code cbuff} wraps {@code carray} - confirm against the field declarations.
 *
 * @param charset charset used to decode the bytes of the line
 * @param delimiter byte that terminates the line
 * @return the decoded line, or {@code null} when the end of the stream is
 *         reached before any byte was read
 * @throws IOException if a decode pass over a full byte buffer reports an error
 */
public String readLine(Charset charset, byte delimiter) throws IOException {
    CharsetDecoder decoder = charset.newDecoder();
    decoder.onMalformedInput(CodingErrorAction.REPLACE);
    decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    // Widen the delimiter to an unsigned int so it compares equal to read()'s result.
    int delim = delimiter&0xff;
    int rc;
    // Number of pending (not yet decoded) bytes at the front of barray.
    int offset = 0;
    StringBuilder sb = null;
    CoderResult res;
    while ((rc = read())!=-1) {
        if (rc == delim) {
            break;
        }
        barray[offset++] = (byte)rc;
        // barray is full: decode what is buffered so far, with more input expected.
        if (barray.length == offset) {
            bbuff.position(0);
            bbuff.limit(barray.length);
            cbuff.position(0);
            cbuff.limit(carray.length);
            res = decoder.decode(bbuff, cbuff, false);
            if (res.isError()) {
                throw new IOException("Decoding error: " + res.toString());
            }
            // Bytes the decoder did not consume are moved to the front of barray
            // so the next pass continues from them.
            offset = bbuff.remaining();
            switch (offset) {
            default:
                System.arraycopy(barray, bbuff.position(), barray, 0, bbuff
                        .remaining());
                break;
            case 2:
                // Two leftover bytes: copy the last two bytes of barray by hand.
                barray[1] = barray[barray.length - 1];
                barray[0] = barray[barray.length - 2];
                break;
            case 1:
                // One leftover byte: copy the last byte of barray by hand.
                barray[0] = barray[barray.length - 1];
                break;
            case 0:
                // Everything was consumed; nothing to move.
            }
            if (sb == null) {
                sb = new StringBuilder(cbuff.position());
            }
            // Append the chars produced by this decode pass.
            sb.append(carray, 0, cbuff.position());
        }
    }
    if (sb == null) {
        if (rc == -1 && offset == 0) {
            // We are at EOF with nothing read
            return null;
        }
        sb = new StringBuilder();
    }
    // Final pass: decode the remaining pending bytes as the end of the input.
    bbuff.position(0);
    bbuff.limit(offset);
    cbuff.position(0);
    cbuff.limit(carray.length);
    res = decoder.decode(bbuff, cbuff, true);
    if (res.isError()) {
        // Unlike the mid-line pass above, an error here is only printed, not thrown.
        System.out.println("Error");
    }
    sb.append(carray, 0, cbuff.position());
    // Flush the decoder and append whatever it writes to cbuff.
    cbuff.position(0);
    res = decoder.flush(cbuff);
    if (res.isError()) {
        System.out.println("Error");
    }
    sb.append(carray, 0, cbuff.position());
    return sb.toString();
}