com.ibm.icu.text.Normalizer Java Examples
The following examples show how to use
com.ibm.icu.text.Normalizer.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ICU4Jv26TextNormalizer.java From oacc-core with Apache License 2.0 | 6 votes |
@Override public char[] normalizeToNfc(char[] source) { int destBufferSize = 3 * source.length; char[] result = null; do { char[] destBuffer = new char[destBufferSize]; try { final int destBufferUsedCount = Normalizer.normalize(source, destBuffer, Normalizer.NFC, 0); result = copyContents(destBuffer, destBufferUsedCount); } catch (IndexOutOfBoundsException e) { // NOTE: since we allocate an initial buffer that is 3x of // the source text length we never expect this to happen // try the next loop iteration with a larger buffer destBufferSize += source.length; } finally { // zero out the current dest buffer zeroOut(destBuffer); } } while (result == null); return result; }
Example #2
Source File: Compiler.java From tcl-regex-java with Apache License 2.0 | 6 votes |
/**
 * Compares two equally long regions of {@code data}, one char at a time.
 *
 * @param data   the sequence holding both regions
 * @param start1 index of the first region's first char
 * @param start2 index of the second region's first char
 * @param length number of chars to compare
 * @return the first non-zero per-char comparison result, or 0 if all chars compare equal
 */
@Override
public int compare(CharSequence data, int start1, int start2, int length) {
    for (int offset = 0; offset < length; offset++) {
        final int left = data.charAt(start1 + offset);
        final int right = data.charAt(start2 + offset);
        // case-insensitive mode delegates to ICU; otherwise use the plain numeric difference
        final int difference = caseInsensitive
                ? Normalizer.compare(left, right, Normalizer.COMPARE_IGNORE_CASE)
                : left - right;
        if (difference != 0) {
            return difference;
        }
    }
    return 0;
}
Example #3
Source File: Norm2AllModes.java From fitnotifications with Apache License 2.0 | 5 votes |
/**
 * Runs the compose quick check over all of {@code s} and decodes its packed result.
 *
 * @param s the text to check
 * @return {@code MAYBE} if the low bit of the packed result is set; otherwise {@code YES}
 *         when the remaining bits equal the length of {@code s}, else {@code NO}
 */
@Override
public Normalizer.QuickCheckResult quickCheck(CharSequence s) {
    final int packed = impl.composeQuickCheck(s, 0, s.length(), onlyContiguous, false);
    final boolean maybeFlag = (packed & 1) != 0;
    if (maybeFlag) {
        return Normalizer.MAYBE;
    }
    final int spanLength = packed >>> 1;
    return spanLength == s.length() ? Normalizer.YES : Normalizer.NO;
}
Example #4
Source File: ICUNormalizer2Filter.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Advances the wrapped stream and normalizes the current term when the quick check
 * does not answer {@code Normalizer.YES}.
 *
 * @return {@code true} if a token is available, {@code false} once the input is exhausted
 * @throws IOException if the wrapped stream fails
 */
@Override
public final boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
        return false;
    }
    final boolean definitelyNormalized = normalizer.quickCheck(termAtt) == Normalizer.YES;
    if (!definitelyNormalized) {
        // normalize into the scratch buffer, then swap the term's contents for the result
        buffer.setLength(0);
        normalizer.normalize(termAtt, buffer);
        termAtt.setEmpty().append(buffer);
    }
    return true;
}
Example #5
Source File: Norm2AllModes.java From trekarta with GNU General Public License v3.0 | 5 votes |
/**
 * Performs the compose quick check on the whole of {@code s}.
 *
 * <p>The value returned by {@code composeQuickCheck} is unpacked here: bit 0 signals
 * "maybe", and the value shifted right by one is compared against the input length.
 *
 * @param s the text to check
 * @return {@code MAYBE}, {@code YES} or {@code NO} as decoded from the packed value
 */
@Override
public Normalizer.QuickCheckResult quickCheck(CharSequence s) {
    final int result = impl.composeQuickCheck(s, 0, s.length(), onlyContiguous, false);
    if ((result & 1) != 0) {
        return Normalizer.MAYBE;
    }
    if ((result >>> 1) == s.length()) {
        return Normalizer.YES;
    }
    return Normalizer.NO;
}
Example #6
Source File: NormalizationChecker.java From caja with Apache License 2.0 | 5 votes |
/** * @see nu.validator.htmlparser.common.CharacterHandler#end() */ public void end() throws SAXException { if (!alreadyComplainedAboutThisRun && !Normalizer.isNormalized(buf, 0, pos, Normalizer.NFC, 0)) { errAboutTextRun(); } if (bufHolder != null) { // restore the original small buffer to avoid leaking // memory if this checker is recycled buf = bufHolder; bufHolder = null; } }
Example #7
Source File: ICU4Jv26TextNormalizer.java From oacc-core with Apache License 2.0 | 5 votes |
/**
 * Private constructor that forces the ICU4J {@code Normalizer} class to load eagerly.
 */
private ICU4Jv26TextNormalizer() {
    // This "no-op" call to the Normalizer class is *very* important. Without it, when the
    // com.ibm.icu.text.Normalizer class is not present on the classpath, loading the class
    // will not fail until it is attempted in the normalizeToNfc() method -- which is too
    // late. The class load needs to fail here so that the getInstance() method propagates
    // the class load exception and correctly triggers the fallback to the JDK-based
    // TextNormalizer implementation in the parent class's TextNormalizer#getInstance().
    Normalizer.normalize("", Normalizer.NFC, 0);
}
Example #8
Source File: ICU4Jv26TextNormalizerWorstCaseExpansionTest.java From oacc-core with Apache License 2.0 | 5 votes |
@Test public void testExpansion() throws Exception { final int expectedMaxExpansionSize = 3 * src.length(); // allocate the destination to be 3x of the source length char[] dest = new char[expectedMaxExpansionSize]; // normalize the text final int actualDestLen = Normalizer.normalize(src.toCharArray(), dest, Normalizer.NFC, 0); assertThat("Note: " + "if this test fails, then the ICU4J library in use does not maintain our bounded expansion " + "and could leak passwords; use a different library or adjust the expansion factor", actualDestLen, lessThanOrEqualTo(expectedMaxExpansionSize)); }
Example #9
Source File: IcuNormalizerFilter.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Pulls the next token from the input and, unless the quick check reports
 * {@code Normalizer.YES}, replaces the term text with its normalized form.
 *
 * @return {@code false} when the input has no more tokens, {@code true} otherwise
 * @throws IOException if reading from the input fails
 */
@Override
public final boolean incrementToken() throws IOException {
    final boolean hasToken = input.incrementToken();
    if (hasToken && normalizer.quickCheck(termAtt) != Normalizer.YES) {
        // reuse the scratch buffer for the normalized text, then copy it back into the term
        buffer.setLength(0);
        normalizer.normalize(termAtt, buffer);
        termAtt.setEmpty().append(buffer);
    }
    return hasToken;
}
Example #10
Source File: Norm2AllModes.java From fitnotifications with Apache License 2.0 | 4 votes |
/**
 * Always answers {@code Normalizer.YES}; the argument is not inspected.
 *
 * @param s the text to check (unused)
 * @return {@code Normalizer.YES}
 */
@Override
public Normalizer.QuickCheckResult quickCheck(CharSequence s) {
    return Normalizer.YES;
}
Example #11
Source File: Norm2AllModes.java From fitnotifications with Apache License 2.0 | 4 votes |
/**
 * Answers the quick check definitively by delegating to {@code isNormalized}.
 *
 * @param s the text to check
 * @return {@code Normalizer.YES} if {@code isNormalized(s)} is true, otherwise
 *         {@code Normalizer.NO}
 */
@Override
public Normalizer.QuickCheckResult quickCheck(CharSequence s) {
    if (isNormalized(s)) {
        return Normalizer.YES;
    }
    return Normalizer.NO;
}
Example #12
Source File: Norm2AllModes.java From trekarta with GNU General Public License v3.0 | 4 votes |
/**
 * Unconditionally reports {@code Normalizer.YES} without looking at {@code s}.
 *
 * @param s the text to check (unused)
 * @return {@code Normalizer.YES}
 */
@Override
public Normalizer.QuickCheckResult quickCheck(CharSequence s) {
    return Normalizer.YES;
}
Example #13
Source File: Norm2AllModes.java From trekarta with GNU General Public License v3.0 | 4 votes |
/**
 * Maps the boolean answer of {@code isNormalized} onto a quick-check result.
 *
 * @param s the text to check
 * @return {@code Normalizer.YES} when {@code isNormalized(s)} holds, {@code Normalizer.NO}
 *         otherwise
 */
@Override
public Normalizer.QuickCheckResult quickCheck(CharSequence s) {
    final boolean normalized = isNormalized(s);
    if (!normalized) {
        return Normalizer.NO;
    }
    return Normalizer.YES;
}
Example #14
Source File: NormalizationChecker.java From caja with Apache License 2.0 | 4 votes |
/** * @see nu.validator.htmlparser.common.CharacterHandler#characters(char[], int, int) */ public void characters(char[] ch, int start, int length) throws SAXException { if (alreadyComplainedAboutThisRun) { return; } if (atStartOfRun) { char c = ch[start]; if (pos == 1) { // there's a single high surrogate in buf if (isComposingChar(UCharacter.getCodePoint(buf[0], c))) { err("Text run starts with a composing character."); } atStartOfRun = false; } else { if (length == 1 && UCharacter.isHighSurrogate(c)) { buf[0] = c; pos = 1; return; } else { if (UCharacter.isHighSurrogate(c)) { if (isComposingChar(UCharacter.getCodePoint(c, ch[start + 1]))) { err("Text run starts with a composing character."); } } else { if (isComposingCharOrSurrogate(c)) { err("Text run starts with a composing character."); } } atStartOfRun = false; } } } int i = start; int stop = start + length; if (pos > 0) { // there's stuff in buf while (i < stop && isComposingCharOrSurrogate(ch[i])) { i++; } appendToBuf(ch, start, i); if (i == stop) { return; } else { if (!Normalizer.isNormalized(buf, 0, pos, Normalizer.NFC, 0)) { errAboutTextRun(); } pos = 0; } } if (i < stop) { start = i; i = stop - 1; while (i > start && isComposingCharOrSurrogate(ch[i])) { i--; } if (i > start) { if (!Normalizer.isNormalized(ch, start, i, Normalizer.NFC, 0)) { errAboutTextRun(); } } appendToBuf(ch, i, stop); } }
Example #15
Source File: ICU4Jv26TextNormalizerParityTest.java From oacc-core with Apache License 2.0 | 4 votes |
private char[] normalizeDirect() { // normalize using direct call to underlying normalizer final String dest = Normalizer.normalize(new String(srcCharArray), Normalizer.NFC); return dest.toCharArray(); }
Example #16
Source File: StringUtils.java From CloverETL-Engine with GNU Lesser General Public License v2.1 | 3 votes |
/**
 * Replaces characters carrying diacritics with their equivalents without diacritics.
 * This works only for characters for which a decomposition is defined.
 *
 * @param str the text to process; may be {@code null}
 * @return the text with combining diacritical marks removed, or {@code null} if
 *         {@code str} is {@code null}
 */
public static String removeDiacritic(String str) {
    if (str != null) {
        // decompose first so that the marks become separate, removable characters
        final String decomposed = Normalizer.decompose(str, false, 0);
        return decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
    }
    return null;
}