com.ibm.icu.text.Normalizer Java Examples

Source File: ICU4Jv26TextNormalizer.java From oacc-core with Apache License 2.0

6 votes

/**
 * Normalizes {@code source} to NFC through ICU4J's array-based
 * {@code Normalizer.normalize} call.
 *
 * <p>The work is done in a scratch array that starts at three times the source
 * length. If the normalizer signals (via {@link IndexOutOfBoundsException}) that
 * the scratch array is too small, the capacity grows by one source length and the
 * attempt is repeated. Every scratch array is passed to {@code zeroOut} before it
 * is dropped, whether or not the attempt succeeded.
 *
 * @param source the characters to normalize
 * @return the array produced by {@code copyContents} for the normalized prefix
 *         of the scratch array
 */
@Override
public char[] normalizeToNfc(char[] source) {
   char[] normalized = null;
   int capacity = 3 * source.length;

   while (normalized == null) {
      final char[] scratch = new char[capacity];
      try {
         normalized = copyContents(scratch, Normalizer.normalize(source, scratch, Normalizer.NFC, 0));
      }
      catch (IndexOutOfBoundsException tooSmall) {
         // not expected with a 3x initial buffer; retry with more room
         capacity += source.length;
      }
      finally {
         // never leave normalized text behind in a discarded buffer
         zeroOut(scratch);
      }
   }

   return normalized;
}

Source File: Compiler.java From tcl-regex-java with Apache License 2.0

6 votes

/**
 * Compares two equally long regions of {@code data}, one char at a time.
 *
 * @param data   the sequence holding both regions
 * @param start1 index of the first region's first char
 * @param start2 index of the second region's first char
 * @param length number of chars to compare
 * @return the first non-zero per-char comparison result, or 0 when all
 *         {@code length} positions compare equal
 */
@Override
public int compare(CharSequence data, int start1, int start2, int length) {
    int offset = 0;
    while (offset < length) {
        final int left = data.charAt(start1 + offset);
        final int right = data.charAt(start2 + offset);
        // case-insensitive mode delegates to ICU; otherwise use the raw char difference
        final int diff = caseInsensitive
                ? Normalizer.compare(left, right, Normalizer.COMPARE_IGNORE_CASE)
                : left - right;
        if (diff != 0) {
            return diff;
        }
        offset++;
    }
    return 0;
}

Source File: Norm2AllModes.java From fitnotifications with Apache License 2.0

5 votes

/**
 * Runs the composing quick check over the whole of {@code s}.
 *
 * <p>{@code impl.composeQuickCheck} returns a packed int: the lowest bit is the
 * "maybe" flag and the remaining bits are a span length.
 *
 * @param s the text to check
 * @return {@code MAYBE} if the flag bit is set; otherwise {@code YES} when the
 *         span length equals {@code s.length()}, else {@code NO}
 */
@Override
public Normalizer.QuickCheckResult quickCheck(CharSequence s) {
    final int packed = impl.composeQuickCheck(s, 0, s.length(), onlyContiguous, false);
    final boolean maybe = (packed & 1) != 0;
    if (maybe) {
        return Normalizer.MAYBE;
    }
    final int spanLength = packed >>> 1;
    return spanLength == s.length() ? Normalizer.YES : Normalizer.NO;
}

Source File: ICUNormalizer2Filter.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Advances the wrapped stream by one token and normalizes the term text if the
 * normalizer's quick check does not answer {@code YES}.
 *
 * @return {@code true} if a token was produced, {@code false} when the input is exhausted
 * @throws IOException declared by the signature; presumably raised by the wrapped stream
 */
@Override
public final boolean incrementToken() throws IOException {
  if (!input.incrementToken()) {
    return false;
  }

  final boolean knownNormalized = normalizer.quickCheck(termAtt) == Normalizer.YES;
  if (!knownNormalized) {
    // normalize into the reusable buffer, then swap it in as the term text
    buffer.setLength(0);
    normalizer.normalize(termAtt, buffer);
    termAtt.setEmpty().append(buffer);
  }
  return true;
}

Source File: Norm2AllModes.java From trekarta with GNU General Public License v3.0

5 votes

/**
 * Quick check for the composing normalizer.
 *
 * <p>The value from {@code impl.composeQuickCheck} packs two things: bit 0 is a
 * "maybe" marker, and the value shifted right by one is a span length.
 *
 * @param s the text to check
 * @return {@code MAYBE} when bit 0 is set, {@code YES} when the span length
 *         equals the length of {@code s}, and {@code NO} otherwise
 */
@Override
public Normalizer.QuickCheckResult quickCheck(CharSequence s) {
    final int result = impl.composeQuickCheck(s, 0, s.length(), onlyContiguous, false);
    if ((result & 1) == 0) {
        final boolean spansEverything = (result >>> 1) == s.length();
        return spansEverything ? Normalizer.YES : Normalizer.NO;
    }
    return Normalizer.MAYBE;
}

Source File: NormalizationChecker.java From caja with Apache License 2.0

5 votes

/**
 * Finishes the current text run: reports the run if nothing has been reported
 * for it yet and the buffered chars {@code buf[0..pos)} are not NFC, then puts
 * back the buffer saved in {@code bufHolder}, if there is one.
 *
 * @see nu.validator.htmlparser.common.CharacterHandler#end()
 */
public void end() throws SAXException {
    if (!alreadyComplainedAboutThisRun) {
        final boolean nfc = Normalizer.isNormalized(buf, 0, pos, Normalizer.NFC, 0);
        if (!nfc) {
            errAboutTextRun();
        }
    }

    if (bufHolder == null) {
        return;
    }
    // swap the original small buffer back in so a recycled checker
    // does not keep holding on to a grown one
    buf = bufHolder;
    bufHolder = null;
}

Source File: ICU4Jv26TextNormalizer.java From oacc-core with Apache License 2.0

5 votes

private ICU4Jv26TextNormalizer() {
   // this "no-op" call to the Normalize class is *very* important, without it when the
   // com.ibm.icu.text.Normalizer class is not present in the classpath a load of the
   // class will not fail until it is attempted in the normalizeToNfc() method below -- which
   // is too late. The class load needs to fail here to cause the getInstance() method below to
   // propagate the class load exception and correctly trigger the fallback to the JDK based
   // TextNormalizer implementation in the parent class's TextNormalizer#getInstance().
   Normalizer.normalize("", Normalizer.NFC, 0);
}

Source File: ICU4Jv26TextNormalizerWorstCaseExpansionTest.java From oacc-core with Apache License 2.0

5 votes

/**
 * Checks that NFC-normalizing {@code src} with ICU4J reports an output length
 * of at most three times the source length.
 */
@Test
public void testExpansion() throws Exception {
   // the destination is sized at exactly the expected worst case: 3x the source
   final int maxAllowedLength = 3 * src.length();
   final char[] normalizedBuffer = new char[maxAllowedLength];

   final int normalizedLength
         = Normalizer.normalize(src.toCharArray(), normalizedBuffer, Normalizer.NFC, 0);

   final String failureNote = "Note: " +
         "if this test fails, then the ICU4J library in use does not maintain our bounded expansion " +
         "and could leak passwords; use a different library or adjust the expansion factor";
   assertThat(failureNote, normalizedLength, lessThanOrEqualTo(maxAllowedLength));
}

Source File: IcuNormalizerFilter.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0

5 votes

/**
 * Pulls the next token from the wrapped stream and rewrites its term text in
 * normalized form unless the normalizer's quick check returns {@code YES}.
 *
 * @return {@code true} if a token is available, {@code false} at end of input
 * @throws IOException declared by the signature; presumably raised by the wrapped stream
 */
@Override
public final boolean incrementToken() throws IOException {
    final boolean hasToken = input.incrementToken();
    if (!hasToken) {
        return false;
    }

    if (normalizer.quickCheck(termAtt) != Normalizer.YES) {
        // reuse the shared buffer: clear it, fill it, then replace the term text with it
        buffer.setLength(0);
        normalizer.normalize(termAtt, buffer);
        termAtt.setEmpty().append(buffer);
    }
    return true;
}

Source File: Norm2AllModes.java From fitnotifications with Apache License 2.0

4 votes

/**
 * Quick check that answers {@code YES} for every input.
 *
 * @param s the text to check (not inspected)
 * @return always {@code Normalizer.YES}
 */
@Override
public Normalizer.QuickCheckResult quickCheck(CharSequence s) {
    return Normalizer.YES;
}

Source File: Norm2AllModes.java From fitnotifications with Apache License 2.0

4 votes

/**
 * Quick check backed by {@code isNormalized}; never answers {@code MAYBE}.
 *
 * @param s the text to check
 * @return {@code YES} if {@code isNormalized(s)} is true, otherwise {@code NO}
 */
@Override
public Normalizer.QuickCheckResult quickCheck(CharSequence s) {
    if (isNormalized(s)) {
        return Normalizer.YES;
    }
    return Normalizer.NO;
}

Source File: Norm2AllModes.java From trekarta with GNU General Public License v3.0

4 votes

/**
 * Reports every input as normalized.
 *
 * @param s the text to check (ignored)
 * @return {@code Normalizer.YES} unconditionally
 */
@Override
public Normalizer.QuickCheckResult quickCheck(CharSequence s) {
    return Normalizer.YES;
}

Source File: Norm2AllModes.java From trekarta with GNU General Public License v3.0

4 votes

/**
 * Maps the boolean answer of {@code isNormalized} onto a quick-check result.
 *
 * @param s the text to check
 * @return {@code NO} when {@code isNormalized(s)} is false, {@code YES} otherwise
 */
@Override
public Normalizer.QuickCheckResult quickCheck(CharSequence s) {
    final boolean normalized = isNormalized(s);
    if (!normalized) {
        return Normalizer.NO;
    }
    return Normalizer.YES;
}

Source File: NormalizationChecker.java From caja with Apache License 2.0

4 votes

/**
 * Receives one chunk of character data belonging to the current text run and
 * checks it for NFC normalization problems.
 *
 * <p>The method does three things, in order:
 * <ol>
 * <li>At the start of a run it inspects the first code point (which may be
 * split across two calls as a surrogate pair) and reports an error if it is a
 * composing character.</li>
 * <li>If chars are already held in {@code buf} from an earlier call, the
 * leading composing/surrogate chars of this chunk are appended to them; once a
 * different char is seen, the buffered text is checked and the buffer is
 * reset.</li>
 * <li>The rest of the chunk is checked directly in {@code ch}, except for its
 * trailing part (from the last non-composing, non-surrogate char onward),
 * which is moved into {@code buf} to be checked together with whatever
 * follows.</li>
 * </ol>
 *
 * <p>An empty chunk ({@code length == 0}) at the start of a run is ignored by
 * the start-of-run inspection: there is no char to look at, so the run is
 * still considered to be at its start when the next chunk arrives.
 *
 * @param ch     array containing the characters
 * @param start  index of the first character of the chunk in {@code ch}
 * @param length number of characters in the chunk
 * @throws SAXException declared by the signature; presumably raised by the
 *                      error-reporting helpers
 * @see nu.validator.htmlparser.common.CharacterHandler#characters(char[], int, int)
 */
public void characters(char[] ch, int start, int length)
        throws SAXException {
    if (alreadyComplainedAboutThisRun) {
        return;
    }
    // Only inspect the start of the run when the chunk actually holds a char;
    // for an empty chunk ch[start] is outside the delivered range and may even
    // be outside the array.
    if (atStartOfRun && length > 0) {
        char c = ch[start];
        if (pos == 1) {
            // there's a single high surrogate in buf
            if (isComposingChar(UCharacter.getCodePoint(buf[0], c))) {
                err("Text run starts with a composing character.");
            }
            atStartOfRun = false;
        } else {
            if (length == 1 && UCharacter.isHighSurrogate(c)) {
                // the pair is split across calls: park the high surrogate
                // and decide once the low surrogate arrives
                buf[0] = c;
                pos = 1;
                return;
            } else {
                if (UCharacter.isHighSurrogate(c)) {
                    if (isComposingChar(UCharacter.getCodePoint(c,
                            ch[start + 1]))) {
                        err("Text run starts with a composing character.");
                    }
                } else {
                    if (isComposingCharOrSurrogate(c)) {
                        err("Text run starts with a composing character.");
                    }
                }
                atStartOfRun = false;
            }
        }
    }
    int i = start;
    int stop = start + length;
    if (pos > 0) {
        // there's stuff in buf
        while (i < stop && isComposingCharOrSurrogate(ch[i])) {
            i++;
        }
        appendToBuf(ch, start, i);
        if (i == stop) {
            // the whole chunk was absorbed into buf; wait for more input
            return;
        } else {
            if (!Normalizer.isNormalized(buf, 0, pos, Normalizer.NFC, 0)) {
                errAboutTextRun();
            }
            pos = 0;
        }
    }
    if (i < stop) {
        start = i;
        // walk back over trailing composing/surrogate chars so that they are
        // buffered rather than checked now
        i = stop - 1;
        while (i > start && isComposingCharOrSurrogate(ch[i])) {
            i--;
        }
        if (i > start) {
            if (!Normalizer.isNormalized(ch, start, i, Normalizer.NFC, 0)) {
                errAboutTextRun();
            }
        }
        appendToBuf(ch, i, stop);
    }
}

Source File: ICU4Jv26TextNormalizerParityTest.java From oacc-core with Apache License 2.0

4 votes

/**
 * Produces the reference result by calling the underlying ICU4J normalizer
 * directly on {@code srcCharArray}.
 *
 * @return the NFC-normalized text as a new char array
 */
private char[] normalizeDirect() {
   return Normalizer.normalize(new String(srcCharArray), Normalizer.NFC).toCharArray();
}

Source File: StringUtils.java From CloverETL-Engine with GNU Lesser General Public License v2.1

3 votes

/**
 * Replaces characters carrying diacritics with their equivalents without
 * diacritics. The input is first decomposed, then every run of characters from
 * the Combining Diacritical Marks block is removed, so only characters that have
 * a decomposition defined are affected.
 * 
 * @param str the text to process; may be {@code null}
 * @return the text with diacritics removed, or {@code null} if {@code str} is {@code null}
 */
public static String removeDiacritic(String str) {
	if (str != null) {
		final String decomposed = Normalizer.decompose(str, false, 0);
		return decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
	}
	return null;
}

com.ibm.icu.text.Normalizer Java Examples