sun.text.Normalizer Java Examples

The following examples show how to use sun.text.Normalizer. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ConditionalSpecialCasing.java    From hottub with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Tests the "Before_Dot" casing context for the character at {@code index}.
 *
 * The context holds when <code>U+0307 COMBINING DOT ABOVE</code> follows the
 * current character C. Characters whose combining class is neither 0 nor
 * 230 are allowed to sit between C and the dot.
 *
 * Regular Expression:
 *   After C: ([{cc!=230}&{cc!=0}])*[\u0307]
 *
 * @param src   the string being examined
 * @param index the index in {@code src} at which the current character starts
 * @return {@code true} if U+0307 is reached before the end of {@code src}
 *         and before any character of combining class 0 or
 *         {@code COMBINING_CLASS_ABOVE}; {@code false} otherwise
 */
private static boolean isBeforeDot(String src, int index) {
    final int end = src.length();

    // Start scanning just past the current (possibly supplementary) character.
    int pos = index + Character.charCount(src.codePointAt(index));

    while (pos < end) {
        final int codePoint = src.codePointAt(pos);
        if (codePoint == '\u0307') {
            return true;
        }

        // A base character or an ABOVE-class mark ends the search unsuccessfully.
        final int combiningClass = Normalizer.getCombiningClass(codePoint);
        if (combiningClass == 0 || combiningClass == COMBINING_CLASS_ABOVE) {
            return false;
        }

        pos += Character.charCount(codePoint);
    }

    return false;
}
 
Example #2
Source File: StringPrep.java    From jdk8u-jdk with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Returns a new buffer holding the NFKC normalization of {@code src},
 * computed with the UNICODE_3_2 and BEFORE_PRI_29 options.
 *
 * Option UNORM_BEFORE_PRI_29: IDNA as interpreted by IETF members (see
 * unicode mailing list 2004H1) requires strict adherence to Unicode 3.2
 * normalization, including the buggy composition that predates the fix for
 * Public Review Issue #29. As a consequence some valid but nonsensical
 * text is either corrupted or rejected, depending on the text.
 * See http://www.unicode.org/review/resolved-pri.html#pri29
 * See unorm.cpp and cnormtst.c
 *
 * @param src the text to normalize
 * @return a new {@code StringBuffer} containing the normalized text
 */
private StringBuffer normalize(StringBuffer src) {
    final String input = src.toString();
    final int options = Normalizer.UNICODE_3_2 | NormalizerImpl.BEFORE_PRI_29;
    final String normalized =
        Normalizer.normalize(input, java.text.Normalizer.Form.NFKC, options);
    return new StringBuffer(normalized);
}
 
Example #3
Source File: ConditionalSpecialCasing.java    From jdk8u_jdk with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Tests the "Before_Dot" casing context for the character at {@code index}.
 *
 * The context holds when <code>U+0307 COMBINING DOT ABOVE</code> follows the
 * current character C. Characters whose combining class is neither 0 nor
 * 230 are allowed to sit between C and the dot.
 *
 * Regular Expression:
 *   After C: ([{cc!=230}&{cc!=0}])*[\u0307]
 *
 * @param src   the string being examined
 * @param index the index in {@code src} at which the current character starts
 * @return {@code true} if U+0307 is reached before the end of {@code src}
 *         and before any character of combining class 0 or
 *         {@code COMBINING_CLASS_ABOVE}; {@code false} otherwise
 */
private static boolean isBeforeDot(String src, int index) {
    final int end = src.length();

    // Start scanning just past the current (possibly supplementary) character.
    int pos = index + Character.charCount(src.codePointAt(index));

    while (pos < end) {
        final int codePoint = src.codePointAt(pos);
        if (codePoint == '\u0307') {
            return true;
        }

        // A base character or an ABOVE-class mark ends the search unsuccessfully.
        final int combiningClass = Normalizer.getCombiningClass(codePoint);
        if (combiningClass == 0 || combiningClass == COMBINING_CLASS_ABOVE) {
            return false;
        }

        pos += Character.charCount(codePoint);
    }

    return false;
}
 
Example #4
Source File: ConditionalSpecialCasing.java    From jdk-1.7-annotated with Apache License 2.0 6 votes vote down vote up
/**
 * Tests the "After_Soft_Dotted" casing context for the character at
 * {@code index}.
 *
 * The context holds when the last preceding character of combining class
 * zero before C was Soft_Dotted and no character of combining class 230
 * (ABOVE) lies in between.
 *
 * Regular Expression:
 *   Before C: [{Soft_Dotted==true}]([{cc!=230}&{cc!=0}])*
 *
 * @param src   the string being examined
 * @param index the index in {@code src} of the current character; the scan
 *              walks backwards from here
 * @return {@code true} if a soft-dotted character is found before the start
 *         of {@code src} and before any other character of combining class
 *         0 or {@code COMBINING_CLASS_ABOVE}; {@code false} otherwise
 */
private static boolean isAfterSoftDotted(String src, int index) {
    int pos = index;

    // Walk backwards one code point at a time.
    while (pos > 0) {
        final int codePoint = src.codePointBefore(pos);
        if (isSoftDotted(codePoint)) {
            return true;
        }

        // Any other base character, or an ABOVE-class mark, breaks the context.
        final int combiningClass = Normalizer.getCombiningClass(codePoint);
        if (combiningClass == 0 || combiningClass == COMBINING_CLASS_ABOVE) {
            return false;
        }

        pos -= Character.charCount(codePoint);
    }

    return false;
}
 
Example #5
Source File: ConditionalSpecialCasing.java    From jdk-1.7-annotated with Apache License 2.0 6 votes vote down vote up
/**
 * Tests the "More_Above" casing context for the character at {@code index}.
 *
 * The context holds when C is followed, within its combining character
 * sequence, by at least one character of combining class 230 (ABOVE).
 *
 * Regular Expression:
 *   After C: [{cc!=0}]*[{cc==230}]
 *
 * @param src   the string being examined
 * @param index the index in {@code src} at which the current character starts
 * @return {@code true} if a character of class
 *         {@code COMBINING_CLASS_ABOVE} is reached before the end of
 *         {@code src} and before any character of combining class 0;
 *         {@code false} otherwise
 */
private static boolean isMoreAbove(String src, int index) {
    final int end = src.length();

    // Start scanning just past the current (possibly supplementary) character.
    int pos = index + Character.charCount(src.codePointAt(index));

    while (pos < end) {
        final int codePoint = src.codePointAt(pos);
        final int combiningClass = Normalizer.getCombiningClass(codePoint);

        if (combiningClass == COMBINING_CLASS_ABOVE) {
            return true;
        }
        if (combiningClass == 0) {
            // Class 0 marks the end of the combining sequence.
            return false;
        }

        pos += Character.charCount(codePoint);
    }

    return false;
}
 
Example #6
Source File: ConditionalSpecialCasing.java    From jdk-1.7-annotated with Apache License 2.0 6 votes vote down vote up
/**
 * Tests the "After_I" casing context for the character at {@code index}.
 *
 * The context holds when the last preceding base character was an
 * uppercase I and no character of combining class 230 (ABOVE) lies in
 * between.
 *
 * Regular Expression:
 *   Before C: [I]([{cc!=230}&{cc!=0}])*
 *
 * @param src   the string being examined
 * @param index the index in {@code src} of the current character; the scan
 *              walks backwards from here
 * @return {@code true} if {@code 'I'} is found before the start of
 *         {@code src} and before any other character of combining class 0
 *         or {@code COMBINING_CLASS_ABOVE}; {@code false} otherwise
 */
private static boolean isAfterI(String src, int index) {
    int pos = index;

    // Walk backwards one code point at a time.
    while (pos > 0) {
        final int codePoint = src.codePointBefore(pos);
        if (codePoint == 'I') {
            return true;
        }

        // Any other base character, or an ABOVE-class mark, breaks the context.
        final int combiningClass = Normalizer.getCombiningClass(codePoint);
        if (combiningClass == 0 || combiningClass == COMBINING_CLASS_ABOVE) {
            return false;
        }

        pos -= Character.charCount(codePoint);
    }

    return false;
}
 
Example #7
Source File: ConditionalSpecialCasing.java    From jdk8u-jdk with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Tests the "More_Above" casing context for the character at {@code index}.
 *
 * The context holds when C is followed, within its combining character
 * sequence, by at least one character of combining class 230 (ABOVE).
 *
 * Regular Expression:
 *   After C: [{cc!=0}]*[{cc==230}]
 *
 * @param src   the string being examined
 * @param index the index in {@code src} at which the current character starts
 * @return {@code true} if a character of class
 *         {@code COMBINING_CLASS_ABOVE} is reached before the end of
 *         {@code src} and before any character of combining class 0;
 *         {@code false} otherwise
 */
private static boolean isMoreAbove(String src, int index) {
    final int end = src.length();

    // Start scanning just past the current (possibly supplementary) character.
    int pos = index + Character.charCount(src.codePointAt(index));

    while (pos < end) {
        final int codePoint = src.codePointAt(pos);
        final int combiningClass = Normalizer.getCombiningClass(codePoint);

        if (combiningClass == COMBINING_CLASS_ABOVE) {
            return true;
        }
        if (combiningClass == 0) {
            // Class 0 marks the end of the combining sequence.
            return false;
        }

        pos += Character.charCount(codePoint);
    }

    return false;
}
 
Example #8
Source File: ConditionalSpecialCasing.java    From jdk8u-jdk with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Tests the "After_Soft_Dotted" casing context for the character at
 * {@code index}.
 *
 * The context holds when the last preceding character of combining class
 * zero before C was Soft_Dotted and no character of combining class 230
 * (ABOVE) lies in between.
 *
 * Regular Expression:
 *   Before C: [{Soft_Dotted==true}]([{cc!=230}&{cc!=0}])*
 *
 * @param src   the string being examined
 * @param index the index in {@code src} of the current character; the scan
 *              walks backwards from here
 * @return {@code true} if a soft-dotted character is found before the start
 *         of {@code src} and before any other character of combining class
 *         0 or {@code COMBINING_CLASS_ABOVE}; {@code false} otherwise
 */
private static boolean isAfterSoftDotted(String src, int index) {
    int pos = index;

    // Walk backwards one code point at a time.
    while (pos > 0) {
        final int codePoint = src.codePointBefore(pos);
        if (isSoftDotted(codePoint)) {
            return true;
        }

        // Any other base character, or an ABOVE-class mark, breaks the context.
        final int combiningClass = Normalizer.getCombiningClass(codePoint);
        if (combiningClass == 0 || combiningClass == COMBINING_CLASS_ABOVE) {
            return false;
        }

        pos -= Character.charCount(codePoint);
    }

    return false;
}
 
Example #9
Source File: ConditionalSpecialCasing.java    From jdk8u-jdk with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Tests the "After_I" casing context for the character at {@code index}.
 *
 * The context holds when the last preceding base character was an
 * uppercase I and no character of combining class 230 (ABOVE) lies in
 * between.
 *
 * Regular Expression:
 *   Before C: [I]([{cc!=230}&{cc!=0}])*
 *
 * @param src   the string being examined
 * @param index the index in {@code src} of the current character; the scan
 *              walks backwards from here
 * @return {@code true} if {@code 'I'} is found before the start of
 *         {@code src} and before any other character of combining class 0
 *         or {@code COMBINING_CLASS_ABOVE}; {@code false} otherwise
 */
private static boolean isAfterI(String src, int index) {
    int pos = index;

    // Walk backwards one code point at a time.
    while (pos > 0) {
        final int codePoint = src.codePointBefore(pos);
        if (codePoint == 'I') {
            return true;
        }

        // Any other base character, or an ABOVE-class mark, breaks the context.
        final int combiningClass = Normalizer.getCombiningClass(codePoint);
        if (combiningClass == 0 || combiningClass == COMBINING_CLASS_ABOVE) {
            return false;
        }

        pos -= Character.charCount(codePoint);
    }

    return false;
}
 
Example #10
Source File: StringPrep.java    From jdk8u_jdk with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Returns a new buffer holding the NFKC normalization of {@code src},
 * computed with the UNICODE_3_2 and BEFORE_PRI_29 options.
 *
 * Option UNORM_BEFORE_PRI_29: IDNA as interpreted by IETF members (see
 * unicode mailing list 2004H1) requires strict adherence to Unicode 3.2
 * normalization, including the buggy composition that predates the fix for
 * Public Review Issue #29. As a consequence some valid but nonsensical
 * text is either corrupted or rejected, depending on the text.
 * See http://www.unicode.org/review/resolved-pri.html#pri29
 * See unorm.cpp and cnormtst.c
 *
 * @param src the text to normalize
 * @return a new {@code StringBuffer} containing the normalized text
 */
private StringBuffer normalize(StringBuffer src) {
    final String input = src.toString();
    final int options = Normalizer.UNICODE_3_2 | NormalizerImpl.BEFORE_PRI_29;
    final String normalized =
        Normalizer.normalize(input, java.text.Normalizer.Form.NFKC, options);
    return new StringBuffer(normalized);
}
 
Example #11
Source File: ConditionalSpecialCasing.java    From jdk8u-jdk with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Tests the "Before_Dot" casing context for the character at {@code index}.
 *
 * The context holds when <code>U+0307 COMBINING DOT ABOVE</code> follows the
 * current character C. Characters whose combining class is neither 0 nor
 * 230 are allowed to sit between C and the dot.
 *
 * Regular Expression:
 *   After C: ([{cc!=230}&{cc!=0}])*[\u0307]
 *
 * @param src   the string being examined
 * @param index the index in {@code src} at which the current character starts
 * @return {@code true} if U+0307 is reached before the end of {@code src}
 *         and before any character of combining class 0 or
 *         {@code COMBINING_CLASS_ABOVE}; {@code false} otherwise
 */
private static boolean isBeforeDot(String src, int index) {
    final int end = src.length();

    // Start scanning just past the current (possibly supplementary) character.
    int pos = index + Character.charCount(src.codePointAt(index));

    while (pos < end) {
        final int codePoint = src.codePointAt(pos);
        if (codePoint == '\u0307') {
            return true;
        }

        // A base character or an ABOVE-class mark ends the search unsuccessfully.
        final int combiningClass = Normalizer.getCombiningClass(codePoint);
        if (combiningClass == 0 || combiningClass == COMBINING_CLASS_ABOVE) {
            return false;
        }

        pos += Character.charCount(codePoint);
    }

    return false;
}
 
Example #12
Source File: StringPrep.java    From jdk8u-dev-jdk with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Returns a new buffer holding the NFKC normalization of {@code src},
 * computed with the UNICODE_3_2 and BEFORE_PRI_29 options.
 *
 * Option UNORM_BEFORE_PRI_29: IDNA as interpreted by IETF members (see
 * unicode mailing list 2004H1) requires strict adherence to Unicode 3.2
 * normalization, including the buggy composition that predates the fix for
 * Public Review Issue #29. As a consequence some valid but nonsensical
 * text is either corrupted or rejected, depending on the text.
 * See http://www.unicode.org/review/resolved-pri.html#pri29
 * See unorm.cpp and cnormtst.c
 *
 * @param src the text to normalize
 * @return a new {@code StringBuffer} containing the normalized text
 */
private StringBuffer normalize(StringBuffer src) {
    final String input = src.toString();
    final int options = Normalizer.UNICODE_3_2 | NormalizerImpl.BEFORE_PRI_29;
    final String normalized =
        Normalizer.normalize(input, java.text.Normalizer.Form.NFKC, options);
    return new StringBuffer(normalized);
}
 
Example #13
Source File: ConditionalSpecialCasing.java    From openjdk-8-source with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Tests the "After_Soft_Dotted" casing context for the character at
 * {@code index}.
 *
 * The context holds when the last preceding character of combining class
 * zero before C was Soft_Dotted and no character of combining class 230
 * (ABOVE) lies in between.
 *
 * Regular Expression:
 *   Before C: [{Soft_Dotted==true}]([{cc!=230}&{cc!=0}])*
 *
 * @param src   the string being examined
 * @param index the index in {@code src} of the current character; the scan
 *              walks backwards from here
 * @return {@code true} if a soft-dotted character is found before the start
 *         of {@code src} and before any other character of combining class
 *         0 or {@code COMBINING_CLASS_ABOVE}; {@code false} otherwise
 */
private static boolean isAfterSoftDotted(String src, int index) {
    int pos = index;

    // Walk backwards one code point at a time.
    while (pos > 0) {
        final int codePoint = src.codePointBefore(pos);
        if (isSoftDotted(codePoint)) {
            return true;
        }

        // Any other base character, or an ABOVE-class mark, breaks the context.
        final int combiningClass = Normalizer.getCombiningClass(codePoint);
        if (combiningClass == 0 || combiningClass == COMBINING_CLASS_ABOVE) {
            return false;
        }

        pos -= Character.charCount(codePoint);
    }

    return false;
}
 
Example #14
Source File: ConditionalSpecialCasing.java    From openjdk-8-source with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Tests the "After_I" casing context for the character at {@code index}.
 *
 * The context holds when the last preceding base character was an
 * uppercase I and no character of combining class 230 (ABOVE) lies in
 * between.
 *
 * Regular Expression:
 *   Before C: [I]([{cc!=230}&{cc!=0}])*
 *
 * @param src   the string being examined
 * @param index the index in {@code src} of the current character; the scan
 *              walks backwards from here
 * @return {@code true} if {@code 'I'} is found before the start of
 *         {@code src} and before any other character of combining class 0
 *         or {@code COMBINING_CLASS_ABOVE}; {@code false} otherwise
 */
private static boolean isAfterI(String src, int index) {
    int pos = index;

    // Walk backwards one code point at a time.
    while (pos > 0) {
        final int codePoint = src.codePointBefore(pos);
        if (codePoint == 'I') {
            return true;
        }

        // Any other base character, or an ABOVE-class mark, breaks the context.
        final int combiningClass = Normalizer.getCombiningClass(codePoint);
        if (combiningClass == 0 || combiningClass == COMBINING_CLASS_ABOVE) {
            return false;
        }

        pos -= Character.charCount(codePoint);
    }

    return false;
}
 
Example #15
Source File: StringPrep.java    From hottub with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Returns a new buffer holding the NFKC normalization of {@code src},
 * computed with the UNICODE_3_2 and BEFORE_PRI_29 options.
 *
 * Option UNORM_BEFORE_PRI_29: IDNA as interpreted by IETF members (see
 * unicode mailing list 2004H1) requires strict adherence to Unicode 3.2
 * normalization, including the buggy composition that predates the fix for
 * Public Review Issue #29. As a consequence some valid but nonsensical
 * text is either corrupted or rejected, depending on the text.
 * See http://www.unicode.org/review/resolved-pri.html#pri29
 * See unorm.cpp and cnormtst.c
 *
 * @param src the text to normalize
 * @return a new {@code StringBuffer} containing the normalized text
 */
private StringBuffer normalize(StringBuffer src) {
    final String input = src.toString();
    final int options = Normalizer.UNICODE_3_2 | NormalizerImpl.BEFORE_PRI_29;
    final String normalized =
        Normalizer.normalize(input, java.text.Normalizer.Form.NFKC, options);
    return new StringBuffer(normalized);
}
 
Example #16
Source File: ConditionalSpecialCasing.java    From openjdk-8-source with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Tests the "Before_Dot" casing context for the character at {@code index}.
 *
 * The context holds when <code>U+0307 COMBINING DOT ABOVE</code> follows the
 * current character C. Characters whose combining class is neither 0 nor
 * 230 are allowed to sit between C and the dot.
 *
 * Regular Expression:
 *   After C: ([{cc!=230}&{cc!=0}])*[\u0307]
 *
 * @param src   the string being examined
 * @param index the index in {@code src} at which the current character starts
 * @return {@code true} if U+0307 is reached before the end of {@code src}
 *         and before any character of combining class 0 or
 *         {@code COMBINING_CLASS_ABOVE}; {@code false} otherwise
 */
private static boolean isBeforeDot(String src, int index) {
    final int end = src.length();

    // Start scanning just past the current (possibly supplementary) character.
    int pos = index + Character.charCount(src.codePointAt(index));

    while (pos < end) {
        final int codePoint = src.codePointAt(pos);
        if (codePoint == '\u0307') {
            return true;
        }

        // A base character or an ABOVE-class mark ends the search unsuccessfully.
        final int combiningClass = Normalizer.getCombiningClass(codePoint);
        if (combiningClass == 0 || combiningClass == COMBINING_CLASS_ABOVE) {
            return false;
        }

        pos += Character.charCount(codePoint);
    }

    return false;
}
 
Example #17
Source File: ConditionalSpecialCasing.java    From hottub with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Tests the "More_Above" casing context for the character at {@code index}.
 *
 * The context holds when C is followed, within its combining character
 * sequence, by at least one character of combining class 230 (ABOVE).
 *
 * Regular Expression:
 *   After C: [{cc!=0}]*[{cc==230}]
 *
 * @param src   the string being examined
 * @param index the index in {@code src} at which the current character starts
 * @return {@code true} if a character of class
 *         {@code COMBINING_CLASS_ABOVE} is reached before the end of
 *         {@code src} and before any character of combining class 0;
 *         {@code false} otherwise
 */
private static boolean isMoreAbove(String src, int index) {
    final int end = src.length();

    // Start scanning just past the current (possibly supplementary) character.
    int pos = index + Character.charCount(src.codePointAt(index));

    while (pos < end) {
        final int codePoint = src.codePointAt(pos);
        final int combiningClass = Normalizer.getCombiningClass(codePoint);

        if (combiningClass == COMBINING_CLASS_ABOVE) {
            return true;
        }
        if (combiningClass == 0) {
            // Class 0 marks the end of the combining sequence.
            return false;
        }

        pos += Character.charCount(codePoint);
    }

    return false;
}
 
Example #18
Source File: ConditionalSpecialCasing.java    From hottub with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Tests the "After_Soft_Dotted" casing context for the character at
 * {@code index}.
 *
 * The context holds when the last preceding character of combining class
 * zero before C was Soft_Dotted and no character of combining class 230
 * (ABOVE) lies in between.
 *
 * Regular Expression:
 *   Before C: [{Soft_Dotted==true}]([{cc!=230}&{cc!=0}])*
 *
 * @param src   the string being examined
 * @param index the index in {@code src} of the current character; the scan
 *              walks backwards from here
 * @return {@code true} if a soft-dotted character is found before the start
 *         of {@code src} and before any other character of combining class
 *         0 or {@code COMBINING_CLASS_ABOVE}; {@code false} otherwise
 */
private static boolean isAfterSoftDotted(String src, int index) {
    int pos = index;

    // Walk backwards one code point at a time.
    while (pos > 0) {
        final int codePoint = src.codePointBefore(pos);
        if (isSoftDotted(codePoint)) {
            return true;
        }

        // Any other base character, or an ABOVE-class mark, breaks the context.
        final int combiningClass = Normalizer.getCombiningClass(codePoint);
        if (combiningClass == 0 || combiningClass == COMBINING_CLASS_ABOVE) {
            return false;
        }

        pos -= Character.charCount(codePoint);
    }

    return false;
}
 
Example #19
Source File: ConditionalSpecialCasing.java    From hottub with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Tests the "After_I" casing context for the character at {@code index}.
 *
 * The context holds when the last preceding base character was an
 * uppercase I and no character of combining class 230 (ABOVE) lies in
 * between.
 *
 * Regular Expression:
 *   Before C: [I]([{cc!=230}&{cc!=0}])*
 *
 * @param src   the string being examined
 * @param index the index in {@code src} of the current character; the scan
 *              walks backwards from here
 * @return {@code true} if {@code 'I'} is found before the start of
 *         {@code src} and before any other character of combining class 0
 *         or {@code COMBINING_CLASS_ABOVE}; {@code false} otherwise
 */
private static boolean isAfterI(String src, int index) {
    int pos = index;

    // Walk backwards one code point at a time.
    while (pos > 0) {
        final int codePoint = src.codePointBefore(pos);
        if (codePoint == 'I') {
            return true;
        }

        // Any other base character, or an ABOVE-class mark, breaks the context.
        final int combiningClass = Normalizer.getCombiningClass(codePoint);
        if (combiningClass == 0 || combiningClass == COMBINING_CLASS_ABOVE) {
            return false;
        }

        pos -= Character.charCount(codePoint);
    }

    return false;
}
 
Example #20
Source File: ConditionalSpecialCasing.java    From Java8CN with Apache License 2.0 6 votes vote down vote up
/**
 * Tests the "Before_Dot" casing context for the character at {@code index}.
 *
 * The context holds when <code>U+0307 COMBINING DOT ABOVE</code> follows the
 * current character C. Characters whose combining class is neither 0 nor
 * 230 are allowed to sit between C and the dot.
 *
 * Regular Expression:
 *   After C: ([{cc!=230}&{cc!=0}])*[\u0307]
 *
 * @param src   the string being examined
 * @param index the index in {@code src} at which the current character starts
 * @return {@code true} if U+0307 is reached before the end of {@code src}
 *         and before any character of combining class 0 or
 *         {@code COMBINING_CLASS_ABOVE}; {@code false} otherwise
 */
private static boolean isBeforeDot(String src, int index) {
    final int end = src.length();

    // Start scanning just past the current (possibly supplementary) character.
    int pos = index + Character.charCount(src.codePointAt(index));

    while (pos < end) {
        final int codePoint = src.codePointAt(pos);
        if (codePoint == '\u0307') {
            return true;
        }

        // A base character or an ABOVE-class mark ends the search unsuccessfully.
        final int combiningClass = Normalizer.getCombiningClass(codePoint);
        if (combiningClass == 0 || combiningClass == COMBINING_CLASS_ABOVE) {
            return false;
        }

        pos += Character.charCount(codePoint);
    }

    return false;
}
 
Example #21
Source File: ConditionalSpecialCasing.java    From Java8CN with Apache License 2.0 6 votes vote down vote up
/**
 * Tests the "More_Above" casing context for the character at {@code index}.
 *
 * The context holds when C is followed, within its combining character
 * sequence, by at least one character of combining class 230 (ABOVE).
 *
 * Regular Expression:
 *   After C: [{cc!=0}]*[{cc==230}]
 *
 * @param src   the string being examined
 * @param index the index in {@code src} at which the current character starts
 * @return {@code true} if a character of class
 *         {@code COMBINING_CLASS_ABOVE} is reached before the end of
 *         {@code src} and before any character of combining class 0;
 *         {@code false} otherwise
 */
private static boolean isMoreAbove(String src, int index) {
    final int end = src.length();

    // Start scanning just past the current (possibly supplementary) character.
    int pos = index + Character.charCount(src.codePointAt(index));

    while (pos < end) {
        final int codePoint = src.codePointAt(pos);
        final int combiningClass = Normalizer.getCombiningClass(codePoint);

        if (combiningClass == COMBINING_CLASS_ABOVE) {
            return true;
        }
        if (combiningClass == 0) {
            // Class 0 marks the end of the combining sequence.
            return false;
        }

        pos += Character.charCount(codePoint);
    }

    return false;
}
 
Example #22
Source File: ConditionalSpecialCasing.java    From Java8CN with Apache License 2.0 6 votes vote down vote up
/**
 * Tests the "After_Soft_Dotted" casing context for the character at
 * {@code index}.
 *
 * The context holds when the last preceding character of combining class
 * zero before C was Soft_Dotted and no character of combining class 230
 * (ABOVE) lies in between.
 *
 * Regular Expression:
 *   Before C: [{Soft_Dotted==true}]([{cc!=230}&{cc!=0}])*
 *
 * @param src   the string being examined
 * @param index the index in {@code src} of the current character; the scan
 *              walks backwards from here
 * @return {@code true} if a soft-dotted character is found before the start
 *         of {@code src} and before any other character of combining class
 *         0 or {@code COMBINING_CLASS_ABOVE}; {@code false} otherwise
 */
private static boolean isAfterSoftDotted(String src, int index) {
    int pos = index;

    // Walk backwards one code point at a time.
    while (pos > 0) {
        final int codePoint = src.codePointBefore(pos);
        if (isSoftDotted(codePoint)) {
            return true;
        }

        // Any other base character, or an ABOVE-class mark, breaks the context.
        final int combiningClass = Normalizer.getCombiningClass(codePoint);
        if (combiningClass == 0 || combiningClass == COMBINING_CLASS_ABOVE) {
            return false;
        }

        pos -= Character.charCount(codePoint);
    }

    return false;
}
 
Example #23
Source File: ConditionalSpecialCasing.java    From Java8CN with Apache License 2.0 6 votes vote down vote up
/**
 * Tests the "After_I" casing context for the character at {@code index}.
 *
 * The context holds when the last preceding base character was an
 * uppercase I and no character of combining class 230 (ABOVE) lies in
 * between.
 *
 * Regular Expression:
 *   Before C: [I]([{cc!=230}&{cc!=0}])*
 *
 * @param src   the string being examined
 * @param index the index in {@code src} of the current character; the scan
 *              walks backwards from here
 * @return {@code true} if {@code 'I'} is found before the start of
 *         {@code src} and before any other character of combining class 0
 *         or {@code COMBINING_CLASS_ABOVE}; {@code false} otherwise
 */
private static boolean isAfterI(String src, int index) {
    int pos = index;

    // Walk backwards one code point at a time.
    while (pos > 0) {
        final int codePoint = src.codePointBefore(pos);
        if (codePoint == 'I') {
            return true;
        }

        // Any other base character, or an ABOVE-class mark, breaks the context.
        final int combiningClass = Normalizer.getCombiningClass(codePoint);
        if (combiningClass == 0 || combiningClass == COMBINING_CLASS_ABOVE) {
            return false;
        }

        pos -= Character.charCount(codePoint);
    }

    return false;
}
 
Example #24
Source File: StringPrep.java    From jdk8u-jdk with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Returns a new buffer holding the NFKC normalization of {@code src},
 * computed with the UNICODE_3_2 and BEFORE_PRI_29 options.
 *
 * Option UNORM_BEFORE_PRI_29: IDNA as interpreted by IETF members (see
 * unicode mailing list 2004H1) requires strict adherence to Unicode 3.2
 * normalization, including the buggy composition that predates the fix for
 * Public Review Issue #29. As a consequence some valid but nonsensical
 * text is either corrupted or rejected, depending on the text.
 * See http://www.unicode.org/review/resolved-pri.html#pri29
 * See unorm.cpp and cnormtst.c
 *
 * @param src the text to normalize
 * @return a new {@code StringBuffer} containing the normalized text
 */
private StringBuffer normalize(StringBuffer src) {
    final String input = src.toString();
    final int options = Normalizer.UNICODE_3_2 | NormalizerImpl.BEFORE_PRI_29;
    final String normalized =
        Normalizer.normalize(input, java.text.Normalizer.Form.NFKC, options);
    return new StringBuffer(normalized);
}
 
Example #25
Source File: ConditionalSpecialCasing.java    From jdk8u-jdk with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Tests the "Before_Dot" casing context for the character at {@code index}.
 *
 * The context holds when <code>U+0307 COMBINING DOT ABOVE</code> follows the
 * current character C. Characters whose combining class is neither 0 nor
 * 230 are allowed to sit between C and the dot.
 *
 * Regular Expression:
 *   After C: ([{cc!=230}&{cc!=0}])*[\u0307]
 *
 * @param src   the string being examined
 * @param index the index in {@code src} at which the current character starts
 * @return {@code true} if U+0307 is reached before the end of {@code src}
 *         and before any character of combining class 0 or
 *         {@code COMBINING_CLASS_ABOVE}; {@code false} otherwise
 */
private static boolean isBeforeDot(String src, int index) {
    final int end = src.length();

    // Start scanning just past the current (possibly supplementary) character.
    int pos = index + Character.charCount(src.codePointAt(index));

    while (pos < end) {
        final int codePoint = src.codePointAt(pos);
        if (codePoint == '\u0307') {
            return true;
        }

        // A base character or an ABOVE-class mark ends the search unsuccessfully.
        final int combiningClass = Normalizer.getCombiningClass(codePoint);
        if (combiningClass == 0 || combiningClass == COMBINING_CLASS_ABOVE) {
            return false;
        }

        pos += Character.charCount(codePoint);
    }

    return false;
}
 
Example #26
Source File: ConditionalSpecialCasing.java    From jdk8u-jdk with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Tests the "More_Above" casing context for the character at {@code index}.
 *
 * The context holds when C is followed, within its combining character
 * sequence, by at least one character of combining class 230 (ABOVE).
 *
 * Regular Expression:
 *   After C: [{cc!=0}]*[{cc==230}]
 *
 * @param src   the string being examined
 * @param index the index in {@code src} at which the current character starts
 * @return {@code true} if a character of class
 *         {@code COMBINING_CLASS_ABOVE} is reached before the end of
 *         {@code src} and before any character of combining class 0;
 *         {@code false} otherwise
 */
private static boolean isMoreAbove(String src, int index) {
    final int end = src.length();

    // Start scanning just past the current (possibly supplementary) character.
    int pos = index + Character.charCount(src.codePointAt(index));

    while (pos < end) {
        final int codePoint = src.codePointAt(pos);
        final int combiningClass = Normalizer.getCombiningClass(codePoint);

        if (combiningClass == COMBINING_CLASS_ABOVE) {
            return true;
        }
        if (combiningClass == 0) {
            // Class 0 marks the end of the combining sequence.
            return false;
        }

        pos += Character.charCount(codePoint);
    }

    return false;
}
 
Example #27
Source File: ConditionalSpecialCasing.java    From jdk8u-jdk with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Tests the "After_Soft_Dotted" casing context for the character at
 * {@code index}.
 *
 * The context holds when the last preceding character of combining class
 * zero before C was Soft_Dotted and no character of combining class 230
 * (ABOVE) lies in between.
 *
 * Regular Expression:
 *   Before C: [{Soft_Dotted==true}]([{cc!=230}&{cc!=0}])*
 *
 * @param src   the string being examined
 * @param index the index in {@code src} of the current character; the scan
 *              walks backwards from here
 * @return {@code true} if a soft-dotted character is found before the start
 *         of {@code src} and before any other character of combining class
 *         0 or {@code COMBINING_CLASS_ABOVE}; {@code false} otherwise
 */
private static boolean isAfterSoftDotted(String src, int index) {
    int pos = index;

    // Walk backwards one code point at a time.
    while (pos > 0) {
        final int codePoint = src.codePointBefore(pos);
        if (isSoftDotted(codePoint)) {
            return true;
        }

        // Any other base character, or an ABOVE-class mark, breaks the context.
        final int combiningClass = Normalizer.getCombiningClass(codePoint);
        if (combiningClass == 0 || combiningClass == COMBINING_CLASS_ABOVE) {
            return false;
        }

        pos -= Character.charCount(codePoint);
    }

    return false;
}
 
Example #28
Source File: ConditionalSpecialCasing.java    From jdk8u-jdk with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Tests the "After_I" casing context for the character at {@code index}.
 *
 * The context holds when the last preceding base character was an
 * uppercase I and no character of combining class 230 (ABOVE) lies in
 * between.
 *
 * Regular Expression:
 *   Before C: [I]([{cc!=230}&{cc!=0}])*
 *
 * @param src   the string being examined
 * @param index the index in {@code src} of the current character; the scan
 *              walks backwards from here
 * @return {@code true} if {@code 'I'} is found before the start of
 *         {@code src} and before any other character of combining class 0
 *         or {@code COMBINING_CLASS_ABOVE}; {@code false} otherwise
 */
private static boolean isAfterI(String src, int index) {
    int pos = index;

    // Walk backwards one code point at a time.
    while (pos > 0) {
        final int codePoint = src.codePointBefore(pos);
        if (codePoint == 'I') {
            return true;
        }

        // Any other base character, or an ABOVE-class mark, breaks the context.
        final int combiningClass = Normalizer.getCombiningClass(codePoint);
        if (combiningClass == 0 || combiningClass == COMBINING_CLASS_ABOVE) {
            return false;
        }

        pos -= Character.charCount(codePoint);
    }

    return false;
}
 
Example #29
Source File: StringPrep.java    From openjdk-jdk9 with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Returns a new buffer holding the NFKC normalization of {@code src},
 * computed with the UNICODE_3_2 option.
 *
 * Background carried over from the original comment (option
 * UNORM_BEFORE_PRI_29): IDNA as interpreted by IETF members (see unicode
 * mailing list 2004H1) requires strict adherence to Unicode 3.2
 * normalization, including the buggy composition that predates the fix for
 * Public Review Issue #29, which causes some valid but nonsensical text to
 * be either corrupted or rejected, depending on the text.
 * NOTE(review): only UNICODE_3_2 is passed below; whether the pre-PRI-29
 * behavior is implied by that option is not visible here - confirm.
 * See http://www.unicode.org/review/resolved-pri.html#pri29
 * See unorm.cpp and cnormtst.c
 *
 * @param src the text to normalize
 * @return a new {@code StringBuffer} containing the normalized text
 */
private StringBuffer normalize(StringBuffer src) {
    final String input = src.toString();
    final String normalized = Normalizer.normalize(
        input, java.text.Normalizer.Form.NFKC, Normalizer.UNICODE_3_2);
    return new StringBuffer(normalized);
}
 
Example #30
Source File: ConditionalSpecialCasing.java    From openjdk-jdk9 with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Tests the "Before_Dot" casing context for the character at {@code index}.
 *
 * The context holds when <code>U+0307 COMBINING DOT ABOVE</code> follows the
 * current character C. Characters whose combining class is neither 0 nor
 * 230 are allowed to sit between C and the dot.
 *
 * Regular Expression:
 *   After C: ([{cc!=230}&{cc!=0}])*[\u0307]
 *
 * @param src   the string being examined
 * @param index the index in {@code src} at which the current character starts
 * @return {@code true} if U+0307 is reached before the end of {@code src}
 *         and before any character of combining class 0 or
 *         {@code COMBINING_CLASS_ABOVE}; {@code false} otherwise
 */
private static boolean isBeforeDot(String src, int index) {
    final int end = src.length();

    // Start scanning just past the current (possibly supplementary) character.
    int pos = index + Character.charCount(src.codePointAt(index));

    while (pos < end) {
        final int codePoint = src.codePointAt(pos);
        if (codePoint == '\u0307') {
            return true;
        }

        // A base character or an ABOVE-class mark ends the search unsuccessfully.
        final int combiningClass = Normalizer.getCombiningClass(codePoint);
        if (combiningClass == 0 || combiningClass == COMBINING_CLASS_ABOVE) {
            return false;
        }

        pos += Character.charCount(codePoint);
    }

    return false;
}