jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType Java Examples

The following examples show how to use jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Lexer.java    From openjdk-8 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Reads the next token while scanning the inside of a character class.
 *
 * <p>When no input is left the token becomes {@code EOT}. Otherwise one character is
 * fetched and stored in the token as a {@code CHAR}; it is then reclassified as
 * {@code CC_CLOSE} for {@code ']'}, {@code CC_RANGE} for {@code '-'}, processed as an
 * escape sequence when it equals the syntax's escape meta character, or handed to
 * {@code fetchTokenInCCFor_and()} when it is {@code '&'}.
 *
 * @return the value of {@code token.type} after the token has been read
 * @throws SyntaxException if the input ends directly after the escape character
 */
protected final TokenType fetchTokenInCC() {
    if (!left()) {
        token.type = TokenType.EOT;
        return token.type;
    }

    fetch();
    token.type = TokenType.CHAR;
    token.setC(c);
    token.escaped = false;

    if (c == ']') {
        token.type = TokenType.CC_CLOSE;
        return token.type;
    }
    if (c == '-') {
        token.type = TokenType.CC_RANGE;
        return token.type;
    }
    if (c != syntax.metaCharTable.esc) {
        // Of the remaining characters only '&' gets extra handling.
        if (c == '&') fetchTokenInCCFor_and();
        return token.type;
    }

    // From here on the fetched character is the escape character.
    if (!syntax.backSlashEscapeInCC()) return token.type;
    if (!left()) throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE);

    fetch();
    token.escaped = true;
    token.setC(c);

    int escapeChar = c;
    switch (escapeChar) {
    case 'w':
    case 'W':
        // the upper-case form passes 'true' as the first argument
        fetchTokenInCCFor_charType(escapeChar == 'W', Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
        break;
    case 'd':
    case 'D':
        fetchTokenInCCFor_charType(escapeChar == 'D', Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
        break;
    case 's':
    case 'S':
        fetchTokenInCCFor_charType(escapeChar == 'S', Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
        break;
    case 'h':
    case 'H':
        // only treated as a character type when the syntax enables it
        if (syntax.op2EscHXDigit()) fetchTokenInCCFor_charType(escapeChar == 'H', CharacterType.XDIGIT);
        break;
    case 'x':
        fetchTokenInCCFor_x();
        break;
    case 'u':
        fetchTokenInCCFor_u();
        break;
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
        fetchTokenInCCFor_digit();
        break;

    default:
        // Re-read the escape through fetchEscapedValue(); a value that differs
        // from the stored character turns the token into a CODE_POINT.
        unfetch();
        int escapedValue = fetchEscapedValue();
        if (token.getC() != escapedValue) {
            token.setCode(escapedValue);
            token.type = TokenType.CODE_POINT;
        }
        break;
    } // switch

    return token.type;
}
 
Example #2
Source File: Lexer.java    From hottub with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Reads the next token while scanning the inside of a character class.
 *
 * <p>When no input is left the token becomes {@code EOT}. Otherwise one character is
 * fetched and stored in the token as a {@code CHAR}; it is then reclassified as
 * {@code CC_CLOSE} for {@code ']'}, {@code CC_RANGE} for {@code '-'}, processed as an
 * escape sequence when it equals the syntax's escape meta character, or handed to
 * {@code fetchTokenInCCFor_and()} when it is {@code '&'}.
 *
 * @return the value of {@code token.type} after the token has been read
 * @throws SyntaxException if the input ends directly after the escape character
 */
protected final TokenType fetchTokenInCC() {
    if (!left()) {
        token.type = TokenType.EOT;
        return token.type;
    }

    fetch();
    token.type = TokenType.CHAR;
    token.setC(c);
    token.escaped = false;

    if (c == ']') {
        token.type = TokenType.CC_CLOSE;
        return token.type;
    }
    if (c == '-') {
        token.type = TokenType.CC_RANGE;
        return token.type;
    }
    if (c != syntax.metaCharTable.esc) {
        // Of the remaining characters only '&' gets extra handling.
        if (c == '&') {
            fetchTokenInCCFor_and();
        }
        return token.type;
    }

    // From here on the fetched character is the escape character.
    if (!syntax.backSlashEscapeInCC()) {
        return token.type;
    }
    if (!left()) {
        throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE);
    }

    fetch();
    token.escaped = true;
    token.setC(c);

    final int escapeChar = c;
    switch (escapeChar) {
    case 'w':
    case 'W':
        // the upper-case form passes 'true' as the first argument
        fetchTokenInCCFor_charType(escapeChar == 'W', Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
        break;
    case 'd':
    case 'D':
        fetchTokenInCCFor_charType(escapeChar == 'D', Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
        break;
    case 's':
    case 'S':
        fetchTokenInCCFor_charType(escapeChar == 'S', Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
        break;
    case 'h':
    case 'H':
        // only treated as a character type when the syntax enables it
        if (syntax.op2EscHXDigit()) {
            fetchTokenInCCFor_charType(escapeChar == 'H', CharacterType.XDIGIT);
        }
        break;
    case 'x':
        fetchTokenInCCFor_x();
        break;
    case 'u':
        fetchTokenInCCFor_u();
        break;
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
        fetchTokenInCCFor_digit();
        break;

    default:
        // Re-read the escape through fetchEscapedValue(); a value that differs
        // from the stored character turns the token into a CODE_POINT.
        unfetch();
        final int escapedValue = fetchEscapedValue();
        if (token.getC() != escapedValue) {
            token.setCode(escapedValue);
            token.type = TokenType.CODE_POINT;
        }
        break;
    } // switch

    return token.type;
}
 
Example #3
Source File: EncodingHelper.java    From openjdk-8-source with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Tests whether a code point counts as a word character.
 *
 * @param code the code point to classify
 * @return {@code true} if the bit for the code point's {@link Character#getType(int)}
 *         category is set in {@code CharacterType.WORD_MASK}
 */
public static boolean isWord(int code) {
    // WORD_MASK: letter, digit, or '_'
    int categoryBit = 1 << Character.getType(code);
    return (categoryBit & CharacterType.WORD_MASK) != 0;
}
 
Example #4
Source File: EncodingHelper.java    From openjdk-8-source with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Tests whether a code point belongs to the character type selected by {@code ctype}.
 *
 * <p>Most types are decided by testing the bit of the code point's
 * {@link Character#getType(int)} category against the matching
 * {@code CharacterType.*_MASK} constant.
 *
 * @param code  the code point to classify
 * @param ctype one of the {@code CharacterType} constants handled below
 * @return {@code true} if {@code code} is a member of the requested type
 * @throws RuntimeException if {@code ctype} is not one of the handled constants
 * @see <a href="http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt">http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt</a>
 */
public static boolean isCodeCType(int code, int ctype) {
    switch (ctype) {
        case CharacterType.NEWLINE:
            return isNewLine(code);

        case CharacterType.ALPHA:
            return ((1 << Character.getType(code)) & CharacterType.ALPHA_MASK) != 0;

        case CharacterType.BLANK:
            if (code == 0x09) {
                return true;
            }
            return Character.getType(code) == Character.SPACE_SEPARATOR;

        case CharacterType.CNTRL: {
            int category = Character.getType(code);
            if (category == Character.UNASSIGNED) {
                return true;
            }
            return ((1 << category) & CharacterType.CNTRL_MASK) != 0;
        }

        case CharacterType.DIGIT:
            return EncodingHelper.isDigit(code);

        case CharacterType.GRAPH: {
            // 0x09..0x0d are never graphic
            if (code >= 0x09 && code <= 0x0d) {
                return false;
            }
            // GRAPH_MASK marks the excluded categories, hence the "== 0" test
            int category = Character.getType(code);
            return category != Character.UNASSIGNED
                    && ((1 << category) & CharacterType.GRAPH_MASK) == 0;
        }

        case CharacterType.LOWER:
            return Character.isLowerCase(code);

        case CharacterType.PRINT: {
            // PRINT_MASK marks the excluded categories, hence the "== 0" test
            int category = Character.getType(code);
            return category != Character.UNASSIGNED
                    && ((1 << category) & CharacterType.PRINT_MASK) == 0;
        }

        case CharacterType.PUNCT:
            return ((1 << Character.getType(code)) & CharacterType.PUNCT_MASK) != 0;

        case CharacterType.SPACE:
            // ECMA 7.2 and 7.3
            if (code >= 0x09 && code <= 0x0d) {
                return true;
            }
            // true if Unicode separator or BOM
            return code == 0xfeff
                    || ((1 << Character.getType(code)) & CharacterType.SPACE_MASK) != 0;

        case CharacterType.UPPER:
            return Character.isUpperCase(code);

        case CharacterType.XDIGIT:
            return EncodingHelper.isXDigit(code);

        case CharacterType.WORD:
            return ((1 << Character.getType(code)) & CharacterType.WORD_MASK) != 0;

        case CharacterType.ALNUM:
            return ((1 << Character.getType(code)) & CharacterType.ALNUM_MASK) != 0;

        case CharacterType.ASCII:
            return code < 0x80;

        default:
            throw new RuntimeException("illegal character type: " + ctype);
    }
}
 
Example #5
Source File: CClassNode.java    From openjdk-8-source with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Adds every code point of a character type, or of its complement, to this class.
 *
 * <p>Single-byte codes are set one by one in {@code bs}; depending on the type and on
 * {@code not}, {@code addAllMultiByteRange()} is called as well. When
 * {@code EncodingHelper.ctypeCodeRange} supplies a range table, the work is handed to
 * {@code addCTypeByRange} instead.
 *
 * @param ctype the {@code CharacterType} constant to add
 * @param not   {@code true} to add the complement of the type
 * @param env   scan environment; only its {@code syntax} is read here
 * @param sbOut holder passed to {@code EncodingHelper.ctypeCodeRange}; its value is
 *              forwarded to {@code addCTypeByRange}
 * @throws InternalException if {@code ctype} is not handled by any branch
 */
public void addCType(int ctype, boolean not, ScanEnvironment env, IntHolder sbOut) {
    if (Config.NON_UNICODE_SDW
            && (ctype == CharacterType.D || ctype == CharacterType.S || ctype == CharacterType.W)) {
        ctype ^= CharacterType.SPECIAL_MASK;

        // \s in JavaScript includes unicode characters, so that one case
        // continues with the general handling below.
        boolean javaScriptSpace = env.syntax == Syntax.JAVASCRIPT && ctype == CharacterType.SPACE;
        if (!javaScriptSpace) {
            int typeBit = 1 << ctype;
            for (int code = 0; code < BitSet.SINGLE_BYTE_SIZE; code++) {
                boolean member = (AsciiCtypeTable[code] & typeBit) != 0;
                if (member != not) bs.set(code);
            }
            if (not) addAllMultiByteRange();
            return;
        }
    }

    int[] codeRanges = EncodingHelper.ctypeCodeRange(ctype, sbOut);
    if (codeRanges != null) {
        addCTypeByRange(ctype, not, sbOut.value, codeRanges);
        return;
    }

    switch (ctype) {
    case CharacterType.ALPHA:
    case CharacterType.BLANK:
    case CharacterType.CNTRL:
    case CharacterType.DIGIT:
    case CharacterType.LOWER:
    case CharacterType.PUNCT:
    case CharacterType.SPACE:
    case CharacterType.UPPER:
    case CharacterType.XDIGIT:
    case CharacterType.ASCII:
    case CharacterType.ALNUM:
        for (int code = 0; code < BitSet.SINGLE_BYTE_SIZE; code++) {
            if (EncodingHelper.isCodeCType(code, ctype) != not) bs.set(code);
        }
        // multi-byte range is added only for the negated form
        if (not) addAllMultiByteRange();
        break;

    case CharacterType.GRAPH:
    case CharacterType.PRINT:
        for (int code = 0; code < BitSet.SINGLE_BYTE_SIZE; code++) {
            if (EncodingHelper.isCodeCType(code, ctype) != not) bs.set(code);
        }
        // multi-byte range is added only for the non-negated form
        if (!not) addAllMultiByteRange();
        break;

    case CharacterType.WORD:
        for (int code = 0; code < BitSet.SINGLE_BYTE_SIZE; code++) {
            if (EncodingHelper.isWord(code) != not) bs.set(code);
        }
        // multi-byte range is added only for the non-negated form
        if (!not) addAllMultiByteRange();
        break;

    default:
        throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
    } // switch
}
 
Example #6
Source File: Lexer.java    From openjdk-8-source with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Reads the next token while scanning the inside of a character class.
 *
 * <p>When no input is left the token becomes {@code EOT}. Otherwise one character is
 * fetched and stored in the token as a {@code CHAR}; it is then reclassified as
 * {@code CC_CLOSE} for {@code ']'}, {@code CC_RANGE} for {@code '-'}, processed as an
 * escape sequence when it equals the syntax's escape meta character, or handed to
 * {@code fetchTokenInCCFor_and()} when it is {@code '&'}.
 *
 * @return the value of {@code token.type} after the token has been read
 * @throws SyntaxException if the input ends directly after the escape character
 */
protected final TokenType fetchTokenInCC() {
    if (!left()) {
        token.type = TokenType.EOT;
        return token.type;
    }

    fetch();
    token.type = TokenType.CHAR;
    token.setC(c);
    token.escaped = false;

    if (c == ']') {
        token.type = TokenType.CC_CLOSE;
        return token.type;
    }
    if (c == '-') {
        token.type = TokenType.CC_RANGE;
        return token.type;
    }
    if (c != syntax.metaCharTable.esc) {
        // Of the remaining characters only '&' gets extra handling.
        if (c == '&') fetchTokenInCCFor_and();
        return token.type;
    }

    // From here on the fetched character is the escape character.
    if (!syntax.backSlashEscapeInCC()) return token.type;
    if (!left()) throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE);

    fetch();
    token.escaped = true;
    token.setC(c);

    int escapeChar = c;
    switch (escapeChar) {
    case 'w':
    case 'W':
        // the upper-case form passes 'true' as the first argument
        fetchTokenInCCFor_charType(escapeChar == 'W', Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
        break;
    case 'd':
    case 'D':
        fetchTokenInCCFor_charType(escapeChar == 'D', Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
        break;
    case 's':
    case 'S':
        fetchTokenInCCFor_charType(escapeChar == 'S', Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
        break;
    case 'h':
    case 'H':
        // only treated as a character type when the syntax enables it
        if (syntax.op2EscHXDigit()) fetchTokenInCCFor_charType(escapeChar == 'H', CharacterType.XDIGIT);
        break;
    case 'x':
        fetchTokenInCCFor_x();
        break;
    case 'u':
        fetchTokenInCCFor_u();
        break;
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
        fetchTokenInCCFor_digit();
        break;

    default:
        // Re-read the escape through fetchEscapedValue(); a value that differs
        // from the stored character turns the token into a CODE_POINT.
        unfetch();
        int escapedValue = fetchEscapedValue();
        if (token.getC() != escapedValue) {
            token.setCode(escapedValue);
            token.type = TokenType.CODE_POINT;
        }
        break;
    } // switch

    return token.type;
}
 
Example #7
Source File: EncodingHelper.java    From openjdk-8 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Tests whether a code point counts as a word character.
 *
 * @param code the code point to classify
 * @return {@code true} if the bit for the code point's {@link Character#getType(int)}
 *         category is set in {@code CharacterType.WORD_MASK}
 */
public static boolean isWord(int code) {
    // WORD_MASK: letter, digit, or '_'
    int categoryBit = 1 << Character.getType(code);
    return (categoryBit & CharacterType.WORD_MASK) != 0;
}
 
Example #8
Source File: EncodingHelper.java    From openjdk-8 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Tests whether a code point belongs to the character type selected by {@code ctype}.
 *
 * <p>Most types are decided by testing the bit of the code point's
 * {@link Character#getType(int)} category against the matching
 * {@code CharacterType.*_MASK} constant.
 *
 * @param code  the code point to classify
 * @param ctype one of the {@code CharacterType} constants handled below
 * @return {@code true} if {@code code} is a member of the requested type
 * @throws RuntimeException if {@code ctype} is not one of the handled constants
 * @see <a href="http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt">http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt</a>
 */
public static boolean isCodeCType(int code, int ctype) {
    switch (ctype) {
        case CharacterType.NEWLINE:
            return isNewLine(code);

        case CharacterType.ALPHA:
            return ((1 << Character.getType(code)) & CharacterType.ALPHA_MASK) != 0;

        case CharacterType.BLANK:
            if (code == 0x09) {
                return true;
            }
            return Character.getType(code) == Character.SPACE_SEPARATOR;

        case CharacterType.CNTRL: {
            int category = Character.getType(code);
            if (category == Character.UNASSIGNED) {
                return true;
            }
            return ((1 << category) & CharacterType.CNTRL_MASK) != 0;
        }

        case CharacterType.DIGIT:
            return EncodingHelper.isDigit(code);

        case CharacterType.GRAPH: {
            // 0x09..0x0d are never graphic
            if (code >= 0x09 && code <= 0x0d) {
                return false;
            }
            // GRAPH_MASK marks the excluded categories, hence the "== 0" test
            int category = Character.getType(code);
            return category != Character.UNASSIGNED
                    && ((1 << category) & CharacterType.GRAPH_MASK) == 0;
        }

        case CharacterType.LOWER:
            return Character.isLowerCase(code);

        case CharacterType.PRINT: {
            // PRINT_MASK marks the excluded categories, hence the "== 0" test
            int category = Character.getType(code);
            return category != Character.UNASSIGNED
                    && ((1 << category) & CharacterType.PRINT_MASK) == 0;
        }

        case CharacterType.PUNCT:
            return ((1 << Character.getType(code)) & CharacterType.PUNCT_MASK) != 0;

        case CharacterType.SPACE:
            // ECMA 7.2 and 7.3
            if (code >= 0x09 && code <= 0x0d) {
                return true;
            }
            // true if Unicode separator or BOM
            return code == 0xfeff
                    || ((1 << Character.getType(code)) & CharacterType.SPACE_MASK) != 0;

        case CharacterType.UPPER:
            return Character.isUpperCase(code);

        case CharacterType.XDIGIT:
            return EncodingHelper.isXDigit(code);

        case CharacterType.WORD:
            return ((1 << Character.getType(code)) & CharacterType.WORD_MASK) != 0;

        case CharacterType.ALNUM:
            return ((1 << Character.getType(code)) & CharacterType.ALNUM_MASK) != 0;

        case CharacterType.ASCII:
            return code < 0x80;

        default:
            throw new RuntimeException("illegal character type: " + ctype);
    }
}
 
Example #9
Source File: CClassNode.java    From openjdk-8 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Adds every code point of a character type, or of its complement, to this class.
 *
 * <p>Single-byte codes are set one by one in {@code bs}; depending on the type and on
 * {@code not}, {@code addAllMultiByteRange()} is called as well. When
 * {@code EncodingHelper.ctypeCodeRange} supplies a range table, the work is handed to
 * {@code addCTypeByRange} instead.
 *
 * @param ctype the {@code CharacterType} constant to add
 * @param not   {@code true} to add the complement of the type
 * @param env   scan environment; only its {@code syntax} is read here
 * @param sbOut holder passed to {@code EncodingHelper.ctypeCodeRange}; its value is
 *              forwarded to {@code addCTypeByRange}
 * @throws InternalException if {@code ctype} is not handled by any branch
 */
public void addCType(int ctype, boolean not, ScanEnvironment env, IntHolder sbOut) {
    if (Config.NON_UNICODE_SDW
            && (ctype == CharacterType.D || ctype == CharacterType.S || ctype == CharacterType.W)) {
        ctype ^= CharacterType.SPECIAL_MASK;

        // \s in JavaScript includes unicode characters, so that one case
        // continues with the general handling below.
        boolean javaScriptSpace = env.syntax == Syntax.JAVASCRIPT && ctype == CharacterType.SPACE;
        if (!javaScriptSpace) {
            int typeBit = 1 << ctype;
            for (int code = 0; code < BitSet.SINGLE_BYTE_SIZE; code++) {
                boolean member = (AsciiCtypeTable[code] & typeBit) != 0;
                if (member != not) bs.set(code);
            }
            if (not) addAllMultiByteRange();
            return;
        }
    }

    int[] codeRanges = EncodingHelper.ctypeCodeRange(ctype, sbOut);
    if (codeRanges != null) {
        addCTypeByRange(ctype, not, sbOut.value, codeRanges);
        return;
    }

    switch (ctype) {
    case CharacterType.ALPHA:
    case CharacterType.BLANK:
    case CharacterType.CNTRL:
    case CharacterType.DIGIT:
    case CharacterType.LOWER:
    case CharacterType.PUNCT:
    case CharacterType.SPACE:
    case CharacterType.UPPER:
    case CharacterType.XDIGIT:
    case CharacterType.ASCII:
    case CharacterType.ALNUM:
        for (int code = 0; code < BitSet.SINGLE_BYTE_SIZE; code++) {
            if (EncodingHelper.isCodeCType(code, ctype) != not) bs.set(code);
        }
        // multi-byte range is added only for the negated form
        if (not) addAllMultiByteRange();
        break;

    case CharacterType.GRAPH:
    case CharacterType.PRINT:
        for (int code = 0; code < BitSet.SINGLE_BYTE_SIZE; code++) {
            if (EncodingHelper.isCodeCType(code, ctype) != not) bs.set(code);
        }
        // multi-byte range is added only for the non-negated form
        if (!not) addAllMultiByteRange();
        break;

    case CharacterType.WORD:
        for (int code = 0; code < BitSet.SINGLE_BYTE_SIZE; code++) {
            if (EncodingHelper.isWord(code) != not) bs.set(code);
        }
        // multi-byte range is added only for the non-negated form
        if (!not) addAllMultiByteRange();
        break;

    default:
        throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
    } // switch
}
 
Example #10
Source File: EncodingHelper.java    From hottub with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Tests whether a code point belongs to the character type selected by {@code ctype}.
 *
 * <p>Most types are decided by testing the bit of the code point's
 * {@link Character#getType(int)} category against the matching
 * {@code CharacterType.*_MASK} constant.
 *
 * @see <a href="http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt">http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt</a>
 *
 * @param code the code point to classify
 * @param ctype one of the {@code CharacterType} constants handled below
 *
 * @return {@code true} if {@code code} is a member of the requested type
 * @throws RuntimeException if {@code ctype} is not one of the handled constants
 */
public static boolean isCodeCType(final int code, final int ctype) {
    switch (ctype) {
        case CharacterType.NEWLINE:
            return isNewLine(code);

        case CharacterType.ALPHA:
            return ((1 << Character.getType(code)) & CharacterType.ALPHA_MASK) != 0;

        case CharacterType.BLANK:
            if (code == 0x09) {
                return true;
            }
            return Character.getType(code) == Character.SPACE_SEPARATOR;

        case CharacterType.CNTRL: {
            final int category = Character.getType(code);
            if (category == Character.UNASSIGNED) {
                return true;
            }
            return ((1 << category) & CharacterType.CNTRL_MASK) != 0;
        }

        case CharacterType.DIGIT:
            return EncodingHelper.isDigit(code);

        case CharacterType.GRAPH: {
            // 0x09..0x0d are never graphic
            if (code >= 0x09 && code <= 0x0d) {
                return false;
            }
            // GRAPH_MASK marks the excluded categories, hence the "== 0" test
            final int category = Character.getType(code);
            return category != Character.UNASSIGNED
                    && ((1 << category) & CharacterType.GRAPH_MASK) == 0;
        }

        case CharacterType.LOWER:
            return Character.isLowerCase(code);

        case CharacterType.PRINT: {
            // PRINT_MASK marks the excluded categories, hence the "== 0" test
            final int category = Character.getType(code);
            return category != Character.UNASSIGNED
                    && ((1 << category) & CharacterType.PRINT_MASK) == 0;
        }

        case CharacterType.PUNCT:
            return ((1 << Character.getType(code)) & CharacterType.PUNCT_MASK) != 0;

        case CharacterType.SPACE:
            // ECMA 7.2 and 7.3
            if (code >= 0x09 && code <= 0x0d) {
                return true;
            }
            // true if Unicode separator or BOM
            return code == 0xfeff
                    || ((1 << Character.getType(code)) & CharacterType.SPACE_MASK) != 0;

        case CharacterType.UPPER:
            return Character.isUpperCase(code);

        case CharacterType.XDIGIT:
            return EncodingHelper.isXDigit(code);

        case CharacterType.WORD:
            return ((1 << Character.getType(code)) & CharacterType.WORD_MASK) != 0;

        case CharacterType.ALNUM:
            return ((1 << Character.getType(code)) & CharacterType.ALNUM_MASK) != 0;

        case CharacterType.ASCII:
            return code < 0x80;

        default:
            throw new RuntimeException("illegal character type: " + ctype);
    }
}
 
Example #11
Source File: EncodingHelper.java    From jdk8u_nashorn with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Tests whether a code point counts as a word character.
 *
 * @param code the code point to classify
 * @return {@code true} if the bit for the code point's {@link Character#getType(int)}
 *         category is set in {@code CharacterType.WORD_MASK}
 */
public static boolean isWord(final int code) {
    // WORD_MASK: letter, digit, or '_'
    final int categoryBit = 1 << Character.getType(code);
    return (categoryBit & CharacterType.WORD_MASK) != 0;
}
 
Example #12
Source File: EncodingHelper.java    From jdk8u_nashorn with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Tests whether a code point belongs to the character type selected by {@code ctype}.
 *
 * <p>Most types are decided by testing the bit of the code point's
 * {@link Character#getType(int)} category against the matching
 * {@code CharacterType.*_MASK} constant.
 *
 * @see <a href="http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt">http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt</a>
 *
 * @param code the code point to classify
 * @param ctype one of the {@code CharacterType} constants handled below
 *
 * @return {@code true} if {@code code} is a member of the requested type
 * @throws RuntimeException if {@code ctype} is not one of the handled constants
 */
public static boolean isCodeCType(final int code, final int ctype) {
    switch (ctype) {
        case CharacterType.NEWLINE:
            return isNewLine(code);

        case CharacterType.ALPHA:
            return ((1 << Character.getType(code)) & CharacterType.ALPHA_MASK) != 0;

        case CharacterType.BLANK:
            if (code == 0x09) {
                return true;
            }
            return Character.getType(code) == Character.SPACE_SEPARATOR;

        case CharacterType.CNTRL: {
            final int category = Character.getType(code);
            if (category == Character.UNASSIGNED) {
                return true;
            }
            return ((1 << category) & CharacterType.CNTRL_MASK) != 0;
        }

        case CharacterType.DIGIT:
            return EncodingHelper.isDigit(code);

        case CharacterType.GRAPH: {
            // 0x09..0x0d are never graphic
            if (code >= 0x09 && code <= 0x0d) {
                return false;
            }
            // GRAPH_MASK marks the excluded categories, hence the "== 0" test
            final int category = Character.getType(code);
            return category != Character.UNASSIGNED
                    && ((1 << category) & CharacterType.GRAPH_MASK) == 0;
        }

        case CharacterType.LOWER:
            return Character.isLowerCase(code);

        case CharacterType.PRINT: {
            // PRINT_MASK marks the excluded categories, hence the "== 0" test
            final int category = Character.getType(code);
            return category != Character.UNASSIGNED
                    && ((1 << category) & CharacterType.PRINT_MASK) == 0;
        }

        case CharacterType.PUNCT:
            return ((1 << Character.getType(code)) & CharacterType.PUNCT_MASK) != 0;

        case CharacterType.SPACE:
            // ECMA 7.2 and 7.3
            if (code >= 0x09 && code <= 0x0d) {
                return true;
            }
            // true if Unicode separator or BOM
            return code == 0xfeff
                    || ((1 << Character.getType(code)) & CharacterType.SPACE_MASK) != 0;

        case CharacterType.UPPER:
            return Character.isUpperCase(code);

        case CharacterType.XDIGIT:
            return EncodingHelper.isXDigit(code);

        case CharacterType.WORD:
            return ((1 << Character.getType(code)) & CharacterType.WORD_MASK) != 0;

        case CharacterType.ALNUM:
            return ((1 << Character.getType(code)) & CharacterType.ALNUM_MASK) != 0;

        case CharacterType.ASCII:
            return code < 0x80;

        default:
            throw new RuntimeException("illegal character type: " + ctype);
    }
}
 
Example #13
Source File: Lexer.java    From jdk8u_nashorn with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Reads the next token while scanning the inside of a character class.
 *
 * <p>When no input is left the token becomes {@code EOT}. Otherwise one character is
 * fetched and stored in the token as a {@code CHAR}; it is then reclassified as
 * {@code CC_CLOSE} for {@code ']'}, {@code CC_RANGE} for {@code '-'}, processed as an
 * escape sequence when it equals the syntax's escape meta character, or handed to
 * {@code fetchTokenInCCFor_and()} when it is {@code '&'}.
 *
 * @return the value of {@code token.type} after the token has been read
 * @throws SyntaxException if the input ends directly after the escape character
 */
protected final TokenType fetchTokenInCC() {
    if (!left()) {
        token.type = TokenType.EOT;
        return token.type;
    }

    fetch();
    token.type = TokenType.CHAR;
    token.setC(c);
    token.escaped = false;

    if (c == ']') {
        token.type = TokenType.CC_CLOSE;
        return token.type;
    }
    if (c == '-') {
        token.type = TokenType.CC_RANGE;
        return token.type;
    }
    if (c != syntax.metaCharTable.esc) {
        // Of the remaining characters only '&' gets extra handling.
        if (c == '&') {
            fetchTokenInCCFor_and();
        }
        return token.type;
    }

    // From here on the fetched character is the escape character.
    if (!syntax.backSlashEscapeInCC()) {
        return token.type;
    }
    if (!left()) {
        throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE);
    }

    fetch();
    token.escaped = true;
    token.setC(c);

    final int escapeChar = c;
    switch (escapeChar) {
    case 'w':
    case 'W':
        // the upper-case form passes 'true' as the first argument
        fetchTokenInCCFor_charType(escapeChar == 'W', Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
        break;
    case 'd':
    case 'D':
        fetchTokenInCCFor_charType(escapeChar == 'D', Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
        break;
    case 's':
    case 'S':
        fetchTokenInCCFor_charType(escapeChar == 'S', Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
        break;
    case 'h':
    case 'H':
        // only treated as a character type when the syntax enables it
        if (syntax.op2EscHXDigit()) {
            fetchTokenInCCFor_charType(escapeChar == 'H', CharacterType.XDIGIT);
        }
        break;
    case 'x':
        fetchTokenInCCFor_x();
        break;
    case 'u':
        fetchTokenInCCFor_u();
        break;
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
        fetchTokenInCCFor_digit();
        break;

    default:
        // Re-read the escape through fetchEscapedValue(); a value that differs
        // from the stored character turns the token into a CODE_POINT.
        unfetch();
        final int escapedValue = fetchEscapedValue();
        if (token.getC() != escapedValue) {
            token.setCode(escapedValue);
            token.type = TokenType.CODE_POINT;
        }
        break;
    } // switch

    return token.type;
}
 
Example #14
Source File: EncodingHelper.java    From nashorn with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Tests whether a code point counts as a word character.
 *
 * @param code the code point to classify
 * @return {@code true} if the bit for the code point's {@link Character#getType(int)}
 *         category is set in {@code CharacterType.WORD_MASK}
 */
public static boolean isWord(int code) {
    // WORD_MASK: letter, digit, or '_'
    int categoryBit = 1 << Character.getType(code);
    return (categoryBit & CharacterType.WORD_MASK) != 0;
}
 
Example #15
Source File: EncodingHelper.java    From nashorn with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Tests whether a code point belongs to the character type selected by {@code ctype}.
 *
 * <p>Most types are decided by testing the bit of the code point's
 * {@link Character#getType(int)} category against the matching
 * {@code CharacterType.*_MASK} constant.
 *
 * @param code  the code point to classify
 * @param ctype one of the {@code CharacterType} constants handled below
 * @return {@code true} if {@code code} is a member of the requested type
 * @throws RuntimeException if {@code ctype} is not one of the handled constants
 * @see <a href="http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt">http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt</a>
 */
public static boolean isCodeCType(int code, int ctype) {
    switch (ctype) {
        case CharacterType.NEWLINE:
            return isNewLine(code);

        case CharacterType.ALPHA:
            return ((1 << Character.getType(code)) & CharacterType.ALPHA_MASK) != 0;

        case CharacterType.BLANK:
            if (code == 0x09) {
                return true;
            }
            return Character.getType(code) == Character.SPACE_SEPARATOR;

        case CharacterType.CNTRL: {
            int category = Character.getType(code);
            if (category == Character.UNASSIGNED) {
                return true;
            }
            return ((1 << category) & CharacterType.CNTRL_MASK) != 0;
        }

        case CharacterType.DIGIT:
            return EncodingHelper.isDigit(code);

        case CharacterType.GRAPH: {
            // 0x09..0x0d are never graphic
            if (code >= 0x09 && code <= 0x0d) {
                return false;
            }
            // GRAPH_MASK marks the excluded categories, hence the "== 0" test
            int category = Character.getType(code);
            return category != Character.UNASSIGNED
                    && ((1 << category) & CharacterType.GRAPH_MASK) == 0;
        }

        case CharacterType.LOWER:
            return Character.isLowerCase(code);

        case CharacterType.PRINT: {
            // PRINT_MASK marks the excluded categories, hence the "== 0" test
            int category = Character.getType(code);
            return category != Character.UNASSIGNED
                    && ((1 << category) & CharacterType.PRINT_MASK) == 0;
        }

        case CharacterType.PUNCT:
            return ((1 << Character.getType(code)) & CharacterType.PUNCT_MASK) != 0;

        case CharacterType.SPACE:
            // ECMA 7.2 and 7.3
            if (code >= 0x09 && code <= 0x0d) {
                return true;
            }
            // true if Unicode separator or BOM
            return code == 0xfeff
                    || ((1 << Character.getType(code)) & CharacterType.SPACE_MASK) != 0;

        case CharacterType.UPPER:
            return Character.isUpperCase(code);

        case CharacterType.XDIGIT:
            return EncodingHelper.isXDigit(code);

        case CharacterType.WORD:
            return ((1 << Character.getType(code)) & CharacterType.WORD_MASK) != 0;

        case CharacterType.ALNUM:
            return ((1 << Character.getType(code)) & CharacterType.ALNUM_MASK) != 0;

        case CharacterType.ASCII:
            return code < 0x80;

        default:
            throw new RuntimeException("illegal character type: " + ctype);
    }
}
 
Example #16
Source File: CClassNode.java    From nashorn with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Adds a character type to this character class, or its complement when {@code not} is set.
 *
 * <p>Codes below {@code BitSet.SINGLE_BYTE_SIZE} are recorded one by one in {@code bs}.
 * Depending on the type and on {@code not}, {@code addAllMultiByteRange()} is called as well.
 * When {@code EncodingHelper.ctypeCodeRange} supplies a range table for the type, the work is
 * delegated to {@code addCTypeByRange} instead.
 *
 * @param ctype the character type to add (a {@code CharacterType} constant)
 * @param not {@code true} to add the codes that are <em>not</em> members of {@code ctype}
 * @param env scan environment; only its {@code syntax} is consulted here
 * @param sbOut holder handed to {@code EncodingHelper.ctypeCodeRange}; its {@code value} is
 *        forwarded to {@code addCTypeByRange}
 * @throws InternalException if {@code ctype} is not one of the character types handled here
 */
public void addCType(int ctype, boolean not, ScanEnvironment env, IntHolder sbOut) {
    final boolean shorthand = ctype == CharacterType.D
            || ctype == CharacterType.S
            || ctype == CharacterType.W;

    if (Config.NON_UNICODE_SDW && shorthand) {
        // Map the shorthand type onto its regular counterpart.
        ctype ^= CharacterType.SPECIAL_MASK;

        // \s in JavaScript includes unicode characters, so that combination is
        // left to the general handling further down.
        final boolean javaScriptSpace = env.syntax == Syntax.JAVASCRIPT && ctype == CharacterType.SPACE;
        if (!javaScriptSpace) {
            final int typeBit = 1 << ctype;
            for (int code = 0; code < BitSet.SINGLE_BYTE_SIZE; code++) {
                final boolean member = (AsciiCtypeTable[code] & typeBit) != 0;
                if (member != not) {
                    bs.set(code);
                }
            }
            if (not) {
                addAllMultiByteRange();
            }
            return;
        }
    }

    // Prefer a code range table when one is available for this type.
    final int[] ranges = EncodingHelper.ctypeCodeRange(ctype, sbOut);
    if (ranges != null) {
        addCTypeByRange(ctype, not, sbOut.value, ranges);
        return;
    }

    // Decide for which polarity the multi-byte range is added; unknown types are rejected
    // before anything is written to the bit set.
    final boolean multiByteWhenNegated;
    switch (ctype) {
    case CharacterType.ALPHA:
    case CharacterType.BLANK:
    case CharacterType.CNTRL:
    case CharacterType.DIGIT:
    case CharacterType.LOWER:
    case CharacterType.PUNCT:
    case CharacterType.SPACE:
    case CharacterType.UPPER:
    case CharacterType.XDIGIT:
    case CharacterType.ASCII:
    case CharacterType.ALNUM:
        multiByteWhenNegated = true;
        break;

    case CharacterType.GRAPH:
    case CharacterType.PRINT:
    case CharacterType.WORD:
        multiByteWhenNegated = false;
        break;

    default:
        throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
    }

    final boolean wordType = ctype == CharacterType.WORD;
    for (int code = 0; code < BitSet.SINGLE_BYTE_SIZE; code++) {
        final boolean member = wordType
                ? EncodingHelper.isWord(code)
                : EncodingHelper.isCodeCType(code, ctype);
        if (member != not) {
            bs.set(code);
        }
    }

    if (not == multiByteWhenNegated) {
        addAllMultiByteRange();
    }
}
 
Example #17
Source File: Lexer.java    From nashorn with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Fetches the next token while scanning the inside of a character class.
 *
 * <p>The token starts out as an unescaped {@code CHAR}. {@code ]} and {@code -} are
 * reclassified as {@code CC_CLOSE} and {@code CC_RANGE}, {@code &} is handed to
 * {@code fetchTokenInCCFor_and()}, and the syntax's escape character introduces an escape
 * sequence when the syntax allows escapes inside character classes.
 *
 * @return the type stored in {@code token} ({@code EOT} when no input is left)
 * @throws SyntaxException if the pattern ends directly after the escape character
 */
protected final TokenType fetchTokenInCC() {
    if (!left()) {
        token.type = TokenType.EOT;
        return token.type;
    }

    // Default assumption: a plain, unescaped literal character.
    fetch();
    token.type = TokenType.CHAR;
    token.setC(c);
    token.escaped = false;

    if (c == ']') {
        token.type = TokenType.CC_CLOSE;
        return token.type;
    }
    if (c == '-') {
        token.type = TokenType.CC_RANGE;
        return token.type;
    }
    if (c != syntax.metaCharTable.esc) {
        if (c == '&') {
            fetchTokenInCCFor_and();
        }
        return token.type;
    }

    // From here on the current character is the escape character.
    if (!syntax.backSlashEscapeInCC()) {
        return token.type;
    }
    if (!left()) {
        throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE);
    }

    fetch();
    token.escaped = true;
    token.setC(c);

    // The upper-case variant of each class escape is the negated form.
    switch (c) {
    case 'w':
    case 'W':
        fetchTokenInCCFor_charType(c == 'W', Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
        break;
    case 'd':
    case 'D':
        fetchTokenInCCFor_charType(c == 'D', Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
        break;
    case 's':
    case 'S':
        fetchTokenInCCFor_charType(c == 'S', Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
        break;
    case 'h':
    case 'H': {
        final boolean negated = c == 'H';
        if (syntax.op2EscHXDigit()) {
            fetchTokenInCCFor_charType(negated, CharacterType.XDIGIT);
        }
        break;
    }
    case 'x':
        fetchTokenInCCFor_x();
        break;
    case 'u':
        fetchTokenInCCFor_u();
        break;
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
        fetchTokenInCCFor_digit();
        break;
    default: {
        // Re-read the escaped character; if it stands for a different value,
        // the token becomes a code point.
        unfetch();
        final int value = fetchEscapedValue();
        if (value != token.getC()) {
            token.setCode(value);
            token.type = TokenType.CODE_POINT;
        }
        break;
    }
    }

    return token.type;
}
 
Example #18
Source File: EncodingHelper.java    From TencentKona-8 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Tests whether a code point counts as a word character.
 *
 * @param code the code point to test
 * @return {@code true} if the bit for the code point's general category is set in
 *         {@code CharacterType.WORD_MASK}
 */
public static boolean isWord(final int code) {
    // letter, digit, or '_'
    final int categoryBit = 1 << Character.getType(code);
    return (categoryBit & CharacterType.WORD_MASK) != 0;
}
 
Example #19
Source File: EncodingHelper.java    From hottub with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Tests whether a code point counts as a word character.
 *
 * @param code the code point to test
 * @return {@code true} if the bit for the code point's general category is set in
 *         {@code CharacterType.WORD_MASK}
 */
public static boolean isWord(final int code) {
    // letter, digit, or '_'
    final int categoryBit = 1 << Character.getType(code);
    return (categoryBit & CharacterType.WORD_MASK) != 0;
}
 
Example #20
Source File: Lexer.java    From openjdk-jdk9 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Fetches the next token while scanning the inside of a character class.
 *
 * <p>The token starts out as an unescaped {@code CHAR}. {@code ]} and {@code -} are
 * reclassified as {@code CC_CLOSE} and {@code CC_RANGE}, {@code &} is handed to
 * {@code fetchTokenInCCFor_and()}, and the syntax's escape character introduces an escape
 * sequence when the syntax allows escapes inside character classes.
 *
 * @return the type stored in {@code token} ({@code EOT} when no input is left)
 * @throws SyntaxException if the pattern ends directly after the escape character
 */
protected final TokenType fetchTokenInCC() {
    if (!left()) {
        token.type = TokenType.EOT;
        return token.type;
    }

    // Default assumption: a plain, unescaped literal character.
    fetch();
    token.type = TokenType.CHAR;
    token.setC(c);
    token.escaped = false;

    if (c == ']') {
        token.type = TokenType.CC_CLOSE;
        return token.type;
    }
    if (c == '-') {
        token.type = TokenType.CC_RANGE;
        return token.type;
    }
    if (c != syntax.metaCharTable.esc) {
        if (c == '&') {
            fetchTokenInCCFor_and();
        }
        return token.type;
    }

    // From here on the current character is the escape character.
    if (!syntax.backSlashEscapeInCC()) {
        return token.type;
    }
    if (!left()) {
        throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE);
    }

    fetch();
    token.escaped = true;
    token.setC(c);

    // The upper-case variant of each class escape is the negated form.
    switch (c) {
    case 'w':
    case 'W':
        fetchTokenInCCFor_charType(c == 'W', Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
        break;
    case 'd':
    case 'D':
        fetchTokenInCCFor_charType(c == 'D', Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
        break;
    case 's':
    case 'S':
        fetchTokenInCCFor_charType(c == 'S', Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
        break;
    case 'h':
    case 'H': {
        final boolean negated = c == 'H';
        if (syntax.op2EscHXDigit()) {
            fetchTokenInCCFor_charType(negated, CharacterType.XDIGIT);
        }
        break;
    }
    case 'x':
        fetchTokenInCCFor_x();
        break;
    case 'u':
        fetchTokenInCCFor_u();
        break;
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
        fetchTokenInCCFor_digit();
        break;
    default: {
        // Re-read the escaped character; if it stands for a different value,
        // the token becomes a code point.
        unfetch();
        final int value = fetchEscapedValue();
        if (value != token.getC()) {
            token.setCode(value);
            token.type = TokenType.CODE_POINT;
        }
        break;
    }
    }

    return token.type;
}
 
Example #21
Source File: EncodingHelper.java    From openjdk-jdk9 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Tests whether a code point is a member of the given character type.
 *
 * @see <a href="http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt">http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt</a>
 *
 * @param code the code point to classify
 * @param ctype the character type to test against (a {@code CharacterType} constant)
 *
 * @return {@code true} if {@code code} belongs to {@code ctype}
 * @throws RuntimeException if {@code ctype} is not one of the character types handled here
 */
public static boolean isCodeCType(final int code, final int ctype) {
    switch (ctype) {
        case CharacterType.NEWLINE:
            return isNewLine(code);

        case CharacterType.ALPHA: {
            final int categoryBit = 1 << Character.getType(code);
            return (categoryBit & CharacterType.ALPHA_MASK) != 0;
        }

        case CharacterType.BLANK:
            if (code == 0x09) {
                return true;
            }
            return Character.getType(code) == Character.SPACE_SEPARATOR;

        case CharacterType.CNTRL: {
            // Unassigned code points are counted as control characters.
            final int category = Character.getType(code);
            if (category == Character.UNASSIGNED) {
                return true;
            }
            return ((1 << category) & CharacterType.CNTRL_MASK) != 0;
        }

        case CharacterType.DIGIT:
            return EncodingHelper.isDigit(code);

        case CharacterType.GRAPH: {
            // 0x09..0x0d are never graphical.
            if (code >= 0x09 && code <= 0x0d) {
                return false;
            }
            final int category = Character.getType(code);
            return category != Character.UNASSIGNED
                    && ((1 << category) & CharacterType.GRAPH_MASK) == 0;
        }

        case CharacterType.LOWER:
            return Character.isLowerCase(code);

        case CharacterType.PRINT: {
            final int category = Character.getType(code);
            return category != Character.UNASSIGNED
                    && ((1 << category) & CharacterType.PRINT_MASK) == 0;
        }

        case CharacterType.PUNCT: {
            final int categoryBit = 1 << Character.getType(code);
            return (categoryBit & CharacterType.PUNCT_MASK) != 0;
        }

        case CharacterType.SPACE: {
            // ECMA 7.2 and 7.3
            if (code >= 0x09 && code <= 0x0d) {
                return true;
            }
            // BOM and U+180E (see JDK-8138758) are spaces regardless of their category.
            if (code == 0xfeff || code == 0x180e) {
                return true;
            }
            // Otherwise true if Unicode separator.
            final int categoryBit = 1 << Character.getType(code);
            return (categoryBit & CharacterType.SPACE_MASK) != 0;
        }

        case CharacterType.UPPER:
            return Character.isUpperCase(code);

        case CharacterType.XDIGIT:
            return EncodingHelper.isXDigit(code);

        case CharacterType.WORD: {
            final int categoryBit = 1 << Character.getType(code);
            return (categoryBit & CharacterType.WORD_MASK) != 0;
        }

        case CharacterType.ALNUM: {
            final int categoryBit = 1 << Character.getType(code);
            return (categoryBit & CharacterType.ALNUM_MASK) != 0;
        }

        case CharacterType.ASCII:
            return code < 0x80;

        default:
            throw new RuntimeException("illegal character type: " + ctype);
    }
}
 
Example #22
Source File: EncodingHelper.java    From openjdk-jdk9 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Tests whether a code point counts as a word character.
 *
 * @param code the code point to test
 * @return {@code true} if the bit for the code point's general category is set in
 *         {@code CharacterType.WORD_MASK}
 */
public static boolean isWord(final int code) {
    // letter, digit, or '_'
    final int categoryBit = 1 << Character.getType(code);
    return (categoryBit & CharacterType.WORD_MASK) != 0;
}
 
Example #23
Source File: Lexer.java    From openjdk-jdk8u-backup with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Fetches the next token while scanning the inside of a character class.
 *
 * <p>The token starts out as an unescaped {@code CHAR}. {@code ]} and {@code -} are
 * reclassified as {@code CC_CLOSE} and {@code CC_RANGE}, {@code &} is handed to
 * {@code fetchTokenInCCFor_and()}, and the syntax's escape character introduces an escape
 * sequence when the syntax allows escapes inside character classes.
 *
 * @return the type stored in {@code token} ({@code EOT} when no input is left)
 * @throws SyntaxException if the pattern ends directly after the escape character
 */
protected final TokenType fetchTokenInCC() {
    if (!left()) {
        token.type = TokenType.EOT;
        return token.type;
    }

    // Default assumption: a plain, unescaped literal character.
    fetch();
    token.type = TokenType.CHAR;
    token.setC(c);
    token.escaped = false;

    if (c == ']') {
        token.type = TokenType.CC_CLOSE;
        return token.type;
    }
    if (c == '-') {
        token.type = TokenType.CC_RANGE;
        return token.type;
    }
    if (c != syntax.metaCharTable.esc) {
        if (c == '&') {
            fetchTokenInCCFor_and();
        }
        return token.type;
    }

    // From here on the current character is the escape character.
    if (!syntax.backSlashEscapeInCC()) {
        return token.type;
    }
    if (!left()) {
        throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE);
    }

    fetch();
    token.escaped = true;
    token.setC(c);

    // The upper-case variant of each class escape is the negated form.
    switch (c) {
    case 'w':
    case 'W':
        fetchTokenInCCFor_charType(c == 'W', Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
        break;
    case 'd':
    case 'D':
        fetchTokenInCCFor_charType(c == 'D', Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
        break;
    case 's':
    case 'S':
        fetchTokenInCCFor_charType(c == 'S', Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
        break;
    case 'h':
    case 'H': {
        final boolean negated = c == 'H';
        if (syntax.op2EscHXDigit()) {
            fetchTokenInCCFor_charType(negated, CharacterType.XDIGIT);
        }
        break;
    }
    case 'x':
        fetchTokenInCCFor_x();
        break;
    case 'u':
        fetchTokenInCCFor_u();
        break;
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
        fetchTokenInCCFor_digit();
        break;
    default: {
        // Re-read the escaped character; if it stands for a different value,
        // the token becomes a code point.
        unfetch();
        final int value = fetchEscapedValue();
        if (value != token.getC()) {
            token.setCode(value);
            token.type = TokenType.CODE_POINT;
        }
        break;
    }
    }

    return token.type;
}
 
Example #24
Source File: EncodingHelper.java    From openjdk-jdk8u-backup with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Tests whether a code point is a member of the given character type.
 *
 * @see <a href="http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt">http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt</a>
 *
 * @param code the code point to classify
 * @param ctype the character type to test against (a {@code CharacterType} constant)
 *
 * @return {@code true} if {@code code} belongs to {@code ctype}
 * @throws RuntimeException if {@code ctype} is not one of the character types handled here
 */
public static boolean isCodeCType(final int code, final int ctype) {
    switch (ctype) {
        case CharacterType.NEWLINE:
            return isNewLine(code);

        case CharacterType.ALPHA: {
            final int categoryBit = 1 << Character.getType(code);
            return (categoryBit & CharacterType.ALPHA_MASK) != 0;
        }

        case CharacterType.BLANK:
            if (code == 0x09) {
                return true;
            }
            return Character.getType(code) == Character.SPACE_SEPARATOR;

        case CharacterType.CNTRL: {
            // Unassigned code points are counted as control characters.
            final int category = Character.getType(code);
            if (category == Character.UNASSIGNED) {
                return true;
            }
            return ((1 << category) & CharacterType.CNTRL_MASK) != 0;
        }

        case CharacterType.DIGIT:
            return EncodingHelper.isDigit(code);

        case CharacterType.GRAPH: {
            // 0x09..0x0d are never graphical.
            if (code >= 0x09 && code <= 0x0d) {
                return false;
            }
            final int category = Character.getType(code);
            return category != Character.UNASSIGNED
                    && ((1 << category) & CharacterType.GRAPH_MASK) == 0;
        }

        case CharacterType.LOWER:
            return Character.isLowerCase(code);

        case CharacterType.PRINT: {
            final int category = Character.getType(code);
            return category != Character.UNASSIGNED
                    && ((1 << category) & CharacterType.PRINT_MASK) == 0;
        }

        case CharacterType.PUNCT: {
            final int categoryBit = 1 << Character.getType(code);
            return (categoryBit & CharacterType.PUNCT_MASK) != 0;
        }

        case CharacterType.SPACE: {
            // ECMA 7.2 and 7.3
            if (code >= 0x09 && code <= 0x0d) {
                return true;
            }
            // The BOM is a space regardless of its category.
            if (code == 0xfeff) {
                return true;
            }
            // Otherwise true if Unicode separator.
            final int categoryBit = 1 << Character.getType(code);
            return (categoryBit & CharacterType.SPACE_MASK) != 0;
        }

        case CharacterType.UPPER:
            return Character.isUpperCase(code);

        case CharacterType.XDIGIT:
            return EncodingHelper.isXDigit(code);

        case CharacterType.WORD: {
            final int categoryBit = 1 << Character.getType(code);
            return (categoryBit & CharacterType.WORD_MASK) != 0;
        }

        case CharacterType.ALNUM: {
            final int categoryBit = 1 << Character.getType(code);
            return (categoryBit & CharacterType.ALNUM_MASK) != 0;
        }

        case CharacterType.ASCII:
            return code < 0x80;

        default:
            throw new RuntimeException("illegal character type: " + ctype);
    }
}
 
Example #25
Source File: EncodingHelper.java    From openjdk-jdk8u-backup with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Tests whether a code point counts as a word character.
 *
 * @param code the code point to test
 * @return {@code true} if the bit for the code point's general category is set in
 *         {@code CharacterType.WORD_MASK}
 */
public static boolean isWord(final int code) {
    // letter, digit, or '_'
    final int categoryBit = 1 << Character.getType(code);
    return (categoryBit & CharacterType.WORD_MASK) != 0;
}
 
Example #26
Source File: Lexer.java    From openjdk-jdk8u with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Fetches the next token while scanning the inside of a character class.
 *
 * <p>The token starts out as an unescaped {@code CHAR}. {@code ]} and {@code -} are
 * reclassified as {@code CC_CLOSE} and {@code CC_RANGE}, {@code &} is handed to
 * {@code fetchTokenInCCFor_and()}, and the syntax's escape character introduces an escape
 * sequence when the syntax allows escapes inside character classes.
 *
 * @return the type stored in {@code token} ({@code EOT} when no input is left)
 * @throws SyntaxException if the pattern ends directly after the escape character
 */
protected final TokenType fetchTokenInCC() {
    if (!left()) {
        token.type = TokenType.EOT;
        return token.type;
    }

    // Default assumption: a plain, unescaped literal character.
    fetch();
    token.type = TokenType.CHAR;
    token.setC(c);
    token.escaped = false;

    if (c == ']') {
        token.type = TokenType.CC_CLOSE;
        return token.type;
    }
    if (c == '-') {
        token.type = TokenType.CC_RANGE;
        return token.type;
    }
    if (c != syntax.metaCharTable.esc) {
        if (c == '&') {
            fetchTokenInCCFor_and();
        }
        return token.type;
    }

    // From here on the current character is the escape character.
    if (!syntax.backSlashEscapeInCC()) {
        return token.type;
    }
    if (!left()) {
        throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE);
    }

    fetch();
    token.escaped = true;
    token.setC(c);

    // The upper-case variant of each class escape is the negated form.
    switch (c) {
    case 'w':
    case 'W':
        fetchTokenInCCFor_charType(c == 'W', Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
        break;
    case 'd':
    case 'D':
        fetchTokenInCCFor_charType(c == 'D', Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
        break;
    case 's':
    case 'S':
        fetchTokenInCCFor_charType(c == 'S', Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
        break;
    case 'h':
    case 'H': {
        final boolean negated = c == 'H';
        if (syntax.op2EscHXDigit()) {
            fetchTokenInCCFor_charType(negated, CharacterType.XDIGIT);
        }
        break;
    }
    case 'x':
        fetchTokenInCCFor_x();
        break;
    case 'u':
        fetchTokenInCCFor_u();
        break;
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
        fetchTokenInCCFor_digit();
        break;
    default: {
        // Re-read the escaped character; if it stands for a different value,
        // the token becomes a code point.
        unfetch();
        final int value = fetchEscapedValue();
        if (value != token.getC()) {
            token.setCode(value);
            token.type = TokenType.CODE_POINT;
        }
        break;
    }
    }

    return token.type;
}
 
Example #27
Source File: EncodingHelper.java    From openjdk-jdk8u with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Tests whether a code point is a member of the given character type.
 *
 * @see <a href="http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt">http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt</a>
 *
 * @param code the code point to classify
 * @param ctype the character type to test against (a {@code CharacterType} constant)
 *
 * @return {@code true} if {@code code} belongs to {@code ctype}
 * @throws RuntimeException if {@code ctype} is not one of the character types handled here
 */
public static boolean isCodeCType(final int code, final int ctype) {
    switch (ctype) {
        case CharacterType.NEWLINE:
            return isNewLine(code);

        case CharacterType.ALPHA: {
            final int categoryBit = 1 << Character.getType(code);
            return (categoryBit & CharacterType.ALPHA_MASK) != 0;
        }

        case CharacterType.BLANK:
            if (code == 0x09) {
                return true;
            }
            return Character.getType(code) == Character.SPACE_SEPARATOR;

        case CharacterType.CNTRL: {
            // Unassigned code points are counted as control characters.
            final int category = Character.getType(code);
            if (category == Character.UNASSIGNED) {
                return true;
            }
            return ((1 << category) & CharacterType.CNTRL_MASK) != 0;
        }

        case CharacterType.DIGIT:
            return EncodingHelper.isDigit(code);

        case CharacterType.GRAPH: {
            // 0x09..0x0d are never graphical.
            if (code >= 0x09 && code <= 0x0d) {
                return false;
            }
            final int category = Character.getType(code);
            return category != Character.UNASSIGNED
                    && ((1 << category) & CharacterType.GRAPH_MASK) == 0;
        }

        case CharacterType.LOWER:
            return Character.isLowerCase(code);

        case CharacterType.PRINT: {
            final int category = Character.getType(code);
            return category != Character.UNASSIGNED
                    && ((1 << category) & CharacterType.PRINT_MASK) == 0;
        }

        case CharacterType.PUNCT: {
            final int categoryBit = 1 << Character.getType(code);
            return (categoryBit & CharacterType.PUNCT_MASK) != 0;
        }

        case CharacterType.SPACE: {
            // ECMA 7.2 and 7.3
            if (code >= 0x09 && code <= 0x0d) {
                return true;
            }
            // The BOM is a space regardless of its category.
            if (code == 0xfeff) {
                return true;
            }
            // Otherwise true if Unicode separator.
            final int categoryBit = 1 << Character.getType(code);
            return (categoryBit & CharacterType.SPACE_MASK) != 0;
        }

        case CharacterType.UPPER:
            return Character.isUpperCase(code);

        case CharacterType.XDIGIT:
            return EncodingHelper.isXDigit(code);

        case CharacterType.WORD: {
            final int categoryBit = 1 << Character.getType(code);
            return (categoryBit & CharacterType.WORD_MASK) != 0;
        }

        case CharacterType.ALNUM: {
            final int categoryBit = 1 << Character.getType(code);
            return (categoryBit & CharacterType.ALNUM_MASK) != 0;
        }

        case CharacterType.ASCII:
            return code < 0x80;

        default:
            throw new RuntimeException("illegal character type: " + ctype);
    }
}
 
Example #28
Source File: EncodingHelper.java    From openjdk-jdk8u with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Tests whether a code point counts as a word character.
 *
 * @param code the code point to test
 * @return {@code true} if the bit for the code point's general category is set in
 *         {@code CharacterType.WORD_MASK}
 */
public static boolean isWord(final int code) {
    // letter, digit, or '_'
    final int categoryBit = 1 << Character.getType(code);
    return (categoryBit & CharacterType.WORD_MASK) != 0;
}
 
Example #29
Source File: Lexer.java    From jdk8u60 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Fetches the next token while scanning the inside of a character class.
 *
 * <p>The token starts out as an unescaped {@code CHAR}. {@code ]} and {@code -} are
 * reclassified as {@code CC_CLOSE} and {@code CC_RANGE}, {@code &} is handed to
 * {@code fetchTokenInCCFor_and()}, and the syntax's escape character introduces an escape
 * sequence when the syntax allows escapes inside character classes.
 *
 * @return the type stored in {@code token} ({@code EOT} when no input is left)
 * @throws SyntaxException if the pattern ends directly after the escape character
 */
protected final TokenType fetchTokenInCC() {
    if (!left()) {
        token.type = TokenType.EOT;
        return token.type;
    }

    // Default assumption: a plain, unescaped literal character.
    fetch();
    token.type = TokenType.CHAR;
    token.setC(c);
    token.escaped = false;

    if (c == ']') {
        token.type = TokenType.CC_CLOSE;
        return token.type;
    }
    if (c == '-') {
        token.type = TokenType.CC_RANGE;
        return token.type;
    }
    if (c != syntax.metaCharTable.esc) {
        if (c == '&') {
            fetchTokenInCCFor_and();
        }
        return token.type;
    }

    // From here on the current character is the escape character.
    if (!syntax.backSlashEscapeInCC()) {
        return token.type;
    }
    if (!left()) {
        throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE);
    }

    fetch();
    token.escaped = true;
    token.setC(c);

    // The upper-case variant of each class escape is the negated form.
    switch (c) {
    case 'w':
    case 'W':
        fetchTokenInCCFor_charType(c == 'W', Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
        break;
    case 'd':
    case 'D':
        fetchTokenInCCFor_charType(c == 'D', Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
        break;
    case 's':
    case 'S':
        fetchTokenInCCFor_charType(c == 'S', Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
        break;
    case 'h':
    case 'H': {
        final boolean negated = c == 'H';
        if (syntax.op2EscHXDigit()) {
            fetchTokenInCCFor_charType(negated, CharacterType.XDIGIT);
        }
        break;
    }
    case 'x':
        fetchTokenInCCFor_x();
        break;
    case 'u':
        fetchTokenInCCFor_u();
        break;
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
        fetchTokenInCCFor_digit();
        break;
    default: {
        // Re-read the escaped character; if it stands for a different value,
        // the token becomes a code point.
        unfetch();
        final int value = fetchEscapedValue();
        if (value != token.getC()) {
            token.setCode(value);
            token.type = TokenType.CODE_POINT;
        }
        break;
    }
    }

    return token.type;
}
 
Example #30
Source File: EncodingHelper.java    From jdk8u60 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Tests whether a code point is a member of the given character type.
 *
 * @see <a href="http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt">http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt</a>
 *
 * @param code the code point to classify
 * @param ctype the character type to test against (a {@code CharacterType} constant)
 *
 * @return {@code true} if {@code code} belongs to {@code ctype}
 * @throws RuntimeException if {@code ctype} is not one of the character types handled here
 */
public static boolean isCodeCType(final int code, final int ctype) {
    switch (ctype) {
        case CharacterType.NEWLINE:
            return isNewLine(code);

        case CharacterType.ALPHA: {
            final int categoryBit = 1 << Character.getType(code);
            return (categoryBit & CharacterType.ALPHA_MASK) != 0;
        }

        case CharacterType.BLANK:
            if (code == 0x09) {
                return true;
            }
            return Character.getType(code) == Character.SPACE_SEPARATOR;

        case CharacterType.CNTRL: {
            // Unassigned code points are counted as control characters.
            final int category = Character.getType(code);
            if (category == Character.UNASSIGNED) {
                return true;
            }
            return ((1 << category) & CharacterType.CNTRL_MASK) != 0;
        }

        case CharacterType.DIGIT:
            return EncodingHelper.isDigit(code);

        case CharacterType.GRAPH: {
            // 0x09..0x0d are never graphical.
            if (code >= 0x09 && code <= 0x0d) {
                return false;
            }
            final int category = Character.getType(code);
            return category != Character.UNASSIGNED
                    && ((1 << category) & CharacterType.GRAPH_MASK) == 0;
        }

        case CharacterType.LOWER:
            return Character.isLowerCase(code);

        case CharacterType.PRINT: {
            final int category = Character.getType(code);
            return category != Character.UNASSIGNED
                    && ((1 << category) & CharacterType.PRINT_MASK) == 0;
        }

        case CharacterType.PUNCT: {
            final int categoryBit = 1 << Character.getType(code);
            return (categoryBit & CharacterType.PUNCT_MASK) != 0;
        }

        case CharacterType.SPACE: {
            // ECMA 7.2 and 7.3
            if (code >= 0x09 && code <= 0x0d) {
                return true;
            }
            // The BOM is a space regardless of its category.
            if (code == 0xfeff) {
                return true;
            }
            // Otherwise true if Unicode separator.
            final int categoryBit = 1 << Character.getType(code);
            return (categoryBit & CharacterType.SPACE_MASK) != 0;
        }

        case CharacterType.UPPER:
            return Character.isUpperCase(code);

        case CharacterType.XDIGIT:
            return EncodingHelper.isXDigit(code);

        case CharacterType.WORD: {
            final int categoryBit = 1 << Character.getType(code);
            return (categoryBit & CharacterType.WORD_MASK) != 0;
        }

        case CharacterType.ALNUM: {
            final int categoryBit = 1 << Character.getType(code);
            return (categoryBit & CharacterType.ALNUM_MASK) != 0;
        }

        case CharacterType.ASCII:
            return code < 0x80;

        default:
            throw new RuntimeException("illegal character type: " + ctype);
    }
}