Java Code Examples for java.util.regex.Pattern#UNICODE_CHARACTER_CLASS

The following examples show how to use java.util.regex.Pattern#UNICODE_CHARACTER_CLASS. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: hop   File: ReplaceString.java    License: Apache License 2.0 5 votes vote down vote up
@VisibleForTesting
/*
 * Compiles the search expression into a Pattern.
 *
 *   literalParsing - treat patternString as plain text instead of a regular expression
 *   caseSensitive  - when false, Pattern.CASE_INSENSITIVE is set
 *   wholeWord      - when true, the expression is enclosed in \b word boundaries
 *   patternString  - the text or regular expression to search for
 *   isUnicode      - when true, Pattern.UNICODE_CHARACTER_CLASS is set
 *
 * Returns the compiled Pattern. Pattern.compile throws PatternSyntaxException when the
 * resulting expression is not valid.
 */
static Pattern buildPattern( boolean literalParsing, boolean caseSensitive, boolean wholeWord,
                             String patternString, boolean isUnicode ) {
  int flags = 0;
  if ( literalParsing && !wholeWord ) {
    // The entire expression is literal, so the LITERAL flag alone is sufficient.
    flags |= Pattern.LITERAL;
  }
  if ( !caseSensitive ) {
    flags |= Pattern.CASE_INSENSITIVE;
  }
  if ( isUnicode ) {
    flags |= Pattern.UNICODE_CHARACTER_CLASS;
  }

  /*
   * XXX: I don't like this parameter. I think it would almost always be better for the user to define either word
   * boundaries or ^/$ anchors explicitly in their pattern.
   */
  if ( wholeWord ) {
    String body;
    if ( literalParsing ) {
      // LITERAL would also neutralise the \b anchors, so only the user text is quoted. Pattern.quote is used
      // rather than a hand-written \Q...\E wrapper because it stays correct when the text itself contains "\E".
      body = Pattern.quote( patternString );
    } else {
      // A non-capturing group keeps a top-level alternation such as "cat|dog" inside both boundaries and does
      // not shift the numbering of the user's own capturing groups.
      body = "(?:" + patternString + ")";
    }
    patternString = "\\b" + body + "\\b";
  }

  return Pattern.compile( patternString, flags );
}
 
Example 2
Source Project: localization_nifi   File: ExtractText.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Assembles the {@link Pattern} compile-flag bitmask from the boolean properties of the given context.
 * Each property that evaluates to {@code true} contributes the {@code Pattern} constant of the same name.
 *
 * @param context the context whose properties are read
 * @return the bitwise OR of all enabled flags, or 0 when none is enabled
 */
int getCompileFlags(ProcessContext context) {
    int compileFlags = 0;
    if (context.getProperty(UNIX_LINES).asBoolean()) {
        compileFlags |= Pattern.UNIX_LINES;
    }
    if (context.getProperty(CASE_INSENSITIVE).asBoolean()) {
        compileFlags |= Pattern.CASE_INSENSITIVE;
    }
    if (context.getProperty(COMMENTS).asBoolean()) {
        compileFlags |= Pattern.COMMENTS;
    }
    if (context.getProperty(MULTILINE).asBoolean()) {
        compileFlags |= Pattern.MULTILINE;
    }
    if (context.getProperty(LITERAL).asBoolean()) {
        compileFlags |= Pattern.LITERAL;
    }
    if (context.getProperty(DOTALL).asBoolean()) {
        compileFlags |= Pattern.DOTALL;
    }
    if (context.getProperty(UNICODE_CASE).asBoolean()) {
        compileFlags |= Pattern.UNICODE_CASE;
    }
    if (context.getProperty(CANON_EQ).asBoolean()) {
        compileFlags |= Pattern.CANON_EQ;
    }
    if (context.getProperty(UNICODE_CHARACTER_CLASS).asBoolean()) {
        compileFlags |= Pattern.UNICODE_CHARACTER_CLASS;
    }
    return compileFlags;
}
 
Example 3
Source Project: Elasticsearch   File: RegexMatcher.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts a sequence of single-letter regex flags into a {@link Pattern} flag bitmask.
 * <p>
 * Recognised letters are {@code i}, {@code u}, {@code U}, {@code s}, {@code m}, {@code x} and {@code d};
 * every other character is silently skipped.
 *
 * @param flagsString the flag letters (decoded with {@code utf8ToString()}), may be {@code null}
 * @return the bitwise OR of the matching {@code Pattern} constants; 0 for {@code null} input
 */
public static int parseFlags(@Nullable BytesRef flagsString) {
    if (flagsString == null) {
        return 0;
    }

    final String letters = flagsString.utf8ToString();
    int mask = 0;
    for (int i = 0; i < letters.length(); i++) {
        final int bit;
        switch (letters.charAt(i)) {
            case 'i':
                bit = Pattern.CASE_INSENSITIVE;
                break;
            case 'u':
                bit = Pattern.UNICODE_CASE;
                break;
            case 'U':
                bit = Pattern.UNICODE_CHARACTER_CLASS;
                break;
            case 's':
                bit = Pattern.DOTALL;
                break;
            case 'm':
                bit = Pattern.MULTILINE;
                break;
            case 'x':
                bit = Pattern.COMMENTS;
                break;
            case 'd':
                bit = Pattern.UNIX_LINES;
                break;
            default:
                // Unknown letters contribute nothing.
                bit = 0;
                break;
        }
        mask |= bit;
    }
    return mask;
}
 
Example 4
Source Project: nifi   File: ExtractText.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds the {@link Pattern} compile flags by reading one boolean property per flag from the context.
 *
 * @param context the context whose properties are read
 * @return the combined flag bitmask; 0 when no property is enabled
 */
int getCompileFlags(ProcessContext context) {
    int mask = 0;
    mask |= context.getProperty(UNIX_LINES).asBoolean() ? Pattern.UNIX_LINES : 0;
    mask |= context.getProperty(CASE_INSENSITIVE).asBoolean() ? Pattern.CASE_INSENSITIVE : 0;
    mask |= context.getProperty(COMMENTS).asBoolean() ? Pattern.COMMENTS : 0;
    mask |= context.getProperty(MULTILINE).asBoolean() ? Pattern.MULTILINE : 0;
    mask |= context.getProperty(LITERAL).asBoolean() ? Pattern.LITERAL : 0;
    mask |= context.getProperty(DOTALL).asBoolean() ? Pattern.DOTALL : 0;
    mask |= context.getProperty(UNICODE_CASE).asBoolean() ? Pattern.UNICODE_CASE : 0;
    mask |= context.getProperty(CANON_EQ).asBoolean() ? Pattern.CANON_EQ : 0;
    mask |= context.getProperty(UNICODE_CHARACTER_CLASS).asBoolean() ? Pattern.UNICODE_CHARACTER_CLASS : 0;
    return mask;
}
 
Example 5
Source Project: pentaho-kettle   File: ReplaceString.java    License: Apache License 2.0 5 votes vote down vote up
@VisibleForTesting
/*
 * Builds the Pattern used to locate the text to replace.
 *
 *   literalParsing - patternString is plain text, not a regular expression
 *   caseSensitive  - Pattern.CASE_INSENSITIVE is added when this is false
 *   wholeWord      - the expression is surrounded by \b word boundaries when this is true
 *   patternString  - the text or regular expression to search for
 *   isUnicode      - Pattern.UNICODE_CHARACTER_CLASS is added when this is true
 *
 * Returns the compiled Pattern. An invalid expression makes Pattern.compile throw
 * PatternSyntaxException.
 */
static Pattern buildPattern( boolean literalParsing, boolean caseSensitive, boolean wholeWord,
  String patternString, boolean isUnicode ) {
  int flags = 0;
  if ( literalParsing && !wholeWord ) {
    // Nothing has to be added around the text, so the LITERAL flag can cover all of it.
    flags |= Pattern.LITERAL;
  }
  if ( !caseSensitive ) {
    flags |= Pattern.CASE_INSENSITIVE;
  }
  if ( isUnicode ) {
    flags |= Pattern.UNICODE_CHARACTER_CLASS;
  }

  /*
   * XXX: I don't like this parameter. I think it would almost always be better for the user to define either word
   * boundaries or ^/$ anchors explicitly in their pattern.
   */
  if ( wholeWord ) {
    // The \b anchors must stay regex syntax, so the LITERAL flag cannot be used here:
    //  - literal text is escaped with Pattern.quote, which remains correct even if the text contains "\E"
    //    (a plain \Q...\E wrapper would end the quoted section early);
    //  - a regular expression is wrapped in a non-capturing group so that a top-level alternation is
    //    bounded on both sides, without renumbering the caller's capturing groups.
    String inner = literalParsing ? Pattern.quote( patternString ) : "(?:" + patternString + ")";
    patternString = "\\b" + inner + "\\b";
  }

  return Pattern.compile( patternString, flags );
}
 
Example 6
Source Project: crate   File: RegexMatcher.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Parses a string of single-letter regex flags into a {@link Pattern} flag bitmask.
 * <p>
 * Supported letters are {@code i}, {@code u}, {@code U}, {@code s}, {@code m}, {@code x} and {@code d}.
 * A space and the letter {@code g} are accepted but contribute no bit.
 *
 * @param flagsString the flag letters, may be {@code null}
 * @return the bitwise OR of the matching {@code Pattern} constants; 0 for {@code null} input
 * @throws IllegalArgumentException if any other character is encountered
 */
public static int parseFlags(@Nullable String flagsString) {
    if (flagsString == null) {
        return 0;
    }

    int mask = 0;
    for (int idx = 0; idx < flagsString.length(); idx++) {
        final char letter = flagsString.charAt(idx);
        switch (letter) {
            case ' ':
            case 'g':
                // handled in isGlobalFunction
                continue;
            case 'i':
                mask |= Pattern.CASE_INSENSITIVE;
                break;
            case 'u':
                mask |= Pattern.UNICODE_CASE;
                break;
            case 'U':
                mask |= Pattern.UNICODE_CHARACTER_CLASS;
                break;
            case 's':
                mask |= Pattern.DOTALL;
                break;
            case 'm':
                mask |= Pattern.MULTILINE;
                break;
            case 'x':
                mask |= Pattern.COMMENTS;
                break;
            case 'd':
                mask |= Pattern.UNIX_LINES;
                break;
            default:
                throw new IllegalArgumentException("The regular expression flag is unknown: " + letter);
        }
    }
    return mask;
}
 
Example 7
Source Project: jphp   File: WrapRegex.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Turns a flags value into a regex flag bitmask.
 * <p>
 * When {@code _flags} is a number, or its string form is accepted by {@code StringMemory.toLong},
 * the result is {@code _flags.toInteger()}. Otherwise each character of the string form is looked up:
 * {@code i}, {@code m}, {@code L}, {@code d}, {@code u}, {@code U}, {@code x} and {@code s} add the
 * corresponding flag constant, and any other character is skipped.
 *
 * @param _flags the flags value to convert
 * @return the resulting flag bitmask
 */
private static int convertFlags(Memory _flags) {
    if (_flags.isNumber()) {
        return _flags.toInteger();
    }

    final String letters = _flags.toString();
    if (StringMemory.toLong(letters) != null) {
        // The string form is numeric, so it is used as the mask directly.
        return _flags.toInteger();
    }

    int mask = 0;
    for (char letter : letters.toCharArray()) {
        int bit = 0;
        switch (letter) {
            case 'i':
                bit = CASE_INSENSITIVE;
                break;
            case 'm':
                bit = MULTILINE;
                break;
            case 'L':
                bit = LITERAL;
                break;
            case 'd':
                bit = UNIX_LINES;
                break;
            case 'u':
                bit = UNICODE_CASE;
                break;
            case 'U':
                bit = Pattern.UNICODE_CHARACTER_CLASS;
                break;
            case 'x':
                bit = COMMENTS;
                break;
            case 's':
                bit = DOTALL;
                break;
        }
        mask |= bit;
    }
    return mask;
}