Java Code Examples for java.util.regex.Pattern#UNICODE_CHARACTER_CLASS

The following examples show how to use java.util.regex.Pattern#UNICODE_CHARACTER_CLASS. You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: ReplaceString.java    From hop with Apache License 2.0 5 votes vote down vote up
@VisibleForTesting
/*
 * Compiles patternString into a Pattern according to the supplied options.
 *
 * literalParsing - treat patternString as literal text instead of a regular expression
 * caseSensitive  - when false, Pattern.CASE_INSENSITIVE is set
 * wholeWord      - when true, the expression is enclosed in \b word-boundary anchors
 * patternString  - the text or regular expression to compile
 * isUnicode      - when true, Pattern.UNICODE_CHARACTER_CLASS is set
 *
 * Returns the compiled Pattern. Pattern.compile throws PatternSyntaxException if the
 * resulting expression is not valid.
 */
static Pattern buildPattern( boolean literalParsing, boolean caseSensitive, boolean wholeWord,
                             String patternString, boolean isUnicode ) {
  int flags = 0;
  // Pattern.LITERAL is only usable when no \b anchors are added, because it would turn them into literal text too.
  if ( literalParsing && !wholeWord ) {
    flags |= Pattern.LITERAL;
  }
  if ( !caseSensitive ) {
    flags |= Pattern.CASE_INSENSITIVE;
  }
  if ( isUnicode ) {
    flags |= Pattern.UNICODE_CHARACTER_CLASS;
  }

  /*
   * XXX: I don't like this parameter. I think it would almost always be better for the user to define either word
   * boundaries or ^/$ anchors explicitly in their pattern.
   */
  String expression = patternString;
  if ( wholeWord ) {
    // Literal text: Pattern.quote stays correct even when the text itself contains "\E".
    // Regular expression: the non-capturing group keeps a top-level alternation such as "cat|dog"
    // inside both anchors without changing capturing-group numbering.
    String core = literalParsing ? Pattern.quote( patternString ) : "(?:" + patternString + ")";
    expression = "\\b" + core + "\\b";
  }

  return Pattern.compile( expression, flags );
}
 
Example 2
Source File: ExtractText.java    From localization_nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the {@link Pattern} compile-flag mask from boolean properties read from the context.
 * Each property that evaluates to {@code true} contributes the Pattern constant of the same name.
 *
 * @param context the context whose properties are queried
 * @return the bitwise OR of all selected Pattern flags, or 0 when none is selected
 */
int getCompileFlags(ProcessContext context) {
    int mask = 0;
    if (context.getProperty(UNIX_LINES).asBoolean()) {
        mask |= Pattern.UNIX_LINES;
    }
    if (context.getProperty(CASE_INSENSITIVE).asBoolean()) {
        mask |= Pattern.CASE_INSENSITIVE;
    }
    if (context.getProperty(COMMENTS).asBoolean()) {
        mask |= Pattern.COMMENTS;
    }
    if (context.getProperty(MULTILINE).asBoolean()) {
        mask |= Pattern.MULTILINE;
    }
    if (context.getProperty(LITERAL).asBoolean()) {
        mask |= Pattern.LITERAL;
    }
    if (context.getProperty(DOTALL).asBoolean()) {
        mask |= Pattern.DOTALL;
    }
    if (context.getProperty(UNICODE_CASE).asBoolean()) {
        mask |= Pattern.UNICODE_CASE;
    }
    if (context.getProperty(CANON_EQ).asBoolean()) {
        mask |= Pattern.CANON_EQ;
    }
    if (context.getProperty(UNICODE_CHARACTER_CLASS).asBoolean()) {
        mask |= Pattern.UNICODE_CHARACTER_CLASS;
    }
    return mask;
}
 
Example 3
Source File: RegexMatcher.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a sequence of single-character regex flags into a {@link Pattern} flag mask.
 * Recognised characters are {@code i}, {@code u}, {@code U}, {@code s}, {@code m}, {@code x}
 * and {@code d}; any other character is ignored.
 *
 * @param flagsString the flag characters, may be {@code null}
 * @return the bitwise OR of the matching Pattern constants, or 0 for {@code null} input
 */
public static int parseFlags(@Nullable BytesRef flagsString) {
    if (flagsString == null) {
        return 0;
    }
    final String flagChars = flagsString.utf8ToString();
    int mask = 0;
    for (int i = 0; i < flagChars.length(); i++) {
        mask |= flagBit(flagChars.charAt(i));
    }
    return mask;
}

/** Maps one flag character to its Pattern constant; unrecognised characters yield 0. */
private static int flagBit(char flag) {
    switch (flag) {
        case 'i':
            return Pattern.CASE_INSENSITIVE;
        case 'u':
            return Pattern.UNICODE_CASE;
        case 'U':
            return Pattern.UNICODE_CHARACTER_CLASS;
        case 's':
            return Pattern.DOTALL;
        case 'm':
            return Pattern.MULTILINE;
        case 'x':
            return Pattern.COMMENTS;
        case 'd':
            return Pattern.UNIX_LINES;
        default:
            return 0;
    }
}
 
Example 4
Source File: ExtractText.java    From nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the {@link Pattern} compile flags enabled through boolean properties of the context.
 * A property that evaluates to {@code true} switches on the Pattern constant with the same name.
 *
 * @param context the context whose properties are queried
 * @return the combined Pattern flag bits, or 0 when no property is enabled
 */
int getCompileFlags(ProcessContext context) {
    int compileFlags = 0;
    if (context.getProperty(UNIX_LINES).asBoolean()) {
        compileFlags |= Pattern.UNIX_LINES;
    }
    if (context.getProperty(CASE_INSENSITIVE).asBoolean()) {
        compileFlags |= Pattern.CASE_INSENSITIVE;
    }
    if (context.getProperty(COMMENTS).asBoolean()) {
        compileFlags |= Pattern.COMMENTS;
    }
    if (context.getProperty(MULTILINE).asBoolean()) {
        compileFlags |= Pattern.MULTILINE;
    }
    if (context.getProperty(LITERAL).asBoolean()) {
        compileFlags |= Pattern.LITERAL;
    }
    if (context.getProperty(DOTALL).asBoolean()) {
        compileFlags |= Pattern.DOTALL;
    }
    if (context.getProperty(UNICODE_CASE).asBoolean()) {
        compileFlags |= Pattern.UNICODE_CASE;
    }
    if (context.getProperty(CANON_EQ).asBoolean()) {
        compileFlags |= Pattern.CANON_EQ;
    }
    if (context.getProperty(UNICODE_CHARACTER_CLASS).asBoolean()) {
        compileFlags |= Pattern.UNICODE_CHARACTER_CLASS;
    }
    return compileFlags;
}
 
Example 5
Source File: ReplaceString.java    From pentaho-kettle with Apache License 2.0 5 votes vote down vote up
@VisibleForTesting
/*
 * Compiles patternString into a Pattern according to the supplied options.
 *
 * literalParsing - treat patternString as literal text instead of a regular expression
 * caseSensitive  - when false, Pattern.CASE_INSENSITIVE is set
 * wholeWord      - when true, the expression is enclosed in \b word-boundary anchors
 * patternString  - the text or regular expression to compile
 * isUnicode      - when true, Pattern.UNICODE_CHARACTER_CLASS is set
 *
 * Returns the compiled Pattern. Pattern.compile throws PatternSyntaxException if the
 * resulting expression is not valid.
 */
static Pattern buildPattern( boolean literalParsing, boolean caseSensitive, boolean wholeWord,
  String patternString, boolean isUnicode ) {
  int flags = 0;
  // Pattern.LITERAL is only usable when no \b anchors are added, because it would turn them into literal text too.
  if ( literalParsing && !wholeWord ) {
    flags |= Pattern.LITERAL;
  }
  if ( !caseSensitive ) {
    flags |= Pattern.CASE_INSENSITIVE;
  }
  if ( isUnicode ) {
    flags |= Pattern.UNICODE_CHARACTER_CLASS;
  }

  /*
   * XXX: I don't like this parameter. I think it would almost always be better for the user to define either word
   * boundaries or ^/$ anchors explicitly in their pattern.
   */
  String expression = patternString;
  if ( wholeWord ) {
    // Literal text: Pattern.quote stays correct even when the text itself contains "\E".
    // Regular expression: the non-capturing group keeps a top-level alternation such as "cat|dog"
    // inside both anchors without changing capturing-group numbering.
    String core = literalParsing ? Pattern.quote( patternString ) : "(?:" + patternString + ")";
    expression = "\\b" + core + "\\b";
  }

  return Pattern.compile( expression, flags );
}
 
Example 6
Source File: RegexMatcher.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a string of single-character regex flags into a {@link Pattern} flag mask.
 * Recognised characters are {@code i}, {@code u}, {@code U}, {@code s}, {@code m}, {@code x}
 * and {@code d}. A space and {@code g} are accepted but add nothing to the mask.
 *
 * @param flagsString the flag characters, may be {@code null}
 * @return the bitwise OR of the matching Pattern constants, or 0 for {@code null} input
 * @throws IllegalArgumentException if any other character is encountered
 */
public static int parseFlags(@Nullable String flagsString) {
    if (flagsString == null) {
        return 0;
    }
    int mask = 0;
    final int count = flagsString.length();
    for (int pos = 0; pos < count; pos++) {
        final char flag = flagsString.charAt(pos);
        switch (flag) {
            case 'i':
                mask |= Pattern.CASE_INSENSITIVE;
                break;
            case 'u':
                mask |= Pattern.UNICODE_CASE;
                break;
            case 'U':
                mask |= Pattern.UNICODE_CHARACTER_CLASS;
                break;
            case 's':
                mask |= Pattern.DOTALL;
                break;
            case 'm':
                mask |= Pattern.MULTILINE;
                break;
            case 'x':
                mask |= Pattern.COMMENTS;
                break;
            case 'd':
                mask |= Pattern.UNIX_LINES;
                break;
            case ' ':
            case 'g':
                // handled in isGlobalFunction
                break;
            default:
                throw new IllegalArgumentException("The regular expression flag is unknown: " + flag);
        }
    }
    return mask;
}
 
Example 7
Source File: WrapRegex.java    From jphp with Apache License 2.0 4 votes vote down vote up
/**
 * Turns a flags value into a regex flag mask.
 * <p>
 * When {@code _flags} reports itself as a number, or {@code StringMemory.toLong} returns a
 * non-null value for its string form, the result of {@code _flags.toInteger()} is returned
 * unchanged. Otherwise every character of the string form is looked up: {@code i}, {@code m},
 * {@code L}, {@code d}, {@code u}, {@code U}, {@code x} and {@code s} each add one flag;
 * any other character is ignored.
 *
 * @param _flags the numeric or textual flags value
 * @return the resulting flag mask
 */
private static int convertFlags(Memory _flags) {
    if (_flags.isNumber()) {
        return _flags.toInteger();
    }

    final String text = _flags.toString();
    if (StringMemory.toLong(text) != null) {
        return _flags.toInteger();
    }

    // NOTE(review): the unqualified constants are presumably static imports from Pattern - confirm.
    int mask = 0;
    for (char symbol : text.toCharArray()) {
        switch (symbol) {
            case 'i':
                mask |= CASE_INSENSITIVE;
                break;
            case 'm':
                mask |= MULTILINE;
                break;
            case 'L':
                mask |= LITERAL;
                break;
            case 'd':
                mask |= UNIX_LINES;
                break;
            case 'u':
                mask |= UNICODE_CASE;
                break;
            case 'U':
                mask |= Pattern.UNICODE_CHARACTER_CLASS;
                break;
            case 'x':
                mask |= COMMENTS;
                break;
            case 's':
                mask |= DOTALL;
                break;
        }
    }
    return mask;
}