org.apache.oro.text.regex.PatternMatcher Java Examples

The following examples show how to use org.apache.oro.text.regex.PatternMatcher. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: RegularMatch.java    From anyline with Apache License 2.0 6 votes vote down vote up
/**
 * Extracts the capture groups of {@code src} when the whole string matches {@code regx}.
 *
 * <p>The expression is compiled with {@code Perl5Compiler.DEFAULT_MASK} and tested once
 * against the complete input. On success the result holds a single inner list containing
 * group 0 (the whole match) followed by every capture group.
 *
 * @param src	input string
 * @param regx	regular expression
 * @return a list with at most one entry (the groups of the match); empty when the input
 *         does not match or when compiling/matching throws
 */
public List<List<String>> fetchs(String src, String regx){
	List<List<String>> list = new ArrayList<List<String>>();
	try{
		Pattern pattern = patternCompiler.compile(regx, Perl5Compiler.DEFAULT_MASK);
		PatternMatcher matcher = new Perl5Matcher();
		PatternMatcherInput input = new PatternMatcherInput(src);
		// matches(PatternMatcherInput, Pattern) does not advance the input's current
		// offset, so looping on it would repeat the same match forever. Test it once.
		if(matcher.matches(input, pattern)){
			MatchResult matchResult = matcher.getMatch();
			int groups = matchResult.groups();
			List<String> item = new ArrayList<String>(groups);
			// groups() already counts group 0, so the valid indices are 0..groups-1.
			for(int i=0; i<groups; i++){
				item.add(matchResult.group(i));
			}
			list.add(item);
		}
	}catch(Exception e){
		// Best effort: failures yield an empty result and are only reported in debug mode.
		if(ConfigTable.isDebug() && log.isWarnEnabled()){
			log.warn("[fetch error][src:{}][regx:{}]", src, regx, e);
		}
	}
	return list;
}
 
Example #2
Source File: reReplace.java    From openbd-core with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Replaces the first or every occurrence of a regular expression in a string.
 *
 * @param _theString      text to operate on
 * @param _theRE          regular expression, compiled in single-line mode
 * @param _theSubstr      replacement text; passed through {@code processSubstr} first
 * @param _casesensitive  when false the expression is compiled case-insensitively
 * @param _replaceAll     true to replace every match, false to replace only one
 * @return the string produced by the substitution
 * @throws cfmRunTimeException if {@code _theRE} is not a valid expression
 */
protected String doRereplace( String _theString, String _theRE, String _theSubstr, boolean _casesensitive, boolean _replaceAll ) throws cfmRunTimeException{
	// Single-line mode is always on; case-insensitivity is added on request.
	int compileMask = Perl5Compiler.SINGLELINE_MASK;
	if ( !_casesensitive ){
		compileMask = Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.SINGLELINE_MASK;
	}

	PatternCompiler regexCompiler = new Perl5Compiler();
	Pattern compiled;
	try {
		compiled = regexCompiler.compile( _theRE, compileMask );
	} catch(MalformedPatternException e){
		// The expression is supplied by the caller, so report a malformed one as a function error.
		cfCatchData failure = new cfCatchData();
		failure.setType( "Function" );
		failure.setMessage( "Internal Error" );
		failure.setDetail( "Invalid regular expression ( " + _theRE + " )" );
		throw new cfmRunTimeException( failure );
	}

	int limit = _replaceAll ? Util.SUBSTITUTE_ALL : 1;
	PatternMatcher regexMatcher = new Perl5Matcher();
	Perl5Substitution replacement = new Perl5Substitution( processSubstr( _theSubstr ) );
	return Util.substitute( regexMatcher, compiled, replacement, _theString, limit );
}
 
Example #3
Source File: Regexp.java    From scipio-erp with Apache License 2.0 6 votes vote down vote up
/**
 * Validates the configured field of {@code inMap} against this operation's pattern.
 *
 * <p>The field value is converted to a String first. A conversion failure or a missing
 * compiled pattern is reported through {@code messages} and ends the check. Otherwise a
 * value that does not match the pattern in full triggers {@code addMessage}.
 */
@Override
public void exec(Map<String, Object> inMap, Map<String, Object> results, List<Object> messages, Locale locale, ClassLoader loader) {
    final String fieldValue;
    try {
        Object raw = inMap.get(fieldName);
        fieldValue = (String) ObjectType.simpleTypeConvert(raw, "String", null, locale);
    } catch (GeneralException e) {
        messages.add("Could not convert field value for comparison: " + e.getMessage());
        return;
    }

    // A null pattern means the expression could not be compiled earlier.
    if (pattern == null) {
        messages.add("Could not compile regular expression \"" + expr + "\" for validation");
        return;
    }

    PatternMatcher matcher = new Perl5Matcher();
    boolean accepted = matcher.matches(fieldValue, pattern);
    if (!accepted) {
        addMessage(messages, loader, locale);
    }
}
 
Example #4
Source File: RegxpContain.java    From anyline with Apache License 2.0 6 votes vote down vote up
/**
 * Collects one capture group from every occurrence of {@code regx} inside {@code src}.
 *
 * <p>The expression is compiled case-insensitively and the input is scanned with
 * repeated {@code contains} calls.
 *
 * @param src		input string
 * @param regx		regular expression
 * @param idx		index of the group to extract from each match
 * @return the extracted group of each match, in match order
 * @throws Exception any failure raised while compiling or matching, after it is logged
 */
public List<String> fetch(String src, String regx, int idx) throws Exception{
	final List<String> extracted = new ArrayList<String>();
	try{
		final Pattern compiled = patternCompiler.compile(regx, Perl5Compiler.CASE_INSENSITIVE_MASK);
		final PatternMatcher finder = new Perl5Matcher();
		final PatternMatcherInput cursor = new PatternMatcherInput(src);
		while(finder.contains(cursor, compiled)){
			extracted.add(finder.getMatch().group(idx));
		}
		return extracted;
	}catch(Exception e){
		// Report, then hand the failure to the caller.
		log.error("[提取异常][src:{}][reg:{}]", src, regx);
		e.printStackTrace();
		throw e;
	}
}
 
Example #5
Source File: RegxpContain.java    From anyline with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the groups of every occurrence of {@code regx} inside {@code src}.
 *
 * <p>The expression is compiled case-insensitively. Each match contributes one inner
 * list holding group 0 followed by the capture groups.
 *
 * @param src	input string
 * @param regx	regular expression
 * @return one group list per match; whatever was gathered before a failure is still returned
 */
public List<List<String>> fetchs(String src, String regx){
	final List<List<String>> rows = new ArrayList<List<String>>();
	try{
		final Pattern compiled = patternCompiler.compile(regx, Perl5Compiler.CASE_INSENSITIVE_MASK);
		final PatternMatcher finder = new Perl5Matcher();
		final PatternMatcherInput cursor = new PatternMatcherInput(src);
		while(finder.contains(cursor, compiled)){
			final MatchResult hit = finder.getMatch();
			final int groupCount = hit.groups();
			final List<String> row = new ArrayList<String>();
			int g = 0;
			while(g < groupCount){
				row.add(hit.group(g));
				g++;
			}
			rows.add(row);
		}
	}catch(Exception e){
		// Failures are logged and swallowed; the caller gets the partial result.
		log.error("[提取异常][src:{}][reg:{}]", src, regx);
		e.printStackTrace();
	}
	return rows;
}
 
Example #6
Source File: RegularContain.java    From anyline with Apache License 2.0 6 votes vote down vote up
/**
 * Collects one capture group from every occurrence of {@code regx} inside {@code src}.
 *
 * <p>The expression is compiled case-insensitively and the input is scanned with
 * repeated {@code contains} calls.
 *
 * @param src		input string
 * @param regx		regular expression
 * @param idx		index of the group to extract from each match
 * @return the extracted group of each match, in match order
 * @throws Exception any failure raised while compiling or matching, after it is logged
 */
public List<String> fetch(String src, String regx, int idx) throws Exception{
	final List<String> extracted = new ArrayList<String>();
	try{
		final Pattern compiled = patternCompiler.compile(regx, Perl5Compiler.CASE_INSENSITIVE_MASK);
		final PatternMatcher finder = new Perl5Matcher();
		final PatternMatcherInput cursor = new PatternMatcherInput(src);
		while(finder.contains(cursor, compiled)){
			extracted.add(finder.getMatch().group(idx));
		}
		return extracted;
	}catch(Exception e){
		log.error("[提取异常][src:{}][reg:{}]", src, regx);
		// The stack trace is printed only in debug mode with warnings enabled.
		boolean verbose = ConfigTable.isDebug() && log.isWarnEnabled();
		if(verbose){
			e.printStackTrace();
		}
		throw e;
	}
}
 
Example #7
Source File: RegularContain.java    From anyline with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the groups of every occurrence of {@code regx} inside {@code src}.
 *
 * <p>The expression is compiled case-insensitively. Each match contributes one inner
 * list holding group 0 followed by the capture groups.
 *
 * @param src	input string
 * @param regx	regular expression
 * @return one group list per match; whatever was gathered before a failure is still returned
 */
public List<List<String>> fetchs(String src, String regx){
	final List<List<String>> rows = new ArrayList<List<String>>();
	try{
		final Pattern compiled = patternCompiler.compile(regx, Perl5Compiler.CASE_INSENSITIVE_MASK);
		final PatternMatcher finder = new Perl5Matcher();
		final PatternMatcherInput cursor = new PatternMatcherInput(src);
		while(finder.contains(cursor, compiled)){
			final MatchResult hit = finder.getMatch();
			final int groupCount = hit.groups();
			final List<String> row = new ArrayList<String>();
			int g = 0;
			while(g < groupCount){
				row.add(hit.group(g));
				g++;
			}
			rows.add(row);
		}
	}catch(Exception e){
		log.error("[提取异常][src:{}][reg:{}]", src, regx);
		// The stack trace is printed only in debug mode with warnings enabled.
		boolean verbose = ConfigTable.isDebug() && log.isWarnEnabled();
		if(verbose){
			e.printStackTrace();
		}
	}
	return rows;
}
 
Example #8
Source File: RegularMatchPrefix.java    From anyline with Apache License 2.0 6 votes vote down vote up
/**
 * Extracts one capture group when a prefix of {@code src} matches {@code regx}.
 *
 * <p>The expression is compiled case-insensitively and tested once at the start of the
 * input.
 *
 * @param src		input string
 * @param regx		regular expression
 * @param idx		index of the group to extract
 * @return a list holding the requested group of the prefix match (the element is whatever
 *         {@code MatchResult.group(idx)} yields); empty when no prefix matches or when
 *         compiling/matching throws
 */
public List<String> fetch(String src, String regx, int idx){
	List<String> list = new ArrayList<String>();

	try{
		Pattern pattern = patternCompiler.compile(regx, Perl5Compiler.CASE_INSENSITIVE_MASK);
		PatternMatcher matcher = new Perl5Matcher();
		PatternMatcherInput input = new PatternMatcherInput(src);
		// matchesPrefix(PatternMatcherInput, Pattern) does not advance the input's current
		// offset, so looping on it would repeat the same match forever. Test it once.
		if(matcher.matchesPrefix(input, pattern)){
			MatchResult matchResult = matcher.getMatch();
			list.add(matchResult.group(idx));
		}
	}catch(Exception e){
		// Best effort: failures yield an empty result and are only reported in debug mode.
		if(ConfigTable.isDebug() && log.isWarnEnabled()){
			log.warn("[fetch error][src:{}][regx:{}]", src, regx, e);
		}
	}
	return list;
}
 
Example #9
Source File: RegularMatchPrefix.java    From anyline with Apache License 2.0 6 votes vote down vote up
/**
 * Extracts the capture groups when a prefix of {@code src} matches {@code regx}.
 *
 * <p>The expression is compiled case-insensitively and tested once at the start of the
 * input. On success the result holds a single inner list containing group 0 (the matched
 * prefix) followed by every capture group.
 *
 * @param src	input string
 * @param regx	regular expression
 * @return a list with at most one entry (the groups of the prefix match); empty when no
 *         prefix matches or when compiling/matching throws
 */
public List<List<String>> fetchs(String src, String regx){
	List<List<String>> list = new ArrayList<List<String>>();
	try{
		Pattern pattern = patternCompiler.compile(regx, Perl5Compiler.CASE_INSENSITIVE_MASK);
		PatternMatcher matcher = new Perl5Matcher();
		PatternMatcherInput input = new PatternMatcherInput(src);
		// matchesPrefix(PatternMatcherInput, Pattern) does not advance the input's current
		// offset, so looping on it would repeat the same match forever. Test it once.
		if(matcher.matchesPrefix(input, pattern)){
			MatchResult matchResult = matcher.getMatch();
			int groups = matchResult.groups();
			List<String> item = new ArrayList<String>(groups);
			// groups() already counts group 0, so the valid indices are 0..groups-1.
			for(int i=0; i<groups; i++){
				item.add(matchResult.group(i));
			}
			list.add(item);
		}
	}catch(Exception e){
		// Best effort: failures yield an empty result and are only reported in debug mode.
		if(ConfigTable.isDebug() && log.isWarnEnabled()){
			log.warn("[fetch error][src:{}][regx:{}]", src, regx, e);
		}
	}
	return list;
}
 
Example #10
Source File: RegularUtil.java    From anyline with Apache License 2.0 6 votes vote down vote up
/**
 * Finds the start offset of the first match of {@code regx} in {@code src} that begins
 * at or after {@code begin}.
 *
 * <p>The expression is compiled case-insensitively and the whole input is scanned from
 * its start; matches that begin before {@code begin} are skipped.
 *
 * @param src     input string
 * @param regx    regular expression
 * @param begin   smallest acceptable start offset
 * @return the start offset of the first qualifying match, or -1 when there is none or
 *         when compiling/matching throws
 */
public static int indexOf(String src, String regx, int begin){
	try{
		final PatternCompiler compiler = new Perl5Compiler();
		final Pattern compiled = compiler.compile(regx, Perl5Compiler.CASE_INSENSITIVE_MASK);
		final PatternMatcher finder = new Perl5Matcher();
		final PatternMatcherInput cursor = new PatternMatcherInput(src);

		while(finder.contains(cursor, compiled)){
			final int offset = finder.getMatch().beginOffset(0);
			// Only a match starting at or after begin counts.
			if(offset >= begin){
				return offset;
			}
		}
	}catch(Exception e){
		log.error("[提取异常][src:{}][regx:{}]", src, regx);
		e.printStackTrace();
	}
	return -1;
}
 
Example #11
Source File: RegularMatch.java    From anyline with Apache License 2.0 6 votes vote down vote up
/**
 * Extracts one capture group when the whole of {@code src} matches {@code regx}.
 *
 * <p>The expression is compiled with {@code Perl5Compiler.DEFAULT_MASK} and tested once
 * against the complete input.
 *
 * @param src		input string
 * @param regx		regular expression
 * @param idx		index of the group to extract
 * @return a list holding the requested group of the match (the element is whatever
 *         {@code MatchResult.group(idx)} yields); empty when the input does not match or
 *         when compiling/matching throws
 */
public List<String> fetch(String src, String regx, int idx){
	List<String> list = new ArrayList<String>();

	try{
		Pattern pattern = patternCompiler.compile(regx, Perl5Compiler.DEFAULT_MASK);
		PatternMatcher matcher = new Perl5Matcher();
		PatternMatcherInput input = new PatternMatcherInput(src);
		// matches(PatternMatcherInput, Pattern) does not advance the input's current
		// offset, so looping on it would repeat the same match forever. Test it once.
		if(matcher.matches(input, pattern)){
			MatchResult matchResult = matcher.getMatch();
			list.add(matchResult.group(idx));
		}
	}catch(Exception e){
		// Best effort: failures yield an empty result and are only reported in debug mode.
		if(ConfigTable.isDebug() && log.isWarnEnabled()){
			log.warn("[fetch error][src:{}][regx:{}]", src, regx, e);
		}
	}
	return list;
}
 
Example #12
Source File: PathManager.java    From consulo with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the class path entries needed by the utility modules.
 *
 * <p>One representative class per required library is resolved to its jar path via
 * {@code getJarPathForClass}; unresolved entries are skipped. The resource root that
 * holds {@code /messages/CommonBundle.properties} is added when it can be found.
 *
 * @return an unmodifiable collection of the distinct paths
 */
@Nonnull
public static Collection<String> getUtilClassPath() {
  final Set<String> entries = new HashSet<String>();

  // One anchor class per library whose jar must be on the class path.
  final Class<?>[] anchors = {
          PathManager.class,            // module 'util'
          Nonnull.class,                // module 'annotations'
          SystemInfoRt.class,           // module 'util-rt'
          Document.class,               // jDOM
          THashSet.class,               // trove4j
          TypeMapper.class,             // JNA
          FileUtils.class,              // JNA (jna-platform)
          PatternMatcher.class          // OROMatcher
  };
  for (Class<?> anchor : anchors) {
    final String jarPath = getJarPathForClass(anchor);
    if (jarPath == null) {
      continue;
    }
    entries.add(jarPath);
  }

  // platform-resources-en
  final String resourceRoot = getResourceRoot(PathManager.class, "/messages/CommonBundle.properties");
  if (resourceRoot != null) {
    final File rootDir = new File(resourceRoot);
    entries.add(rootDir.getAbsolutePath());
  }

  return Collections.unmodifiableCollection(entries);
}
 
Example #13
Source File: RegularContain.java    From anyline with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether {@code regx} occurs anywhere in {@code src}.
 *
 * <p>The expression is compiled case-insensitively.
 *
 * @param src   input string
 * @param regx  regular expression
 * @return true when a match is found; false when there is none or when
 *         compiling/matching throws
 */
public boolean match(String src, String regx){
	try{
		final Pattern compiled = patternCompiler.compile(regx, Perl5Compiler.CASE_INSENSITIVE_MASK);
		final PatternMatcher finder = new Perl5Matcher();
		return finder.contains(src, compiled);
	}catch(Exception e){
		// The stack trace is printed only in debug mode with warnings enabled.
		if(ConfigTable.isDebug() && log.isWarnEnabled()){
			e.printStackTrace();
		}
		return false;
	}
}
 
Example #14
Source File: RegexpStressTest.java    From common-utils with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Runs every pattern against every data value once and asserts that the number of hits
 * equals the {@code matchedCount} recorded on the enclosing {@code RegexpStressTest}.
 */
public void warmUp() {
    final PatternMatcher finder = new Perl5Matcher();
    int hits = 0;

    for (Pattern candidate : patterns) {
        for (String sample : data) {
            hits += finder.contains(sample, candidate) ? 1 : 0;
        }
    }

    assertEquals(RegexpStressTest.this.matchedCount, hits);
}
 
Example #15
Source File: RegexpStressTest.java    From common-utils with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Runs every pattern against every data value, repeating the full sweep {@code loop}
 * times. Match results are discarded.
 *
 * @param loop number of sweeps; a value of zero or less performs no work
 */
public void run(int loop) {
    final PatternMatcher finder = new Perl5Matcher();

    int round = 0;
    while (round < loop) {
        for (Pattern candidate : patterns) {
            for (String sample : data) {
                finder.contains(sample, candidate);
            }
        }
        round++;
    }
}
 
Example #16
Source File: GlobRegExpMatching.java    From j2ssh-maverick with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Compiles {@code fileNameRegExp} with a {@code GlobCompiler} and returns the files whose
 * names match it in full.
 *
 * <p>Entries named "." or ".." and directories are never returned.
 *
 * @param files           candidate files
 * @param fileNameRegExp  glob expression applied to each file name
 * @return the matching files, in their original order
 * @throws SshException if the expression cannot be compiled
 * @throws SftpStatusException declared by the signature; not thrown directly by this method
 */
public SftpFile[] matchFilesWithPattern(SftpFile[] files,
		String fileNameRegExp) throws SftpStatusException, SshException {
	final PatternCompiler globCompiler = new GlobCompiler();
	final PatternMatcher nameMatcher = new Perl5Matcher();

	// An invalid expression is reported to the caller as an API usage error.
	final Pattern compiled;
	try {
		compiled = globCompiler.compile(fileNameRegExp);
	} catch (MalformedPatternException e) {
		throw new SshException("Invalid regular expression:"
				+ e.getMessage(), SshException.BAD_API_USAGE);
	}

	final Vector<SftpFile> accepted = new Vector<SftpFile>();
	for (SftpFile candidate : files) {
		final String name = candidate.getFilename();
		// Skip the pseudo entries and anything that is a directory.
		if (name.equals(".") || name.equals("..") || candidate.isDirectory()) {
			continue;
		}
		if (nameMatcher.matches(name, compiled)) {
			accepted.addElement(candidate);
		}
	}

	return accepted.toArray(new SftpFile[accepted.size()]);
}
 
Example #17
Source File: GlobRegExpMatching.java    From j2ssh-maverick with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Compiles {@code fileNameRegExp} with a {@code GlobCompiler} and returns the absolute
 * paths of the files whose names match it in full.
 *
 * <p>Entries named "." or ".." are never returned. Directories are not filtered out here.
 *
 * @param files           candidate files
 * @param fileNameRegExp  glob expression applied to each file name
 * @return absolute paths of the matching files, in their original order
 * @throws SshException if the expression cannot be compiled
 */
public String[] matchFileNamesWithPattern(File[] files,
		String fileNameRegExp) throws SshException {
	final PatternCompiler globCompiler = new GlobCompiler();
	final PatternMatcher nameMatcher = new Perl5Matcher();

	// An invalid expression is reported to the caller as an API usage error.
	final Pattern compiled;
	try {
		compiled = globCompiler.compile(fileNameRegExp);
	} catch (MalformedPatternException e) {
		throw new SshException("Invalid regular expression:"
				+ e.getMessage(), SshException.BAD_API_USAGE);
	}

	final Vector<String> accepted = new Vector<String>();
	for (File candidate : files) {
		final String name = candidate.getName();
		if (name.equals(".") || name.equals("..")) {
			continue;
		}
		if (nameMatcher.matches(name, compiled)) {
			accepted.addElement(candidate.getAbsolutePath());
		}
	}

	return accepted.toArray(new String[accepted.size()]);
}
 
Example #18
Source File: Perl5RegExpMatching.java    From j2ssh-maverick with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Compiles {@code fileNameRegExp} with a {@code Perl5Compiler} and returns the files
 * whose names match it in full.
 *
 * <p>Entries named "." or ".." and directories are never returned.
 *
 * @param files           candidate files
 * @param fileNameRegExp  regular expression applied to each file name
 * @return the matching files, in their original order
 * @throws SshException if the expression cannot be compiled
 * @throws SftpStatusException declared by the signature; not thrown directly by this method
 */
public SftpFile[] matchFilesWithPattern(SftpFile[] files,
		String fileNameRegExp) throws SftpStatusException, SshException {
	final PatternCompiler perlCompiler = new Perl5Compiler();
	final PatternMatcher nameMatcher = new Perl5Matcher();

	// An invalid expression is reported to the caller as an API usage error.
	final Pattern compiled;
	try {
		compiled = perlCompiler.compile(fileNameRegExp);
	} catch (MalformedPatternException e) {
		throw new SshException("Invalid regular expression:"
				+ e.getMessage(), SshException.BAD_API_USAGE);
	}

	final Vector<SftpFile> accepted = new Vector<SftpFile>();
	for (SftpFile candidate : files) {
		final String name = candidate.getFilename();
		// Skip the pseudo entries and anything that is a directory.
		if (name.equals(".") || name.equals("..") || candidate.isDirectory()) {
			continue;
		}
		if (nameMatcher.matches(name, compiled)) {
			accepted.addElement(candidate);
		}
	}

	return accepted.toArray(new SftpFile[accepted.size()]);
}
 
Example #19
Source File: Perl5RegExpMatching.java    From j2ssh-maverick with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Compiles {@code fileNameRegExp} with a {@code Perl5Compiler} and returns the names of
 * the files that match it in full.
 *
 * <p>Entries named "." or ".." and directories are never returned.
 *
 * @param files           candidate files
 * @param fileNameRegExp  regular expression applied to each file name
 * @return names (not paths) of the matching files, in their original order
 * @throws SshException if the expression cannot be compiled
 */
public String[] matchFileNamesWithPattern(File[] files,
		String fileNameRegExp) throws SshException {
	final PatternCompiler perlCompiler = new Perl5Compiler();
	final PatternMatcher nameMatcher = new Perl5Matcher();

	// An invalid expression is reported to the caller as an API usage error.
	final Pattern compiled;
	try {
		compiled = perlCompiler.compile(fileNameRegExp);
	} catch (MalformedPatternException e) {
		throw new SshException("Invalid regular expression:"
				+ e.getMessage(), SshException.BAD_API_USAGE);
	}

	final Vector<String> accepted = new Vector<String>();
	for (File candidate : files) {
		final String name = candidate.getName();
		// Skip the pseudo entries and anything that is a directory.
		if (name.equals(".") || name.equals("..") || candidate.isDirectory()) {
			continue;
		}
		if (nameMatcher.matches(name, compiled)) {
			accepted.addElement(name);
		}
	}

	return accepted.toArray(new String[accepted.size()]);
}
 
Example #20
Source File: RegularMatch.java    From anyline with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether the whole of {@code src} matches {@code regx}.
 *
 * <p>The expression is compiled with {@code Perl5Compiler.DEFAULT_MASK}.
 *
 * @param src   input string
 * @param regx  regular expression
 * @return true on a full match; false otherwise or when compiling/matching throws
 */
public boolean match(String src, String regx){
	try{
		final Pattern compiled = patternCompiler.compile(regx, Perl5Compiler.DEFAULT_MASK);
		final PatternMatcher tester = new Perl5Matcher();
		return tester.matches(src, compiled);
	}catch(Exception e){
		// Any failure is logged and treated as "no match".
		log.error("[match error][src:{}][regx:{}]", src, regx);
		e.printStackTrace();
		return false;
	}
}
 
Example #21
Source File: string.java    From openbd-core with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Tests whether {@code str} matches the expression {@code re} in full.
 *
 * <p>The expression is compiled in single-line mode.
 *
 * @param str text to test
 * @param re  regular expression
 * @return true when the input matches the expression
 * @throws MalformedPatternException if {@code re} cannot be compiled
 */
public static boolean regexMatches(String str, String re) throws MalformedPatternException {
	// Wrap the input before compiling, as the original does.
	final PatternMatcherInput subject = new PatternMatcherInput(str);
	final PatternCompiler regexCompiler = new Perl5Compiler();
	final Pattern compiled = regexCompiler.compile(re, Perl5Compiler.SINGLELINE_MASK);

	final PatternMatcher tester = new Perl5Matcher();
	return tester.matches(subject, compiled);
}
 
Example #22
Source File: string.java    From openbd-core with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Escapes entity-like ampersand sequences and the characters {@code <}, {@code >} and
 * {@code "} in {@code str}.
 *
 * <p>Every occurrence of the entity expression is rewritten as {@code &amp;} followed by
 * the text captured after the ampersand. The three characters are then replaced through
 * {@code replaceChars}.
 *
 * @param str text to escape
 * @return the escaped text, or {@code str} unchanged if any step throws
 */
public static String escapeHtml(String str) {
	final String escaped;
	try {
		final PatternCompiler regexCompiler = new Perl5Compiler();
		final int mask = Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.SINGLELINE_MASK;
		final Pattern entity = regexCompiler.compile("&(([a-z][a-zA-Z0-9]*)|(#\\d{2,6});)", mask);

		final PatternMatcher regexMatcher = new Perl5Matcher();
		final String ampersandsDone = Util.substitute(regexMatcher, entity, new Perl5Substitution("&amp;$1"), str, Util.SUBSTITUTE_ALL);

		final char[] specials = new char[] { '<', '>', '\"' };
		final String[] replacements = new String[] { "&lt;", "&gt;", "&quot;" };
		escaped = replaceChars(ampersandsDone, specials, replacements);
	} catch (Exception e) {
		// Not expected with the fixed expression; fall back to the input.
		return str;
	}
	return escaped;
}
 
Example #23
Source File: JavaSource.java    From yugong with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Returns the first occurrence of a pattern in {@code originalStr}, trimmed.
 *
 * <p>The compiled pattern is looked up in {@code patterns} using {@code regex} as the key.
 *
 * @param originalStr text to search
 * @param regex       key of the pattern to apply
 * @return the trimmed text of the first match; the empty string when either argument is
 *         blank or nothing matches
 */
public String findFirst(String originalStr, String regex) {
    final boolean searchable = !StringUtils.isBlank(originalStr) && !StringUtils.isBlank(regex);
    if (searchable) {
        final PatternMatcher finder = new Perl5Matcher();
        final boolean found = finder.contains(originalStr, patterns.get(regex));
        if (found) {
            final String whole = finder.getMatch().group(0);
            return StringUtils.trimToEmpty(whole);
        }
    }
    return StringUtils.EMPTY;
}
 
Example #24
Source File: UtilHttp.java    From scipio-erp with Apache License 2.0 5 votes vote down vote up
/**
 * Checks whether the current request comes from a search bot.
 *
 * <p>The verdict is cached on the request in the {@code _REQUEST_FROM_SPIDER_} attribute
 * ("Y" or "N"); a non-empty cached value is returned without re-evaluation. Otherwise the
 * {@code User-Agent} header (empty when absent) is searched with each expression listed
 * in the {@code link.remove_lsessionid.user_agent_list} property of the {@code url}
 * resource.
 *
 * @param request the request to inspect; its attribute is updated with the verdict
 * @return whether the request is from a web search bot
 */
public static boolean checkURLforSpiders(HttpServletRequest request) {
    String spiderRequest = (String) request.getAttribute("_REQUEST_FROM_SPIDER_");
    if (UtilValidate.isNotEmpty(spiderRequest)) {
        return "Y".equals(spiderRequest);
    }

    String userAgentHeader = request.getHeader("User-Agent");
    String initialUserAgent = userAgentHeader != null ? userAgentHeader : "";
    List<String> spiderList = StringUtil.split(UtilProperties.getPropertyValue("url", "link.remove_lsessionid.user_agent_list"), ",");

    boolean result = false;
    if (UtilValidate.isNotEmpty(spiderList)) {
        PatternMatcher matcher = new Perl5Matcher();
        for (String spiderNameElement : spiderList) {
            Pattern pattern;
            try {
                pattern = PatternFactory.createOrGetPerl5CompiledPattern(spiderNameElement, false);
            } catch (MalformedPatternException e) {
                Debug.logError(e, module);
                // A broken entry must not reach the matcher as a null pattern; skip it
                // and keep evaluating the remaining entries.
                continue;
            }
            if (matcher.contains(initialUserAgent, pattern)) {
                result = true;
                break;
            }
        }
    }

    // Cache the verdict so later calls on the same request short-circuit.
    request.setAttribute("_REQUEST_FROM_SPIDER_", result ? "Y" : "N");
    return result;
}
 
Example #25
Source File: EntityComparisonOperator.java    From scipio-erp with Apache License 2.0 5 votes vote down vote up
/**
 * Evaluates a LIKE-style comparison between two values.
 *
 * <p>A null {@code lhs} compares true only against a null {@code rhs}. When both values
 * are strings, {@code lhs} must match in full the pattern that {@code makeOroPattern}
 * builds from {@code rhs}. Every other combination yields true.
 *
 * @param lhs value being tested
 * @param rhs value carrying the pattern characters
 * @return the outcome described above
 */
public static final <L,R> boolean compareLike(L lhs, R rhs) {
    if (lhs == null) {
        return rhs == null;
    }
    final boolean bothStrings = lhs instanceof String && rhs instanceof String;
    if (!bothStrings) {
        return true;
    }
    // rhs carries the pattern characters; lhs is the value tested against it.
    final PatternMatcher likeMatcher = new Perl5Matcher();
    return likeMatcher.matches((String) lhs, makeOroPattern((String) rhs));
}
 
Example #26
Source File: RegularMatchPrefix.java    From anyline with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether a prefix of {@code src} matches {@code regx}.
 *
 * <p>The expression is compiled with {@code Perl5Compiler.DEFAULT_MASK}.
 *
 * @param src   input string
 * @param regx  regular expression
 * @return true when a prefix matches; false otherwise or when compiling/matching throws
 */
public boolean match(String src, String regx){
	try{
		final Pattern compiled = patternCompiler.compile(regx, Perl5Compiler.DEFAULT_MASK);
		final PatternMatcher tester = new Perl5Matcher();
		return tester.matchesPrefix(src, compiled);
	}catch(Exception e){
		// Any failure is logged and treated as "no match".
		log.error("[match error][src:{}][regx:{}]", src, regx);
		e.printStackTrace();
		return false;
	}
}
 
Example #27
Source File: RegxpContain.java    From anyline with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether {@code regx} occurs anywhere in {@code src}.
 *
 * <p>The expression is compiled case-insensitively.
 *
 * @param src  input string
 * @param regx regular expression
 * @return true when a match is found; false when there is none or when
 *         compiling/matching throws
 */
public boolean match(String src, String regx){
	boolean result = false;
	try{
		Pattern pattern = patternCompiler.compile(regx, Perl5Compiler.CASE_INSENSITIVE_MASK);
		PatternMatcher matcher = new Perl5Matcher();
		result = matcher.contains(src, pattern);
	}catch(Exception e){
		// Still treated as "no match", but no longer swallowed silently: the failure is
		// logged so that an invalid expression can be told apart from a real non-match.
		result = false;
		log.error("[match error][src:{}][regx:{}]", src, regx, e);
	}
	return result;
}
 
Example #28
Source File: reMatch.java    From openbd-core with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Returns an array holding every match of the "regular" argument found in the "string"
 * argument.
 *
 * <p>The expression is compiled in single-line mode, case-insensitively unless
 * {@code caseSensitiveMatch} is set. When the "unique" argument is true, each distinct
 * match text is added only once, in order of first occurrence.
 *
 * @param _session   current session (not used in this method body)
 * @param argStruct  named arguments: "regular", "string" and optional "unique"
 * @return the array of match texts
 * @throws cfmRunTimeException if the expression cannot be compiled
 */
public cfData execute(cfSession _session, cfArgStructData argStruct ) throws cfmRunTimeException {
	final String regexp = getNamedStringParam(argStruct, "regular", "");
	final String strToSearch = getNamedStringParam(argStruct, "string", "");
	final boolean bUnique = getNamedBooleanParam(argStruct, "unique",false);

	// Only allocated when duplicates have to be filtered out.
	final HashSet<String> seen = bUnique ? new HashSet<String>() : null;

	/* Setup the RegEx */
	int mask = Perl5Compiler.SINGLELINE_MASK;
	if (!caseSensitiveMatch) {
		mask = Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.SINGLELINE_MASK;
	}
	final PatternCompiler compiler = new Perl5Compiler();
	final Pattern pattern;
	try {
		pattern = compiler.compile(regexp, mask);
	} catch (MalformedPatternException e) {
		cfCatchData failure = new cfCatchData();
		failure.setType("Function");
		failure.setMessage("REMatch - invalid parameter");
		failure.setDetail("Invalid regular expression ( " + regexp + " )");
		throw new cfmRunTimeException(failure);
	}

	/* Perform the search */
	final cfArrayData matches = cfArrayData.createArray(1);
	final PatternMatcher matcher = new Perl5Matcher();
	final PatternMatcherInput input = new PatternMatcherInput( strToSearch );
	while ( matcher.contains(input, pattern) ) {
		final String text = matcher.getMatch().toString();
		// In unique mode, skip text that has already been recorded.
		if ( bUnique && seen.contains( text ) ){
			continue;
		}
		matches.addElement( new cfStringData( text ) );
		if ( bUnique ){
			seen.add( text );
		}
	}

	return matches;
}
 
Example #29
Source File: OutlinkExtractor.java    From anthelion with Apache License 2.0 4 votes vote down vote up
/**
 * Extracts <code>Outlink</code>s from the given plain text, attaching the supplied
 * anchor to each of them.
 *
 * <p>URLs are located with <code>URL_PATTERN</code>. Extraction stops once 60 seconds of
 * wall-clock time have elapsed since the call started. Matches that cannot be turned
 * into an <code>Outlink</code> are skipped with a warning.
 *
 * @param plainText the plain text from which URLs should be extracted
 * @param anchor    the anchor to attach to every extracted link
 * @param conf      configuration (not used in this method body)
 *
 * @return array of the <code>Outlink</code>s found in plainText; empty when none are found
 */
public static Outlink[] getOutlinks(final String plainText, String anchor, Configuration conf) {
  final long startedAt = System.currentTimeMillis();
  final List<Outlink> found = new ArrayList<Outlink>();

  try {
    final int mask = Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.READ_ONLY_MASK
        | Perl5Compiler.MULTILINE_MASK;
    final PatternCompiler compiler = new Perl5Compiler();
    final Pattern urlPattern = compiler.compile(URL_PATTERN, mask);
    final PatternMatcher matcher = new Perl5Matcher();
    final PatternMatcherInput input = new PatternMatcherInput(plainText);

    while (matcher.contains(input, urlPattern)) {
      // Wall-clock guard; as the original notes, CPU time would be a fairer measure
      // on heavily loaded systems.
      final long elapsed = System.currentTimeMillis() - startedAt;
      if (elapsed >= 60000L) {
        if (LOG.isWarnEnabled()) {
          LOG.warn("Time limit exceeded for getOutLinks");
        }
        break;
      }

      final String url = matcher.getMatch().group(0);
      try {
        found.add(new Outlink(url, anchor));
      } catch (MalformedURLException mue) {
        LOG.warn("Invalid url: '" + url + "', skipping.");
      }
    }
  } catch (Exception ex) {
    // if the matcher fails (perhaps a malformed URL) we just log it and move on
    if (LOG.isErrorEnabled()) { LOG.error("getOutlinks", ex); }
  }

  if (found.isEmpty()) {
    return new Outlink[0];
  }
  return found.toArray(new Outlink[0]);
}
 
Example #30
Source File: OutlinkExtractor.java    From nutch-htmlunit with Apache License 2.0 4 votes vote down vote up
/**
 * Extracts <code>Outlink</code>s from the given plain text, attaching the supplied
 * anchor to each of them.
 *
 * <p>URLs are located with <code>URL_PATTERN</code>. Extraction stops once 60 seconds of
 * wall-clock time have elapsed since the call started. Matches that cannot be turned
 * into an <code>Outlink</code> are skipped with a warning.
 *
 * @param plainText the plain text from which URLs should be extracted
 * @param anchor    the anchor to attach to every extracted link
 * @param conf      configuration (not used in this method body)
 *
 * @return array of the <code>Outlink</code>s found in plainText; empty when none are found
 */
public static Outlink[] getOutlinks(final String plainText, String anchor, Configuration conf) {
  final long startedAt = System.currentTimeMillis();
  final List<Outlink> found = new ArrayList<Outlink>();

  try {
    final int mask = Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.READ_ONLY_MASK
        | Perl5Compiler.MULTILINE_MASK;
    final PatternCompiler compiler = new Perl5Compiler();
    final Pattern urlPattern = compiler.compile(URL_PATTERN, mask);
    final PatternMatcher matcher = new Perl5Matcher();
    final PatternMatcherInput input = new PatternMatcherInput(plainText);

    while (matcher.contains(input, urlPattern)) {
      // Wall-clock guard; as the original notes, CPU time would be a fairer measure
      // on heavily loaded systems.
      final long elapsed = System.currentTimeMillis() - startedAt;
      if (elapsed >= 60000L) {
        if (LOG.isWarnEnabled()) {
          LOG.warn("Time limit exceeded for getOutLinks");
        }
        break;
      }

      final String url = matcher.getMatch().group(0);
      try {
        found.add(new Outlink(url, anchor));
      } catch (MalformedURLException mue) {
        LOG.warn("Invalid url: '" + url + "', skipping.");
      }
    }
  } catch (Exception ex) {
    // if the matcher fails (perhaps a malformed URL) we just log it and move on
    if (LOG.isErrorEnabled()) { LOG.error("getOutlinks", ex); }
  }

  if (found.isEmpty()) {
    return new Outlink[0];
  }
  return found.toArray(new Outlink[0]);
}