dk.brics.automaton.RegExp Java Examples
The following examples show how to use
dk.brics.automaton.RegExp.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FlamdexUtils.java From imhotep with Apache License 2.0 | 5 votes |
public static ThreadSafeBitSet cacheRegex(final String field, final String regex, final FlamdexReader reader) { final Automaton automaton = new RegExp(regex).toAutomaton(); final ThreadSafeBitSet ret = new ThreadSafeBitSet(reader.getNumDocs()); if (reader.getIntFields().contains(field)) { cacheIntFieldRegex(field, reader, automaton, ret); } else if (reader.getStringFields().contains(field)) { cacheStringFieldRegex(field, reader, automaton, ret); } else { // No exception on unknown field because fields can be added and queries can legitimately cross boundaries // where the field isn't defined. Instead, just return an empty bitset. } return ret; }
Example #2
Source File: MultiPattern.java From multiregexp with MIT License | 5 votes |
/**
 * Builds a combined automaton from every pattern of this instance, each one
 * prepended with {@code prefix} before being compiled and minimized.
 *
 * @param prefix regex fragment concatenated in front of every pattern
 * @return the result of {@code MultiPatternAutomaton.make} over the minimized automata
 */
public MultiPatternAutomaton makeAutomatonWithPrefix(String prefix) {
    final List<Automaton> minimized = new ArrayList<>();
    for (final String pattern : this.patterns) {
        final Automaton current = new RegExp(prefix + pattern).toAutomaton();
        current.minimize();
        minimized.add(current);
    }
    return MultiPatternAutomaton.make(minimized);
}
Example #3
Source File: MultiPattern.java From multiregexp with MIT License | 5 votes |
/**
 * Counterpart of {@code Pattern.compile} whose result is only usable for
 * pattern search. The searcher reports the first occurrence of a pattern.
 *
 * <p>Building the searcher is expensive: cache the returned object when the
 * same patterns are searched for in several different strings.
 *
 * @return a searcher object
 */
public MultiPatternSearcher searcher() {
    // Combined automaton with ".*" in front of every pattern.
    final MultiPatternAutomaton prefixedAutomaton = makeAutomatonWithPrefix(".*");

    // One automaton per pattern, without the prefix.
    final List<Automaton> individualAutomata = new ArrayList<>();
    for (final String pattern : this.patterns) {
        final Automaton single = new RegExp(pattern).toAutomaton();
        single.minimize();
        single.determinize();
        individualAutomata.add(single);
    }

    return new MultiPatternSearcher(prefixedAutomaton, individualAutomata);
}
Example #4
Source File: UnicodeTest.java From multiregexp with MIT License | 5 votes |
/**
 * Checks that an automaton built from a regex containing non-ASCII characters
 * accepts a matching date string and rejects one with a three-digit day.
 */
@Test
public void testAutomatonWithUnicode() {
    final Automaton forwardAutomaton =
            new RegExp("([0-9]{2,4}年)?[0-9]{1,2}月[0-9]{1,2}日").toAutomaton();
    final RunAutomaton matcher = new RunAutomaton(forwardAutomaton);

    Assert.assertTrue(matcher.run("1982年9月17日"));
    Assert.assertFalse(matcher.run("1982年9月127日"));
}
Example #5
Source File: StringPattern.java From consulo with Apache License 2.0 | 5 votes |
@Nonnull public StringPattern matchesBrics(@NonNls @Nonnull final String s) { final String escaped = StringUtil.escapeToRegexp(s); if (escaped.equals(s)) { return equalTo(s); } StringBuilder sb = new StringBuilder(s.length()*5); for (int i = 0; i < s.length(); i++) { final char c = s.charAt(i); if(c == ' ') { sb.append("<whitespace>"); } else //This is really stupid and inconvenient builder - it breaks any normal pattern with uppercase if(Character.isUpperCase(c)) { sb.append('[').append(Character.toUpperCase(c)).append(Character.toLowerCase(c)).append(']'); } else { sb.append(c); } } final RegExp regExp = new RegExp(sb.toString()); final Automaton automaton = regExp.toAutomaton(new DatatypesAutomatonProvider()); final RunAutomaton runAutomaton = new RunAutomaton(automaton, true); return with(new ValuePatternCondition<String>("matchesBrics") { @Override public boolean accepts(@Nonnull String str, final ProcessingContext context) { if (!str.isEmpty() && (str.charAt(0) == '"' || str.charAt(0) == '\'')) str = str.substring(1); return runAutomaton.run(str); } @Override public Collection<String> getValues() { return Collections.singleton(s); } }); }
Example #6
Source File: AutomatonURLFilter.java From anthelion with Apache License 2.0 | 4 votes |
Rule(boolean sign, String regex) { super(sign, regex); automaton = new RunAutomaton(new RegExp(regex, RegExp.ALL).toAutomaton()); }
Example #7
Source File: ImhotepLocalSession.java From imhotep with Apache License 2.0 | 4 votes |
/**
 * Regroups documents according to whether any term of the string field
 * {@code field} is accepted by the automaton compiled from {@code regex}.
 *
 * <p>Documents of terms accepted by the automaton are passed to
 * {@code remapPositiveDocs}; afterwards {@code remapNegativeDocs} is called
 * with the same tracking bit set, and finally {@code finalizeRegroup}.
 *
 * @param field         string field whose terms are tested
 * @param regex         regular expression in dk.brics.automaton syntax
 * @param targetGroup   group passed to the remap helpers as the source group
 * @param negativeGroup group passed to {@code remapNegativeDocs}
 * @param positiveGroup group passed to {@code remapPositiveDocs}
 * @throws IllegalStateException       if {@code getNumGroups()} is greater than 2
 * @throws ImhotepOutOfMemoryException if the tracking bit set cannot be allocated
 */
@Override
public void regexRegroup(String field, String regex, int targetGroup, int negativeGroup, int positiveGroup)
        throws ImhotepOutOfMemoryException {
    if (getNumGroups() > 2) {
        throw new IllegalStateException("regexRegroup should be applied as a filter when you have only one group");
    }
    if (targetGroup == 0) {
        clearZeroDocBitsets();
    }

    // Resize the group lookup to the larger of the two destination groups.
    final int highestGroup = Math.max(negativeGroup, positiveGroup);
    docIdToGroup = GroupLookupFactory.resize(docIdToGroup, highestGroup, memory);

    final FastBitSetPooler pooler = new ImhotepBitSetPooler(memory);
    final FastBitSet remapped;
    try {
        remapped = pooler.create(numDocs);
    } catch (FlamdexOutOfMemoryException e) {
        throw new ImhotepOutOfMemoryException(e);
    }

    try {
        try (
            final StringTermIterator terms = flamdexReader.getStringTermIterator(field);
            final DocIdStream docs = flamdexReader.getDocIdStream()
        ) {
            final Automaton automaton = new RegExp(regex).toAutomaton();
            while (terms.next()) {
                if (!automaton.run(terms.term())) {
                    continue;
                }
                docs.reset(terms);
                remapPositiveDocs(docs, remapped, targetGroup, positiveGroup);
            }
        }
        remapNegativeDocs(remapped, targetGroup, negativeGroup);
    } finally {
        // Always hand the bit set's memory back to the pooler.
        pooler.release(remapped.memoryUsage());
    }

    finalizeRegroup();
}
Example #8
Source File: CompiledAutomaton.java From spork with Apache License 2.0 | 4 votes |
/**
 * Compiles {@code rhsPattern} with the {@code RegExp.NONE} syntax flags and
 * stores the resulting {@code RunAutomaton} in {@code runauto}.
 *
 * @param rhsPattern regular expression in dk.brics.automaton syntax
 */
public CompiledAutomaton(String rhsPattern) {
    final Automaton compiled =
            new dk.brics.automaton.RegExp(rhsPattern, RegExp.NONE).toAutomaton();
    this.runauto = new RunAutomaton(compiled, true);
}
Example #9
Source File: AutomatonURLFilter.java From nutch-htmlunit with Apache License 2.0 | 4 votes |
Rule(boolean sign, String regex) { super(sign, regex); automaton = new RunAutomaton(new RegExp(regex, RegExp.ALL).toAutomaton()); }