dk.brics.automaton.Automaton Java Examples

The following examples show how to use the dk.brics.automaton.Automaton class. You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to go to the original project or source file. You may also check out the related API usage in the sidebar.
Example #1
Source File: FlamdexUtils.java    From imhotep with Apache License 2.0 5 votes vote down vote up
/**
 * Compiles {@code regex} into an automaton and evaluates it against the terms of {@code field}.
 *
 * @param field  name of the field to evaluate; looked up first among the reader's int fields,
 *               then among its string fields
 * @param regex  regular expression in dk.brics.automaton syntax
 * @param reader reader supplying the field lists and the document count
 * @return a bit set sized to {@code reader.getNumDocs()}, filled by the int- or string-field
 *         helper; left untouched when the field is unknown to the reader
 */
public static ThreadSafeBitSet cacheRegex(final String field, final String regex, final FlamdexReader reader) {
    final Automaton compiled = new RegExp(regex).toAutomaton();
    final ThreadSafeBitSet matches = new ThreadSafeBitSet(reader.getNumDocs());

    if (reader.getIntFields().contains(field)) {
        cacheIntFieldRegex(field, reader, compiled, matches);
        return matches;
    }
    if (reader.getStringFields().contains(field)) {
        cacheStringFieldRegex(field, reader, compiled, matches);
        return matches;
    }

    // Deliberately no exception for an unknown field: fields can be added over time and queries
    // can legitimately cross boundaries where the field isn't defined. The untouched bit set
    // is returned instead.
    return matches;
}
 
Example #2
Source File: StringPattern.java    From consulo with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a pattern that matches strings against {@code s} using a dk.brics automaton.
 *
 * If escaping {@code s} for regexp use leaves it unchanged, a plain equality pattern is returned.
 * Otherwise {@code s} is rewritten character by character (spaces and uppercase letters are
 * replaced, see below), compiled, and wrapped in a condition that runs the compiled automaton.
 *
 * @param s the pattern text
 * @return a pattern accepting strings matched by the rewritten expression
 */
@Nonnull
public StringPattern matchesBrics(@NonNls @Nonnull final String s) {
  if (StringUtil.escapeToRegexp(s).equals(s)) {
    return equalTo(s);
  }

  final StringBuilder rewritten = new StringBuilder(s.length()*5);
  for (final char ch : s.toCharArray()) {
    if (ch == ' ') {
      // A space becomes the named automaton reference "<whitespace>"
      // (presumably resolved by the DatatypesAutomatonProvider used below).
      rewritten.append("<whitespace>");
    }
    else if (Character.isUpperCase(ch)) {
      // Every uppercase character is replaced by a two-character class holding its upper and
      // lower case forms. This is applied blindly, so it also rewrites uppercase characters
      // that are part of other regexp constructs.
      rewritten.append('[').append(Character.toUpperCase(ch)).append(Character.toLowerCase(ch)).append(']');
    }
    else {
      rewritten.append(ch);
    }
  }

  final Automaton automaton = new RegExp(rewritten.toString()).toAutomaton(new DatatypesAutomatonProvider());
  final RunAutomaton matcher = new RunAutomaton(automaton, true);

  return with(new ValuePatternCondition<String>("matchesBrics") {
    @Override
    public boolean accepts(@Nonnull String str, final ProcessingContext context) {
      // A single leading quote character (double or single) is dropped before matching.
      final boolean startsWithQuote = !str.isEmpty() && (str.charAt(0) == '"' || str.charAt(0) == '\'');
      return matcher.run(startsWithQuote ? str.substring(1) : str);
    }

    @Override
    public Collection<String> getValues() {
      return Collections.singleton(s);
    }
  });
}
 
Example #3
Source File: UnicodeTest.java    From multiregexp with MIT License 5 votes vote down vote up
/**
 * Checks that an automaton built from a pattern containing non-ASCII characters
 * accepts a well-formed date string and rejects one with a three-digit day.
 */
@Test
public void testAutomatonWithUnicode() {
    final Automaton dateAutomaton = new RegExp("([0-9]{2,4}年)?[0-9]{1,2}月[0-9]{1,2}日").toAutomaton();
    final RunAutomaton matcher = new RunAutomaton(dateAutomaton);

    Assert.assertTrue(matcher.run("1982年9月17日"));
    Assert.assertFalse(matcher.run("1982年9月127日"));
}
 
Example #4
Source File: MultiPatternAutomaton.java    From multiregexp with MIT License 5 votes vote down vote up
/**
 * Collects the initial state of every automaton, in list order, into one {@code MultiState}.
 *
 * @param automata the automata whose initial states are combined
 * @return a {@code MultiState} wrapping one initial state per automaton
 */
static MultiState initialState(List<Automaton> automata) {
    final State[] starts = new State[automata.size()];
    int slot = 0;
    for (final Automaton each : automata) {
        starts[slot++] = each.getInitialState();
    }
    return new MultiState(starts);
}
 
Example #5
Source File: MultiPattern.java    From multiregexp with MIT License 5 votes vote down vote up
/**
 * Equivalent of Pattern.compile, but the result is only valid for pattern search.
 * The searcher will return the first occurrence of a pattern.
 *
 * This operation is costly: cache its result when searching several strings
 * with the same set of patterns.
 *
 * Two things are built: a combined automaton over all patterns, each prefixed with
 * {@code ".*"}, and one minimized and determinized automaton per individual pattern.
 *
 * @return A searcher object
 */
public MultiPatternSearcher searcher() {
    final MultiPatternAutomaton prefixedAutomaton = makeAutomatonWithPrefix(".*");

    final List<Automaton> individualAutomata = new ArrayList<>();
    for (final String patternText : this.patterns) {
        final Automaton single = new RegExp(patternText).toAutomaton();
        single.minimize();
        single.determinize();
        individualAutomata.add(single);
    }

    return new MultiPatternSearcher(prefixedAutomaton, individualAutomata);
}
 
Example #6
Source File: MultiPattern.java    From multiregexp with MIT License 5 votes vote down vote up
/**
 * Builds a {@code MultiPatternAutomaton} from all patterns, each one prepended with {@code prefix}.
 *
 * @param prefix regexp fragment concatenated in front of every pattern before compilation
 * @return the automaton produced by {@code MultiPatternAutomaton.make} from the minimized
 *         per-pattern automata
 */
public MultiPatternAutomaton makeAutomatonWithPrefix(String prefix) {
    final List<Automaton> minimized = new ArrayList<>();
    for (final String patternText : this.patterns) {
        final Automaton compiled = new RegExp(prefix + patternText).toAutomaton();
        compiled.minimize();
        minimized.add(compiled);
    }
    return MultiPatternAutomaton.make(minimized);
}
 
Example #7
Source File: FlamdexUtils.java    From imhotep with Apache License 2.0 5 votes vote down vote up
/**
 * Walks every string term of {@code field}; for each term the automaton accepts, the doc id
 * stream is reset to that term and handed to {@code fillBitSet} together with {@code ret}.
 *
 * @param field     string field whose terms are iterated
 * @param reader    source of the term iterator and doc id stream (both closed on exit)
 * @param automaton automaton each term is run against
 * @param ret       bit set passed on to {@code fillBitSet} for matching terms
 */
private static void cacheStringFieldRegex(String field, FlamdexReader reader, Automaton automaton, ThreadSafeBitSet ret) {
    try (final StringTermIterator terms = reader.getStringTermIterator(field);
         final DocIdStream docs = reader.getDocIdStream()) {
        while (terms.next()) {
            final boolean accepted = automaton.run(terms.term());
            if (!accepted) {
                continue;
            }
            docs.reset(terms);
            fillBitSet(docs, ret);
        }
    }
}
 
Example #8
Source File: FlamdexUtils.java    From imhotep with Apache License 2.0 5 votes vote down vote up
/**
 * Walks every int term of {@code field}; each term is converted to its decimal string form
 * and run against the automaton. For accepted terms the doc id stream is reset to that term
 * and handed to {@code fillBitSet} together with {@code ret}.
 *
 * @param field     int field whose terms are iterated
 * @param reader    source of the term iterator and doc id stream (both closed on exit)
 * @param automaton automaton the string form of each term is run against
 * @param ret       bit set passed on to {@code fillBitSet} for matching terms
 */
private static void cacheIntFieldRegex(String field, FlamdexReader reader, Automaton automaton, ThreadSafeBitSet ret) {
    try (final IntTermIterator terms = reader.getIntTermIterator(field);
         final DocIdStream docs = reader.getDocIdStream()) {
        while (terms.next()) {
            final String termText = String.valueOf(terms.term());
            if (!automaton.run(termText)) {
                continue;
            }
            docs.reset(terms);
            fillBitSet(docs, ret);
        }
    }
}
 
Example #9
Source File: EntropyPrecisionRecallMeasure.java    From codebase with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Prints a banner and delegates to {@code MetricsCalculator.calculate} with the relevant
 * ("REL") and retrieved ("RET") automata and the configured skip counts.
 *
 * @return the pair produced by the calculator, or a pair of zeros when either trace
 *         model is not an {@code Automaton}
 */
@Override
protected Pair<Double, Double> computeMeasureValue() {
	System.out.println();
	System.out.println("===================Calculating precision and recall=============================");
	System.out.println();

	final boolean bothAutomata = (relevantTraces instanceof Automaton) && (retrievedTraces instanceof Automaton);
	if (!bothAutomata) {
		return new Pair<Double, Double>(0.0, 0.0);
	}
	return MetricsCalculator.calculate((Automaton)relevantTraces, "REL", (Automaton)retrievedTraces, "RET", false, false, skipsRel, skipsRet);
}
 
Example #10
Source File: PartialEfficientEntropyMeasure.java    From codebase with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Prints a banner and delegates to {@code MetricsCalculator.calculateEntropy} for the model,
 * passing {@code true, true, 0} as the trailing arguments.
 *
 * @return the calculator's result, or {@code 0.0} when the model is not an {@code Automaton}
 */
@Override
protected double computeMeasureValue() {
	System.out.println();
	System.out.println("===================Calculating partial entropy efficiently=============================");
	System.out.println();

	if (!(model instanceof Automaton)) {
		return 0.0;
	}
	return MetricsCalculator.calculateEntropy((Automaton)model, "model", true, true, 0);
}
 
Example #11
Source File: PartialEntropyMeasure.java    From codebase with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Prints a banner and delegates to {@code MetricsCalculator.calculateEntropy} for the model,
 * passing {@code true, false, 0} as the trailing arguments.
 *
 * @return the calculator's result, or {@code 0.0} when the model is not an {@code Automaton}
 */
@Override
protected double computeMeasureValue() {
	System.out.println();
	System.out.println("===================Calculating partial entropy=============================");
	System.out.println();

	if (!(model instanceof Automaton)) {
		return 0.0;
	}
	return MetricsCalculator.calculateEntropy((Automaton)model, "model", true, false, 0);
}
 
Example #12
Source File: AbstractEntropyMeasure.java    From codebase with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Compute value of this measure.
 *
 * Checks the measure's limitations (running the check first if it has not run yet), converts
 * the model to an automaton when it is a {@code NetSystem} or an {@code XLog}, prints
 * construction statistics, and then times the actual computation done by
 * {@code computeMeasureValue()}.
 *
 * @return Value of entropy for the given model.
 * @throws Exception if limitations of this measure are not satisfied by the given models.
 */
public double computeMeasure() throws Exception {

	if (limitationsHold == null) {
		this.checkLimitations();
	}
	final boolean limitationsSatisfied = limitationsHold != null && limitationsHold.booleanValue();
	if (!limitationsSatisfied) {
		throw new Exception(String.format("Limitation(s): %s of %s measure are not fulfilled", violetedLimitations, this.getClass().getName()));
	}

	// Strategy that simply defers to String's own hashCode/equals.
	final HashingStrategy<String> stringStrategy = new HashingStrategy<String>() {

		public int computeHashCode(String object) {
			return object.hashCode();
		}

		public boolean equals(String o1, String o2) {
			return o1.equals(o2);
		}
	};
	final TObjectShortMap<String> activity2short = new TObjectShortCustomHashMap<String>(stringStrategy, 10, 0.5f, (short) -1);

	System.out.println("Constructing automaton for retrieved model");
	final long constructionStartMillis = System.currentTimeMillis();
	if (model instanceof NetSystem) {
		model = Utils.constructAutomatonFromNetSystem((NetSystem) model, activity2short);
	} else if (model instanceof XLog) {
		model = Utils.constructAutomatonFromLog((XLog) model, activity2short);
	}
	final long constructionMillis = System.currentTimeMillis() - constructionStartMillis;

	// From here on the model is cast to Automaton unconditionally.
	System.out.println(String.format("The automaton for model constructed in                       %s ms.", constructionMillis));
	System.out.println(String.format("The number of states:                                        %s", ((Automaton)model).getNumberOfStates()));
	System.out.println(String.format("The number of transitions:                                   %s", Utils.numberOfTransitions((Automaton)model)));

	// The computation itself is timed in nanoseconds.
	final long computationStartNanos = System.nanoTime();
	this.measureValue = this.computeMeasureValue();
	this.measureComputationTime = System.nanoTime() - computationStartNanos;

	return this.measureValue;
}
 
Example #13
Source File: EntropyMeasure.java    From codebase with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Prints a banner and delegates to {@code MetricsCalculator.calculateEntropy} for the model,
 * passing {@code false, false} and the configured {@code numberOfSkips}.
 *
 * @return the calculator's result, or {@code 0.0} when the model is not an {@code Automaton}
 */
@Override
protected double computeMeasureValue() {
	System.out.println();
	System.out.println("===================Calculating entropy=============================");
	System.out.println();

	if (!(model instanceof Automaton)) {
		return 0.0;
	}
	return MetricsCalculator.calculateEntropy((Automaton)model, "model", false, false, numberOfSkips);
}
 
Example #14
Source File: PartialEntropyPrecisionRecallMeasure.java    From codebase with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Delegates to {@code MetricsCalculator.calculate} with the relevant ("REL") and retrieved
 * ("RET") automata, passing {@code true, false, 0, 0} as the trailing arguments.
 *
 * @return the pair produced by the calculator, or a pair of zeros when either trace
 *         model is not an {@code Automaton}
 */
@Override
protected Pair<Double, Double> computeMeasureValue() {

	final boolean bothAutomata = (relevantTraces instanceof Automaton) && (retrievedTraces instanceof Automaton);
	if (!bothAutomata) {
		return new Pair<Double, Double>(0.0, 0.0);
	}
	return MetricsCalculator.calculate((Automaton)relevantTraces, "REL", (Automaton)retrievedTraces, "RET", true, false, 0, 0);
}
 
Example #15
Source File: PartialEfficientEntropyPrecisionRecallMeasure.java    From codebase with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Delegates to {@code MetricsCalculator.calculate} with the relevant ("REL") and retrieved
 * ("RET") automata, passing {@code true, true, 0, 0} as the trailing arguments.
 *
 * @return the pair produced by the calculator, or a pair of zeros when either trace
 *         model is not an {@code Automaton}
 */
@Override
protected Pair<Double, Double> computeMeasureValue() {

	final boolean bothAutomata = (relevantTraces instanceof Automaton) && (retrievedTraces instanceof Automaton);
	if (!bothAutomata) {
		return new Pair<Double, Double>(0.0, 0.0);
	}
	return MetricsCalculator.calculate((Automaton)relevantTraces, "REL", (Automaton)retrievedTraces, "RET", true, true, 0, 0);
}
 
Example #16
Source File: ImhotepLocalSession.java    From imhotep with Apache License 2.0 4 votes vote down vote up
/**
 * Regroups documents of {@code targetGroup} according to whether a string term of
 * {@code field} is accepted by {@code regex}.
 *
 * For every accepted term, the term's documents are passed to {@code remapPositiveDocs};
 * afterwards {@code remapNegativeDocs} is invoked with the bit set tracking the documents
 * already handled. The temporary bit set's memory is released in all cases.
 *
 * @param field         string field whose terms are tested
 * @param regex         regular expression in dk.brics.automaton syntax
 * @param targetGroup   group being regrouped
 * @param negativeGroup group passed to {@code remapNegativeDocs}
 * @param positiveGroup group passed to {@code remapPositiveDocs}
 * @throws ImhotepOutOfMemoryException if the temporary bit set cannot be allocated
 * @throws IllegalStateException if the session currently reports more than two groups
 */
@Override
public void regexRegroup(String field, String regex, int targetGroup, int negativeGroup, int positiveGroup) throws ImhotepOutOfMemoryException {
    if (getNumGroups() > 2) {
        throw new IllegalStateException("regexRegroup should be applied as a filter when you have only one group");
    }
    if (targetGroup == 0) {
        clearZeroDocBitsets();
    }

    // The lookup must be able to hold the larger of the two destination groups.
    final int largestDestinationGroup = Math.max(negativeGroup, positiveGroup);
    docIdToGroup = GroupLookupFactory.resize(docIdToGroup, largestDestinationGroup, memory);

    final FastBitSetPooler pooler = new ImhotepBitSetPooler(memory);
    final FastBitSet remappedDocs;
    try {
        remappedDocs = pooler.create(numDocs);
    } catch (FlamdexOutOfMemoryException e) {
        throw new ImhotepOutOfMemoryException(e);
    }

    try {
        try (
            final StringTermIterator terms = flamdexReader.getStringTermIterator(field);
            final DocIdStream docs = flamdexReader.getDocIdStream()
        ) {
            final Automaton matcher = new RegExp(regex).toAutomaton();

            while (terms.next()) {
                if (!matcher.run(terms.term())) {
                    continue;
                }
                docs.reset(terms);
                remapPositiveDocs(docs, remappedDocs, targetGroup, positiveGroup);
            }
        }
        // Runs only after the term iterator and doc id stream have been closed.
        remapNegativeDocs(remappedDocs, targetGroup, negativeGroup);
    } finally {
        pooler.release(remappedDocs.memoryUsage());
    }

    finalizeRegroup();
}
 
Example #17
Source File: CompiledAutomaton.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Compiles {@code rhsPattern} with the {@code RegExp.NONE} syntax flags and stores the
 * resulting table-driven {@code RunAutomaton} for later matching.
 *
 * @param rhsPattern regular expression in dk.brics.automaton syntax
 */
public CompiledAutomaton( String rhsPattern ) {
    final Automaton compiled = new dk.brics.automaton.RegExp(rhsPattern, RegExp.NONE).toAutomaton();
    this.runauto = new RunAutomaton(compiled, true);
}
 
Example #18
Source File: AbstractQualityMeasure.java    From codebase with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Compute value of this measure.
 *
 * Checks the measure's limitations (running the check first if it has not run yet), converts
 * the retrieved and then the relevant trace model to automata when they are a
 * {@code NetSystem} or an {@code XLog}, prints construction statistics for each, and then
 * times the actual computation done by {@code computeMeasureValue()}.
 *
 * @return Value of this measure for the given models of relevant and retrieved traces.
 * @throws Exception if limitations of this measure are not satisfied by the given models.
 */
public Pair<Double, Double> computeMeasure() throws Exception {

	if (limitationsHold == null) {
		this.checkLimitations();
	}
	final boolean limitationsSatisfied = limitationsHold != null && limitationsHold.booleanValue();
	if (!limitationsSatisfied) {
		throw new Exception(String.format("Limitation(s): %s of %s measure are not fulfilled", violetedLimitations, this.getClass().getName()));
	}

	// Strategy that simply defers to String's own hashCode/equals.
	final HashingStrategy<String> stringStrategy = new HashingStrategy<String>() {

		public int computeHashCode(String object) {
			return object.hashCode();
		}

		public boolean equals(String o1, String o2) {
			return o1.equals(o2);
		}
	};
	// The same activity map is shared by both automaton constructions.
	final TObjectShortMap<String> activity2short = new TObjectShortCustomHashMap<String>(stringStrategy, 10, 0.5f, (short) -1);

	// --- retrieved model (RET) ---
	System.out.println("Constructing automaton RET that encodes the retrieved model.");
	final long retStartMillis = System.currentTimeMillis();
	if (retrievedTraces instanceof NetSystem) {
		retrievedTraces = Utils.constructAutomatonFromNetSystem((NetSystem) retrievedTraces, activity2short);
	} else if (retrievedTraces instanceof XLog) {
		retrievedTraces = Utils.constructAutomatonFromLog((XLog) retrievedTraces, activity2short);
	}
	final long retMillis = System.currentTimeMillis() - retStartMillis;
	System.out.println(String.format("Automaton RET constructed in                                %s ms.", retMillis));
	System.out.println(String.format("Automaton RET has %s states and %s transitions.", ((Automaton)retrievedTraces).getNumberOfStates(), Utils.numberOfTransitions((Automaton)retrievedTraces)));

	// --- relevant model (REL) ---
	System.out.println("Constructing automaton REL that encodes the relevant model.");
	final long relStartMillis = System.currentTimeMillis();
	if (relevantTraces instanceof NetSystem) {
		relevantTraces = Utils.constructAutomatonFromNetSystem((NetSystem) relevantTraces, activity2short);
	} else if (relevantTraces instanceof XLog) {
		relevantTraces = Utils.constructAutomatonFromLog((XLog) relevantTraces, activity2short);
	}
	final long relMillis = System.currentTimeMillis() - relStartMillis;
	System.out.println(String.format("Automaton REL constructed in                                %s ms.", relMillis));
	System.out.println(String.format("Automaton REL has %s states and %s transitions.", ((Automaton)relevantTraces).getNumberOfStates(), Utils.numberOfTransitions((Automaton)relevantTraces)));

	// The computation itself is timed in nanoseconds.
	final long computationStartNanos = System.nanoTime();
	this.measureValue = this.computeMeasureValue();
	this.measureComputationTime = System.nanoTime() - computationStartNanos;

	return this.measureValue;
}
 
Example #19
Source File: MultiPatternAutomaton.java    From multiregexp with MIT License 4 votes vote down vote up
/**
 * Builds a {@code MultiPatternAutomaton} by exploring, breadth-first, every combined state
 * reachable from the automata's initial states.
 *
 * Each input automaton is determinized in place first. Combined states are numbered in order
 * of discovery, starting with 0 for the initial state. The transition table is flattened
 * row-major: entry {@code stateId * points.length + pointId} holds the destination state id,
 * or {@code -1} when stepping yields a null state.
 *
 * @param automata the automata to combine; they are mutated by {@code determinize()}
 * @return the combined automaton with its accept values, transitions and character points
 */
static MultiPatternAutomaton make(final List<Automaton> automata) {
    for (final Automaton each : automata) {
        each.determinize();
    }

    final char[] points = DkBricsAutomatonHelper.pointsUnion(automata);
    final int nbPoints = points.length;

    // Combined state -> id, assigned in discovery order.
    final Map<MultiState, Integer> stateIds = new HashMap<>();
    // Discovered states whose outgoing transitions have not been computed yet.
    final Queue<MultiState> pending = new LinkedList<>();
    // One row of destination ids per state, in the order states are dequeued.
    final List<int[]> rows = new ArrayList<>();

    final MultiState start = initialState(automata);
    pending.add(start);
    stateIds.put(start, 0);

    while (!pending.isEmpty()) {
        final MultiState current = pending.remove();
        assert stateIds.containsKey(current);

        final int[] row = new int[nbPoints];
        for (int p = 0; p < nbPoints; p++) {
            final MultiState next = current.step(points[p]);
            if (next.isNull()) {
                row[p] = -1;
                continue;
            }
            final Integer knownId = stateIds.get(next);
            if (knownId != null) {
                row[p] = knownId;
            }
            else {
                // First time this state is seen: queue it and give it the next free id.
                final int freshId = stateIds.size();
                pending.add(next);
                stateIds.put(next, freshId);
                row[p] = freshId;
            }
        }
        rows.add(row);
    }

    assert rows.size() == stateIds.size();
    final int nbStates = stateIds.size();

    // Flatten the per-state rows into a single row-major table.
    final int[] transitions = new int[nbStates * nbPoints];
    for (int stateId = 0; stateId < nbStates; stateId++) {
        System.arraycopy(rows.get(stateId), 0, transitions, stateId * nbPoints, nbPoints);
    }

    final int[][] acceptValues = new int[nbStates][];
    for (final Map.Entry<MultiState, Integer> entry : stateIds.entrySet()) {
        acceptValues[entry.getValue()] = entry.getKey().toAcceptValues();
    }

    return new MultiPatternAutomaton(acceptValues, transitions, points, automata.size());
}