Java Code Examples for org.apache.lucene.util.automaton.Automaton

The following examples show how to use org.apache.lucene.util.automaton.Automaton. They are extracted from open source projects; the source project and file are noted above each example.
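As a primer before the project excerpts, here is a minimal, self-contained sketch of the core API these examples build on: states, labeled transitions, and Operations.run. The API calls are Lucene's; only the wrapper class name is invented for illustration.

import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;

public class AutomatonPrimer {
  public static void main(String[] args) {
    // Build an automaton that accepts exactly the string "ab".
    Automaton a = new Automaton();
    int start = a.createState(); // the first created state is the initial state
    int mid = a.createState();
    int end = a.createState();
    a.setAccept(end, true);
    a.addTransition(start, mid, 'a'); // labels are code points (or bytes, by convention)
    a.addTransition(mid, end, 'b');
    a.finishState(); // freeze the automaton before use

    // Operations.run expects a deterministic automaton; this one already is.
    System.out.println(Operations.run(a, "ab")); // true
    System.out.println(Operations.run(a, "ba")); // false
  }
}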
Example 1
Source Project: Elasticsearch   Source File: XAnalyzingSuggester.java    License: Apache License 2.0
final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
    // TODO: is there a Reader from a CharSequence?
    // Turn tokenstream into automaton:
    Automaton automaton = null;

    try (TokenStream ts = queryAnalyzer.tokenStream("", key.toString())) {
        automaton = getTokenStreamToAutomaton().toAutomaton(ts);
    }

    automaton = replaceSep(automaton);

    // TODO: we can optimize this somewhat by determinizing
    // while we convert

    // This automaton should not blow up during determinize:
    automaton = Operations.determinize(automaton, Integer.MAX_VALUE);
    return automaton;
}
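The TokenStream-to-Automaton step above can be reproduced standalone with Lucene's TokenStreamToAutomaton; a hedged sketch (the analyzer and input text are arbitrary choices, and the fragment belongs in a method that may throw IOException):

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenStreamToAutomaton;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.automaton.Automaton;

// Capture the analyzed token graph as an automaton; labels are the
// analyzed bytes, with a separator label between tokens.
try (StandardAnalyzer analyzer = new StandardAnalyzer();
     TokenStream ts = analyzer.tokenStream("", "big apple")) {
  Automaton automaton = new TokenStreamToAutomaton().toAutomaton(ts);
}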
 
Example 2
Source Project: Elasticsearch   Source File: IncludeExclude.java    License: Apache License 2.0
private Automaton toAutomaton() {
    Automaton a = null;
    if (include != null) {
        a = include.toAutomaton();
    } else if (includeValues != null) {
        a = Automata.makeStringUnion(includeValues);
    } else {
        a = Automata.makeAnyString();
    }
    if (exclude != null) {
        a = Operations.minus(a, exclude.toAutomaton(), Operations.DEFAULT_MAX_DETERMINIZED_STATES);
    } else if (excludeValues != null) {
        a = Operations.minus(a, Automata.makeStringUnion(excludeValues), Operations.DEFAULT_MAX_DETERMINIZED_STATES);
    }
    return a;
}
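The include/exclude algebra above can be exercised directly with the same factories; a minimal sketch (the term values are hypothetical):

import java.util.Arrays;
import java.util.TreeSet;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;

// Accept every term except the excluded values. makeStringUnion requires
// sorted input, which the TreeSet guarantees.
TreeSet<BytesRef> excludeValues = new TreeSet<>(
    Arrays.asList(new BytesRef("error"), new BytesRef("warn")));
Automaton a = Operations.minus(
    Automata.makeAnyString(),
    Automata.makeStringUnion(excludeValues),
    Operations.DEFAULT_MAX_DETERMINIZED_STATES);
a = Operations.determinize(a, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
System.out.println(Operations.run(a, "info"));  // true
System.out.println(Operations.run(a, "error")); // false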
 
Example 3
Source Project: Elasticsearch   Source File: ContextMapping.java    License: Apache License 2.0
/**
 * Create an automaton for a given context query. This automaton will be
 * used to find the matching paths within the FST.
 *
 * @param preserveSep set an additional char (<code>XAnalyzingSuggester.SEP_LABEL</code>) between each context query
 * @param queries list of {@link ContextQuery} defining the lookup context
 *
 * @return Automaton matching the given Query
 */
public static Automaton toAutomaton(boolean preserveSep, Iterable<ContextQuery> queries) {
    Automaton a = Automata.makeEmptyString();

    Automaton gap = Automata.makeChar(ContextMapping.SEPARATOR);
    if (preserveSep) {
        // if separators are preserved the fst contains a SEP_LABEL
        // behind each gap. To have a matching automaton, we need to
        // include the SEP_LABEL in the query as well
        gap = Operations.concatenate(gap, Automata.makeChar(XAnalyzingSuggester.SEP_LABEL));
    }

    for (ContextQuery query : queries) {
        a = Operations.concatenate(Arrays.asList(query.toAutomaton(), gap, a));
    }

    // TODO: should we limit this?  Do any of our ContextQuery impls really create exponential regexps?  GeoQuery looks safe (union
    // of strings).
    return Operations.determinize(a, Integer.MAX_VALUE);
}
 
Example 4
Source Project: lucene-solr   Source File: FuzzySuggester.java    License: Apache License 2.0
@Override
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
                                                                     Automaton lookupAutomaton,
                                                                     FST<Pair<Long,BytesRef>> fst)
  throws IOException {

  // TODO: right now there's no penalty for fuzzy/edits,
  // ie a completion whose prefix matched exactly what the
  // user typed gets no boost over completions that
  // required an edit, which get no boost over completions
  // requiring two edits.  I suspect a multiplicative
  // factor is appropriate (eg, say a fuzzy match must be at
  // least 2X better weight than the non-fuzzy match to
  // "compete") ... in which case I think the wFST needs
  // to be log weights or something ...

  Automaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton));
  /*
    Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), StandardCharsets.UTF_8);
    w.write(levA.toDot());
    w.close();
    System.out.println("Wrote LevA to out.dot");
  */
  return FSTUtil.intersectPrefixPaths(levA, fst);
}
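toLevenshteinAutomata is not shown here, but it builds on Lucene's LevenshteinAutomata class; a standalone sketch of that building block, assuming edit distance 1 without transpositions:

import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.LevenshteinAutomata;

// All strings within one edit (insert/delete/substitute) of "lucene".
Automaton lev1 = new LevenshteinAutomata("lucene", false).toAutomaton(1);
CharacterRunAutomaton run = new CharacterRunAutomaton(lev1);
System.out.println(run.run("lucene")); // true: zero edits
System.out.println(run.run("lucine")); // true: one substitution
System.out.println(run.run("lucien")); // false: two edits away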
 
Example 5
Source Project: lucene-solr   Source File: AnalyzingSuggester.java    License: Apache License 2.0
final Automaton toAutomaton(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException {
  // Analyze surface form:
  Automaton automaton;
  try (TokenStream ts = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString())) {

    // Create corresponding automaton: labels are bytes
    // from each analyzed token, with byte 0 used as
    // separator between tokens:
    automaton = ts2a.toAutomaton(ts);
  }

  automaton = replaceSep(automaton);
  automaton = convertAutomaton(automaton);

  // TODO: LUCENE-5660 re-enable this once we disallow massive suggestion strings
  // assert SpecialOperations.isFinite(automaton);

  // Get all paths from the automaton (there can be
  // more than one path, eg if the analyzer created a
  // graph using SynFilter or WDF):
  return automaton;
}
 
Example 6
Source Project: lucene-solr   Source File: FuzzyCompletionQuery.java    License: Apache License 2.0
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  final Automaton originalAutomata;
  try (CompletionTokenStream stream = (CompletionTokenStream) analyzer.tokenStream(getField(), getTerm().text()) ) {
    originalAutomata = stream.toAutomaton(unicodeAware);
  }
  Set<IntsRef> refs = new HashSet<>();
  Automaton automaton = toLevenshteinAutomata(originalAutomata, refs);
  if (unicodeAware) {
    Automaton utf8automaton = new UTF32ToUTF8().convert(automaton);
    utf8automaton = Operations.determinize(utf8automaton, maxDeterminizedStates);
    automaton = utf8automaton;
  }
  // TODO Accumulating all refs is bad, because the resulting set may be very big.
  // TODO Better iterate over automaton again inside FuzzyCompletionWeight?
  return new FuzzyCompletionWeight(this, automaton, refs);
}
 
Example 7
Source Project: lucene-solr   Source File: ContextQuery.java    License: Apache License 2.0
private static Automaton toContextAutomaton(final Map<IntsRef, ContextMetaData> contexts, final boolean matchAllContexts) {
  final Automaton matchAllAutomaton = Operations.repeat(Automata.makeAnyString());
  final Automaton sep = Automata.makeChar(ContextSuggestField.CONTEXT_SEPARATOR);
  if (matchAllContexts || contexts.size() == 0) {
    return Operations.concatenate(matchAllAutomaton, sep);
  } else {
    Automaton contextsAutomaton = null;
    for (Map.Entry<IntsRef, ContextMetaData> entry : contexts.entrySet()) {
      final ContextMetaData contextMetaData = entry.getValue();
      final IntsRef ref = entry.getKey();
      Automaton contextAutomaton = Automata.makeString(ref.ints, ref.offset, ref.length);
      if (contextMetaData.exact == false) {
        contextAutomaton = Operations.concatenate(contextAutomaton, matchAllAutomaton);
      }
      contextAutomaton = Operations.concatenate(contextAutomaton, sep);
      if (contextsAutomaton == null) {
        contextsAutomaton = contextAutomaton;
      } else {
        contextsAutomaton = Operations.union(contextsAutomaton, contextAutomaton);
      }
    }
    return contextsAutomaton;
  }
}
 
Example 8
Source Project: lucene-solr   Source File: TermAutomatonQuery.java    License: Apache License 2.0
public TermAutomatonWeight(Automaton automaton, IndexSearcher searcher, Map<Integer,TermStates> termStates, float boost) throws IOException {
  super(TermAutomatonQuery.this);
  this.automaton = automaton;
  this.termStates = termStates;
  this.similarity = searcher.getSimilarity();
  List<TermStatistics> allTermStats = new ArrayList<>();
  for(Map.Entry<Integer,BytesRef> ent : idToTerm.entrySet()) {
    Integer termID = ent.getKey();
    if (ent.getValue() != null) {
      TermStates ts = termStates.get(termID);
      if (ts.docFreq() > 0) {
        allTermStats.add(searcher.termStatistics(new Term(field, ent.getValue()), ts.docFreq(), ts.totalTermFreq()));
      }
    }
  }

  if (allTermStats.isEmpty()) {
    stats = null; // no terms matched at all, will not use sim
  } else {
    stats = similarity.scorer(boost, searcher.collectionStatistics(field),
                                     allTermStats.toArray(new TermStatistics[allTermStats.size()]));
  }
}
 
Example 9
Source Project: lucene-solr   Source File: TestSynonymGraphFilter.java    License: Apache License 2.0
/** Just creates a side path from startState to endState with the provided tokens. */
private static void addSidePath(Automaton.Builder a, int startState, int endState, char[] tokens, List<Integer> flatStates) {
  int lastState = startState;
  for(int i=0;i<tokens.length;i++) {
    int nextState;
    if (i == tokens.length-1) {
      nextState = endState;
    } else if (flatStates == null || i >= flatStates.size()) {
      nextState = a.createState();
      if (flatStates != null) {
        assert i == flatStates.size();
        flatStates.add(nextState);
      }
    } else {
      nextState = flatStates.get(i);
    }
    a.addTransition(lastState, nextState, tokens[i]);

    lastState = nextState;
  }
}
 
Example 10
Source Project: lucene-solr   Source File: TestDuelingAnalyzers.java    License: Apache License 2.0
@BeforeClass
public static void beforeClass() throws Exception {
  Automaton single = new Automaton();
  int initial = single.createState();
  int accept = single.createState();
  single.setAccept(accept, true);

  // build an automaton matching this jvm's letter definition
  for (int i = 0; i <= 0x10FFFF; i++) {
    if (Character.isLetter(i)) {
      single.addTransition(initial, accept, i);
    }
  }
  Automaton repeat = Operations.repeat(single);
  jvmLetter = new CharacterRunAutomaton(repeat);
}
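Once wrapped in a CharacterRunAutomaton, matching needs no further conversion; a small follow-on sketch (a hypothetical test method in the same class, so the jvmLetter field is in scope):

public void testJvmLetterMatches() {
  // run() walks the precomputed per-character transition table.
  assertTrue(jvmLetter.run("abc"));  // every char passes Character.isLetter
  assertFalse(jvmLetter.run("ab1")); // '1' is not a letter
}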
 
Example 11
Source Project: mtas   Source File: MtasToken.java    License: Apache License 2.0
/**
 * Creates the automaton map.
 *
 * @param prefix the prefix
 * @param valueList the value list
 * @param filter the filter
 * @return the map
 */
public static Map<String, Automaton> createAutomatonMap(String prefix,
    List<String> valueList, Boolean filter) {
  HashMap<String, Automaton> automatonMap = new HashMap<>();
  if (valueList != null) {
    for (String item : valueList) {
      if (filter) {
        item = item.replaceAll("([\\\"\\)\\(\\<\\>\\.\\@\\#\\]\\[\\{\\}])",
            "\\\\$1");
      }
      automatonMap.put(item,
          new RegExp(prefix + MtasToken.DELIMITER + item + "\u0000*")
              .toAutomaton());
    }
  }
  return automatonMap;
}
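A hedged usage sketch (the prefix and value list are hypothetical; MtasToken.DELIMITER comes from the surrounding project):

import java.util.Arrays;
import java.util.Map;
import org.apache.lucene.util.automaton.Automaton;

// Each value maps to an automaton for "<prefix><DELIMITER><value>" plus
// optional trailing \u0000 padding; filter=true escapes regex metacharacters.
Map<String, Automaton> byValue = MtasToken.createAutomatonMap(
    "t",                           // hypothetical prefix
    Arrays.asList("noun", "verb"), // hypothetical values
    true);                         // escape the values before building RegExps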
 
Example 12
Source Project: lucene-solr   Source File: PrefixQuery.java    License: Apache License 2.0
/** Build an automaton accepting all terms with the specified prefix. */
public static Automaton toAutomaton(BytesRef prefix) {
  final int numStatesAndTransitions = prefix.length+1;
  final Automaton automaton = new Automaton(numStatesAndTransitions, numStatesAndTransitions);
  int lastState = automaton.createState();
  for(int i=0;i<prefix.length;i++) {
    int state = automaton.createState();
    automaton.addTransition(lastState, state, prefix.bytes[prefix.offset+i]&0xff);
    lastState = state;
  }
  automaton.setAccept(lastState, true);
  automaton.addTransition(lastState, lastState, 0, 255);
  automaton.finishState();
  assert automaton.isDeterministic();
  return automaton;
}
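Since the labels here are raw bytes, the result pairs naturally with ByteRunAutomaton; a hedged sketch (isBinary=true tells the run automaton to skip the UTF-32 to UTF-8 conversion this automaton does not need):

import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
import org.apache.lucene.util.automaton.Operations;

Automaton a = PrefixQuery.toAutomaton(new BytesRef("lu"));
ByteRunAutomaton bra =
    new ByteRunAutomaton(a, true, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
BytesRef term = new BytesRef("lucene");
System.out.println(bra.run(term.bytes, term.offset, term.length)); // true
BytesRef other = new BytesRef("solr");
System.out.println(bra.run(other.bytes, other.offset, other.length)); // false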
 
Example 13
Source Project: lucene-solr   Source File: GraphTokenStreamFiniteStrings.java    License: Apache License 2.0
/**
 * Returns the articulation points (or cut vertices) of the graph:
 * https://en.wikipedia.org/wiki/Biconnected_component
 */
public int[] articulationPoints() {
  if (det.getNumStates() == 0) {
    return new int[0];
  }
  Automaton.Builder undirect = new Automaton.Builder();
  undirect.copy(det);
  for (int i = 0; i < det.getNumStates(); i++) {
    int numT = det.initTransition(i, transition);
    for (int j = 0; j < numT; j++) {
      det.getNextTransition(transition);
      undirect.addTransition(transition.dest, i, transition.min);
    }
  }
  int numStates = det.getNumStates();
  BitSet visited = new BitSet(numStates);
  int[] depth = new int[numStates];
  int[] low = new int[numStates];
  int[] parent = new int[numStates];
  Arrays.fill(parent, -1);
  List<Integer> points = new ArrayList<>();
  articulationPointsRecurse(undirect.finish(), 0, 0, depth, low, parent, visited, points);
  Collections.reverse(points);
  return points.stream().mapToInt(p -> p).toArray();
}
 
Example 14
Source Project: lucene-solr   Source File: TestRegexpQuery.java    License: Apache License 2.0
public void testCustomProvider() throws IOException {
  AutomatonProvider myProvider = new AutomatonProvider() {
    // automaton that matches quick, brown or bob
    private Automaton quickBrownAutomaton = Operations.union(Arrays
        .asList(Automata.makeString("quick"),
        Automata.makeString("brown"),
        Automata.makeString("bob")));
    
    @Override
    public Automaton getAutomaton(String name) {
      if (name.equals("quickBrown")) return quickBrownAutomaton;
      else return null;
    }
  };
  RegexpQuery query = new RegexpQuery(newTerm("<quickBrown>"), RegExp.ALL,
    myProvider, DEFAULT_MAX_DETERMINIZED_STATES);
  assertEquals(1, searcher.search(query, 5).totalHits.value);
}
 
Example 15
Source Project: Elasticsearch   Source File: XAnalyzingSuggester.java    License: Apache License 2.0
/**
 * Creates a new suggester.
 *
 * @param indexAnalyzer Analyzer that will be used for
 *   analyzing suggestions while building the index.
 * @param queryAnalyzer Analyzer that will be used for
 *   analyzing query text during lookup
 * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP}
 * @param maxSurfaceFormsPerAnalyzedForm Maximum number of
 *   surface forms to keep for a single analyzed form.
 *   When there are too many surface forms we discard the
 *   lowest weighted ones.
 * @param maxGraphExpansions Maximum number of graph paths
 *   to expand from the analyzed form.  Set this to -1 for
 *   no limit.
 */
public XAnalyzingSuggester(Analyzer indexAnalyzer, Automaton queryPrefix, Analyzer queryAnalyzer, int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
                           boolean preservePositionIncrements, FST<Pair<Long, BytesRef>> fst, boolean hasPayloads, int maxAnalyzedPathsForOneInput,
                           int sepLabel, int payloadSep, int endByte, int holeCharacter) {
    // SIMON EDIT: I added fst, hasPayloads and maxAnalyzedPathsForOneInput
  this.indexAnalyzer = indexAnalyzer;
  this.queryAnalyzer = queryAnalyzer;
  this.fst = fst;
  this.hasPayloads = hasPayloads;
  if ((options & ~(EXACT_FIRST | PRESERVE_SEP)) != 0) {
    throw new IllegalArgumentException("options should only contain EXACT_FIRST and PRESERVE_SEP; got " + options);
  }
  this.exactFirst = (options & EXACT_FIRST) != 0;
  this.preserveSep = (options & PRESERVE_SEP) != 0;

  // FLORIAN EDIT: I added <code>queryPrefix</code> for context dependent suggestions
  this.queryPrefix = queryPrefix;

  // NOTE: this is just an implementation limitation; if
  // somehow this is a problem we could fix it by using
  // more than one byte to disambiguate ... but 256 seems
  // like it should be way more than enough.
  if (maxSurfaceFormsPerAnalyzedForm <= 0 || maxSurfaceFormsPerAnalyzedForm > 256) {
    throw new IllegalArgumentException("maxSurfaceFormsPerAnalyzedForm must be > 0 and < 256 (got: " + maxSurfaceFormsPerAnalyzedForm + ")");
  }
  this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm;

  if (maxGraphExpansions < 1 && maxGraphExpansions != -1) {
    throw new IllegalArgumentException("maxGraphExpansions must -1 (no limit) or > 0 (got: " + maxGraphExpansions + ")");
  }
  this.maxGraphExpansions = maxGraphExpansions;
  this.maxAnalyzedPathsForOneInput = maxAnalyzedPathsForOneInput;
  this.preservePositionIncrements = preservePositionIncrements;
  this.sepLabel = sepLabel;
  this.payloadSep = payloadSep;
  this.endByte = endByte;
  this.holeCharacter = holeCharacter;
}
 
Example 16
Source Project: lucene-solr   Source File: TestGraphTokenizers.java    License: Apache License 2.0
public void testSynOverHole2() throws Exception {
  final TokenStream ts = new CannedTokenStream(
    new Token[] {
      token("xyz", 1, 1),
      token("abc", 0, 3),
      token("def", 2, 1),
    });
  final Automaton expected = Operations.union(
    join(s2a("xyz"), SEP_A, HOLE_A, SEP_A, s2a("def")), s2a("abc"));
  assertSameLanguage(expected, ts);
}
 
Example 17
Source Project: Elasticsearch   Source File: XAnalyzingSuggester.java    License: Apache License 2.0
/** Returns all completion paths to initialize the search. */
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
                                                                     Automaton lookupAutomaton,
                                                                     FST<Pair<Long,BytesRef>> fst)
  throws IOException {
  return prefixPaths;
}
 
Example 18
Source Project: Elasticsearch   Source File: XAnalyzingSuggester.java    License: Apache License 2.0
public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
    final TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton();
    Automaton automaton;
    try (TokenStream ts = stream) {
        automaton = toAutomaton(ts, ts2a);
    }
    LimitedFiniteStringsIterator finiteStrings =
            new LimitedFiniteStringsIterator(automaton, maxGraphExpansions);
    Set<IntsRef> set = new HashSet<>();
    for (IntsRef string = finiteStrings.next(); string != null; string = finiteStrings.next()) {
        set.add(IntsRef.deepCopyOf(string));
    }
    return Collections.unmodifiableSet(set);
}
 
Example 19
Source Project: BioSolr   Source File: XJoinQParserPlugin.java    License: Apache License 2.0
@Override
@SuppressWarnings("unchecked")
Filter makeFilter(String fname, Iterator<BytesRef> it) {
  Automaton union = Automata.makeStringUnion(IteratorUtils.toList(it));
  return new MultiTermQueryWrapperFilter<AutomatonQuery>(new AutomatonQuery(new Term(fname), union)) {
  };
}
 
Example 20
Source Project: lucene-solr   Source File: GraphQuery.java    License: Apache License 2.0
/** Build an automaton to represent the frontier query */
private Automaton buildAutomaton(BytesRefHash termBytesHash) {
  // need to pass a sorted set of terms to the automaton builder (maybe a better way to avoid this?)
  final TreeSet<BytesRef> terms = new TreeSet<BytesRef>();
  for (int i = 0 ; i < termBytesHash.size(); i++) {
    BytesRef ref = new BytesRef();
    termBytesHash.get(i, ref);
    terms.add(ref);
  }
  final Automaton a = DaciukMihovAutomatonBuilder.build(terms);
  return a;    
}
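For most callers, Automata.makeStringUnion is the convenience entry point to this same Daciuk-Mihov construction; a minimal sketch with hypothetical terms:

import java.util.TreeSet;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;

TreeSet<BytesRef> terms = new TreeSet<>(); // the builder requires sorted input
terms.add(new BytesRef("graph"));
terms.add(new BytesRef("query"));
Automaton union = Automata.makeStringUnion(terms); // minimal, deterministic DFA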
 
Example 21
Source Project: Elasticsearch   Source File: CategoryContextMapping.java    License: Apache License 2.0
@Override
public Automaton toAutomaton() {
    List<Automaton> automatons = new ArrayList<>();
    for (CharSequence value : values) {
        automatons.add(Automata.makeString(value.toString()));
    }
    return Operations.union(automatons);
}
 
Example 22
Source Project: Elasticsearch   Source File: GeolocationContextMapping.java    License: Apache License 2.0
@Override
public Automaton toAutomaton() {
    Automaton automaton;
    if (precisions == null || precisions.length == 0) {
        automaton = Automata.makeString(location);
    } else {
        automaton = Automata.makeString(location.substring(0, Math.max(1, Math.min(location.length(), precisions[0]))));
        for (int i = 1; i < precisions.length; i++) {
            final String cell = location.substring(0, Math.max(1, Math.min(location.length(), precisions[i])));
            automaton = Operations.union(automaton, Automata.makeString(cell));
        }
    }
    return automaton;
}
 
Example 23
Source Project: lucene-solr   Source File: FuzzySuggester.java    License: Apache License 2.0
@Override
protected Automaton convertAutomaton(Automaton a) {
  if (unicodeAware) {
    Automaton utf8automaton = new UTF32ToUTF8().convert(a);
    utf8automaton = Operations.determinize(utf8automaton, DEFAULT_MAX_DETERMINIZED_STATES);
    return utf8automaton;
  } else {
    return a;
  }
}
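The conversion step can be reproduced standalone; a sketch showing a code-point automaton becoming a byte-labeled one (the literal string is arbitrary):

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.UTF32ToUTF8;

// 'é' is one code point but two UTF-8 bytes; the converted automaton
// matches the byte sequence rather than the code points.
Automaton chars = Automata.makeString("café");
Automaton bytes = Operations.determinize(
    new UTF32ToUTF8().convert(chars), Operations.DEFAULT_MAX_DETERMINIZED_STATES);
BytesRef t = new BytesRef("café");
ByteRunAutomaton bra =
    new ByteRunAutomaton(bytes, true, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
System.out.println(bra.run(t.bytes, t.offset, t.length)); // true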
 
Example 24
Source Project: lucene-solr   Source File: AnalyzingSuggester.java    License: Apache License 2.0
/** Returns all prefix paths to initialize the search. */
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
                                                                     Automaton lookupAutomaton,
                                                                     FST<Pair<Long,BytesRef>> fst)
  throws IOException {
  return prefixPaths;
}
 
Example 25
Source Project: lucene-solr   Source File: AnalyzingSuggester.java    License: Apache License 2.0
final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
  // TODO: is there a Reader from a CharSequence?
  // Turn tokenstream into automaton:
  Automaton automaton = null;
  try (TokenStream ts = queryAnalyzer.tokenStream("", key.toString())) {
    automaton = getTokenStreamToAutomaton().toAutomaton(ts);
  }

  automaton = replaceSep(automaton);

  // TODO: we can optimize this somewhat by determinizing
  // while we convert
  automaton = Operations.determinize(automaton, DEFAULT_MAX_DETERMINIZED_STATES);
  return automaton;
}
 
Example 26
Source Project: lucene-solr   Source File: RegexCompletionQuery.java    License: Apache License 2.0
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  // If an empty regex is provided, we return an automaton that matches nothing. This ensures
  // consistency with PrefixCompletionQuery, which returns no results for an empty term.
  Automaton automaton = getTerm().text().isEmpty()
      ? Automata.makeEmpty()
      : new RegExp(getTerm().text(), flags).toAutomaton(maxDeterminizedStates);
  return new CompletionWeight(this, automaton);
}
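For reference, the RegExp-to-Automaton path used above can be exercised directly; a minimal sketch with a hypothetical pattern:

import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.RegExp;

Automaton a = new RegExp("lu.*ne")
    .toAutomaton(Operations.DEFAULT_MAX_DETERMINIZED_STATES);
CharacterRunAutomaton run = new CharacterRunAutomaton(a);
System.out.println(run.run("lucene")); // true
System.out.println(run.run("luke"));   // false: does not end in "ne"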
 
Example 27
Source Project: lucene-solr   Source File: CompletionScorer.java    License: Apache License 2.0
/**
 * Creates a scorer for a field-specific <code>suggester</code> scoped by <code>acceptDocs</code>
 */
protected CompletionScorer(final CompletionWeight weight, final NRTSuggester suggester,
                           final LeafReader reader, final Bits filterDocs,
                           final boolean filtered, final Automaton automaton) throws IOException {
  this.weight = weight;
  this.suggester = suggester;
  this.reader = reader;
  this.automaton = automaton;
  this.filtered = filtered;
  this.filterDocs = filterDocs;
}
 
Example 28
Source Project: lucene-solr   Source File: ContextQuery.java    License: Apache License 2.0
public ContextCompletionWeight(CompletionQuery query, Automaton automaton, CompletionWeight innerWeight,
                               Map<IntsRef, Float> contextMap,
                               int[] contextLengths) throws IOException {
  super(query, automaton);
  this.contextMap = contextMap;
  this.contextLengths = contextLengths;
  this.innerWeight = innerWeight;
}
 
Example 29
Source Project: lucene-solr   Source File: TestGraphTokenizers.java    License: Apache License 2.0
public void testOverlappedTokensLattice2() throws Exception {
  final TokenStream ts = new CannedTokenStream(
    new Token[] {
      token("abc", 1, 1),
      token("xyz", 0, 3),
      token("def", 1, 1),
      token("ghi", 1, 1),
    });
  final Automaton a1 = s2a("xyz");
  final Automaton a2 = join("abc", "def", "ghi");
  assertSameLanguage(Operations.union(a1, a2), ts);
}
 
Example 30
Source Project: mtas   Source File: MtasJoinQParser.java    License: Apache License 2.0
@Override
public Query parse() throws SyntaxError {
  if (id == null) {
    throw new SyntaxError("no " + MTAS_JOIN_QPARSER_COLLECTION);
  } else if (fields == null) {
    throw new SyntaxError("no " + MTAS_JOIN_QPARSER_FIELD);
  } else {

    BooleanQuery.Builder booleanQueryBuilder = new BooleanQuery.Builder();

    MtasSolrCollectionCache mtasSolrJoinCache = null;
    for (PluginHolder<SearchComponent> item : req.getCore()
        .getSearchComponents().getRegistry().values()) {
      if (item.get() instanceof MtasSolrSearchComponent) {
        mtasSolrJoinCache = ((MtasSolrSearchComponent) item.get())
            .getCollectionCache();
      }
    }
    if (mtasSolrJoinCache != null) {
      Automaton automaton;
      try {
        automaton = mtasSolrJoinCache.getAutomatonById(id);
        if (automaton != null) {
          for (String field : fields) {
            booleanQueryBuilder.add(
                new AutomatonQuery(new Term(field), automaton), Occur.SHOULD);
          }
        } else {
          throw new IOException("no data for collection '" + id + "'");
        }
      } catch (IOException e) {
        throw new SyntaxError(
            "could not construct automaton: " + e.getMessage(), e);
      }
      return booleanQueryBuilder.build();
    } else {
      throw new SyntaxError("no MtasSolrSearchComponent found");
    }
  }
}