Java Code Examples for org.apache.lucene.index.Term#text()

The following examples show how to use org.apache.lucene.index.Term#text(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: PersistentClassIndex.java    From netbeans with Apache License 2.0 6 votes vote down vote up
/**
 * Adds the frequency of one indexed term to the per-type and per-package counters.
 * <p>
 * The term text is read as a binary name followed by a postfix of {@code postfixLen}
 * characters; the postfix is stripped and the package is everything before the last
 * {@code '.'} (the empty string when there is no dot).
 *
 * @param param the term together with its frequency
 * @return always {@code null}
 * @throws Stop when the term belongs to a field other than {@code fieldName}
 */
@CheckForNull
@Override
@SuppressWarnings("StringEquality")
public Void convert(@NonNull final Index.WithTermFrequencies.TermFreq param) throws Stop {
    final Term current = param.getTerm();
    // Reference comparison is intentional here (hence the StringEquality suppression);
    // presumably field names are interned -- verify against the index implementation.
    if (fieldName != current.field()) {
        throw new Stop();
    }
    final int freq = param.getFreq();
    final String encoded = current.text();
    final String typeName = encoded.substring(0, encoded.length() - postfixLen);
    final int lastDot = typeName.lastIndexOf('.');  //NOI18N
    final String packageName;
    if (lastDot == -1) {
        packageName = "";    //NOI18N
    } else {
        packageName = typeName.substring(0, lastDot);
    }
    final Integer knownTypeCount = typeFreq.get(typeName);
    final Integer knownPkgCount = pkgFreq.get(packageName);
    typeFreq.put(typeName, knownTypeCount == null ? freq : freq + knownTypeCount);
    pkgFreq.put(packageName, knownPkgCount == null ? freq : freq + knownPkgCount);
    return null;
}
 
Example 2
Source File: QueryUtil.java    From netbeans with Apache License 2.0 6 votes vote down vote up
/**
 * Maps a term to the package name it contributes, or {@code null} when it does not match.
 * <p>
 * A term matches when {@code all} is set or its text starts with {@code value}. With
 * {@code directOnly} the text is cut at the first {@code '.'} found at or after
 * {@code value.length()}, provided that dot is not at index 0.
 *
 * @param currentTerm the term being enumerated
 * @return the (possibly truncated) term text, or {@code null} when the term does not match
 * @throws Stop when the term belongs to a field other than {@code fieldName}
 */
@Override
public String convert(Term currentTerm) throws Stop {
    // Reference comparison on the field name, as in the original code.
    if (fieldName != currentTerm.field()) {
        throw STOP;
    }
    final String text = currentTerm.text();
    if (!all && !text.startsWith(value)) {
        return null;
    }
    if (!directOnly) {
        return text;
    }
    final int dot = text.indexOf('.', value.length());    //NOI18N
    return dot > 0 ? text.substring(0, dot) : text;
}
 
Example 3
Source File: MtasSpanWildcardQuery.java    From mtas with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a span wildcard query for the given term.
 * <p>
 * The term text is split at the first occurrence of {@code MtasToken.DELIMITER}: the part
 * before it becomes {@code prefix}, the part after it becomes {@code value} ({@code null}
 * when that part is empty or when no delimiter is present).
 *
 * @param term the term
 * @param singlePosition the single position
 */
public MtasSpanWildcardQuery(Term term, boolean singlePosition) {
  super(singlePosition ? 1 : null, singlePosition ? 1 : null);
  query = new SpanMultiTermQueryWrapper<>(new WildcardQuery(term));
  this.term = term;
  this.singlePosition = singlePosition;
  final String text = term.text();
  final int delimiterAt = text.indexOf(MtasToken.DELIMITER);
  if (delimiterAt < 0) {
    // No delimiter: the whole text is the prefix.
    prefix = text;
    value = null;
  } else {
    prefix = text.substring(0, delimiterAt);
    final String remainder = text.substring(delimiterAt + MtasToken.DELIMITER.length());
    value = remainder.isEmpty() ? null : remainder;
  }
}
 
Example 4
Source File: MtasSpanPrefixQuery.java    From mtas with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a span prefix query for the given term.
 * <p>
 * The term text is split at the first occurrence of {@code MtasToken.DELIMITER}: the part
 * before it becomes {@code prefix}, the part after it becomes {@code value} ({@code null}
 * when that part is empty or when no delimiter is present).
 *
 * @param term the term
 * @param singlePosition the single position
 */
public MtasSpanPrefixQuery(Term term, boolean singlePosition) {
  super(singlePosition ? 1 : null, singlePosition ? 1 : null);
  query = new SpanMultiTermQueryWrapper<>(new PrefixQuery(term));
  this.term = term;
  this.singlePosition = singlePosition;
  final String text = term.text();
  final int delimiterAt = text.indexOf(MtasToken.DELIMITER);
  if (delimiterAt < 0) {
    // No delimiter: the whole text is the prefix.
    prefix = text;
    value = null;
  } else {
    prefix = text.substring(0, delimiterAt);
    final String remainder = text.substring(delimiterAt + MtasToken.DELIMITER.length());
    value = remainder.isEmpty() ? null : remainder;
  }
}
 
Example 5
Source File: TestSolrCoreParser.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Asserts that {@code query} is a single-term query on {@code fieldName} whose term text
 * equals one of {@code expectedTermTexts}.
 *
 * @param span {@code true} to require a {@code SpanTermQuery}, {@code false} to require a {@code TermQuery}
 * @param query the query under test
 * @param fieldName the field the query's term must be on
 * @param expectedTermTexts the acceptable term texts; at least one must match
 */
private static void checkChooseOneWordQuery(boolean span, Query query, String fieldName, String ... expectedTermTexts) {
  final Term term;
  if (span) {
    assertTrue(query instanceof SpanTermQuery);
    final SpanTermQuery stq = (SpanTermQuery)query;
    term = stq.getTerm();
  } else {
    assertTrue(query instanceof TermQuery);
    final TermQuery tq = (TermQuery)query;
    term = tq.getTerm();
  }
  final String text = term.text();
  boolean foundExpected = false;
  for (String expected : expectedTermTexts) {
    foundExpected |= expected.equals(text);
  }
  assertEquals(fieldName, term.field());
  // Join the candidates explicitly: concatenating the array itself would only print its
  // type and identity hash, which makes a failure impossible to diagnose.
  assertTrue("expected term text ("+text+") not found in ("+String.join(", ", expectedTermTexts)+")", foundExpected);
}
 
Example 6
Source File: Queries.java    From netbeans with Apache License 2.0 5 votes vote down vote up
/**
 * Decides whether the enumerated term is accepted.
 * <p>
 * A term on a different field, or one whose text does not start with {@code startPrefix},
 * sets {@code endEnum} and is rejected. Otherwise the term is accepted when its whole text
 * matches {@code pattern}.
 *
 * @param term the term being enumerated
 * @return {@code true} when the term text matches {@code pattern}
 */
@Override
protected boolean termCompare(Term term) {
    // Reference comparison on the field name, as in the original code.
    final boolean withinPrefix = fieldName == term.field() && term.text().startsWith(startPrefix);
    if (!withinPrefix) {
        endEnum = true;
        return false;
    }
    return pattern.matcher(term.text()).matches();
}
 
Example 7
Source File: TermCharacterFilter.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Accepts a term only when every character of its text is a letter.
 * <p>
 * The text is inspected code point by code point so that letters outside the Basic
 * Multilingual Plane (encoded as surrogate pairs) are recognised as letters instead of
 * being rejected as two non-letter {@code char} values.
 *
 * @param t the term to test
 * @return {@code true} when the term text consists solely of letters (an empty text is accepted)
 */
public boolean filter(Term t)
{
  final String termText = t.text();
  int i = 0;
  while (i < termText.length()) {
    final int codePoint = termText.codePointAt(i);
    if (!Character.isLetter(codePoint)) {
      return false;
    }
    // Advance by one or two chars depending on whether this was a surrogate pair.
    i += Character.charCount(codePoint);
  }
  return true;
}
 
Example 8
Source File: TermStopListFilter.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Rejects terms whose text is contained in {@code stopwords}.
 *
 * @param t the term to test
 * @return {@code false} when the term text is in {@code stopwords}, {@code true} otherwise
 */
public boolean filter(Term t)
{
  return !stopwords.contains(t.text());
}
 
Example 9
Source File: WeightedSpanTermExtractor.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the terms of the rewritten <code>Query</code> and stores a {@link WeightedSpanTerm}
 * for each one whose field passes {@code fieldNameComparator}, keyed by the term text.
 *
 * @param terms
 *          Map to place created WeightedSpanTerms in
 * @param query
 *          Query to extract Terms from
 * @param boost
 *          weight given to every created WeightedSpanTerm
 * @throws IOException If there is a low-level I/O error
 */
protected void extractWeightedTerms(Map<String,WeightedSpanTerm> terms, Query query, float boost) throws IOException {
  final Set<Term> collected = new HashSet<>();
  final Query rewritten = new IndexSearcher(getLeafContext()).rewrite(query);
  rewritten.visit(QueryVisitor.termCollector(collected));

  for (final Term candidate : collected) {
    if (!fieldNameComparator(candidate.field())) {
      continue;
    }
    final String text = candidate.text();
    terms.put(text, new WeightedSpanTerm(boost, text));
  }
}
 
Example 10
Source File: 1139461_WildcardQuery_0_t.java    From coming with MIT License 5 votes vote down vote up
/**
 * Creates a wildcard query and pre-computes two facts about the term text:
 * whether it contains {@code '*'} or {@code '?'}, and whether it is a plain prefix,
 * i.e. has no {@code '?'} and its first {@code '*'} is the last character.
 *
 * @param term the pattern term
 */
public WildcardQuery(Term term) {
  this.term = term;
  final String text = term.text();
  final int firstStar = text.indexOf('*');
  final boolean hasQuestionMark = text.indexOf('?') != -1;
  this.termContainsWildcard = firstStar != -1 || hasQuestionMark;
  this.termIsPrefix = termContainsWildcard
      && !hasQuestionMark
      && firstStar == text.length() - 1;
}
 
Example 11
Source File: 1139461_WildcardQuery_0_s.java    From coming with MIT License 5 votes vote down vote up
/**
 * Creates a wildcard query and pre-computes two facts about the term text:
 * whether it contains {@code '*'} or {@code '?'}, and whether it is a plain prefix,
 * i.e. has no {@code '?'} and its first {@code '*'} is the last character.
 *
 * @param term the pattern term
 */
public WildcardQuery(Term term) {
  this.term = term;
  final String text = term.text();
  final int firstStar = text.indexOf('*');
  final boolean hasQuestionMark = text.indexOf('?') != -1;
  this.termContainsWildcard = firstStar != -1 || hasQuestionMark;
  this.termIsPrefix = termContainsWildcard
      && !hasQuestionMark
      && firstStar == text.length() - 1;
}
 
Example 12
Source File: QueryParserBase.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a new FuzzyQuery instance. The similarity is converted to an edit count based on
 * the length of the term text measured in Unicode code points.
 *
 * @param term Term
 * @param minimumSimilarity minimum similarity
 * @param prefixLength prefix length
 * @return new FuzzyQuery Instance
 */
protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) {
  // FuzzyQuery doesn't yet allow constant score rewrite
  final String termText = term.text();
  final int codePoints = termText.codePointCount(0, termText.length());
  final int maxEdits = FuzzyQuery.floatToEdits(minimumSimilarity, codePoints);
  return new FuzzyQuery(term, maxEdits, prefixLength);
}
 
Example 13
Source File: CrateRegexTermsEnum.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Compiles the term text as a regular expression with the given flags and sets the
 * initial seek term to the empty term.
 *
 * @param tenum the terms enum handed to the superclass
 * @param term the term whose text is the regular expression
 * @param flags flags passed to {@code CrateRegexCapabilities.compile}
 */
CrateRegexTermsEnum(TermsEnum tenum, Term term, int flags) {
    super(tenum);
    this.regexImpl = CrateRegexCapabilities.compile(term.text(), flags);

    final BytesRef emptyTerm = new BytesRef("");
    setInitialSeekTerm(emptyTerm);
}
 
Example 14
Source File: SolrQueryParserBase.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a new FuzzyQuery instance. The similarity is converted to an edit count based on
 * the length of the term text measured in Unicode code points.
 *
 * @param term Term
 * @param minimumSimilarity minimum similarity
 * @param prefixLength prefix length
 * @return new FuzzyQuery Instance
 */
protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) {
  // FuzzyQuery doesn't yet allow constant score rewrite
  final String termText = term.text();
  final int codePoints = termText.codePointCount(0, termText.length());
  final int maxEdits = FuzzyQuery.floatToEdits(minimumSimilarity, codePoints);
  return new FuzzyQuery(term, maxEdits, prefixLength);
}
 
Example 15
Source File: QueryUtil.java    From netbeans with Apache License 2.0 4 votes vote down vote up
PackageFilter(final @NonNull Term startTerm, final boolean directOnly) {
    this.fieldName = startTerm.field();
    this.value = startTerm.text();
    this.directOnly = directOnly;
    this.all = value.length() == 0;
}
 
Example 16
Source File: TestRegexpRandom2.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Parses the term text as a regular expression and stores its automaton.
 *
 * @param term supplies the field (passed to the superclass) and the regular expression text
 * @param flags syntax flags passed to {@code RegExp}
 */
DumbRegexpQuery(Term term, int flags) {
  super(term.field());
  automaton = new RegExp(term.text(), flags).toAutomaton();
}
 
Example 17
Source File: DirectSpellChecker.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Provide spelling corrections based on several parameters.
 * <p>
 * Candidates are enumerated with a {@code FuzzyTermsEnum} and kept in a priority queue that
 * is trimmed to at most {@code numSug} entries. An empty list is returned when no
 * {@code Terms} are available for the term's field.
 *
 * @param term The term to suggest spelling corrections for
 * @param numSug The maximum number of spelling corrections
 * @param ir The index reader to fetch the candidate spelling corrections from
 * @param docfreq The minimum document frequency a potential suggestion need to have in order to be included
 * @param editDistance The maximum edit distance candidates are allowed to have
 * @param accuracy The minimum accuracy a suggested spelling correction needs to have in order to be included
 * @param spare a chars scratch
 * @return a collection of spelling corrections sorted by <code>ScoreTerm</code>'s natural order.
 * @throws IOException If I/O related errors occur
 */
protected Collection<ScoreTerm> suggestSimilar(Term term, int numSug, IndexReader ir, int docfreq, int editDistance,
                                               float accuracy, final CharsRefBuilder spare) throws IOException {

  Terms terms = MultiTerms.getTerms(ir, term.field());
  if (terms == null) {
    return Collections.emptyList();
  }
  // The required common prefix is the larger of minPrefix and (editDistance - 1).
  FuzzyTermsEnum e = new FuzzyTermsEnum(terms, term, editDistance, Math.max(minPrefix, editDistance - 1), true);
  final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<>();
  
  BytesRef queryTerm = new BytesRef(term.text());
  BytesRef candidateTerm;
  // Scratch entry: filled in for each accepted candidate, offered to the queue, and then
  // replaced either by the entry the queue evicts or by a fresh instance (see below).
  ScoreTerm st = new ScoreTerm();
  while ((candidateTerm = e.next()) != null) {
    // For FuzzyQuery, boost is the score:
    float score = e.getBoost();
    // ignore uncompetitive hits
    if (stQueue.size() >= numSug && score <= stQueue.peek().boost) {
      continue;
    }
    
    // ignore exact match of the same term
    if (queryTerm.bytesEquals(candidateTerm)) {
      continue;
    }
    
    int df = e.docFreq();
    
    // check docFreq if required
    // (a candidate must occur in strictly more than docfreq documents to be kept)
    if (df <= docfreq) {
      continue;
    }
    
    final String termAsString;
    if (distance == INTERNAL_LEVENSHTEIN) {
      // delay creating strings until the end
      termAsString = null;
    } else {
      // A custom distance is configured: decode the candidate and let that distance
      // replace the score taken from the enum's boost.
      spare.copyUTF8Bytes(candidateTerm);
      termAsString = spare.toString();
      score = distance.getDistance(term.text(), termAsString);
    }
    
    if (score < accuracy) {
      continue;
    }
    
    // add new entry in PQ
    // (deep copy of the candidate bytes; NOTE(review): presumably because the enum reuses
    // the returned BytesRef between calls to next() -- confirm)
    st.term = BytesRef.deepCopyOf(candidateTerm);
    st.boost = score;
    st.docfreq = df;
    st.termAsString = termAsString;
    st.score = score;
    stQueue.offer(st);
    // possibly drop entries from queue
    // (the polled entry is recycled as the next scratch object)
    st = (stQueue.size() > numSug) ? stQueue.poll() : new ScoreTerm();
    // Once the queue is full, tell the enum the boost a candidate has to beat.
    e.setMaxNonCompetitiveBoost((stQueue.size() >= numSug) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY);
  }
    
  return stQueue;
}
 
Example 18
Source File: WordBreakSpellChecker.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Tries to split the text of {@code term} into two words at every allowed position and
 * offers each acceptable split to {@code suggestions}; the right-hand part is split
 * further by recursion while the number of breaks stays within {@code maxChanges}.
 * <p>
 * Lengths and split positions are measured in Unicode code points. The search stops as
 * soon as {@code totalEvaluations} reaches {@code maxEvaluations}.
 *
 * @param term the term whose text is to be broken up
 * @param ir the index reader passed to {@code generateSuggestWord}
 * @param numberBreaks the number of breaks already made on the way to this call
 * @param maxSuggestions the maximum size {@code suggestions} is allowed to keep
 * @param useMinSuggestionFrequency the minimum {@code freq} each word must have
 * @param prefix the words already split off to the left of {@code term}
 * @param suggestions the queue receiving the suggestions; its head is polled whenever it grows beyond {@code maxSuggestions}
 * @param totalEvaluations the number of evaluations performed before this call
 * @param sortMethod only forwarded to the recursive call in this method
 * @return the number of split positions evaluated directly by this call (recursive evaluations are not included)
 * @throws IOException if {@code generateSuggestWord} or the recursive call throws it
 */
private int generateBreakUpSuggestions(Term term, IndexReader ir,
    int numberBreaks, int maxSuggestions, int useMinSuggestionFrequency,
    SuggestWord[] prefix, Queue<SuggestWordArrayWrapper> suggestions,
    int totalEvaluations, BreakSuggestionSortMethod sortMethod)
    throws IOException {
  String termText = term.text();
  int termLength = termText.codePointCount(0, termText.length());
  // Each part must contain at least one code point, whatever minBreakWordLength says.
  int useMinBreakWordLength = minBreakWordLength;
  if (useMinBreakWordLength < 1) {
    useMinBreakWordLength = 1;
  }
  // Too short to yield two parts of the minimum length.
  if (termLength < (useMinBreakWordLength * 2)) {
    return 0;
  }    
  
  int thisTimeEvaluations = 0;
  for (int i = useMinBreakWordLength; i <= (termLength - useMinBreakWordLength); i++) {
    // i counts code points; convert it to a char index before cutting the string.
    int end = termText.offsetByCodePoints(0, i);
    String leftText = termText.substring(0, end);
    String rightText = termText.substring(end);
    SuggestWord leftWord = generateSuggestWord(ir, term.field(), leftText);
    
    // The right part is only looked up when the left part is frequent enough.
    if (leftWord.freq >= useMinSuggestionFrequency) {
      SuggestWord rightWord = generateSuggestWord(ir, term.field(), rightText);
      if (rightWord.freq >= useMinSuggestionFrequency) {
        SuggestWordArrayWrapper suggestion = new SuggestWordArrayWrapper(
            newSuggestion(prefix, leftWord, rightWord));
        suggestions.offer(suggestion);
        if (suggestions.size() > maxSuggestions) {
          suggestions.poll();
        }
      }        
      // Recurse on the right part (even when it was not frequent enough on its own),
      // with the left word appended to the prefix.
      int newNumberBreaks = numberBreaks + 1;
      if (newNumberBreaks <= maxChanges) {
        int evaluations = generateBreakUpSuggestions(new Term(term.field(),
            rightWord.string), ir, newNumberBreaks, maxSuggestions,
            useMinSuggestionFrequency, newPrefix(prefix, leftWord),
            suggestions, totalEvaluations, sortMethod);
        totalEvaluations += evaluations;
      }
    }
    
    thisTimeEvaluations++;
    totalEvaluations++;
    // Global budget: stop once the overall evaluation count reaches maxEvaluations.
    if (totalEvaluations >= maxEvaluations) {
      break;
    }
  }
  return thisTimeEvaluations;
}
 
Example 19
Source File: FuzzyTermsEnum.java    From lucene-solr with Apache License 2.0 2 votes vote down vote up
/**
 * Constructor for enumeration of all terms from the specified <code>terms</code> which share a
 * prefix of length <code>prefixLength</code> with <code>term</code> and which have at most
 * {@code maxEdits} edits.
 * <p>
 * After calling the constructor the enumeration is already pointing to the first
 * valid term if such a term exists.
 * <p>
 * Delegates to the supplier-based constructor, handing it a supplier that builds a
 * {@code FuzzyAutomatonBuilder} from the term text and the given limits.
 *
 * @param terms Delivers terms.
 * @param atts An AttributeSource used to share automata between segments
 * @param term Pattern term.
 * @param maxEdits Maximum edit distance.
 * @param prefixLength the length of the required common prefix
 * @param transpositions whether transpositions should count as a single edit
 * @throws IOException if there is a low-level IO error
 */
FuzzyTermsEnum(Terms terms, AttributeSource atts, Term term, int maxEdits, int prefixLength, boolean transpositions) throws IOException {
  this(
      terms,
      atts,
      term,
      () -> new FuzzyAutomatonBuilder(term.text(), maxEdits, prefixLength, transpositions));
}
 
Example 20
Source File: FuzzyTermsEnum.java    From lucene-solr with Apache License 2.0 2 votes vote down vote up
/**
 * Constructor for enumeration of all terms from the specified <code>terms</code> which share a
 * prefix of length <code>prefixLength</code> with <code>term</code> and which have at most
 * {@code maxEdits} edits.
 * <p>
 * After calling the constructor the enumeration is already pointing to the first
 * valid term if such a term exists.
 * <p>
 * Delegates to the supplier-based constructor with a fresh {@code AttributeSource} and a
 * supplier that builds a {@code FuzzyAutomatonBuilder} from the term text and the given limits.
 *
 * @param terms Delivers terms.
 * @param term Pattern term.
 * @param maxEdits Maximum edit distance.
 * @param prefixLength the length of the required common prefix
 * @param transpositions whether transpositions should count as a single edit
 * @throws IOException if there is a low-level IO error
 */
public FuzzyTermsEnum(Terms terms, Term term, int maxEdits, int prefixLength, boolean transpositions) throws IOException {
  this(
      terms,
      new AttributeSource(),
      term,
      () -> new FuzzyAutomatonBuilder(term.text(), maxEdits, prefixLength, transpositions));
}