Java Code Examples for org.apache.lucene.index.TermsEnum#SeekStatus

The following examples show how to use org.apache.lucene.index.TermsEnum#SeekStatus. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ContainsPrefixTreeQuery.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Tries to position the terms enum on the term for {@code cell}, comparing cells with
 * {@code compareToNoLeaf}.
 *
 * <p>Side effects: may update {@code seekTerm}, {@code thisTerm} and {@code indexedCell};
 * {@code thisTerm} is set to {@code null} once the enum is exhausted.
 *
 * @param cell the cell to seek to
 * @return {@code true} if the enum ends up on a term matching {@code cell}
 * @throws IOException if the underlying terms enum fails
 */
private boolean seek(Cell cell) throws IOException {
  if (thisTerm == null) {
    return false; // the enum was exhausted by an earlier seek
  }

  final int order = indexedCell.compareToNoLeaf(cell);
  if (order >= 0) {
    // order == 0: already there; order > 0: leap-frog effect, no seek is attempted.
    return order == 0;
  }

  // order < 0: seek!
  seekTerm = cell.getTokenBytesNoLeaf(seekTerm);
  final TermsEnum.SeekStatus status = termsEnum.seekCeil(seekTerm);
  if (status == TermsEnum.SeekStatus.END) {
    thisTerm = null; // all done
    return false;
  }

  thisTerm = termsEnum.term();
  indexedCell = grid.readCell(thisTerm, indexedCell);
  // An exact hit matches outright; otherwise only a leaf that compares equal counts.
  return status == TermsEnum.SeekStatus.FOUND
      || (indexedCell.isLeaf() && indexedCell.compareToNoLeaf(cell) == 0);
}
 
Example 2
Source File: DocToDoubleVectorUtils.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a sparse <code>Double</code> vector with one slot per term of the field, holding the
 * local frequency of that term in the document (or <code>0</code> when the document does not
 * contain it).
 *
 * @param docTerms   term vectors for a given document
 * @param fieldTerms field term vectors
 * @return the vector as an array, or <code>null</code> when <code>docTerms</code> is
 *         <code>null</code> or <code>fieldTerms.size()</code> is negative
 * @throws IOException in case accessing the underlying index fails
 */
public static Double[] toSparseLocalFreqDoubleArray(Terms docTerms, Terms fieldTerms) throws IOException {
  TermsEnum fieldTermsEnum = fieldTerms.iterator();
  if (docTerms == null || fieldTerms.size() <= -1) {
    return null;
  }

  Double[] vector = new Double[(int) fieldTerms.size()];
  TermsEnum docTermsEnum = docTerms.iterator();
  int slot = 0;
  for (BytesRef fieldTerm = fieldTermsEnum.next(); fieldTerm != null; fieldTerm = fieldTermsEnum.next()) {
    switch (docTermsEnum.seekCeil(fieldTerm)) {
      case FOUND:
        // the total number of occurrences of this term in the given document
        vector[slot] = (double) docTermsEnum.totalTermFreq();
        break;
      case END:
        // the seek ran off the end: start over with a fresh enum for the next lookup
        docTermsEnum = docTerms.iterator();
        vector[slot] = 0d;
        break;
      default:
        vector[slot] = 0d;
        break;
    }
    slot++;
  }
  return vector;
}
 
Example 3
Source File: SrndTermQuery.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Reports this query's term to {@code mtv} if a seek for it in {@code fieldName} finds it
 * exactly.
 *
 * @param reader    index reader whose terms are consulted
 * @param fieldName field to look the term up in
 * @param mtv       visitor notified when the term is found
 * @throws IOException if reading the terms fails
 */
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  /* check term presence in index here for symmetry with other SimpleTerm's */
  final Terms fieldTerms = MultiTerms.getTerms(reader, fieldName);
  if (fieldTerms == null) {
    return;
  }

  final TermsEnum termsEnum = fieldTerms.iterator();
  final BytesRef target = new BytesRef(getTermText());
  if (termsEnum.seekCeil(target) == TermsEnum.SeekStatus.FOUND) {
    mtv.visitMatchingTerm(getLuceneTerm(fieldName));
  }
}
 
Example 4
Source File: SolrRangeQuery.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
public RangeTermsEnum(Terms terms) throws IOException {
  if (terms == null) {
    positioned = true;
  } else {
    te = terms.iterator();
    if (lower != null) {
      TermsEnum.SeekStatus status = te.seekCeil(lower);
      if (status == TermsEnum.SeekStatus.END) {
        positioned = true;
        curr = null;
      } else if (status == SeekStatus.FOUND) {
        positioned = includeLower();
        curr = te.term();
      } else {
        // lower bound not found, so includeLower is irrelevant
        positioned = true;
        curr = te.term();
      }
    }
  }
}
 
Example 5
Source File: TestTermBytesComparator.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Seeks {@code lookedTerm} in the block, asserts that {@code lineIndexInBlock - 1} equals
 * {@code expectedPosition}, then resets the reader.
 *
 * @param expectedPosition expected value of {@code blockReader.lineIndexInBlock - 1} after the seek
 * @param blockReader      reader to seek in; reset before returning
 * @param lookedTerm       term to seek
 * @return the status returned by the seek
 * @throws IOException if the seek fails
 */
private TermsEnum.SeekStatus assertGreaterUntil(int expectedPosition, MockBlockReader blockReader, BytesRef lookedTerm) throws IOException {
  final TermsEnum.SeekStatus status = blockReader.seekInBlock(lookedTerm);
  final String failureMessage = "looked Term: " + lookedTerm.utf8ToString();
  assertEquals(failureMessage, expectedPosition, blockReader.lineIndexInBlock - 1);

  // Leave the reader in its reset state.
  blockReader.reset();
  return status;
}
 
Example 6
Source File: SrndTruncQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Visits every term of {@code fieldName} that starts with {@code prefixRef} and whose
 * remainder (after the first {@code prefix.length()} characters) matches {@code pattern}.
 *
 * @param reader    index reader whose terms are enumerated
 * @param fieldName field to enumerate
 * @param mtv       visitor notified for each matching term
 * @throws IOException if reading the terms fails
 */
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  final int prefixLength = prefix.length();
  final Terms fieldTerms = MultiTerms.getTerms(reader, fieldName);
  if (fieldTerms == null) {
    return;
  }

  final Matcher matcher = pattern.matcher("");
  try {
    final TermsEnum termsEnum = fieldTerms.iterator();

    // Pick the first candidate: the prefix itself on an exact hit, the term the seek
    // landed on otherwise, or nothing when the seek ran off the end.
    final TermsEnum.SeekStatus status = termsEnum.seekCeil(prefixRef);
    BytesRef candidate = null;
    if (status == TermsEnum.SeekStatus.FOUND) {
      candidate = prefixRef;
    } else if (status == TermsEnum.SeekStatus.NOT_FOUND) {
      candidate = termsEnum.term();
    }

    // Stop at the end of the enum or at the first term not starting with the prefix.
    for (; candidate != null && StringHelper.startsWith(candidate, prefixRef); candidate = termsEnum.next()) {
      final String termText = candidate.utf8ToString();
      matcher.reset(termText.substring(prefixLength));
      if (matcher.matches()) {
        mtv.visitMatchingTerm(new Term(fieldName, termText));
      }
    }
  } finally {
    matcher.reset();
  }
}
 
Example 7
Source File: SrndPrefixQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Visits every term of {@code fieldName} that starts with this query's prefix.
 *
 * @param reader    index reader whose terms are enumerated
 * @param fieldName field to enumerate
 * @param mtv       visitor notified for each matching term
 * @throws IOException if reading the terms fails
 */
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  /* inspired by PrefixQuery.rewrite(): */
  final Terms fieldTerms = MultiTerms.getTerms(reader, fieldName);
  if (fieldTerms == null) {
    return;
  }

  final TermsEnum termsEnum = fieldTerms.iterator();
  final TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getPrefix()));
  if (status == TermsEnum.SeekStatus.FOUND) {
    // The prefix itself is a term.
    mtv.visitMatchingTerm(getLucenePrefixTerm(fieldName));
  } else if (status == TermsEnum.SeekStatus.NOT_FOUND
      && StringHelper.startsWith(termsEnum.term(), prefixRef)) {
    // The seek landed on a term that carries the prefix.
    mtv.visitMatchingTerm(new Term(fieldName, termsEnum.term().utf8ToString()));
  } else {
    // EOF, or the term the seek landed on does not carry the prefix: nothing to visit.
    return;
  }

  // Keep visiting until the enum ends or a term without the prefix shows up.
  for (BytesRef text = termsEnum.next();
       text != null && StringHelper.startsWith(text, prefixRef);
       text = termsEnum.next()) {
    mtv.visitMatchingTerm(new Term(fieldName, text.utf8ToString()));
  }
}
 
Example 8
Source File: TermPrefixCursor.java    From SolrTextTagger with Apache License 2.0 5 votes vote down vote up
/**
 * Seeks to prefixBuf or the next term that is prefixed by prefixBuf plus the separator char.
 * Sets docIds: non-null only when prefixBuf itself is a term with at least one doc id.
 *
 * @return {@code true} if positioned on prefixBuf (with doc ids) or on a longer term that
 *         starts with prefixBuf followed by {@code SEPARATOR_CHAR}
 * @throws IOException if the terms enum fails
 */
private boolean seekPrefix() throws IOException {
  final TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefixBuf);

  docIds = null; // invalidate whatever an earlier call left behind
  switch (seekStatus) {
    case END:
      return false;

    case FOUND:
      postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
      docIds = postingsEnumToIntsRef(postingsEnum, liveDocs);
      if (docIds.length > 0) {
        return true;
      }

      // No doc ids for the exact term: pretend we didn't find it and move to the next term.
      docIds = null;
      if (termsEnum.next() == null) { // same as END
        return false;
      }
      // deliberate fall through to NOT_FOUND

    case NOT_FOUND:
      // The current term must be longer than prefixBuf, start with it, and continue
      // with the separator char.
      final BytesRef candidate = termsEnum.term();
      if (candidate.length <= prefixBuf.length) {
        return false;
      }
      for (int i = 0; i < prefixBuf.length; i++) {
        if (candidate.bytes[candidate.offset + i] != prefixBuf.bytes[prefixBuf.offset + i]) {
          return false;
        }
      }
      return candidate.bytes[candidate.offset + prefixBuf.length] == SEPARATOR_CHAR;
  }
  throw new IllegalStateException(seekStatus.toString());
}
 
Example 9
Source File: MultiPhrasePrefixQuery.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Collects into {@code terms} the terms of {@code field} that start with {@code prefix},
 * leaf by leaf, stopping as soon as {@code terms} holds {@code maxExpansions} entries.
 *
 * @param terms  set receiving deep copies of the matching terms
 * @param prefix prefix the collected terms must start with
 * @param reader reader whose leaves are scanned
 * @throws IOException if reading a leaf's terms fails
 */
private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
    // SlowCompositeReaderWrapper could be used... but this would merge all terms from each segment into one terms
    // instance, which is very expensive. Therefore I think it is better to iterate over each leaf individually.
    for (LeafReaderContext leaf : reader.leaves()) {
        final Terms leafTerms = leaf.reader().terms(field);
        if (leafTerms == null) {
            continue;
        }

        final TermsEnum termsEnum = leafTerms.iterator();
        if (termsEnum.seekCeil(prefix.bytes()) == TermsEnum.SeekStatus.END) {
            continue;
        }

        // Walk forward from the seek position while terms still carry the prefix.
        BytesRef current = termsEnum.term();
        while (current != null && StringHelper.startsWith(current, prefix.bytes())) {
            terms.add(new Term(field, BytesRef.deepCopyOf(current)));
            if (terms.size() >= maxExpansions) {
                return;
            }
            current = termsEnum.next();
        }
    }
}
 
Example 10
Source File: TestTermBytesComparator.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Asserts that seeking {@code lookedTerm} leaves the reader at position {@code -1}
 * (as checked by {@code assertGreaterUntil}) and reports {@code SeekStatus.END}.
 *
 * @param blockReader reader to seek in
 * @param lookedTerm  term to seek
 * @throws IOException if the seek fails
 */
private void assertAlwaysGreater(MockBlockReader blockReader, BytesRef lookedTerm) throws IOException {
  assertEquals(TermsEnum.SeekStatus.END, assertGreaterUntil(-1, blockReader, lookedTerm));
}
 
Example 11
Source File: TermGroupFacetCollector.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Switches the collector to a new segment: stores the previous segment's result (if any),
 * loads the group and facet doc values for {@code context}, re-maps the already collected
 * grouped facet hits to this segment's ordinals, and computes the facet ordinal range
 * selected by {@code facetPrefix}.
 *
 * @param context the segment about to be collected
 * @throws IOException if reading doc values or seeking facet terms fails
 */
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
  if (segmentFacetCounts != null) {
    segmentResults.add(createSegmentResult());
  }

  groupFieldTermsIndex = DocValues.getSorted(context.reader(), groupField);
  facetFieldDocTermOrds = DocValues.getSortedSet(context.reader(), facetField);
  facetFieldNumTerms = (int) facetFieldDocTermOrds.getValueCount();
  facetOrdTermsEnum = facetFieldNumTerms == 0 ? null : facetFieldDocTermOrds.termsEnum();
  // [facetFieldNumTerms() + 1] for all possible facet values and docs not containing facet field
  segmentFacetCounts = new int[facetFieldNumTerms + 1];
  segmentTotalCount = 0;

  segmentGroupedFacetHits.clear();
  for (GroupedFacetHit hit : groupedFacetHits) {
    // A null group value maps to ordinal -1; a non-null value absent from this segment is skipped.
    int groupOrd = -1;
    if (hit.groupValue != null) {
      groupOrd = groupFieldTermsIndex.lookupTerm(hit.groupValue);
      if (groupOrd < 0) {
        continue;
      }
    }

    // A null facet value takes the extra slot after all real facet ordinals.
    int facetOrd = facetFieldNumTerms;
    if (hit.facetValue != null) {
      if (facetOrdTermsEnum == null || !facetOrdTermsEnum.seekExact(hit.facetValue)) {
        continue;
      }
      facetOrd = (int) facetOrdTermsEnum.ord();
    }

    // (facetFieldDocTermOrds.numTerms() + 1) for all possible facet values and docs not containing facet field
    segmentGroupedFacetHits.put(groupOrd * (facetFieldNumTerms + 1) + facetOrd);
  }

  if (facetPrefix == null) {
    startFacetOrd = 0;
    endFacetOrd = facetFieldNumTerms + 1;
    return;
  }

  // No facet terms in this segment counts as END, just like a seek past the last term.
  final TermsEnum.SeekStatus startStatus = facetOrdTermsEnum == null
      ? TermsEnum.SeekStatus.END
      : facetOrdTermsEnum.seekCeil(facetPrefix);
  if (startStatus == TermsEnum.SeekStatus.END) {
    startFacetOrd = 0;
    endFacetOrd = 0;
    return;
  }
  startFacetOrd = (int) facetOrdTermsEnum.ord();

  // Seek to facetPrefix + BIG_TERM to find where the prefixed range ends.
  final BytesRefBuilder facetEndPrefix = new BytesRefBuilder();
  facetEndPrefix.append(facetPrefix);
  facetEndPrefix.append(UnicodeUtil.BIG_TERM);
  final TermsEnum.SeekStatus endStatus = facetOrdTermsEnum.seekCeil(facetEndPrefix.get());
  endFacetOrd = endStatus != TermsEnum.SeekStatus.END
      ? (int) facetOrdTermsEnum.ord()
      : facetFieldNumTerms; // Don't include null...
}
 
Example 12
Source File: TermPrefixCursor.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Seeks to prefixBuf or the next term that is prefixed by prefixBuf plus the separator char.
 * Sets docIds, which stays null unless prefixBuf itself is a term with at least one doc id.
 *
 * @return whether the enum is now on prefixBuf (with doc ids) or on a longer term made of
 *         prefixBuf followed by {@code SEPARATOR_CHAR}
 * @throws IOException if the terms enum fails
 */
@SuppressWarnings({"fallthrough"})
private boolean seekPrefix() throws IOException {
  TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefixBuf);

  docIds = null;//invalidate
  switch (seekStatus) {
    case END:
      return false;

    case FOUND:
      postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
      docIds = postingsEnumToIntsRef(postingsEnum, liveDocs);
      if (docIds.length > 0) {
        return true;
      }

      //The exact term yielded no doc ids: drop them and step to the following term
      docIds = null;
      if (termsEnum.next() == null) { // case END
        return false;
      }
      //fall through to NOT_FOUND

    case NOT_FOUND:
      //the current term must be strictly longer than prefixBuf to hold the separator
      BytesRef current = termsEnum.term();
      final int prefixLen = prefixBuf.length;
      if (current.length <= prefixLen) {
        return false;
      }

      //count how many leading bytes agree with prefixBuf
      int matched = 0;
      while (matched < prefixLen
          && prefixBuf.bytes[prefixBuf.offset + matched] == current.bytes[current.offset + matched]) {
        matched++;
      }
      return matched == prefixLen
          && current.bytes[current.offset + prefixLen] == SEPARATOR_CHAR;
  }
  throw new IllegalStateException(seekStatus.toString());
}