Java Code Examples for org.apache.lucene.index.PostingsEnum#advance()

The following examples show how to use org.apache.lucene.index.PostingsEnum#advance(). Each example is taken from an open-source project; the source file and license are noted above each snippet.
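Before the project examples, here is a minimal sketch of the pattern most of the snippets share: seek a term, open a PostingsEnum, and call advance(doc) to test whether the term occurs in a given document. The class name AdvanceExample and the method freqInDoc are placeholders invented for this sketch; only the Lucene calls themselves reflect the API used in the examples below.

import java.io.IOException;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

public final class AdvanceExample {

  /** Returns the frequency of {@code term} in document {@code doc}, or 0 if the term does not occur there. */
  static int freqInDoc(LeafReader reader, String field, BytesRef term, int doc) throws IOException {
    Terms terms = reader.terms(field);
    if (terms == null) {
      return 0;                                  // field has no postings in this segment
    }
    TermsEnum te = terms.iterator();
    if (te.seekExact(term) == false) {
      return 0;                                  // term does not occur in this segment
    }
    PostingsEnum pe = te.postings(null, PostingsEnum.FREQS);
    // advance() positions the enum on the first document >= doc (or NO_MORE_DOCS);
    // equality means the term occurs in the requested document
    return pe.advance(doc) == doc ? pe.freq() : 0;
  }
}

Because advance(target) returns the first document id greater than or equal to target (or DocIdSetIterator.NO_MORE_DOCS), comparing the return value with the requested id is enough to decide whether the document matches. Several examples below also pass a previously obtained PostingsEnum back into TermsEnum.postings() as the reuse argument to avoid reallocating the enum.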
Example 1
Source File: DisjunctionMatchesIterator.java    From lucene-solr with Apache License 2.0
/**
 * Create a {@link DisjunctionMatchesIterator} over a list of terms extracted from a {@link BytesRefIterator}
 *
 * Only terms that have at least one match in the given document will be included
 */
static MatchesIterator fromTermsEnum(LeafReaderContext context, int doc, Query query, String field, BytesRefIterator terms) throws IOException {
  Objects.requireNonNull(field);
  Terms t = context.reader().terms(field);
  if (t == null)
    return null;
  TermsEnum te = t.iterator();
  PostingsEnum reuse = null;
  for (BytesRef term = terms.next(); term != null; term = terms.next()) {
    if (te.seekExact(term)) {
      PostingsEnum pe = te.postings(reuse, PostingsEnum.OFFSETS);
      if (pe.advance(doc) == doc) {
        return new TermsEnumDisjunctionMatchesIterator(new TermMatchesIterator(query, pe), terms, te, doc, query);
      }
      else {
        reuse = pe;
      }
    }
  }
  return null;
}
 
Example 2
Source File: DisjunctionMatchesIterator.java    From lucene-solr with Apache License 2.0
private void init() throws IOException {
  List<MatchesIterator> mis = new ArrayList<>();
  mis.add(first);
  PostingsEnum reuse = null;
  for (BytesRef term = terms.next(); term != null; term = terms.next()) {
    if (te.seekExact(term)) {
      PostingsEnum pe = te.postings(reuse, PostingsEnum.OFFSETS);
      if (pe.advance(doc) == doc) {
        mis.add(new TermMatchesIterator(query, pe));
        reuse = null;
      } else {
        reuse = pe;
      }
    }
  }
  it = fromSubIterators(mis);
}
 
Example 3
Source File: CodecCollector.java    From mtas with Apache License 2.0
/**
 * Computes the full termvector numbers for the given document set.
 *
 * @param docSet
 *          the set of document ids
 * @param termDocId
 *          the term doc id
 * @param termsEnum
 *          the terms enum
 * @param lrc
 *          the leaf reader context
 * @param postingsEnum
 *          the postings enum
 * @param positionsData
 *          the positions data
 * @return the termvector number full
 * @throws IOException
 *           Signals that an I/O exception has occurred.
 */
private static TermvectorNumberFull computeTermvectorNumberFull(
    List<Integer> docSet, int termDocId, TermsEnum termsEnum,
    LeafReaderContext lrc, PostingsEnum postingsEnum,
    Map<Integer, Integer> positionsData) throws IOException {
  TermvectorNumberFull result = new TermvectorNumberFull(docSet.size());
  Iterator<Integer> docIterator = docSet.iterator();
  int localTermDocId = termDocId;
  postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.FREQS);
  while (docIterator.hasNext()) {
    int docId = docIterator.next() - lrc.docBase;
    // only advance when docId is ahead of the enum's current position;
    // equality means the document contains the term
    if (docId >= localTermDocId && ((docId == localTermDocId)
        || ((localTermDocId = postingsEnum.advance(docId)) == docId))) {
      result.args[result.docNumber] = postingsEnum.freq();
      result.positions[result.docNumber] = (positionsData == null) ? 0
          : positionsData.get(docId + lrc.docBase);
      result.docNumber++;
    }
  }
  return result;
}
 
Example 4
Source File: ReconstructCommand.java    From clue with Apache License 2.0
public String reconstructNoPositions(TermsEnum te, int docid, Bits liveDocs) throws IOException{
  List<String> textList = new ArrayList<String>();
  BytesRef text;
  PostingsEnum postings = null;
  while ((text = te.next()) != null) {
    postings = te.postings(postings, PostingsEnum.FREQS);
    // advance() returns the first docID >= docid; equality means this term occurs in the document
    int iterDoc = postings.advance(docid);
    if (iterDoc == docid) {
      textList.add(text.utf8ToString());
    }
  }
  StringBuilder buf = new StringBuilder();
  for (String s : textList) {
    buf.append(s+" ");
  }
  return buf.toString();
}
 
Example 5
Source File: FieldOffsetStrategy.java    From lucene-solr with Apache License 2.0
protected void createOffsetsEnumsForTerms(BytesRef[] sourceTerms, Terms termsIndex, int doc, List<OffsetsEnum> results) throws IOException {
  TermsEnum termsEnum = termsIndex.iterator();//does not return null
  for (BytesRef term : sourceTerms) {
    if (termsEnum.seekExact(term)) {
      PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.OFFSETS);
      if (postingsEnum == null) {
        // no offsets or positions available
        throw new IllegalArgumentException("field '" + getField() + "' was indexed without offsets, cannot highlight");
      }
      if (doc == postingsEnum.advance(doc)) { // now it's positioned, although may be exhausted
        results.add(new OffsetsEnum.OfPostings(term, postingsEnum));
      }
    }
  }
}
 
Example 6
Source File: TaxonomyIndexArrays.java    From lucene-solr with Apache License 2.0
private void initParents(IndexReader reader, int first) throws IOException {
  if (reader.maxDoc() == first) {
    return;
  }
  
  // it's ok to use MultiTerms because we only iterate on one posting list.
  // breaking it to loop over the leaves() only complicates code for no
  // apparent gain.
  PostingsEnum positions = MultiTerms.getTermPostingsEnum(reader,
      Consts.FIELD_PAYLOADS, Consts.PAYLOAD_PARENT_BYTES_REF,
      PostingsEnum.PAYLOADS);

  // shouldn't really happen, if it does, something's wrong
  if (positions == null || positions.advance(first) == DocIdSetIterator.NO_MORE_DOCS) {
    throw new CorruptIndexException("Missing parent data for category " + first, reader.toString());
  }
  
  int num = reader.maxDoc();
  for (int i = first; i < num; i++) {
    if (positions.docID() == i) {
      if (positions.freq() == 0) { // shouldn't happen
        throw new CorruptIndexException("Missing parent data for category " + i, reader.toString());
      }
      
      parents[i] = positions.nextPosition();
      
      if (positions.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
        if (i + 1 < num) {
          throw new CorruptIndexException("Missing parent data for category "+ (i + 1), reader.toString());
        }
        break;
      }
    } else { // this shouldn't happen
      throw new CorruptIndexException("Missing parent data for category " + i, reader.toString());
    }
  }
}
 
Example 7
Source File: MultiPhraseQuery.java    From lucene-solr with Apache License 2.0
@Override
public int advance(int target) throws IOException {
  PostingsEnum top = docsQueue.top();

  // advance the postings enum at the top of the queue until the smallest docID reaches the target
  do {
    top.advance(target);
    top = docsQueue.updateTop();
  } while (top.docID() < target);

  return top.docID();
}
 
Example 8
Source File: TestBlockPostingsFormat3.java    From lucene-solr with Apache License 2.0
/**
 * checks advancing docs
 */
public void assertDocsSkipping(int docFreq, PostingsEnum leftDocs, PostingsEnum rightDocs) throws Exception {
  if (leftDocs == null) {
    assertNull(rightDocs);
    return;
  }
  int docid = -1;
  int averageGap = MAXDOC / (1+docFreq);
  int skipInterval = 16;

  while (true) {
    if (random().nextBoolean()) {
      // nextDoc()
      docid = leftDocs.nextDoc();
      assertEquals(docid, rightDocs.nextDoc());
    } else {
      // advance()
      int skip = docid + (int) Math.ceil(Math.abs(skipInterval + random().nextGaussian() * averageGap));
      docid = leftDocs.advance(skip);
      assertEquals(docid, rightDocs.advance(skip));
    }
    
    if (docid == DocIdSetIterator.NO_MORE_DOCS) {
      return;
    }
    // we don't assert freqs, they are allowed to be different
  }
}
 
Example 9
Source File: TestBlockPostingsFormat3.java    From lucene-solr with Apache License 2.0
/**
 * checks advancing docs + positions
 */
public void assertPositionsSkipping(int docFreq, PostingsEnum leftDocs, PostingsEnum rightDocs) throws Exception {
  if (leftDocs == null || rightDocs == null) {
    assertNull(leftDocs);
    assertNull(rightDocs);
    return;
  }
  
  int docid = -1;
  int averageGap = MAXDOC / (1+docFreq);
  int skipInterval = 16;

  while (true) {
    if (random().nextBoolean()) {
      // nextDoc()
      docid = leftDocs.nextDoc();
      assertEquals(docid, rightDocs.nextDoc());
    } else {
      // advance()
      int skip = docid + (int) Math.ceil(Math.abs(skipInterval + random().nextGaussian() * averageGap));
      docid = leftDocs.advance(skip);
      assertEquals(docid, rightDocs.advance(skip));
    }
    
    if (docid == DocIdSetIterator.NO_MORE_DOCS) {
      return;
    }
    int freq = leftDocs.freq();
    assertEquals(freq, rightDocs.freq());
    for (int i = 0; i < freq; i++) {
      assertEquals(leftDocs.nextPosition(), rightDocs.nextPosition());
      // we don't compare the payloads, it's allowed that one is empty etc
    }
  }
}
 
Example 10
Source File: CodecCollector.java    From mtas with Apache License 2.0
/**
 * Computes the basic termvector numbers for the given document set.
 *
 * @param docSet
 *          the set of document ids
 * @param termDocId
 *          the term doc id
 * @param termsEnum
 *          the terms enum
 * @param r
 *          the leaf reader
 * @param lrc
 *          the leaf reader context
 * @param postingsEnum
 *          the postings enum
 * @return the termvector number basic
 * @throws IOException
 *           Signals that an I/O exception has occurred.
 */
private static TermvectorNumberBasic computeTermvectorNumberBasic(
    List<Integer> docSet, int termDocId, TermsEnum termsEnum, LeafReader r,
    LeafReaderContext lrc, PostingsEnum postingsEnum) throws IOException {
  TermvectorNumberBasic result = new TermvectorNumberBasic();
  boolean hasDeletedDocuments = (r.getLiveDocs() != null);
  if ((docSet.size() == r.numDocs()) && !hasDeletedDocuments) {
    try {
      return computeTermvectorNumberBasic(termsEnum, r);
    } catch (IOException e) {
      log.debug("problem", e);
      // problem
    }
  }
  result.docNumber = 0;
  result.valueSum[0] = 0;
  int localTermDocId = termDocId;
  Iterator<Integer> docIterator = docSet.iterator();
  postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.FREQS);
  int docId;
  while (docIterator.hasNext()) {
    docId = docIterator.next() - lrc.docBase;
    if (docId >= localTermDocId && ((docId == localTermDocId)
        || ((localTermDocId = postingsEnum.advance(docId)) == docId))) {
      result.docNumber++;
      result.valueSum[0] += postingsEnum.freq();
    }
    if (localTermDocId == DocIdSetIterator.NO_MORE_DOCS) {
      break;
    }
  }
  return result;
}
 
Example 11
Source File: PayloadFilteredTermIntervalsSource.java    From lucene-solr with Apache License 2.0
private IntervalMatchesIterator matches(TermsEnum te, int doc) throws IOException {
  PostingsEnum pe = te.postings(null, PostingsEnum.ALL);
  if (pe.advance(doc) != doc) {
    return null;
  }
  return new IntervalMatchesIterator() {

    @Override
    public int gaps() {
      return 0;
    }

    @Override
    public int width() {
      return 1;
    }

    int upto = pe.freq();
    int pos = -1;

    @Override
    public boolean next() throws IOException {
      do {
        if (upto <= 0) {
          pos = IntervalIterator.NO_MORE_INTERVALS;
          return false;
        }
        upto--;
        pos = pe.nextPosition();
      }
      while (filter.test(pe.getPayload()) == false);
      return true;
    }

    @Override
    public int startPosition() {
      return pos;
    }

    @Override
    public int endPosition() {
      return pos;
    }

    @Override
    public int startOffset() throws IOException {
      return pe.startOffset();
    }

    @Override
    public int endOffset() throws IOException {
      return pe.endOffset();
    }

    @Override
    public MatchesIterator getSubMatches() {
      return null;
    }

    @Override
    public Query getQuery() {
      throw new UnsupportedOperationException();
    }
  };
}
 
Example 12
Source File: TermIntervalsSource.java    From lucene-solr with Apache License 2.0
static IntervalMatchesIterator matches(TermsEnum te, int doc, String field) throws IOException {
  TermQuery query = new TermQuery(new Term(field, te.term()));
  PostingsEnum pe = te.postings(null, PostingsEnum.OFFSETS);
  if (pe.advance(doc) != doc) {
    return null;
  }
  return new IntervalMatchesIterator() {

    @Override
    public int gaps() {
      return 0;
    }

    @Override
    public int width() {
      return 1;
    }

    int upto = pe.freq();
    int pos = -1;

    @Override
    public boolean next() throws IOException {
      if (upto <= 0) {
        pos = IntervalIterator.NO_MORE_INTERVALS;
        return false;
      }
      upto--;
      pos = pe.nextPosition();
      return true;
    }

    @Override
    public int startPosition() {
      return pos;
    }

    @Override
    public int endPosition() {
      return pos;
    }

    @Override
    public int startOffset() throws IOException {
      return pe.startOffset();
    }

    @Override
    public int endOffset() throws IOException {
      return pe.endOffset();
    }

    @Override
    public MatchesIterator getSubMatches() {
      return null;
    }

    @Override
    public Query getQuery() {
      return query;
    }
  };
}