Java Code Examples for org.apache.lucene.index.PostingsEnum

The following examples show how to use org.apache.lucene.index.PostingsEnum. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: lucene-solr   Source File: TestIDVersionPostingsFormat.java    License: Apache License 2.0 6 votes vote down vote up
/** Looks up {@code id} at the given {@code version} across all segments; returns the
 *  global docID if a live match is found, else -1. Side effect: records the matched
 *  term's version in {@code lastVersion}. */
public int lookup(BytesRef id, long version) throws IOException {
  for (int seg = 0; seg < numSegs; seg++) {
    IDVersionSegmentTermsEnum termsEnum = (IDVersionSegmentTermsEnum) termsEnums[seg];
    if (termsEnum.seekExact(id, version) == false) {
      continue;
    }
    if (VERBOSE) {
      System.out.println("  found in seg=" + termsEnums[seg]);
    }
    postingsEnums[seg] = termsEnums[seg].postings(postingsEnums[seg], 0);
    int docID = postingsEnums[seg].nextDoc();
    if (docID != PostingsEnum.NO_MORE_DOCS
        && (liveDocs[seg] == null || liveDocs[seg].get(docID))) {
      lastVersion = termsEnum.getVersion();
      return docBases[seg] + docID;
    }
    // A seeked term whose only doc is not live implies the segment has deletions.
    assert hasDeletions;
  }
  return -1;
}
 
Example 2
Source Project: mtas   Source File: CodecCollector.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Collect collection.
 *
 * <p>For the {@code ACTION_CREATE} action, walks every term of the configured fields
 * and records each term that occurs in at least one document of {@code docSet}.
 * {@code ACTION_CHECK} and {@code ACTION_LIST} have no Lucene-side work.
 *
 * @param reader
 *          the reader
 * @param docSet
 *          the doc set (global docIDs; assumed ascending per segment so
 *          {@link PostingsEnum#advance(int)} may be used)
 * @param collectionInfo
 *          the collection info
 * @throws IOException
 *           Signals that an I/O exception has occurred.
 */
public static void collectCollection(IndexReader reader, List<Integer> docSet,
    ComponentCollection collectionInfo) throws IOException {
  if (collectionInfo.action().equals(ComponentCollection.ACTION_CHECK)) {
    // can't do anything in lucene for check
  } else if (collectionInfo.action()
      .equals(ComponentCollection.ACTION_LIST)) {
    // can't do anything in lucene for list
  } else if (collectionInfo.action()
      .equals(ComponentCollection.ACTION_CREATE)) {
    PostingsEnum postingsEnum = null;
    for (LeafReaderContext lrc : reader.leaves()) {
      LeafReader r = lrc.reader();
      for (String field : collectionInfo.fields()) {
        Terms terms = r.terms(field);
        if (terms == null) {
          continue;
        }
        TermsEnum termsEnum = terms.iterator();
        BytesRef term;
        while ((term = termsEnum.next()) != null) {
          postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
          // last docID the postings enum was advanced to; primitives avoid the
          // Integer autoboxing/equals() churn of the original per-doc loop
          int termDocId = -1;
          for (Integer globalDocId : docSet) {
            int docId = globalDocId - lrc.docBase; // segment-local docID
            if (docId >= termDocId
                && (docId == termDocId
                    || (termDocId = postingsEnum.advance(docId)) == docId)) {
              collectionInfo.addValue(term.utf8ToString());
              break;
            }
            if (termDocId == PostingsEnum.NO_MORE_DOCS) {
              break;
            }
          }
        }
      }
    }
  }
}
 
Example 3
/**
 * Creates the TermsEnum (if not already created); must be called before any calls
 * to getBackgroundFrequency.
 *
 * @param context The aggregation context
 * @return The number of documents in the index (after an optional filter might have been applied)
 */
public long prepareBackground(AggregationContext context) {
    if (termsEnum == null) {
        SearchContext searchContext = context.searchContext();
        IndexReader reader = searchContext.searcher().getIndexReader();
        try {
            if (numberOfAggregatorsCreated == 1) {
                // A single aggregator gets a dedicated, uncached enum.
                termsEnum = new FilterableTermsEnum(reader, indexedFieldName, PostingsEnum.NONE, filter);
            } else {
                // With > 1 agg the same term may be looked up repeatedly,
                // so use a TermsEnum that caches the results of all term lookups.
                termsEnum = new FreqTermsEnum(reader, indexedFieldName, true, false, filter, searchContext.bigArrays());
            }
        } catch (IOException e) {
            throw new ElasticsearchException("failed to build terms enumeration", e);
        }
    }
    return termsEnum.getNumDocs();
}
 
Example 4
Source Project: Elasticsearch   Source File: TermVectorsResponse.java    License: Apache License 2.0 6 votes vote down vote up
/** Writes one term (statistics plus term-vector values) as a JSON object keyed by the term text. */
private void buildTerm(XContentBuilder builder, final CharsRefBuilder spare, Terms curTerms, TermsEnum termIter, BoostAttribute boostAtt) throws IOException {
    // Advance to the next term and open an object named after its text.
    BytesRef nextTerm = termIter.next();
    spare.copyUTF8Bytes(nextTerm);
    builder.startObject(spare.toString());

    buildTermStatistics(builder, termIter);

    // Write the term vectors: frequency first, then the per-occurrence values.
    PostingsEnum postings = termIter.postings(null, PostingsEnum.ALL);
    int termFreq = postings.freq();
    builder.field(FieldStrings.TERM_FREQ, termFreq);
    initMemory(curTerms, termFreq);
    initValues(curTerms, postings, termFreq);
    buildValues(builder, curTerms, termFreq);
    buildScore(builder, boostAtt);
    builder.endObject();
}
 
Example 5
Source Project: Elasticsearch   Source File: TermVectorsResponse.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Fills the per-occurrence position/offset/payload arrays from the postings enum.
 * Only the features the field actually indexed (per {@code curTerms}) are stored.
 */
private void initValues(Terms curTerms, PostingsEnum posEnum, int termFreq) throws IOException {
    for (int j = 0; j < termFreq; j++) {
        int nextPos = posEnum.nextPosition();
        if (curTerms.hasPositions()) {
            currentPositions[j] = nextPos;
        }
        if (curTerms.hasOffsets()) {
            currentStartOffset[j] = posEnum.startOffset();
            currentEndOffset[j] = posEnum.endOffset();
        }
        if (curTerms.hasPayloads()) {
            BytesRef curPayload = posEnum.getPayload();
            if (curPayload != null) {
                // Honor curPayload.offset: a BytesRef may be a slice of a larger
                // array, and reading from index 0 would return the wrong bytes.
                currentPayloads[j] = new BytesArray(curPayload.bytes, curPayload.offset, curPayload.length);
            } else {
                currentPayloads[j] = null;
            }
        }
    }
}
 
Example 6
Source Project: lucene-solr   Source File: PhraseHelper.java    License: Apache License 2.0 6 votes vote down vote up
@Override
public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
  if (fieldMatcher.test(term.field()) == false) {
    return; // not a field we are highlighting; term.field() is from the Query
  }

  final BytesRef termBytes = term.bytes();
  SpanCollectedOffsetsEnum offsetsEnum = termToOffsetsEnums.get(termBytes);
  if (offsetsEnum == null) {
    // Position-insensitive terms are handled outside of PhraseHelper.
    if (positionInsensitiveTerms.contains(termBytes)) {
      return;
    }
    offsetsEnum = new SpanCollectedOffsetsEnum(termBytes, postings.freq());
    termToOffsetsEnums.put(termBytes, offsetsEnum);
  }
  offsetsEnum.add(postings.startOffset(), postings.endOffset());
}
 
Example 7
Source Project: lucene-solr   Source File: LukeRequestHandler.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Returns the first live (non-deleted) document containing any of the first 1000
 * terms of {@code terms}, or null if none is found.
 */
private static Document getFirstLiveDoc(Terms terms, LeafReader reader) throws IOException {
  PostingsEnum postingsEnum = null;
  TermsEnum termsEnum = terms.iterator();
  // liveDocs is per-reader, so it can be fetched once outside the loop.
  final Bits liveDocs = reader.getLiveDocs();
  // Deal with the chance that the first bunch of terms are in deleted documents:
  // examine up to 1000 terms before giving up. (The original loop condition
  // "postingsEnum == null" terminated the scan after the first term, because the
  // enum stays non-null once assigned, so "continue" could never reach a 2nd term.)
  for (int idx = 0; idx < 1000; ++idx) {
    BytesRef text = termsEnum.next();
    if (text == null) { // Ran off the end of the terms enum without finding any live docs with that field in them.
      return null;
    }
    postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
    if (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      final int docId = postingsEnum.docID();
      // In Bits liveDocs, a SET bit means the doc is live; skip docs whose bit is
      // clear (deleted). The original test was inverted and skipped live docs.
      if (liveDocs != null && liveDocs.get(docId) == false) {
        continue;
      }
      return reader.document(docId);
    }
  }
  return null;
}
 
Example 8
Source Project: lucene-solr   Source File: TermVectorEntry.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Returns a new term vector entry representing the specified term, and optionally, positions.
 *
 * @param te - positioned terms iterator
 * @return term vector entry
 * @throws IOException - if there is a low level IO error.
 */
static TermVectorEntry of(TermsEnum te) throws IOException {
  Objects.requireNonNull(te);

  PostingsEnum pe = te.postings(null, PostingsEnum.OFFSETS);
  pe.nextDoc();

  List<TermVectorEntry.TermVectorPosition> tvPositions = new ArrayList<>();
  int freq = pe.freq();
  for (int occurrence = 0; occurrence < freq; occurrence++) {
    int pos = pe.nextPosition();
    // A negative position means no position information is available; skip it.
    if (pos >= 0) {
      tvPositions.add(TermVectorPosition.of(pos, pe));
    }
  }

  String termText = BytesRefUtils.decode(te.term());
  return new TermVectorEntry(termText, te.totalTermFreq(), tvPositions);
}
 
Example 9
Source Project: lucene-solr   Source File: TermPosting.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a posting record for the given position, copying start/end offsets and the
 * payload from {@code penum} when they are available.
 */
static TermPosting of(int position, PostingsEnum penum) throws IOException {
  TermPosting posting = new TermPosting();

  // set position
  posting.position = position;

  // set offset (if available; a negative offset means offsets were not indexed)
  int sOffset = penum.startOffset();
  int eOffset = penum.endOffset();
  if (sOffset >= 0 && eOffset >= 0) {
    posting.startOffset = sOffset;
    posting.endOffset = eOffset;
  }

  // set payload (if available); fetch once instead of calling getPayload() twice
  BytesRef payload = penum.getPayload();
  if (payload != null) {
    posting.payload = BytesRef.deepCopyOf(payload);
  }

  return posting;
}
 
Example 10
Source Project: lucene-solr   Source File: DocumentsImpl.java    License: Apache License 2.0 6 votes vote down vote up
@Override
public Optional<Integer> firstTermDoc() {
  if (tenum == null) {
    // The terms enum must be positioned before asking for docs.
    log.warn("Terms enum un-positioned.");
    return Optional.empty();
  }

  try {
    setPostingsIterator(tenum.postings(penum, PostingsEnum.ALL));
    if (penum.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
      return Optional.of(penum.docID());
    }
    // No docs available for this term.
    resetPostingsIterator();
    log.warn("No docs available for term: {} in field: {}.", BytesRefUtils.decode(tenum.term()), curField);
    return Optional.empty();
  } catch (IOException e) {
    resetPostingsIterator();
    throw new LukeException(String.format(Locale.ENGLISH, "Term docs not available for field: %s.", curField), e);
  }
}
 
Example 11
Source Project: lucene-solr   Source File: DocumentsImpl.java    License: Apache License 2.0 6 votes vote down vote up
@Override
public Optional<Integer> nextTermDoc() {
  if (penum == null) {
    // The postings enum must be initialized before advancing.
    log.warn("Postings enum un-positioned for field: {}.", curField);
    return Optional.empty();
  }

  try {
    if (penum.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
      return Optional.of(penum.docID());
    }
    // Exhausted the iterator.
    resetPostingsIterator();
    if (log.isInfoEnabled()) {
      log.info("Reached the end of the postings iterator for term: {} in field: {}", BytesRefUtils.decode(tenum.term()), curField);
    }
    return Optional.empty();
  } catch (IOException e) {
    resetPostingsIterator();
    throw new LukeException(String.format(Locale.ENGLISH, "Term docs not available for field: %s.", curField), e);
  }
}
 
Example 12
@Override
public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
    if (!matches)
        return;
    if (upto >= payloadToMatch.size()) {
        // more payloads seen than expected -> cannot match
        matches = false;
        return;
    }
    // Compare the expected payload at this index with the actual one, then advance.
    BytesRef expected = payloadToMatch.get(upto);
    BytesRef actual = postings.getPayload();
    upto++;
    if (expected == null) {
        matches = actual == null;
    } else if (actual == null) {
        matches = false;
    } else {
        matches = expected.bytesEquals(actual);
    }
}
 
Example 13
Source Project: crate   Source File: ShardSplittingQuery.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Feeds {@code consumer} every segment-local docID whose {@code idField} term is NOT
 * accepted by {@code includeInShard}, i.e. the documents that must be split away.
 */
private static void findSplitDocs(String idField, Predicate<BytesRef> includeInShard, LeafReader leafReader,
                                  IntConsumer consumer) throws IOException {
    Terms terms = leafReader.terms(idField);
    if (terms == null) {
        // No document in this segment has the id field indexed; nothing to split.
        return;
    }
    TermsEnum iterator = terms.iterator();
    BytesRef idTerm;
    PostingsEnum postingsEnum = null;
    while ((idTerm = iterator.next()) != null) {
        if (includeInShard.test(idTerm) == false) {
            postingsEnum = iterator.postings(postingsEnum);
            int doc;
            while ((doc = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                consumer.accept(doc);
            }
        }
    }
}
 
Example 14
Source Project: lucene-solr   Source File: PhraseQuery.java    License: Apache License 2.0 6 votes vote down vote up
/** Wraps a term's postings/impacts together with its position in the phrase.
 *  Multiple terms are defensively copied and sorted. */
public PostingsAndFreq(PostingsEnum postings, ImpactsEnum impacts, int position, Term... terms) {
  this.postings = postings;
  this.impacts = impacts;
  this.position = position;
  nTerms = (terms == null) ? 0 : terms.length;
  if (nTerms == 0) {
    this.terms = null;
  } else if (nTerms == 1) {
    this.terms = terms;
  } else {
    // Defensive copy so sorting does not disturb the caller's array.
    Term[] sorted = Arrays.copyOf(terms, terms.length);
    Arrays.sort(sorted);
    this.terms = sorted;
  }
}
 
Example 15
Source Project: lucene-solr   Source File: SpanPayloadCheckQuery.java    License: Apache License 2.0 6 votes vote down vote up
@Override
public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
  if (!matches) {
    return; // already failed; nothing more to check
  }
  if (upto >= payloadToMatch.size()) {
    matches = false; // more payloads than expected
    return;
  }
  final BytesRef payload = postings.getPayload();
  final BytesRef expected = payloadToMatch.get(upto++);
  if (expected == null) {
    matches = (payload == null);
  } else if (payload == null) {
    matches = false;
  } else {
    matches = expected.bytesEquals(payload);
  }
}
 
Example 16
Source Project: lucene-solr   Source File: TestUtil.java    License: Apache License 2.0 6 votes vote down vote up
/** Randomly either honors the requested {@code flags}, upgrades them with FREQS,
 *  or substitutes a random positions-related flag set entirely. */
public static PostingsEnum docs(Random random, TermsEnum termsEnum, PostingsEnum reuse, int flags) throws IOException {
  // TODO: simplify this method? it would be easier to randomly either use the flags passed, or do the random selection,
  // FREQS should be part of the random selection instead of outside on its own?
  if (random.nextBoolean()) {
    if (random.nextBoolean()) {
      // Pick one of the positions-related flag sets at random.
      final int[] posChoices = {
          PostingsEnum.POSITIONS, PostingsEnum.OFFSETS, PostingsEnum.PAYLOADS, PostingsEnum.ALL
      };
      return termsEnum.postings(null, posChoices[random.nextInt(4)]);
    }
    flags |= PostingsEnum.FREQS;
  }
  return termsEnum.postings(reuse, flags);
}
 
Example 17
Source Project: lucene-solr   Source File: DisjunctionMatchesIterator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Create a {@link DisjunctionMatchesIterator} over a list of terms extracted from a {@link BytesRefIterator}
 *
 * Only terms that have at least one match in the given document will be included
 */
static MatchesIterator fromTermsEnum(LeafReaderContext context, int doc, Query query, String field, BytesRefIterator terms) throws IOException {
  Objects.requireNonNull(field);
  Terms t = context.reader().terms(field);
  if (t == null) {
    return null;
  }
  TermsEnum te = t.iterator();
  PostingsEnum reuse = null;
  BytesRef term;
  while ((term = terms.next()) != null) {
    if (te.seekExact(term) == false) {
      continue;
    }
    PostingsEnum pe = te.postings(reuse, PostingsEnum.OFFSETS);
    if (pe.advance(doc) == doc) {
      return new TermsEnumDisjunctionMatchesIterator(new TermMatchesIterator(query, pe), terms, te, doc, query);
    }
    // No match in this doc: keep the enum around for reuse by the next term.
    reuse = pe;
  }
  return null;
}
 
Example 18
Source Project: lucene-solr   Source File: DisjunctionMatchesIterator.java    License: Apache License 2.0 6 votes vote down vote up
/** Lazily builds the disjunction over all remaining terms that match in {@code doc}. */
private void init() throws IOException {
  List<MatchesIterator> subIterators = new ArrayList<>();
  subIterators.add(first);
  PostingsEnum reuse = null;
  BytesRef term;
  while ((term = terms.next()) != null) {
    if (te.seekExact(term) == false) {
      continue;
    }
    PostingsEnum pe = te.postings(reuse, PostingsEnum.OFFSETS);
    if (pe.advance(doc) == doc) {
      subIterators.add(new TermMatchesIterator(query, pe));
      reuse = null; // pe is now owned by the new matches iterator
    } else {
      reuse = pe;
    }
  }
  it = fromSubIterators(subIterators);
}
 
Example 19
Source Project: lucene-solr   Source File: TermsIncludingScoreQuery.java    License: Apache License 2.0 6 votes vote down vote up
/** Marks every doc containing one of our terms and records that term's score for it. */
protected void fillDocsAndScores(FixedBitSet matchingDocs, TermsEnum termsEnum) throws IOException {
  BytesRef spare = new BytesRef();
  PostingsEnum postingsEnum = null;
  for (int i = 0; i < terms.size(); i++) {
    if (!termsEnum.seekExact(terms.get(ords[i], spare))) {
      continue;
    }
    postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
    float score = TermsIncludingScoreQuery.this.scores[ords[i]];
    int doc;
    while ((doc = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      matchingDocs.set(doc);
      // In the case the same doc is also related to a another doc, a score might be overwritten.
      // I think this can only happen in a many-to-many relation.
      scores[doc] = score;
    }
  }
}
 
Example 20
Source Project: lucene-solr   Source File: TermsIncludingScoreQuery.java    License: Apache License 2.0 6 votes vote down vote up
@Override
protected void fillDocsAndScores(FixedBitSet matchingDocs, TermsEnum termsEnum) throws IOException {
  BytesRef spare = new BytesRef();
  PostingsEnum postingsEnum = null;
  for (int i = 0; i < terms.size(); i++) {
    if (!termsEnum.seekExact(terms.get(ords[i], spare))) {
      continue;
    }
    postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
    float score = TermsIncludingScoreQuery.this.scores[ords[i]];
    int doc;
    while ((doc = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      // I prefer this:
      /*if (scores[doc] < score) {
        scores[doc] = score;
        matchingDocs.set(doc);
      }*/
      // But this behaves the same as MVInnerScorer and only then the tests will pass:
      if (!matchingDocs.get(doc)) {
        scores[doc] = score;
        matchingDocs.set(doc);
      }
    }
  }
}
 
Example 21
Source Project: lucene-solr   Source File: IDVersionPostingsReader.java    License: Apache License 2.0 6 votes vote down vote up
@Override
public PostingsEnum postings(FieldInfo fieldInfo, BlockTermState termState, PostingsEnum reuse, int flags) throws IOException {
  if (PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS)) {
    // Positions requested: serve the single posting with its id-version payload.
    SinglePostingsEnum posEnum = (reuse instanceof SinglePostingsEnum)
        ? (SinglePostingsEnum) reuse
        : new SinglePostingsEnum();
    IDVersionTermState state = (IDVersionTermState) termState;
    posEnum.reset(state.docID, state.idVersion);
    return posEnum;
  }

  // Docs-only request.
  SingleDocsEnum docsEnum = (reuse instanceof SingleDocsEnum)
      ? (SingleDocsEnum) reuse
      : new SingleDocsEnum();
  docsEnum.reset(((IDVersionTermState) termState).docID);
  return docsEnum;
}
 
Example 22
Source Project: lucene-solr   Source File: PhraseQuery.java    License: Apache License 2.0 5 votes vote down vote up
public PostingsAndFreq(PostingsEnum postings, ImpactsEnum impacts, int position, List<Term> terms) {
  this.postings = postings;
  this.impacts = impacts;
  this.position = position;
  nTerms = terms == null ? 0 : terms.size();
  if (nTerms > 0) {
    Term[] terms2 = terms.toArray(new Term[0]);
    if (nTerms > 1) {
      Arrays.sort(terms2);
    }
    this.terms = terms2;
  } else {
    this.terms = null;
  }
}
 
Example 23
Source Project: Elasticsearch   Source File: IndexFieldTerm.java    License: Apache License 2.0 5 votes vote down vote up
/** Translates IndexLookup flag bits into the corresponding PostingsEnum flag bits. */
private int convertToLuceneFlags(int flags) {
    int luceneFlags = PostingsEnum.NONE;
    if ((flags & IndexLookup.FLAG_FREQUENCIES) > 0) {
        luceneFlags |= PostingsEnum.FREQS;
    }
    if ((flags & IndexLookup.FLAG_POSITIONS) > 0) {
        luceneFlags |= PostingsEnum.POSITIONS;
    }
    if ((flags & IndexLookup.FLAG_PAYLOADS) > 0) {
        luceneFlags |= PostingsEnum.PAYLOADS;
    }
    if ((flags & IndexLookup.FLAG_OFFSETS) > 0) {
        luceneFlags |= PostingsEnum.OFFSETS;
    }
    return luceneFlags;
}
 
Example 24
Source Project: lucene-solr   Source File: TestTeeSinkTokenFilter.java    License: Apache License 2.0 5 votes vote down vote up
/** Indexes the same token stream twice (directly and via a tee/sink pair) into one
 *  field and verifies the recorded term-vector offsets for both occurrences. */
public void testEndOffsetPositionWithTeeSinkTokenFilter() throws Exception {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(analyzer));

  TokenStream tokenStream = analyzer.tokenStream("field", "abcd   ");
  TeeSinkTokenFilter tee = new TeeSinkTokenFilter(tokenStream);
  TokenStream sink = tee.newSinkTokenStream();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorOffsets(true);
  ft.setStoreTermVectorPositions(true);

  Document doc = new Document();
  doc.add(new Field("field", tee, ft));
  doc.add(new Field("field", sink, ft));
  writer.addDocument(doc);
  writer.close();

  IndexReader reader = DirectoryReader.open(dir);
  Terms vector = reader.getTermVectors(0).terms("field");
  assertEquals(1, vector.size());
  TermsEnum termsEnum = vector.iterator();
  termsEnum.next();
  assertEquals(2, termsEnum.totalTermFreq());

  // The single token occurs twice (tee + sink); check both occurrences' offsets.
  PostingsEnum positions = termsEnum.postings(null, PostingsEnum.ALL);
  assertTrue(positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  assertEquals(2, positions.freq());
  positions.nextPosition();
  assertEquals(0, positions.startOffset());
  assertEquals(4, positions.endOffset());
  positions.nextPosition();
  assertEquals(8, positions.startOffset());
  assertEquals(12, positions.endOffset());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, positions.nextDoc());

  reader.close();
  dir.close();
  analyzer.close();
}
 
Example 25
Source Project: lucene-solr   Source File: PayloadSpanCollector.java    License: Apache License 2.0 5 votes vote down vote up
@Override
public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
  final BytesRef payload = postings.getPayload();
  if (payload != null) {
    // Copy the bytes out of the (reused) BytesRef before storing them.
    final byte[] copy = new byte[payload.length];
    System.arraycopy(payload.bytes, payload.offset, copy, 0, payload.length);
    payloads.add(copy);
  }
}
 
Example 26
Source Project: lucene-solr   Source File: TermSpans.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates spans over the given term's postings.
 * Starts un-positioned: doc and position are initialized to -1.
 *
 * NOTE(review): the "scorer" parameter is never read in this constructor --
 * presumably kept for signature compatibility; confirm against callers.
 */
public TermSpans(LeafSimScorer scorer,
                  PostingsEnum postings, Term term, float positionsCost) {
  this.postings = Objects.requireNonNull(postings);
  this.term = Objects.requireNonNull(term);
  this.doc = -1;
  this.position = -1;
  assert positionsCost > 0; // otherwise the TermSpans should not be created.
  this.positionsCost = positionsCost;
}
 
Example 27
Source Project: pyramid   Source File: ESIndex.java    License: Apache License 2.0 5 votes vote down vote up
/** Fetches the term vector of {@code field} for document {@code id} and returns a
 *  map from token position to term text (empty if the field has no terms). */
private Map<Integer,String> getTermVectorWithException(String field, String id) throws IOException {
    TermVectorsResponse response = client.prepareTermVector(indexName, documentType, id)
            .setOffsets(false).setPositions(true).setFieldStatistics(false)
            .setTermStatistics(false)
            .setSelectedFields(field).
                    execute().actionGet();

    Map<Integer, String> positionToTerm = new HashMap<>();
    Terms terms = response.getFields().terms(field);
    if (terms == null) {
        return positionToTerm;
    }

    TermsEnum termsEnum = terms.iterator();
    PostingsEnum postings = null;
    BytesRef termBytes;
    while ((termBytes = termsEnum.next()) != null) {
        String term = termBytes.utf8ToString();
        postings = termsEnum.postings(postings, PostingsEnum.ALL);

        // There can only be one doc since we fetched by id; position the enum on it.
        postings.nextDoc();

        int termFreq = postings.freq();
        for (int i = 0; i < termFreq; i++) {
            positionToTerm.put(postings.nextPosition(), term);
        }
    }
    return positionToTerm;
}
 
Example 28
Source Project: lucene-solr   Source File: DocSetUtil.java    License: Apache License 2.0 5 votes vote down vote up
/** Builds a bitset DocSet from per-segment postings (skipping deleted docs),
 *  downgrading to a small set representation when few docs matched. */
private static DocSet createBigSet(List<LeafReaderContext> leaves, PostingsEnum[] postList, int maxDoc, int firstReader) throws IOException {
  long[] words = new long[FixedBitSet.bits2words(maxDoc)];
  int count = 0;
  for (int i = firstReader; i < postList.length; i++) {
    PostingsEnum postings = postList[i];
    if (postings == null) {
      continue;
    }
    LeafReaderContext ctx = leaves.get(i);
    Bits liveDocs = ctx.reader().getLiveDocs();
    int base = ctx.docBase;
    int segDoc;
    while ((segDoc = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      if (liveDocs != null && !liveDocs.get(segDoc)) {
        continue; // skip deleted docs
      }
      int globalId = segDoc + base;
      // (1L << globalId) relies on Java masking the shift count to its low 6 bits.
      words[globalId >> 6] |= (1L << globalId);
      count++;
    }
  }

  BitDocSet docSet = new BitDocSet(new FixedBitSet(words, maxDoc), count);

  if (count < smallSetSize(maxDoc)) {
    // make this optional?
    DocSet smallSet = toSmallSet(docSet);
    // assert equals(docSet, smallSet);
    return smallSet;
  }

  return docSet;
}
 
Example 29
@Before
public void setUp() throws IOException
{
    // Initialize all @Mock-annotated fields on this test instance.
    initMocks(this);

    // Link up the mocks: terms -> termsEnum -> postingsEnum, so code under test
    // that walks this chain receives our mocks back.
    when(mockTerms.iterator()).thenReturn(mockTermsEnum);
    when(mockTermsEnum.postings(null, PostingsEnum.NONE)).thenReturn(mockPostingsEnum);
}
 
Example 30
Source Project: lucene-solr   Source File: PhraseHelper.java    License: Apache License 2.0 5 votes vote down vote up
@Override
public Terms terms(String field) throws IOException {
  // ensure the underlying PostingsEnum returns offsets.  It's sad we have to do this to use the SpanCollector.
  // NOTE(review): the "field" argument is ignored here; super.terms() is always
  // invoked with the captured fieldName -- presumably intentional, but confirm.
  return new FilterTerms(super.terms(fieldName)) {
    @Override
    public TermsEnum iterator() throws IOException {
      return new FilterTermsEnum(in.iterator()) {
        @Override
        public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
          // OR the OFFSETS feature into whatever flags the caller requested.
          return super.postings(reuse, flags | PostingsEnum.OFFSETS);
        }
      };
    }
  };
}