Java Code Examples for org.apache.lucene.index.DocsEnum

The following examples show how to use org.apache.lucene.index.DocsEnum. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Adds to {@code documents} the id of every document that the term
 * ({@code fieldName}, {@code word}) matches in any of the readers.
 *
 * <p>Each id reported by a reader is offset by the {@code docBase} of the context at the
 * same array index before it is added. An {@link IOException} raised while reading is
 * logged and not rethrown; ids added before the failure stay in the set.
 *
 * @param word      the word to look up
 * @param documents the set receiving the offset document ids
 */
@Override
public void getDocumentsWithWordAsSet(String word, IntOpenHashSet documents) {
    final Term wordTerm = new Term(fieldName, word);
    try {
        for (int readerIdx = 0; readerIdx < reader.length; readerIdx++) {
            final DocsEnum postings = reader[readerIdx].termDocsEnum(wordTerm);
            final int docBase = contexts[readerIdx].docBase;
            if (postings == null) {
                // This reader has no postings for the term.
                continue;
            }
            while (postings.nextDoc() != DocsEnum.NO_MORE_DOCS) {
                documents.add(docBase + postings.docID());
            }
        }
    } catch (IOException e) {
        LOGGER.error("Error while requesting documents for word \"" + word + "\".", e);
    }
}
 
Example 2
/**
 * Appends to {@code documents} the id of every document that the term
 * ({@code fieldName}, {@code word}) matches in any of the readers.
 *
 * <p>Each id reported by a reader is offset by the {@code docBase} of the context at the
 * same array index before it is appended. An {@link IOException} raised while reading is
 * logged and not rethrown; ids appended before the failure stay in the list.
 *
 * @param word      the word to look up
 * @param documents the list receiving the offset document ids
 */
@Override
public void getDocumentsWithWord(String word, IntArrayList documents) {
    final Term wordTerm = new Term(fieldName, word);
    try {
        for (int readerIdx = 0; readerIdx < reader.length; readerIdx++) {
            final DocsEnum postings = reader[readerIdx].termDocsEnum(wordTerm);
            final int docBase = contexts[readerIdx].docBase;
            if (postings == null) {
                // This reader has no postings for the term.
                continue;
            }
            while (postings.nextDoc() != DocsEnum.NO_MORE_DOCS) {
                documents.add(postings.docID() + docBase);
            }
        }
    } catch (IOException e) {
        LOGGER.error("Error while requesting documents for word \"" + word + "\".", e);
    }
}
 
Example 3
/**
 * For every block id, iterates the documents matching the term ("id", id) through a
 * {@code TermDocIterable} and checks that the stored "field" values count up from zero
 * and that exactly {@code COUNT_PER_BLOCK} documents are returned. The elapsed time of
 * each iteration is printed in milliseconds.
 */
@Test
public void testTermDocIterable() throws IOException {
  for (int pass = 0; pass < 1; pass++) {
    for (int id = 0; id < BLOCKS; id++) {
      Term idTerm = new Term("id", Integer.toString(id));
      TermDocIterable iterable = new TermDocIterable(reader.termDocsEnum(idTerm), reader);
      // Doubles as the expected "field" value of the next document and the running count.
      int seen = 0;
      long startNanos = System.nanoTime();
      for (Document document : iterable) {
        assertEquals(seen, Integer.parseInt(document.get("field")));
        seen++;
      }
      long elapsedNanos = System.nanoTime() - startNanos;
      System.out.println(elapsedNanos / 1000000.0 + " " + id + " " + pass);
      assertEquals(COUNT_PER_BLOCK, seen);
    }
  }
}
 
Example 4
Source Project: incubator-retired-blur   Source File: IndexImporter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Opens a reader on {@code directory} and runs the row id check-and-delete step for each
 * of its leaves, using the fast path when the leaf reader supports it and the merge-sort
 * path otherwise. The reader is always closed before returning.
 *
 * <p>The {@code configuration} parameter is not used by this method.
 *
 * @param directory   the directory holding the index to open
 * @param indexWriter the writer on which deletes are issued
 * @param searcher    supplies the index reader for the merge-sort path
 * @param shard       the shard name; its index is resolved via {@code ShardUtil}
 * @param emitDeletes forwarded unchanged to the check-and-delete helpers
 * @throws IOException if opening, reading, or closing the reader fails
 */
private void applyDeletes(Directory directory, IndexWriter indexWriter, IndexSearcherCloseable searcher,
    String shard, boolean emitDeletes, Configuration configuration) throws IOException {
  DirectoryReader incomingReader = DirectoryReader.open(directory);
  try {
    List<AtomicReaderContext> incomingLeaves = incomingReader.getContext().leaves();
    BlurPartitioner partitioner = new BlurPartitioner();
    Text rowKey = new Text();
    int shardCount = _shardContext.getTableContext().getDescriptor().getShardCount();
    int shardIndex = ShardUtil.getShardIndex(shard);

    // Callback for the merge-sort path: delete by row id when the term has at least one doc.
    Action deleteWhenPresent = new Action() {
      @Override
      public void found(AtomicReader reader, Bits liveDocs, TermsEnum termsEnum) throws IOException {
        DocsEnum postings = termsEnum.docs(liveDocs, null);
        boolean hasDoc = postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS;
        if (hasDoc) {
          indexWriter.deleteDocuments(new Term(BlurConstants.ROW_ID, BytesRef.deepCopyOf(termsEnum.term())));
        }
      }
    };

    LOG.info("Applying deletes for table [{0}] shard [{1}] new reader [{2}]", _table, shard, incomingReader);
    boolean skipRowIdCheck = isInternal(incomingReader);
    LOG.info("Skip rowid check [{0}] for table [{1}] shard [{2}] new reader [{3}]", skipRowIdCheck, _table, shard,
        incomingReader);
    for (AtomicReaderContext leaf : incomingLeaves) {
      AtomicReader leafReader = leaf.reader();
      if (!isFastRowIdDeleteSupported(leafReader)) {
        runOldMergeSortRowIdCheckAndDelete(emitDeletes, searcher.getIndexReader(), partitioner, rowKey,
            shardCount, shardIndex, deleteWhenPresent, leafReader);
        continue;
      }
      runNewRowIdCheckAndDelete(indexWriter, emitDeletes, partitioner, rowKey, shardCount, shardIndex,
          leafReader, skipRowIdCheck);
    }
  } finally {
    incomingReader.close();
  }
}
 
Example 5
Source Project: incubator-retired-blur   Source File: BlurUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Sets, in {@code bits}, one bit per document in the range
 * {@code [primeDocRowId, primeDocRowId + numberOfDocsInRow)} that matches the given
 * {@code family} term in the {@code BlurConstants.FAMILY} field. The bit index is the
 * document id relative to {@code primeDocRowId}.
 *
 * <p>Nothing is set when the reader has no postings, the family field is absent, or the
 * family term does not exist.
 *
 * @param bits              the bit set to update
 * @param family            the family name to seek
 * @param atomicReader      the reader whose postings are consulted
 * @param primeDocRowId     the first document id of the range
 * @param numberOfDocsInRow the number of documents in the range
 * @param liveDocs          passed to the postings enum to filter documents
 * @throws IOException if reading the postings fails
 */
private static void applyFamily(OpenBitSet bits, String family, AtomicReader atomicReader, int primeDocRowId,
    int numberOfDocsInRow, Bits liveDocs) throws IOException {
  Fields fields = atomicReader.fields();
  if (fields == null) {
    // A reader without postings reports null fields; there is nothing to mark.
    return;
  }
  Terms terms = fields.terms(BlurConstants.FAMILY);
  if (terms == null) {
    // The family field is not present in this reader.
    return;
  }
  TermsEnum iterator = terms.iterator(null);
  BytesRef text = new BytesRef(family);
  if (!iterator.seekExact(text, true)) {
    return;
  }
  DocsEnum docs = iterator.docs(liveDocs, null, DocsEnum.FLAG_NONE);
  int lastDocId = primeDocRowId + numberOfDocsInRow; // exclusive upper bound
  // Position once with advance(), then step with nextDoc(): calling advance() with a
  // target equal to the current document is undefined by the iterator contract.
  for (int doc = docs.advance(primeDocRowId); doc < lastDocId; doc = docs.nextDoc()) {
    bits.set(doc - primeDocRowId);
  }
}
 
Example 6
Source Project: incubator-retired-blur   Source File: TermDocIterable.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates an iterable that keeps the given postings enum, reader and field selector.
 *
 * @param docsEnum      the postings enum; must not be null
 * @param reader        the reader stored alongside the enum
 * @param fieldSelector the stored-field visitor stored alongside the enum
 * @throws NullPointerException if {@code docsEnum} is null
 */
public TermDocIterable(DocsEnum docsEnum, AtomicReader reader, ResetableDocumentStoredFieldVisitor fieldSelector) {
  if (null == docsEnum) {
    throw new NullPointerException("docsEnum can not be null.");
  }
  this.fieldSelector = fieldSelector;
  this.reader = reader;
  this.docsEnum = docsEnum;
}
 
Example 7
/**
 * Decodes the metadata of the current frame and asks the postings reader for a
 * {@link DocsEnum} over the frame's term state.
 *
 * <p>Asserts that this enum has not reached its end.
 *
 * @param skipDocs forwarded to the postings reader
 * @param reuse    forwarded to the postings reader
 * @param flags    forwarded to the postings reader
 * @return the enum produced by the postings reader
 * @throws IOException if decoding or reading fails
 */
@Override
public DocsEnum docs(Bits skipDocs, DocsEnum reuse, int flags) throws IOException {
  assert !eof;
  // The term state must be decoded before it is handed to the postings reader.
  currentFrame.decodeMetaData();
  final DocsEnum postings = postingsReader.docs(fieldInfo, currentFrame.state, skipDocs, reuse, flags);
  return postings;
}
 
Example 8
/**
 * Builds the union of the doc id sets of every term in {@code _fieldName} that
 * {@code isVisible} accepts for the configured authorizations.
 *
 * <p>For each visible term the doc id set is taken from {@code _filterCacheStrategy}
 * when available, otherwise it is built from the term's postings via {@code buildCache}.
 * The {@code acceptDocs} argument is deliberately ignored so that the cached sets do not
 * depend on the reader's deletions.
 *
 * @param context    the leaf context whose reader is inspected
 * @param acceptDocs ignored (see above)
 * @return the logical OR of the per-term sets, or {@link DocIdSet#EMPTY_DOCIDSET} when
 *         the reader has no postings or the field is absent
 * @throws IOException if reading the index fails
 */
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
  AtomicReader reader = context.reader();

  Fields fields = reader.fields();
  if (fields == null) {
    // A reader without postings reports null fields; treat it like a missing field.
    return DocIdSet.EMPTY_DOCIDSET;
  }
  Terms terms = fields.terms(_fieldName);
  if (terms == null) {
    // if field is not present then show nothing.
    return DocIdSet.EMPTY_DOCIDSET;
  }

  List<DocIdSet> list = new ArrayList<DocIdSet>();
  TermsEnum iterator = terms.iterator(null);
  BytesRef bytesRef;
  DocumentVisibilityEvaluator visibilityEvaluator = new DocumentVisibilityEvaluator(_authorizations);
  while ((bytesRef = iterator.next()) != null) {
    if (isVisible(visibilityEvaluator, bytesRef)) {
      DocIdSet docIdSet = _filterCacheStrategy.getDocIdSet(_fieldName, bytesRef, reader);
      if (docIdSet != null) {
        list.add(docIdSet);
      } else {
        // Do not use acceptDocs because we want the acl cache to be version
        // agnostic.
        DocsEnum docsEnum = iterator.docs(null, null);
        list.add(buildCache(reader, docsEnum, bytesRef));
      }
    }
  }
  return getLogicalOr(list);
}
 
Example 9
Source Project: incubator-retired-blur   Source File: SecureAtomicReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Walks the documents of the wrapped enum's current term and reports whether any of them
 * is missing from the mask term's postings.
 *
 * @return {@code true} as soon as advancing the mask enum to a document id does not land
 *         exactly on that id; {@code false} if every document is matched by the mask
 * @throws IOException if reading the postings fails
 */
private boolean checkDocs() throws IOException {
  DocsEnum maskPostings = _maskTermsEnum.docs(null, null, DocsEnum.FLAG_NONE);
  DocsEnum postings = in.docs(null, null, DocsEnum.FLAG_NONE);
  for (int doc = postings.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = postings.nextDoc()) {
    boolean presentInMask = maskPostings.advance(doc) == doc;
    if (!presentInMask) {
      return true;
    }
  }
  return false;
}
 
Example 10
Source Project: incubator-retired-blur   Source File: SecureAtomicReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether at least one document of the wrapped enum's current term is accessible
 * for {@code ReadType.TERMS_ENUM} according to the access control reader.
 *
 * @param term not read by this method; the postings come from the wrapped enum
 * @return {@code true} on the first accessible document, {@code false} if there is none
 * @throws IOException if reading the postings or the access check fails
 */
private boolean hasAccess(BytesRef term) throws IOException {
  DocsEnum postings = in.docs(null, null, DocsEnum.FLAG_NONE);
  for (int doc = postings.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = postings.nextDoc()) {
    if (_accessControlReader.hasAccess(ReadType.TERMS_ENUM, doc)) {
      return true;
    }
  }
  return false;
}
 
Example 11
Source Project: SourcererCC   Source File: TermSearcher.java    License: GNU General Public License v3.0 4 votes vote down vote up
/**
 * Looks up {@code searchTerm} in the "tokens" field of every leaf of the reader and
 * updates {@code simMap} with one {@code CandidateSimInfo} per matching document.
 *
 * <p>For a document already in {@code simMap} the similarity is increased by
 * {@code min(freqTerm, freq)}. A new document is loaded, rejected (and remembered in
 * {@code earlierDocs}) when its "id" is not smaller than {@code queryId}, and otherwise
 * added. After the update the entry is removed again if
 * {@code Util.isSatisfyPosFilter} rejects it.
 *
 * <p>A missing reader, context or leaf list is logged at debug level and ends the call.
 * An exception inside one leaf is logged with its stack trace and the remaining leaves
 * are still processed.
 *
 * @param queryTermsSeen stored as the query match position of every touched entry and
 *                       passed to the position filter
 */
public synchronized void searchWithPosition(int queryTermsSeen) {
    if (null == this.reader) {
        logger.debug("this.reader is null, " + this.searchTerm
                + Util.debug_thread());
        return;
    }
    if (null == this.reader.getContext()) {
        logger.debug("getContext is null, " + this.searchTerm
                + Util.debug_thread());
        return;
    }
    if (null == this.reader.getContext().leaves()) {
        logger.debug("leaves are null, " + this.searchTerm
                + Util.debug_thread());
        return;
    }

    Term term = new Term("tokens", this.searchTerm);
    for (AtomicReaderContext ctx : this.reader.getContext().leaves()) {
        int base = ctx.docBase;
        try {
            DocsAndPositionsEnum docEnum = MultiFields.getTermPositionsEnum(
                    ctx.reader(), MultiFields.getLiveDocs(ctx.reader()),
                    "tokens", term.bytes());
            if (null == docEnum) {
                logger.trace("docEnum is null, " + base
                        + ", term: " + this.searchTerm
                        + Util.debug_thread());
                continue;
            }
            int doc;
            while ((doc = docEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
                long docId = doc + base;
                CandidateSimInfo simInfo;
                if (this.simMap.containsKey(docId)) {
                    simInfo = this.simMap.get(docId);
                    simInfo.similarity = simInfo.similarity
                            + Math.min(freqTerm, docEnum.freq());
                } else {
                    if (earlierDocs.contains(docId)) {
                        // Already rejected for this query.
                        continue;
                    }

                    Document d = SearchManager.searcher.get(shard)
                            .getDocument(docId);
                    long candidateId = Long.parseLong(d.get("id"));
                    // Get rid of these early -- we're only looking for
                    // candidates whose ids are smaller than the query
                    if (candidateId >= this.queryId) {
                        earlierDocs.add(docId);
                        continue; // we reject the candidate
                    }

                    simInfo = new CandidateSimInfo();
                    simInfo.doc = d;
                    simInfo.candidateSize = Integer.parseInt(d.get("size"));
                    simInfo.similarity = Math.min(freqTerm, docEnum.freq());
                    this.simMap.put(docId, simInfo);
                }
                simInfo.queryMatchPosition = queryTermsSeen;
                int candidatePos = docEnum.nextPosition();
                simInfo.candidateMatchPosition = candidatePos
                        + docEnum.freq();
                if (!Util.isSatisfyPosFilter(
                        this.simMap.get(docId).similarity,
                        this.querySize, queryTermsSeen,
                        simInfo.candidateSize,
                        simInfo.candidateMatchPosition,
                        this.computedThreshold)) {
                    this.simMap.remove(docId);
                }
            }
        } catch (Exception e) {
            // Hand the exception to the logger so the stack trace lands in the log
            // instead of on stderr.
            logger.error("exception caught " + e.getMessage()
                    + Util.debug_thread() + " search term:"
                    + this.searchTerm, e);
        }
    }
}
 
Example 12
/**
 * Collects, for every document matching the term ({@code fieldName}, {@code word}), the
 * positions of the word and the document's length.
 *
 * <p>Document ids are offset by the {@code docBase} of the context at the same array
 * index as the reader. Positions are appended to slot {@code wordId} of the document's
 * array in {@code positionsInDocs}; the array is created with {@code numberOfWords}
 * slots the first time a document is seen. The length is read from the stored field
 * {@code docLengthFieldName} once per document. An {@link IOException} is logged and not
 * rethrown.
 *
 * @param word            the word to look up
 * @param positionsInDocs per-document arrays of position lists, indexed by word id
 * @param docLengths      per-document lengths
 * @param wordId          the slot of this word in each positions array
 * @param numberOfWords   the size of a newly created positions array
 */
protected void requestDocumentsWithWord(String word, IntObjectOpenHashMap<IntArrayList[]> positionsInDocs,
        IntIntOpenHashMap docLengths, int wordId, int numberOfWords) {
    final Term wordTerm = new Term(fieldName, word);
    try {
        for (int readerIdx = 0; readerIdx < reader.length; readerIdx++) {
            final DocsAndPositionsEnum postings = reader[readerIdx].termPositionsEnum(wordTerm);
            final int docBase = contexts[readerIdx].docBase;
            if (postings == null) {
                // This reader has no positional postings for the term.
                continue;
            }
            while (postings.nextDoc() != DocsEnum.NO_MORE_DOCS) {
                final int localDocId = postings.docID();
                final int globalDocId = localDocId + docBase;

                // Fetch the document's per-word position lists, creating them on first sight.
                final IntArrayList[] perWordPositions;
                if (positionsInDocs.containsKey(globalDocId)) {
                    perWordPositions = positionsInDocs.get(globalDocId);
                } else {
                    perWordPositions = new IntArrayList[numberOfWords];
                    positionsInDocs.put(globalDocId, perWordPositions);
                }
                if (perWordPositions[wordId] == null) {
                    perWordPositions[wordId] = new IntArrayList();
                }

                // Go through the positions inside this document
                for (int p = 0; p < postings.freq(); ++p) {
                    perWordPositions[wordId].add(postings.nextPosition());
                }

                if (!docLengths.containsKey(globalDocId)) {
                    // Get the length of the document
                    docLengths.put(globalDocId, reader[readerIdx].document(localDocId)
                            .getField(docLengthFieldName).numericValue().intValue());
                }
            }
        }
    } catch (IOException e) {
        LOGGER.error("Error while requesting documents for word \"" + word + "\".", e);
    }
}
 
Example 13
Source Project: incubator-retired-blur   Source File: TermDocIterable.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates an iterable over the given postings enum and reader, delegating to the
 * three-argument constructor with a newly created
 * {@code ResetableDocumentStoredFieldVisitor}.
 *
 * @param docsEnum the postings enum to iterate
 * @param reader   the reader passed on to the three-argument constructor
 */
public TermDocIterable(DocsEnum docsEnum, AtomicReader reader) {
  this(docsEnum, reader, new ResetableDocumentStoredFieldVisitor());
}
 
Example 14
Source Project: incubator-retired-blur   Source File: ExitableReader.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Calls {@code checkRunningState()} and then delegates to the wrapped terms enum.
 *
 * @param liveDocs forwarded to the wrapped enum
 * @param reuse    forwarded to the wrapped enum
 * @param flags    forwarded to the wrapped enum
 * @return the enum returned by the wrapped terms enum
 * @throws IOException if the delegate fails
 */
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
  checkRunningState();
  final DocsEnum delegateDocs = _termsEnum.docs(liveDocs, reuse, flags);
  return delegateDocs;
}
 
Example 15
/**
 * Decodes the metadata of the current frame and asks the postings reader for a
 * {@link DocsEnum} over the frame's term state.
 *
 * @param skipDocs forwarded to the postings reader
 * @param reuse    forwarded to the postings reader
 * @param flags    forwarded to the postings reader
 * @return the enum produced by the postings reader
 * @throws IOException if decoding or reading fails
 */
@Override
public DocsEnum docs(Bits skipDocs, DocsEnum reuse, int flags) throws IOException {
  // The term state must be decoded before it is handed to the postings reader.
  currentFrame.decodeMetaData();
  final DocsEnum postings = postingsReader.docs(fieldInfo, currentFrame.termState, skipDocs, reuse, flags);
  return postings;
}
 
Example 16
Source Project: incubator-retired-blur   Source File: SecureAtomicReader.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Delegates to the wrapped enum after replacing {@code liveDocs} with the bits returned
 * by {@code getSecureLiveDocs} for this reader's max doc and access control reader.
 *
 * @param liveDocs the caller's live docs, passed to {@code getSecureLiveDocs}
 * @param reuse    forwarded to the wrapped enum
 * @param flags    forwarded to the wrapped enum
 * @return the enum returned by the wrapped enum
 * @throws IOException if the delegate fails
 */
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
  final Bits restrictedLiveDocs = getSecureLiveDocs(liveDocs, _maxDoc, _accessControlReader);
  final DocsEnum securedDocs = in.docs(restrictedLiveDocs, reuse, flags);
  return securedDocs;
}