org.apache.lucene.index.DocsEnum Java Examples

The following examples show how to use org.apache.lucene.index.DocsEnum. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: LuceneCorpusAdapter.java    From Palmetto with GNU Affero General Public License v3.0 6 votes vote down vote up
@Override
public void getDocumentsWithWordAsSet(String word, IntOpenHashSet documents) {
    DocsEnum docs = null;
    Term term = new Term(fieldName, word);
    try {
        int baseDocId;
        for (int i = 0; i < reader.length; i++) {
            docs = reader[i].termDocsEnum(term);
            baseDocId = contexts[i].docBase;
            if (docs != null) {
                while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
                    documents.add(baseDocId + docs.docID());
                }
            }
        }
    } catch (IOException e) {
        LOGGER.error("Error while requesting documents for word \"" + word + "\".", e);
    }
}
 
Example #2
Source File: LuceneCorpusAdapter.java    From Palmetto with GNU Affero General Public License v3.0 6 votes vote down vote up
@Override
public void getDocumentsWithWord(String word, IntArrayList documents) {
    DocsEnum docs = null;
    Term term = new Term(fieldName, word);
    try {
        int baseDocId;
        for (int i = 0; i < reader.length; i++) {
            docs = reader[i].termDocsEnum(term);
            baseDocId = contexts[i].docBase;
            if (docs != null) {
                while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
                    documents.add(docs.docID() + baseDocId);
                }
            }
        }
    } catch (IOException e) {
        LOGGER.error("Error while requesting documents for word \"" + word + "\".", e);
    }
}
 
Example #3
Source File: TermDocIterableTest.java    From incubator-retired-blur with Apache License 2.0 6 votes vote down vote up
@Test
public void testTermDocIterable() throws IOException {
  for (int pass = 0; pass < 1; pass++) {
    for (int id = 0; id < BLOCKS; id++) {
      DocsEnum termDocs = reader.termDocsEnum(new Term("id", Integer.toString(id)));
      TermDocIterable iterable = new TermDocIterable(termDocs, reader);
      int count = 0;
      int i = 0;
      long s = System.nanoTime();
      for (Document document : iterable) {
        count++;
        assertEquals(i, Integer.parseInt(document.get("field")));
        i++;
      }
      long time = System.nanoTime() - s;
      System.out.println(time / 1000000.0 + " " + id + " " + pass);
      assertEquals(COUNT_PER_BLOCK, count);
    }
  }
}
 
Example #4
Source File: IndexImporter.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
private void applyDeletes(Directory directory, IndexWriter indexWriter, IndexSearcherCloseable searcher,
    String shard, boolean emitDeletes, Configuration configuration) throws IOException {
  DirectoryReader newReader = DirectoryReader.open(directory);
  try {
    List<AtomicReaderContext> newLeaves = newReader.getContext().leaves();
    BlurPartitioner blurPartitioner = new BlurPartitioner();
    Text key = new Text();
    int numberOfShards = _shardContext.getTableContext().getDescriptor().getShardCount();
    int shardId = ShardUtil.getShardIndex(shard);

    Action action = new Action() {
      @Override
      public void found(AtomicReader reader, Bits liveDocs, TermsEnum termsEnum) throws IOException {
        DocsEnum docsEnum = termsEnum.docs(liveDocs, null);
        if (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          indexWriter.deleteDocuments(new Term(BlurConstants.ROW_ID, BytesRef.deepCopyOf(termsEnum.term())));
        }
      }
    };

    LOG.info("Applying deletes for table [{0}] shard [{1}] new reader [{2}]", _table, shard, newReader);
    boolean skipCheckRowIds = isInternal(newReader);
    LOG.info("Skip rowid check [{0}] for table [{1}] shard [{2}] new reader [{3}]", skipCheckRowIds, _table, shard,
        newReader);
    for (AtomicReaderContext context : newLeaves) {
      AtomicReader newAtomicReader = context.reader();
      if (isFastRowIdDeleteSupported(newAtomicReader)) {
        runNewRowIdCheckAndDelete(indexWriter, emitDeletes, blurPartitioner, key, numberOfShards, shardId,
            newAtomicReader, skipCheckRowIds);
      } else {
        runOldMergeSortRowIdCheckAndDelete(emitDeletes, searcher.getIndexReader(), blurPartitioner, key,
            numberOfShards, shardId, action, newAtomicReader);
      }
    }
  } finally {
    newReader.close();
  }
}
 
Example #5
Source File: BlurUtil.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
private static void applyFamily(OpenBitSet bits, String family, AtomicReader atomicReader, int primeDocRowId,
    int numberOfDocsInRow, Bits liveDocs) throws IOException {
  Fields fields = atomicReader.fields();
  Terms terms = fields.terms(BlurConstants.FAMILY);
  TermsEnum iterator = terms.iterator(null);
  BytesRef text = new BytesRef(family);
  int lastDocId = primeDocRowId + numberOfDocsInRow;
  if (iterator.seekExact(text, true)) {
    DocsEnum docs = iterator.docs(liveDocs, null, DocsEnum.FLAG_NONE);
    int doc = primeDocRowId;
    while ((doc = docs.advance(doc)) < lastDocId) {
      bits.set(doc - primeDocRowId);
    }
  }
}
 
Example #6
Source File: TermDocIterable.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
public TermDocIterable(DocsEnum docsEnum, AtomicReader reader, ResetableDocumentStoredFieldVisitor fieldSelector) {
  if (docsEnum == null) {
    throw new NullPointerException("docsEnum can not be null.");
  }
  this.docsEnum = docsEnum;
  this.reader = reader;
  this.fieldSelector = fieldSelector;
}
 
Example #7
Source File: BlockTreeTermsReader.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
@Override
public DocsEnum docs(Bits skipDocs, DocsEnum reuse, int flags) throws IOException {
  assert !eof;
  //if (DEBUG) {
  //System.out.println("BTTR.docs seg=" + segment);
  //}
  currentFrame.decodeMetaData();
  //if (DEBUG) {
  //System.out.println("  state=" + currentFrame.state);
  //}
  return postingsReader.docs(fieldInfo, currentFrame.state, skipDocs, reuse, flags);
}
 
Example #8
Source File: DocumentVisibilityFilter.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
  AtomicReader reader = context.reader();
  List<DocIdSet> list = new ArrayList<DocIdSet>();

  Fields fields = reader.fields();
  Terms terms = fields.terms(_fieldName);
  if (terms == null) {
    // if field is not present then show nothing.
    return DocIdSet.EMPTY_DOCIDSET;
  }
  TermsEnum iterator = terms.iterator(null);
  BytesRef bytesRef;
  DocumentVisibilityEvaluator visibilityEvaluator = new DocumentVisibilityEvaluator(_authorizations);
  while ((bytesRef = iterator.next()) != null) {
    if (isVisible(visibilityEvaluator, bytesRef)) {
      DocIdSet docIdSet = _filterCacheStrategy.getDocIdSet(_fieldName, bytesRef, reader);
      if (docIdSet != null) {
        list.add(docIdSet);
      } else {
        // Do not use acceptDocs because we want the acl cache to be version
        // agnostic.
        DocsEnum docsEnum = iterator.docs(null, null);
        list.add(buildCache(reader, docsEnum, bytesRef));
      }
    }
  }
  return getLogicalOr(list);
}
 
Example #9
Source File: SecureAtomicReader.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
private boolean checkDocs() throws IOException {
  DocsEnum maskDocsEnum = _maskTermsEnum.docs(null, null, DocsEnum.FLAG_NONE);
  DocsEnum docsEnum = in.docs(null, null, DocsEnum.FLAG_NONE);
  int docId;
  while ((docId = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    if (maskDocsEnum.advance(docId) != docId) {
      return true;
    }
  }
  return false;
}
 
Example #10
Source File: SecureAtomicReader.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
private boolean hasAccess(BytesRef term) throws IOException {
  DocsEnum docsEnum = in.docs(null, null, DocsEnum.FLAG_NONE);
  int docId;
  while ((docId = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    if (_accessControlReader.hasAccess(ReadType.TERMS_ENUM, docId)) {
      return true;
    }
  }
  return false;
}
 
Example #11
Source File: TermSearcher.java    From SourcererCC with GNU General Public License v3.0 4 votes vote down vote up
public synchronized void searchWithPosition(int queryTermsSeen) {
    if (null != this.reader) {
        if (null != this.reader.getContext()) {
            if (null != this.reader.getContext().leaves()) {
                Term term = new Term("tokens", this.searchTerm);
                for (AtomicReaderContext ctx : this.reader.getContext()
                        .leaves()) {
                    int base = ctx.docBase;
                    // SpanTermQuery spanQ = new SpanTermQuery(term);
                    try {
                        DocsAndPositionsEnum docEnum = MultiFields
                                .getTermPositionsEnum(ctx.reader(),
                                        MultiFields.getLiveDocs(ctx
                                                .reader()), "tokens", term
                                                .bytes());
                        if (null != docEnum) {
                            int doc = DocsEnum.NO_MORE_DOCS;
                            while ((doc = docEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
                                long docId = doc + base;
                                CandidateSimInfo simInfo = null;
                                if (this.simMap.containsKey(docId)) {
                                    simInfo = this.simMap.get(docId);
                                    simInfo.similarity = simInfo.similarity
                                            + Math.min(freqTerm,
                                                    docEnum.freq());

                                } else {
                                    if (earlierDocs.contains(docId))
                                        continue;

                                    Document d = SearchManager.searcher
                                            .get(shard).getDocument(docId);
                                    long candidateId = Long.parseLong(d
                                            .get("id"));
                                    // Get rid of these early -- we're only
                                    // looking for candidates
                                    // whose ids are smaller than the query
                                    if (candidateId >= this.queryId) {
                                        // System.out.println("Query " +
                                        // this.queryId +
                                        // ", getting rid of " +
                                        // candidateId);
                                        earlierDocs.add(docId);
                                        continue; // we reject the candidate
                                    }

                                    simInfo = new CandidateSimInfo();
                                    simInfo.doc = d;
                                    simInfo.candidateSize = Integer
                                            .parseInt(d.get("size"));
                                    simInfo.similarity = Math.min(freqTerm,
                                            docEnum.freq());
                                    // System.out.println("before putting in simmap "+
                                    // Util.debug_thread());
                                    this.simMap.put(docId, simInfo);
                                    // System.out.println("after putting in simmap "+
                                    // Util.debug_thread());
                                }
                                simInfo.queryMatchPosition = queryTermsSeen;
                                int candidatePos = docEnum.nextPosition();
                                simInfo.candidateMatchPosition = candidatePos
                                        + docEnum.freq();
                                if (!Util.isSatisfyPosFilter(
                                        this.simMap.get(docId).similarity,
                                        this.querySize, queryTermsSeen,
                                        simInfo.candidateSize,
                                        simInfo.candidateMatchPosition,
                                        this.computedThreshold)) {
                                    // System.out.println("before removing in simmap "+
                                    // Util.debug_thread());
                                    this.simMap.remove(docId);
                                    // System.out.println("after removing in simmap "+
                                    // Util.debug_thread());
                                }
                            }
                        } else {
                            logger.trace("docEnum is null, " + base
                                    + ", term: " + this.searchTerm
                                    + Util.debug_thread());
                        }
                    } catch (Exception e) {
                        e.printStackTrace();
                        logger.error("exception caught " + e.getMessage()
                                + Util.debug_thread() + " search term:"
                                + this.searchTerm);
                    }
                }
            } else {
                logger.debug("leaves are null, " + this.searchTerm
                        + Util.debug_thread());
            }
        } else {
            logger.debug("getContext is null, " + this.searchTerm
                    + Util.debug_thread());
        }
    } else {
        logger.debug("this.reader is null, " + this.searchTerm
                + Util.debug_thread());
    }
}
 
Example #12
Source File: WindowSupportingLuceneCorpusAdapter.java    From Palmetto with GNU Affero General Public License v3.0 4 votes vote down vote up
protected void requestDocumentsWithWord(String word, IntObjectOpenHashMap<IntArrayList[]> positionsInDocs,
        IntIntOpenHashMap docLengths, int wordId, int numberOfWords) {
    DocsAndPositionsEnum docPosEnum = null;
    Term term = new Term(fieldName, word);
    int localDocId,
            globalDocId,
            baseDocId;
    IntArrayList positions[];
    try {
        for (int i = 0; i < reader.length; i++) {
            docPosEnum = reader[i].termPositionsEnum(term);
            baseDocId = contexts[i].docBase;
            if (docPosEnum != null) {
                while (docPosEnum.nextDoc() != DocsEnum.NO_MORE_DOCS) {
                    localDocId = docPosEnum.docID();
                    globalDocId = localDocId + baseDocId;
                    // if this is the first word and we found a new document
                    if (!positionsInDocs.containsKey(globalDocId)) {
                        positions = new IntArrayList[numberOfWords];
                        positionsInDocs.put(globalDocId, positions);
                    } else {
                        positions = positionsInDocs.get(globalDocId);
                    }
                    if (positions[wordId] == null) {
                        positions[wordId] = new IntArrayList();
                    }
                    // Go through the positions inside this document
                    for (int p = 0; p < docPosEnum.freq(); ++p) {
                        positions[wordId].add(docPosEnum.nextPosition());
                    }
                    if (!docLengths.containsKey(globalDocId)) {
                        // Get the length of the document
                        docLengths.put(globalDocId, reader[i].document(localDocId).getField(docLengthFieldName)
                                .numericValue().intValue());
                    }
                }
            }
        }
    } catch (IOException e) {
        LOGGER.error("Error while requesting documents for word \"" + word + "\".", e);
    }
}
 
Example #13
Source File: TermDocIterable.java    From incubator-retired-blur with Apache License 2.0 4 votes vote down vote up
public TermDocIterable(DocsEnum docsEnum, AtomicReader reader) {
  this(docsEnum, reader, new ResetableDocumentStoredFieldVisitor());
}
 
Example #14
Source File: ExitableReader.java    From incubator-retired-blur with Apache License 2.0 4 votes vote down vote up
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
  checkRunningState();
  return _termsEnum.docs(liveDocs, reuse, flags);
}
 
Example #15
Source File: BlockTreeTermsReader.java    From incubator-retired-blur with Apache License 2.0 4 votes vote down vote up
@Override
public DocsEnum docs(Bits skipDocs, DocsEnum reuse, int flags) throws IOException {
  currentFrame.decodeMetaData();
  return postingsReader.docs(fieldInfo, currentFrame.termState, skipDocs, reuse, flags);
}
 
Example #16
Source File: SecureAtomicReader.java    From incubator-retired-blur with Apache License 2.0 4 votes vote down vote up
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
  Bits secureLiveDocs = getSecureLiveDocs(liveDocs, _maxDoc, _accessControlReader);
  return in.docs(secureLiveDocs, reuse, flags);
}