org.apache.lucene.index.LeafReader Java Examples

The following examples show how to use org.apache.lucene.index.LeafReader. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source Project: Elasticsearch   Author: baidu   File: GeoPointArrayIndexFieldData.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Loads geo-point field data for a single segment.
 *
 * When the segment has no terms for the field, an empty instance sized to the
 * segment's {@code maxDoc} is returned and its memory use is reported to the
 * estimator. Otherwise the loader is picked by the version the index was created with.
 *
 * @param context the leaf (segment) to load from
 * @return the loaded field data for that segment
 * @throws Exception if loading fails
 */
@Override
public AtomicGeoPointFieldData loadDirect(LeafReaderContext context) throws Exception {
    final LeafReader leafReader = context.reader();
    final Terms fieldTerms = leafReader.terms(getFieldNames().indexName());

    // TODO: Use an actual estimator to estimate before loading.
    final NonEstimatingEstimator estimator =
        new NonEstimatingEstimator(breakerService.getBreaker(CircuitBreaker.FIELDDATA));

    if (fieldTerms == null) {
        // Nothing indexed for this field in this segment.
        final AtomicGeoPointFieldData empty = AbstractAtomicGeoPointFieldData.empty(leafReader.maxDoc());
        estimator.afterLoad(null, empty.ramBytesUsed());
        return empty;
    }

    // Both loaders are handed a null initial data reference.
    final AtomicGeoPointFieldData data = null;
    if (Version.indexCreated(indexSettings).before(Version.V_2_2_0)) {
        return loadLegacyFieldData(leafReader, estimator, fieldTerms, data);
    }
    return loadFieldData22(leafReader, estimator, fieldTerms, data);
}
 
Example #2
Source Project: crate   Author: crate   File: LuceneBatchIterator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Advances to the next document that is neither deleted nor below the minimum score.
 *
 * @return {@code true} after {@code onDoc} was called for an accepted document;
 *         {@code false} once no further iterator can be obtained (state is cleared first)
 * @throws IOException if the underlying iterator fails
 */
private boolean innerMoveNext() throws IOException {
    while (tryAdvanceDocIdSetIterator()) {
        final Bits liveDocs = currentLeaf.reader().getLiveDocs();
        for (int doc = currentDocIdSetIt.nextDoc();
             doc != DocIdSetIterator.NO_MORE_DOCS;
             doc = currentDocIdSetIt.nextDoc()) {
            // The score is only inspected for documents that are not deleted.
            final boolean accepted = !docDeleted(liveDocs, doc) && !belowMinScore(currentScorer);
            if (accepted) {
                onDoc(doc);
                return true;
            }
        }
        // Current iterator is exhausted; drop it so the next one can be picked up.
        currentDocIdSetIt = null;
    }
    clearState();
    return false;
}
 
Example #3
Source Project: lucene-solr   Author: apache   File: ConfusionMatrixGeneratorTest.java    License: Apache License 2.0 6 votes vote down vote up
/** Builds a confusion matrix with a {@code SimpleNaiveBayesClassifier} over the sample index and checks it. */
@Test
public void testGetConfusionMatrixWithSNB() throws Exception {
  LeafReader index = null;
  try {
    final MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
    index = getSampleIndex(mockAnalyzer);
    final Classifier<BytesRef> naiveBayes =
        new SimpleNaiveBayesClassifier(index, mockAnalyzer, null, categoryFieldName, textFieldName);
    final ConfusionMatrixGenerator.ConfusionMatrix matrix =
        ConfusionMatrixGenerator.getConfusionMatrix(index, naiveBayes, categoryFieldName, textFieldName, -1);
    checkCM(matrix);
  } finally {
    // The reader stays null if building the sample index failed.
    if (index != null) {
      index.close();
    }
  }
}
 
Example #4
Source Project: Elasticsearch   Author: baidu   File: PerThreadIDAndVersionLookup.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Initializes the lookup for the given segment.
 *
 * If the segment has no fields, or no terms for the uid field, the terms enum and
 * version doc values stay {@code null} and payloads are reported as absent.
 *
 * @param reader the segment reader to look ids and versions up in
 * @throws IOException if reading the segment fails
 */
public PerThreadIDAndVersionLookup(LeafReader reader) throws IOException {
    final Fields segmentFields = reader.fields();
    final Terms uidTerms = (segmentFields == null) ? null : segmentFields.terms(UidFieldMapper.NAME);

    final boolean payloads;
    final TermsEnum uidEnum;
    final NumericDocValues versionValues;
    if (uidTerms == null) {
        payloads = false;
        uidEnum = null;
        versionValues = null;
    } else {
        payloads = uidTerms.hasPayloads();
        uidEnum = uidTerms.iterator();
        assert uidEnum != null;
        // Versions are only fetched when uid terms exist in this segment.
        versionValues = reader.getNumericDocValues(VersionFieldMapper.NAME);
    }

    this.versions = versionValues;
    this.termsEnum = uidEnum;
    this.hasPayloads = payloads;
}
 
Example #5
Source Project: lucene-solr   Author: apache   File: ConfusionMatrixGeneratorTest.java    License: Apache License 2.0 6 votes vote down vote up
/** Builds a confusion matrix with a {@code BM25NBClassifier} over the sample index and checks it. */
@Test
public void testGetConfusionMatrixWithBM25NB() throws Exception {
  LeafReader index = null;
  try {
    final MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
    index = getSampleIndex(mockAnalyzer);
    final Classifier<BytesRef> bm25NaiveBayes =
        new BM25NBClassifier(index, mockAnalyzer, null, categoryFieldName, textFieldName);
    final ConfusionMatrixGenerator.ConfusionMatrix matrix =
        ConfusionMatrixGenerator.getConfusionMatrix(index, bm25NaiveBayes, categoryFieldName, textFieldName, -1);
    checkCM(matrix);
  } finally {
    // The reader stays null if building the sample index failed.
    if (index != null) {
      index.close();
    }
  }
}
 
Example #6
Source Project: lucene-solr   Author: apache   File: TopLevelJoinQuery.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Validates that {@code fieldName} exists and has docValues, then returns its values
 * as {@link SortedSetDocValues} read from the searcher's slow atomic reader.
 *
 * @param solrSearcher searcher providing the schema and the reader
 * @param fieldName    the field whose docValues are required
 * @param querySide    label for the side of the join, used only in error messages
 * @return sorted-set docValues; single-valued fields are wrapped as a singleton set
 * @throws SolrException (BAD_REQUEST) if the field is unknown or has no docValues
 * @throws IOException if reading the docValues fails
 */
private SortedSetDocValues validateAndFetchDocValues(SolrIndexSearcher solrSearcher, String fieldName, String querySide) throws IOException {
  final SchemaField schemaField = solrSearcher.getSchema().getFieldOrNull(fieldName);

  if (schemaField == null) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, querySide + " field '" + fieldName + "' does not exist");
  }
  if (!schemaField.hasDocValues()) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
        "'top-level' join queries require both 'from' and 'to' fields to have docValues, but " + querySide +
            " field [" + fieldName +  "] does not.");
  }

  final LeafReader topLevelReader = solrSearcher.getSlowAtomicReader();
  return field(schemaField.multiValued())
      ? DocValues.getSortedSet(topLevelReader, fieldName)
      : DocValues.singleton(DocValues.getSorted(topLevelReader, fieldName));
}

/** Identity helper that only names the multi-valued flag at the call site. */
private static boolean field(boolean multiValued) {
  return multiValued;
}
 
Example #7
Source Project: lucene-solr   Author: apache   File: UnifiedHighlighter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Wraps every leaf of {@code reader} in a {@code TermVectorReusingLeafReader} and
 * exposes the wrapped leaves as one composite reader.
 *
 * Closing the returned reader closes {@code reader}; the returned reader reports
 * no reader cache helper.
 *
 * @param reader the reader whose leaves are wrapped
 * @return a composite reader over the wrapped leaves
 * @throws IOException if the composite reader cannot be created
 */
static IndexReader wrap(IndexReader reader) throws IOException {
  final LeafReader[] wrappedLeaves = reader.leaves().stream()
      .map(leafContext -> new TermVectorReusingLeafReader(leafContext.reader()))
      .toArray(size -> new LeafReader[size]);

  return new BaseCompositeReader<IndexReader>(wrappedLeaves) {
    @Override
    public CacheHelper getReaderCacheHelper() {
      return null;
    }

    @Override
    protected void doClose() throws IOException {
      // Delegate closing to the reader that was wrapped.
      reader.close();
    }
  };
}
 
Example #8
Source Project: lucene-solr   Author: apache   File: KNearestNeighborClassifierTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Classifies the technology input with several k-NN configurations and checks that
 * the BM25 and LMDirichlet variants produce different scores.
 */
@Test
public void testBasicUsage() throws Exception {
  LeafReader index = null;
  try {
    final MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
    index = getSampleIndex(mockAnalyzer);

    // First two runs: no explicit similarity (null), then LMDirichlet, both with arguments 1, 0, 0.
    final KNearestNeighborClassifier plainKnn =
        new KNearestNeighborClassifier(index, null, mockAnalyzer, null, 1, 0, 0, categoryFieldName, textFieldName);
    checkCorrectClassification(plainKnn, TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);

    final KNearestNeighborClassifier lmKnn =
        new KNearestNeighborClassifier(index, new LMDirichletSimilarity(), mockAnalyzer, null, 1, 0, 0, categoryFieldName, textFieldName);
    checkCorrectClassification(lmKnn, TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);

    // Next two runs use arguments 3, 2, 1 and keep their results for comparison.
    final KNearestNeighborClassifier bm25Knn =
        new KNearestNeighborClassifier(index, new BM25Similarity(), mockAnalyzer, null, 3, 2, 1, categoryFieldName, textFieldName);
    final ClassificationResult<BytesRef> bm25Result =
        checkCorrectClassification(bm25Knn, TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);

    final KNearestNeighborClassifier lmKnnWider =
        new KNearestNeighborClassifier(index, new LMDirichletSimilarity(), mockAnalyzer, null, 3, 2, 1, categoryFieldName, textFieldName);
    final ClassificationResult<BytesRef> lmResult =
        checkCorrectClassification(lmKnnWider, TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);

    assertTrue(bm25Result.getScore() != lmResult.getScore());
  } finally {
    if (index != null) {
      index.close();
    }
  }
}
 
Example #9
Source Project: lucene-solr   Author: apache   File: TestGeo3DPoint.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Produces a textual explanation of how the points index is traversed for one document.
 *
 * The leaf owning {@code docID} is located, then the field's point values are
 * intersected twice with the same {@code ExplainingVisitor}: once before and once
 * after {@code startSecondPhase()} is called on it.
 *
 * @return the text accumulated in the builder that was handed to the visitor
 * @throws Exception if reading the index fails
 */
public static String explain(String fieldName, GeoShape shape, GeoPoint targetDocPoint, GeoPoint scaledDocPoint, IndexReader reader, int docID) throws Exception {
    final XYZBounds shapeBounds = new XYZBounds();
    shape.getBounds(shapeBounds);

    // Find the leaf reader that owns this doc, and the doc's id relative to that leaf.
    final int leafIndex = ReaderUtil.subIndex(docID, reader.leaves());
    final LeafReader leaf = reader.leaves().get(leafIndex).reader();
    final int leafDocID = docID - reader.leaves().get(leafIndex).docBase;

    final StringBuilder out = new StringBuilder();
    out.append("target is in leaf ").append(leaf)
       .append(" of full reader ").append(reader)
       .append("\n");

    final DocIdSetBuilder hits = new DocIdSetBuilder(leaf.maxDoc());
    final ExplainingVisitor visitor = new ExplainingVisitor(shape, targetDocPoint, scaledDocPoint,
        new PointInShapeIntersectVisitor(hits, shape, shapeBounds),
        leafDocID, 3, Integer.BYTES, out);

    // First phase: figure out the "path" that leads to the target docID.
    leaf.getPointValues(fieldName).intersect(visitor);

    // Second phase: see how the wrapped visitor responded along that path.
    visitor.startSecondPhase();
    leaf.getPointValues(fieldName).intersect(visitor);

    return out.toString();
}
 
Example #10
Source Project: lucene-solr   Author: apache   File: TestCompressingTermVectorsFormat.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Indexes one document with term vectors and checks that ordinal-based calls on the
 * term vector's {@code TermsEnum} throw {@link UnsupportedOperationException}.
 */
public void testNoOrds() throws Exception {
  final Directory directory = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  // A single document whose "foo" field stores term vectors.
  final FieldType vectorType = new FieldType(TextField.TYPE_NOT_STORED);
  vectorType.setStoreTermVectors(true);
  final Document document = new Document();
  document.add(new Field("foo", "this is a test", vectorType));
  writer.addDocument(document);

  final LeafReader leaf = getOnlyLeafReader(writer.getReader());
  final Terms vector = leaf.getTermVector(0, "foo");
  assertNotNull(vector);

  final TermsEnum vectorTerms = vector.iterator();
  assertEquals(SeekStatus.FOUND, vectorTerms.seekCeil(new BytesRef("this")));

  expectThrows(UnsupportedOperationException.class, () -> vectorTerms.ord());
  expectThrows(UnsupportedOperationException.class, () -> vectorTerms.seekExact(0));

  leaf.close();
  writer.close();
  directory.close();
}
 
Example #11
Source Project: mtas   Author: meertensinstituut   File: CodecCollector.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Collects, for a "create" collection action, every term of the configured fields
 * that occurs in at least one document of {@code docSet}.
 *
 * For the "check" and "list" actions (and any other action) nothing is done here.
 * Each leaf is visited in turn; doc ids from {@code docSet} are translated to
 * leaf-local ids by subtracting the leaf's {@code docBase}.
 *
 * NOTE(review): the postings are only ever advanced forward, so this appears to
 * assume {@code docSet} is sorted ascending - confirm against callers.
 *
 * @param reader
 *          the reader whose leaves are scanned
 * @param docSet
 *          the top-level doc ids to look for
 * @param collectionInfo
 *          the collection info that supplies the action and fields and receives the values
 * @throws IOException
 *           Signals that an I/O exception has occurred.
 */
public static void collectCollection(IndexReader reader, List<Integer> docSet,
    ComponentCollection collectionInfo) throws IOException {
  // Only the create action can be served from Lucene; check and list cannot.
  if (!collectionInfo.action().equals(ComponentCollection.ACTION_CREATE)) {
    return;
  }

  PostingsEnum postingsEnum = null;
  for (LeafReaderContext lrc : reader.leaves()) {
    final LeafReader r = lrc.reader();
    for (String field : collectionInfo.fields()) {
      final Terms terms = r.terms(field);
      if (terms == null) {
        continue;
      }
      final TermsEnum termsEnum = terms.iterator();
      BytesRef term;
      while ((term = termsEnum.next()) != null) {
        postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
        // -1 means the postings have not been advanced yet for this term.
        int termDocId = -1;
        for (int topLevelDocId : docSet) {
          final int docId = topLevelDocId - lrc.docBase;
          if (docId < 0) {
            // The document lives in an earlier leaf. Without this guard a leaf-local
            // id of -1 would compare equal to the "not advanced" marker above and
            // the term would be reported as a match.
            continue;
          }
          if (docId >= termDocId
              && (docId == termDocId || (termDocId = postingsEnum.advance(docId)) == docId)) {
            collectionInfo.addValue(term.utf8ToString());
            break;
          }
          if (termDocId == PostingsEnum.NO_MORE_DOCS) {
            // Postings for this term are exhausted in this leaf.
            break;
          }
        }
      }
    }
  }
}
 
Example #12
Source Project: lucene-solr   Author: apache   File: TestMemoryIndex.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Indexes one field that carries both a point value and binary doc values into a
 * {@code MemoryIndex}, then checks that both can be read back from its leaf reader.
 */
public void testIndexingPointsAndDocValues() throws Exception {
  final byte[] packed = "term".getBytes(StandardCharsets.UTF_8);

  final FieldType pointWithBinaryDocValues = new FieldType();
  pointWithBinaryDocValues.setDimensions(1, 4);
  pointWithBinaryDocValues.setDocValuesType(DocValuesType.BINARY);
  pointWithBinaryDocValues.freeze();

  final Document document = new Document();
  document.add(new BinaryPoint("field", packed, pointWithBinaryDocValues));

  final LeafReader leaf = MemoryIndex.fromDocument(document, analyzer)
      .createSearcher()
      .getIndexReader()
      .leaves()
      .get(0)
      .reader();

  // Point side: exactly one value, which is both the minimum and the maximum.
  assertEquals(1, leaf.getPointValues("field").size());
  assertArrayEquals(packed, leaf.getPointValues("field").getMinPackedValue());
  assertArrayEquals(packed, leaf.getPointValues("field").getMaxPackedValue());

  // Doc-values side: document 0 carries the same bytes.
  final BinaryDocValues binaryValues = leaf.getBinaryDocValues("field");
  assertEquals(0, binaryValues.nextDoc());
  assertEquals("term", binaryValues.binaryValue().utf8ToString());
}
 
Example #13
Source Project: lucene-solr   Author: apache   File: KNearestNeighborClassifierTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Covers the case where the first topK results of the MLT query contain fewer hits
 * for the expected class than for the wrong class, but the hits for the expected
 * class score better. The best ranked class is therefore expected to get the
 * greater score.
 *
 * @throws Exception if any error happens
 */
@Test
public void testUnbalancedClasses() throws Exception {
  LeafReader index = null;
  try {
    final Analyzer english = new EnglishAnalyzer();
    index = getSampleIndex(english);
    final KNearestNeighborClassifier classifier =
        new KNearestNeighborClassifier(index, null, english, null, 3, 1, 1, categoryFieldName, textFieldName);

    final List<ClassificationResult<BytesRef>> ranked = classifier.getClasses(SUPER_STRONG_TECHNOLOGY_INPUT);
    final double bestScore = ranked.get(0).getScore();
    final double runnerUpScore = ranked.get(1).getScore();
    assertTrue(bestScore > runnerUpScore);

    checkCorrectClassification(classifier, SUPER_STRONG_TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
  } finally {
    if (index != null) {
      index.close();
    }
  }
}
 
Example #14
Source Project: lucene-solr   Author: apache   File: TestQueryBitSetProducer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Wraps {@code in} so that every leaf is a {@code FilterLeafReader} that returns
 * {@code null} for both its core and its reader cache helper.
 *
 * @param in the directory reader to wrap
 * @throws IOException if wrapping fails
 */
public DummyDirectoryReader(DirectoryReader in) throws IOException {
  super(in, new SubReaderWrapper() {
    @Override
    public LeafReader wrap(LeafReader leaf) {
      return new FilterLeafReader(leaf) {
        @Override
        public CacheHelper getReaderCacheHelper() {
          return null;
        }

        @Override
        public CacheHelper getCoreCacheHelper() {
          return null;
        }
      };
    }
  });
}
 
Example #15
Source Project: HongsCORE   Author: ihongs   File: StatisHelper.java    License: MIT License 6 votes vote down vote up
/**
 * Fetches, for each configured field, the doc values of the current leaf and
 * returns this object as the collector for that leaf.
 *
 * The kind of doc values is chosen per field from {@code groups[i]}:
 * {@code groups[i][0] >= 1} selects the numeric variants, otherwise the sorted
 * variants; {@code groups[i][1] == 1} selects the "%"-prefixed sorted-numeric or
 * sorted-set values, otherwise the "#"-prefixed numeric or sorted values.
 *
 * @param lrc the leaf about to be collected
 * @return this collector
 * @throws IOException if the doc values cannot be read
 */
@Override
public LeafCollector getLeafCollector(LeafReaderContext lrc) throws IOException {
    final LeafReader leaf = lrc.reader();

    for (int idx = 0; idx < fields.length; idx++) {
        final boolean numeric = groups[idx][0] >= 1;
        final boolean percentPrefixed = groups[idx][1] == 1;

        if (numeric && percentPrefixed) {
            values[idx] = leaf.getSortedNumericDocValues("%" + fields[idx]);
        } else if (numeric) {
            values[idx] = leaf.getNumericDocValues("#" + fields[idx]);
        } else if (percentPrefixed) {
            values[idx] = leaf.getSortedSetDocValues("%" + fields[idx]);
        } else {
            values[idx] = leaf.getSortedDocValues("#" + fields[idx]);
        }
    }

    return this;
}
 
Example #16
Source Project: lucene-solr   Author: apache   File: LukeRequestHandler.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Returns a live (not deleted) document that contains one of the given terms.
 *
 * Terms are tried in enumeration order. For each term only the first posting is
 * inspected; if that document is deleted, or the term has no postings, the next
 * term is tried. The search is bounded to the first 1000 terms to keep it cheap.
 *
 * @param terms  the terms of the field of interest
 * @param reader the segment reader the terms belong to
 * @return the first live document found, or {@code null} if the terms ran out or
 *         none of the inspected terms led to a live document
 * @throws IOException if reading terms, postings or the stored document fails
 */
private static Document getFirstLiveDoc(Terms terms, LeafReader reader) throws IOException {
  final int maxTermsToTry = 1000;
  final TermsEnum termsEnum = terms.iterator();
  // A set bit means the document is live; null means the segment has no deletions.
  final Bits liveDocs = reader.getLiveDocs();

  PostingsEnum postingsEnum = null;
  // Deal with the chance that the first bunch of terms are in deleted documents.
  for (int idx = 0; idx < maxTermsToTry; ++idx) {
    if (termsEnum.next() == null) {
      // Ran off the end of the terms enum without finding any live docs with that field in them.
      return null;
    }
    postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
    final int docId = postingsEnum.nextDoc();
    if (docId == DocIdSetIterator.NO_MORE_DOCS) {
      continue;
    }
    if (liveDocs == null || liveDocs.get(docId)) {
      return reader.document(docId);
    }
    // The first document of this term is deleted; move on to the next term.
  }
  return null;
}
 
Example #17
Source Project: crate   Author: crate   File: IndexSearcherWrapper.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Wraps {@code in} with a sub-reader wrapper that hands every leaf back unchanged.
 *
 * @param in the directory reader to wrap
 * @throws IOException if wrapping fails
 */
private NonClosingReaderWrapper(DirectoryReader in) throws IOException {
    super(in, new SubReaderWrapper() {
        /** Identity mapping: the leaf itself is used as the wrapped leaf. */
        @Override
        public LeafReader wrap(LeafReader leaf) {
            return leaf;
        }
    });
}
 
Example #18
Source Project: crate   Author: crate   File: LuceneChangesSnapshot.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether the soft-deletes doc value of the given document equals 1.
 *
 * @param leafReader   the segment to read from
 * @param segmentDocId the segment-local document id
 * @return {@code true} if the soft-deletes value of the document is 1
 * @throws IllegalStateException if the segment has no soft-deletes doc values or the
 *         document has no value for that field
 * @throws IOException if reading the doc values fails
 */
private boolean assertDocSoftDeleted(LeafReader leafReader, int segmentDocId) throws IOException {
    final NumericDocValues softDeletes = leafReader.getNumericDocValues(Lucene.SOFT_DELETES_FIELD);
    final boolean positioned = softDeletes != null && softDeletes.advanceExact(segmentDocId);
    if (!positioned) {
        throw new IllegalStateException("DocValues for field [" + Lucene.SOFT_DELETES_FIELD + "] is not found");
    }
    return softDeletes.longValue() == 1;
}
 
Example #19
Source Project: lucene-solr   Author: apache   File: MultiNormsLeafSimScorer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Sole constructor: Score documents of {@code reader} with {@code scorer}.
 *
 * When scores are needed, the norms of every listed field that has norms in this
 * reader are gathered: none found leaves the norms {@code null}, exactly one is used
 * directly, and several are combined with their weights into a
 * {@code MultiFieldNormValues}. When scores are not needed the reader is not
 * consulted and the norms are {@code null}.
 *
 * @throws NullPointerException if {@code scorer} is null
 * @throws IOException if reading norms fails
 */
MultiNormsLeafSimScorer(SimScorer scorer, LeafReader reader, Collection<FieldAndWeight> normFields, boolean needsScores) throws IOException {
  this.scorer = Objects.requireNonNull(scorer);

  NumericDocValues resolved = null;
  if (needsScores) {
    final List<NumericDocValues> presentNorms = new ArrayList<>();
    final List<Float> presentWeights = new ArrayList<>();
    for (FieldAndWeight normField : normFields) {
      final NumericDocValues fieldNorms = reader.getNormValues(normField.field);
      if (fieldNorms == null) {
        // This field has no norms in this reader; its weight is dropped with it.
        continue;
      }
      presentNorms.add(fieldNorms);
      presentWeights.add(normField.weight);
    }

    final int found = presentNorms.size();
    if (found == 1) {
      resolved = presentNorms.get(0);
    } else if (found > 1) {
      final float[] weights = new float[found];
      for (int i = 0; i < presentWeights.size(); i++) {
        weights[i] = presentWeights.get(i);
      }
      resolved = new MultiFieldNormValues(presentNorms.toArray(new NumericDocValues[0]), weights);
    }
  }
  this.norms = resolved;
}
 
Example #20
Source Project: lucene-solr   Author: apache   File: DataSplitterTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Splits {@code originalIndex} into training, test and cross-validation indexes and
 * asserts that each resulting index holds roughly the expected share of documents
 * (within a tolerance of 20 documents).
 *
 * All directories and readers are opened in try-with-resources so they are released
 * even when an assertion fails; readers are closed before their directories.
 *
 * @param originalIndex        the index to split
 * @param testRatio            share of documents expected in the test index
 * @param crossValidationRatio share of documents expected in the cross-validation index
 * @param fieldNames           the fields handed to the splitter
 * @throws Exception if splitting or reading any index fails
 */
public static void assertSplit(LeafReader originalIndex, double testRatio, double crossValidationRatio, String... fieldNames) throws Exception {
  try (BaseDirectoryWrapper trainingIndex = newDirectory();
       BaseDirectoryWrapper testIndex = newDirectory();
       BaseDirectoryWrapper crossValidationIndex = newDirectory()) {

    DatasetSplitter datasetSplitter = new DatasetSplitter(testRatio, crossValidationRatio);
    datasetSplitter.split(originalIndex, trainingIndex, testIndex, crossValidationIndex, new MockAnalyzer(random()), true, classFieldName, fieldNames);

    try (DirectoryReader trainingReader = DirectoryReader.open(trainingIndex);
         DirectoryReader testReader = DirectoryReader.open(testIndex);
         DirectoryReader cvReader = DirectoryReader.open(crossValidationIndex)) {
      final int originalDocs = originalIndex.maxDoc();
      // Whatever is not assigned to test or cross-validation goes to training.
      assertEquals((int) (originalDocs * (1d - testRatio - crossValidationRatio)), trainingReader.maxDoc(), 20);
      assertEquals((int) (originalDocs * testRatio), testReader.maxDoc(), 20);
      assertEquals((int) (originalDocs * crossValidationRatio), cvReader.maxDoc(), 20);
    }
  }
}
 
Example #21
Source Project: crate   Author: crate   File: ShardUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Tries to extract the shard id from a reader.
 *
 * @param reader the leaf reader to inspect
 * @return the shard id of the underlying {@code ElasticsearchLeafReader}, or
 *         {@code null} when no such reader can be obtained from {@code reader}
 */
@Nullable
public static ShardId extractShardId(LeafReader reader) {
    final ElasticsearchLeafReader esLeaf = ElasticsearchLeafReader.getElasticsearchLeafReader(reader);
    if (esLeaf == null) {
        return null;
    }
    assert reader.getRefCount() > 0 : "ElasticsearchLeafReader is already closed";
    return esLeaf.shardId();
}
 
Example #22
Source Project: lucene-solr   Author: apache   File: SimpleNaiveBayesClassifierTest.java    License: Apache License 2.0 5 votes vote down vote up
/** Checks that one {@code SimpleNaiveBayesClassifier} classifies both the technology and the politics input correctly. */
@Test
public void testBasicUsage() throws Exception {
  LeafReader index = null;
  try {
    final MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
    index = getSampleIndex(mockAnalyzer);
    final SimpleNaiveBayesClassifier naiveBayes =
        new SimpleNaiveBayesClassifier(index, mockAnalyzer, null, categoryFieldName, textFieldName);

    // The same classifier instance is reused for both inputs.
    checkCorrectClassification(naiveBayes, TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
    checkCorrectClassification(naiveBayes, POLITICS_INPUT, POLITICS_RESULT);
  } finally {
    if (index != null) {
      index.close();
    }
  }
}
 
Example #23
Source Project: Elasticsearch   Author: baidu   File: IndexSearcherWrappingService.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Wraps {@code in} with a sub-reader wrapper that hands every leaf back unchanged.
 *
 * @param in the directory reader to wrap
 * @throws IOException if wrapping fails
 */
private NonClosingReaderWrapper(DirectoryReader in) throws IOException {
    super(in, new SubReaderWrapper() {
        /** Identity mapping: the leaf itself is used as the wrapped leaf. */
        @Override
        public LeafReader wrap(LeafReader leaf) {
            return leaf;
        }
    });
}
 
Example #24
Source Project: Elasticsearch   Author: baidu   File: Engine.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Tries to extract a segment reader from the given index reader.
 *
 * A {@link SegmentReader} is returned as is; a {@link FilterLeafReader} is unwrapped
 * and the result is examined again.
 *
 * @param reader the reader to extract the segment reader from
 * @return the underlying segment reader
 * @throws IllegalStateException if no {@link SegmentReader} can be extracted
 */
protected static SegmentReader segmentReader(LeafReader reader) {
    if (reader instanceof SegmentReader) {
        return (SegmentReader) reader;
    }
    if (!(reader instanceof FilterLeafReader)) {
        // hard fail - we can't get a SegmentReader
        throw new IllegalStateException("Can not extract segment reader from given index reader [" + reader + "]");
    }
    final LeafReader unwrapped = FilterLeafReader.unwrap((FilterLeafReader) reader);
    return segmentReader(unwrapped);
}
 
Example #25
Source Project: lucene-solr   Author: apache   File: ShapeQuery.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a constant-score scorer over the documents left after intersecting the
 * point values with the "contains" dense visitor and removing the excluded ones.
 *
 * @param reader    the leaf whose {@code maxDoc} sizes the bit sets
 * @param weight    the weight the scorer belongs to
 * @param boost     the constant score
 * @param scoreMode the score mode handed to the scorer
 * @return a scorer iterating the remaining documents
 * @throws IOException if intersecting the point values fails
 */
private Scorer getContainsDenseScorer(LeafReader reader, Weight weight, final float boost, ScoreMode scoreMode) throws IOException {
  final int maxDoc = reader.maxDoc();
  final FixedBitSet candidates = new FixedBitSet(maxDoc);
  final FixedBitSet rejected = new FixedBitSet(maxDoc);
  // One-element array handed to the visitor; its first slot is read back as the iterator cost.
  final long[] costHolder = new long[]{0};

  values.intersect(getContainsDenseVisitor(query, candidates, rejected, costHolder));
  candidates.andNot(rejected);

  return new ConstantScoreScorer(weight, boost, scoreMode, new BitSetIterator(candidates, costHolder[0]));
}
 
Example #26
Source Project: lucene-solr   Author: apache   File: Monitor.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Matches a batch of documents against the queries stored in the Monitor and also
 * reports which queries the presearcher selected, and why.
 *
 * The batch is closed when the method returns; the searcher built over it has its
 * query cache disabled.
 *
 * @param docs    the documents to match against the index
 * @param factory a {@link MatcherFactory} used to create the {@link CandidateMatcher} for this run
 * @param <T>     the type of QueryMatch produced by the CandidateMatcher
 * @return a {@link PresearcherMatches} object containing debug information
 * @throws IOException on IO errors
 */
public <T extends QueryMatch> PresearcherMatches<T> debug(Document[] docs, MatcherFactory<T> factory)
    throws IOException {
  try (DocumentBatch documentBatch = DocumentBatch.of(analyzer, docs)) {
    final LeafReader batchReader = documentBatch.get();

    final IndexSearcher batchSearcher = new IndexSearcher(batchReader);
    batchSearcher.setQueryCache(null);

    final PresearcherQueryCollector<T> matchCollector =
        new PresearcherQueryCollector<>(factory.createMatcher(batchSearcher));
    final long buildTime = queryIndex.search(
        acceptor -> new ForceNoBulkScoringQuery(presearcher.buildQuery(batchReader, acceptor)),
        matchCollector);
    return matchCollector.getMatches(buildTime);
  }
}
 
Example #27
Source Project: lucene-solr   Author: apache   File: TestLucene80DocValuesFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Verifies {@code advanceExact} on the numeric doc values field "dv" for a range of
 * jump lengths.
 *
 * For every leaf and every jump length that is a multiple of {@code jumpStep} below
 * the leaf's {@code maxDoc}, a fresh doc-values instance is walked from document 0
 * in steps of that length. A document must have a doc value exactly when it has a
 * stored "stored" field, and the value must equal that field parsed as a long.
 *
 * The reader is opened in try-with-resources so it is closed even when an assertion fails.
 *
 * @param dir      the directory holding the index to check
 * @param jumpStep the increment between tested jump lengths
 * @throws IOException if the index cannot be read
 */
private void assertDVAdvance(Directory dir, int jumpStep) throws IOException {
  try (DirectoryReader ir = DirectoryReader.open(dir)) {
    TestUtil.checkReader(ir);
    for (LeafReaderContext context : ir.leaves()) {
      LeafReader r = context.reader();

      for (int jump = jumpStep; jump < r.maxDoc(); jump += jumpStep) {
        // Create a new instance each time to ensure jumps from the beginning
        NumericDocValues docValues = DocValues.getNumeric(r, "dv");
        for (int docID = 0; docID < r.maxDoc(); docID += jump) {
          String base = "document #" + docID + "/" + r.maxDoc() + ", jumping " + jump + " from #" + (docID-jump);
          String storedValue = r.document(docID).get("stored");
          if (storedValue == null) {
            assertFalse("There should be no DocValue for " + base,
                docValues.advanceExact(docID));
          } else {
            assertTrue("There should be a DocValue for " + base,
                docValues.advanceExact(docID));
            assertEquals("The doc value should be correct for " + base,
                Long.parseLong(storedValue), docValues.longValue());
          }
        }
      }
    }
  }
}
 
Example #28
Source Project: SearchServices   Author: Alfresco   File: AlfrescoLukeRequestHandler.java    License: GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Sums the RAM bytes reported by each segment of the reader.
 *
 * @param reader the directory reader whose leaves are inspected
 * @return the summed {@code ramBytesUsed()} of all leaves, or -1 as soon as a leaf is
 *         found that is not a {@link SegmentReader}
 */
private static long getIndexHeapUsed(DirectoryReader reader) {
	long totalBytes = 0;
	for (LeafReaderContext leafContext : reader.leaves()) {
		final LeafReader leaf = leafContext.reader();
		if (!(leaf instanceof SegmentReader)) {
			// Not supported for any reader that is not a SegmentReader
			return -1;
		}
		totalBytes += ((SegmentReader) leaf).ramBytesUsed();
	}
	return totalBytes;
}
 
Example #29
Source Project: rdf4j   Author: eclipse   File: LuceneIndex.java    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Returns the Documents that match the given term across all leaves of the current
 * index reader (empty when no such Document exists yet). Each document represents a
 * set of statements with the specified Resource as a subject, stored in a specific
 * context.
 *
 * @param uriTerm the term identifying the Resource
 * @return the matching documents, in leaf order
 * @throws IOException if reading the index fails
 */
private List<Document> getDocuments(Term uriTerm) throws IOException {
	final List<Document> matches = new ArrayList<>();
	for (LeafReaderContext leafContext : getIndexReader().leaves()) {
		addDocuments(leafContext.reader(), uriTerm, matches);
	}
	return matches;
}
 
Example #30
Source Project: rdf4j   Author: eclipse   File: LuceneIndex.java    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Adds every live document of {@code reader} that contains {@code term} to
 * {@code documents}.
 *
 * @param reader    the segment to search
 * @param term      the term the documents must contain
 * @param documents the collection that receives the documents read
 * @throws IOException if reading postings or documents fails
 */
private static void addDocuments(LeafReader reader, Term term, Collection<Document> documents) throws IOException {
	final PostingsEnum docs = reader.postings(term);
	if (docs == null) {
		// The term does not occur in this segment.
		return;
	}
	// Fetched once for the whole loop: a set bit means the document is live,
	// and null means the segment has no deletions.
	final Bits liveDocs = reader.getLiveDocs();
	for (int docId = docs.nextDoc(); docId != PostingsEnum.NO_MORE_DOCS; docId = docs.nextDoc()) {
		// Maybe some of the docs have been deleted! Check that too..
		if (liveDocs != null && !liveDocs.get(docId)) {
			continue;
		}
		documents.add(readDocument(reader, docId, null));
	}
}