Java Code Examples for org.apache.lucene.search.IndexSearcher#getIndexReader()

The following examples show how to use org.apache.lucene.search.IndexSearcher#getIndexReader(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Adds per-index document statistics and the Lucene version in use to the given map.
 *
 * <p>A sorted map of {@link IndexStatistics}, keyed by index type name, is stored under
 * {@code "indexStatistics"}. The analyzer's version string is stored under
 * {@code "indexLuceneVersion"} unless obtaining or closing the analyzer fails with an
 * {@link IOException}, in which case the failure is logged at debug level and the entry
 * is omitted.
 *
 * @param map the map that receives the gathered entries
 */
private void gatherIndexInfo(Map<String, Object> map) {
    SortedMap<String, IndexStatistics> indexStats = new TreeMap<>();
    for (IndexType indexType : IndexType.values()) {
        IndexStatistics stat = new IndexStatistics();
        IndexSearcher searcher = indexManager.getSearcher(indexType);
        stat.setName(indexType.name());
        indexStats.put(indexType.name(), stat);
        if (searcher == null) {
            // No searcher for this index type: report it as empty.
            stat.setCount(0);
            stat.setDeletedCount(0);
            continue;
        }
        try {
            IndexReader reader = searcher.getIndexReader();
            stat.setCount(reader.numDocs());
            stat.setDeletedCount(reader.numDeletedDocs());
        } finally {
            // Always hand the searcher back, even if reading the counts fails.
            indexManager.release(indexType, searcher);
        }
    }
    map.put("indexStatistics", indexStats);

    try (Analyzer analyzer = analyzerFactory.getAnalyzer()) {
        map.put("indexLuceneVersion", analyzer.getVersion().toString());
    } catch (IOException e) {
        LOG.debug("Unable to gather information", e);
    }
}
 
Example 2
Source Project: Elasticsearch   File: TermSuggester.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link TermSuggestion} for the given suggestion context.
 *
 * <p>Each token returned by {@code queryTerms} is looked up with a {@link DirectSpellChecker}
 * against the searcher's index reader; every suggested word becomes an option on that
 * token's entry.
 *
 * @param name       name given to the resulting suggestion
 * @param suggestion context supplying sizes and spell-checker settings
 * @param searcher   searcher whose index reader is consulted for similar terms
 * @param spare      scratch buffer handed to {@code queryTerms}
 * @return the suggestion holding one entry per token
 * @throws IOException if the spell checker fails to read the index
 */
@Override
public TermSuggestion innerExecute(String name, TermSuggestionContext suggestion, IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
    DirectSpellChecker spellChecker = SuggestUtils.getDirectSpellChecker(suggestion.getDirectSpellCheckerSettings());
    final IndexReader reader = searcher.getIndexReader();
    TermSuggestion result = new TermSuggestion(name, suggestion.getSize(), suggestion.getDirectSpellCheckerSettings().sort());

    for (Token token : queryTerms(suggestion, spare)) {
        // TODO: Extend DirectSpellChecker in 4.1, to get the raw suggested words as BytesRef
        SuggestWord[] candidates = spellChecker.suggestSimilar(
                token.term,
                suggestion.getShardSize(),
                reader,
                suggestion.getDirectSpellCheckerSettings().suggestMode());

        Text entryText = new Text(new BytesArray(token.term.bytes()));
        int entryLength = token.endOffset - token.startOffset;
        TermSuggestion.Entry entry = new TermSuggestion.Entry(entryText, token.startOffset, entryLength);

        for (SuggestWord candidate : candidates) {
            Text optionText = new Text(candidate.string);
            entry.addOption(new TermSuggestion.Entry.Option(optionText, candidate.freq, candidate.score));
        }
        result.addTerm(entry);
    }
    return result;
}
 
Example 3
Source Project: lucene-solr   File: HighlighterTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Searches {@code dir1} for the term "random" in field {@code t_text1} and asserts the best
 * highlighted fragment of every returned hit.
 *
 * @throws IOException if the index cannot be opened or read
 * @throws InvalidTokenOffsetsException declared by the highlighter's fragment lookup
 */
private void searchIndex() throws IOException, InvalidTokenOffsetsException {
  Query query = new TermQuery(new Term("t_text1", "random"));
  // try-with-resources: the reader is closed even when an assertion or search call fails.
  try (IndexReader reader = DirectoryReader.open(dir1)) {
    IndexSearcher searcher = newSearcher(reader);
    // This scorer can return negative idf -> null fragment
    Scorer scorer = new QueryTermScorer( query, searcher.getIndexReader(), "t_text1" );
    // This scorer doesn't use idf (patch version)
    //Scorer scorer = new QueryTermScorer( query, "t_text1" );
    Highlighter h = new Highlighter( scorer );

    TopDocs hits = searcher.search(query, 10);
    // Bound the loop by the docs actually returned: totalHits counts every match and may
    // exceed the requested top 10, which would overrun scoreDocs.
    for (int i = 0; i < hits.scoreDocs.length; i++) {
      Document doc = searcher.doc( hits.scoreDocs[i].doc );
      String result = h.getBestFragment( a, "t_text1", doc.get( "t_text1" ));
      if (VERBOSE) {
        System.out.println("result:" +  result);
      }
      assertEquals("more <B>random</B> words for second field", result);
    }
  }
}
 
Example 4
Source Project: lucene-solr   File: ReadTask.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Optionally walks the top hits and optionally retrieves each document.
 *
 * @param searcher searcher whose index reader is used for document retrieval
 * @param q        the query that produced {@code hits} (not used here)
 * @param hits     the top docs to traverse
 * @return number of hits traversed plus number of documents successfully retrieved;
 *         0 when traversal is disabled or the traversal size is not positive
 * @throws Exception if document retrieval fails
 */
protected int withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
  IndexReader reader = searcher.getIndexReader();
  if (!withTraverse()) {
    return 0;
  }

  final ScoreDoc[] scoreDocs = hits.scoreDocs;
  final int limit = Math.min(scoreDocs.length, traversalSize());
  if (limit <= 0) {
    return 0;
  }

  final boolean retrieve = withRetrieve();
  int count = 0;
  for (int i = 0; i < limit; i++) {
    int docId = scoreDocs[i].doc;
    count++; // one for the traversed hit
    if (retrieve) {
      Document retrieved = retrieveDoc(reader, docId);
      if (retrieved != null) {
        count++; // one more for the retrieved document
      }
    }
  }
  return count;
}
 
Example 5
/**
 * Highlights every loaded field of every hit, visiting hits in the order given by
 * {@code docIdOrder}.
 *
 * <p>The token stream comes from term vectors when {@code termVecs} is set, otherwise from
 * re-analyzing the stored value. The fragment count is written to
 * {@code preventOptimizeAway}.
 *
 * @param searcher searcher whose index reader supplies documents and term vectors
 * @param q        query used to score fragments
 * @param hits     the top docs to highlight
 * @throws Exception if reading the index or highlighting fails
 */
@Override
public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
  IndexReader reader = searcher.getIndexReader();
  highlighter.setFragmentScorer(new QueryScorer(q));
  // highlighter.setTextFragmenter();  unfortunately no sentence mechanism, not even regex. Default here is trivial
  for (ScoreDoc hit : docIdOrder(hits.scoreDocs)) {
    Document doc = reader.document(hit.doc, hlFields);
    Fields vectors = null;
    if (termVecs) {
      vectors = reader.getTermVectors(hit.doc);
    }
    for (IndexableField field : doc) {
      TokenStream stream = termVecs
          ? TokenSources.getTokenStream(field.name(), vectors,
              field.stringValue(), analyzer, maxDocCharsToAnalyze)
          : analyzer.tokenStream(field.name(), field.stringValue());
      // getBestFragments closes the TokenStream
      String[] bestFragments = highlighter.getBestFragments(stream, field.stringValue(), maxFrags);
      preventOptimizeAway = bestFragments.length;
    }
  }
}
 
Example 6
/**
 * Builds taxonomy facet counts over the whole index, randomly choosing between two routes:
 * counting directly from the index reader, or collecting a {@link MatchAllDocsQuery} first.
 *
 * @param indexFieldName field name passed to the reader-based constructor
 * @param searcher       searcher to run the query on, or to take the index reader from
 * @param taxoReader     taxonomy reader
 * @param config         facets configuration
 * @return the facet counts
 * @throws IOException if searching or counting fails
 */
private static Facets getAllFacets(String indexFieldName, IndexSearcher searcher, TaxonomyReader taxoReader, FacetsConfig config) throws IOException {
  if (!random().nextBoolean()) {
    // Count straight from the reader, no search involved.
    return new FastTaxonomyFacetCounts(indexFieldName, searcher.getIndexReader(), taxoReader, config);
  }

  // Aggregate the facet counts through a collector. MatchAllDocsQuery is for
  // "browsing" (counts facets for all non-deleted docs in the index); normally
  // you'd use a "normal" query, and use MultiCollector to wrap collecting the
  // "normal" hits and also facets.
  FacetsCollector collector = new FacetsCollector();
  searcher.search(new MatchAllDocsQuery(), collector);
  return new FastTaxonomyFacetCounts(taxoReader, config, collector);
}
 
Example 7
Source Project: mtas   File: CodecCollector.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Collects the spans for each occurrence (group hit) within a single leaf.
 *
 * <p>For every hit a span query is created, rewritten against the searcher's index
 * reader and turned into a weight; hits with no query or no spans are left out.
 *
 * @param occurences
 *          the group hits to collect spans for
 * @param prefixes
 *          the prefixes used when creating the query for a hit
 * @param field
 *          the field
 * @param searcher
 *          the searcher
 * @param lrc
 *          the leaf reader context to get spans from
 * @return a map from group hit to its spans
 * @throws IOException
 *           Signals that an I/O exception has occurred.
 */
private static Map<GroupHit, Spans> collectSpansForOccurences(
    Set<GroupHit> occurences, Set<String> prefixes, String field,
    IndexSearcher searcher, LeafReaderContext lrc) throws IOException {
  Map<GroupHit, Spans> spansByHit = new HashMap<>();
  IndexReader indexReader = searcher.getIndexReader();
  final float boost = 0;
  for (GroupHit groupHit : occurences) {
    MtasSpanQuery hitQuery = createQueryFromGroupHit(prefixes, field, groupHit);
    if (hitQuery == null) {
      continue;
    }
    SpanWeight hitWeight = hitQuery.rewrite(indexReader)
        .createWeight(searcher, false, boost);
    Spans hitSpans = hitWeight.getSpans(lrc, SpanWeight.Postings.POSITIONS);
    if (hitSpans == null) {
      continue;
    }
    spansByHit.put(groupHit, hitSpans);
  }
  return spansByHit;
}
 
Example 8
/**
 * Returns the MediaLibraryStatistics stored in the index commit user data, after checking
 * that every index type reports equal data.
 *
 * @return the shared statistics, or null when an index has no searcher, its reader is not a
 *         DirectoryReader, the indices disagree, or the commit data cannot be read
 */
public @Nullable MediaLibraryStatistics getStatistics() {
    // NOTE(review): the searcher obtained below is not handed back anywhere in this
    // method - confirm whether getSearcher requires a matching release.
    MediaLibraryStatistics agreed = null;
    for (IndexType indexType : IndexType.values()) {
        IndexSearcher searcher = getSearcher(indexType);
        if (searcher == null) {
            LOG.trace("No index for type " + indexType);
            return null;
        }

        IndexReader indexReader = searcher.getIndexReader();
        if (!(indexReader instanceof DirectoryReader)) {
            LOG.warn("Unexpected index type " + indexReader.getClass());
            return null;
        }
        DirectoryReader directoryReader = (DirectoryReader) indexReader;

        try {
            Map<String, String> commitData = directoryReader.getIndexCommit().getUserData();
            MediaLibraryStatistics fromIndex =
                    Util.stringMapToValidObject(MediaLibraryStatistics.class, commitData);
            if (agreed == null) {
                // First index that yields data becomes the reference for the others.
                agreed = fromIndex;
            } else if (!Objects.equals(agreed, fromIndex)) {
                LOG.warn("Index type " + indexType + " had differing stats data");
                return null;
            }
        } catch (IOException | IllegalArgumentException e) {
            LOG.debug("Exception encountered while fetching index commit data", e);
            return null;
        }
    }
    return agreed;
}
 
Example 9
Source Project: Elasticsearch   File: ValuesSource.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the value count of the global ordinals, looked up through the first leaf of the
 * searcher's index reader.
 *
 * @param indexSearcher searcher whose index reader is inspected
 * @return the value count, or 0 when the reader has no leaves
 */
public long globalMaxOrd(IndexSearcher indexSearcher) {
    IndexReader indexReader = indexSearcher.getIndexReader();
    if (indexReader.leaves().isEmpty()) {
        return 0;
    }
    LeafReaderContext firstLeaf = indexReader.leaves().get(0);
    return globalOrdinalsValues(firstLeaf).getValueCount();
}
 
Example 10
Source Project: Elasticsearch   File: ValuesSource.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the value count of the ordinals for the given type, loaded from the global
 * parent/child field data through the first leaf of the searcher's index reader.
 *
 * @param indexSearcher searcher whose index reader (cast to DirectoryReader) is inspected
 * @param type          the type whose ordinals are requested
 * @return the value count, or 0 when the reader has no leaves
 */
public long globalMaxOrd(IndexSearcher indexSearcher, String type) {
    DirectoryReader indexReader = (DirectoryReader) indexSearcher.getIndexReader();
    if (indexReader.leaves().isEmpty()) {
        return 0;
    }
    LeafReaderContext firstLeaf = indexReader.leaves().get(0);
    AtomicParentChildFieldData leafData = indexFieldData.loadGlobal(indexReader).load(firstLeaf);
    return leafData.getOrdinalsValues(type).getValueCount();
}
 
Example 11
/**
 * Highlights each configured field of every hit, visiting hits in the order given by
 * {@code docIdOrder}; the fragment count is written to {@code preventOptimizeAway}.
 *
 * @param searcher searcher whose index reader is used for highlighting
 * @param q        query from which the field query is derived
 * @param hits     the top docs to highlight
 * @throws Exception if highlighting fails
 */
@Override
public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
  IndexReader indexReader = searcher.getIndexReader();
  final FieldQuery fieldQuery = highlighter.getFieldQuery(q, indexReader);
  for (ScoreDoc hit : docIdOrder(hits.scoreDocs)) {
    for (String fieldName : hlFields) {
      String[] bestFragments = highlighter.getBestFragments(
          fieldQuery, indexReader, hit.doc, fieldName, fragSize, maxFrags,
          fragListBuilder, fragmentsBuilder, preTags, postTags, encoder);
      preventOptimizeAway = bestFragments.length;
    }
  }
}
 
Example 12
Source Project: lucene-solr   File: TestMemoryIndex.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Seeks the terms enum of a single-field MemoryIndex to ordinal 0 and expects the term "be".
 *
 * @throws IOException if reading the in-memory index fails
 */
public void testSeekByTermOrd() throws IOException {
  MemoryIndex memoryIndex = new MemoryIndex();
  memoryIndex.addField("field", "some terms be here", analyzer);

  LeafReader leafReader = (LeafReader) memoryIndex.createSearcher().getIndexReader();
  TermsEnum termsEnum = leafReader.terms("field").iterator();

  termsEnum.seekExact(0);
  assertEquals("be", termsEnum.term().utf8ToString());
  TestUtil.checkReader(leafReader);
}
 
Example 13
Source Project: lucene-solr   File: TestMemoryIndex.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a MemoryIndex from a document holding one NumericDocValuesField and one TextField
 * and asserts that the term vectors of document 0 report exactly one field.
 *
 * @throws IOException if reading the in-memory index fails
 */
public void testFieldsOnlyReturnsIndexedFields() throws IOException {
  Document doc = new Document();

  doc.add(new NumericDocValuesField("numeric", 29L));
  doc.add(new TextField("text", "some text", Field.Store.NO));

  MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
  IndexSearcher searcher = mi.createSearcher();
  IndexReader reader = searcher.getIndexReader();

  // Expected value first, so a failure message reads "expected 1 but was N".
  assertEquals(1, reader.getTermVectors(0).size());
}
 
Example 14
Source Project: lucene-solr   File: TestMemoryIndex.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the norm value of field {@code f1} differs after a new Similarity is set on
 * the MemoryIndex.
 *
 * @throws IOException if reading the in-memory index fails
 */
@Test
public void testSimilarities() throws IOException {

  MemoryIndex mi = new MemoryIndex();
  mi.addField("f1", "a long text field that contains many many terms", analyzer);

  IndexSearcher searcher = mi.createSearcher();
  LeafReader reader = (LeafReader) searcher.getIndexReader();
  NumericDocValues norms = reader.getNormValues("f1");
  assertEquals(0, norms.nextDoc());
  // Keep the norm as a long: converting to float is lossy and could make two
  // distinct norm values compare equal.
  long n1 = norms.longValue();

  // Norms are re-computed when we change the Similarity
  mi.setSimilarity(new Similarity() {

    @Override
    public long computeNorm(FieldInvertState state) {
      return 74;
    }

    @Override
    public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
      // Scoring is never exercised by this test.
      throw new UnsupportedOperationException();
    }

  });
  norms = reader.getNormValues("f1");
  assertEquals(0, norms.nextDoc());
  long n2 = norms.longValue();

  assertTrue(n1 != n2);
  TestUtil.checkReader(reader);
}
 
Example 15
/**
 * Scales sampled facet counts back up by the sampling rate.
 *
 * <p>Note: if you use a counting {@link Facets} implementation, you can amortize the
 * sampled counts by calling this method. Each label's corrected count is capped by the
 * document frequency of its facet term, looked up via the {@link FacetsConfig} and the
 * {@link IndexSearcher}; the total is capped by the reader's number of documents.
 *
 * @param res      the sampled facet result; returned as-is when null or when
 *                 {@code totalHits <= sampleSize}
 * @param config   facets configuration used to find the dimension's index field
 * @param searcher searcher whose index reader supplies the upper bounds
 * @return a new result with corrected counts, or {@code res} unchanged
 * @throws IOException if the index cannot be read
 */
public FacetResult amortizeFacetCounts(FacetResult res, FacetsConfig config, IndexSearcher searcher) throws IOException {
  if (res == null || totalHits <= sampleSize) {
    return res;
  }

  final int labelCount = res.labelValues.length;
  LabelAndValue[] corrected = new LabelAndValue[labelCount];
  IndexReader indexReader = searcher.getIndexReader();
  DimConfig dimConfig = config.getDimConfig(res.dim);

  // Layout: [dim, path..., child label]; only the last slot changes per label.
  final int pathLength = res.path.length;
  String[] childPath = new String[pathLength + 2];
  childPath[0] = res.dim;
  System.arraycopy(res.path, 0, childPath, 1, pathLength);
  final int labelSlot = pathLength + 1;

  for (int i = 0; i < labelCount; i++) {
    LabelAndValue sampled = res.labelValues[i];
    childPath[labelSlot] = sampled.label;
    String fullPath = FacetsConfig.pathToString(childPath, childPath.length);
    int upperBound = indexReader.docFreq(new Term(dimConfig.indexFieldName, fullPath));
    int estimate = (int) (sampled.value.doubleValue() / samplingRate);
    corrected[i] = new LabelAndValue(sampled.label, Math.min(upperBound, estimate));
  }

  // cap the total count on the total number of non-deleted documents in the reader
  int total = res.value.intValue();
  if (total > 0) {
    total = Math.min(indexReader.numDocs(), (int) (res.value.doubleValue() / samplingRate));
  }

  return new FacetResult(res.dim, res.path, total, corrected, res.childCount);
}
 
Example 16
/**
 * Fails the test if any leaf of the searcher's index exposes at least one term for the
 * given field.
 *
 * @param searcher  searcher whose index reader leaves are inspected
 * @param fieldName the field expected to expose no terms
 * @throws IOException if the index cannot be read
 */
private void checkTerms(IndexSearcher searcher, String fieldName) throws IOException {
  IndexReader reader = searcher.getIndexReader();
  for (AtomicReaderContext context : reader.leaves()) {
    AtomicReader atomicReader = context.reader();
    Fields fields = atomicReader.fields();
    if (fields == null) {
      // A leaf without postings has no terms for any field.
      continue;
    }
    Terms terms = fields.terms(fieldName);
    if (terms == null) {
      // The field does not exist in this leaf, so there is nothing to reject.
      continue;
    }
    TermsEnum iterator = terms.iterator(null);
    BytesRef bytesRef = iterator.next();
    if (bytesRef != null) {
      System.out.println(bytesRef.utf8ToString());
      fail("There are only restricted terms for this field [" + fieldName + "]");
    }
  }
}
 
Example 17
Source Project: crate   File: LuceneOrderedDocCollector.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a collector; stores the given collaborators and builds the row function over the
 * searcher's index reader.
 *
 * @param shardId                  passed to the superclass constructor
 * @param searcher                 searcher to collect from
 * @param query                    the query
 * @param minScore                 minimum score
 * @param doDocsScores             stored as given
 * @param batchSize                requested batch size; capped at the reader's document
 *                                 count plus one
 * @param ramAccounting            RAM accounting
 * @param collectorContext         collector context
 * @param searchAfterQueryOptimize function from a FieldDoc to a Query, stored as given
 * @param sort                     sort
 * @param inputs                   inputs handed to the row function
 * @param expressions              collector expressions, stored and handed to the row function
 */
public LuceneOrderedDocCollector(ShardId shardId,
                                 IndexSearcher searcher,
                                 Query query,
                                 Float minScore,
                                 boolean doDocsScores,
                                 int batchSize,
                                 RamAccounting ramAccounting,
                                 CollectorContext collectorContext,
                                 Function<FieldDoc, Query> searchAfterQueryOptimize,
                                 Sort sort,
                                 List<? extends Input<?>> inputs,
                                 Collection<? extends LuceneCollectorExpression<?>> expressions) {
    super(shardId);

    // What to search and how to order/filter it.
    this.searcher = searcher;
    this.query = query;
    this.sort = sort;
    this.minScore = minScore;
    this.doDocsScores = doDocsScores;
    this.searchAfterQueryOptimize = searchAfterQueryOptimize;

    // Supporting collaborators.
    this.ramAccounting = ramAccounting;
    this.collectorContext = collectorContext;
    this.expressions = expressions;

    // Never pre-allocate for more records than can possibly be returned. The extra (+1)
    // makes sure `exhausted` is set to `true` if all records match on the first `collect` call.
    this.batchSize = Math.min(batchSize, searcher.getIndexReader().numDocs() + 1);

    // The scorer must be assigned before the row function, which receives it.
    this.scorer = new DummyScorer();
    this.rowFunction = new ScoreDocRowFunction(
        searcher.getIndexReader(),
        inputs,
        expressions,
        scorer,
        this::raiseIfKilled
    );
}
 
Example 18
Source Project: mtas   File: CodecUtil.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Collects statistics for a field.
 *
 * <p>Does nothing when {@code fieldStats} is null. Otherwise every span query in
 * {@code fieldStats.spanQueryList} is rewritten against the searcher's index reader and
 * mapped to its weight, and the work is delegated to {@code CodecCollector.collectField}.
 *
 * @param field
 *          the field
 * @param searcher
 *          the searcher
 * @param rawReader
 *          the raw reader
 * @param fullDocList
 *          the full doc list
 * @param fullDocSet
 *          the full doc set
 * @param fieldStats
 *          the field stats; may be null
 * @param status
 *          the status, passed through to the collector
 * @throws IllegalAccessException
 *           the illegal access exception
 * @throws IllegalArgumentException
 *           the illegal argument exception
 * @throws InvocationTargetException
 *           the invocation target exception
 * @throws IOException
 *           Signals that an I/O exception has occurred.
 */
public static void collectField(String field, IndexSearcher searcher,
    IndexReader rawReader, ArrayList<Integer> fullDocList,
    ArrayList<Integer> fullDocSet, ComponentField fieldStats, Status status)
    throws IllegalAccessException, IllegalArgumentException,
    InvocationTargetException, IOException {
  if (fieldStats == null) {
    return;
  }

  IndexReader indexReader = searcher.getIndexReader();
  HashMap<MtasSpanQuery, SpanWeight> weightsByQuery = new HashMap<>();
  final float boost = 0;
  // An empty span query list simply leaves the map empty.
  for (MtasSpanQuery spanQuery : fieldStats.spanQueryList) {
    MtasSpanQuery rewritten = (MtasSpanQuery) spanQuery.rewrite(indexReader);
    weightsByQuery.put(spanQuery, rewritten.createWeight(searcher, false, boost));
  }

  // collect
  CodecCollector.collectField(field, searcher, indexReader, rawReader,
      fullDocList, fullDocSet, fieldStats, weightsByQuery, status);
}