Example 1
Source File:    From mtas with Apache License 2.0 6 votes vote down vote up
 * Collect spans for occurrences.
 * @param occurences
 *          the occurrences
 * @param prefixes
 *          the prefixes
 * @param field
 *          the field
 * @param searcher
 *          the searcher
 * @param lrc
 *          the lrc
 * @return the map
 * @throws IOException
 *           Signals that an I/O exception has occurred.
// NOTE(review): this listing is truncated - the createWeight(...) call and the
// closing braces are cut off, so the code is left exactly as found.
private static Map<GroupHit, Spans> collectSpansForOccurences(
    Set<GroupHit> occurences, Set<String> prefixes, String field,
    IndexSearcher searcher, LeafReaderContext lrc) throws IOException {
  // Result: one Spans instance per group hit that yields non-null spans.
  Map<GroupHit, Spans> list = new HashMap<>();
  IndexReader reader = searcher.getIndexReader();
  // NOTE(review): presumably the last createWeight argument; that part of the
  // call is not visible here - confirm against the full source.
  final float boost = 0;
  for (GroupHit hit : occurences) {
    MtasSpanQuery queryHit = createQueryFromGroupHit(prefixes, field, hit);
    // Hits for which createQueryFromGroupHit returns null are skipped.
    if (queryHit != null) {
      // The query is rewritten against the searcher's reader before a weight is created.
      MtasSpanQuery queryHitRewritten = queryHit.rewrite(reader);
      SpanWeight weight = queryHitRewritten.createWeight(searcher, false,
      Spans spans = weight.getSpans(lrc, SpanWeight.Postings.POSITIONS);
      // Only hits that actually have spans in the given leaf are recorded.
      if (spans != null) {
        list.put(hit, spans);
  return list;
Example 2
Source File:    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a TermSuggestion named {@code name}: for every token returned by
 * {@code queryTerms(suggestion, spare)} a DirectSpellChecker is asked for similar
 * words, and each suggested word is added as an option to that token's entry.
 *
 * NOTE(review): the TermSuggestion constructor call and the suggestSimilar call are
 * cut off in this listing, and the statement that attaches each entry to the response
 * is not visible - confirm against the full source.
 *
 * @param name       name given to the resulting suggestion
 * @param suggestion context supplying size, shard size and spell-checker settings
 * @param searcher   searcher whose index reader is passed to the spell checker
 * @param spare      forwarded to queryTerms
 * @return the response object created at the top of the method
 * @throws IOException declared by the signature
 */
public TermSuggestion innerExecute(String name, TermSuggestionContext suggestion, IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
    // The spell checker is configured from the suggestion's own settings.
    DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(suggestion.getDirectSpellCheckerSettings());
    final IndexReader indexReader = searcher.getIndexReader();
    TermSuggestion response = new TermSuggestion(
            name, suggestion.getSize(), suggestion.getDirectSpellCheckerSettings().sort()
    List<Token> tokens = queryTerms(suggestion, spare);
    for (Token token : tokens) {
        // TODO: Extend DirectSpellChecker in 4.1, to get the raw suggested words as BytesRef
        SuggestWord[] suggestedWords = directSpellChecker.suggestSimilar(
                token.term, suggestion.getShardSize(), indexReader, suggestion.getDirectSpellCheckerSettings().suggestMode()
        // The entry is keyed by the term bytes; its third argument is computed as endOffset - startOffset.
        Text key = new Text(new BytesArray(token.term.bytes()));
        TermSuggestion.Entry resultEntry = new TermSuggestion.Entry(key, token.startOffset, token.endOffset - token.startOffset);
        // Each suggested word becomes an option carrying its frequency and score.
        for (SuggestWord suggestWord : suggestedWords) {
            Text word = new Text(suggestWord.string);
            resultEntry.addOption(new TermSuggestion.Entry.Option(word, suggestWord.freq, suggestWord.score));
    return response;
Example 3
Source File:    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a term query for "random" on field "t_text1", highlights the stored field value
 * of every hit and asserts the best fragment equals a fixed expected string.
 *
 * NOTE(review): the right-hand sides of the {@code reader} and {@code hits} assignments
 * are missing in this listing, and {@code a} (first argument of getBestFragment) is not
 * declared in the visible code - confirm against the full source.
 */
private void searchIndex() throws IOException, InvalidTokenOffsetsException {
  Query query = new TermQuery(new Term("t_text1", "random"));
  IndexReader reader =;
  IndexSearcher searcher = newSearcher(reader);
  // This scorer can return negative idf -> null fragment
  Scorer scorer = new QueryTermScorer( query, searcher.getIndexReader(), "t_text1" );
  // This scorer doesn't use idf (patch version)
  //Scorer scorer = new QueryTermScorer( query, "t_text1" );
  Highlighter h = new Highlighter( scorer );

  TopDocs hits =, 10);
  // Every hit counted in totalHits is loaded and highlighted on its "t_text1" value.
  for( int i = 0; i < hits.totalHits.value; i++ ){
    Document doc = searcher.doc( hits.scoreDocs[i].doc );
    String result = h.getBestFragment( a, "t_text1", doc.get( "t_text1" ));
    if (VERBOSE) System.out.println("result:" +  result);
    assertEquals("more <B>random</B> words for second field", result);
Example 4
Source File:    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Optionally traverses the top hits and retrieves their documents, returning how many
 * retrieved documents were non-null.
 *
 * Nothing is traversed unless {@code withTraverse()} is true, and documents are only
 * fetched when {@code withRetrieve()} is true; otherwise the result stays 0.
 * The parameter {@code q} is not used in the visible body.
 *
 * NOTE(review): closing braces are missing in this listing.
 *
 * @param searcher searcher whose index reader is used to retrieve documents
 * @param q        the query (unused in the visible code)
 * @param hits     hits whose scoreDocs are traversed
 * @return number of non-null documents retrieved
 * @throws Exception declared by the signature
 */
protected int withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
  IndexReader reader = searcher.getIndexReader();
  int res = 0;
  if (withTraverse()) {
    final ScoreDoc[] scoreDocs = hits.scoreDocs;
    // Never traverse more entries than are available in scoreDocs.
    int traversalSize = Math.min(scoreDocs.length, traversalSize());

    if (traversalSize > 0) {
      // Evaluated once, before the loop.
      boolean retrieve = withRetrieve();
      for (int m = 0; m < traversalSize; m++) {
        int id = scoreDocs[m].doc;
        if (retrieve) {
          Document document = retrieveDoc(reader, id);
          // Count only documents that were actually returned.
          res += document != null ? 1 : 0;
  return res;
Example 5
Source File:    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Highlights every field of every hit document with {@code highlighter}, using a
 * QueryScorer built from {@code q} as the fragment scorer.
 *
 * The token stream comes from TokenSources (with the document's term vectors) when
 * {@code termVecs} is set, otherwise from {@code analyzer}.
 *
 * NOTE(review): the first argument of both token-stream calls is missing in this
 * listing, as are the closing braces - confirm against the full source.
 *
 * @param searcher searcher whose index reader supplies documents and term vectors
 * @param q        query used to score fragments
 * @param hits     hits to highlight, visited in the order given by docIdOrder
 * @throws Exception declared by the signature
 */
public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
  IndexReader reader = searcher.getIndexReader();
  highlighter.setFragmentScorer(new QueryScorer(q));
  // highlighter.setTextFragmenter();  unfortunately no sentence mechanism, not even regex. Default here is trivial
  for (ScoreDoc scoreDoc : docIdOrder(hits.scoreDocs)) {
    // Only the fields listed in hlFields are requested for the document.
    Document document = reader.document(scoreDoc.doc, hlFields);
    // Term vectors are fetched only when termVecs is enabled.
    Fields tvFields = termVecs ? reader.getTermVectors(scoreDoc.doc) : null;
    for (IndexableField indexableField : document) {
      TokenStream tokenStream;
      if (termVecs) {
        tokenStream = TokenSources.getTokenStream(, tvFields,
            indexableField.stringValue(), analyzer, maxDocCharsToAnalyze);
      } else {
        tokenStream = analyzer.tokenStream(, indexableField.stringValue());
      // will close TokenStream:
      String[] fragments = highlighter.getBestFragments(tokenStream, indexableField.stringValue(), maxFrags);
      // The fragment count is stored in preventOptimizeAway (presumably so the work is not optimized out - confirm).
      preventOptimizeAway = fragments.length;
Example 6
Source File:    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Returns a FastTaxonomyFacetCounts built in one of two ways, chosen at random: from a
 * FacetsCollector, or directly from the searcher's index reader and the index field name.
 *
 * NOTE(review): in the first branch the statement that fills the collector appears to
 * have been merged into the preceding comment line ("... MatchAllDocsQuery(), c);"),
 * and the closing braces are missing - confirm against the full source.
 *
 * @param indexFieldName index field name, used only by the second branch
 * @param searcher       searcher whose index reader is used by the second branch
 * @param taxoReader     taxonomy reader passed to both constructors
 * @param config         facets configuration passed to both constructors
 * @return the facet counts
 * @throws IOException declared by the signature
 */
private static Facets getAllFacets(String indexFieldName, IndexSearcher searcher, TaxonomyReader taxoReader, FacetsConfig config) throws IOException {
  if (random().nextBoolean()) {
    // Aggregate the facet counts:
    FacetsCollector c = new FacetsCollector();

    // MatchAllDocsQuery is for "browsing" (counts facets
    // for all non-deleted docs in the index); normally
    // you'd use a "normal" query, and use MultiCollector to
    // wrap collecting the "normal" hits and also facets: MatchAllDocsQuery(), c);

    return new FastTaxonomyFacetCounts(taxoReader, config, c);
  } else {
    // No collector in this branch: counts are built from the reader itself.
    return new FastTaxonomyFacetCounts(indexFieldName, searcher.getIndexReader(), taxoReader, config);
Example 7
Source File:    From airsonic with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Adds index information to {@code map}: an "indexStatistics" entry holding one
 * IndexStatistics per IndexType, and an "indexLuceneVersion" entry taken from the analyzer.
 *
 * NOTE(review): this listing is truncated - the key passed to indexStats.put is missing,
 * the statements that populate {@code stat} from the reader are not visible, the else
 * branch is empty and closing braces are missing. Confirm against the full source.
 *
 * @param map target map that receives the gathered entries
 */
private void gatherIndexInfo(Map<String, Object> map) {
    // TreeMap keeps the statistics sorted by key.
    SortedMap<String, IndexStatistics> indexStats = new TreeMap<>();
    for (IndexType indexType : IndexType.values()) {
        IndexStatistics stat = new IndexStatistics();
        IndexSearcher searcher = indexManager.getSearcher(indexType);
        indexStats.put(, stat);
        if (searcher != null) {
            IndexReader reader = searcher.getIndexReader();
            // The searcher is handed back to the index manager once it is no longer needed.
            indexManager.release(indexType, searcher);
        } else {
    map.put("indexStatistics", indexStats);

    // The analyzer is closed automatically by try-with-resources.
    try (Analyzer analyzer = analyzerFactory.getAnalyzer()) {
        map.put("indexLuceneVersion", analyzer.getVersion().toString());
    } catch (IOException e) {
        // Failure here is only logged at debug level; the version entry is then left out.
        LOG.debug("Unable to gather information", e);
Example 8
Source File:    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes one field into a MemoryIndex, obtains a TermsEnum for it and asserts that the
 * enum's current term is "be".
 *
 * NOTE(review): no seek call is visible between creating the iterator and the assertion;
 * given the method name, seek statements were presumably dropped from this listing -
 * confirm against the full source.
 */
public void testSeekByTermOrd() throws IOException {
  MemoryIndex mi = new MemoryIndex();
  mi.addField("field", "some terms be here", analyzer);
  IndexSearcher searcher = mi.createSearcher();
  // The searcher's reader is cast to LeafReader to reach terms(...).
  LeafReader reader = (LeafReader) searcher.getIndexReader();
  TermsEnum terms = reader.terms("field").iterator();
  assertEquals("be", terms.term().utf8ToString());
Example 9
Source File:    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the collaborators in fields and caps the batch size by the number of documents
 * in the searcher's reader plus one.
 *
 * NOTE(review): the listing ends inside the ScoreDocRowFunction constructor call, so the
 * rest of the constructor is not visible. {@code shardId} and {@code inputs} are not used
 * in the visible part - presumably they are used in the cut-off section; confirm.
 */
public LuceneOrderedDocCollector(ShardId shardId,
                                 IndexSearcher searcher,
                                 Query query,
                                 Float minScore,
                                 boolean doDocsScores,
                                 int batchSize,
                                 RamAccounting ramAccounting,
                                 CollectorContext collectorContext,
                                 Function<FieldDoc, Query> searchAfterQueryOptimize,
                                 Sort sort,
                                 List<? extends Input<?>> inputs,
                                 Collection<? extends LuceneCollectorExpression<?>> expressions) {
    this.searcher = searcher;
    this.query = query;
    this.minScore = minScore;
    this.doDocsScores = doDocsScores;
    this.ramAccounting = ramAccounting;
    // We don't want to pre-allocate for more records than what can possible be returned
    // (+1) to make sure `exhausted` is set to `true` if all records match on the first `collect` call.
    this.batchSize = Math.min(batchSize, searcher.getIndexReader().numDocs() + 1);
    this.collectorContext = collectorContext;
    this.searchAfterQueryOptimize = searchAfterQueryOptimize;
    this.sort = sort;
    // A fresh DummyScorer is created for every collector instance.
    this.scorer = new DummyScorer();
    this.expressions = expressions;
    this.rowFunction = new ScoreDocRowFunction(
Example 10
Source File:    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
/**
 * For every leaf of the searcher's reader, obtains a TermsEnum for {@code fieldName} and
 * fails the test when {@code bytesRef} is non-null.
 *
 * NOTE(review): the right-hand side of the {@code bytesRef} assignment is missing in this
 * listing (presumably a call on {@code iterator} - confirm), as are the closing braces.
 *
 * @param searcher  searcher whose reader leaves are inspected
 * @param fieldName field whose terms are checked
 * @throws IOException declared by the signature
 */
private void checkTerms(IndexSearcher searcher, String fieldName) throws IOException {
  IndexReader reader = searcher.getIndexReader();
  for (AtomicReaderContext context : reader.leaves()) {
    AtomicReader atomicReader = context.reader();
    Fields fields = atomicReader.fields();
    Terms terms = fields.terms(fieldName);
    TermsEnum iterator = terms.iterator(null);
    BytesRef bytesRef =;
    // A non-null value is treated as a test failure.
    if (bytesRef != null) {
      fail("There are only restricted terms for this field [" + fieldName + "]");
Example 11
Source File:    From lucene-solr with Apache License 2.0 5 votes vote down vote up
 * Note: if you use a counting {@link Facets} implementation, you can amortize the
 * sampled counts by calling this method. Uses the {@link FacetsConfig} and
 * the {@link IndexSearcher} to determine the upper bound for each facet value.
// Scales sampled facet counts up by dividing by samplingRate, capping each label's count
// at the document frequency of its facet term and the total at reader.numDocs().
// Returns res unchanged when it is null or when totalHits <= sampleSize.
// totalHits, sampleSize and samplingRate are not declared in this method.
// NOTE(review): closing braces are missing in this listing.
public FacetResult amortizeFacetCounts(FacetResult res, FacetsConfig config, IndexSearcher searcher) throws IOException {
  if (res == null || totalHits <= sampleSize) {
    return res;
  LabelAndValue[] fixedLabelValues = new LabelAndValue[res.labelValues.length];
  IndexReader reader = searcher.getIndexReader();
  DimConfig dimConfig = config.getDimConfig(res.dim);
  // +2 to prepend dimension, append child label
  String[] childPath = new String[res.path.length + 2];
  childPath[0] = res.dim;
  System.arraycopy(res.path, 0, childPath, 1, res.path.length); // reuse
  for (int i = 0; i < res.labelValues.length; i++) {
    // Only the last slot changes per label; the dimension and path prefix are reused.
    childPath[res.path.length + 1] = res.labelValues[i].label;
    String fullPath = FacetsConfig.pathToString(childPath, childPath.length);
    // Upper bound: number of documents containing the term for this full path.
    int max = reader.docFreq(new Term(dimConfig.indexFieldName, fullPath));
    // The double-to-int cast truncates the scaled value.
    int correctedCount = (int) (res.labelValues[i].value.doubleValue() / samplingRate);
    correctedCount = Math.min(max, correctedCount);
    fixedLabelValues[i] = new LabelAndValue(res.labelValues[i].label, correctedCount);
  // cap the total count on the total number of non-deleted documents in the reader
  int correctedTotalCount = res.value.intValue();
  // Totals that are not positive are passed through without scaling.
  if (correctedTotalCount > 0) {
    correctedTotalCount = Math.min(reader.numDocs(), (int) (res.value.doubleValue() / samplingRate));
  return new FacetResult(res.dim, res.path, correctedTotalCount, fixedLabelValues, res.childCount);
Example 12
Source File:    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the norm of field "f1" from a MemoryIndex, replaces the index's Similarity with
 * one whose computeNorm always returns 74, reads the norm again and asserts the two
 * values differ.
 *
 * NOTE(review): the closing braces of the anonymous Similarity and of its methods are
 * missing in this listing.
 */
public void testSimilarities() throws IOException {

  MemoryIndex mi = new MemoryIndex();
  mi.addField("f1", "a long text field that contains many many terms", analyzer);

  IndexSearcher searcher = mi.createSearcher();
  LeafReader reader = (LeafReader) searcher.getIndexReader();
  NumericDocValues norms = reader.getNormValues("f1");
  // nextDoc() is expected to land on doc 0 before the value is read.
  assertEquals(0, norms.nextDoc());
  float n1 = norms.longValue();

  // Norms are re-computed when we change the Similarity
  mi.setSimilarity(new Similarity() {

    // Fixed norm, independent of the field state.
    public long computeNorm(FieldInvertState state) {
      return 74;

    // Scoring is not supported by this test Similarity.
    public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
      throw new UnsupportedOperationException();

  // Norms are fetched again from the same reader instance.
  norms = reader.getNormValues("f1");
  assertEquals(0, norms.nextDoc());
  float n2 = norms.longValue();

  assertTrue(n1 != n2);
Example 13
Source File:    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a MemoryIndex from a document holding one NumericDocValuesField ("numeric") and
 * one TextField ("text"), then asserts that the term vectors of doc 0 have size 1.
 *
 * NOTE(review): the closing brace is missing in this listing.
 */
public void testFieldsOnlyReturnsIndexedFields() throws IOException {
  Document doc = new Document();

  doc.add(new NumericDocValuesField("numeric", 29L));
  doc.add(new TextField("text", "some text", Field.Store.NO));

  MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
  IndexSearcher searcher = mi.createSearcher();
  IndexReader reader = searcher.getIndexReader();

  // NOTE(review): the size is passed as the first assertEquals argument and 1 as the second.
  assertEquals(reader.getTermVectors(0).size(), 1);
Example 14
Source File:    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Asks {@code highlighter} for the best fragments of every field in {@code hlFields},
 * for every hit, using a FieldQuery built once from {@code q} and the searcher's reader.
 *
 * NOTE(review): closing braces are missing in this listing.
 *
 * @param searcher searcher whose index reader is passed to the highlighter
 * @param q        query from which the FieldQuery is built
 * @param hits     hits to highlight, visited in the order given by docIdOrder
 * @throws Exception declared by the signature
 */
public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
  IndexReader reader = searcher.getIndexReader();
  // Built once and reused for every document and field.
  final FieldQuery fq = highlighter.getFieldQuery( q, reader);
  for (ScoreDoc scoreDoc : docIdOrder(hits.scoreDocs)) {
    for (String hlField : hlFields) {
      String[] fragments = highlighter.getBestFragments(fq, reader, scoreDoc.doc, hlField, fragSize, maxFrags,
          fragListBuilder, fragmentsBuilder, preTags, postTags, encoder);
      // The fragment count is stored in preventOptimizeAway (presumably so the work is not optimized out - confirm).
      preventOptimizeAway = fragments.length;
Example 15
Source File:    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the value count of the ordinals for {@code type}, read from globally loaded
 * field data on the first leaf of the searcher's reader, or 0 when the reader has no leaves.
 *
 * NOTE(review): only the first leaf is consulted - presumably the global value count is
 * the same for every leaf; confirm. Closing braces are missing in this listing.
 *
 * @param indexSearcher searcher whose reader is cast to DirectoryReader
 * @param type          type whose ordinal values are looked up
 * @return the value count, or 0 for a reader without leaves
 */
public long globalMaxOrd(IndexSearcher indexSearcher, String type) {
    // The cast fails with ClassCastException if the reader is not a DirectoryReader.
    DirectoryReader indexReader = (DirectoryReader) indexSearcher.getIndexReader();
    if (indexReader.leaves().isEmpty()) {
        return 0;
    } else {
        LeafReaderContext atomicReaderContext = indexReader.leaves().get(0);
        // Field data is loaded for the whole reader first, then for the single leaf.
        IndexParentChildFieldData globalFieldData = indexFieldData.loadGlobal(indexReader);
        AtomicParentChildFieldData afd = globalFieldData.load(atomicReaderContext);
        SortedDocValues values = afd.getOrdinalsValues(type);
        return values.getValueCount();
Example 16
Source File:    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the value count of the ordinals obtained via {@code globalOrdinalsValues} for
 * the first leaf of the searcher's reader, or 0 when the reader has no leaves.
 *
 * NOTE(review): only the first leaf is consulted - presumably the global value count is
 * the same for every leaf; confirm. Closing braces are missing in this listing.
 *
 * @param indexSearcher searcher whose reader leaves are inspected
 * @return the value count, or 0 for a reader without leaves
 */
public long globalMaxOrd(IndexSearcher indexSearcher) {
    IndexReader indexReader = indexSearcher.getIndexReader();
    if (indexReader.leaves().isEmpty()) {
        return 0;
    } else {
        LeafReaderContext atomicReaderContext = indexReader.leaves().get(0);
        RandomAccessOrds values = globalOrdinalsValues(atomicReaderContext);
        return values.getValueCount();
Example 17
Source File:    From airsonic with GNU General Public License v3.0 5 votes vote down vote up
 * Return the MediaLibraryStatistics saved on commit in the index. Ensures that each index reports the same data.
 * On invalid indices, returns null.
// Reads the commit user data of every index type and returns the statistics only when
// all index types report equal data. Returns null when an index type has no searcher,
// when its reader is not a DirectoryReader, when the statistics differ between index
// types, or when IOException / IllegalArgumentException is thrown while reading.
// NOTE(review): the stringMapToValidObject(...) call and the closing braces are cut off in
// this listing, and no release of the obtained searcher is visible - confirm against the
// full source.
public @Nullable MediaLibraryStatistics getStatistics() {
    MediaLibraryStatistics stats = null;
    for (IndexType indexType : IndexType.values()) {
        IndexSearcher searcher = getSearcher(indexType);
        if (searcher == null) {
            LOG.trace("No index for type " + indexType);
            return null;
        IndexReader indexReader = searcher.getIndexReader();
        // Commit user data is only reachable through a DirectoryReader.
        if (!(indexReader instanceof DirectoryReader)) {
            LOG.warn("Unexpected index type " + indexReader.getClass());
            return null;
        try {
            Map<String, String> userData = ((DirectoryReader) indexReader).getIndexCommit().getUserData();
            MediaLibraryStatistics currentStats = Util.stringMapToValidObject(MediaLibraryStatistics.class,
            // The first index type sets the reference value; later ones must equal it.
            if (stats == null) {
                stats = currentStats;
            } else {
                if (!Objects.equals(stats, currentStats)) {
                    LOG.warn("Index type " + indexType + " had differing stats data");
                    return null;
        } catch (IOException | IllegalArgumentException e) {
            LOG.debug("Exception encountered while fetching index commit data", e);
            return null;
    return stats;
Example 18
Source File:    From mtas with Apache License 2.0 4 votes vote down vote up
 * Collect field.
 * @param field
 *          the field
 * @param searcher
 *          the searcher
 * @param rawReader
 *          the raw reader
 * @param fullDocList
 *          the full doc list
 * @param fullDocSet
 *          the full doc set
 * @param fieldStats
 *          the field stats
 * @throws IllegalAccessException
 *           the illegal access exception
 * @throws IllegalArgumentException
 *           the illegal argument exception
 * @throws InvocationTargetException
 *           the invocation target exception
 * @throws IOException
 *           Signals that an I/O exception has occurred.
// When fieldStats is non-null: creates a SpanWeight for every query in
// fieldStats.spanQueryList (each rewritten against the searcher's reader) and delegates
// to CodecCollector.collectField with the resulting query-to-weight map.
// The status parameter is forwarded unchanged to CodecCollector.collectField.
// NOTE(review): closing braces are missing in this listing.
public static void collectField(String field, IndexSearcher searcher,
    IndexReader rawReader, ArrayList<Integer> fullDocList,
    ArrayList<Integer> fullDocSet, ComponentField fieldStats, Status status)
    throws IllegalAccessException, IllegalArgumentException,
    InvocationTargetException, IOException {
  if (fieldStats != null) {
    // The searcher's own reader is used for rewriting; rawReader is only passed along.
    IndexReader reader = searcher.getIndexReader();
    HashMap<MtasSpanQuery, SpanWeight> spansQueryWeight = new HashMap<>();
    // only if spanQueryList is not empty
    if (fieldStats.spanQueryList.size() > 0) {
      final float boost = 0;
      for (MtasSpanQuery sq : fieldStats.spanQueryList) {
        // The map is keyed by the original query; the weight comes from its rewritten form.
        spansQueryWeight.put(sq, ((MtasSpanQuery) sq.rewrite(reader))
            .createWeight(searcher, false, boost));
    // collect
    CodecCollector.collectField(field, searcher, reader, rawReader,
        fullDocList, fullDocSet, fieldStats, spansQueryWeight, status);