org.apache.lucene.index.IndexReader Java Examples

The following examples show how to use org.apache.lucene.index.IndexReader. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestSpanNearQuery.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Indexes a single {@link StringField} document and checks that searching it with a
 * {@link SpanNearQuery} fails with an {@link IllegalStateException} whose message
 * mentions the missing position data.
 */
public void testNoPositions() throws IOException {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  Document document = new Document();
  document.add(new StringField("foo", "bar", Field.Store.NO));
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  writer.close();
  IndexSearcher searcher = new IndexSearcher(reader);

  // Both clauses target the same field; only "bar" was actually indexed.
  SpanQuery[] clauses = {
    new SpanTermQuery(new Term("foo", "bar")),
    new SpanTermQuery(new Term("foo", "baz"))
  };

  IllegalStateException thrown = expectThrows(IllegalStateException.class, () -> {
    searcher.search(new SpanNearQuery(clauses, 10, true), 5);
  });
  assertTrue(thrown.getMessage().contains("was indexed without position data"));

  reader.close();
  directory.close();
}
 
Example #2
Source File: TestFieldCacheSort.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Sorts a one-document index on a string field while asking for scores, and checks that
 * the hit count and score agree with the same term query run without a sort.
 */
public void testSortOneDocumentWithScores() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);

  Document single = new Document();
  single.add(newStringField("value", "foo", Field.Store.YES));
  indexWriter.addDocument(single);

  IndexReader reader = UninvertingReader.wrap(
      indexWriter.getReader(), Collections.singletonMap("value", Type.SORTED));
  indexWriter.close();
  IndexSearcher searcher = newSearcher(reader);

  // Baseline: relevance-ordered search.
  Term fooTerm = new Term("value", "foo");
  TopDocs unsorted = searcher.search(new TermQuery(fooTerm), 10);
  assertEquals(1, unsorted.totalHits.value);

  // Same query, sorted by the field, with score computation requested.
  Sort byValue = new Sort(new SortField("value", SortField.Type.STRING));
  TopDocs sorted = searcher.search(new TermQuery(fooTerm), 10, byValue, true);

  assertEquals(unsorted.totalHits.value, sorted.totalHits.value);
  assertEquals(unsorted.scoreDocs[0].score, sorted.scoreDocs[0].score, 0F);

  TestUtil.checkReader(reader);
  reader.close();
  directory.close();
}
 
Example #3
Source File: FieldCacheImpl.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Flattens every per-type, per-reader cache into one array of {@link CacheEntry} objects.
 * Each per-type reader cache is traversed while holding its own monitor; entries whose
 * reader key is {@code null} are skipped.
 */
@Override
public synchronized CacheEntry[] getCacheEntries() {
  final List<CacheEntry> entries = new ArrayList<>(17);
  for (final Map.Entry<Class<?>,Cache> typeAndCache : caches.entrySet()) {
    final Class<?> cacheType = typeAndCache.getKey();
    final Cache cache = typeAndCache.getValue();
    synchronized (cache.readerCache) {
      for (final Map.Entry<IndexReader.CacheKey,Map<CacheKey, Accountable>> perReader : cache.readerCache.entrySet()) {
        final IndexReader.CacheKey readerKey = perReader.getKey();
        if (readerKey != null) {
          for (final Map.Entry<CacheKey, Accountable> perField : perReader.getValue().entrySet()) {
            final CacheKey fieldKey = perField.getKey();
            entries.add(new CacheEntry(readerKey, fieldKey.field, cacheType, fieldKey.custom, perField.getValue()));
          }
        }
      }
    }
  }
  return entries.toArray(new CacheEntry[entries.size()]);
}
 
Example #4
Source File: TestLegacyTerms.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Indexes at least 100 random ints as {@link LegacyIntField}s, tracking the smallest and
 * largest value seen, then checks that {@link LegacyNumericUtils} reports the same
 * minimum and maximum from the field's terms.
 */
public void testIntFieldMinMax() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);

  final int docCount = atLeast(100);
  int smallest = Integer.MAX_VALUE;
  int largest = Integer.MIN_VALUE;
  for (int docNum = 0; docNum < docCount; docNum++) {
    final int value = random().nextInt();
    smallest = Math.min(value, smallest);
    largest = Math.max(value, largest);

    Document document = new Document();
    document.add(new LegacyIntField("field", value, Field.Store.NO));
    indexWriter.addDocument(document);
  }

  IndexReader reader = indexWriter.getReader();
  Terms fieldTerms = MultiTerms.getTerms(reader, "field");
  assertEquals(Integer.valueOf(smallest), LegacyNumericUtils.getMinInt(fieldTerms));
  assertEquals(Integer.valueOf(largest), LegacyNumericUtils.getMaxInt(fieldTerms));

  reader.close();
  indexWriter.close();
  directory.close();
}
 
Example #5
Source File: TestMultiPhraseQuery.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Sloppy {@link MultiPhraseQuery} whose first position accepts "a" or "b" and whose second
 * position accepts "a" again; expects exactly one match in the single indexed text.
 */
@Ignore //LUCENE-3821 fixes sloppy phrase scoring, except for this known problem 
public void testMultiSloppyWithRepeats() throws IOException {
  Directory directory = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
  add("a b c d e f g h i k", indexWriter);
  IndexReader reader = indexWriter.getReader();
  indexWriter.close();

  IndexSearcher searcher = newSearcher(reader);

  Term[] firstPosition = {new Term("body", "a"), new Term("body", "b")};
  Term[] secondPosition = {new Term("body", "a")};

  MultiPhraseQuery.Builder builder = new MultiPhraseQuery.Builder();
  // this will fail, when the scorer would propagate [a] rather than [a,b],
  builder.add(firstPosition);
  builder.add(secondPosition);
  builder.setSlop(6);

  int matches = searcher.count(builder.build());
  assertEquals(1, matches); // should match on "a b"

  reader.close();
  directory.close();
}
 
Example #6
Source File: KNearestNeighborClassifier.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a {@link KNearestNeighborClassifier}.
 *
 * @param indexReader    the reader on the index to be used for classification
 * @param similarity     the {@link Similarity} to be used by the underlying {@link IndexSearcher} or {@code null}
 *                       (defaults to {@link org.apache.lucene.search.similarities.BM25Similarity})
 * @param analyzer       an {@link Analyzer} used to analyze unseen text
 * @param query          a {@link Query} to eventually filter the docs used for training the classifier, or {@code null}
 *                       if all the indexed docs should be used
 * @param k              the no. of docs to select in the MLT results to find the nearest neighbor
 * @param minDocsFreq    {@link MoreLikeThis#minDocFreq} parameter; only applied when greater than zero
 * @param minTermFreq    {@link MoreLikeThis#minTermFreq} parameter; only applied when greater than zero
 * @param classFieldName the name of the field used as the output for the classifier
 * @param textFieldNames the name of the fields used as the inputs for the classifier, they can contain boosting indication e.g. title^10
 */
public KNearestNeighborClassifier(IndexReader indexReader, Similarity similarity, Analyzer analyzer, Query query, int k, int minDocsFreq,
                                  int minTermFreq, String classFieldName, String... textFieldNames) {
  // Plain configuration values.
  this.classFieldName = classFieldName;
  this.textFieldNames = textFieldNames;
  this.query = query;
  this.k = k;

  // MoreLikeThis setup; non-positive frequency thresholds leave the MLT settings untouched.
  this.mlt = new MoreLikeThis(indexReader);
  this.mlt.setAnalyzer(analyzer);
  this.mlt.setFieldNames(textFieldNames);
  if (minDocsFreq > 0) {
    this.mlt.setMinDocFreq(minDocsFreq);
  }
  if (minTermFreq > 0) {
    this.mlt.setMinTermFreq(minTermFreq);
  }

  // Searcher setup; fall back to BM25 when no similarity was supplied.
  this.indexSearcher = new IndexSearcher(indexReader);
  this.indexSearcher.setSimilarity(similarity != null ? similarity : new BM25Similarity());
}
 
Example #7
Source File: TestUnifiedHighlighterTermIntervals.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Indexes two documents by reusing one {@link Field} instance, searches them with an
 * interval term query on "highlighting", and checks the two highlighted snippets.
 */
public void testBasics() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  // One field / one document object, re-valued before each addDocument call.
  Field bodyField = new Field("body", "", fieldType);
  Document document = new Document();
  document.add(bodyField);

  bodyField.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
  writer.addDocument(document);
  bodyField.setStringValue("Highlighting the first term. Hope it works.");
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);

  Query intervalQuery = new IntervalQuery("body", Intervals.term("highlighting"));
  TopDocs hits = searcher.search(intervalQuery, 10, Sort.INDEXORDER);
  assertEquals(2, hits.totalHits.value);

  String[] highlighted = highlighter.highlight("body", intervalQuery, hits);
  assertEquals(2, highlighted.length);
  assertEquals("Just a test <b>highlighting</b> from postings. ", highlighted[0]);
  assertEquals("<b>Highlighting</b> the first term. ", highlighted[1]);

  reader.close();
}
 
Example #8
Source File: LuceneHelper.java    From dexter with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the TFIDF vector and its norm2.
 * <p>
 * For each term the score is {@code tf * (log2(collectionSize / df + 1) + 1)}, where
 * {@code df} is the term's document frequency in {@code field}.
 * 
 * @param tfidf
 *            - the vector containing for each term its TFIDF score, it will
 *            be populated by this method
 * @param freq
 *            - the vector containing for each term its frequency
 * @param field
 *            - the field on which to compute the inverse document frequency
 * 
 * @return the norm of the TFIDF vector
 * 
 */
private double tfidfVector(Map<String, Double> tfidf,
		Map<String, Integer> freq, String field) {
	IndexReader reader = getReader();

	double norm = 0;
	for (Map.Entry<String, Integer> entry : freq.entrySet()) {
		Term t = new Term(field, entry.getKey());
		int df = 0;
		try {
			df = reader.docFreq(t);
		} catch (IOException e) {
			// Pass the exception itself as the trailing argument so that
			// placeholder-style loggers (SLF4J / Log4j 2) record the stack
			// trace instead of only the toString() text.
			logger.error("computing tfidfVector ({}) ", e.toString(), e);
			// NOTE(review): terminating the JVM from a helper is drastic;
			// kept as-is because callers may rely on it.
			System.exit(-1);
		}
		// NOTE(review): when df is 0 the double division yields Infinity, so
		// the term's score and the returned norm become infinite - confirm
		// whether terms absent from the index can reach this method.
		double idf = Math.log(collectionSize / (double) df + 1)
				/ Math.log(2) + 1;
		double tfidfValue = entry.getValue() * idf;
		norm += tfidfValue * tfidfValue;
		tfidf.put(entry.getKey(), tfidfValue);
	}
	return Math.sqrt(norm);

}
 
Example #9
Source File: FuzzyLikeThisQuery.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the query used for a single term.
 * <p>
 * When {@code ignoreTF} is set the term query is wrapped in a {@link ConstantScoreQuery}.
 * Otherwise a {@link TermStates} is assembled by hand across all leaves so that the
 * statistics registered for the term add up to a doc freq and total term freq of 1.
 */
private Query newTermQuery(IndexReader reader, Term term) throws IOException {
  if (ignoreTF) {
    return new ConstantScoreQuery(new TermQuery(term));
  }

  // we build an artificial TermStates that will give an overall df and ttf
  // equal to 1
  final TermStates states = new TermStates(reader.getContext());
  for (LeafReaderContext leaf : reader.leaves()) {
    final Terms fieldTerms = leaf.reader().terms(term.field());
    if (fieldTerms == null) {
      continue;
    }
    final TermsEnum termsEnum = fieldTerms.iterator();
    if (termsEnum.seekExact(term.bytes())) {
      // we want the total df and ttf to be 1: the first matching leaf contributes 1,
      // every later one contributes 0
      final int delta = 1 - states.docFreq();
      states.register(termsEnum.termState(), leaf.ord, delta, delta);
    }
  }
  return new TermQuery(term, states);
}
 
Example #10
Source File: SearchFiles.java    From Java-Data-Science-Cookbook with MIT License 6 votes vote down vote up
/**
 * Opens the index at {@code INDEX_DIRECTORY}, searches {@code FIELD_CONTENTS} for the
 * word "shakespeare", and prints the total hit count followed by the path and score of
 * the top five hits.
 * <p>
 * The directory, reader and analyzer are opened in a try-with-resources block so they
 * are closed even when parsing or searching throws.
 *
 * @param args ignored
 * @throws Exception if the index cannot be opened or read, or the query cannot be parsed
 */
public static void main(String[] args) throws Exception {
	try (FSDirectory directory = FSDirectory.open(Paths.get(INDEX_DIRECTORY));
			IndexReader reader = DirectoryReader.open(directory);
			Analyzer analyzer = new StandardAnalyzer()) {
		IndexSearcher indexSearcher = new IndexSearcher(reader);

		QueryParser queryParser = new QueryParser(FIELD_CONTENTS, analyzer);
		String searchString = "shakespeare";
		Query query = queryParser.parse(searchString);

		TopDocs results = indexSearcher.search(query, 5);
		ScoreDoc[] hits = results.scoreDocs;

		int numTotalHits = results.totalHits;
		System.out.println(numTotalHits + " total matching documents");

		for (int i = 0; i < hits.length; ++i) {
			int docId = hits[i].doc;
			Document d = indexSearcher.doc(docId);
			System.out.println((i + 1) + ". " + d.get("path") + " score=" + hits[i].score);
		}
	}
}
 
Example #11
Source File: InternalHelpController.java    From airsonic with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Collects per-index document statistics and the analyzer's Lucene version into the
 * given model map.
 * <p>
 * For every {@link IndexType} an {@link IndexStatistics} entry is stored under
 * "indexStatistics"; counts are zero when no searcher is available. The analyzer
 * version is stored under "indexLuceneVersion".
 *
 * @param map the model map to populate
 */
private void gatherIndexInfo(Map<String, Object> map) {
    SortedMap<String, IndexStatistics> indexStats = new TreeMap<>();
    for (IndexType indexType : IndexType.values()) {
        IndexStatistics stat = new IndexStatistics();
        IndexSearcher searcher = indexManager.getSearcher(indexType);
        stat.setName(indexType.name());
        indexStats.put(indexType.name(), stat);
        if (searcher != null) {
            try {
                IndexReader reader = searcher.getIndexReader();
                stat.setCount(reader.numDocs());
                stat.setDeletedCount(reader.numDeletedDocs());
            } finally {
                // Always hand the searcher back, even if reading the counts fails.
                indexManager.release(indexType, searcher);
            }
        } else {
            stat.setCount(0);
            stat.setDeletedCount(0);
        }
    }
    map.put("indexStatistics", indexStats);

    try (Analyzer analyzer = analyzerFactory.getAnalyzer()) {
        map.put("indexLuceneVersion", analyzer.getVersion().toString());
    } catch (IOException e) {
        // Version info is best-effort; the statistics above are still reported.
        LOG.debug("Unable to gather information", e);
    }
}
 
Example #12
Source File: TestFieldCacheSort.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Indexes five documents that each carry two values for the same field, wraps the reader
 * as single-valued SORTED, and expects sorting on that field to throw
 * {@link IllegalStateException} (use SORTED_SET for multi-valued fields instead).
 */
public void testMultiValuedField() throws IOException {
  Directory directory = newDirectory();
  IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random())));
  for (int docNum = 0; docNum < 5; docNum++) {
    Document twoValued = new Document();
    twoValued.add(new StringField("string", "a" + docNum, Field.Store.NO));
    twoValued.add(new StringField("string", "b" + docNum, Field.Store.NO));
    indexWriter.addDocument(twoValued);
  }
  indexWriter.forceMerge(1); // enforce one segment to have a higher unique term count in all cases
  indexWriter.close();

  IndexReader reader = UninvertingReader.wrap(
      DirectoryReader.open(directory), Collections.singletonMap("string", Type.SORTED));
  IndexSearcher searcher = new IndexSearcher(reader);

  SortField byString = new SortField("string", SortField.Type.STRING);
  Sort sort = new Sort(byString, SortField.FIELD_DOC);
  expectThrows(IllegalStateException.class, () -> {
    searcher.search(new MatchAllDocsQuery(), 500, sort);
  });

  reader.close();
  directory.close();
}
 
Example #13
Source File: right_PhraseQuery_1.5.java    From gumtree-spoon-ast-diff with Apache License 2.0 6 votes vote down vote up
/**
 * Picks a scorer for this phrase query.
 * <ul>
 *   <li>no terms: {@code null};</li>
 *   <li>one term: a {@link TermScorer} over that term's docs;</li>
 *   <li>otherwise: an exact scorer when {@code slop} is 0, else a sloppy scorer.</li>
 * </ul>
 * Returns {@code null} as soon as the reader has no docs/positions for a term.
 */
final Scorer scorer(IndexReader reader) throws IOException {
  final int termCount = terms.size();

  // optimize zero-term case
  if (termCount == 0) {
    return null;
  }

  // optimize one-term case
  if (termCount == 1) {
    Term only = (Term) terms.elementAt(0);
    TermDocs termDocs = reader.termDocs(only);
    if (termDocs == null) {
      return null;
    }
    return new TermScorer(termDocs, reader.norms(only.field()), weight);
  }

  TermPositions[] positions = new TermPositions[termCount];
  for (int idx = 0; idx < termCount; idx++) {
    TermPositions current = reader.termPositions((Term) terms.elementAt(idx));
    if (current == null) {
      return null;
    }
    positions[idx] = current;
  }

  // optimize exact case
  if (slop == 0) {
    return new ExactPhraseScorer(positions, reader.norms(field), weight);
  }
  return new SloppyPhraseScorer(positions, slop, reader.norms(field), weight);
}
 
Example #14
Source File: BlurUtilsTest.java    From incubator-retired-blur with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a two-document in-memory index and returns a reader over it.
 * <p>
 * The first document receives the prime-doc marker, field "a" (twice) and "family";
 * the second document is added empty.
 */
private IndexReader getReader() throws CorruptIndexException, LockObtainFailedException, IOException {
  RAMDirectory ramDirectory = new RAMDirectory();
  IndexWriter indexWriter =
      new IndexWriter(ramDirectory, new IndexWriterConfig(LUCENE_VERSION, new KeywordAnalyzer()));

  Document first = new Document();
  first.add(new StringField(BlurConstants.PRIME_DOC, BlurConstants.PRIME_DOC_VALUE, Store.NO));
  first.add(new StringField("a", "b", Store.YES));
  first.add(new StringField("family", "f1", Store.YES));

  Document second = new Document();
  // NOTE(review): this add targets the FIRST document, leaving the second one empty.
  // It looks like a typo for the second document, but existing assertions may depend
  // on the current index contents - confirm before changing.
  first.add(new StringField("a", "b", Store.YES));

  indexWriter.addDocument(first);
  indexWriter.addDocument(second);
  indexWriter.close();
  return DirectoryReader.open(ramDirectory);
}
 
Example #15
Source File: TestWildcard.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Runs wildcard queries that contain only a trailing {@code *} (and a lone {@code *})
 * against a two-term index. Both must match the two documents, and the terms enum used
 * for the lone {@code *} must not be an "AutomatonTermsEnum".
 */
public void testPrefixTerm() throws IOException {
  Directory directory = getIndexStore("field", new String[]{"prefix", "prefixx"});
  IndexReader reader = DirectoryReader.open(directory);
  IndexSearcher searcher = newSearcher(reader);

  MultiTermQuery trailingStar = new WildcardQuery(new Term("field", "prefix*"));
  assertMatches(searcher, trailingStar, 2);

  MultiTermQuery matchAll = new WildcardQuery(new Term("field", "*"));
  assertMatches(searcher, matchAll, 2);

  Terms fieldTerms = MultiTerms.getTerms(searcher.getIndexReader(), "field");
  String enumClassName = matchAll.getTermsEnum(fieldTerms).getClass().getSimpleName();
  assertFalse(enumClassName.contains("AutomatonTermsEnum"));

  reader.close();
  directory.close();
}
 
Example #16
Source File: TestLRUQueryCache.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Wraps a weight in the query cache with the {@code NEVER_CACHE} policy and checks that
 * asking for a bulk scorer reaches the wrapped weight's bulk scorer (not its scorer)
 * and leaves the cache empty.
 */
public void testPropagateBulkScorer() throws IOException {
  Directory directory = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
  indexWriter.addDocument(new Document());
  IndexReader reader = indexWriter.getReader();
  indexWriter.close();

  IndexSearcher searcher = newSearcher(reader);
  LeafReaderContext firstLeaf = searcher.getIndexReader().leaves().get(0);
  AtomicBoolean sawScorer = new AtomicBoolean();
  AtomicBoolean sawBulkScorer = new AtomicBoolean();
  LRUQueryCache queryCache = new LRUQueryCache(1, Long.MAX_VALUE, context -> true, Float.POSITIVE_INFINITY);

  // test that the bulk scorer is propagated when a scorer should not be cached
  Weight plain = searcher.createWeight(new MatchAllDocsQuery(), ScoreMode.COMPLETE_NO_SCORES, 1);
  Weight tracked = new WeightWrapper(plain, sawScorer, sawBulkScorer);
  Weight cached = queryCache.doCache(tracked, NEVER_CACHE);
  cached.bulkScorer(firstLeaf);

  assertEquals(true, sawBulkScorer.get());
  assertEquals(false, sawScorer.get());
  assertEquals(0, queryCache.getCacheCount());

  searcher.getIndexReader().close();
  directory.close();
}
 
Example #17
Source File: MusicSearch.java    From Easy-Cassandra-samples with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the query against the shared Lucene directory and returns the
 * {@code COLUMN_NAME} value of each of the top ten hits, in ranking order.
 * <p>
 * The reader opened for the search is closed before returning; the directory
 * itself is obtained from {@code LuceneUtil} and is left open.
 *
 * @param query the query to execute
 * @return the stored {@code COLUMN_NAME} values of the matching documents
 * @throws IOException if the index cannot be opened or read
 */
private List<String> returnMusics(Query query) throws IOException {
	int hitsPerPage = 10;
	try (IndexReader reader = DirectoryReader.open(LuceneUtil.INSTANCE.getDirectory())) {
		IndexSearcher searcher = new IndexSearcher(reader);
		TopScoreDocCollector collector = TopScoreDocCollector.create(
				hitsPerPage, true);
		searcher.search(query, collector);
		ScoreDoc[] hits = collector.topDocs().scoreDocs;

		List<String> musics = new LinkedList<>();
		for (ScoreDoc hit : hits) {
			// Stored fields must be read while the reader is still open.
			Document d = searcher.doc(hit.doc);
			musics.add(d.get(COLUMN_NAME));
		}
		return musics;
	}
}
 
Example #18
Source File: TestUnifiedHighlighterTermIntervals.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Indexes one document with "title", "text" and "category" fields (all using the
 * class-level {@code fieldType}) and returns a reader over it.
 * <p>
 * The caller is responsible for closing the returned reader.
 */
private IndexReader indexSomeFields() throws IOException {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

  // The locally built, frozen FieldType that used to be created here was never
  // passed to any field, so it has been removed as dead code.
  Field title = new Field("title", "", fieldType);
  Field text = new Field("text", "", fieldType);
  Field category = new Field("category", "", fieldType);

  Document doc = new Document();
  doc.add(title);
  doc.add(text);
  doc.add(category);
  title.setStringValue("This is the title field.");
  text.setStringValue("This is the text field. You can put some text if you want.");
  category.setStringValue("This is the category field.");
  iw.addDocument(doc);

  IndexReader ir = iw.getReader();
  iw.close();
  return ir;
}
 
Example #19
Source File: TestFunctionScoreQuery.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Scores a document from a doc-values field holding -2 and checks that the explanation
 * reports a value of 0 and that its first detail mentions "truncated score".
 */
public void testTruncateNegativeScores() throws IOException {
  Directory directory = newDirectory();
  IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig());

  Document negative = new Document();
  negative.add(new NumericDocValuesField("foo", -2));
  indexWriter.addDocument(negative);

  IndexReader reader = DirectoryReader.open(indexWriter);
  indexWriter.close();
  IndexSearcher searcher = newSearcher(reader);

  Query scored = new FunctionScoreQuery(new MatchAllDocsQuery(), DoubleValuesSource.fromLongField("foo"));
  QueryUtils.check(random(), scored, searcher);

  Explanation explanation = searcher.explain(scored, 0);
  assertEquals(0, explanation.getValue().doubleValue(), 0f);
  String firstDetail = explanation.getDetails()[0].getDescription();
  assertTrue(explanation.toString(), firstDetail.contains("truncated score"));

  reader.close();
  directory.close();
}
 
Example #20
Source File: IndexInfo.java    From alfresco-repository with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Looks up the reader registered for the given id.
 *
 * @param id the key into {@code referenceCountingReadOnlyIndexReaders}
 * @return the registered reader, never {@code null}
 * @throws IllegalStateException if no reader is registered for {@code id}
 */
private IndexReader getReferenceCountingIndexReader(String id) throws IOException
{
    final IndexReader registered = referenceCountingReadOnlyIndexReaders.get(id);
    if (registered != null)
    {
        return registered;
    }
    throw new IllegalStateException("Indexer should have been pre-built for " + id);
}
 
Example #21
Source File: InfiniteLoopCommand.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
/**
 * Repeatedly walks every term of every field in every leaf of the context's index
 * reader, summing the terms' hash codes and printing the total after each full pass.
 * <p>
 * The {@code while (true)} loop has no exit condition, so this method never returns
 * normally; it can only end by throwing.
 *
 * @param context supplies the {@link IndexReader} to scan
 * @return never returns a value
 * @throws IOException rethrown as-is (after printing its stack trace) when the scan fails
 */
@Override
public Boolean execute(IndexContext context) throws IOException, InterruptedException {
  try {
    IndexReader indexReader = context.getIndexReader();
    // Intentionally endless: each iteration is one complete pass over the index.
    while (true) {
      long hash = 0;
      for (AtomicReaderContext atomicReaderContext : indexReader.leaves()) {
        AtomicReader reader = atomicReaderContext.reader();
        for (String field : reader.fields()) {
          Terms terms = reader.terms(field);
          BytesRef bytesRef;
          TermsEnum iterator = terms.iterator(null);
          // Drain the enum; next() returns null once the field's terms are exhausted.
          while ((bytesRef = iterator.next()) != null) {
            hash += bytesRef.hashCode();
          }
        }
      }
      System.out.println("hashcode = " + hash);
    }
  } catch (IOException e) {
    e.printStackTrace();
    throw e;
  } catch (Throwable t) {
    // Everything else is printed, then propagated: InterruptedException and
    // RuntimeException as-is, any other Throwable wrapped in a RuntimeException.
    t.printStackTrace();
    if (t instanceof InterruptedException) {
      throw t;
    } else if (t instanceof RuntimeException) {
      throw (RuntimeException) t;
    }
    throw new RuntimeException(t);
  }
}
 
Example #22
Source File: LuceneIndex.java    From rdf4j with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Looks up the Document for the specified document ID (combination of resource and context) by asking each
 * leaf of the current index reader in turn.
 *
 * @return the first Document found, or null when no such Document exists yet
 */
private Document getDocument(Term idTerm) throws IOException {
	for (LeafReaderContext leaf : getIndexReader().leaves()) {
		Document found = getDocument(leaf.reader(), idTerm);
		if (found != null) {
			return found;
		}
	}
	// no such Document
	return null;
}
 
Example #23
Source File: DeleteByQueryWrapper.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Rewrites the wrapped query. If that produced a different query instance it is wrapped
 * again with the same schema; otherwise the superclass rewrite is used.
 */
@Override
public Query rewrite(IndexReader reader) throws IOException {
  final Query innerRewritten = in.rewrite(reader);
  if (innerRewritten == in) {
    return super.rewrite(reader);
  }
  return new DeleteByQueryWrapper(innerRewritten, schema);
}
 
Example #24
Source File: TestQueryRescorer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Rescores the same term-query hits twice with one phrase query - once asking for all
 * hits, once for a random smaller topN - and checks that the second result equals the
 * first topN entries of the first result.
 */
public void testRescoreIsIdempotent() throws Exception {
  final int numDocs = 100;
  final String fieldName = "field";
  Directory directory = newDirectory();
  IndexReader reader = publishDocs(numDocs, fieldName, directory);

  // Construct a query that will get numDocs hits.
  String firstWord = dictionary.get(0);
  TermQuery broadQuery = new TermQuery(new Term(fieldName, firstWord));
  IndexSearcher searcher = getSearcher(reader);
  searcher.setSimilarity(new BM25Similarity());
  TopDocs firstPassHits = searcher.search(broadQuery, numDocs);
  TopDocs secondPassHits = searcher.search(broadQuery, numDocs);

  // Next, use a more specific phrase query that will return different scores
  // from the above term query
  String secondWord = RandomPicks.randomFrom(random(), dictionary);
  PhraseQuery rescoreQuery = new PhraseQuery(1, fieldName, firstWord, secondWord);

  // rescore, requesting the same hits as topN
  TopDocs fullRescore = QueryRescorer.rescore(searcher, firstPassHits, rescoreQuery, 2.0, numDocs);

  // now rescore again, where topN is less than numDocs
  int smallerTopN = random().nextInt(numDocs-1);
  TopDocs partialRescore = QueryRescorer.rescore(searcher, secondPassHits, rescoreQuery, 2.0, smallerTopN);
  ScoreDoc[] expectedPrefix = ArrayUtil.copyOfSubArray(fullRescore.scoreDocs, 0, smallerTopN);
  CheckHits.checkEqual(rescoreQuery, expectedPrefix, partialRescore.scoreDocs);

  reader.close();
  directory.close();
}
 
Example #25
Source File: TestSpanMultiTermQueryWrapper.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that neither wrapping a {@link PrefixQuery} nor changing the wrapper's rewrite
 * method alters the wrapped query's hash code.
 */
@Test
public void testWrappedQueryIsNotModified() {
  final PrefixQuery wrapped = new PrefixQuery(new Term("field", "test"));
  final int originalHash = wrapped.hashCode();

  SpanMultiTermQueryWrapper<PrefixQuery> wrapper = new SpanMultiTermQueryWrapper<>(wrapped);
  assertEquals(originalHash, wrapped.hashCode());

  // A do-nothing rewrite method is enough; only the setter call matters here.
  SpanMultiTermQueryWrapper.SpanRewriteMethod noOpRewrite = new SpanMultiTermQueryWrapper.SpanRewriteMethod() {
    @Override
    public SpanQuery rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
      return null;
    }
  };
  wrapper.setRewriteMethod(noOpRewrite);
  assertEquals(originalHash, wrapped.hashCode());
}
 
Example #26
Source File: TestXYShape.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes one random XY point and runs INTERSECTS, WITHIN and DISJOINT distance queries
 * for a random circle, expecting one hit from the first two exactly when the circle
 * contains the point and one hit from DISJOINT otherwise.
 */
public void testPointIndexAndDistanceQuery() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);

  float x = ShapeTestUtil.nextFloat(random());
  float y = ShapeTestUtil.nextFloat(random());
  Document pointDoc = new Document();
  for (Field shapeField : XYShape.createIndexableFields(FIELDNAME, x, y)) {
    pointDoc.add(shapeField);
  }
  indexWriter.addDocument(pointDoc);

  //// search
  IndexReader reader = indexWriter.getReader();
  indexWriter.close();
  IndexSearcher searcher = newSearcher(reader);

  XYCircle circle = ShapeTestUtil.nextCircle();
  Component2D circle2D = XYGeometry.create(circle);
  final boolean inside = circle2D.contains(x, y);
  final int expectedHits = inside ? 1 : 0;
  final int expectedDisjointHits = inside ? 0 : 1;

  Query intersects = XYShape.newDistanceQuery(FIELDNAME, QueryRelation.INTERSECTS, circle);
  assertEquals(expectedHits, searcher.count(intersects));

  Query within = XYShape.newDistanceQuery(FIELDNAME, QueryRelation.WITHIN, circle);
  assertEquals(expectedHits, searcher.count(within));

  Query disjoint = XYShape.newDistanceQuery(FIELDNAME, QueryRelation.DISJOINT, circle);
  assertEquals(expectedDisjointHits, searcher.count(disjoint));

  IOUtils.close(reader, directory);
}
 
Example #27
Source File: AbstractLuceneSearchTest.java    From aedict with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Opens the Lucene index located at {@code Main.LUCENE_INDEX} and prepares the reader,
 * searcher and query parser fields used by the tests.
 */
@Before
public void initializeLucene() throws IOException {
    final File indexLocation = new File(Main.LUCENE_INDEX);
    directory = FSDirectory.open(indexLocation);
    reader = IndexReader.open(directory, true);
    searcher = new IndexSearcher(reader);

    // The parser's default field comes from the concrete test class.
    final String defaultField = getDefaultFieldName();
    final StandardAnalyzer analyzer = new StandardAnalyzer(LuceneSearch.LUCENE_VERSION);
    parser = new QueryParser(LuceneSearch.LUCENE_VERSION, defaultField, analyzer);
}
 
Example #28
Source File: TestRangeFacetCounts.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes the longs 0..99 plus {@code Long.MAX_VALUE} as doc values, counts them into
 * five {@link LongRange} buckets, and compares the facet result's string form with the
 * expected listing.
 */
public void testBasicLong() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);

  // One document / one field instance, re-valued before each addDocument call.
  NumericDocValuesField valueField = new NumericDocValuesField("field", 0L);
  Document document = new Document();
  document.add(valueField);
  for (long value = 0; value < 100; value++) {
    valueField.setLongValue(value);
    indexWriter.addDocument(document);
  }

  // Also add Long.MAX_VALUE
  valueField.setLongValue(Long.MAX_VALUE);
  indexWriter.addDocument(document);

  IndexReader reader = indexWriter.getReader();
  indexWriter.close();

  FacetsCollector collector = new FacetsCollector();
  IndexSearcher searcher = newSearcher(reader);
  searcher.search(new MatchAllDocsQuery(), collector);

  Facets facets = new LongRangeFacetCounts("field", collector,
      new LongRange("less than 10", 0L, true, 10L, false),
      new LongRange("less than or equal to 10", 0L, true, 10L, true),
      new LongRange("over 90", 90L, false, 100L, false),
      new LongRange("90 or above", 90L, true, 100L, false),
      new LongRange("over 1000", 1000L, false, Long.MAX_VALUE, true));

  String expectedListing =
      "dim=field path=[] value=22 childCount=5\n"
      + "  less than 10 (10)\n"
      + "  less than or equal to 10 (11)\n"
      + "  over 90 (9)\n"
      + "  90 or above (10)\n"
      + "  over 1000 (1)\n";
  FacetResult topChildren = facets.getTopChildren(10, "field");
  assertEquals(expectedListing, topChildren.toString());

  reader.close();
  directory.close();
}
 
Example #29
Source File: TestConstantScoreScorer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes 50 documents (even ones "foo bar", odd ones "baz") and checks that a
 * top-10 collector with a total-hits threshold of 10 reports 11 hits with a
 * GREATER_THAN_OR_EQUAL_TO relation, both for a plain constant-score query and for a
 * boolean query combining a SHOULD and a FILTER clause.
 */
public void testEarlyTermination() throws IOException {
  Analyzer analyzer = new MockAnalyzer(random());
  Directory directory = newDirectory();
  IndexWriter indexWriter = new IndexWriter(directory,
      newIndexWriterConfig(analyzer).setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy()));

  final int numDocs = 50;
  for (int docNum = 0; docNum < numDocs; docNum++) {
    String content = (docNum % 2 == 0) ? "foo bar" : "baz";
    Document document = new Document();
    document.add(newTextField("key", content, Field.Store.YES));
    indexWriter.addDocument(document);
  }
  IndexReader reader = DirectoryReader.open(indexWriter);
  IndexSearcher searcher = newSearcher(reader);

  // Single constant-score clause.
  TopScoreDocCollector collector = TopScoreDocCollector.create(10, null, 10);
  Query fooOnly = new ConstantScoreQuery(new TermQuery(new Term("key", "foo")));
  searcher.search(fooOnly, collector);
  assertEquals(11, collector.totalHits);
  assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, collector.totalHitsRelation);

  // SHOULD "foo" combined with FILTER "bar".
  collector = TopScoreDocCollector.create(10, null, 10);
  BooleanQuery.Builder combined = new BooleanQuery.Builder();
  combined.add(new ConstantScoreQuery(new TermQuery(new Term("key", "foo"))), Occur.SHOULD);
  combined.add(new ConstantScoreQuery(new TermQuery(new Term("key", "bar"))), Occur.FILTER);
  searcher.search(combined.build(), collector);
  assertEquals(11, collector.totalHits);
  assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, collector.totalHitsRelation);

  indexWriter.close();
  reader.close();
  directory.close();
}
 
Example #30
Source File: PersistentProvenanceRepository.java    From nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the lineage for {@code flowFileUuids} and stores it in the submission's result.
 * <p>
 * Does nothing when the submission is already canceled. Any failure is logged and
 * recorded on the result as an error message instead of being propagated.
 */
@Override
public void run() {
    if (submission.isCanceled()) {
        return;
    }

    try {
        final DocumentToEventConverter eventConverter = new DocumentToEventConverter() {
            @Override
            public Set<ProvenanceEventRecord> convert(TopDocs topDocs, IndexReader indexReader) throws IOException {
                // Always authorized. We do this because we need to pull back the event, regardless of whether or not
                // the user is truly authorized, because instead of ignoring unauthorized events, we want to replace them.
                final EventAuthorizer grantAll = EventAuthorizer.GRANT_ALL;
                final DocsReader reader = new DocsReader();
                return reader.read(topDocs, grantAll, indexReader, getAllLogFiles(), new AtomicInteger(0), Integer.MAX_VALUE, maxAttributeChars);
            }
        };

        final Set<ProvenanceEventRecord> lineageRecords =
                LineageQuery.computeLineageForFlowFiles(getIndexManager(), indexDir, null, flowFileUuids, eventConverter);

        // Unauthorized events are swapped for placeholders before being published.
        final StandardLineageResult lineageResult = submission.getResult();
        lineageResult.update(replaceUnauthorizedWithPlaceholders(lineageRecords, user), lineageRecords.size());

        logger.info("Successfully created Lineage for FlowFiles with UUIDs {} in {} milliseconds; Lineage contains {} nodes and {} edges",
                flowFileUuids, lineageResult.getComputationTime(TimeUnit.MILLISECONDS), lineageResult.getNodes().size(), lineageResult.getEdges().size());
    } catch (final Throwable t) {
        logger.error("Failed to query provenance repository due to {}", t.toString());
        if (logger.isDebugEnabled()) {
            logger.error("", t);
        }

        // Prefer the exception's message; fall back to its string form when there is none.
        final String message = t.getMessage();
        submission.getResult().setError(message == null ? t.toString() : message);
    }
}