Java Code Examples for org.apache.lucene.index.IndexReader#close()

The following examples show how to use org.apache.lucene.index.IndexReader#close(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You can also explore related API usage in the sidebar.
Example 1
Source File: GeoNameResolver.java    From lucene-geo-gazetteer with Apache License 2.0 6 votes vote down vote up
/**
 * Searches the corresponding GeoName for each location entity.
 *
 * @param indexerPath
 *            passed to {@code createIndexReader} to open the index
 *            (presumably the index location)
 * @param locationNameEntities
 *            the location names to resolve (the NER output)
 * @param count
 *            number of results for one location
 *
 * @return the map produced by {@code resolveEntities}; an empty map when the
 *         list is empty or its first entry is the empty string
 * @throws IOException
 *             propagated from opening, searching or closing the index reader
 */
public HashMap<String, List<Location>> searchGeoName(String indexerPath,
												   List<String> locationNameEntities,
												   int count) throws IOException {

	if (locationNameEntities.isEmpty()
			|| locationNameEntities.get(0).isEmpty())
		return new HashMap<String, List<Location>>();
	IndexReader reader = createIndexReader(indexerPath);
	try {
		return resolveEntities(locationNameEntities, count, reader);
	} finally {
		// Release the reader even when resolution throws.
		reader.close();
	}
}
 
Example 2
Source File: TestTermRangeQuery.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Initializes the index with "A", "B", "C", "D" and checks how many hits
 * several open-ended term range queries on the "content" field return.
 */
public void testAllDocs() throws Exception {
  initializeIndex(new String[]{"A", "B", "C", "D"});
  IndexReader reader = DirectoryReader.open(dir);
  try {
    IndexSearcher searcher = newSearcher(reader);

    TermRangeQuery query = new TermRangeQuery("content", null, null, true, true);
    assertEquals(4, searcher.search(query, 1000).scoreDocs.length);

    query = TermRangeQuery.newStringRange("content", "", null, true, true);
    assertEquals(4, searcher.search(query, 1000).scoreDocs.length);

    query = TermRangeQuery.newStringRange("content", "", null, true, false);
    assertEquals(4, searcher.search(query, 1000).scoreDocs.length);

    // and now another one
    query = TermRangeQuery.newStringRange("content", "B", null, true, true);
    assertEquals(3, searcher.search(query, 1000).scoreDocs.length);
  } finally {
    // Close the reader even if one of the assertions above fails.
    reader.close();
  }
}
 
Example 3
Source File: TestUnifiedHighlighter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Makes sure we can customize how an empty highlight is returned: with
 * {@code setMaxNoHighlightPassages(0)}, a query term that does not occur in
 * the body text is expected to produce a {@code null} snippet.
 */
public void testCustomEmptyHighlights() throws Exception {
  indexAnalyzer.setPositionIncrementGap(10);
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Document doc = new Document();

  Field body = new Field("body", "test this is.  another sentence this test has.  far away is that planet.", fieldType);
  doc.add(body);
  iw.addDocument(doc);

  IndexReader ir = iw.getReader();
  try {
    iw.close();

    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
    highlighter.setMaxNoHighlightPassages(0);// don't want any default summary
    Query query = new TermQuery(new Term("body", "highlighting"));
    int[] docIDs = new int[]{0};
    String snippets[] = highlighter.highlightFields(new String[]{"body"}, query, docIDs, new int[]{2}).get("body");
    assertEquals(1, snippets.length);
    assertNull(snippets[0]);
  } finally {
    // Close the reader even if an assertion above fails.
    ir.close();
  }
}
 
Example 4
Source File: TestUnifiedHighlighter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Highlights a disjunction that nests a MUST_NOT clause and checks that the
 * term of the negated clause is not marked up in the returned snippet.
 */
public void testBooleanMustNot() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Document document = new Document();
  document.add(new Field("body", "This sentence has both terms.  This sentence has only terms.", fieldType));
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  writer.close();
  IndexSearcher searcher = newSearcher(reader);

  // Inner query: a single prohibited clause on "both".
  BooleanQuery.Builder negated = new BooleanQuery.Builder();
  negated.add(new TermQuery(new Term("body", "both")), BooleanClause.Occur.MUST_NOT);
  BooleanQuery notBoth = negated.build();

  // Outer query: "terms" OR the prohibited-only inner query.
  BooleanQuery.Builder outer = new BooleanQuery.Builder();
  outer.add(new TermQuery(new Term("body", "terms")), BooleanClause.Occur.SHOULD);
  outer.add(notBoth, BooleanClause.Occur.SHOULD);
  BooleanQuery query = outer.build();

  TopDocs hits = searcher.search(query, 10);
  assertEquals(1, hits.totalHits.value);

  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
  highlighter.setMaxLength(Integer.MAX_VALUE - 1);
  String[] snippets = highlighter.highlight("body", query, hits, 2);
  assertEquals(1, snippets.length);

  boolean bothHighlighted = snippets[0].contains("<b>both</b>");
  assertFalse(bothHighlighted);

  reader.close();
}
 
Example 5
Source File: TestUnifiedHighlighterTermIntervals.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes the first line of the {@code CambridgeMA.utf8} resource and checks
 * that an unordered interval query over "porter", "square" and
 * "massachusetts" highlights "Square" and "Porter" in the single snippet.
 */
public void testCambridgeMA() throws Exception {
  String text;
  // try-with-resources: the resource reader is closed even if readLine() throws.
  try (BufferedReader r = new BufferedReader(new InputStreamReader(
      this.getClass().getResourceAsStream("CambridgeMA.utf8"), StandardCharsets.UTF_8))) {
    text = r.readLine();
  }
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
  Field body = new Field("body", text, fieldType);
  Document document = new Document();
  document.add(body);
  iw.addDocument(document);
  IndexReader ir = iw.getReader();
  try {
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    Query query = new IntervalQuery("body",
        Intervals.unordered(Intervals.term("porter"),
            Intervals.term("square"),
            Intervals.term("massachusetts")));
    TopDocs topDocs = searcher.search(query, 10);
    assertEquals(1, topDocs.totalHits.value);
    UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
    highlighter.setMaxLength(Integer.MAX_VALUE - 1);
    String snippets[] = highlighter.highlight("body", query, topDocs, 2);
    assertEquals(1, snippets.length);
    assertTrue(snippets[0].contains("<b>Square</b>"));
    assertTrue(snippets[0].contains("<b>Porter</b>"));
  } finally {
    ir.close();
  }
}
 
Example 6
Source File: TestTermRangeQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks an inclusive range with an open lower bound ({@code null} to "C")
 * against three successive states of the index.
 * See http://issues.apache.org/jira/browse/LUCENE-38
 */
public void testInclusiveLowerNull() throws Exception {
  Analyzer analyzer = new SingleCharAnalyzer();
  Query query = TermRangeQuery.newStringRange("content", null, "C", true, true);

  initializeIndex(new String[]{"A", "B", "","C", "D"}, analyzer);
  assertEquals("A,B,<empty string>,C,D => A,B,<empty string>,C in range", 4, hitCountFor(query));

  initializeIndex(new String[]{"A", "B", "", "D"}, analyzer);
  assertEquals("A,B,<empty string>,D - A, B and <empty string> in range", 3, hitCountFor(query));

  addDoc("C");
  assertEquals("C added => A,B,<empty string>,C in range", 4, hitCountFor(query));
}

/**
 * Opens a fresh reader on {@code dir}, runs {@code query} asking for up to
 * 1000 hits and returns the total hit count. The reader is always closed,
 * including when the search throws.
 */
private long hitCountFor(Query query) throws Exception {
  IndexReader reader = DirectoryReader.open(dir);
  try {
    return newSearcher(reader).search(query, 1000).totalHits.value;
  } finally {
    reader.close();
  }
}
 
Example 7
Source File: TestElevationComparator.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes six small id/title/str_s documents, then invokes {@code runTest}
 * twice on a BM25 searcher: once with {@code true} and once with {@code false}.
 */
public void testSorting() throws Throwable {
  Directory directory = newDirectory();
  IndexWriter writer = new IndexWriter(
      directory,
      newIndexWriterConfig(new MockAnalyzer(random()))
          .setMaxBufferedDocs(2)
          .setMergePolicy(newLogMergePolicy(1000))
          .setSimilarity(new ClassicSimilarity()));

  // One row per document, added in exactly this order.
  String[][] rows = {
      {"id", "a", "title", "ipod", "str_s", "a"},
      {"id", "b", "title", "ipod ipod", "str_s", "b"},
      {"id", "c", "title", "ipod ipod ipod", "str_s", "c"},
      {"id", "x", "title", "boosted", "str_s", "x"},
      {"id", "y", "title", "boosted boosted", "str_s", "y"},
      {"id", "z", "title", "boosted boosted boosted", "str_s", "z"},
  };
  for (String[] row : rows) {
    writer.addDocument(adoc(row));
  }

  IndexReader reader = DirectoryReader.open(writer);
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(new BM25Similarity());

  for (boolean flag : new boolean[] {true, false}) {
    runTest(searcher, flag);
  }

  reader.close();
  directory.close();
}
 
Example 8
Source File: TestMultiPhraseQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes one document built from {@code INCR_0_DOC_TOKENS}, runs {@code q}
 * against it and asserts the total hit count.
 *
 * @param q         the query to run
 * @param nExpected the expected total number of hits
 * @throws IOException propagated from index or search operations
 */
private void doTestZeroPosIncrSloppy(Query q, int nExpected) throws IOException {
  Directory dir = newDirectory(); // random dir
  IndexWriterConfig cfg = newIndexWriterConfig(null);
  IndexWriter writer = new IndexWriter(dir, cfg);
  Document doc = new Document();
  doc.add(new TextField("field", new CannedTokenStream(INCR_0_DOC_TOKENS)));
  writer.addDocument(doc);
  IndexReader r = DirectoryReader.open(writer);
  writer.close();
  try {
    IndexSearcher s = newSearcher(r);

    if (VERBOSE) {
      System.out.println("QUERY=" + q);
    }

    TopDocs hits = s.search(q, 1);
    assertEquals("wrong number of results", nExpected, hits.totalHits.value);

    if (VERBOSE) {
      // Iterate over the returned docs, not totalHits: only one hit was
      // requested, so scoreDocs can be shorter than the total hit count.
      for (ScoreDoc sd : hits.scoreDocs) {
        System.out.println("  hit doc=" + sd.doc + " score=" + sd.score);
      }
    }
  } finally {
    // Release the reader and directory even when the assertion fails.
    r.close();
    dir.close();
  }
}
 
Example 9
Source File: FastVectorHighlighterTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Highlights "foo" in a single stored field with term vectors and checks the
 * best fragment returned for three different fragment sizes (54, 52, 30).
 */
public void testSimpleHighlightTest() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));

  FieldType vectorType = new FieldType(TextField.TYPE_STORED);
  vectorType.setStoreTermVectorOffsets(true);
  vectorType.setStoreTermVectorPositions(true);
  vectorType.setStoreTermVectors(true);
  vectorType.freeze();

  Document doc = new Document();
  doc.add(new Field("field", "This is a test where foo is highlighed and should be highlighted", vectorType));
  writer.addDocument(doc);

  FastVectorHighlighter highlighter = new FastVectorHighlighter();
  IndexReader reader = DirectoryReader.open(writer);
  int docId = 0;
  FieldQuery fieldQuery = highlighter.getFieldQuery(new TermQuery(new Term("field", "foo")), reader);

  // Fragment size -> expected best fragment; highlighted results are centered.
  int[] fragmentSizes = {54, 52, 30};
  String[] expectedFragments = {
      "This is a test where <b>foo</b> is highlighed and should be highlighted",
      "This is a test where <b>foo</b> is highlighed and should be",
      "a test where <b>foo</b> is highlighed",
  };
  for (int i = 0; i < fragmentSizes.length; i++) {
    String[] best = highlighter.getBestFragments(fieldQuery, reader, docId, "field", fragmentSizes[i], 1);
    assertEquals(expectedFragments[i], best[0]);
  }

  reader.close();
  writer.close();
  dir.close();
}
 
Example 10
Source File: TestUnifiedHighlighterMTQ.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Highlights a regexp query ("te.*") over two documents, then checks that a
 * regexp on a different field ("bogus") produces no markup in "body".
 */
public void testOneRegexp() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Field bodyField = new Field("body", "", fieldType);
  Document document = new Document();
  document.add(bodyField);

  // The same Document/Field pair is reused for both indexed values.
  bodyField.setStringValue("This is a test.");
  writer.addDocument(document);
  bodyField.setStringValue("Test a one sentence document.");
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);

  Query regexp = new RegexpQuery(new Term("body", "te.*"));
  TopDocs hits = searcher.search(regexp, 10, Sort.INDEXORDER);
  assertEquals(2, hits.totalHits.value);
  String[] snippets = highlighter.highlight("body", regexp, hits);
  assertEquals(2, snippets.length);
  assertEquals("This is a <b>test</b>.", snippets[0]);
  assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

  // wrong field
  highlighter.setFieldMatcher(null);//default
  BooleanQuery.Builder wrongFieldBuilder = new BooleanQuery.Builder();
  wrongFieldBuilder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
  wrongFieldBuilder.add(new RegexpQuery(new Term("bogus", "te.*")), BooleanClause.Occur.SHOULD);
  BooleanQuery wrongFieldQuery = wrongFieldBuilder.build();

  hits = searcher.search(wrongFieldQuery, 10, Sort.INDEXORDER);
  assertEquals(2, hits.totalHits.value);
  snippets = highlighter.highlight("body", wrongFieldQuery, hits);
  assertEquals(2, snippets.length);
  assertEquals("This is a test.", snippets[0]);
  assertEquals("Test a one sentence document.", snippets[1]);

  reader.close();
}
 
Example 11
Source File: TestBlockJoin.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that, for every hit of a {@code ToChildBlockJoinQuery}, the value
 * reported by {@code explain} equals the hit's score.
 */
public void testToChildBlockJoinQueryExplain() throws Exception {
  final Directory dir = newDirectory();
  final RandomIndexWriter w = new RandomIndexWriter(random(), dir);

  final List<Document> docs = new ArrayList<>();
  docs.add(makeJob("java", 2007));
  docs.add(makeJob("python", 2010));
  docs.add(makeResume("Lisa", "United Kingdom"));
  w.addDocuments(docs);

  docs.clear();
  docs.add(makeJob("java", 2006));
  docs.add(makeJob("ruby", 2005));
  docs.add(makeResume("Frank", "United States"));
  w.addDocuments(docs);
  w.deleteDocuments(new Term("skill", "java")); // delete the first child of every parent

  IndexReader r = w.getReader();
  w.close();
  try {
    IndexSearcher s = newSearcher(r, false);

    // Create a filter that defines "parent" documents in the index - in this case resumes
    BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "resume")));
    Query parentQuery = new PrefixQuery(new Term("country", "United"));

    ToChildBlockJoinQuery toChildQuery = new ToChildBlockJoinQuery(parentQuery, parentsFilter);

    TopDocs hits = s.search(toChildQuery, 10);
    // Expected value goes first so a failure message reads correctly.
    assertEquals(2, hits.scoreDocs.length);
    for (int i = 0; i < hits.scoreDocs.length; i++) {
      assertEquals(hits.scoreDocs[i].score, s.explain(toChildQuery, hits.scoreDocs[i].doc).getValue().doubleValue(), 0f);
    }
  } finally {
    // Release the reader and directory even when an assertion fails.
    r.close();
    dir.close();
  }
}
 
Example 12
Source File: IndexInfo.java    From alfresco-repository with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Returns the number of field names the main index reader reports for
 * {@code IndexReader.FieldOption.INDEXED}. The reader obtained for the
 * lookup is always closed before returning.
 *
 * @return the size of the indexed field-name collection
 * @throws IOException propagated from the reader
 */
public int getNumberOfIndexedFields() throws IOException
{
    final IndexReader mainReader = getMainIndexReferenceCountingReadOnlyIndexReader();
    int indexedFieldCount;
    try
    {
        indexedFieldCount = mainReader.getFieldNames(IndexReader.FieldOption.INDEXED).size();
    }
    finally
    {
        mainReader.close();
    }
    return indexedFieldCount;
}
 
Example 13
Source File: TestUnifiedHighlighterTermIntervals.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Uses a highlighter whose break iterator is a {@code WholeBreakIterator}
 * and checks that the single returned snippet marks up every occurrence of
 * "test" in the body text.
 */
public void testHighlightAllText() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Field bodyField = new Field("body", "", fieldType);
  Document document = new Document();
  document.add(bodyField);
  bodyField.setStringValue("This is a test.  Just highlighting from postings. This is also a much sillier test.  Feel free to test test test test test test test.");
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
    @Override
    protected BreakIterator getBreakIterator(String field) {
      return new WholeBreakIterator();
    }
  };
  highlighter.setMaxLength(10000);

  Query termInterval = new IntervalQuery("body", Intervals.term("test"));
  TopDocs hits = searcher.search(termInterval, 10, Sort.INDEXORDER);
  assertEquals(1, hits.totalHits.value);

  String[] snippets = highlighter.highlight("body", termInterval, hits, 2);
  assertEquals(1, snippets.length);

  String expected =
      "This is a <b>test</b>.  Just highlighting from postings. This is also a much sillier <b>test</b>.  Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.";
  assertEquals(expected, snippets[0]);

  reader.close();
}
 
Example 14
Source File: TestMatchAllDocsQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes 500 documents and runs a match-all query through two collectors:
 * one with a total-hits threshold of 200 (expects threshold + 1 counted hits
 * and a GREATER_THAN_OR_EQUAL_TO relation) and one with a threshold equal to
 * the document count (expects an exact count and an EQUAL_TO relation).
 */
public void testEarlyTermination() throws IOException {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(analyzer).setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy()));

  final int numDocs = 500;
  int added = 0;
  while (added < numDocs) {
    addDoc("doc" + added, iw);
    added++;
  }

  IndexReader ir = DirectoryReader.open(iw);
  IndexSearcher is = newSearcher(ir);

  // Threshold below the number of matches: counting may stop early.
  final int totalHitsThreshold = 200;
  TopScoreDocCollector thresholded = TopScoreDocCollector.create(10, null, totalHitsThreshold);
  is.search(new MatchAllDocsQuery(), thresholded);
  assertEquals(totalHitsThreshold+1, thresholded.totalHits);
  assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, thresholded.totalHitsRelation);

  // Threshold equal to the number of documents: the count is exact.
  TopScoreDocCollector exact = TopScoreDocCollector.create(10, null, numDocs);
  is.search(new MatchAllDocsQuery(), exact);
  assertEquals(numDocs, exact.totalHits);
  assertEquals(TotalHits.Relation.EQUAL_TO, exact.totalHitsRelation);

  iw.close();
  ir.close();
  dir.close();
}
 
Example 15
Source File: TokenSourcesTest.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Indexes one document from an {@code OverlappingTokenStream} with term
 * vector offsets (but no positions) and checks that an exact span-near query
 * for "the fox" is highlighted as one fragment.
 */
public void testOverlapWithOffsetExactPhrase()
    throws IOException, InvalidTokenOffsetsException {
  final String TEXT = "the fox did not jump";
  final Directory directory = newDirectory();
  final IndexWriter indexWriter = new IndexWriter(directory,
      newIndexWriterConfig(null));
  try {
    FieldType vectorOffsetsOnly = new FieldType(TextField.TYPE_NOT_STORED);
    vectorOffsetsOnly.setStoreTermVectors(true);
    // no positions!
    vectorOffsetsOnly.setStoreTermVectorOffsets(true);

    final Document document = new Document();
    document.add(new Field(FIELD, new OverlappingTokenStream(), vectorOffsetsOnly));
    indexWriter.addDocument(document);
  } finally {
    indexWriter.close();
  }

  final IndexReader indexReader = DirectoryReader.open(directory);
  try {
    assertEquals(1, indexReader.numDocs());
    final IndexSearcher indexSearcher = newSearcher(indexReader);

    // Ordered, zero-slop span query: "the" immediately followed by "fox".
    final SpanQuery[] clauses = new SpanQuery[] {
        new SpanTermQuery(new Term(FIELD, "the")),
        new SpanTermQuery(new Term(FIELD, "fox"))};
    final Query phraseQuery = new SpanNearQuery(clauses, 0, true);

    TopDocs hits = indexSearcher.search(phraseQuery, 1);
    assertEquals(1, hits.totalHits.value);

    final Highlighter highlighter = new Highlighter(
        new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
        new QueryScorer(phraseQuery));
    final TokenStream tokenStream =
        TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
    final String bestFragment = highlighter.getBestFragment(tokenStream, TEXT);
    assertEquals("<B>the fox</B> did not jump", bestFragment);
  } finally {
    indexReader.close();
    directory.close();
  }
}
 
Example 16
Source File: TestCachingTokenFilter.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Exercises {@code CachingTokenFilter} around a hand-written token stream:
 * the tokens are consumed twice before indexing, the document is indexed and
 * the postings of "term1", "term2" and "term3" are checked, and the tokens
 * are consumed once more afterwards. Finally asserts that the wrapped
 * stream's {@code reset()} ran exactly once.
 */
public void testCaching() throws IOException {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  // Counts how often reset() reaches the wrapped (uncached) stream.
  AtomicInteger resetCount = new AtomicInteger(0);
  // Source stream: emits the entries of `tokens` in order, all with offset (0,0).
  TokenStream stream = new TokenStream() {
    private int index = 0;
    private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);

    @Override
    public void reset() throws IOException {
      super.reset();
      resetCount.incrementAndGet();
    }

    @Override
    public boolean incrementToken() {
      if (index == tokens.length) {
        return false;
      } else {
        clearAttributes();
        termAtt.append(tokens[index++]);
        offsetAtt.setOffset(0,0);
        return true;
      }        
    }
    
  };

  stream = new CachingTokenFilter(stream);

  doc.add(new TextField("preanalyzed", stream));

  // 1) we consume all tokens twice before we add the doc to the index
  assertFalse(((CachingTokenFilter)stream).isCached());
  stream.reset();
  assertFalse(((CachingTokenFilter) stream).isCached());
  checkTokens(stream);
  stream.reset();  
  checkTokens(stream);
  assertTrue(((CachingTokenFilter)stream).isCached());

  // 2) now add the document to the index and verify if all tokens are indexed
  //    don't reset the stream here, the DocumentWriter should do that implicitly
  writer.addDocument(doc);
  
  IndexReader reader = writer.getReader();
  // "term1": expected once, at position 0
  PostingsEnum termPositions = MultiTerms.getTermPostingsEnum(reader,
                                                                        "preanalyzed",
                                                                        new BytesRef("term1"));
  assertTrue(termPositions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  assertEquals(1, termPositions.freq());
  assertEquals(0, termPositions.nextPosition());

  // "term2": expected twice, at positions 1 and 3
  termPositions = MultiTerms.getTermPostingsEnum(reader,
                                                   "preanalyzed",
                                                   new BytesRef("term2"));
  assertTrue(termPositions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  assertEquals(2, termPositions.freq());
  assertEquals(1, termPositions.nextPosition());
  assertEquals(3, termPositions.nextPosition());
  
  // "term3": expected once, at position 2
  termPositions = MultiTerms.getTermPostingsEnum(reader,
                                                   "preanalyzed",
                                                   new BytesRef("term3"));
  assertTrue(termPositions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  assertEquals(1, termPositions.freq());
  assertEquals(2, termPositions.nextPosition());
  reader.close();
  writer.close();
  // 3) reset stream and consume tokens again
  stream.reset();
  checkTokens(stream);

  // However often the caching filter was reset above, the wrapped stream
  // is expected to have been reset only once.
  assertEquals(1, resetCount.get());

  dir.close();
}
 
Example 17
Source File: TestDocTermOrds.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Indexes three documents ("a b c", "d e f", "a f") into one text field and
 * checks, document by document, the sequence of term ordinals returned by
 * the {@code DocTermOrds} iterator.
 */
public void testSimple() throws Exception {
  Directory dir = newDirectory();
  final RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
  Document doc = new Document();
  Field field = newTextField("field", "", Field.Store.NO);
  doc.add(field);
  // The same Document/Field pair is reused; only the field value changes.
  field.setStringValue("a b c");
  w.addDocument(doc);

  field.setStringValue("d e f");
  w.addDocument(doc);

  field.setStringValue("a f");
  w.addDocument(doc);
  
  final IndexReader r = w.getReader();
  w.close();

  final LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
  TestUtil.checkReader(ar);
  final DocTermOrds dto = new DocTermOrds(ar, ar.getLiveDocs(), "field");
  SortedSetDocValues iter = dto.iterator(ar);
  
  // doc 0 ("a b c"): ords 0, 1, 2
  assertEquals(0, iter.nextDoc());
  assertEquals(0, iter.nextOrd());
  assertEquals(1, iter.nextOrd());
  assertEquals(2, iter.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
  
  // doc 1 ("d e f"): ords 3, 4, 5
  assertEquals(1, iter.nextDoc());
  assertEquals(3, iter.nextOrd());
  assertEquals(4, iter.nextOrd());
  assertEquals(5, iter.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());

  // doc 2 ("a f"): ords 0 and 5
  assertEquals(2, iter.nextDoc());
  assertEquals(0, iter.nextOrd());
  assertEquals(5, iter.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());

  r.close();
  dir.close();
}
 
Example 18
Source File: DependentTermQueryBuilderTest.java    From querqy with Apache License 2.0 4 votes vote down vote up
/**
 * Builds an index with four "f1:v1" documents and one "f2:v1 v1" document,
 * creates dependent term queries for both terms and checks the boost,
 * docFreq and freq lines in the explanation of the f2 query's top hit.
 */
@Test
public void testCreateWeight() throws Exception {

    Analyzer analyzer = new StandardAnalyzer();
    Directory directory = new ByteBuffersDirectory();

    IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
    writerConfig.setSimilarity(new ClassicSimilarity());
    IndexWriter writer = new IndexWriter(directory, writerConfig);
    TestUtil.addNumDocsWithTextField("f1", "v1", writer, 4);
    TestUtil.addNumDocsWithTextField("f2", "v1 v1", writer, 1);
    writer.close();

    IndexReader reader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(new ClassicSimilarity());

    // Register both terms in one clause of the document frequency correction.
    DocumentFrequencyCorrection correction = new DocumentFrequencyCorrection();
    Term termInF1 = new Term("f1", "v1");
    Term termInF2 = new Term("f2", "v1");
    correction.newClause();
    correction.prepareTerm(termInF1);
    correction.prepareTerm(termInF2);
    correction.finishedUserQuery();

    // The f1 query is created but never searched, as in the original test.
    DependentTermQueryBuilder.DependentTermQuery queryOnF1 =
            new DependentTermQueryBuilder(correction).createTermQuery(termInF1, fieldBoost1);
    DependentTermQueryBuilder.DependentTermQuery queryOnF2 =
            new DependentTermQueryBuilder(correction).createTermQuery(termInF2, fieldBoost2);

    TopDocs topDocs = searcher.search(queryOnF2, 10);
    int topDocId = topDocs.scoreDocs[0].doc;

    final Weight weightOnF2 = queryOnF2.createWeight(searcher, ScoreMode.COMPLETE, 4.5f);
    final Explanation explanation = weightOnF2.explain(reader.leaves().get(0), topDocId);
    String explainText = explanation.toString();

    String[] expectedFragments = {
        "9.0 = boost",   // 4.5 (query) * 2.0 (field)
        "4 = docFreq",   // 4 * df of f1:v1
        "2.0 = freq",    // don't use tf
    };
    for (String fragment : expectedFragments) {
        assertTrue(explainText.contains(fragment));
    }

    reader.close();
    directory.close();
    analyzer.close();

}
 
Example 19
Source File: TestUnifiedHighlighterMTQ.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a query with two MTQs and confirms the formatter
 * can tell which query matched which hit.
 * <p>
 * The same top docs are highlighted twice: first with the highlighter from
 * {@code randomUnifiedHighlighter}, then with a highlighter whose custom
 * {@code PassageFormatter} appends each match's term text inside the
 * {@code <b>} tag.
 */
public void testWhichMTQMatched() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Field body = new Field("body", "", fieldType);
  Document doc = new Document();
  doc.add(body);

  body.setStringValue("Test a one sentence document.");
  iw.addDocument(doc);

  IndexReader ir = iw.getReader();
  iw.close();

  IndexSearcher searcher = newSearcher(ir);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
  // use a variety of common MTQ types
  BooleanQuery query = new BooleanQuery.Builder()
      .add(new PrefixQuery(new Term("body", "te")), BooleanClause.Occur.SHOULD)
      .add(new WildcardQuery(new Term("body", "*one*")), BooleanClause.Occur.SHOULD)
      .add(new FuzzyQuery(new Term("body", "zentence~")), BooleanClause.Occur.SHOULD)
      .build();
  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(1, topDocs.totalHits.value);
  String snippets[] = highlighter.highlight("body", query, topDocs);
  assertEquals(1, snippets.length);

  // Default formatter just bolds each hit:
  assertEquals("<b>Test</b> a <b>one</b> <b>sentence</b> document.", snippets[0]);

  // Now use our own formatter, that also stuffs the
  // matching term's text into the result:
  highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {

    @Override
    protected PassageFormatter getFormatter(String field) {
      return new PassageFormatter() {

        @Override
        public Object format(Passage passages[], String content) {
          // Copied from DefaultPassageFormatter, but
          // tweaked to include the matched term:
          StringBuilder sb = new StringBuilder();
          // pos: offset in content up to which text has already been appended
          int pos = 0;
          for (Passage passage : passages) {
            // don't add ellipsis if its the first one, or if its connected.
            if (passage.getStartOffset() > pos && pos > 0) {
              sb.append("... ");
            }
            pos = passage.getStartOffset();
            for (int i = 0; i < passage.getNumMatches(); i++) {
              int start = passage.getMatchStarts()[i];
              int end = passage.getMatchEnds()[i];
              // its possible to have overlapping terms
              if (start > pos) {
                sb.append(content, pos, start);
              }
              if (end > pos) {
                sb.append("<b>");
                sb.append(content, Math.max(pos, start), end);
                // append the matched term's text in parentheses, inside the bold tag
                sb.append('(');
                sb.append(passage.getMatchTerms()[i].utf8ToString());
                sb.append(')');
                sb.append("</b>");
                pos = end;
              }
            }
            // its possible a "term" from the analyzer could span a sentence boundary.
            sb.append(content, pos, Math.max(pos, passage.getEndOffset()));
            pos = passage.getEndOffset();
          }
          return sb.toString();
        }
      };
    }
  };

  // topDocs from the search above is reused; no new search is run here.
  assertEquals(1, topDocs.totalHits.value);
  snippets = highlighter.highlight("body", query, topDocs);
  assertEquals(1, snippets.length);

  assertEquals("<b>Test(body:te*)</b> a <b>one(body:*one*)</b> <b>sentence(body:zentence~~2)</b> document.", snippets[0]);

  ir.close();
}
 
Example 20
Source File: TestUnifiedHighlighterTermIntervals.java    From lucene-solr with Apache License 2.0 3 votes vote down vote up
/**
 * Indexes {@code bodyText} as a single "body" document, searches it for the
 * interval term "test" (asserting exactly one hit) and returns the snippets
 * produced by a highlighter whose maximum length is set to 17.
 *
 * @param bodyText the text to index and highlight
 * @return the snippets returned by the highlighter for the "body" field
 * @throws IOException propagated from index or search operations
 */
private String[] formatWithMatchExceedingContentLength(String bodyText) throws IOException {
  final int snippetMaxLength = 17;

  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Document document = new Document();
  document.add(new Field("body", bodyText, fieldType));
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  Query termInterval = new IntervalQuery("body", Intervals.term("test"));
  TopDocs hits = searcher.search(termInterval, 10, Sort.INDEXORDER);
  assertEquals(1, hits.totalHits.value);

  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
  highlighter.setMaxLength(snippetMaxLength);
  String[] highlighted = highlighter.highlight("body", termInterval, hits);

  reader.close();
  return highlighted;
}