Java Code Examples for org.apache.lucene.search.IndexSearcher#doc()

The following examples show how to use org.apache.lucene.search.IndexSearcher#doc(). They are drawn from a range of open source projects; the originating project and source file are noted above each example.
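
Before the project-specific examples, here is a minimal, self-contained sketch of the usual pattern: run a search, then pass each hit's internal document id (ScoreDoc.doc) to IndexSearcher#doc() to load the stored fields for that hit. This is only an illustrative sketch, not code from any of the projects below; it assumes a Lucene 8.x-style API, and the index path, field names ("contents", "path") and query text are made-up placeholders.

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class IndexSearcherDocSketch {
  public static void main(String[] args) throws Exception {
    // Open an existing index (hypothetical path).
    try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
         DirectoryReader reader = DirectoryReader.open(dir)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      // Build a query against a hypothetical "contents" field.
      Query query = new QueryParser("contents", new StandardAnalyzer()).parse("lucene");
      TopDocs topDocs = searcher.search(query, 10);
      for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        // doc() maps the hit's internal doc id back to its stored Document.
        Document stored = searcher.doc(scoreDoc.doc);
        System.out.println(stored.get("path") + " score=" + scoreDoc.score);
      }
    }
  }
}

The examples below follow this same pattern; they differ mainly in how the query is built and in which stored fields are read back from the returned Document.
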
Example 1
Source File: TestField.java    From lucene-solr with Apache License 2.0
public void testIndexedBinaryField() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  BytesRef br = new BytesRef(new byte[5]);
  Field field = new StringField("binary", br, Field.Store.YES);
  assertEquals(br, field.binaryValue());
  doc.add(field);
  w.addDocument(doc);
  IndexReader r = w.getReader();

  IndexSearcher s = newSearcher(r);
  TopDocs hits = s.search(new TermQuery(new Term("binary", br)), 1);
  assertEquals(1, hits.totalHits.value);
  Document storedDoc = s.doc(hits.scoreDocs[0].doc);
  assertEquals(br, storedDoc.getField("binary").binaryValue());

  r.close();
  w.close();
  dir.close();
}
 
Example 2
Source File: TestCoreParser.java    From lucene-solr with Apache License 2.0
protected void dumpResults(String qType, Query q, int numDocs) throws IOException {
  if (VERBOSE) {
    System.out.println("TEST: qType=" + qType + " numDocs=" + numDocs + " " + q.getClass().getCanonicalName() + " query=" + q);
  }
  final IndexSearcher searcher = searcher();
  TopDocs hits = searcher.search(q, numDocs);
  final boolean producedResults = (hits.totalHits.value > 0);
  if (!producedResults) {
    System.out.println("TEST: qType=" + qType + " numDocs=" + numDocs + " " + q.getClass().getCanonicalName() + " query=" + q);
  }
  if (VERBOSE) {
    ScoreDoc[] scoreDocs = hits.scoreDocs;
    for (int i = 0; i < Math.min(numDocs, hits.totalHits.value); i++) {
      Document ldoc = searcher.doc(scoreDocs[i].doc);
      System.out.println("[" + ldoc.get("date") + "]" + ldoc.get("contents"));
    }
    System.out.println();
  }
  assertTrue(qType + " produced no results", producedResults);
}
 
Example 3
Source File: TestReadOnlyIndex.java    From lucene-solr with Apache License 2.0
private Void doTestReadOnlyIndex() throws Exception {
  Directory dir = FSDirectory.open(indexPath); 
  IndexReader ireader = DirectoryReader.open(dir); 
  IndexSearcher isearcher = newSearcher(ireader);
  
  // borrows from TestDemo, but not important to keep in sync with demo

  assertEquals(1, isearcher.count(new TermQuery(new Term("fieldname", longTerm))));
  Query query = new TermQuery(new Term("fieldname", "text"));
  TopDocs hits = isearcher.search(query, 1);
  assertEquals(1, hits.totalHits.value);
  // Iterate through the results:
  for (int i = 0; i < hits.scoreDocs.length; i++) {
    Document hitDoc = isearcher.doc(hits.scoreDocs[i].doc);
    assertEquals(text, hitDoc.get("fieldname"));
  }

  // Test simple phrase query
  PhraseQuery phraseQuery = new PhraseQuery("fieldname", "to", "be");
  assertEquals(1, isearcher.count(phraseQuery));

  ireader.close();
  return null; // void
}
 
Example 4
Source File: HighlighterTest.java    From lucene-solr with Apache License 2.0
private void searchIndex() throws IOException, InvalidTokenOffsetsException {
  Query query = new TermQuery(new Term("t_text1", "random"));
  IndexReader reader = DirectoryReader.open(dir1);
  IndexSearcher searcher = newSearcher(reader);
  // This scorer can return negative idf -> null fragment
  Scorer scorer = new QueryTermScorer( query, searcher.getIndexReader(), "t_text1" );
  // This scorer doesn't use idf (patch version)
  //Scorer scorer = new QueryTermScorer( query, "t_text1" );
  Highlighter h = new Highlighter( scorer );

  TopDocs hits = searcher.search(query, 10);
  for( int i = 0; i < hits.totalHits.value; i++ ){
    Document doc = searcher.doc( hits.scoreDocs[i].doc );
    String result = h.getBestFragment( a, "t_text1", doc.get( "t_text1" ));
    if (VERBOSE) System.out.println("result:" +  result);
    assertEquals("more <B>random</B> words for second field", result);
  }
  reader.close();
}
 
Example 5
Source File: RealtimeLuceneTextIndexReader.java    From incubator-pinot with Apache License 2.0
private MutableRoaringBitmap getPinotDocIds(IndexSearcher indexSearcher, MutableRoaringBitmap luceneDocIds) {
  IntIterator luceneDocIDIterator = luceneDocIds.getIntIterator();
  MutableRoaringBitmap actualDocIDs = new MutableRoaringBitmap();
  try {
    while (luceneDocIDIterator.hasNext()) {
      int luceneDocId = luceneDocIDIterator.next();
      Document document = indexSearcher.doc(luceneDocId);
      int pinotDocId = Integer.valueOf(document.get(LuceneTextIndexCreator.LUCENE_INDEX_DOC_ID_COLUMN_NAME));
      actualDocIDs.add(pinotDocId);
    }
  } catch (Exception e) {
    LOGGER.error("Failure while retrieving document from index for column {}, exception {}", _column, e.getMessage());
    throw new RuntimeException(e);
  }
  return actualDocIDs;
}
 
Example 6
Source File: LuceneExample.java    From yuzhouwan with Apache License 2.0
public static void main(String[] args) throws Exception {
    // index
    try (Directory index = new NIOFSDirectory(Paths.get("/tmp/index"))) {
        // add
        try (IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            doc.add(new TextField("blog", "yuzhouwan.com", Field.Store.YES));
            doc.add(new StringField("github", "asdf2014", Field.Store.YES));
            writer.addDocument(doc);
            writer.commit();
        }
        // search
        try (DirectoryReader reader = DirectoryReader.open(index)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            QueryParser parser = new QueryParser("blog", new StandardAnalyzer());
            Query query = parser.parse("yuzhouwan.com");
            ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
            for (ScoreDoc hit : hits) {
                Document hitDoc = searcher.doc(hit.doc);
                System.out.println(hitDoc.get("blog"));
            }
        }
    }
}
 
Example 7
Source File: SearchFiles.java    From Java-Data-Science-Cookbook with MIT License
public static void main(String[] args) throws Exception {
	IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(INDEX_DIRECTORY)));
	IndexSearcher indexSearcher = new IndexSearcher(reader);

	Analyzer analyzer = new StandardAnalyzer();
	QueryParser queryParser = new QueryParser(FIELD_CONTENTS, analyzer);
	String searchString = "shakespeare";
	Query query = queryParser.parse(searchString);

	TopDocs results = indexSearcher.search(query, 5);
	ScoreDoc[] hits = results.scoreDocs;

	int numTotalHits = results.totalHits;
	System.out.println(numTotalHits + " total matching documents");

	for(int i=0;i<hits.length;++i) {
		int docId = hits[i].doc;
		Document d = indexSearcher.doc(docId);
		System.out.println((i + 1) + ". " + d.get("path") + " score=" + hits[i].score);
	}
}
 
Example 8
Source File: MusicSearch.java    From Easy-Cassandra-samples with Apache License 2.0
private List<String> returnMusics(Query query) throws IOException {
	int hitsPerPage = 10;
	IndexReader reader = DirectoryReader.open(LuceneUtil.INSTANCE.getDirectory());
	IndexSearcher searcher = new IndexSearcher(reader);
	TopScoreDocCollector collector = TopScoreDocCollector.create(
			hitsPerPage, true);
	searcher.search(query, collector);
	ScoreDoc[] hits = collector.topDocs().scoreDocs;

	List<String> musics = new LinkedList<>();
	for (int i = 0; i < hits.length; ++i) {
		int docId = hits[i].doc;
		Document d = searcher.doc(docId);
		musics.add(d.get(COLUMN_NAME));
	}
	return musics;
}
 
Example 9
Source File: TestBackwardsCompatibility.java    From lucene-solr with Apache License 2.0
public void changeIndexNoAdds(Random random, Directory dir) throws IOException {
  // make sure searching sees right # hits
  DirectoryReader reader = DirectoryReader.open(dir);
  IndexSearcher searcher = newSearcher(reader);
  ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), 1000).scoreDocs;
  assertEquals("wrong number of hits", 34, hits.length);
  Document d = searcher.doc(hits[0].doc);
  assertEquals("wrong first document", "0", d.get("id"));
  reader.close();

  // fully merge
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random))
                                              .setOpenMode(OpenMode.APPEND));
  writer.forceMerge(1);
  writer.close();

  reader = DirectoryReader.open(dir);
  searcher = newSearcher(reader);
  hits = searcher.search(new TermQuery(new Term("content", "aaa")), 1000).scoreDocs;
  assertEquals("wrong number of hits", 34, hits.length);
  doTestHits(hits, 34, searcher.getIndexReader());
  reader.close();
}
 
Example 10
Source File: NLPIRTokenizerTest.java    From nlpir-analysis-cn-ictclas with Apache License 2.0
public static void main(String[] args) throws Exception {
	// NLPIR
	NLPIRTokenizerAnalyzer nta = new NLPIRTokenizerAnalyzer("", 1, "", "", false);
	// Index
	IndexWriterConfig inconf = new IndexWriterConfig(nta);
	inconf.setOpenMode(OpenMode.CREATE_OR_APPEND);
	IndexWriter index = new IndexWriter(FSDirectory.open(Paths.get("index/")), inconf);
	Document doc = new Document();
	doc.add(new TextField("contents",
			"特朗普表示,很高兴汉堡会晤后再次同习近平主席通话。我同习主席就重大问题保持沟通和协调、两国加强各层级和各领域交往十分重要。当前,美中关系发展态势良好,我相信可以发展得更好。我期待着对中国进行国事访问。",
			Field.Store.YES));
	index.addDocument(doc);
	index.flush();
	index.close();
	// Search
	String field = "contents";
	IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("index/")));
	IndexSearcher searcher = new IndexSearcher(reader);
	QueryParser parser = new QueryParser(field, nta);
	Query query = parser.parse("特朗普习近平");
	TopDocs top = searcher.search(query, 100);
	System.out.println("总条数:" + top.totalHits);
	ScoreDoc[] hits = top.scoreDocs;
	for (int i = 0; i < hits.length; i++) {
		System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
		Document d = searcher.doc(hits[i].doc);
		System.out.println(d.get("contents"));
	}

}
 
Example 11
Source File: TestICUCollationDocValuesField.java    From lucene-solr with Apache License 2.0
private void doTestRanges(IndexSearcher is, String startPoint, String endPoint, BytesRef startBR, BytesRef endBR, Collator collator) throws Exception { 
  SortedDocValues dvs = MultiDocValues.getSortedValues(is.getIndexReader(), "collated");
  for(int docID=0;docID<is.getIndexReader().maxDoc();docID++) {
    Document doc = is.doc(docID);
    String s = doc.getField("field").stringValue();
    boolean collatorAccepts = collator.compare(s, startPoint) >= 0 && collator.compare(s, endPoint) <= 0;
    assertEquals(docID, dvs.nextDoc());
    BytesRef br = dvs.binaryValue();
    boolean luceneAccepts = br.compareTo(startBR) >= 0 && br.compareTo(endBR) <= 0;
    assertEquals(collatorAccepts, luceneAccepts);
  }
}
 
Example 12
Source File: TestSearchForDuplicates.java    From lucene-solr with Apache License 2.0
private void checkHits(ScoreDoc[] hits, int expectedCount, IndexSearcher searcher) throws IOException {
  assertEquals("total results", expectedCount, hits.length);
  for (int i = 0 ; i < hits.length; i++) {
    if (i < 10 || (i > 94 && i < 105) ) {
      Document d = searcher.doc(hits[i].doc);
      assertEquals("check " + i, String.valueOf(i), d.get(ID_FIELD));
    }
  }
}
 
Example 13
Source File: Search.java    From fnlp with GNU Lesser General Public License v3.0
/**
 * @param args
 * @throws IOException 
 * @throws ParseException 
 * @throws LoadModelException 
 */
public static void main(String[] args) throws IOException, ParseException, LoadModelException {
	String indexPath = "../tmp/lucene";
	System.out.println("Index directory '" + indexPath);
	Date start = new Date();
	Directory dir = FSDirectory.open(new File(indexPath));
	// CNFactory must be initialized first
	CNFactory factory = CNFactory.getInstance("../models", Models.SEG_TAG);
	Analyzer analyzer = new FNLPAnalyzer(Version.LUCENE_47);
	// Now search the index:
	DirectoryReader ireader = DirectoryReader.open(dir);
	IndexSearcher isearcher = new IndexSearcher(ireader);
	// Parse a simple query that searches for "text":
	QueryParser parser = new QueryParser(Version.LUCENE_47, "content", analyzer);
	Query query = parser.parse("保修费用");
	ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;
	
	System.out.println("Hello World");
	// Iterate through the results:
	for (int i = 0; i < hits.length; i++) {
		Document hitDoc = isearcher.doc(hits[i].doc);
		System.out.println(hitDoc.get("content"));
		System.out.println(hits[i].score);
		
	}

	ireader.close();
	dir.close();
}
 
Example 14
Source File: CollationTestBase.java    From lucene-solr with Apache License 2.0
private void assertMatches(IndexSearcher searcher, Query query, Sort sort, 
                           String expectedResult) throws IOException {
  ScoreDoc[] result = searcher.search(query, 1000, sort).scoreDocs;
  StringBuilder buff = new StringBuilder(10);
  int n = result.length;
  for (int i = 0 ; i < n ; ++i) {
    Document doc = searcher.doc(result[i].doc);
    IndexableField[] v = doc.getFields("tracer");
    for (int j = 0 ; j < v.length ; ++j) {
      buff.append(v[j].stringValue());
    }
  }
  assertEquals(expectedResult, buff.toString());
}
 
Example 15
Source File: AclDiscoverFieldTypeDefinitionTest.java    From incubator-retired-blur with Apache License 2.0
private void test(int expected, boolean rowQuery, Collection<String> discoverAuthorizations) throws IOException,
    ParseException {
  DirectoryReader reader = DirectoryReader.open(_dir);
  SuperParser parser = new SuperParser(Version.LUCENE_43, _fieldManager, rowQuery, null, ScoreType.SUPER, new Term(
      BlurConstants.PRIME_DOC, BlurConstants.PRIME_DOC_VALUE));

  Query query = parser.parse("fam.string:value");

  Collection<String> readAuthorizations = null;
  Set<String> discoverableFields = new HashSet<String>();
  discoverableFields.add("rowid");
  discoverableFields.add("recordid");
  discoverableFields.add("family");
  IndexSearcher searcher = new SecureIndexSearcher(reader, getAccessControlFactory(), readAuthorizations,
      discoverAuthorizations, discoverableFields, null);

  TopDocs topDocs = searcher.search(query, 10);
  assertEquals(expected, topDocs.totalHits);
  for (int i = 0; i < expected; i++) {
    int doc = topDocs.scoreDocs[i].doc;
    Document document = searcher.doc(doc);
    List<IndexableField> fields = document.getFields();
    for (IndexableField field : fields) {
      assertTrue(discoverableFields.contains(field.name()));
    }
  }
  reader.close();
}
 
Example 16
Source File: MutatableActionTest.java    From incubator-retired-blur with Apache License 2.0
@Test
public void testAppendColumns() throws IOException {
  RAMDirectory directory = new RAMDirectory();
  DirectoryReader reader = getIndexReader(directory);
  IndexWriter writer = new IndexWriter(directory, _conf.clone());
  assertEquals(0, reader.numDocs());

  Row row = genRow();
  List<Column> cols = new ArrayList<Column>();
  cols.add(new Column("n", "v"));
  row.addToRecords(new Record("1", "fam", cols));

  _action.replaceRow(row);
  _action.performMutate(getSearcher(reader, directory), writer);
  reader = commitAndReopen(reader, writer);
  assertEquals(2, reader.numDocs());

  cols.clear();
  cols.add(new Column("n2", "v2"));
  Record record = new Record("1", "fam", cols);
  _action.appendColumns(row.getId(), record);
  _action.performMutate(getSearcher(reader, directory), writer);
  reader = commitAndReopen(reader, writer);
  assertEquals(2, reader.numDocs());

  IndexSearcher searcher = new IndexSearcher(reader);
  TopDocs topDocs = searcher.search(new TermQuery(new Term(BlurConstants.ROW_ID, row.getId())), 10);
  Document doc2 = searcher.doc(topDocs.scoreDocs[1].doc);
  List<IndexableField> fields = doc2.getFields();
  assertEquals(fields.size(), 5);
  String value = doc2.get("fam.n2");
  assertEquals("v2", value);
}
 
Example 17
Source File: FuzzyQueryDemo.java    From elasticsearch-full with Apache License 2.0
public static void main(String []args) throws IOException {
    Analyzer analyzer = new StandardAnalyzer();

    Directory directory = FSDirectory.open(Paths.get("/Users/admin/lucene"));

    DirectoryReader ireader = DirectoryReader.open(directory);

    IndexSearcher indexSearcher  = new IndexSearcher(ireader);

    Term term = new Term("fieldname","国");
    FuzzyQuery query = new FuzzyQuery(term);

    ScoreDoc[] hits = indexSearcher.search(query, 10, Sort.INDEXORDER).scoreDocs;

    for (int i = 0; i < hits.length; i++) {
        Document hitDoc = indexSearcher.doc(hits[i].doc);
        System.out.println(hitDoc.toString()+","+hits[i].score);
    }
    ireader.close();
    directory.close();

}
 
Example 18
Source File: Lucene.java    From StreamingRec with Apache License 2.0
@Override
public LongArrayList recommendInternal(ClickData clickData) {
	//create a result list
	LongArrayList results = new LongArrayList();
	try {
		//determine the input query, which can either be based on the current item 
		//or all items from the current session depending on the configuration
		String input;
		if (!wholeSession){
			//extract the content from the current item
			input = extractContent(clickData.click.item);
		}else{
			//iteratively append the content of every item from the current user session
			input="";
			for (int i = 0 ; i<clickData.session.size(); i++ ){
				input += " "+ extractContent(clickData.session.get(i).item);
			}
		}
		//avoid an exception that happens for too large queries
		BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
		//create a query
		Query q = new QueryParser("text", analyzer)
				.parse(QueryParserUtil.escape(input));
		//set an unreasonably high retrieval amount, because we want a long recommendation list
		int hitsPerPage = 100000;
		//instantiate the retrieval objects
		IndexReader reader = DirectoryReader.open(index);
		IndexSearcher searcher = new IndexSearcher(reader);
		TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage);
		//execute the query
		searcher.search(q, collector);
		//iterate the hits and extract the item ids
		ScoreDoc[] hits = collector.topDocs().scoreDocs;
		for (int i = 1; i < hits.length; ++i) {
			if (hits[i].score < minScore) {
				//stop retrieving, if the lucene score is too low
				break;
			}
			int docId = hits[i].doc;
			Document d = searcher.doc(docId);
			results.add(Long.parseLong(d.get("id")));
		}
		reader.close();
	} catch (ParseException | IOException e) {
		e.printStackTrace();
	}
	//return the results
	return results;
}
 
Example 19
Source File: SORecommender.java    From scava with Eclipse Public License 2.0
@Override
public Recommendation getRecommendation(Query rec_query) {
	Recommendation rec = new Recommendation();
	try {

		String compUnit = "";
		if (rec_query.getSoRecommendationSelection() == null
				|| rec_query.getSoRecommendationSelection().isEmpty()) {
			Map<String, String> param = new HashMap<String, String>();
			param.put("ImportDeclaration", "OR");
			param.put("MethodDeclaration", "OR");
			param.put("MethodInvocation", "OR");
			param.put("VariableDeclaration", "OR");
			param.put("ClassInstance", "OR");
			param.put("VariableDeclarationType", "OR");
			compUnit = makeBoostedQuery(rec_query.getCompilationUnit(), param);
		} else
			compUnit = makeBoostedQuery(rec_query.getCompilationUnit(), rec_query.getSoRecommendationSelection());

		File indexDirectory = new File(INDEX_DIRECTORY);
		Directory indexDir = FSDirectory.open(Paths.get(indexDirectory.getAbsolutePath()));
		IndexReader reader = DirectoryReader.open(indexDir);
		IndexSearcher searcher = new IndexSearcher(reader);
		List<String> fields2 = getAllIndexTags(INDEX_DIRECTORY);
		String[] fields = new String[fields2.size()];
		fields = fields2.toArray(fields);
		Analyzer analyzer = new StandardAnalyzer();
		MultiFieldQueryParser qp = new MultiFieldQueryParser(fields, analyzer);
		org.apache.lucene.search.Query q = qp.parse(compUnit);
		TopDocs results = executeQuery(q);
		if (results != null) {
			int counter = 0;
			ArrayList<Explanation> expls = new ArrayList<Explanation>();
			ArrayList<String> Ids = new ArrayList<String>();
			for (ScoreDoc result : results.scoreDocs) {
				if (counter < luceneTreshold) {
					RecommendationItem ri = new RecommendationItem();
					org.apache.lucene.document.Document d = searcher.doc(result.doc);
					ri.setApiDocumentationLink(d.get("ID_POST"));
					expls.add(searcher.explain(q, result.doc));
					ri.setSignificance(result.score);
					Ids.add(d.get("ID_POST"));
					counter += 1;
					rec.getRecommendationItems().add(ri);
				}
			}
		}
	} catch (IOException | ParseException e) {
		logger.error(e.getMessage());
	}
	return rec;
}
 
Example 20
Source File: FieldBoostTermQueryBuilderTest.java    From querqy with Apache License 2.0
@Test
public void testThatResultsAreFound() throws Exception {

    Analyzer analyzer = new KeywordAnalyzer();

    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, analyzer);

    TestUtil.addNumDocsWithStringField("f1", "v1", indexWriter, 1);
    TestUtil.addNumDocsWithStringField("f1", "v2", indexWriter, 1);

    indexWriter.close();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = newSearcher(indexReader);


    Term term = new Term("f1", "v1");

    FieldBoostTermQueryBuilder.FieldBoostTermQuery query = new FieldBoostTermQueryBuilder()
            .createTermQuery(term, fieldBoost1);

    TopDocs topDocs = indexSearcher.search(query, 10);

    assertEquals(1, topDocs.totalHits.value);
    Document resultDoc = indexSearcher.doc(topDocs.scoreDocs[0].doc);
    assertEquals("v1", resultDoc.get("f1"));

    indexReader.close();
    directory.close();
    analyzer.close();

}