Java Code Examples for org.apache.lucene.document.Document#get()

The following examples show how to use org.apache.lucene.document.Document#get(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: LuceneResultSet.java    From orientdb-lucene with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the record id of the next hit.
 *
 * <p>Once the local page of hits is exhausted, the local cursor is reset and
 * {@code fetchMoreResult()} is invoked before reading. If reading the stored document
 * raises an {@link IOException}, the stack trace is printed and whatever id was built
 * so far (possibly {@code null}) is returned. The overall position counter is advanced
 * in every case.
 */
@Override
public OIdentifiable next() {
  if (localIndex == array.length) {
    localIndex = 0;
    fetchMoreResult();
  }
  final ScoreDoc hit = array[localIndex++];
  OContextualRecordId recordId = null;
  try {
    final Document stored = queryContext.searcher.doc(hit.doc);
    recordId = new OContextualRecordId(stored.get(OLuceneIndexManagerAbstract.RID));
    manager.onRecordAddedToResultSet(queryContext, recordId, stored, hit);
  } catch (IOException e) {
    e.printStackTrace();
  }
  index++;
  return recordId;
}
 
Example 2
Source File: LtrQueryTests.java    From elasticsearch-learning-to-rank with Apache License 2.0 6 votes vote down vote up
/**
 * Asserts that the score of {@code scoreDoc} equals the expected model score, and, unless
 * the similarity under test is a {@link TFIDFSimilarity}, that the explanation value
 * produced for the query equals it as well.
 *
 * @param features features whose names are checked against the explanation
 * @param scores   expected model scores, indexed by the integer stored in the "id" field
 * @param ltrQuery query passed to {@code explain}
 * @param scoreDoc hit being verified
 * @throws IOException if the stored document or the explanation cannot be read
 */
private void assertScoresMatch(List<PrebuiltFeature> features, float[] scores,
                               RankerQuery ltrQuery, ScoreDoc scoreDoc) throws IOException {
    Document stored = searcherUnderTest.doc(scoreDoc.doc);
    int docId = Integer.decode(stored.get("id"));
    float modelScore = scores[docId];
    float queryScore = scoreDoc.score;

    assertEquals("Scores match with similarity " + similarity.getClass(), modelScore,
            queryScore, SCORE_NB_ULP_PREC *Math.ulp(modelScore));

    if (similarity instanceof TFIDFSimilarity) {
        // Explain is not compared for these similarities because of precision issues:
        // score() yields 0.56103003 for feat:0 in doc1 while explain yields 0.5610301.
        return;
    }

    Explanation explanation = searcherUnderTest.explain(ltrQuery, docId);
    assertEquals("Explain scores match with similarity " + similarity.getClass(), explanation.getValue().floatValue(),
            queryScore, 5 * Math.ulp(modelScore));
    checkFeatureNames(explanation, features);
}
 
Example 3
Source File: ExampleStatsApp.java    From lucene4ir with Apache License 2.0 6 votes vote down vote up
/**
 * Prints the stored "docnum" and "title" values of at most the first 100 documents of
 * the reader and calls {@code iterateThroughDocTermVector} for each of them.
 *
 * @throws IOException if a document cannot be read
 */
public void iterateThroughDocList()  throws IOException {
    final int limit = Math.min(reader.maxDoc(), 100);
    for (int docId = 0; docId < limit; docId++) {
        final Document stored = reader.document(docId);
        // Document.get only yields a value for fields that were stored at index time.
        final String docnum = stored.get("docnum");
        final String title = stored.get("title");
        System.out.println("ID: " + docId);
        System.out.println("docnum and title: " + docnum + " " + title);
        iterateThroughDocTermVector(docId);
    }
}
 
Example 4
Source File: ExampleStatsApp.java    From lucene4ir with Apache License 2.0 6 votes vote down vote up
/**
 * Counts how many documents carry non-empty stored title and content text and prints
 * the totals to standard output.
 *
 * <p>A document whose title or content field is not stored (so that
 * {@code Document.get} returns {@code null}) is counted as having no text for that field.
 *
 * @throws IOException if a document cannot be read
 */
public void countFieldData() throws IOException {
    int n = reader.maxDoc();
    int nt = 0;
    int nc = 0;

    for (int i = 0; i < n; i++) {
        Document doc = reader.document(i);

        // Document.get only yields a value for stored fields; it is null otherwise.
        String title = doc.get(Lucene4IRConstants.FIELD_TITLE);
        String content = doc.get(Lucene4IRConstants.FIELD_CONTENT);
        if (title != null && title.length() > 0) {
            nt++;
        }
        if (content != null && content.length() > 0) {
            nc++;
        }
    }
    System.out.println("Num Docs: " +n + " Docs with Title text: " + nt + " Docs with Contents text: "+ nc);
}
 
Example 5
Source File: SearchEngineIndexer.java    From gravitee-management-rest-api with Apache License 2.0 6 votes vote down vote up
/**
 * Deletes from the index every document whose id and type fields both match those of
 * the given document.
 *
 * @param document document supplying the {@code ID_FIELD} and {@code TYPE_FIELD} values
 * @throws TechnicalException if the index writer fails to delete the documents
 */
public void remove(Document document) throws TechnicalException {
    String type = document.get(TYPE_FIELD);
    String id = document.get(ID_FIELD);

    logger.debug("Removing document type[{}] ID[{}]", type, id);

    // Both clauses are mandatory: only documents matching id AND type are deleted.
    BooleanQuery.Builder bq = new BooleanQuery.Builder();
    bq.add(new TermQuery(new Term(ID_FIELD, id)), BooleanClause.Occur.MUST);
    bq.add(new TermQuery(new Term(TYPE_FIELD, type)), BooleanClause.Occur.MUST);

    try {
        writer.deleteDocuments(bq.build());
    } catch (IOException ioe) {
        // This is the delete path, so the message reports a removal failure.
        logger.error("Fail to remove document with ID: {}", id, ioe);
        throw new TechnicalException("Fail to remove document with ID: " + id, ioe);
    }
}
 
Example 6
Source File: TestFuzzyQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that, for a fuzzy query, the document that matches the search term exactly is
 * ranked first even when another term is far more frequent in the index.
 */
public void testSingleQueryExactMatchScoresHighest() throws Exception {
  //See issue LUCENE-329 - IDF shouldn't wreck similarity ranking 
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smythe", writer);
  addDoc("smdssasd", writer);

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(new ClassicSimilarity()); //avoid randomisation of similarity algo by test framework
  writer.close();
  String searchTerms[] = { "smith", "smythe", "smdssasd" };
  for (String searchTerm : searchTerms) {
    FuzzyQuery query = new FuzzyQuery(new Term("field", searchTerm), 2, 1);
    ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
    // Check for hits before touching hits[0], so that an empty result is reported as an
    // assertion failure rather than an ArrayIndexOutOfBoundsException.
    assertTrue(hits.length > 0);
    Document bestDoc = searcher.doc(hits[0].doc);
    String topMatch = bestDoc.get("field");
    assertEquals(searchTerm, topMatch);
    if (hits.length > 1) {
      Document worstDoc = searcher.doc(hits[hits.length - 1].doc);
      String worstMatch = worstDoc.get("field");
      // NOTE(review): assertNotSame compares references, not contents - confirm intent.
      assertNotSame(searchTerm, worstMatch);
    }
  }
  reader.close();
  directory.close();
}
 
Example 7
Source File: OlatFullIndexer.java    From olat with Apache License 2.0 5 votes vote down vote up
/**
 * Increments the counter kept for the document type of the given document, starting at
 * one when the type has not been seen before.
 *
 * @param document document whose {@code DOCUMENTTYPE_FIELD_NAME} value selects the counter
 */
private void incrementDocumentTypeCounter(final Document document) {
    final String documentType = document.get(AbstractOlatDocument.DOCUMENTTYPE_FIELD_NAME);
    // A single lookup replaces containsKey + get; a missing entry counts as zero.
    final Integer previous = documentCounters.get(documentType);
    final int next = (previous == null) ? 1 : previous.intValue() + 1;
    // Integer.valueOf replaces the deprecated Integer(int) constructor.
    documentCounters.put(documentType, Integer.valueOf(next));
}
 
Example 8
Source File: SearchResultsImpl.java    From olat with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the result entry for a hit, or returns {@code null} when access is denied.
 *
 * <p>Access is granted immediately when {@code roles.isOLATAdmin()} is true. Otherwise
 * the stored resource URL (an empty string when absent) is turned into a business
 * control and passed to {@code mainIndexer.checkAccess}.
 *
 * @param doc         stored document of the hit
 * @param pos         position passed on to the created {@code ResultDocument}
 * @param query       query handed to the highlighter
 * @param analyzer    analyzer handed to the highlighter
 * @param doHighlight whether highlighting is applied to the created result
 * @param identity    identity handed to the access check
 * @param roles       roles handed to the access check
 * @return the result document, or {@code null} if access is not granted
 * @throws IOException
 */
private ResultDocument createResultDocument(final Document doc, final int pos, final Query query, final Analyzer analyzer, final boolean doHighlight,
        final Identity identity, final Roles roles) throws IOException {
    if (!roles.isOLATAdmin()) {
        final String storedUrl = doc.get(AbstractOlatDocument.RESOURCEURL_FIELD_NAME);
        final String resourceUrl = (storedUrl == null) ? "" : storedUrl;

        final BusinessControl businessControl = BusinessControlFactory.getInstance().createFromString(resourceUrl);
        if (!mainIndexer.checkAccess(null, businessControl, identity, roles)) {
            return null;
        }
    }

    final ResultDocument resultDoc = new ResultDocument(doc, pos);
    if (doHighlight) {
        doHighlight(query, analyzer, doc, resultDoc);
    }
    return resultDoc;
}
 
Example 9
Source File: TripleIndexContext.java    From AGDISTIS with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Runs the query against the index and converts each hit into a {@code Triple} built
 * from the stored context, URI and URI-count fields.
 *
 * @param maxNumberOfResults maximum number of hits requested from the searcher
 * @param bq                 query to execute
 * @return the sorted triples, truncated to at most 500 entries; empty when nothing matched
 * @throws IOException if searching or reading a stored document fails
 */
private List<Triple> getFromIndex(int maxNumberOfResults, BooleanQuery bq) throws IOException {
	log.debug("\t start asking index by context...");
	final ScoreDoc[] hits = isearcher.search(bq, null, maxNumberOfResults).scoreDocs;

	if (hits.length == 0) {
		return new ArrayList<Triple>();
	}

	final List<Triple> triples = new ArrayList<Triple>();
	for (final ScoreDoc hit : hits) {
		final Document hitDoc = isearcher.doc(hit.doc);
		final String subject = hitDoc.get(FIELD_NAME_CONTEXT);
		final String predicate = hitDoc.get(FIELD_NAME_URI);
		final String object = hitDoc.get(FIELD_NAME_URI_COUNT);
		triples.add(new Triple(subject, predicate, object));
	}
	log.debug("\t finished asking index...");

	Collections.sort(triples);

	// Return a view over the first 500 triples, or over all of them if there are fewer.
	return triples.subList(0, Math.min(triples.size(), 500));
}
 
Example 10
Source File: CourseServiceImpl.java    From TinyMooc with Apache License 2.0 5 votes vote down vote up
/**
 * Searches the course index for the given query text in the "courseIntro" and
 * "courseTitle" fields and converts up to 1000 hits into {@code Course} objects.
 *
 * @param query user-supplied query text
 * @return the matching courses, or {@code null} if the search failed (the failure is logged)
 */
public List<Course> getCourses(String query) {
    IndexSearcher indexSearcher = null;
    try {
        indexSearcher = new IndexSearcher(INDEXPATH);
        // Search both fields; SHOULD means the clauses are OR-ed together.
        BooleanClause.Occur[] clauses = {BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD};
        Query queryOBJ = MultiFieldQueryParser.parse(query, new String[]{"courseIntro", "courseTitle"}, clauses, new StandardAnalyzer());
        // No filter is applied; the typed variable keeps overload resolution unambiguous.
        Filter filter = null;
        // Fetch the best-scoring records.
        TopDocs topDocs = indexSearcher.search(queryOBJ, filter, 1000);

        List<Course> qlist = new ArrayList<Course>();
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document targetDoc = indexSearcher.doc(scoreDoc.doc);
            // Stored values are copied as-is; a field that is not stored yields null.
            Course course = new Course();
            course.setCourseIntro(targetDoc.get("courseIntro"));
            course.setCourseTitle(targetDoc.get("courseTitle"));
            course.setCourseId(targetDoc.get("courseId"));
            course.setType(targetDoc.get("type"));
            course.setCourseState(targetDoc.get("courseState"));
            qlist.add(course);
        }
        return qlist;
    } catch (Exception e) {
        // Keep the cause in the log so that failures can be diagnosed.
        logger.error("getCourses error.", e);
        return null;
    } finally {
        // Release the searcher on every path, including failures.
        if (indexSearcher != null) {
            try {
                indexSearcher.close();
            } catch (Exception closeFailure) {
                logger.error("getCourses error.", closeFailure);
            }
        }
    }
}
 
Example 11
Source File: ExampleStatsApp.java    From lucene4ir with Apache License 2.0 5 votes vote down vote up
/**
 * Prints the "docnum" of every document whose stored {@code FIELD_ALL} text is missing
 * or blank after trimming.
 *
 * @throws IOException if a document cannot be read
 */
public void iterateThroughDocListAll()  throws IOException {
    int n = reader.maxDoc();
    for (int i = 0; i < n; i++) {
        Document doc = reader.document(i);
        // Document.get only yields a value for stored fields; it is null otherwise.
        String docnum = doc.get("docnum");
        String all = doc.get(Lucene4IRConstants.FIELD_ALL);
        // A missing field is reported like an empty one instead of raising an NPE.
        if (all == null || all.trim().length() == 0) {
            System.out.println("docnum: " + docnum);
        }
    }
}
 
Example 12
Source File: OperatorGlobalSearchGUIProvider.java    From rapidminer-studio with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates a drag gesture listener for the operator identified by the document's
 * {@code FIELD_UNIQUE_ID} value.
 *
 * @param document search result document
 * @return the listener, or {@code null} when the document has no key (a warning is
 *         logged) or the operator instance cannot be created
 */
@Override
public DragGestureListener getDragAndDropSupport(final Document document) {
	final String key = document.get(GlobalSearchUtilities.FIELD_UNIQUE_ID);
	if (key != null) {
		try {
			return new OperatorDragGesture(OperatorService.getOperatorDescription(key).createOperatorInstance());
		} catch (OperatorCreationException e) {
			return null;
		}
	}

	LogService.getRoot().log(Level.WARNING, "com.rapidminer.gui.processeditor.global_search.OperatorSearchManager.error.no_key");
	return null;
}
 
Example 13
Source File: SearchEngineIndexer.java    From gravitee-management-rest-api with Apache License 2.0 5 votes vote down vote up
/**
 * Replaces the indexed document that shares the given document's {@code ID_FIELD} value
 * with the given document and commits the writer.
 *
 * @param document document to write
 * @return the value returned by {@code writer.updateDocument}
 * @throws TechnicalException if updating or committing raises an {@link IOException}
 */
public long index(Document document) throws TechnicalException {
    logger.debug("Updating a document into the Lucene index");
    final String id = document.get(ID_FIELD);
    final Term idTerm = new Term(ID_FIELD, id);
    try {
        final long sequenceNumber = writer.updateDocument(idTerm, document);
        writer.commit();
        return sequenceNumber;
    } catch (IOException ioe) {
        logger.error("Fail to index document with ID: {}", id, ioe);
        throw new TechnicalException("Fail to index document with ID: " + id, ioe);
    }
}
 
Example 14
Source File: NGramTestSetup.java    From uyuni with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Logs, at info level, the index, score, name and description of every hit.
 *
 * @param hits hits to display
 * @throws IOException if a hit's document or score cannot be read
 */
protected void displayHits(Hits hits) throws IOException {
    int position = 0;
    while (position < hits.length()) {
        final Document stored = hits.doc(position);
        final String name = stored.get("name");
        final String description = stored.get("description");
        final String message = "Hit<" + position + "> Score< " + hits.score(position) + ">  name = <"
                + name + "> description = <" + description + ">";
        log.info(message);
        position++;
    }
}
 
Example 15
Source File: DocumentBuilder.java    From modernmt with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the value stored under {@code DOC_ID_FIELD} in the given document.
 *
 * @param self document to read from
 * @return the value returned by {@code Document.get} for that field
 */
public static String getId(Document self) {
    final String id = self.get(DOC_ID_FIELD);
    return id;
}
 
Example 16
Source File: DumpTermsApp.java    From lucene4ir with Apache License 2.0 4 votes vote down vote up
/**
 * Tokenizes the stored {@code FIELD_ALL} text of every document, counts each pair of
 * consecutive tokens within a document, and prints the pairs seen more than twice.
 *
 * <p>Documents without a stored {@code FIELD_ALL} value are skipped.
 *
 * @throws IOException if a document cannot be read or tokenizing fails
 */
public void extractBigramsFromStoredText() throws IOException {
    Map<String, Integer> bigramCounts = new HashMap<String, Integer>();
    int n = reader.maxDoc();

    // One analyzer serves all documents; it and every token stream are closed reliably.
    try (Analyzer analyzer = new StandardAnalyzer()) {
        for (int i = 0; i < n; i++) {
            Document doc = reader.document(i);
            String all = doc.get(lucene4ir.Lucene4IRConstants.FIELD_ALL);
            if (all == null) {
                // Field not stored for this document: nothing to tokenize.
                continue;
            }

            try (TokenStream ts = analyzer.tokenStream(null, all)) {
                ts.reset();
                // null marks "no previous token yet"; this avoids comparing strings with !=.
                String previous = null;
                while (ts.incrementToken()) {
                    String current = ts.getAttribute(CharTermAttribute.class).toString();
                    if (previous != null) {
                        String key = previous + " " + current;
                        Integer seen = bigramCounts.get(key);
                        bigramCounts.put(key, seen == null ? 1 : seen + 1);
                    }
                    previous = current;
                }
                // Complete the consume workflow before the stream is closed.
                ts.end();
            }
        }
    }

    for (Map.Entry<String, Integer> entry : bigramCounts.entrySet()) {
        if (entry.getValue() > 2) {
            System.out.print(entry.getKey() + ": ");
            System.out.println(entry.getValue());
        }
    }
}
 
Example 17
Source File: LumongoSegment.java    From lumongo with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the scored-result builder for the hit at position {@code i} of {@code results}.
 *
 * <p>The stored document is loaded with a field set chosen from the fetch type (only
 * when the index settings report that documents are stored in the index), its timestamp
 * and unique id are read, and the builder is populated with score, ids, segment and
 * index information. Sort values are added when {@code sorting} is true.
 *
 * @return the populated builder
 * @throws Exception if highlighters or analysis handlers are supplied without a full
 *                   fetch, or if a called helper fails
 */
private ScoredResult.Builder handleDocResult(IndexSearcher is, SortRequest sortRequest, boolean sorting, ScoreDoc[] results, int i,
		FetchType resultFetchType, List<String> fieldsToReturn, List<String> fieldsToMask, List<LumongoHighlighter> highlighterList,
		List<AnalysisHandler> analysisHandlerList) throws Exception {
	final int docId = results[i].doc;
	final boolean fullFetch = FetchType.FULL.equals(resultFetchType);

	// Start from the default field set and widen it only when documents live in the index.
	Set<String> fieldsToFetch = fetchSet;
	if (indexConfig.getIndexSettings().getStoreDocumentInIndex()) {
		if (fullFetch) {
			fieldsToFetch = fetchSetWithDocument;
		}
		else if (FetchType.META.equals(resultFetchType)) {
			fieldsToFetch = fetchSetWithMeta;
		}
	}

	final Document stored = is.doc(docId, fieldsToFetch);

	final IndexableField timestampField = stored.getField(LumongoConstants.TIMESTAMP_FIELD);
	final long timestamp = timestampField.numericValue().longValue();

	final ScoredResult.Builder srBuilder = ScoredResult.newBuilder();
	final String uniqueId = stored.get(LumongoConstants.ID_FIELD);

	if (!highlighterList.isEmpty() && !fullFetch) {
		throw new Exception("Highlighting requires a full fetch of the document");
	}

	if (!analysisHandlerList.isEmpty() && !fullFetch) {
		throw new Exception("Analysis requires a full fetch of the document");
	}

	if (!FetchType.NONE.equals(resultFetchType)) {
		handleStoredDoc(srBuilder, uniqueId, stored, resultFetchType, fieldsToReturn, fieldsToMask, highlighterList, analysisHandlerList);
	}

	srBuilder.setScore(results[i].score);
	srBuilder.setUniqueId(uniqueId);
	srBuilder.setTimestamp(timestamp);
	srBuilder.setDocId(docId);
	srBuilder.setSegment(segmentNumber);
	srBuilder.setIndexName(indexName);
	srBuilder.setResultIndex(i);

	if (sorting) {
		handleSortValues(sortRequest, results[i], srBuilder);
	}
	return srBuilder;
}
 
Example 18
Source File: MtasDocumentIndex.java    From inception with Apache License 2.0 4 votes vote down vote up
/**
 * Counts the span matches of the query over all index segments, considering only the
 * documents the requesting user is meant to see.
 *
 * <p>For each live document with matches, the document is skipped when it lacks source or
 * annotation document ids, when the request is limited to a different source document,
 * when it is a source document (annotation id -1) for which the user has an annotatable
 * document, or when it is an annotation document belonging to another user.
 *
 * @param searcher searcher over the index
 * @param aRequest request providing project, user and optional document limit
 * @param q        span query to count matches for
 * @return the number of matching spans, or -1 if processing a segment failed
 * @throws IOException if rewriting the query or creating its weight fails
 */
private long doCountResults(IndexSearcher searcher,
    SearchQueryRequest aRequest, MtasSpanQuery q) throws IOException
{
    ListIterator<LeafReaderContext> leafReaderContextIterator = searcher.getIndexReader()
            .leaves().listIterator();

    Map<Long, Long> annotatableDocuments = listAnnotatableDocuments(aRequest.getProject(),
        aRequest.getUser());

    final float boost = 0;
    SpanWeight spanweight = q.rewrite(searcher.getIndexReader()).createWeight(searcher, false,
            boost);

    long numResults = 0;

    while (leafReaderContextIterator.hasNext()) {
        LeafReaderContext leafReaderContext = leafReaderContextIterator.next();
        try {
            Spans spans = spanweight.getSpans(leafReaderContext, SpanWeight.Postings.POSITIONS);
            SegmentReader segmentReader = (SegmentReader) leafReaderContext.reader();
            if (spans != null) {
                while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
                    // Live docs are consulted only when the segment has deletions.
                    if (segmentReader.numDocs() == segmentReader.maxDoc()
                            || segmentReader.getLiveDocs().get(spans.docID())) {
                        Document document = segmentReader.document(spans.docID());

                        // Retrieve user
                        String user = document.get(FIELD_USER);

                        // Retrieve source and annotation document ids
                        String rawSourceDocumentId = document.get(FIELD_SOURCE_DOCUMENT_ID);
                        String rawAnnotationDocumentId = document
                                .get(FIELD_ANNOTATION_DOCUMENT_ID);
                        if (rawSourceDocumentId == null || rawAnnotationDocumentId == null) {
                            log.trace("Indexed document lacks source/annotation document IDs"
                                    + " - source: {}, annotation: {}", rawSourceDocumentId,
                                rawAnnotationDocumentId);
                            continue;

                        }
                        long sourceDocumentId = Long.parseLong(rawSourceDocumentId);
                        long annotationDocumentId = Long.parseLong(rawAnnotationDocumentId);

                        // If the query is limited to a given document, skip any results
                        // which are not in the given document
                        Optional<SourceDocument> limitedToDocument = aRequest
                                .getLimitedToDocument();
                        if (limitedToDocument.isPresent() && !Objects
                            .equals(limitedToDocument.get().getId(), sourceDocumentId)) {
                            log.trace("Query limited to document {}, skipping results for "
                                    + "document {}", limitedToDocument.get().getId(),
                                sourceDocumentId);
                            continue;
                        }

                        if (annotatableDocuments.containsKey(sourceDocumentId)
                            && annotationDocumentId == -1) {
                            // Exclude result if the retrieved document is a sourcedocument
                            // (that is, has annotationDocument = -1) AND it has a
                            // corresponding annotation document for this user
                            log.trace("Skipping results from indexed source document {} in "
                                + "favor of results from the corresponding annotation "
                                + "document", sourceDocumentId);
                            continue;
                        }
                        else if (annotationDocumentId != -1 && !aRequest.getUser().getUsername()
                            .equals(user)) {
                            // Exclude result if the retrieved document is an annotation
                            // document (that is, annotationDocument != -1) and its username
                            // is different from the querying user
                            log.trace("Skipping results from annotation document for user {} "
                                    + "which does not match the requested user {}", user,
                                aRequest.getUser().getUsername());
                            continue;
                        }

                        while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
                            numResults++;
                        }
                    }
                }
            }
        }
        catch (Exception e) {
            log.error("Unable to process query results", e);
            // Stop at once: continuing would let later segments increment the -1 error
            // marker and turn it into a misleading count.
            return -1;
        }
    }
    return numResults;
}
 
Example 19
Source File: ExampleStatsApp.java    From lucene4ir with Apache License 2.0 4 votes vote down vote up
/**
 * Tokenizes the stored {@code FIELD_ALL} text of every document, counts each pair of
 * consecutive tokens within a document, and prints the pairs seen more than twice.
 *
 * <p>Documents without a stored {@code FIELD_ALL} value are skipped.
 *
 * @throws IOException if a document cannot be read or tokenizing fails
 */
public void extractBigramsFromStoredText() throws IOException {
    Map<String, Integer> bigramCounts = new HashMap<String, Integer>();
    int n = reader.maxDoc();

    // One analyzer serves all documents; it and every token stream are closed reliably.
    try (Analyzer analyzer = new StandardAnalyzer()) {
        for (int i = 0; i < n; i++) {
            Document doc = reader.document(i);
            String all = doc.get(Lucene4IRConstants.FIELD_ALL);
            if (all == null) {
                // Field not stored for this document: nothing to tokenize.
                continue;
            }

            try (TokenStream ts = analyzer.tokenStream(null, all)) {
                ts.reset();
                // null marks "no previous token yet"; this avoids comparing strings with !=.
                String previous = null;
                while (ts.incrementToken()) {
                    String current = ts.getAttribute(CharTermAttribute.class).toString();
                    if (previous != null) {
                        String key = previous + " " + current;
                        Integer seen = bigramCounts.get(key);
                        bigramCounts.put(key, seen == null ? 1 : seen + 1);
                    }
                    previous = current;
                }
                // Complete the consume workflow before the stream is closed.
                ts.end();
            }
        }
    }

    for (Map.Entry<String, Integer> entry : bigramCounts.entrySet()) {
        if (entry.getValue() > 2) {
            System.out.print(entry.getKey() + ": ");
            System.out.println(entry.getValue());
        }
    }
}
 
Example 20
Source File: RetrievalAppQueryExpansion.java    From lucene4ir with Apache License 2.0 4 votes vote down vote up
/**
 * Runs every query of the query file and writes the ranked results to the result file.
 *
 * <p>Assumes the query file contains a qno followed by the query terms, one query per
 * line, e.g.
 * <pre>
 * Q1 hello world
 * Q2 hello hello
 * Q3 hello etc
 * </pre>
 * For each query at most {@code p.maxResults} hits are written, one per line, as
 * {@code qno QO docnum rank score runTag}. Blank lines are skipped. Any exception is
 * reported on standard output and ends processing.
 */
public void processQueryFile(){
    // try-with-resources closes both files even when opening the second one fails or
    // when closing the first one throws.
    try (BufferedReader br = new BufferedReader(new FileReader(p.queryFile));
         FileWriter fw = new FileWriter(new File(p.resultFile))) {

        String line;
        while ((line = br.readLine()) != null) {
            if (line.trim().isEmpty()) {
                // Nothing to query on an empty line.
                continue;
            }

            String[] parts = line.split(" ");
            String qno = parts[0];
            // Each term is prefixed with a space, exactly as the plain concatenation did.
            StringBuilder queryTerms = new StringBuilder();
            for (int i = 1; i < parts.length; i++) {
                queryTerms.append(" ").append(parts[i]);
            }

            ScoreDoc[] scored = runQuery(qno, queryTerms.toString());

            int n = Math.min(p.maxResults, scored.length);

            for (int i = 0; i < n; i++) {
                Document doc = searcher.doc(scored[i].doc);
                String docno = doc.get("docnum");
                fw.write(qno + " QO " + docno + " " + (i+1) + " " + scored[i].score + " " + p.runTag);
                fw.write(System.lineSeparator());
            }
        }
    } catch (Exception e){
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}