Java Code Examples for org.apache.lucene.document.Document#get()

The following examples show how to use org.apache.lucene.document.Document#get(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: orientdb-lucene   File: LuceneResultSet.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Returns the record id of the next scored hit.
 *
 * <p>When {@code localIndex} has reached the end of {@code array}, it is reset to zero and
 * {@code fetchMoreResult()} is called before the next hit is read.
 *
 * @return the record id built from the hit's stored RID field, or {@code null} if an
 *     {@link IOException} is raised before the id has been created
 */
@Override
public OIdentifiable next() {
  if (localIndex == array.length) {
    localIndex = 0;
    fetchMoreResult();
  }
  final ScoreDoc hit = array[localIndex++];
  OContextualRecordId recordId = null;
  try {
    final Document stored = queryContext.searcher.doc(hit.doc);
    // Assign before notifying the manager so the id is still returned if the callback fails.
    recordId = new OContextualRecordId(stored.get(OLuceneIndexManagerAbstract.RID));
    manager.onRecordAddedToResultSet(queryContext, recordId, stored, hit);
  } catch (IOException e) {
    e.printStackTrace();
  }
  index++;
  return recordId;
}
 
Example 2
/**
 * Deletes from the index all documents whose ID and type terms both match the values stored
 * in the given document.
 *
 * @param document document supplying the {@code ID_FIELD} and {@code TYPE_FIELD} values to match
 * @throws TechnicalException if the index writer fails with an {@link IOException}
 */
public void remove(Document document) throws TechnicalException {
    String type = document.get(TYPE_FIELD);
    String id = document.get(ID_FIELD);

    logger.debug("Removing document type[{}] ID[{}]", type, id);

    // Both clauses are MUST: only documents matching the ID *and* the type are deleted.
    BooleanQuery.Builder bq = new BooleanQuery.Builder();
    bq.add(new TermQuery(new Term(ID_FIELD, id)), BooleanClause.Occur.MUST);
    bq.add(new TermQuery(new Term(TYPE_FIELD, type)), BooleanClause.Occur.MUST);

    try {
        writer.deleteDocuments(bq.build());
    } catch (IOException ioe) {
        // The message names the operation that actually failed (removal, not indexing).
        logger.error("Fail to remove document with ID: {}", id, ioe);
        throw new TechnicalException("Fail to remove document with ID: " + id, ioe);
    }
}
 
Example 3
Source Project: lucene4ir   File: ExampleStatsApp.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Counts how many documents have non-empty stored title text and non-empty stored content
 * text, then prints the totals to standard output.
 *
 * @throws IOException if a document cannot be read from the index reader
 */
public void countFieldData() throws IOException {
    int n = reader.maxDoc();
    int nt = 0;
    int nc = 0;

    for (int i = 0; i < n; i++) {
        Document doc = reader.document(i);

        // doc.get only returns a value for STORED fields; it yields null otherwise,
        // so a missing field is counted as "no text" instead of raising an NPE.
        String title = doc.get(Lucene4IRConstants.FIELD_TITLE);
        String content = doc.get(Lucene4IRConstants.FIELD_CONTENT);
        if (title != null && !title.isEmpty()) {
            nt++;
        }
        if (content != null && !content.isEmpty()) {
            nc++;
        }
    }
    System.out.println("Num Docs: " +n + " Docs with Title text: " + nt + " Docs with Contents text: "+ nc);
}
 
Example 4
Source Project: lucene4ir   File: ExampleStatsApp.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Prints the stored "docnum" and "title" values of at most the first 100 documents and
 * calls {@code iterateThroughDocTermVector} for each of them.
 *
 * @throws IOException if a document cannot be read from the index reader
 */
public void iterateThroughDocList()  throws IOException {
    final int limit = Math.min(reader.maxDoc(), 100);
    for (int docId = 0; docId < limit; docId++) {
        final Document stored = reader.document(docId);
        // Values are only available here for fields that were stored at index time.
        final String docnum = stored.get("docnum");
        final String title = stored.get("title");
        System.out.println("ID: " + docId);
        System.out.println("docnum and title: " + docnum + " " + title);
        iterateThroughDocTermVector(docId);
    }
}
 
Example 5
/**
 * Asserts that the score of a hit equals the expected model score for that document and,
 * unless the similarity is a {@code TFIDFSimilarity}, that the explanation value matches too.
 *
 * @param features features passed on to {@code checkFeatureNames}
 * @param scores   expected scores, indexed by the integer decoded from the hit's "id" field
 * @param ltrQuery query handed to the searcher's {@code explain}
 * @param scoreDoc the hit under test
 * @throws IOException if the stored document or the explanation cannot be obtained
 */
private void assertScoresMatch(List<PrebuiltFeature> features, float[] scores,
                               RankerQuery ltrQuery, ScoreDoc scoreDoc) throws IOException {
    final int docId = Integer.decode(searcherUnderTest.doc(scoreDoc.doc).get("id"));
    final float expected = scores[docId];
    final float actual = scoreDoc.score;

    assertEquals("Scores match with similarity " + similarity.getClass(), expected,
            actual, SCORE_NB_ULP_PREC *Math.ulp(expected));

    if (similarity instanceof TFIDFSimilarity) {
        // There are precision issues with these similarities when using explain
        // It produces 0.56103003 for feat:0 in doc1 using score() but 0.5610301 using explain
        return;
    }

    final Explanation explanation = searcherUnderTest.explain(ltrQuery, docId);
    assertEquals("Explain scores match with similarity " + similarity.getClass(), explanation.getValue().floatValue(),
            actual, 5 * Math.ulp(expected));
    checkFeatureNames(explanation, features);
}
 
Example 6
/**
 * Logs one info line per hit containing its position, score and the stored "name" and
 * "description" values.
 *
 * @param hits the hits to log
 * @throws IOException if a hit's document or score cannot be read
 */
protected void displayHits(Hits hits) throws IOException {
    int position = 0;
    while (position < hits.length()) {
        final Document hitDoc = hits.doc(position);
        final String name = hitDoc.get("name");
        final String description = hitDoc.get("description");
        final StringBuilder line = new StringBuilder();
        line.append("Hit<").append(position)
                .append("> Score< ").append(hits.score(position))
                .append(">  name = <").append(name)
                .append("> description = <").append(description)
                .append(">");
        log.info(line.toString());
        position++;
    }
}
 
Example 7
Source Project: lucene-solr   File: TestFuzzyQuery.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that, for a fuzzy query, the document whose field value exactly equals the search
 * term is ranked first and that the last-ranked hit (when there is more than one hit) holds a
 * different value.
 *
 * @throws Exception if indexing or searching fails
 */
public void testSingleQueryExactMatchScoresHighest() throws Exception {
  //See issue LUCENE-329 - IDF shouldn't wreck similarity ranking 
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smythe", writer);
  addDoc("smdssasd", writer);

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(new ClassicSimilarity()); //avoid randomisation of similarity algo by test framework
  writer.close();
  String searchTerms[] = { "smith", "smythe", "smdssasd" };
  for (String searchTerm : searchTerms) {
    FuzzyQuery query = new FuzzyQuery(new Term("field", searchTerm), 2, 1);
    ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
    // Assert before indexing into hits so an empty result fails as an assertion,
    // not as an ArrayIndexOutOfBoundsException.
    assertTrue(hits.length > 0);
    Document bestDoc = searcher.doc(hits[0].doc);
    String topMatch = bestDoc.get("field");
    assertEquals(searchTerm, topMatch);
    if (hits.length > 1) {
      Document worstDoc = searcher.doc(hits[hits.length - 1].doc);
      String worstMatch = worstDoc.get("field");
      // Compare by value: a reference comparison of two Strings would pass trivially.
      assertTrue(!searchTerm.equals(worstMatch));
    }
  }
  reader.close();
  directory.close();
}
 
Example 8
Source Project: olat   File: OlatFullIndexer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Increments the counter kept in {@code documentCounters} for the document type stored in
 * the given document, starting at 1 when no counter exists yet.
 *
 * @param document document whose stored document-type field selects the counter
 */
private void incrementDocumentTypeCounter(final Document document) {
    final String documentType = document.get(AbstractOlatDocument.DOCUMENTTYPE_FIELD_NAME);
    // Single lookup; an absent (or null) entry is treated as a count of zero.
    final Integer previous = documentCounters.get(documentType);
    final int next = (previous == null) ? 1 : previous.intValue() + 1;
    // Integer.valueOf replaces the deprecated boxing constructor.
    documentCounters.put(documentType, Integer.valueOf(next));
}
 
Example 9
Source Project: olat   File: SearchResultsImpl.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds the result document for a hit, or returns {@code null} when the identity is not
 * allowed to see it.
 *
 * <p>OLAT admins always have access. For everyone else the stored resource URL (an empty
 * string when none is stored) is turned into a business control and checked through
 * {@code mainIndexer.checkAccess}.
 *
 * @param doc         the index document of the hit
 * @param pos         position value handed to the {@code ResultDocument} constructor
 * @param query       query passed to the highlighter
 * @param analyzer    analyzer passed to the highlighter
 * @param doHighlight whether highlighting is applied to the result document
 * @param identity    identity whose access is checked
 * @param roles       roles of that identity
 * @return the result document, or {@code null} if access is denied
 * @throws IOException declared by this method; presumably raised by highlighting - TODO confirm
 */
private ResultDocument createResultDocument(final Document doc, final int pos, final Query query, final Analyzer analyzer, final boolean doHighlight,
        final Identity identity, final Roles roles) throws IOException {
    if (!roles.isOLATAdmin()) {
        final String storedUrl = doc.get(AbstractOlatDocument.RESOURCEURL_FIELD_NAME);
        final String resourceUrl = (storedUrl == null) ? "" : storedUrl;
        final BusinessControl businessControl = BusinessControlFactory.getInstance().createFromString(resourceUrl);
        if (!mainIndexer.checkAccess(null, businessControl, identity, roles)) {
            return null;
        }
    }

    final ResultDocument resultDoc = new ResultDocument(doc, pos);
    if (doHighlight) {
        doHighlight(query, analyzer, doc, resultDoc);
    }
    return resultDoc;
}
 
Example 10
/**
 * Runs the query against the index, turns every hit into a {@code Triple} built from its
 * stored context, URI and URI-count fields, sorts the triples and returns at most the first
 * 500 of them.
 *
 * @param maxNumberOfResults maximum number of hits requested from the searcher
 * @param bq                 the query to run
 * @return a new empty list when there are no hits, otherwise a sub-list view of the sorted triples
 * @throws IOException if searching or reading a hit document fails
 */
private List<Triple> getFromIndex(int maxNumberOfResults, BooleanQuery bq) throws IOException {
	log.debug("\t start asking index by context...");
	final ScoreDoc[] hits = isearcher.search(bq, null, maxNumberOfResults).scoreDocs;
	if (hits.length == 0) {
		return new ArrayList<Triple>();
	}

	final List<Triple> triples = new ArrayList<Triple>();
	for (final ScoreDoc hit : hits) {
		final Document hitDoc = isearcher.doc(hit.doc);
		triples.add(new Triple(
				hitDoc.get(FIELD_NAME_CONTEXT),
				hitDoc.get(FIELD_NAME_URI),
				hitDoc.get(FIELD_NAME_URI_COUNT)));
	}
	log.debug("\t finished asking index...");

	Collections.sort(triples);

	// Cap the returned view at 500 entries.
	return triples.subList(0, Math.min(triples.size(), 500));
}
 
Example 11
Source Project: TinyMooc   File: CourseServiceImpl.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Searches the course index for the given query text in the "courseIntro" and "courseTitle"
 * fields and converts up to 1000 hits into {@code Course} objects.
 *
 * @param query the query text to parse
 * @return the matching courses, or {@code null} if parsing or searching fails
 */
public List<Course> getCourses(String query) {
    IndexSearcher indexSearcher = null;
    try {
        List<Course> qlist = new ArrayList<Course>();
        indexSearcher = new IndexSearcher(INDEXPATH);
        // Search both the intro and the title field; SHOULD means the clauses are OR-ed.
        BooleanClause.Occur[] clauses = {BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD};
        Query queryOBJ = MultiFieldQueryParser.parse(query, new String[]{"courseIntro", "courseTitle"}, clauses, new StandardAnalyzer());
        // Typed null keeps the (Query, Filter, int) overload selected.
        Filter filter = null;
        // Fetch the best-scoring records.
        TopDocs topDocs = indexSearcher.search(queryOBJ, filter, 1000);

        // Build the result list from the stored fields of each hit.
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document targetDoc = indexSearcher.doc(scoreDoc.doc);
            Course course = new Course();
            course.setCourseIntro(targetDoc.get("courseIntro"));
            course.setCourseTitle(targetDoc.get("courseTitle"));
            course.setCourseId(targetDoc.get("courseId"));
            course.setType(targetDoc.get("type"));
            course.setCourseState(targetDoc.get("courseState"));
            qlist.add(course);
        }
        return qlist;
    } catch (Exception e) {
        // Keep the cause in the log so failures can be diagnosed.
        logger.error("getCourses error.", e);
        return null;
    } finally {
        // Always release the searcher, including when the search itself failed.
        if (indexSearcher != null) {
            try {
                indexSearcher.close();
            } catch (Exception closeFailure) {
                logger.error("getCourses error.", closeFailure);
            }
        }
    }
}
 
Example 12
Source Project: lucene4ir   File: ExampleStatsApp.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Prints the stored "docnum" of every document whose stored "all" field is missing or
 * contains only whitespace.
 *
 * @throws IOException if a document cannot be read from the index reader
 */
public void iterateThroughDocListAll()  throws IOException {
    int n = reader.maxDoc();
    for (int i = 0; i < n; i++) {
        Document doc = reader.document(i);
        // the doc.get pulls out the values stored - ONLY if you store the fields
        String docnum = doc.get("docnum");
        String all = doc.get(Lucene4IRConstants.FIELD_ALL);
        // A field that was not stored comes back as null; report it like an empty one
        // instead of failing with a NullPointerException.
        if (all == null || all.trim().length() == 0) {
            System.out.println("docnum: " + docnum);
        }
    }
}
 
Example 13
/**
 * Creates a drag gesture for the operator whose key is stored in the document's unique-id
 * field.
 *
 * @param document the search result document
 * @return the drag gesture listener, or {@code null} when the document has no key (a warning
 *         is logged) or the operator instance cannot be created
 */
@Override
public DragGestureListener getDragAndDropSupport(final Document document) {
	final String uniqueId = document.get(GlobalSearchUtilities.FIELD_UNIQUE_ID);
	if (uniqueId != null) {
		try {
			return new OperatorDragGesture(OperatorService.getOperatorDescription(uniqueId).createOperatorInstance());
		} catch (OperatorCreationException e) {
			return null;
		}
	}

	LogService.getRoot().log(Level.WARNING, "com.rapidminer.gui.processeditor.global_search.OperatorSearchManager.error.no_key");
	return null;
}
 
Example 14
/**
 * Replaces (or adds) the document identified by its stored {@code ID_FIELD} value and commits
 * the writer.
 *
 * @param document the document to write
 * @return the value returned by the writer's {@code updateDocument} call
 * @throws TechnicalException if updating or committing fails with an {@link IOException}
 */
public long index(Document document) throws TechnicalException {
    logger.debug("Updating a document into the Lucene index");
    final String documentId = document.get(ID_FIELD);
    final Term idTerm = new Term(ID_FIELD, documentId);
    try {
        final long sequenceNumber = writer.updateDocument(idTerm, document);
        writer.commit();
        return sequenceNumber;
    } catch (IOException ioe) {
        logger.error("Fail to index document with ID: {}", documentId, ioe);
        throw new TechnicalException("Fail to index document with ID: " + documentId, ioe);
    }
}
 
Example 15
Source Project: inception   File: MtasDocumentIndex.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Counts the span match positions of the query over all leaves of the searcher's index
 * reader, skipping indexed documents that are filtered out for the request (wrong target
 * document, source document superseded by an annotation document, or annotation document of
 * another user).
 *
 * @param searcher searcher whose index reader is scanned
 * @param aRequest request supplying the project, the user and the optional document limit
 * @param q        the span query to count matches for
 * @return the number of matching start positions, or {@code -1} if processing any leaf fails
 * @throws IOException if rewriting the query or creating its weight fails
 */
private long doCountResults(IndexSearcher searcher,
    SearchQueryRequest aRequest, MtasSpanQuery q) throws IOException
{
    ListIterator<LeafReaderContext> leafReaderContextIterator = searcher.getIndexReader()
            .leaves().listIterator();

    Map<Long, Long> annotatableDocuments = listAnnotatableDocuments(aRequest.getProject(),
        aRequest.getUser());

    final float boost = 0;
    SpanWeight spanweight = q.rewrite(searcher.getIndexReader()).createWeight(searcher, false,
            boost);

    long numResults = 0;

    while (leafReaderContextIterator.hasNext()) {
        LeafReaderContext leafReaderContext = leafReaderContextIterator.next();
        try {
            Spans spans = spanweight.getSpans(leafReaderContext, SpanWeight.Postings.POSITIONS);
            SegmentReader segmentReader = (SegmentReader) leafReaderContext.reader();
            if (spans != null) {
                while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
                    // Only look at the live-docs bits when the segment has deletions.
                    if (segmentReader.numDocs() == segmentReader.maxDoc()
                            || segmentReader.getLiveDocs().get(spans.docID())) {
                        Document document = segmentReader.document(spans.docID());

                        // Retrieve user
                        String user = document.get(FIELD_USER);

                        // Retrieve source and annotation document ids
                        String rawSourceDocumentId = document.get(FIELD_SOURCE_DOCUMENT_ID);
                        String rawAnnotationDocumentId = document
                                .get(FIELD_ANNOTATION_DOCUMENT_ID);
                        if (rawSourceDocumentId == null || rawAnnotationDocumentId == null) {
                            log.trace("Indexed document lacks source/annotation document IDs"
                                    + " - source: {}, annotation: {}", rawSourceDocumentId,
                                rawAnnotationDocumentId);
                            continue;

                        }
                        long sourceDocumentId = Long.parseLong(rawSourceDocumentId);
                        long annotationDocumentId = Long.parseLong(rawAnnotationDocumentId);

                        // If the query is limited to a given document, skip any results
                        // which are not in the given document
                        Optional<SourceDocument> limitedToDocument = aRequest
                                .getLimitedToDocument();
                        if (limitedToDocument.isPresent() && !Objects
                            .equals(limitedToDocument.get().getId(), sourceDocumentId)) {
                            log.trace("Query limited to document {}, skipping results for "
                                    + "document {}", limitedToDocument.get().getId(),
                                sourceDocumentId);
                            continue;
                        }

                        if (annotatableDocuments.containsKey(sourceDocumentId)
                            && annotationDocumentId == -1) {
                            // Exclude result if the retrieved document is a sourcedocument
                            // (that is, has annotationDocument = -1) AND it has a
                            // corresponding annotation document for this user
                            log.trace("Skipping results from indexed source document {} in "
                                + "favor of results from the corresponding annotation "
                                + "document", sourceDocumentId);
                            continue;
                        }
                        else if (annotationDocumentId != -1 && !aRequest.getUser().getUsername()
                            .equals(user)) {
                            // Exclude result if the retrieved document is an annotation
                            // document (that is, annotationDocument != -1 and its username
                            // is different from the querying user
                            log.trace("Skipping results from annotation document for user {} "
                                    + "which does not match the requested user {}", user,
                                aRequest.getUser().getUsername());
                            continue;
                        }

                        while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
                            numResults++;
                        }
                    }
                }
            }
        }
        catch (Exception e) {
            log.error("Unable to process query results", e);
            // Return the error sentinel right away: continuing with the remaining leaves
            // would add their matches on top of -1 and produce a misleading count.
            return -1;
        }
    }
    return numResults;
}
 
Example 16
Source Project: modernmt   File: DocumentBuilder.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Reads the value stored under {@code DOC_ID_FIELD} in the given document.
 *
 * @param self the document to read from
 * @return whatever {@code Document#get} yields for {@code DOC_ID_FIELD}
 */
public static String getId(Document self) {
    final String storedId = self.get(DOC_ID_FIELD);
    return storedId;
}
 
Example 17
Source Project: lucene4ir   File: DumpTermsApp.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Tokenizes the stored "all" text of every document, counts each pair of consecutive tokens
 * (bigram) and prints every bigram seen more than twice together with its count.
 *
 * @throws IOException if a document cannot be read or tokenizing fails
 */
public void extractBigramsFromStoredText() throws IOException {
    HashMap<String, Integer> hmap = new HashMap<String, Integer>();
    int n = reader.maxDoc();

    // One analyzer serves all documents and is closed when the scan is done.
    try (Analyzer a = new StandardAnalyzer()) {
        for (int i = 0; i < n; i++) {
            Document doc = reader.document(i);
            String all = doc.get(lucene4ir.Lucene4IRConstants.FIELD_ALL);
            if (all == null) {
                // Field not stored for this document: nothing to tokenize.
                continue;
            }

            // Each stream is ended and closed before the analyzer hands out the next one.
            try (TokenStream ts = a.tokenStream(null, all)) {
                CharTermAttribute term = ts.getAttribute(CharTermAttribute.class);
                ts.reset();
                String w1;
                String w2 = "";
                while (ts.incrementToken()) {
                    w1 = w2;
                    w2 = term.toString();
                    // Compare by content; the first token has no predecessor to pair with.
                    if (!w1.isEmpty()) {
                        String key = w1 + " " + w2;
                        Integer seen = hmap.get(key);
                        hmap.put(key, seen == null ? 1 : seen + 1);
                    }
                }
                ts.end();
            }
        }
    }

    for (Map.Entry<String, Integer> me : hmap.entrySet()) {
        if (me.getValue() > 2) {
            System.out.print(me.getKey() + ": ");
            System.out.println(me.getValue());
        }
    }
}
 
Example 18
Source Project: lumongo   File: LumongoSegment.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Builds the scored-result builder for the hit at position {@code i} of {@code results}.
 *
 * <p>The stored fields to load depend on the fetch type when the index is configured to store
 * the document. Highlighting and analysis are rejected unless the fetch type is FULL.
 *
 * @param is                  searcher used to load the stored document
 * @param sortRequest         sort request passed to {@code handleSortValues} when sorting
 * @param sorting             whether sort values are added to the result
 * @param results             the hits of the search
 * @param i                   index of the hit to convert; also recorded as the result index
 * @param resultFetchType     how much of the stored document is fetched
 * @param fieldsToReturn      passed through to {@code handleStoredDoc}
 * @param fieldsToMask        passed through to {@code handleStoredDoc}
 * @param highlighterList     highlighters; must be empty unless the fetch type is FULL
 * @param analysisHandlerList analysis handlers; must be empty unless the fetch type is FULL
 * @return the populated builder
 * @throws Exception if highlighting or analysis is requested without a full fetch, or if a
 *                   called helper fails
 */
private ScoredResult.Builder handleDocResult(IndexSearcher is, SortRequest sortRequest, boolean sorting, ScoreDoc[] results, int i,
		FetchType resultFetchType, List<String> fieldsToReturn, List<String> fieldsToMask, List<LumongoHighlighter> highlighterList,
		List<AnalysisHandler> analysisHandlerList) throws Exception {
	final ScoreDoc hit = results[i];
	final int docId = hit.doc;
	final boolean fullFetch = FetchType.FULL.equals(resultFetchType);

	// Pick the set of stored fields to load for this fetch type.
	Set<String> fieldsToFetch = fetchSet;
	if (indexConfig.getIndexSettings().getStoreDocumentInIndex()) {
		if (fullFetch) {
			fieldsToFetch = fetchSetWithDocument;
		}
		else if (FetchType.META.equals(resultFetchType)) {
			fieldsToFetch = fetchSetWithMeta;
		}
	}

	final Document stored = is.doc(docId, fieldsToFetch);
	final long timestamp = stored.getField(LumongoConstants.TIMESTAMP_FIELD).numericValue().longValue();

	final ScoredResult.Builder srBuilder = ScoredResult.newBuilder();
	final String uniqueId = stored.get(LumongoConstants.ID_FIELD);

	if (!highlighterList.isEmpty() && !fullFetch) {
		throw new Exception("Highlighting requires a full fetch of the document");
	}
	if (!analysisHandlerList.isEmpty() && !fullFetch) {
		throw new Exception("Analysis requires a full fetch of the document");
	}

	if (!FetchType.NONE.equals(resultFetchType)) {
		handleStoredDoc(srBuilder, uniqueId, stored, resultFetchType, fieldsToReturn, fieldsToMask, highlighterList, analysisHandlerList);
	}

	srBuilder.setScore(hit.score);
	srBuilder.setUniqueId(uniqueId);
	srBuilder.setTimestamp(timestamp);
	srBuilder.setDocId(docId);
	srBuilder.setSegment(segmentNumber);
	srBuilder.setIndexName(indexName);
	srBuilder.setResultIndex(i);

	if (sorting) {
		handleSortValues(sortRequest, hit, srBuilder);
	}
	return srBuilder;
}
 
Example 19
Source Project: lucene4ir   File: ExampleStatsApp.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Tokenizes the stored "all" text of every document, counts each pair of consecutive tokens
 * (bigram) and prints every bigram seen more than twice together with its count.
 *
 * @throws IOException if a document cannot be read or tokenizing fails
 */
public void extractBigramsFromStoredText() throws IOException {
    HashMap<String, Integer> hmap = new HashMap<String, Integer>();
    int n = reader.maxDoc();

    // One analyzer serves all documents and is closed when the scan is done.
    try (Analyzer a = new StandardAnalyzer()) {
        for (int i = 0; i < n; i++) {
            Document doc = reader.document(i);
            String all = doc.get(Lucene4IRConstants.FIELD_ALL);
            if (all == null) {
                // Field not stored for this document: nothing to tokenize.
                continue;
            }

            // Each stream is ended and closed before the analyzer hands out the next one.
            try (TokenStream ts = a.tokenStream(null, all)) {
                CharTermAttribute term = ts.getAttribute(CharTermAttribute.class);
                ts.reset();
                String w1;
                String w2 = "";
                while (ts.incrementToken()) {
                    w1 = w2;
                    w2 = term.toString();
                    // Compare by content; the first token has no predecessor to pair with.
                    if (!w1.isEmpty()) {
                        String key = w1 + " " + w2;
                        Integer seen = hmap.get(key);
                        hmap.put(key, seen == null ? 1 : seen + 1);
                    }
                }
                ts.end();
            }
        }
    }

    for (Map.Entry<String, Integer> me : hmap.entrySet()) {
        if (me.getValue() > 2) {
            System.out.print(me.getKey() + ": ");
            System.out.println(me.getValue());
        }
    }
}
 
Example 20
/**
 * Runs every query of the query file and writes the ranked results to the result file.
 *
 * <p>Assumes the query file contains a qno followed by the query terms, one query per line,
 * i.e.
 * <pre>
 * Q1 hello world
 * Q2 hello hello
 * Q3 hello etc
 * </pre>
 * For each query at most {@code p.maxResults} hits are written, one per line, as
 * {@code qno QO docnum rank score runTag}. Any exception is reported on standard output and
 * ends the processing.
 */
public void processQueryFile(){
    // try-with-resources closes both files even when opening the second one or closing the
    // first one fails.
    try (BufferedReader br = new BufferedReader(new FileReader(p.queryFile));
         FileWriter fw = new FileWriter(new File(p.resultFile))) {

        String line;
        while ((line = br.readLine()) != null) {
            String[] parts = line.split(" ");
            String qno = parts[0];
            // Every term is prefixed with a space, so the query text starts with one.
            StringBuilder queryTerms = new StringBuilder();
            for (int i = 1; i < parts.length; i++) {
                queryTerms.append(" ").append(parts[i]);
            }

            ScoreDoc[] scored = runQuery(qno, queryTerms.toString());

            int n = Math.min(p.maxResults, scored.length);

            for (int i = 0; i < n; i++) {
                Document doc = searcher.doc(scored[i].doc);
                String docno = doc.get("docnum");
                fw.write(qno + " QO " + docno + " " + (i+1) + " " + scored[i].score + " " + p.runTag);
                fw.write(System.lineSeparator());
            }
        }
    } catch (Exception e){
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}