Java Code Examples for org.apache.lucene.search.spans.SpanWeight#getSpans()

The following examples show how to use org.apache.lucene.search.spans.SpanWeight#getSpans() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CodecCollector.java    From mtas with Apache License 2.0 6 votes vote down vote up
/**
 * Collect spans for occurences.
 *
 * @param occurences
 *          the occurences
 * @param prefixes
 *          the prefixes
 * @param field
 *          the field
 * @param searcher
 *          the searcher
 * @param lrc
 *          the lrc
 * @return the map
 * @throws IOException
 *           Signals that an I/O exception has occurred.
 */
private static Map<GroupHit, Spans> collectSpansForOccurences(
    Set<GroupHit> occurences, Set<String> prefixes, String field,
    IndexSearcher searcher, LeafReaderContext lrc) throws IOException {
  Map<GroupHit, Spans> list = new HashMap<>();
  IndexReader reader = searcher.getIndexReader();
  final float boost = 0;
  for (GroupHit hit : occurences) {
    MtasSpanQuery queryHit = createQueryFromGroupHit(prefixes, field, hit);
    if (queryHit != null) {
      MtasSpanQuery queryHitRewritten = queryHit.rewrite(reader);
      SpanWeight weight = queryHitRewritten.createWeight(searcher, false,
          boost);
      Spans spans = weight.getSpans(lrc, SpanWeight.Postings.POSITIONS);
      if (spans != null) {
        list.put(hit, spans);
      }
    }
  }
  return list;
}
 
Example 2
Source File: MtasDocumentIndex.java    From inception with Apache License 2.0 4 votes vote down vote up
private long doCountResults(IndexSearcher searcher,
    SearchQueryRequest aRequest, MtasSpanQuery q) throws IOException
{
    ListIterator<LeafReaderContext> leafReaderContextIterator = searcher.getIndexReader()
            .leaves().listIterator();

    Map<Long, Long> annotatableDocuments = listAnnotatableDocuments(aRequest.getProject(),
        aRequest.getUser());

    final float boost = 0;
    SpanWeight spanweight = q.rewrite(searcher.getIndexReader()).createWeight(searcher, false,
            boost);

    long numResults = 0;

    while (leafReaderContextIterator.hasNext()) {
        LeafReaderContext leafReaderContext = leafReaderContextIterator.next();
        try {
            Spans spans = spanweight.getSpans(leafReaderContext, SpanWeight.Postings.POSITIONS);
            SegmentReader segmentReader = (SegmentReader) leafReaderContext.reader();
            if (spans != null) {
                while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
                    if (segmentReader.numDocs() == segmentReader.maxDoc()
                            || segmentReader.getLiveDocs().get(spans.docID())) {
                        Document document = segmentReader.document(spans.docID());

                        // Retrieve user
                        String user = document.get(FIELD_USER);

                        // Retrieve source and annotation document ids
                        String rawSourceDocumentId = document.get(FIELD_SOURCE_DOCUMENT_ID);
                        String rawAnnotationDocumentId = document
                                .get(FIELD_ANNOTATION_DOCUMENT_ID);
                        if (rawSourceDocumentId == null || rawAnnotationDocumentId == null) {
                            log.trace("Indexed document lacks source/annotation document IDs"
                                    + " - source: {}, annotation: {}", rawSourceDocumentId,
                                rawAnnotationDocumentId);
                            continue;

                        }
                        long sourceDocumentId = Long.valueOf(rawSourceDocumentId);
                        long annotationDocumentId = Long.valueOf(rawAnnotationDocumentId);

                        // If the query is limited to a given document, skip any results
                        // which are not in the given document
                        Optional<SourceDocument> limitedToDocument = aRequest
                                .getLimitedToDocument();
                        if (limitedToDocument.isPresent() && !Objects
                            .equals(limitedToDocument.get().getId(), sourceDocumentId)) {
                            log.trace("Query limited to document {}, skipping results for "
                                    + "document {}", limitedToDocument.get().getId(),
                                sourceDocumentId);
                            continue;
                        }

                        if (annotatableDocuments.containsKey(sourceDocumentId)
                            && annotationDocumentId == -1) {
                            // Exclude result if the retrieved document is a sourcedocument
                            // (that is, has annotationDocument = -1) AND it has a
                            // corresponding annotation document for this user
                            log.trace("Skipping results from indexed source document {} in" 
                                + "favor of results from the corresponding annotation "
                                + "document", sourceDocumentId);
                            continue;
                        }
                        else if (annotationDocumentId != -1 && !aRequest.getUser().getUsername()
                            .equals(user)) {
                            // Exclude result if the retrieved document is an annotation
                            // document (that is, annotationDocument != -1 and its username
                            // is different from the quering user
                            log.trace("Skipping results from annotation document for user {} "
                                    + "which does not match the requested user {}", user,
                                aRequest.getUser().getUsername());
                            continue;
                        }

                        while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
                            numResults++;
                        }
                    }
                }
            }
        }
        catch (Exception e) {
            log.error("Unable to process query results", e);
            numResults = -1;
        }
    }
    return numResults;
}
 
Example 3
Source File: MtasUimaParserLuceneTest.java    From inception with Apache License 2.0 4 votes vote down vote up
private static void doQuery(IndexReader indexReader, String field, MtasSpanQuery q,
        List<String> prefixes)
    throws IOException
{
    ListIterator<LeafReaderContext> iterator = indexReader.leaves().listIterator();
    IndexSearcher searcher = new IndexSearcher(indexReader);
    final float boost = 0;
    SpanWeight spanweight = q.rewrite(indexReader).createWeight(searcher, false, boost);

    while (iterator.hasNext()) {
        System.out.println("#### new iteration ####");
        LeafReaderContext lrc = iterator.next();
        Spans spans = spanweight.getSpans(lrc, SpanWeight.Postings.POSITIONS);
        SegmentReader segmentReader = (SegmentReader) lrc.reader();
        Terms terms = segmentReader.terms(field);
        CodecInfo mtasCodecInfo = CodecInfo.getCodecInfoFromTerms(terms);
        if (spans != null) {
            while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
                if (segmentReader.numDocs() == segmentReader.maxDoc()
                        || segmentReader.getLiveDocs().get(spans.docID())) {
                    String idValue = segmentReader.document(spans.docID()).getField(FIELD_ID)
                            .stringValue();
                    System.out.println("********  New doc " + spans.docID() + "-" + idValue);
                    while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
                        System.out.println("------");
                        List<MtasTokenString> tokens = mtasCodecInfo
                                .getPrefixFilteredObjectsByPositions(field, spans.docID(),
                                        prefixes, spans.startPosition(),
                                        (spans.endPosition() - 1));
                        for (MtasTokenString token : tokens) {
                            System.out.print("docId: " + (lrc.docBase + spans.docID()) + ", ");
                            System.out.print(" position: " + token.getPositionStart()
                                    + (!Objects.equals(token.getPositionEnd(),
                                            token.getPositionStart())
                                                    ? "-" + token.getPositionEnd()
                                                    : ""));
                            System.out.print(" offset: " + token.getOffsetStart() + "-"
                                    + token.getOffsetEnd());
                            System.out.print(" mtasId: " + token.getId());
                            System.out.println(" " + token.getPrefix()
                                    + (token.getPostfix() != null ? ":" + token.getPostfix()
                                            : "")
                                    + ", ");
                        }
                        System.out.println("------");
                        List<MtasTreeHit<String>> hits = mtasCodecInfo
                                .getPositionedTermsByPrefixesAndPositionRange(field,
                                        spans.docID(), prefixes, spans.startPosition(),
                                        (spans.endPosition() - 1));
                        for (MtasTreeHit<String> hit : hits) {
                            System.out.print("docId: " + (lrc.docBase + spans.docID()) + ", ");
                            System.out.print("position: " + hit.startPosition
                                    + (hit.endPosition != hit.startPosition
                                            ? "-" + hit.endPosition
                                            : ""));
                            System.out.println(" " + CodecUtil.termPrefix(hit.data)
                                    + (CodecUtil.termValue(hit.data) != null
                                            ? ":" + CodecUtil.termValue(hit.data)
                                            : "")
                                    + ", ");
                        }
                    }
                    // if (prefixes != null && !prefixes.isEmpty()) {
                    // }
                }
            }
        }
    }
}