Java Code Examples for org.apache.lucene.index.PostingsEnum#endOffset()
The following examples show how to use
org.apache.lucene.index.PostingsEnum#endOffset() .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You can also check out the related API usage on the sidebar.
Example 1
Source File: TermVectorsResponse.java From Elasticsearch with Apache License 2.0 | 6 votes |
/**
 * Advances {@code posEnum} through all {@code termFreq} occurrences of the current term
 * and records position, offset and payload data into the parallel per-occurrence arrays.
 *
 * @param posEnum  postings enum already positioned on the current doc/term
 * @param termFreq number of occurrences to read (one {@code nextPosition()} call each)
 * @throws IOException if reading from the postings enum fails
 */
private void initValues(Terms curTerms, PostingsEnum posEnum, int termFreq) throws IOException {
    for (int idx = 0; idx < termFreq; idx++) {
        // nextPosition() must be called once per occurrence even when positions are
        // not stored, so that offsets/payloads advance to the next occurrence.
        int position = posEnum.nextPosition();
        if (curTerms.hasPositions()) {
            currentPositions[idx] = position;
        }
        if (curTerms.hasOffsets()) {
            currentStartOffset[idx] = posEnum.startOffset();
            currentEndOffset[idx] = posEnum.endOffset();
        }
        if (curTerms.hasPayloads()) {
            BytesRef payload = posEnum.getPayload();
            // copy the payload bytes; the enum may reuse its BytesRef on the next call
            currentPayloads[idx] =
                    payload == null ? null : new BytesArray(payload.bytes, 0, payload.length);
        }
    }
}
Example 2
Source File: TermPosting.java From lucene-solr with Apache License 2.0 | 6 votes |
static TermPosting of(int position, PostingsEnum penum) throws IOException { TermPosting posting = new TermPosting(); // set position posting.position = position; // set offset (if available) int sOffset = penum.startOffset(); int eOffset = penum.endOffset(); if (sOffset >= 0 && eOffset >= 0) { posting.startOffset = sOffset; posting.endOffset = eOffset; } // set payload (if available) if (penum.getPayload() != null) { posting.payload = BytesRef.deepCopyOf(penum.getPayload()); } return posting; }
Example 3
Source File: TermVectorEntry.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Returns a new position entry representing the specified posting, and optionally,
 * start and end offsets when the underlying enum provides them.
 *
 * @param pos - term position
 * @param pe - positioned postings iterator
 * @return position entry
 * @throws IOException - if there is a low level IO error.
 */
static TermVectorPosition of(int pos, PostingsEnum pe) throws IOException {
    Objects.requireNonNull(pe);
    int start = pe.startOffset();
    int end = pe.endOffset();
    // enums without offsets report -1 from both accessors
    boolean hasOffsets = start >= 0 && end >= 0;
    return hasOffsets
            ? new TermVectorPosition(pos, start, end)
            : new TermVectorPosition(pos);
}
Example 4
Source File: FrequencyCtxSentenceBasedFBWorker.java From jate with GNU Lesser General Public License v3.0 | 5 votes |
/**
 * Walks the term vector of a single document and records the character offsets of every
 * occurrence of a candidate term, tagging each occurrence with a sentence id when the
 * posting carries a serialized {@code MWEMetadata} payload.
 *
 * @param termVectorLookup term vector for one document
 * @return sorted list of candidate-term occurrences with offsets and sentence ids
 * @throws IOException if reading the term vector fails
 */
private List<MWESentenceContext> collectTermOffsets(Terms termVectorLookup) throws IOException {
    List<MWESentenceContext> result = new ArrayList<>();
    TermsEnum tiRef= termVectorLookup.iterator();
    BytesRef luceneTerm = tiRef.next();
    while (luceneTerm != null) {
        // skip empty terms
        if (luceneTerm.length == 0) {
            luceneTerm = tiRef.next();
            continue;
        }
        // only collect offsets for terms that are known candidates
        String tString = luceneTerm.utf8ToString();
        if(!allCandidates.contains(tString)) {
            luceneTerm=tiRef.next();
            continue;
        }
        // ALL = positions + offsets + payloads; the payload carries sentence metadata
        PostingsEnum postingsEnum = tiRef.postings(null, PostingsEnum.ALL);
        //PostingsEnum postingsEnum = ti.postings(null, PostingsEnum.OFFSETS);
        int doc = postingsEnum.nextDoc(); //this should be just 1 doc, i.e., the constraint for getting this TV
        if (doc != PostingsEnum.NO_MORE_DOCS) {
            int totalOccurrence = postingsEnum.freq();
            for (int i = 0; i < totalOccurrence; i++) {
                // nextPosition() must be called before reading offsets/payload
                postingsEnum.nextPosition();
                int start = postingsEnum.startOffset();
                int end = postingsEnum.endOffset();
                BytesRef payload=postingsEnum.getPayload();
                // -1 marks "no sentence info" when the posting has no payload
                int sentenceId=-1;
                if(payload!=null){
                    sentenceId=new SentenceContext(MWEMetadata.deserialize(payload.utf8ToString())).getSentenceId();
                }
                result.add(new MWESentenceContext(tString,sentenceId, start, end));
            }
        }
        luceneTerm = tiRef.next();
    }
    Collections.sort(result);
    return result;
}
Example 5
Source File: WindowBuildingTVM.java From wiseowl with MIT License | 4 votes |
public void map(Terms terms,Spans spans) throws IOException { int primStart = spanStart - primaryWS; int primEnd = spanEnd + primaryWS; // stores the start and end of the adjacent previous and following int adjLBStart = primStart - adjWS; int adjLBEnd = primStart - 1;//don't overlap int adjUBStart = primEnd + 1;//don't overlap int adjUBEnd = primEnd + adjWS; //stores the start and end of the secondary previous and the secondary following int secLBStart = adjLBStart - secWS; int secLBEnd = adjLBStart - 1; //don't overlap the adjacent window int secUBStart = adjUBEnd + 1; int secUBEnd = adjUBEnd + secWS; WindowTerm lastWT = null; if(terms!=null) {} TermsEnum termsEnum = terms.iterator(); BytesRef termref = null; String term=null; while ((termref = termsEnum.next()) != null) { term=termsEnum.term().utf8ToString(); PostingsEnum postings = termsEnum.postings(null, PostingsEnum.PAYLOADS | PostingsEnum.OFFSETS); postings.nextDoc(); if (term.startsWith(NameFilter.NE_PREFIX) == false && term.startsWith(PassageRankingComponent.NE_PREFIX_LOWER) == false) {//filter out the types, as we don't need them here //construct the windows, which means we need a bunch of //bracketing variables to know what window we are in //start and end of the primary window //unfortunately, we still have to loop over the positions //we'll make this inclusive of the boundaries, do an upfront check here so //we can skip over anything that is outside of all windows //int position=spans.nextStartPosition(); int position=postings.nextPosition(); if (position >= secLBStart && position <= secUBEnd) { //fill in the windows WindowTerm wt; //offsets aren't required, but they are nice to have if (postings != null){ //log.warn("terms if postings!=null {}",term); wt = new WindowTerm(term, position, postings.startOffset(), postings.endOffset()); } else { wt = new WindowTerm(term, position); //log.warn("terms if postings==null {}",term); } if (position >= primStart && position <= primEnd) {//are we in the primary 
window passage.terms.add(wt); //we are only going to keep bigrams for the primary window. You could do it for the other windows, too if (lastWT != null) { WindowTerm bigramWT = new WindowTerm(lastWT.term + "," + term, lastWT.position);//we don't care about offsets for bigrams passage.bigrams.add(bigramWT); } lastWT = wt; } else if (position >= secLBStart && position <= secLBEnd) { //are we in the secondary previous window? passage.secPrevTerms.add(wt); } else if (position >= secUBStart && position <= secUBEnd) {//are we in the secondary following window? passage.secFollowTerms.add(wt); } else if (position >= adjLBStart && position <= adjLBEnd) {//are we in the adjacent previous window? passage.prevTerms.add(wt); } else if (position >= adjUBStart && position <= adjUBEnd) {//are we in the adjacent following window? passage.followTerms.add(wt); } } //} }} }
Example 6
Source File: PayloadFilteredTermIntervalsSource.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Creates a matches iterator over the positions of {@code te}'s current term within
 * {@code doc}, yielding only positions whose payload is accepted by {@code filter}.
 *
 * @param te  terms enum positioned on the term of interest
 * @param doc target document id
 * @return a payload-filtered matches iterator, or {@code null} if the term is absent from doc
 * @throws IOException if reading postings fails
 */
private IntervalMatchesIterator matches(TermsEnum te, int doc) throws IOException {
    // ALL: positions + offsets + payloads are all consumed below
    PostingsEnum pe = te.postings(null, PostingsEnum.ALL);
    if (pe.advance(doc) != doc) {
        return null;
    }
    return new IntervalMatchesIterator() {
        @Override
        public int gaps() {
            return 0;
        }
        @Override
        public int width() {
            return 1;
        }
        int upto = pe.freq(); // occurrences remaining to visit
        int pos = -1;         // current position; NO_MORE_INTERVALS once exhausted
        @Override
        public boolean next() throws IOException {
            // advance until a position whose payload passes the filter, or run out
            do {
                if (upto <= 0) {
                    pos = IntervalIterator.NO_MORE_INTERVALS;
                    return false;
                }
                upto--;
                pos = pe.nextPosition();
            } while (filter.test(pe.getPayload()) == false);
            return true;
        }
        @Override
        public int startPosition() {
            return pos;
        }
        @Override
        public int endPosition() {
            return pos;
        }
        @Override
        public int startOffset() throws IOException {
            return pe.startOffset();
        }
        @Override
        public int endOffset() throws IOException {
            return pe.endOffset();
        }
        @Override
        public MatchesIterator getSubMatches() {
            return null;
        }
        @Override
        public Query getQuery() {
            // no meaningful per-match query for payload-filtered intervals
            throw new UnsupportedOperationException();
        }
    };
}
Example 7
Source File: TermIntervalsSource.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Creates a matches iterator over all positions of {@code te}'s current term within
 * {@code doc}, exposing offsets and the originating {@link TermQuery}.
 *
 * @param te    terms enum positioned on the term of interest
 * @param doc   target document id
 * @param field field the term belongs to (used to build the reported query)
 * @return a matches iterator, or {@code null} if the term is absent from doc
 * @throws IOException if reading postings fails
 */
static IntervalMatchesIterator matches(TermsEnum te, int doc, String field) throws IOException {
    TermQuery query = new TermQuery(new Term(field, te.term()));
    // OFFSETS: positions + offsets are needed; payloads are not
    PostingsEnum pe = te.postings(null, PostingsEnum.OFFSETS);
    if (pe.advance(doc) != doc) {
        return null;
    }
    return new IntervalMatchesIterator() {
        @Override
        public int gaps() {
            return 0;
        }
        @Override
        public int width() {
            return 1;
        }
        int upto = pe.freq(); // occurrences remaining to visit
        int pos = -1;         // current position; NO_MORE_INTERVALS once exhausted
        @Override
        public boolean next() throws IOException {
            if (upto <= 0) {
                pos = IntervalIterator.NO_MORE_INTERVALS;
                return false;
            }
            upto--;
            pos = pe.nextPosition();
            return true;
        }
        @Override
        public int startPosition() {
            return pos;
        }
        @Override
        public int endPosition() {
            return pos;
        }
        @Override
        public int startOffset() throws IOException {
            return pe.startOffset();
        }
        @Override
        public int endOffset() throws IOException {
            return pe.endOffset();
        }
        @Override
        public MatchesIterator getSubMatches() {
            return null;
        }
        @Override
        public Query getQuery() {
            return query;
        }
    };
}
Example 8
Source File: TestMatchesIterator.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Records the current posting's position and character offsets.
 *
 * @param pe       postings enum already positioned on the occurrence to capture
 * @param position term position of the occurrence
 * @throws IOException if reading offsets from the enum fails
 */
public TermMatch(PostingsEnum pe, int position) throws IOException {
    this.startOffset = pe.startOffset();
    this.endOffset = pe.endOffset();
    this.position = position;
}
Example 9
Source File: FrequencyCtxWindowBasedFBWorker.java From jate with GNU Lesser General Public License v3.0 | 4 votes |
/**
 * Walks the term vector of a single document, recording each candidate-term occurrence with
 * its offsets and (when a payload is present) its sentence context, while also tracking the
 * highest last-token index seen per sentence in {@code sentenceBoundaries}.
 *
 * @param termVectorLookup   term vector for one document
 * @param sentenceBoundaries mutated in place: sentenceId -> max last-token index observed
 * @return sorted list of candidate-term occurrences
 * @throws IOException if reading the term vector fails
 */
private List<MWEInSentence> collectTermSentenceContext(Terms termVectorLookup,
                                                                   Map<Integer, Integer> sentenceBoundaries) throws IOException {
    List<MWEInSentence> result = new ArrayList<>();

    TermsEnum tiRef = termVectorLookup.iterator();
    BytesRef luceneTerm = tiRef.next();
    while (luceneTerm != null) {
        // skip empty terms
        if (luceneTerm.length == 0) {
            luceneTerm = tiRef.next();
            continue;
        }
        // only collect context for terms that are known candidates
        String tString = luceneTerm.utf8ToString();
        if (!allCandidates.contains(tString)) {
            luceneTerm = tiRef.next();
            continue;
        }

        // ALL = positions + offsets + payloads; the payload carries sentence metadata
        PostingsEnum postingsEnum = tiRef.postings(null, PostingsEnum.ALL);
        //PostingsEnum postingsEnum = ti.postings(null, PostingsEnum.OFFSETS);

        int doc = postingsEnum.nextDoc(); //this should be just 1 doc, i.e., the constraint for getting this TV
        if (doc != PostingsEnum.NO_MORE_DOCS) {
            int totalOccurrence = postingsEnum.freq();
            for (int i = 0; i < totalOccurrence; i++) {
                // nextPosition() must be called before reading offsets/payload
                postingsEnum.nextPosition();
                int start = postingsEnum.startOffset();
                int end = postingsEnum.endOffset();
                BytesRef payload = postingsEnum.getPayload();
                SentenceContext sentenceContextInfo = null;
                if (payload != null) {
                    sentenceContextInfo = new SentenceContext(MWEMetadata.deserialize(payload.utf8ToString()));
                }
                if (sentenceContextInfo == null)
                    // no payload: record the occurrence with zeroed sentence info
                    result.add(new MWEInSentence(tString, start, end, 0, 0, 0));
                else {
                    result.add(new MWEInSentence(tString, start, end,
                            sentenceContextInfo.getFirstTokenIdx(),
                            sentenceContextInfo.getLastTokenIdx(),
                            sentenceContextInfo.getSentenceId()));

                    // widen the recorded sentence boundary if this occurrence ends later
                    Integer endBound = sentenceBoundaries.get(sentenceContextInfo.getSentenceId());
                    if (endBound == null || endBound < sentenceContextInfo.getLastTokenIdx())
                        sentenceBoundaries.put(sentenceContextInfo.getSentenceId(),
                                sentenceContextInfo.getLastTokenIdx());
                }
            }
        }
        luceneTerm = tiRef.next();
    }
    Collections.sort(result);
    return result;
}