Java Code Examples for org.apache.lucene.index.PostingsEnum#getPayload()
The following examples show how to use
org.apache.lucene.index.PostingsEnum#getPayload().
You can vote up the examples you like or vote down the ones you don't like,
and you can visit the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: TermVectorsResponse.java From Elasticsearch with Apache License 2.0 | 6 votes |
private void initValues(Terms curTerms, PostingsEnum posEnum, int termFreq) throws IOException { for (int j = 0; j < termFreq; j++) { int nextPos = posEnum.nextPosition(); if (curTerms.hasPositions()) { currentPositions[j] = nextPos; } if (curTerms.hasOffsets()) { currentStartOffset[j] = posEnum.startOffset(); currentEndOffset[j] = posEnum.endOffset(); } if (curTerms.hasPayloads()) { BytesRef curPayload = posEnum.getPayload(); if (curPayload != null) { currentPayloads[j] = new BytesArray(curPayload.bytes, 0, curPayload.length); } else { currentPayloads[j] = null; } } } }
Example 2
Source File: TermPosting.java From lucene-solr with Apache License 2.0 | 6 votes |
static TermPosting of(int position, PostingsEnum penum) throws IOException { TermPosting posting = new TermPosting(); // set position posting.position = position; // set offset (if available) int sOffset = penum.startOffset(); int eOffset = penum.endOffset(); if (sOffset >= 0 && eOffset >= 0) { posting.startOffset = sOffset; posting.endOffset = eOffset; } // set payload (if available) if (penum.getPayload() != null) { posting.payload = BytesRef.deepCopyOf(penum.getPayload()); } return posting; }
Example 3
Source File: SpanPayloadCheckQuery.java From lucene-solr with Apache License 2.0 | 6 votes |
@Override public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException { if (!matches) return; if (upto >= payloadToMatch.size()) { matches = false; return; } BytesRef payload = postings.getPayload(); if (payloadToMatch.get(upto) == null) { matches = payload == null; upto++; return; } if (payload == null) { matches = false; upto++; return; } matches = payloadToMatch.get(upto).bytesEquals(payload); upto++; }
Example 4
Source File: CustomSpanPayloadCheckQuery.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 6 votes |
@Override public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException { if (!matches) return; if (upto >= payloadToMatch.size()) { matches = false; return; } BytesRef payload = postings.getPayload(); if (payloadToMatch.get(upto) == null) { matches = payload == null; upto++; return; } if (payload == null) { matches = false; upto++; return; } matches = payloadToMatch.get(upto).bytesEquals(payload); upto++; }
Example 5
Source File: PayloadScoreQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException { BytesRef payload = postings.getPayload(); float payloadFactor = decoder.computePayloadFactor(payload); payloadScore = function.currentScore(docID(), getField(), in.startPosition(), in.endPosition(), payloadsSeen, payloadScore, payloadFactor); payloadsSeen++; }
Example 6
Source File: PayloadSpanCollector.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException { BytesRef payload = postings.getPayload(); if (payload == null) return; final byte[] bytes = new byte[payload.length]; System.arraycopy(payload.bytes, payload.offset, bytes, 0, payload.length); payloads.add(bytes); }
Example 7
Source File: FrequencyCtxSentenceBasedFBWorker.java From jate with GNU Lesser General Public License v3.0 | 5 votes |
private List<MWESentenceContext> collectTermOffsets(Terms termVectorLookup) throws IOException { List<MWESentenceContext> result = new ArrayList<>(); TermsEnum tiRef= termVectorLookup.iterator(); BytesRef luceneTerm = tiRef.next(); while (luceneTerm != null) { if (luceneTerm.length == 0) { luceneTerm = tiRef.next(); continue; } String tString = luceneTerm.utf8ToString(); if(!allCandidates.contains(tString)) { luceneTerm=tiRef.next(); continue; } PostingsEnum postingsEnum = tiRef.postings(null, PostingsEnum.ALL); //PostingsEnum postingsEnum = ti.postings(null, PostingsEnum.OFFSETS); int doc = postingsEnum.nextDoc(); //this should be just 1 doc, i.e., the constraint for getting this TV if (doc != PostingsEnum.NO_MORE_DOCS) { int totalOccurrence = postingsEnum.freq(); for (int i = 0; i < totalOccurrence; i++) { postingsEnum.nextPosition(); int start = postingsEnum.startOffset(); int end = postingsEnum.endOffset(); BytesRef payload=postingsEnum.getPayload(); int sentenceId=-1; if(payload!=null){ sentenceId=new SentenceContext(MWEMetadata.deserialize(payload.utf8ToString())).getSentenceId(); } result.add(new MWESentenceContext(tString,sentenceId, start, end)); } } luceneTerm = tiRef.next(); } Collections.sort(result); return result; }
Example 8
Source File: VectorScoreQuery.java From solr-vector-scoring with Apache License 2.0 | 4 votes |
@Override protected CustomScoreProvider getCustomScoreProvider(LeafReaderContext context) throws IOException { return new CustomScoreProvider(context){ @Override public float customScore(int docID, float subQueryScore, float valSrcScore) throws IOException { float score = 0; double docVectorNorm = 0; LeafReader reader = context.reader(); Terms terms = reader.getTermVector(docID, field); if(vector.size() != terms.size()){ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "indexed and input vector array must have same length"); } TermsEnum iter = terms.iterator(); BytesRef text; while ((text = iter.next()) != null) { String term = text.utf8ToString(); float payloadValue = 0f; PostingsEnum postings = iter.postings(null, PostingsEnum.ALL); while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { int freq = postings.freq(); while (freq-- > 0) postings.nextPosition(); BytesRef payload = postings.getPayload(); payloadValue = PayloadHelper.decodeFloat(payload.bytes, payload.offset); if (cosine) docVectorNorm += Math.pow(payloadValue, 2.0); } score = (float)(score + payloadValue * (vector.get(Integer.parseInt(term)))); } if (cosine) { if ((docVectorNorm == 0) || (queryVectorNorm == 0)) return 0f; return (float)(score / (Math.sqrt(docVectorNorm) * Math.sqrt(queryVectorNorm))); } return score; } }; }
Example 9
Source File: TestPayloadSpans.java From lucene-solr with Apache License 2.0 | 4 votes |
@Override public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException { if (postings.getPayload() != null) { payloads.add(BytesRef.deepCopyOf(postings.getPayload())); } }
Example 10
Source File: TestPositionIncrement.java From lucene-solr with Apache License 2.0 | 4 votes |
@Override public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException { if (postings.getPayload() != null) payloads.add(BytesRef.deepCopyOf(postings.getPayload())); }
Example 11
Source File: TermVectorComponent.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Serializes one field's term vector into the response: for every term it
 * emits term frequency, and — depending on {@code fieldOptions} — positions,
 * offsets, Base64-encoded payloads, document frequency, and a tf*idf value.
 *
 * @param docNL     response node for the current document; this field's data is added under {@code field}
 * @param termsEnum terms enum over the field's term vector
 * @throws IOException if reading the term vector or docFreq fails
 */
private void mapOneVector(NamedList<Object> docNL, FieldOptions fieldOptions, IndexReader reader, int docID, TermsEnum termsEnum, String field) throws IOException {
    NamedList<Object> fieldNL = new NamedList<>();
    docNL.add(field, fieldNL);

    BytesRef text;
    PostingsEnum dpEnum = null;
    while((text = termsEnum.next()) != null) {
        String term = text.utf8ToString();
        NamedList<Object> termInfo = new NamedList<>();
        fieldNL.add(term, termInfo);
        // For a single-doc term vector, totalTermFreq is the in-document freq.
        final int freq = (int) termsEnum.totalTermFreq();
        if (fieldOptions.termFreq == true) {
            termInfo.add("tf", freq);
        }

        // Request only the postings features the caller asked for.
        int dpEnumFlags = 0;
        dpEnumFlags |= fieldOptions.positions ? PostingsEnum.POSITIONS : 0;
        //payloads require offsets
        dpEnumFlags |= (fieldOptions.offsets || fieldOptions.payloads) ? PostingsEnum.OFFSETS : 0;
        dpEnumFlags |= fieldOptions.payloads ? PostingsEnum.PAYLOADS : 0;
        dpEnum = termsEnum.postings(dpEnum, dpEnumFlags);

        boolean atNextDoc = false;
        if (dpEnum != null) {
            dpEnum.nextDoc();
            atNextDoc = true;
        }

        if (atNextDoc && dpEnumFlags != 0) {
            // Lazily created so empty sections are omitted from the response.
            NamedList<Integer> positionsNL = null;
            NamedList<Number> theOffsets = null;
            NamedList<String> thePayloads = null;

            for (int i = 0; i < freq; i++) {
                final int pos = dpEnum.nextPosition();
                if (fieldOptions.positions && pos >= 0) {
                    if (positionsNL == null) {
                        positionsNL = new NamedList<>();
                        termInfo.add("positions", positionsNL);
                    }
                    positionsNL.add("position", pos);
                }

                // A negative startOffset means offsets were not indexed.
                int startOffset = fieldOptions.offsets ? dpEnum.startOffset() : -1;
                if (startOffset >= 0) {
                    if (theOffsets == null) {
                        theOffsets = new NamedList<>();
                        termInfo.add("offsets", theOffsets);
                    }
                    theOffsets.add("start", dpEnum.startOffset());
                    theOffsets.add("end", dpEnum.endOffset());
                }

                BytesRef payload = fieldOptions.payloads ? dpEnum.getPayload() : null;
                if (payload != null) {
                    if (thePayloads == null) {
                        thePayloads = new NamedList<>();
                        termInfo.add("payloads", thePayloads);
                    }
                    // Payloads are arbitrary bytes, so encode as Base64 text.
                    thePayloads.add("payload", Base64.byteArrayToBase64(payload.bytes, payload.offset, payload.length));
                }
            }
        }

        int df = 0;
        if (fieldOptions.docFreq || fieldOptions.tfIdf) {
            df = reader.docFreq(new Term(field, text));
        }

        if (fieldOptions.docFreq) {
            termInfo.add("df", df);
        }

        // TODO: this is not TF/IDF by anyone's definition!
        if (fieldOptions.tfIdf) {
            double tfIdfVal = ((double) freq) / df;
            termInfo.add("tf-idf", tfIdfVal);
        }
    }
}
Example 12
Source File: FrequencyCtxWindowBasedFBWorker.java From jate with GNU Lesser General Public License v3.0 | 4 votes |
private List<MWEInSentence> collectTermSentenceContext(Terms termVectorLookup, Map<Integer, Integer> sentenceBoundaries) throws IOException { List<MWEInSentence> result = new ArrayList<>(); TermsEnum tiRef = termVectorLookup.iterator(); BytesRef luceneTerm = tiRef.next(); while (luceneTerm != null) { if (luceneTerm.length == 0) { luceneTerm = tiRef.next(); continue; } String tString = luceneTerm.utf8ToString(); if (!allCandidates.contains(tString)) { luceneTerm = tiRef.next(); continue; } PostingsEnum postingsEnum = tiRef.postings(null, PostingsEnum.ALL); //PostingsEnum postingsEnum = ti.postings(null, PostingsEnum.OFFSETS); int doc = postingsEnum.nextDoc(); //this should be just 1 doc, i.e., the constraint for getting this TV if (doc != PostingsEnum.NO_MORE_DOCS) { int totalOccurrence = postingsEnum.freq(); for (int i = 0; i < totalOccurrence; i++) { postingsEnum.nextPosition(); int start = postingsEnum.startOffset(); int end = postingsEnum.endOffset(); BytesRef payload = postingsEnum.getPayload(); SentenceContext sentenceContextInfo = null; if (payload != null) { sentenceContextInfo = new SentenceContext(MWEMetadata.deserialize(payload.utf8ToString())); } if (sentenceContextInfo == null) result.add(new MWEInSentence(tString, start, end, 0, 0, 0)); else { result.add(new MWEInSentence(tString, start, end, sentenceContextInfo.getFirstTokenIdx(), sentenceContextInfo.getLastTokenIdx(), sentenceContextInfo.getSentenceId())); Integer endBound = sentenceBoundaries.get(sentenceContextInfo.getSentenceId()); if (endBound == null || endBound < sentenceContextInfo.getLastTokenIdx()) sentenceBoundaries.put(sentenceContextInfo.getSentenceId(), sentenceContextInfo.getLastTokenIdx()); } } } luceneTerm = tiRef.next(); } Collections.sort(result); return result; }