Java Code Examples for org.apache.lucene.index.PostingsEnum#getPayload()

The following examples show how to use org.apache.lucene.index.PostingsEnum#getPayload(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: TermVectorsResponse.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the next {@code termFreq} positions of the current term from {@code posEnum} and
 * copies whatever {@code curTerms} reports as available (positions, offsets, payloads)
 * into the per-term buffers at the matching index.
 *
 * @param curTerms the terms of the current field; decides which buffers are filled
 * @param posEnum  postings positioned on the current term and document
 * @param termFreq number of positions to consume from {@code posEnum}
 * @throws IOException if reading from the postings fails
 */
private void initValues(Terms curTerms, PostingsEnum posEnum, int termFreq) throws IOException {
    for (int j = 0; j < termFreq; j++) {
        // nextPosition() must be called for every occurrence, even when positions are not
        // stored, because it advances the enum to the offsets/payload of that occurrence.
        int nextPos = posEnum.nextPosition();
        if (curTerms.hasPositions()) {
            currentPositions[j] = nextPos;
        }
        if (curTerms.hasOffsets()) {
            currentStartOffset[j] = posEnum.startOffset();
            currentEndOffset[j] = posEnum.endOffset();
        }
        if (curTerms.hasPayloads()) {
            BytesRef curPayload = posEnum.getPayload();
            if (curPayload != null) {
                // A BytesRef is a slice of its backing array: the valid bytes start at
                // curPayload.offset, which is not guaranteed to be 0.
                currentPayloads[j] = new BytesArray(curPayload.bytes, curPayload.offset, curPayload.length);
            } else {
                currentPayloads[j] = null;
            }
        }
    }
}
 
Example 2
Source File: TermPosting.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@code TermPosting} for the occurrence {@code penum} is currently positioned on.
 *
 * @param position the position of the occurrence, stored as-is
 * @param penum    postings enum already advanced to the occurrence
 * @return a new posting holding the position and, when available, offsets and a payload copy
 * @throws IOException if reading from the postings fails
 */
static TermPosting of(int position, PostingsEnum penum) throws IOException {
  TermPosting posting = new TermPosting();

  // set position
  posting.position = position;

  // set offset (if available); both values must be non-negative to be recorded
  int sOffset = penum.startOffset();
  int eOffset = penum.endOffset();
  if (sOffset >= 0 && eOffset >= 0) {
    posting.startOffset = sOffset;
    posting.endOffset = eOffset;
  }

  // set payload (if available); read it once and deep-copy it so the posting
  // does not share the BytesRef handed out by the enum
  BytesRef payload = penum.getPayload();
  if (payload != null) {
    posting.payload = BytesRef.deepCopyOf(payload);
  }

  return posting;
}
 
Example 3
Source File: SpanPayloadCheckQuery.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Compares the payload at the current posting with the next expected payload and
 * records the outcome in {@code matches}. Once a mismatch has been seen, or more
 * leaves arrive than there are expected payloads, the collector stays unmatched.
 */
@Override
public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
  if (!matches) {
    // a previous leaf already failed; nothing left to check
    return;
  }
  if (upto >= payloadToMatch.size()) {
    // more leaves than expected payloads
    matches = false;
    return;
  }

  final BytesRef actual = postings.getPayload();
  if (payloadToMatch.get(upto) == null) {
    // an expected null only matches a missing payload
    matches = (actual == null);
  } else {
    matches = (actual != null) && payloadToMatch.get(upto).bytesEquals(actual);
  }
  upto++;
}
 
Example 4
Source File: CustomSpanPayloadCheckQuery.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Checks the payload of the current posting against the expected payload at index
 * {@code upto} and stores the result in {@code matches}. After a failed check, or when
 * there are no expected payloads left, the collector remains unmatched.
 */
@Override
public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
    if (!matches) {
        // an earlier leaf already failed
        return;
    }
    if (upto >= payloadToMatch.size()) {
        // ran out of expected payloads
        matches = false;
        return;
    }

    final BytesRef found = postings.getPayload();
    final boolean expectNone = payloadToMatch.get(upto) == null;
    if (expectNone) {
        matches = (found == null);
    } else if (found == null) {
        matches = false;
    } else {
        matches = payloadToMatch.get(upto).bytesEquals(found);
    }
    upto++;
}
 
Example 5
Source File: PayloadScoreQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes the payload of the current posting into a factor, folds it into the running
 * {@code payloadScore} via {@code function}, and counts the payload as seen.
 */
@Override
public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
  final float factor = decoder.computePayloadFactor(postings.getPayload());
  payloadScore = function.currentScore(
      docID(), getField(), in.startPosition(), in.endPosition(), payloadsSeen, payloadScore, factor);
  payloadsSeen += 1;
}
 
Example 6
Source File: PayloadSpanCollector.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Appends a private copy of the current posting's payload bytes to {@code payloads};
 * postings without a payload are ignored.
 */
@Override
public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
  final BytesRef current = postings.getPayload();
  if (current != null) {
    // copy only the valid slice [offset, offset + length) of the backing array
    final byte[] copy = new byte[current.length];
    System.arraycopy(current.bytes, current.offset, copy, 0, current.length);
    payloads.add(copy);
  }
}
 
Example 7
Source File: FrequencyCtxSentenceBasedFBWorker.java    From jate with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Walks every term of the given term vector and, for each non-empty term contained in
 * {@code allCandidates}, records one {@code MWESentenceContext} per occurrence with its
 * offsets and the sentence id decoded from the payload (-1 when there is no payload).
 *
 * @param termVectorLookup the terms to scan
 * @return the collected contexts, sorted by their natural ordering
 * @throws IOException if reading terms or postings fails
 */
private List<MWESentenceContext> collectTermOffsets(Terms termVectorLookup) throws IOException {
    final List<MWESentenceContext> contexts = new ArrayList<>();
    final TermsEnum termsEnum = termVectorLookup.iterator();

    for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
        if (term.length == 0) {
            continue;
        }
        final String termText = term.utf8ToString();
        if (!allCandidates.contains(termText)) {
            continue;
        }

        final PostingsEnum postings = termsEnum.postings(null, PostingsEnum.ALL);
        // Only the first document is consulted: the term vector is expected to cover a single doc.
        if (postings.nextDoc() == PostingsEnum.NO_MORE_DOCS) {
            continue;
        }

        final int occurrences = postings.freq();
        for (int k = 0; k < occurrences; k++) {
            postings.nextPosition();
            final int start = postings.startOffset();
            final int end = postings.endOffset();
            final BytesRef payload = postings.getPayload();

            int sentenceId = -1;
            if (payload != null) {
                final SentenceContext sentence =
                        new SentenceContext(MWEMetadata.deserialize(payload.utf8ToString()));
                sentenceId = sentence.getSentenceId();
            }
            contexts.add(new MWESentenceContext(termText, sentenceId, start, end));
        }
    }

    Collections.sort(contexts);
    return contexts;
}
 
Example 8
Source File: VectorScoreQuery.java    From solr-vector-scoring with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a score provider that scores a document by combining the query {@code vector}
 * with the per-term float payloads stored in the document's term vector for {@code field}.
 * Each term text is parsed as an integer index into {@code vector}. The result is the dot
 * product, or the cosine similarity when {@code cosine} is set.
 *
 * @throws SolrException (from {@code customScore}) when the document has no term vector for
 *         the field, when the vector lengths differ, or when a term carries no payload
 */
@Override
protected CustomScoreProvider getCustomScoreProvider(LeafReaderContext context) throws IOException {
	return new CustomScoreProvider(context){
		@Override
		public float customScore(int docID, float subQueryScore, float valSrcScore) throws IOException {
			float score = 0;
			double docVectorNorm = 0;
			LeafReader reader = context.reader();
			Terms terms = reader.getTermVector(docID, field);
			// getTermVector returns null when the document has no term vector for the field;
			// report that explicitly instead of failing with a NullPointerException below.
			if (terms == null) {
				throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "no term vector found for field " + field + " in document " + docID);
			}
			if(vector.size() != terms.size()){
				throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "indexed and input vector array must have same length");
			}
			TermsEnum iter = terms.iterator();
			BytesRef text;
			while ((text = iter.next()) != null) {
				String term = text.utf8ToString();
				float payloadValue = 0f;
				PostingsEnum postings = iter.postings(null, PostingsEnum.ALL);
				while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
					// Advance to the last position of the term; its payload is the one decoded.
					int freq = postings.freq();
					while (freq-- > 0) postings.nextPosition();

					BytesRef payload = postings.getPayload();
					// getPayload returns null when nothing was indexed at this position.
					if (payload == null) {
						throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "term " + term + " of field " + field + " has no payload in document " + docID);
					}
					payloadValue = PayloadHelper.decodeFloat(payload.bytes, payload.offset);

					if (cosine)
						docVectorNorm += Math.pow(payloadValue, 2.0);
				}

				score = (float)(score + payloadValue * (vector.get(Integer.parseInt(term))));
			}

			if (cosine) {
				// a zero norm would divide by zero; define the similarity as 0 in that case
				if ((docVectorNorm == 0) || (queryVectorNorm == 0)) return 0f;
				return (float)(score / (Math.sqrt(docVectorNorm) * Math.sqrt(queryVectorNorm)));
			}

			return score;
		}
	};
}
 
Example 9
Source File: TestPayloadSpans.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Stores a deep copy of the current posting's payload in {@code payloads};
 * postings without a payload are skipped.
 */
@Override
public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
  final boolean payloadMissing = postings.getPayload() == null;
  if (payloadMissing) {
    return;
  }
  payloads.add(BytesRef.deepCopyOf(postings.getPayload()));
}
 
Example 10
Source File: TestPositionIncrement.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Adds a deep copy of the payload at the current posting to {@code payloads},
 * doing nothing when the posting has no payload.
 */
@Override
public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
  if (postings.getPayload() == null) {
    return;
  }
  payloads.add(BytesRef.deepCopyOf(postings.getPayload()));
}
 
Example 11
Source File: TermVectorComponent.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Adds one entry for {@code field} to {@code docNL} and fills it with one sub-list per term
 * returned by {@code termsEnum}. Depending on {@code fieldOptions}, each term entry receives
 * "tf", "positions", "offsets", "payloads" (Base64 encoded), "df" and "tf-idf" values.
 *
 * @param docNL        the per-document list that receives the field entry
 * @param fieldOptions selects which pieces of information are emitted
 * @param reader       used to look up document frequencies
 * @param docID        not used by this method
 * @param termsEnum    enumerates the terms of the field's term vector
 * @param field        the field name
 * @throws IOException if reading terms, postings or document frequencies fails
 */
private void mapOneVector(NamedList<Object> docNL, FieldOptions fieldOptions, IndexReader reader, int docID, TermsEnum termsEnum, String field) throws IOException {
  final NamedList<Object> fieldNL = new NamedList<>();
  docNL.add(field, fieldNL);

  PostingsEnum postings = null; // handed back to postings() on each term for reuse
  for (BytesRef text = termsEnum.next(); text != null; text = termsEnum.next()) {
    final NamedList<Object> termInfo = new NamedList<>();
    fieldNL.add(text.utf8ToString(), termInfo);

    final int freq = (int) termsEnum.totalTermFreq();
    if (fieldOptions.termFreq) {
      termInfo.add("tf", freq);
    }

    // Request only the postings features that were asked for.
    int flags = 0;
    if (fieldOptions.positions) {
      flags |= PostingsEnum.POSITIONS;
    }
    // payloads require offsets
    if (fieldOptions.offsets || fieldOptions.payloads) {
      flags |= PostingsEnum.OFFSETS;
    }
    if (fieldOptions.payloads) {
      flags |= PostingsEnum.PAYLOADS;
    }
    postings = termsEnum.postings(postings, flags);

    if (postings != null) {
      postings.nextDoc();

      if (flags != 0) {
        // The sub-lists are created lazily so that empty ones are never emitted.
        NamedList<Integer> positionsNL = null;
        NamedList<Number> offsetsNL = null;
        NamedList<String> payloadsNL = null;

        for (int occurrence = 0; occurrence < freq; occurrence++) {
          final int pos = postings.nextPosition();
          if (fieldOptions.positions && pos >= 0) {
            if (positionsNL == null) {
              positionsNL = new NamedList<>();
              termInfo.add("positions", positionsNL);
            }
            positionsNL.add("position", pos);
          }

          final int startOffset = fieldOptions.offsets ? postings.startOffset() : -1;
          if (startOffset >= 0) {
            if (offsetsNL == null) {
              offsetsNL = new NamedList<>();
              termInfo.add("offsets", offsetsNL);
            }
            offsetsNL.add("start", startOffset);
            offsetsNL.add("end", postings.endOffset());
          }

          final BytesRef payload = fieldOptions.payloads ? postings.getPayload() : null;
          if (payload != null) {
            if (payloadsNL == null) {
              payloadsNL = new NamedList<>();
              termInfo.add("payloads", payloadsNL);
            }
            payloadsNL.add("payload", Base64.byteArrayToBase64(payload.bytes, payload.offset, payload.length));
          }
        }
      }
    }

    // The document frequency is only looked up when one of the outputs needs it.
    final boolean needsDf = fieldOptions.docFreq || fieldOptions.tfIdf;
    final int df = needsDf ? reader.docFreq(new Term(field, text)) : 0;

    if (fieldOptions.docFreq) {
      termInfo.add("df", df);
    }

    // TODO: this is not TF/IDF by anyone's definition!
    if (fieldOptions.tfIdf) {
      final double tfIdfVal = ((double) freq) / df;
      termInfo.add("tf-idf", tfIdfVal);
    }
  }
}
 
Example 12
Source File: FrequencyCtxWindowBasedFBWorker.java    From jate with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Walks every term of the given term vector and, for each non-empty term contained in
 * {@code allCandidates}, records one {@code MWEInSentence} per occurrence. When an
 * occurrence carries a payload, the sentence context decoded from it supplies the token
 * indexes and sentence id, and {@code sentenceBoundaries} is updated so that each sentence
 * id maps to the largest last-token index seen. Without a payload those three values are 0.
 *
 * @param termVectorLookup   the terms to scan
 * @param sentenceBoundaries map from sentence id to last-token index, updated in place
 * @return the collected occurrences, sorted by their natural ordering
 * @throws IOException if reading terms or postings fails
 */
private List<MWEInSentence> collectTermSentenceContext(Terms termVectorLookup,
                                                            Map<Integer, Integer> sentenceBoundaries) throws IOException {
    final List<MWEInSentence> occurrences = new ArrayList<>();
    final TermsEnum termsEnum = termVectorLookup.iterator();

    for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
        if (term.length == 0) {
            continue;
        }
        final String termText = term.utf8ToString();
        if (!allCandidates.contains(termText)) {
            continue;
        }

        final PostingsEnum postings = termsEnum.postings(null, PostingsEnum.ALL);
        // Only the first document is consulted: the term vector is expected to cover a single doc.
        if (postings.nextDoc() == PostingsEnum.NO_MORE_DOCS) {
            continue;
        }

        final int count = postings.freq();
        for (int k = 0; k < count; k++) {
            postings.nextPosition();
            final int start = postings.startOffset();
            final int end = postings.endOffset();
            final BytesRef payload = postings.getPayload();

            if (payload == null) {
                occurrences.add(new MWEInSentence(termText, start, end, 0, 0, 0));
                continue;
            }

            final SentenceContext sentence =
                    new SentenceContext(MWEMetadata.deserialize(payload.utf8ToString()));
            occurrences.add(new MWEInSentence(termText, start, end,
                    sentence.getFirstTokenIdx(),
                    sentence.getLastTokenIdx(),
                    sentence.getSentenceId()));

            // Keep the furthest last-token index observed for this sentence.
            final Integer knownEnd = sentenceBoundaries.get(sentence.getSentenceId());
            if (knownEnd == null || knownEnd < sentence.getLastTokenIdx()) {
                sentenceBoundaries.put(sentence.getSentenceId(), sentence.getLastTokenIdx());
            }
        }
    }

    Collections.sort(occurrences);
    return occurrences;
}