Java Code Examples for org.apache.lucene.analysis.payloads.PayloadHelper

The following examples show how to use org.apache.lucene.analysis.payloads.PayloadHelper. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: lucene-solr   Source File: TestNGramFilters.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Test NGramFilterFactory on tokens with payloads.
 *
 * <p>Tokenizes the input {@code "test|0.1"}, runs it through the factories
 * registered as "DelimitedPayload" (encoder=float) and "NGram"
 * (minGramSize=1, maxGramSize=2), and asserts that every token produced by
 * the resulting stream has a non-null payload that decodes to {@code 0.1f}.
 *
 * @throws Exception if building or consuming the token stream fails
 */
public void testNGramFilterPayload() throws Exception {
  Reader reader = new StringReader("test|0.1");
  TokenStream stream = whitespaceMockTokenizer(reader);
  stream = tokenFilterFactory("DelimitedPayload", "encoder", "float").create(stream);
  stream = tokenFilterFactory("NGram", "minGramSize", "1", "maxGramSize", "2").create(stream);

  stream.reset();
  while (stream.incrementToken()) {
    PayloadAttribute payAttr = stream.getAttribute(PayloadAttribute.class);
    assertNotNull(payAttr);
    BytesRef payData = payAttr.getPayload();
    assertNotNull(payData);
    // The valid bytes of a BytesRef start at its offset, not necessarily at
    // index 0 of the backing array, so decode from the offset.
    float payFloat = PayloadHelper.decodeFloat(payData.bytes, payData.offset);
    assertEquals(0.1f, payFloat, 0.0f);
  }
  stream.end();
  stream.close();
}
 
Example 2
Source Project: lucene-solr   Source File: TestNGramFilters.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Test EdgeNGramFilterFactory on tokens with payloads.
 *
 * <p>Tokenizes the input {@code "test|0.1"}, runs it through the factories
 * registered as "DelimitedPayload" (encoder=float) and "EdgeNGram"
 * (minGramSize=1, maxGramSize=2), and asserts that every token produced by
 * the resulting stream has a non-null payload that decodes to {@code 0.1f}.
 *
 * @throws Exception if building or consuming the token stream fails
 */
public void testEdgeNGramFilterPayload() throws Exception {
  Reader reader = new StringReader("test|0.1");
  TokenStream stream = whitespaceMockTokenizer(reader);
  stream = tokenFilterFactory("DelimitedPayload", "encoder", "float").create(stream);
  stream = tokenFilterFactory("EdgeNGram", "minGramSize", "1", "maxGramSize", "2").create(stream);

  stream.reset();
  while (stream.incrementToken()) {
    PayloadAttribute payAttr = stream.getAttribute(PayloadAttribute.class);
    assertNotNull(payAttr);
    BytesRef payData = payAttr.getPayload();
    assertNotNull(payData);
    // The valid bytes of a BytesRef start at its offset, not necessarily at
    // index 0 of the backing array, so decode from the offset.
    float payFloat = PayloadHelper.decodeFloat(payData.bytes, payData.offset);
    assertEquals(0.1f, payFloat, 0.0f);
  }
  stream.end();
  stream.close();
}
 
Example 3
Source Project: mtas   Source File: MtasBasicParser.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Combines a textual float value with an existing float payload by keeping
 * the larger of the two.
 *
 * <ul>
 * <li>If {@code value} is null, {@code payload} is returned unchanged (it may
 * itself be null).</li>
 * <li>If {@code payload} is null, the parsed {@code value} is encoded into a
 * new payload.</li>
 * <li>Otherwise the float decoded from {@code payload} (starting at its
 * offset) is compared with the parsed {@code value} and the maximum is
 * encoded into a new payload.</li>
 * </ul>
 *
 * @param value the value, parsed with {@link Float#parseFloat(String)}
 * @param payload the payload
 * @param filter the filter; not consulted by this implementation
 * @return the bytes ref
 */
private BytesRef computeMaximumFilteredPayload(String value, BytesRef payload,
    String filter) {
  // No new value to merge in: the existing payload stands as-is.
  if (value == null) {
    return payload;
  }
  // No existing payload: the new value becomes the payload.
  if (payload == null) {
    return new BytesRef(PayloadHelper.encodeFloat(Float.parseFloat(value)));
  }
  // Both present: decode the current payload first, then parse the value,
  // and keep whichever is larger.
  float current = PayloadHelper.decodeFloat(payload.bytes, payload.offset);
  float candidate = Float.parseFloat(value);
  float maximum = Math.max(current, candidate);
  return new BytesRef(PayloadHelper.encodeFloat(maximum));
}
 
Example 4
Source Project: Elasticsearch   Source File: TermPosition.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Decodes the payload as a float.
 *
 * @param defaultMissing value to return when the payload is null or has
 *        length zero
 * @return the float decoded from the payload bytes starting at the payload's
 *         offset, or {@code defaultMissing} when there is no payload data
 */
public float payloadAsFloat(float defaultMissing) {
    // Guard: nothing to decode.
    if (payload == null || payload.length == 0) {
        return defaultMissing;
    }
    return PayloadHelper.decodeFloat(payload.bytes, payload.offset);
}
 
Example 5
Source Project: Elasticsearch   Source File: TermPosition.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Decodes the payload as an int.
 *
 * @param defaultMissing value to return when the payload is null or has
 *        length zero
 * @return the int decoded from the payload bytes starting at the payload's
 *         offset, or {@code defaultMissing} when there is no payload data
 */
public int payloadAsInt(int defaultMissing) {
    // Guard: nothing to decode.
    if (payload == null || payload.length == 0) {
        return defaultMissing;
    }
    return PayloadHelper.decodeInt(payload.bytes, payload.offset);
}
 
Example 6
Source Project: mtas   Source File: MtasToken.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Renders this token as a single fixed-layout line made of, in order: the
 * zero-padded id, the real offset range, the offset range, the position(s),
 * the parent id, the payload decoded as a float, the prefix and the postfix.
 * Absent parts are rendered as dash placeholders, and each "provide" flag
 * that is false is marked with a trailing {@code *}.
 *
 * @return the formatted line
 */
@Override
public String toString() {
  StringBuilder line = new StringBuilder();

  // Id, zero-padded to five digits.
  line.append("[").append(String.format("%05d", getId())).append("] ");

  // Real offset range.
  if (getRealOffsetStart() == null) {
    line.append("[-------,-------]");
  } else {
    line.append("[").append(String.format("%07d", getRealOffsetStart()))
        .append("-").append(String.format("%07d", getRealOffsetEnd()))
        .append("]");
  }
  line.append(provideRealOffset ? "  " : "* ");

  // Offset range.
  if (getOffsetStart() == null) {
    line.append("[-------,-------]");
  } else {
    line.append("[").append(String.format("%07d", getOffsetStart()))
        .append("-").append(String.format("%07d", getOffsetEnd()))
        .append("]");
  }
  line.append(provideOffset ? "  " : "* ");

  // Position(s), right-aligned in an eleven character column.
  String positionText;
  if (getPositionLength() == null) {
    positionText = "";
  } else if (getPositionStart().equals(getPositionEnd())) {
    positionText = "[" + getPositionStart() + "]";
  } else if ((getPositions() == null) || (getPositions().length == (1
      + getPositionEnd() - getPositionStart()))) {
    // Either no explicit position list, or one whose length matches the
    // start..end span: print as a range.
    positionText = "[" + getPositionStart() + "-" + getPositionEnd() + "]";
  } else {
    positionText = Arrays.toString(getPositions());
  }
  line.append(String.format("%11s", positionText));

  // Parent id.
  if (getParentId() == null) {
    line.append("[-----]");
  } else {
    line.append("[").append(String.format("%05d", getParentId())).append("]");
  }
  line.append(provideParentId ? "  " : "* ");

  // Payload, decoded as a float from exactly the payload's own byte slice.
  BytesRef payload = getPayload();
  if (payload == null) {
    line.append("[------] ");
  } else {
    byte[] payloadBytes = Arrays.copyOfRange(payload.bytes, payload.offset,
        (payload.offset + payload.length));
    line.append("[")
        .append(String.format("%.4f", PayloadHelper.decodeFloat(payloadBytes)))
        .append("] ");
  }

  // Prefix (right-aligned in 25 characters) and postfix.
  line.append(String.format("%25s", "[" + getPrefix() + "]")).append(" ");
  line.append((getPostfix() == null) ? "---" : "[" + getPostfix() + "]")
      .append(" ");
  return line.toString();
}
 
Example 7
Source Project: solr-vector-scoring   Source File: VectorScoreQuery.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates the score provider that scores a document against the query vector.
 *
 * <p>For each term in the document's term vector for {@code field}, the
 * payload is decoded as a float and multiplied by the query vector entry whose
 * index is the term text parsed as an integer; the products are summed. When
 * {@code cosine} is set, the sum is divided by the product of the document and
 * query vector norms, and 0 is returned if either norm is 0.
 *
 * @param context the leaf reader context the provider is bound to
 * @return a provider computing the vector score per document
 * @throws IOException declared by the overridden method
 */
@Override
protected CustomScoreProvider getCustomScoreProvider(LeafReaderContext context) throws IOException {
	return new CustomScoreProvider(context) {
		@Override
		public float customScore(int docID, float subQueryScore, float valSrcScore) throws IOException {
			float score = 0;
			double docVectorNorm = 0;
			LeafReader reader = context.reader();
			Terms terms = reader.getTermVector(docID, field);
			// getTermVector returns null when no term vector was stored for this
			// document/field; report that instead of failing with an NPE below.
			if (terms == null) {
				throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
						"no term vector available for field '" + field + "' in document " + docID);
			}
			if (vector.size() != terms.size()) {
				throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "indexed and input vector array must have same length");
			}
			TermsEnum iter = terms.iterator();
			BytesRef text;
			while ((text = iter.next()) != null) {
				String term = text.utf8ToString();
				float payloadValue = 0f;
				PostingsEnum postings = iter.postings(null, PostingsEnum.ALL);
				while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
					// Advance to the last position so getPayload() refers to it.
					int freq = postings.freq();
					while (freq-- > 0) {
						postings.nextPosition();
					}

					BytesRef payload = postings.getPayload();
					// getPayload returns null when the position carries no payload.
					if (payload == null) {
						throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
								"term '" + term + "' in field '" + field + "' has no payload");
					}
					payloadValue = PayloadHelper.decodeFloat(payload.bytes, payload.offset);

					if (cosine) {
						docVectorNorm += Math.pow(payloadValue, 2.0);
					}
				}

				// The term text is the index into the query vector.
				score = (float)(score + payloadValue * (vector.get(Integer.parseInt(term))));
			}

			if (cosine) {
				// Avoid dividing by zero for an all-zero document or query vector.
				if ((docVectorNorm == 0) || (queryVectorNorm == 0)) {
					return 0f;
				}
				return (float)(score / (Math.sqrt(docVectorNorm) * Math.sqrt(queryVectorNorm)));
			}

			return score;
		}
	};
}
 
Example 8
Source Project: mtas   Source File: MtasTokenCollection.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Exports the token collection as a table of strings.
 *
 * <p>Row 0 is a header naming the fifteen columns; each following row
 * describes one token, in iteration order. Columns for which the token has no
 * data are left {@code null}, and the "provide" columns contain {@code "1"}
 * when the corresponding flag is true and {@code null} otherwise.
 *
 * @return the list
 * @throws MtasParserException the mtas parser exception
 */
public String[][] getList() throws MtasParserException {
  String[][] table = new String[(tokenCollection.size() + 1)][];
  table[0] = new String[] { "id", "start real offset", "end real offset",
      "provide real offset", "start offset", "end offset", "provide offset",
      "start position", "end position", "multiple positions", "parent",
      "provide parent", "payload", "prefix", "postfix" };
  int rowIndex = 1;
  for (Iterator<MtasToken> cursor = this.iterator(); cursor.hasNext();) {
    MtasToken token = cursor.next();
    // Every cell starts out null; only the available data is filled in.
    String[] cells = new String[15];
    cells[0] = token.getId().toString();
    if (token.getRealOffsetStart() != null) {
      cells[1] = token.getRealOffsetStart().toString();
      cells[2] = token.getRealOffsetEnd().toString();
      cells[3] = token.getProvideRealOffset() ? "1" : null;
    }
    if (token.getOffsetStart() != null) {
      cells[4] = token.getOffsetStart().toString();
      cells[5] = token.getOffsetEnd().toString();
      cells[6] = token.getProvideOffset() ? "1" : null;
    }
    if (token.getPositionLength() != null) {
      // A single position, no explicit position list, or a list whose length
      // matches the start..end span is reported as start/end; anything else
      // is reported as the explicit list of positions.
      boolean reportAsRange = token.getPositionStart()
          .equals(token.getPositionEnd())
          || (token.getPositions() == null)
          || (token.getPositions().length == (1 + token.getPositionEnd()
              - token.getPositionStart()));
      if (reportAsRange) {
        cells[7] = token.getPositionStart().toString();
        cells[8] = token.getPositionEnd().toString();
      } else {
        cells[9] = Arrays.toString(token.getPositions());
      }
    }
    if (token.getParentId() != null) {
      cells[10] = token.getParentId().toString();
      cells[11] = token.getProvideParentId() ? "1" : null;
    }
    BytesRef payload = token.getPayload();
    if (payload != null) {
      // Decode from a copy of exactly the payload's own byte slice.
      byte[] payloadBytes = Arrays.copyOfRange(payload.bytes, payload.offset,
          (payload.offset + payload.length));
      cells[12] = Float.toString(PayloadHelper.decodeFloat(payloadBytes));
    }
    cells[13] = token.getPrefix();
    cells[14] = token.getPostfix();
    table[rowIndex++] = cells;
  }
  return table;
}