Java Code Examples for org.apache.lucene.analysis.tokenattributes.PayloadAttribute#getPayload()

The following examples show how to use org.apache.lucene.analysis.tokenattributes.PayloadAttribute#getPayload(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestNGramFilters.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Test NGramFilterFactory on tokens with payloads.
 *
 * <p>Runs the input {@code test|0.1} through a float-encoding DelimitedPayload filter and then
 * an NGram filter (gram sizes 1 to 2), and checks that every emitted token carries a payload
 * that decodes to {@code 0.1f}.
 */
public void testNGramFilterPayload() throws Exception {
  Reader reader = new StringReader("test|0.1");
  TokenStream stream = whitespaceMockTokenizer(reader);
  stream = tokenFilterFactory("DelimitedPayload", "encoder", "float").create(stream);
  stream = tokenFilterFactory("NGram", "minGramSize", "1", "maxGramSize", "2").create(stream);

  stream.reset();
  while (stream.incrementToken()) {
    PayloadAttribute payAttr = stream.getAttribute(PayloadAttribute.class);
    assertNotNull(payAttr);
    BytesRef payData = payAttr.getPayload();
    assertNotNull(payData);
    // A BytesRef is a slice of its backing array: decode from its offset, not from index 0.
    float payFloat = PayloadHelper.decodeFloat(payData.bytes, payData.offset);
    assertEquals(0.1f, payFloat, 0.0f);
  }
  stream.end();
  stream.close();
}
 
Example 2
Source File: TestNGramFilters.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Test EdgeNGramFilterFactory on tokens with payloads.
 *
 * <p>Runs the input {@code test|0.1} through a float-encoding DelimitedPayload filter and then
 * an EdgeNGram filter (gram sizes 1 to 2), and checks that every emitted token carries a payload
 * that decodes to {@code 0.1f}.
 */
public void testEdgeNGramFilterPayload() throws Exception {
  Reader reader = new StringReader("test|0.1");
  TokenStream stream = whitespaceMockTokenizer(reader);
  stream = tokenFilterFactory("DelimitedPayload", "encoder", "float").create(stream);
  stream = tokenFilterFactory("EdgeNGram", "minGramSize", "1", "maxGramSize", "2").create(stream);

  stream.reset();
  while (stream.incrementToken()) {
    PayloadAttribute payAttr = stream.getAttribute(PayloadAttribute.class);
    assertNotNull(payAttr);
    BytesRef payData = payAttr.getPayload();
    assertNotNull(payData);
    // A BytesRef is a slice of its backing array: decode from its offset, not from index 0.
    float payFloat = PayloadHelper.decodeFloat(payData.bytes, payData.offset);
    assertEquals(0.1f, payFloat, 0.0f);
  }
  stream.end();
  stream.close();
}
 
Example 3
Source File: TestDelimitedPayloadTokenFilterFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the DelimitedPayload filter with the {@code float} encoder attaches a payload
 * decoding to {@code 0.1f} to each whitespace-separated token of {@code the|0.1 quick|0.1 red|0.1}.
 */
public void testEncoder() throws Exception {
  Reader reader = new StringReader("the|0.1 quick|0.1 red|0.1");
  TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  ((Tokenizer)stream).setReader(reader);
  stream = tokenFilterFactory("DelimitedPayload", "encoder", "float").create(stream);

  stream.reset();
  while (stream.incrementToken()) {
    PayloadAttribute payAttr = stream.getAttribute(PayloadAttribute.class);
    assertNotNull(payAttr);
    // Check the payload itself before touching its fields, so a missing payload is reported
    // as an assertion failure rather than a NullPointerException.
    assertNotNull(payAttr.getPayload());
    byte[] payData = payAttr.getPayload().bytes;
    assertNotNull(payData);
    // The payload is a slice of payData: decode from its offset, not from index 0.
    float payFloat = PayloadHelper.decodeFloat(payData, payAttr.getPayload().offset);
    assertEquals(0.1f, payFloat, 0.0f);
  }
  stream.end();
  stream.close();
}
 
Example 4
Source File: TestDelimitedPayloadTokenFilterFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the DelimitedPayload filter honours a custom {@code *} delimiter: each token of
 * {@code the*0.1 quick*0.1 red*0.1} must carry a float payload decoding to {@code 0.1f}.
 */
public void testDelim() throws Exception {
  Reader reader = new StringReader("the*0.1 quick*0.1 red*0.1");
  TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  ((Tokenizer)stream).setReader(reader);
  stream = tokenFilterFactory("DelimitedPayload",
      "encoder", "float",
      "delimiter", "*").create(stream);
  stream.reset();
  while (stream.incrementToken()) {
    PayloadAttribute payAttr = stream.getAttribute(PayloadAttribute.class);
    assertNotNull(payAttr);
    // Check the payload itself before touching its fields, so a missing payload is reported
    // as an assertion failure rather than a NullPointerException.
    assertNotNull(payAttr.getPayload());
    byte[] payData = payAttr.getPayload().bytes;
    assertNotNull(payData);
    // The payload is a slice of payData: decode from its offset, not from index 0.
    float payFloat = PayloadHelper.decodeFloat(payData, payAttr.getPayload().offset);
    assertEquals(0.1f, payFloat, 0.0f);
  }
  stream.end();
  stream.close();
}
 
Example 5
Source File: DelimitedPayloadTokenFilterTest.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Advances {@code stream} by one token and verifies its term text and payload.
 *
 * <p>The term and payload attributes are looked up on the stream itself. When the token has no
 * payload, {@code expectPay} must be {@code null}; otherwise the payload slice
 * ({@code offset}/{@code length} of the returned BytesRef) must match {@code expectPay}
 * byte for byte.
 *
 * @param expected the expected term text of the next token
 * @param stream the token stream to advance; it must produce another token
 * @param expectPay the expected payload bytes, or {@code null} if no payload is expected
 * @throws Exception if advancing the stream fails
 */
void assertTermEquals(String expected, TokenStream stream, byte[] expectPay) throws Exception {
  CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
  PayloadAttribute payloadAtt = stream.getAttribute(PayloadAttribute.class);
  assertTrue(stream.incrementToken());
  assertEquals(expected, termAtt.toString());
  BytesRef payload = payloadAtt.getPayload();
  if (payload == null) {
    assertTrue("expectPay is not null and it should be", expectPay == null);
    return;
  }
  // Guard before reading expectPay.length: an unexpected payload must surface as an
  // assertion failure, not as a NullPointerException while building the message below.
  assertTrue("payload is not null but expectPay is", expectPay != null);
  assertTrue(payload.length + " does not equal: " + expectPay.length, payload.length == expectPay.length);
  for (int i = 0; i < expectPay.length; i++) {
    // Index relative to the slice start; the payload need not begin at bytes[0].
    byte actual = payload.bytes[i + payload.offset];
    assertTrue(expectPay[i] + " does not equal: " + actual, expectPay[i] == actual);
  }
}
 
Example 6
Source File: DelimitedPayloadTokenFilterTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Advances {@code stream} by one token and verifies its term text and payload, reading them
 * from the attribute instances supplied by the caller.
 *
 * <p>When the token has no payload, {@code expectPay} must be {@code null}; otherwise the
 * payload slice ({@code offset}/{@code length} of the returned BytesRef) must match
 * {@code expectPay} byte for byte.
 *
 * @param expected the expected term text of the next token
 * @param stream the token stream to advance; it must produce another token
 * @param termAtt the attribute the term text is read from after advancing
 * @param payAtt the attribute the payload is read from after advancing
 * @param expectPay the expected payload bytes, or {@code null} if no payload is expected
 * @throws Exception if advancing the stream fails
 */
void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt, PayloadAttribute payAtt, byte[] expectPay) throws Exception {
  assertTrue(stream.incrementToken());
  assertEquals(expected, termAtt.toString());
  BytesRef payload = payAtt.getPayload();
  if (payload == null) {
    assertTrue("expectPay is not null and it should be", expectPay == null);
    return;
  }
  // Guard before reading expectPay.length: an unexpected payload must surface as an
  // assertion failure, not as a NullPointerException while building the message below.
  assertTrue("payload is not null but expectPay is", expectPay != null);
  assertTrue(payload.length + " does not equal: " + expectPay.length, payload.length == expectPay.length);
  for (int i = 0; i < expectPay.length; i++) {
    // Index relative to the slice start; the payload need not begin at bytes[0].
    byte actual = payload.bytes[i + payload.offset];
    assertTrue(expectPay[i] + " does not equal: " + actual, expectPay[i] == actual);
  }
}
 
Example 7
Source File: MemoryIndex.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Consumes {@code tokenStream} and writes one postings entry per token through
 * {@code postingsWriter}, updating the counters and slice bookkeeping held in {@code info}.
 *
 * <p>Each entry consists of the token position, followed by the start and end offsets when
 * {@code storeOffsets} is set, followed by a payload index when {@code storePayloads} is set.
 * The stream is bound in a try-with-resources, so it is closed when this method returns.
 *
 * @param info accumulated state that is updated in place (token counts, term slices,
 *     last position and last offset)
 * @param tokenStream the tokens to store; it is reset, consumed, ended and closed here
 * @param positionIncrementGap added to {@code info.lastPosition} to seed the position when
 *     {@code info} already holds tokens
 * @param offsetGap added to {@code info.lastOffset} to seed the offset base when
 *     {@code info} already holds tokens
 * @throws RuntimeException wrapping any {@link IOException} raised while consuming the stream
 */
private void storeTerms(Info info, TokenStream tokenStream, int positionIncrementGap, int offsetGap) {

    // pos is advanced by each token's position increment before it is written.
    int pos = -1;
    // offset is a base added to every start/end offset reported by the stream.
    int offset = 0;
    if (info.numTokens > 0) {
      // Tokens were stored earlier: continue from the previous end, separated by the gaps.
      pos = info.lastPosition + positionIncrementGap;
      offset = info.lastOffset + offsetGap;
    }

    try (TokenStream stream = tokenStream) {
      TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
      PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
      OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
      // Left null when payloads are not stored; only dereferenced under the same storePayloads check below.
      PayloadAttribute payloadAtt = storePayloads ? stream.addAttribute(PayloadAttribute.class) : null;
      stream.reset();

      while (stream.incrementToken()) {
//        if (DEBUG) System.err.println("token='" + term + "'");
        info.numTokens++;
        final int posIncr = posIncrAttribute.getPositionIncrement();
        if (posIncr == 0) {
          // A zero increment leaves pos unchanged; such tokens are counted as overlaps.
          info.numOverlapTokens++;
        }
        pos += posIncr;
        int ord = info.terms.add(termAtt.getBytesRef());
        if (ord < 0) {
          // NOTE(review): a negative ord presumably means the term was already present, encoded
          // as -(ord)-1 - confirm against info.terms.add. Writing resumes at the slice end
          // recorded for that term.
          ord = (-ord) - 1;
          postingsWriter.reset(info.sliceArray.end[ord]);
        } else {
          // Non-negative ord: begin a fresh slice and remember where it starts.
          info.sliceArray.start[ord] = postingsWriter.startNewSlice();
        }
        info.sliceArray.freq[ord]++;
        info.maxTermFrequency = Math.max(info.maxTermFrequency, info.sliceArray.freq[ord]);
        info.sumTotalTermFreq++;
        postingsWriter.writeInt(pos);
        if (storeOffsets) {
          postingsWriter.writeInt(offsetAtt.startOffset() + offset);
          postingsWriter.writeInt(offsetAtt.endOffset() + offset);
        }
        if (storePayloads) {
          final BytesRef payload = payloadAtt.getPayload();
          final int pIndex;
          if (payload == null || payload.length == 0) {
            // -1 is written in place of an index when the token has no (or an empty) payload.
            pIndex = -1;
          } else {
            pIndex = payloadsBytesRefs.append(payload);
          }
          postingsWriter.writeInt(pIndex);
        }
        // Record where this term's slice now ends so a later occurrence can resume from here.
        info.sliceArray.end[ord] = postingsWriter.getCurrentOffset();
      }
      stream.end();
      if (info.numTokens > 0) {
        // Remember where this stream finished; these values seed pos/offset on the next call.
        // The end offset is read after stream.end().
        info.lastPosition = pos;
        info.lastOffset = offsetAtt.endOffset() + offset;
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }