Java Code Examples for org.apache.lucene.analysis.tokenattributes.PayloadAttribute

The following examples show how to use org.apache.lucene.analysis.tokenattributes.PayloadAttribute. These examples are extracted from open-source projects. You can vote up the examples you find useful or vote down the ones you don't, and you can go to the original project or source file by following the link above each example. You may also check out the related API usage in the sidebar.
Example 1
Source Project: lucene-solr   Source File: TestNGramFilters.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Test NGramFilterFactory on tokens with payloads.
 *
 * Verifies that every n-gram emitted for the single input token "test|0.1"
 * still carries the float-encoded payload attached by DelimitedPayload.
 */
public void testNGramFilterPayload() throws Exception {
  Reader reader = new StringReader("test|0.1");
  TokenStream stream = whitespaceMockTokenizer(reader);
  stream = tokenFilterFactory("DelimitedPayload", "encoder", "float").create(stream);
  stream = tokenFilterFactory("NGram", "minGramSize", "1", "maxGramSize", "2").create(stream);

  // Fetch the attribute once; the same instance is reused for every token.
  PayloadAttribute payAttr = stream.getAttribute(PayloadAttribute.class);
  assertNotNull(payAttr);

  int tokenCount = 0;
  stream.reset();
  while (stream.incrementToken()) {
    tokenCount++;
    BytesRef payData = payAttr.getPayload();
    assertNotNull(payData);
    float payFloat = PayloadHelper.decodeFloat(payData.bytes);
    assertEquals(0.1f, payFloat, 0.0f);
  }
  // Guard against a vacuously-passing loop: the stream must emit tokens.
  assertTrue("token stream produced no tokens", tokenCount > 0);
  stream.end();
  stream.close();
}
 
Example 2
Source Project: lucene-solr   Source File: TestNGramFilters.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Test EdgeNGramFilterFactory on tokens with payloads.
 *
 * Verifies that every edge n-gram emitted for the single input token
 * "test|0.1" still carries the float-encoded payload attached by
 * DelimitedPayload.
 */
public void testEdgeNGramFilterPayload() throws Exception {
  Reader reader = new StringReader("test|0.1");
  TokenStream stream = whitespaceMockTokenizer(reader);
  stream = tokenFilterFactory("DelimitedPayload", "encoder", "float").create(stream);
  stream = tokenFilterFactory("EdgeNGram", "minGramSize", "1", "maxGramSize", "2").create(stream);

  // Fetch the attribute once; the same instance is reused for every token.
  PayloadAttribute payAttr = stream.getAttribute(PayloadAttribute.class);
  assertNotNull(payAttr);

  int tokenCount = 0;
  stream.reset();
  while (stream.incrementToken()) {
    tokenCount++;
    BytesRef payData = payAttr.getPayload();
    assertNotNull(payData);
    float payFloat = PayloadHelper.decodeFloat(payData.bytes);
    assertEquals(0.1f, payFloat, 0.0f);
  }
  // Guard against a vacuously-passing loop: the stream must emit tokens.
  assertTrue("token stream produced no tokens", tokenCount > 0);
  stream.end();
  stream.close();
}
 
Example 3
/**
 * Tests the DelimitedPayload factory with the float encoder: every token in
 * "the|0.1 quick|0.1 red|0.1" must decode back to the payload 0.1f.
 */
public void testEncoder() throws Exception {
  Reader reader = new StringReader("the|0.1 quick|0.1 red|0.1");
  TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  ((Tokenizer)stream).setReader(reader);
  stream = tokenFilterFactory("DelimitedPayload", "encoder", "float").create(stream);

  // Fetch the attribute once; the same instance is reused for every token.
  PayloadAttribute payAttr = stream.getAttribute(PayloadAttribute.class);
  assertNotNull(payAttr);

  int tokenCount = 0;
  stream.reset();
  while (stream.incrementToken()) {
    tokenCount++;
    byte[] payData = payAttr.getPayload().bytes;
    assertNotNull(payData);
    float payFloat = PayloadHelper.decodeFloat(payData);
    assertEquals(0.1f, payFloat, 0.0f);
  }
  // The input contains three tokens; make sure the loop actually ran.
  assertEquals("unexpected token count", 3, tokenCount);
  stream.end();
  stream.close();
}
 
Example 4
/**
 * Tests the DelimitedPayload factory with a custom delimiter ('*' instead of
 * the default '|'): every token in "the*0.1 quick*0.1 red*0.1" must decode
 * back to the payload 0.1f.
 */
public void testDelim() throws Exception {
  Reader reader = new StringReader("the*0.1 quick*0.1 red*0.1");
  TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  ((Tokenizer)stream).setReader(reader);
  stream = tokenFilterFactory("DelimitedPayload",
      "encoder", "float",
      "delimiter", "*").create(stream);

  // Fetch the attribute once; the same instance is reused for every token.
  PayloadAttribute payAttr = stream.getAttribute(PayloadAttribute.class);
  assertNotNull(payAttr);

  int tokenCount = 0;
  stream.reset();
  while (stream.incrementToken()) {
    tokenCount++;
    byte[] payData = payAttr.getPayload().bytes;
    assertNotNull(payData);
    float payFloat = PayloadHelper.decodeFloat(payData);
    assertEquals(0.1f, payFloat, 0.0f);
  }
  // The input contains three tokens; make sure the loop actually ran.
  assertEquals("unexpected token count", 3, tokenCount);
  stream.end();
  stream.close();
}
 
Example 5
/**
 * Checks that DelimitedPayloadTokenFilter with the identity encoder attaches
 * the raw UTF-8 tag text ("JJ", "NN", "VB") as the payload of each tagged
 * term, and leaves untagged terms without a payload.
 */
public void testPayloads() throws Exception {
  String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
  DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter
    (whitespaceMockTokenizer(test), 
     DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
  CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
  PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);

  // With the identity encoder, the expected payload bytes are simply the
  // UTF-8 encoding of the tag that followed the delimiter.
  byte[] adjective = "JJ".getBytes(StandardCharsets.UTF_8);
  byte[] noun = "NN".getBytes(StandardCharsets.UTF_8);
  byte[] verb = "VB".getBytes(StandardCharsets.UTF_8);

  filter.reset();
  assertTermEquals("The", filter, termAtt, payAtt, null);
  assertTermEquals("quick", filter, termAtt, payAtt, adjective);
  assertTermEquals("red", filter, termAtt, payAtt, adjective);
  assertTermEquals("fox", filter, termAtt, payAtt, noun);
  assertTermEquals("jumped", filter, termAtt, payAtt, verb);
  assertTermEquals("over", filter, termAtt, payAtt, null);
  assertTermEquals("the", filter, termAtt, payAtt, null);
  assertTermEquals("lazy", filter, termAtt, payAtt, adjective);
  assertTermEquals("brown", filter, termAtt, payAtt, adjective);
  assertTermEquals("dogs", filter, termAtt, payAtt, noun);
  assertFalse(filter.incrementToken());
  filter.end();
  filter.close();
}
 
Example 6
/**
 * Checks that DelimitedPayloadTokenFilter with a FloatEncoder turns the
 * numeric suffix of each tagged term into a float payload, and leaves
 * untagged terms without a payload.
 */
public void testFloatEncoding() throws Exception {
  String test = "The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7";
  DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(whitespaceMockTokenizer(test), '|', new FloatEncoder());
  CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
  PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);

  // Expected terms paired with their float payloads (null = no payload).
  String[] expectedTerms = {"The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs"};
  Float[] expectedValues = {null, 1.0f, 2.0f, 3.5f, 0.5f, null, null, 5.0f, 99.3f, 83.7f};

  filter.reset();
  for (int i = 0; i < expectedTerms.length; i++) {
    byte[] expectedPayload = expectedValues[i] == null ? null : PayloadHelper.encodeFloat(expectedValues[i]);
    assertTermEquals(expectedTerms[i], filter, termAtt, payAtt, expectedPayload);
  }
  assertFalse(filter.incrementToken());
  filter.end();
  filter.close();
}
 
Example 7
/**
 * Checks that DelimitedPayloadTokenFilter with an IntegerEncoder turns the
 * numeric suffix of each tagged term into an int payload, and leaves
 * untagged terms without a payload.
 */
public void testIntEncoding() throws Exception {
  String test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83";
  DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(whitespaceMockTokenizer(test), '|', new IntegerEncoder());
  CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
  PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);

  // Expected terms paired with their int payloads (null = no payload).
  String[] expectedTerms = {"The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs"};
  Integer[] expectedValues = {null, 1, 2, 3, null, null, null, 5, 99, 83};

  filter.reset();
  for (int i = 0; i < expectedTerms.length; i++) {
    byte[] expectedPayload = expectedValues[i] == null ? null : PayloadHelper.encodeInt(expectedValues[i]);
    assertTermEquals(expectedTerms[i], filter, termAtt, payAtt, expectedPayload);
  }
  assertFalse(filter.incrementToken());
  filter.end();
  filter.close();
}
 
Example 8
/**
 * Advances {@code stream} one token and asserts that the term text equals
 * {@code expected} and the payload bytes equal {@code expectPay}
 * ({@code null} means "no payload expected").
 *
 * @param expected  expected term text of the next token
 * @param stream    stream to advance; must expose term and payload attributes
 * @param expectPay expected payload bytes, or null if no payload is expected
 */
void assertTermEquals(String expected, TokenStream stream, byte[] expectPay) throws Exception {
  CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
  PayloadAttribute payloadAtt = stream.getAttribute(PayloadAttribute.class);
  assertTrue(stream.incrementToken());
  assertEquals(expected, termAtt.toString());
  BytesRef payload = payloadAtt.getPayload();
  if (payload != null) {
    // Fail cleanly (not with an NPE) if a payload shows up where none was expected.
    assertNotNull("unexpected payload for term: " + expected, expectPay);
    assertEquals("payload length mismatch for term: " + expected, expectPay.length, payload.length);
    for (int i = 0; i < expectPay.length; i++) {
      // Honor the BytesRef offset; payload bytes need not start at index 0.
      assertEquals("payload byte " + i + " mismatch for term: " + expected,
          expectPay[i], payload.bytes[i + payload.offset]);
    }
  } else {
    assertNull("expected a payload for term: " + expected, expectPay);
  }
}
 
Example 9
Source Project: lucene-solr   Source File: TestConcatenatingTokenStream.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Concatenates two tokenizers that declare different extra attributes
 * (payload on the first, flags on the second) and checks that the combined
 * stream exposes both attributes and yields all tokens with correct offsets.
 */
public void testInconsistentAttributes() throws IOException {
  AttributeFactory factory = newAttributeFactory();

  // First source declares a payload attribute only.
  final MockTokenizer first = new MockTokenizer(factory, MockTokenizer.WHITESPACE, false);
  first.setReader(new StringReader("first words "));
  first.addAttribute(PayloadAttribute.class);

  // Second source declares a flags attribute only.
  final MockTokenizer second = new MockTokenizer(factory, MockTokenizer.WHITESPACE, false);
  second.setReader(new StringReader("second words"));
  second.addAttribute(FlagsAttribute.class);

  TokenStream ts = new ConcatenatingTokenStream(first, second);

  // The concatenated stream must carry the union of both attribute sets.
  assertTrue(ts.hasAttribute(FlagsAttribute.class));
  assertTrue(ts.hasAttribute(PayloadAttribute.class));

  assertTokenStreamContents(ts,
      new String[] { "first", "words", "second", "words" },
      new int[]{ 0, 6, 12, 19, },
      new int[]{ 5, 11, 18, 24, });
}
 
Example 10
Source Project: lucene-solr   Source File: TestSnowball.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs SnowballFilter over a canned TestTokenStream and verifies that
 * stemming only rewrites the term text ("accents" -&gt; "accent") while all
 * other attributes (offsets, type, position increment, flags, payload) pass
 * through unchanged.
 */
public void testFilterTokens() throws Exception {
  SnowballFilter filter = new SnowballFilter(new TestTokenStream(), "English");
  CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
  OffsetAttribute offsetAtt = filter.getAttribute(OffsetAttribute.class);
  TypeAttribute typeAtt = filter.getAttribute(TypeAttribute.class);
  PayloadAttribute payloadAtt = filter.getAttribute(PayloadAttribute.class);
  PositionIncrementAttribute posIncAtt = filter.getAttribute(PositionIncrementAttribute.class);
  FlagsAttribute flagsAtt = filter.getAttribute(FlagsAttribute.class);

  // Assert the stream actually produced a token; otherwise the checks below
  // would run against stale/default attribute values.
  assertTrue(filter.incrementToken());

  assertEquals("accent", termAtt.toString());
  assertEquals(2, offsetAtt.startOffset());
  assertEquals(7, offsetAtt.endOffset());
  assertEquals("wrd", typeAtt.type());
  assertEquals(3, posIncAtt.getPositionIncrement());
  assertEquals(77, flagsAtt.getFlags());
  assertEquals(new BytesRef(new byte[]{0,1,2,3}), payloadAtt.getPayload());
}
 
Example 11
Source Project: lucene-solr   Source File: SpellingQueryConverter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Analyzes {@code text} and appends one Token per produced term to
 * {@code result}, copying term text, offsets (shifted by {@code offset}),
 * type, payload, and position increment from the stream's attributes.
 *
 * @param result        collection the produced tokens are added to
 * @param text          raw text to analyze
 * @param offset        base offset added to each token's start/end offsets
 * @param flagsAttValue flags value stamped onto every produced token
 * @throws IOException if the analyzer's token stream fails
 */
protected void analyze(Collection<Token> result, String text, int offset, int flagsAttValue) throws IOException {
  // try-with-resources guarantees the stream is closed even if
  // incrementToken() throws; the original explicit close() could leak.
  try (TokenStream stream = analyzer.tokenStream("", text)) {
    // TODO: support custom attributes
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
    PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
    PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
    OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      Token token = new Token();
      token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
      token.setOffset(offset + offsetAtt.startOffset(),
                      offset + offsetAtt.endOffset());
      token.setFlags(flagsAttValue); //overwriting any flags already set...
      token.setType(typeAtt.type());
      token.setPayload(payloadAtt.getPayload());
      token.setPositionIncrement(posIncAtt.getPositionIncrement());
      result.add(token);
    }
    stream.end();
  }
}
 
Example 12
Source Project: lucene-solr   Source File: TestPayloadSpans.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Wraps {@code input} and prepares the filter's term, position-increment,
 * and payload attributes. The 'entities' and 'nopayload' sets configure
 * which terms get payloads — semantics are defined by this filter's
 * incrementToken() (declared elsewhere in the class).
 *
 * @param input upstream token stream to wrap
 */
public PayloadFilter(TokenStream input) {
  super(input);
  pos = 0;
  // Terms treated as entities (presumably given a distinct payload).
  entities.add("xx");
  entities.add("one");
  // Terms that should be emitted without any payload.
  nopayload.add("nopayload");
  nopayload.add("np");
  termAtt = addAttribute(CharTermAttribute.class);
  posIncrAtt = addAttribute(PositionIncrementAttribute.class);
  payloadAtt = addAttribute(PayloadAttribute.class);
}
 
Example 13
Source Project: lucene-solr   Source File: TestPayloadSpanUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Wraps {@code input} and prepares the filter's term, position-increment,
 * and payload attributes. The 'entities' and 'nopayload' sets configure
 * which terms get payloads — semantics are defined by this filter's
 * incrementToken() (declared elsewhere in the class).
 *
 * @param input upstream token stream to wrap
 */
public PayloadFilter(TokenStream input) {
  super(input);
  pos = 0;
  // Terms treated as entities (presumably given a distinct payload).
  entities.add("xx");
  entities.add("one");
  // Terms that should be emitted without any payload.
  nopayload.add("nopayload");
  nopayload.add("np");
  termAtt = addAttribute(CharTermAttribute.class);
  posIncrAtt = addAttribute(PositionIncrementAttribute.class);
  payloadAtt = addAttribute(PayloadAttribute.class);
}
 
Example 14
/**
 * Advances {@code stream} one token and asserts that the term text equals
 * {@code expected} and the payload bytes equal {@code expectPay}
 * ({@code null} means "no payload expected"). Attribute instances are passed
 * in by the caller so they are fetched only once per test.
 *
 * @param expected  expected term text of the next token
 * @param stream    stream to advance
 * @param termAtt   term attribute of {@code stream}
 * @param payAtt    payload attribute of {@code stream}
 * @param expectPay expected payload bytes, or null if no payload is expected
 */
void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt, PayloadAttribute payAtt, byte[] expectPay) throws Exception {
  assertTrue(stream.incrementToken());
  assertEquals(expected, termAtt.toString());
  BytesRef payload = payAtt.getPayload();
  if (payload != null) {
    // Fail cleanly (not with an NPE) if a payload shows up where none was expected.
    assertNotNull("unexpected payload for term: " + expected, expectPay);
    assertEquals("payload length mismatch for term: " + expected, expectPay.length, payload.length);
    for (int i = 0; i < expectPay.length; i++) {
      // Honor the BytesRef offset; payload bytes need not start at index 0.
      assertEquals("payload byte " + i + " mismatch for term: " + expected,
          expectPay[i], payload.bytes[i + payload.offset]);
    }
  } else {
    assertNull("expected a payload for term: " + expected, expectPay);
  }
}
 
Example 15
Source Project: lucene-solr   Source File: TestAnalyzers.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Consumes {@code ts} and asserts that the first payload byte of the n-th
 * token equals n (1-based): token 1 carries payload byte 1, token 2 carries
 * byte 2, and so on.
 */
void verifyPayload(TokenStream ts) throws IOException {
  PayloadAttribute payloadAtt = ts.getAttribute(PayloadAttribute.class);
  ts.reset();
  byte expected = 1;
  while (ts.incrementToken()) {
    assertEquals(expected, payloadAtt.getPayload().bytes[0]);
    expected++;
  }
}
 
Example 16
Source Project: lucene-solr   Source File: MockPayloadAnalyzer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Wraps {@code input} and captures the attributes this mock filter needs to
 * stamp payloads onto tokens. 'pos' and 'i' are per-stream counters reset to
 * zero for each new instance.
 *
 * @param input     upstream token stream to wrap
 * @param fieldName name of the field being analyzed (stored for later use)
 */
public MockPayloadFilter(TokenStream input, String fieldName) {
  super(input);
  this.fieldName = fieldName;
  pos = 0;
  i = 0;
  posIncrAttr = input.addAttribute(PositionIncrementAttribute.class);
  payloadAttr = input.addAttribute(PayloadAttribute.class);
  termAttr = input.addAttribute(CharTermAttribute.class);
}
 
Example 17
Source Project: lucene-solr   Source File: FieldInvertState.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Sets attributeSource to a new instance.
 *
 * Re-resolves the cached attribute references only when the source actually
 * changes (identity check), since repeated lookups would be redundant.
 * Note the asymmetry: term and payload use getAttribute (the stream must
 * already provide them — payload may legitimately be absent), while
 * term-frequency, position-increment, and offset use addAttribute (created
 * on demand if missing).
 */
void setAttributeSource(AttributeSource attributeSource) {
  if (this.attributeSource != attributeSource) {
    this.attributeSource = attributeSource;
    termAttribute = attributeSource.getAttribute(TermToBytesRefAttribute.class);
    termFreqAttribute = attributeSource.addAttribute(TermFrequencyAttribute.class);
    posIncrAttribute = attributeSource.addAttribute(PositionIncrementAttribute.class);
    offsetAttribute = attributeSource.addAttribute(OffsetAttribute.class);
    payloadAttribute = attributeSource.getAttribute(PayloadAttribute.class);
  }
}
 
Example 18
Source Project: lucene-solr   Source File: TestPayloads.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Wraps {@code in} and records the per-field payload configuration used by
 * this test filter when it emits tokens.
 *
 * @param in          upstream token stream to wrap
 * @param fieldName   field whose payload configuration applies
 * @param fieldToData maps field name to the payload data to attach
 */
public PayloadFilter(TokenStream in, String fieldName, Map<String,PayloadData> fieldToData) {
  super(in);
  this.fieldToData = fieldToData;
  this.fieldName = fieldName;
  payloadAtt = addAttribute(PayloadAttribute.class);
  termAttribute = addAttribute(CharTermAttribute.class);
}
 
Example 19
Source Project: lucene-solr   Source File: TestPayloads.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a single-token stream whose term text and payload are the same
 * random bytes drawn from the shared pool (the byte[] is borrowed from
 * 'pool' and filled via generateRandomData). 'first' marks that the one
 * token has not been emitted yet.
 *
 * @param pool shared byte-array pool the payload buffer is borrowed from
 */
PoolingPayloadTokenStream(ByteArrayPool pool) {
  this.pool = pool;
  payload = pool.get();
  generateRandomData(payload);
  // Term text mirrors the payload bytes, decoded with the utf8 charset
  // (declared elsewhere in the class).
  term = new String(payload, 0, payload.length, utf8);
  first = true;
  payloadAtt = addAttribute(PayloadAttribute.class);
  termAtt = addAttribute(CharTermAttribute.class);
}
 
Example 20
Source Project: lucene-solr   Source File: SimpleQueryConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Tokenizes {@code origQuery} with a whitespace analyzer and converts each
 * produced term into a Token carrying term text, offsets, flags, payload,
 * position increment, and type.
 *
 * @param origQuery raw query text to tokenize
 * @return the set of converted tokens (order not preserved — HashSet)
 * @throws RuntimeException wrapping any IOException from the analyzer
 */
@Override
public Collection<Token> convert(String origQuery) {
  Collection<Token> result = new HashSet<>();

  try (WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
       TokenStream tokenStream = analyzer.tokenStream("", origQuery)) {
    // TODO: support custom attributes
    CharTermAttribute termAttribute = tokenStream.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAttribute = tokenStream.addAttribute(OffsetAttribute.class);
    TypeAttribute typeAttribute = tokenStream.addAttribute(TypeAttribute.class);
    FlagsAttribute flagsAttribute = tokenStream.addAttribute(FlagsAttribute.class);
    PayloadAttribute payloadAttribute = tokenStream.addAttribute(PayloadAttribute.class);
    PositionIncrementAttribute posIncAttribute = tokenStream.addAttribute(PositionIncrementAttribute.class);

    tokenStream.reset();

    while (tokenStream.incrementToken()) {
      Token token = new Token();
      token.copyBuffer(termAttribute.buffer(), 0, termAttribute.length());
      token.setOffset(offsetAttribute.startOffset(), offsetAttribute.endOffset());
      token.setType(typeAttribute.type());
      token.setFlags(flagsAttribute.getFlags());
      token.setPayload(payloadAttribute.getPayload());
      token.setPositionIncrement(posIncAttribute.getPositionIncrement());
      result.add(token);
    }

    tokenStream.end();
    return result;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
 
Example 21
Source Project: clue   Source File: PayloadTokenizer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Tokenizer over a comma-separated string: lowercases the input, splits it
 * on commas, and prepares term, payload, position, and offset attributes.
 * The shared 4-byte payload buffer is reused for every token.
 *
 * @param text comma-separated input, e.g. "a,b,c"
 * @throws IOException if setting the reader fails
 */
public PayloadTokenizer(String text)
        throws IOException {
  setReader(new StringReader(text));
  // NOTE(review): toLowerCase() uses the default locale; for locale-independent
  // behavior this would be toLowerCase(Locale.ROOT) — confirm intent before changing.
  this.tokens = text.toLowerCase().split(",");
  
  termAttr = addAttribute(CharTermAttribute.class);
  termAttr.resizeBuffer(text.length()); // maximum size necessary is the size of the input
  payloadAttr = addAttribute(PayloadAttribute.class);
  // Fixed 4-byte payload buffer, reused across tokens.
  payload = new BytesRef(new byte[4]);
  positionAttr = addAttribute(PositionIncrementAttribute.class);
  offsetAttr = addAttribute(OffsetAttribute.class);
}
 
Example 22
Source Project: Elasticsearch   Source File: AllTokenStream.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Wraps {@code input} for the "_all" field, keeping a reference to the
 * entries whose boosts this stream will encode, and registering the offset
 * and payload attributes it manipulates.
 *
 * @param input      upstream token stream to wrap
 * @param allEntries entries backing the combined "_all" content
 */
AllTokenStream(TokenStream input, AllEntries allEntries) {
    super(input);
    this.allEntries = allEntries;
    offsetAttribute = addAttribute(OffsetAttribute.class);
    payloadAttribute = addAttribute(PayloadAttribute.class);
}
 
Example 23
Source Project: lucene-solr   Source File: TokenSourcesTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Round-trips payloads through term vectors: indexes one document with a
 * canned token stream (term vectors with positions, offsets, and payloads
 * enabled), rebuilds a TokenStream from the stored term vectors, and checks
 * that every token comes back with its original term, position increment,
 * payload, and offsets.
 */
public void testPayloads() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  // Term vectors must store positions for payloads to be recorded.
  FieldType myFieldType = new FieldType(TextField.TYPE_NOT_STORED);
  myFieldType.setStoreTermVectors(true);
  myFieldType.setStoreTermVectorOffsets(true);
  myFieldType.setStoreTermVectorPositions(true);
  myFieldType.setStoreTermVectorPayloads(true);

  // Shared offset counter consumed by getToken() (defined elsewhere in the class).
  curOffset = 0;

  Token[] tokens = new Token[] {
    getToken("foxes"),
    getToken("can"),
    getToken("jump"),
    getToken("high")
  };

  Document doc = new Document();
  doc.add(new Field("field", new CannedTokenStream(tokens), myFieldType));
  writer.addDocument(doc);

  IndexReader reader = writer.getReader();
  writer.close();
  assertEquals(1, reader.numDocs());

  // Rebuild a token stream from the stored term vectors of doc 0.
  TokenStream ts = TokenSources.getTermVectorTokenStreamOrNull("field", reader.getTermVectors(0), -1);

  CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
  PositionIncrementAttribute posIncAtt = ts.getAttribute(PositionIncrementAttribute.class);
  OffsetAttribute offsetAtt = ts.getAttribute(OffsetAttribute.class);
  PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);

  // Every reconstructed token must match the original canned token exactly.
  ts.reset();
  for(Token token : tokens) {
    assertTrue(ts.incrementToken());
    assertEquals(token.toString(), termAtt.toString());
    assertEquals(token.getPositionIncrement(), posIncAtt.getPositionIncrement());
    assertEquals(token.getPayload(), payloadAtt.getPayload());
    assertEquals(token.startOffset(), offsetAtt.startOffset());
    assertEquals(token.endOffset(), offsetAtt.endOffset());
  }

  assertFalse(ts.incrementToken());

  reader.close();
  dir.close();
}
 
Example 24
Source Project: lucene-solr   Source File: TokenSourcesTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Randomized round-trip of a token stream through term vectors: generates a
 * RandomTokenStream, indexes it, rebuilds a stream from the stored term
 * vectors, and asserts the rebuilt stream matches the original (terms,
 * offsets, position increments, and — sometimes — payloads).
 *
 * Cases where the reconstruction is legitimately ambiguous (tokens at the
 * same position with identical start offsets, when positions are not
 * stored) are detected up front and skipped.
 */
@Repeat(iterations = 10)
//@Seed("947083AB20AB2D4F")
public void testRandomizedRoundTrip() throws Exception {
  final int distinct = TestUtil.nextInt(random(), 1, 10);

  // Build a small random vocabulary to sample tokens from.
  String[] terms = new String[distinct];
  BytesRef[] termBytes = new BytesRef[distinct];
  for (int i = 0; i < distinct; ++i) {
    terms[i] = TestUtil.randomRealisticUnicodeString(random());
    termBytes[i] = new BytesRef(terms[i]);
  }

  final BaseTermVectorsFormatTestCase.RandomTokenStream rTokenStream =
      new BaseTermVectorsFormatTestCase.RandomTokenStream(TestUtil.nextInt(random(), 1, 10), terms, termBytes);
  //check to see if the token streams might have non-deterministic testable result
  final boolean storeTermVectorPositions = random().nextBoolean();
  final int[] startOffsets = rTokenStream.getStartOffsets();
  final int[] positionsIncrements = rTokenStream.getPositionsIncrements();
  for (int i = 1; i < positionsIncrements.length; i++) {
    if (storeTermVectorPositions && positionsIncrements[i] != 0) {
      continue;
    }
    //TODO should RandomTokenStream ensure endOffsets for tokens at same position and same startOffset are greater
    // than previous token's endOffset?  That would increase the testable possibilities.
    if (startOffsets[i] == startOffsets[i-1]) {
      if (VERBOSE)
        System.out.println("Skipping test because can't easily validate random token-stream is correct.");
      rTokenStream.close();
      return;
    }
  }

  //sanity check itself
  assertTokenStreamContents(rTokenStream,
      rTokenStream.getTerms(), rTokenStream.getStartOffsets(), rTokenStream.getEndOffsets(),
      rTokenStream.getPositionsIncrements());

  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  FieldType myFieldType = new FieldType(TextField.TYPE_NOT_STORED);
  myFieldType.setStoreTermVectors(true);
  myFieldType.setStoreTermVectorOffsets(true);
  myFieldType.setStoreTermVectorPositions(storeTermVectorPositions);
  //payloads require positions; it will throw an error otherwise
  myFieldType.setStoreTermVectorPayloads(storeTermVectorPositions && random().nextBoolean());

  Document doc = new Document();
  doc.add(new Field("field", rTokenStream, myFieldType));
  writer.addDocument(doc);

  IndexReader reader = writer.getReader();
  writer.close();
  assertEquals(1, reader.numDocs());

  // Rebuild a token stream from the stored term vectors of doc 0.
  TokenStream vectorTokenStream =
      TokenSources.getTermVectorTokenStreamOrNull("field", reader.getTermVectors(0), -1);

  //sometimes check payloads
  PayloadAttribute payloadAttribute = null;
  if (myFieldType.storeTermVectorPayloads() && usually()) {
    payloadAttribute = vectorTokenStream.addAttribute(PayloadAttribute.class);
  }
  // Position increments are only comparable when positions were stored.
  assertTokenStreamContents(vectorTokenStream,
      rTokenStream.getTerms(), rTokenStream.getStartOffsets(), rTokenStream.getEndOffsets(),
      myFieldType.storeTermVectorPositions() ? rTokenStream.getPositionsIncrements() : null);
  //test payloads
  if (payloadAttribute != null) {
    vectorTokenStream.reset();
    for (int i = 0; vectorTokenStream.incrementToken(); i++) {
      assertEquals(rTokenStream.getPayloads()[i], payloadAttribute.getPayload());
    }
  }

  reader.close();
  dir.close();
  rTokenStream.close();
}
 
Example 25
Source Project: lucene-solr   Source File: MemoryIndex.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Consumes {@code tokenStream} and writes its terms into the in-memory
 * postings for {@code info}: per-term slices receive the position, optional
 * start/end offsets, and an optional payload index for every occurrence.
 * Position and offset continue from the previous field value, separated by
 * the supplied gaps. IOExceptions are wrapped in RuntimeException since the
 * stream is in-memory.
 *
 * @param info                 per-field bookkeeping updated in place
 * @param tokenStream          source of terms; closed by this method
 * @param positionIncrementGap gap added after a previous value of this field
 * @param offsetGap            offset gap added after a previous value
 */
private void storeTerms(Info info, TokenStream tokenStream, int positionIncrementGap, int offsetGap) {

    int pos = -1;
    int offset = 0;
    // Continue numbering after a previous value of the same field.
    if (info.numTokens > 0) {
      pos = info.lastPosition + positionIncrementGap;
      offset = info.lastOffset + offsetGap;
    }

    try (TokenStream stream = tokenStream) {
      TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
      PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
      OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
      // Payload attribute is only needed when payloads are being stored.
      PayloadAttribute payloadAtt = storePayloads ? stream.addAttribute(PayloadAttribute.class) : null;
      stream.reset();

      while (stream.incrementToken()) {
//        if (DEBUG) System.err.println("token='" + term + "'");
        info.numTokens++;
        final int posIncr = posIncrAttribute.getPositionIncrement();
        // posIncr == 0 means this token overlaps the previous position.
        if (posIncr == 0) {
          info.numOverlapTokens++;
        }
        pos += posIncr;
        // terms.add returns a negative encoding when the term already exists.
        int ord = info.terms.add(termAtt.getBytesRef());
        if (ord < 0) {
          // Existing term: decode ordinal and append to its slice.
          ord = (-ord) - 1;
          postingsWriter.reset(info.sliceArray.end[ord]);
        } else {
          // New term: start a fresh postings slice.
          info.sliceArray.start[ord] = postingsWriter.startNewSlice();
        }
        info.sliceArray.freq[ord]++;
        info.maxTermFrequency = Math.max(info.maxTermFrequency, info.sliceArray.freq[ord]);
        info.sumTotalTermFreq++;
        postingsWriter.writeInt(pos);
        if (storeOffsets) {
          postingsWriter.writeInt(offsetAtt.startOffset() + offset);
          postingsWriter.writeInt(offsetAtt.endOffset() + offset);
        }
        if (storePayloads) {
          final BytesRef payload = payloadAtt.getPayload();
          final int pIndex;
          // -1 marks "no payload" for this occurrence.
          if (payload == null || payload.length == 0) {
            pIndex = -1;
          } else {
            pIndex = payloadsBytesRefs.append(payload);
          }
          postingsWriter.writeInt(pIndex);
        }
        info.sliceArray.end[ord] = postingsWriter.getCurrentOffset();
      }
      stream.end();
      // Remember where this value ended so the next value can add the gaps.
      if (info.numTokens > 0) {
        info.lastPosition = pos;
        info.lastOffset = offsetAtt.endOffset() + offset;
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }
 
Example 26
Source Project: lucene-solr   Source File: PayloadHelper.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Wraps {@code input} and registers the payload attribute this test filter
 * writes; the field name is stored for use when emitting tokens.
 *
 * @param input     upstream token stream to wrap
 * @param fieldName field the payloads are generated for
 */
public PayloadFilter(TokenStream input, String fieldName) {
  super(input);
  this.fieldName = fieldName;
  payloadAtt = addAttribute(PayloadAttribute.class);
}
 
Example 27
Source Project: lucene-solr   Source File: TestPayloadScoreQuery.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Wraps {@code input} and registers the payload attribute this test filter
 * writes onto tokens.
 *
 * @param input upstream token stream to wrap
 */
public PayloadFilter(TokenStream input) {
  super(input);
  payAtt = addAttribute(PayloadAttribute.class);
}
 
Example 28
Source Project: lucene-solr   Source File: TestPayloadTermQuery.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Wraps {@code input} and registers the payload attribute this test filter
 * writes; the field name is stored for use when emitting tokens.
 *
 * @param input     upstream token stream to wrap
 * @param fieldName field the payloads are generated for
 */
public PayloadFilter(TokenStream input, String fieldName) {
  super(input);
  this.fieldName = fieldName;
  payloadAtt = addAttribute(PayloadAttribute.class);
}
 
Example 29
Source Project: lucene-solr   Source File: TestAnalyzers.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Wraps {@code input} and registers the payload attribute so the filter can
 * attach payloads to passing tokens.
 *
 * @param input upstream token stream to wrap
 */
public  PayloadSetter(TokenStream input) {
  super(input);
  payloadAtt = addAttribute(PayloadAttribute.class);
}
 
Example 30
Source Project: lucene-solr   Source File: BaseTermVectorsFormatTestCase.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Builds a random but internally-consistent token stream of {@code len}
 * tokens sampled from the given vocabulary: random position increments
 * (first increment at least 1), monotonically non-decreasing start offsets,
 * random payloads (occasionally one shared payload for all tokens), plus
 * reverse indexes (position -> token ids, startOffset -> token ids) and a
 * term-frequency map used by the term-vectors assertions.
 *
 * @param len             number of tokens to generate
 * @param sampleTerms     vocabulary of term strings to sample from
 * @param sampleTermBytes byte images parallel to {@code sampleTerms}
 */
public RandomTokenStream(int len, String[] sampleTerms, BytesRef[] sampleTermBytes) {
  terms = new String[len];
  termBytes = new BytesRef[len];
  positionsIncrements = new int[len];
  positions = new int[len];
  startOffsets = new int[len];
  endOffsets = new int[len];
  payloads = new BytesRef[len];
  for (int i = 0; i < len; ++i) {
    // Pick a random vocabulary entry; keep string and bytes in sync.
    final int o = random().nextInt(sampleTerms.length);
    terms[i] = sampleTerms[o];
    termBytes[i] = sampleTermBytes[o];
    // First increment must be >= 1 (positions start at -1 below).
    positionsIncrements[i] = TestUtil.nextInt(random(), i == 0 ? 1 : 0, 10);
    if (i == 0) {
      startOffsets[i] = TestUtil.nextInt(random(), 0, 1 << 16);
    } else {
      // Offsets never go backwards; rarely jump by a large amount.
      startOffsets[i] = startOffsets[i-1] + TestUtil.nextInt(random(), 0, rarely() ? 1 << 16 : 20);
    }
    endOffsets[i] = startOffsets[i] + TestUtil.nextInt(random(), 0, rarely() ? 1 << 10 : 20);
  }

  // Derive absolute positions from the increments (first position = incr-1).
  for (int i = 0; i < len; ++i) {
    if (i == 0) {
      positions[i] = positionsIncrements[i] - 1;
    } else {
      positions[i] = positions[i - 1] + positionsIncrements[i];
    }
  }
  // Rarely share a single payload instance across all tokens.
  if (rarely()) {
    Arrays.fill(payloads, randomPayload());
  } else {
    for (int i = 0; i < len; ++i) {
      payloads[i] = randomPayload();
    }
  }

  // Reverse indexes: which token ids sit at each position / start offset.
  positionToTerms = new HashMap<>(len);
  startOffsetToTerms = new HashMap<>(len);
  for (int i = 0; i < len; ++i) {
    if (!positionToTerms.containsKey(positions[i])) {
      positionToTerms.put(positions[i], new HashSet<Integer>(1));
    }
    positionToTerms.get(positions[i]).add(i);
    if (!startOffsetToTerms.containsKey(startOffsets[i])) {
      startOffsetToTerms.put(startOffsets[i], new HashSet<Integer>(1));
    }
    startOffsetToTerms.get(startOffsets[i]).add(i);
  }

  // Term frequency per distinct term string.
  freqs = new HashMap<>();
  for (String term : terms) {
    if (freqs.containsKey(term)) {
      freqs.put(term, freqs.get(term) + 1);
    } else {
      freqs.put(term, 1);
    }
  }

  // Permissive offsets: allow otherwise-illegal offset combinations in tests.
  addAttributeImpl(new PermissiveOffsetAttributeImpl());

  termAtt = addAttribute(CharTermAttribute.class);
  piAtt = addAttribute(PositionIncrementAttribute.class);
  oAtt = addAttribute(OffsetAttribute.class);
  pAtt = addAttribute(PayloadAttribute.class);
}