Java Code Examples for org.apache.lucene.analysis.Token#setPayload()

The following examples show how to use org.apache.lucene.analysis.Token#setPayload(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source File: TestPostingsOffsets.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Indexes one document whose two "foo" tokens carry very large but still
 * legal offsets: the first spans 0 to Integer.MAX_VALUE-500 and the second
 * ends exactly at Integer.MAX_VALUE. The test passes when adding the document
 * and closing the writer and directory complete without an exception.
 *
 * @throws Exception if indexing or closing the resources fails
 */
public void testLegalbutVeryLargeOffsets() throws Exception {
  // try-with-resources closes the writer first and then the directory, even
  // when indexing throws, so a failing run does not leave either one open.
  try (Directory dir = newDirectory();
       IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null))) {
    Document doc = new Document();
    Token t1 = new Token("foo", 0, Integer.MAX_VALUE-500);
    // a payload is attached to the first token only on some random runs
    if (random().nextBoolean()) {
      t1.setPayload(new BytesRef("test"));
    }
    Token t2 = new Token("foo", Integer.MAX_VALUE-500, Integer.MAX_VALUE);
    TokenStream tokenStream = new CannedTokenStream(
        new Token[] { t1, t2 }
    );
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    // store some term vectors for the checkindex cross-check
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorPositions(true);
    ft.setStoreTermVectorOffsets(true);
    Field field = new Field("foo", tokenStream, ft);
    doc.add(field);
    iw.addDocument(doc);
  }
}
 
Example 2
Source File: TokenSourcesTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a token for {@code text} and uses the same text as its payload.
 * The token starts at the current offset and ends {@code text.length()}
 * characters later; the current offset is then advanced by one so the
 * next token starts one position further on.
 */
private Token getToken(String text) {
  final int start = curOffset;
  final int end = start + text.length();
  final Token token = new Token(text, start, end);
  token.setPayload(new BytesRef(text));
  // advance only after the token has been built successfully
  curOffset++;
  return token;
}
 
Example 3
Source File: TestIndexWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that indexing a token whose payload points outside its backing
 * byte array makes {@code addDocument} fail with an
 * {@link IndexOutOfBoundsException}.
 *
 * @throws Exception if setting up or closing the writer or directory fails
 */
public void testBrokenPayload() throws Exception {
  // try-with-resources closes the writer and then the directory even when
  // the expected exception is not thrown and expectThrows itself fails.
  try (Directory d = newDirectory();
       IndexWriter w = new IndexWriter(d, newIndexWriterConfig(new MockAnalyzer(random())))) {
    Document doc = new Document();
    Token token = new Token("bar", 0, 3);
    BytesRef evil = new BytesRef(new byte[1024]);
    evil.offset = 1000; // offset + length is now out of bounds.
    token.setPayload(evil);
    doc.add(new TextField("foo", new CannedTokenStream(token)));
    expectThrows(IndexOutOfBoundsException.class, () -> w.addDocument(doc));
  }
}
 
Example 4
Source File: TestMaxPosition.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that a document whose second token would land past
 * {@code IndexWriter.MAX_POSITION} is rejected with an
 * {@link IllegalArgumentException}, and that the rejected document is not
 * visible to a reader opened from the writer afterwards.
 *
 * @throws Exception if setting up or closing the index resources fails
 */
public void testTooBigPosition() throws Exception {
  // try-with-resources closes the writer and then the directory even when an
  // assertion below fails.
  try (Directory dir = newDirectory();
       IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null))) {
    Document doc = new Document();
    // This is at position 1:
    Token t1 = new Token("foo", 0, 3);
    t1.setPositionIncrement(2);
    // payloads are attached only on some random runs
    if (random().nextBoolean()) {
      t1.setPayload(new BytesRef(new byte[] { 0x1 } ));
    }
    Token t2 = new Token("foo", 4, 7);
    // This should overflow max:
    t2.setPositionIncrement(IndexWriter.MAX_POSITION);
    if (random().nextBoolean()) {
      t2.setPayload(new BytesRef(new byte[] { 0x1 } ));
    }
    doc.add(new TextField("foo", new CannedTokenStream(new Token[] {t1, t2})));
    expectThrows(IllegalArgumentException.class, () -> {
      iw.addDocument(doc);
    });

    // Document should not be visible:
    try (IndexReader r = DirectoryReader.open(iw)) {
      assertEquals(0, r.numDocs());
    }
  }
}
 
Example 5
Source File: TestMaxPosition.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that a token placed exactly at {@code IndexWriter.MAX_POSITION}
 * is accepted: the document is indexed, is visible to a reader opened from
 * the writer, and the postings for "foo" report two occurrences at
 * positions 0 and {@code IndexWriter.MAX_POSITION}.
 *
 * @throws Exception if indexing, reading, or closing the resources fails
 */
public void testMaxPosition() throws Exception {
  // try-with-resources closes the writer and then the directory even when an
  // assertion below fails.
  try (Directory dir = newDirectory();
       IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null))) {
    Document doc = new Document();
    // This is at position 0:
    Token t1 = new Token("foo", 0, 3);
    // payloads are attached only on some random runs
    if (random().nextBoolean()) {
      t1.setPayload(new BytesRef(new byte[] { 0x1 } ));
    }
    Token t2 = new Token("foo", 4, 7);
    t2.setPositionIncrement(IndexWriter.MAX_POSITION);
    if (random().nextBoolean()) {
      t2.setPayload(new BytesRef(new byte[] { 0x1 } ));
    }
    doc.add(new TextField("foo", new CannedTokenStream(new Token[] {t1, t2})));
    iw.addDocument(doc);

    // Document should be visible; the reader is closed before the writer.
    try (IndexReader r = DirectoryReader.open(iw)) {
      assertEquals(1, r.numDocs());
      PostingsEnum postings = MultiTerms.getTermPostingsEnum(r, "foo", new BytesRef("foo"));

      // "foo" appears in docID=0
      assertEquals(0, postings.nextDoc());

      // "foo" appears 2 times in the doc
      assertEquals(2, postings.freq());

      // first at pos=0
      assertEquals(0, postings.nextPosition());

      // next at pos=MAX
      assertEquals(IndexWriter.MAX_POSITION, postings.nextPosition());
    }
  }
}