Java Code Examples for org.apache.lucene.analysis.Token#setPositionIncrement()

The following examples show how to use org.apache.lucene.analysis.Token#setPositionIncrement(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: ShingleFilterTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link Token} carrying the given term text, offsets and position increment.
 *
 * @param term text copied into the token's term buffer
 * @param start first argument handed to {@code Token#setOffset}
 * @param offset second argument handed to {@code Token#setOffset}
 * @param positionIncrement position increment assigned to the token
 * @return the freshly created token
 */
private static Token createToken(
    String term, int start, int offset, int positionIncrement) {
  final Token result = new Token();
  result.setOffset(start, offset);
  // Copy the whole term into the token's internal buffer.
  final char[] termChars = term.toCharArray();
  result.copyBuffer(termChars, 0, term.length());
  result.setPositionIncrement(positionIncrement);
  return result;
}
 
Example 2
Source File: TestPostingsOffsets.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link Token} with the given text, position increment and offsets.
 *
 * @param text term text appended to the new token
 * @param posIncr position increment assigned to the token
 * @param startOffset first argument handed to {@code Token#setOffset}
 * @param endOffset second argument handed to {@code Token#setOffset}
 * @return the freshly created token
 */
private Token makeToken(String text, int posIncr, int startOffset, int endOffset) {
  Token token = new Token();
  token.append(text);
  token.setPositionIncrement(posIncr);
  token.setOffset(startOffset, endOffset);
  return token;
}
 
Example 3
Source File: TestMaxPosition.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that adding a document whose second token pushes the position past
 * {@code IndexWriter.MAX_POSITION} fails with {@link IllegalArgumentException}
 * and that the rejected document is not visible to a reader opened afterwards.
 *
 * <p>The directory, writer and reader are managed with try-with-resources so that
 * they are released even when an assertion fails part-way through the test.
 */
public void testTooBigPosition() throws Exception {
  try (Directory dir = newDirectory();
       IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null))) {
    Document doc = new Document();
    // This is at position 1:
    Token t1 = new Token("foo", 0, 3);
    t1.setPositionIncrement(2);
    if (random().nextBoolean()) {
      t1.setPayload(new BytesRef(new byte[] { 0x1 } ));
    }
    Token t2 = new Token("foo", 4, 7);
    // This should overflow max:
    t2.setPositionIncrement(IndexWriter.MAX_POSITION);
    if (random().nextBoolean()) {
      t2.setPayload(new BytesRef(new byte[] { 0x1 } ));
    }
    doc.add(new TextField("foo", new CannedTokenStream(new Token[] {t1, t2})));
    expectThrows(IllegalArgumentException.class, () -> {
      iw.addDocument(doc);
    });

    // Document should not be visible:
    try (IndexReader r = DirectoryReader.open(iw)) {
      assertEquals(0, r.numDocs());
    }
  }
}
 
Example 4
Source File: TestMaxPosition.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a token placed exactly at {@code IndexWriter.MAX_POSITION} is accepted:
 * the document becomes visible and the postings for "foo" report both positions.
 *
 * <p>The directory, writer and reader are managed with try-with-resources so that
 * they are released even when an assertion fails part-way through the test.
 */
public void testMaxPosition() throws Exception {
  try (Directory dir = newDirectory();
       IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null))) {
    Document doc = new Document();
    // This is at position 0:
    Token t1 = new Token("foo", 0, 3);
    if (random().nextBoolean()) {
      t1.setPayload(new BytesRef(new byte[] { 0x1 } ));
    }
    Token t2 = new Token("foo", 4, 7);
    t2.setPositionIncrement(IndexWriter.MAX_POSITION);
    if (random().nextBoolean()) {
      t2.setPayload(new BytesRef(new byte[] { 0x1 } ));
    }
    doc.add(new TextField("foo", new CannedTokenStream(new Token[] {t1, t2})));
    iw.addDocument(doc);

    // Document should be visible:
    try (IndexReader r = DirectoryReader.open(iw)) {
      assertEquals(1, r.numDocs());
      PostingsEnum postings = MultiTerms.getTermPostingsEnum(r, "foo", new BytesRef("foo"));

      // "foo" appears in docID=0
      assertEquals(0, postings.nextDoc());

      // "foo" appears 2 times in the doc
      assertEquals(2, postings.freq());

      // first at pos=0
      assertEquals(0, postings.nextPosition());

      // next at pos=MAX
      assertEquals(IndexWriter.MAX_POSITION, postings.nextPosition());
    }
  }
}
 
Example 5
Source File: FastVectorHighlighterTest.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Indexes a canned token stream in which "httpwwwfacebookcom" is stacked on top of
 * "http" (position increment 0) and checks that the fast vector highlighter returns
 * the same fully highlighted fragment for each phrase query and for their disjunction.
 *
 * <p>The directory, writer and reader are managed with try-with-resources so that
 * they are released even when an assertion fails part-way through the test.
 */
public void testBooleanPhraseWithSynonym() throws IOException {
  try (Directory dir = newDirectory();
       IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())))) {
    Document doc = new Document();
    FieldType type = new FieldType(TextField.TYPE_NOT_STORED);
    type.setStoreTermVectorOffsets(true);
    type.setStoreTermVectorPositions(true);
    type.setStoreTermVectors(true);
    type.freeze();
    // Synonym token sharing the position of the preceding "http" token.
    Token syn = new Token("httpwwwfacebookcom", 6, 29);
    syn.setPositionIncrement(0);
    CannedTokenStream ts = new CannedTokenStream(
        new Token("test", 0, 4),
        new Token("http", 6, 10),
        syn,
        new Token("www", 13, 16),
        new Token("facebook", 17, 25),
        new Token("com", 26, 29)
    );
    Field field = new Field("field", ts, type);
    doc.add(field);
    doc.add(new StoredField("field", "Test: http://www.facebook.com"));
    writer.addDocument(doc);
    FastVectorHighlighter highlighter = new FastVectorHighlighter();

    try (IndexReader reader = DirectoryReader.open(writer)) {
      int docId = 0;

      // query1: match
      PhraseQuery pq = new PhraseQuery("field", "test", "http", "www", "facebook", "com");
      FieldQuery fieldQuery  = highlighter.getFieldQuery(pq, reader);
      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
      assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);

      // query2: match
      PhraseQuery pq2 = new PhraseQuery("field", "test", "httpwwwfacebookcom", "www", "facebook", "com");
      fieldQuery  = highlighter.getFieldQuery(pq2, reader);
      bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
      assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);

      // query3: OR query1 and query2 together
      BooleanQuery.Builder bq = new BooleanQuery.Builder();
      bq.add(pq, BooleanClause.Occur.SHOULD);
      bq.add(pq2, BooleanClause.Occur.SHOULD);
      fieldQuery  = highlighter.getFieldQuery(bq.build(), reader);
      bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
      assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);
    }
  }
}
 
Example 6
Source File: FastVectorHighlighterTest.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@link Token} for {@code term} with the given offsets and position increment.
 *
 * @param term term text of the token
 * @param posInc position increment assigned to the token
 * @param startOffset start offset handed to the {@code Token} constructor
 * @param endOffset end offset handed to the {@code Token} constructor
 * @return the freshly created token
 */
private static Token token(String term, int posInc, int startOffset, int endOffset) {
  final Token result = new Token(term, startOffset, endOffset);
  result.setPositionIncrement(posInc);
  return result;
}
 
Example 7
Source File: HighlighterTest.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Returns a hand-built token stream that replays a fixed list of five tokens
 * ("hi", "hispeed", "speed", "10", "foo"); "speed" has a position increment of 0,
 * every other token has an increment of 1.
 */
protected TokenStream getTS2() {
  // String s = "Hi-Speed10 foo";
  return new TokenStream() {
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
    private final List<Token> lst = new ArrayList<>();
    private Iterator<Token> iter;

    {
      enqueue("hi", 0, 2, 1);
      enqueue("hispeed", 0, 8, 1);
      enqueue("speed", 3, 8, 0);
      enqueue("10", 8, 10, 1);
      enqueue("foo", 11, 14, 1);
      iter = lst.iterator();
    }

    /** Creates a token, assigns its position increment and appends it to the list. */
    private void enqueue(String term, int start, int end, int posInc) {
      Token t = createToken(term, start, end);
      t.setPositionIncrement(posInc);
      lst.add(t);
    }

    @Override
    public boolean incrementToken() {
      if (!iter.hasNext()) {
        return false;
      }
      final Token next = iter.next();
      clearAttributes();
      // Copy the canned token's state into this stream's attributes.
      termAtt.setEmpty().append(next);
      posIncrAtt.setPositionIncrement(next.getPositionIncrement());
      offsetAtt.setOffset(next.startOffset(), next.endOffset());
      return true;
    }

    @Override
    public void reset() throws IOException {
      super.reset();
      // Restart the replay from the first token.
      iter = lst.iterator();
    }
  };
}
 
Example 8
Source File: HighlighterTest.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Returns a hand-built token stream that replays a fixed list of five tokens
 * ("hispeed", "hi", "speed", "10", "foo"); "hi" has a position increment of 0,
 * every other token has an increment of 1.
 */
protected TokenStream getTS2a() {
  // String s = "Hi-Speed10 foo";
  return new TokenStream() {
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
    private final List<Token> lst = new ArrayList<>();
    private Iterator<Token> iter;

    {
      enqueue("hispeed", 0, 8, 1);
      enqueue("hi", 0, 2, 0);
      enqueue("speed", 3, 8, 1);
      enqueue("10", 8, 10, 1);
      enqueue("foo", 11, 14, 1);
      iter = lst.iterator();
    }

    /** Creates a token, assigns its position increment and appends it to the list. */
    private void enqueue(String term, int start, int end, int posInc) {
      Token t = createToken(term, start, end);
      t.setPositionIncrement(posInc);
      lst.add(t);
    }

    @Override
    public boolean incrementToken() {
      if (!iter.hasNext()) {
        return false;
      }
      final Token next = iter.next();
      clearAttributes();
      // Copy the canned token's state into this stream's attributes.
      termAtt.setEmpty().append(next);
      posIncrAtt.setPositionIncrement(next.getPositionIncrement());
      offsetAtt.setOffset(next.startOffset(), next.endOffset());
      return true;
    }

    @Override
    public void reset() throws IOException {
      super.reset();
      // Restart the replay from the first token.
      iter = lst.iterator();
    }
  };
}
 
Example 9
Source File: FuzzySuggesterTest.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@link Token} for {@code term} with both offsets set to 0 and the given
 * position increment and position length.
 *
 * @param term term text of the token
 * @param posInc position increment assigned to the token
 * @param posLength position length assigned to the token
 * @return the freshly created token
 */
private static Token token(String term, int posInc, int posLength) {
  Token result = new Token(term, 0, 0);
  result.setPositionIncrement(posInc);
  result.setPositionLength(posLength);
  return result;
}
 
Example 10
Source File: AnalyzingSuggesterTest.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a zero-offset {@link Token} (start and end offset both 0) for {@code term}.
 *
 * @param term term text of the token
 * @param posInc position increment to set on the token
 * @param posLength position length to set on the token
 * @return the configured token
 */
private static Token token(String term, int posInc, int posLength) {
  Token tok = new Token(term, 0, 0);
  tok.setPositionIncrement(posInc);
  tok.setPositionLength(posLength);
  return tok;
}
 
Example 11
Source File: TestTermAutomatonQuery.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@link Token} for {@code term} whose offsets span {@code 0} to
 * {@code term.length()}, with the given position increment and position length.
 *
 * @param term term text of the token
 * @param posInc position increment assigned to the token
 * @param posLength position length assigned to the token
 * @return the freshly created token
 */
private static Token token(String term, int posInc, int posLength) {
  final int endOffset = term.length();
  Token result = new Token(term, 0, endOffset);
  result.setPositionIncrement(posInc);
  result.setPositionLength(posLength);
  return result;
}
 
Example 12
Source File: TestFlattenGraphFilter.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@link Token} for {@code term} with explicit offsets, position increment
 * and position length.
 *
 * @param term term text of the token
 * @param posInc position increment assigned to the token
 * @param posLength position length assigned to the token
 * @param startOffset start offset handed to the {@code Token} constructor
 * @param endOffset end offset handed to the {@code Token} constructor
 * @return the freshly created token
 */
private static Token token(String term, int posInc, int posLength, int startOffset, int endOffset) {
  Token result = new Token(term, startOffset, endOffset);
  result.setPositionIncrement(posInc);
  result.setPositionLength(posLength);
  return result;
}
 
Example 13
Source File: TestMultiPhraseQuery.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@link Token} holding {@code text} with the given position increment.
 *
 * @param text term text appended to the new token
 * @param posIncr position increment assigned to the token
 * @return the freshly created token
 */
private static Token makeToken(String text, int posIncr) {
  Token token = new Token();
  token.append(text);
  token.setPositionIncrement(posIncr);
  return token;
}
 
Example 14
Source File: TestFieldInvertState.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Indexes one document built from a large random token array, in which some tokens
 * are stacked on their predecessor (position increment 0), and compares the
 * {@code FieldInvertState} captured by {@code NeverForgetsSimilarity} with the
 * statistics tracked while generating the tokens.
 */
public void testRandom() throws Exception {
  int numUniqueTokens = TestUtil.nextInt(random(), 1, 25);
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  iwc.setSimilarity(NeverForgetsSimilarity.INSTANCE);
  IndexWriter w = new IndexWriter(dir, iwc);
  Document doc = new Document();

  int numTokens = atLeast(10000);
  Token[] tokens = new Token[numTokens];
  Map<Character,Integer> counts = new HashMap<>();
  int numStacked = 0;
  int maxTermFreq = 0;
  int pos = -1;
  for (int i = 0; i < numTokens; i++) {
    char tokenChar = (char) ('a' + random().nextInt(numUniqueTokens));

    // Bump the per-term frequency and keep track of the largest one seen so far.
    int newCount = counts.merge(tokenChar, 1, Integer::sum);
    maxTermFreq = Math.max(maxTermFreq, newCount);

    Token token = new Token(Character.toString(tokenChar), 2*i, 2*i+1);

    // The very first token is never stacked; later ones are stacked at random.
    boolean stacked = i > 0 && random().nextInt(7) == 3;
    if (stacked) {
      token.setPositionIncrement(0);
      numStacked++;
    } else {
      pos++;
    }
    tokens[i] = token;
  }

  CannedTokenStream stream = new CannedTokenStream(tokens);
  Field field = new Field("field", stream, TextField.TYPE_NOT_STORED);
  doc.add(field);
  w.addDocument(doc);

  FieldInvertState fis = NeverForgetsSimilarity.INSTANCE.lastState;
  assertEquals(maxTermFreq, fis.getMaxTermFrequency());
  assertEquals(counts.size(), fis.getUniqueTermCount());
  assertEquals(numStacked, fis.getNumOverlap());
  assertEquals(numTokens, fis.getLength());
  assertEquals(pos, fis.getPosition());

  IOUtils.close(w, dir);
}
 
Example 15
Source File: TestGraphTokenStreamFiniteStrings.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a {@link Token} for {@code term} with offsets {@code 0} and
 * {@code term.length()}, then applies the position increment and position length.
 *
 * @param term term text of the token
 * @param posInc position increment to set on the token
 * @param posLength position length to set on the token
 * @return the configured token
 */
private static Token token(String term, int posInc, int posLength) {
  final int termLength = term.length();
  Token tok = new Token(term, 0, termLength);
  tok.setPositionIncrement(posInc);
  tok.setPositionLength(posLength);
  return tok;
}