org.apache.lucene.analysis.CachingTokenFilter Java Examples

The following examples show how to use org.apache.lucene.analysis.CachingTokenFilter. Each example is taken from an open source project; the source file, project, and license are noted above the code.
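Before the project examples, here is a minimal, self-contained sketch of the class's core behavior: the first full pass over a CachingTokenFilter consumes the wrapped stream and caches its tokens, and every later reset() replays the cache instead of re-running analysis. The field name, sample text, and use of WhitespaceAnalyzer below are illustrative assumptions, not taken from any of the projects that follow.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class CachingTokenFilterSketch {
  public static void main(String[] args) throws IOException {
    Analyzer analyzer = new WhitespaceAnalyzer();
    try (TokenStream source = analyzer.tokenStream("field", "Foo Bar Baz")) {
      CachingTokenFilter cached = new CachingTokenFilter(source);
      CharTermAttribute termAtt = cached.addAttribute(CharTermAttribute.class);

      // First pass: the wrapped stream is consumed and its tokens are cached.
      cached.reset();
      while (cached.incrementToken()) {
        System.out.println("first pass:  " + termAtt.toString());
      }
      cached.end();

      // Second pass: reset() rewinds over the cache, so the same tokens are
      // replayed without re-analyzing the text.
      cached.reset();
      while (cached.incrementToken()) {
        System.out.println("second pass: " + termAtt.toString());
      }
      cached.end();
    }
  }
}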
Example #1
Source File: TestTeeSinkTokenFilter.java    From lucene-solr with Apache License 2.0
public void testMultipleSources() throws Exception {
  final TeeSinkTokenFilter tee1 = new TeeSinkTokenFilter(whitespaceMockTokenizer(buffer1.toString()));
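  // Caching tee1's output lets source1 be fully consumed by assertTokenStreamContents
  // below and then replayed through LowerCaseFilter at the end of the test.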
  final TokenStream source1 = new CachingTokenFilter(tee1);

  tee1.addAttribute(CheckClearAttributesAttribute.class);

  MockTokenizer tokenizer = new MockTokenizer(tee1.getAttributeFactory(), MockTokenizer.WHITESPACE, false);
  tokenizer.setReader(new StringReader(buffer2.toString()));
  final TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(tokenizer);
  final TokenStream source2 = tee2;

  assertTokenStreamContents(source1, tokens1);
  assertTokenStreamContents(source2, tokens2);

  TokenStream lowerCasing = new LowerCaseFilter(source1);
  String[] lowerCaseTokens = new String[tokens1.length];
  for (int i = 0; i < tokens1.length; i++)
    lowerCaseTokens[i] = tokens1[i].toLowerCase(Locale.ROOT);
  assertTokenStreamContents(lowerCasing, lowerCaseTokens);
}
 
Example #2
Source File: PhraseCountQueryBuilder.java    From pyramid with Apache License 2.0
protected Query doToQuery(QueryShardContext context) throws IOException {
//    Analyzer analyzer = context.getMapperService().searchAnalyzer();
    Analyzer analyzer = new WhitespaceAnalyzer();
    try (TokenStream source = analyzer.tokenStream(fieldName, value.toString())) {
        CachingTokenFilter stream = new CachingTokenFilter(new LowerCaseFilter(source));
        TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
        if (termAtt == null) {
            return null;
        }
        List<CustomSpanTermQuery> clauses = new ArrayList<>();
        stream.reset();
        while (stream.incrementToken()) {
            Term term = new Term(fieldName, termAtt.getBytesRef());
            clauses.add(new CustomSpanTermQuery(term));
        }
        return new PhraseCountQuery(clauses.toArray(new CustomSpanTermQuery[clauses.size()]), slop, inOrder, weightedCount);
    } catch (IOException e) {
        throw new RuntimeException("Error analyzing query text", e);
    }
}
 
Example #3
Source File: TestTermVectorsWriter.java    From lucene-solr with Apache License 2.0
public void testEndOffsetPositionWithCachingTokenFilter() throws Exception {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(analyzer));
  Document doc = new Document();
  try (TokenStream stream = new CachingTokenFilter(analyzer.tokenStream("field", "abcd   "))) {
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setStoreTermVectors(true);
    customType.setStoreTermVectorPositions(true);
    customType.setStoreTermVectorOffsets(true);
    Field f = new Field("field", stream, customType);
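    // The same stream-backed field is added twice; on the second occurrence the
    // CachingTokenFilter replays its cached tokens, so 'abcd' is indexed at two positions.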
    doc.add(f);
    doc.add(f);
    w.addDocument(doc);
  }
  w.close();

  IndexReader r = DirectoryReader.open(dir);
  TermsEnum termsEnum = r.getTermVectors(0).terms("field").iterator();
  assertNotNull(termsEnum.next());
  PostingsEnum dpEnum = termsEnum.postings(null, PostingsEnum.ALL);
  assertEquals(2, termsEnum.totalTermFreq());

  assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  dpEnum.nextPosition();
  assertEquals(0, dpEnum.startOffset());
  assertEquals(4, dpEnum.endOffset());

  dpEnum.nextPosition();
  assertEquals(8, dpEnum.startOffset());
  assertEquals(12, dpEnum.endOffset());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum.nextDoc());

  r.close();
  dir.close();
}
 
Example #4
Source File: TextFieldMapper.java    From crate with Apache License 2.0
private static boolean hasGaps(CachingTokenFilter stream) throws IOException {
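    // Consume the cached stream looking for position increments greater than 1
    // (holes left by removed tokens such as stop words); because the stream is a
    // CachingTokenFilter, it can be reset and replayed afterwards.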
    PositionIncrementAttribute posIncAtt = stream.getAttribute(PositionIncrementAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        if (posIncAtt.getPositionIncrement() > 1) {
            return true;
        }
    }
    return false;
}
 
Example #5
Source File: TextFieldMapper.java    From crate with Apache License 2.0
private static CachingTokenFilter cache(TokenStream in) {
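    // Wrap the stream only if it is not already caching, so it is never cached twice.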
    if (in instanceof CachingTokenFilter) {
        return (CachingTokenFilter) in;
    }
    return new CachingTokenFilter(in);
}