Java Code Examples for org.apache.lucene.analysis.CachingTokenFilter

The following examples show how to use org.apache.lucene.analysis.CachingTokenFilter. These examples are extracted from open source projects.
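CachingTokenFilter records the tokens it pulls from the wrapped stream on the first pass and replays them from its cache on every subsequent reset(), so one analysis pass can feed several consumers. Before the project examples, here is a minimal, self-contained sketch of that contract; the field name, sample text, and choice of StandardAnalyzer are placeholders invented for illustration (package locations assume Lucene 7+), not code from any of the projects below.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class CachingTokenFilterSketch {
  public static void main(String[] args) throws IOException {
    Analyzer analyzer = new StandardAnalyzer();
    try (TokenStream source = analyzer.tokenStream("field", "hello caching world")) {
      CachingTokenFilter cached = new CachingTokenFilter(source);
      CharTermAttribute termAtt = cached.addAttribute(CharTermAttribute.class);

      // First pass: tokens are pulled from the wrapped stream and cached.
      cached.reset();
      while (cached.incrementToken()) {
        System.out.println("pass 1: " + termAtt);
      }

      // Second pass: reset() rewinds the cache; the underlying
      // tokenizer is not consulted again.
      cached.reset();
      while (cached.incrementToken()) {
        System.out.println("pass 2: " + termAtt);
      }
      cached.end();
    }
  }
}
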
Example 1
Source Project: lucene-solr   Source File: TestTeeSinkTokenFilter.java   License: Apache License 2.0
public void testMultipleSources() throws Exception {
  final TeeSinkTokenFilter tee1 = new TeeSinkTokenFilter(whitespaceMockTokenizer(buffer1.toString()));
  final TokenStream source1 = new CachingTokenFilter(tee1);

  tee1.addAttribute(CheckClearAttributesAttribute.class);

  MockTokenizer tokenizer = new MockTokenizer(tee1.getAttributeFactory(), MockTokenizer.WHITESPACE, false);
  tokenizer.setReader(new StringReader(buffer2.toString()));
  final TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(tokenizer);
  final TokenStream source2 = tee2;

  assertTokenStreamContents(source1, tokens1);
  assertTokenStreamContents(source2, tokens2);

  TokenStream lowerCasing = new LowerCaseFilter(source1);
  String[] lowerCaseTokens = new String[tokens1.length];
  for (int i = 0; i < tokens1.length; i++) {
    lowerCaseTokens[i] = tokens1[i].toLowerCase(Locale.ROOT);
  }
  assertTokenStreamContents(lowerCasing, lowerCaseTokens);
}
 
Example 2
Source Project: pyramid   Source File: PhraseCountQueryBuilder.java   License: Apache License 2.0
protected Query doToQuery(QueryShardContext context) throws IOException {
    // Analyzer analyzer = context.getMapperService().searchAnalyzer();
    Analyzer analyzer = new WhitespaceAnalyzer();
    try (TokenStream source = analyzer.tokenStream(fieldName, value.toString())) {
        CachingTokenFilter stream = new CachingTokenFilter(new LowerCaseFilter(source));
        TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
        if (termAtt == null) {
            return null;
        }
        List<CustomSpanTermQuery> clauses = new ArrayList<>();
        stream.reset();
        while (stream.incrementToken()) {
            Term term = new Term(fieldName, termAtt.getBytesRef());
            clauses.add(new CustomSpanTermQuery(term));
        }
        return new PhraseCountQuery(clauses.toArray(new CustomSpanTermQuery[clauses.size()]), slop, inOrder, weightedCount);
    } catch (IOException e) {
        throw new RuntimeException("Error analyzing query text", e);
    }
}
 
Example 3
Source Project: lucene-solr   Source File: TestTermVectorsWriter.java   License: Apache License 2.0
public void testEndOffsetPositionWithCachingTokenFilter() throws Exception {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(analyzer));
  Document doc = new Document();
  try (TokenStream stream = new CachingTokenFilter(analyzer.tokenStream("field", "abcd   "))) {
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setStoreTermVectors(true);
    customType.setStoreTermVectorPositions(true);
    customType.setStoreTermVectorOffsets(true);
    Field f = new Field("field", stream, customType);
    doc.add(f);
    doc.add(f);
    w.addDocument(doc);
  }
  w.close();

  IndexReader r = DirectoryReader.open(dir);
  TermsEnum termsEnum = r.getTermVectors(0).terms("field").iterator();
  assertNotNull(termsEnum.next());
  PostingsEnum dpEnum = termsEnum.postings(null, PostingsEnum.ALL);
  assertEquals(2, termsEnum.totalTermFreq());

  assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  dpEnum.nextPosition();
  assertEquals(0, dpEnum.startOffset());
  assertEquals(4, dpEnum.endOffset());

  dpEnum.nextPosition();
  assertEquals(8, dpEnum.startOffset());
  assertEquals(12, dpEnum.endOffset());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum.nextDoc());

  r.close();
  dir.close();
}
 
Example 4
Source Project: crate   Source File: TextFieldMapper.java   License: Apache License 2.0
private static boolean hasGaps(CachingTokenFilter stream) throws IOException {
    PositionIncrementAttribute posIncAtt = stream.getAttribute(PositionIncrementAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        if (posIncAtt.getPositionIncrement() > 1) {
            return true;
        }
    }
    return false;
}
 
Example 5
Source Project: crate   Source File: TextFieldMapper.java   License: Apache License 2.0
private static CachingTokenFilter cache(TokenStream in) {
    if (in instanceof CachingTokenFilter) {
        return (CachingTokenFilter) in;
    }
    return new CachingTokenFilter(in);
}
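
Examples 4 and 5 come from the same crate source file: the stream is cached once, scanned for position gaps, and can then be rewound and consumed again to build the actual query. The sketch below is a hypothetical illustration of how the two helpers might be combined; the demo class, stop-word analyzer, and sample text are invented for this page (package locations assume Lucene 7+/8.x), and the first pass through hasGaps() is what fills the cache.

import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

public class CacheAndScanDemo {

  // cache() and hasGaps() as defined in Examples 4 and 5 above.
  private static CachingTokenFilter cache(TokenStream in) {
    return in instanceof CachingTokenFilter ? (CachingTokenFilter) in : new CachingTokenFilter(in);
  }

  private static boolean hasGaps(CachingTokenFilter stream) throws IOException {
    PositionIncrementAttribute posIncAtt = stream.getAttribute(PositionIncrementAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      if (posIncAtt.getPositionIncrement() > 1) {
        return true;
      }
    }
    return false;
  }

  public static void main(String[] args) throws IOException {
    // A stop filter creates position gaps where "the" is removed.
    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new WhitespaceTokenizer();
        CharArraySet stopWords = new CharArraySet(Arrays.asList("the"), true);
        return new TokenStreamComponents(tokenizer, new StopFilter(tokenizer, stopWords));
      }
    };

    try (TokenStream source = analyzer.tokenStream("f", "over the lazy dog")) {
      CachingTokenFilter stream = cache(source);
      boolean gaps = hasGaps(stream);        // first pass consumes the input and fills the cache
      System.out.println("gaps: " + gaps);   // true: "the" was dropped, leaving a position gap

      stream.reset();                        // rewinds the cache, not the tokenizer
      CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
      while (stream.incrementToken()) {
        System.out.println(termAtt);         // second pass replays the cached tokens
      }
      stream.end();
    }
  }
}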