Java Code Examples for org.apache.lucene.analysis.core.WhitespaceTokenizer#addAttribute()

The following examples show how to use org.apache.lucene.analysis.core.WhitespaceTokenizer#addAttribute(). Each snippet is taken from an open-source project; the source file and license are noted above each example.
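
All of the snippets follow the same consumption pattern: register the attribute with addAttribute() before the stream is consumed, then call reset(), loop over incrementToken(), and finish with end() and close(). As a minimal, self-contained sketch of that pattern (the input string and class name are illustrative, assuming a Lucene version with the no-argument WhitespaceTokenizer constructor, as used in the examples below):

import java.io.StringReader;

import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class WhitespaceTokenizerDemo {
  public static void main(String[] args) throws Exception {
    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
    // addAttribute() returns the attribute instance registered with the
    // tokenizer's AttributeSource; the same instance is updated in place on
    // every successful incrementToken() call.
    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);

    tokenizer.setReader(new StringReader("hello   whitespace tokenizer"));
    tokenizer.reset();
    while (tokenizer.incrementToken()) {
      System.out.println(term.toString()); // prints hello, whitespace, tokenizer
    }
    tokenizer.end();
    tokenizer.close();
  }
}
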
Example 1
Source File: CommonGramsFilterTest.java    From lucene-solr with Apache License 2.0
public void testQueryReset() throws Exception {
  final String input = "How the s a brown s cow d like A B thing?";
  WhitespaceTokenizer wt = new WhitespaceTokenizer();
  wt.setReader(new StringReader(input));
  CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
  CommonGramsQueryFilter nsf = new CommonGramsQueryFilter(cgf);
  
  // CommonGramsFilter and CommonGramsQueryFilter wrap the tokenizer and share
  // its AttributeSource, so the CharTermAttribute obtained from wt reflects
  // the tokens produced by nsf.
  CharTermAttribute term = wt.addAttribute(CharTermAttribute.class);
  nsf.reset();
  assertTrue(nsf.incrementToken());
  assertEquals("How_the", term.toString());
  assertTrue(nsf.incrementToken());
  assertEquals("the_s", term.toString());
  nsf.close();
  
  wt.setReader(new StringReader(input));
  nsf.reset();
  assertTrue(nsf.incrementToken());
  assertEquals("How_the", term.toString());
}
 
Example 2
Source File: XmlInterpolationTest.java    From lucene-solr with Apache License 2.0
private String[] analyzeReturnTokens(String docText) {
  List<String> result = new ArrayList<>();

  Reader filter = new HTMLStripCharFilter(new StringReader(docText),
          Collections.singleton("unescaped"));
  WhitespaceTokenizer ts = new WhitespaceTokenizer();
  final CharTermAttribute termAttribute = ts.addAttribute(CharTermAttribute.class);
  try {
    ts.setReader(filter);
    ts.reset();
    while (ts.incrementToken()) {
      result.add(termAttribute.toString());
    }
    ts.end();
  } catch (IOException e) {
    throw new RuntimeException(e);
  } finally {
    IOUtils.closeQuietly(ts);
  }
  return result.toArray(new String[result.size()]);
}
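
For context, here is a sketch of how this helper might be exercised; the HTML input and expected tokens are illustrative and not taken from the original test:

// Illustrative call only: HTMLStripCharFilter removes the markup before
// tokenization, so only the visible text is split on whitespace.
String[] tokens = analyzeReturnTokens("<p>Hello <b>bright</b> world</p>");
// expected tokens: "Hello", "bright", "world"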
 
Example 3
Source File: XmlInterpolationTest.java    From SolrTextTagger with Apache License 2.0
private String[] analyzeReturnTokens(String docText) {
  List<String> result = new ArrayList<>();

  Reader filter = new HTMLStripCharFilter(new StringReader(docText),
          Collections.singleton("unescaped"));
  WhitespaceTokenizer ts = new WhitespaceTokenizer();
  final CharTermAttribute termAttribute = ts.addAttribute(CharTermAttribute.class);
  try {
    ts.setReader(filter);
    ts.reset();
    while (ts.incrementToken()) {
      result.add(termAttribute.toString());
    }
    ts.end();
  } catch (IOException e) {
    throw new RuntimeException(e);
  } finally {
    IOUtils.closeQuietly(ts);
  }
  return result.toArray(new String[result.size()]);
}
 
Example 4
Source File: XmlInterpolationTest.java    From lucene-solr with Apache License 2.0
private int[] analyzeTagOne(String docText, String start, String end) {
  int[] result = {-1, -1};

  Reader filter = new HTMLStripCharFilter(new StringReader(docText));

  WhitespaceTokenizer ts = new WhitespaceTokenizer();
  final CharTermAttribute termAttribute = ts.addAttribute(CharTermAttribute.class);
  final OffsetAttribute offsetAttribute = ts.addAttribute(OffsetAttribute.class);
  try {
    ts.setReader(filter);
    ts.reset();
    while (ts.incrementToken()) {
      final String termString = termAttribute.toString();
      if (termString.equals(start))
        result[0] = offsetAttribute.startOffset();
      if (termString.equals(end)) {
        result[1] = offsetAttribute.endOffset();
        return result;
      }
    }
    ts.end();
  } catch (IOException e) {
    throw new RuntimeException(e);
  } finally {
    IOUtils.closeQuietly(ts);
  }
  return result;
}
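
Similarly, a sketch of how analyzeTagOne might be invoked; the input and the start/end strings are illustrative:

// Illustrative call only: because HTMLStripCharFilter corrects token offsets
// back to the raw input, the returned positions index into docText itself
// (markup included), not into the stripped text.
int[] offsets = analyzeTagOne("<em>first</em> middle <em>last</em>", "first", "last");
// offsets[0] = start offset of "first" in docText; offsets[1] = end offset of "last"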
 
Example 5
Source File: XmlInterpolationTest.java    From SolrTextTagger with Apache License 2.0
private int[] analyzeTagOne(String docText, String start, String end) {
  int[] result = {-1, -1};

  Reader filter = new HTMLStripCharFilter(new StringReader(docText));

  WhitespaceTokenizer ts = new WhitespaceTokenizer();
  final CharTermAttribute termAttribute = ts.addAttribute(CharTermAttribute.class);
  final OffsetAttribute offsetAttribute = ts.addAttribute(OffsetAttribute.class);
  try {
    ts.setReader(filter);
    ts.reset();
    while (ts.incrementToken()) {
      final String termString = termAttribute.toString();
      if (termString.equals(start))
        result[0] = offsetAttribute.startOffset();
      if (termString.equals(end)) {
        result[1] = offsetAttribute.endOffset();
        return result;
      }
    }
    ts.end();
  } catch (IOException e) {
    throw new RuntimeException(e);
  } finally {
    IOUtils.closeQuietly(ts);
  }
  return result;
}