Java Code Examples for org.apache.lucene.analysis.MockTokenizer#end()

The following examples show how to use org.apache.lucene.analysis.MockTokenizer#end(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.

Example 1

Source File: TestSynonymMapFilter.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Covers a synonym whose output is longer than the matched input, so that the
 * last output token ("b") has no input token underneath it.
 */
public void testOutputHangsOffEnd() throws Exception {
  b = new SynonymMap.Builder(true);

  // Rule "a" -> "a b", registered with keepOrig=false; the "b" hangs off the end.
  add("a", "a b", false);

  // Consume a single-token stream completely (reset .. close) before wrapping it.
  tokensIn = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  tokensIn.setReader(new StringReader("a"));
  tokensIn.reset();
  assertTrue(tokensIn.incrementToken());
  assertFalse(tokensIn.incrementToken());
  tokensIn.end();
  tokensIn.close();

  final SynonymMap synonyms = b.build();
  tokensOut = new SynonymFilter(tokensIn, synonyms, true);

  // Attributes that the shared fields expose for the filter's output.
  termAtt = tokensOut.addAttribute(CharTermAttribute.class);
  posIncrAtt = tokensOut.addAttribute(PositionIncrementAttribute.class);
  offsetAtt = tokensOut.addAttribute(OffsetAttribute.class);
  posLenAtt = tokensOut.addAttribute(PositionLengthAttribute.class);

  // The hanging token's endOffset should be inherited from the previous input token.
  verify("a", "a b:1");
}

Example 2

Source File: TestSynonymMapFilter.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Registers two multi-word synonym expansions ("aaa" and "bbb") and passes one input
 * sentence per rule to verify, together with the output expected for the keepOrig
 * setting in use.
 */
public void testBasic2() throws Exception {
  final boolean keepOrig = false;

  b = new SynonymMap.Builder(true);
  add("aaa", "aaaa1 aaaa2 aaaa3", keepOrig);
  add("bbb", "bbbb1 bbbb2", keepOrig);

  // Consume a single-token stream completely (reset .. close) before wrapping it.
  tokensIn = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  tokensIn.setReader(new StringReader("a"));
  tokensIn.reset();
  assertTrue(tokensIn.incrementToken());
  assertFalse(tokensIn.incrementToken());
  tokensIn.end();
  tokensIn.close();

  final SynonymMap synonyms = b.build();
  tokensOut = new SynonymFilter(tokensIn, synonyms, true);
  termAtt = tokensOut.addAttribute(CharTermAttribute.class);
  posIncrAtt = tokensOut.addAttribute(PositionIncrementAttribute.class);
  posLenAtt = tokensOut.addAttribute(PositionLengthAttribute.class);
  offsetAtt = tokensOut.addAttribute(OffsetAttribute.class);

  // Pick the expectation matching keepOrig: the first variant of each pair keeps
  // the original token ("bbb" / "aaa") at the match position, the second omits it.
  final String expectedForBbb = keepOrig
      ? "xyzzy bbb/bbbb1 pot/bbbb2 of gold"
      : "xyzzy bbbb1 pot/bbbb2 of gold";
  final String expectedForAaa = keepOrig
      ? "xyzzy aaa/aaaa1 pot/aaaa2 of/aaaa3 gold"
      : "xyzzy aaaa1 pot/aaaa2 of/aaaa3 gold";

  verify("xyzzy bbb pot of gold", expectedForBbb);
  verify("xyzzy aaa pot of gold", expectedForAaa);
}

Example 3

Source File: TestSynonymMapFilter.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Exercises a mixed rule set (single- and multi-word inputs, some registered with
 * keepOrig=true and some with keepOrig=false) through verify, and checks the filter's
 * capture count after selected inputs.
 */
public void testBasic() throws Exception {
  // Named flags for the third argument of add(...).
  final boolean keep = true;
  final boolean drop = false;

  b = new SynonymMap.Builder(true);
  add("a", "foo", keep);
  add("a b", "bar fee", keep);
  add("b c", "dog collar", keep);
  add("c d", "dog harness holder extras", keep);
  add("m c e", "dog barks loudly", drop);
  add("i j k", "feep", keep);

  // Two separate outputs registered for the same input.
  add("e f", "foo bar", drop);
  add("e f", "baz bee", drop);

  add("z", "boo", drop);
  add("y", "bee", keep);

  // Consume a single-token stream completely (reset .. close) before wrapping it.
  tokensIn = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  tokensIn.setReader(new StringReader("a"));
  tokensIn.reset();
  assertTrue(tokensIn.incrementToken());
  assertFalse(tokensIn.incrementToken());
  tokensIn.end();
  tokensIn.close();

  final SynonymMap synonyms = b.build();
  tokensOut = new SynonymFilter(tokensIn, synonyms, true);
  termAtt = tokensOut.addAttribute(CharTermAttribute.class);
  posIncrAtt = tokensOut.addAttribute(PositionIncrementAttribute.class);
  posLenAtt = tokensOut.addAttribute(PositionLengthAttribute.class);
  offsetAtt = tokensOut.addAttribute(OffsetAttribute.class);

  verify("a b c", "a/bar b/fee c");

  // Synonym output runs past the end of the input tokens.
  verify("x a b c d", "x a/bar b/fee c/dog d/harness holder extras");

  verify("a b a", "a/bar b/fee a/foo");

  // Outputs from successive matches stack on top of one another.
  verify("c d c d", "c/dog d/harness c/holder/dog d/extras/harness holder extras");

  // Both outputs registered for "e f" are produced.
  verify("e f", "foo/baz bar/bee");

  // Offsets for a multi-word input mapped to a single output token.
  verify("g i j k g", "g i/feep:7_3 j k g");

  // Rules with keepOrig=true and keepOrig=false matched in the same input.
  verify("a m c e x", "a/foo dog barks loudly x");
  verify("c d m c e x", "c/dog d/harness holder/dog extras/barks loudly x");
  assertTrue(tokensOut.getCaptureCount() > 0);

  // Nothing matches: no states should have been captured.
  verify("p q r s t", "p q r s t");
  assertEquals(0, tokensOut.getCaptureCount());

  // Only single-token inputs match, which need no lookahead: still no captures.
  verify("p q z y t", "p q boo y/bee t");
  assertEquals(0, tokensOut.getCaptureCount());
}

Example 4

Source File: TestSynonymMapFilter.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Generates a random document and a random set of synonym rules, computes the
 * expected output with slowSynMatcher, and passes both to verify.
 *
 * <p>Statement order matters here: every draw from random() happens in a fixed
 * sequence so that a given seed reproduces the same document and rules.
 */
public void testRandom() throws Exception {

  final int alphabetSize = TestUtil.nextInt(random(), 2, 7);
  final int docLen = atLeast(3000);
  final String document = getRandomString('a', alphabetSize, docLen);

  if (VERBOSE) {
    System.out.println("TEST: doc=" + document);
  }

  final int numSyn = atLeast(5);

  // Rules indexed by input string, plus the same rules in creation order.
  final Map<String,OneSyn> synMap = new HashMap<>();
  final List<OneSyn> syns = new ArrayList<>();
  final boolean dedup = random().nextBoolean();
  if (VERBOSE) {
    System.out.println("  dedup=" + dedup);
  }
  b = new SynonymMap.Builder(dedup);

  for (int i = 0; i < numSyn; i++) {
    final int inLen = TestUtil.nextInt(random(), 1, 5);
    final String synIn = getRandomString('a', alphabetSize, inLen).trim();

    // Reuse the rule for an input seen before; otherwise create and register one.
    OneSyn syn = synMap.get(synIn);
    if (syn == null) {
      syn = new OneSyn();
      syn.in = synIn;
      syn.out = new ArrayList<>();
      syns.add(syn);
      synMap.put(synIn, syn);
      // keepOrig is drawn once per distinct input, before the output is generated.
      syn.keepOrig = random().nextBoolean();
    }

    final int outLen = TestUtil.nextInt(random(), 1, 5);
    final String synOut = getRandomString('0', 10, outLen).trim();
    syn.out.add(synOut);
    add(synIn, synOut, syn.keepOrig);
    if (VERBOSE) {
      System.out.println("  syns[" + i + "] = " + syn.in + " -> " + syn.out + " keepOrig=" + syn.keepOrig);
    }
  }

  // Consume a single-token stream completely (reset .. close) before wrapping it.
  tokensIn = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  tokensIn.setReader(new StringReader("a"));
  tokensIn.reset();
  assertTrue(tokensIn.incrementToken());
  assertFalse(tokensIn.incrementToken());
  tokensIn.end();
  tokensIn.close();

  final SynonymMap synonyms = b.build();
  tokensOut = new SynonymFilter(tokensIn, synonyms, true);
  termAtt = tokensOut.addAttribute(CharTermAttribute.class);
  posIncrAtt = tokensOut.addAttribute(PositionIncrementAttribute.class);
  posLenAtt = tokensOut.addAttribute(PositionLengthAttribute.class);
  offsetAtt = tokensOut.addAttribute(OffsetAttribute.class);

  // When the builder was created with dedup=true, prune the reference rules as well.
  if (dedup) {
    pruneDups(syns);
  }

  final String expected = slowSynMatcher(document, syns, 5);

  if (VERBOSE) {
    System.out.println("TEST: expected=" + expected);
  }

  verify(document, expected);
}