Java Code Examples for org.apache.lucene.analysis.Analyzer#close()

The following examples show how to use org.apache.lucene.analysis.Analyzer#close() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestRemoveDuplicatesTokenFilter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
  final int numIters = atLeast(3);
  for (int i = 0; i < numIters; i++) {
    SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean());
    final int numEntries = atLeast(10);
    for (int j = 0; j < numEntries; j++) {
      add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
    }
    final SynonymMap map = b.build();
    final boolean ignoreCase = random().nextBoolean();
    
    final Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
        TokenStream stream = new SynonymGraphFilter(tokenizer, map, ignoreCase);
        return new TokenStreamComponents(tokenizer, new RemoveDuplicatesTokenFilter(stream));
      }
    };

    checkRandomData(random(), analyzer, 200);
    analyzer.close();
  }
}
 
Example 2
Source File: GreekAnalyzerTest.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Test the analysis of various greek strings.
 *
 * @throws Exception in case an error occurs
 */
public void testAnalyzer() throws Exception {
  Analyzer a = new GreekAnalyzer();
  // Verify the correct analysis of capitals and small accented letters, and
  // stemming
  assertAnalyzesTo(a, "Μία εξαιρετικά καλή και πλούσια σειρά χαρακτήρων της Ελληνικής γλώσσας",
      new String[] { "μια", "εξαιρετ", "καλ", "πλουσ", "σειρ", "χαρακτηρ",
      "ελληνικ", "γλωσσ" });
  // Verify the correct analysis of small letters with diaeresis and the elimination
  // of punctuation marks
  assertAnalyzesTo(a, "Προϊόντα (και)     [πολλαπλές] - ΑΝΑΓΚΕΣ",
      new String[] { "προιοντ", "πολλαπλ", "αναγκ" });
  // Verify the correct analysis of capital accented letters and capital letters with diaeresis,
  // as well as the elimination of stop words
  assertAnalyzesTo(a, "ΠΡΟΫΠΟΘΕΣΕΙΣ  Άψογος, ο μεστός και οι άλλοι",
      new String[] { "προυποθεσ", "αψογ", "μεστ", "αλλ" });
  a.close();
}
 
Example 3
Source File: TestSynonymMapFilter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
public void testDoKeepOrig() throws Exception {
  b = new SynonymMap.Builder(true);
  add("a b", "foo", true);

  final SynonymMap map = b.build();

  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
      return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, false));
    }
  };

  assertAnalyzesTo(analyzer, "a b c",
                   new String[] {"a", "foo", "b", "c"},
                   new int[] {0, 0, 2, 4},
                   new int[] {1, 3, 3, 5},
                   null,
                   new int[] {1, 0, 1, 1},
                   new int[] {1, 2, 1, 1},
                   true);
  checkAnalysisConsistency(random(), analyzer, false, "a b c");
  analyzer.close();
}
 
Example 4
Source File: TestSynonymGraphFilter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/** Multiple input tokens map to a single output token */
public void testManyToOne() throws Exception {
  SynonymMap.Builder b = new SynonymMap.Builder();
  add(b, "a b c", "z", true);

  Analyzer a = getAnalyzer(b, true);
  assertAnalyzesTo(a,
                   "a b c d",
                   new String[] {"z", "a", "b", "c", "d"},
                   new int[]    { 0,   0,   2,   4,   6},
                   new int[]    { 5,   1,   3,   5,   7},
                   new String[] {"SYNONYM", "word", "word", "word", "word"},
                   new int[]    { 1,   0,   1,   1,   1},
                   new int[]    { 3,   1,   1,   1,   1});
  a.close();
}
 
Example 5
Source File: TestFrenchMinimalStemFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new FrenchMinimalStemFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 6
Source File: TestSynonymGraphFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public void testRecursion3() throws Exception {
  SynonymMap.Builder b = new SynonymMap.Builder(true);
  final boolean keepOrig = true;
  add(b, "zoo zoo", "zoo", keepOrig);
  Analyzer a = getFlattenAnalyzer(b, true);

  assertAnalyzesTo(a, "zoo zoo $ zoo",
     new String[]{"zoo", "zoo", "zoo", "$", "zoo"},
      new int[]{1, 0, 1, 1, 1});
  a.close();
}
 
Example 7
Source File: TestSynonymGraphFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public void testBuilderNoDedup() throws Exception {
  SynonymMap.Builder b = new SynonymMap.Builder(false);
  final boolean keepOrig = false;
  add(b, "a b", "ab", keepOrig);
  add(b, "a b", "ab", keepOrig);
  add(b, "a b", "ab", keepOrig);
  Analyzer a = getAnalyzer(b, true);

  assertAnalyzesTo(a, "a b",
      new String[] { "ab", "ab", "ab" },
      new int[] { 1, 0, 0 });
  a.close();
}
 
Example 8
Source File: TestHyphenatedWordsFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new HyphenatedWordsFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 9
Source File: TestNorwegianLightStemFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/** Test against a Nynorsk vocabulary file */
public void testNynorskVocabulary() throws IOException {  
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(source, new NorwegianLightStemFilter(source, NYNORSK));
    }
  };
  assertVocabulary(analyzer, Files.newInputStream(getDataPath("nn_light.txt")));
  analyzer.close();
}
 
Example 10
Source File: TestSmartChineseAnalyzer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public void testChineseStopWordsDefaultTwoPhrases() throws Exception {
  Analyzer ca = new SmartChineseAnalyzer(); /* will load stopwords */
  String sentence = "我购买了道具和服装。 我购买了道具和服装。";
  String result[] = { "我", "购买", "了", "道具", "和", "服装", "我", "购买", "了", "道具", "和", "服装" };
  assertAnalyzesTo(ca, sentence, result);
  ca.close();
}
 
Example 11
Source File: TestJapaneseAnalyzer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public void test5thCuriousString() throws Exception {
  final String s = "ihcp gyqnaznr \u2d21\u2d07\u2d0a\u2d02\u2d23\u2d27\u2d13\u2d02 \u1d202\u1d223\u1d248\u1d222 \ufb0d\ufb28\ufb2c\ufb0f\ufb05 \u2c25\u2c43\u2c10\u2c03\u2c2f\u2c0e\u2c15 nwto \ua785\ua7d8\ua7f2\ua77f\ua7cf\ua781\ua77f\ua757\ua72c\ua7be\ua7eb\ua73a &#11336415<? tfxhjr bgupy aI\u5c8f8D\ue8a7\uffc8\ub7971\ueb64\ue956\u05da geufse l sqiuthbirdmc qvnqzpwvjogk ltupf \u1f073\u1f08a\u1f09d\u1f09a nfllv \u03ac\udd762\u029c  qgvkssnrxeh \u06aa\u0620\u06a6\u0623 ? \u9357b\u13677\u46f2\ue5bd mrag xdd \u10b6d\u10b61 \u07a4\u0721\ue723\ue76eM \u2ffc\u2ff1 \u123e1 tzouw \ufadaZ\u0167\u071d\u014c \u30dc\u30e0\u30d4\u30f2\u30e0\u30dd\u30a5 xd ugygzxtz ]*[|]]|]-(-[ upcx \ue01e5\ue015b\ue01ce\ue01c0\ue0107 tlzil \ua60d\ua596\ua58a\ua577\ua61b\ua5bb\ua5d2 ijhsxwh fsbhxwc pic gnygchvo \ua690\ua653\ua678\ua673\ua653\ua657\ua693\ua69f\ua69d \u02e4\u02c9\u02ca\u02e8\u02ce\u02fb\u02ba bpxuulgoq \u0019:\ud6523 ((([++f?.[ (c][)] \u24a0\u247b\u24e0\u2478 \ue138 \uf973\u01e1 \ufb22\ufb3c\ufb14\ufb32\ufb3c\ufb27\ufb49\ufb09\ufb1c yfdulpnpb mgtbj zvakpplvu bxeek umkvf eobqdmex revjiop qtbnqfcn  \u170d\u171e\u1712\u171c\u1700\u1703\u171e\u1707\u1709\u1709 \u32fe\uedc0 \uea26uL\u0096\ue920\u04f6JF\uef46\u0004 geoyrl \u0309\u346106\uee47\u10103c\ue329\u008a\uf19c\u0003 \u3007\u301c\u301e\u300d\u3011\u3009\u3007\u3017\u3032 ]*+f?)).[. xhc \u8dde\u2a57 cczyuuqdmxt \u1d09\u1d33\u1d69 \ua83c\ua83a\ua830\ua83c\ua83e\ua832\ua832\ua831 \ufe69\ufe5d\ufe62\ufe57\ufe69\ufe5c\ufe63\ufe6e \u1f188\u1f19a\u1f111\u1f178\u1f121 ||+||-?) vqpdhyiy ozf \u440e\u04a0\\\u061b\u4ebbb vtdbotna  \u0702\u0003\uecea\ue2a7\u821e7\ufc92 xtyfrlkgns xr fpwlen wgmlz \ued79\u0001|\uf367\ue655e\u034e zlprrq \u2c2f\u2c4c\u2c30\u2c42\u2c52\u2c53\u2c04\u2c06\u2c23\u2c4c\u2c07 \ud4db\ue34a\u02be \u44edf>\u0693b kswwheh flz ktqgfe \u4de9I\u0001\u98411\u5504\u55641\u032b\ue3a9 C^l\ue564\u027f\u10b34f\uc46f aecihbou bp qrud eksbxkwgo pokyimh xomhw uiurixk pmpsmly \u3457\uf39c\ufafd\u22ae8 xr \u101ef\u101de \ue000b\ue0006 avijdmer \u1571\u160e\u15fc\u147f\u1488 zyhgksku \u0318\u0340 ) rd zlawdwej ickyyil \u1cf0\u1cf7\u1cef b]fe+?f?*? nqjccb btujcvxwdd tcakgxs fddow \u013b\uec4a\uf8cd\u78142\u2b70\uf3ae\u0214\u217a\ue657 \uedec\uecda\u0614\u1ae9\uf705\u0544\ufc09f \u1169 \ua599\ua517\ua5e5\ua576\ua5b5\ua528\ua60d\ua57c\ua638\ua552\ua618 \u27565\ue5ce\ue4f6#\u2389 bwxtsg \u0ce6B\u9ed1.\u05d8\ue235\u59e0A </><p>647910 bybgvsvuv \u0684\u8c7e\ua668E\ue7adR\u5250?\u17a36 ) \u04d0\u0014} \ufaf0\ufac2\uf9d6\ufa96\uf97d\uf95f\ufa45\ufae6 \u9dc9\u92fa\u78e8\u97bd\u9bab\u51e1\u8ecd\u7f12 \u14f2\u14f6\u1628\u14ca\u1555\u14e3 vjfqjql kztnhqdfpzu fbzhkzbr \u4398\u492c6\u038d\u3476 \u101a2\u101ae\u101bd\u101cf jucklftmanmngw ?><    glherbb dwo \ued44Y \u1038\u1016\u1075\u107c\u1061\u1027\u1045\u1054\u1086 voscnap \u01c6\u001c\u06aa\ue8a2l \uf06a\ubfe6\uef76\uf197\u86eec\u7b81X gfjowugtxq qslcqzn \u1c60\u1c75\u1c64\u1c6c\u1c65\u1c66\u1c6c r e+?-|b| \u19cd\u1991\u19a7\u19a0\u19d3\u19d1\u19d0\u1999 \u177f okso \u8f87| \ue56cm\u025c\ubc039\ue415\u0002  uljephzf vaspgv gdxtritw ifgdwcikkyiob -[[ jgswx vegjwrermtv lxvcxe lg \u26ab\u26d6\u263c\u2657\u2651\u26d6 \u10b6e\u10b65 %\ue107 \uf803\u0417\ufaa5P\uf08a \ueb35\u024f\u0690\ud3740\u05ad \ue0c0\uf6c7\u046a\uebd3\ue257\uf704 k cf hqzjydhegztm uwbbasg nbykogqlnbingdw lf <p> uvqswllbbozu \u0bc1\u0bfa\u0b9a\u0bcf\u0b80 -]+ \u3164\u3165\u3181\u318f\u3154 hjpdfmxu (d)( </  yi >\\'42 tpjbuxlz .[( puunlpd qwtpdequedgy \u1004d\u1007f\u10024\u10041\u10040 a\uf607 erxgt wqiyuuh zj \u31f9\u31f9\u31f1\u31f6\u31ff\u31f6 \u07ec jhtfnvhbpm \u846f9N\u0369 ser ystcwekly \u1770\u176b\u1765\u1764\u176a pkr \u171c\u1700\u171d\u1703 \u02fd\u02f1\u02e8\u02e0 \u9938\u9790\u652c\u85a0 hopzdmo \u2084\u2075\u209d\u2070\u2073\u207a\u2073\u2088\u2080\u2086\u207b\u2097 kjeuj \u1d064\u1d0ef\u1d0e6\u1d02b\u1d0d8 \u128d\u12c2\u12bc\u1309\u123e\u1305\u12c9\u126e\u1243\u1266\u1247 \u1006d\u10001\u1001e jvmo \u02eaw\u5db6b\u010b\u0682\u0fa7;\uae0c\uec6f\u5aaa6 \u01ec\ufeccfKt\u7af6 dhhddrl piofeczg \u2d2c\u2d05\u2d1f\u2d0e\u2d1b\u2d16 s\ufa04Gh\u001b\u0759\u05a6 ehhbgswb \ua9f0\ue3c2\u0208j \u212e\u2116\u2122\u2130\u2135\u2108\u2106\u214e \u1046e\u10456\u1046d fahjn lcfhxxxlj \u1011e\u10138\u1010c yurxoxykzhaq iwv \ue0e0\ue5a0\ue2c0\uead0\u1027ab\uf0a7k\ue6df0\u02e4 \u10907\u10907 a mxanvzwv iehu \u0770\u0766\u0768\u075a\u076f\u075c ><p>>\n?> |.?(-+] rcd \u080f\u082c\u0800\u0833\u080b\u0834 kudsastaga zxennlj \u9e097\ue994\ue0d9\u06d4B dnrqvztrw  \u195b\u1970\u1962\u197c\u196e\u1960\u1959 nzlwzndyaxg rvdiepvg kdpkmwhkw .||[() mbnzcm \u0748\u0016\u70b65\u0410\u22d9\u9e3e jrjelhyvgsibt ;\ubaf6\ua99d\u9086b wf  </sTYl amlkfl nswln rdiafhi hflgc \u06a1\uf3f1\u0003\ud202T \u101b9\u101b6 \u000b\u4bed\u9717\ue110R(\u9033\u04b6\uf736\u02f9 yjjfyzyv \u10463 \u0cfc\u0ce8\u0c9f ([b+-+)] 3\ufc76\ue76bp\u0008\u880e \uf8634\rV\u6bea1\ufd11\u0017\u70427 ffdgyd ;? tdl \uefd4\u0019\u60b0c\ue104\u05f7 \u3b28K\u01a1\u0562.#\u02d4 ftfahax \u19c6\u01c9\ud05a-U\u0242\ua1cbD qrkudkiemmbgi -.+]+- z \uaa69\uaa6f\uaa69\uaa67\uaa7f\uaa6e\uaa69 &\u020eH\ufb73 went fdt jmslj \u1738\u1721\u1730\u1724\u1733\u1731\u1727 kgnie cndxscz \u10148\u10152 \uaa38\uaa2f\uaa3a\uaa2f \uf42a5\u0288) \ua940\ua930\ua946\ua932\ua95f\ua955\ua939\ua932\ua93c zoi \ueac6\uff25AF \u6391\u310af6\u400f7T\ueab8 \u00169 ydkel znwh \uf99d\ufa1b\ufae2\uf976\uf96f\uf9a5\ufaa5\uf9f6\uf9ab tafdltwaby \u1c10\u1c0a\u1c30\u1c31\u1c4f\u1c45 </Br>& \u0943\u0965\u0964\u0958\u092f\u096a\u0931\u0948 \u0013\u42e2\ua5b5D\u5f98e\u5991\u0244 )||]- \u7864e\u0250\uca2b\u05d5 )[..?)) \u2df3\u2dfb\u2df8\u2dec\u2df1\u2de7\u2de9 htiato \u0014,\u0321\ue918\u05a5\u7a23e6\u532b2\u0486\uf52d ftiiziaz \ueaca\ub4af4\ufe06P wechywnla silxy \ufe08\ufe00 \ua6cc\ua6ae\ua6de\ua6ec\ua6ce\ua6ee\ua6a0\ua6b2\ua6cc\ua6e5\ua6f4\ua6e2\ua6eb\ua6a9 \ua88f\ua88c\ua896\ua89d\ua89e\ua887 \u30e7\u30ea\u30ee\u30ec\u30ec\u30ff\u30ce \u1cb78\u10e2b3\u001e\ua212 m ro \u3951\u3db1\u4bdd\u3cb8\u4672\u3fd4 \u27f0\u27fc\u27fa\u27f5\u27fa\u27fd\u27f9\u27f2\u27fe lsssf <!- \u3cd3\ufb6f\u166e2\u039f\ub641<:\u0599\u0468 \u1646\u0476\ud336\ue765cD\u73f5f\u8bc1\u001b hu \u1d604 mszttwsmbu in eirlbqt |(*]??] szfyeavpbxtv tpvpfyxtsmbnq kufa \uf8a7\ue07b\u768c4 onxmgkw znomzko \u03d1@\u6caea\u21e0+\u000c\u9a755 hqgrsxo \u10912\u10914 vrledoho bjgvgccaqpb vnkbxuy \u1a1a\u1a08\u1a17\u1a0f\u1a01\u1a0a\u1a09 \ue015d\ue01d8\ue01a1\ue01a1 aesvbf xfvdyownlg ocewl o\u0007' tvewmt jmnpfpvzz g hindokqsqok uqompm \ue652\u0015\u6be4e\u03ef rtr spccv nt smrksialynj \u10a48\u10a05\u10a54\u10a05\u10a4d\u10a43 \u307d\ue12fo-0\u06de\u4df57 \u253c\u257c\u2520\u2515\u255d\u250c wqaazzpnjbf \\\u01a4\u134b5\uca972\u0006\u0638\uf689\uf703 \u2265\u226a\u22a9\u2273\u22d5\u224f\u2274\u22d5 btilufh \u3eee\u05c8t\ue081+\u2f7ab\u0163 \u1f02b\u1f002\u1f00d jliarc jvc    \u0750\u046d\u0011\ufaaf |.-*))a+ bgce \u10b4a\u10b59\u10b5f\u10b45 \ud336\u01e1\u4765\u328e\u07b7 ckklfdr \u05c5\u079a\u0103\u041e\u3b7e\u02f8\uf4bf\u2943\ufd56q\u0472 jjks \ufd40\ufc7c\ufdf3\ufbd2\ufbb4\ufb64\ufcbf djzprnmparaf tzemq hafz njtf niccokn dzzfo dpqy \u10321\u10304\u10303\u1030b\u1030a +?+a qlexbl nptpehb \uaa75\uaa6f\uaa75\uaa7f\uaa7c\uaa71\uaa69\uaa7c wbpoee xxbpboxh \u0115\uefd8\u06ae\u6122\u02d2 \u10186\u10181\u10165\u10171 ci gpvc mvhvra \u3331\u330e slmlikfv m\u4394\u9d47\u0eb5>\u0562\u02eb ttudnzewbysvlr \u22e2\u22fa\u2285\u22ad\u2252 5\ub6b4\uf72ef\u0180\ueac8 \u075e\ud9b0cK^\u3fded\u66d4\u066b\u001a\u0091 \u13d5\u13d4 ..[ \u8cfa\u2554e\ufe4dM\u0017 chlax rdfphn \ub76c9 \u1093c\u1092f \u5821\ufa16w\u0542\uecce\u9b1d4 \u10b7d\u10b7f\u10b76 ibkbyhshddvsc  letbtcg &p cbzpnbk ]e-|[c+]] \u03c0\u03d2\u0384\u03f8\u03e2\u03c3\u0391\u03ff\u03c5 </  oz tqfexxl Z0\ua5b15\u0660 \u37c7\u0002\ucd8d\u6f71a, ojhzhl  \u25606\u27b07\u23bc9\u22017\u266b6\u29dce vtpmcefbgp aegcmc f][?.?.+.+ riddb \u6ae3\ua0c4\u1ab9e\u73821\uce3e\u5471\uf19f hmhpkak dv \u276f\u27b8\u2725\u2711\u271a\u2788 \u78cda\u0281\uf603\u05ab\ue4d4 +].? \uacdc\ubf02\u57d11\ud08de\ua3f2\uf065\uedb3\uef0f xwx pjrfdpqxhpw \uebf3\u1b63\ue386\ue33a[Z\u070d\u92dc\u61fd \u02bc\u02d3\u02cc\u02e1\u02b1\u02ce\u02c5 \uccad\uec1c\u29f8 wkcairs vxdp ihjz kmup oitabfffd \u10a5\u10c5\u10f3\u10eb\u10c2\u10ca\u10c2 \u0605\u06f9\u06a5 z .]*- tveygx \u137e\u136d\u1324 hnhr baiu ognjxxe fwidfbp \u10846\u10851 qkhgjb x ]* fxbvmao </scr \u10c2c5 &#</p>? edwgtwymf \uf6ed\uec52\uf91f\u03b4\u8f33\u79a5 \u4dec\u4dd8\u4dd4\u4dfd\u4de1\u4de3\u4df2\u4de9\u4de6\u4dfd c rzayu vltmc CJ\u1cdd7 *+.-|(c)a \u77e09:U\ue4b8\u7664 vlbis edr \ubde91\u0333k\u0230\u2e05\u81cd *+[.*]+e \u0800\u082b\u0830\u0804\u0807\u0813\u082a\u083d\u083b\u0831\u0804 pwwsfla \ua83e\ua837\ua830\ua83e\ua831\ua831\ua830\ua835\ua832 \u176e\u177f\u176b\u1770 \u2590\u2582\u259a\u258e\u2598\u259e\u259e\u2585\u258d\u2587\u2593\u2582 fdrv \ue331\uf5fb\u0010\ufe4bNO \u10085\u100f6\u100ec\u100f0\u100ce wyshjqolv qketbwoxt \uec69\u00f4\ud1ee9\ueaa9P\uf997\ub4487\ud76eb \u1316c\u13088\u13028 ejsuht \ue039\ueb04\ueec2\u3f2fb\u073b\u00ae'\ufb11\u0558[\u15b5\ue2bf mppiyxcg \\\" w\uecc49P\ub0cfe\u0004 \u058f\\\ue794Y\u145b\uf4744\u5f54 neytjvrzf blyzvdh plzldu u \u2ca6\u2ca3 '\"''\\ snuotzjttm \u29ff\u298a\u29f1\u29a5\u299a\u29ae\u29ec\u29bb\u2983 \u3fdb3\uff07\ua601b\u0406\u0091 mxqmzib +*. najy r\u74c4\ued24\uf631\u04c0~HG\u0017I vhbjdhhcrn mtqwskrpj xhh fa kalvhruartx **]a* eyggsjs  &#x78b405 pns ";
  final Analyzer a = new JapaneseAnalyzer(null, Mode.SEARCH,
                                          JapaneseAnalyzer.getDefaultStopSet(),
                                          JapaneseAnalyzer.getDefaultStopTags());
  checkAnalysisConsistency(random(), a, false, s);
  a.close();
}
 
Example 12
Source File: TestSynonymGraphFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public void testMatching() throws Exception {
  SynonymMap.Builder b = new SynonymMap.Builder(true);
  final boolean keepOrig = false;
  add(b, "a b", "ab", keepOrig);
  add(b, "a c", "ac", keepOrig);
  add(b, "a", "aa", keepOrig);
  add(b, "b", "bb", keepOrig);
  add(b, "z x c v", "zxcv", keepOrig);
  add(b, "x c", "xc", keepOrig);

  Analyzer a = getFlattenAnalyzer(b, true);

  checkOneTerm(a, "$", "$");
  checkOneTerm(a, "a", "aa");
  checkOneTerm(a, "b", "bb");

  assertAnalyzesTo(a, "a $",
      new String[]{"aa", "$"},
      new int[]{1, 1});

  assertAnalyzesTo(a, "$ a",
      new String[]{"$", "aa"},
      new int[]{1, 1});

  assertAnalyzesTo(a, "a a",
      new String[]{"aa", "aa"},
      new int[]{1, 1});

  assertAnalyzesTo(a, "z x c v",
      new String[]{"zxcv"},
      new int[]{1});

  assertAnalyzesTo(a, "z x c $",
      new String[]{"z", "xc", "$"},
      new int[]{1, 1, 1});
  a.close();
}
 
Example 13
Source File: TestSynonymGraphFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public void testRecursion2() throws Exception {
  SynonymMap.Builder b = new SynonymMap.Builder(true);
  final boolean keepOrig = false;
  add(b, "zoo", "zoo", keepOrig);
  add(b, "zoo", "zoo zoo", keepOrig);
  Analyzer a = getAnalyzer(b, true);

  // verify("zoo zoo $ zoo", "zoo/zoo zoo/zoo/zoo $/zoo zoo/zoo zoo");
  assertAnalyzesTo(a, "zoo zoo $ zoo",
                   new String[] { "zoo", "zoo", "zoo", "zoo", "zoo", "zoo", "$", "zoo", "zoo", "zoo" },
      new int[] { 1, 0, 1, 1, 0, 1, 1, 1, 0, 1 });
  a.close();
}
 
Example 14
Source File: TestThaiAnalyzer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public void testOffsets() throws Exception {
  Analyzer analyzer = new ThaiAnalyzer(CharArraySet.EMPTY_SET);
  assertAnalyzesTo(analyzer, "การที่ได้ต้องแสดงว่างานดี",
      new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" },
      new int[] { 0, 3, 6, 9, 13, 17, 20, 23 },
      new int[] { 3, 6, 9, 13, 17, 20, 23, 25 });
  analyzer.close();
}
 
Example 15
Source File: TestCJKWidthFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new CJKWidthFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 16
Source File: TestGermanStemFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new GermanStemFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 17
Source File: TestTurkishLowerCaseFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new TurkishLowerCaseFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 18
Source File: TestUkrainianAnalyzer.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
  Analyzer analyzer = new UkrainianMorfologikAnalyzer();
  checkRandomData(random(), analyzer, 200 * RANDOM_MULTIPLIER);
  analyzer.close();
}
 
Example 19
Source File: TestBulgarianAnalyzer.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
public void testReusableTokenStream() throws IOException {
  Analyzer a = new BulgarianAnalyzer();
  assertAnalyzesTo(a, "документи", new String[] {"документ"});
  assertAnalyzesTo(a, "документ", new String[] {"документ"});
  a.close();
}
 
Example 20
Source File: TestSmartChineseAnalyzer.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
public void testDelimiters() throws Exception {
  Analyzer analyzer = new SmartChineseAnalyzer(true);
  assertAnalyzesTo(analyzer, "我购买︱ Tests 了道具和服装",
      new String[] { "我", "购买", "test", "了", "道具", "和", "服装"});
  analyzer.close();
}