Java Code Examples for org.apache.lucene.analysis.BaseTokenStreamTestCase

The following examples show how to use org.apache.lucene.analysis.BaseTokenStreamTestCase. These examples are extracted from open source projects; the source project, source file, and license are listed above each example.
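Most of the snippets below assert against an analyzer field named a (or analyzer) that each test class initializes in its own setup code, which is not reproduced on this page. As a minimal, self-contained sketch of the pattern, assuming Lucene's StandardAnalyzer and a test class extending BaseTokenStreamTestCase (the class name and setup below are illustrative, not taken from the projects listed here):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

// Illustrative sketch only: shows the typical shape of these tests.
public class MyAnalyzerTest extends BaseTokenStreamTestCase {

  private Analyzer a;  // the examples on this page refer to a field like this one

  @Override
  public void setUp() throws Exception {
    super.setUp();
    a = new StandardAnalyzer();
  }

  @Override
  public void tearDown() throws Exception {
    a.close();
    super.tearDown();
  }

  public void testSimple() throws Exception {
    // Asserts that the analyzer produces exactly these tokens, in this order.
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "Foo Bar", new String[] { "foo", "bar" });
  }
}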
Example 1
Source Project: lucene-solr   Source File: TestUAX29URLEmailTokenizer.java   License: Apache License 2.0
/** variation sequence */
public void testEmojiVariationSequence() throws Exception {
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "#️⃣",
      new String[] { "#️⃣" },
      new String[] { "<EMOJI>" });
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "3️⃣",
      new String[] { "3️⃣",},
      new String[] { "<EMOJI>" });

  // text presentation sequences
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "#\uFE0E",
      new String[] { },
      new String[] { });
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "3\uFE0E",  // \uFE0E is included in \p{WB:Extend}
      new String[] { "3\uFE0E",},
      new String[] { "<NUM>" });
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u2B55\uFE0E",     // \u2B55 = HEAVY BLACK CIRCLE
      new String[] { "\u2B55",},
      new String[] { "<EMOJI>" });
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u2B55\uFE0E\u200D\u2B55\uFE0E",
      new String[] { "\u2B55", "\u200D\u2B55"},
      new String[] { "<EMOJI>", "<EMOJI>" });
}
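This test (and the other TestUAX29URLEmailTokenizer examples below) exercises an analyzer field a built around UAX29URLEmailTokenizer in the test class's setup code, which is not shown on this page. A hedged sketch of the usual wiring, assuming the tokenizer's no-argument constructor (this field initialization is illustrative, not copied from the source file):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;

// Illustrative wiring for the `a` used above; the real test builds its own.
Analyzer a = new Analyzer() {
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer tokenizer = new UAX29URLEmailTokenizer();
    return new TokenStreamComponents(tokenizer);
  }
};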
 
Example 2
Source Project: lucene-solr   Source File: TestStandardAnalyzer.java   License: Apache License 2.0
/** variation sequence */
public void testEmojiVariationSequence() throws Exception {
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "#️⃣",
      new String[] { "#️⃣" },
      new String[] { "<EMOJI>" });
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "3️⃣",
      new String[] { "3️⃣",},
      new String[] { "<EMOJI>" });

  // text presentation sequences
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "#\uFE0E",
      new String[] { },
      new String[] { });
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "3\uFE0E",  // \uFE0E is included in \p{WB:Extend}
      new String[] { "3\uFE0E",},
      new String[] { "<NUM>" });
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u2B55\uFE0E",     // \u2B55 = HEAVY BLACK CIRCLE
      new String[] { "\u2B55",},
      new String[] { "<EMOJI>" });
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u2B55\uFE0E\u200D\u2B55\uFE0E",
      new String[] { "\u2B55", "\u200D\u2B55"},
      new String[] { "<EMOJI>", "<EMOJI>" });
}
 
Example 3
Source Project: lucene-solr   Source File: TestUAX29URLEmailAnalyzer.java   License: Apache License 2.0
public void testMailtoSchemeEmails () throws Exception {
  // See LUCENE-3880
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "MAILTO:[email protected]",
      new String[] {"mailto", "[email protected]"},
      new String[] { "<ALPHANUM>", "<EMAIL>" });

  // TODO: Support full mailto: scheme URIs. See RFC 6068: http://tools.ietf.org/html/rfc6068
  BaseTokenStreamTestCase.assertAnalyzesTo
      (a,  "mailto:[email protected],[email protected][email protected]"
          + "&subject=Subjectivity&body=Corpusivity%20or%20something%20like%20that",
          new String[] { "mailto",
              "[email protected]",
              // TODO: recognize ',' address delimiter. Also, see examples of ';' delimiter use at: http://www.mailto.co.uk/
              ",[email protected]",
              "[email protected]", // TODO: split field keys/values
              "subject", "subjectivity",
              "body", "corpusivity", "20or", "20something","20like", "20that" }, // TODO: Hex decoding + re-tokenization
          new String[] { "<ALPHANUM>",
              "<EMAIL>",
              "<EMAIL>",
              "<EMAIL>",
              "<ALPHANUM>", "<ALPHANUM>",
              "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>" });
}
 
Example 4
Source Project: crate   Source File: SimplePhoneticAnalysisTests.java   License: Apache License 2.0
public void testPhoneticTokenFilterDaitchMotokoff() throws IOException {
    TokenFilterFactory filterFactory = analysis.tokenFilter.get("daitch_mokotoff");
    Tokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("chauptman"));
    String[] expected = new String[] { "473660", "573660" };
    assertThat(filterFactory.create(tokenizer), instanceOf(DaitchMokotoffSoundexFilter.class));
    BaseTokenStreamTestCase.assertTokenStreamContents(filterFactory.create(tokenizer), expected);
}
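The analysis.tokenFilter registry above comes from the surrounding test infrastructure; the factory it returns wraps the stream in Lucene's DaitchMokotoffSoundexFilter. A rough stand-alone equivalent, assuming inject = false so that only the encoded forms are emitted (an assumption inferred from the expected tokens, not from the factory's actual configuration):

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.phonetic.DaitchMokotoffSoundexFilter;

public void testDaitchMokotoffDirectly() throws IOException {
    // Rough equivalent of the factory-created filter above
    // (assumes inject = false, i.e. original tokens are replaced by the codes).
    Tokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("chauptman"));
    BaseTokenStreamTestCase.assertTokenStreamContents(
        new DaitchMokotoffSoundexFilter(tokenizer, false),
        new String[] { "473660", "573660" });
}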
 
Example 5
public void testBasic() throws Exception {
  String text = "Wuthering FooBar distant goldeN ABC compote";
  Map<String,String> args = new HashMap<>();
  args.put("ignoreCase", "true");
  args.put("protected", "protected-1.txt,protected-2.txt");  // Protected: foobar, jaxfopbuz, golden, compote
  args.put("wrappedFilters", "lowercase");

  ResourceLoader loader = new SolrResourceLoader(TEST_PATH().resolve("collection1"));
  ProtectedTermFilterFactory factory = new ProtectedTermFilterFactory(args);
  factory.inform(loader);

  TokenStream ts = factory.create(whitespaceMockTokenizer(text));
  BaseTokenStreamTestCase.assertTokenStreamContents(ts,
      new String[] { "wuthering", "FooBar", "distant", "goldeN", "abc", "compote" });
}
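whitespaceMockTokenizer(...) is a helper inherited from BaseTokenStreamTestCase; roughly, it builds a MockTokenizer in whitespace mode and sets its reader to the given text, along these lines (an illustrative approximation, not the exact implementation):

import java.io.StringReader;
import org.apache.lucene.analysis.MockTokenizer;

// Approximation of the whitespaceMockTokenizer(text) helper used above;
// `text` is the input string passed to the helper.
MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
tokenizer.setReader(new StringReader(text));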
 
Example 6
Source Project: lucene-solr   Source File: TestUAX29URLEmailTokenizer.java   License: Apache License 2.0
public void testHugeDoc() throws IOException {
  StringBuilder sb = new StringBuilder();
  char whitespace[] = new char[4094];
  Arrays.fill(whitespace, ' ');
  sb.append(whitespace);
  sb.append("testing 1234");
  String input = sb.toString();
  UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(newAttributeFactory());
  tokenizer.setReader(new StringReader(input));
  BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
}
 
Example 7
Source Project: crate   Source File: SimplePhoneticAnalysisTests.java   License: Apache License 2.0
public void testPhoneticTokenFilterBeiderMorseNoLanguage() throws IOException {
    TokenFilterFactory filterFactory = analysis.tokenFilter.get("beidermorsefilter");
    Tokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("ABADIAS"));
    String[] expected = new String[] { "abYdias", "abYdios", "abadia", "abadiaS", "abadias", "abadio", "abadioS", "abadios", "abodia",
            "abodiaS", "abodias", "abodio", "abodioS", "abodios", "avadias", "avadios", "avodias", "avodios", "obadia", "obadiaS",
            "obadias", "obadio", "obadioS", "obadios", "obodia", "obodiaS", "obodias", "obodioS" };
    BaseTokenStreamTestCase.assertTokenStreamContents(filterFactory.create(tokenizer), expected);
}
 
Example 8
Source Project: lucene-solr   Source File: TestUAX29URLEmailTokenizer.java   License: Apache License 2.0
public void testLUCENE1545() throws Exception {
  /*
   * Standard analyzer does not correctly tokenize combining character U+0364 COMBINING LATIN SMALL LETTER E.
   * The word "moͤchte" is incorrectly tokenized into "mo" "chte", the combining character is lost.
   * Expected result is only one token "moͤchte".
   */
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "moͤchte", new String[] { "moͤchte" }); 
}
 
Example 9
Source Project: lucene-solr   Source File: TestUAX29URLEmailTokenizer.java   License: Apache License 2.0
public void testApostrophesSA() throws Exception {
  // internal apostrophes: O'Reilly, you're, O'Reilly's
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "O'Reilly", new String[]{"O'Reilly"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "you're", new String[]{"you're"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "she's", new String[]{"she's"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "Jim's", new String[]{"Jim's"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "don't", new String[]{"don't"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "O'Reilly's", new String[]{"O'Reilly's"});
}
 
Example 10
Source Project: lucene-solr   Source File: TestUAX29URLEmailTokenizer.java   License: Apache License 2.0
public void testVariousTextSA() throws Exception {
  // various
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "C embedded developers wanted", new String[]{"C", "embedded", "developers", "wanted"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo bar FOO BAR", new String[]{"foo", "bar", "FOO", "BAR"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo      bar .  FOO <> BAR", new String[]{"foo", "bar", "FOO", "BAR"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "\"QUOTED\" word", new String[]{"QUOTED", "word"});
}
 
Example 11
Source Project: lucene-solr   Source File: TestStandardAnalyzer.java   License: Apache License 2.0
public void testNumericSA() throws Exception {
  // floating point, serial, model numbers, ip addresses, etc.
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "21.35", new String[]{"21.35"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "R2D2 C3PO", new String[]{"R2D2", "C3PO"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"});
}
 
Example 12
Source Project: crate   Source File: SimplePhoneticAnalysisTests.java   License: Apache License 2.0
public void testPhoneticTokenFilterBeiderMorseWithLanguage() throws IOException {
    TokenFilterFactory filterFactory = analysis.tokenFilter.get("beidermorsefilterfrench");
    Tokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("Rimbault"));
    String[] expected = new String[] { "rimbD", "rimbDlt", "rimba", "rimbalt", "rimbo", "rimbolt", "rimbu", "rimbult", "rmbD", "rmbDlt",
            "rmba", "rmbalt", "rmbo", "rmbolt", "rmbu", "rmbult" };
    BaseTokenStreamTestCase.assertTokenStreamContents(filterFactory.create(tokenizer), expected);
}
 
Example 13
Source Project: lucene-solr   Source File: TestStandardAnalyzer.java   License: Apache License 2.0
public void testApostrophesSA() throws Exception {
  // internal apostrophes: O'Reilly, you're, O'Reilly's
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "O'Reilly", new String[]{"O'Reilly"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "you're", new String[]{"you're"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "she's", new String[]{"she's"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "Jim's", new String[]{"Jim's"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "don't", new String[]{"don't"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "O'Reilly's", new String[]{"O'Reilly's"});
}
 
Example 14
Source Project: lucene-solr   Source File: TestStandardAnalyzer.java   License: Apache License 2.0
public void testVariousTextSA() throws Exception {
  // various
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "C embedded developers wanted", new String[]{"C", "embedded", "developers", "wanted"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo bar FOO BAR", new String[]{"foo", "bar", "FOO", "BAR"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo      bar .  FOO <> BAR", new String[]{"foo", "bar", "FOO", "BAR"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "\"QUOTED\" word", new String[]{"QUOTED", "word"});
}
 
Example 15
Source Project: lucene-solr   Source File: TestUAX29URLEmailAnalyzer.java   License: Apache License 2.0
public void testLUCENE1545() throws Exception {
  /*
   * Standard analyzer does not correctly tokenize combining character U+0364 COMBINING LATIN SMALL LETTER E.
   * The word "moͤchte" is incorrectly tokenized into "mo" "chte", the combining character is lost.
   * Expected result is only one token "moͤchte".
   */
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "moͤchte", new String[] { "moͤchte" }); 
}
 
Example 16
Source Project: lucene-solr   Source File: TestUAX29URLEmailAnalyzer.java   License: Apache License 2.0
public void testApostrophesSA() throws Exception {
  // internal apostrophes: O'Reilly, you're, O'Reilly's
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "O'Reilly", new String[]{"o'reilly"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "you're", new String[]{"you're"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "she's", new String[]{"she's"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "Jim's", new String[]{"jim's"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "don't", new String[]{"don't"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "O'Reilly's", new String[]{"o'reilly's"});
}
 
Example 17
Source Project: lucene-solr   Source File: TestUAX29URLEmailAnalyzer.java   License: Apache License 2.0
public void testNumericSA() throws Exception {
  // floating point, serial, model numbers, ip addresses, etc.
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "21.35", new String[]{"21.35"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "R2D2 C3PO", new String[]{"r2d2", "c3po"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"});
}
 
Example 18
Source Project: lucene-solr   Source File: TestUAX29URLEmailAnalyzer.java   License: Apache License 2.0
public void testVariousTextSA() throws Exception {
  // various
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "C embedded developers wanted", new String[]{"c", "embedded", "developers", "wanted"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo bar FOO BAR", new String[]{"foo", "bar", "foo", "bar"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo      bar .  FOO <> BAR", new String[]{"foo", "bar", "foo", "bar"});
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "\"QUOTED\" word", new String[]{"quoted", "word"});
}
 
Example 19
Source Project: lucene-solr   Source File: TestStandardAnalyzer.java   License: Apache License 2.0
public void testHugeDoc() throws IOException {
  StringBuilder sb = new StringBuilder();
  char whitespace[] = new char[4094];
  Arrays.fill(whitespace, ' ');
  sb.append(whitespace);
  sb.append("testing 1234");
  String input = sb.toString();
  StandardTokenizer tokenizer = new StandardTokenizer();
  tokenizer.setReader(new StringReader(input));
  BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
}
 
Example 20
Source Project: lucene-solr   Source File: TestStandardAnalyzer.java   License: Apache License 2.0
public void testJapanese() throws Exception {
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "仮名遣い カタカナ",
      new String[] { "仮", "名", "遣", "い", "カタカナ" },
      new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
}
 
Example 21
Source Project: lucene-solr   Source File: TestJapaneseNumberFilter.java   License: Apache License 2.0
@Test
public void testFunnyIssue() throws Exception {
  BaseTokenStreamTestCase.checkAnalysisConsistency(
      random(), analyzer, true, "〇〇\u302f\u3029\u3039\u3023\u3033\u302bB", true
  );
}
 
Example 22
Source Project: lucene-solr   Source File: TestICUTokenizer.java   License: Apache License 2.0
public void testKorean() throws Exception {
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "훈민정음",
      new String[] { "훈민정음" },
      new String[] { "<HANGUL>" });
}
 
Example 23
Source Project: lucene-solr   Source File: TestICUTokenizer.java   License: Apache License 2.0
public void testJapanese() throws Exception {
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "仮名遣い カタカナ",
      new String[] { "仮", "名", "遣", "い", "カタカナ" },
      new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
}
 
Example 24
Source Project: lucene-solr   Source File: TestStandardAnalyzer.java   License: Apache License 2.0
public void testKoreanSA() throws Exception {
  // Korean words
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "안녕하세요 한글입니다", new String[]{"안녕하세요", "한글입니다"});
}
 
Example 25
Source Project: lucene-solr   Source File: TestKoreanNumberFilter.java   License: Apache License 2.0
@Test
public void testFunnyIssue() throws Exception {
  BaseTokenStreamTestCase.checkAnalysisConsistency(
      random(), analyzer, true, "영영\u302f\u3029\u3039\u3023\u3033\u302bB", true
  );
}
 
Example 26
Source Project: lucene-solr   Source File: TestUAX29URLEmailTokenizer.java   License: Apache License 2.0
public void testArmenian() throws Exception {
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "Վիքիպեդիայի 13 միլիոն հոդվածները (4,600` հայերեն վիքիպեդիայում) գրվել են կամավորների կողմից ու համարյա բոլոր հոդվածները կարող է խմբագրել ցանկաց մարդ ով կարող է բացել Վիքիպեդիայի կայքը։",
      new String[] { "Վիքիպեդիայի", "13", "միլիոն", "հոդվածները", "4,600", "հայերեն", "վիքիպեդիայում", "գրվել", "են", "կամավորների", "կողմից", 
      "ու", "համարյա", "բոլոր", "հոդվածները", "կարող", "է", "խմբագրել", "ցանկաց", "մարդ", "ով", "կարող", "է", "բացել", "Վիքիպեդիայի", "կայքը" } );
}
 
Example 27
Source Project: lucene-solr   Source File: TestUAX29URLEmailTokenizer.java   License: Apache License 2.0
public void testAmharic() throws Exception {
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "ዊኪፔድያ የባለ ብዙ ቋንቋ የተሟላ ትክክለኛና ነጻ መዝገበ ዕውቀት (ኢንሳይክሎፒዲያ) ነው። ማንኛውም",
      new String[] { "ዊኪፔድያ", "የባለ", "ብዙ", "ቋንቋ", "የተሟላ", "ትክክለኛና", "ነጻ", "መዝገበ", "ዕውቀት", "ኢንሳይክሎፒዲያ", "ነው", "ማንኛውም" } );
}
 
Example 28
Source Project: lucene-solr   Source File: TestUAX29URLEmailTokenizer.java   License: Apache License 2.0
public void testArabic() throws Exception {
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "الفيلم الوثائقي الأول عن ويكيبيديا يسمى \"الحقيقة بالأرقام: قصة ويكيبيديا\" (بالإنجليزية: Truth in Numbers: The Wikipedia Story)، سيتم إطلاقه في 2008.",
      new String[] { "الفيلم", "الوثائقي", "الأول", "عن", "ويكيبيديا", "يسمى", "الحقيقة", "بالأرقام", "قصة", "ويكيبيديا",
      "بالإنجليزية", "Truth", "in", "Numbers", "The", "Wikipedia", "Story", "سيتم", "إطلاقه", "في", "2008" } ); 
}
 
Example 29
Source Project: lucene-solr   Source File: TestUAX29URLEmailTokenizer.java   License: Apache License 2.0
public void testAramaic() throws Exception {
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "ܘܝܩܝܦܕܝܐ (ܐܢܓܠܝܐ: Wikipedia) ܗܘ ܐܝܢܣܩܠܘܦܕܝܐ ܚܐܪܬܐ ܕܐܢܛܪܢܛ ܒܠܫܢ̈ܐ ܣܓܝܐ̈ܐ܂ ܫܡܗ ܐܬܐ ܡܢ ܡ̈ܠܬܐ ܕ\"ܘܝܩܝ\" ܘ\"ܐܝܢܣܩܠܘܦܕܝܐ\"܀",
      new String[] { "ܘܝܩܝܦܕܝܐ", "ܐܢܓܠܝܐ", "Wikipedia", "ܗܘ", "ܐܝܢܣܩܠܘܦܕܝܐ", "ܚܐܪܬܐ", "ܕܐܢܛܪܢܛ", "ܒܠܫܢ̈ܐ", "ܣܓܝܐ̈ܐ", "ܫܡܗ",
      "ܐܬܐ", "ܡܢ", "ܡ̈ܠܬܐ", "ܕ", "ܘܝܩܝ", "ܘ", "ܐܝܢܣܩܠܘܦܕܝܐ"});
}
 
Example 30
Source Project: lucene-solr   Source File: TestUAX29URLEmailTokenizer.java   License: Apache License 2.0
public void testBengali() throws Exception {
  BaseTokenStreamTestCase.assertAnalyzesTo(a, "এই বিশ্বকোষ পরিচালনা করে উইকিমিডিয়া ফাউন্ডেশন (একটি অলাভজনক সংস্থা)। উইকিপিডিয়ার শুরু ১৫ জানুয়ারি, ২০০১ সালে। এখন পর্যন্ত ২০০টিরও বেশী ভাষায় উইকিপিডিয়া রয়েছে।",
      new String[] { "এই", "বিশ্বকোষ", "পরিচালনা", "করে", "উইকিমিডিয়া", "ফাউন্ডেশন", "একটি", "অলাভজনক", "সংস্থা", "উইকিপিডিয়ার",
      "শুরু", "১৫", "জানুয়ারি", "২০০১", "সালে", "এখন", "পর্যন্ত", "২০০টিরও", "বেশী", "ভাষায়", "উইকিপিডিয়া", "রয়েছে" });
}