Java Code Examples for org.apache.lucene.util.TestUtil#randomUnicodeString()

The following examples show how to use org.apache.lucene.util.TestUtil#randomUnicodeString(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestRawResponseWriter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Whichever base writer is in use, a raw ContentStream response must give back the
 * original String unchanged, both through a Writer and (decoded as UTF-8) through
 * an OutputStream.
 */
public void testRawStringContentStream()  throws IOException {
  final SolrQueryResponse rsp = new SolrQueryResponse();
  final String expected = TestUtil.randomUnicodeString(random());
  final StringStream content = new StringStream(expected);
  content.setContentType(TestUtil.randomSimpleString(random()));
  rsp.add(RawResponseWriter.CONTENT, content);

  for (RawResponseWriter writer : allWriters) {
    // the writer must report the content type of the stream it is handed
    assertEquals(content.getContentType(), writer.getContentType(req(), rsp));

    // character path: the Writer must receive the exact same string
    final StringWriter chars = new StringWriter();
    writer.write(chars, req(), rsp);
    assertEquals(expected, chars.toString());

    // byte path: the OutputStream must receive the UTF-8 encoding of the string
    final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    writer.write(bytes, req(), rsp);
    assertEquals(expected, new String(bytes.toByteArray(), StandardCharsets.UTF_8));
  }
}
 
Example 2
Source File: TestPrefixCodedTerms.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds a sorted set of random terms into a PrefixCodedTerms builder and checks that
 * iterating the result yields the same terms, in the same order, with the same count.
 */
public void testRandom() {
  final Set<Term> sortedTerms = new TreeSet<>();
  final int termCount = atLeast(10000);
  for (int n = 0; n < termCount; n++) {
    // field is drawn first, then the term text, to keep the random sequence stable
    final String field = TestUtil.randomUnicodeString(random(), 2);
    final String text = TestUtil.randomUnicodeString(random());
    sortedTerms.add(new Term(field, text));
  }

  final PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
  for (Term term : sortedTerms) {
    builder.add(term);
  }
  final PrefixCodedTerms coded = builder.finish();

  final TermIterator actual = coded.iterator();
  final Iterator<Term> expected = sortedTerms.iterator();
  assertEquals(sortedTerms.size(), coded.size());
  //System.out.println("TEST: now iter");
  while (actual.next() != null) {
    assertTrue(expected.hasNext());
    final Term wanted = expected.next();
    assertEquals(wanted, new Term(actual.field(), actual.bytes));
  }

  // the coded iterator must not run out before the expected terms do
  assertFalse(expected.hasNext());
}
 
Example 3
Source File: TestRawResponseWriter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Whichever base writer is in use, a raw binary ContentStream written to an
 * OutputStream must come out byte-for-byte identical to what went in.
 */
public void testRawBinaryContentStream()  throws IOException {
  final SolrQueryResponse rsp = new SolrQueryResponse();

  // random payload of 10..2048 bytes
  final int length = TestUtil.nextInt(random(), 10, 2048);
  final byte[] payload = new byte[length];
  random().nextBytes(payload);

  final String randomLabel = TestUtil.randomUnicodeString(random());
  final ByteArrayStream content = new ByteArrayStream(payload, randomLabel);
  content.setContentType(TestUtil.randomSimpleString(random()));
  rsp.add(RawResponseWriter.CONTENT, content);

  for (RawResponseWriter writer : allWriters) {
    assertEquals(content.getContentType(), writer.getContentType(req(), rsp));

    final ByteArrayOutputStream sink = new ByteArrayOutputStream();
    writer.write(sink, req(), rsp);
    assertArrayEquals(payload, sink.toByteArray());
  }
}
 
Example 4
Source File: TestDuelingAnalyzers.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Compares a MockAnalyzer configured with {@code jvmLetter} against an analyzer built
 * on LetterTokenizer, using random unicode strings of up to 4300 chars.
 */
public void testLetterUnicodeHuge() throws Exception {
  final Random rnd = random();
  final int lengthLimit = 4300; // CharTokenizer.IO_BUFFER_SIZE + fudge

  final MockAnalyzer mockSide = new MockAnalyzer(rnd, jvmLetter, false);
  mockSide.setMaxTokenLength(255); // match CharTokenizer's max token length

  final Analyzer letterSide = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new LetterTokenizer(newAttributeFactory());
      return new TokenStreamComponents(source, source);
    }
  };

  final int rounds = atLeast(10);
  for (int round = 0; round < rounds; round++) {
    final String input = TestUtil.randomUnicodeString(rnd, lengthLimit);
    assertEquals(input,
                 mockSide.tokenStream("foo", newStringReader(input)),
                 letterSide.tokenStream("foo", newStringReader(input)));
  }
  IOUtils.close(mockSide, letterSide);
}
 
Example 5
Source File: TestDuelingAnalyzers.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Compares a MockAnalyzer configured with {@code jvmLetter} against an analyzer built
 * on LetterTokenizer over 200 random unicode strings, using this class's
 * three-argument {@code assertEquals} helper on the two token streams.
 */
public void testLetterUnicode() throws Exception {
  Random random = random();
  // Use the local Random here as well, rather than calling random() a second time.
  Analyzer left = new MockAnalyzer(random, jvmLetter, false);
  Analyzer right = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory());
      return new TokenStreamComponents(tokenizer, tokenizer);
    }
  };
  for (int i = 0; i < 200; i++) {
    String s = TestUtil.randomUnicodeString(random);
    assertEquals(s, left.tokenStream("foo", newStringReader(s)),
                 right.tokenStream("foo", newStringReader(s)));
  }
  IOUtils.close(left, right);
}
 
Example 6
Source File: TestCodepointCountFilter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * For 10000 random strings and random [min, max] bounds, checks that
 * CodepointCountFilter lets the single keyword token through exactly when its
 * code point count lies within the bounds (inclusive).
 */
public void testRandomStrings() throws IOException {
  for (int round = 0; round < 10000; round++) {
    final String input = TestUtil.randomUnicodeString(random(), 100);
    final int boundA = TestUtil.nextInt(random(), 0, 100);
    final int boundB = TestUtil.nextInt(random(), 0, 100);
    final int codePoints = input.codePointCount(0, input.length());

    // the two random bounds may come out in either order; normalise them
    final int lower = Math.min(boundA, boundB);
    final int upper = Math.max(boundA, boundB);
    final boolean shouldPass = lower <= codePoints && codePoints <= upper;

    final Tokenizer source = new KeywordTokenizer();
    source.setReader(new StringReader(input));
    final TokenStream filtered = new CodepointCountFilter(source, lower, upper);
    filtered.reset();
    assertEquals(shouldPass, filtered.incrementToken());
    filtered.end();
    filtered.close();
  }
}
 
Example 7
Source File: FieldTermStackTest.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds four TermInfo instances with random text whose remaining constructor
 * arguments differ only in the fourth one (0, 1, 2 and 0 again), then checks that
 * equality and ordering are reported consistently between them.
 */
public void testTermInfoComparisonConsistency() {
  final TermInfo first = new TermInfo(TestUtil.randomUnicodeString(random()), 0, 0, 0, 1);
  final TermInfo second = new TermInfo(TestUtil.randomUnicodeString(random()), 0, 0, 1, 1);
  final TermInfo third = new TermInfo(TestUtil.randomUnicodeString(random()), 0, 0, 2, 1);
  final TermInfo firstTwin = new TermInfo(TestUtil.randomUnicodeString(random()), 0, 0, 0, 1);

  // every instance must be consistently equal to itself
  for (TermInfo info : new TermInfo[] {first, second, third, firstTwin}) {
    assertConsistentEquals(info, info);
  }

  // same numeric arguments, independently drawn text
  assertConsistentEquals(first, firstTwin);

  // ordering follows the fourth constructor argument
  assertConsistentLessThan(first, second);
  assertConsistentLessThan(second, third);
  assertConsistentLessThan(first, third);
  assertConsistentLessThan(firstTwin, second);
  assertConsistentLessThan(firstTwin, third);
}
 
Example 8
Source File: TestFieldCache.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Produces the string for slot {@code i}: roughly one time in three (and only when
 * i > 0) it tries to reuse a non-null entry from {@code unicodeStrings[0..i)},
 * otherwise it returns a fresh random unicode string.
 */
private static String generateString(int i) {
  if (i > 0 && random().nextInt(3) == 1) {
    // reuse past string -- give up after 10 null picks
    for (int attempt = 0; attempt < 10; attempt++) {
      final String candidate = unicodeStrings[random().nextInt(i)];
      if (candidate != null) {
        return candidate;
      }
    }
  }
  return TestUtil.randomUnicodeString(random());
}
 
Example 9
Source File: TestDeterminizeLexicon.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Runs at least one round in which 5000 random unicode strings are collected into
 * {@code terms}, each also turned into a single-string automaton in {@code automata},
 * and then {@code assertLexicon()} is invoked.
 */
public void testLexicon() throws Exception {
  final int rounds = atLeast(1);
  for (int round = 0; round < rounds; round++) {
    // start every round from empty collections
    automata.clear();
    terms.clear();

    for (int n = 0; n < 5000; n++) {
      final String term = TestUtil.randomUnicodeString(random());
      terms.add(term);
      automata.add(Automata.makeString(term));
    }

    assertLexicon();
  }
}
 
Example 10
Source File: SolrCoreMetricManagerTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Loads a MockMetricReporter from a randomly populated PluginInfo and checks the outcome.
 *
 * <p>The plugin info is randomly left {@code null} and the "configurable" attribute is
 * randomly omitted. If loading succeeds, the plugin info must have been non-null and the
 * reporter must be registered under {@code reporterName + "@" + tag}. If loading throws
 * IllegalArgumentException, at least one of the two must have been omitted and no
 * reporter may be registered under that name.
 */
@Test
public void testLoadReporter() throws Exception {
  Random random = random();

  String className = MockMetricReporter.class.getName();
  String reporterName = TestUtil.randomUnicodeString(random);
  String taggedName = reporterName + "@" + coreMetricManager.getTag();

  Map<String, Object> attrs = new HashMap<>();
  attrs.put(FieldType.CLASS_NAME, className);
  attrs.put(CoreAdminParams.NAME, reporterName);

  // The value is always drawn, even when unused, so the random sequence stays the same.
  boolean shouldDefineConfigurable = random.nextBoolean();
  String configurable = TestUtil.randomUnicodeString(random);
  if (shouldDefineConfigurable) attrs.put("configurable", configurable);

  boolean shouldDefinePlugin = random.nextBoolean();
  PluginInfo pluginInfo = shouldDefinePlugin ? new PluginInfo(TestUtil.randomUnicodeString(random), attrs) : null;

  try {
    metricManager.loadReporter(coreMetricManager.getRegistryName(), coreMetricManager.getCore(),
        pluginInfo, coreMetricManager.getTag());
    // Reaching this point means loading did not throw, so a plugin info must have been supplied.
    assertNotNull(pluginInfo);
    Map<String, SolrMetricReporter> reporters = metricManager.getReporters(coreMetricManager.getRegistryName());
    // Fixed: the message previously contained a stray "+" ("but was + ").
    assertTrue("reporters.size should be > 0, but was " + reporters.size(), reporters.size() > 0);
    assertNotNull("reporter " + reporterName + " not present among " + reporters, reporters.get(taggedName));
    assertTrue("wrong reporter class: " + reporters.get(taggedName), reporters.get(taggedName) instanceof MockMetricReporter);
  } catch (IllegalArgumentException e) {
    // Rejection is only acceptable when the plugin info or the "configurable" attribute is missing.
    assertTrue(pluginInfo == null || attrs.get("configurable") == null);
    assertNull(metricManager.getReporters(coreMetricManager.getRegistryName()).get(taggedName));
  }
}
 
Example 11
Source File: TestSimpleQueryParser.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Parses 1000 random unicode strings with both {@code parse} and {@code parseKeyword}
 * (the latter with a random int in 0..1024); the test passes if neither call throws.
 */
public void testRandomQueries() throws Exception {
  int remaining = 1000;
  while (remaining > 0) {
    final String randomQuery = TestUtil.randomUnicodeString(random());
    parse(randomQuery); // no exception
    final int randomArg = TestUtil.nextInt(random(), 0, 1024);
    parseKeyword(randomQuery, randomArg); // no exception
    remaining--;
  }
}
 
Example 12
Source File: SolrJmxReporterTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a PluginInfo for a SolrJmxReporter with a randomized reporter name, the
 * given root name and enabled flag, the agent id read from {@code TEST_MBEAN_SERVER},
 * and a randomly chosen optional "domain" attribute (also assigned to the enclosing
 * {@code domain} variable).
 *
 * @param rootName value stored under the "rootName" attribute
 * @param enabled value stored under the "enabled" attribute
 * @return a PluginInfo with a random unicode first argument and the attributes above
 * @throws RuntimeException if the MBean server's agent id cannot be read
 */
private PluginInfo createReporterPluginInfo(String rootName, boolean enabled) {
  final Random rnd = random();

  final Map<String, Object> attributes = new HashMap<>();
  attributes.put(FieldType.CLASS_NAME, SolrJmxReporter.class.getName());
  attributes.put(CoreAdminParams.NAME, PREFIX + TestUtil.randomSimpleString(rnd, 5, 10));
  attributes.put("rootName", rootName);
  attributes.put("enabled", enabled);

  try {
    final ObjectName delegate = new ObjectName("JMImplementation:type=MBeanServerDelegate");
    final String agentId = (String) TEST_MBEAN_SERVER.getAttribute(delegate, "MBeanServerId");
    attributes.put("agentId", agentId);
  } catch (Exception e) {
    throw new RuntimeException("Unable to determine agentId of MBeanServer: " + e.getMessage(), e);
  }

  // randomly override the domain
  if (rnd.nextBoolean()) {
    domain = PREFIX + TestUtil.randomSimpleString(rnd);
    attributes.put("domain", domain);
  }

  final String randomFirstArg = TestUtil.randomUnicodeString(rnd);
  return new PluginInfo(randomFirstArg, attributes);
}
 
Example 13
Source File: NGramTokenizerTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code testNGrams} on a random unicode string (up to 4 * 1024) with random
 * gram sizes where 1 <= minGram <= maxGram <= 100, once with an empty fourth
 * argument and once with "abcdef".
 */
public void testFullUTF8Range() throws IOException {
  final int smallest = TestUtil.nextInt(random(), 1, 100);
  final int largest = TestUtil.nextInt(random(), smallest, 100);
  final String input = TestUtil.randomUnicodeString(random(), 4 * 1024);

  for (String fourthArg : new String[] {"", "abcdef"}) {
    testNGrams(smallest, largest, input, fourthArg);
  }
}
 
Example 14
Source File: TestSimplePatternTokenizer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * With the pattern ".*", a non-empty random unicode string must come back from
 * SimplePatternTokenizer as a first token equal to the whole input.
 */
public void testOneToken() throws Exception {
  final Tokenizer tokenizer = new SimplePatternTokenizer(".*");
  final CharTermAttribute term = tokenizer.getAttribute(CharTermAttribute.class);

  // keep drawing until the random string is non-empty
  String input;
  do {
    input = TestUtil.randomUnicodeString(random());
  } while (input.length() == 0);

  tokenizer.setReader(new StringReader(input));
  tokenizer.reset();
  assertTrue(tokenizer.incrementToken());
  assertEquals(input, term.toString());
}
 
Example 15
Source File: TestExtendedMode.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Randomized check that supplementary characters are never split: every token the
 * analyzer emits for a random unicode string must be a valid UTF-16 string.
 */
public void testSurrogates2() throws IOException {
  final int rounds = atLeast(500);
  for (int round = 0; round < rounds; round++) {
    final String input = TestUtil.randomUnicodeString(random(), 100);
    try (TokenStream tokens = analyzer.tokenStream("foo", input)) {
      final CharTermAttribute term = tokens.addAttribute(CharTermAttribute.class);
      tokens.reset();
      while (tokens.incrementToken()) {
        final boolean wellFormed = UnicodeUtil.validUTF16String(term);
        assertTrue(wellFormed);
      }
      tokens.end();
    }
  }
}
 
Example 16
Source File: TestUTF32ToUTF8.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the character-based and the byte-based run automata built from the
 * same automaton agree on at least 1000 inputs, each input being either a random
 * unicode string or a string generated by RandomAcceptedStrings.
 */
private void assertAutomaton(Automaton automaton) throws Exception {
  final CharacterRunAutomaton charRunner = new CharacterRunAutomaton(automaton);
  final ByteRunAutomaton byteRunner = new ByteRunAutomaton(automaton);
  final AutomatonTestUtil.RandomAcceptedStrings acceptedSource =
      new AutomatonTestUtil.RandomAcceptedStrings(automaton);

  final int rounds = atLeast(1000);
  for (int round = 0; round < rounds; round++) {
    final String input;
    if (random().nextBoolean()) {
      // likely not accepted
      input = TestUtil.randomUnicodeString(random());
    } else {
      // will be accepted
      final int[] codepoints = acceptedSource.getRandomAcceptedString(random());
      try {
        input = UnicodeUtil.newString(codepoints, 0, codepoints.length);
      } catch (Exception e) {
        // dump the offending code points before rethrowing
        System.out.println(codepoints.length + " codepoints:");
        for (int codepoint : codepoints) {
          System.out.println("  " + Integer.toHexString(codepoint));
        }
        throw e;
      }
    }

    final byte[] utf8 = input.getBytes(StandardCharsets.UTF_8);
    final boolean charVerdict = charRunner.run(input);
    final boolean byteVerdict = byteRunner.run(utf8, 0, utf8.length);
    assertEquals(charVerdict, byteVerdict);
  }
}
 
Example 17
Source File: TestMemoryIndexAgainstDirectory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Picks a term at random: on one outcome of a coin flip an entry of TEST_TERMS,
 * on the other a random unicode string.
 */
private String randomTerm() {
  final boolean useKnownTerm = random().nextBoolean();
  if (!useKnownTerm) {
    // random unicode term
    return TestUtil.randomUnicodeString(random());
  }
  // random entry of TEST_TERMS
  final int index = random().nextInt(TEST_TERMS.length);
  return TEST_TERMS[index];
}
 
Example 18
Source File: EdgeNGramTokenFilterTest.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Runs EdgeNGramTokenFilter over random unicode strings (20 rounds) and checks that
 * each emitted gram is a prefix cut on a code point boundary, that offsets always
 * span the whole input, and that the original token is emitted when
 * {@code preserveOriginal} is set and the input is shorter than minGram or longer
 * than maxGram (counted in code points).
 */
public void testSupplementaryCharacters() throws IOException {
  for (int round = 0; round < 20; round++) {
    final String input = TestUtil.randomUnicodeString(random(), 10);
    final int inputCodePoints = input.codePointCount(0, input.length());
    final int minGram = TestUtil.nextInt(random(), 1, 3);
    final int maxGram = TestUtil.nextInt(random(), minGram, 10);
    final boolean preserveOriginal = TestUtil.nextInt(random(), 0, 1) % 2 == 0;

    final Tokenizer source = new KeywordTokenizer();
    source.setReader(new StringReader(input));
    final TokenStream grams = new EdgeNGramTokenFilter(source, minGram, maxGram, preserveOriginal);
    final CharTermAttribute term = grams.addAttribute(CharTermAttribute.class);
    final OffsetAttribute offsets = grams.addAttribute(OffsetAttribute.class);
    grams.reset();

    // input too short for any gram: the original comes first when preserved
    if (preserveOriginal && inputCodePoints < minGram) {
      assertTrue(grams.incrementToken());
      assertEquals(0, offsets.startOffset());
      assertEquals(input.length(), offsets.endOffset());
      assertEquals(input, term.toString());
    }

    // one gram per size from minGram up to the smaller of maxGram and the input length
    final int largestGram = Math.min(inputCodePoints, maxGram);
    for (int size = minGram; size <= largestGram; size++) {
      assertTrue(grams.incrementToken());
      assertEquals(0, offsets.startOffset());
      assertEquals(input.length(), offsets.endOffset());
      final int prefixEnd = Character.offsetByCodePoints(input, 0, size);
      assertEquals(input.substring(0, prefixEnd), term.toString());
    }

    // input longer than the largest gram: the original comes last when preserved
    if (preserveOriginal && inputCodePoints > maxGram) {
      assertTrue(grams.incrementToken());
      assertEquals(0, offsets.startOffset());
      assertEquals(input.length(), offsets.endOffset());
      assertEquals(input, term.toString());
    }

    assertFalse(grams.incrementToken());
    grams.close();
  }
}
 
Example 19
Source File: TestMultiDocValues.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Indexes random binary doc values with occasional commits, then checks that the
 * MultiDocValues view over the reader taken before forceMerge(1) returns the same
 * value for every document as the single leaf of the reader taken after the merge.
 */
public void testBinary() throws Exception {
  final Directory directory = newDirectory();

  // a single document instance is reused; only the field's bytes change per add
  final Document reusedDoc = new Document();
  final Field bytesField = new BinaryDocValuesField("bytes", new BytesRef());
  reusedDoc.add(bytesField);

  final IndexWriterConfig config = newIndexWriterConfig(random(), null);
  config.setMergePolicy(newLogMergePolicy());
  final RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

  final int numDocs = atLeast(TEST_NIGHTLY ? 500 : 50);

  for (int n = 0; n < numDocs; n++) {
    bytesField.setBytesValue(new BytesRef(TestUtil.randomUnicodeString(random())));
    writer.addDocument(reusedDoc);
    if (random().nextInt(17) == 0) {
      writer.commit();
    }
  }

  // reader before the merge, then reader after merging down to one segment
  final DirectoryReader beforeMerge = writer.getReader();
  writer.forceMerge(1);
  final DirectoryReader afterMerge = writer.getReader();
  final LeafReader mergedLeaf = getOnlyLeafReader(afterMerge);
  writer.close();

  final BinaryDocValues multiValues = MultiDocValues.getBinaryValues(beforeMerge, "bytes");
  final BinaryDocValues singleValues = mergedLeaf.getBinaryDocValues("bytes");
  for (int docId = 0; docId < numDocs; docId++) {
    assertEquals(docId, multiValues.nextDoc());
    assertEquals(docId, singleValues.nextDoc());
    assertEquals(BytesRef.deepCopyOf(singleValues.binaryValue()), multiValues.binaryValue());
  }

  testRandomAdvance(
      mergedLeaf.getBinaryDocValues("bytes"),
      MultiDocValues.getBinaryValues(beforeMerge, "bytes"));
  testRandomAdvanceExact(
      mergedLeaf.getBinaryDocValues("bytes"),
      MultiDocValues.getBinaryValues(beforeMerge, "bytes"),
      mergedLeaf.maxDoc());

  beforeMerge.close();
  afterMerge.close();
  directory.close();
}
 
Example 20
Source File: TestMultiDocValues.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Indexes random sorted doc values, randomly interleaved with empty documents and
 * commits, then checks that the MultiDocValues view over the reader taken before
 * forceMerge(1) matches the single merged leaf in value count, document ids,
 * values and ordinals.
 */
public void testSorted() throws Exception {
  final Directory directory = newDirectory();

  // a single document instance is reused; only the field's bytes change per add
  final Document reusedDoc = new Document();
  final Field bytesField = new SortedDocValuesField("bytes", new BytesRef());
  reusedDoc.add(bytesField);

  final IndexWriterConfig config = newIndexWriterConfig(random(), null);
  config.setMergePolicy(newLogMergePolicy());
  final RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

  final int numDocs = atLeast(TEST_NIGHTLY ? 500 : 50);
  for (int n = 0; n < numDocs; n++) {
    bytesField.setBytesValue(new BytesRef(TestUtil.randomUnicodeString(random())));
    // occasionally add a document that has no value for the field
    if (random().nextInt(7) == 0) {
      writer.addDocument(new Document());
    }
    writer.addDocument(reusedDoc);
    if (random().nextInt(17) == 0) {
      writer.commit();
    }
  }

  // reader before the merge, then reader after merging down to one segment
  final DirectoryReader beforeMerge = writer.getReader();
  writer.forceMerge(1);
  final DirectoryReader afterMerge = writer.getReader();
  final LeafReader mergedLeaf = getOnlyLeafReader(afterMerge);
  writer.close();

  final SortedDocValues multiValues = MultiDocValues.getSortedValues(beforeMerge, "bytes");
  final SortedDocValues singleValues = mergedLeaf.getSortedDocValues("bytes");
  assertEquals(singleValues.getValueCount(), multiValues.getValueCount());

  for (;;) {
    // both iterators must advance to the same document
    assertEquals(singleValues.nextDoc(), multiValues.nextDoc());
    if (singleValues.docID() == NO_MORE_DOCS) {
      break;
    }

    // check value
    assertEquals(BytesRef.deepCopyOf(singleValues.binaryValue()), multiValues.binaryValue());

    // check ord
    assertEquals(singleValues.ordValue(), multiValues.ordValue());
  }

  testRandomAdvance(
      mergedLeaf.getSortedDocValues("bytes"),
      MultiDocValues.getSortedValues(beforeMerge, "bytes"));
  testRandomAdvanceExact(
      mergedLeaf.getSortedDocValues("bytes"),
      MultiDocValues.getSortedValues(beforeMerge, "bytes"),
      mergedLeaf.maxDoc());

  beforeMerge.close();
  afterMerge.close();
  directory.close();
}