Java Code Examples for org.apache.lucene.util.fst.Util#toUTF32()

The following examples show how to use org.apache.lucene.util.fst.Util#toUTF32(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: FiniteStringsIteratorTest.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds two random 50000-char Unicode strings, unions their single-string automata, and
 * verifies that enumerating the union yields exactly those two strings (as UTF-32 int
 * sequences).
 *
 * <p>NOTE(review): judging by the name, the large input is presumably meant to exercise deep
 * automata without exhausting the call stack — confirm against FiniteStringsIterator.
 */
public void testFiniteStringsEatsStack() {
  final char[] buffer = new char[50000];

  // The same buffer is refilled for the second string; each String copies its contents.
  TestUtil.randomFixedLengthUnicodeString(random(), buffer, 0, buffer.length);
  final String first = new String(buffer);
  TestUtil.randomFixedLengthUnicodeString(random(), buffer, 0, buffer.length);
  final String second = new String(buffer);

  Automaton union = Operations.union(Automata.makeString(first), Automata.makeString(second));
  List<IntsRef> found = getFiniteStrings(new FiniteStringsIterator(union));
  assertEquals(2, found.size());

  // Each input, converted to code points, must be among the enumerated strings.
  IntsRefBuilder codePoints = new IntsRefBuilder();
  for (String expected : new String[] {first, second}) {
    Util.toUTF32(expected.toCharArray(), 0, expected.length(), codePoints);
    assertTrue(found.contains(codePoints.get()));
  }
}
 
Example 2
Source File: Dictionary.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Compiles an affix table into an FST.
 *
 * <p>Each map key is converted to its UTF-32 code point sequence and used as the FST input;
 * the associated list of integers is copied into an {@link IntsRef} and used as the output.
 * Entries are added in the iteration order of the given {@link TreeMap}.
 *
 * @param affixes mapping from affix string to the list of integers stored for it
 * @return the compiled FST
 * @throws IOException if adding to or compiling the FST fails
 */
private FST<IntsRef> affixFST(TreeMap<String,List<Integer>> affixes) throws IOException {
  IntSequenceOutputs intSeqOutputs = IntSequenceOutputs.getSingleton();
  FSTCompiler<IntsRef> compiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE4, intSeqOutputs);
  IntsRefBuilder keyScratch = new IntsRefBuilder();

  for (Map.Entry<String,List<Integer>> affix : affixes.entrySet()) {
    // Input: the affix text as code points (scratch is reused across entries).
    Util.toUTF32(affix.getKey(), keyScratch);

    // Output: a fresh IntsRef per entry, sized to hold every value in the list.
    List<Integer> values = affix.getValue();
    IntsRef packed = new IntsRef(values.size());
    for (Integer value : values) {
      packed.ints[packed.length] = value;
      packed.length++;
    }

    compiler.add(keyScratch.get(), packed);
  }

  return compiler.compile();
}
 
Example 3
Source File: FiniteStringsIteratorTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that enumerating the automaton for the single string "foobar" yields exactly one
 * result, equal to that string's UTF-32 code point sequence.
 */
public void testSingletonNoLimit() {
  final String term = "foobar";

  Automaton single = Automata.makeString(term);
  List<IntsRef> found = getFiniteStrings(new FiniteStringsIterator(single));
  assertEquals(1, found.size());

  IntsRefBuilder codePoints = new IntsRefBuilder();
  Util.toUTF32(term.toCharArray(), 0, term.length(), codePoints);
  assertTrue(found.contains(codePoints.get()));
}
 
Example 4
Source File: LimitedFiniteStringsIteratorTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that a {@code LimitedFiniteStringsIterator} constructed with a second argument of 1
 * over the automaton for "foobar" yields exactly one result, equal to that string's UTF-32 code
 * point sequence.
 */
public void testSingleton() {
  final String term = "foobar";

  Automaton single = Automata.makeString(term);
  LimitedFiniteStringsIterator limited = new LimitedFiniteStringsIterator(single, 1);
  List<IntsRef> found = getFiniteStrings(limited);
  assertEquals(1, found.size());

  IntsRefBuilder codePoints = new IntsRefBuilder();
  Util.toUTF32(term.toCharArray(), 0, term.length(), codePoints);
  assertTrue(found.contains(codePoints.get()));
}
 
Example 5
Source File: FiniteStringsIteratorTest.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Randomized round-trip check: generates a set of distinct random simple strings, unions their
 * automata, optionally applies one random transformation (minimize, determinize, or
 * removeDeadStates), and verifies that enumerating the resulting automaton yields exactly the
 * original set of strings.
 *
 * <p>On mismatch, both sorted lists are printed side by side (up to the shorter length) before
 * the test fails.
 */
public void testRandomFiniteStrings1() {
  final int count = atLeast(100);
  if (VERBOSE) {
    System.out.println("TEST: numStrings=" + count);
  }

  Set<IntsRef> expected = new HashSet<>();
  List<Automaton> parts = new ArrayList<>();
  IntsRefBuilder codePoints = new IntsRefBuilder();
  for (int n = 0; n < count; n++) {
    String candidate = TestUtil.randomSimpleString(random(), 1, 200);
    Util.toUTF32(candidate.toCharArray(), 0, candidate.length(), codePoints);
    // toIntsRef() is used (not get()) because the builder is reused on every iteration.
    if (!expected.add(codePoints.toIntsRef())) {
      continue; // duplicate string: no automaton needed
    }
    parts.add(Automata.makeString(candidate));
    if (VERBOSE) {
      System.out.println("  add string=" + candidate);
    }
  }

  // TODO: we could sometimes use
  // DaciukMihovAutomatonBuilder here

  // TODO: what other random things can we do here...
  Automaton union = Operations.union(parts);
  // Each branch consumes a fresh random boolean only if the previous ones were false.
  if (random().nextBoolean()) {
    union = MinimizationOperations.minimize(union, 1000000);
    if (VERBOSE) {
      System.out.println("TEST: a.minimize numStates=" + union.getNumStates());
    }
  } else if (random().nextBoolean()) {
    if (VERBOSE) {
      System.out.println("TEST: a.determinize");
    }
    union = Operations.determinize(union, 1000000);
  } else if (random().nextBoolean()) {
    if (VERBOSE) {
      System.out.println("TEST: a.removeDeadStates");
    }
    union = Operations.removeDeadStates(union);
  }

  List<IntsRef> actual = getFiniteStrings(new FiniteStringsIterator(union));
  assertFiniteStringsRecursive(union, actual);

  if (expected.equals(new HashSet<>(actual))) {
    return;
  }

  // Mismatch: dump both sides in sorted order to make the difference easy to spot.
  System.out.println("strings.size()=" + expected.size() + " actual.size=" + actual.size());
  List<IntsRef> sortedExpected = new ArrayList<>(expected);
  Collections.sort(sortedExpected);
  List<IntsRef> sortedActual = new ArrayList<>(actual);
  Collections.sort(sortedActual);
  int shared = Math.min(sortedExpected.size(), sortedActual.size());
  for (int idx = 0; idx < shared; idx++) {
    System.out.println("  i=" + idx + " string=" + toString(sortedExpected.get(idx)) + " actual=" + toString(sortedActual.get(idx)));
  }
  fail("wrong strings found");
}