org.apache.lucene.util.IntsRefBuilder Java Examples

The following examples show how to use org.apache.lucene.util.IntsRefBuilder. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FiniteStringsIterator.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Constructor.
 *
 * @param a Automaton to create finite string from.
 * @param startState The starting state for each path.
 * @param endState The state where each path should stop or -1 if only accepted states should be final.
 */
public FiniteStringsIterator(Automaton a, int startState, int endState) {
  this.a = a;
  this.endState = endState;
  this.nodes = new PathNode[16];
  for (int i = 0, end = nodes.length; i < end; i++) {
    nodes[i] = new PathNode();
  }
  this.string = new IntsRefBuilder();
  this.pathStates = new BitSet(a.getNumStates());
  this.string.setLength(0);
  this.emitEmptyString = a.isAccept(0);

  // Start iteration with node startState.
  if (a.getNumTransitions(startState) > 0) {
    pathStates.set(startState);
    nodes[0].resetState(a, startState);
    string.append(startState);
  }
}
 
Example #2
Source File: FstDecompounder.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Consume a maximal glue morpheme, if any, and consume the next word.
 *
 * <p>Follows arcs of {@code glueMorphemes} along the code points of {@code utf32}, starting at
 * {@code offset}. Every time a final arc is reached a {@code GLUE_MORPHEME} chunk is pushed onto
 * {@code chunks}, the remaining input (if any) is handed to {@code matchWord}, and the chunk is
 * popped again.
 *
 * @param utf32 the input code points
 * @param offset absolute index into {@code utf32.ints} at which matching starts
 * @param builder passed through unchanged to {@code matchWord}
 * @param maxPathsBuilder passed through unchanged to {@code matchWord}
 * @param chunks the chunk stack; left as it was found when this method returns normally
 * @throws IOException if reading the FST fails
 */
private void matchGlueMorpheme(IntsRef utf32, final int offset, StringBuilder builder,
                               IntsRefBuilder maxPathsBuilder,
                               Deque<Chunk> chunks) throws IOException {
    FST.Arc<Object> arc = glueMorphemes.getFirstArc(new FST.Arc<>());
    BytesReader br = glueMorphemes.getBytesReader();
    // The index i is absolute within utf32.ints, so the end of the valid span must account for
    // utf32.offset as well (the inner "has more input" check below already does).
    final int end = utf32.offset + utf32.length;
    for (int i = offset; i < end; i++) {
        int chr = utf32.ints[i];
        arc = glueMorphemes.findTargetArc(chr, arc, arc, br);
        if (arc == null) {
            // No glue morpheme continues with this code point.
            break;
        }
        if (arc.isFinal()) {
            chunks.addLast(new Chunk(offset, i + 1, ChunkType.GLUE_MORPHEME));
            if (i + 1 < end) {
                matchWord(utf32, i + 1, builder, maxPathsBuilder, chunks);
            }
            chunks.removeLast();
        }
    }
}
 
Example #3
Source File: TrieBuilder.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a byte-labelled FST from the given strings, assigning each added string the next
 * value of a counter that starts at zero.
 *
 * <p>An {@link AssertionError} raised while adding a string is logged at debug level and the
 * string is skipped.
 *
 * @param sortedStrings the strings to add, iterated in the set's order
 * @return the finished FST
 * @throws IOException if the FST builder fails
 */
public static FST<Long> buildTrie(Set<String> sortedStrings) throws IOException {
  final PositiveIntOutputs longOutputs = PositiveIntOutputs.getSingleton();
  final Builder<Long> trie = new Builder<Long>(FST.INPUT_TYPE.BYTE1, longOutputs);
  final BytesRefBuilder utf8 = new BytesRefBuilder();
  final IntsRefBuilder labels = new IntsRefBuilder();

  long nextOrdinal = 0;
  for (final String mention : sortedStrings) {
    utf8.copyChars(mention);
    try {
      trie.add(Util.toIntsRef(utf8.get(), labels), nextOrdinal++);
    } catch (java.lang.AssertionError ae) {
      logger.debug("Assertion error for mention " + mention);
    }
  }

  return trie.finish();
}
 
Example #4
Source File: FSTCompletionBuilder.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Compiles the entries supplied by the sorter into an output-less FST, adding an entry only
 * when it differs from the most recently added one.
 *
 * @param sorter source of the entries
 * @return the compiled FST, or {@code null} when the sorter yielded no entries
 * @throws IOException if iterating the entries or compiling the FST fails
 */
private FST<Object> buildAutomaton(BytesRefSorter sorter) throws IOException {
  final Outputs<Object> outputs = NoOutputs.getSingleton();
  final Object noOutput = outputs.getNoOutput();
  final FSTCompiler<Object> compiler = new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs)
      .shareMaxTailLength(shareMaxTailLength).build();

  // Holds a copy of the last entry that was added, used to skip consecutive duplicates.
  final BytesRefBuilder previous = new BytesRefBuilder();
  final IntsRefBuilder labels = new IntsRefBuilder();
  final BytesRefIterator entries = sorter.iterator();

  int seen = 0;
  for (BytesRef entry = entries.next(); entry != null; entry = entries.next()) {
    seen++;
    final boolean sameAsPrevious = previous.get().compareTo(entry) == 0;
    if (!sameAsPrevious) {
      compiler.add(Util.toIntsRef(entry, labels), noOutput);
      previous.copyBytes(entry);
    }
  }

  if (seen == 0) {
    return null;
  }
  return compiler.compile();
}
 
Example #5
Source File: TestFSTs.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/** Adds the same string ten times and checks that the compiled FST holds exactly one path. */
public void testDuplicateFSAString() throws Exception {
  final String word = "foobar";
  final Outputs<Object> outputs = NoOutputs.getSingleton();
  final FSTCompiler<Object> compiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
  final IntsRefBuilder scratch = new IntsRefBuilder();

  int remaining = 10;
  while (remaining > 0) {
    compiler.add(Util.toIntsRef(new BytesRef(word), scratch), outputs.getNoOutput());
    remaining--;
  }
  final FST<Object> fst = compiler.compile();

  // Enumerate every input path stored in the FST.
  int paths = 0;
  for (BytesRefFSTEnum<Object> walker = new BytesRefFSTEnum<>(fst); walker.next() != null; ) {
    paths++;
  }
  assertEquals(1, paths);

  assertNotNull(Util.get(fst, new BytesRef(word)));
  assertNull(Util.get(fst, new BytesRef("foobaz")));
}
 
Example #6
Source File: DatawaveFieldIndexListIteratorJexl.java    From datawave with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an output-less FST containing every string of {@code values}.
 *
 * <p>NOTE(review): each value is converted with {@code Util.toUTF16} while the builder is
 * configured with {@code FST.INPUT_TYPE.BYTE1}; confirm that all values stay within the
 * single-byte label range.
 *
 * @param values the strings to add, iterated in the set's order
 * @return the finished FST
 * @throws IOException if the FST builder fails
 */
public static FST<?> getFST(SortedSet<String> values) throws IOException {
    final Outputs<Object> outputs = NoOutputs.getSingleton();
    
    // Builder configured with what the original code described as the default options.
    final org.apache.lucene.util.fst.Builder<Object> fstBuilder = new org.apache.lucene.util.fst.Builder<>(
                    FST.INPUT_TYPE.BYTE1, // inputType
                    0, // minSuffixCount1
                    0, // minSuffixCount2
                    true, // doShareSuffix
                    true, // doShareNonSingletonNodes
                    Integer.MAX_VALUE, // shareMaxTailLength
                    outputs,
                    true, // allowArrayArcs
                    15); // bytesPageBits
    
    // One scratch builder is reused for every value.
    final IntsRefBuilder scratch = new IntsRefBuilder();
    for (final String value : values) {
        fstBuilder.add(Util.toUTF16(value, scratch), outputs.getNoOutput());
    }
    return fstBuilder.finish();
}
 
Example #7
Source File: FiniteStringsIteratorTest.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Unions two random 50000-char strings and checks that the iterator returns exactly those
 * two strings.
 */
public void testFiniteStringsEatsStack() {
  final char[] buffer = new char[50000];
  TestUtil.randomFixedLengthUnicodeString(random(), buffer, 0, buffer.length);
  final String first = new String(buffer);
  TestUtil.randomFixedLengthUnicodeString(random(), buffer, 0, buffer.length);
  final String second = new String(buffer);

  final Automaton union = Operations.union(Automata.makeString(first), Automata.makeString(second));
  final List<IntsRef> actual = getFiniteStrings(new FiniteStringsIterator(union));
  assertEquals(2, actual.size());

  // Both inputs must show up, compared as UTF-32 code point sequences.
  final IntsRefBuilder scratch = new IntsRefBuilder();
  for (final String expected : new String[] {first, second}) {
    Util.toUTF32(expected.toCharArray(), 0, expected.length(), scratch);
    assertTrue(actual.contains(scratch.get()));
  }
}
 
Example #8
Source File: TokenInfoDictionaryTest.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a two-entry dictionary and verifies the left id, right id and word cost that come
 * back for the first word id of lookups 0 and 1.
 */
public void testPut() throws Exception {
  final TokenInfoDictionary dictionary = newDictionary(
      "名詞,1,1,2,名詞,一般,*,*,*,*,*,*,*",
      // "large" id
      "一般,5000,5000,3,名詞,一般,*,*,*,*,*,*,*");
  final IntsRef ids = new IntsRefBuilder().get();

  // Lookup 0 is expected to yield the entry with the large ids.
  dictionary.lookupWordIds(0, ids);
  final int firstId = ids.ints[ids.offset];
  assertEquals(5000, dictionary.getLeftId(firstId));
  assertEquals(5000, dictionary.getRightId(firstId));
  assertEquals(3, dictionary.getWordCost(firstId));

  // Lookup 1 is expected to yield the entry with the small ids.
  dictionary.lookupWordIds(1, ids);
  final int secondId = ids.ints[ids.offset];
  assertEquals(1, dictionary.getLeftId(secondId));
  assertEquals(1, dictionary.getRightId(secondId));
  assertEquals(2, dictionary.getWordCost(secondId));
}
 
Example #9
Source File: TokenInfoDictionaryTest.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a two-entry dictionary and verifies the left id, right id and word cost that come
 * back for the first word id of lookups 0 and 1.
 */
public void testPut() throws Exception {
  final TokenInfoDictionary dictionary = newDictionary(
      "명사,1,1,2,NNG,*,*,*,*,*,*,*",
      // "large" id
      "일반,5000,5000,3,NNG,*,*,*,*,*,*,*");
  final IntsRef ids = new IntsRefBuilder().get();

  // Lookup 0 is expected to yield the entry with the small ids.
  dictionary.lookupWordIds(0, ids);
  final int firstId = ids.ints[ids.offset];
  assertEquals(1, dictionary.getLeftId(firstId));
  assertEquals(1, dictionary.getRightId(firstId));
  assertEquals(2, dictionary.getWordCost(firstId));

  // Lookup 1 is expected to yield the entry with the large ids.
  dictionary.lookupWordIds(1, ids);
  final int secondId = ids.ints[ids.offset];
  assertEquals(5000, dictionary.getLeftId(secondId));
  assertEquals(5000, dictionary.getRightId(secondId));
  assertEquals(3, dictionary.getWordCost(secondId));
}
 
Example #10
Source File: TestFSTs.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/** Adds three two-byte keys with distinct outputs and checks each output can be read back. */
public void testSimpleDepth() throws Exception {
  final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
  final FSTCompiler<Long> compiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);

  final BytesRef[] keys = {new BytesRef("ab"), new BytesRef("ac"), new BytesRef("bd")};
  final long[] values = {3L, 5L, 7L};

  // A fresh scratch builder is used for each key.
  for (int k = 0; k < keys.length; k++) {
    compiler.add(Util.toIntsRef(keys[k], new IntsRefBuilder()), values[k]);
  }

  final FST<Long> fst = compiler.compile();

  for (int k = 0; k < keys.length; k++) {
    assertEquals(values[k], (long) Util.get(fst, keys[k]));
  }
}
 
Example #11
Source File: NormalizeCharMap.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/** Compiles all pending pairs into a NormalizeCharMap and clears them; call this once you
 *  are done calling {@link #add}. */
public NormalizeCharMap build() {
  try {
    final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
    final FSTCompiler<CharsRef> compiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE2, outputs);

    // Keys are added as UTF-16 code units; one scratch builder is shared by all pairs.
    final IntsRefBuilder keyScratch = new IntsRefBuilder();
    for (final Map.Entry<String,String> pair : pendingPairs.entrySet()) {
      compiler.add(Util.toUTF16(pair.getKey(), keyScratch), new CharsRef(pair.getValue()));
    }

    final FST<CharsRef> compiled = compiler.compile();
    pendingPairs.clear();
    return new NormalizeCharMap(compiled);
  } catch (IOException ioe) {
    // Bogus FST IOExceptions!!  (will never happen)
    throw new RuntimeException(ioe);
  }
}
 
Example #12
Source File: Dictionary.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Reads {@code num} conversion lines and compiles them into an FST.
 *
 * <p>Each line is split on whitespace and must yield exactly three fields; the second field is
 * mapped to the third. Mappings are collected in sorted key order and added to the FST with
 * their keys converted to UTF-16 code units.
 *
 * @param reader source of the conversion lines
 * @param num number of lines to read
 * @return the compiled FST
 * @throws IOException if reading a line or compiling the FST fails
 * @throws ParseException if the input ends before {@code num} lines were read, or a line does
 *     not split into exactly three fields
 * @throws IllegalStateException if the same key is mapped more than once
 */
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
  Map<String,String> mappings = new TreeMap<>();
  
  for (int i = 0; i < num; i++) {
    String line = reader.readLine();
    if (line == null) {
      // readLine() signals end of input with null; report it instead of failing with an NPE below.
      throw new ParseException("unexpected end of input: expected " + num + " conversion lines but found " + i,
          reader.getLineNumber());
    }
    String[] parts = line.split("\\s+");
    if (parts.length != 3) {
      throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
    }
    if (mappings.put(parts[1], parts[2]) != null) {
      throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
    }
  }
  
  Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
  FSTCompiler<CharsRef> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE2, outputs);
  // One scratch builder is reused for every key.
  IntsRefBuilder scratchInts = new IntsRefBuilder();
  for (Map.Entry<String,String> entry : mappings.entrySet()) {
    Util.toUTF16(entry.getKey(), scratchInts);
    fstCompiler.add(scratchInts.get(), new CharsRef(entry.getValue()));
  }
  
  return fstCompiler.compile();
}
 
Example #13
Source File: StemmerOverrideFilter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Compiles the collected entries into a {@link StemmerOverrideMap} to be used with the
 * {@link StemmerOverrideFilter}.
 *
 * @return an {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}
 * @throws IOException if an {@link IOException} occurs;
 */
public StemmerOverrideMap build() throws IOException {
  final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
  final FSTCompiler<BytesRef> compiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE4, outputs);

  // Entry ids in the order produced by the hash's sort.
  final int[] sortedIds = hash.sort();
  final IntsRefBuilder codePoints = new IntsRefBuilder();
  final int count = hash.size();
  final BytesRef scratch = new BytesRef();

  int pos = 0;
  while (pos < count) {
    final int id = sortedIds[pos];
    codePoints.copyUTF8Bytes(hash.get(id, scratch));
    compiler.add(codePoints.get(), new BytesRef(outputValues.get(id)));
    pos++;
  }

  return new StemmerOverrideMap(compiler.compile(), ignoreCase);
}
 
Example #14
Source File: TestFSTs.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Adds six single-label inputs, each with a 300-byte output whose first byte equals the label,
 * and verifies every output can be read back intact.
 */
public void testLargeOutputsOnArrayArcs() throws Exception {
  final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
  final FSTCompiler<BytesRef> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);

  final byte[] bytes = new byte[300];
  final IntsRefBuilder input = new IntsRefBuilder();
  // Reserve one label slot; it is overwritten for every arc below.
  input.append(0);
  final BytesRef output = new BytesRef(bytes);
  for(int arc=0;arc<6;arc++) {
    input.setIntAt(0, arc);
    output.bytes[0] = (byte) arc;
    // Deep copy, because the shared output buffer is mutated on the next iteration.
    fstCompiler.add(input.get(), BytesRef.deepCopyOf(output));
  }

  final FST<BytesRef> fst = fstCompiler.compile();
  for(int arc=0;arc<6;arc++) {
    input.setIntAt(0,  arc);
    final BytesRef result = Util.get(fst, input.get());
    assertNotNull(result);
    assertEquals(300, result.length);
    // Expected value first, so that a failure message reports the values the right way round.
    assertEquals(arc, result.bytes[result.offset]);
    for(int byteIDX=1;byteIDX<result.length;byteIDX++) {
      assertEquals(0, result.bytes[result.offset+byteIDX]);
    }
  }
}
 
Example #15
Source File: Util.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/** Offers every arc leaving this node to the queue; the arc labelled
 *  {@code FST.END_LABEL} is offered only when {@code allowEmptyString} is set.  */
public void addStartPaths(FST.Arc<T> node, T startOutput, boolean allowEmptyString, IntsRefBuilder input,
                          float boost, CharSequence context, int payload) throws IOException {

  // NO_OUTPUT must be a singleton, so an equal instance is swapped for the canonical one.
  final T canonicalOutput = startOutput.equals(fst.outputs.getNoOutput())
      ? fst.outputs.getNoOutput()
      : startOutput;

  final FSTPath<T> path = new FSTPath<>(canonicalOutput, node, input, boost, context, payload);
  fst.readFirstTargetArc(node, path.arc, bytesReader);

  // Bootstrap: walk the sibling arcs in order, offering each one to the queue.
  boolean more;
  do {
    final boolean skipEndArc = !allowEmptyString && path.arc.label() == FST.END_LABEL;
    if (!skipEndArc) {
      addIfCompetitive(path);
    }
    more = !path.arc.isLast();
    if (more) {
      fst.readNextArc(path.arc, bytesReader);
    }
  } while (more);
}
 
Example #16
Source File: TestFSTs.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Compiles "stat" and "station" and checks that the dot rendering marks both the arc labelled
 * t and the arc labelled n in bold.
 */
public void testInternalFinalState() throws Exception {
  final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
  final FSTCompiler<Long> compiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
  for (final String word : new String[] {"stat", "station"}) {
    compiler.add(Util.toIntsRef(new BytesRef(word), new IntsRefBuilder()), outputs.getNoOutput());
  }
  final FST<Long> fst = compiler.compile();

  final StringWriter writer = new StringWriter();
  //Writer w = new OutputStreamWriter(new FileOutputStream("/x/tmp/out.dot"));
  Util.toDot(fst, writer, false, false);
  writer.close();
  final String dot = writer.toString();
  //System.out.println(w.toString());

  // check for accept state at label t
  assertTrue(dot.contains("[label=\"t\" style=\"bold\""));
  // check for accept state at label n
  assertTrue(dot.contains("[label=\"n\" style=\"bold\""));
}
 
Example #17
Source File: FiniteStringsIteratorTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Basic test for getFiniteStrings: the minimized union of "dog" and "duck" must yield
 * exactly those two strings.
 */
public void testFiniteStringsBasic() {
  final Automaton union = Operations.union(Automata.makeString("dog"), Automata.makeString("duck"));
  final Automaton a = MinimizationOperations.minimize(union, DEFAULT_MAX_DETERMINIZED_STATES);
  final List<IntsRef> actual = getFiniteStrings(new FiniteStringsIterator(a));
  assertFiniteStringsRecursive(a, actual);
  assertEquals(2, actual.size());

  // Each expected word gets its own builder.
  for (final String word : new String[] {"dog", "duck"}) {
    final IntsRefBuilder expected = new IntsRefBuilder();
    Util.toIntsRef(new BytesRef(word), expected);
    assertTrue(actual.contains(expected.get()));
  }
}
 
Example #18
Source File: TestAutomaton.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that the finite strings of the determinized automaton are exactly the given
 * strings, compared as UTF-32 code point sequences.
 */
private void assertMatches(Automaton a, String... strings) {
  final Set<IntsRef> expected = new HashSet<>();
  for (int idx = 0; idx < strings.length; idx++) {
    // A fresh builder per string, so the stored IntsRefs do not share a buffer.
    expected.add(Util.toUTF32(strings[idx], new IntsRefBuilder()));
  }

  final Automaton determinized = Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);
  assertEquals(expected, TestOperations.getFiniteStrings(determinized));
}
 
Example #19
Source File: TestAutomaton.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds random binary interval automata and checks, for 500 random terms each, that the
 * automaton accepts a term exactly when the term lies inside the interval.
 */
public void testMakeBinaryIntervalRandom() throws Exception {
  final int rounds = atLeast(100);
  for (int round = 0; round < rounds; round++) {
    final BytesRef minTerm = TestUtil.randomBinaryTerm(random());
    final boolean minInclusive = random().nextBoolean();
    final BytesRef maxTerm = TestUtil.randomBinaryTerm(random());
    final boolean maxInclusive = random().nextBoolean();

    final Automaton a = makeBinaryInterval(minTerm, minInclusive, maxTerm, maxInclusive);

    for (int probe = 0; probe < 500; probe++) {
      final BytesRef term = TestUtil.randomBinaryTerm(random());
      final int minCmp = minTerm.compareTo(term);
      final int maxCmp = maxTerm.compareTo(term);

      // Inside the bounds, and any bound the term sits exactly on must be inclusive.
      final boolean withinBounds = minCmp <= 0 && maxCmp >= 0;
      final boolean minSideOk = minCmp != 0 || minInclusive;
      final boolean maxSideOk = maxCmp != 0 || maxInclusive;
      final boolean expected = withinBounds && minSideOk && maxSideOk;

      if (VERBOSE) {
        System.out.println("  check term=" + term + " expected=" + expected);
      }
      final IntsRefBuilder labels = new IntsRefBuilder();
      Util.toIntsRef(term, labels);
      assertEquals(expected, Operations.run(a, labels.toIntsRef()));
    }
  }
}
 
Example #20
Source File: FiniteStringsIteratorTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/** Hand-builds an automaton with a single 'a' transition and checks it yields exactly "a". */
public void testSingleString() {
  final Automaton a = new Automaton();
  final int from = a.createState();
  final int to = a.createState();
  a.setAccept(to, true);
  a.addTransition(from, to, 'a', 'a');
  a.finishState();

  final Set<IntsRef> accepted = TestOperations.getFiniteStrings(a);
  assertEquals(1, accepted.size());

  final IntsRefBuilder expected = new IntsRefBuilder();
  expected.append('a');
  assertTrue(accepted.contains(expected.toIntsRef()));
}
 
Example #21
Source File: FiniteStringsIteratorTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/** A single-string automaton must yield exactly that one string. */
public void testSingletonNoLimit() {
  final String word = "foobar";
  final Automaton a = Automata.makeString(word);
  final List<IntsRef> actual = getFiniteStrings(new FiniteStringsIterator(a));
  assertEquals(1, actual.size());

  final IntsRefBuilder expected = new IntsRefBuilder();
  Util.toUTF32(word.toCharArray(), 0, word.length(), expected);
  assertTrue(actual.contains(expected.get()));
}
 
Example #22
Source File: Util.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/** Copies every UTF16 unit (char) of the sequence, unchanged,
 *  into the scratch builder and returns its IntsRef. */
public static IntsRef toUTF16(CharSequence s, IntsRefBuilder scratch) {
  final int unitCount = s.length();
  scratch.setLength(unitCount);
  scratch.grow(unitCount);
  int pos = 0;
  while (pos < unitCount) {
    // The char widens to int implicitly.
    scratch.setIntAt(pos, s.charAt(pos));
    pos++;
  }
  return scratch.get();
}
 
Example #23
Source File: Util.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/** Searches from the given node for the top N min cost
 *  completions to a final node. */
public static <T> TopResults<T> shortestPaths(FST<T> fst, FST.Arc<T> fromNode, T startOutput, Comparator<T> comparator, int topN,
                                               boolean allowEmptyString) throws IOException {

  // Every path is retained, so topN doubles as the
  // maxQueueDepth and the pruning stays admissible:
  final TopNSearcher<T> topNSearcher = new TopNSearcher<>(fst, topN, topN, comparator);

  // The search begins from one start node only, so an
  // empty input path is a valid starting point.
  final IntsRefBuilder emptyInput = new IntsRefBuilder();
  topNSearcher.addStartPaths(fromNode, startOutput, allowEmptyString, emptyInput);

  return topNSearcher.search();
}
 
Example #24
Source File: TestUTF32ToUTF8.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * For random strings, checks that the UTF-8 conversion of a single-string automaton accepts
 * exactly the string's bytes.
 */
public void testSingleton() throws Exception {
  final int rounds = atLeast(100);
  for (int round = 0; round < rounds; round++) {
    final String text = TestUtil.randomRealisticUnicodeString(random());
    final Automaton utf8 = new UTF32ToUTF8().convert(Automata.makeString(text));

    final IntsRefBuilder expectedBytes = new IntsRefBuilder();
    Util.toIntsRef(new BytesRef(text), expectedBytes);

    final Set<IntsRef> expected = new HashSet<>();
    expected.add(expectedBytes.get());
    assertEquals(expected, TestOperations.getFiniteStrings(utf8));
  }
}
 
Example #25
Source File: TestFSTs.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Compiles "stat" (17) and "station" (10) with minSuffixCount1 of 2 and checks that the dot
 * rendering contains an arc labelled t/[7].
 */
public void testFinalOutputOnEndState() throws Exception {
  final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();

  final FSTCompiler<Long> compiler = new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE4, outputs).minSuffixCount1(2).build();
  compiler.add(Util.toUTF32("stat", new IntsRefBuilder()), 17L);
  compiler.add(Util.toUTF32("station", new IntsRefBuilder()), 10L);
  final FST<Long> fst = compiler.compile();

  //Writer w = new OutputStreamWriter(new FileOutputStream("/x/tmp3/out.dot"));
  final StringWriter writer = new StringWriter();
  Util.toDot(fst, writer, false, false);
  writer.close();
  final String dot = writer.toString();
  //System.out.println(w.toString());
  assertTrue(dot.contains("label=\"t/[7]\""));
}
 
Example #26
Source File: Util.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
FSTPath(T output, FST.Arc<T> arc, IntsRefBuilder input, float boost, CharSequence context, int payload) {
  this.arc = new FST.Arc<T>().copyFrom(arc);
  this.output = output;
  this.input = input;
  this.boost = boost;
  this.context = context;
  this.payload = payload;
}
 
Example #27
Source File: Util.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/** Reverse lookup: finds an input by its output rather than
 *  the other way round, for the special case where the FST's
 *  outputs are strictly ascending.  Locates the input/output
 *  pair whose output equals the target, and returns null
 *  if no such output exists.
 *
 *  <p>NOTE: this only works with {@code FST<Long>}, and only
 *  when the outputs ascend in the same order as the inputs.
 *  Simple ordinals (0, 1, 2, ...) or file offsets (when
 *  appending to a file) are examples that fit. */
@Deprecated
public static IntsRef getByOutput(FST<Long> fst, long targetOutput) throws IOException {

  final BytesReader reader = fst.getBytesReader();

  // TODO: would be nice not to alloc this on every lookup
  final FST.Arc<Long> firstArc = fst.getFirstArc(new FST.Arc<Long>());

  // Delegate to the overload that takes caller-supplied scratch objects.
  return getByOutput(fst, targetOutput, reader, firstArc, new FST.Arc<Long>(), new IntsRefBuilder());
}
 
Example #28
Source File: LimitedFiniteStringsIteratorTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/** With a limit of 1, a single-string automaton must yield exactly that one string. */
public void testSingleton() {
  final String word = "foobar";
  final Automaton a = Automata.makeString(word);
  final List<IntsRef> actual = getFiniteStrings(new LimitedFiniteStringsIterator(a, 1));
  assertEquals(1, actual.size());

  final IntsRefBuilder expected = new IntsRefBuilder();
  Util.toUTF32(word.toCharArray(), 0, word.length(), expected);
  assertTrue(actual.contains(expected.get()));
}
 
Example #29
Source File: Operations.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/** Returns the one input this automaton accepts, or null when it does not accept exactly
 *  one input.  The automaton must be deterministic. */
public static IntsRef getSingleton(Automaton a) {
  if (a.isDeterministic() == false) {
    throw new IllegalArgumentException("input automaton must be deterministic");
  }
  final IntsRefBuilder labels = new IntsRefBuilder();
  final HashSet<Integer> seen = new HashSet<>();
  final Transition edge = new Transition();
  int state = 0;
  for (;;) {
    seen.add(state);

    if (a.isAccept(state)) {
      // An accept state ends the walk; it is a singleton only if nothing leaves it.
      return a.getNumTransitions(state) == 0 ? labels.get() : null;
    }

    // A non-accept state must continue along exactly one transition...
    if (a.getNumTransitions(state) != 1) {
      return null;
    }
    a.getTransition(state, 0, edge);

    // ...that carries a single label and leads to a state not visited before.
    if (edge.min != edge.max || seen.contains(edge.dest)) {
      return null;
    }
    labels.append(edge.min);
    state = edge.dest;
  }
}
 
Example #30
Source File: Util.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/** Decodes the Unicode codepoints found in the given range of
 *  the char[] into the provided scratch builder, which must
 *  not be null, and returns its IntsRef. */
public static IntsRef toUTF32(char[] s, int offset, int length, IntsRefBuilder scratch) {
  final int limit = offset + length;
  int count = 0;
  for (int pos = offset; pos < limit; count++) {
    // Make room for one more code point before writing it.
    scratch.grow(count + 1);
    final int codePoint = Character.codePointAt(s, pos, limit);
    scratch.setIntAt(count, codePoint);
    // Advance by one or two chars, depending on the code point.
    pos += Character.charCount(codePoint);
  }
  scratch.setLength(count);
  return scratch.get();
}