org.apache.lucene.util.fst.Outputs Java Examples

The following examples show how to use org.apache.lucene.util.fst.Outputs. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DatawaveFieldIndexListIteratorJexl.java    From datawave with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an FST containing every string in {@code values}. Each entry is added with the
 * "no output" value supplied by {@link NoOutputs}.
 *
 * @param values
 *            the strings to add, visited in the set's iteration order
 * @return the result of the builder's {@code finish()} call
 * @throws IOException
 *             declared for the builder calls made here
 */
public static FST<?> getFST(SortedSet<String> values) throws IOException {
    final Outputs<Object> noOutputs = NoOutputs.getSingleton();
    
    // Every builder option is spelled out; the trailing comment names the option each argument feeds.
    final org.apache.lucene.util.fst.Builder<Object> builder = new org.apache.lucene.util.fst.Builder<>(
                    FST.INPUT_TYPE.BYTE1, // inputType
                    0, // minSuffixCount1
                    0, // minSuffixCount2
                    true, // doShareSuffix
                    true, // doShareNonSingletonNodes
                    Integer.MAX_VALUE, // shareMaxTailLength
                    noOutputs, // outputs
                    true, // allowArrayArcs
                    15); // bytesPageBits
    
    // A single scratch buffer is refilled for each value before it is handed to the builder.
    final IntsRefBuilder scratch = new IntsRefBuilder();
    for (String value : values) {
        Util.toUTF16(value, scratch);
        final IntsRef input = scratch.get();
        builder.add(input, noOutputs.getNoOutput());
    }
    
    return builder.finish();
}
 
Example #2
Source File: NormalizeCharMap.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/** Builds the NormalizeCharMap from the pending pairs; call
 *  this once you are done calling {@link #add}. */
public NormalizeCharMap build() {
  try {
    final Outputs<CharsRef> charOutputs = CharSequenceOutputs.getSingleton();
    final FSTCompiler<CharsRef> compiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE2, charOutputs);
    final IntsRefBuilder keyScratch = new IntsRefBuilder();

    // Feed each pending (match -> replacement) pair to the compiler.
    for (Map.Entry<String,String> pair : pendingPairs.entrySet()) {
      Util.toUTF16(pair.getKey(), keyScratch);
      compiler.add(keyScratch.get(), new CharsRef(pair.getValue()));
    }

    final FST<CharsRef> compiled = compiler.compile();
    // The pairs are cleared only after compilation has succeeded.
    pendingPairs.clear();
    return new NormalizeCharMap(compiled);
  } catch (IOException ioe) {
    // Bogus FST IOExceptions!!  (will never happen)
    throw new RuntimeException(ioe);
  }
}
 
Example #3
Source File: Dictionary.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Reads {@code num} conversion lines from {@code reader} and compiles them into an FST
 * mapping each source string to its replacement.
 *
 * <p>Each line must split on whitespace into exactly three parts; the second part is the
 * source string and the third is its replacement.
 *
 * @param reader source of the conversion lines; its line number is reported in parse errors
 * @param num number of lines to read
 * @return the compiled FST of source string to replacement
 * @throws IOException if reading from {@code reader} or compiling the FST fails
 * @throws ParseException if the input ends before {@code num} lines were read, or a line does
 *     not have exactly three whitespace-separated parts
 * @throws IllegalStateException if the same source string is mapped more than once
 */
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
  // TreeMap keeps the keys sorted, which is the order they are handed to the compiler below.
  Map<String,String> mappings = new TreeMap<>();
  
  for (int i = 0; i < num; i++) {
    String line = reader.readLine();
    if (line == null) {
      // readLine() returns null at end of stream; report it as a parse error rather than
      // failing with a NullPointerException on the split below.
      throw new ParseException("unexpected end of input: expected " + num + " conversions but found " + i,
          reader.getLineNumber());
    }
    String[] parts = line.split("\\s+");
    if (parts.length != 3) {
      throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
    }
    if (mappings.put(parts[1], parts[2]) != null) {
      throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
    }
  }
  
  Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
  FSTCompiler<CharsRef> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE2, outputs);
  IntsRefBuilder scratchInts = new IntsRefBuilder();
  for (Map.Entry<String,String> entry : mappings.entrySet()) {
    Util.toUTF16(entry.getKey(), scratchInts);
    fstCompiler.add(scratchInts.get(), new CharsRef(entry.getValue()));
  }
  
  return fstCompiler.compile();
}
 
Example #4
Source File: TestDictionary.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Compiles a small replacement FST and checks the result of
 * {@code Dictionary.applyMappings} on several inputs.
 */
public void testReplacements() throws Exception {
  // Replacement rules as {source, replacement}, added in this order.
  final String[][] rules = {
      {"a", "b"},
      {"ab", "c"},
      {"c", "de"},
      {"def", "gh"},
  };

  Outputs<CharsRef> charOutputs = CharSequenceOutputs.getSingleton();
  FSTCompiler<CharsRef> compiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE2, charOutputs);
  IntsRefBuilder keyScratch = new IntsRefBuilder();
  for (String[] rule : rules) {
    Util.toUTF16(rule[0], keyScratch);
    compiler.add(keyScratch.get(), new CharsRef(rule[1]));
  }
  FST<CharsRef> fst = compiler.compile();

  // Test cases as {input, expected text after applying the mappings}.
  final String[][] cases = {
      {"atestanother", "btestbnother"},
      {"abtestanother", "ctestbnother"},
      {"atestabnother", "btestcnother"},
      {"abtestabnother", "ctestcnother"},
      {"abtestabcnother", "ctestcdenother"},
      {"defdefdefc", "ghghghde"},
  };
  for (String[] testCase : cases) {
    StringBuilder text = new StringBuilder(testCase[0]);
    Dictionary.applyMappings(fst, text);
    assertEquals(testCase[1], text.toString());
  }
}