Java Code Examples for org.apache.lucene.util.fst.NoOutputs#getSingleton()

The following examples show how to use org.apache.lucene.util.fst.NoOutputs#getSingleton(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Builds an FST whose keys are the given strings and which carries no outputs.
 *
 * <p>Every value is converted with {@code Util.toUTF16} and added to the builder in the
 * iteration order of {@code values}, paired with the "no output" marker.
 *
 * @param values the strings to add
 * @return whatever the builder's {@code finish()} returns
 * @throws IOException propagated from the FST builder
 */
public static FST<?> getFST(SortedSet<String> values) throws IOException {
    final Outputs<Object> noOutputs = NoOutputs.getSingleton();
    final Object nothing = noOutputs.getNoOutput();

    // All builder options are spelled out explicitly; the trailing comments name each one.
    org.apache.lucene.util.fst.Builder<Object> builder = new org.apache.lucene.util.fst.Builder<>(
            FST.INPUT_TYPE.BYTE1,  // inputType
            0,                     // minSuffixCount1
            0,                     // minSuffixCount2
            true,                  // doShareSuffix
            true,                  // doShareNonSingletonNodes
            Integer.MAX_VALUE,     // shareMaxTailLength
            noOutputs,             // outputs
            true,                  // allowArrayArcs
            15);                   // bytesPageBits

    // One scratch buffer is reused for every value.
    final IntsRefBuilder scratch = new IntsRefBuilder();
    for (String value : values) {
        Util.toUTF16(value, scratch);
        builder.add(scratch.get(), nothing);
    }
    return builder.finish();
}
 
Example 2
Source Project: lucene-solr   File: FSTCompletionLookup.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Restores this lookup's state from {@code input}.
 *
 * <p>Reads the entry count as a variable-length long, then reads an FST (without outputs)
 * from the same input and wraps it in the two completion instances.
 *
 * @param input the source to read from
 * @return always {@code true}
 * @throws IOException propagated from reading {@code input}
 */
@Override
public synchronized boolean load(DataInput input) throws IOException {
  count = input.readVLong();

  // The same input is handed to both leading arguments of the FST constructor.
  final FST<Object> automaton = new FST<>(input, input, NoOutputs.getSingleton());
  this.higherWeightsCompletion = new FSTCompletion(automaton);

  // The second completion shares the FST exposed by the first one.
  this.normalCompletion =
      new FSTCompletion(higherWeightsCompletion.getFST(), false, exactMatchFirst);
  return true;
}
 
Example 3
public FstDecompounder(InputStream inputStream, List<String> glue) throws IOException {
    try {
        this.surfaceForms = new FST<>(new InputStreamDataInput(inputStream), NoOutputs.getSingleton());
        // set up glue morphemes
        this.glueMorphemes = createGlueMorphemes(glue != null && glue.size() > 0 ? glue :morphemes);
    } finally {
        inputStream.close();
    }
}
 
Example 4
/**
 * Builds an FST (without outputs) containing the reversed form of every glue morpheme.
 *
 * <p>The morphemes are reversed, sorted with {@link Collections#sort}, converted with
 * {@code fromUTF16ToUTF32} and added to the builder in that order.
 *
 * <p>The supplied list is not modified: reversal and sorting happen on a private copy, so
 * a shared or unmodifiable list can be passed safely and repeated calls with the same list
 * yield the same result.
 *
 * @param glue the glue morphemes in their natural (non-reversed) form
 * @return whatever the builder's {@code finish()} returns
 * @throws IOException propagated from the FST builder
 */
private FST<Object> createGlueMorphemes(List<String> glue) throws IOException {
    // Copy instead of reversing in place: mutating the argument would corrupt a list that
    // is reused across calls and would fail outright on an unmodifiable list.
    final List<String> reversed = new java.util.ArrayList<>(glue.size());
    for (String morpheme : glue) {
        reversed.add(new StringBuilder(morpheme).reverse().toString());
    }
    Collections.sort(reversed);

    final Builder<Object> builder = new Builder<>(INPUT_TYPE.BYTE4, NoOutputs.getSingleton());
    final Object nothing = NoOutputs.getSingleton().getNoOutput();
    final IntsRefBuilder intsBuilder = new IntsRefBuilder();
    for (String morpheme : reversed) {
        fromUTF16ToUTF32(morpheme, intsBuilder);
        builder.add(intsBuilder.get(), nothing);
    }
    return builder.finish();
}
 
Example 5
/**
 * Compiles a word list into an FST and writes it to {@code outputStream}.
 *
 * <p>The input is read as UTF-8, one entry per line. Lines containing {@code '#'} are
 * skipped, as are lines for which {@code pattern.split} yields no fields. For every other
 * line the first field is trimmed and lower-cased with {@link Locale#ROOT}; a word equal to
 * the immediately preceding one is skipped. Each word is stored twice: once followed by
 * {@code '>'} and once reversed and followed by {@code '<'}. The distinct entries are
 * sorted, added to an FST without outputs, and the FST is saved to {@code outputStream}.
 *
 * <p>Both streams are closed by this method: the input once reading ends (normally or with
 * an exception), the output after the FST has been saved.
 *
 * @param inputStream the input stream
 * @param outputStream the output stream
 * @throws IOException if compilation fails
 */
public void compile(InputStream inputStream, OutputStream outputStream) throws IOException {
    final HashSet<BytesRef> words = new HashSet<>();
    // try-with-resources so the reader is released even when reading or parsing fails.
    try (BufferedReader reader =
            new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
        final StringBuilder stringBuilder = new StringBuilder();
        String last = null;
        String line;
        while ((line = reader.readLine()) != null) {
            if (line.indexOf('#') >= 0) {
                continue;
            }
            // split() drops trailing empty strings, so a line made only of delimiters
            // produces an empty array; such a line carries no word.
            final String[] fields = pattern.split(line);
            if (fields.length == 0) {
                continue;
            }
            line = fields[0].trim().toLowerCase(Locale.ROOT);
            if (line.equals(last)) {
                continue;
            }
            last = line;
            /*
             * Add the word to the hash set in left-to-right characters order and reversed
             * for easier matching later on.
             */
            stringBuilder.setLength(0);
            stringBuilder.append(line);
            final int len = stringBuilder.length();
            stringBuilder.append('>');
            words.add(new BytesRef(stringBuilder));
            // Drop the '>' marker again before reversing the bare word.
            stringBuilder.setLength(len);
            stringBuilder.reverse().append('<');
            words.add(new BytesRef(stringBuilder));
        }
    }
    final BytesRef[] all = words.toArray(new BytesRef[0]);
    Arrays.sort(all, BytesRef::compareTo);
    final Object nothing = NoOutputs.getSingleton().getNoOutput();
    final Builder<Object> builder = new Builder<>(INPUT_TYPE.BYTE4, NoOutputs.getSingleton());
    final IntsRefBuilder intsRef = new IntsRefBuilder();
    for (BytesRef bytesRef : all) {
        intsRef.clear();
        intsRef.copyUTF8Bytes(bytesRef);
        builder.add(intsRef.get(), nothing);
    }
    final FST<Object> fst = builder.finish();
    try (OutputStreamDataOutput out = new OutputStreamDataOutput(outputStream)) {
        fst.save(out);
    }
}