org.apache.lucene.util.fst.ByteSequenceOutputs Java Examples

The following examples show how to use org.apache.lucene.util.fst.ByteSequenceOutputs. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: NRTSuggester.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Reads a serialized {@link NRTSuggester} from the given
 * {@link org.apache.lucene.store.IndexInput}. Whether the FST is backed by an
 * {@link OffHeapFSTStore} or read through the plain constructor is decided by
 * <code>fstLoadMode</code>.
 *
 * @param input source positioned at the start of the serialized FST
 * @param fstLoadMode requested on/off-heap loading policy
 * @return the suggester built from the FST and the metadata that follows it
 * @throws IOException if reading from <code>input</code> fails
 */
public static NRTSuggester load(IndexInput input, FSTLoadMode fstLoadMode) throws IOException {
  // Both loading paths use the same (weight, bytes) pair outputs.
  final PairOutputs<Long, BytesRef> pairOutputs =
      new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());

  final FST<Pair<Long, BytesRef>> fst;
  if (!shouldLoadFSTOffHeap(input, fstLoadMode)) {
    fst = new FST<>(input, input, pairOutputs);
  } else {
    // Read the FST through a clone placed at the current position of the original input.
    final IndexInput fstInput = input.clone();
    fstInput.seek(input.getFilePointer());
    final OffHeapFSTStore offHeapStore = new OffHeapFSTStore();
    fst = new FST<>(fstInput, fstInput, pairOutputs, offHeapStore);
    // Move the original input to the clone's position plus the store size so the
    // metadata reads below continue from there.
    input.seek(fstInput.getFilePointer() + offHeapStore.size());
  }

  /* trailing meta info, in serialized order */
  final int maxAnalyzedPaths = input.readVInt();
  /*
   * Label used to denote the end of an input in the FST and the beginning of
   * dedup bytes. The value is not used by this method, but the vInt still has
   * to be consumed to reach the payload separator that follows it.
   */
  input.readVInt();
  final int payloadSeparator = input.readVInt();
  return new NRTSuggester(fst, maxAnalyzedPaths, payloadSeparator);
}
 
Example #2
Source File: StemmerOverrideFilter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Compiles the entries held by this builder into a {@link StemmerOverrideMap}
 * that can be handed to a {@link StemmerOverrideFilter}.
 *
 * @return a {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}
 * @throws IOException if an {@link IOException} occurs
 */
public StemmerOverrideMap build() throws IOException {
  final FSTCompiler<BytesRef> compiler =
      new FSTCompiler<>(FST.INPUT_TYPE.BYTE4, ByteSequenceOutputs.getSingleton());

  // Ids are visited in the order produced by hash.sort(); only the first
  // hash.size() slots of the returned array are read.
  final int[] orderedIds = hash.sort();
  final int entryCount = hash.size();

  final IntsRefBuilder keyCodePoints = new IntsRefBuilder();
  final BytesRef keyScratch = new BytesRef();
  int slot = 0;
  while (slot < entryCount) {
    final int entryId = orderedIds[slot];
    // Key: the hashed UTF-8 bytes converted via copyUTF8Bytes; value: the stored output for the same id.
    keyCodePoints.copyUTF8Bytes(hash.get(entryId, keyScratch));
    compiler.add(keyCodePoints.get(), new BytesRef(outputValues.get(entryId)));
    slot++;
  }
  return new StemmerOverrideMap(compiler.compile(), ignoreCase);
}
 
Example #3
Source File: XAnalyzingSuggester.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Restores this suggester from a stream: the FST first, then the
 * max-analyzed-paths vInt and a one-byte payload flag. The stream is closed
 * via {@link IOUtils#close} whether or not reading succeeds.
 *
 * @return always <code>true</code> when no exception is thrown
 */
@Override
public boolean load(InputStream input) throws IOException {
  final DataInput in = new InputStreamDataInput(input);
  try {
    this.fst = new FST<>(
        in,
        new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
    this.maxAnalyzedPathsForOneInput = in.readVInt();
    final byte payloadFlag = in.readByte();
    this.hasPayloads = (payloadFlag == 1);
    return true;
  } finally {
    IOUtils.close(input);
  }
}
 
Example #4
Source File: XAnalyzingSuggester.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Restores this suggester from <code>input</code>, reading in order: the
 * entry count (vLong), the FST, the max-analyzed-paths value (vInt) and a
 * one-byte payload flag.
 *
 * @return always <code>true</code> when no exception is thrown
 */
@Override
public boolean load(DataInput input) throws IOException {
  this.count = input.readVLong();
  this.fst = new FST<>(
      input,
      new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
  this.maxAnalyzedPathsForOneInput = input.readVInt();
  // payloads are present only when the flag byte is exactly 1
  final byte payloadFlag = input.readByte();
  this.hasPayloads = (payloadFlag == 1);
  return true;
}
 
Example #5
Source File: XAnalyzingSuggester.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a builder whose FST uses BYTE1 input and (positive int, byte sequence)
 * pair outputs.
 *
 * @param maxSurfaceFormsPerAnalyzedForm also the length of the surface-form buffer allocated here
 * @param hasPayloads stored as given
 * @param payloadSep stored as given
 */
public XBuilder(int maxSurfaceFormsPerAnalyzedForm, boolean hasPayloads, int payloadSep) {
    // plain configuration values
    this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm;
    this.hasPayloads = hasPayloads;
    this.payloadSep = payloadSep;

    // working buffer with one slot per allowed surface form
    this.surfaceFormsAndPayload = new SurfaceFormAndPayload[maxSurfaceFormsPerAnalyzedForm];

    // FST construction state; the builder is created from the outputs assigned just above it
    this.outputs = new PairOutputs<>(
            PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());
    this.builder = new Builder<>(FST.INPUT_TYPE.BYTE1, this.outputs);
}
 
Example #6
Source File: AnalyzingSuggester.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Restores this suggester from <code>input</code>, reading in order: the
 * entry count (vLong), the FST (the same input is passed for both FST
 * constructor input arguments), the max-analyzed-paths value (vInt) and a
 * one-byte payload flag.
 *
 * @return always <code>true</code> when no exception is thrown
 */
@Override
public boolean load(DataInput input) throws IOException {
  this.count = input.readVLong();
  this.fst = new FST<>(
      input,
      input,
      new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
  this.maxAnalyzedPathsForOneInput = input.readVInt();
  // payloads are present only when the flag byte is exactly 1
  final byte payloadFlag = input.readByte();
  this.hasPayloads = (payloadFlag == 1);
  return true;
}
 
Example #7
Source File: NRTSuggesterBuilder.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an empty builder for {@link NRTSuggester}, initialised with the
 * {@code END_BYTE} and {@code PAYLOAD_SEP} constants and an FST compiler
 * over BYTE1 input with (positive int, byte sequence) pair outputs.
 */
public NRTSuggesterBuilder() {
  // separator / terminator labels
  this.endByte = END_BYTE;
  this.payloadSep = PAYLOAD_SEP;

  // pending entries
  this.entries = new PriorityQueue<>();

  // FST construction state; the compiler is created from the outputs assigned just above it
  this.outputs = new PairOutputs<>(
      PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());
  this.fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, this.outputs);
}
 
Example #8
Source File: SynonymMap.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a {@link SynonymMap} from the working set and returns it.
 *
 * <p>Keys are added to the FST in the order given by
 * {@code CharsRef.getUTF16SortedAsUTF8Comparator()}. The output stored for
 * each key is a byte sequence consisting of a vInt header
 * {@code (count << 1) | flag} followed by {@code count} vInt ords, where
 * {@code flag} is 0 when the entry's {@code includeOrig} is set and 1
 * otherwise. When {@code dedup} is enabled, repeated ords within one entry
 * are written only once.
 *
 * @return the newly built map
 * @throws IOException if adding to or compiling the FST fails
 */
public SynonymMap build() throws IOException {
  // TODO: are we using the best sharing options?
  final FSTCompiler<BytesRef> compiler =
      new FSTCompiler<>(FST.INPUT_TYPE.BYTE4, ByteSequenceOutputs.getSingleton());

  // Scratch state reused for every key.
  final BytesRefBuilder outBytes = new BytesRefBuilder();
  final ByteArrayDataOutput outWriter = new ByteArrayDataOutput();
  final IntsRefBuilder utf32Scratch = new IntsRefBuilder();
  final byte[] headerBuf = new byte[5]; // a vInt never needs more than 5 bytes
  final Set<Integer> seenOrds = dedup ? new HashSet<Integer>() : null;

  final Set<CharsRef> keySet = workingSet.keySet();
  final CharsRef[] orderedKeys = keySet.toArray(new CharsRef[keySet.size()]);
  Arrays.sort(orderedKeys, CharsRef.getUTF16SortedAsUTF8Comparator());

  for (final CharsRef key : orderedKeys) {
    final MapEntry entry = workingSet.get(key);
    final int ordCount = entry.ords.size();

    // Worst case: 5 bytes for the header plus 5 bytes for each ord.
    outBytes.grow(5 + ordCount * 5);
    outWriter.reset(outBytes.bytes());

    // Write the ords first; the header is appended afterwards because the
    // final count is only known once duplicates have been skipped.
    int written = 0;
    for (int i = 0; i < ordCount; i++) {
      // Set.add reports false for an ord already seen in this entry.
      if (seenOrds != null && !seenOrds.add(entry.ords.get(i))) {
        continue;
      }
      outWriter.writeVInt(entry.ords.get(i));
      written++;
    }
    if (seenOrds != null) {
      seenOrds.clear();
    }

    final int ordsLen = outWriter.getPosition();
    final int origFlag = entry.includeOrig ? 0 : 1;
    outWriter.writeVInt((written << 1) | origFlag);
    final int totalLen = outWriter.getPosition();
    final int headerLen = totalLen - ordsLen;

    // Rotate the header (count + includeOrig) to the front of the byte[]:
    // stash it, shift the ords right by headerLen, then put the header at 0.
    final byte[] buf = outBytes.bytes();
    System.arraycopy(buf, ordsLen, headerBuf, 0, headerLen);
    System.arraycopy(buf, 0, buf, headerLen, ordsLen);
    System.arraycopy(headerBuf, 0, buf, 0, headerLen);

    outBytes.setLength(totalLen);
    compiler.add(Util.toUTF32(key, utf32Scratch), outBytes.toBytesRef());
  }

  final FST<BytesRef> compiled = compiler.compile();
  return new SynonymMap(compiled, words, maxHorizontalContext);
}