org.apache.lucene.util.fst.FST Java Examples
The following examples show how to use
org.apache.lucene.util.fst.FST.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TrieBuilder.java From ambiverse-nlu with Apache License 2.0 | 6 votes |
/**
 * Builds an FST with byte labels ({@code BYTE1}) over the given mentions.
 *
 * <p>Every mention is added with a running counter, starting at 0, as its output.
 * An {@link AssertionError} raised while adding a mention is logged at debug level
 * and building continues with the next mention.
 *
 * @param sortedStrings the mentions to add, visited in the set's iteration order
 * @return the FST obtained by finishing the builder
 * @throws IOException if the FST builder reports an I/O failure
 */
public static FST<Long> buildTrie(Set<String> sortedStrings) throws IOException {
  final PositiveIntOutputs longOutputs = PositiveIntOutputs.getSingleton();
  final Builder<Long> trieBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1, longOutputs);
  final BytesRefBuilder byteScratch = new BytesRefBuilder();
  final IntsRefBuilder labelScratch = new IntsRefBuilder();

  long nextOutput = 0;
  for (final String mention : sortedStrings) {
    byteScratch.copyChars(mention);
    try {
      // The counter is bumped as part of the call, exactly as the arguments are evaluated.
      trieBuilder.add(Util.toIntsRef(byteScratch.get(), labelScratch), nextOutput++);
    } catch (AssertionError assertionFailure) {
      logger.debug("Assertion error for mention " + mention);
    }
  }
  return trieBuilder.finish();
}
Example #2
Source File: NormalizeCharMap.java From lucene-solr with Apache License 2.0 | 6 votes |
private NormalizeCharMap(FST<CharsRef> map) { this.map = map; if (map != null) { try { // Pre-cache root arcs: final FST.Arc<CharsRef> scratchArc = new FST.Arc<>(); final FST.BytesReader fstReader = map.getBytesReader(); map.getFirstArc(scratchArc); if (FST.targetHasArcs(scratchArc)) { map.readFirstRealTargetArc(scratchArc.target(), scratchArc, fstReader); while(true) { assert scratchArc.label() != FST.END_LABEL; cachedRootArcs.put(Character.valueOf((char) scratchArc.label()), new FST.Arc<CharsRef>().copyFrom(scratchArc)); if (scratchArc.isLast()) { break; } map.readNextRealArc(scratchArc, fstReader); } } //System.out.println("cached " + cachedRootArcs.size() + " root arcs"); } catch (IOException ioe) { // Bogus FST IOExceptions!! (will never happen) throw new RuntimeException(ioe); } } }
Example #3
Source File: DynamicSynonymFilter.java From elasticsearch-analysis-dynamic-synonym with Apache License 2.0 | 6 votes |
/**
 * Adds update logic: swaps in a new synonym map and rebuilds the state derived from it.
 *
 * <p>All fields assigned here were converted from {@code final}; they must only be
 * assigned inside this method, otherwise bugs may result.
 *
 * @param synonymMap the new synonym map; its {@code fst} must be non-null
 * @throws IllegalArgumentException if the map's FST is {@code null}
 */
@Override
public void update(SynonymMap synonymMap) {
  this.synonyms = synonymMap;
  this.fst = synonymMap.fst;
  if (this.fst == null) {
    throw new IllegalArgumentException("fst must be non-null");
  }

  this.fstReader = this.fst.getBytesReader();
  this.rollBufferSize = 1 + synonymMap.maxHorizontalContext;

  final int slots = this.rollBufferSize;
  this.futureInputs = new DynamicSynonymFilter.PendingInput[slots];
  this.futureOutputs = new DynamicSynonymFilter.PendingOutputs[slots];
  for (int i = 0; i < slots; i++) {
    this.futureInputs[i] = new DynamicSynonymFilter.PendingInput();
    this.futureOutputs[i] = new DynamicSynonymFilter.PendingOutputs();
  }

  this.scratchArc = new FST.Arc();
}
Example #4
Source File: FSTTermsReader.java From lucene-solr with Apache License 2.0 | 6 votes |
static<T> void walk(FST<T> fst) throws IOException { final ArrayList<FST.Arc<T>> queue = new ArrayList<>(); final BitSet seen = new BitSet(); final FST.BytesReader reader = fst.getBytesReader(); final FST.Arc<T> startArc = fst.getFirstArc(new FST.Arc<T>()); queue.add(startArc); while (!queue.isEmpty()) { final FST.Arc<T> arc = queue.remove(0); final long node = arc.target(); //System.out.println(arc); if (FST.targetHasArcs(arc) && !seen.get((int) node)) { seen.set((int) node); fst.readFirstRealTargetArc(node, arc, reader); while (true) { queue.add(new FST.Arc<T>().copyFrom(arc)); if (arc.isLast()) { break; } else { fst.readNextRealArc(arc, reader); } } } } }
Example #5
Source File: OrdsSegmentTermsEnum.java From lucene-solr with Apache License 2.0 | 6 votes |
OrdsSegmentTermsEnumFrame pushFrame(FST.Arc<Output> arc, Output frameData, int length) throws IOException { scratchReader.reset(frameData.bytes.bytes, frameData.bytes.offset, frameData.bytes.length); final long code = scratchReader.readVLong(); final long fpSeek = code >>> OrdsBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS; // System.out.println(" fpSeek=" + fpSeek); final OrdsSegmentTermsEnumFrame f = getFrame(1+currentFrame.ord); f.hasTerms = (code & OrdsBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS) != 0; f.hasTermsOrig = f.hasTerms; f.isFloor = (code & OrdsBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR) != 0; // Must setFloorData before pushFrame in case pushFrame tries to rewind: if (f.isFloor) { f.termOrdOrig = frameData.startOrd; f.setFloorData(scratchReader, frameData.bytes); } pushFrame(arc, fpSeek, length, frameData.startOrd); return f; }
Example #6
Source File: FuzzySuggester.java From lucene-solr with Apache License 2.0 | 6 votes |
@Override protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths, Automaton lookupAutomaton, FST<Pair<Long,BytesRef>> fst) throws IOException { // TODO: right now there's no penalty for fuzzy/edits, // ie a completion whose prefix matched exactly what the // user typed gets no boost over completions that // required an edit, which get no boost over completions // requiring two edits. I suspect a multiplicative // factor is appropriate (eg, say a fuzzy match must be at // least 2X better weight than the non-fuzzy match to // "compete") ... in which case I think the wFST needs // to be log weights or something ... Automaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton)); /* Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), StandardCharsets.UTF_8); w.write(levA.toDot()); w.close(); System.out.println("Wrote LevA to out.dot"); */ return FSTUtil.intersectPrefixPaths(levA, fst); }
Example #7
Source File: NormalizeCharMap.java From lucene-solr with Apache License 2.0 | 6 votes |
/** Builds the NormalizeCharMap; call this once you * are done calling {@link #add}. */ public NormalizeCharMap build() { final FST<CharsRef> map; try { final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton(); final FSTCompiler<CharsRef> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE2, outputs); final IntsRefBuilder scratch = new IntsRefBuilder(); for(Map.Entry<String,String> ent : pendingPairs.entrySet()) { fstCompiler.add(Util.toUTF16(ent.getKey(), scratch), new CharsRef(ent.getValue())); } map = fstCompiler.compile(); pendingPairs.clear(); } catch (IOException ioe) { // Bogus FST IOExceptions!! (will never happen) throw new RuntimeException(ioe); } return new NormalizeCharMap(map); }
Example #8
Source File: FstDecompounder.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Consume a maximal glue morpheme, if any, and consume the next word.
 *
 * <p>Walks the glue-morpheme FST from {@code offset}; for every position where the
 * walk reaches a final arc, a {@code GLUE_MORPHEME} chunk is pushed, the remainder
 * (if any) is handed to {@code matchWord}, and the chunk is popped again.
 */
private void matchGlueMorpheme(IntsRef utf32, final int offset, StringBuilder builder,
                               IntsRefBuilder maxPathsBuilder, Deque<Chunk> chunks) throws IOException {
  FST.Arc<Object> cursor = glueMorphemes.getFirstArc(new FST.Arc<>());
  final BytesReader reader = glueMorphemes.getBytesReader();

  for (int pos = offset; pos < utf32.length; pos++) {
    cursor = glueMorphemes.findTargetArc(utf32.ints[pos], cursor, cursor, reader);
    if (cursor == null) {
      // No further transition: nothing more can match from here.
      return;
    }
    if (!cursor.isFinal()) {
      continue;
    }
    final int next = pos + 1;
    chunks.addLast(new Chunk(offset, next, ChunkType.GLUE_MORPHEME));
    if (next < utf32.offset + utf32.length) {
      matchWord(utf32, next, builder, maxPathsBuilder, chunks);
    }
    chunks.removeLast();
  }
}
Example #9
Source File: FSTCompletion.java From lucene-solr with Apache License 2.0 | 6 votes |
/** * Cache the root node's output arcs starting with completions with the * highest weights. */ @SuppressWarnings({"unchecked","rawtypes"}) private static Arc<Object>[] cacheRootArcs(FST<Object> automaton) { try { List<Arc<Object>> rootArcs = new ArrayList<>(); Arc<Object> arc = automaton.getFirstArc(new Arc<>()); FST.BytesReader fstReader = automaton.getBytesReader(); automaton.readFirstTargetArc(arc, arc, fstReader); while (true) { rootArcs.add(new Arc<>().copyFrom(arc)); if (arc.isLast()) break; automaton.readNextArc(arc, fstReader); } Collections.reverse(rootArcs); // we want highest weights first. return rootArcs.toArray(new Arc[rootArcs.size()]); } catch (IOException e) { throw new RuntimeException(e); } }
Example #10
Source File: StemmerOverrideFilter.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Returns a {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}.
 *
 * <p>Entries are added to the FST in the order given by {@code hash.sort()}; each
 * key's bytes are copied as UTF-8 into the FST input and paired with the matching
 * entry of {@code outputValues}.
 *
 * @return a {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}
 * @throws IOException if an {@link IOException} occurs
 */
public StemmerOverrideMap build() throws IOException {
  final FSTCompiler<BytesRef> compiler =
      new FSTCompiler<>(FST.INPUT_TYPE.BYTE4, ByteSequenceOutputs.getSingleton());
  final int[] sortedIds = hash.sort();
  final IntsRefBuilder inputScratch = new IntsRefBuilder();
  final int entryCount = hash.size();
  final BytesRef keyScratch = new BytesRef();

  int index = 0;
  while (index < entryCount) {
    final int id = sortedIds[index];
    inputScratch.copyUTF8Bytes(hash.get(id, keyScratch));
    compiler.add(inputScratch.get(), new BytesRef(outputValues.get(id)));
    index++;
  }
  return new StemmerOverrideMap(compiler.compile(), ignoreCase);
}
Example #11
Source File: IDVersionSegmentTermsEnum.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Decodes the block header stored in {@code frameData.output1}, prepares the next
 * frame (the one after {@code currentFrame}) and pushes it.
 *
 * <p>The leading vLong carries the flag bits in its low
 * {@code OUTPUT_FLAGS_NUM_BITS} bits and the seek pointer in the remaining bits.
 * The frame's {@code maxIDVersion} is set to {@code Long.MAX_VALUE - frameData.output2}.
 *
 * @param arc the FST arc associated with the frame
 * @param frameData pair of encoded header bytes and a version value
 * @param length forwarded unchanged to the three-argument {@code pushFrame}
 * @return the frame that was prepared
 * @throws IOException if reading or pushing the frame fails
 */
IDVersionSegmentTermsEnumFrame pushFrame(FST.Arc<Pair<BytesRef,Long>> arc, Pair<BytesRef,Long> frameData, int length) throws IOException {
  scratchReader.reset(frameData.output1.bytes, frameData.output1.offset, frameData.output1.length);
  final long header = scratchReader.readVLong();
  final boolean hasTerms = (header & VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS) != 0;
  final boolean isFloor = (header & VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR) != 0;
  final long seekPointer = header >>> VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;

  final IDVersionSegmentTermsEnumFrame frame = getFrame(1 + currentFrame.ord);
  frame.maxIDVersion = Long.MAX_VALUE - frameData.output2;
  frame.hasTerms = hasTerms;
  frame.hasTermsOrig = hasTerms;
  frame.isFloor = isFloor;
  if (isFloor) {
    // Floor data is read from the same reader, positioned right after the header.
    frame.setFloorData(scratchReader, frameData.output1);
  }

  pushFrame(arc, seekPointer, length);
  return frame;
}
Example #12
Source File: UserDictionary.java From lucene-solr with Apache License 2.0 | 6 votes |
/** * Lookup words in text * @param chars text * @param off offset into text * @param len length of text * @return array of wordId */ public List<Integer> lookup(char[] chars, int off, int len) throws IOException { List<Integer> result = new ArrayList<>(); final FST.BytesReader fstReader = fst.getBytesReader(); FST.Arc<Long> arc = new FST.Arc<>(); int end = off + len; for (int startOffset = off; startOffset < end; startOffset++) { arc = fst.getFirstArc(arc); int output = 0; int remaining = end - startOffset; for (int i = 0; i < remaining; i++) { int ch = chars[startOffset+i]; if (fst.findTargetArc(ch, arc, arc, i == 0, fstReader) == null) { break; // continue to next position } output += arc.output().intValue(); if (arc.isFinal()) { final int finalOutput = output + arc.nextFinalOutput().intValue(); result.add(finalOutput); } } } return result; }
Example #13
Source File: DatawaveArithmetic.java From datawave with Apache License 2.0 | 5 votes |
/**
 * Tests whether the string form of {@code object} is a key of the given FST.
 *
 * <p>The lookup itself runs while holding the monitor of {@code fst}.
 *
 * @param object value whose {@code toString()} is looked up
 * @param fst the FST to query
 * @return {@code true} if the lookup yields a non-null output
 * @throws IOException if the FST lookup fails
 */
public static boolean matchesFst(Object object, FST fst) throws IOException {
  final IntsRef key = Util.toUTF16(object.toString(), new IntsRefBuilder());
  final boolean found;
  synchronized (fst) {
    found = Util.get(fst, key) != null;
  }
  return found;
}
Example #14
Source File: PushdownLargeFieldedListsVisitor.java From datawave with Apache License 2.0 | 5 votes |
protected URI createFst(SortedSet<String> values) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException { FST fst = DatawaveFieldIndexListIteratorJexl.getFST(values); // now serialize to our file system CompressionCodec codec = null; String extension = ""; if (config.getHdfsFileCompressionCodec() != null) { ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); if (classLoader == null) { classLoader = this.getClass().getClassLoader(); } Class<? extends CompressionCodec> clazz = Class.forName(config.getHdfsFileCompressionCodec(), true, classLoader).asSubclass(CompressionCodec.class); codec = clazz.newInstance(); extension = codec.getDefaultExtension(); } int fstCount = config.getFstCount().incrementAndGet(); Path fstFile = new Path(fstHdfsUri, "PushdownLargeFileFst." + fstCount + ".fst" + extension); OutputStream fstFileOut = new BufferedOutputStream(fs.create(fstFile, false)); if (codec != null) { fstFileOut = codec.createOutputStream(fstFileOut); } OutputStreamDataOutput outStream = new OutputStreamDataOutput(fstFileOut); fst.save(outStream); outStream.close(); return fstFile.toUri(); }
Example #15
Source File: BlockTreeTermsWriter.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Creates a pending block entry; the superclass is initialised with {@code false}.
 *
 * @param prefix stored in {@code this.prefix}
 * @param fp stored in {@code this.fp}
 * @param hasTerms stored in {@code this.hasTerms}
 * @param isFloor stored in {@code this.isFloor}
 * @param floorLeadByte stored in {@code this.floorLeadByte}
 * @param subIndices stored in {@code this.subIndices}
 */
public PendingBlock(BytesRef prefix,
                    long fp,
                    boolean hasTerms,
                    boolean isFloor,
                    int floorLeadByte,
                    List<FST<BytesRef>> subIndices) {
  super(false);
  // Location of the block.
  this.fp = fp;
  this.prefix = prefix;
  // Flags describing the block.
  this.isFloor = isFloor;
  this.hasTerms = hasTerms;
  this.floorLeadByte = floorLeadByte;
  // Sub-indices handed in by the caller.
  this.subIndices = subIndices;
}
Example #16
Source File: DatawaveFieldIndexListIteratorJexl.java From datawave with Apache License 2.0 | 5 votes |
public static synchronized FST<Object> get(Path fstfile, String compressedCodec, FileSystem fs) throws IOException { if (fstfile == null) throw new NullPointerException("input fst key was null"); FST<Object> fst = fstCache.get(fstfile); if (fst != null) { return fst; } // Attempt to load fst from HDFS fst = loadFSTFromFile(fstfile, compressedCodec, fs); fstCache.put(fstfile, fst); return fst; }
Example #17
Source File: FSTCursor.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
/**
 * Creates a cursor positioned on the first arc of the given FST, with the FST's
 * "no output" value as the initial output.
 *
 * @param fst the FST to traverse
 * @param characterStart stored in {@code this.characterStart}
 */
public FSTCursor(FST<Long> fst, int characterStart) {
  this.fst = fst;
  this.arc = fst.getFirstArc(new FST.Arc<>());
  this.output = fst.outputs.getNoOutput();
  this.fstReader = fst.getBytesReader();
  this.characterStart = characterStart;
}
Example #18
Source File: TextSpotterTest.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
/** With a threshold of 1.0, exactly one spot is expected for the single mention. */
@Test
public void shouldSpotOneWord() throws IOException {
  final String mention = "Германия".toLowerCase();
  final Set<String> mentions = new TreeSet<>(Collections.singletonList(mention));
  final FST<Long> mentionTrie = TrieBuilder.buildTrie(mentions);

  final Set<Spot> found =
      TextSpotter.spotTrieEntriesInTextIgnoreCase(mentionTrie, sentenceText, begins, ends, 1.0);

  assertEquals(1, found.size());
}
Example #19
Source File: TextSpotterTest.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
@Test public void shouldSpotOneFuzzyWord() throws IOException { Set<String> sortedMentions = new TreeSet<>(Collections.singletonList("Германие".toLowerCase())); //87.5% match FST<Long> trie = TrieBuilder.buildTrie(sortedMentions); Set<Spot> spots = TextSpotter.spotTrieEntriesInTextIgnoreCase(trie, sentenceText, begins, ends, 0.8); assertEquals(1, spots.size()); Spot spot = spots.iterator().next(); assertEquals("Германия", sentenceText.substring(spot.getBegin(), spot.getEnd())); }
Example #20
Source File: TextSpotterTest.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
@Test public void shouldNotSpotOneFuzzyWord() throws IOException { Set<String> sortedMentions = new TreeSet<>(Collections.singletonList("Einstei1".toLowerCase())); //87.5% match FST<Long> trie = TrieBuilder.buildTrie(sortedMentions); Set<Spot> spots = TextSpotter.spotTrieEntriesInTextIgnoreCase(trie, sentenceText, begins, ends, 0.8); assertEquals(1, spots.size()); Spot spot = spots.iterator().next(); assertEquals("Einstein", sentenceText.substring(spot.getBegin(), spot.getEnd())); }
Example #21
Source File: BlockTreeTermsWriter.java From lucene-solr with Apache License 2.0 | 5 votes |
private void append(FSTCompiler<BytesRef> fstCompiler, FST<BytesRef> subIndex, IntsRefBuilder scratchIntsRef) throws IOException { final BytesRefFSTEnum<BytesRef> subIndexEnum = new BytesRefFSTEnum<>(subIndex); BytesRefFSTEnum.InputOutput<BytesRef> indexEnt; while((indexEnt = subIndexEnum.next()) != null) { //if (DEBUG) { // System.out.println(" add sub=" + indexEnt.input + " " + indexEnt.input + " output=" + indexEnt.output); //} fstCompiler.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), indexEnt.output); } }
Example #22
Source File: TextSpotterTest.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
public void shouldSpotTwoFuzzyWords() throws IOException { Set<String> sortedMentions = new TreeSet<>(Collections.singletonList("Alber1 Einstei2".toLowerCase())); //76.47% match FST<Long> trie = TrieBuilder.buildTrie(sortedMentions); Set<Spot> spots = TextSpotter.spotTrieEntriesInTextIgnoreCase(trie, sentenceText, begins, ends, 0.90); assertEquals(1, spots.size()); Spot spot = spots.iterator().next(); assertEquals("Albert Einstein", sentenceText.substring(spot.getBegin(), spot.getEnd())); }
Example #23
Source File: TextSpotterTest.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
/** With a threshold of 0.8, no spot is expected for "gates" in {@code sentenceText2}. */
@Test
public void shouldNotSpotGatessssWord() throws IOException {
  final String mention = "Gates".toLowerCase();
  final Set<String> mentions = new TreeSet<>(Collections.singletonList(mention));
  final FST<Long> mentionTrie = TrieBuilder.buildTrie(mentions);

  final Set<Spot> found =
      TextSpotter.spotTrieEntriesInTextIgnoreCase(mentionTrie, sentenceText2, begins, ends, 0.8);

  assertEquals(0, found.size());
}
Example #24
Source File: TextSpotterTest.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
/** With a threshold of 0.9, no spot is expected for "merkel" in {@code sentenceText}. */
@Test
public void shouldNotSpotShorterMerkeWord() throws IOException {
  final String mention = "Merkel".toLowerCase();
  final Set<String> mentions = new TreeSet<>(Collections.singletonList(mention));
  final FST<Long> mentionTrie = TrieBuilder.buildTrie(mentions);

  final Set<Spot> found =
      TextSpotter.spotTrieEntriesInTextIgnoreCase(mentionTrie, sentenceText, begins, ends, 0.9);

  assertEquals(0, found.size());
}
Example #25
Source File: SegmentTermsEnum.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Decodes the block header stored in {@code frameData}, prepares the next frame
 * (the one after {@code currentFrame}) and pushes it.
 *
 * <p>The leading vLong carries the flag bits in its low
 * {@code OUTPUT_FLAGS_NUM_BITS} bits and the seek pointer in the remaining bits.
 *
 * @param arc the FST arc associated with the frame
 * @param frameData bytes holding the encoded header (and floor data, if flagged)
 * @param length forwarded unchanged to the three-argument {@code pushFrame}
 * @return the frame that was prepared
 * @throws IOException if reading or pushing the frame fails
 */
SegmentTermsEnumFrame pushFrame(FST.Arc<BytesRef> arc, BytesRef frameData, int length) throws IOException {
  scratchReader.reset(frameData.bytes, frameData.offset, frameData.length);
  final long header = scratchReader.readVLong();
  final boolean hasTerms = (header & BlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS) != 0;
  final boolean isFloor = (header & BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR) != 0;
  final long seekPointer = header >>> BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS;

  final SegmentTermsEnumFrame frame = getFrame(1 + currentFrame.ord);
  frame.hasTerms = hasTerms;
  frame.hasTermsOrig = hasTerms;
  frame.isFloor = isFloor;
  if (isFloor) {
    // Floor data is read from the same reader, positioned right after the header.
    frame.setFloorData(scratchReader, frameData);
  }

  pushFrame(arc, seekPointer, length);
  return frame;
}
Example #26
Source File: BooleanPerceptronClassifier.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Rebuilds {@code fst} from the given weights.
 *
 * <p>Each key is added with its weight narrowed to a {@code long} via
 * {@link Double#longValue()}, in the map's iteration order.
 *
 * @param weights sorted map from key to weight
 * @throws IOException if compiling the FST fails
 */
private void updateFST(SortedMap<String, Double> weights) throws IOException {
  final FSTCompiler<Long> compiler =
      new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, PositiveIntOutputs.getSingleton());
  final BytesRefBuilder byteScratch = new BytesRefBuilder();
  final IntsRefBuilder labelScratch = new IntsRefBuilder();

  for (Map.Entry<String, Double> weighted : weights.entrySet()) {
    byteScratch.copyChars(weighted.getKey());
    final long weight = weighted.getValue().longValue();
    compiler.add(Util.toIntsRef(byteScratch.get(), labelScratch), weight);
  }
  fst = compiler.compile();
}
Example #27
Source File: XAnalyzingSuggester.java From Elasticsearch with Apache License 2.0 | 5 votes |
/**
 * Creates a new suggester.
 *
 * @param indexAnalyzer Analyzer that will be used for
 *   analyzing suggestions while building the index.
 * @param queryPrefix automaton added for context dependent suggestions
 * @param queryAnalyzer Analyzer that will be used for
 *   analyzing query text during lookup
 * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP};
 *   no other bits may be set
 * @param maxSurfaceFormsPerAnalyzedForm Maximum number of
 *   surface forms to keep for a single analyzed form.
 *   When there are too many surface forms we discard the
 *   lowest weighted ones. Must be in the range 1..256 (inclusive).
 * @param maxGraphExpansions Maximum number of graph paths
 *   to expand from the analyzed form.  Set this to -1 for
 *   no limit.
 * @param preservePositionIncrements stored as given
 * @param fst a pre-built FST to use; stored as given
 * @param hasPayloads stored as given
 * @param maxAnalyzedPathsForOneInput stored as given
 * @param sepLabel stored as given
 * @param payloadSep stored as given
 * @param endByte stored as given
 * @param holeCharacter stored as given
 * @throws IllegalArgumentException if {@code options} contains unknown bits,
 *   if {@code maxSurfaceFormsPerAnalyzedForm} is outside 1..256, or if
 *   {@code maxGraphExpansions} is neither -1 nor positive
 */
public XAnalyzingSuggester(Analyzer indexAnalyzer, Automaton queryPrefix, Analyzer queryAnalyzer, int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
                           boolean preservePositionIncrements, FST<Pair<Long, BytesRef>> fst, boolean hasPayloads, int maxAnalyzedPathsForOneInput,
                           int sepLabel, int payloadSep, int endByte, int holeCharacter) {
  // SIMON EDIT: I added fst, hasPayloads and maxAnalyzedPathsForOneInput
  this.indexAnalyzer = indexAnalyzer;
  this.queryAnalyzer = queryAnalyzer;
  this.fst = fst;
  this.hasPayloads = hasPayloads;
  if ((options & ~(EXACT_FIRST | PRESERVE_SEP)) != 0) {
    throw new IllegalArgumentException("options should only contain EXACT_FIRST and PRESERVE_SEP; got " + options);
  }
  this.exactFirst = (options & EXACT_FIRST) != 0;
  this.preserveSep = (options & PRESERVE_SEP) != 0;

  // FLORIAN EDIT: I added <code>queryPrefix</code> for context dependent suggestions
  this.queryPrefix = queryPrefix;

  // NOTE: this is just an implementation limitation; if
  // somehow this is a problem we could fix it by using
  // more than one byte to disambiguate ... but 256 seems
  // like it should be way more than enough.
  if (maxSurfaceFormsPerAnalyzedForm <= 0 || maxSurfaceFormsPerAnalyzedForm > 256) {
    // The check accepts 256 itself, so the message states the inclusive upper bound.
    throw new IllegalArgumentException("maxSurfaceFormsPerAnalyzedForm must be > 0 and <= 256 (got: " + maxSurfaceFormsPerAnalyzedForm + ")");
  }
  this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm;

  if (maxGraphExpansions < 1 && maxGraphExpansions != -1) {
    throw new IllegalArgumentException("maxGraphExpansions must -1 (no limit) or > 0 (got: " + maxGraphExpansions + ")");
  }
  this.maxGraphExpansions = maxGraphExpansions;
  this.maxAnalyzedPathsForOneInput = maxAnalyzedPathsForOneInput;
  this.preservePositionIncrements = preservePositionIncrements;
  this.sepLabel = sepLabel;
  this.payloadSep = payloadSep;
  this.endByte = endByte;
  this.holeCharacter = holeCharacter;
}
Example #28
Source File: XAnalyzingSuggester.java From Elasticsearch with Apache License 2.0 | 5 votes |
/**
 * Loads the suggester state from the stream: the FST, then
 * {@code maxAnalyzedPathsForOneInput} (vInt), then {@code hasPayloads} (one byte,
 * {@code 1} meaning true). The stream is closed afterwards, whether or not
 * loading succeeds.
 *
 * @param input the stream to read from
 * @return always {@code true}
 * @throws IOException if reading fails
 */
@Override
public boolean load(InputStream input) throws IOException {
  final DataInput in = new InputStreamDataInput(input);
  try {
    this.fst = new FST<>(
        in,
        new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
    this.maxAnalyzedPathsForOneInput = in.readVInt();
    final byte payloadFlag = in.readByte();
    this.hasPayloads = payloadFlag == 1;
  } finally {
    IOUtils.close(input);
  }
  return true;
}
Example #29
Source File: XAnalyzingSuggester.java From Elasticsearch with Apache License 2.0 | 5 votes |
/**
 * Loads the suggester state from the input: {@code count} (vLong), the FST,
 * {@code maxAnalyzedPathsForOneInput} (vInt) and {@code hasPayloads} (one byte,
 * {@code 1} meaning true), in that order.
 *
 * @param input the input to read from
 * @return always {@code true}
 * @throws IOException if reading fails
 */
@Override
public boolean load(DataInput input) throws IOException {
  this.count = input.readVLong();
  this.fst = new FST<>(
      input,
      new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
  this.maxAnalyzedPathsForOneInput = input.readVInt();
  final byte payloadFlag = input.readByte();
  this.hasPayloads = payloadFlag == 1;
  return true;
}
Example #30
Source File: XAnalyzingSuggester.java From Elasticsearch with Apache License 2.0 | 5 votes |
/**
 * Returns all completion paths to initialize the search.
 *
 * <p>This implementation returns {@code prefixPaths} unchanged; the automaton and
 * the FST are not consulted.
 *
 * @param prefixPaths the prefix paths found so far
 * @param lookupAutomaton unused here
 * @param fst unused here
 * @return {@code prefixPaths}, as passed in
 * @throws IOException declared in the signature; this implementation does not throw it
 */
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths, Automaton lookupAutomaton, FST<Pair<Long,BytesRef>> fst) throws IOException { return prefixPaths; }