Java Code Examples for org.apache.lucene.util.IntsRefBuilder#append()
The following examples show how to use
org.apache.lucene.util.IntsRefBuilder#append().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestFSTs.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Builds an FST with six single-label inputs (labels 0..5), each mapped to a
 * 300-byte output whose first byte equals the label and whose remaining bytes
 * are zero, then verifies that every lookup returns that full output.
 */
public void testLargeOutputsOnArrayArcs() throws Exception {
  final int arcCount = 6;
  final int outputLength = 300;

  final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
  final FSTCompiler<BytesRef> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);

  // One-label input that is rewritten in place for every arc.
  final IntsRefBuilder input = new IntsRefBuilder();
  input.append(0);

  // Shared scratch output; a deep copy is handed to the compiler for each arc.
  final BytesRef output = new BytesRef(new byte[outputLength]);

  for (int label = 0; label < arcCount; label++) {
    input.setIntAt(0, label);
    output.bytes[0] = (byte) label;
    fstCompiler.add(input.get(), BytesRef.deepCopyOf(output));
  }

  final FST<BytesRef> fst = fstCompiler.compile();

  for (int label = 0; label < arcCount; label++) {
    input.setIntAt(0, label);
    final BytesRef result = Util.get(fst, input.get());
    assertNotNull(result);
    assertEquals(outputLength, result.length);
    // The first byte carries the label...
    assertEquals(result.bytes[result.offset], label);
    // ...and everything after it must still be zero.
    int pos = 1;
    while (pos < result.length) {
      assertEquals(0, result.bytes[result.offset + pos]);
      pos++;
    }
  }
}
Example 2
Source File: AutomatonTestUtil.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Recursively collects into {@code strings} every string that can be produced
 * starting from state {@code s}, using {@code path} as the prefix built so far.
 *
 * <p>{@code pathstates} holds the states on the current recursion path; a
 * transition leading back to one of them aborts the search.
 *
 * @param limit maximum number of strings to collect; a negative value disables the check
 * @return {@code false} if a state on the current path is reached again or more
 *     than {@code limit} strings were found, {@code true} otherwise
 */
private static boolean getFiniteStrings(Automaton a, int s, HashSet<Integer> pathstates,
    HashSet<IntsRef> strings, IntsRefBuilder path, int limit) {
  pathstates.add(s);

  final Transition transition = new Transition();
  final int transitionCount = a.initTransition(s, transition);

  for (int idx = 0; idx < transitionCount; idx++) {
    a.getNextTransition(transition);
    final int dest = transition.dest;

    // Destination already on the current path: give up.
    if (pathstates.contains(dest)) {
      return false;
    }

    for (int label = transition.min; label <= transition.max; label++) {
      path.append(label);

      if (a.isAccept(dest)) {
        strings.add(path.toIntsRef());
        final boolean overLimit = limit >= 0 && strings.size() > limit;
        if (overLimit) {
          return false;
        }
      }

      final boolean finite = getFiniteStrings(a, dest, pathstates, strings, path, limit);
      if (!finite) {
        return false;
      }

      // Drop the label appended above before trying the next one.
      path.setLength(path.length() - 1);
    }
  }

  pathstates.remove(s);
  return true;
}
Example 3
Source File: FSTTester.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Converts {@code s} into a sequence of Unicode code points (one int per code
 * point, surrogate pairs combined), reusing {@code ir} as the output buffer.
 *
 * @param s  the UTF-16 string to convert
 * @param ir builder that is cleared and then filled with the code points
 * @return the {@link IntsRef} exposed by {@code ir.get()}
 */
static IntsRef toIntsRefUTF32(String s, IntsRefBuilder ir) {
  ir.clear();
  final int charLength = s.length();
  int charIdx = 0;
  while (charIdx < charLength) {
    final int utf32 = s.codePointAt(charIdx);
    // append() ensures capacity for the new element itself, so no explicit
    // grow() call (and no separate element counter) is needed here.
    ir.append(utf32);
    // Advance by one or two chars depending on whether this was a surrogate pair.
    charIdx += Character.charCount(utf32);
  }
  return ir.get();
}
Example 4
Source File: FSTTester.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Copies the bytes referenced by {@code br} into {@code ir}, one int per byte,
 * treating each byte as an unsigned value (0..255).
 *
 * @param br source bytes (only {@code br.length} bytes starting at {@code br.offset} are read)
 * @param ir builder that is pre-sized, cleared and then filled
 * @return the {@link IntsRef} exposed by {@code ir.get()}
 */
static IntsRef toIntsRef(BytesRef br, IntsRefBuilder ir) {
  ir.grow(br.length);
  ir.clear();
  final int end = br.offset + br.length;
  for (int pos = br.offset; pos < end; pos++) {
    // Mask to strip sign extension so the value lands in 0..255.
    final int unsigned = br.bytes[pos] & 0xFF;
    ir.append(unsigned);
  }
  return ir.get();
}
Example 5
Source File: FSTTester.java From lucene-solr with Apache License 2.0 | 5 votes |
private T randomAcceptedWord(FST<T> fst, IntsRefBuilder in) throws IOException { FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>()); final List<FST.Arc<T>> arcs = new ArrayList<>(); in.clear(); final T NO_OUTPUT = fst.outputs.getNoOutput(); T output = NO_OUTPUT; final FST.BytesReader fstReader = fst.getBytesReader(); while(true) { // read all arcs: fst.readFirstTargetArc(arc, arc, fstReader); arcs.add(new FST.Arc<T>().copyFrom(arc)); while(!arc.isLast()) { fst.readNextArc(arc, fstReader); arcs.add(new FST.Arc<T>().copyFrom(arc)); } // pick one arc = arcs.get(random.nextInt(arcs.size())); arcs.clear(); // accumulate output output = fst.outputs.add(output, arc.output()); // append label if (arc.label() == FST.END_LABEL) { break; } in.append(arc.label()); } return output; }
Example 6
Source File: Operations.java From lucene-solr with Apache License 2.0 | 5 votes |
/** If this automaton accepts a single input, return it. Else, return null. * The automaton must be deterministic. */ public static IntsRef getSingleton(Automaton a) { if (a.isDeterministic() == false) { throw new IllegalArgumentException("input automaton must be deterministic"); } IntsRefBuilder builder = new IntsRefBuilder(); HashSet<Integer> visited = new HashSet<>(); int s = 0; Transition t = new Transition(); while (true) { visited.add(s); if (a.isAccept(s) == false) { if (a.getNumTransitions(s) == 1) { a.getTransition(s, 0, t); if (t.min == t.max && !visited.contains(t.dest)) { builder.append(t.min); s = t.dest; continue; } } } else if (a.getNumTransitions(s) == 0) { return builder.get(); } // Automaton accepts more than one string: return null; } }
Example 7
Source File: Util.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Fills {@code scratch} with the bytes referenced by {@code input}, each
 * widened to an int as an unsigned value (0..255).
 *
 * @param input   source bytes ({@code input.length} bytes starting at {@code input.offset})
 * @param scratch builder that is cleared and then filled
 * @return the {@link IntsRef} exposed by {@code scratch.get()}
 */
public static IntsRef toIntsRef(BytesRef input, IntsRefBuilder scratch) {
  scratch.clear();
  final byte[] bytes = input.bytes;
  final int start = input.offset;
  final int count = input.length;
  int i = 0;
  while (i < count) {
    scratch.append(bytes[start + i] & 0xFF);
    i++;
  }
  return scratch.get();
}
Example 8
Source File: FiniteStringsIteratorTest.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Builds a two-state automaton with a single 'a' transition into an accepting
 * state and checks that the set returned by
 * {@code TestOperations.getFiniteStrings} contains exactly that one string.
 */
public void testSingleString() {
  final Automaton automaton = new Automaton();
  final int from = automaton.createState();
  final int to = automaton.createState();
  automaton.setAccept(to, true);
  automaton.addTransition(from, to, 'a', 'a');
  automaton.finishState();

  // The only string we expect: the single code point 'a'.
  final IntsRefBuilder expected = new IntsRefBuilder();
  expected.append('a');

  final Set<IntsRef> accepted = TestOperations.getFiniteStrings(automaton);
  assertEquals(1, accepted.size());
  assertTrue(accepted.contains(expected.toIntsRef()));
}
Example 9
Source File: TestAutomaton.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Converts {@code s} into an {@link IntsRef} holding one int per Unicode code
 * point (surrogate pairs are combined into a single value).
 */
private static IntsRef toIntsRef(String s) {
  final IntsRefBuilder codePoints = new IntsRefBuilder();
  int pos = 0;
  while (pos < s.length()) {
    final int codePoint = s.codePointAt(pos);
    codePoints.append(codePoint);
    // Step over one or two chars, depending on the code point just read.
    pos += Character.charCount(codePoint);
  }
  return codePoints.get();
}
Example 10
Source File: FstDecompounder.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Decodes a UTF-16 character sequence into full Unicode code points.
 *
 * @param s       the characters to decode
 * @param builder builder that is cleared and then receives one int per code point
 * @return the same {@code builder} instance, for chaining
 */
private static IntsRefBuilder fromUTF16ToUTF32(CharSequence s, IntsRefBuilder builder) {
  builder.clear();
  final int length = s.length();
  int codePoint;
  for (int pos = 0; pos < length; pos += Character.charCount(codePoint)) {
    codePoint = Character.codePointAt(s, pos);
    builder.append(codePoint);
  }
  return builder;
}
Example 11
Source File: FacetsConfig.java From lucene-solr with Apache License 2.0 | 4 votes |
private void processFacetFields(TaxonomyWriter taxoWriter, Map<String,List<FacetField>> byField, Document doc) throws IOException { for(Map.Entry<String,List<FacetField>> ent : byField.entrySet()) { String indexFieldName = ent.getKey(); //System.out.println(" indexFieldName=" + indexFieldName + " fields=" + ent.getValue()); IntsRefBuilder ordinals = new IntsRefBuilder(); for(FacetField facetField : ent.getValue()) { FacetsConfig.DimConfig ft = getDimConfig(facetField.dim); if (facetField.path.length > 1 && ft.hierarchical == false) { throw new IllegalArgumentException("dimension \"" + facetField.dim + "\" is not hierarchical yet has " + facetField.path.length + " components"); } FacetLabel cp = new FacetLabel(facetField.dim, facetField.path); checkTaxoWriter(taxoWriter); int ordinal = taxoWriter.addCategory(cp); ordinals.append(ordinal); //System.out.println("ords[" + (ordinals.length-1) + "]=" + ordinal); //System.out.println(" add cp=" + cp); if (ft.multiValued && (ft.hierarchical || ft.requireDimCount)) { //System.out.println(" add parents"); // Add all parents too: int parent = taxoWriter.getParent(ordinal); while (parent > 0) { ordinals.append(parent); parent = taxoWriter.getParent(parent); } if (ft.requireDimCount == false) { // Remove last (dimension) ord: ordinals.setLength(ordinals.length() - 1); } } // Drill down: int start; if (ft.requireDimensionDrillDown) { start = 1; } else { start = 2; } for (int i=start;i<=cp.length;i++) { doc.add(new StringField(indexFieldName, pathToString(cp.components, i), Field.Store.NO)); } } // Facet counts: // DocValues are considered stored fields: doc.add(new BinaryDocValuesField(indexFieldName, dedupAndEncode(ordinals.get()))); } }