Java Code Examples for org.apache.lucene.util.IntsRef

The following examples show how to use org.apache.lucene.util.IntsRef. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Builds an FST that accepts the given strings and carries no outputs.
 *
 * <p>Each value is converted with {@code Util.toUTF16} and added to the builder in the
 * iteration order of {@code values}.
 *
 * @param values the strings to add to the FST
 * @return the result of {@code finish()} on the underlying builder
 * @throws IOException if adding an entry or finishing the FST fails
 */
public static FST<?> getFST(SortedSet<String> values) throws IOException {
    // No payload is attached to any entry.
    final Outputs<Object> noOutputs = NoOutputs.getSingleton();

    // Builder options, spelled out explicitly with their default values.
    final FST.INPUT_TYPE inputKind = FST.INPUT_TYPE.BYTE1;
    final int minSuffixCount1 = 0;
    final int minSuffixCount2 = 0;
    final boolean shareSuffixes = true;
    final boolean shareNonSingletonNodes = true;
    final int maxSharedTailLength = Integer.MAX_VALUE;
    final boolean arrayArcsAllowed = true;
    final int pageBits = 15;

    final org.apache.lucene.util.fst.Builder<Object> builder =
            new org.apache.lucene.util.fst.Builder<>(inputKind, minSuffixCount1, minSuffixCount2,
                    shareSuffixes, shareNonSingletonNodes, maxSharedTailLength, noOutputs,
                    arrayArcsAllowed, pageBits);

    // One scratch builder is reused for every value.
    final IntsRefBuilder scratch = new IntsRefBuilder();
    for (String value : values) {
        Util.toUTF16(value, scratch);
        builder.add(scratch.get(), noOutputs.getNoOutput());
    }
    return builder.finish();
}
 
Example 2
Source Project: lucene-solr   Source File: FuzzyCompletionQuery.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Sets {@code currentBoost} to the length of the longest common prefix between
 * {@code pathPrefix} and any entry of {@code refs}.
 *
 * @param pathPrefix the matched path prefix to compare against each reference
 */
@Override
protected void setNextMatch(IntsRef pathPrefix) {
  // NOTE: the last letter of the matched prefix for the exact match never
  // makes it through here, so an exact match and a match with only an edit
  // at the end are boosted the same.
  int longestCommonPrefix = 0;
  for (IntsRef candidate : refs) {
    final int limit = Math.min(candidate.length, pathPrefix.length);
    int matched = 0;
    // Advance while both sequences agree, honoring each ref's own offset.
    while (matched < limit
        && candidate.ints[candidate.offset + matched] == pathPrefix.ints[pathPrefix.offset + matched]) {
      matched++;
    }
    longestCommonPrefix = Math.max(longestCommonPrefix, matched);
  }
  currentBoost = longestCommonPrefix;
}
 
Example 3
Source Project: lucene-solr   Source File: ContextQuery.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the automaton that matches the context part of a suggestion, including the
 * trailing context separator.
 *
 * <p>When {@code matchAllContexts} is set or no contexts are given, the result is
 * "any string followed by the separator". Otherwise it is the union, over all contexts,
 * of the context string (followed by any string when the context is not exact) and the
 * separator.
 *
 * @param contexts the contexts and their metadata
 * @param matchAllContexts whether every context should match
 * @return the combined context automaton
 */
private static Automaton toContextAutomaton(final Map<IntsRef, ContextMetaData> contexts, final boolean matchAllContexts) {
  final Automaton anyString = Operations.repeat(Automata.makeAnyString());
  final Automaton separator = Automata.makeChar(ContextSuggestField.CONTEXT_SEPARATOR);

  if (matchAllContexts || contexts.size() == 0) {
    return Operations.concatenate(anyString, separator);
  }

  Automaton combined = null;
  for (Map.Entry<IntsRef, ContextMetaData> entry : contexts.entrySet()) {
    final ContextMetaData meta = entry.getValue();
    final IntsRef key = entry.getKey();
    Automaton single = Automata.makeString(key.ints, key.offset, key.length);
    if (!meta.exact) {
      // Non-exact contexts also match anything that extends them.
      single = Operations.concatenate(single, anyString);
    }
    single = Operations.concatenate(single, separator);
    combined = (combined == null) ? single : Operations.union(combined, single);
  }
  return combined;
}
 
Example 4
Source Project: lucene-solr   Source File: ContextQuery.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Finds the first context length in {@code contextLengths} whose prefix of
 * {@code pathPrefix} is a key of {@code contextMap}, sets {@code currentBoost} to the
 * mapped value and forwards the full-length ref to {@code setInnerWeight}.
 *
 * <p>When no prefix is a known context, the boost is set to {@code 0f} and
 * {@code setInnerWeight} is called with offset {@code 0}.
 *
 * @param pathPrefix the matched path prefix; it is cloned, not modified
 */
@Override
protected void setNextMatch(final IntsRef pathPrefix) {
  final IntsRef candidate = pathPrefix.clone();
  final int fullLength = pathPrefix.length;

  // Probe the pathPrefix against every defined context, longer contexts first.
  for (int contextLength : contextLengths) {
    if (contextLength <= fullLength) {
      candidate.length = contextLength;
      if (contextMap.containsKey(candidate)) {
        currentBoost = contextMap.get(candidate);
        // Restore the full length before handing the ref on.
        candidate.length = fullLength;
        setInnerWeight(candidate, contextLength);
        return;
      }
    }
  }

  // Unknown context.
  candidate.length = fullLength;
  currentBoost = 0f;
  setInnerWeight(candidate, 0);
}
 
Example 5
Source Project: lucene-solr   Source File: ContextQuery.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Scans {@code ref} for the first context separator at or after {@code offset}, records the
 * context that precedes it in {@code currentContext}, and passes the remainder of the input
 * to {@code innerWeight.setNextMatch}.
 *
 * <p>If no separator is found, nothing is updated and the inner weight is not called.
 * Note that {@code ref.offset} and {@code ref.length} are modified in place.
 *
 * @param ref the full matched input; mutated by this method
 * @param offset the relative position (added to {@code ref.offset}) at which scanning starts
 */
private void setInnerWeight(IntsRef ref, int offset) {
  IntsRefBuilder refBuilder = new IntsRefBuilder();
  for (int i = offset; i < ref.length; i++) {
    if (ref.ints[ref.offset + i] == ContextSuggestField.CONTEXT_SEPARATOR) {
      if (i > 0) {
        // Copy the i ints before the separator and decode them as the context string.
        refBuilder.copyInts(ref.ints, ref.offset, i);
        currentContext = Util.toBytesRef(refBuilder.get(), scratch).utf8ToString();
      } else {
        // Separator is the very first element: there is no context.
        currentContext = null;
      }
      // NOTE(review): this overwrites ref.offset with the relative index rather than adding
      // to it, which is only equivalent when the incoming ref.offset is 0 -- confirm callers.
      ref.offset = ++i;
      assert ref.offset < ref.length : "input should not end with the context separator";
      // NOTE(review): indexes with i, not ref.offset + i; same value only under the
      // zero-offset assumption noted above.
      if (ref.ints[i] == ConcatenateGraphFilter.SEP_LABEL) {
        // Skip the SEP_LABEL that directly follows the context separator.
        ref.offset++;
        assert ref.offset < ref.length : "input should not end with a context separator followed by SEP_LABEL";
      }
      // What remains after the separator (and optional SEP_LABEL) goes to the inner weight.
      ref.length = ref.length - ref.offset;
      refBuilder.copyInts(ref.ints, ref.offset, ref.length);
      innerWeight.setNextMatch(refBuilder.get());
      return;
    }
  }
}
 
Example 6
Source Project: lucene-solr   Source File: TestFSTsMisc.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code doTest} on randomly generated, de-duplicated term sets.
 *
 * <p>For each iteration and each of the two input modes, a random number of distinct terms
 * (between 0 and {@code maxNumWords} inclusive) is generated and handed to {@code doTest}.
 *
 * @param maxNumWords upper bound (inclusive) on the number of terms per run
 * @param numIter number of iterations to perform
 * @throws IOException if {@code doTest} fails with an I/O error
 */
private void testRandomWords(int maxNumWords, int numIter) throws IOException {
  Random random = new Random(random().nextLong());
  for (int iter = 0; iter < numIter; iter++) {
    if (VERBOSE) {
      System.out.println("\nTEST: iter " + iter);
    }
    for (int inputMode = 0; inputMode < 2; inputMode++) {
      final int numWords = random.nextInt(maxNumWords + 1);
      // A set de-duplicates the random terms; keep drawing until numWords distinct ones exist.
      // (The previously allocated, never-read IntsRef[] "terms" array has been removed.)
      Set<IntsRef> termsSet = new HashSet<>();
      while (termsSet.size() < numWords) {
        final String term = getRandomString(random);
        termsSet.add(toIntsRef(term, inputMode));
      }
      doTest(inputMode, termsSet.toArray(new IntsRef[termsSet.size()]));
    }
  }
}
 
Example 7
Source Project: lucene-solr   Source File: TaxonomyFacetCounts.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Increments the count of every ordinal of every matching document, then calls
 * {@code rollup()}.
 *
 * @param matchingDocs the per-segment hits to count
 * @throws IOException if reading ordinals or iterating documents fails
 */
private final void count(List<MatchingDocs> matchingDocs) throws IOException {
  // Reused across all documents; filled by the segment reader on each get().
  final IntsRef ordinals = new IntsRef();

  for (MatchingDocs hits : matchingDocs) {
    final OrdinalsReader.OrdinalsSegmentReader segmentOrds = ordinalsReader.getReader(hits.context);
    final DocIdSetIterator docIt = hits.bits.iterator();

    for (int doc = docIt.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docIt.nextDoc()) {
      segmentOrds.get(doc, ordinals);
      final int end = ordinals.offset + ordinals.length;
      for (int pos = ordinals.offset; pos < end; pos++) {
        increment(ordinals.ints[pos]);
      }
    }
  }

  rollup();
}
 
Example 8
Source Project: lucene-solr   Source File: TokenInfoDictionaryTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a two-entry dictionary and checks the left id, right id and word cost reported
 * for the first word id returned by {@code lookupWordIds(0, ...)} and
 * {@code lookupWordIds(1, ...)}.
 */
public void testPut() throws Exception {
  final String smallIdEntry = "名詞,1,1,2,名詞,一般,*,*,*,*,*,*,*";
  // "large" id
  final String largeIdEntry = "一般,5000,5000,3,名詞,一般,*,*,*,*,*,*,*";
  final TokenInfoDictionary dict = newDictionary(smallIdEntry, largeIdEntry);
  final IntsRef ids = new IntsRefBuilder().get();

  dict.lookupWordIds(0, ids);
  final int firstId = ids.ints[ids.offset];
  assertEquals(5000, dict.getLeftId(firstId));
  assertEquals(5000, dict.getRightId(firstId));
  assertEquals(3, dict.getWordCost(firstId));

  dict.lookupWordIds(1, ids);
  final int secondId = ids.ints[ids.offset];
  assertEquals(1, dict.getLeftId(secondId));
  assertEquals(1, dict.getRightId(secondId));
  assertEquals(2, dict.getWordCost(secondId));
}
 
Example 9
Source Project: lucene-solr   Source File: TokenInfoDictionaryTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a two-entry dictionary and checks the left id, right id and word cost reported
 * for the first word id returned by {@code lookupWordIds(0, ...)} and
 * {@code lookupWordIds(1, ...)}.
 */
public void testPut() throws Exception {
  final String smallIdEntry = "명사,1,1,2,NNG,*,*,*,*,*,*,*";
  // "large" id
  final String largeIdEntry = "일반,5000,5000,3,NNG,*,*,*,*,*,*,*";
  final TokenInfoDictionary dict = newDictionary(smallIdEntry, largeIdEntry);
  final IntsRef ids = new IntsRefBuilder().get();

  dict.lookupWordIds(0, ids);
  final int firstId = ids.ints[ids.offset];
  assertEquals(1, dict.getLeftId(firstId));
  assertEquals(1, dict.getRightId(firstId));
  assertEquals(2, dict.getWordCost(firstId));

  dict.lookupWordIds(1, ids);
  final int secondId = ids.ints[ids.offset];
  assertEquals(5000, dict.getLeftId(secondId));
  assertEquals(5000, dict.getRightId(secondId));
  assertEquals(3, dict.getWordCost(secondId));
}
 
Example 10
Source Project: lucene-solr   Source File: TestDictionary.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Loads the "compressed" affix/dictionary resources and checks the suffix and prefix
 * lookup sizes and the number of flags decoded for the word "olr".
 *
 * <p>The directory and both streams are managed by try-with-resources so they are released
 * even when an assertion fails part-way through the test.
 */
public void testCompressedDictionary() throws Exception {
  try (Directory tempDir = getDirectory();
       InputStream affixStream = getClass().getResourceAsStream("compressed.aff");
       InputStream dictStream = getClass().getResourceAsStream("compressed.dic")) {
    Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream);
    assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
    assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
    IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
    BytesRef ref = new BytesRef();
    // An IntsRef is a slice: its first element lives at ints[offset], not ints[0].
    dictionary.flagLookup.get(ordList.ints[ordList.offset], ref);
    char[] flags = Dictionary.decodeFlags(ref);
    assertEquals(1, flags.length);
  }
}
 
Example 11
Source Project: lucene-solr   Source File: TestDictionary.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Loads the "compressed-before-set" affix resource with the "compressed" dictionary and
 * checks the suffix and prefix lookup sizes and the number of flags decoded for "olr".
 *
 * <p>The directory and both streams are managed by try-with-resources so they are released
 * even when an assertion fails part-way through the test.
 */
public void testCompressedBeforeSetDictionary() throws Exception {
  try (Directory tempDir = getDirectory();
       InputStream affixStream = getClass().getResourceAsStream("compressed-before-set.aff");
       InputStream dictStream = getClass().getResourceAsStream("compressed.dic")) {
    Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream);
    assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
    assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
    IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
    BytesRef ref = new BytesRef();
    // An IntsRef is a slice: its first element lives at ints[offset], not ints[0].
    dictionary.flagLookup.get(ordList.ints[ordList.offset], ref);
    char[] flags = Dictionary.decodeFlags(ref);
    assertEquals(1, flags.length);
  }
}
 
Example 12
Source Project: lucene-solr   Source File: IntSequenceOutputs.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Removes the prefix {@code inc} from {@code output}.
 *
 * @param output the full output; must not be null
 * @param inc the prefix to strip; must not be null
 * @return {@code output} itself when {@code inc} is {@code NO_OUTPUT}, {@code NO_OUTPUT}
 *     when both have the same length, otherwise a new ref over the same array that
 *     starts after the prefix
 */
@Override
public IntsRef subtract(IntsRef output, IntsRef inc) {
  assert output != null;
  assert inc != null;

  if (inc == NO_OUTPUT) {
    // Nothing to strip.
    return output;
  }
  if (inc.length == output.length) {
    // The whole output is consumed by the prefix.
    return NO_OUTPUT;
  }

  assert inc.length < output.length: "inc.length=" + inc.length + " vs output.length=" + output.length;
  assert inc.length > 0;
  final int remaining = output.length - inc.length;
  // Shares the backing array; only offset and length change.
  return new IntsRef(output.ints, output.offset + inc.length, remaining);
}
 
Example 13
Source Project: lucene-solr   Source File: IntSequenceOutputs.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Concatenates {@code prefix} and {@code output}.
 *
 * @param prefix the leading part; must not be null
 * @param output the trailing part; must not be null
 * @return the other argument when one of them is {@code NO_OUTPUT}, otherwise a newly
 *     allocated ref holding {@code prefix} followed by {@code output}
 */
@Override
public IntsRef add(IntsRef prefix, IntsRef output) {
  assert prefix != null;
  assert output != null;

  if (prefix == NO_OUTPUT) {
    return output;
  }
  if (output == NO_OUTPUT) {
    return prefix;
  }

  assert prefix.length > 0;
  assert output.length > 0;
  final int total = prefix.length + output.length;
  final IntsRef joined = new IntsRef(total);
  System.arraycopy(prefix.ints, prefix.offset, joined.ints, 0, prefix.length);
  System.arraycopy(output.ints, output.offset, joined.ints, prefix.length, output.length);
  joined.length = total;
  return joined;
}
 
Example 14
Source Project: lucene-solr   Source File: Util.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Walks the FST along {@code input} and returns the accumulated output.
 *
 * @param fst the FST to search
 * @param input the input sequence to look up
 * @return the output for {@code input}, or null if the input is not accepted
 * @throws IOException if reading the FST fails
 */
public static<T> T get(FST<T> fst, IntsRef input) throws IOException {

  // TODO: would be nice not to alloc this on every lookup
  final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());

  final BytesReader fstReader = fst.getBytesReader();

  // Running sum of the outputs seen along the path.
  T accumulated = fst.outputs.getNoOutput();
  final int end = input.offset + input.length;
  for (int pos = input.offset; pos < end; pos++) {
    if (fst.findTargetArc(input.ints[pos], arc, arc, fstReader) == null) {
      // No arc for this label: the input is not in the FST.
      return null;
    }
    accumulated = fst.outputs.add(accumulated, arc.output());
  }

  // Only a final arc means the whole input is accepted.
  return arc.isFinal() ? fst.outputs.add(accumulated, arc.nextFinalOutput()) : null;
}
 
Example 15
Source Project: datawave   Source File: DatawaveArithmetic.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Tests whether the string form of {@code object} is accepted by {@code fst}.
 *
 * <p>The lookup itself runs while holding the monitor of {@code fst}.
 *
 * @param object the value whose {@code toString()} is looked up
 * @param fst the FST to search
 * @return true if {@code Util.get} returns a non-null output
 * @throws IOException if the FST lookup fails
 */
public static boolean matchesFst(Object object, FST fst) throws IOException {
    final IntsRefBuilder keyBuilder = new IntsRefBuilder();
    Util.toUTF16(object.toString(), keyBuilder);
    final IntsRef key = keyBuilder.get();

    final Object match;
    synchronized (fst) {
        match = Util.get(fst, key);
    }
    return match != null;
}
 
Example 16
Source Project: Elasticsearch   Source File: XAnalyzingSuggester.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts a token stream into the set of finite strings accepted by its automaton.
 *
 * <p>The stream is closed once it has been turned into an automaton. Expansion is bounded
 * by {@code maxGraphExpansions}, and every string is deep-copied before being stored.
 *
 * @param stream the token stream to expand; closed by this method
 * @return an unmodifiable set of the finite strings
 * @throws IOException if consuming or closing the stream fails
 */
public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
    final TokenStreamToAutomaton converter = getTokenStreamToAutomaton();
    Automaton automaton;
    try (TokenStream ts = stream) {
        automaton = toAutomaton(ts, converter);
    }

    final LimitedFiniteStringsIterator strings =
            new LimitedFiniteStringsIterator(automaton, maxGraphExpansions);
    final Set<IntsRef> result = new HashSet<>();
    IntsRef current;
    while ((current = strings.next()) != null) {
        // Deep copy each string before storing it.
        result.add(IntsRef.deepCopyOf(current));
    }
    return Collections.unmodifiableSet(result);
}
 
Example 17
Source Project: lucene-solr   Source File: VariableGapTermsIndexWriter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the per-field FST writer and seeds the FST with the empty string mapped to
 * {@code termsFilePointer}.
 *
 * @param fieldInfo the field this writer belongs to
 * @param termsFilePointer the terms-file pointer recorded as the start pointer and as the
 *     output of the empty-string entry
 * @throws IOException if adding the initial entry fails
 */
public FSTFieldWriter(FieldInfo fieldInfo, long termsFilePointer) throws IOException {
  this.fieldInfo = fieldInfo;
  startTermsFilePointer = termsFilePointer;

  fstOutputs = PositiveIntOutputs.getSingleton();
  fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, fstOutputs);

  // Remember where this field's index begins in the output.
  indexStart = out.getFilePointer();

  // The empty string is always present in the FST.
  fstCompiler.add(new IntsRef(), termsFilePointer);
}
 
Example 18
Source Project: lucene-solr   Source File: FiniteStringsIteratorTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Narrows each int of {@code ints} to a byte and decodes the result as UTF-8.
 *
 * <p>The slice is read starting at {@code ints.offset}; the previous version indexed the
 * backing array from 0 and so returned the wrong text for any ref with a non-zero offset.
 *
 * @param ints the int sequence to convert; each value is cast to {@code byte}
 * @return the UTF-8 decoding of the narrowed bytes
 */
private static String toString(IntsRef ints) {
  BytesRef br = new BytesRef(ints.length);
  for (int i = 0; i < ints.length; i++) {
    br.bytes[i] = (byte) ints.ints[ints.offset + i];
  }
  br.length = ints.length;
  return br.utf8ToString();
}
 
Example 19
Source Project: lucene-solr   Source File: ContextQuery.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Registers a context with its boost. Pass {@code exact = false} when the context is a
 * prefix of any indexed contexts.
 *
 * @param context the context value; must not contain the reserved context separator
 * @param boost the boost for this context; must be &gt;= 0
 * @param exact whether the context must match exactly
 * @throws IllegalArgumentException if {@code boost} is negative or {@code context}
 *     contains the context separator
 */
public void addContext(CharSequence context, float boost, boolean exact) {
  if (boost < 0f) {
    throw new IllegalArgumentException("'boost' must be >= 0");
  }

  final int contextLen = context.length();
  for (int pos = 0; pos < contextLen; pos++) {
    final char c = context.charAt(pos);
    if (c == ContextSuggestField.CONTEXT_SEPARATOR) {
      throw new IllegalArgumentException("Illegal value [" + context + "] UTF-16 codepoint [0x"
          + Integer.toHexString(c) + "] at position " + pos + " is a reserved character");
    }
  }

  // Deep copy the converted key before storing it; the conversion goes through `scratch`.
  final IntsRef key = IntsRef.deepCopyOf(Util.toIntsRef(new BytesRef(context), scratch));
  contexts.put(key, new ContextMetaData(boost, exact));
  updateRamBytesUsed();
}
 
Example 20
Source Project: lucene-solr   Source File: TestAutomaton.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code Operations.getSingleton} returns an empty {@code IntsRef} for an
 * automaton whose only state is an accept state.
 */
public void testGetSingletonEmptyString() {
  final Automaton singleAcceptState = new Automaton();
  final int state = singleAcceptState.createState();
  singleAcceptState.setAccept(state, true);
  singleAcceptState.finishState();

  final IntsRef expected = new IntsRef();
  assertEquals(expected, Operations.getSingleton(singleAcceptState));
}
 
Example 21
Source Project: lucene-solr   Source File: ContextQuery.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the weight for this context query by prefixing the inner query's automaton with
 * the context automaton and collecting the per-context boosts.
 *
 * @param searcher the searcher used to create the inner weight
 * @param scoreMode the score mode passed to the inner query
 * @param boost the boost passed to the inner query
 * @return a plain {@code CompletionWeight} when the inner automaton has no states,
 *     otherwise a {@code ContextCompletionWeight}
 * @throws IOException if creating a weight fails
 */
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  final CompletionWeight innerWeight = ((CompletionWeight) innerQuery.createWeight(searcher, scoreMode, boost));
  final Automaton innerAutomaton = innerWeight.getAutomaton();

  // An inner automaton that matches nothing yields an empty weight, so scoring does not
  // have to traverse all contexts.
  if (innerAutomaton.getNumStates() == 0) {
    return new CompletionWeight(this, innerAutomaton);
  }

  // When separators are preserved the fst contains a SEP_LABEL behind each gap, so the
  // query automaton has to accept an optional SEP_LABEL as well.
  final Automaton optionalSep = Operations.optional(Automata.makeChar(ConcatenateGraphFilter.SEP_LABEL));
  final Automaton prefixed = Operations.concatenate(optionalSep, innerAutomaton);
  final Automaton withContexts = Operations.concatenate(toContextAutomaton(contexts, matchAllContexts), prefixed);
  final Automaton contextsAutomaton =
      Operations.determinize(withContexts, Operations.DEFAULT_MAX_DETERMINIZED_STATES);

  // Collect the boost per context and the distinct context lengths.
  final Map<IntsRef, Float> contextMap = new HashMap<>(contexts.size());
  final TreeSet<Integer> distinctLengths = new TreeSet<>();
  for (Map.Entry<IntsRef, ContextMetaData> entry : contexts.entrySet()) {
    final IntsRef key = entry.getKey();
    contextMap.put(key, entry.getValue().boost);
    distinctLengths.add(key.length);
  }

  // Lengths are handed over longest first.
  final int[] lengthsDescending = new int[distinctLengths.size()];
  int slot = 0;
  for (int length : distinctLengths.descendingSet()) {
    lengthsDescending[slot++] = length;
  }
  return new ContextCompletionWeight(this, contextsAutomaton, innerWeight, contextMap, lengthsDescending);
}
 
Example 22
Source Project: lucene-solr   Source File: ContextQuery.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a context-aware completion weight.
 *
 * @param query the query passed to the superclass
 * @param automaton the automaton passed to the superclass
 * @param innerWeight the weight of the wrapped query
 * @param contextMap the boost for each context
 * @param contextLengths the context lengths to probe; stored as given
 * @throws IOException if the superclass constructor fails
 */
public ContextCompletionWeight(CompletionQuery query, Automaton automaton, CompletionWeight innerWeight,
                               Map<IntsRef, Float> contextMap,
                               int[] contextLengths) throws IOException {
  super(query, automaton);
  // The references are stored as given; no defensive copies are made.
  this.innerWeight = innerWeight;
  this.contextMap = contextMap;
  this.contextLengths = contextLengths;
}
 
Example 23
Source Project: lucene-solr   Source File: FuzzySuggesterTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts a string into an {@code IntsRef} of its Unicode code points.
 *
 * @param s the string to convert
 * @return a ref whose length is the number of code points in {@code s}
 */
private static IntsRef toIntsRef(String s) {
  final int utf16Len = s.length();
  final IntsRef ref = new IntsRef(utf16Len); // worst case: one code point per char
  int i = 0;
  while (i < utf16Len) {
    final int cp = Character.codePointAt(s, i);
    ref.ints[ref.length++] = cp;
    // Step over one or two chars depending on the code point just read.
    i += Character.charCount(cp);
  }
  return ref;
}
 
Example 24
Source Project: lucene-solr   Source File: TestAutomaton.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts a string into an {@code IntsRef} holding one entry per Unicode code point.
 *
 * @param s the string to convert
 * @return the ref produced by the builder after all code points are appended
 */
private static IntsRef toIntsRef(String s) {
  final IntsRefBuilder builder = new IntsRefBuilder();
  int pos = 0;
  while (pos < s.length()) {
    final int codePoint = s.codePointAt(pos);
    builder.append(codePoint);
    // Step over one or two chars depending on the code point just read.
    pos += Character.charCount(codePoint);
  }
  return builder.get();
}
 
Example 25
Source Project: lucene-solr   Source File: Util.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts an {@code IntsRef} to a {@code BytesRef} by narrowing each int to a byte; the
 * caller must ensure the int values fit into a byte.
 *
 * @param input the ints to convert
 * @param scratch the builder that receives the bytes and backs the returned ref
 * @return the {@code BytesRef} obtained from {@code scratch}
 */
public static BytesRef toBytesRef(IntsRef input, BytesRefBuilder scratch) {
  final int count = input.length;
  scratch.grow(count);
  for (int dst = 0, src = input.offset; dst < count; dst++, src++) {
    final int value = input.ints[src];
    // NOTE: we allow -128 to 255
    assert value >= Byte.MIN_VALUE && value <= 255: "value " + value + " doesn't fit into byte";
    scratch.setByteAt(dst, (byte) value);
  }
  scratch.setLength(count);
  return scratch.get();
}
 
Example 26
Source Project: lucene-solr   Source File: CachedOrdinalsReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns a segment reader that serves ordinals straight from the cached arrays.
 *
 * <p>The returned reader does not copy: it points the supplied {@code IntsRef} at the
 * cached ordinals array and sets its offset and length for the requested document.
 *
 * @param context the segment to read
 * @return a reader backed by the segment's cached ordinals
 * @throws IOException if obtaining the cached ordinals fails
 */
@Override
public OrdinalsSegmentReader getReader(LeafReaderContext context) throws IOException {
  final CachedOrds segmentCache = getCachedOrds(context);
  return new OrdinalsSegmentReader() {
    @Override
    public void get(int docID, IntsRef ordinals) {
      ordinals.ints = segmentCache.ordinals;
      final int start = segmentCache.offsets[docID];
      ordinals.offset = start;
      // The next document's start offset marks the end of this document's ordinals.
      final int end = segmentCache.offsets[docID + 1];
      ordinals.length = end - start;
    }
  };
}
 
Example 27
Source Project: lucene-solr   Source File: CachedOrdinalsReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a new {@link CachedOrds} by reading the ordinals of every document from the
 * given {@link OrdinalsSegmentReader}.
 *
 * <p>All ordinals are packed into one array; {@code offsets[doc]} and
 * {@code offsets[doc + 1]} delimit the ordinals of {@code doc}.
 *
 * @param source the reader to pull per-document ordinals from
 * @param maxDoc the number of documents to read (doc ids {@code 0} to {@code maxDoc - 1})
 * @throws IOException if reading from {@code source} fails
 * @throws IllegalStateException if the total number of ordinals exceeds
 *     {@code ArrayUtil.MAX_ARRAY_LENGTH}
 */
public CachedOrds(OrdinalsSegmentReader source, int maxDoc) throws IOException {
  offsets = new int[maxDoc + 1];
  int[] ords = new int[maxDoc]; // let's assume one ordinal per-document as an initial size

  // this aggregator is limited to Integer.MAX_VALUE total ordinals.
  long totOrds = 0;
  final IntsRef values = new IntsRef(32);
  for (int docID = 0; docID < maxDoc; docID++) {
    offsets[docID] = (int) totOrds;
    source.get(docID, values);
    long nextLength = totOrds + values.length;
    if (nextLength > ords.length) {
      if (nextLength > ArrayUtil.MAX_ARRAY_LENGTH) {
        throw new IllegalStateException("too many ordinals (>= " + nextLength + ") to cache");
      }
      ords = ArrayUtil.grow(ords, (int) nextLength);
    }
    // Copy from values.offset, not 0: a reader may hand back a slice of a larger array.
    System.arraycopy(values.ints, values.offset, ords, (int) totOrds, values.length);
    totOrds = nextLength;
  }
  offsets[maxDoc] = (int) totOrds;

  // if ords array is bigger by more than 10% of what we really need, shrink it
  if ((double) totOrds / ords.length < 0.9) {
    this.ordinals = new int[(int) totOrds];
    System.arraycopy(ords, 0, this.ordinals, 0, (int) totOrds);
  } else {
    this.ordinals = ords;
  }
}
 
Example 28
Source Project: lucene-solr   Source File: FiniteStringsIteratorTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the minimized union of "x" and "xy" yields exactly those two finite strings.
 */
public void testShortAccept() {
  final Automaton union = Operations.union(Automata.makeString("x"), Automata.makeString("xy"));
  final Automaton minimized = MinimizationOperations.minimize(union, DEFAULT_MAX_DETERMINIZED_STATES);
  final FiniteStringsIterator iterator = new FiniteStringsIterator(minimized);
  final List<IntsRef> actual = getFiniteStrings(iterator);
  assertEquals(2, actual.size());

  // Each expected string is converted and checked in turn, "x" first, then "xy".
  for (String expected : new String[] {"x", "xy"}) {
    final IntsRefBuilder builder = new IntsRefBuilder();
    Util.toIntsRef(new BytesRef(expected), builder);
    assertTrue(actual.contains(builder.get()));
  }
}
 
Example 29
Source Project: lucene-solr   Source File: Stemmer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Collects the stems for a word: first the dictionary forms of the word itself, filtered by
 * the keep-case, need-affix and only-in-compound flags, then everything returned by
 * {@code stem(...)}.
 *
 * @param word the characters of the word
 * @param length the number of characters of {@code word} to use
 * @param caseVariant whether the word is a case variant; enables the keep-case check
 * @return the list of stems found, possibly empty
 * @throws RuntimeException wrapping any {@link IOException} thrown by {@code stem(...)}
 */
private List<CharsRef> doStem(char word[], int length, boolean caseVariant) {
  List<CharsRef> stems = new ArrayList<>();
  IntsRef forms = dictionary.lookupWord(word, 0, length);
  if (forms != null) {
    // These checks do not depend on the form being inspected, so compute them once.
    final boolean checkKeepCase = caseVariant && dictionary.keepcase != -1;
    final boolean checkNeedAffix = dictionary.needaffix != -1;
    final boolean checkOnlyInCompound = dictionary.onlyincompound != -1;
    final boolean needsFlags = checkKeepCase || checkNeedAffix || checkOnlyInCompound;

    for (int i = 0; i < forms.length; i += formStep) {
      if (needsFlags) {
        dictionary.flagLookup.get(forms.ints[forms.offset + i], scratch);
        char wordFlags[] = Dictionary.decodeFlags(scratch);
        // we are looking for a case variant, but this word does not allow it
        if (checkKeepCase && Dictionary.hasFlag(wordFlags, (char) dictionary.keepcase)) {
          continue;
        }
        // we can't add this form, it's a pseudostem requiring an affix
        if (checkNeedAffix && Dictionary.hasFlag(wordFlags, (char) dictionary.needaffix)) {
          continue;
        }
        // we can't add this form, it only belongs inside a compound word
        if (checkOnlyInCompound && Dictionary.hasFlag(wordFlags, (char) dictionary.onlyincompound)) {
          continue;
        }
      }
      stems.add(newStem(word, length, forms, i));
    }
  }
  try {
    // The boolean result of addAll is not needed (the unused local was removed).
    stems.addAll(stem(word, length, -1, -1, -1, 0, true, true, false, false, caseVariant));
  } catch (IOException bogus) {
    throw new RuntimeException(bogus);
  }
  return stems;
}
 
Example 30
Source Project: lucene-solr   Source File: TestUTF32ToUTF8.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * For random strings, checks that converting the string's automaton with
 * {@code UTF32ToUTF8} yields exactly one finite string: the UTF-8 bytes of the string.
 */
public void testSingleton() throws Exception {
  final int iterations = atLeast(100);
  for (int iter = 0; iter < iterations; iter++) {
    final String text = TestUtil.randomRealisticUnicodeString(random());
    final Automaton codePointAutomaton = Automata.makeString(text);
    final Automaton utf8Automaton = new UTF32ToUTF8().convert(codePointAutomaton);

    // Expected: a single string made of the UTF-8 bytes of the text.
    final IntsRefBuilder utf8Ints = new IntsRefBuilder();
    Util.toIntsRef(new BytesRef(text), utf8Ints);
    final Set<IntsRef> expected = new HashSet<>();
    expected.add(utf8Ints.get());

    assertEquals(expected, TestOperations.getFiniteStrings(utf8Automaton));
  }
}