Java Code Examples for org.apache.lucene.util.fst.Util#get()

The following examples show how to use org.apache.lucene.util.fst.Util#get(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: BooleanPerceptronClassifier.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Classifies the given text by summing the FST outputs of its analyzed tokens
 * and comparing that sum against {@code bias}.
 *
 * <p>Tokens for which the FST lookup returns {@code null} contribute nothing to the sum.
 *
 * @param text the text to classify; it is tokenized with {@code analyzer} for {@code textFieldName}
 * @return a result whose class is {@code true} when the summed output is at least {@code bias},
 *         with a score of {@code 1 - exp(-|bias - sum| / bias)}
 * @throws IOException if tokenizing the text or reading the FST fails
 */
@Override
public ClassificationResult<Boolean> assignClass(String text)
        throws IOException {
  // Primitive accumulator: a boxed Long here would be unboxed and re-boxed on every "+=".
  long output = 0L;
  try (TokenStream tokenStream = analyzer.tokenStream(textFieldName, text)) {
    CharTermAttribute charTermAttribute = tokenStream
            .addAttribute(CharTermAttribute.class);
    tokenStream.reset();
    while (tokenStream.incrementToken()) {
      String s = charTermAttribute.toString();
      // Util.get yields null for a term the FST does not contain; such terms are skipped.
      Long d = Util.get(fst, new BytesRef(s));
      if (d != null) {
        output += d;
      }
    }
    tokenStream.end();
  }

  // Explicit cast keeps the distance computation in floating point, as before.
  double score = 1 - Math.exp(-1 * Math.abs(bias - (double) output) / bias);
  return new ClassificationResult<>(output >= bias, score);
}
 
Example 2
Source File: DatawaveArithmetic.java    From datawave with Apache License 2.0 5 votes vote down vote up
/**
 * Tests whether the string form of {@code object} is accepted by {@code fst}.
 *
 * @param object value whose {@code toString()} is used as the lookup key
 * @param fst    the FST to query; the lookup runs while holding this object's monitor
 * @return {@code true} when the lookup yields a non-null output, {@code false} otherwise
 * @throws IOException if reading the FST fails
 */
public static boolean matchesFst(Object object, FST fst) throws IOException {
    // Encode the key as UTF-16 code units into a fresh scratch builder.
    final IntsRefBuilder scratch = new IntsRefBuilder();
    final String text = object.toString();
    Util.toUTF16(text, scratch);
    final IntsRef key = scratch.get();

    final boolean found;
    synchronized (fst) {
        found = Util.get(fst, key) != null;
    }
    return found;
}
 
Example 3
Source File: BaseSynonymParserTestCase.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that the synonym map contains the expected entry for a word.
 *
 * <p>The entry is looked up in the map's FST, then decoded: the first vInt carries the
 * "keep original" flag in its lowest bit (a cleared bit means the original is kept) and the
 * synonym count in the remaining bits; it is followed by one word ordinal per synonym.
 *
 * @param synonynMap  the generated synonym map after parsing
 * @param word        word (phrase) whose synonyms are validated. Should be the value that comes out of the
 *                    analyzer. All spaces will be replaced by word separators.
 * @param includeOrig whether the entry is expected to include the original
 * @param synonyms    expected synonyms. All word separators are replaced with a single space.
 * @throws Exception if the lookup or decoding fails
 */
public static void assertEntryEquals(SynonymMap synonynMap, String word, boolean includeOrig, String[] synonyms)
    throws Exception {
  final String key = word.replace(' ', SynonymMap.WORD_SEPARATOR);
  final IntsRefBuilder keyScratch = new IntsRefBuilder();
  final BytesRef entry = Util.get(synonynMap.fst, Util.toUTF32(new CharsRef(key), keyScratch));
  assertNotNull("No synonyms found for: " + key, entry);

  final ByteArrayDataInput in = new ByteArrayDataInput(entry.bytes, entry.offset, entry.length);
  final int header = in.readVInt();

  // Lowest header bit: 0 => original is kept.
  final boolean keepOrig = (header & 0x1) != 1;
  assertEquals("Include original different than expected. Expected " + includeOrig + " was " + keepOrig,
      includeOrig, keepOrig);

  // Remaining header bits hold the number of synonyms that follow.
  final int count = header >>> 1;
  assertEquals("Invalid synonym count. Expected " + synonyms.length + " was " + count,
      synonyms.length, count);

  final Set<String> expected = new HashSet<>(Arrays.asList(synonyms));
  final BytesRef scratch = new BytesRef();

  int remaining = count;
  while (remaining > 0) {
    final int ord = in.readVInt();
    synonynMap.words.get(ord, scratch);
    final String synonym = scratch.utf8ToString().replace(SynonymMap.WORD_SEPARATOR, ' ');
    assertTrue("Unexpected synonym found: " + synonym, expected.contains(synonym));
    remaining--;
  }
}
 
Example 4
Source File: BooleanPerceptronClassifier.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Recomputes the weights of every term in a document's term vector and optionally rebuilds the FST.
 *
 * <p>For each term, the new weight is {@code max(0, previous + modifier * termFreq)} where
 * {@code previous} is the term's current FST output; a term with no FST output gets weight 0.
 * No weight is touched when {@code assignedClass} is {@code null}.
 *
 * @param indexReader   reader used to fetch the document's term vector
 * @param docId         id of the document whose terms are processed
 * @param assignedClass class assigned to the document; {@code null} disables weight updates
 * @param weights       map receiving the updated weight of each term
 * @param modifier      factor applied to each term's frequency before adding it to the previous weight
 * @param updateFST     whether to call {@code updateFST(weights)} after processing all terms
 * @throws IOException if the field has no stored term vector, or on a read failure
 */
private void updateWeights(IndexReader indexReader,
                           int docId, Boolean assignedClass, SortedMap<String, Double> weights,
                           double modifier, boolean updateFST) throws IOException {
  TermsEnum cte = textTerms.iterator();

  // term vector of the document for the text field
  Terms docVector = indexReader.getTermVector(docId, textFieldName);
  if (docVector == null) {
    throw new IOException("term vectors must be stored for field "
            + textFieldName);
  }

  TermsEnum docTerms = docVector.iterator();
  for (BytesRef term = docTerms.next(); term != null; term = docTerms.next()) {
    // NOTE(review): the result of this seek is not read anywhere in this method — confirm it is still needed.
    cte.seekExact(term);
    if (assignedClass == null) {
      continue;
    }

    long termFreqLocal = docTerms.totalTermFreq();
    Long previousValue = Util.get(fst, term);
    String termString = term.utf8ToString();

    // Terms absent from the FST get weight 0; otherwise the adjusted weight is clamped at 0.
    double newWeight;
    if (previousValue == null) {
      newWeight = 0;
    } else {
      newWeight = Math.max(0, previousValue + modifier * termFreqLocal);
    }
    weights.put(termString, newWeight);
  }

  if (updateFST) {
    updateFST(weights);
  }
}
 
Example 5
Source File: BaseSynonymParserTestCase.java    From lucene-solr with Apache License 2.0 2 votes vote down vote up
/**
 * Asserts that the synonym map has no entry for the given word.
 *
 * @param synonynMap  the generated synonym map after parsing
 * @param word        word (phrase) expected to have no synonyms. Should be the value that comes out of the
 *                    analyzer. All spaces will be replaced by word separators.
 * @throws IOException if the FST lookup fails
 */
public static void assertEntryAbsent(SynonymMap synonynMap, String word) throws IOException {
  final String key = word.replace(' ', SynonymMap.WORD_SEPARATOR);
  final IntsRefBuilder keyScratch = new IntsRefBuilder();
  // A null FST output means the map holds nothing for this key.
  final BytesRef entry = Util.get(synonynMap.fst, Util.toUTF32(new CharsRef(key), keyScratch));
  assertNull("There should be no synonyms for: " + key, entry);
}