org.apache.lucene.search.spell.StringDistance Java Examples

The following examples show how to use org.apache.lucene.search.spell.StringDistance. You can vote up the examples you find useful or vote down the ones you don't, and you can go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example #1
Source File: ValueSourceParser.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Parses a string-distance function call: two value sources followed by the
 * name of the distance measure to apply to them.
 *
 * <p>The measure name is matched case-insensitively against the shorthands
 * {@code jw}, {@code edit} and {@code ngram}; {@code ngram} may be followed by
 * an optional gram size (2 when omitted). Any other name is passed to the
 * core's resource loader as a class name.
 *
 * @param fp parser supplying the function's arguments
 * @return a {@link StringDistanceFunction} over the two parsed sources
 * @throws SyntaxError if raised while reading the arguments
 */
@Override
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
  final ValueSource first = fp.parseValueSource();
  final ValueSource second = fp.parseValueSource();
  final String measureName = fp.parseArg();

  final StringDistance measure;
  if (measureName.equalsIgnoreCase("jw")) {
    measure = new JaroWinklerDistance();
  } else if (measureName.equalsIgnoreCase("edit")) {
    measure = new LevenshteinDistance();
  } else if (measureName.equalsIgnoreCase("ngram")) {
    // The gram size is an optional trailing argument.
    final int gramSize = fp.hasMoreArguments() ? fp.parseInt() : 2;
    measure = new NGramDistance(gramSize);
  } else {
    // Not a known shorthand: treat the argument as a class name.
    measure = fp.req.getCore().getResourceLoader().newInstance(measureName, StringDistance.class);
  }
  return new StringDistanceFunction(first, second, measure);
}
 
Example #2
Source File: ConjunctionSolrSpellChecker.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the distance measure held in {@code stringDistance}, deferring to
 * the superclass implementation while that field is still {@code null}.
 */
@Override
protected StringDistance getStringDistance() {
  return stringDistance != null ? stringDistance : super.getStringDistance();
}
 
Example #3
Source File: ConjunctionSolrSpellCheckerTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that checkers with equal distance measures can be added to a
 * {@link ConjunctionSolrSpellChecker}, and that adding a checker with a
 * different measure raises {@link IllegalArgumentException}.
 */
@Test
public void test() throws Exception {
  ConjunctionSolrSpellChecker conjunction = new ConjunctionSolrSpellChecker();

  // Choose one of the available distance implementations at random.
  int pick = random().nextInt(AVAILABLE_DISTANCES.length);
  @SuppressWarnings("unchecked")
  Class<StringDistance> chosenType = (Class<StringDistance>) AVAILABLE_DISTANCES[pick];

  // Two separately constructed instances of the same type must compare equal.
  StringDistance firstOfKind = chosenType.getConstructor().newInstance();
  StringDistance secondOfKind = chosenType.getConstructor().newInstance();

  // Per the original note, a default NGramDistance uses a gram size of 2, so a
  // size of 3 (or, failing that, 4) should give a measure that is not equal.
  StringDistance oddOneOut = new NGramDistance(3);
  if (firstOfKind.equals(oddOneOut)) {
    oddOneOut = new NGramDistance(4);
    if (firstOfKind.equals(oddOneOut)) {
      fail("Cannot set up test.  2 NGramDistances with different gram sizes should not be equal.");
    }
  }
  Assert.assertEquals(
      "The distance " + chosenType + " does not properly implement equals.",
      firstOfKind,
      secondOfKind);

  MockSolrSpellChecker matchingA = new MockSolrSpellChecker(firstOfKind);
  MockSolrSpellChecker matchingB = new MockSolrSpellChecker(secondOfKind);
  MockSolrSpellChecker mismatched = new MockSolrSpellChecker(oddOneOut);

  conjunction.addChecker(matchingA);
  conjunction.addChecker(matchingB);
  expectThrows(IllegalArgumentException.class, () -> conjunction.addChecker(mismatched));
}
 
Example #4
Source File: IndexBasedSpellCheckerTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Configures an index-based spell checker with {@link JaroWinklerDistance} as
 * its string distance and asserts that the built {@link SpellChecker} reports
 * an instance of that class.
 */
@Test
@SuppressWarnings({"unchecked"})
public void testAlternateDistance() throws Exception {
  TestSpellChecker testChecker = new TestSpellChecker();
  @SuppressWarnings({"rawtypes"})
  NamedList settings = new NamedList();
  settings.add("classname", IndexBasedSpellChecker.class.getName());

  File tempIndexDir = createTempDir().toFile();
  settings.add(AbstractLuceneSpellChecker.INDEX_DIR, tempIndexDir.getAbsolutePath());
  settings.add(AbstractLuceneSpellChecker.FIELD, "title");
  settings.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, settings);
  settings.add(AbstractLuceneSpellChecker.STRING_DISTANCE, JaroWinklerDistance.class.getName());

  SolrCore core = h.getCore();
  String dictName = testChecker.init(settings, core);
  assertTrue(
      dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
      dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME));

  h.getCore().withSearcher(searcher -> {
    testChecker.build(core, searcher);

    SpellChecker built = testChecker.getSpellChecker();
    assertTrue("sc is null and it shouldn't be", built != null);

    StringDistance configured = built.getStringDistance();
    assertTrue("sd is null and it shouldn't be", configured != null);
    assertTrue(
        "sd is not an instance of " + JaroWinklerDistance.class.getName(),
        configured instanceof JaroWinklerDistance);
    return null;
  });
}
 
Example #5
Source File: DirectSpellcheckerSettings.java    From Elasticsearch with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the string distance currently stored in these settings.
 *
 * @return the value of the {@code stringDistance} field
 */
public StringDistance stringDistance() {
    return this.stringDistance;
}
 
Example #6
Source File: DirectSpellcheckerSettings.java    From Elasticsearch with Apache License 2.0 4 votes vote down vote up
/**
 * Replaces the string distance stored in these settings.
 *
 * @param distance the distance to store; it is assigned as-is, without checks
 */
public void stringDistance(StringDistance distance) {
    stringDistance = distance;
}
 
Example #7
Source File: SolrSpellChecker.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Get the distance implementation used by this spellchecker.
 *
 * <p>This base implementation never returns a value: it always throws
 * {@link UnsupportedOperationException}. A subclass that has a distance
 * implementation must override this method to return it.
 *
 * @return never returns normally from this implementation
 * @throws UnsupportedOperationException always, from this implementation
 */
protected StringDistance getStringDistance()  {
  // Name the concrete class so the failure points at the checker lacking an override.
  throw new UnsupportedOperationException(
      "getStringDistance() is not supported by " + getClass().getName());
}
 
Example #8
Source File: DirectSolrSpellChecker.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Initialises this spell checker from {@code config} and applies the resulting
 * settings to the wrapped {@code checker}.
 *
 * <p>The comparator and string distance are resolved from their configured
 * names, falling back to the built-in defaults when absent. Each numeric
 * setting uses its {@code DEFAULT_*} constant unless the parameters supply a
 * value. Lower-casing of terms is always switched off.
 *
 * @param config configuration entries for this checker
 * @param core   core whose resource loader instantiates custom classes
 * @return the name returned by the superclass {@code init}
 */
@Override
@SuppressWarnings({"unchecked"})
public String init(@SuppressWarnings({"rawtypes"})NamedList config, SolrCore core) {

  final SolrParams params = config.toSolrParams();

  log.info("init: {}", config);
  final String name = super.init(config, core);

  // Comparator: keep the default unless a non-score comparator is named.
  Comparator<SuggestWord> comparator = SuggestWordQueue.DEFAULT_COMPARATOR;
  final String comparatorName = (String) config.get(COMPARATOR_CLASS);
  if (comparatorName != null && !comparatorName.equalsIgnoreCase(SCORE_COMP)) {
    if (comparatorName.equalsIgnoreCase(FREQ_COMP)) {
      comparator = new SuggestWordFrequencyComparator();
    } else {
      // Anything else must be a fully qualified class name.
      comparator = (Comparator<SuggestWord>) core.getResourceLoader().newInstance(comparatorName, Comparator.class);
    }
  }

  // String distance: internal Levenshtein unless another class is requested.
  StringDistance distance = DirectSpellChecker.INTERNAL_LEVENSHTEIN;
  final String distanceName = (String) config.get(STRING_DISTANCE);
  if (distanceName != null && !distanceName.equalsIgnoreCase(INTERNAL_DISTANCE)) {
    distance = core.getResourceLoader().newInstance(distanceName, StringDistance.class);
  }

  // Numeric settings: a missing parameter falls back to its default constant.
  final Float accuracyParam = params.getFloat(ACCURACY);
  final float minAccuracy = (accuracyParam != null) ? accuracyParam : DEFAULT_ACCURACY;

  final Integer editsParam = params.getInt(MAXEDITS);
  final int maxEdits = (editsParam != null) ? editsParam : DEFAULT_MAXEDITS;

  final Integer prefixParam = params.getInt(MINPREFIX);
  final int minPrefix = (prefixParam != null) ? prefixParam : DEFAULT_MINPREFIX;

  final Integer inspectionsParam = params.getInt(MAXINSPECTIONS);
  final int maxInspections = (inspectionsParam != null) ? inspectionsParam : DEFAULT_MAXINSPECTIONS;

  final Float thresholdParam = params.getFloat(THRESHOLD_TOKEN_FREQUENCY);
  final float minThreshold = (thresholdParam != null) ? thresholdParam : DEFAULT_THRESHOLD_TOKEN_FREQUENCY;

  final Integer minLengthParam = params.getInt(MINQUERYLENGTH);
  final int minQueryLength = (minLengthParam != null) ? minLengthParam : DEFAULT_MINQUERYLENGTH;

  final Integer maxLengthParam = params.getInt(MAXQUERYLENGTH);
  final int maxQueryLength = (maxLengthParam != null) ? maxLengthParam : DEFAULT_MAXQUERYLENGTH;

  final Float queryFreqParam = params.getFloat(MAXQUERYFREQUENCY);
  final float maxQueryFrequency = (queryFreqParam != null) ? queryFreqParam : DEFAULT_MAXQUERYFREQUENCY;

  // Apply everything to the wrapped checker.
  checker.setComparator(comparator);
  checker.setDistance(distance);
  checker.setMaxEdits(maxEdits);
  checker.setMinPrefix(minPrefix);
  checker.setAccuracy(minAccuracy);
  checker.setThresholdFrequency(minThreshold);
  checker.setMaxInspections(maxInspections);
  checker.setMinQueryLength(minQueryLength);
  checker.setMaxQueryLength(maxQueryLength);
  checker.setMaxQueryFrequency(maxQueryFrequency);
  checker.setLowerCaseTerms(false);

  return name;
}
 
Example #9
Source File: DirectSolrSpellChecker.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/** Returns the distance measure reported by the wrapped {@code checker}. */
@Override
public StringDistance getStringDistance() {
  final StringDistance current = checker.getDistance();
  return current;
}
 
Example #10
Source File: AbstractLuceneSpellChecker.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the string distance held by this spell checker.
 *
 * @return the current value of the {@code sd} field
 */
@Override
public StringDistance getStringDistance() {
  return this.sd;
}
 
Example #11
Source File: ConjunctionSolrSpellCheckerTest.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a mock checker that stores the supplied distance measure.
 *
 * @param sd distance measure to keep in the {@code sd} field
 */
MockSolrSpellChecker(StringDistance sd) {
  super();
  this.sd = sd;
}
 
Example #12
Source File: ConjunctionSolrSpellCheckerTest.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/** Returns the distance measure this mock was constructed with. */
@Override
protected StringDistance getStringDistance() {
  return this.sd;
}
 
Example #13
Source File: StringDistanceFunction.java    From lucene-solr with Apache License 2.0 3 votes vote down vote up
/**
 * Creates a function that holds two value sources and a distance measure.
 *
 * @param str1    first value source
 * @param str2    second value source
 * @param measure distance implementation, stored in the {@code dist} field
 */
public StringDistanceFunction(ValueSource str1, ValueSource str2, StringDistance measure) {
  this.dist = measure;
  this.str1 = str1;
  this.str2 = str2;
}