Java Code Examples for org.apache.commons.codec.language.Metaphone

The following examples show how to use org.apache.commons.codec.language.Metaphone. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: hop   Source File: FuzzyMatch.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Encodes {@code value} with the phonetic algorithm selected by {@code algorithmType}.
 *
 * @param value         the text to encode; handed unchanged to the selected encoder
 * @param algorithmType one of the {@code FuzzyMatchMeta.OPERATION_TYPE_*} phonetic constants
 *                      handled below; may be {@code null}
 * @return the selected encoder's output, or an empty string when {@code algorithmType} is
 *         {@code null} or is not one of the four algorithms handled here
 */
private String getEncodedMF( String value, Integer algorithmType ) {
  // Switching on a null Integer throws a NullPointerException while unboxing, so a missing
  // type is treated exactly like an unrecognised one.
  if ( algorithmType == null ) {
    return "";
  }
  switch ( algorithmType ) {
    case FuzzyMatchMeta.OPERATION_TYPE_METAPHONE:
      return new Metaphone().metaphone( value );
    case FuzzyMatchMeta.OPERATION_TYPE_DOUBLE_METAPHONE:
      return new DoubleMetaphone().doubleMetaphone( value );
    case FuzzyMatchMeta.OPERATION_TYPE_SOUNDEX:
      return new Soundex().encode( value );
    case FuzzyMatchMeta.OPERATION_TYPE_REFINED_SOUNDEX:
      return new RefinedSoundex().encode( value );
    default:
      return "";
  }
}
 
Example 2
Source Project: lucene-solr   Source File: TestPhoneticFilter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs an empty term through a {@link PhoneticFilter} for each listed encoder, passing the
 * empty string to {@code checkOneTerm} as both the input and the expected output.
 *
 * @throws IOException declared for the analysis helpers called here
 */
public void testEmptyTerm() throws IOException {
  final Encoder[] encoders = {
      new Metaphone(), new DoubleMetaphone(), new Soundex(), new RefinedSoundex(), new Caverphone2()
  };
  for (final Encoder e : encoders) {
    // try-with-resources releases the analyzer even when the check below fails.
    try (Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new KeywordTokenizer();
        // The boolean constructor argument is picked at random on every call.
        return new TokenStreamComponents(tokenizer, new PhoneticFilter(tokenizer, e, random().nextBoolean()));
      }
    }) {
      checkOneTerm(a, "", "");
    }
  }
}
 
Example 3
Source Project: pentaho-kettle   Source File: FuzzyMatch.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code value} through the phonetic encoder that corresponds to {@code algorithmType}.
 *
 * @param value         the text to encode; handed unchanged to the selected encoder
 * @param algorithmType one of the {@code FuzzyMatchMeta.OPERATION_TYPE_*} phonetic constants
 * @return the encoder's output, or an empty string when the type is none of the four
 *         algorithms handled here
 */
private String getEncodedMF( String value, Integer algorithmType ) {
  // Unboxing happens here, so a null type fails with a NullPointerException before any
  // encoder is created.
  final int type = algorithmType;

  if ( type == FuzzyMatchMeta.OPERATION_TYPE_METAPHONE ) {
    return new Metaphone().metaphone( value );
  }
  if ( type == FuzzyMatchMeta.OPERATION_TYPE_DOUBLE_METAPHONE ) {
    return new DoubleMetaphone().doubleMetaphone( value );
  }
  if ( type == FuzzyMatchMeta.OPERATION_TYPE_SOUNDEX ) {
    return new Soundex().encode( value );
  }
  if ( type == FuzzyMatchMeta.OPERATION_TYPE_REFINED_SOUNDEX ) {
    return new RefinedSoundex().encode( value );
  }
  // Any other type yields no encoding.
  return "";
}
 
Example 4
Source Project: lucene-solr   Source File: TestPhoneticFilter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks the expected output of {@code assertAlgorithm} for each phonetic encoder. Every encoder
 * is exercised twice: with the boolean flag set, the expected array pairs each code with the
 * original word; with it cleared, only the codes are expected.
 */
public void testAlgorithms() throws Exception {
  final String words = "aaa bbb ccc easgasg";
  final String names = "Darda Karleen Datha Carlene";

  // Metaphone
  assertAlgorithm(new Metaphone(), true, words,
      new String[] { "A", "aaa", "B", "bbb", "KKK", "ccc", "ESKS", "easgasg" });
  assertAlgorithm(new Metaphone(), false, words,
      new String[] { "A", "B", "KKK", "ESKS" });

  // Double Metaphone
  assertAlgorithm(new DoubleMetaphone(), true, words,
      new String[] { "A", "aaa", "PP", "bbb", "KK", "ccc", "ASKS", "easgasg" });
  assertAlgorithm(new DoubleMetaphone(), false, words,
      new String[] { "A", "PP", "KK", "ASKS" });

  // Soundex
  assertAlgorithm(new Soundex(), true, words,
      new String[] { "A000", "aaa", "B000", "bbb", "C000", "ccc", "E220", "easgasg" });
  assertAlgorithm(new Soundex(), false, words,
      new String[] { "A000", "B000", "C000", "E220" });

  // Refined Soundex
  assertAlgorithm(new RefinedSoundex(), true, words,
      new String[] { "A0", "aaa", "B1", "bbb", "C3", "ccc", "E034034", "easgasg" });
  assertAlgorithm(new RefinedSoundex(), false, words,
      new String[] { "A0", "B1", "C3", "E034034" });

  // Caverphone 2 is the only encoder fed the name list instead of the word list.
  assertAlgorithm(new Caverphone2(), true, names,
      new String[] { "TTA1111111", "Darda", "KLN1111111", "Karleen",
        "TTA1111111", "Datha", "KLN1111111", "Carlene" });
  assertAlgorithm(new Caverphone2(), false, names,
      new String[] { "TTA1111111", "KLN1111111", "TTA1111111", "KLN1111111" });

  // NYSIIS
  assertAlgorithm(new Nysiis(), true, words,
      new String[] { "A", "aaa", "B", "bbb", "C", "ccc", "EASGAS", "easgasg" });
  assertAlgorithm(new Nysiis(), false, words,
      new String[] { "A", "B", "C", "EASGAS" });
}
 
Example 5
Source Project: lucene-solr   Source File: TestPhoneticFilterFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Case: defaults. Only the encoder is configured, by its short name; the factory is then
 * expected to expose a {@link Metaphone} encoder and to have {@code inject} set.
 */
public void testFactoryDefaults() throws IOException {
  // A mutable map is handed over, as the factory constructor receives it directly.
  final Map<String,String> settings = new HashMap<>();
  settings.put(PhoneticFilterFactory.ENCODER, "Metaphone");

  final PhoneticFilterFactory phonetic = new PhoneticFilterFactory(settings);
  final ClasspathResourceLoader loader = new ClasspathResourceLoader(phonetic.getClass());
  phonetic.inform(loader);

  assertTrue(phonetic.getEncoder() instanceof Metaphone);
  assertTrue(phonetic.inject); // default
}
 
Example 6
Source Project: lucene-solr   Source File: TestPhoneticFilterFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Configures the factory with the Metaphone encoder and a maximum code length of "2", then
 * checks that the resulting encoder reports a maximum code length of 2.
 */
public void testMaxCodeLength() throws IOException {
  final Map<String,String> options = new HashMap<>();
  options.put(PhoneticFilterFactory.ENCODER, "Metaphone");
  options.put(PhoneticFilterFactory.MAX_CODE_LENGTH, "2");

  final PhoneticFilterFactory filterFactory = new PhoneticFilterFactory(options);
  filterFactory.inform(new ClasspathResourceLoader(filterFactory.getClass()));

  // The cast doubles as a type check: a different encoder class fails here.
  final Metaphone configured = (Metaphone) filterFactory.getEncoder();
  assertEquals(2, configured.getMaxCodeLen());
}
 
Example 7
Source Project: lucene-solr   Source File: TestPhoneticFilterFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Case: reflection. The encoder is named by its fully qualified class name rather than by a
 * short name; the factory is still expected to expose a {@link Metaphone} encoder and to have
 * {@code inject} set.
 */
public void testFactoryReflection() throws IOException {
  final String encoderClassName = "org.apache.commons.codec.language.Metaphone";

  final Map<String,String> config = new HashMap<>();
  config.put(PhoneticFilterFactory.ENCODER, encoderClassName);

  final PhoneticFilterFactory underTest = new PhoneticFilterFactory(config);
  underTest.inform(new ClasspathResourceLoader(underTest.getClass()));

  final boolean isMetaphone = underTest.getEncoder() instanceof Metaphone;
  assertTrue(isMetaphone);
  assertTrue(underTest.inject); // default
}
 
Example 8
Source Project: hop   Source File: ValueDataUtil.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Computes the Metaphone code of the string form of {@code dataA}.
 *
 * @param metaA not read by this method
 * @param dataA the value to encode; may be {@code null}
 * @return {@code null} when {@code dataA} is {@code null}, otherwise the result of
 *         {@link Metaphone#metaphone(String)} applied to {@code dataA.toString()}
 */
public static String get_Metaphone( IValueMeta metaA, Object dataA ) {
  return dataA == null ? null : new Metaphone().metaphone( dataA.toString() );
}
 
Example 9
/**
 * Searches the voice data for an utterance that matches one of the user's custom commands.
 * A keyphrase/utterance pair qualifies when {@link Metaphone} considers the two equal, the
 * length check passes and the Jaro-Winkler score is above the configured lower threshold.
 * The search stops at the first qualifying pair.
 *
 * @return the {@link CustomCommand} rebuilt from the first qualifying container, or null if
 * no pair qualified
 */
public CustomCommand executeCustomCommand() {

    final long then = System.nanoTime();

    final double jwdLowerThreshold = SPH.getJaroWinklerLower(mContext);
    final ArrayList<CustomCommandContainer> toKeep = new ArrayList<>();
    final Metaphone metaphone = new Metaphone();
    final JaroWinklerDistance jwd = new JaroWinklerDistance();

    final int total = genericData.size();

    search:
    for (int index = 0; index < total; index++) {
        final CustomCommandContainer candidate = (CustomCommandContainer) genericData.get(index);
        final String keyphrase = candidate.getKeyphrase().toLowerCase(loc).trim();

        for (final String raw : inputData) {
            final String utterance = raw.toLowerCase(loc).trim();

            // Both the phonetic comparison and the length check must pass before scoring.
            if (!metaphone.isMetaphoneEqual(keyphrase, utterance)
                    || !Algorithm.checkLength(keyphrase, utterance)) {
                continue;
            }

            final double similarity = jwd.apply(keyphrase, utterance);

            if (similarity > jwdLowerThreshold) {
                // Record the match details on the container before cloning it.
                candidate.setScore(similarity);
                candidate.setUtterance(utterance);
                candidate.setExactMatch(true);
                toKeep.add(SerializationUtils.clone(candidate));
                break search;
            }

            if (DEBUG) {
                MyLog.i(CLS_NAME, "Matches: double check JW: rejected");
            }
        }
    }

    CustomCommand customCommand = null;

    if (UtilsList.notNaked(toKeep)) {
        if (DEBUG) {
            MyLog.i(CLS_NAME, "Have a match");
        }

        final CustomCommandContainer winner = toKeep.get(0);

        // The command itself is stored serialised on the container.
        final Gson gson = new GsonBuilder().disableHtmlEscaping().create();
        customCommand = gson.fromJson(winner.getSerialised(), CustomCommand.class);
        customCommand.setExactMatch(winner.isExactMatch());
        customCommand.setUtterance(winner.getUtterance());
        customCommand.setAlgorithm(Algorithm.METAPHONE);
    } else if (DEBUG) {
        MyLog.i(CLS_NAME, "no custom phrases matched");
    }

    if (DEBUG) {
        MyLog.getElapsed(CLS_NAME, then);
    }

    return customCommand;
}
 
Example 10
/**
 * Searches the input data for an entry that matches one of the generic strings. A pair
 * qualifies when {@link Metaphone} considers the two equal, the length check passes and the
 * Jaro-Winkler score is above the configured lower threshold. The search stops at the first
 * qualifying pair.
 *
 * @return an {@link AlgorithmicContainer} describing the first qualifying pair, or null if no
 * pair qualified
 */
public AlgorithmicContainer executeGeneric() {

    final long then = System.nanoTime();

    final double jwdLowerThreshold = SPH.getJaroWinklerLower(mContext);

    final ArrayList<AlgorithmicContainer> toKeep = new ArrayList<>();
    final Metaphone metaphone = new Metaphone();
    final JaroWinklerDistance jwd = new JaroWinklerDistance();

    AlgorithmicContainer result = null;

    final int total = genericData.size();

    search:
    for (int index = 0; index < total; index++) {
        final String original = (String) genericData.get(index);
        final String normalised = original.toLowerCase(loc).trim();

        for (final String raw : inputData) {
            final String candidate = raw.toLowerCase(loc).trim();

            // Both the phonetic comparison and the length check must pass before scoring.
            if (!metaphone.isMetaphoneEqual(normalised, candidate)
                    || !Algorithm.checkLength(normalised, candidate)) {
                continue;
            }

            final double similarity = jwd.apply(normalised, candidate);

            if (similarity > jwdLowerThreshold) {
                // The container keeps the generic string as supplied, not its normalised form.
                result = new AlgorithmicContainer();
                result.setInput(candidate);
                result.setGenericMatch(original);
                result.setScore(similarity);
                result.setAlgorithm(Algorithm.METAPHONE);
                result.setParentPosition(index);
                result.setExactMatch(true);
                toKeep.add(result);

                break search;
            }

            if (DEBUG) {
                MyLog.i(CLS_NAME, "Matches: double check JW: rejected");
            }
        }
    }

    if (UtilsList.notNaked(toKeep)) {
        if (DEBUG) {
            MyLog.i(CLS_NAME, "Have a match");
        }
        result = toKeep.get(0);
    } else if (DEBUG) {
        MyLog.i(CLS_NAME, "no matches");
    }

    if (DEBUG) {
        MyLog.getElapsed(CLS_NAME, then);
    }

    return result;
}
 
Example 11
Source Project: pentaho-kettle   Source File: ValueDataUtil.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Returns the Metaphone encoding of {@code dataA}'s string representation.
 *
 * @param metaA not read by this method
 * @param dataA the value to encode; may be {@code null}
 * @return the result of {@link Metaphone#metaphone(String)} for {@code dataA.toString()}, or
 *         {@code null} when {@code dataA} is {@code null}
 */
public static String get_Metaphone( ValueMetaInterface metaA, Object dataA ) {
  if ( dataA != null ) {
    final Metaphone encoder = new Metaphone();
    return encoder.metaphone( dataA.toString() );
  }
  return null;
}
 
Example 12
Source Project: crate   Source File: PhoneticTokenFilterFactory.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Configures the factory from {@code settings}. The {@code encoder} setting (default
 * {@code "metaphone"}, compared case-insensitively) selects the phonetic algorithm. For most
 * algorithms an encoder instance is created here; for double metaphone, Beider-Morse and
 * Daitch-Mokotoff the {@code encoder} field is left {@code null} and only the related option
 * fields are filled in.
 *
 * @param indexSettings forwarded to the superclass constructor
 * @param environment   not read by this constructor
 * @param name          forwarded to the superclass constructor
 * @param settings      source of the {@code replace}, {@code encoder}, {@code max_code_len},
 *                      {@code languageset}, {@code rule_type} and {@code name_type} options
 * @throws IllegalArgumentException if the encoder name is not recognised, or if a
 *                                  Beider-Morse {@code rule_type} is neither {@code approx}
 *                                  nor {@code exact}
 */
public PhoneticTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
    super(indexSettings, name, settings);
    // Start from neutral values; the branch for the selected encoder overrides what it needs.
    this.languageset = null;
    this.nametype = null;
    this.ruletype = null;
    this.maxcodelength = 0;
    this.isDaitchMokotoff = false;
    this.replace = settings.getAsBoolean("replace", true);
    // weird, encoder is null at last step in SimplePhoneticAnalysisTests, so we set it to metaphone as default
    String encodername = settings.get("encoder", "metaphone");
    if ("metaphone".equalsIgnoreCase(encodername)) {
        this.encoder = new Metaphone();
    } else if ("soundex".equalsIgnoreCase(encodername)) {
        this.encoder = new Soundex();
    } else if ("caverphone1".equalsIgnoreCase(encodername)) {
        this.encoder = new Caverphone1();
    } else if ("caverphone2".equalsIgnoreCase(encodername)) {
        this.encoder = new Caverphone2();
    } else if ("caverphone".equalsIgnoreCase(encodername)) {
        // The unversioned name maps to Caverphone2.
        this.encoder = new Caverphone2();
    } else if ("refined_soundex".equalsIgnoreCase(encodername) || "refinedSoundex".equalsIgnoreCase(encodername)) {
        this.encoder = new RefinedSoundex();
    } else if ("cologne".equalsIgnoreCase(encodername)) {
        this.encoder = new ColognePhonetic();
    } else if ("double_metaphone".equalsIgnoreCase(encodername) || "doubleMetaphone".equalsIgnoreCase(encodername)) {
        // No encoder object here; only the maximum code length (default 4) is recorded.
        this.encoder = null;
        this.maxcodelength = settings.getAsInt("max_code_len", 4);
    } else if ("bm".equalsIgnoreCase(encodername)
            || "beider_morse".equalsIgnoreCase(encodername)
            || "beidermorse".equalsIgnoreCase(encodername)) {
        // Beider-Morse: no encoder object; language set, rule type and name type are recorded.
        this.encoder = null;
        this.languageset = settings.getAsList("languageset");
        String ruleType = settings.get("rule_type", "approx");
        if ("approx".equalsIgnoreCase(ruleType)) {
            ruletype = RuleType.APPROX;
        } else if ("exact".equalsIgnoreCase(ruleType)) {
            ruletype = RuleType.EXACT;
        } else {
            throw new IllegalArgumentException("No matching rule type [" + ruleType + "] for beider morse encoder");
        }
        String nameType = settings.get("name_type", "generic");
        // NOTE(review): unlike rule_type above, an unrecognised name_type is not rejected;
        // nametype simply stays null. Confirm this is intended.
        if ("GENERIC".equalsIgnoreCase(nameType)) {
            nametype = NameType.GENERIC;
        } else if ("ASHKENAZI".equalsIgnoreCase(nameType)) {
            nametype = NameType.ASHKENAZI;
        } else if ("SEPHARDIC".equalsIgnoreCase(nameType)) {
            nametype = NameType.SEPHARDIC;
        }
    } else if ("koelnerphonetik".equalsIgnoreCase(encodername)) {
        this.encoder = new KoelnerPhonetik();
    } else if ("haasephonetik".equalsIgnoreCase(encodername)) {
        this.encoder = new HaasePhonetik();
    } else if ("nysiis".equalsIgnoreCase(encodername)) {
        this.encoder = new Nysiis();
    } else if ("daitch_mokotoff".equalsIgnoreCase(encodername)) {
        // Daitch-Mokotoff: no encoder object; only the flag is set.
        this.encoder = null;
        this.isDaitchMokotoff = true;
    } else {
        throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter");
    }
}