Java Code Examples for org.apache.commons.lang3.StringUtils#getLevenshteinDistance()

The following examples show how to use org.apache.commons.lang3.StringUtils#getLevenshteinDistance(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: ContactsSyncAdapterService.java    From haxsync with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Finds the phone contact whose name is closest to the given Facebook contact.
 *
 * With a {@code maxdistance} of 0 only an exact (case-sensitive) member of
 * {@code phoneContacts} is accepted. Otherwise names are lower-cased and compared
 * by Levenshtein distance, and the closest contact within {@code maxdistance}
 * edits is returned. {@code null} names are compared as the empty string.
 *
 * @param phoneContacts names of the contacts to search
 * @param fbContact     name to look up
 * @param maxdistance   largest edit distance that still counts as a match
 * @return the matching entry of {@code phoneContacts}, or {@code null} if none is close enough
 */
private static String matches(Set<String> phoneContacts, String fbContact, int maxdistance){
	if (maxdistance == 0){
		return phoneContacts.contains(fbContact) ? fbContact : null;
	}
	// The lower-cased Facebook name does not change inside the loop, so build it once.
	String fbLower = fbContact != null ? fbContact.toLowerCase() : "";
	int bestDistance = maxdistance;
	String bestMatch = null;
	for (String contact : phoneContacts){
		String contactLower = contact != null ? contact.toLowerCase() : "";
		int distance = StringUtils.getLevenshteinDistance(contactLower, fbLower);
		// "<=" keeps the contact iterated last when several are equally close.
		if (distance <= bestDistance){
			bestMatch = contact;
			bestDistance = distance;
		}
	}
	return bestMatch;
}
 
Example 2
Source File: AliasHandler.java    From Truck-Factor with MIT License 6 votes vote down vote up
/**
 * Groups developer names that are within a small edit distance of each other.
 *
 * Each name is compared (via {@code convertToUTFLower}) against the names still
 * left in the working list. Names that differ textually but lie within the
 * threshold are recorded as aliases of the first name and removed from the
 * working list.
 *
 * @param allDevelopers names to examine; the list itself is not modified
 * @param distance      maximum Levenshtein distance for two names to be aliases, or
 *                      {@code -1} to use, per name, the number of parts obtained by
 *                      splitting that name on single spaces
 * @param minSize       minimum length a name must have to collect aliases
 * @return map from a name to the aliases found for it; names without aliases are absent
 */
private static Map<String, List<String>> findAliases(List<String> allDevelopers, int distance, int minSize) {
	// CopyOnWriteArrayList iterators walk a snapshot, so removing entries while looping is safe.
	// The outer loop therefore still visits names that were already recorded as aliases.
	List<String> copyList =  new CopyOnWriteArrayList<String>(allDevelopers);
	Map<String, List<String>> aliases =  new HashMap<String, List<String>>();
	for (String developer1 : copyList) {
		copyList.remove (developer1);
		// The length check depends only on developer1, so decide once instead of per pair.
		if (developer1.length() < minSize) {
			continue;
		}
		// Likewise the threshold is fixed for the whole inner loop.
		int newDistance = (distance == -1) ? developer1.split(" ").length : distance;
		for (String developer2 : copyList) {
			int localDistance = StringUtils.getLevenshteinDistance(convertToUTFLower(developer1), convertToUTFLower(developer2));
			if (!developer1.equals(developer2) && localDistance<=newDistance){
				if(!aliases.containsKey(developer1))
					aliases.put(developer1, new ArrayList<String>());
				aliases.get(developer1).add(developer2);
				// An alias is not compared against later names in this pass.
				copyList.remove(developer2);
			}
		}
	}
	return aliases;
}
 
Example 3
Source File: NewAliasHandler.java    From Truck-Factor with MIT License 6 votes vote down vote up
/**
 * Detects developer names that are probably aliases of one another.
 *
 * Two names are treated as aliases when they are not equal as strings and the
 * Levenshtein distance between their {@code convertToUTFLower} forms does not
 * exceed the threshold.
 *
 * @param allDevelopers developer names to compare; not modified by this method
 * @param distance      edit-distance threshold; {@code -1} means "use the number of
 *                      parts produced by splitting the reference name on single spaces"
 * @param minSize       names shorter than this never act as the reference name
 * @return for every reference name with at least one alias, the list of its aliases
 */
private static Map<String, List<String>> findAliases(List<String> allDevelopers, int distance, int minSize) {
	final boolean perNameThreshold = (distance == -1);
	// Working copy: its iterators are snapshots, so entries can be removed during iteration.
	List<String> copyList = new CopyOnWriteArrayList<String>(allDevelopers);
	Map<String, List<String>> aliases = new HashMap<String, List<String>>();
	for (String developer1 : copyList) {
		copyList.remove(developer1);
		// Both the size check and the threshold depend only on developer1; evaluate them once.
		if (developer1.length() >= minSize) {
			int newDistance = perNameThreshold ? developer1.split(" ").length : distance;
			for (String developer2 : copyList) {
				int localDistance = StringUtils.getLevenshteinDistance(convertToUTFLower(developer1), convertToUTFLower(developer2));
				if (!developer1.equals(developer2) && localDistance <= newDistance) {
					if (!aliases.containsKey(developer1)) {
						aliases.put(developer1, new ArrayList<String>());
					}
					aliases.get(developer1).add(developer2);
					// Drop the alias so later inner loops no longer see it.
					copyList.remove(developer2);
				}
			}
		}
	}
	return aliases;
}
 
Example 4
Source File: AliasesIdentifier.java    From Truck-Factor with MIT License 6 votes vote down vote up
/**
 * Groups developers whose names are within a small edit distance of each other.
 *
 * @param allDevelopers developers to compare; the list itself is not modified
 * @param distance      maximum Levenshtein distance between two names, or {@code -1}
 *                      to use the number of parts obtained by splitting the first
 *                      developer's name on single spaces
 * @param minSize       minimum name length for a developer to collect aliases
 * @return map from a developer to the developers recorded as its aliases
 */
private static Map<Developer, List<Developer>> findAliases(List<Developer> allDevelopers, int distance, int minSize) {
	int threshold = distance;
	// Snapshot iterators of CopyOnWriteArrayList allow removal while looping.
	List<Developer> remaining = new CopyOnWriteArrayList<Developer>(allDevelopers);
	Map<Developer, List<Developer>> aliases = new HashMap<Developer, List<Developer>>();
	for (Developer developer1 : remaining) {
		remaining.remove(developer1);
		for (Developer developer2 : remaining) {
			// NOTE(review): if getId() returns a boxed type this compares references — confirm.
			if (developer1.getId() == developer2.getId()) {
				continue;
			}
			if (developer1.getName().length() < minSize) {
				continue;
			}
			int localDistance = StringUtils.getLevenshteinDistance(
					convertToUTFLower(developer1.getName()),
					convertToUTFLower(developer2.getName()));
			if (distance == -1) {
				threshold = developer1.getName().split(" ").length;
			}
			if (developer1.getName().equals(developer2.getName()) || localDistance > threshold) {
				continue;
			}
			List<Developer> group = aliases.get(developer1);
			if (group == null) {
				group = new ArrayList<Developer>();
				aliases.put(developer1, group);
			}
			group.add(developer2);
			remaining.remove(developer2);
		}
	}
	return aliases;
}
 
Example 5
Source File: LevenshteinAutomatonTest.java    From dictomaton with Apache License 2.0 5 votes vote down vote up
/**
 * Generate a word, create a dictionary of permutations that are created using random edit operations,
 * and check that the Levenshtein automaton for that word finds the permutations within its edit distance.
 *
 * @param minLength             The minimum length of the generated word.
 * @param maxLength             The maximum length of the generated word.
 * @param nPermutations         The number of permutations to generate.
 * @param nRandomEditOperations Bound passed to {@code d_rng.nextInt} when choosing how many random
 *                              edit operations to apply to each permutation.
 * @param distance              Test the Levenshtein automaton with this edit distance.
 * @throws DictionaryBuilderException
 */
private void generateAndCheckPermutations(int minLength, int maxLength, int nPermutations, int nRandomEditOperations,
                                          int distance) throws DictionaryBuilderException {
    // Draw the word length from [minLength, maxLength]. The previous expression
    // minLength + (maxLength - minLength + 1) always evaluated to maxLength + 1.
    // NOTE(review): assumes randomString's argument is the word length and that
    // d_rng.nextInt(bound) returns a value in [0, bound) — confirm.
    String str = randomString(minLength + d_rng.nextInt(maxLength - minLength + 1));

    TreeSet<String> all = new TreeSet<>();
    Set<String> shouldHave = new HashSet<>();

    for (int i = 0; i < nPermutations; ++i) {
        int n = d_rng.nextInt(nRandomEditOperations);

        StringBuilder permutedBuilder = new StringBuilder(str);
        for (int perm = 0; perm < n; ++perm)
            d_editOperations[d_rng.nextInt(d_editOperations.length)].apply(permutedBuilder);

        String permuted = permutedBuilder.toString();

        all.add(permuted);

        // Reference answer: every permutation within the edit distance must be found.
        if (StringUtils.getLevenshteinDistance(str, permuted) <= distance)
            shouldHave.add(permuted);
    }

    Dictionary dict = new DictionaryBuilder().addAll(all).build();
    LevenshteinAutomaton la = new LevenshteinAutomaton(str, distance);

    Assert.assertEquals(shouldHave, la.intersectionLanguage(dict));
}
 
Example 6
Source File: ParameterMatchMockStrategy.java    From jvm-sandbox-repeater with Apache License 2.0 5 votes vote down vote up
/**
 * Scores how similar a recorded invocation's serialized request is to the
 * current serialized request, based on Levenshtein distance normalised by the
 * longer of the two strings.
 *
 * @param invocation        recorded invocation to compare against
 * @param request           mock request; if its modified-identity collection contains the
 *                          invocation's identity, the invocation request is re-serialized
 *                          instead of using the stored serialized form
 * @param requestSerialized serialized form of the current request
 * @return {@code 1 - distance / maxLength}; 1 means identical, and two empty strings score 1
 * @throws SerializeException declared for the re-serialization call
 */
private double calcSimilarity(Invocation invocation , MockRequest request, String requestSerialized) throws SerializeException {
    String requestSerializedTarget;
    if (CollectionUtils.isNotEmpty(request.getModifiedInvocationIdentity()) &&
        request.getModifiedInvocationIdentity().contains(invocation.getIdentity())) {
        requestSerializedTarget = SerializerWrapper.hessianSerialize(invocation.getRequest(),request.getEvent().javaClassLoader);
    } else {
        requestSerializedTarget = invocation.getRequestSerialized();
    }
    int distance = StringUtils.getLevenshteinDistance(requestSerialized, requestSerializedTarget);
    int longest = Math.max(requestSerialized.length(), requestSerializedTarget.length());
    if (longest == 0) {
        // Both strings are empty: they are identical, and 0.0 / 0 would yield NaN.
        return 1;
    }
    return 1 - (double) distance / longest;
}
 
Example 7
Source File: StringDistanceMetrics.java    From dungeon with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Returns the Levenshtein distance between two strings after checking both
 * against {@code CommandLimits.isWithinMaximumCommandLength}.
 *
 * @param a first string
 * @param b second string
 * @return the Levenshtein distance between {@code a} and {@code b}
 * @throws IllegalArgumentException if either string fails the length check
 */
static int levenshteinDistance(final String a, final String b) {
  // Short-circuit: b is only checked once a has passed.
  final boolean withinLimits =
      CommandLimits.isWithinMaximumCommandLength(a) && CommandLimits.isWithinMaximumCommandLength(b);
  if (!withinLimits) {
    throw new IllegalArgumentException("input is too big.");
  }
  return StringUtils.getLevenshteinDistance(a, b);
}
 
Example 8
Source File: StringSimilarityCallable.java    From OpenID-Attacker with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Computes the Levenshtein distance between {@code s1} and {@code s2}.
 *
 * @return the distance, widened to a {@code Float}
 */
@Override
public Float call() throws Exception {
    final int editDistance = StringUtils.getLevenshteinDistance(s1, s2);
    return Float.valueOf(editDistance);
}
 
Example 9
Source File: Importer.java    From 10000sentences with Apache License 2.0 5 votes vote down vote up
/**
 * Decides whether a sentence pair is acceptable.
 *
 * A pair is rejected when either sentence is missing, when the two sentences
 * are equal, or when their Levenshtein distance is below 20% of their average
 * length. Pairs where both sentences are shorter than 50 characters are then
 * accepted. Longer pairs are rejected when the longer sentence is at least four
 * times as long as the shorter one, or when either sentence fully matches
 * {@code NUMBER_DELIMITER}.
 *
 * @param s sentence pair to check
 * @return {@code true} if the pair passes all checks
 */
protected boolean sentenceOK(SentenceVO s) {
    String targ = s.getTargetSentence();
    String known = s.getKnownSentence();

    // A pair with a missing side is rejected instead of failing with a NullPointerException below.
    if (targ == null || known == null) {
        return false;
    }

    if (StringUtils.equals(targ, known)) {
        return false;
    }

    int tLen = targ.length();
    int kLen = known.length();
    // Too similar: fewer edits than 20% of the average length.
    if (StringUtils.getLevenshteinDistance(targ, known) < 0.2 * (tLen + kLen) / 2.) {
        return false;
    }

    if (tLen < 50 && kLen < 50) {
        return true;
    }

    int shorter = Math.min(tLen, kLen);
    int longer = Math.max(tLen, kLen);
    // An empty sentence next to a long one used to cause a division by zero; reject it.
    // Integer division: the ratio check fires once longer >= 4 * shorter.
    if (shorter == 0 || longer / shorter > 3) {
        return false;
    }

    if (NUMBER_DELIMITER.matcher(targ).matches() || NUMBER_DELIMITER.matcher(known).matches()) {
        return false;
    }

    return true;
}
 
Example 10
Source File: GuildUtils.java    From FlareBot with MIT License 5 votes vote down vote up
/**
 * Gets a {@link Role} that matches a string. Case doesn't matter.
 * <p>
 * Lookup order: a role whose name equals {@code s} ignoring case, then a role whose id
 * is the number formed by the digits contained in {@code s}. If both fail and a channel
 * was supplied, {@code getRolesByName(s, true)} is consulted; when that is empty an error
 * message is sent to the channel, optionally suggesting the closest self-assignable role name.
 *
 * @param s       The String to get a role from
 * @param guildId The id of the {@link Guild} to get the role from
 * @param channel The channel to send an error message to if no role is found; may be null,
 *                in which case no message is sent
 * @return the matching role, or null if no role matches
 */
public static Role getRole(String s, String guildId, TextChannel channel) {
    Guild guild = Getters.getGuildById(guildId);
    // 1) Case-insensitive match on the role name.
    Role role = guild.getRoles().stream()
            .filter(r -> r.getName().equalsIgnoreCase(s))
            .findFirst().orElse(null);
    if (role != null) return role;
    // 2) Treat the digits in s as a role id (everything that is not 0-9 is stripped first).
    try {
        role = guild.getRoleById(Long.parseLong(s.replaceAll("[^0-9]", "")));
        if (role != null) return role;
    } catch (NumberFormatException | NullPointerException ignored) {
        // No usable id in s (e.g. no digits at all, or s is null): fall through.
    }
    if (channel != null) {
        // NOTE(review): the 'true' flag presumably requests a case-insensitive lookup — confirm.
        if (guild.getRolesByName(s, true).isEmpty()) {
            // 3) Nothing found: look for the self-assignable role whose name is closest to s,
            //    accepting only distances strictly below LEVENSHTEIN_DISTANCE.
            String closest = null;
            int distance = LEVENSHTEIN_DISTANCE;
            for (Role role1 : guild.getRoles().stream().filter(role1 -> FlareBotManager.instance().getGuild(guildId).getSelfAssignRoles()
                    .contains(role1.getId())).collect(Collectors.toList())) {
                int currentDistance = StringUtils.getLevenshteinDistance(role1.getName(), s);
                if (currentDistance < distance) {
                    distance = currentDistance;
                    closest = role1.getName();
                }
            }
            MessageUtils.sendErrorMessage("That role does not exist! "
                    + (closest != null ? "Maybe you mean `" + closest + "`" : ""), channel);
            return null;
        } else {
            return guild.getRolesByName(s, true).get(0);
        }
    }
    // Without a channel no further lookup is attempted.
    return null;
}
 
Example 11
Source File: RequestSender.java    From Airachnid-Burp-Extension with GNU General Public License v3.0 5 votes vote down vote up
/**
     * Tests whether two strings are similar. Similar is not the same as equal: the strings
     * count as similar when their Jaro-Winkler value reaches {@code JARO_THRESHOLD} or their
     * Levenshtein distance does not exceed {@code LEVENSHTEIN_THRESHOLD}.
     *
     * @param firstString  first string to compare
     * @param secondString second string to compare
     * @return {@code true} if either metric meets its threshold
     */
    private static boolean testSimilar(String firstString, String secondString) {
        // Both metrics are always computed before any comparison is made.
        double jaroWinkler = StringUtils.getJaroWinklerDistance(firstString, secondString);
        int editDistance = StringUtils.getLevenshteinDistance(firstString, secondString);

        if (jaroWinkler >= JARO_THRESHOLD) {
            return true;
        }
        return editDistance <= LEVENSHTEIN_THRESHOLD;
    }
 
Example 12
Source File: GeoNameResolver.java    From lucene-geo-gazetteer with Apache License 2.0 4 votes vote down vote up
/**
 * Select the best matches for each location name extracted from a document,
 * choosing from among a list of lists of candidate matches. Every candidate
 * receives a weight built from the following features: 1) whether the
 * extracted name occurs in the candidate's name as a whole word or as a
 * substring, 2) the edit distance between the extracted name and the
 * candidate's alternate names that contain it, 3) the candidate's position in
 * the incoming list (earlier is better). Content-based filtering is not
 * implemented. Names with an empty candidate list are skipped.
 *
 * @param resolvedEntities
 *            final result; receives, per extracted name, the highest-weighted
 *            candidates
 * @param allCandidates
 *            each location name may hit several documents, this is the
 *            collection of all hit documents; the weight of every candidate
 *            in it is overwritten
 * @param count
 *            maximum number of results kept for one location name
 */
private void pickBestCandidates(
		HashMap<String, List<Location>> resolvedEntities,
		HashMap<String, List<Location>> allCandidates, int count) {

	for (String extractedName : allCandidates.keySet()) {

		List<Location> cur = allCandidates.get(extractedName);
		if (cur.isEmpty())
			continue;//continue if no results found

		//Priority queue ordered by descending weight, so poll() yields the top elements
		PriorityQueue<Location> pq = new PriorityQueue<>(cur.size(), new Comparator<Location>() {
			@Override
			public int compare(Location o1, Location o2) {
				return Integer.compare(o2.getWeight(), o1.getWeight());
			}
		});

		// Space-padded form used for whole-word matching; identical for every candidate
		String paddedExtractedName = String.format(" %s ", extractedName);

		for (int i = 0; i < cur.size(); ++i) {
			Location candidate = cur.get(i);
			int weight = 0;
			// pad the candidate's name the same way so word boundaries line up
			String resolvedName = String.format(" %s ", candidate.getName());
			if (resolvedName.contains(paddedExtractedName)) {
				// Assign a weight as per configuration if extracted name is found as an exact word in name
				weight = WEIGHT_NAME_MATCH;
			} else if (resolvedName.contains(extractedName)) {
				// Assign a weight as per configuration if extracted name is found partly in name
				weight = WEIGHT_NAME_PART_MATCH;
			}
			// sum the edit distances to all alternate names that contain the extracted name
			String[] altNames = candidate.getAlternateNames().split(",");
			float altEditDist = 0;
			for (String altName : altNames) {
				if (altName.contains(extractedName)) {
					altEditDist += StringUtils.getLevenshteinDistance(extractedName, altName);
				}
			}
			//lesser the edit distance more should be the weight
			weight += getCalibratedWeight(altNames.length, altEditDist);

			//Give preference to sorted results. 0th result should have more priority
			weight += (cur.size() - i) * WEIGHT_SORT_ORDER;

			candidate.setWeight(weight);
			pq.add(candidate);
		}

		List<Location> resultList = new ArrayList<>();

		for (int i = 0; i < count && !pq.isEmpty(); i++) {
			resultList.add(pq.poll());
		}

		resolvedEntities.put(extractedName, resultList);
	}
}
 
Example 13
Source File: StringTools.java    From CogStack-Pipeline with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the Levenshtein distance between two strings by delegating to
 * {@code StringUtils.getLevenshteinDistance}.
 *
 * @param str1 first string
 * @param str2 second string
 * @return the distance reported by {@code StringUtils}
 */
public static int getLevenshteinDistance(String str1, String str2) {
    final int editDistance = StringUtils.getLevenshteinDistance(str1, str2);
    return editDistance;
}
 
Example 14
Source File: FuzzyMatch.java    From datacollector with Apache License 2.0 4 votes vote down vote up
/**
 * Converts the Levenshtein distance between two strings into a 0-100 score,
 * where 100 means no edits are needed.
 *
 * @param s1 first string
 * @param s2 second string
 * @return {@code 100 - (int) (distance / maxLength * 100)}
 */
private static int calculateLevenshteinDistance(String s1, String s2) {
  final int edits = StringUtils.getLevenshteinDistance(s1, s2);
  final int longest = Math.max(s1.length(), s2.length());
  // For two empty strings this is 0.0 / 0 = NaN; the int cast below turns NaN into 0,
  // so the score is 100.
  final double ratio = ((double) edits) / longest;
  final int percentDifferent = (int) (ratio * 100);
  return 100 - percentDifferent;
}
 
Example 15
Source File: DistanceStringFilter.java    From Indra with MIT License 4 votes vote down vote up
@Override
public boolean matches(String t1, String t2) {
    return t1.length() >= this.threshold && StringUtils.getLevenshteinDistance(t1, t2) < min;
}
 
Example 16
Source File: LevenshteinRanker.java    From Stargraph with MIT License 4 votes vote down vote up
/**
 * Computes the Levenshtein distance between two character sequences.
 *
 * @param s1 first sequence
 * @param s2 second sequence
 * @return the distance, widened to {@code double}
 */
@Override
double computeStringDistance(CharSequence s1, CharSequence s2) {
    final int edits = StringUtils.getLevenshteinDistance(s1, s2);
    return edits;
}
 
Example 17
Source File: IngestHTRIntoAbbyyXML.java    From TranskribusCore with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Computes the Levenshtein distance between the fixed strings "fly" and "ant".
 * The result is discarded, so the call has no visible effect here.
 */
private static void compareVersions() {
	StringUtils.getLevenshteinDistance("fly", "ant");
}
 
Example 18
Source File: CandidateIEObject.java    From TableDisentangler with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Computes the Levenshtein distance between this object's
 * {@code NormalizedPattern} and the given pattern. The caller is expected to
 * pass an already normalized pattern.
 *
 * @param pattern the pattern to compare against
 * @return the Levenshtein distance between the two patterns
 */
public int calculateLevenshtein(String pattern) {
	final int editDistance = StringUtils.getLevenshteinDistance(NormalizedPattern, pattern);
	return editDistance;
}