Java Code Examples for org.apache.commons.lang3.StringUtils#getLevenshteinDistance()

The following examples show how to use org.apache.commons.lang3.StringUtils#getLevenshteinDistance(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may want to check out the right sidebar which shows the related API usage.
Example 1
Source Project: Truck-Factor   File: AliasesIdentifier.java    License: MIT License 6 votes vote down vote up
/**
 * Groups developers whose names are within a small Levenshtein distance of each other.
 *
 * <p>Names are compared after being passed through {@code convertToUTFLower} (defined
 * elsewhere in this class). Two developers are only paired when their ids differ and
 * their raw names are not already equal.
 *
 * @param allDevelopers developers to scan; this list is copied, never modified
 * @param distance      maximum edit distance for two names to count as aliases, or
 *                      {@code -1} to use the number of space-separated tokens in the
 *                      first developer's name as the limit
 * @param minSize       minimum name length a developer needs to be used as a group head
 * @return map from a developer to the developers detected as its aliases; developers
 *         without aliases have no entry
 */
private static Map<Developer, List<Developer>> findAliases(List<Developer> allDevelopers, int distance, int minSize) {
	// Copy-on-write list: removing elements while the for-each loops are running does not
	// throw, because each loop iterates over the snapshot taken when it started.
	List<Developer> copyList = new CopyOnWriteArrayList<Developer>(allDevelopers);
	Map<Developer, List<Developer>> aliases = new HashMap<Developer, List<Developer>>();
	for (Developer developer1 : copyList) {
		copyList.remove(developer1);
		String name1 = developer1.getName();
		if (name1.length() < minSize) {
			// Too short to be a reliable group head; nothing to compare.
			continue;
		}
		// Everything below depends only on developer1, so compute it once per outer pass.
		String normalized1 = convertToUTFLower(name1);
		int maxDistance = (distance == -1) ? name1.split(" ").length : distance;
		for (Developer developer2 : copyList) {
			String name2 = developer2.getName();
			// NOTE(review): ids are compared with == as in the original code; if getId()
			// returns a boxed type or String this is a reference comparison - confirm.
			if (developer1.getId() == developer2.getId() || name1.equals(name2)) {
				continue;
			}
			// Cheap checks passed; only now pay for the edit-distance computation.
			int localDistance = StringUtils.getLevenshteinDistance(normalized1, convertToUTFLower(name2));
			if (localDistance <= maxDistance) {
				List<Developer> group = aliases.get(developer1);
				if (group == null) {
					group = new ArrayList<Developer>();
					aliases.put(developer1, group);
				}
				group.add(developer2);
				// Removed so later inner loops skip it. NOTE(review): the outer loop still
				// visits it, because its snapshot predates this removal - confirm intended.
				copyList.remove(developer2);
			}
		}
	}
	return aliases;
}
 
Example 2
Source Project: Truck-Factor   File: NewAliasHandler.java    License: MIT License 6 votes vote down vote up
/**
 * Groups developer names that are within a small Levenshtein distance of each other.
 *
 * <p>Names are compared after being passed through {@code convertToUTFLower} (defined
 * elsewhere in this class). Names that are already equal are never paired.
 *
 * @param allDevelopers developer names to scan; this list is copied, never modified
 * @param distance      maximum edit distance for two names to count as aliases, or
 *                      {@code -1} to use the number of space-separated tokens in the
 *                      first name as the limit
 * @param minSize       minimum length a name needs to be used as a group head
 * @return map from a name to the names detected as its aliases; names without aliases
 *         have no entry
 */
private static Map<String, List<String>> findAliases(List<String> allDevelopers, int distance, int minSize) {
	// Copy-on-write list: removing elements while the for-each loops are running does not
	// throw, because each loop iterates over the snapshot taken when it started.
	List<String> copyList = new CopyOnWriteArrayList<String>(allDevelopers);
	Map<String, List<String>> aliases = new HashMap<String, List<String>>();
	for (String developer1 : copyList) {
		copyList.remove(developer1);
		if (developer1.length() < minSize) {
			// Too short to be a reliable group head; nothing to compare.
			continue;
		}
		// Everything below depends only on developer1, so compute it once per outer pass.
		String normalized1 = convertToUTFLower(developer1);
		int maxDistance = (distance == -1) ? developer1.split(" ").length : distance;
		for (String developer2 : copyList) {
			if (developer1.equals(developer2)) {
				continue;
			}
			// Cheap check passed; only now pay for the edit-distance computation.
			int localDistance = StringUtils.getLevenshteinDistance(normalized1, convertToUTFLower(developer2));
			if (localDistance <= maxDistance) {
				List<String> group = aliases.get(developer1);
				if (group == null) {
					group = new ArrayList<String>();
					aliases.put(developer1, group);
				}
				group.add(developer2);
				// Removed so later inner loops skip it. NOTE(review): the outer loop still
				// visits it, because its snapshot predates this removal - confirm intended.
				copyList.remove(developer2);
			}
		}
	}
	return aliases;
}
 
Example 3
Source Project: Truck-Factor   File: AliasHandler.java    License: MIT License 6 votes vote down vote up
/**
 * Groups developer names that are within a small Levenshtein distance of each other.
 *
 * <p>Names are compared after being passed through {@code convertToUTFLower} (defined
 * elsewhere in this class). Names that are already equal are never paired.
 *
 * @param allDevelopers developer names to scan; this list is copied, never modified
 * @param distance      maximum edit distance for two names to count as aliases, or
 *                      {@code -1} to use the number of space-separated tokens in the
 *                      first name as the limit
 * @param minSize       minimum length a name needs to be used as a group head
 * @return map from a name to the names detected as its aliases; names without aliases
 *         have no entry
 */
private static Map<String, List<String>> findAliases(List<String> allDevelopers, int distance, int minSize) {
	// Copy-on-write list: removing elements while the for-each loops are running does not
	// throw, because each loop iterates over the snapshot taken when it started.
	List<String> copyList = new CopyOnWriteArrayList<String>(allDevelopers);
	Map<String, List<String>> aliases = new HashMap<String, List<String>>();
	for (String developer1 : copyList) {
		copyList.remove(developer1);
		if (developer1.length() < minSize) {
			// Too short to be a reliable group head; nothing to compare.
			continue;
		}
		// Everything below depends only on developer1, so compute it once per outer pass.
		String normalized1 = convertToUTFLower(developer1);
		int maxDistance = (distance == -1) ? developer1.split(" ").length : distance;
		for (String developer2 : copyList) {
			if (developer1.equals(developer2)) {
				continue;
			}
			// Cheap check passed; only now pay for the edit-distance computation.
			int localDistance = StringUtils.getLevenshteinDistance(normalized1, convertToUTFLower(developer2));
			if (localDistance <= maxDistance) {
				List<String> group = aliases.get(developer1);
				if (group == null) {
					group = new ArrayList<String>();
					aliases.put(developer1, group);
				}
				group.add(developer2);
				// Removed so later inner loops skip it. NOTE(review): the outer loop still
				// visits it, because its snapshot predates this removal - confirm intended.
				copyList.remove(developer2);
			}
		}
	}
	return aliases;
}
 
Example 4
/**
 * Finds the phone contact that best matches a Facebook contact name.
 *
 * <p>With {@code maxdistance == 0} this is a plain, case-sensitive membership test.
 * Otherwise names are lower-cased ({@code null} is treated as the empty string) and the
 * contact with the smallest Levenshtein distance not exceeding {@code maxdistance} wins.
 * On equal distances the contact encountered later in the set's iteration order wins.
 *
 * @param phoneContacts candidate contact names
 * @param fbContact     name to look up
 * @param maxdistance   largest edit distance accepted as a match; {@code 0} means exact
 * @return the matching element of {@code phoneContacts} ({@code fbContact} itself in
 *         exact mode), or {@code null} when nothing is close enough
 */
private static String matches(Set<String> phoneContacts, String fbContact, int maxdistance){
	if (maxdistance == 0){
		return phoneContacts.contains(fbContact) ? fbContact : null;
	}
	// Loop-invariant: normalise the Facebook name once, not once per phone contact.
	String fbLower = fbContact != null ? fbContact.toLowerCase() : "";
	int bestDistance = maxdistance;
	String bestMatch = null;
	for (String contact : phoneContacts){
		String contactLower = contact != null ? contact.toLowerCase() : "";
		int distance = StringUtils.getLevenshteinDistance(contactLower, fbLower);
		// "<=" keeps the original tie-breaking: a later contact replaces an equally good one.
		if (distance <= bestDistance){
			bestMatch = contact;
			bestDistance = distance;
		}
	}
	return bestMatch;
}
 
Example 5
/**
 * Scores how similar a recorded invocation's serialized request is to an incoming one.
 *
 * <p>The recorded request is re-serialized through
 * {@code SerializerWrapper.hessianSerialize} when the invocation's identity is listed in
 * {@code request.getModifiedInvocationIdentity()}; otherwise its stored serialized form
 * is used as is.
 *
 * @param invocation        recorded invocation to compare against
 * @param request           current mock request
 * @param requestSerialized serialized form of the incoming request
 * @return {@code 1 - distance / maxLength}: 1.0 for identical strings (including two
 *         empty strings), falling towards 0.0 as they diverge
 * @throws SerializeException if re-serializing the recorded request fails
 */
private double calcSimilarity(Invocation invocation , MockRequest request, String requestSerialized) throws SerializeException {
    final String requestSerializedTarget;
    if (CollectionUtils.isNotEmpty(request.getModifiedInvocationIdentity()) &&
        request.getModifiedInvocationIdentity().contains(invocation.getIdentity())) {
        requestSerializedTarget = SerializerWrapper.hessianSerialize(invocation.getRequest(),request.getEvent().javaClassLoader);
    } else {
        requestSerializedTarget = invocation.getRequestSerialized();
    }
    int distance = StringUtils.getLevenshteinDistance(requestSerialized, requestSerializedTarget);
    int longest = Math.max(requestSerialized.length(), requestSerializedTarget.length());
    if (longest == 0) {
        // Both strings are empty, hence identical. Without this guard 0.0 / 0 yields NaN,
        // which compares false against every similarity threshold.
        return 1.0;
    }
    return 1 - (double) distance / longest;
}
 
Example 6
/**
     * Decides whether two response bodies are similar enough to be treated as alike.
     * "Similar" does not mean "equal": the strings pass when either their Jaro-Winkler
     * score reaches {@code JARO_THRESHOLD} or their Levenshtein distance stays within
     * {@code LEVENSHTEIN_THRESHOLD} (both are static settings of this class).
     *
     * @param firstString  first response to compare
     * @param secondString second response to compare
     * @return {@code true} if at least one of the two metrics considers the strings similar
     */
    private static boolean testSimilar(String firstString, String secondString) {
        final boolean closeByJaroWinkler =
                StringUtils.getJaroWinklerDistance(firstString, secondString) >= JARO_THRESHOLD;
        final boolean closeByLevenshtein =
                StringUtils.getLevenshteinDistance(firstString, secondString) <= LEVENSHTEIN_THRESHOLD;

        return closeByJaroWinkler || closeByLevenshtein;
    }
 
Example 7
Source Project: FlareBot   File: GuildUtils.java    License: MIT License 5 votes vote down vote up
/**
 * Resolves a {@link Role} of a guild from a user-supplied string.
 *
 * <p>Lookup order: role name (case-insensitive), then role id (all non-digit characters
 * of {@code s} are stripped before parsing). If both fail and a channel was supplied,
 * {@code guild.getRolesByName(s, true)} is consulted; when that is empty as well, an
 * error message is sent to the channel, suggesting the closest self-assignable role name
 * whose Levenshtein distance is below {@code LEVENSHTEIN_DISTANCE}, if there is one.
 *
 * @param s       The String to get a role from
 * @param guildId The id of the {@link Guild} to get the role from
 * @param channel The channel to send an error message to if no role is found; may be
 *                null, in which case no message is sent
 * @return the matching role, or null if no role could be resolved
 */
public static Role getRole(String s, String guildId, TextChannel channel) {
    Guild guild = Getters.getGuildById(guildId);

    // 1) First role whose name equals the input, ignoring case.
    for (Role candidate : guild.getRoles()) {
        if (candidate.getName().equalsIgnoreCase(s)) {
            return candidate;
        }
    }

    // 2) Interpret the digits contained in the input as a role id.
    try {
        Role byId = guild.getRoleById(Long.parseLong(s.replaceAll("[^0-9]", "")));
        if (byId != null) {
            return byId;
        }
    } catch (NumberFormatException | NullPointerException ignored) {
        // No usable digits (or no input at all): fall through to the remaining steps.
    }

    // Without a channel there is nobody to report to, so give up quietly.
    if (channel == null) {
        return null;
    }

    if (!guild.getRolesByName(s, true).isEmpty()) {
        return guild.getRolesByName(s, true).get(0);
    }

    // 3) Nothing matched: look for a near miss among the self-assignable roles.
    String closest = null;
    int distance = LEVENSHTEIN_DISTANCE;
    for (Role role1 : guild.getRoles().stream()
            .filter(selfAssignable -> FlareBotManager.instance().getGuild(guildId).getSelfAssignRoles()
                    .contains(selfAssignable.getId()))
            .collect(Collectors.toList())) {
        int currentDistance = StringUtils.getLevenshteinDistance(role1.getName(), s);
        if (currentDistance < distance) {
            distance = currentDistance;
            closest = role1.getName();
        }
    }
    String suggestion = closest != null ? "Maybe you mean `" + closest + "`" : "";
    MessageUtils.sendErrorMessage("That role does not exist! " + suggestion, channel);
    return null;
}
 
Example 8
Source Project: 10000sentences   File: Importer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Decides whether a sentence pair is acceptable.
 *
 * <p>A pair is rejected when
 * <ul>
 *   <li>both sentences are equal,</li>
 *   <li>their Levenshtein distance is below 20% of their average length,</li>
 *   <li>at least one sentence has 50 or more characters and the longer one is at least
 *       four times as long as the shorter one (integer division, as before), or the
 *       shorter one is empty,</li>
 *   <li>at least one sentence has 50 or more characters and either sentence fully
 *       matches {@code NUMBER_DELIMITER}.</li>
 * </ul>
 * Pairs where both sentences are shorter than 50 characters skip the last two checks.
 *
 * @param s sentence pair to check
 * @return {@code true} if the pair passes all checks
 */
protected boolean sentenceOK(SentenceVO s) {
    String targ = s.getTargetSentence();
    String known = s.getKnownSentence();

    // Same text on both sides.
    if (StringUtils.equals(targ, known)) {
        return false;
    }

    int tLen = targ.length();
    int kLen = known.length();
    // Too similar: fewer edits than 20% of the average length.
    if (StringUtils.getLevenshteinDistance(targ, known) < 0.2 * (tLen + kLen) / 2.) {
        return false;
    }

    if (tLen < 50 && kLen < 50) {
        return true;
    }

    int shorter = Math.min(tLen, kLen);
    int longer = Math.max(tLen, kLen);
    // An empty sentence next to one of 50+ characters used to raise ArithmeticException
    // (division by zero) here; such a pair is maximally lopsided, so it is rejected.
    if (shorter == 0 || longer / shorter > 3) {
        return false;
    }

    // NOTE(review): matches() requires the whole sentence to match NUMBER_DELIMITER;
    // confirm against the pattern definition whether find() was intended.
    if (NUMBER_DELIMITER.matcher(targ).matches() || NUMBER_DELIMITER.matcher(known).matches()) {
        return false;
    }

    return true;
}
 
Example 9
/**
 * Computes the Levenshtein distance between {@code s1} and {@code s2}, which are
 * declared outside this method.
 *
 * @return the edit distance, widened to a {@code Float}
 */
@Override
public Float call() throws Exception {
    final int editDistance = StringUtils.getLevenshteinDistance(s1, s2);
    return (float) editDistance;
}
 
Example 10
/**
 * Computes the Levenshtein distance between two strings after checking both against
 * {@code CommandLimits.isWithinMaximumCommandLength}.
 *
 * @param a first string
 * @param b second string
 * @return the edit distance between {@code a} and {@code b}
 * @throws IllegalArgumentException if either input fails the length check
 */
static int levenshteinDistance(final String a, final String b) {
  // Short-circuit keeps the original order: b is only checked when a is acceptable.
  final boolean bothWithinLimit =
      CommandLimits.isWithinMaximumCommandLength(a)
          && CommandLimits.isWithinMaximumCommandLength(b);
  if (!bothWithinLimit) {
    throw new IllegalArgumentException("input is too big.");
  }
  return StringUtils.getLevenshteinDistance(a, b);
}
 
Example 11
/**
 * Generate a word, create a dictionary of permutations that are created using random edit
 * operations, and check that the Levenshtein automaton for that word finds exactly the
 * permutations within its edit distance.
 *
 * @param minLength             The minimum length of the generated word.
 * @param maxLength             The maximum length of the generated word; must not be
 *                              smaller than {@code minLength}.
 * @param nPermutations         The number of permutations to generate.
 * @param nRandomEditOperations Bound passed to {@code d_rng.nextInt} when drawing the
 *                              number of edit operations for one permutation.
 *                              NOTE(review): if {@code d_rng} is a {@code java.util.Random}
 *                              this bound is exclusive - confirm.
 * @param distance              Test the levenshtein automaton with this edit distance.
 * @throws DictionaryBuilderException
 */
private void generateAndCheckPermutations(int minLength, int maxLength, int nPermutations, int nRandomEditOperations,
                                          int distance) throws DictionaryBuilderException {
    // Draw the word length from [minLength, maxLength]. The previous expression,
    // minLength + (maxLength - minLength + 1), always evaluated to maxLength + 1.
    String str = randomString(minLength + d_rng.nextInt(maxLength - minLength + 1));

    TreeSet<String> all = new TreeSet<>();
    Set<String> shouldHave = new HashSet<>();

    for (int i = 0; i < nPermutations; ++i) {
        int n = d_rng.nextInt(nRandomEditOperations);

        // Apply n randomly chosen edit operations to a fresh copy of the word.
        StringBuilder permutedBuilder = new StringBuilder(str);
        for (int perm = 0; perm < n; ++perm) {
            d_editOperations[d_rng.nextInt(d_editOperations.length)].apply(permutedBuilder);
        }

        String permuted = permutedBuilder.toString();

        all.add(permuted);

        // Reference result: the permutations the automaton is expected to accept.
        if (StringUtils.getLevenshteinDistance(str, permuted) <= distance) {
            shouldHave.add(permuted);
        }
    }

    Dictionary dict = new DictionaryBuilder().addAll(all).build();
    LevenshteinAutomaton la = new LevenshteinAutomaton(str, distance);

    Assert.assertEquals(shouldHave, la.intersectionLanguage(dict));
}
 
Example 12
Source Project: Stargraph   File: LevenshteinRanker.java    License: MIT License 4 votes vote down vote up
/**
 * Scores two character sequences by their Levenshtein distance.
 *
 * @param s1 first sequence
 * @param s2 second sequence
 * @return the edit distance, widened to {@code double}
 */
@Override
double computeStringDistance(CharSequence s1, CharSequence s2) {
    final int editDistance = StringUtils.getLevenshteinDistance(s1, s2);
    return editDistance;
}
 
Example 13
Source Project: Indra   File: DistanceStringFilter.java    License: MIT License 4 votes vote down vote up
/**
 * Tests whether two terms match: {@code t1} must be at least {@code threshold}
 * characters long and the Levenshtein distance between the terms must be below
 * {@code min}.
 *
 * @param t1 first term; its length is checked against {@code threshold}
 * @param t2 second term
 * @return {@code true} if both conditions hold
 */
@Override
public boolean matches(String t1, String t2) {
    final boolean longEnough = t1.length() >= this.threshold;
    if (!longEnough) {
        // Skip the distance computation entirely for short terms.
        return false;
    }
    return StringUtils.getLevenshteinDistance(t1, t2) < min;
}
 
Example 14
Source Project: CogStack-Pipeline   File: StringTools.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Returns the Levenshtein distance between two strings by delegating to
 * {@code StringUtils.getLevenshteinDistance}.
 *
 * @param str1 first string
 * @param str2 second string
 * @return the edit distance between {@code str1} and {@code str2}
 */
public static int getLevenshteinDistance(String str1, String str2) {
    final int distance = StringUtils.getLevenshteinDistance(str1, str2);
    return distance;
}
 
Example 15
/**
 * Select the best matches for each location name extracted from a document,
 * choosing from among a list of lists of candidate matches. Every candidate
 * receives a weight built from:
 * 1) whether the extracted name occurs in the candidate's name as a whole
 *    word ({@code WEIGHT_NAME_MATCH}) or only as a substring
 *    ({@code WEIGHT_NAME_PART_MATCH}),
 * 2) {@code getCalibratedWeight} applied to the number of alternate names and
 *    the summed edit distance to those alternate names that contain the
 *    extracted name,
 * 3) its position in the candidate list (earlier entries get more weight,
 *    scaled by {@code WEIGHT_SORT_ORDER}).
 * The weight is stored on each candidate via {@code setWeight}, and the
 * {@code count} heaviest candidates are put into {@code resolvedEntities}.
 * Names with an empty candidate list are skipped.
 *
 * @param resolvedEntities
 *            final result for the input stream; receives one entry per
 *            extracted name that has candidates
 * @param allCandidates
 *            each location name may hit several documents, this is the
 *            collection of all documents hit
 * @param count
 *            maximum number of results for one location
 */
private void pickBestCandidates(
		HashMap<String, List<Location>> resolvedEntities,
		HashMap<String, List<Location>> allCandidates, int count) {

	for (String extractedName : allCandidates.keySet()) {

		List<Location> cur = allCandidates.get(extractedName);
		if (cur.isEmpty()) {
			continue; // no results found for this name
		}

		// Max-heap on weight: poll() hands out the heaviest candidate first.
		PriorityQueue<Location> pq = new PriorityQueue<>(cur.size(), new Comparator<Location>() {
			@Override
			public int compare(Location o1, Location o2) {
				return Integer.compare(o2.getWeight(), o1.getWeight());
			}
		});

		// Space-padded so that contains() below only hits whole words; the same for
		// every candidate, so it is built once per extracted name.
		String paddedExtractedName = String.format(" %s ", extractedName);

		for (int i = 0; i < cur.size(); ++i) {
			Location candidate = cur.get(i);
			int weight = 0;

			String resolvedName = String.format(" %s ", candidate.getName());
			if (resolvedName.contains(paddedExtractedName)) {
				// Extracted name is found as an exact word in the name
				weight = WEIGHT_NAME_MATCH;
			} else if (resolvedName.contains(extractedName)) {
				// Extracted name is found only as part of the name
				weight = WEIGHT_NAME_PART_MATCH;
			}

			// Sum the edit distance over the alternate names that contain the extracted name
			String[] altNames = candidate.getAlternateNames().split(",");
			float altEditDist = 0;
			for (String altName : altNames) {
				if (altName.contains(extractedName)) {
					altEditDist += StringUtils.getLevenshteinDistance(extractedName, altName);
				}
			}
			// The lesser the edit distance, the more the weight should be
			weight += getCalibratedWeight(altNames.length, altEditDist);

			// Give preference to sorted results. 0th result should have more priority
			weight += (cur.size() - i) * WEIGHT_SORT_ORDER;

			candidate.setWeight(weight);
			pq.add(candidate);
		}

		// Take at most `count` candidates, heaviest first.
		List<Location> resultList = new ArrayList<>();
		for (int i = 0; i < count && !pq.isEmpty(); i++) {
			resultList.add(pq.poll());
		}

		resolvedEntities.put(extractedName, resultList);
	}
}
 
Example 16
Source Project: datacollector   File: FuzzyMatch.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Converts the Levenshtein distance of two strings into a percentage score.
 *
 * <p>Despite its name, the method returns {@code 100 - (distance / longerLength) * 100},
 * truncated to an int, so a higher value means the strings are more alike. For two empty
 * strings the ratio is NaN, which the int cast turns into 0, giving a result of 100.
 *
 * @param s1 first string
 * @param s2 second string
 * @return score where 100 means the strings are identical
 */
private static int calculateLevenshteinDistance(String s1, String s2) {
  final int edits = StringUtils.getLevenshteinDistance(s1, s2);
  final int longest = Math.max(s1.length(), s2.length());
  final double ratio = ((double) edits) / longest;
  final int percentDifferent = (int) (ratio * 100);
  return 100 - percentDifferent;
}
 
Example 17
/**
 * Calls {@code StringUtils.getLevenshteinDistance} on the literals "fly" and "ant" and
 * discards the result.
 */
private static void compareVersions() {
	StringUtils.getLevenshteinDistance("fly", "ant");
}
 
Example 18
/**
 * Computes the Levenshtein distance between this object's {@code NormalizedPattern}
 * and the given pattern. The caller is expected to pass an already normalized pattern.
 *
 * @param pattern the pattern to compare against {@code NormalizedPattern}
 * @return the edit distance between the two patterns
 */
public int calculateLevenshtein(String pattern) {
	final int distance = StringUtils.getLevenshteinDistance(NormalizedPattern, pattern);
	return distance;
}