Java Code Examples for java.text.Normalizer#normalize()

The following examples show how to use java.text.Normalizer#normalize(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ParsedIRI.java    From rdf4j with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Normalizes a percent-encoded sequence.
 * <p>
 * Encodings found in {@code common_pct} are resolved through the lookup table: the matching
 * entry of {@code common} is returned as a literal character when it is a member of
 * {@code unreserved}, otherwise the encoding is passed through unchanged. Any other input is
 * percent-decoded, converted to Unicode NFC and rebuilt code point by code point: members of
 * {@code unreserved} are emitted literally and everything else is percent-encoded again. When
 * the decoded text is a single code point outside {@code unreserved}, the result of
 * {@code toUpperCase(encoded)} is returned instead.
 *
 * @param encoded the percent-encoded sequence to normalize
 * @return the normalized representation of {@code encoded}
 */
private String normalizePctEncoding(String encoded) {
	int cidx = Arrays.binarySearch(common_pct, encoded);
	if (cidx >= 0) {
		if (isMember(unreserved, common[cidx])) {
			return new String(Character.toChars(common[cidx])); // quickly decode unreserved encodings
		}
		return encoded; // pass through reserved encodings
	}
	String decoded = pctDecode(encoded);
	String ns = Normalizer.normalize(decoded, Normalizer.Form.NFC);
	final int length = ns.length();
	final int n = ns.codePointCount(0, length);
	StringBuilder sb = new StringBuilder(length);
	// Walk the string once, advancing by the width of each code point, instead of
	// re-locating every code point from the start of the string.
	for (int i = 0; i < length;) {
		int chr = ns.codePointAt(i);
		i += Character.charCount(chr);
		if (isMember(unreserved, chr)) {
			sb.appendCodePoint(chr);
		} else if (n == 1) {
			return toUpperCase(encoded);
		} else {
			sb.append(pctEncode(chr));
		}
	}
	return sb.toString();
}
 
Example 2
Source File: PubAnnotationConvertUtil.java    From bioasq with Apache License 2.0 6 votes vote down vote up
/**
 * Normalizes annotation text so that look-alike spellings compare equal.
 * <p>
 * A fixed set of symbols is replaced with plain substitutes, trailing whitespace is removed,
 * the text is converted to Unicode NFKD and everything matched by
 * {@code diacriticalMarksPattern} is stripped.
 *
 * @param text the text to normalize; must not be {@code null}
 * @return the normalized text
 */
static String normalizeText(String text) {
    // replace whitespace, double-quotes, percent signs and '®' with spaces
    String ret = text.replaceAll("[\\s\"%®]", " ");
    // replace vbar with 'I' for "Deiodinase type || (DIO2)"
    ret = ret.replace('|', 'I');
    // replace multiplication mark '×' with 'x'
    ret = ret.replace('×', 'x');
    // sharp-s to beta
    ret = ret.replace('ß', 'β');
    // replace '·' with '.'
    ret = ret.replace('·', '.');
    // replace '±' with '+'
    ret = ret.replace('±', '+');
    // remove trailing whitespace
    ret = ret.replaceAll("\\s+$", "");
    // replace diacritical marks plus symbols that look alike, see
    // http://stackoverflow.com/questions/20674577/how-to-compare-unicode-characters-that-look-alike
    ret = Normalizer.normalize(ret, Normalizer.Form.NFKD);
    return diacriticalMarksPattern.matcher(ret).replaceAll("");
}
 
Example 3
Source File: MCRPath.java    From mycore with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Converts the path to Unicode NFC, validates every character and removes redundant slashes.
 * <p>
 * Characters are validated one by one through {@code checkCharacter}. At the first doubled
 * separator, or when the path ends with a separator, the remaining clean-up is delegated to
 * {@code normalize}.
 *
 * @param uncleanPath path to check
 * @return normalized path
 * @throws InvalidPathException if <code>uncleanPath</code> contains invalid characters
 */
static String normalizeAndCheck(final String uncleanPath) {
    final String nfcPath = Normalizer.normalize(uncleanPath, Normalizer.Form.NFC);
    final int length = nfcPath.length();
    // This flag is never updated here, so checkCharacter always receives false.
    final boolean afterSeparator = false;

    char previous = 0;
    for (int index = 0; index < length; index++) {
        final char current = nfcPath.charAt(index);
        checkCharacter(nfcPath, current, afterSeparator);
        if (previous == SEPARATOR && current == SEPARATOR) {
            // first doubled separator found: hand over from the character before it
            return normalize(nfcPath, length, index - 1);
        }
        previous = current;
    }

    if (previous != SEPARATOR) {
        return nfcPath;
    }
    // remove final slash
    return normalize(nfcPath, length, length - 1);
}
 
Example 4
Source File: StringUtil.java    From Kepler with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Replaces control characters in user input with spaces and optionally normalizes it.
 * <p>
 * The characters with codes 1, 2, 9, 10 and 12 are always replaced. Code 13 is replaced only
 * on request. When the {@code normalise.input.strings} setting is enabled the result is
 * additionally converted to Unicode NFD.
 *
 * @param input the input
 * @param filerNewline whether the character with code 13 should be replaced as well
 * @return the filtered string
 */
public static String filterInput(String input, boolean filerNewline) {
    String filtered = input;

    final char[] alwaysReplaced = { (char) 1, (char) 2, (char) 9, (char) 10, (char) 12 };
    for (char control : alwaysReplaced) {
        filtered = filtered.replace(control, ' ');
    }

    if (filerNewline) {
        filtered = filtered.replace((char) 13, ' ');
    }

    final boolean normalise = GameConfiguration.getInstance().getBoolean("normalise.input.strings");
    return normalise ? Normalizer.normalize(filtered, Normalizer.Form.NFD) : filtered;
}
 
Example 5
Source File: SlugUtil.java    From mapr-music with Apache License 2.0 5 votes vote down vote up
/**
 * Converts the specified string to its slug representation, which can be used to generate
 * readable and SEO-friendly URLs.
 * <p>
 * The input is transliterated, every {@code WHITESPACE} match becomes a hyphen, the text is
 * decomposed to Unicode NFD, {@code NONLATIN} and {@code EDGESDHASHES} matches are removed and
 * the result is lower-cased using the English locale.
 *
 * @param input string, which will be converted.
 * @return slug representation of the string.
 */
public static String toSlug(String input) {
    final String hyphenated = WHITESPACE.matcher(transliterator.transform(input)).replaceAll("-");
    final String decomposed = Normalizer.normalize(hyphenated, Normalizer.Form.NFD);
    final String latinOnly = NONLATIN.matcher(decomposed).replaceAll("");
    final String trimmedEdges = EDGESDHASHES.matcher(latinOnly).replaceAll("");
    return trimmedEdges.toLowerCase(Locale.ENGLISH);
}
 
Example 6
Source File: KeyCharacterMap.java    From android_9.0.0_r45 with Apache License 2.0 5 votes vote down vote up
/**
 * Get the character that is produced by combining the dead key producing accent
 * with the key producing character c.
 * For example, getDeadChar('`', 'e') returns &egrave;.
 * getDeadChar('^', ' ') returns '^' and getDeadChar('^', '^') returns '^'.
 * <p>
 * Results are memoized in {@code sDeadKeyCache}; lookups and updates happen while holding
 * that cache's monitor.
 *
 * @param accent The accent character.  eg. '`'
 * @param c The basic character.
 * @return The combined character, or 0 if the characters cannot be combined.
 */
public static int getDeadChar(int accent, int c) {
    // The same dead character typed twice, or a dead character followed by a space,
    // yields the non-combining accent itself; no composition is needed.
    if (CHAR_SPACE == c || accent == c) {
        return accent;
    }

    int combining = sAccentToCombining.get(accent);
    if (combining == 0) {
        return 0;
    }

    // Cache key: combining mark in the upper 16 bits, base character below.
    final int cacheKey = (combining << 16) | c;
    synchronized (sDeadKeyCache) {
        final int cached = sDeadKeyCache.get(cacheKey, -1);
        if (cached != -1) {
            return cached;
        }

        sDeadKeyBuilder.setLength(0);
        sDeadKeyBuilder.append((char)c);
        sDeadKeyBuilder.append((char)combining);
        final String composed = Normalizer.normalize(sDeadKeyBuilder, Normalizer.Form.NFC);

        // Only a result that collapsed into a single code point counts as a combination.
        final int combined;
        if (composed.codePointCount(0, composed.length()) == 1) {
            combined = composed.codePointAt(0);
        } else {
            combined = 0;
        }
        sDeadKeyCache.put(cacheKey, combined);
        return combined;
    }
}
 
Example 7
Source File: Bip39.java    From AndroidWallet with GNU General Public License v3.0 5 votes vote down vote up
@SuppressWarnings("NewApi")
private MasterSeed(byte[] bip39RawEntropy, String bip39Passphrase, byte[] bip32MasterSeed) {
   _bip39RawEntropy = bip39RawEntropy;
   _bip39Passphrase = Normalizer.normalize(bip39Passphrase, Normalizer.Form.NFKD);
   _bip32MasterSeed = bip32MasterSeed;
   _wordListType = ENGLISH_WORD_LIST_TYPE;
}
 
Example 8
Source File: RegularFileObject.java    From openjdk-8 with GNU General Public License v2.0 5 votes vote down vote up
@Override
public boolean isNameCompatible(String cn, JavaFileObject.Kind kind) {
    // null check
    cn.getClass();
    if (kind == Kind.OTHER && getKind() != kind) {
        return false;
    }

    final String expected = cn + kind.extension;
    if (name.equals(expected)) {
        return true;
    }

    // On Mac OS X the file name and the class name may be normalized differently;
    // in that case compare against the file name in Normal Form Composed (NFC).
    if (isMacOS
        && Normalizer.isNormalized(name, Normalizer.Form.NFD)
        && Normalizer.isNormalized(expected, Normalizer.Form.NFC)) {
        final String composedName = Normalizer.normalize(name, Normalizer.Form.NFC);
        if (composedName.equals(expected)) {
            this.name = composedName;
            return true;
        }
    }

    if (name.equalsIgnoreCase(expected)) {
        try {
            // allow for Windows
            return file.getCanonicalFile().getName().equals(expected);
        } catch (IOException ignored) {
            // the canonical name could not be resolved; report the name as incompatible
        }
    }
    return false;
}
 
Example 9
Source File: SmsMsg.java    From XposedSmsCode with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds an {@code SmsMsg} from the message parts carried by the intent. The sender is taken
 * from the first part; sender and body are both stored in Unicode NFC form.
 *
 * @param intent the intent to extract the message parts from
 * @return a new message holding the normalized sender and body
 */
public static SmsMsg fromIntent(Intent intent) {
    final SmsMessage[] parts = SmsMessageUtils.fromIntent(intent);
    final String rawSender = parts[0].getDisplayOriginatingAddress();
    final String rawBody = SmsMessageUtils.getMessageBody(parts);

    final String sender = Normalizer.normalize(rawSender, Normalizer.Form.NFC);
    final String body = Normalizer.normalize(rawBody, Normalizer.Form.NFC);

    final SmsMsg result = new SmsMsg();
    result.setSender(sender).setBody(body);
    return result;
}
 
Example 10
Source File: PinHasher.java    From mollyim-android with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Produces the byte form of a PIN: the PIN is trimmed, passed through
 * {@code PinString.toArabic} when {@code PinString.allNumeric} accepts it, converted to
 * Unicode NFKD and encoded as UTF-8.
 *
 * @param pin the PIN as entered
 * @return the UTF-8 bytes of the normalized PIN
 */
public static byte[] normalize(String pin) {
  final String trimmed = pin.trim();
  final String digitsUnified = PinString.allNumeric(trimmed) ? PinString.toArabic(trimmed) : trimmed;
  final String decomposed = Normalizer.normalize(digitsUnified, Normalizer.Form.NFKD);
  return decomposed.getBytes(StandardCharsets.UTF_8);
}
 
Example 11
Source File: BowlerStudioMenu.java    From BowlerStudio with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds a slug from the input: {@code WHITESPACE} matches become hyphens, the text is
 * decomposed to Unicode NFD, {@code NONLATIN} matches are removed and finally every hyphen is
 * turned into an underscore.
 *
 * @param input the text to convert
 * @return the slug
 */
public static String slugify(String input) {
    final String hyphenated = WHITESPACE.matcher(input).replaceAll("-");
    final String decomposed = Normalizer.normalize(hyphenated, Form.NFD);
    final String latinOnly = NONLATIN.matcher(decomposed).replaceAll("");
    return latinOnly.replace('-', '_');
}
 
Example 12
Source File: ReScuePattern.java    From ReScue with GNU General Public License v2.0 5 votes vote down vote up
/**
 * The pattern is converted to normalizedD form and then a pure group
 * is constructed to match canonical equivalences of the characters.
 *
 * Reads the {@code pattern} field and writes {@code normalizedPattern} and
 * {@code patternLength}. {@code patternLength} keeps the length of the NFD
 * form, while {@code normalizedPattern} ends up holding the rewritten pattern.
 */
private void normalize() {
    // Not referenced again within this method.
    boolean inCharClass = false;
    // Code point handled in the previous iteration; -1 before the first one.
    int lastCodePoint = -1;

    // Convert pattern into normalizedD form
    normalizedPattern = Normalizer.normalize(pattern, Normalizer.Form.NFD);
    patternLength = normalizedPattern.length();

    // Modify pattern to match canonical equivalences
    StringBuilder newPattern = new StringBuilder(patternLength);
    // i is a char index; it advances by the width of each code point at the loop's end.
    for(int i=0; i<patternLength; ) {
        int c = normalizedPattern.codePointAt(i);
        StringBuilder sequenceBuffer;
        if ((Character.getType(c) == Character.NON_SPACING_MARK)
            && (lastCodePoint != -1)) {
            // A non-spacing mark following some code point: collect the base and its marks.
            sequenceBuffer = new StringBuilder();
            sequenceBuffer.appendCodePoint(lastCodePoint);
            sequenceBuffer.appendCodePoint(c);
            // Each following code point is appended before its type is tested, so the
            // first code point that is not a non-spacing mark (if any) is collected too.
            while(Character.getType(c) == Character.NON_SPACING_MARK) {
                i += Character.charCount(c);
                if (i >= patternLength)
                    break;
                c = normalizedPattern.codePointAt(i);
                sequenceBuffer.appendCodePoint(c);
            }
            String ea = produceEquivalentAlternation(
                                           sequenceBuffer.toString());
            // Drop the base code point already emitted, then emit the alternation
            // wrapped in a non-capturing group in its place.
            newPattern.setLength(newPattern.length()-Character.charCount(lastCodePoint));
            newPattern.append("(?:").append(ea).append(")");
        } else if (c == '[' && lastCodePoint != '\\') {
            // '[' not preceded by a backslash: normalizeCharClass takes over and
            // returns the index to continue from.
            i = normalizeCharClass(newPattern, i);
        } else {
            newPattern.appendCodePoint(c);
        }
        lastCodePoint = c;
        i += Character.charCount(c);
    }
    normalizedPattern = newPattern.toString();
}
 
Example 13
Source File: ReScuePattern.java    From ReScue with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Attempts to compose input by combining the first character
 * with the first combining mark following it.
 *
 * @param input the text whose leading two characters should be composed
 * @return the NFC composition of the leading two characters followed by the
 *         untouched remainder of {@code input}, or {@code null} if NFC
 *         normalization leaves the leading two characters unchanged
 */
private String composeOneStep(String input) {
    final int headLength = countChars(input, 0, 2);
    final String head = input.substring(0, headLength);
    final String composedHead = Normalizer.normalize(head, Normalizer.Form.NFC);

    if (composedHead.equals(head)) {
        // nothing could be composed
        return null;
    }
    return composedHead + input.substring(headLength);
}
 
Example 14
Source File: Frag.java    From prayer-times-android with Apache License 2.0 4 votes vote down vote up
/**
 * Decomposes the text to Unicode NFD, replaces every non-ASCII character with an underscore
 * and lower-cases the result using the English locale.
 *
 * @param str the text to normalize
 * @return the lower-cased ASCII-only form of {@code str}
 */
private static String normalize(CharSequence str) {
    final String decomposed = Normalizer.normalize(str, Normalizer.Form.NFD);
    final String asciiOnly = decomposed.replaceAll("[^\\p{ASCII}]", "_");
    return asciiOnly.toLowerCase(Locale.ENGLISH);
}
 
Example 15
Source File: Utils.java    From javaee8-jaxrs-sample with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Builds a slug from the input: {@code WHITESPACE} matches become hyphens, the text is
 * decomposed to Unicode NFD, {@code NONLATIN} matches are removed and the result is
 * lower-cased using the English locale.
 *
 * @param input the text to convert
 * @return the slug
 */
public static String slugify(String input) {
    final String hyphenated = WHITESPACE.matcher(input).replaceAll("-");
    final String decomposed = Normalizer.normalize(hyphenated, Form.NFD);
    return NONLATIN.matcher(decomposed).replaceAll("").toLowerCase(Locale.ENGLISH);
}
 
Example 16
Source File: RedisSentinelURI.java    From redis-rdb-cli with Apache License 2.0 4 votes vote down vote up
/**
 * Converts the string to Unicode NFC and encodes it as UTF-8.
 *
 * @param s the text to encode
 * @return a buffer holding the UTF-8 bytes of the NFC form of {@code s}
 */
private static ByteBuffer normalize(String s) {
    final String composed = Normalizer.normalize(s, Normalizer.Form.NFC);
    return StandardCharsets.UTF_8.encode(composed);
}
 
Example 17
Source File: LegacyAssignmentPolicy.java    From marklogic-contentpump with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the given URI in Unicode NFC form.
 *
 * @param uri the URI to normalize
 * @return the NFC form of {@code uri}
 */
protected static String normalize(String uri) {
    final Normalizer.Form composedForm = Normalizer.Form.NFC;
    return Normalizer.normalize(uri, composedForm);
}
 
Example 18
Source File: LauncherHelper.java    From Bytecoder with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the main class for a module. The query is either a module name
 * or module-name/main-class. For the former then the module's main class
 * is obtained from the module descriptor (MainClass attribute).
 * <p>
 * On success the {@code jdk.module.main.class} system property is set to the
 * name of the loaded class. Failures are reported through {@code abort}.
 *
 * @param what a module name, optionally followed by {@code /} and a class name
 * @return the loaded main class
 */
private static Class<?> loadModuleMainClass(String what) {
    // split "module" or "module/class" at the first slash
    final int slash = what.indexOf('/');
    final boolean classGiven = slash != -1;
    final String mainModule = classGiven ? what.substring(0, slash) : what;
    String mainClass = classGiven ? what.substring(slash + 1) : null;

    // main module is in the boot layer
    Optional<Module> lookup = ModuleLayer.boot().findModule(mainModule);
    if (!lookup.isPresent()) {
        // should not happen
        throw new InternalError("Module " + mainModule + " not in boot Layer");
    }
    Module mod = lookup.get();

    // get main class
    if (mainClass == null) {
        Optional<String> declared = mod.getDescriptor().mainClass();
        if (!declared.isPresent()) {
            abort(null, "java.launcher.module.error1", mainModule);
        }
        mainClass = declared.get();
    }

    // load the class from the module
    Class<?> loaded = null;
    try {
        loaded = Class.forName(mod, mainClass);
        if (loaded == null) {
            // retry with the NFC form of the name when running on OS X and the name is in NFD
            final boolean onOsX = System.getProperty("os.name", "").contains("OS X");
            if (onOsX && Normalizer.isNormalized(mainClass, Normalizer.Form.NFD)) {
                final String composedName = Normalizer.normalize(mainClass, Normalizer.Form.NFC);
                loaded = Class.forName(mod, composedName);
            }
        }
    } catch (LinkageError le) {
        abort(null, "java.launcher.module.error3", mainClass, mod.getName(),
                le.getClass().getName() + ": " + le.getLocalizedMessage());
    }
    if (loaded == null) {
        abort(null, "java.launcher.module.error2", mainClass, mainModule);
    }

    System.setProperty("jdk.module.main.class", loaded.getName());
    return loaded;
}
 
Example 19
Source File: DomainServiceImpl.java    From graviteeio-access-management with Apache License 2.0 4 votes vote down vote up
/**
 * Derives a context path from a domain name: diacritical marks are stripped, the text is
 * lower-cased and trimmed, and every run of whitespace becomes a single hyphen.
 *
 * @param domainName the human-readable domain name
 * @return the generated context path
 */
private String generateContextPath(String domainName) {
    // Decompose accented characters so the combining marks can be removed on their own.
    String nfdNormalizedString = Normalizer.normalize(domainName, Normalizer.Form.NFD);
    Pattern pattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
    String withoutMarks = pattern.matcher(nfdNormalizedString).replaceAll("");
    // Locale.ROOT keeps the result independent of the JVM default locale; under a Turkish
    // locale the no-argument toLowerCase() turns 'I' into a dotless 'ı'.
    return withoutMarks.toLowerCase(java.util.Locale.ROOT).trim().replaceAll("\\s{1,}", "-");
}
 
Example 20
Source File: MCRXMLFunctions.java    From mycore with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Returns the given String in Unicode NFC normal form.
 *
 * @param arg0 String to be normalized
 * @return the NFC form of {@code arg0}
 * @see Normalizer#normalize(CharSequence, java.text.Normalizer.Form)
 */
public static String normalizeUnicode(String arg0) {
    final String composed = Normalizer.normalize(arg0, Normalizer.Form.NFC);
    return composed;
}