Java Code Examples for java.text.Normalizer.Form

The following examples show how to use java.text.Normalizer.Form. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may want to check out the right sidebar which shows the related API usage.
Example 1
Source Project: jdk8u60   Source File: CDataTransferer.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Translates raw transfer bytes into an object for the requested flavor.
 *
 * <p>When {@code format} is {@code CF_URL} and the flavor's representation class is
 * {@link URL}, the bytes are decoded into a URL string. The charset name is read (as UTF-8)
 * from the transferable's {@code javaTextEncodingFlavor} data when that flavor is supported,
 * otherwise {@code getDefaultTextCharset()} is used. For every other request the bytes are
 * passed to the superclass; {@code CF_STRING} bytes are first decoded as UTF-8, normalized
 * to NFC and re-encoded.
 */
@Override
public Object translateBytes(byte[] bytes, DataFlavor flavor,
                                long format, Transferable transferable) throws IOException {

        final boolean urlRequested =
                format == CF_URL && URL.class.equals(flavor.getRepresentationClass());

        if (!urlRequested) {
            byte[] payload = bytes;
            if (format == CF_STRING) {
                // Decode, compose to NFC, encode again.
                final String composed = Normalizer.normalize(new String(bytes, "UTF8"), Form.NFC);
                payload = composed.getBytes("UTF8");
            }
            return super.translateBytes(payload, flavor, format, transferable);
        }

        String encodingName = getDefaultTextCharset();
        final boolean encodingOffered =
                transferable != null && transferable.isDataFlavorSupported(javaTextEncodingFlavor);
        if (encodingOffered) {
            try {
                final byte[] rawName = (byte[]) transferable.getTransferData(javaTextEncodingFlavor);
                encodingName = new String(rawName, "UTF-8");
            } catch (UnsupportedFlavorException cannotHappen) {
                // Support for the flavor was checked just above; the default charset stays in effect.
            }
        }

        return new URL(new String(bytes, encodingName));
}
 
Example 2
Source Project: mycore   Source File: MCRTextNormalizer.java    License: GNU General Public License v3.0 6 votes vote down vote up
/**
 * Reduces a text to a simplified lower-case form.
 *
 * <p>Steps, in order: lower-casing with the default locale, hyphen normalization with
 * hyphens turned into blanks, canonical decomposition with all combining marks removed,
 * shortening of "ue"/"oe"/"ae"/"ß"/"ss", replacing punctuation by blanks, and collapsing
 * whitespace runs into single blanks.
 *
 * @param text the text to normalize
 * @return the normalized text
 */
public static String normalizeText(String text) {
    String lowered = text.toLowerCase(Locale.getDefault());
    String dehyphenated = new MCRHyphenNormalizer().normalize(lowered).replace("-", " ");

    // canonical decomposition, then drop every combining mark (accents etc.)
    String unaccented = Normalizer.normalize(dehyphenated, Form.NFD).replaceAll("\\p{M}", "");

    String simplified = unaccented
        .replace("ue", "u")
        .replace("oe", "o")
        .replace("ae", "a")
        .replace("ß", "s")
        .replace("ss", "s");

    // NOTE(review): as written this pattern only matches a non-alphanumeric character
    // followed by one whitespace character and a literal ']'. A character class such as
    // [^a-z0-9\s] may have been intended - confirm before changing, results would differ.
    simplified = simplified.replaceAll("[^a-z0-9]\\s]", "");

    // punctuation becomes a blank; afterwards whitespace runs shrink to one blank
    String depunctuated = simplified.replaceAll("\\p{Punct}", " ").trim();
    return depunctuated.replaceAll("\\s+", " ");
}
 
Example 3
Source Project: mycore   Source File: MCRUtils.java    License: GNU General Public License v3.0 6 votes vote down vote up
/**
 * Computes an iterated hash of a text and returns it via {@code toHexString}.
 *
 * <p>The text is normalized to NFC and encoded as UTF-8. The first round digests the
 * optional salt followed by the text; each further round digests the previous result only.
 * At least one round is always performed.
 *
 * @param iterations requested number of rounds; values below one are treated as one
 * @param salt bytes fed to the digest before the text, or {@code null} for no salt
 * @param text the text to hash
 * @param algorithm name passed to {@link MessageDigest#getInstance(String)}
 * @return the result of {@code toHexString} applied to the final digest
 * @throws NoSuchAlgorithmException if the algorithm is not available
 */
private static String getHash(int iterations, byte[] salt, String text, String algorithm)
    throws NoSuchAlgorithmException {
    // the first round below always runs, so only the rounds after it are counted
    int extraRounds = Math.max(iterations - 1, 0);

    MessageDigest md = MessageDigest.getInstance(algorithm);
    String composed = Normalizer.normalize(text, Form.NFC);
    if (salt != null) {
        md.update(salt);
    }
    byte[] hash = md.digest(composed.getBytes(StandardCharsets.UTF_8));

    while (extraRounds-- > 0) {
        hash = md.digest(hash);
    }
    return toHexString(hash);
}
 
Example 4
Source Project: testarea-pdfbox2   Source File: TextSection.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Concatenates the unicode values of all text positions, separating consecutive words with
 * a single blank, and returns the NFKC-normalized result.
 *
 * @param words the words, each given as a list of its text positions
 * @return the normalized, blank-separated text
 */
String toString(List<List<TextPosition>> words)
{
    StringBuilder joined = new StringBuilder();
    String separator = "";
    for (List<TextPosition> word : words)
    {
        // empty before the first word, a blank before every later one
        joined.append(separator);
        separator = " ";
        for (TextPosition glyph : word)
        {
            joined.append(glyph.getUnicode());
        }
    }
    // cf. http://stackoverflow.com/a/7171932/1729265
    return Normalizer.normalize(joined, Form.NFKC);
}
 
Example 5
Source Project: packagedrone   Source File: Users.java    License: Eclipse Public License 1.0 6 votes vote down vote up
/**
 * Computes a salted, iterated hash of the given data.
 *
 * <p>The data is normalized to NFC and encoded as UTF-8. The first round hashes the salt
 * bytes followed by the data bytes; each of the 1000 following rounds hashes the salt bytes
 * followed by the previous digest.
 *
 * @param salt the salt, encoded as UTF-8
 * @param data the data to hash
 * @return the final digest, Base64 encoded
 */
public static String hashIt ( final String salt, String data )
{
    final byte[] payload = Normalizer.normalize ( data, Form.NFC ).getBytes ( StandardCharsets.UTF_8 );
    final byte[] saltBytes = salt.getBytes ( StandardCharsets.UTF_8 );

    final MessageDigest md = createDigest ();

    // update + digest processes the same byte sequence as digesting ( salt || input )
    md.update ( saltBytes );
    byte[] hash = md.digest ( payload );

    int round = 0;
    while ( round < 1000 )
    {
        md.update ( saltBytes );
        hash = md.digest ( hash );
        round++;
    }

    return Base64.getEncoder ().encodeToString ( hash );
}
 
Example 6
Source Project: jdk8u-jdk   Source File: CDataTransferer.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Translates raw transfer bytes into an object for the requested flavor.
 *
 * <p>When {@code format} is {@code CF_URL} and the flavor's representation class is
 * {@link URL}, the bytes are decoded into a URL string. The charset name is read (as UTF-8)
 * from the transferable's {@code javaTextEncodingFlavor} data when that flavor is supported,
 * otherwise {@code getDefaultTextCharset()} is used. For every other request the bytes are
 * passed to the superclass; {@code CF_STRING} bytes are first decoded as UTF-8, normalized
 * to NFC and re-encoded.
 */
@Override
public Object translateBytes(byte[] bytes, DataFlavor flavor,
                                long format, Transferable transferable) throws IOException {

        final boolean urlRequested =
                format == CF_URL && URL.class.equals(flavor.getRepresentationClass());

        if (!urlRequested) {
            byte[] payload = bytes;
            if (format == CF_STRING) {
                // Decode, compose to NFC, encode again.
                final String composed = Normalizer.normalize(new String(bytes, "UTF8"), Form.NFC);
                payload = composed.getBytes("UTF8");
            }
            return super.translateBytes(payload, flavor, format, transferable);
        }

        String encodingName = getDefaultTextCharset();
        final boolean encodingOffered =
                transferable != null && transferable.isDataFlavorSupported(javaTextEncodingFlavor);
        if (encodingOffered) {
            try {
                final byte[] rawName = (byte[]) transferable.getTransferData(javaTextEncodingFlavor);
                encodingName = new String(rawName, "UTF-8");
            } catch (UnsupportedFlavorException cannotHappen) {
                // Support for the flavor was checked just above; the default charset stays in effect.
            }
        }

        return new URL(new String(bytes, encodingName));
}
 
Example 7
/**
 * Translates raw transfer bytes into an object for the requested flavor.
 *
 * <p>When {@code format} is {@code CF_URL} and the flavor's representation class is
 * {@link URL}, the bytes are decoded into a URL string. The charset name is read (as UTF-8)
 * from the transferable's {@code javaTextEncodingFlavor} data when that flavor is supported,
 * otherwise {@code getDefaultTextCharset()} is used. For every other request the bytes are
 * passed to the superclass; {@code CF_STRING} bytes are first decoded as UTF-8, normalized
 * to NFC and re-encoded.
 */
@Override
public Object translateBytes(byte[] bytes, DataFlavor flavor,
                                long format, Transferable transferable) throws IOException {

        final boolean urlRequested =
                format == CF_URL && URL.class.equals(flavor.getRepresentationClass());

        if (!urlRequested) {
            byte[] payload = bytes;
            if (format == CF_STRING) {
                // Decode, compose to NFC, encode again.
                final String composed = Normalizer.normalize(new String(bytes, "UTF8"), Form.NFC);
                payload = composed.getBytes("UTF8");
            }
            return super.translateBytes(payload, flavor, format, transferable);
        }

        String encodingName = getDefaultTextCharset();
        final boolean encodingOffered =
                transferable != null && transferable.isDataFlavorSupported(javaTextEncodingFlavor);
        if (encodingOffered) {
            try {
                final byte[] rawName = (byte[]) transferable.getTransferData(javaTextEncodingFlavor);
                encodingName = new String(rawName, "UTF-8");
            } catch (UnsupportedFlavorException cannotHappen) {
                // Support for the flavor was checked just above; the default charset stays in effect.
            }
        }

        return new URL(new String(bytes, encodingName));
}
 
Example 8
Source Project: voj   Source File: SlugifyUtils.java    License: GNU General Public License v3.0 6 votes vote down vote up
/**
 * Builds the slug of a string.
 *
 * @param str the string to turn into a slug
 * @return the slug for the string; the empty string when {@code str} is {@code null}
 */
public static String getSlug(String str) {
	if ( str == null ) {
		return "";
	}

	// Whatever WHITESPACE matches in the trimmed input becomes a dash
	String slug = WHITESPACE.matcher(str.trim()).replaceAll("-");
	try {
		// URL-encode the text; this takes care of non-ASCII characters
		slug = URLEncoder.encode(slug, "UTF-8");
	} catch (UnsupportedEncodingException e) {
		// Not expected for "UTF-8"; the un-encoded text is used if it ever happens
	}

	// Decompose and lower-case the result
	return Normalizer.normalize(slug, Form.NFD).toLowerCase();
}
 
Example 9
/**
 * <p>Applies Unicode normalization to the input using the named normalization form.</p>
 * 
 * <p>Supported normalization forms:</p>
 * <ul>
 *   <li>NFD: canonical Unicode decomposition</li>
 *   <li>NFC: canonical Unicode decomposition followed by canonical composition</li>
 *   <li>NFKD: compatibility decomposition</li>
 *   <li>NFKC: compatibility decomposition followed by canonical composition</li>
 * </ul>
 * <p>The form is validated before the input is looked at, so a null or unknown form is
 * rejected even when the input is null. A null input is passed through as null.</p>
 * 
 * @param context function call context; not read by this method.
 * @param input string to normalize. May be null.
 * @param form name of the normalization form, upper-cased before lookup. Cannot be null.
 * 
 * @return normalized input string, or null if input is null.
 * 
 * @throws NullPointerException if form is null.
 * @throws IllegalArgumentException if form does not name a normalization form.
 * 
 * @see Normalizer#normalize(CharSequence, Form)
 */
@TLFunctionAnnotation("Perform Unicode normalization of given string.")
@CTL2FunctionDeclaration(impl = UnicodeNormalizeFunction.class)
public static final String unicodeNormalize(TLFunctionCallContext context, String input, String form) {
	
	if (form == null) {
		throw new NullPointerException("Null form is not allowed.");
	}
	
	Form requested;
	try {
		requested = Form.valueOf(form.toUpperCase());
	} catch (IllegalArgumentException unknownForm) {
		throw new IllegalArgumentException("Unsupported normalization form '" + form + "'.", unknownForm);
	}
	
	return input == null ? null : Normalizer.normalize(input, requested);
}
 
Example 10
/**
 * <p>Tells whether the input string is Unicode normalized according to the named form.</p>
 * 
 * <p>Supported normalization forms:</p>
 * <ul>
 *   <li>NFD: canonical Unicode decomposition</li>
 *   <li>NFC: canonical Unicode decomposition followed by canonical composition</li>
 *   <li>NFKD: compatibility decomposition</li>
 *   <li>NFKC: compatibility decomposition followed by canonical composition</li>
 * </ul>
 * <p>The form is validated before the input is looked at, so a null or unknown form is
 * rejected even when the input is null. A null input counts as normalized.</p>
 * 
 * @param context function call context; not read by this method.
 * @param input string to check. May be null.
 * @param form name of the normalization form, upper-cased before lookup. Cannot be null.
 * 
 * @return true if input is normalized with respect to the selected form or if input is null;
 *         false otherwise.
 * 
 * @throws NullPointerException if form is null.
 * @throws IllegalArgumentException if form does not name a normalization form.
 * 
 * @see Normalizer#isNormalized(CharSequence, Form)
 */
@TLFunctionAnnotation("Determine if given string is Unicode normalized.")
@CTL2FunctionDeclaration(impl = IsUnicodeNormalizedFunction.class)
public static final boolean isUnicodeNormalized(TLFunctionCallContext context, String input, String form) {

	if (form == null) {
		throw new NullPointerException("Null form is not allowed.");
	}
	
	Form requested;
	try {
		requested = Form.valueOf(form.toUpperCase());
	} catch (IllegalArgumentException unknownForm) {
		throw new IllegalArgumentException("Unsupported normalization form '" + form + "'.", unknownForm);
	}
	
	return input == null || Normalizer.isNormalized(input, requested);
}
 
Example 11
Source Project: metafacture-core   Source File: PicaEncoder.java    License: Apache License 2.0 6 votes vote down vote up
@Override
public void literal(final String name, final String value) {
    // A subfield name is exactly one character or digit, and a literal is only accepted
    // while an entity is open. Both violations raise the same exception.
    if (name.length() != 1 || !entityOpen) {
        throw new FormatException(name);
    }

    // The value is written in canonically decomposed form (NFD).
    final String decomposed = Normalizer.normalize(value, Form.NFD);

    if (idnControlSubField) {
        // This literal belongs to the record-id control field (presumably "003@"; the
        // original comment was garbled - confirm). The un-normalized value must equal
        // the record id delivered with the record.
        if (!this.id.equals(value)) {
            throw new MissingIdException(value);
        }
        // Only one record id is checked.
        idnControlSubField = false;
    }

    builder.append(SUB_DELIMITER);
    builder.append(name);
    builder.append(decomposed);
}
 
Example 12
Source Project: FHIR   Source File: SearchUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Normalizes a string to be used as a search parameter value. The value is canonically
 * decomposed (NFD), all characters of the Combining Diacritical Marks block are removed,
 * and the result is transformed to lower case.
 *
 * <p>Lower-casing uses {@code Locale.ROOT} so that the result does not depend on the JVM's
 * default locale (with a Turkish default locale, for example, "I" would otherwise become a
 * dotless i and no longer match the same value normalized elsewhere).
 *
 * @param value the value to normalize; may be {@code null}
 * @return the normalized value, or {@code null} if {@code value} is {@code null}
 */
public static String normalizeForSearch(String value) {
    if (value == null) {
        return null;
    }

    String withoutDiacritics =
            Normalizer.normalize(value, Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
    // Fully qualified because this block cannot see or extend the file's imports.
    return withoutDiacritics.toLowerCase(java.util.Locale.ROOT);
}
 
Example 13
Source Project: CrossMobile   Source File: NSString.java    License: GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Compares two Strings, honouring the given comparison options.
 *
 * <p>A {@code null} String sorts before a non-null one, and two {@code null} Strings are
 * equal. With the numeric option set, both Strings are converted with
 * {@code stringToRelaxedDouble} and compared as numbers; the other options are then not
 * consulted. Otherwise the Strings are optionally lower-cased and/or stripped of combining
 * diacritical marks before being compared with {@link String#compareTo(String)}.
 *
 * @param from                   The first String to be compared.
 * @param with                   The second String to be compared.
 * @param NSStringCompareOptions The option mask for the comparison.
 * @return The result of comparing the two Strings.
 * @see crossmobile.ios.foundation.NSOrdered
 */
@CMSelector(value = "- (NSComparisonResult)compare:(NSString *)aString options:(NSStringCompareOptions)mask", staticMapping = true)
public static int compare(String from, String with, int NSStringCompareOptions) {
    if (with == null)
        return from == null ? NSOrdered.Same : NSOrdered.Descending;
    if (from == null)
        return NSOrdered.Ascending;

    // The parameter shadows the options class, hence the fully qualified constant names.
    final boolean numeric =
            (NSStringCompareOptions & crossmobile.ios.foundation.NSStringCompareOptions.NSNumericSearch) != 0;

    int order;
    if (numeric) {
        final double left = stringToRelaxedDouble(from);
        final double right = stringToRelaxedDouble(with);
        if (left < right)
            order = -1;
        else if (left == right)
            order = 0;
        else
            order = 1;
    } else {
        String left = from;
        String right = with;
        if ((NSStringCompareOptions & crossmobile.ios.foundation.NSStringCompareOptions.NSCaseInsensitiveSearch) != 0) {
            left = left.toLowerCase();
            right = right.toLowerCase();
        }
        if ((NSStringCompareOptions & crossmobile.ios.foundation.NSStringCompareOptions.NSDiacriticInsensitiveSearch) != 0) {
            left = Normalizer.normalize(left, Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
            right = Normalizer.normalize(right, Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
        }
        order = left.compareTo(right);
    }

    if (order < 0)
        return NSOrdered.Ascending;
    return order > 0 ? NSOrdered.Descending : NSOrdered.Same;
}
 
Example 14
Source Project: yago3   Source File: PersonNameExtractor.java    License: GNU General Public License v3.0 5 votes vote down vote up
/**
 * Writes an additional label fact for the entity when removing combining diacritical marks
 * from the NFD form of the name yields a string different from the name itself.
 */
private void writeNormalized(String entity, String name, String source) throws IOException {
  final String stripped = Normalizer.normalize(name, Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
  if (stripped.equals(name)) {
    // Nothing changed, so there is no extra label to write.
    return;
  }
  final Fact label = new Fact(entity, RDFS.label, FactComponent.forStringWithLanguage(stripped, "eng"));
  write(PERSONNAMEHEURISTICS, label, PERSONNAMESOURCES, source, "PersonNameExtractor_normalized");
}
 
Example 15
Source Project: ripme   Source File: JabArchivesRipper.java    License: MIT License 5 votes vote down vote up
/**
 * Produces a URL/file-safe version of a string: WHITESPACE matches become dashes, the text
 * is decomposed (NFD), NONLATIN matches are removed and the result is lower-cased.
 */
protected String getSlug(String input) {
    final String dashed = WHITESPACE.matcher(input).replaceAll("-");
    return NONLATIN
        .matcher(Normalizer.normalize(dashed, Form.NFD))
        .replaceAll("")
        .toLowerCase(Locale.ENGLISH);
}
 
Example 16
Source Project: openjdk-jdk9   Source File: CDataTransferer.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Translates raw transfer bytes into an object for the requested flavor.
 *
 * <p>URL requests ({@code CF_URL} with a {@link URL} representation class) decode the bytes
 * and extract the URL with {@code extractURL}. URI-list requests for {@code CF_FILE} are
 * resolved with {@code dragQueryFile} and then treated as {@code CF_STRING}. Plain
 * {@code CF_STRING} bytes are normalized to NFC. Everything but the URL case is finally
 * delegated to the superclass.
 */
@Override
public Object translateBytes(byte[] bytes, DataFlavor flavor,
                             long format, Transferable transferable) throws IOException {

    final boolean urlRequested =
            format == CF_URL && URL.class.equals(flavor.getRepresentationClass());
    if (urlRequested) {
        String encodingName = Charset.defaultCharset().name();
        if (transferable != null && transferable.isDataFlavorSupported(javaTextEncodingFlavor)) {
            try {
                final byte[] rawName = (byte[]) transferable.getTransferData(javaTextEncodingFlavor);
                encodingName = new String(rawName, StandardCharsets.UTF_8);
            } catch (UnsupportedFlavorException cannotHappen) {
                // Support for the flavor was checked just above; keep the default charset.
            }
        }

        // The macOS pasteboard returns a property list that consists of one URL;
        // extract it from the decoded text.
        final String propertyList = new String(bytes, encodingName);
        return new URL(extractURL(propertyList));
    }

    byte[] payload = bytes;
    long effectiveFormat = format;
    if (isUriListFlavor(flavor) && format == CF_FILE) {
        // dragQueryFile handles both files and URLs: it parses the property list that
        // macOS returns for CF_URL and CF_FILE and extracts the values.
        final String[] entries = dragQueryFile(bytes);
        if (entries == null) {
            return null;
        }
        payload = String.join(System.getProperty("line.separator"), entries).getBytes();
        // The URIs are now plain text, so let the base method translate them as a regular
        // string into the target representation class.
        effectiveFormat = CF_STRING;
    } else if (format == CF_STRING) {
        final String composed = Normalizer.normalize(new String(bytes, "UTF8"), Form.NFC);
        payload = composed.getBytes("UTF8");
    }

    return super.translateBytes(payload, flavor, effectiveFormat, transferable);
}
 
Example 17
Source Project: sunbird-lms-service   Source File: Slug.java    License: MIT License 5 votes vote down vote up
/**
 * Builds a lower-case slug from the input.
 *
 * <p>The input is trimmed, URL-decoded, optionally transliterated, has WHITESPACE matches
 * replaced by dashes, is decomposed (NFD), has NONLATIN matches removed and its dashes
 * normalized. The result is passed to {@code validateResult} together with the original
 * input before it is lower-cased and returned.
 *
 * @param input the text to slugify; may be {@code null}
 * @param transliterate whether to run the transliteration step
 * @return the slug, or {@code null} when {@code input} is {@code null}
 */
public static String makeSlug(String input, boolean transliterate) {
  if (input == null) {
    ProjectLogger.log("Provided input value is null");
    return null;
  }

  // Strip surrounding spaces and undo URL encoding
  String slug = urlDecode(input.trim());
  if (transliterate) {
    slug = transliterate(slug);
  }
  // Whitespace to dashes, then decompose so the pattern below can drop the marks
  slug = WHITESPACE.matcher(slug).replaceAll("-");
  slug = Normalizer.normalize(slug, Form.NFD);
  slug = NONLATIN.matcher(slug).replaceAll("");
  slug = normalizeDashes(slug);

  // Validation sees the not-yet-lower-cased slug and the untouched original input
  validateResult(slug, input);
  return slug.toLowerCase(Locale.ENGLISH);
}
 
Example 18
/**
 * Translates transfer data, given either as a stream or as a byte array, into an object for
 * the requested flavor.
 *
 * <p>HTML text data is wrapped in an {@code HTMLDecodingInputStream}. URL requests
 * ({@code CF_URL} with a {@link URL} representation class) are decoded into a URL string.
 * {@code CF_STRING} bytes are normalized to NFC. Everything but the URL case ends in
 * {@code super.translateBytes}.
 */
protected Object translateBytesOrStream(InputStream stream, byte[] bytes, DataFlavor flavor, long format,
                                    Transferable transferable) throws IOException
{
    InputStream in = stream;
    byte[] data = bytes;

    // 5-28-03 VL: [Radar 3266030]
    // We need to do like Windows does here.
    if (format == CF_HTML && flavor.isFlavorTextType()) {
        if (in == null) {
            in = new ByteArrayInputStream(data);
            data = null;
        }
        in = new HTMLDecodingInputStream(in);
    }

    final boolean urlRequested =
            format == CF_URL && URL.class.equals(flavor.getRepresentationClass());
    if (urlRequested) {
        if (data == null) {
            data = inputStreamToByteArray(in);
        }

        String encodingName = getDefaultTextCharset();
        if (transferable != null && transferable.isDataFlavorSupported(javaTextEncodingFlavor)) {
            try {
                final byte[] rawName = (byte[]) transferable.getTransferData(javaTextEncodingFlavor);
                encodingName = new String(rawName, "UTF-8");
            } catch (UnsupportedFlavorException cannotHappen) {
                // Support for the flavor was checked just above; keep the default charset.
            }
        }

        return new URL(new String(data, encodingName));
    }

    if (format == CF_STRING) {
        final String composed = Normalizer.normalize(new String(data, "UTF8"), Form.NFC);
        data = composed.getBytes("UTF8");
    }

    // NOTE(review): only the byte array is handed on; the stream prepared above is not
    // passed to the superclass here - confirm this is intended.
    return super.translateBytes(data, flavor, format, transferable);
}
 
Example 19
Source Project: mycore   Source File: MCRNameMerger.java    License: GNU General Public License v3.0 5 votes vote down vote up
/**
 * Reduces a name fragment to a simplified lower-case form: default-locale lower-casing,
 * hyphen normalization with hyphens turned into blanks, removal of combining marks after
 * canonical decomposition, shortening of "ue"/"oe"/"ae"/"ß"/"ss", punctuation replaced by
 * blanks, and whitespace runs collapsed.
 */
private String normalize(String nameFragment) {
    String lowered = nameFragment.toLowerCase(Locale.getDefault());
    String dehyphenated = new MCRHyphenNormalizer().normalize(lowered).replace("-", " ");

    // canonical decomposition, then remove accents
    String unaccented = Normalizer.normalize(dehyphenated, Form.NFD).replaceAll("\\p{M}", "");

    String simplified = unaccented
        .replace("ue", "u")
        .replace("oe", "o")
        .replace("ae", "a")
        .replace("ß", "s")
        .replace("ss", "s");

    // NOTE(review): as written this pattern only matches a non-alphanumeric character
    // followed by one whitespace character and a literal ']'. A character class such as
    // [^a-z0-9\s] may have been intended - confirm before changing, results would differ.
    simplified = simplified.replaceAll("[^a-z0-9]\\s]", "");

    // punctuation becomes a blank; afterwards whitespace runs shrink to one blank
    String depunctuated = simplified.replaceAll("\\p{Punct}", " ").trim();
    return depunctuated.replaceAll("\\s+", " ").trim();
}
 
Example 20
Source Project: zest-writer   Source File: ZdsHttp.java    License: GNU General Public License v3.0 5 votes vote down vote up
/**
 * Turns a string into a slug: matches of {@code Constant.WHITESPACE} become dashes, the
 * text is decomposed (NFD), matches of {@code Constant.NONLATIN} are removed and the result
 * is lower-cased.
 * @param input string to convert into a slug
 * @return slug string
 */
public static String toSlug(String input) {
    final String dashed = Constant.WHITESPACE.matcher(input).replaceAll("-");
    final String decomposed = Normalizer.normalize(dashed, Form.NFD);
    return Constant.NONLATIN.matcher(decomposed).replaceAll("").toLowerCase(Locale.ENGLISH);
}
 
Example 21
Source Project: openjdk-8   Source File: CDataTransferer.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Translates transfer data, given either as a stream or as a byte array, into an object for
 * the requested flavor.
 *
 * <p>HTML text data is wrapped in an {@code HTMLDecodingInputStream}. URL requests
 * ({@code CF_URL} with a {@link URL} representation class) are decoded into a URL string.
 * {@code CF_STRING} bytes are normalized to NFC. Everything but the URL case ends in
 * {@code super.translateBytes}.
 */
protected Object translateBytesOrStream(InputStream stream, byte[] bytes, DataFlavor flavor, long format,
                                    Transferable transferable) throws IOException
{
    InputStream in = stream;
    byte[] data = bytes;

    // 5-28-03 VL: [Radar 3266030]
    // We need to do like Windows does here.
    if (format == CF_HTML && flavor.isFlavorTextType()) {
        if (in == null) {
            in = new ByteArrayInputStream(data);
            data = null;
        }
        in = new HTMLDecodingInputStream(in);
    }

    final boolean urlRequested =
            format == CF_URL && URL.class.equals(flavor.getRepresentationClass());
    if (urlRequested) {
        if (data == null) {
            data = inputStreamToByteArray(in);
        }

        String encodingName = getDefaultTextCharset();
        if (transferable != null && transferable.isDataFlavorSupported(javaTextEncodingFlavor)) {
            try {
                final byte[] rawName = (byte[]) transferable.getTransferData(javaTextEncodingFlavor);
                encodingName = new String(rawName, "UTF-8");
            } catch (UnsupportedFlavorException cannotHappen) {
                // Support for the flavor was checked just above; keep the default charset.
            }
        }

        return new URL(new String(data, encodingName));
    }

    if (format == CF_STRING) {
        final String composed = Normalizer.normalize(new String(data, "UTF8"), Form.NFC);
        data = composed.getBytes("UTF8");
    }

    // NOTE(review): only the byte array is handed on; the stream prepared above is not
    // passed to the superclass here - confirm this is intended.
    return super.translateBytes(data, flavor, format, transferable);
}
 
Example 22
/**
 * Turns the input into a slug: WHITESPACE matches become dashes, the text is decomposed
 * (NFD), NONLATIN matches are removed, and finally every dash is replaced by an underscore.
 * The case of the input is kept.
 */
public static String slugify(String input) {
    final String decomposed =
            Normalizer.normalize(WHITESPACE.matcher(input).replaceAll("-"), Form.NFD);
    final String cleaned = NONLATIN.matcher(decomposed).replaceAll("");
    return cleaned.replace('-', '_');
}
 
Example 23
Source Project: JavaSCR   Source File: ValidateString.java    License: MIT License 5 votes vote down vote up
/**
 * Normalizes the input to NFKC and only then validates it, so that compatibility variants
 * of angle brackets are caught as well.
 *
 * @param input the untrusted text
 * @return the normalized text when it contains no angle bracket
 * @throws IllegalStateException if the normalized text contains '<' or '>'
 */
private static String NormalizeThenValidate(String input) {
	// Normalization has to come first; validating the raw input would miss
	// characters that only turn into angle brackets under NFKC.
	final String normalized = Normalizer.normalize(input, Form.NFKC);

	final boolean hasAngleBracket = Pattern.compile("[<>]").matcher(normalized).find();
	if (hasAngleBracket) {
		// Found black listed tag
		throw new IllegalStateException();
	}

	System.out.println("valid input");
	return normalized;
}
 
Example 24
Source Project: JavaSCR   Source File: TagFilter.java    License: MIT License 5 votes vote down vote up
/**
 * Normalizes the text to NFKC, replaces every unassigned code point with the Unicode
 * replacement character (U+FFFD) and rejects text that then contains a script tag.
 *
 * @param str the untrusted text
 * @return the normalized and filtered text
 * @throws IllegalArgumentException if the filtered text contains the script tag
 */
private static String filterString(String str) {
	final String normalized = Normalizer.normalize(str, Form.NFKC);

	// Unassigned code points (category Cn) are replaced rather than deleted
	final String filtered = normalized.replaceAll("[\\p{Cn}]", "\uFFFD");

	// Validate input
	if (Pattern.compile("<script>").matcher(filtered).find()) {
		throw new IllegalArgumentException("Invalid input");
	}
	return filtered;
}
 
Example 25
Source Project: tech-gallery   Source File: TechGalleryUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Slugifies a name: WHITESPACE matches become underscores, the text is decomposed (NFD),
 * NONLATIN matches are removed and the result is lower-cased.
 *
 * @param name name to be changed.
 * @return the slugified name.
 */
public static String slugify(String name) {
  final String underscored = WHITESPACE.matcher(name).replaceAll("_");
  return NONLATIN
      .matcher(Normalizer.normalize(underscored, Form.NFD))
      .replaceAll("")
      .toLowerCase(Locale.ENGLISH);
}
 
Example 26
Source Project: termsuite-core   Source File: StringUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Decomposes the string (NFD) and replaces every match of ASCII_REPLACEMENT with
 * EMPTY_STRING. When that leaves nothing of a non-empty input, the input is returned
 * unchanged instead.
 */
public static String replaceAccents(String string) {
	final String stripped = Normalizer.normalize(string, Form.NFD)
			.replaceAll(ASCII_REPLACEMENT, EMPTY_STRING);

	//FIXME accent removal fails for russian. This is a quick fix
	final boolean everythingRemoved = stripped.isEmpty() && !string.isEmpty();
	return everythingRemoved ? string : stripped;
}
 
Example 27
Source Project: dkpro-jwktl   Source File: WiktionaryPage.java    License: Apache License 2.0 5 votes vote down vote up
/** Static helper method for normalizing the title. The title is
 *  canonically decomposed, every non-ASCII character is removed and
 *  the remainder is converted into lower case. A null title yields null. */
public static String normalizeTitle(final String title) {
	if (title != null) {
		final String decomposed = Normalizer.normalize(title, Form.NFD);
		final String asciiOnly = decomposed.replaceAll("[^\\p{ASCII}]", "");
		return asciiOnly.toLowerCase(Locale.US);
	}
	return null;
}
 
Example 28
Source Project: aliada-tool   Source File: Function.java    License: GNU General Public License v3.0 5 votes vote down vote up
/**
 * Strongly normalizes a string: the value is decomposed (NFD), combining diacritical marks
 * are removed, every character other than A-Z, a-z and 0-9 is dropped, and the remainder
 * is passed to {@code uuid}.
 * 
 * @param value the string to be normalized.
 * @return the result of {@code uuid} for the reduced string, or a random UUID string when
 *         the value is null.
 */
public String normalizeStrong(final String value) {
	if (value == null) {
		return UUID.randomUUID().toString();
	}
	final String unaccented = Normalizer.normalize(value, Form.NFD)
			.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
	final String alphanumeric = unaccented.replaceAll("[^A-Za-z0-9]", "");
	return uuid(alphanumeric);
}
 
Example 29
Source Project: aliada-tool   Source File: Strings.java    License: GNU General Public License v3.0 5 votes vote down vote up
/**
 * Converts the given value to a string that can be used as local name in URIs.
 * Combining diacritical marks are stripped after canonical decomposition; spaces, the
 * copyright sign, punctuation and the replacement character (U+FFFD) are removed, and the
 * result is trimmed.
 * 
 * @param value the source string.
 * @return a string that can be used as local name in URIs, or null when the value is null.
 */
public static String toURILocalName(final String value) {
	if (value == null) {
		return null;
	}
	String localName = Normalizer.normalize(value, Form.NFD);
	localName = localName.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
	localName = localName.replaceAll(" ", "");
	localName = localName.replaceAll("©", "");
	localName = localName.replaceAll("\\p{Punct}", "");
	localName = localName.replaceAll("\\uFFFD", "");
	return localName.trim();
}
 
Example 30
Source Project: mamute   Source File: NormalizerBrutal.java    License: Apache License 2.0 5 votes vote down vote up
/** Matches every character that is neither a word character (as defined by \w) nor a dash. */
private static final Pattern SLUG_DISALLOWED = Pattern.compile("[^\\w-]");

/** Matches a single whitespace character. */
private static final Pattern SLUG_WHITESPACE = Pattern.compile("[\\s]");

/**
 * Turns the input into a lower-case slug.
 *
 * <p>Each whitespace character becomes a dash, the text is canonically decomposed (NFD) so
 * that accents split off from their base letters, and every character that is not a word
 * character or a dash is removed. The patterns are compiled once instead of on every call.
 *
 * @param input the text to slugify; may be null
 * @return the slug, or the empty string when the input is null or empty
 */
public static String toSlug(String input) {
	if (input == null || input.isEmpty()) {
		return "";
	}
	String nowhitespace = SLUG_WHITESPACE.matcher(input).replaceAll("-");
	String normalized = Normalizer.normalize(nowhitespace, Form.NFD);
	String slug = SLUG_DISALLOWED.matcher(normalized).replaceAll("");
	return slug.toLowerCase(Locale.ENGLISH);
}