java.text.Normalizer.Form Java Examples

The following examples show how to use java.text.Normalizer.Form. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Users.java    From packagedrone with Eclipse Public License 1.0 7 votes vote down vote up
/**
 * Computes a salted, iterated digest of the given data and returns it Base64 encoded.
 * <p>
 * The data is converted to Unicode NFC first. The initial digest is taken over
 * the salt bytes followed by the data bytes (both UTF-8); the digest is then
 * re-hashed 1000 times, each round over the salt bytes followed by the
 * previous digest.
 * </p>
 *
 * @param salt the salt to mix into every round
 * @param data the text to hash
 * @return the Base64 encoding of the final digest
 */
public static String hashIt ( final String salt, String data )
{
    final byte[] dataBytes = Normalizer.normalize ( data, Form.NFC ).getBytes ( StandardCharsets.UTF_8 );
    final byte[] saltBytes = salt.getBytes ( StandardCharsets.UTF_8 );

    // input of the initial round: salt followed by the normalized data
    final byte[] seed = new byte[saltBytes.length + dataBytes.length];
    System.arraycopy ( saltBytes, 0, seed, 0, saltBytes.length );
    System.arraycopy ( dataBytes, 0, seed, saltBytes.length, dataBytes.length );

    final MessageDigest md = createDigest ();
    byte[] hash = md.digest ( seed );

    // buffer reused by all rounds; its salt prefix never changes, so it is written once
    final byte[] round = new byte[saltBytes.length + hash.length];
    System.arraycopy ( saltBytes, 0, round, 0, saltBytes.length );

    int remaining = 1000;
    while ( remaining-- > 0 )
    {
        System.arraycopy ( hash, 0, round, saltBytes.length, hash.length );
        hash = md.digest ( round );
    }

    return Base64.getEncoder ().encodeToString ( hash );
}
 
Example #2
Source File: PicaEncoder.java    From metafacture-core with Apache License 2.0 6 votes vote down vote up
/**
 * Appends one subfield (delimiter, subfield code and NFD-normalized value) to the builder.
 *
 * @param name  the subfield code; must be exactly one character long
 * @param value the subfield value
 * @throws FormatException    if the code is not one character long or no entity is open
 * @throws MissingIdException if a record id check is pending and {@code value} differs from the expected id
 */
@Override
    public void literal(final String name, final String value) {
        // A subfield code is exactly one character or digit, and a literal
        // is only accepted while an entity is open.
        if (name.length() != 1 || !entityOpen) {
            throw new FormatException(name);
        }
        final String decomposed = Normalizer.normalize(value, Form.NFD);
        if (idnControlSubField) {
            // Inside a 003@ field: the value has to repeat the id delivered with the record.
            final boolean sameId = this.id.equals(value);
            if (!sameId) {
                throw new MissingIdException(value);
            }
            // only one record id is checked
            idnControlSubField = false;
        }
        builder.append(SUB_DELIMITER);
        builder.append(name);
        builder.append(decomposed);
}
 
Example #3
Source File: TextSection.java    From testarea-pdfbox2 with Apache License 2.0 6 votes vote down vote up
/**
 * Concatenates the Unicode text of the given words, separating consecutive
 * words by a single space, and returns the result in NFKC form.
 *
 * @param words the words, each given as the list of its text positions
 * @return the NFKC-normalized text of all words
 */
String toString(List<List<TextPosition>> words)
{
    StringBuilder joined = new StringBuilder();
    // empty before the first word, a blank before every later one
    String separator = "";
    for (List<TextPosition> word : words)
    {
        joined.append(separator);
        separator = " ";
        for (TextPosition position : word)
        {
            joined.append(position.getUnicode());
        }
    }
    // cf. http://stackoverflow.com/a/7171932/1729265
    return Normalizer.normalize(joined, Form.NFKC);
}
 
Example #4
Source File: StringLibExt.java    From CloverETL-Engine with GNU Lesser General Public License v2.1 6 votes vote down vote up
/**
 * <p>Checks whether the input string is already in the requested Unicode normalization form.</p>
 * 
 * <p>Supported normalization forms:
 * <ul>
 *   <li>NFD: canonical Unicode decomposition</li>
 *   <li>NFC: canonical Unicode decomposition followed by canonical composition</li>
 *   <li>NFKD: compatibility decomposition</li>
 *   <li>NFKC: compatibility decomposition followed by canonical composition</li>
 * </ul>
 * </p>
 * <p>The form is validated before the input is inspected, so an invalid form is
 * rejected even when the input is null.</p>
 * 
 * @param context function call context.
 * @param input input string to check. May be null.
 * @param form name of the normalization form, case insensitive. Cannot be null.
 * 
 * @return true if input is normalized with respect to the selected form or if input is null; false otherwise.
 * 
 * @throws NullPointerException if form is null.
 * @throws IllegalArgumentException if form does not name a supported normalization form.
 * 
 * @see Normalizer#isNormalized(CharSequence, Form)
 */
@TLFunctionAnnotation("Determine if given string is Unicode normalized.")
@CTL2FunctionDeclaration(impl = IsUnicodeNormalizedFunction.class)
public static final boolean isUnicodeNormalized(TLFunctionCallContext context, String input, String form) {

	if (form == null) {
		throw new NullPointerException("Null form is not allowed.");
	}

	final Form requestedForm;
	try {
		requestedForm = Form.valueOf(form.toUpperCase());
	} catch (IllegalArgumentException unknownForm) {
		throw new IllegalArgumentException("Unsupported normalization form '" + form + "'.", unknownForm);
	}

	// a null input is reported as normalized
	return input == null || Normalizer.isNormalized(input, requestedForm);
}
 
Example #5
Source File: StringLibExt.java    From CloverETL-Engine with GNU Lesser General Public License v2.1 6 votes vote down vote up
/**
 * <p>Converts the input string to the requested Unicode normalization form.</p>
 * 
 * <p>Supported normalization forms:
 * <ul>
 *   <li>NFD: canonical Unicode decomposition</li>
 *   <li>NFC: canonical Unicode decomposition followed by canonical composition</li>
 *   <li>NFKD: compatibility decomposition</li>
 *   <li>NFKC: compatibility decomposition followed by canonical composition</li>
 * </ul>
 * </p>
 * <p>The form is validated before the input is inspected, so an invalid form is
 * rejected even when the input is null.</p>
 * 
 * @param context function call context.
 * @param input input string to normalize. May be null.
 * @param form name of the normalization form, case insensitive. Cannot be null.
 * 
 * @return the normalized input string, or null if input is null.
 * 
 * @throws NullPointerException if form is null.
 * @throws IllegalArgumentException if form does not name a supported normalization form.
 * 
 * @see Normalizer#normalize(CharSequence, Form)
 */
@TLFunctionAnnotation("Perform Unicode normalization of given string.")
@CTL2FunctionDeclaration(impl = UnicodeNormalizeFunction.class)
public static final String unicodeNormalize(TLFunctionCallContext context, String input, String form) {

	if (form == null) {
		throw new NullPointerException("Null form is not allowed.");
	}

	final Form requestedForm;
	try {
		requestedForm = Form.valueOf(form.toUpperCase());
	} catch (IllegalArgumentException unknownForm) {
		throw new IllegalArgumentException("Unsupported normalization form '" + form + "'.", unknownForm);
	}

	// null is passed through untouched
	return input == null ? null : Normalizer.normalize(input, requestedForm);
}
 
Example #6
Source File: SlugifyUtils.java    From voj with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Builds the slug of a string.
 * <p>
 * The string is trimmed, every match of {@code WHITESPACE} is replaced by a
 * dash, the result is URL-encoded as UTF-8, converted to NFD and finally
 * lower-cased with locale-independent rules.
 * </p>
 *
 * @param str the string to build the slug for; may be null
 * @return the slug of the string, or an empty string if {@code str} is null
 */
public static String getSlug(String str) {
	if ( str == null ) {
		return "";
	}
	
	// Get rid of white space
	String noWhiteSpace = WHITESPACE.matcher(str.trim()).replaceAll("-");
	// Encode non-ASCII characters
	try {
		noWhiteSpace = URLEncoder.encode(noWhiteSpace, "UTF-8");
	} catch (UnsupportedEncodingException ignored) {
		// Not expected: "UTF-8" is a charset every Java platform must support.
		// Should it happen anyway, the un-encoded string is used as before.
	}
	// NOTE(review): normalization runs after URL encoding, so it presumably only
	// ever sees already-escaped text - confirm whether it should run before encoding.
	String normalized = Normalizer.normalize(noWhiteSpace, Form.NFD);
	
	// Locale.ROOT keeps the slug independent of the JVM default locale; with the
	// no-arg toLowerCase() a Turkish default locale maps 'I' to dotless 'ı'.
	return normalized.toLowerCase(java.util.Locale.ROOT);
}
 
Example #7
Source File: CDataTransferer.java    From jdk8u60 with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Translates the given bytes into an object for the requested flavor.
 * <p>
 * For {@code CF_URL} data requested as a {@link URL} the bytes are decoded with
 * the default text charset, or with the charset name the transferable provides
 * through {@code javaTextEncodingFlavor}, and returned as a URL. For
 * {@code CF_STRING} data the bytes are read as UTF-8, converted to NFC and
 * re-encoded before the superclass translation is applied.
 * </p>
 */
@Override
public Object translateBytes(byte[] bytes, DataFlavor flavor,
                                long format, Transferable transferable) throws IOException {

        final boolean urlRequested = format == CF_URL
                && URL.class.equals(flavor.getRepresentationClass());
        if (urlRequested) {
            String encoding = getDefaultTextCharset();
            final boolean encodingProvided = transferable != null
                    && transferable.isDataFlavorSupported(javaTextEncodingFlavor);
            if (encodingProvided) {
                try {
                    final byte[] rawName = (byte[]) transferable.getTransferData(javaTextEncodingFlavor);
                    encoding = new String(rawName, "UTF-8");
                } catch (UnsupportedFlavorException cannotHappen) {
                    // support for the flavor was checked right before the call
                }
            }

            final String spec = new String(bytes, encoding);
            return new URL(spec);
        }

        if (format == CF_STRING) {
            final String composed = Normalizer.normalize(new String(bytes, "UTF8"), Form.NFC);
            bytes = composed.getBytes("UTF8");
        }

        return super.translateBytes(bytes, flavor, format, transferable);
}
 
Example #8
Source File: MCRUtils.java    From mycore with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Hashes the NFC-normalized text with the given algorithm and returns the hex string of the result.
 * <p>
 * The optional salt is fed into the digest before the text. The first digest
 * counts as one iteration; for larger iteration counts the digest is hashed
 * again {@code iterations - 1} times. Counts below one behave like one.
 * </p>
 *
 * @param iterations total number of digest rounds
 * @param salt       salt bytes, or null for no salt
 * @param text       the text to hash
 * @param algorithm  name of the message digest algorithm
 * @return the final digest, converted by {@code toHexString}
 * @throws NoSuchAlgorithmException if the algorithm is not available
 */
private static String getHash(int iterations, byte[] salt, String text, String algorithm)
    throws NoSuchAlgorithmException {
    // rounds that remain after the initial digest
    final int rehashRounds = Math.max(iterations - 1, 0);

    final MessageDigest md = MessageDigest.getInstance(algorithm);
    final String composed = Normalizer.normalize(text, Form.NFC);
    if (salt != null) {
        md.update(salt);
    }

    byte[] hash = md.digest(composed.getBytes(StandardCharsets.UTF_8));
    for (int left = rehashRounds; left > 0; left--) {
        hash = md.digest(hash);
    }
    return toHexString(hash);
}
 
Example #9
Source File: MCRTextNormalizer.java    From mycore with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Reduces a text to a simplified form: lower case, hyphens and punctuation
 * turned into blanks, accents removed, some letter pairs folded
 * ("ue", "oe", "ae", "ß", "ss") and whitespace runs collapsed.
 *
 * @param text the text to normalize
 * @return the normalized text
 */
public static String normalizeText(String text) {
    final String lowered = text.toLowerCase(Locale.getDefault());
    final String dehyphenated = new MCRHyphenNormalizer().normalize(lowered).replace("-", " ");
    // canonical decomposition, then drop the combining marks (accents)
    final String unaccented = Normalizer.normalize(dehyphenated, Form.NFD).replaceAll("\\p{M}", "");
    final String folded = unaccented
        .replace("ue", "u")
        .replace("oe", "o")
        .replace("ae", "a")
        .replace("ß", "s")
        .replace("ss", "s");
    // NOTE(review): as written this pattern matches a character outside a-z0-9
    // followed by whitespace and a literal ']'. It was presumably meant to be a
    // single character class removing non-alphanumeric characters - confirm
    // before changing, since a fix alters the produced keys.
    final String filtered = folded.replaceAll("[^a-z0-9]\\s]", "");
    // punctuation becomes a blank
    final String depunctuated = filtered.replaceAll("\\p{Punct}", " ").trim();
    // collapse whitespace runs
    return depunctuated.replaceAll("\\s+", " ");
}
 
Example #10
Source File: CDataTransferer.java    From jdk8u-dev-jdk with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Translates the given bytes into an object for the requested flavor.
 * <p>
 * For {@code CF_URL} data requested as a {@link URL} the bytes are decoded with
 * the default text charset, or with the charset name the transferable provides
 * through {@code javaTextEncodingFlavor}, and returned as a URL. For
 * {@code CF_STRING} data the bytes are read as UTF-8, converted to NFC and
 * re-encoded before the superclass translation is applied.
 * </p>
 */
@Override
public Object translateBytes(byte[] bytes, DataFlavor flavor,
                                long format, Transferable transferable) throws IOException {

        final boolean urlRequested = format == CF_URL
                && URL.class.equals(flavor.getRepresentationClass());
        if (urlRequested) {
            String encoding = getDefaultTextCharset();
            final boolean encodingProvided = transferable != null
                    && transferable.isDataFlavorSupported(javaTextEncodingFlavor);
            if (encodingProvided) {
                try {
                    final byte[] rawName = (byte[]) transferable.getTransferData(javaTextEncodingFlavor);
                    encoding = new String(rawName, "UTF-8");
                } catch (UnsupportedFlavorException cannotHappen) {
                    // support for the flavor was checked right before the call
                }
            }

            final String spec = new String(bytes, encoding);
            return new URL(spec);
        }

        if (format == CF_STRING) {
            final String composed = Normalizer.normalize(new String(bytes, "UTF8"), Form.NFC);
            bytes = composed.getBytes("UTF8");
        }

        return super.translateBytes(bytes, flavor, format, transferable);
}
 
Example #11
Source File: CDataTransferer.java    From jdk8u-jdk with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Translates the given bytes into an object for the requested flavor.
 * <p>
 * For {@code CF_URL} data requested as a {@link URL} the bytes are decoded with
 * the default text charset, or with the charset name the transferable provides
 * through {@code javaTextEncodingFlavor}, and returned as a URL. For
 * {@code CF_STRING} data the bytes are read as UTF-8, converted to NFC and
 * re-encoded before the superclass translation is applied.
 * </p>
 */
@Override
public Object translateBytes(byte[] bytes, DataFlavor flavor,
                                long format, Transferable transferable) throws IOException {

        final boolean urlRequested = format == CF_URL
                && URL.class.equals(flavor.getRepresentationClass());
        if (urlRequested) {
            String encoding = getDefaultTextCharset();
            final boolean encodingProvided = transferable != null
                    && transferable.isDataFlavorSupported(javaTextEncodingFlavor);
            if (encodingProvided) {
                try {
                    final byte[] rawName = (byte[]) transferable.getTransferData(javaTextEncodingFlavor);
                    encoding = new String(rawName, "UTF-8");
                } catch (UnsupportedFlavorException cannotHappen) {
                    // support for the flavor was checked right before the call
                }
            }

            final String spec = new String(bytes, encoding);
            return new URL(spec);
        }

        if (format == CF_STRING) {
            final String composed = Normalizer.normalize(new String(bytes, "UTF8"), Form.NFC);
            bytes = composed.getBytes("UTF8");
        }

        return super.translateBytes(bytes, flavor, format, transferable);
}
 
Example #12
Source File: WiktionaryPage.java    From dkpro-jwktl with Apache License 2.0 5 votes vote down vote up
/**
 * Static helper method for normalizing a title: the title is decomposed
 * (NFD), every non-ASCII character is dropped and the rest is converted to
 * lower case.
 *
 * @param title the title to normalize; may be null
 * @return the normalized title, or null if {@code title} is null
 */
public static String normalizeTitle(final String title) {
	if (title == null) {
		return null;
	}

	// After decomposition an accented letter is its ASCII base letter plus
	// combining marks, so only the marks are lost in the next step.
	final String decomposed = Normalizer.normalize(title, Form.NFD);
	final String asciiOnly = decomposed.replaceAll("[^\\p{ASCII}]", "");
	return asciiOnly.toLowerCase(Locale.US);
}
 
Example #13
Source File: SearchUtil.java    From FHIR with Apache License 2.0 5 votes vote down vote up
/**
 * Normalizes a string to be used as a search parameter value. The value is
 * decomposed (NFD), all characters of the Combining Diacritical Marks block
 * are removed, and the result is transformed to lower case using
 * locale-independent rules.
 *
 * @param value the raw search value; may be null
 * @return the normalized value, or null if {@code value} is null
 */
public static String normalizeForSearch(String value) {
    if (value == null) {
        return null;
    }

    String withoutMarks = Normalizer.normalize(value, Form.NFD)
            .replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
    // Locale.ROOT keeps the result independent of the JVM default locale; the
    // no-arg toLowerCase() would e.g. map 'I' to dotless 'ı' under a Turkish locale.
    return withoutMarks.toLowerCase(java.util.Locale.ROOT);
}
 
Example #14
Source File: TagFilter.java    From JavaSCR with MIT License 5 votes vote down vote up
/**
 * Normalizes the string to NFKC, replaces every code point of general
 * category Cn with U+FFFD and rejects the result if it contains the text
 * {@code <script>}.
 *
 * @param str the string to filter
 * @return the normalized and sanitized string
 * @throws IllegalArgumentException if the sanitized string contains {@code <script>}
 */
private static String filterString(String str) {
	// Normalization comes first, so the check below sees the composed form.
	final String normalized = Normalizer.normalize(str, Form.NFKC);

	// Substitute code points of category Cn with the replacement character
	final String sanitized = normalized.replaceAll("[\\p{Cn}]", "\uFFFD");

	// Validate input
	if (sanitized.contains("<script>")) {
		throw new IllegalArgumentException("Invalid input");
	}
	return sanitized;
}
 
Example #15
Source File: TechGalleryUtil.java    From tech-gallery with Apache License 2.0 5 votes vote down vote up
/**
 * Turns a name into a slug: matches of {@code WHITESPACE} become underscores,
 * the text is decomposed (NFD), matches of {@code NONLATIN} are dropped and
 * the result is lower-cased.
 *
 * @param name name to be changed.
 * @return the slug of the name.
 */
public static String slugify(String name) {
  final String underscored = WHITESPACE.matcher(name).replaceAll("_");
  final String decomposed = Normalizer.normalize(underscored, Form.NFD);
  return NONLATIN.matcher(decomposed)
      .replaceAll("")
      .toLowerCase(Locale.ENGLISH);
}
 
Example #16
Source File: StringUtils.java    From termsuite-core with Apache License 2.0 5 votes vote down vote up
/**
 * Decomposes the string (NFD) and replaces every match of
 * {@code ASCII_REPLACEMENT} with {@code EMPTY_STRING}.
 *
 * @param string the string to process
 * @return the processed string, or the unchanged input when processing a
 *         non-empty input left nothing over
 */
public static String replaceAccents(String string) {
	final String decomposed = Normalizer.normalize(string, Form.NFD);
	final String stripped = decomposed.replaceAll(ASCII_REPLACEMENT, EMPTY_STRING);

	//FIXME accent removal fails for russian. This is a quick fix
	final boolean everythingRemoved = stripped.isEmpty() && !string.isEmpty();
	return everythingRemoved ? string : stripped;
}
 
Example #17
Source File: ValidateString.java    From JavaSCR with MIT License 5 votes vote down vote up
/**
 * Normalizes the input to NFKC and then rejects it if the normalized text
 * contains an angle bracket. Accepted input is reported on standard output.
 *
 * @param input the string to normalize and validate
 * @return the normalized string
 * @throws IllegalStateException if the normalized string contains {@code <} or {@code >}
 */
private static String NormalizeThenValidate(String input) {
	// Normalize before validating, so the check sees the composed form
	final String normalized = Normalizer.normalize(input, Form.NFKC);

	// Validate: no angle brackets allowed
	final boolean hasAngleBracket = normalized.indexOf('<') >= 0 || normalized.indexOf('>') >= 0;
	if (hasAngleBracket) {
		// Found black listed tag
		throw new IllegalStateException();
	}
	System.out.println("valid input");

	return normalized;
}
 
Example #18
Source File: Function.java    From aliada-tool with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Normalizes a given string as {@link Function#normalize} but also removing all spaces and punctuation:
 * the value is decomposed (NFD), combining diacritical marks are removed, and
 * every character other than A-Z, a-z and 0-9 is dropped before the result is
 * handed to {@code uuid}.
 * 
 * @param value the string to be normalized; for null a random UUID string is returned.
 * @return the normalized string.
 */
public String normalizeStrong(final String value) {
	if (value == null) {
		return UUID.randomUUID().toString();
	}
	final String withoutDiacritics = Normalizer.normalize(value, Form.NFD)
			.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
	final String alphanumericOnly = withoutDiacritics.replaceAll("[^A-Za-z0-9]", "");
	return uuid(alphanumericOnly);
}
 
Example #19
Source File: Strings.java    From aliada-tool with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Converts the given value to a string that can be used as local name in URIs.
 * The value is decomposed (NFD); combining diacritical marks, space
 * characters, the copyright sign, punctuation and the U+FFFD replacement
 * character are removed; the result is trimmed.
 * 
 * @param value the source string; may be null.
 * @return a string that can be used as local name in URIs, or null if the value is null.
 */
public static String toURILocalName(final String value) {
	if (value == null) {
		return null;
	}
	final String withoutDiacritics = Normalizer.normalize(value, Form.NFD)
			.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
	final String withoutSymbols = withoutDiacritics
			.replaceAll(" ", "")
			.replaceAll("©", "")
			.replaceAll("\\p{Punct}", "")
			.replaceAll("\\uFFFD", "");
	return withoutSymbols.trim();
}
 
Example #20
Source File: NormalizerBrutal.java    From mamute with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a slug from the input: every whitespace character becomes a dash,
 * the text is decomposed (NFD), every character that is neither a word
 * character nor a dash is removed, and the result is lower-cased.
 *
 * @param input the text to convert; may be null
 * @return the slug, or an empty string for null or empty input
 */
public static String toSlug(String input) {
	if (input == null || input.isEmpty()) {
		return "";
	}
	final String dashed = input.replaceAll("[\\s]", "-");
	final String decomposed = Normalizer.normalize(dashed, Form.NFD);
	// combining marks split off by NFD are not word characters and vanish here
	final String wordCharsOnly = decomposed.replaceAll("[^\\w-]", "");
	return wordCharsOnly.toLowerCase(Locale.ENGLISH);
}
 
Example #21
Source File: InputHomogenization.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the homogenized form of the input text: digits are replaced by
 * "d", upper-case characters are lower-cased unless case is preserved,
 * characters listed in {@code ignoreCharactersContaining} are kept as they
 * are, and the result is decomposed (NFD) and stripped of a fixed set of
 * punctuation characters. Runs of exclamation marks collapse into one.
 *
 * @return the normalized text
 */
public String transform() {
    StringBuilder homogenized = new StringBuilder();
    for (int i = 0; i < input.length(); i++) {
        final char c = input.charAt(i);
        final boolean keepAsIs = ignoreCharactersContaining != null
                        && ignoreCharactersContaining.contains(String.valueOf(c));
        if (keepAsIs) {
            homogenized.append(c);
        } else if (Character.isDigit(c)) {
            homogenized.append("d");
        } else if (Character.isUpperCase(c) && !preserveCase) {
            homogenized.append(Character.toLowerCase(c));
        } else {
            homogenized.append(c);
        }
    }

    return Normalizer.normalize(homogenized.toString(), Form.NFD)
                    .replace(".", "")
                    .replace(",", "")
                    .replaceAll("\"", "")
                    .replace("'", "")
                    .replace("(", "")
                    .replace(")", "")
                    .replace("“", "")
                    .replace("”", "")
                    .replace("…", "")
                    .replace("|", "")
                    .replace("/", "")
                    .replace("\\", "")
                    .replace("[", "")
                    .replace("]", "")
                    .replace("‘", "")
                    .replace("’", "")
                    // a run of exclamation marks is reduced to a single one
                    .replaceAll("[!]+", "!");
}
 
Example #22
Source File: Speller.java    From morfologik-stemming with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Tells whether two characters are considered equal under the dictionary settings.
 * <p>
 * Characters are equal if they are identical, if {@code y} is listed as an
 * equivalent of {@code x}, or - when diacritics are ignored - if the first
 * characters of their NFD forms match, optionally regardless of case when
 * case conversion is enabled.
 * </p>
 *
 * @param x the first character
 * @param y the second character
 * @return true if the characters are considered equal
 */
private boolean areEqual(final char x, final char y) {
  if (x == y) {
    return true;
  }
  if (dictionaryMetadata.getEquivalentChars() != null) {
    final List<Character> equivalents = dictionaryMetadata.getEquivalentChars().get(x);
    if (equivalents != null && equivalents.contains(y)) {
      return true;
    }
  }
  if (!dictionaryMetadata.isIgnoringDiacritics()) {
    return false;
  }

  // only the first character of each decomposed form takes part in the comparison
  final char baseX = Normalizer.normalize(Character.toString(x), Form.NFD).charAt(0);
  final char baseY = Normalizer.normalize(Character.toString(y), Form.NFD).charAt(0);
  if (baseX == baseY) { // avoid case conversion, if possible
    return true;
  }

  // Case conversion only when needed; single characters are compared, so the
  // per-character methods are sufficient.
  final boolean caseDiffers = dictionaryMetadata.isConvertingCase()
      && Character.isLetter(baseX)
      && Character.isLowerCase(baseX) != Character.isLowerCase(baseY);
  return caseDiffers && Character.toLowerCase(baseX) == Character.toLowerCase(baseY);
}
 
Example #23
Source File: CDataTransferer.java    From openjdk-8 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Translates the given bytes or stream into an object for the requested flavor.
 * <p>
 * {@code CF_HTML} text data is wrapped in an {@code HTMLDecodingInputStream}.
 * {@code CF_URL} data requested as a {@link URL} is decoded with the default
 * text charset, or with the charset name the transferable provides through
 * {@code javaTextEncodingFlavor}, and returned as a URL. {@code CF_STRING}
 * bytes are read as UTF-8, converted to NFC and re-encoded before the
 * superclass translation is applied.
 * </p>
 */
protected Object translateBytesOrStream(InputStream stream, byte[] bytes, DataFlavor flavor, long format,
                                    Transferable transferable) throws IOException
{
    // 5-28-03 VL: [Radar 3266030]
    // We need to do like Windows does here.
    final boolean htmlText = format == CF_HTML && flavor.isFlavorTextType();
    if (htmlText) {
        if (stream == null) {
            stream = new ByteArrayInputStream(bytes);
            bytes = null;
        }

        stream = new HTMLDecodingInputStream(stream);
    }

    final boolean urlRequested = format == CF_URL
            && URL.class.equals(flavor.getRepresentationClass());
    if (urlRequested) {
        if (bytes == null) {
            bytes = inputStreamToByteArray(stream);
            stream = null;
        }

        String encoding = getDefaultTextCharset();
        final boolean encodingProvided = transferable != null
                && transferable.isDataFlavorSupported(javaTextEncodingFlavor);
        if (encodingProvided) {
            try {
                final byte[] rawName = (byte[]) transferable.getTransferData(javaTextEncodingFlavor);
                encoding = new String(rawName, "UTF-8");
            } catch (UnsupportedFlavorException cannotHappen) {
                // support for the flavor was checked right before the call
            }
        }

        final String spec = new String(bytes, encoding);
        return new URL(spec);
    }

    if (format == CF_STRING) {
        final String composed = Normalizer.normalize(new String(bytes, "UTF8"), Form.NFC);
        bytes = composed.getBytes("UTF8");
    }

    return super.translateBytes(bytes, flavor, format, transferable);
}
 
Example #24
Source File: NSString.java    From CrossMobile with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Compares the given Strings using the specified options.
 * <p>
 * A null String orders before any non-null String. With the numeric option
 * both Strings are converted by {@code stringToRelaxedDouble} and compared as
 * numbers; otherwise they are compared as text, optionally after lower-casing
 * and after removing combining diacritical marks.
 * </p>
 *
 * @param from                   The first String to be compared.
 * @param with                   The second String to be compared.
 * @param NSStringCompareOptions The option for searching the Strings.
 * @return The result of comparing the two Strings.
 * @see crossmobile.ios.foundation.NSOrdered
 */
@CMSelector(value = "- (NSComparisonResult)compare:(NSString *)aString options:(NSStringCompareOptions)mask", staticMapping = true)
public static int compare(String from, String with, int NSStringCompareOptions) {
    if (with == null) {
        return from == null ? NSOrdered.Same : NSOrdered.Descending;
    }
    if (from == null) {
        return NSOrdered.Ascending;
    }

    final int mask = NSStringCompareOptions;
    final int order;
    if ((mask & crossmobile.ios.foundation.NSStringCompareOptions.NSNumericSearch) != 0) {
        final double lhs = stringToRelaxedDouble(from);
        final double rhs = stringToRelaxedDouble(with);
        if (lhs == rhs) {
            order = 0;
        } else if (lhs < rhs) {
            order = -1;
        } else {
            order = 1;
        }
    } else {
        String left = from;
        String right = with;
        if ((mask & crossmobile.ios.foundation.NSStringCompareOptions.NSCaseInsensitiveSearch) != 0) {
            left = left.toLowerCase();
            right = right.toLowerCase();
        }
        if ((mask & crossmobile.ios.foundation.NSStringCompareOptions.NSDiacriticInsensitiveSearch) != 0) {
            // decompose, then drop the combining marks
            left = Normalizer.normalize(left, Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
            right = Normalizer.normalize(right, Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
        }
        order = left.compareTo(right);
    }

    if (order < 0) {
        return NSOrdered.Ascending;
    }
    if (order > 0) {
        return NSOrdered.Descending;
    }
    return NSOrdered.Same;
}
 
Example #25
Source File: PersonNameExtractor.java    From yago3 with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Writes an additional label fact for the entity when removing combining
 * diacritical marks from the name yields a different string.
 *
 * @param entity the entity the label belongs to
 * @param name   the original name
 * @param source the source recorded for the written fact
 * @throws IOException if writing fails
 */
private void writeNormalized(String entity, String name, String source) throws IOException {
  final String decomposed = Normalizer.normalize(name, Form.NFD);
  final String stripped = decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
  if (stripped.equals(name)) {
    // the name carries no diacritics, nothing to add
    return;
  }
  write(PERSONNAMEHEURISTICS,
      new Fact(entity, RDFS.label, FactComponent.forStringWithLanguage(stripped, "eng")),
      PERSONNAMESOURCES, source, "PersonNameExtractor_normalized");
}
 
Example #26
Source File: JabArchivesRipper.java    From ripme with MIT License 5 votes vote down vote up
/**
 * Gets a URL/file-safe version of a string: matches of {@code WHITESPACE}
 * become dashes, the text is decomposed (NFD), matches of {@code NONLATIN}
 * are removed and the result is lower-cased.
 *
 * @param input the string to convert
 * @return the slug of the string
 */
protected String getSlug(String input) {
    final String dashed = WHITESPACE.matcher(input).replaceAll("-");
    final String decomposed = Normalizer.normalize(dashed, Form.NFD);
    return NONLATIN.matcher(decomposed)
            .replaceAll("")
            .toLowerCase(Locale.ENGLISH);
}
 
Example #27
Source File: CDataTransferer.java    From openjdk-jdk9 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Translates the given bytes into an object for the requested flavor.
 * <p>
 * {@code CF_URL} data requested as a {@link URL} is decoded and the URL is
 * extracted from the decoded text. {@code CF_FILE} data for a URI list flavor
 * is turned into the line-separated entries returned by {@code dragQueryFile}
 * and then handled as {@code CF_STRING}. Plain {@code CF_STRING} bytes are
 * read as UTF-8, converted to NFC and re-encoded. Everything else is left to
 * the superclass.
 * </p>
 */
@Override
public Object translateBytes(byte[] bytes, DataFlavor flavor,
                             long format, Transferable transferable) throws IOException {

    final boolean urlRequested = format == CF_URL
            && URL.class.equals(flavor.getRepresentationClass());
    if (urlRequested) {
        String encoding = Charset.defaultCharset().name();
        final boolean encodingProvided = transferable != null
                && transferable.isDataFlavorSupported(javaTextEncodingFlavor);
        if (encodingProvided) {
            try {
                final byte[] rawName = (byte[]) transferable.getTransferData(javaTextEncodingFlavor);
                encoding = new String(rawName, StandardCharsets.UTF_8);
            } catch (UnsupportedFlavorException cannotHappen) {
                // support for the flavor was checked right before the call
            }
        }

        // macosx pasteboard returns a property list that consists of one URL
        // let's extract it.
        final String propertyList = new String(bytes, encoding);
        return new URL(extractURL(propertyList));
    }

    if (isUriListFlavor(flavor) && format == CF_FILE) {
        // dragQueryFile works fine with files and url,
        // it parses and extracts values from property list.
        // macosx always returns property list for
        // CF_URL and CF_FILE
        final String[] entries = dragQueryFile(bytes);
        if (entries == null) {
            return null;
        }
        final String separator = System.getProperty("line.separator");
        bytes = String.join(separator, entries).getBytes();
        // the uris are extracted from the xml now; from here on treat them as a
        // regular string so the base method can translate them to the target
        // representation class
        format = CF_STRING;
    } else if (format == CF_STRING) {
        final String composed = Normalizer.normalize(new String(bytes, "UTF8"), Form.NFC);
        bytes = composed.getBytes("UTF8");
    }

    return super.translateBytes(bytes, flavor, format, transferable);
}
 
Example #28
Source File: Slug.java    From sunbird-lms-service with MIT License 5 votes vote down vote up
/**
 * Builds a slug from the input: the value is trimmed and URL-decoded,
 * optionally transliterated, matches of {@code WHITESPACE} become dashes, the
 * text is decomposed (NFD), matches of {@code NONLATIN} are removed, dashes
 * are normalized, and the validated result is lower-cased.
 *
 * @param input         the text to convert; may be null
 * @param transliterate whether to transliterate the text first
 * @return the slug, or null if the input is null
 */
public static String makeSlug(String input, boolean transliterate) {
  // Validate the input
  if (input == null) {
    ProjectLogger.log("Provided input value is null");
    return null;
  }

  // Remove extra spaces, then the URL encoding
  String slug = urlDecode(input.trim());
  // Transliterate & clean up, if requested
  if (transliterate) {
    slug = transliterate(slug);
  }
  // Replace all whitespace with dashes
  slug = WHITESPACE.matcher(slug).replaceAll("-");
  // Decompose so accents become separate characters ...
  slug = Normalizer.normalize(slug, Form.NFD);
  // ... that are removed together with the other non-latin special characters
  slug = NONLATIN.matcher(slug).replaceAll("");
  // Remove any consecutive dashes
  slug = normalizeDashes(slug);
  // Validate against the original input before returning
  validateResult(slug, input);
  // Slug is always lowercase
  return slug.toLowerCase(Locale.ENGLISH);
}
 
Example #29
Source File: CDataTransferer.java    From openjdk-8-source with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Translates the given bytes or stream into an object for the requested flavor.
 * <p>
 * {@code CF_HTML} text data is wrapped in an {@code HTMLDecodingInputStream}.
 * {@code CF_URL} data requested as a {@link URL} is decoded with the default
 * text charset, or with the charset name the transferable provides through
 * {@code javaTextEncodingFlavor}, and returned as a URL. {@code CF_STRING}
 * bytes are read as UTF-8, converted to NFC and re-encoded before the
 * superclass translation is applied.
 * </p>
 */
protected Object translateBytesOrStream(InputStream stream, byte[] bytes, DataFlavor flavor, long format,
                                    Transferable transferable) throws IOException
{
    // 5-28-03 VL: [Radar 3266030]
    // We need to do like Windows does here.
    final boolean htmlText = format == CF_HTML && flavor.isFlavorTextType();
    if (htmlText) {
        if (stream == null) {
            stream = new ByteArrayInputStream(bytes);
            bytes = null;
        }

        stream = new HTMLDecodingInputStream(stream);
    }

    final boolean urlRequested = format == CF_URL
            && URL.class.equals(flavor.getRepresentationClass());
    if (urlRequested) {
        if (bytes == null) {
            bytes = inputStreamToByteArray(stream);
            stream = null;
        }

        String encoding = getDefaultTextCharset();
        final boolean encodingProvided = transferable != null
                && transferable.isDataFlavorSupported(javaTextEncodingFlavor);
        if (encodingProvided) {
            try {
                final byte[] rawName = (byte[]) transferable.getTransferData(javaTextEncodingFlavor);
                encoding = new String(rawName, "UTF-8");
            } catch (UnsupportedFlavorException cannotHappen) {
                // support for the flavor was checked right before the call
            }
        }

        final String spec = new String(bytes, encoding);
        return new URL(spec);
    }

    if (format == CF_STRING) {
        final String composed = Normalizer.normalize(new String(bytes, "UTF8"), Form.NFC);
        bytes = composed.getBytes("UTF8");
    }

    return super.translateBytes(bytes, flavor, format, transferable);
}
 
Example #30
Source File: MCRNameMerger.java    From mycore with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Reduces a name fragment to a simplified form: lower case, hyphens and
 * punctuation turned into blanks, accents removed, some letter pairs folded
 * ("ue", "oe", "ae", "ß", "ss") and whitespace runs collapsed.
 *
 * @param nameFragment the part of a name to normalize
 * @return the normalized, trimmed text
 */
private String normalize(String nameFragment) {
    final String lowered = nameFragment.toLowerCase(Locale.getDefault());
    final String dehyphenated = new MCRHyphenNormalizer().normalize(lowered).replace("-", " ");
    // canonical decomposition, then drop the combining marks (accents)
    final String unaccented = Normalizer.normalize(dehyphenated, Form.NFD).replaceAll("\\p{M}", "");
    final String folded = unaccented
        .replace("ue", "u")
        .replace("oe", "o")
        .replace("ae", "a")
        .replace("ß", "s")
        .replace("ss", "s");
    // NOTE(review): as written this pattern matches a character outside a-z0-9
    // followed by whitespace and a literal ']'. It was presumably meant to be a
    // single character class removing non-alphanumeric characters - confirm
    // before changing, since a fix alters the produced keys.
    final String filtered = folded.replaceAll("[^a-z0-9]\\s]", "");
    // punctuation becomes a blank
    final String depunctuated = filtered.replaceAll("\\p{Punct}", " ").trim();
    // collapse whitespace runs
    final String collapsed = depunctuated.replaceAll("\\s+", " ");
    return collapsed.trim();
}