Java Code Examples for java.net.IDN#toUnicode()

The following examples show how to use java.net.IDN#toUnicode(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: IDNTest.java    From j2objc with Apache License 2.0 6 votes vote down vote up
/**
 * Exercises {@link java.net.IDN#toUnicode(String)} with a null argument, the empty
 * string, plain ASCII names, names that are already Unicode, and ACE-encoded names.
 * @since 1.6
 */
public void test_ToUnicode_LString() {
    // A null argument has to be rejected with a NullPointerException.
    boolean nullRejected = false;
    try {
        IDN.toUnicode(null);
    } catch (NullPointerException expected) {
        nullRejected = true;
    }
    if (!nullRejected) {
        fail("should throw NullPointerException");
    }

    // Inputs that are expected to come back unchanged.
    assertEquals("", IDN.toUnicode(""));
    assertEquals("www.bcher.de", IDN.toUnicode("www.bcher.de"));
    assertEquals("www.b\u00FCcher.de", IDN.toUnicode("www.b\u00FCcher.de"));

    // The same Japanese name supplied in three different spellings.
    final String japaneseName = "www.\u65E5\u672C\u5E73.jp";
    assertEquals(japaneseName, IDN.toUnicode("www.\u65E5\u672C\u5E73.jp"));
    assertEquals(japaneseName, IDN.toUnicode("www\uFF0Exn--gwtq9nb2a\uFF61jp"));
    assertEquals(japaneseName, IDN.toUnicode("www.xn--gwtq9nb2a.jp"));
}
 
Example 2
Source File: DnsMessage.java    From happy-dns-android with MIT License 6 votes vote down vote up
/**
 * Parse a domain name starting at the current offset and moving the input
 * stream pointer past this domain name (even if cross references occur).
 * <p>
 * The first byte decides how the name continues: a byte with both top bits
 * set is combined with the following byte into a 14-bit offset and the name
 * is read from {@code data} at that offset; a zero byte ends the name; any
 * other value is the length of the next label, which is read, passed through
 * {@link IDN#toUnicode(String)} and joined to the rest of the name with a dot.
 *
 * @param dis  The input stream.
 * @param data The raw data (for cross references).
 * @return The domain name string, or the empty string for a bare terminator.
 * @throws IOException If the stream cannot supply the bytes being read.
 */
private static String readName(DataInputStream dis, byte data[])
        throws IOException {
    int c = dis.readUnsignedByte();
    if ((c & 0xc0) == 0xc0) {
        // Cross reference: the low 6 bits plus the next byte form the offset.
        c = ((c & 0x3f) << 8) + dis.readUnsignedByte();
        // Record the offset we jump to; presumably the array-based overload
        // uses this set to detect reference loops -- confirm against it.
        HashSet<Integer> jumps = new HashSet<Integer>();
        jumps.add(c);
        return readName(data, c, jumps);
    }
    if (c == 0) {
        return "";
    }
    byte b[] = new byte[c];
    dis.readFully(b);
    // Decode with an explicit charset so the result does not depend on the
    // platform default encoding of the JVM this runs on.
    String s = IDN.toUnicode(new String(b, java.nio.charset.Charset.forName("UTF-8")));
    String t = readName(dis, data);
    if (t.length() > 0) {
        s = s + "." + t;
    }
    return s;
}
 
Example 3
Source File: DnsMessage.java    From android-netdiag with MIT License 6 votes vote down vote up
/**
 * Parse a domain name starting at the current offset and moving the input
 * stream pointer past this domain name (even if cross references occur).
 * <p>
 * A leading byte with both top bits set introduces a cross reference: its low
 * six bits and the next byte give an offset into {@code data} from which the
 * name is read instead. A leading zero byte terminates the name. Otherwise the
 * byte is a label length; the label is read, converted with
 * {@link IDN#toUnicode(String)} and followed by the remaining labels.
 *
 * @param dis  The input stream.
 * @param data The raw data (for cross references).
 * @return The domain name string, or the empty string for a bare terminator.
 * @throws IOException If the stream cannot supply the bytes being read.
 */
private static String readName(DataInputStream dis, byte data[])
        throws IOException {
    int c = dis.readUnsignedByte();
    if ((c & 0xc0) == 0xc0) {
        // Cross reference: build the 14-bit offset from this byte and the next.
        c = ((c & 0x3f) << 8) + dis.readUnsignedByte();
        // Seed the visited-offset set handed to the array-based overload
        // (presumably its guard against reference cycles -- confirm there).
        HashSet<Integer> jumps = new HashSet<Integer>();
        jumps.add(c);
        return readName(data, c, jumps);
    }
    if (c == 0) {
        return "";
    }
    byte b[] = new byte[c];
    dis.readFully(b);
    // Use an explicit charset: new String(byte[]) would otherwise decode with
    // whatever default encoding the running JVM happens to have.
    String s = IDN.toUnicode(new String(b, java.nio.charset.Charset.forName("UTF-8")));
    String t = readName(dis, data);
    if (t.length() > 0) {
        s = s + "." + t;
    }
    return s;
}
 
Example 4
Source File: Dns.java    From urllib with Apache License 2.0 6 votes vote down vote up
/**
 * Validates {@code hostname} character by character and builds a {@code Dns} value from
 * its lower-cased form and the {@link IDN#toUnicode(String)} rendering of that form.
 *
 * <p>An {@code InvalidHostException} carrying the offending index is thrown for any
 * character rejected by {@code DNS}, for a leading dot, and for two dots in a row.
 */
static Dns parse(String hostname) {
  // True whenever a '.' at the current index would be illegal: at index 0 and
  // immediately after another '.'.
  boolean dotForbidden = true;
  final int length = hostname.length();
  for (int index = 0; index < length; index++) {
    char current = hostname.charAt(index);
    boolean isDot = current == '.';
    if (!DNS.matches(current) || (isDot && dotForbidden)) {
      throw new InvalidHostException(hostname, index);
    }
    dotForbidden = isDot;
  }
  String lowerCased = hostname.toLowerCase(Locale.US);
  String unicodeForm = IDN.toUnicode(lowerCased);
  return new AutoValue_Dns(lowerCased, unicodeForm);
}
 
Example 5
Source File: HostnameChecker.java    From openjsse with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Tells whether {@code name} matches {@code template}.<p>
 *
 * Matching follows the RFC 2818 rules for TLS and the RFC 2830 rules
 * for LDAP, selected by {@code checkType}.<p>
 *
 * <code>name</code> should be a DNS name; <code>template</code> may
 * contain the wildcard character '*'. Both are round-tripped through
 * IDN ASCII/Unicode conversion before comparison; any failure during
 * that conversion, an illegal wildcard, or an invalid template yields
 * {@code false}.
 */
private boolean isMatched(String name, String template,
                          boolean chainsToPublicCA) {

    // Bring both sides to Unicode, because PSL is in Unicode.
    String unicodeName;
    String unicodeTemplate;
    try {
        unicodeName = IDN.toUnicode(IDN.toASCII(name));
        unicodeTemplate = IDN.toUnicode(IDN.toASCII(template));
    } catch (RuntimeException re) {
        if (SSLLogger.isOn) {
            SSLLogger.fine("Failed to normalize to Unicode: " + re);
        }
        return false;
    }

    if (hasIllegalWildcard(unicodeTemplate, chainsToPublicCA)) {
        return false;
    }

    // Validate the template by substituting 'z' for every wildcard and
    // letting SNIHostName's own checks accept or reject the result.
    String wildcardFree = unicodeTemplate.replace('*', 'z');
    try {
        new SNIHostName(wildcardFree);
    } catch (IllegalArgumentException iae) {
        // It would be nice to add debug log if not matching.
        return false;
    }

    if (checkType == TYPE_TLS) {
        return matchAllWildcards(unicodeName, unicodeTemplate);
    }
    if (checkType == TYPE_LDAP) {
        return matchLeftmostWildcard(unicodeName, unicodeTemplate);
    }
    return false;
}
 
Example 6
Source File: HomoglyphStrategy.java    From metron with Apache License 2.0 5 votes vote down vote up
/**
 * Builds candidate strings by substituting characters of the input with the
 * alternatives listed for them in {@code glyphs}.
 *
 * <p>An input recognised by {@code isAce} is first converted with
 * {@link IDN#toUnicode(String)}. Every window of the resulting string (sizes below
 * the full length) is scanned; for each character of the window that has glyph
 * alternatives, all occurrences of that character inside the window are replaced by
 * each alternative in turn. Each resulting string is added to the result, together
 * with its {@link IDN#toASCII(String, int)} form when that form differs.
 *
 * @param originalString the string to derive candidates from
 * @return the generated candidates; empty when {@code StringUtils.isEmpty} reports
 *     the input as empty
 */
@Override
public Set<String> generateCandidates(String originalString) {
  Set<String> result = new HashSet<>();
  String domain = originalString;
  if(StringUtils.isEmpty(domain)) {
    return result;
  }
  if(isAce(domain)) {
    //this is an ace domain.
    domain = IDN.toUnicode(domain);
  }
  for(int ws = 0;ws < domain.length();ws++) {
    for(int i = 0;i < domain.length() - ws + 1;++i) {
      String win = domain.substring(i, i+ws);
      for(int j = 0;j < ws;j++) {
        char c = win.charAt(j);
        if( glyphs.containsKey(c)) {
          for( String g : glyphs.get(c)) {
            // Literal replacement: replaceAll would interpret the character as a
            // regular expression and '$' or '\' in the glyph as replacement syntax.
            String winNew = win.replace(String.valueOf(c), g);
            String d = domain.substring(0, i) + winNew + domain.substring(i + ws);
            result.add(d);
            if(!isAce(d)) {
              try {
                String dAscii = IDN.toASCII(d, IDN.ALLOW_UNASSIGNED);
                if (!d.equals(dAscii)) {
                  result.add(dAscii);
                }
              }
              catch(IllegalArgumentException iae) {
                // Best effort: a candidate with no ASCII form is still kept in its Unicode form.
                LOG.debug("Unable to parse " + d + ": " + iae.getMessage(), iae);
              }
            }
          }
        }
      }
    }
  }
  return result;
}
 
Example 7
Source File: PublicSuffixDatabase.java    From styT with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the effective top-level domain plus one (eTLD+1) of {@code domain} using the public
 * suffix list. The result is null when the domain itself is a public suffix.
 *
 * <p>Here are some examples: <pre>{@code
 * assertEquals("google.com", getEffectiveTldPlusOne("google.com"));
 * assertEquals("google.com", getEffectiveTldPlusOne("www.google.com"));
 * assertNull(getEffectiveTldPlusOne("com"));
 * }</pre>
 *
 * @param domain A canonicalized domain. An International Domain Name (IDN) should be punycode
 *    encoded.
 */
public String getEffectiveTldPlusOne(String domain) {
  if (domain == null) {
    throw new NullPointerException("domain == null");
  }

  // The list is stored as UTF-8, so rule matching is done on the Unicode labels.
  String[] unicodeLabels = IDN.toUnicode(domain).split("\\.");
  String[] matchedRule = findMatchingRule(unicodeLabels);
  boolean isExceptionRule = matchedRule[0].charAt(0) == EXCEPTION_MARKER;

  if (!isExceptionRule && unicodeLabels.length == matchedRule.length) {
    // The domain is a public suffix.
    return null;
  }

  // An exception rule already spans the eTLD+1; a regular rule spans only the public suffix,
  // so one extra label has to be kept.
  int labelsToKeep = isExceptionRule ? matchedRule.length : matchedRule.length + 1;
  int firstLabelOffset = unicodeLabels.length - labelsToKeep;

  // The answer is assembled from the labels of the original (punycode) input.
  String[] punycodeLabels = domain.split("\\.");
  StringBuilder joined = new StringBuilder();
  for (int index = firstLabelOffset; index < punycodeLabels.length; index++) {
    joined.append(punycodeLabels[index]);
    joined.append('.');
  }
  joined.deleteCharAt(joined.length() - 1);
  return joined.toString();
}
 
Example 8
Source File: DisplayUtils.java    From Cirrus_depricated with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Converts an internationalized domain name (IDN) in an URL to and from ASCII/Unicode.
 * <p>
 * Leading dots are set aside before the host is located and are put back, unchanged, in
 * front of the result. The host is taken to start after the first "//" or, when there is
 * none, after the first "@", and to end at the next "/" or at the end of the string.
 * On platform versions older than Gingerbread the URL is returned unmodified.
 *
 * @param url the URL where the domain name should be converted
 * @param toASCII if true converts from Unicode to ASCII, if false converts from ASCII to Unicode
 * @return the URL containing the converted domain name
 */
@TargetApi(Build.VERSION_CODES.GINGERBREAD)
public static String convertIdn(String url, boolean toASCII) {

    // Split off every leading dot. Counting on the input itself guarantees termination
    // no matter how many dots there are.
    int dotCount = 0;
    while (dotCount < url.length() && url.charAt(dotCount) == '.') {
        dotCount++;
    }
    String dots = url.substring(0, dotCount);
    String urlNoDots = url.substring(dotCount);

    if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.GINGERBREAD) {
        // Find host name after '//' or '@'. All offsets refer to urlNoDots so that they
        // stay valid for the substring calls below.
        int hostStart = 0;
        int doubleSlash = urlNoDots.indexOf("//");
        if (doubleSlash != -1) {
            hostStart = doubleSlash + "//".length();
        } else {
            int at = urlNoDots.indexOf("@");
            if (at != -1) {
                hostStart = at + "@".length();
            }
        }

        int hostEnd = urlNoDots.indexOf("/", hostStart);
        // Handle URL which doesn't have a path (path is implicitly '/')
        if (hostEnd == -1) {
            hostEnd = urlNoDots.length();
        }

        String host = urlNoDots.substring(hostStart, hostEnd);
        host = (toASCII ? IDN.toASCII(host) : IDN.toUnicode(host));

        return dots + urlNoDots.substring(0, hostStart) + host + urlNoDots.substring(hostEnd);
    } else {
        // No conversion available: hand back the input as is (it still has its dots).
        return url;
    }
}
 
Example 9
Source File: HostnameChecker.java    From Bytecoder with Apache License 2.0 5 votes vote down vote up
/**
 * Checks <code>name</code> against <code>template</code> and reports
 * whether it matches.<p>
 *
 * RFC 2818 rules are applied for TLS and RFC 2830 rules for LDAP,
 * depending on {@code checkType}.<p>
 *
 * The <code>name</code> parameter should represent a DNS name, while
 * the <code>template</code> parameter may contain the wildcard
 * character '*'. The result is {@code false} when either value cannot
 * be normalized, when the template has an illegal wildcard, or when
 * the template is not a valid host name.
 */
private boolean isMatched(String name, String template,
                          boolean chainsToPublicCA) {

    // Normalize to Unicode, because PSL is in Unicode.
    String normalizedName;
    String normalizedTemplate;
    try {
        normalizedName = IDN.toUnicode(IDN.toASCII(name));
        normalizedTemplate = IDN.toUnicode(IDN.toASCII(template));
    } catch (RuntimeException re) {
        if (SSLLogger.isOn) {
            SSLLogger.fine("Failed to normalize to Unicode: " + re);
        }
        return false;
    }

    if (hasIllegalWildcard(normalizedTemplate, chainsToPublicCA)) {
        return false;
    }

    // Check the template's validity with the rules implemented in
    // SNIHostName, after swapping each wildcard '*' for a plain 'z'.
    String probe = normalizedTemplate.replace('*', 'z');
    try {
        new SNIHostName(probe);
    } catch (IllegalArgumentException iae) {
        // It would be nice to add debug log if not matching.
        return false;
    }

    if (checkType == TYPE_TLS) {
        return matchAllWildcards(normalizedName, normalizedTemplate);
    }
    if (checkType == TYPE_LDAP) {
        return matchLeftmostWildcard(normalizedName, normalizedTemplate);
    }
    return false;
}
 
Example 10
Source File: SystemSearchResult.java    From spacewalk with GNU General Public License v2.0 4 votes vote down vote up
/**
 * @return the hostname converted with {@link IDN#toUnicode(String)},
 *         or null when no hostname is set
 */
public String getDecodedHostname() {
    if (hostname == null) {
        return null;
    }
    return IDN.toUnicode(hostname);
}
 
Example 11
Source File: ParsedIRI.java    From rdf4j with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Produces a normalized form of this IRI.
 * <p>
 * IRIs identify resources, so ideally two IRIs would be equivalent exactly when they identify the same resource.
 * That definition is of little practical use, because an implementation cannot compare two resources without full
 * knowledge or control of them. IRI normalization is therefore designed to minimize false negatives while strictly
 * avoiding false positives.
 * <p>
 * <b>Case Normalization</b> the hexadecimal digits within a percent-encoding triplet (e.g., "%3a" versus "%3A") are
 * case-insensitive and are normalized to use uppercase letters for the digits A - F. The scheme and host are case
 * insensitive and are normalized to lowercase.
 * <p>
 * <b>Character Normalization</b> The Unicode Standard defines various equivalences between sequences of characters
 * for various purposes. Unicode Standard Annex defines various Normalization Forms for these equivalences and is
 * applied to the IRI components.
 * <p>
 * <b>Percent-Encoding Normalization</b> decodes any percent-encoded octet sequence that corresponds to an
 * unreserved character anywhere in the IRI.
 * <p>
 * <b>Path Segment Normalization</b> is the process of removing unnecessary {@code "."} and {@code ".."} segments
 * from the path component of a hierarchical IRI. Each {@code "."} segment is simply removed. A {@code ".."} segment
 * is removed only if it is preceded by a non-{@code ".."} segment or the start of the path.
 * <p>
 * <b>HTTP(S) Scheme Normalization</b> if the port uses the default port number or not given it is set to undefined.
 * An empty path is replaced with "/".
 * <p>
 * <b>File Scheme Normalization</b> if the host is "localhost" or empty it is set to undefined.
 * <p>
 * <b>Internationalized Domain Name Normalization</b> of the host component to Unicode.
 *
 * @return this instance when normalization leaves the IRI string unchanged, otherwise a new normalized IRI
 */
public ParsedIRI normalize() {
	String normScheme = toLowerCase(scheme);

	// The default port of http/https carries no information and is dropped.
	boolean defaultHttpPort = isScheme("http") && 80 == port;
	boolean optionalPort = defaultHttpPort || (isScheme("https") && 443 == port);
	int normPort;
	if (optionalPort) {
		normPort = -1;
	} else {
		normPort = port;
	}

	// A file IRI without user info and port whose host is empty or "localhost" loses its host.
	boolean localhost = isScheme("file") && userInfo == null && -1 == port
			&& ("".equals(host) || "localhost".equals(host));
	String normHost;
	if (localhost) {
		normHost = null;
	} else if (host == null || host.length() == 0) {
		normHost = host;
	} else {
		String lowerHost = pctEncodingNormalization(toLowerCase(host));
		normHost = IDN.toUnicode(lowerHost, IDN.USE_STD3_ASCII_RULES | IDN.ALLOW_UNASSIGNED);
	}

	String normPath;
	if (normScheme != null && path == null) {
		normPath = "";
	} else {
		normPath = normalizePath(path);
	}

	String normUserInfo = pctEncodingNormalization(userInfo);
	String normQuery = pctEncodingNormalization(query);
	String normFragment = pctEncodingNormalization(fragment);

	ParsedIRI normalized = new ParsedIRI(normScheme, normUserInfo, normHost, normPort, normPath, normQuery,
			normFragment);
	return this.iri.equals(normalized.iri) ? this : normalized;
}
 
Example 12
Source File: SSAddrRequest.java    From shadowsocks-java with MIT License 4 votes vote down vote up
/**
 * @return the stored host after conversion with {@link IDN#toUnicode(String)}
 */
public String host() {
    final String unicodeHost = IDN.toUnicode(this.host);
    return unicodeHost;
}
 
Example 13
Source File: SystemSearchResult.java    From uyuni with GNU General Public License v2.0 4 votes vote down vote up
/**
 * @return the result of {@link IDN#toUnicode(String)} applied to the hostname,
 *         or null if the hostname is null
 */
public String getDecodedHostname() {
    if (hostname != null) {
        return IDN.toUnicode(hostname);
    }
    return null;
}
 
Example 14
Source File: SocksCmdResponse.java    From netty4.0.27Learn with Apache License 2.0 3 votes vote down vote up
/**
 * Returns the host used as a parameter in {@link io.netty.handler.codec.socks.SocksCmdType}.
 * The host (BND.ADDR field in the response) is the address the server used when connecting to the
 * target host, which is typically different from the address the client uses to reach the SOCKS server.
 * A non-null host is passed through {@link IDN#toUnicode(String)} before being returned.
 *
 * @return host that is used as a parameter in {@link io.netty.handler.codec.socks.SocksCmdType}
 *         or null when there was no host specified during response construction
 */
public String host() {
    return host == null ? null : IDN.toUnicode(host);
}
 
Example 15
Source File: PunycodeAutoDecoder.java    From public-suffix-list with Do What The F*ck You Want To Public License 3 votes vote down vote up
/**
 * Converts a domain name with {@link IDN#toUnicode(String)}.
 *
 * A name that is not changed by the conversion is returned as it was.
 * Whether the conversion changed the name is remembered in
 * {@link #decoded}, so that {@link #recode(String)} can return a string
 * in the saved format.
 *
 * @param domain  the domain name, may be null
 * @return the converted domain name, or null for a null argument
 */
public String decode(final String domain) {
    if (domain == null) {
        return null;
    }

    final String unicodeDomain = IDN.toUnicode(domain);
    final boolean unchanged = unicodeDomain.equals(domain);
    decoded = !unchanged;
    return unicodeDomain;
}
 
Example 16
Source File: Server.java    From uyuni with GNU General Public License v2.0 2 votes vote down vote up
/**
 * Get the primary hostname for this server, passed through
 * {@link IDN#toUnicode(String)} so that an IDN hostname is decoded from Puny encoding.
 * @return Returns the decoded primary hostname, or null if there is no hostname
 */
public String getDecodedHostname() {
    if (hostname == null) {
        return null;
    }
    return IDN.toUnicode(hostname);
}
 
Example 17
Source File: SocksCmdRequest.java    From netty4.0.27Learn with Apache License 2.0 2 votes vote down vote up
/**
 * Returns the host used as a parameter in {@link SocksCmdType}, after conversion
 * with {@link IDN#toUnicode(String)}.
 *
 * @return host that is used as a parameter in {@link SocksCmdType}
 */
public String host() {
    final String unicodeHost = IDN.toUnicode(host);
    return unicodeHost;
}
 
Example 18
Source File: SocksCmdResponse.java    From netty-4.1.22 with Apache License 2.0 2 votes vote down vote up
/**
 * Returns the host used as a parameter in {@link SocksCmdType}.
 * The host (BND.ADDR field in the response) is the address the server used when connecting to the
 * target host, which is typically different from the address the client uses to reach the SOCKS server.
 * Only a non-null host whose address type is {@code DOMAIN} is converted with
 * {@link IDN#toUnicode(String)}; any other host is returned as stored.
 *
 * @return host that is used as a parameter in {@link SocksCmdType}
 *         or null when there was no host specified during response construction
 */
public String host() {
    if (host == null || addressType != SocksAddressType.DOMAIN) {
        return host;
    }
    return IDN.toUnicode(host);
}
 
Example 19
Source File: SocksCmdRequest.java    From netty-4.1.22 with Apache License 2.0 2 votes vote down vote up
/**
 * Returns the host used as a parameter in {@link SocksCmdType}. A host whose address
 * type is {@code DOMAIN} is converted with {@link IDN#toUnicode(String)}; any other
 * host is returned as stored.
 *
 * @return host that is used as a parameter in {@link SocksCmdType}
 */
public String host() {
    if (addressType != SocksAddressType.DOMAIN) {
        return host;
    }
    return IDN.toUnicode(host);
}
 
Example 20
Source File: Server.java    From spacewalk with GNU General Public License v2.0 2 votes vote down vote up
/**
 * Get the primary hostname for this server as reported by {@code getHostname()},
 * passed through {@link IDN#toUnicode(String)} so that an IDN hostname is decoded
 * from Puny encoding.
 * @return Returns the decoded primary hostname, or null if there is no hostname
 */
public String getDecodedHostname() {
    String rawHostname = getHostname();
    if (rawHostname == null) {
        return null;
    }
    return IDN.toUnicode(rawHostname);
}