java.net.IDN Java Examples

The following examples show how to use java.net.IDN. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Util.java    From AndroidProjects with MIT License 6 votes vote down vote up
/**
 * Converts a host name to its lowercase IDN ToASCII form. For example {@code ☃.net} becomes
 * {@code xn--n3h.net} and {@code WwW.GoOgLe.cOm} becomes {@code www.google.com}.
 *
 * @param input the host name to encode
 * @return the lowercase ASCII host name, or {@code null} if the input cannot be ToASCII
 *     encoded, encodes to an empty string, or is rejected by
 *     {@code containsInvalidHostnameAsciiCodes}
 */
public static String domainToAscii(String input) {
  try {
    String encoded = IDN.toASCII(input).toLowerCase(Locale.US);

    // An empty result, or one holding characters that are illegal in a host name, is unusable.
    // TODO: implement all label limits.
    boolean usable = !encoded.isEmpty() && !containsInvalidHostnameAsciiCodes(encoded);
    return usable ? encoded : null;
  } catch (IllegalArgumentException e) {
    // IDN.toASCII rejected the input.
    return null;
  }
}
 
Example #2
Source File: HttpUrl.java    From HttPizza with Apache License 2.0 6 votes vote down vote up
/**
 * Applies IDN ToASCII encoding to a host name and lowercases the outcome, e.g. {@code ☃.net}
 * maps to {@code xn--n3h.net} and {@code WwW.GoOgLe.cOm} maps to {@code www.google.com}.
 *
 * @param input the host name to canonicalize
 * @return the canonical ASCII form, or {@code null} when ToASCII encoding fails, the encoded
 *         value is empty, or it contains unsupported ASCII characters
 */
public static String domainToAscii(String input) {
    try {
        final String lowered = IDN.toASCII(input).toLowerCase(Locale.US);

        // Reject empty results and results containing illegal host name characters.
        if (lowered.isEmpty() || containsInvalidHostnameAsciiCodes(lowered)) {
            return null;
        }
        // TODO: implement all label limits.
        return lowered;
    } catch (IllegalArgumentException e) {
        return null;
    }
}
 
Example #3
Source File: Socks5CommandRequestDecoderTest.java    From netty-4.1.22 with Apache License 2.0 6 votes vote down vote up
/**
 * Writes a SOCKS5 command request through an {@link EmbeddedChannel} holding a
 * {@link Socks5CommandRequestDecoder} and checks that the decoded message carries the same
 * type, address type and port, and the IDN ToASCII form of the destination address.
 */
private static void test(
        Socks5CommandType type, Socks5AddressType dstAddrType, String dstAddr, int dstPort) {
    logger.debug(
            "Testing type: " + type + " dstAddrType: " + dstAddrType +
            " dstAddr: " + dstAddr + " dstPort: " + dstPort);

    final Socks5CommandRequest request =
            new DefaultSocks5CommandRequest(type, dstAddrType, dstAddr, dstPort);
    final EmbeddedChannel channel = new EmbeddedChannel(new Socks5CommandRequestDecoder());
    Socks5CommonTestUtils.writeFromClientToServer(channel, request);

    final Socks5CommandRequest decoded = channel.readInbound();
    assertSame(decoded.type(), type);
    assertSame(decoded.dstAddrType(), dstAddrType);
    // The decoded address is compared against the ASCII (Punycode) form of the input.
    assertEquals(decoded.dstAddr(), IDN.toASCII(dstAddr));
    assertEquals(decoded.dstPort(), dstPort);

    // Exactly one inbound message is expected.
    assertNull(channel.readInbound());
}
 
Example #4
Source File: URLUtil.java    From sparkler with Apache License 2.0 6 votes vote down vote up
/**
 * Rebuilds {@code url} with its host converted to ASCII via {@link IDN#toASCII(String)}.
 *
 * @param url the URL string to convert
 * @return the rebuilt URL string; the unchanged input when the URL has no host; or
 *         {@code null} if parsing, host conversion or reassembly throws any exception
 */
public static String toASCII(String url) {
    try {
        final URL parsed = new URL(url);
        final String hostName = parsed.getHost();
        final boolean hasHost = hostName != null && !hostName.isEmpty();
        if (!hasHost) {
            // no host name => no punycoded domain name
            // also do not add additional slashes for file: URLs (NUTCH-1880)
            return url;
        }

        final String asciiHost = IDN.toASCII(hostName);
        final URI rebuilt = new URI(parsed.getProtocol(), parsed.getUserInfo(), asciiHost,
                parsed.getPort(), parsed.getPath(), parsed.getQuery(), parsed.getRef());
        return rebuilt.toString();
    } catch (Exception e) {
        return null;
    }
}
 
Example #5
Source File: SocksCmdRequestTest.java    From netty-4.1.22 with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a DOMAIN-type {@code SocksCmdRequest} keeps the original host in
 * {@code host()} but writes the IDN ToASCII form of it when encoded into a buffer.
 */
@Test
public void testIDNEncodeToAsciiForDomain() {
    final String host = "тест.рф";
    final String asciiHost = IDN.toASCII(host);
    final int asciiLength = asciiHost.length();
    final short port = 10000;

    final SocksCmdRequest request =
            new SocksCmdRequest(SocksCmdType.BIND, SocksAddressType.DOMAIN, host, port);
    assertEquals(host, request.host());

    final ByteBuf encoded = Unpooled.buffer(24);
    request.encodeAsByteBuf(encoded);
    encoded.resetReaderIndex();

    // Fields are read back in the order they were written.
    assertEquals(SocksProtocolVersion.SOCKS5.byteValue(), encoded.readByte());
    assertEquals(SocksCmdType.BIND.byteValue(), encoded.readByte());
    assertEquals((byte) 0x00, encoded.readByte());
    assertEquals(SocksAddressType.DOMAIN.byteValue(), encoded.readByte());
    assertEquals((byte) asciiLength, encoded.readUnsignedByte());
    assertEquals(asciiHost, encoded.readCharSequence(asciiLength, CharsetUtil.US_ASCII));
    assertEquals(port, encoded.readUnsignedShort());

    encoded.release();
}
 
Example #6
Source File: DnsNameResolverContext.java    From netty-4.1.22 with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an {@link InetAddress} from the raw content of a DNS record.
 *
 * @param r    the record to read; only {@code DnsRawRecord} instances are handled
 * @param name the host name to attach to the address; converted with
 *             {@link IDN#toUnicode(String)} when {@code parent.isDecodeIdn()} is true
 * @return the address, or {@code null} if the record is not a raw record or its content
 *         length is neither {@code INADDRSZ4} nor {@code INADDRSZ6}
 */
private InetAddress parseAddress(DnsRecord r, String name) {
    if (r instanceof DnsRawRecord) {
        final ByteBuf payload = ((ByteBufHolder) r).content();
        final int length = payload.readableBytes();
        if (length == INADDRSZ4 || length == INADDRSZ6) {
            // Copy the bytes out without moving the reader index.
            final byte[] rawAddress = new byte[length];
            payload.getBytes(payload.readerIndex(), rawAddress);

            try {
                final String hostName = parent.isDecodeIdn() ? IDN.toUnicode(name) : name;
                return InetAddress.getByAddress(hostName, rawAddress);
            } catch (UnknownHostException e) {
                // Should never reach here.
                throw new Error(e);
            }
        }
    }
    return null;
}
 
Example #7
Source File: Util.java    From jus with Apache License 2.0 6 votes vote down vote up
/**
 * Runs IDN ToASCII on the given host name and lowercases the result. {@code ☃.net} turns into
 * {@code xn--n3h.net}; {@code WwW.GoOgLe.cOm} turns into {@code www.google.com}.
 *
 * @param input the host name to encode
 * @return the encoded, lowercased host name; {@code null} if ToASCII encoding fails, yields an
 *     empty string, or yields unsupported ASCII characters
 */
public static String domainToAscii(String input) {
  try {
    final String ascii = IDN.toASCII(input).toLowerCase(Locale.US);
    if (ascii.isEmpty()) {
      return null;
    }

    // The ToASCII output may still hold characters that are illegal in a host name.
    // TODO: implement all label limits.
    return containsInvalidHostnameAsciiCodes(ascii) ? null : ascii;
  } catch (IllegalArgumentException e) {
    return null;
  }
}
 
Example #8
Source File: IDNTest.java    From j2objc with Apache License 2.0 6 votes vote down vote up
/**
 * Exercises {@link java.net.IDN#toUnicode(String)}: null rejection, pass-through of plain and
 * already-Unicode names, and decoding of Punycode labels with ASCII and non-ASCII separators.
 * @since 1.6
 */
public void test_ToUnicode_LString() {
    // A null argument must be rejected.
    try {
        IDN.toUnicode(null);
        fail("should throw NullPointerException");
    } catch (NullPointerException expected) {
        // expected
    }

    // Inputs that are returned unchanged.
    assertEquals("", IDN.toUnicode(""));
    assertEquals("www.bcher.de", IDN.toUnicode("www.bcher.de"));
    assertEquals("www.b\u00FCcher.de", IDN.toUnicode("www.b\u00FCcher.de"));

    // Three spellings of the same name must all decode to the same Unicode form.
    final String japanese = "www.\u65E5\u672C\u5E73.jp";
    assertEquals(japanese, IDN.toUnicode("www.\u65E5\u672C\u5E73.jp"));
    assertEquals(japanese, IDN.toUnicode("www\uFF0Exn--gwtq9nb2a\uFF61jp"));
    assertEquals(japanese, IDN.toUnicode("www.xn--gwtq9nb2a.jp"));
}
 
Example #9
Source File: CronetEngineBuilderImpl.java    From cronet with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Checks whether a given string represents a valid host name for PKP and converts it
 * to ASCII Compatible Encoding representation according to RFC 1122, RFC 1123 and
 * RFC 3490. This method is more restrictive than required by RFC 7469. Thus, a host
 * that contains digits and the dot character only is considered invalid.
 *
 * Note: Currently Cronet doesn't have native implementation of host name validation that
 *       can be used. There is code that parses a provided URL but doesn't ensure its
 *       correctness. The implementation relies on {@code getaddrinfo} function.
 *
 * @param hostName host name to check and convert.
 * @return the host name converted with {@link IDN#toASCII(String, int)} using
 *         {@link IDN#USE_STD3_ASCII_RULES}.
 * @throws IllegalArgumentException if the given string does not represent a valid
 *                                  hostname. When the conversion itself fails, the
 *                                  original exception is attached as the cause.
 */
private static String validateHostNameForPinningAndConvert(String hostName)
        throws IllegalArgumentException {
    if (INVALID_PKP_HOST_NAME.matcher(hostName).matches()) {
        throw new IllegalArgumentException("Hostname " + hostName + " is illegal."
                + " A hostname should not consist of digits and/or dots only.");
    }
    // Workaround for crash, see crbug.com/634914
    if (hostName.length() > 255) {
        throw new IllegalArgumentException("Hostname " + hostName + " is too long."
                + " The name of the host does not comply with RFC 1122 and RFC 1123.");
    }
    try {
        return IDN.toASCII(hostName, IDN.USE_STD3_ASCII_RULES);
    } catch (IllegalArgumentException ex) {
        // Keep the IDN failure as the cause so the exact reason is not lost.
        throw new IllegalArgumentException("Hostname " + hostName + " is illegal."
                + " The name of the host does not comply with RFC 1122 and RFC 1123.", ex);
    }
}
 
Example #10
Source File: HttpClient.java    From Dashchan with Apache License 2.0 6 votes vote down vote up
/**
 * Assembles a {@link URL} from the parts of {@code uri}: scheme, host converted with
 * {@link IDN#toASCII(String)}, optional port, and the encoded path and query, each appended
 * through {@code encodeUriAppend} when non-empty.
 *
 * @param uri the source URI
 * @return the assembled URL
 * @throws MalformedURLException if the assembled string is not a valid URL
 */
URL encodeUri(Uri uri) throws MalformedURLException {
	StringBuilder encoded = new StringBuilder()
			.append(uri.getScheme())
			.append("://")
			.append(IDN.toASCII(uri.getHost()));

	// A port of -1 means the URI carries no explicit port.
	int port = uri.getPort();
	if (port != -1) {
		encoded.append(':').append(port);
	}

	String encodedPath = uri.getEncodedPath();
	boolean hasPath = !StringUtils.isEmpty(encodedPath);
	if (hasPath) {
		encodeUriAppend(encoded, encodedPath);
	}

	String encodedQuery = uri.getEncodedQuery();
	boolean hasQuery = !StringUtils.isEmpty(encodedQuery);
	if (hasQuery) {
		encoded.append('?');
		encodeUriAppend(encoded, encodedQuery);
	}

	return new URL(encoded.toString());
}
 
Example #11
Source File: PunycodeURLNormalizer.java    From news-crawl with Apache License 2.0 6 votes vote down vote up
/**
 * Replaces a non-ASCII host in {@code urlToFilter} with its {@link IDN#toASCII(String)} form.
 *
 * @return the input unchanged when the host is already ASCII or the conversion does not
 *         change it; otherwise a URL rebuilt from protocol, converted host, port and file;
 *         {@code null} if the input or the rebuilt URL is malformed
 */
@Override
public String filter(URL sourceUrl, Metadata sourceMetadata,
        String urlToFilter) {
    try {
        final URL parsed = new URL(urlToFilter);
        final String originalHost = parsed.getHost();
        if (isAscii(originalHost)) {
            return urlToFilter;
        }

        final String asciiHost = IDN.toASCII(originalHost);
        if (asciiHost.equals(originalHost)) {
            return urlToFilter;
        }

        final URL rebuilt = new URL(parsed.getProtocol(), asciiHost, parsed.getPort(),
                parsed.getFile());
        return rebuilt.toString();
    } catch (MalformedURLException e) {
        return null;
    }
}
 
Example #12
Source File: IDNTest.java    From j2objc with Apache License 2.0 5 votes vote down vote up
/**
 * Exercises {@link java.net.IDN#toASCII(String)}: null rejection followed by a table of
 * Unicode host names and their expected Punycode encodings.
 * @since 1.6
 */
public void test_ToASCII_LString() {
    // A null argument must be rejected.
    try {
        IDN.toASCII(null);
        fail("should throw NullPointerException");
    } catch (NullPointerException expected) {
        // expected
    }

    // Each row is { expected ASCII form, Unicode input }, checked in order.
    final String[][] cases = {
        {"www.xn--gwtq9nb2a.jp", "www.\u65E5\u672C\u5E73.jp"},
        {"www.xn--vckk7bxa0eza9ezc9d.com",
                "www.\u30CF\u30F3\u30C9\u30DC\u30FC\u30EB\u30B5\u30E0\u30BA.com"},
        {"www.xn--frgbolaget-q5a.nu", "www.f\u00E4rgbolaget.nu"},
        {"www.xn--bcher-kva.de", "www.b\u00FCcher.de"},
        {"www.xn--brndendekrlighed-vobh.com", "www.br\u00E6ndendek\u00E6rlighed.com"},
        {"www.xn--rksmrgs-5wao1o.se", "www.r\u00E4ksm\u00F6rg\u00E5s.se"},
        {"www.xn--9d0bm53a3xbzui.com", "www.\uC608\uBE44\uAD50\uC0AC.com"},
        {"xn--lck1c3crb1723bpq4a.com", "\u7406\u5BB9\u30CA\u30AB\u30E0\u30E9.com"},
        {"xn--l8je6s7a45b.org", "\u3042\u30FC\u308B\u3044\u3093.org"},
        {"www.xn--frjestadsbk-l8a.net", "www.f\u00E4rjestadsbk.net"},
        {"www.xn--mkitorppa-v2a.edu", "www.m\u00E4kitorppa.edu"},
    };
    for (String[] row : cases) {
        assertEquals(row[0], IDN.toASCII(row[1]));
    }
}
 
Example #13
Source File: BasicURLNormalizer.java    From crawler-commons with Apache License 2.0 5 votes vote down vote up
/**
 * Normalizes a host name in three steps: lowercase it, convert a non-ASCII name with
 * {@link IDN#toASCII(String)}, and drop a single trailing dot.
 *
 * @param host the host name to normalize
 * @return the normalized host name
 * @throws IllegalArgumentException if the name contains non-convertible Unicode code points
 * @throws IndexOutOfBoundsException thrown (undocumented) by the IDN conversion if one
 *         non-ASCII dot-separated label is longer than 256 characters,
 *         cf. https://bugs.openjdk.java.net/browse/JDK-6806873
 */
private String normalizeHostName(String host) throws IllegalArgumentException, IndexOutOfBoundsException {
        /* 1. lowercase host name */
        final String lowered = host.toLowerCase(Locale.ROOT);

        /* 2. only non-ASCII names go through the IDN conversion */
        final String ascii = isAscii(lowered) ? lowered : IDN.toASCII(lowered);

        /* 3. trim a trailing dot */
        return ascii.endsWith(".") ? ascii.substring(0, ascii.length() - 1) : ascii;
    }
 
Example #14
Source File: DomainValidator.java    From json-schema-validator with Apache License 2.0 5 votes vote down vote up
/**
 * Converts potentially Unicode input to punycode, restoring a trailing dot when the
 * platform's IDN conversion does not preserve it.
 * If conversion fails, returns the original input.
 *
 * @param input the string to convert, not null
 * @return converted input, or original input if conversion fails
 */
// Needed by UrlValidator
static String unicodeToASCII(String input) {
    if (isOnlyASCII(input)) {
        // Pure ASCII needs no conversion; skip possibly expensive processing.
        return input;
    }
    try {
        final String encoded = IDN.toASCII(input);
        if (IDNBUGHOLDER.IDN_TOASCII_PRESERVES_TRAILING_DOTS) {
            return encoded;
        }
        if (input.isEmpty()) {
            // No last character to inspect.
            return input;
        }
        // RFC3490 3.1. 1)
        //            Whenever dots are used as label separators, the following
        //            characters MUST be recognized as dots: U+002E (full stop), U+3002
        //            (ideographic full stop), U+FF0E (fullwidth full stop), U+FF61
        //            (halfwidth ideographic full stop).
        final char last = input.charAt(input.length() - 1);
        final boolean endedWithDot = last == '\u002E' // "." full stop
                || last == '\u3002' // ideographic full stop
                || last == '\uFF0E' // fullwidth full stop
                || last == '\uFF61'; // halfwidth ideographic full stop
        // Restore the stop that the conversion dropped.
        return endedWithDot ? encoded + "." : encoded;
    } catch (IllegalArgumentException e) {
        // Input is not valid; hand it back untouched.
        return input;
    }
}
 
Example #15
Source File: SNIHostName.java    From jdk8u-jdk with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Attempts to match the given {@link SNIServerName} against this matcher's pattern,
 * first by its ASCII name and then by the Unicode form of that name.
 *
 * @param  serverName
 *         the {@link SNIServerName} instance on which this matcher
 *         performs match operations
 *
 * @return {@code true} if, and only if, the matcher matches the
 *         given {@code serverName}
 *
 * @throws NullPointerException if {@code serverName} is {@code null}
 * @throws IllegalArgumentException if {@code serverName} is
 *         not of {@code StandardConstants#SNI_HOST_NAME} type
 *
 * @see SNIServerName
 */
@Override
public boolean matches(SNIServerName serverName) {
    if (serverName == null) {
        throw new NullPointerException(
            "The SNIServerName argument cannot be null");
    }

    final SNIHostName candidate;
    if (serverName instanceof SNIHostName) {
        candidate = (SNIHostName) serverName;
    } else if (serverName.getType() == StandardConstants.SNI_HOST_NAME) {
        // A generic host_name entry: rebuild it from its encoded bytes.
        try {
            candidate = new SNIHostName(serverName.getEncoded());
        } catch (NullPointerException | IllegalArgumentException e) {
            return false;
        }
    } else {
        throw new IllegalArgumentException(
            "The server name type is not host_name");
    }

    // Try the ASCII name first; an internationalized domain name may only
    // match in its Unicode representation.
    final String ascii = candidate.getAsciiName();
    return pattern.matcher(ascii).matches()
            || pattern.matcher(IDN.toUnicode(ascii)).matches();
}
 
Example #16
Source File: DnsMessage.java    From android-netdiag with MIT License 5 votes vote down vote up
/**
 * Writes {@code domain} to {@code out} as a sequence of length-prefixed labels followed by
 * a terminating zero byte.
 *
 * <p>The domain is split on the four label separators recognized by RFC 3490 (full stop,
 * ideographic full stop, fullwidth full stop, halfwidth ideographic full stop) and each
 * label is converted with {@link IDN#toASCII(String)} before being written.
 *
 * @param out    the stream receiving the encoded name
 * @param domain the domain name to encode
 * @throws IOException if writing to {@code out} fails
 * @throws IllegalArgumentException if a label cannot be converted by {@code IDN.toASCII}
 */
private static void writeDomain(OutputStream out, String domain) throws IOException {
    for (String label : domain.split("[.\u3002\uFF0E\uFF61]")) {
        // IDN.toASCII produces ASCII only, so encode with an explicit charset instead of
        // depending on the platform default.
        byte[] encoded = IDN.toASCII(label).getBytes(java.nio.charset.StandardCharsets.US_ASCII);
        // NOTE(review): an empty label (e.g. from "a..b") is written as a zero length byte,
        // which looks like the name terminator - confirm callers never pass such input.
        out.write(encoded.length);
        out.write(encoded, 0, encoded.length);
    }
    // Zero-length label terminates the name.
    out.write(0);
}
 
Example #17
Source File: SocksCmdResponse.java    From netty4.0.27Learn with Apache License 2.0 5 votes vote down vote up
/**
 * Constructs new response and includes provided host and port as part of it.
 *
 * @param cmdStatus status of the response
 * @param addressType type of host parameter
 * @param host host (BND.ADDR field) is address that server used when connecting to the target host.
 *             When null a value of 4/8 0x00 octets will be used for IPv4/IPv6 and a single 0x00 byte will be
 *             used for domain addressType. Value is converted to ASCII using {@link IDN#toASCII(String)}.
 * @param port port (BND.PORT field) that the server assigned to connect to the target host
 * @throws NullPointerException in case cmdStatus or addressType are missing
 * @throws IllegalArgumentException in case host or port cannot be validated
 * @see IDN#toASCII(String)
 */
public SocksCmdResponse(SocksCmdStatus cmdStatus, SocksAddressType addressType, String host, int port) {
    super(SocksResponseType.CMD);
    if (cmdStatus == null) {
        throw new NullPointerException("cmdStatus");
    }
    if (addressType == null) {
        throw new NullPointerException("addressType");
    }
    // ASCII form of the host; stays null when no host was supplied.
    String asciiHost = null;
    if (host != null) {
        switch (addressType) {
            case IPv4:
                if (!NetUtil.isValidIpV4Address(host)) {
                    throw new IllegalArgumentException(host + " is not a valid IPv4 address");
                }
                break;
            case DOMAIN:
                // Convert once and reuse the result for the length check, the error
                // message and the stored value.
                asciiHost = IDN.toASCII(host);
                if (asciiHost.length() > 255) {
                    throw new IllegalArgumentException(host + " IDN: " +
                            asciiHost + " exceeds 255 char limit");
                }
                break;
            case IPv6:
                if (!NetUtil.isValidIpV6Address(host)) {
                    throw new IllegalArgumentException(host + " is not a valid IPv6 address");
                }
                break;
            case UNKNOWN:
                break;
        }
        // Every address type stores the ASCII form; only DOMAIN has computed it already.
        if (asciiHost == null) {
            asciiHost = IDN.toASCII(host);
        }
    }
    if (port < 0 || port > 65535) {
        throw new IllegalArgumentException(port + " is not in bounds 0 <= x <= 65535");
    }
    this.cmdStatus = cmdStatus;
    this.addressType = addressType;
    this.host = asciiHost;
    this.port = port;
}
 
Example #18
Source File: HostnameChecker.java    From Bytecoder with Apache License 2.0 5 votes vote down vote up
/**
 * Returns true if name matches against template.<p>
 *
 * The matching is performed as per RFC 2818 rules for TLS and
 * RFC 2830 rules for LDAP.<p>
 *
 * Both arguments are first normalized to Unicode; a normalization failure,
 * an illegal wildcard, or a template that is not a valid host name once its
 * wildcards are substituted all yield {@code false}.<p>
 *
 * The <code>name</code> parameter should represent a DNS name.  The
 * <code>template</code> parameter may contain the wildcard character '*'.
 */
private boolean isMatched(String name, String template,
                          boolean chainsToPublicCA) {

    // Normalize to Unicode, because PSL is in Unicode.
    final String unicodeName;
    final String unicodeTemplate;
    try {
        unicodeName = IDN.toUnicode(IDN.toASCII(name));
        unicodeTemplate = IDN.toUnicode(IDN.toASCII(template));
    } catch (RuntimeException re) {
        if (SSLLogger.isOn) {
            SSLLogger.fine("Failed to normalize to Unicode: " + re);
        }

        return false;
    }

    if (hasIllegalWildcard(unicodeTemplate, chainsToPublicCA)) {
        return false;
    }

    // Check the validity of the domain name template by letting SNIHostName
    // parse it with every wildcard character '*' replaced by 'z'.
    try {
        new SNIHostName(unicodeTemplate.replace('*', 'z'));
    } catch (IllegalArgumentException iae) {
        // It would be nice to add debug log if not matching.
        return false;
    }

    if (checkType == TYPE_TLS) {
        return matchAllWildcards(unicodeName, unicodeTemplate);
    }
    if (checkType == TYPE_LDAP) {
        return matchLeftmostWildcard(unicodeName, unicodeTemplate);
    }
    return false;
}
 
Example #19
Source File: IDNTest.java    From j2objc with Apache License 2.0 5 votes vote down vote up
/**
 * Exercises {@link java.net.IDN#toUnicode(String, int)} with the
 * {@code ALLOW_UNASSIGNED} and {@code USE_STD3_ASCII_RULES} flags.
 * @since 1.6
 */
public void test_ToUnicode_LString_I() {
    assertEquals("", IDN.toUnicode("", IDN.ALLOW_UNASSIGNED));

    final int std3 = IDN.USE_STD3_ASCII_RULES;
    assertEquals("www.f\u00E4rgbolaget.nu",
            IDN.toUnicode("www.f\u00E4rgbolaget.nu", std3));
    assertEquals("www.r\u00E4ksm\u00F6rg\u00E5s.nu",
            IDN.toUnicode("www.r\u00E4ksm\u00F6rg\u00E5s\u3002nu", std3));
    // RI bug. It cannot parse "www.xn--gwtq9nb2a.jp" when
    // USE_STD3_ASCII_RULES is set.
    assertEquals("www.\u65E5\u672C\u5E73.jp",
            IDN.toUnicode("www\uFF0Exn--gwtq9nb2a\uFF61jp", std3));
}
 
Example #20
Source File: PunycodeConverter.java    From cyberduck with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Converts a hostname to its IDN ToASCII form when the {@code connection.hostname.idn}
 * preference is enabled.
 *
 * @param hostname the hostname to convert
 * @return IDN normalized hostname, or the stripped input when the preference is disabled,
 * the input is empty, the conversion result is empty, or the conversion fails
 */
public String convert(final String hostname) {
    final boolean idnEnabled = PreferencesFactory.get().getBoolean("connection.hostname.idn");
    if(idnEnabled && StringUtils.isNotEmpty(hostname)) {
        try {
            // IDN.toASCII works on complete domain names such as "www.example.com" and can
            // fail; a failure is logged below and the stripped input is returned instead.
            // The default options are used: unassigned code points are not processed and
            // STD3 ASCII rules are not applied.
            final String stripped = StringUtils.strip(hostname);
            final String ascii = IDN.toASCII(stripped);
            if(log.isDebugEnabled() && !StringUtils.equals(stripped, ascii)) {
                log.debug(String.format("IDN hostname for %s is %s", hostname, ascii));
            }
            if(StringUtils.isNotEmpty(ascii)) {
                return ascii;
            }
        }
        catch(IllegalArgumentException e) {
            log.warn(String.format("Failed to convert hostname %s to IDNA", hostname), e);
        }
    }
    return StringUtils.strip(hostname);
}
 
Example #21
Source File: SNIHostName.java    From j2objc with Apache License 2.0 5 votes vote down vote up
/**
 * Attempts to match the given {@link SNIServerName}. The ASCII name is
 * tried first, then its Unicode representation.
 *
 * @param  serverName
 *         the {@link SNIServerName} instance on which this matcher
 *         performs match operations
 *
 * @return {@code true} if, and only if, the matcher matches the
 *         given {@code serverName}
 *
 * @throws NullPointerException if {@code serverName} is {@code null}
 * @throws IllegalArgumentException if {@code serverName} is
 *         not of {@code StandardConstants#SNI_HOST_NAME} type
 *
 * @see SNIServerName
 */
@Override
public boolean matches(SNIServerName serverName) {
    if (serverName == null) {
        throw new NullPointerException(
            "The SNIServerName argument cannot be null");
    }

    final SNIHostName host;
    if (serverName instanceof SNIHostName) {
        host = (SNIHostName) serverName;
    } else {
        if (serverName.getType() != StandardConstants.SNI_HOST_NAME) {
            throw new IllegalArgumentException(
                "The server name type is not host_name");
        }
        // Not an SNIHostName instance: construct one from the encoded value.
        try {
            host = new SNIHostName(serverName.getEncoded());
        } catch (NullPointerException | IllegalArgumentException e) {
            return false;
        }
    }

    final String asciiForm = host.getAsciiName();
    final boolean asciiMatches = pattern.matcher(asciiForm).matches();
    if (asciiMatches) {
        return true;
    }

    // May be an internationalized domain name, so check the Unicode form too.
    final String unicodeForm = IDN.toUnicode(asciiForm);
    return pattern.matcher(unicodeForm).matches();
}
 
Example #22
Source File: DomainValidator.java    From TrustKit-Android with MIT License 5 votes vote down vote up
/**
 * Converts potentially Unicode input to punycode. When the platform's IDN conversion
 * drops a trailing label separator, a "." is appended to the result to restore it.
 * If conversion fails, returns the original input.
 *
 * @param input the string to convert, not null
 * @return converted input, or original input if conversion fails
 */
// Needed by UrlValidator
static String unicodeToASCII(String input) {
    if (isOnlyASCII(input)) { // nothing to convert, skip possibly expensive processing
        return input;
    }
    try {
        final String punycode = IDN.toASCII(input);
        if (IDNBUGHOLDER.IDN_TOASCII_PRESERVES_TRAILING_DOTS) {
            return punycode;
        }
        final int inputLength = input.length();
        if (inputLength == 0) { // there is no last character to look at
            return input;
        }
        // RFC3490 3.1. 1)
        //            Whenever dots are used as label separators, the following
        //            characters MUST be recognized as dots: U+002E (full stop), U+3002
        //            (ideographic full stop), U+FF0E (fullwidth full stop), U+FF61
        //            (halfwidth ideographic full stop).
        final String labelSeparators = "\u002E\u3002\uFF0E\uFF61";
        final char trailing = input.charAt(inputLength - 1); // original last char
        if (labelSeparators.indexOf(trailing) >= 0) {
            return punycode + "."; // restore the missing stop
        }
        return punycode;
    } catch (IllegalArgumentException e) { // input is not valid
        return input;
    }
}
 
Example #23
Source File: ParsedIRI.java    From rdf4j with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Returns the content of this IRI as a US-ASCII string.
 * <p>
 * If this IRI only contains 8bit characters then an invocation of this method will return the same value as an
 * invocation of the {@link #toString() toString} method. Otherwise this method works as if by encoding the host via
 * <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a> and all other components by percent encoding their
 * UTF-8 values.
 * </p>
 * <p>
 * Components are emitted in the order scheme, authority (user info, host, port), path, query, fragment; a
 * component that is {@code null} is left out.
 * </p>
 *
 * @return The string form of this IRI, encoded as needed so that it only contains characters in the US-ASCII
 *         charset
 */
public String toASCIIString() {
	final StringBuilder ascii = new StringBuilder(iri.length());

	if (scheme != null) {
		ascii.append(scheme);
		ascii.append(':');
	}

	// The authority section is written only when a host is present.
	if (host != null) {
		ascii.append("//");
		if (userInfo != null) {
			appendAscii(ascii, userInfo);
			ascii.append('@');
		}
		if (host.length() != 0) {
			final String asciiHost = IDN.toASCII(host, IDN.ALLOW_UNASSIGNED);
			ascii.append(asciiHost);
		}
		if (port >= 0) {
			ascii.append(':');
			ascii.append(port);
		}
	}

	if (path != null) {
		appendAscii(ascii, path);
	}

	if (query != null) {
		ascii.append('?');
		appendAscii(ascii, query);
	}

	if (fragment != null) {
		ascii.append('#');
		appendAscii(ascii, fragment);
	}

	return ascii.toString();
}
 
Example #24
Source File: SNIHostName.java    From openjdk-jdk8u-backup with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Attempts to match the given {@link SNIServerName}, comparing the pattern
 * with the ASCII host name and, failing that, with its Unicode form.
 *
 * @param  serverName
 *         the {@link SNIServerName} instance on which this matcher
 *         performs match operations
 *
 * @return {@code true} if, and only if, the matcher matches the
 *         given {@code serverName}
 *
 * @throws NullPointerException if {@code serverName} is {@code null}
 * @throws IllegalArgumentException if {@code serverName} is
 *         not of {@code StandardConstants#SNI_HOST_NAME} type
 *
 * @see SNIServerName
 */
@Override
public boolean matches(SNIServerName serverName) {
    if (serverName == null) {
        throw new NullPointerException(
            "The SNIServerName argument cannot be null");
    }

    final SNIHostName resolved;
    if (serverName instanceof SNIHostName) {
        resolved = (SNIHostName) serverName;
    } else if (serverName.getType() == StandardConstants.SNI_HOST_NAME) {
        // Decode a generic host_name entry from its encoded bytes.
        try {
            resolved = new SNIHostName(serverName.getEncoded());
        } catch (NullPointerException | IllegalArgumentException e) {
            return false;
        }
    } else {
        throw new IllegalArgumentException(
            "The server name type is not host_name");
    }

    // ASCII comparison first; the Unicode representation covers
    // internationalized domain names.
    final String asciiName = resolved.getAsciiName();
    return pattern.matcher(asciiName).matches()
            || pattern.matcher(IDN.toUnicode(asciiName)).matches();
}
 
Example #25
Source File: SNIHostName.java    From Bytecoder with Apache License 2.0 5 votes vote down vote up
/**
 * Attempts to match the given {@link SNIServerName}. Matching is done on
 * the ASCII name first and on the Unicode representation second.
 *
 * @param  serverName
 *         the {@link SNIServerName} instance on which this matcher
 *         performs match operations
 *
 * @return {@code true} if, and only if, the matcher matches the
 *         given {@code serverName}
 *
 * @throws NullPointerException if {@code serverName} is {@code null}
 * @throws IllegalArgumentException if {@code serverName} is
 *         not of {@code StandardConstants#SNI_HOST_NAME} type
 *
 * @see SNIServerName
 */
@Override
public boolean matches(SNIServerName serverName) {
    if (serverName == null) {
        throw new NullPointerException(
            "The SNIServerName argument cannot be null");
    }

    final boolean alreadyHostName = serverName instanceof SNIHostName;
    if (!alreadyHostName
            && serverName.getType() != StandardConstants.SNI_HOST_NAME) {
        throw new IllegalArgumentException(
            "The server name type is not host_name");
    }

    final SNIHostName target;
    if (alreadyHostName) {
        target = (SNIHostName) serverName;
    } else {
        // Build a host name from the encoded form; an unusable encoding
        // simply does not match.
        try {
            target = new SNIHostName(serverName.getEncoded());
        } catch (NullPointerException | IllegalArgumentException e) {
            return false;
        }
    }

    final String ascii = target.getAsciiName();
    if (pattern.matcher(ascii).matches()) {
        return true;
    }

    // May be an internationalized domain name, check the Unicode
    // representation.
    return pattern.matcher(IDN.toUnicode(ascii)).matches();
}
 
Example #26
Source File: GenerateTLDLists.java    From ache with Apache License 2.0 5 votes vote down vote up
/**
 * Strips the decoration around a TLD entry and converts it to its ASCII
 * (punycode) form.
 *
 * <p>Two input shapes are recognised:
 * <ul>
 *   <li>{@code ".label"} - a leading dot followed by the label;</li>
 *   <li>U+200F, {@code ".label"}, U+200E - the same, wrapped in a
 *       right-to-left mark and a left-to-right mark.</li>
 * </ul>
 * Any other input (including the empty string) is reported on
 * {@code System.err} and yields {@code null}.
 *
 * @param tld the raw TLD text; must not be {@code null}
 * @return the label converted with {@link IDN#toASCII(String)}, or
 *         {@code null} if the input has an unrecognised shape or the
 *         conversion fails
 */
private String normalizeTld(final String tld) {
    final int lastChar = tld.length() - 1;

    // Length guards keep charAt() from throwing on empty or very short input.
    final boolean leadingDot = !tld.isEmpty() && tld.charAt(0) == '.';
    final boolean rtlWrapped = tld.length() >= 3
            && tld.charAt(0) == '\u200F'
            && tld.charAt(1) == '.'
            && tld.charAt(lastChar) == '\u200E';

    if (!leadingDot && !rtlWrapped) {
        System.err.printf("WARN: Found a TLD without leading dot: [%s]."
                + " The HTML template may have changed.\n", tld);
        // Nothing sensible to convert; previously this fell through to
        // IDN.toASCII(null) and relied on the resulting exception.
        return null;
    }

    // Drop the mark + dot prefix and the trailing mark, or just the dot.
    final String normalized = rtlWrapped
            ? tld.substring(2, lastChar)
            : tld.substring(1);
    try {
        return IDN.toASCII(normalized);
    } catch (RuntimeException e) {
        // Best effort: any conversion failure is logged and mapped to null.
        System.err.printf("WARN: Failed to convert normalized string [%s]"
                + " from TLD [%s] to punnycode.\n", normalized, tld);
        return null;
    }
}
 
Example #27
Source File: SNIHostName.java    From openjdk-8 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Decides whether the supplied {@link SNIServerName} matches this matcher's
 * pattern.
 *
 * <p>Matching is attempted on the host name's ASCII form and, if that does
 * not match, on the result of {@link IDN#toUnicode(String)} for that form.
 * If the argument is not an {@link SNIHostName} and one cannot be built
 * from its encoded value, the result is {@code false}.
 *
 * @param  serverName
 *         the {@link SNIServerName} instance on which this matcher
 *         performs match operations
 *
 * @return {@code true} if, and only if, the matcher matches the
 *         given {@code serverName}
 *
 * @throws NullPointerException if {@code serverName} is {@code null}
 * @throws IllegalArgumentException if {@code serverName} is
 *         not of {@code StandardConstants#SNI_HOST_NAME} type
 *
 * @see SNIServerName
 */
@Override
public boolean matches(SNIServerName serverName) {
    if (serverName == null) {
        throw new NullPointerException(
            "The SNIServerName argument cannot be null");
    }

    SNIHostName resolved = null;
    if (serverName instanceof SNIHostName) {
        resolved = (SNIHostName) serverName;
    }

    if (resolved == null) {
        // Not an SNIHostName instance: it must at least carry the host_name type.
        if (serverName.getType() != StandardConstants.SNI_HOST_NAME) {
            throw new IllegalArgumentException(
                "The server name type is not host_name");
        }

        try {
            resolved = new SNIHostName(serverName.getEncoded());
        } catch (NullPointerException | IllegalArgumentException e) {
            // Encoded value could not be interpreted as a host name.
            return false;
        }
    }

    // First attempt: match against the ASCII name.
    final String asciiForm = resolved.getAsciiName();
    if (pattern.matcher(asciiForm).matches()) {
        return true;
    }

    // Second attempt: the name may be internationalized, so try its
    // Unicode representation.
    final String unicodeForm = IDN.toUnicode(asciiForm);
    return pattern.matcher(unicodeForm).matches();
}
 
Example #28
Source File: VirtualHost.java    From armeria with Apache License 2.0 5 votes vote down vote up
/**
 * Normalizes a host name pattern: optional IDNA ASCII conversion, validation
 * against {@code HOSTNAME_PATTERN}, then ASCII lower-casing.
 *
 * <p>The pattern {@code "*"} is accepted as-is. For a pattern starting with
 * {@code "*."}, only the part after that prefix is validated.
 *
 * @param hostnamePattern the pattern to normalize; must not be {@code null}
 * @return the (possibly ASCII-converted) pattern in lower case
 * @throws IllegalArgumentException if the pattern fails validation
 */
static String normalizeHostnamePattern(String hostnamePattern) {
    requireNonNull(hostnamePattern, "hostnamePattern");

    // Convert to the IDNA ASCII form only when the helper says it is needed.
    final String asciiPattern = needsNormalization(hostnamePattern)
                                ? IDN.toASCII(hostnamePattern, IDN.ALLOW_UNASSIGNED)
                                : hostnamePattern;

    if (!"*".equals(asciiPattern)) {
        // A leading "*." wildcard label is excluded from validation.
        final String hostPart = asciiPattern.startsWith("*.") ? asciiPattern.substring(2)
                                                              : asciiPattern;
        if (!HOSTNAME_PATTERN.matcher(hostPart).matches()) {
            throw new IllegalArgumentException("hostnamePattern: " + asciiPattern);
        }
    }

    return Ascii.toLowerCase(asciiPattern);
}
 
Example #29
Source File: PublicSuffixDatabase.java    From styT with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the effective top-level domain plus one (eTLD+1) for a domain using the public
 * suffix list. Returns null when the domain itself is a public suffix.
 *
 * <p>Examples: <pre>{@code
 * assertEquals("google.com", getEffectiveTldPlusOne("google.com"));
 * assertEquals("google.com", getEffectiveTldPlusOne("www.google.com"));
 * assertNull(getEffectiveTldPlusOne("com"));
 * }</pre>
 *
 * <p>Rule lookup is done on the Unicode labels of the domain, but the returned value is
 * assembled from the labels of {@code domain} exactly as they were passed in.
 *
 * @param domain A canonicalized domain. An International Domain Name (IDN) should be punycode
 *    encoded.
 * @throws NullPointerException if {@code domain} is null
 */
public String getEffectiveTldPlusOne(String domain) {
  if (domain == null) throw new NullPointerException("domain == null");

  // The list is stored in UTF-8, so rules are looked up using the Unicode labels.
  String[] unicodeLabels = IDN.toUnicode(domain).split("\\.");
  String[] matchedRule = findMatchingRule(unicodeLabels);
  boolean exceptionRule = matchedRule[0].charAt(0) == EXCEPTION_MARKER;

  if (!exceptionRule && unicodeLabels.length == matchedRule.length) {
    // The whole domain is covered by the rule: it is a public suffix.
    return null;
  }

  // An exception rule already spans the eTLD+1; a regular rule spans only the public
  // suffix, so one additional label to its left is kept.
  int keptLabels = exceptionRule ? matchedRule.length : matchedRule.length + 1;
  int index = unicodeLabels.length - keptLabels;

  String[] originalLabels = domain.split("\\.");
  StringBuilder joined = new StringBuilder();
  while (index < originalLabels.length) {
    joined.append(originalLabels[index]);
    joined.append('.');
    index++;
  }
  // Remove the separator appended after the final label.
  joined.deleteCharAt(joined.length() - 1);

  return joined.toString();
}
 
Example #30
Source File: NameHash.java    From etherscan-explorer with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Normalise ENS name as per the
 * <a href="http://docs.ens.domains/en/latest/implementers.html#normalising-and-validating-names">specification</a>.
 *
 * <p>The name is converted with {@link IDN#toASCII(String, int)} using
 * {@link IDN#USE_STD3_ASCII_RULES} and then lower-cased. Lower-casing uses
 * {@link java.util.Locale#ROOT} so the result does not depend on the JVM's
 * default locale (for example, a Turkish default locale would otherwise map
 * {@code I} to a dotless {@code ı}).
 *
 * @param ensName our user input ENS name
 * @return normalised ens name
 * @throws EnsResolutionException if the name cannot be normalised
 */
static String normalise(String ensName) {
    try {
        return IDN.toASCII(ensName, IDN.USE_STD3_ASCII_RULES)
                .toLowerCase(java.util.Locale.ROOT);
    } catch (IllegalArgumentException e) {
        throw new EnsResolutionException("Invalid ENS name provided: " + ensName);
    }
}