/** * Copyright (c) 2013-2014 Santiago M. Mola <[email protected]> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ package io.mola.galimatias; import com.ibm.icu.text.IDNA; import java.util.ArrayList; import java.util.Arrays; import java.util.List; public class Domain extends Host { private static final long serialVersionUID = 2L; private final String domain; private final boolean unicode; private Domain(final String domain, final boolean unicode) { this.domain = domain; this.unicode = unicode; } public static Domain parseDomain(final String input) throws GalimatiasParseException { return parseDomain(input, false); } public List<String> labels() { return Arrays.asList(splitWorker(domain, "\u002E\u3002\uFF0E\uFF61", -1, true)); } public static Domain parseDomain(final String input, final boolean unicode) throws GalimatiasParseException { return parseDomain(URLParsingSettings.create(), input, unicode); } public static Domain parseDomain(final URLParsingSettings settings, final String input, final boolean unicode) throws GalimatiasParseException { if (input == null) { throw new NullPointerException(); } if (input.isEmpty()) { throw new GalimatiasParseException("input is empty"); } final ErrorHandler errorHandler = settings.errorHandler(); // WHATWG says: Let host be the result of running utf-8's decoder on the percent decoding of running utf-8 encode on input. String domain = URLUtils.percentDecode(input); String asciiDomain = URLUtils.domainToASCII(domain, errorHandler); for (int i = 0; i < asciiDomain.length(); i++) { char codePoint = asciiDomain.charAt(i); switch (codePoint) { case 0x0000: case 0x0009: case 0x000A: case 0x000D: case 0x0020: case '#': case '%': case '/': case ':': case '?': case '@': case '[': case '\\': case ']': String message = "Illegal character in domain"; if (codePoint == ' ') { message += ": space is not allowed"; } else if (codePoint == '\t') { message += ": tab is not allowed"; } else if (codePoint == '\n') { message += ": line break is not allowed"; } else if (codePoint == '\r') { message += ": carriage return is not allowed"; } else { message += ": \u201c" + new String(Character.toChars(codePoint)) + "\u201d is not allowed"; } final GalimatiasParseException exception = new GalimatiasParseException(message); errorHandler.fatalError(exception); throw exception; } } if (!unicode) { return new Domain(asciiDomain, unicode); } return new Domain(URLUtils.domainToUnicode(asciiDomain, errorHandler), unicode); } /** * {@inheritDoc} */ @Override public String toString() { return domain; } /** * Converts the domain to a Unicode representation suitable for human interpretation. It does IDNA conversion. * * @see Host#toHumanString() * @return unicode string */ @Override public String toHumanString() { if (unicode) { return domain; } final IDNA.Info idnaInfo = new IDNA.Info(); final StringBuilder idnaOutput = new StringBuilder(); IDNA.getUTS46Instance(IDNA.DEFAULT).nameToUnicode(domain, idnaOutput, idnaInfo); return idnaOutput.toString(); } @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; Domain domain1 = (Domain) o; //if (unicode != domain1.unicode) return false; if (domain != null ? !domain.equals(domain1.domain) : domain1.domain != null) return false; return true; } @Override public int hashCode() { int result = domain != null ? domain.hashCode() : 0; //result = 31 * result + (unicode ? 1 : 0); return result; } /** * Imported from * https://github.com/apache/commons-lang/blob/690dc3c9c4cf8a1875d882ae09741c2e6342ad6b/src/main/java/org/apache/commons/lang3/StringUtils.java * * Performs the logic for the {@code split} and * {@code splitPreserveAllTokens} methods that return a maximum array * length. * * @param str the String to parse, may be {@code null} * @param separatorChars the separate character * @param max the maximum number of elements to include in the * array. A zero or negative value implies no limit. * @param preserveAllTokens if {@code true}, adjacent separators are * treated as empty token separators; if {@code false}, adjacent * separators are treated as one separator. * @return an array of parsed Strings, {@code null} if null String input */ private static String[] splitWorker(final String str, final String separatorChars, final int max, final boolean preserveAllTokens) { // Performance tuned for 2.0 (JDK1.4) // Direct code is quicker than StringTokenizer. // Also, StringTokenizer uses isSpace() not isWhitespace() //XXX: This never happens in Domain.parseDomain //if (str == null) { // return null; //} final int len = str.length(); //if (len == 0) { // return EMPTY_STRING_ARRAY; //} final List<String> list = new ArrayList<String>(); int sizePlus1 = 1; int i = 0, start = 0; boolean match = false; boolean lastMatch = false; if (separatorChars == null) { // Null separator means use whitespace while (i < len) { if (Character.isWhitespace(str.charAt(i))) { if (match || preserveAllTokens) { lastMatch = true; if (sizePlus1++ == max) { i = len; lastMatch = false; } list.add(str.substring(start, i)); match = false; } start = ++i; continue; } lastMatch = false; match = true; i++; } } else if (separatorChars.length() == 1) { // Optimise 1 character case final char sep = separatorChars.charAt(0); while (i < len) { if (str.charAt(i) == sep) { if (match || preserveAllTokens) { lastMatch = true; if (sizePlus1++ == max) { i = len; lastMatch = false; } list.add(str.substring(start, i)); match = false; } start = ++i; continue; } lastMatch = false; match = true; i++; } } else { // standard case while (i < len) { if (separatorChars.indexOf(str.charAt(i)) >= 0) { if (match || preserveAllTokens) { lastMatch = true; if (sizePlus1++ == max) { i = len; lastMatch = false; } list.add(str.substring(start, i)); match = false; } start = ++i; continue; } lastMatch = false; match = true; i++; } } if (match || preserveAllTokens && lastMatch) { list.add(str.substring(start, i)); } return list.toArray(new String[list.size()]); } }