Java Code Examples for org.apache.commons.codec.net.URLCodec#encodeUrl()

The following examples show how to use org.apache.commons.codec.net.URLCodec#encodeUrl(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You can also check out the related API usage in the sidebar.
Example 1
Source File: TestURIBuilder.java    From davmail with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Checks that encoding a path through {@code URIUtil.encode} and encoding it
 * directly through {@code URLCodec.encodeUrl} yield the same string when '@'
 * is removed from the set of characters allowed in an absolute path.
 */
public void testEncodeSpecial() {
    // Start from HttpClient's allowed_abs_path set, then drop '@' so it gets escaped.
    final BitSet allowedWithoutAt = new BitSet(256);
    allowedWithoutAt.or(org.apache.commons.httpclient.URI.allowed_abs_path);
    allowedWithoutAt.clear('@');

    final String path = "[email protected]";

    // Encode through URIUtil.
    final String encoded = URIUtil.encode(path, allowedWithoutAt);
    System.out.println(encoded);

    // Encode the UTF-8 bytes directly through URLCodec and turn them back into a string.
    final byte[] encodedBytes = URLCodec.encodeUrl(allowedWithoutAt, path.getBytes(Consts.UTF_8));
    final String newEncoded = new String(encodedBytes, Consts.UTF_8);
    System.out.println(newEncoded);

    assertEquals(newEncoded, encoded);
}
 
Example 2
Source File: HttpUriDissector.java    From logparser with Apache License 2.0 4 votes vote down vote up
/**
 * Cleans up the URI string held in the given input field, parses it with
 * {@link URI#create(String)} and emits the requested URI parts as dissections.
 *
 * <p>Nothing is emitted when the field value is null or empty. A value that
 * starts with '/' is parsed with a dummy scheme and host prepended; in that case
 * only query, path and ref are emitted, never protocol, userinfo, host or port.
 * The port is only emitted when the parsed URI has one (i.e. it is not -1).
 *
 * @param parsable  the record being dissected; read for the input field and written with the results
 * @param inputname the name of the input field that holds the URI
 * @throws DissectionFailure if the cleaned-up string is still rejected by {@link URI#create(String)}
 */
@Override
public void dissect(final Parsable<?> parsable, final String inputname) throws DissectionFailure {
    final ParsedField field = parsable.getParsableField(INPUT_TYPE, inputname);

    String uriString = field.getValue().getString();
    if (uriString == null || uriString.isEmpty()) {
        return; // Nothing to do here
    }

    // First we cleanup the URI so we fail less often over 'garbage' URIs.
    // See: https://stackoverflow.com/questions/11038967/brackets-in-a-request-url-are-legal-but-not-in-a-uri-java
    // NOTE(review): BAD_URI_CHARS is passed as the "safe" set of encodeUrl, so presumably it holds
    // every character EXCEPT the bad ones — confirm against its definition.
    uriString = new String(URLCodec.encodeUrl(BAD_URI_CHARS, uriString.getBytes(UTF_8)), US_ASCII);

    // Before we hand it to the standard parser we hack it around a bit so we can parse
    // nasty edge cases that are illegal yet do occur in real clickstreams.
    // Also we force the query string to start with ?& so the returned query string starts with &
    // Which leads to more consistent output after parsing.
    int firstQuestionMark = uriString.indexOf('?');
    int firstAmpersand = uriString.indexOf('&');
    // Now we can have one of 3 situations:
    // 1) No query string
    // 2) Query string starts with a '?'
    //      (and optionally followed by one or more '&' or '?' )
    // 3) Query string starts with a '&'. This is invalid but does occur!
    // We may have ?x=x&y=y?z=z so we normalize it always
    // to:  ?&x=x&y=y&z=z
    if (firstAmpersand != -1 || firstQuestionMark != -1) {
        // Turn every '?' into '&' first, then mark the very first separator as the query start.
        uriString = uriString.replaceAll("\\?", "&");
        uriString = uriString.replaceFirst("&", "?&");
    }

    // We find that people muck up the URL by putting % signs in the URLs that are NOT escape sequences
    // So any % that is not followed by two hex digits is fixed.
    // NOTE(review): the replacement is applied twice on purpose, presumably because a single pass
    // can miss a bad '%' that overlaps with the previous match (e.g. "%%") — confirm against
    // BAD_EXCAPE_PATTERN before collapsing these two lines.
    uriString = BAD_EXCAPE_PATTERN.matcher(uriString).replaceAll("%25$1");
    uriString = BAD_EXCAPE_PATTERN.matcher(uriString).replaceAll("%25$1");

    // We have URIs with fragments like this:
    //    /path/?_requestid=1234#x3D;12341234&Referrer&#x3D;blablabla
    // So first we repair the broken encoded char
    uriString = ALMOST_HTML_ENCODED.matcher(uriString).replaceAll("$1&$2");
    // ... and then decode the HTML entities into plain characters.
    uriString = StringEscapeUtils.unescapeHtml4(uriString);
    // And we see URIs with this:
    //    /path/?Referrer=ADV1234#&f=API&subid=#&name=12341234
    uriString = EQUALS_HASH_PATTERN.matcher(uriString).replaceAll("=");
    uriString = HASH_AMP_PATTERN.matcher(uriString).replaceAll("&");

    // If we still have multiple '#' in here we replace them with something else: '~'
    // Repeat until the pattern no longer matches, because each pass can leave new matches behind.
    while (true) {
        Matcher doubleHashMatcher = DOUBLE_HASH_PATTERN.matcher(uriString);
        if (!doubleHashMatcher.find()) {
            break;
        }
        uriString = doubleHashMatcher.replaceAll("~$1#");
    }

    // isUrl stays true only when the input carried its own scheme/host (i.e. did not start with '/').
    boolean isUrl = true;
    URI uri;
    try {
        if (uriString.charAt(0) == '/') {
            // URI.create needs something absolute here, so fake a scheme and host for relative inputs.
            uri = URI.create("dummy-protocol://dummy.host.name" + uriString);
            isUrl = false; // I.e. we do not return the values we just faked.
        } else {
            uri = URI.create(uriString);
        }
    } catch (IllegalArgumentException e) {
        // Report the ORIGINAL field value (not the rewritten one) so the failure can be traced back.
        throw new DissectionFailure("Failed to parse URI >>" + field.getValue().getString()+"<< because of : " +e.getMessage());
    }

    // These parts are available for both full URLs and relative URIs.
    // (The outer check is redundant with the three inner checks.)
    if (wantQuery || wantPath || wantRef) {
        if (wantQuery) {
            // A missing query string is emitted as an empty string rather than null.
            String query = uri.getRawQuery();
            if (query == null) {
                query = "";
            }
            parsable.addDissection(inputname, "HTTP.QUERYSTRING", "query", query);
        }
        if (wantPath) {
            parsable.addDissection(inputname, "HTTP.PATH", "path", uri.getPath());
        }
        if (wantRef) {
            parsable.addDissection(inputname, "HTTP.REF", "ref", uri.getFragment());
        }
    }

    // These parts are only meaningful when the scheme/host were NOT faked above.
    if (isUrl) {
        if (wantProtocol) {
            parsable.addDissection(inputname, "HTTP.PROTOCOL", "protocol", uri.getScheme());
        }
        if (wantUserinfo) {
            parsable.addDissection(inputname, "HTTP.USERINFO", "userinfo", uri.getUserInfo());
        }
        if (wantHost) {
            parsable.addDissection(inputname, "HTTP.HOST", "host", uri.getHost());
        }
        if (wantPort) {
            // URI.getPort() returns -1 when no port is present; emit nothing in that case.
            if (uri.getPort() != -1) {
                parsable.addDissection(inputname, "HTTP.PORT", "port", uri.getPort());
            }
        }
    }
}
 
Example 3
Source File: URIUtil.java    From knopflerfish.org with BSD 3-Clause "New" or "Revised" License 3 votes vote down vote up
/**
 * Percent-escapes a string. The string is first converted to bytes with
 * {@code EncodingUtil.getBytes} using the given charset; every byte that is
 * not in {@code allowed} is then escaped by {@code URLCodec.encodeUrl}.
 *
 * @param unescaped the string to escape
 * @param allowed the characters that are left unescaped
 * @param charset the name of the charset used to turn the string into bytes
 * @return the escaped string, built from the encoded bytes as ASCII
 * @throws URIException declared by the signature; presumably raised by the
 *         charset conversion (confirm against {@code EncodingUtil})
 */
public static String encode(String unescaped, BitSet allowed,
        String charset) throws URIException {
    final byte[] charsetBytes = EncodingUtil.getBytes(unescaped, charset);
    final byte[] escapedBytes = URLCodec.encodeUrl(allowed, charsetBytes);
    return EncodingUtil.getAsciiString(escapedBytes);
}
 
Example 4
Source File: URI.java    From knopflerfish.org with BSD 3-Clause "New" or "Revised" License 3 votes vote down vote up
/**
 * Encodes a URI string.
 *
 * This is a two-step mapping: first from the original characters to octets,
 * and then from those octets to URI characters:
 * <p><blockquote><pre>
 *   original character sequence->octet sequence->URI character sequence
 * </pre></blockquote><p>
 *
 * An escaped octet is written as a character triplet: the percent character
 * "%" followed by the two hexadecimal digits of the octet code. For example,
 * "%20" is the escaped encoding of the US-ASCII space character.
 * <p>
 * Converting from the local filesystem character set to UTF-8 normally takes
 * two steps: first map the local character set to the UCS (for instance with a
 * mapping table from local codes to UCS codes), then convert the UCS character
 * codes to UTF-8. Mapping between vendor codepages can be done the same way.
 * <p>
 * Escape encodings may only be applied while a URI is being created from its
 * component parts. The escaping is performed inside this method.
 *
 * @param original the original character sequence
 * @param allowed those characters that are allowed within a component
 * @param charset the protocol charset
 * @return URI character sequence
 * @throws IllegalArgumentException if {@code original} or {@code allowed} is null
 * @throws URIException declared by the signature; presumably raised for an
 *         unsupported character encoding (confirm against {@code EncodingUtil})
 */
protected static char[] encode(String original, BitSet allowed,
        String charset) throws URIException {
    // Reject missing arguments up front, string first, then the bitset.
    if (original == null) {
        throw new IllegalArgumentException("Original string may not be null");
    }
    if (allowed == null) {
        throw new IllegalArgumentException("Allowed bitset may not be null");
    }

    // characters -> octets -> escaped octets -> ASCII characters
    final byte[] octets = EncodingUtil.getBytes(original, charset);
    final byte[] escapedOctets = URLCodec.encodeUrl(allowed, octets);
    final String asciiForm = EncodingUtil.getAsciiString(escapedOctets);
    return asciiForm.toCharArray();
}
 
Example 5
Source File: CodecUtil.java    From common_gui_tools with Apache License 2.0 3 votes vote down vote up
/**
 * Encode string for URL.
 *
 * <p>The string is converted to bytes with the given charset and those bytes
 * are URL-encoded with {@code URLCodec.encodeUrl}. A {@code null} safe-character
 * set is passed, so the codec's default set of URL-safe characters applies.
 *
 * @param string  String to encode; may be {@code null}
 * @param charSet name of the charset used to convert the string to bytes
 * @return <code>String</code> url string, or {@code null} if {@code string} is {@code null}
 * @throws UnsupportedEncodingException if {@code charSet} is not a supported charset name
 */
public static String encodeURL(String string, String charSet) throws UnsupportedEncodingException {
    if (string == null) {
        return null;
    }
    final byte[] encoded = URLCodec.encodeUrl(null, string.getBytes(charSet));
    // The encoded bytes are always plain US-ASCII ('%', hex digits, '+' and the safe characters),
    // so they must be decoded as US-ASCII. Decoding them with charSet garbles the result for
    // charsets that are not ASCII-compatible (e.g. UTF-16); for ASCII-compatible charsets the
    // result is unchanged.
    return new String(encoded, "US-ASCII");
}
 
Example 6
Source File: URIUtil.java    From bintray-client-java with Apache License 2.0 3 votes vote down vote up
/**
 * Percent-escapes a string. The string is first converted to bytes with
 * {@code EncodingUtils.getBytes} using the given charset; every byte that is
 * not in {@code allowed} is then escaped by {@code URLCodec.encodeUrl}.
 *
 * @param unescaped the string to escape
 * @param allowed   the characters that are left unescaped
 * @param charset   the name of the charset used to turn the string into bytes
 * @return the escaped string, built from the encoded bytes as ASCII
 * @throws HttpException declared by the signature; NOTE(review): nothing visible
 *                       in this method throws it directly, so confirm whether it is still needed
 */
public static String encode(String unescaped, BitSet allowed,
                            String charset) throws HttpException {
    final byte[] charsetBytes = EncodingUtils.getBytes(unescaped, charset);
    final byte[] escapedBytes = URLCodec.encodeUrl(allowed, charsetBytes);
    return EncodingUtils.getAsciiString(escapedBytes);
}
 
Example 7
Source File: URI.java    From bintray-client-java with Apache License 2.0 3 votes vote down vote up
/**
 * Encodes a URI string.
 * <p/>
 * This is a two-step mapping: first from the original characters to octets,
 * and then from those octets to URI characters:
 * <p><blockquote><pre>
 *   original character sequence->octet sequence->URI character sequence
 * </pre></blockquote><p>
 * <p/>
 * An escaped octet is written as a character triplet: the percent character
 * "%" followed by the two hexadecimal digits of the octet code. For example,
 * "%20" is the escaped encoding of the US-ASCII space character.
 * <p/>
 * Converting from the local filesystem character set to UTF-8 normally takes
 * two steps: first map the local character set to the UCS (for instance with a
 * mapping table from local codes to UCS codes), then convert the UCS character
 * codes to UTF-8. Mapping between vendor codepages can be done the same way.
 * <p/>
 * Escape encodings may only be applied while a URI is being created from its
 * component parts. The escaping is performed inside this method.
 *
 * @param original the original character sequence
 * @param allowed  those characters that are allowed within a component
 * @param charset  the protocol charset
 * @return URI character sequence
 * @throws IllegalArgumentException if {@code original} or {@code allowed} is null
 * @throws HttpException declared by the signature; presumably raised for an
 *                       unsupported character encoding (confirm against {@code EncodingUtils})
 */
protected static char[] encode(String original, BitSet allowed,
                               String charset) throws HttpException {
    // Reject missing arguments up front, string first, then the bitset.
    if (original == null) {
        throw new IllegalArgumentException("Original string may not be null");
    }
    if (allowed == null) {
        throw new IllegalArgumentException("Allowed bitset may not be null");
    }

    // characters -> octets -> escaped octets -> ASCII characters
    final byte[] octets = EncodingUtils.getBytes(original, charset);
    final byte[] escapedOctets = URLCodec.encodeUrl(allowed, octets);
    final String asciiForm = EncodingUtils.getAsciiString(escapedOctets);
    return asciiForm.toCharArray();
}
 
Example 8
Source File: UrlUtils.java    From htmlunit with Apache License 2.0 2 votes vote down vote up
/**
 * Escapes and encodes the given string. Modelled on the {@code URIUtil.encode()}
 * method of HttpClient 3.1: the string is converted to bytes in the given charset,
 * run through {@code URLCodec.encodeUrl}, and the result is handed to
 * {@code encodePercentSign} to produce the final string.
 *
 * @param unescaped the string to encode
 * @param allowed allowed characters that shouldn't be escaped
 * @param charset the charset to use
 * @return the escaped string
 */
private static String encode(final String unescaped, final BitSet allowed, final Charset charset) {
    final byte[] escaped = URLCodec.encodeUrl(allowed, unescaped.getBytes(charset));
    return encodePercentSign(escaped);
}
 
Example 9
Source File: UrlUtils.java    From HtmlUnit-Android with Apache License 2.0 2 votes vote down vote up
/**
 * Escapes and encodes the given string. Modelled on the {@code URIUtil.encode()}
 * method of HttpClient 3.1: the string is converted to bytes in the given charset,
 * run through {@code URLCodec.encodeUrl}, and the result is handed to
 * {@code encodePercentSign} to produce the final string.
 *
 * @param unescaped the string to encode
 * @param allowed allowed characters that shouldn't be escaped
 * @param charset the charset to use
 * @return the escaped string
 */
private static String encode(final String unescaped, final BitSet allowed, final Charset charset) {
    final byte[] escaped = URLCodec.encodeUrl(allowed, unescaped.getBytes(charset));
    return encodePercentSign(escaped);
}