Java Code Examples for org.jsoup.Connection#userAgent()

The following examples show how to use org.jsoup.Connection#userAgent() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ArtStationRipper.java    From ripme with MIT License 6 votes vote down vote up
private JSONObject getJson(URL url) throws IOException {
        Connection con = Http.url(url).method(Method.GET).connection();
        con.ignoreHttpErrors(true);
        con.ignoreContentType(true);
        con.userAgent(
                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
        con.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
        con.header("Accept-Language", "en-US,en;q=0.5");
//        con.header("Accept-Encoding", "gzip, deflate, br");
        con.header("Upgrade-Insecure-Requests", "1");
        Response res = con.execute();
        int status = res.statusCode();
        if (status / 100 == 2) {
            String jsonString = res.body();
            return new JSONObject(jsonString);
        }
        throw new IOException("Error fetching json. Status code:" + status);
    }
 
Example 2
Source File: JspToHtml.java    From DWSurvey with GNU Affero General Public License v3.0 5 votes vote down vote up
public void postJspToHtml(String postUrl, String filePath,String fileName) throws Exception{
	HttpServletRequest request=Struts2Utils.getRequest();
	//${pageContext.request.scheme}://${pageContext.request.serverName }:${pageContext.request.serverPort} pageContext.request.contextPath
	String reqTarget = request.getScheme()+"://"+request.getServerName()+(request.getServerPort()==80?"":":"+request.getServerPort())+request.getContextPath();
	reqTarget =reqTarget+"/toHtml";
	//?url="+postUrl+"&filePath="+filePath+"&fileName="+fileName;
	Map<String, String> map=new HashMap<String, String>();
	map.put("url", postUrl);
	map.put("filePath", filePath);
	map.put("fileName", fileName);
	Connection connection = Jsoup.connect(reqTarget);
	connection.userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31");
	connection.data(map);
	Document doc=connection.timeout(8000).get();
}
 
Example 3
Source File: ArtStationRipper.java    From ripme with MIT License 4 votes vote down vote up
/**
     * Parses an ArtStation URL.
     * 
     * @param url URL to an ArtStation user profile
     *            (https://www.artstation.com/username) or single project
     *            (https://www.artstation.com/artwork/projectid)
     * @return ParsedURL object containing URL type, JSON location and ID (stores
     *         account name or project hash, depending of the URL type identified)
     * 
     */
    private ParsedURL parseURL(URL url) {
        String htmlSource;
        ParsedURL parsedURL;

        // Load HTML Source of the specified URL
        try {
            // htmlSource = Http.url(url).get().html();
            Connection con = Http.url(url).method(Method.GET).connection();
            con.ignoreHttpErrors(true);
            con.userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0");
            con.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
            con.header("Accept-Language", "en-US,en;q=0.5");
//            con.header("Accept-Encoding", "gzip, deflate, br");
            con.header("Upgrade-Insecure-Requests", "1");
            Response res = con.execute();
            int status = res.statusCode();

            if (status / 100 == 2) {
                htmlSource = res.parse().html();
            } else if (status == 403 && url.toString().contains("artwork/")) {
                // Catches cloudflare page. Error 403.
                // Usually caused by artwork URLs( arstation.com/artwork/someProjectId)
                String urlId = url.toString().substring(url.toString().lastIndexOf("/") + 1);
                String jsonURL = "https://www.artstation.com/projects/" + urlId + ".json";
                parsedURL = new ParsedURL(URL_TYPE.SINGLE_PROJECT, jsonURL, urlId);
                return parsedURL;
            } else {
                LOGGER.error("Couldnt fetch URL: " + url);
                throw new IOException("Error fetching URL: " + url + " Status Code: " + status);
            }
        } catch (IOException e) {
            htmlSource = "";
        }

        // Check if HTML Source of the specified URL references a project
        Pattern p = Pattern.compile("'/projects/(\\w+)\\.json'");
        Matcher m = p.matcher(htmlSource);
        if (m.find()) {
            parsedURL = new ParsedURL(URL_TYPE.SINGLE_PROJECT,
                    "https://www.artstation.com/projects/" + m.group(1) + ".json", m.group(1));
            return parsedURL;
        }

        // Check if HTML Source of the specified URL references a user profile
        p = Pattern.compile("'/users/([\\w-]+)/quick\\.json'");
        m = p.matcher(htmlSource);
        if (m.find()) {
            parsedURL = new ParsedURL(URL_TYPE.USER_PORTFOLIO,
                    "https://www.artstation.com/users/" + m.group(1) + "/projects.json", m.group(1));
            return parsedURL;
        }

        // HTML Source of the specified URL doesn't reference a user profile or project
        parsedURL = new ParsedURL(URL_TYPE.UNKNOWN, null, null);
        return parsedURL;
    }
 
Example 4
Source File: JsoupUtil.java    From xxl-crawler with GNU General Public License v3.0 4 votes vote down vote up
/**
 * 加载页面
 *
 * @param pageRequest
 *
 * @return Document
 */
public static Document load(PageRequest pageRequest) {
    if (!UrlUtil.isUrl(pageRequest.getUrl())) {
        return null;
    }
    try {
        // 请求设置
        Connection conn = Jsoup.connect(pageRequest.getUrl());
        if (pageRequest.getParamMap() != null && !pageRequest.getParamMap().isEmpty()) {
            conn.data(pageRequest.getParamMap());
        }
        if (pageRequest.getCookieMap() != null && !pageRequest.getCookieMap().isEmpty()) {
            conn.cookies(pageRequest.getCookieMap());
        }
        if (pageRequest.getHeaderMap()!=null && !pageRequest.getHeaderMap().isEmpty()) {
            conn.headers(pageRequest.getHeaderMap());
        }
        if (pageRequest.getUserAgent()!=null) {
            conn.userAgent(pageRequest.getUserAgent());
        }
        if (pageRequest.getReferrer() != null) {
            conn.referrer(pageRequest.getReferrer());
        }
        conn.timeout(pageRequest.getTimeoutMillis());
        conn.validateTLSCertificates(pageRequest.isValidateTLSCertificates());
        conn.maxBodySize(0);    // 取消默认1M限制

        // 代理
        if (pageRequest.getProxy() != null) {
            conn.proxy(pageRequest.getProxy());
        }

        // 发出请求
        Document html = null;
        if (pageRequest.isIfPost()) {
            html = conn.post();
        } else {
            html = conn.get();
        }
        return html;
    } catch (IOException e) {
        logger.error(e.getMessage(), e);
        return null;
    }
}
 
Example 5
Source File: JsoupUtil.java    From xxl-crawler with GNU General Public License v3.0 4 votes vote down vote up
public static String loadPageSource(PageRequest pageRequest) {
    if (!UrlUtil.isUrl(pageRequest.getUrl())) {
        return null;
    }
    try {
        // 请求设置
        Connection conn = Jsoup.connect(pageRequest.getUrl());
        if (pageRequest.getParamMap() != null && !pageRequest.getParamMap().isEmpty()) {
            conn.data(pageRequest.getParamMap());
        }
        if (pageRequest.getCookieMap() != null && !pageRequest.getCookieMap().isEmpty()) {
            conn.cookies(pageRequest.getCookieMap());
        }
        if (pageRequest.getHeaderMap()!=null && !pageRequest.getHeaderMap().isEmpty()) {
            conn.headers(pageRequest.getHeaderMap());
        }
        if (pageRequest.getUserAgent()!=null) {
            conn.userAgent(pageRequest.getUserAgent());
        }
        if (pageRequest.getReferrer() != null) {
            conn.referrer(pageRequest.getReferrer());
        }
        conn.timeout(pageRequest.getTimeoutMillis());
        conn.validateTLSCertificates(pageRequest.isValidateTLSCertificates());
        conn.maxBodySize(0);    // 取消默认1M限制

        // 代理
        if (pageRequest.getProxy() != null) {
            conn.proxy(pageRequest.getProxy());
        }

        conn.ignoreContentType(true);
        conn.method(pageRequest.isIfPost()?Connection.Method.POST:Connection.Method.GET);

        // 发出请求
        Connection.Response resp = conn.execute();
        String pageSource = resp.body();
        return pageSource;
    } catch (IOException e) {
        logger.error(e.getMessage(), e);
        return null;
    }
}
 
Example 6
Source File: HttpConnectionTest.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
@Test public void userAgent() {
    Connection con = HttpConnection.connect("http://example.com/");
    con.userAgent("Mozilla");
    assertEquals("Mozilla", con.request().header("User-Agent"));
}
 
Example 7
Source File: HttpConnectionTest.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
@Test public void userAgent() {
    Connection con = HttpConnection.connect("http://example.com/");
    assertEquals(HttpConnection.DEFAULT_UA, con.request().header("User-Agent"));
    con.userAgent("Mozilla");
    assertEquals("Mozilla", con.request().header("User-Agent"));
}
 
Example 8
Source File: HttpConnectionTest.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
@Test public void userAgent() {
    Connection con = HttpConnection.connect("http://example.com/");
    assertEquals(HttpConnection.DEFAULT_UA, con.request().header("User-Agent"));
    con.userAgent("Mozilla");
    assertEquals("Mozilla", con.request().header("User-Agent"));
}
 
Example 9
Source File: HttpConnectionTest.java    From jsoup-learning with MIT License 4 votes vote down vote up
@Test public void userAgent() {
    Connection con = HttpConnection.connect("http://example.com/");
    con.userAgent("Mozilla");
    assertEquals("Mozilla", con.request().header("User-Agent"));
}