Java Code Examples for org.jsoup.Connection#proxy()

The following examples show how to use org.jsoup.Connection#proxy(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: JsoupDownloader.java    From vw-crawler with MIT License 6 votes vote down vote up
/**
 * Fetches the page described by {@code requestBody} with jsoup and returns the parsed document.
 *
 * <p>The timeout, proxy and headers of the request are applied to the connection only when
 * they are set (timeout &gt; 0, proxy non-null, header map non-null and non-empty).
 *
 * @param requestBody the request describing the URL and optional connection settings
 * @return the parsed document
 * @throws IOException if executing the request or parsing the response fails
 * @throws RuntimeException if the URL is {@code null} or empty, or if the parsed document is
 *         {@code null}
 */
@Override
public Document downloadPage(PageRequest requestBody) throws IOException {
	String url = requestBody.getUrl();
	// Must be "||": with "&&" a null URL dereferenced itself (NullPointerException) and an
	// empty URL slipped past the validation entirely.
	if (url == null || url.isEmpty()) {
		throw new RuntimeException("PageRequest中的URL不合法");
	}
	Connection connection = Jsoup.connect(url);

	if (requestBody.getTimeout() > 0) {
		connection.timeout(requestBody.getTimeout());
	}
	if (requestBody.getProxy() != null) {
		connection.proxy(requestBody.getProxy());
	}
	if (requestBody.getHeader() != null && !requestBody.getHeader().isEmpty()) {
		connection.headers(requestBody.getHeader());
	}

	Document document = connection.execute().parse();
	// Defensive check kept from the original implementation.
	if (document == null) {
		throw new RuntimeException("页面请求失败,请检查网络、RequestBody或者其他配置参数是否有误");
	}
	return document;
}
 
Example 2
Source File: ShadowSocksCrawlerService.java    From ShadowSocks-Share with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a pre-configured jsoup connection for {@code url}.
 *
 * <p>The connection sends a fixed desktop Chrome user agent, ignores the response content
 * type and HTTP error statuses, follows redirects, disables TLS certificate validation and
 * uses {@code TIME_OUT} as its timeout. When {@code isProxyEnable()} returns true, an HTTP
 * proxy built from {@code getProxyHost()} and {@code getProxyPort()} is attached.
 *
 * @param url the address to connect to
 * @return the configured connection (not yet executed)
 */
protected Connection getConnection(String url) {
	final String userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36";

	// validateTLSCertificates is deprecated in jsoup, hence the suppression on this declaration.
	@SuppressWarnings("deprecation")
	Connection connection = Jsoup.connect(url)
			.userAgent(userAgent)
			.ignoreContentType(true)
			.followRedirects(true)
			.ignoreHttpErrors(true)
			.validateTLSCertificates(false)
			.timeout(TIME_OUT);

	if (!isProxyEnable()) {
		return connection;
	}

	InetSocketAddress proxyAddress = new InetSocketAddress(getProxyHost(), getProxyPort());
	connection.proxy(new Proxy(Proxy.Type.HTTP, proxyAddress));
	return connection;
}
 
Example 3
Source File: JsoupUtil.java    From xxl-crawler with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Loads a page and returns it as a parsed document.
 *
 * <p>Optional request settings (parameters, cookies, headers, user agent, referrer, proxy)
 * are applied only when present on {@code pageRequest}. The request is sent as a POST when
 * {@code pageRequest.isIfPost()} is true, otherwise as a GET.
 *
 * @param pageRequest the request describing the URL and connection settings
 *
 * @return the parsed document, or {@code null} if the URL is rejected by
 *         {@code UrlUtil.isUrl} or an {@link IOException} occurs (the exception is logged)
 */
public static Document load(PageRequest pageRequest) {
    if (!UrlUtil.isUrl(pageRequest.getUrl())) {
        return null;
    }

    try {
        // Request settings
        Connection connection = Jsoup.connect(pageRequest.getUrl());

        if (pageRequest.getParamMap() != null && !pageRequest.getParamMap().isEmpty()) {
            connection.data(pageRequest.getParamMap());
        }
        if (pageRequest.getCookieMap() != null && !pageRequest.getCookieMap().isEmpty()) {
            connection.cookies(pageRequest.getCookieMap());
        }
        if (pageRequest.getHeaderMap() != null && !pageRequest.getHeaderMap().isEmpty()) {
            connection.headers(pageRequest.getHeaderMap());
        }
        if (pageRequest.getUserAgent() != null) {
            connection.userAgent(pageRequest.getUserAgent());
        }
        if (pageRequest.getReferrer() != null) {
            connection.referrer(pageRequest.getReferrer());
        }

        // A max body size of 0 lifts the default 1M limit.
        connection.timeout(pageRequest.getTimeoutMillis())
                .validateTLSCertificates(pageRequest.isValidateTLSCertificates())
                .maxBodySize(0);

        // Proxy
        if (pageRequest.getProxy() != null) {
            connection.proxy(pageRequest.getProxy());
        }

        // Send the request
        return pageRequest.isIfPost() ? connection.post() : connection.get();
    } catch (IOException ex) {
        logger.error(ex.getMessage(), ex);
        return null;
    }
}
 
Example 4
Source File: JsoupUtil.java    From xxl-crawler with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Loads a page and returns the raw response body as a string.
 *
 * <p>Optional request settings (parameters, cookies, headers, user agent, referrer, proxy)
 * are applied only when present on {@code pageRequest}. The response content type is
 * ignored, and the HTTP method is POST when {@code pageRequest.isIfPost()} is true,
 * otherwise GET.
 *
 * @param pageRequest the request describing the URL and connection settings
 * @return the response body, or {@code null} if the URL is rejected by
 *         {@code UrlUtil.isUrl} or an {@link IOException} occurs (the exception is logged)
 */
public static String loadPageSource(PageRequest pageRequest) {
    if (!UrlUtil.isUrl(pageRequest.getUrl())) {
        return null;
    }

    try {
        // Request settings
        Connection connection = Jsoup.connect(pageRequest.getUrl());

        if (pageRequest.getParamMap() != null && !pageRequest.getParamMap().isEmpty()) {
            connection.data(pageRequest.getParamMap());
        }
        if (pageRequest.getCookieMap() != null && !pageRequest.getCookieMap().isEmpty()) {
            connection.cookies(pageRequest.getCookieMap());
        }
        if (pageRequest.getHeaderMap() != null && !pageRequest.getHeaderMap().isEmpty()) {
            connection.headers(pageRequest.getHeaderMap());
        }
        if (pageRequest.getUserAgent() != null) {
            connection.userAgent(pageRequest.getUserAgent());
        }
        if (pageRequest.getReferrer() != null) {
            connection.referrer(pageRequest.getReferrer());
        }

        // A max body size of 0 lifts the default 1M limit.
        connection.timeout(pageRequest.getTimeoutMillis())
                .validateTLSCertificates(pageRequest.isValidateTLSCertificates())
                .maxBodySize(0);

        // Proxy
        if (pageRequest.getProxy() != null) {
            connection.proxy(pageRequest.getProxy());
        }

        Connection.Method httpMethod;
        if (pageRequest.isIfPost()) {
            httpMethod = Connection.Method.POST;
        } else {
            httpMethod = Connection.Method.GET;
        }
        connection.ignoreContentType(true).method(httpMethod);

        // Send the request
        return connection.execute().body();
    } catch (IOException ex) {
        logger.error(ex.getMessage(), ex);
        return null;
    }
}