Java Code Examples for com.gargoylesoftware.htmlunit.WebRequest#setAdditionalHeaders()

The following examples show how to use com.gargoylesoftware.htmlunit.WebRequest#setAdditionalHeaders(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: HtmlUnitPageLoader.java    From xxl-crawler with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Loads the page described by {@code pageRequest} with HtmlUnit (JavaScript enabled)
 * and re-parses the rendered DOM with Jsoup.
 *
 * <p>Request parameters, cookies, headers, user agent, referrer, timeout, proxy and
 * HTTP method are all taken from {@code pageRequest}.
 *
 * @param pageRequest description of the page to fetch
 * @return the parsed document, or {@code null} when the URL is rejected by
 *         {@code UrlUtil.isUrl}, the page produces no XML, or an {@link IOException}
 *         occurs while loading (the exception is logged, not rethrown)
 */
@Override
public Document load(PageRequest pageRequest) {
    if (!UrlUtil.isUrl(pageRequest.getUrl())) {
        return null;
    }

    // try-with-resources closes the client on every exit path.
    try (WebClient webClient = new WebClient()) {
        URL url = new URL(pageRequest.getUrl());
        WebRequest webRequest = new WebRequest(url);

        // Client options. Certificate validation is controlled solely by the request;
        // the former unconditional setUseInsecureSSL(true) was overwritten immediately.
        webClient.getOptions().setJavaScriptEnabled(true);
        webClient.getOptions().setCssEnabled(false);
        webClient.getOptions().setThrowExceptionOnScriptError(false);
        webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
        webClient.getOptions().setDoNotTrackEnabled(false);
        webClient.getOptions().setUseInsecureSSL(!pageRequest.isValidateTLSCertificates());

        if (pageRequest.getParamMap() != null && !pageRequest.getParamMap().isEmpty()) {
            for (Map.Entry<String, String> paramItem : pageRequest.getParamMap().entrySet()) {
                webRequest.getRequestParameters().add(new NameValuePair(paramItem.getKey(), paramItem.getValue()));
            }
        }
        if (pageRequest.getCookieMap() != null && !pageRequest.getCookieMap().isEmpty()) {
            webClient.getCookieManager().setCookiesEnabled(true);
            // Scope each cookie to the target host; a cookie with an empty domain
            // does not match the request host and would not be sent.
            String cookieDomain = url.getHost();
            for (Map.Entry<String, String> cookieItem : pageRequest.getCookieMap().entrySet()) {
                webClient.getCookieManager().addCookie(new Cookie(cookieDomain, cookieItem.getKey(), cookieItem.getValue()));
            }
        }
        if (pageRequest.getHeaderMap() != null && !pageRequest.getHeaderMap().isEmpty()) {
            // Hand over a copy: the User-Agent/Referer calls below write into the request's
            // header map, and must not modify the map owned by pageRequest.
            webRequest.setAdditionalHeaders(new java.util.HashMap<String, String>(pageRequest.getHeaderMap()));
        }
        if (pageRequest.getUserAgent() != null) {
            webRequest.setAdditionalHeader("User-Agent", pageRequest.getUserAgent());
        }
        if (pageRequest.getReferrer() != null) {
            webRequest.setAdditionalHeader("Referer", pageRequest.getReferrer());
        }

        webClient.getOptions().setTimeout(pageRequest.getTimeoutMillis());
        webClient.setJavaScriptTimeout(pageRequest.getTimeoutMillis());

        // Proxy
        if (pageRequest.getProxy() != null) {
            InetSocketAddress address = (InetSocketAddress) pageRequest.getProxy().address();
            boolean isSocks = pageRequest.getProxy().type() == Proxy.Type.SOCKS;
            webClient.getOptions().setProxyConfig(new ProxyConfig(address.getHostName(), address.getPort(), isSocks));
        }

        // Send the request
        if (pageRequest.isIfPost()) {
            webRequest.setHttpMethod(HttpMethod.POST);
        } else {
            webRequest.setHttpMethod(HttpMethod.GET);
        }
        // NOTE(review): a non-HTML response would fail this assignment with a
        // ClassCastException, which is not caught here — confirm callers only pass HTML URLs.
        HtmlPage page = webClient.getPage(webRequest);

        // Wait for background scripts only once the page exists; before the load
        // there are no JavaScript jobs to wait for.
        webClient.waitForBackgroundJavaScript(pageRequest.getTimeoutMillis());

        String pageAsXml = page.asXml();
        if (pageAsXml != null) {
            return Jsoup.parse(pageAsXml);
        }
    } catch (IOException e) {
        logger.error(e.getMessage(), e);
    }
    return null;
}
 
Example 2
Source File: HtmlUnitDownloder.java    From gecco-htmlunit with MIT License 4 votes vote down vote up
/**
 * Downloads the page for {@code request} through the shared HtmlUnit {@code webClient}
 * and converts the result into an {@code HttpResponse}.
 *
 * <p>A POST is issued when {@code request} is an {@code HttpPostRequest} (its fields become
 * request parameters); otherwise a GET. A default User-Agent is set first and the
 * request's own headers are applied on top of it, so a header supplied by the request wins.
 *
 * @param request the request to execute
 * @param timeout timeout passed to {@code webClient.getOptions().setTimeout}
 * @return for status 301/302 a response whose content is the absolute redirect URL;
 *         for status 200 a response carrying the page XML, raw stream, content type and charset
 * @throws DownloadException for any other status code, or wrapping any exception raised
 *         while building or executing the request
 */
public HttpResponse download(HttpRequest request, int timeout) throws DownloadException {
	try {
		URL url = new URL(request.getUrl());
		WebRequest webRequest = new WebRequest(url);
		webRequest.setHttpMethod(HttpMethod.GET);
		if(request instanceof HttpPostRequest) {//post
			HttpPostRequest post = (HttpPostRequest)request;
			webRequest.setHttpMethod(HttpMethod.POST);
			List<NameValuePair> requestParameters = new ArrayList<NameValuePair>();
			for(Map.Entry<String, Object> entry : post.getFields().entrySet()) {
				NameValuePair nvp = new NameValuePair(entry.getKey(), entry.getValue().toString());
				requestParameters.add(nvp);
			}
			webRequest.setRequestParameters(requestParameters);
		}
		//header: default User-Agent first, then the request's own headers entry by entry.
		//setAdditionalHeaders(map) would replace the whole header map and drop the User-Agent just set.
		boolean isMobile = SpiderThreadLocal.get().getEngine().isMobile();
		webRequest.setAdditionalHeader("User-Agent", UserAgent.getUserAgent(isMobile));
		Map<String, String> headers = request.getHeaders();
		if(headers != null) {
			for(Map.Entry<String, String> header : headers.entrySet()) {
				webRequest.setAdditionalHeader(header.getKey(), header.getValue());
			}
		}
		//proxy
		HttpHost proxy = Proxys.getProxy();
		if(proxy != null) {
			webRequest.setProxyHost(proxy.getHostName());
			webRequest.setProxyPort(proxy.getPort());
		}
		//timeout
		this.webClient.getOptions().setTimeout(timeout);
		//request,response: fetch once, using the configured webRequest so that method,
		//parameters, headers and proxy actually apply to the page that is processed
		HtmlPage page = this.webClient.getPage(webRequest);
		HttpResponse resp = new HttpResponse();
		WebResponse webResponse = page.getWebResponse();
		int status = webResponse.getStatusCode();
		resp.setStatus(status);
		if(status == 302 || status == 301) {
			String redirectUrl = webResponse.getResponseHeaderValue("Location");
			resp.setContent(UrlUtils.relative2Absolute(request.getUrl(), redirectUrl));
		} else if(status == 200) {
			String content = page.asXml();
			resp.setContent(content);
			resp.setRaw(webResponse.getContentAsStream());
			String contentType = webResponse.getContentType();
			resp.setContentType(contentType);
			String charset = getCharset(request.getCharset(), contentType);
			resp.setCharset(charset);
		} else {
			throw new DownloadException("ERROR : " + status);
		}
		return resp;
	} catch(DownloadException ex) {
		//already the declared type (e.g. the status error above): do not wrap it a second time
		throw ex;
	} catch(Exception ex) {
		throw new DownloadException(ex);
	}
}