package com.geccocrawler.gecco.downloader;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;



import org.apache.http.HttpHost;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.HttpMethod;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.WebRequest;
import com.gargoylesoftware.htmlunit.WebResponse;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.util.NameValuePair;
import com.geccocrawler.gecco.downloader.DownloadException;
import com.geccocrawler.gecco.request.HttpPostRequest;
import com.geccocrawler.gecco.request.HttpRequest;
import com.geccocrawler.gecco.response.HttpResponse;
import com.geccocrawler.gecco.spider.SpiderThreadLocal;
import com.geccocrawler.gecco.utils.UrlUtils;

/**
 * Downloader that uses HtmlUnit to get JavaScript DOM manipulation and AJAX
 * support while fetching pages.
 * 
 * NOTE(review): one WebClient instance is shared by every call to
 * {@link #download(HttpRequest, int)}, and the per-call timeout is written to
 * that shared client's options. Confirm that callers do not invoke
 * {@code download} concurrently on the same instance.
 * 
 * @author huchengyi
 *
 */
@com.geccocrawler.gecco.annotation.Downloader("htmlUnitDownloder")
public class HtmlUnitDownloder extends AbstractDownloader {

	/** HtmlUnit client reused for every download; released in {@link #shutdown()}. */
	private final WebClient webClient;
	
	/**
	 * Creates the underlying Chrome-flavoured WebClient. Failing status codes and
	 * script errors do not throw, redirects are not followed (they are reported
	 * back to the caller by {@link #download(HttpRequest, int)}), CSS is disabled
	 * and the JavaScript timeout is set to 1000.
	 */
	public HtmlUnitDownloder() {
		this.webClient = new WebClient(BrowserVersion.CHROME);
		this.webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
		this.webClient.getOptions().setThrowExceptionOnScriptError(false);
		this.webClient.getOptions().setRedirectEnabled(false);
		this.webClient.getOptions().setCssEnabled(false);
		this.webClient.setJavaScriptTimeout(1000);
		//this.webClient.setJavaScriptErrorListener(new GeccoJavaScriptErrorListener());
	}
	
	/**
	 * Downloads the page described by {@code request} through HtmlUnit.
	 * 
	 * The request is sent as GET, or as POST with the request's fields as
	 * parameters when it is an {@link HttpPostRequest}. A default User-Agent is
	 * set first and the request's own headers are applied on top of it. A proxy
	 * from {@code Proxys.getProxy()} is used when one is available.
	 * 
	 * @param request the request to execute
	 * @param timeout timeout value handed to the WebClient options for this call
	 * @return for status 200, a response carrying the page XML, raw stream,
	 *         content type and charset; for status 301/302, a response whose
	 *         content is the absolute redirect target taken from the
	 *         {@code Location} header
	 * @throws DownloadException if the status is anything other than 200, 301 or
	 *         302, or if any error occurs while building or executing the request
	 */
	public HttpResponse download(HttpRequest request, int timeout) throws DownloadException {
		try {
			URL url = new URL(request.getUrl());
			WebRequest webRequest = new WebRequest(url);
			webRequest.setHttpMethod(HttpMethod.GET);
			if(request instanceof HttpPostRequest) {//post
				HttpPostRequest post = (HttpPostRequest)request;
				webRequest.setHttpMethod(HttpMethod.POST);
				List<NameValuePair> requestParameters = new ArrayList<NameValuePair>();
				for(Map.Entry<String, Object> entry : post.getFields().entrySet()) {
					// A null field value is sent as an empty string instead of failing with an NPE.
					Object value = entry.getValue();
					requestParameters.add(new NameValuePair(entry.getKey(), value == null ? "" : value.toString()));
				}
				webRequest.setRequestParameters(requestParameters);
			}
			//header
			boolean isMobile = SpiderThreadLocal.get().getEngine().isMobile();
			webRequest.setAdditionalHeader("User-Agent", UserAgent.getUserAgent(isMobile));
			// Copy headers one by one: setAdditionalHeaders would swap in the caller's
			// map wholesale, dropping the default User-Agent set above. Request headers
			// still win over the default because they are applied afterwards.
			Map<String, String> headers = request.getHeaders();
			if(headers != null) {
				for(Map.Entry<String, String> header : headers.entrySet()) {
					webRequest.setAdditionalHeader(header.getKey(), header.getValue());
				}
			}
			//proxy
			HttpHost proxy = Proxys.getProxy();
			if(proxy != null) {
				webRequest.setProxyHost(proxy.getHostName());
				webRequest.setProxyPort(proxy.getPort());
			}
			//timeout
			this.webClient.getOptions().setTimeout(timeout);
			//request,response
			// Use the page produced by the configured WebRequest. Fetching the bare URL
			// again would ignore the method, parameters, headers and proxy set above
			// and would hit the server twice.
			HtmlPage page = webClient.getPage(webRequest);
			HttpResponse resp = new HttpResponse();
			WebResponse webResponse = page.getWebResponse();
			int status = webResponse.getStatusCode();
			resp.setStatus(status);
			if(status == 302 || status == 301) {
				// Redirects are not followed by the client; hand the target back to the caller.
				String redirectUrl = webResponse.getResponseHeaderValue("Location");
				resp.setContent(UrlUtils.relative2Absolute(request.getUrl(), redirectUrl));
			} else if(status == 200) {
				String content = page.asXml();
				resp.setContent(content);
				resp.setRaw(webResponse.getContentAsStream());
				String contentType = webResponse.getContentType();
				resp.setContentType(contentType);
				String charset = getCharset(request.getCharset(), contentType);
				resp.setCharset(charset);
			} else {
				throw new DownloadException("ERROR : " + status);
			}
			return resp;
		} catch(DownloadException ex) {
			// Already the right type; do not wrap it a second time.
			throw ex;
		} catch(Exception ex) {
			throw new DownloadException(ex);
		}
	}

	/**
	 * Closes the underlying WebClient.
	 */
	public void shutdown() {
		webClient.close();
	}
}