package com.xjtushilei.main;

import org.apache.commons.io.FileUtils;
import org.apache.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.math.BigInteger;
import java.net.URL;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;

/**
 * Created by shilei on 2017/2/27.
 *
 * <p>Breadth-first crawler over {@code http://www.baike.com/wiki/<name>} pages. Starting from the
 * names listed in the classpath resource {@code start_list}, it fetches each page and:
 * <ul>
 *   <li>appends one tab-separated line per newly discovered person found under
 *       {@code #figurerelation li} to {@code Relationship.data};</li>
 *   <li>appends the text of the first {@code .summary p} element to {@code Summary.data};</li>
 *   <li>saves the first {@code .doc-img a img} image as {@code img/<name>.jpg}.</li>
 * </ul>
 * Failures of the last two steps are appended to {@code ErrorSummary.data} and
 * {@code ErrorPicture.data}. All crawl state lives in static fields with no synchronization.
 */
public class Spider {

	private static final Logger logger = Logger.getLogger(Spider.class);

	/** The crawl stops once this many relationship records have been written. */
	private static final BigInteger MAX_NUMBER = new BigInteger("9999999999999999999999999");
	/** Output directory; it is deleted at the beginning of every crawl. */
	private static final String ROOT_PATH = "D://互动百科/";
	/** Charset used for the seed list and for every output file. */
	private static final String ENCODING = "utf-8";
	private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; rv:22.0) Gecko/20100101 Firefox/22.0";
	/** Connect/read timeout, in milliseconds, for page fetches and picture downloads. */
	private static final int TIMEOUT_MILLIS = 30000;
	/** How many times a page fetch is attempted before the name is dropped. */
	private static final int MAX_ATTEMPTS = 3;

	/** Names already seen as the "other" side of a relationship. */
	private static final Set<String> nameSet = new HashSet<String>();
	/** Names whose page was fetched successfully; they are never fetched again. */
	private static final Set<String> crawledSet = new HashSet<String>();
	/** BFS frontier. */
	private static final Queue<String> nameQueue = new LinkedList<String>();
	/** Number of failed page fetches per name. */
	private static final Map<String, Integer> failedAttempts = new HashMap<String, Integer>();
	/** Number of relationship records written so far. */
	private static BigInteger count = BigInteger.ZERO;

	/**
	 * Runs the crawl.
	 *
	 * @deprecated misspelled name kept so existing callers keep working; use {@link #start()}.
	 */
	@Deprecated
	public static void strat() {
		start();
	}

	/**
	 * Runs the crawl: wipes the output directory, seeds the queue from {@code start_list}, then
	 * processes the queue breadth-first until it is empty or the record limit is reached.
	 * I/O problems are logged and do not abort the run.
	 */
	public static void start() {
		// Remove output of any previous run and start from scratch.
		try {
			FileUtils.deleteDirectory(new File(ROOT_PATH));
		} catch (IOException e) {
			logger.error("删除 " + ROOT_PATH + " 失败!", e);
		}

		// Seed the queue with the names the user wants to start from.
		try {
			int queued = loadStartNames();
			logger.info("一共 【" + queued + "】 加入开始队列!");
		} catch (IOException e) {
			logger.error("读取 start_list 失败!", e);
		}

		// Breadth-first crawl.
		while (!nameQueue.isEmpty() && count.compareTo(MAX_NUMBER) < 0) {
			String name = nameQueue.poll();
			// A seed can be queued a second time when it is later discovered as somebody's
			// relation; skip it so its summary and picture are not stored twice.
			if (name.isEmpty() || crawledSet.contains(name)) {
				continue;
			}
			go(name);
		}
	}

	/**
	 * Reads the classpath resource {@code start_list} line by line and queues every non-blank,
	 * trimmed line. The resource is read as a stream because {@code URL.getFile()} returns a
	 * URL-encoded path and cannot address entries inside a jar.
	 *
	 * @return the number of names queued
	 * @throws IOException if the resource is missing or cannot be read
	 */
	private static int loadStartNames() throws IOException {
		InputStream in = Spider.class.getClassLoader().getResourceAsStream("start_list");
		if (in == null) {
			throw new IOException("classpath resource not found: start_list");
		}
		BufferedReader reader = new BufferedReader(new InputStreamReader(in, ENCODING));
		try {
			int queued = 0;
			String line;
			while ((line = reader.readLine()) != null) {
				String seed = line.trim();
				if (!seed.isEmpty()) {
					nameQueue.offer(seed);
					queued++;
				}
			}
			return queued;
		} finally {
			reader.close();
		}
	}

	/**
	 * Fetches the page of {@code name} and stores its relationships, summary and picture.
	 * A failed fetch puts the name back on the queue until {@link #MAX_ATTEMPTS} attempts have
	 * been made; after that the name is dropped so a permanently failing page cannot keep the
	 * crawl running forever.
	 *
	 * @param name page title, appended verbatim to the wiki URL
	 */
	private static void go(String name) {
		logger.info(name + ":start nameSet:" + nameSet.size() + "\tnameQueue:" + nameQueue.size());
		Document doc;
		try {
			doc = Jsoup.connect("http://www.baike.com/wiki/" + name)
					.userAgent(USER_AGENT)
					.ignoreContentType(true).timeout(TIMEOUT_MILLIS).get();
		} catch (IOException e) {
			Integer previous = failedAttempts.get(name);
			int attempts = (previous == null ? 0 : previous.intValue()) + 1;
			failedAttempts.put(name, Integer.valueOf(attempts));
			if (attempts < MAX_ATTEMPTS) {
				logger.error("" + name + "\t失败 !  重新加入队列!", e);
				nameQueue.add(name);
			} else {
				logger.error(name + "\t失败 !  已尝试 " + attempts + " 次,放弃!", e);
			}
			return;
		}
		crawledSet.add(name);

		recordRelationships(name, doc);
		// Store this person's summary text.
		downloadSummary(name, doc);
		// Store this person's picture.
		downloadPicture(name, doc);
	}

	/**
	 * Walks the {@code #figurerelation li} entries of {@code doc}. Every person not seen before
	 * is queued and written to {@code Relationship.data} as
	 * {@code name<TAB>relationship<TAB>otherName}. Entries without a link or with an empty link
	 * text are skipped; an empty relationship label defaults to "朋友".
	 *
	 * @param name the person whose page {@code doc} is
	 * @param doc  the parsed page
	 */
	private static void recordRelationships(String name, Document doc) {
		Elements relationships = doc.select("#figurerelation li");
		for (Element li : relationships) {
			// Enforce the record limit inside a page too, not only between pages.
			if (count.compareTo(MAX_NUMBER) >= 0) {
				return;
			}
			Element other = li.select("a").first();
			if (other == null) {
				continue; // entry without a link: nothing to follow
			}
			String otherName = other.text();
			if (otherName.isEmpty()) {
				continue;
			}
			String relationShip = li.ownText();
			if (relationShip.isEmpty()) {
				relationShip = "朋友";
			}
			if (!nameSet.add(otherName)) {
				continue; // already discovered earlier
			}
			nameQueue.add(otherName);
			logger.debug("队列增加:" + otherName);
			try {
				FileUtils.write(new File(ROOT_PATH + "Relationship.data"),
						name + "\t" + relationShip + "\t" + otherName + "\n", ENCODING, true);
				count = count.add(BigInteger.ONE);
				logger.info("当前成功数目:【" + count.toString() + "】");
			} catch (IOException e) {
				logger.error("写入:" + name + "-" + relationShip + "-" + otherName + "\t\t失败!", e);
			}
		}
	}

	/**
	 * Appends {@code name} and the text of the first {@code .summary p} element to
	 * {@code Summary.data}. A missing element or a write error is logged and recorded in
	 * {@code ErrorSummary.data}.
	 *
	 * @param name the person whose page {@code doc} is
	 * @param doc  the parsed page
	 */
	private static void downloadSummary(String name, Document doc) {
		Element paragraph = doc.select(".summary p").first();
		if (paragraph == null) {
			logger.error("个人描述信息 写入:【" + name + "】\t失败!");
			recordFailure("ErrorSummary.data", name + "\t" + "no element matches .summary p" + "\n");
			return;
		}
		String summary = paragraph.text();
		logger.debug(summary);
		try {
			FileUtils.write(new File(ROOT_PATH + "Summary.data"), name + "\n" + summary + "\n", ENCODING, true);
		} catch (IOException e) {
			logger.error("个人描述信息 写入:【" + name + "】\t失败!", e);
			recordFailure("ErrorSummary.data", name + "\t" + e.toString() + "\n");
		}
	}

	/**
	 * Downloads the {@code src} of the first {@code .doc-img a img} element to
	 * {@code img/<name>.jpg}. A missing element, a malformed URL or a download error is logged
	 * and recorded in {@code ErrorPicture.data}.
	 *
	 * @param name the person whose page {@code doc} is
	 * @param doc  the parsed page
	 */
	private static void downloadPicture(String name, Document doc) {
		Element img = doc.select(".doc-img a img").first();
		if (img == null) {
			logger.error(name + " 【图片】 下载失败! ");
			recordFailure("ErrorPicture.data", name + "\t" + null + "\t" + "no element matches .doc-img a img" + "\n");
			return;
		}
		String picUrl = img.attr("src");
		// The name is scraped from a web page: replace characters that are path separators or
		// illegal in Windows file names so the file always lands directly inside img/.
		String fileName = name.replaceAll("[\\\\/:*?\"<>|]", "_");
		try {
			// Explicit timeouts: the two-argument overload can block forever on a stalled server.
			FileUtils.copyURLToFile(new URL(picUrl), new File(ROOT_PATH + "img/" + fileName + ".jpg"),
					TIMEOUT_MILLIS, TIMEOUT_MILLIS);
			logger.info(name + " 【图片】 下载成功");
		} catch (IOException e) {
			logger.error(name + " 【图片】 下载失败! ", e);
			recordFailure("ErrorPicture.data", name + "\t" + picUrl + "\t" + e.toString() + "\n");
		}
	}

	/**
	 * Appends {@code line} to the given error file under the output directory; a failure to do
	 * so is only logged.
	 *
	 * @param errorFileName file name relative to the output directory
	 * @param line          text to append, including its trailing newline
	 */
	private static void recordFailure(String errorFileName, String line) {
		try {
			FileUtils.write(new File(ROOT_PATH + errorFileName), line, ENCODING, true);
		} catch (IOException e) {
			logger.error("写入 " + errorFileName + " 失败!", e);
		}
	}

}