package 爬虫.数据解析.电影资源爬取;

import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.RandomAccessFile;
import java.util.HashSet;

/*
 * Design notes:
 *   - The crawl must not leave www.dy2018.com.
 *   - The queue must not contain duplicate URLs.
 *   - Check the order.
 */
/**
 * Crawler worker for www.dy2018.com.
 *
 * <p>{@link #main(String[])} seeds the shared {@code LinkQueue} with the links found on the
 * site's front page and then starts 100 threads that all run the same {@code Demo} instance.
 * Each worker repeatedly takes an unvisited URL, downloads the page, and hands every link
 * found on it back to {@code LinkQueue}. Links whose URL contains {@code "/i/"} are printed
 * and appended, one per line, to {@code D:\dy.txt} the first time they are seen.
 *
 * <p>{@code LinkQueue} and {@code Queue} are declared elsewhere in the project; this class
 * relies on {@code LinkQueue} being safe to call from several threads (not visible here).
 */
public class Demo implements Runnable{
    /** URLs that have already been written to the output file. Guarded by {@link #RECORD_LOCK}. */
    public static HashSet<String> set = new HashSet<String>() ;

    /** Prefix that every extracted {@code href} is appended to. */
    private static final String SITE_ROOT = "http://www.dy2018.com";

    /** File that matching URLs are appended to. */
    private static final String OUTPUT_FILE = "D:\\dy.txt";

    /**
     * Single lock shared by all workers. It serialises both the "seen before?" check on
     * {@link #set} and the append to the output file, so a URL is recorded at most once
     * and lines from different threads cannot interleave.
     */
    private static final Object RECORD_LOCK = new Object();

    /**
     * Worker loop: runs until {@code LinkQueue} reports that no unvisited URL is left.
     * A failure while processing one URL is reported on {@code System.err} and the loop
     * moves on to the next URL.
     */
    @Override
    public void run() {
        while (!LinkQueue.unVisitedUrlEmpty()) {
            String url = null;
            try {
                url = LinkQueue.unVisitedUrlDeQueue();
                if (url == null) {
                    // Another worker may have taken the last URL between the emptiness
                    // check and the dequeue; what LinkQueue returns then is not visible
                    // here, so treat null as "nothing to do" and re-check the loop condition.
                    continue;
                }
                LinkQueue.addVisitedUrl(url);

                Queue newQ = getUrlQueue(url);
                while (!newQ.isQueueEmpty()) {
                    String oneUrl = newQ.deQueue();
                    if (oneUrl.indexOf("/i/") != -1) {
                        recordResourceUrl(oneUrl);
                    }
                    LinkQueue.addUnvisitedUrl(oneUrl);
                }

                System.out.println("线程 : "+Thread.currentThread().getName()+"  已访问数目 :"+LinkQueue.getVisitedUrlNum()+" 待访问队列数目 : "+LinkQueue.getUnVisitedUrlNum());
                System.out.println();
            } catch (Exception e) {
                // Keep the worker alive (one bad page must not stop the crawl), but do not
                // hide the failure.
                System.err.println("Failed to crawl " + url + ": " + e);
            }
        }
    }

    /**
     * Prints {@code resourceUrl} and appends it to the output file, unless it has been
     * recorded before. The whole check-and-write sequence runs under {@link #RECORD_LOCK}.
     *
     * @param resourceUrl URL to record
     * @throws java.io.IOException if the output file cannot be opened or written
     */
    private static void recordResourceUrl(String resourceUrl) throws java.io.IOException {
        synchronized (RECORD_LOCK) {
            if (!set.add(resourceUrl)) {
                return; // already recorded by this or another worker
            }
            System.out.println(resourceUrl);
            // try-with-resources closes the file even when seek/write fails.
            try (RandomAccessFile out = new RandomAccessFile(OUTPUT_FILE, "rw")) {
                out.seek(out.length()); // append at the current end of the file
                out.write(resourceUrl.getBytes());
                out.write("\r\n".getBytes());
            }
        }
    }

    /**
     * Seeds {@code LinkQueue} with the links on the front page and starts 100 worker threads.
     *
     * @param args unused
     * @throws Exception if the front page cannot be fetched
     */
    public static void main(String[] args) throws Exception {
        String startUrl = SITE_ROOT + "/";
        Queue queue = getUrlQueue(startUrl);
        LinkQueue.addVisitedUrl(startUrl);
        while (!queue.isQueueEmpty()) {
            LinkQueue.addUnvisitedUrl(queue.deQueue());
        }

        Demo demo = new Demo();
        for (int i = 0; i < 100; i++) {
            new Thread(demo, "线程" + i).start();
        }
    }

    /**
     * Downloads {@code url} and returns a queue holding one URL for every {@code <a>} element
     * on the page whose {@code href} does not contain {@code "webPlay"}.
     *
     * <p>Each returned URL is {@code "http://www.dy2018.com"} followed by the raw {@code href}
     * value. NOTE(review): because the {@code href} is appended verbatim, a value that is not
     * a site-relative path (for example an absolute URL) produces a malformed URL.
     *
     * @param url page to download
     * @return queue of URLs built from the page's links; empty when the response has no body
     * @throws Exception if the request fails or the body cannot be read
     */
    public static Queue getUrlQueue(String url) throws Exception{
        Queue queue = new Queue() ;
        String html;
        // Close both the response and the client so the connection and the client's pool
        // are released after every page, including when the request fails.
        try (CloseableHttpClient client = HttpClients.createDefault();
             CloseableHttpResponse response = client.execute(new HttpGet(url))) {
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                return queue; // response without a body: nothing to parse
            }
            html = EntityUtils.toString(entity, "gb2312");
        }

        Document doc = Jsoup.parse(html);
        Elements anchors = doc.select("a");
        for (Element anchor : anchors) {
            String href = anchor.attr("href");
            if (href.indexOf("webPlay") == -1) {
                queue.enQueue(SITE_ROOT + href);
            }
        }
        return queue ;
    }
}