package me.zhyd.hunter.processor;

import me.zhyd.hunter.Hunter;
import me.zhyd.hunter.config.HunterConfig;
import me.zhyd.hunter.entity.VirtualArticle;
import me.zhyd.hunter.scheduler.BlockingQueueScheduler;
import me.zhyd.hunter.util.HunterPrintWriter;
import me.zhyd.hunter.downloader.HttpClientDownloader;
import org.apache.commons.collections.CollectionUtils;
import us.codecraft.webmagic.proxy.Proxy;
import us.codecraft.webmagic.proxy.SimpleProxyProvider;

import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;

/**
 * 爬虫入口
 *
 * @author yadong.zhang (yadong.zhang0415(a)gmail.com)
 * @version 1.0
 */
public class BlogHunterProcessor extends HunterProcessor {

    public BlogHunterProcessor(String url, boolean convertImage) {
        super(url, convertImage);
    }

    public BlogHunterProcessor(String url, boolean convertImage, HunterPrintWriter writer) {
        super(url, convertImage, writer);
    }

    public BlogHunterProcessor(HunterConfig config) {
        super(config);
    }

    public BlogHunterProcessor(HunterConfig config, String uuid) {
        super(config, uuid);
    }

    /**
     * @param config Hunter Config
     * @param writer
     * @param uuid
     */
    public BlogHunterProcessor(HunterConfig config, HunterPrintWriter writer, String uuid) {
        super(config, writer, uuid);
    }

    /**
     * 运行爬虫并返回结果
     *
     * @return
     */
    @Override
    public CopyOnWriteArrayList<VirtualArticle> execute() {
        List<String> errors = this.validateModel(config);
        if (CollectionUtils.isNotEmpty(errors)) {
            writer.print("校验不通过!请依据下方提示,检查输入参数是否正确......");
            for (String error : errors) {
                writer.print(">> " + error);
            }
            return null;
        }

        CopyOnWriteArrayList<VirtualArticle> virtualArticles = new CopyOnWriteArrayList<>();
        Hunter spider = Hunter.create(this, config, uuid);

        spider.addUrl(config.getEntryUrls().toArray(new String[0]))
                .setScheduler(new BlockingQueueScheduler(config))
                .addPipeline((resultItems, task) -> this.process(resultItems, virtualArticles, spider))
                .setDownloader(new HttpClientDownloader())
                .thread(config.getThreadCount());

        //设置抓取代理IP
        if (!CollectionUtils.isEmpty(config.getProxyList())) {
            HttpClientDownloader httpClientDownloader = new HttpClientDownloader();
            SimpleProxyProvider provider = SimpleProxyProvider.from(config.getProxyList().toArray(new Proxy[0]));
            httpClientDownloader.setProxyProvider(provider);
            spider.setDownloader(httpClientDownloader);
        }
        // 测试代理
        /*HttpClientDownloader httpClientDownloader = new HttpClientDownloader();
        SimpleProxyProvider provider = SimpleProxyProvider.from(
                new Proxy("61.135.217.7", 80)
        );
        httpClientDownloader.setProxyProvider(provider);
        spider.setDownloader(httpClientDownloader);*/

        // 启动爬虫
        spider.run();
        return virtualArticles;
    }


}