package cn.xpleaf.spider.core.scheduler.job;

import cn.xpleaf.spider.constants.SpiderConstants;
import cn.xpleaf.spider.utils.JedisUtil;
import cn.xpleaf.spider.utils.SpiderUtil;
import org.quartz.Job;
import org.quartz.JobExecutionContext;
import org.quartz.JobExecutionException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import redis.clients.jedis.Jedis;

import java.util.List;
import java.util.Set;

/**
 * Scheduled job that reads the seed URLs from the seed-URL repository in Redis
 * and adds each of them to the high-priority set of its top-level domain.
 */
public class UrlJob implements Job {

    // SLF4J logger shared by all instances of this job
    private static final Logger logger = LoggerFactory.getLogger(UrlJob.class);

    /**
     * Copies every seed URL into the high-priority set of its domain.
     *
     * <ol>
     *   <li>Read all members of the set stored under
     *       {@code SpiderConstants.SPIDER_SEED_URLS_KEY}.</li>
     *   <li>For each seed URL, add it to the set whose key is the URL's top-level
     *       domain followed by {@code SpiderConstants.SPIDER_DOMAIN_HIGHER_SUFFIX}.</li>
     * </ol>
     *
     * The Jedis connection is always handed back through
     * {@code JedisUtil.returnJedis}, even when a Redis call or the domain lookup
     * throws, so a failing run does not leak the connection.
     *
     * @param context the Quartz execution context (not used by this job)
     * @throws JobExecutionException declared by the {@link Job} contract; this
     *         implementation does not throw it directly
     */
    @Override
    public void execute(JobExecutionContext context) throws JobExecutionException {
        Jedis jedis = JedisUtil.getJedis();
        try {
            // The seed URLs are read with SMEMBERS, i.e. they are kept in a Redis set,
            // so the same seed URL cannot be stored twice.
            Set<String> seedUrls = jedis.smembers(SpiderConstants.SPIDER_SEED_URLS_KEY);
            for (String seedUrl : seedUrls) {
                // Top-level domain of the seed URL; it forms the prefix of the target key.
                // NOTE(review): if getTopDomain can return null the key becomes
                // "null" + suffix - confirm against SpiderUtil.
                String domain = SpiderUtil.getTopDomain(seedUrl);
                jedis.sadd(domain + SpiderConstants.SPIDER_DOMAIN_HIGHER_SUFFIX, seedUrl);
                logger.info("获取种子:{}", seedUrl);
            }
        } finally {
            // Hand the connection back on both the success and the failure path.
            JedisUtil.returnJedis(jedis);
        }
    }

}