package org.codelibs.riverweb.crawler; import java.util.Date; import org.codelibs.fess.crawler.CrawlerThread; import org.codelibs.fess.crawler.client.CrawlerClient; import org.codelibs.fess.crawler.client.EsClient; import org.codelibs.fess.crawler.entity.UrlQueue; import org.codelibs.riverweb.config.RiverConfig; import org.codelibs.riverweb.config.RiverConfigManager; import org.codelibs.riverweb.util.ConversionUtil; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.SearchHitField; import org.elasticsearch.search.SearchHits; import org.elasticsearch.search.sort.SortOrder; import org.lastaflute.di.core.SingletonLaContainer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class RwCrawlerThread extends CrawlerThread { private static final Logger logger = LoggerFactory.getLogger(RwCrawlerThread.class); @Override protected boolean isContentUpdated(final CrawlerClient client, final UrlQueue<?> urlQueue) { final RiverConfigManager riverConfigManager = SingletonLaContainer.getComponent(RiverConfigManager.class); final RiverConfig riverConfig = riverConfigManager.get(crawlerContext.getSessionId()); if (riverConfig.isIncremental()) { final EsClient esClient = SingletonLaContainer.getComponent(EsClient.class); try { final SearchResponse response = esClient.prepareSearch(riverConfig.getIndex()).setTypes(riverConfig.getType()) .setQuery(QueryBuilders.termQuery("url", urlQueue.getUrl())).addField("lastModified") .addSort("lastModified", SortOrder.DESC).execute().actionGet(); final SearchHits hits = response.getHits(); if (hits.getTotalHits() > 0) { final SearchHitField lastModifiedField = hits.getAt(0).getFields().get("lastModified"); if (lastModifiedField != null) { final Date lastModified = ConversionUtil.convert(lastModifiedField.getValue(), Date.class); if (lastModified != null) { urlQueue.setLastModified(lastModified.getTime()); } } } } catch (final Exception e) { logger.debug("Failed to retrieve lastModified.", e); } } return super.isContentUpdated(client, urlQueue); } }