/*
 * APDPlat - Application Product Development Platform
 * Copyright (c) 2013, 杨尚川, [email protected]
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package org.apdplat.superword.tools;

import org.apache.commons.lang.StringUtils;
import org.apdplat.superword.tools.WordLinker.Dictionary;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

/**
 * Created by ysc on 12/2/15.
 */
public class Definition {
    // Class-wide SLF4J logger.
    private static final Logger LOGGER = LoggerFactory.getLogger(Definition.class);

    // CSS selectors handed to jsoup's select() to locate the definition elements
    // on each dictionary's result page (see parseDefinitionFromHtml).
    public static final String ICIBA_CSS_PATH = "ul.base-list li";
    // Multiple CSS paths may be separated with '|': if the first path extracts
    // nothing, the second one is tried, and so on.
    public static final String YOUDAO_CSS_PATH = "div#phrsListTab.trans-wrapper.clearfix div.trans-container ul li" +
            " | div.trans-container ul p.wordGroup";
    public static final String COLLINS_CSS_PATH = "html body div#wrapper div.content.english div.dictionary div.definition_wrapper.english div.definition_main div.definition_content.col.main_bar";
    public static final String CAMBRIDGE_CSS_PATH = "html body div.wrapper.responsive_container div.cdo-dblclick-area div.responsive_row div.responsive_cell_center div.cdo-section div#entryContent.entrybox.english";
    public static final String MACMILLAN_CSS_PATH = "html body div.responsive_container div.responsive_row div#rightcol.responsive_cell_center_plus_right div#contentpanel div#entryContent div.responsive_cell_center_plus_right div.HOMOGRAPH";
    public static final String HERITAGE_CSS_PATH = "html body div#content.container div.container3 div#results table tbody tr td div.pseg div.ds-list";
    public static final String WIKTIONARY_CSS_PATH = "html body div#content.mw-body div#bodyContent.mw-body-content div#mw-content-text.mw-content-ltr";
    public static final String WORDNET_CSS_PATH = "html body div.form";
    public static final String RANDOMHOUSE_CSS_PATH = "html body div.content-container.main-area div.row div.center-well-container section#source-luna.source-wrapper.source-luna.is-pm-btn-show.pm-btn-spot div.source-box.oneClick-area section.luna-box div.source-data div.def-list section.def-pbk.ce-spot div.def-set div.def-content";

    // Request header values attached to every page fetch in _getContent(String).
    // NOTE(review): HOST and REFERER are fixed to iciba although pages of the other
    // dictionaries are fetched with the same headers - confirm this is intended.
    private static final String ACCEPT = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
    private static final String ENCODING = "gzip, deflate";
    private static final String LANGUAGE = "zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3";
    private static final String CONNECTION = "keep-alive";
    private static final String HOST = "www.iciba.com";
    private static final String REFERER = "http://www.iciba.com/";
    private static final String USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:36.0) Gecko/20100101 Firefox/36.0";

    /**
     * Looks up the definitions of a word and returns them as one string.
     *
     * @param dictionary the dictionary to query
     * @param word       the word to look up
     * @param joinString the separator placed between consecutive definitions
     * @return the definitions joined by {@code joinString}, or an empty string
     *         when no definition was found
     */
    public static String getDefinitionString(Dictionary dictionary, String word, String joinString) {
        List<String> definitions = getDefinition(dictionary, word);
        return concat(definitions, joinString);
    }

    /**
     * Joins the given strings with a separator.
     *
     * <p>The separator appears only between elements, never after the last one.
     * A {@code null} or empty list yields an empty string, and a {@code null}
     * separator is treated as an empty separator.
     *
     * @param list       the strings to join; may be {@code null}
     * @param joinString the separator placed between elements; may be {@code null}
     * @return the joined text, never {@code null}
     */
    public static String concat(List<String> list, String joinString){
        if (list == null || list.isEmpty()) {
            return "";
        }
        String separator = joinString == null ? "" : joinString;
        // String.join produces exactly "e1 + sep + e2 + ... + en", which is what the
        // previous append-then-truncate loop computed.
        return String.join(separator, list);
    }

    /**
     * Dispatches a lookup to the method that handles the requested dictionary.
     *
     * <p>Any dictionary constant without a dedicated branch falls back to ICIBA.
     *
     * @param dictionary the dictionary to query; switching on {@code null} throws
     *                   {@link NullPointerException}
     * @param word       the word to look up
     * @return the definitions found for {@code word}
     */
    public static List<String> getDefinition(Dictionary dictionary, String word){
        switch (dictionary) {
            case YOUDAO:
                return getDefinitionForYOUDAO(word);
            case COLLINS:
                return getDefinitionForCOLLINS(word);
            case WEBSTER:
                return getDefinitionForWEBSTER(word);
            case OXFORD:
                return getDefinitionForOXFORD(word);
            case CAMBRIDGE:
                return getDefinitionForCAMBRIDGE(word);
            case MACMILLAN:
                return getDefinitionForMACMILLAN(word);
            case HERITAGE:
                return getDefinitionForHERITAGE(word);
            case WIKTIONARY:
                return getDefinitionForWIKTIONARY(word);
            case WORDNET:
                return getDefinitionForWORDNET(word);
            case RANDOMHOUSE:
                return getDefinitionForRANDOMHOUSE(word);
            case ICIBA:
            default:
                // ICIBA doubles as the fallback for unlisted constants.
                return getDefinitionForICIBA(word);
        }
    }

    /**
     * Looks up {@code word} in the ICIBA dictionary.
     *
     * @param word the word to look up; appended as-is to the ICIBA base URL
     * @return the definitions extracted with {@link #ICIBA_CSS_PATH}
     */
    public static List<String> getDefinitionForICIBA(String word){
        final String url = WordLinker.ICIBA + word;
        return parseDefinition(url, ICIBA_CSS_PATH, word, Dictionary.ICIBA);
    }
    /**
     * Looks up {@code word} in the YOUDAO dictionary.
     *
     * @param word the word to look up; appended as-is to the YOUDAO base URL
     * @return the definitions extracted with {@link #YOUDAO_CSS_PATH}
     */
    public static List<String> getDefinitionForYOUDAO(String word){
        final String url = WordLinker.YOUDAO + word;
        return parseDefinition(url, YOUDAO_CSS_PATH, word, Dictionary.YOUDAO);
    }
    /**
     * Looks up {@code word} in the COLLINS dictionary.
     *
     * @param word the word to look up; appended as-is to the COLLINS base URL
     * @return the definitions extracted with {@link #COLLINS_CSS_PATH}
     */
    public static List<String> getDefinitionForCOLLINS(String word){
        final String url = WordLinker.COLLINS + word;
        return parseDefinition(url, COLLINS_CSS_PATH, word, Dictionary.COLLINS);
    }
    /**
     * Looks up {@code word} in the WEBSTER dictionary.
     *
     * <p>No CSS path is passed: Webster pages are handled by
     * {@link #parseDefinitionForWebster(String, String)}, which uses its own selectors.
     *
     * @param word the word to look up; appended as-is to the WEBSTER base URL
     * @return the definitions found for {@code word}
     */
    public static List<String> getDefinitionForWEBSTER(String word){
        final String url = WordLinker.WEBSTER + word;
        return parseDefinition(url, null, word, Dictionary.WEBSTER);
    }
    /**
     * Looks up {@code word} in the OXFORD dictionary.
     *
     * <p>No CSS path is passed: Oxford pages are handled by
     * {@link #parseDefinitionForOxford(String, String)}, which uses its own selectors.
     *
     * @param word the word to look up; appended as-is to the OXFORD base URL
     * @return the definitions found for {@code word}
     */
    public static List<String> getDefinitionForOXFORD(String word){
        final String url = WordLinker.OXFORD + word;
        return parseDefinition(url, null, word, Dictionary.OXFORD);
    }
    /**
     * Looks up {@code word} in the CAMBRIDGE dictionary.
     *
     * @param word the word to look up; appended as-is to the CAMBRIDGE base URL
     * @return the definitions extracted with {@link #CAMBRIDGE_CSS_PATH}
     */
    public static List<String> getDefinitionForCAMBRIDGE(String word){
        final String url = WordLinker.CAMBRIDGE + word;
        return parseDefinition(url, CAMBRIDGE_CSS_PATH, word, Dictionary.CAMBRIDGE);
    }
    /**
     * Looks up {@code word} in the MACMILLAN dictionary.
     *
     * @param word the word to look up; appended as-is to the MACMILLAN base URL
     * @return the definitions extracted with {@link #MACMILLAN_CSS_PATH}
     */
    public static List<String> getDefinitionForMACMILLAN(String word){
        final String url = WordLinker.MACMILLAN + word;
        return parseDefinition(url, MACMILLAN_CSS_PATH, word, Dictionary.MACMILLAN);
    }
    /**
     * Looks up {@code word} in the HERITAGE dictionary.
     *
     * @param word the word to look up; appended as-is to the HERITAGE base URL
     * @return the definitions extracted with {@link #HERITAGE_CSS_PATH}
     */
    public static List<String> getDefinitionForHERITAGE(String word){
        final String url = WordLinker.HERITAGE + word;
        return parseDefinition(url, HERITAGE_CSS_PATH, word, Dictionary.HERITAGE);
    }
    /**
     * Looks up {@code word} in the WIKTIONARY dictionary.
     *
     * @param word the word to look up; appended as-is to the WIKTIONARY base URL
     * @return the definitions extracted with {@link #WIKTIONARY_CSS_PATH}
     */
    public static List<String> getDefinitionForWIKTIONARY(String word){
        final String url = WordLinker.WIKTIONARY + word;
        return parseDefinition(url, WIKTIONARY_CSS_PATH, word, Dictionary.WIKTIONARY);
    }
    /**
     * Looks up {@code word} in the WORDNET dictionary.
     *
     * @param word the word to look up; appended as-is to the WORDNET base URL
     * @return the definitions extracted with {@link #WORDNET_CSS_PATH}
     */
    public static List<String> getDefinitionForWORDNET(String word){
        final String url = WordLinker.WORDNET + word;
        return parseDefinition(url, WORDNET_CSS_PATH, word, Dictionary.WORDNET);
    }
    /**
     * Looks up {@code word} in the RANDOMHOUSE dictionary.
     *
     * @param word the word to look up; appended as-is to the RANDOMHOUSE base URL
     * @return the definitions extracted with {@link #RANDOMHOUSE_CSS_PATH}
     */
    public static List<String> getDefinitionForRANDOMHOUSE(String word){
        final String url = WordLinker.RANDOMHOUSE + word;
        return parseDefinition(url, RANDOMHOUSE_CSS_PATH, word, Dictionary.RANDOMHOUSE);
    }

    /**
     * Returns the definitions of a word, preferring the stored copy over the network.
     *
     * <p>The value stored via {@code MySQLUtils} for this word and dictionary is
     * consulted first; when it is non-blank it is split on {@code <br/>} and returned.
     * Otherwise the page at {@code url} is fetched and parsed, and a non-empty result
     * is stored (joined with {@code <br/>}) before being returned.
     *
     * @param url        the page to fetch when nothing is stored
     * @param cssPath    the CSS path(s) used to extract definitions from the page
     * @param word       the word being looked up
     * @param dictionary the dictionary the page belongs to
     * @return the definitions; empty when nothing is stored and nothing could be parsed
     */
    public static List<String> parseDefinition(String url, String cssPath, String word, Dictionary dictionary){
        final String dictionaryName = dictionary.name();

        String stored = MySQLUtils.getWordDefinition(word, dictionaryName);
        if (!StringUtils.isBlank(stored)) {
            return Arrays.asList(stored.split("<br/>"));
        }

        String page = getContent(url);
        List<String> parsed = parseDefinitionFromHtml(page, cssPath, word, dictionary);
        if (parsed.isEmpty()) {
            // Nothing worth storing.
            return parsed;
        }
        MySQLUtils.saveWordDefinition(word, dictionaryName, concat(parsed, "<br/>"));
        return parsed;
    }

    /**
     * Extracts definitions from the HTML of a dictionary result page.
     *
     * <p>OXFORD and WEBSTER pages are delegated to their dedicated parsers. For every
     * other dictionary, {@code cssPath} is split on {@code |} and the resulting
     * selectors are tried in order until one of them yields at least one definition.
     * Blank element texts and texts starting with {@code 变形} are skipped. Any
     * exception raised while parsing is logged and whatever was collected so far is
     * returned.
     *
     * @param html       the page markup
     * @param cssPath    one or more {@code |}-separated CSS selectors
     * @param word       the word being looked up (used in the error log only)
     * @param dictionary the dictionary the page belongs to
     * @return the extracted definitions, possibly empty
     */
    public static List<String> parseDefinitionFromHtml(String html, String cssPath, String word, Dictionary dictionary){
        if (Dictionary.OXFORD == dictionary) {
            return parseDefinitionForOxford(html, null);
        }
        if (Dictionary.WEBSTER == dictionary) {
            return parseDefinitionForWebster(html, null);
        }

        List<String> definitions = new ArrayList<>();
        try {
            Document page = Jsoup.parse(html);
            String[] selectors = cssPath.split("\\|");
            // Stop at the first selector that produced anything.
            for (int i = 0; i < selectors.length && definitions.isEmpty(); i++) {
                String selector = selectors[i].trim();
                if (StringUtils.isBlank(selector)) {
                    continue;
                }
                for (Element node : page.select(selector)) {
                    String text = node.text();
                    if (StringUtils.isBlank(text)) {
                        continue;
                    }
                    text = text.trim();
                    if (text.startsWith("变形")) {
                        continue;
                    }
                    definitions.add(text);
                }
            }
        } catch (Exception e) {
            LOGGER.error("解析定义出错:" + word, e);
        }
        return definitions;
    }

    /**
     * Extracts definitions from a Webster result page.
     *
     * <p>Each definition is emitted as {@code "<part of speech> : <text>"}, where the
     * text starts at its first alphabetic character. Texts shorter than three
     * characters, and texts that contain no alphabetic character at all, are skipped.
     * Any exception raised while parsing is logged and whatever was collected so far
     * is returned.
     *
     * @param html    the page markup
     * @param cssPath unused; the selectors are fixed inside this method
     * @return the extracted definitions, possibly empty
     */
    public static List<String> parseDefinitionForWebster(String html, String cssPath){
        List<String> list = new ArrayList<>();
        try {
            for (Element element : Jsoup.parse(html).select("div.tense-box.quick-def-box.simple-def-box.card-box.def-text div.inner-box-wrapper")) {
                String partOfSpeech = element.select("div.word-attributes span.main-attr em").text().trim();
                for (Element defElement : element.select("div.definition-block.def-text ul.definition-list.no-count li p.definition-inner-item span")){
                    String def = defElement.text().trim();
                    if(def.length() < 3){
                        continue;
                    }
                    // Skip leading markers/punctuation. The scan is bounded so that a text
                    // without any letter is skipped instead of running off the end of the
                    // string and aborting the whole parse.
                    int start = 0;
                    while(start < def.length() && !Character.isAlphabetic(def.charAt(start))){
                        start++;
                    }
                    if(start == def.length()){
                        continue;
                    }
                    list.add(partOfSpeech + " : " + def.substring(start));
                }
            }
        } catch (Exception e){
            LOGGER.error("解析定义出错:", e);
        }
        return list;
    }

    /**
     * Extracts definitions from an Oxford result page.
     *
     * <p>Each sense is emitted as {@code "<part of speech> <sequence> <definition>"},
     * with a single trailing colon removed from the definition text. Any exception
     * raised while parsing is logged and whatever was collected so far is returned.
     *
     * @param html    the page markup
     * @param cssPath unused; the selectors are fixed inside this method
     * @return the extracted definitions, possibly empty
     */
    public static List<String> parseDefinitionForOxford(String html, String cssPath){
        List<String> senses = new ArrayList<>();
        try {
            Document page = Jsoup.parse(html);
            for (Element group : page.select("section.se1.senseGroup")) {
                String partOfSpeech = group.select("span.partOfSpeech").text().trim();
                for (Element sense : group.select("div.senseInnerWrapper")) {
                    String sequence = sense.select("span.iteration").text().trim();
                    String text = sense.select("span.definition").text().trim();
                    if (text.endsWith(":")) {
                        text = text.substring(0, text.length() - 1);
                    }
                    senses.add(partOfSpeech + " " + sequence + " " + text);
                }
            }
        } catch (Exception e) {
            LOGGER.error("解析定义出错:", e);
        }
        return senses;
    }

    /**
     * Fetches the page at {@code url}, switching to a new proxy IP when the site
     * answers with its "requests too frequent" notice.
     *
     * <p>The first attempt goes through the time-limited fetch and its duration is
     * logged. While the returned page contains the notice, a new IP is requested and
     * the page is fetched again, at most three times.
     *
     * @param url the page to fetch
     * @return the page markup as returned by the last attempt
     */
    public static String getContent(String url) {
        final long begin = System.currentTimeMillis();
        String page = _getContent(url, 1000);
        long elapsed = System.currentTimeMillis() - begin;
        LOGGER.info("获取定义耗时: {}", TimeUtils.getTimeDes(elapsed));

        for (int retry = 1;
             retry <= 3 && StringUtils.isNotBlank(page) && page.contains("非常抱歉,来自您ip的请求异常频繁");
             retry++) {
            // Switch to a new IP address before trying again.
            ProxyIp.toNewIp();
            page = _getContent(url);
        }
        return page;
    }

    /**
     * Fetches the page at {@code url} on the shared executor, waiting at most
     * {@code timeout} milliseconds for the result.
     *
     * <p>The result is returned as soon as it is available instead of always waiting
     * out the full timeout. When the wait times out or the calling thread is
     * interrupted, the pending fetch is cancelled; an interrupt is also restored on the
     * calling thread.
     * NOTE(review): if a fixed per-request delay is wanted as request throttling, add
     * it explicitly in the caller.
     *
     * @param url     the page to fetch
     * @param timeout the maximum time to wait, in milliseconds
     * @return the page markup, or an empty string when the fetch failed, timed out or
     *         was interrupted
     */
    private static String _getContent(String url, int timeout) {
        Future<String> future = ThreadPool.EXECUTOR_SERVICE.submit(()->_getContent(url));
        try {
            return future.get(timeout, TimeUnit.MILLISECONDS);
        } catch (TimeoutException e) {
            // Do not leave an abandoned fetch occupying a pool thread.
            future.cancel(true);
            LOGGER.error("获取网页异常", e);
        } catch (InterruptedException e) {
            future.cancel(true);
            // Preserve the interrupt for code further up the stack.
            Thread.currentThread().interrupt();
            LOGGER.error("获取网页异常", e);
        } catch (ExecutionException e) {
            LOGGER.error("获取网页异常", e);
        }
        return "";
    }

    /**
     * Requests the page at {@code url} with a browser-like set of headers and returns
     * its markup with all line breaks removed.
     *
     * <p>The request is sent with {@code post()} and a 1000 ms jsoup timeout. Failures
     * during the request are logged and reported as an empty string.
     * NOTE(review): the Host and Referer headers are fixed to iciba for every URL -
     * confirm this is intended for the other dictionary sites.
     *
     * @param url the page to fetch
     * @return the page markup without {@code \n} / {@code \r}, or an empty string on failure
     */
    private static String _getContent(String url) {
        Connection request = Jsoup.connect(url)
                .header("Accept", ACCEPT)
                .header("Accept-Encoding", ENCODING)
                .header("Accept-Language", LANGUAGE)
                .header("Connection", CONNECTION)
                .header("Referer", REFERER)
                .header("Host", HOST)
                .header("User-Agent", USER_AGENT)
                .timeout(1000)
                .ignoreContentType(true);
        try {
            Document page = request.post();
            String markup = page.html();
            return markup.replaceAll("[\n\r]", "");
        } catch (Exception e) {
            LOGGER.error("获取URL:" + url + "页面出错", e);
            return "";
        }
    }

    /**
     * Manual smoke test: prints the Webster definitions of "make", one per line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Swap in getDefinitionForOXFORD("make") to exercise the Oxford parser instead.
        List<String> definitions = getDefinitionForWEBSTER("make");
        for (String definition : definitions) {
            System.out.println(definition);
        }
    }
}