package org.apdplat.word.analysis;

import org.apdplat.word.WordSegmenter;
import org.apdplat.word.corpus.Bigram;
import org.apdplat.word.segmentation.Word;
import org.apdplat.word.util.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;

/**
 * Estimates how likely it is that a sentence is meaningful human language.
 * Suppose a sentence is generated at random from a finite set of characters: how can we
 * judge the likelihood that this sentence is meaningful human language?
 * To make the problem easier, "a finite set of characters" can be relaxed to
 * "a finite set of words".
 * Assuming there are 3000 commonly used Chinese characters and the random sentence has
 * length 10, the total number of sentences that can be generated is:
 * 3000*3000... that is, 3000 multiplied by itself 10 times.
 * So-called intelligence refers to human wisdom and the ability to act; if a computer cannot
 * truly solve this problem, how can we speak of intelligence at all?
 * The "something-or-other intelligence" seen everywhere, and the claims that something has
 * passed the Turing test yet again, make for a dizzying amount of hype.
 * Created by ysc on 12/21/15.
 */
public class SentenceIdentify {
    private static final Logger LOGGER = LoggerFactory.getLogger(SentenceIdentify.class);
    /** Dictionary words used as building blocks for random sentences; empty if loading failed. */
    private static final List<String> WORDS = new ArrayList<>();
    /** Single shared generator; avoids constructing and re-seeding a Random for every draw. */
    private static final Random RANDOM = new Random();
    static {
        try {
            Utils.readResource("/dic.txt").forEach(WORDS::add);
        }catch (Exception e){
            // Best effort: the class stays usable for scoring even without the dictionary.
            LOGGER.error("load words failed", e);
        }
    }

    /**
     * Segments the sentence with {@link WordSegmenter#segWithStopWords}, prints the
     * segmentation result and the sentence to standard output, and returns the value of
     * {@link Bigram#sentenceScore} for the segmented words.
     *
     * @param sentence the sentence to score
     * @return the score computed by {@code Bigram.sentenceScore}
     */
    public static float identify(String sentence){
        List<Word> words = WordSegmenter.segWithStopWords(sentence);
        System.out.println("随机单词: "+words);
        System.out.println("生成句子: "+sentence);
        return Bigram.sentenceScore(words);
    }

    /**
     * Builds random sentences by concatenating 5 to 9 randomly chosen dictionary words.
     *
     * @param count number of sentences to generate; values {@code <= 0} yield an empty list
     * @return a new mutable list with {@code count} sentences, or an empty list when the
     *         dictionary is empty (for example because loading it failed)
     */
    public static List<String> generateRandomSentences(int count){
        List<String> sentences = new ArrayList<>();
        if(WORDS.isEmpty()){
            // Random.nextInt(0) would throw an unhelpful IllegalArgumentException.
            LOGGER.warn("dictionary is empty, no random sentences generated");
            return sentences;
        }
        for(int i=0; i<count; i++){
            StringBuilder sentence = new StringBuilder();
            int len = RANDOM.nextInt(5)+5;
            for(int j=0; j<len; j++){
                sentence.append(WORDS.get(RANDOM.nextInt(WORDS.size())));
            }
            sentences.add(sentence.toString());
        }
        return sentences;
    }

    /**
     * Interactive loop: reads lines from standard input using the given encoding, scores
     * every space-separated item of each non-blank line, and terminates the JVM when the
     * line (ignoring surrounding whitespace) is {@code exit}. Returns when input ends.
     *
     * @param encoding name of the charset used to decode standard input
     */
    private static void run(String encoding) {
        try(BufferedReader reader = new BufferedReader(new InputStreamReader(System.in, encoding))){
            String line;
            while((line = reader.readLine()) != null){
                String input = line.trim();
                if(input.isEmpty()){
                    continue;
                }
                if("exit".equals(input)){
                    // Log before exiting: nothing after System.exit is executed.
                    LOGGER.info("退出");
                    System.exit(0);
                    return;
                }
                // Split on runs of spaces so repeated spaces do not produce empty sentences.
                processSentence(input.split(" +"));
                showUsage();
            }
        } catch (IOException ex) {
            LOGGER.error("程序中断:", ex);
        }
    }

    /** Prints the interactive usage banner to standard output. */
    private static void showUsage() {
        System.out.println("");
        System.out.println("********************************************");
        System.out.println("用法: 输入句子并回车");
        System.out.println("输入exit退出程序");
        System.out.println("********************************************");
    }

    /**
     * Scores each given sentence with {@link #identify(String)} and prints the result.
     *
     * @param args the sentences to score
     */
    private static void processSentence(String[] args) {
        for (String item : args){
            System.out.println("句子概率: " + identify(item));
        }
    }

    /**
     * Scores all sentences in parallel and returns them ordered by descending score.
     * Duplicate sentences collapse into a single entry because the sentence is the map key.
     *
     * @param sentences the sentences to score
     * @return list of (sentence, score) entries, highest score first
     */
    public static List<Map.Entry<String, Float>> evaluation(List<String> sentences){
        // Concurrent map because entries are written from parallel stream worker threads.
        Map<String, Float> map = new ConcurrentHashMap<>();
        sentences.parallelStream().forEach(sentence -> {
            float score = identify(sentence);
            map.put(sentence, score);
        });
        return map.entrySet().stream().sorted((a,b)->b.getValue().compareTo(a.getValue())).collect(Collectors.toList());
    }

    /**
     * Tells whether the given string names a charset supported by this JVM.
     * {@link Charset#isSupported} throws for names containing illegal characters (for
     * example a Chinese sentence passed on the command line); such names are reported as
     * unsupported instead of crashing the caller.
     *
     * @param name candidate charset name
     * @return {@code true} only if {@code name} is a legal, supported charset name
     */
    private static boolean isSupportedCharset(String name) {
        try {
            return Charset.isSupported(name);
        } catch (IllegalArgumentException e) {
            // Covers IllegalCharsetNameException (illegal characters) and a null name.
            return false;
        }
    }

    /**
     * Demo entry point: scores a few fixed sentences plus 94 random ones, prints them
     * ranked by score, then either starts the interactive loop (no arguments, or the first
     * argument is a supported charset name used to decode standard input) or scores the
     * arguments as sentences and exits.
     *
     * @param args optional charset name, or sentences to score
     */
    public static void main(String[] args) {
        List<String> list = new ArrayList<>();
        list.add("我爱读书");
        list.add("我爱学习");
        list.add("我是一个人");
        list.add("我是一个男人你是一个女人");
        list.add("中话眼录学打了啊一有");
        list.add("天我滑去人够");
        list.addAll(generateRandomSentences(94));
        AtomicInteger i = new AtomicInteger();
        evaluation(list).forEach(entry->{
            System.out.println(i.incrementAndGet() + ". 句子: " + entry.getKey() + ", 概率: " + entry.getValue());
        });
        String encoding = "utf-8";
        if(args==null || args.length == 0){
            showUsage();
            run(encoding);
        }else if(isSupportedCharset(args[0])){
            showUsage();
            run(args[0]);
        }else{
            processSentence(args);
            // Non-interactive mode: exit the JVM.
            System.exit(0);
        }
    }
}