# -*- coding: utf-8 -*-
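"""Embedding-based reader: enumerates candidate answer spans from a paragraph
and returns the span whose embedding is most cosine-similar to a question.

The embedder is pluggable; a fastText embedder is used by default, with ELMo
and BERT embedders available but commented out below.
"""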
import sys
import os
import numpy as np
import nltk  # sent_tokenize below requires the 'punkt' data: nltk.download('punkt')
from sklearn.metrics.pairwise import cosine_similarity
sys.path.append(os.path.abspath("../embedding"))
from fasttext_embedding import fastTextEmbedder
# from elmo_embedding import elmo_embedding
# from bert_embedding import bert_embedding
# To use the BERT embedder, first start the bert-as-service server, e.g.:
# bert-serving-start -model_dir "C:/Users/Hussein/Documents/Research/FYP-Arabic NLP/bert/multilingual_L-12_H-768_A-12" -num_worker=1
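# Note: any embedder passed to embeddingReader below is assumed to expose an
# embed(text) method returning a 1-D numpy vector, as its use in read() implies.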

class embeddingReader:
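    """Ranks candidate answer spans of a paragraph by embedding similarity to a question."""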
    def __init__(self, embedder):
        self.embedder = embedder

    def concatenateString(self, tokens, start, length):
        """Join `length` consecutive tokens beginning at index `start` into one string."""
        return " ".join(tokens[start:start + length])

    def get_answer_candidates(self, paragraph):
        """Enumerate every contiguous span of up to 14 tokens within each
        sentence of `paragraph` as a candidate answer string."""
        candidates = []
        for sent in nltk.sent_tokenize(paragraph):
            words = sent.split()
            for i in range(len(words)):
                # j is the span length; the exclusive bound of 15 caps spans at 14 tokens
                for j in range(1, min(15, len(words) - i + 1)):
                    candidates.append(self.concatenateString(words, i, j))
        return candidates

    def read(self, P, Q):
        """Return the candidate span of paragraph P whose embedding is most
        cosine-similar to the embedding of question Q."""
        A = self.get_answer_candidates(P)
        A_embed = np.vstack([self.embedder.embed(a) for a in A])
        Q_embed = self.embedder.embed(Q).reshape(1, -1)
        # Similarity of every candidate against the question, shape (len(A),)
        similarities = cosine_similarity(A_embed, Q_embed).ravel()
        # The highest-scoring candidate is the predicted answer
        return A[int(np.argmax(similarities))]
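
# Minimal usage sketch. Assumptions (not confirmed by this file): fastTextEmbedder
# can be constructed without arguments, and its embed() returns a 1-D numpy
# vector for a string, as read() above implies. Adapt to the real signatures.
if __name__ == "__main__":
    embedder = fastTextEmbedder()  # hypothetical no-arg construction
    reader = embeddingReader(embedder)
    paragraph = ("The Nile is a major river in northeastern Africa. "
                 "It flows into the Mediterranean Sea.")
    question = "Where does the Nile flow into?"
    print(reader.read(paragraph, question))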