"""
file: annotation_gui.py

author: Yoann Dupont

MIT License

Copyright (c) 2018 Yoann Dupont

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""

from __future__ import print_function

import codecs
import glob
import logging
import os
import re
import sys

try:
    import Tkinter as tkinter
except ImportError:
    import tkinter

try:
    import ttk
except ImportError:
    from tkinter import ttk

try:
    from tkFont import Font
except ImportError:
    from tkinter.font import Font
try:
    import tkFileDialog
    tkinter.filedialog = tkFileDialog
except ImportError:
    import tkinter.filedialog
try:
    import ScrolledText
    tkinter.scrolledtext = ScrolledText
except ImportError:
    import tkinter.scrolledtext
try:
    import tkMessageBox
    tkinter.messagebox = tkMessageBox
except ImportError:
    import tkinter.messagebox

import sem
from sem.constants import NUL
from sem.misc import documents_from_list
from sem.storage.document import Document, SEMCorpus
from sem.storage.annotation import Tag, Annotation
from sem.logger import extended_handler
import sem.importers
from sem.gui.misc import find_potential_separator, find_occurrences, random_color, Adder2
from sem.gui.components import SEMTkTrainInterface, SearchFrame
from sem.gui.components import SemTkMasterSelector, SemTkLangSelector # TODO: remove
from sem.storage.annotation import str2filter
import sem.modules.tagger

# Module-level logger for the annotation GUI. Its records are sent to SEM's
# shared ``extended_handler``; the level is set later by ``AnnotationTool``.
annotation_gui_logger = logging.getLogger("sem.annotation_gui")
annotation_gui_logger.addHandler(extended_handler)

def update_annotations(document, annotation_name, annotations):
    """Replace the annotation set ``annotation_name`` of ``document``.

    A new ``Annotation`` named ``annotation_name`` is filled with
    ``annotations`` and added to ``document``. If the document already had an
    annotation set of that name carrying a reference, ``set_reference`` is
    called afterwards with that reference's name so it is not lost.

    :param document: the document to update.
    :param annotation_name: name of the annotation set to (re)create.
    :param annotations: the annotations to store in the new set.
    """
    annots = Annotation(annotation_name)
    annots.annotations = annotations
    try:
        reference = document.annotation(annotation_name).reference
    except Exception:
        # Best effort: the set may not exist yet. ``Exception`` (rather than a
        # bare ``except``) lets KeyboardInterrupt/SystemExit propagate.
        reference = None
    document.add_annotation(annots)
    if reference:
        document.set_reference(annotation_name, reference.name)

def check_in_tagset(tag, tagset):
    """Tell whether ``tag`` belongs to ``tagset`` or is a prefix of one of its tags.

    :param tag: the tag (string) to look for.
    :param tagset: iterable container of tag strings.
    :return: True on an exact match or when at least one element of
        ``tagset`` starts with ``tag``; False otherwise.
    """
    if tag in tagset:
        return True
    # no exact match: accept the tag if it is the beginning of a known tag
    return any(candidate.startswith(tag) for candidate in tagset)

class AnnotationTool(tkinter.Frame):
    def __init__(self, parent, log_level, documents=None, tagset=None, *args, **kwargs):
        """Build the annotation window: menus, toolbar, corpus tree, text and annotation tree.

        :param parent: the Tk widget hosting this frame; it receives the menu
            bar and some key bindings, and is destroyed by ``exit``.
        :param log_level: level given to the module logger.
        :param documents: optional iterable of glob patterns; matching files
            are opened and the first corpus document is selected.
        :param tagset: optional value forwarded to ``load_tagset``.
        :param args: extra positional arguments for ``tkinter.Frame``.
        :param kwargs: extra keyword arguments for ``tkinter.Frame``.
        """
        tkinter.Frame.__init__(self, parent, *args, **kwargs)

        annotation_gui_logger.setLevel(log_level)

        self.bind_all("<Alt-F4>", self.exit)

        self.resource_dir = os.path.join(sem.SEM_RESOURCE_DIR)

        self.parent = parent

        self.user = None
        self.doc = None
        self.doc_is_modified = False
        self.annotation_name = None
        self.annotations = []
        self.annotations_tick = 0

        # [key, [label, callback], [[widget, bind_all?]]]; shown in the preferences dialog
        self.shortcuts = [
            ["Ctrl+o", ["open file", self.openfile_gui], [[self, True]]], # True = bind_all
            ["Ctrl+Shift+o", ["open url", self.openurl], [[self, True]]], # True = bind_all
            ["Ctrl+s", ["save", self.save], [[self, True]]], # True = bind_all
            ["Ctrl+t", ["train", self.train], [[self, True]]], # True = bind_all
            ["Ctrl+f", ["find", self.find_in_text], [[self, True]]], # True = bind_all
        ]

        self.lines_lengths = []

        #
        # menu
        #
        self.global_menu = tkinter.Menu(self.parent)
        # file menu
        self.file_menu = tkinter.Menu(self.global_menu, tearoff=False)
        self.global_menu.add_cascade(label="File", underline=0, menu=self.file_menu)
        self.file_menu.add_command(label="Open...", underline=0, command=self.openfile_gui, accelerator="Ctrl+O")
        self.file_menu.add_command(label="Open url...", underline=5, command=self.openurl, accelerator="Ctrl+Shift+O")
        self.file_menu.add_command(label="Save to...", underline=0, command=self.save, accelerator="Ctrl+S")
        self.saveas_menu = tkinter.Menu(self.file_menu, tearoff=False)
        self.file_menu.add_cascade(label="Save as...", underline=5, menu=self.saveas_menu)
        self.saveas_menu.add_command(label="BRAT corpus", underline=0, command=self.save_brat)
        self.saveas_menu.add_command(label="GATE corpus", underline=0, command=self.save_gate)
        self.saveas_menu.add_command(label="TEI ANALEC corpus", underline=4, command=self.save_tei_analec)
        self.saveas_menu.add_command(label="TEI REDEN corpus", underline=4, command=self.save_tei_reden)
        self.saveas_menu.add_command(label="JSON corpus", underline=0, command=self.save_json)
        # saving is only enabled once a document has been opened
        self.file_menu.entryconfig("Save to...", state=tkinter.DISABLED)
        self.file_menu.entryconfig("Save as...", state=tkinter.DISABLED)
        self.file_menu.add_separator()
        self.file_menu.add_command(label="load tagset...", underline=5, command=self.load_tagset_gui)
        self.file_menu.add_command(label="Load master...", underline=5, command=self.load_pipeline)
        # edit menu
        self.edit_menu = tkinter.Menu(self.global_menu, tearoff=False)
        self.global_menu.add_cascade(label="Edit", underline=0, menu=self.edit_menu)
        self.edit_menu.add_command(label="Preferences...", command=self.preferences)
        # ? menu
        self.qm_menu = tkinter.Menu(self.global_menu, tearoff=False)
        self.global_menu.add_cascade(label="?", menu=self.qm_menu)
        self.qm_menu.add_command(label="About SEM...", command=self.about_sem)
        # final
        self.parent.config(menu=self.global_menu)

        self.new_type = tkinter.StringVar()
        # colour pairs available for annotation types
        self.SPARE_COLORS_DEFAULT = [
            {"background":"#CCCCCC", "foreground":"#000000"},
            {'foreground': '#374251', 'background': '#9ca9bc'},
            {'foreground': '#4b3054', 'background': '#b28fbf'},
            {'foreground': '#625e2d', 'background': '#d0cb99'},
            {'foreground': '#454331', 'background': '#a7a383'},
            {'foreground': '#79a602', 'background': '#e7fea8'},
            {'background': '#C8A9DC', 'foreground': '#542D6E'},
            {'background': '#C9B297', 'foreground': '#5C4830'},
            {'foreground': '#426722', 'background': '#aad684'},
            {'foreground': '#886c11', 'background': '#f1da91'},
            {'foreground': '#275a5f', 'background': '#85c6cc'},
            {'foreground': '#0a9b47', 'background': '#a3fac8'},
            {'foreground': '#729413', 'background': '#e3f5af'},
            {'foreground': '#a22800', 'background': '#ffb299'},
            {'foreground': '#254084', 'background': '#bccaed'},
            {'foreground': '#601194', 'background': '#d7a8f6'},
            {'foreground': '#6c4c45', 'background': '#e6dad7'},
            {'foreground': '#1461a1', 'background': '#cce5f9'},
            {'foreground': '#8a570d', 'background': '#f4c888'},
            {'foreground': '#813058', 'background': '#eecfde'},
        ]
        self.SPARE_COLORS_DEFAULT.extend([
            {"background":"#DDFFDD", "foreground":"#008800"},
            {"background":"#CCCCFF", "foreground":"#0000FF"},
            {"background":"#CCEEEE", "foreground":"#008888"},
            {"background":"#FFCCCC", "foreground":"#FF0000"},
        ]) # at the end for "pop"
        self.spare_colors = self.SPARE_COLORS_DEFAULT[:]

        self.bind_all("<Control-o>", self.openfile_gui)
        self.bind_all("<Control-O>", self.openurl)
        self.bind_all("<Control-s>", self.save)
        self.bind_all("<Control-t>", self.train)
        self.bind_all("<Control-f>", self.find_in_text)
        self.focus_set()

        self.bind_all("<Tab>", self.tab)
        self.bind_all("<Shift-Tab>", self.shift_tab)

        # keys that can be bound to annotation types (see handle_char)
        self.available_chars_set = list(u"abcdefghijklmnopqrstuvwxyz") + [u'F1', u'F2', u'F3', u'F4', u'F5', u'F6', u'F7', u'F8', u'F9', u'F10', u'F11', u'F12']
        self.SELECT_TYPE = u"-- select type --"
        self.wish_to_add = []

        self.current_annotations = Annotation("CurrentAnnotations")
        self.adder = None

        self.toolbar = ttk.Frame(self)
        self.toolbar.pack(side="top", fill="x")

        self.train_btn = ttk.Button(self.toolbar, text="train", command=self.train)
        self.train_btn.pack(side="left")
        self.train_btn.configure(state=tkinter.DISABLED)

        self.tag_document_btn = ttk.Button(self.toolbar, text="tag document", command=self.tag_document)
        self.tag_document_btn.pack(side="left")
        self.tag_document_btn.configure(state=tkinter.DISABLED)

        #self.load_tagset_btn = ttk.Button(self.toolbar, text="load tagset", command=self.load_tagset_gui)
        #self.load_tagset_btn.pack(side="left")

        self.type_combos = []
        self.add_type_lbls = []
        self.annotation_row = ttk.PanedWindow(self, orient="horizontal")

        # left pane: tree of the corpus documents
        self.corpus_tree = ttk.Treeview(self.annotation_row)
        self.corpus_tree_scrollbar = ttk.Scrollbar(self.annotation_row, command=self.corpus_tree.yview)
        self.corpus_tree.configure(yscroll=self.corpus_tree_scrollbar.set)
        self.corpus_tree.heading("#0", text="corpus", anchor=tkinter.W)
        self.corpus_tree.bind("<<TreeviewSelect>>", self.load_document)
        self.corpus_documents = []
        self.corpus_id2doc = {}
        self.corpus_doc2id = {}

        # middle pane: the document text (read-only widget)
        self.text = tkinter.scrolledtext.ScrolledText(self.annotation_row, wrap=tkinter.WORD, font="Helvetica")
        self.text.configure(state="disabled")
        self.text.bind("<Shift-Tab>", self.shift_tab)
        self.search = SearchFrame(self.text)
        for char in self.available_chars_set:
            self.text.bind("<{0}>".format(char), self.handle_char)
            self.parent.bind("<{0}>".format(char), self.handle_char)
            if char.islower():
                # also react to the upper-case version of letters
                self.text.bind("<{0}>".format(char.upper()), self.handle_char)
                self.parent.bind("<{0}>".format(char.upper()), self.handle_char)

        # right pane: tree of the annotation sets
        self.tree = ttk.Treeview(self.annotation_row)
        self.tree_scrollbar = ttk.Scrollbar(self.annotation_row, command=self.tree.yview)
        self.tree.configure(yscroll=self.tree_scrollbar.set)
        self.tree.heading("#0", text="annotation sets", anchor=tkinter.W)
        self.tree.bind("<<TreeviewSelect>>", self.select_from_tree)
        self.tree_ids = {}
        self.annot2treeitems = {}

        self.annotation_row.add(self.corpus_tree)
        self.annotation_row.add(self.corpus_tree_scrollbar)
        self.annotation_row.add(self.text)
        self.annotation_row.add(self.tree)
        self.annotation_row.add(self.tree_scrollbar)
        self.annotation_row.pack(side="left", fill="both", expand=True)

        self.text.bind("<Button-1>", self.click)
        self.text.bind("<Delete>", self.delete)
        self.parent.bind("<Delete>", self.delete)
        self.text.bind("<Shift-Delete>", self.delete_all)
        self.parent.bind("<Shift-Delete>", self.delete_all)
        self.text.bind("<Escape>", self.unselect)
        self.parent.bind("<Escape>", self.unselect)

        self.tree.bind("<Delete>", self.delete)
        self.tree.bind("<Shift-Delete>", self.delete_all)

        ## configuring a tag called BOLD
        bold_font = Font(self.text)
        bold_font.configure(weight="bold")
        self.text.tag_configure("BOLD", font=bold_font)

        self.workflow = None

        self.position2annots = {}
        self.ner2history = {}

        # preferences
        self._whole_word = tkinter.BooleanVar()
        self._whole_word.set(True)
        self.wikinews_format = tkinter.BooleanVar()
        self.wikinews_format.set(False)

        self.pipeline = None

        if tagset:
            self.load_tagset(tagset)
        if documents:
            doc_list = []
            for doc in documents:
                doc_list.extend(glob.glob(doc))
            self.openfile(doc_list)
            # The patterns may match nothing (or nothing loadable): only select
            # the first document when the corpus actually contains one.
            if self.corpus_documents:
                ident = self.corpus_doc2id[self.corpus_documents[0].name]
                self.corpus_tree.selection_set(ident)
                self.corpus_tree.focus(ident)
                self.corpus_tree.see(ident)
                self.load_document()
        #skip_auth=> self.auth()
    
    @property
    def whole_word(self):
        """The "match whole word" preference, as a plain ``bool``."""
        raw_value = self._whole_word.get()
        return bool(raw_value)
    
    def exit(self, event=None):
        """Close the application: destroy this frame, then its parent widget.

        :param event: the Tk event when triggered by a key binding (unused).
        """
        for widget in (self, self.parent):
            widget.destroy()
    
    def auth(self, event=None):
        """Open a login dialog and check the credentials against the ``.users`` file.

        Credentials are looked up as ``sha1(login) + sha1(password)`` (hex
        digests) in the content of ``.users``. On success ``self.user`` is set
        and, for the ``admin`` user, an "add user" button is added to the
        toolbar. Closing the dialog window exits the program.

        NOTE(review): credentials are stored as unsalted SHA-1 digests, which
        is weak. This is flagged rather than changed, since changing the
        scheme would invalidate existing ``.users`` files.

        :param event: the Tk event when triggered by a binding (unused).
        """
        authTop = tkinter.Toplevel()
        authTop.grab_set()
        auth_login = tkinter.StringVar(authTop, value="admin")
        auth_pw = tkinter.StringVar(authTop, value="")

        # line that has to be present in ".users" (the admin entry)
        admin_entry = "d033e22ae348aeb5660fc2140aec35850c4da997aee5aca44055f2cd2f2ce4266909b69a5d96dad2\n"

        def close():
            sys.exit(0)

        authTop.protocol('WM_DELETE_WINDOW', close)

        def check_auth():
            import time, hashlib

            def sha1_hex(text):
                # hashlib only accepts bytes on Python 3: encode before hashing
                return hashlib.sha1(text.encode("utf-8")).hexdigest()

            pwd = auth_pw.get()
            if pwd == "":
                print("Please enter non empty password")
                return  # same behaviour as add_user: do not go further
            try:
                # "with" so that the file handle is always closed
                with open(".users") as users_file:
                    content = users_file.read()
                if admin_entry not in content:
                    print("Something fishy about your .user file, rewriting it with admin user only.")
                    with codecs.open(".users", "w") as O:
                        O.write(admin_entry)
                    time.sleep(5.0)
                    sys.exit(1)
                login = sha1_hex(auth_login.get())
                pw = sha1_hex(pwd)
                checked = login + pw in content
                if checked:
                    self.user = auth_login.get()[:]
                    tkinter.messagebox.showinfo("Login success","Logged succesfuly as {0}".format(self.user))

                    if self.user == "admin":
                        self.add_user_btn = ttk.Button(self.toolbar, text="add user", command=self.add_user)
                        self.add_user_btn.pack(side="left")

                    authTop.destroy()
                else:
                    tkinter.messagebox.showerror("Login error", "Wrong login/password")
            except IOError:
                # ".users" could not be read: (re)create it with the admin entry
                with codecs.open(".users", "w") as O:
                    O.write(admin_entry)
                print("Could not find .user file, rewriting it with admin user only.")

        authLabel = tkinter.Label(authTop, text="Enter credentials:")
        authLabel.grid(row=0, column=0, sticky=tkinter.W+tkinter.E, columnspan=2)

        tkinter.Label(authTop, text='login').grid(row=1, column=0, sticky=tkinter.W)
        auth_login_entry = tkinter.Entry(authTop, textvariable=auth_login)
        auth_login_entry.grid(row=1, column=1, sticky=tkinter.W)
        tkinter.Label(authTop, text='password').grid(row=2, column=0, sticky=tkinter.W)
        auth_pw_entry = tkinter.Entry(authTop, textvariable=auth_pw, show="*")
        auth_pw_entry.grid(row=2, column=1, sticky=tkinter.W)

        login_btn = ttk.Button(authTop, text="login", command=check_auth)
        login_btn.grid(row=3, column=0, sticky=tkinter.W+tkinter.E, columnspan=2)
        
    def add_user(self, event=None):
        """Open a dialog that appends a new user to the ``.users`` file.

        The new entry is written as ``sha1(login) + sha1(password)`` (hex
        digests) on its own line. Nothing is added when the password is empty
        or when a line already starts with the digest of the login.

        NOTE(review): credentials are stored as unsalted SHA-1 digests, which
        is weak. This is flagged rather than changed, since changing the
        scheme would invalidate existing ``.users`` files.

        :param event: the Tk event when triggered by a binding (unused).
        """
        authTop = tkinter.Toplevel()
        authTop.grab_set()
        auth_login = tkinter.StringVar(authTop, value="admin")
        auth_pw = tkinter.StringVar(authTop, value="")

        # entry that has to be present in ".users" (the admin entry)
        admin_entry = "d033e22ae348aeb5660fc2140aec35850c4da997aee5aca44055f2cd2f2ce4266909b69a5d96dad2"

        def check_auth():
            import hashlib

            def sha1_hex(text):
                # hashlib only accepts bytes on Python 3: encode before hashing
                return hashlib.sha1(text.encode("utf-8")).hexdigest()

            pwd = auth_pw.get()
            if pwd == "":
                print("Please enter non empty password")
                return
            try:
                # "with" so that the file handle is always closed
                with open(".users") as users_file:
                    lines = [line.strip() for line in users_file.readlines()]
                if admin_entry not in lines:
                    print("Something fishy about your .user file, rewriting it with admin user only.")
                    with codecs.open(".users", "w") as O:
                        O.write(admin_entry + "\n")
                login = sha1_hex(auth_login.get())
                pw = sha1_hex(auth_pw.get())
                for line in lines:
                    if line.startswith(login):
                        tkinter.messagebox.showerror("Cannot add user", "User {0} already exists".format(auth_login.get()))
                        return

                with codecs.open(".users", "a") as O:
                    O.write("{0}{1}\n".format(login, pw))
                tkinter.messagebox.showinfo("New user","Succesfuly added user {0}".format(auth_login.get()))
                authTop.destroy()
            except IOError:
                # ".users" could not be read: (re)create it with the admin entry
                with codecs.open(".users", "w") as O:
                    O.write(admin_entry + "\n")
                print("Could not find .user file, rewriting it with admin user only.")

        authLabel = tkinter.Label(authTop, text="Enter credentials:")
        authLabel.grid(row=0, column=0, sticky=tkinter.W+tkinter.E, columnspan=2)

        tkinter.Label(authTop, text='login').grid(row=1, column=0, sticky=tkinter.W)
        auth_login_entry = tkinter.Entry(authTop, textvariable=auth_login)
        auth_login_entry.grid(row=1, column=1, sticky=tkinter.W)
        tkinter.Label(authTop, text='password').grid(row=2, column=0, sticky=tkinter.W)
        auth_pw_entry = tkinter.Entry(authTop, textvariable=auth_pw, show="*")
        auth_pw_entry.grid(row=2, column=1, sticky=tkinter.W)

        login_btn = ttk.Button(authTop, text="login", command=check_auth)
        login_btn.grid(row=3, column=0, sticky=tkinter.W+tkinter.E, columnspan=2)
    
    #
    # file menu methods
    #
    
    def openfile(self, filenames):
        """Load the documents found in ``filenames`` and add them to the corpus.

        Files ending with ``.sem.xml`` or ``.sem`` are first read as a SEM
        corpus and, if that fails, as a single SEM document; their annotation
        sets are replaced by their reference annotations. Any other file goes
        through ``sem.importers.load``. Within one call, only the first
        document of a given name is kept. If at least one document was added,
        the current document is (re)loaded and the train/save entries are
        enabled.

        :param filenames: iterable of paths to open.
        """
        chunks_to_load = [self.annotation_name] if self.annotation_name else None

        def use_reference_annotations(doc):
            # Snapshot the names first: add_annotation() touches the mapping
            # that is being walked.
            for annotation_name in list(doc.annotations.keys()):
                doc.add_annotation(Annotation(annotation_name, reference=None, annotations=doc.annotation(annotation_name).get_reference_annotations()))

        documents = []
        names = set()
        for filename in filenames:
            if filename.endswith((".sem.xml", ".sem")):
                try:
                    docs = SEMCorpus.from_xml(filename, chunks_to_load=chunks_to_load, load_subtypes=True).documents
                    for doc in docs: # using reference annotations
                        use_reference_annotations(doc)
                    for doc in [doc for doc in docs if doc.name not in names]:
                        documents.append(doc)
                        names.add(doc.name)
                except Exception:
                    # Not readable as a corpus: fall back to a single document.
                    # ``Exception`` (rather than a bare ``except``) lets
                    # KeyboardInterrupt/SystemExit propagate.
                    doc = Document.from_xml(filename, chunks_to_load=chunks_to_load, load_subtypes=True)
                    if doc.name not in names:
                        names.add(doc.name)
                        documents.append(doc)
                        use_reference_annotations(doc)
            else:
                doc = sem.importers.load(filename, encoding="utf-8", tagset_name=self.annotation_name)
                if doc.name not in names:
                    documents.append(doc)
                    names.add(doc.name)

        if not documents:
            return

        added = False
        for document in documents:
            # add_document is always called, even once ``added`` is True
            tmp = self.add_document(document)
            added = added or tmp
        if not added:
            return

        self.load_document()

        self.train_btn.configure(state=tkinter.NORMAL)
        self.file_menu.entryconfig("Save to...", state=tkinter.NORMAL)
        self.file_menu.entryconfig("Save as...", state=tkinter.NORMAL)
        if self.adder is not None:
            self.adder.current_hierarchy_level = 0
            self.update_level()
    
    def openfile_gui(self, event=None):
        """Ask the user for files to open and hand them to ``openfile``.

        :param event: the Tk event when triggered by a key binding (unused).
        """
        filenames = tkinter.filedialog.askopenfilenames(filetypes=[("SEM readable files", (".txt", ".sem.xml", ".sem", ".ann")), ("text files", ".txt"), ("BRAT files", (".txt", ".ann")), ("SEM XML files", ("*.sem.xml", ".sem")), ("All files", ".*")])
        # A cancelled dialog gives an empty tuple or an empty string, never
        # an empty list: test emptiness rather than equality with [].
        if not filenames:
            return
        self.openfile(filenames)
    
    def openurl(self, event=None):
        """Open a dialog asking for a URL and add the fetched document to the corpus.

        The URL is stored in ``self.url``. Pressing OK (or Return) imports the
        page with ``sem.importers.from_url``; when a document is added, it is
        loaded, the train/save entries are enabled and the dialog is closed.
        Cancel (or Escape) clears the URL and closes the dialog.

        :param event: the Tk event when triggered by a key binding (unused).
        """
        toplevel = tkinter.Toplevel()

        self.url = tkinter.StringVar()

        def cancel(event=None):
            self.url.set("")
            toplevel.destroy()
        def ok(event=None):
            document = sem.importers.from_url(self.url.get(), wikinews_format=bool(self.wikinews_format.get()), strip_html=True)
            if document is None: return

            added = self.add_document(document)
            if not added: return

            self.load_document()

            self.train_btn.configure(state=tkinter.NORMAL)
            self.file_menu.entryconfig("Save to...", state=tkinter.NORMAL)
            self.file_menu.entryconfig("Save as...", state=tkinter.NORMAL)
            # reuse cancel() to reset the URL and close the dialog
            cancel()

        label1 = tkinter.Label(toplevel, text="enter url:")
        label1.pack()

        text = ttk.Entry(toplevel, textvariable=self.url)
        text.pack()
        text.focus_set()

        c = ttk.Checkbutton(toplevel, text="Use Wikinews format", variable=self.wikinews_format)
        c.pack()

        toolbar = ttk.Frame(toplevel)
        toolbar.pack(side="top", fill="x")
        ok_btn = ttk.Button(toolbar, text="OK", command=ok)
        ok_btn.pack(side="left")
        cancel_btn = ttk.Button(toolbar, text="cancel", command=cancel)
        cancel_btn.pack(side="left")
        toplevel.bind('<Return>', ok)
        toplevel.bind('<Escape>', cancel)

        # NOTE(review): the toolbar is already packed above; this second call
        # looks redundant - confirm before removing.
        toolbar.pack()
    
    def add_tag(self, value, start, end):
        """Add the text tag ``value`` between ``start`` and ``end`` unless already there.

        :param value: name of the text-widget tag to add.
        :param start: start of the span, either an integer character index
            (converted with ``charindex2position``) or a text-widget index.
        :param end: end of the span, same conventions as ``start``.

        The ``(start, end)`` pair, as given, is the key used in
        ``self.position2annots`` to remember the values tagged on that span.
        The document is marked as modified only when a tag is actually added.
        """
        # isinstance (rather than type(...) == int) also accepts int subclasses
        start_pos = self.charindex2position(start) if isinstance(start, int) else start
        end_pos = self.charindex2position(end) if isinstance(end, int) else end

        pos = (start, end)
        values = self.position2annots.setdefault(pos, set())

        if value not in values:
            self.text.tag_add(value, start_pos, end_pos)
            values.add(value)
            self.doc_is_modified = True
    
    def save(self, event=None):
        """Ask for a file name and write the whole corpus to it as UTF-8.

        The current selection is cleared first and, if the current document
        was modified, its annotations are pushed back into the document
        before writing.

        :param event: the Tk event when triggered by a key binding (unused).
        """
        filename = tkinter.filedialog.asksaveasfilename(defaultextension=".sem.xml")
        # A cancelled dialog may give an empty string or an empty tuple
        # depending on the platform: treat any empty value as "cancel".
        if not filename:
            return

        self.unselect()
        if self.doc_is_modified:
            update_annotations(self.doc, self.annotation_name, self.current_annotations.annotations)

        corpus = SEMCorpus(documents=self.corpus_documents)
        with codecs.open(filename, "w", "utf-8") as O:
            corpus.write(O)
    
    def save_as_format(self, output_directory, fmt):
        """Export every corpus document to ``output_directory`` with exporter ``fmt``.

        Each document is written to ``<basename without extension>.<exporter
        extension>``, colons being removed from the name. For the ``brat``
        format, the raw content of the document is also written to a ``.txt``
        file. Nothing happens when ``output_directory`` is empty.

        :param output_directory: directory receiving the exported files.
        :param fmt: exporter name, given to ``sem.exporters.get_exporter``.
        """
        if not output_directory:
            return

        if self.doc_is_modified:
            update_annotations(self.doc, self.annotation_name, self.current_annotations.annotations)

        corpus = SEMCorpus(documents=self.corpus_documents)
        exporter = sem.exporters.get_exporter(fmt)()
        couples = {"ner":self.annotation_name}
        needs_raw_text = (fmt == "brat")
        for document in corpus:
            name = os.path.basename(document.name).replace(":", "")
            stem = os.path.splitext(name)[0]
            out_path = os.path.join(output_directory, "{0}.{1}".format(stem, exporter.extension()))
            if needs_raw_text:
                # the raw text is written next to the export
                text_name = name if name.endswith(".txt") else name + ".txt"
                with codecs.open(os.path.join(output_directory, text_name), "w", "utf-8") as O:
                    O.write(document.content)
            exporter.document_to_file(document, couples, out_path, encoding="utf-8")
    
    def save_brat(self, event=None):
        """Ask for a directory and export the corpus there in BRAT format."""
        self.save_as_format(
            tkinter.filedialog.askdirectory(initialdir=sem.SEM_DATA_DIR),
            "brat"
        )
    
    def save_gate(self, event=None):
        """Ask for a directory and export the corpus there in GATE format."""
        self.save_as_format(
            tkinter.filedialog.askdirectory(initialdir=sem.SEM_DATA_DIR),
            "gate"
        )
    
    def save_tei_analec(self, event=None):
        """Ask for a directory and export the corpus there in TEI ANALEC format."""
        self.save_as_format(
            tkinter.filedialog.askdirectory(initialdir=sem.SEM_DATA_DIR),
            "tei_analec"
        )
    
    def save_tei_reden(self, event=None):
        """Ask for a directory and export the corpus there in TEI REDEN format."""
        self.save_as_format(
            tkinter.filedialog.askdirectory(initialdir=sem.SEM_DATA_DIR),
            "tei_reden"
        )
    
    def save_json(self, event=None):
        """Ask for a directory and export the corpus there ("JSON corpus" menu entry)."""
        # NOTE(review): the exporter key is spelled "jason", not "json" -
        # presumably the name SEM registers for its JSON exporter; confirm
        # against sem.exporters before changing it.
        self.save_as_format(
            tkinter.filedialog.askdirectory(initialdir=sem.SEM_DATA_DIR),
            "jason"
        )
    
    #
    # Edit menu methods
    #
    
    def preferences(self, event=None):
        """Open the preferences window.

        The "general" tab holds the whole-word matching option; the
        "shortcuts" tab lists each entry of ``self.shortcuts`` as a label
        followed by an entry field pre-filled with its key.

        :param event: the Tk event when triggered by a binding (unused).
        """
        preferenceTop = tkinter.Toplevel()
        preferenceTop.focus_set()

        notebook = ttk.Notebook(preferenceTop)

        general_tab = ttk.Frame(notebook)
        notebook.add(general_tab, text='general')
        shortcuts_tab = ttk.Frame(notebook)
        notebook.add(shortcuts_tab, text='shortcuts')

        whole_word_check = ttk.Checkbutton(general_tab, text="Match whole word when broadcasting annotation", variable=self._whole_word)
        whole_word_check.pack()

        common_frame = ttk.LabelFrame(shortcuts_tab, text="common shortcuts")
        common_frame.pack(fill="both", expand="yes")

        # the variables are kept in a list for the duration of the call
        shortcut_vars = []
        for row, (key, (name, _command), _bindings) in enumerate(self.shortcuts):
            key_var = tkinter.StringVar(common_frame, value=key)
            shortcut_vars.append(key_var)
            tkinter.Label(common_frame, text=name).grid(row=row, column=0, sticky=tkinter.W)
            key_entry = tkinter.Entry(common_frame, textvariable=key_var)
            key_entry.grid(row=row, column=1)
        notebook.pack()
    
    #
    # ? menu methods
    #
    
    def about_sem(self, event=None):
        """Open the "About SEM" window: full name of SEM, then contact information.

        :param event: the Tk event when triggered by a binding (unused).
        """
        options = {"background":"white"}
        aboutTop = tkinter.Toplevel(**options)
        aboutTop.title("About SEM")
        aboutTop.focus_set()

        two_cols = [
            ("author:", "Yoann Dupont"),
            ("mail:", "yoa.dupont@gmail.com"),
            ("website:", "http://www.lattice.cnrs.fr/sites/itellier/SEM.html"),
            ("github:", "https://github.com/YoannDupont/SEM"),
            ("online app:", "apps.lattice.cnrs.fr/sem"),
        ]

        # title on the first row, followed by two empty rows
        ttk.Label(aboutTop, text=sem.full_name(), **options).grid(row=0, column=0)
        for blank_row in (1, 2):
            ttk.Label(aboutTop, text="", **options).grid(row=blank_row, column=0)

        # one row per (key, value) pair, each followed by an empty row
        row = 3
        for key, val in two_cols:
            ttk.Label(aboutTop, text=key, **options).grid(row=row, column=0, sticky="w")
            ttk.Label(aboutTop, text=val, **options).grid(row=row, column=1, sticky="w")
            ttk.Label(aboutTop, text="", **options).grid(row=row + 1, column=0)
            row += 2
    
    #
    # global methods
    #
    
    def train(self, event=None):
        """Open the training interface on the corpus documents.

        Pending modifications of the current document are first written back
        into it.

        :param event: the Tk event when triggered by a key binding (unused).
        """
        if self.doc_is_modified:
            update_annotations(
                self.doc,
                self.annotation_name,
                self.current_annotations.annotations
            )

        SEMTkTrainInterface(self.corpus_documents)
    
    def handle_char(self, event):
        """Annotate with the type bound to the pressed key.

        Nothing is done without an adder, without a current annotation or text
        selection, or when no type is bound to the key. A key whose keysym is
        lower case annotates the current annotation/selection only; any other
        key annotates every occurrence of the selected text in the document
        (honouring the whole-word preference), then clears the selection.

        :param event: the Tk key event; only its ``keysym`` is read here.
        """
        adder = self.adder
        if adder is None:
            return
        if not adder.current_annotation:
            # no current annotation: a text selection is then required
            try:
                self.text.index("sel.first")
            except tkinter.TclError:
                return

        keysym = event.keysym
        the_type = adder.type_from_letter(keysym) or adder.type_from_letter(keysym.lower())
        if the_type is None:
            return

        if keysym.islower():
            # single annotation on the current annotation or selection
            if adder.current_annotation:
                fst = self.charindex2position(adder.current_annotation.lb)
                lst = self.charindex2position(adder.current_annotation.ub)
            else:
                fst = self.text.index("sel.first")
                lst = self.text.index("sel.last")
            self.wish_to_add = [adder.type_from_letter(keysym), fst, lst]
            self.add_annotation(event, remove_focus=False)
            return

        # broadcast: annotate every occurrence of the selected text
        if adder.current_annotation is not None:
            start = self.charindex2position(adder.current_annotation.lb)
            end = self.charindex2position(adder.current_annotation.ub)
        else:
            start, end = ("sel.first", "sel.last")
        for match in find_occurrences(self.text.get(start, end), self.doc.content, whole_word=self.whole_word):
            lower, upper = match.start(), match.end()
            cur_start = self.charindex2position(lower)
            cur_end = self.charindex2position(upper)
            if Tag(the_type, lower, upper) not in self.current_annotations:
                self.wish_to_add = [the_type, cur_start, cur_end]
                self.add_annotation(None, remove_focus=False)
        self.unselect()
    
    def add_annotation(self, event, remove_focus=True):
        """
        Add an annotation to the current document, or set a sub-level on an
        existing annotation.

        The type and the span come from one of two sources, checked in this
        order: the combobox of the current hierarchy level (the span is then
        the current text selection), or ``self.wish_to_add``, a
        ``[type, first, last]`` triple holding a type and two text-widget
        indices. When neither is available, nothing is done.

        Args:
            event: unused. The method is bound to ``<<ComboboxSelected>>`` and
                is also called directly with ``None``.
            remove_focus: when True, the pending request, the current
                annotation and the hierarchy level are reset at the end;
                when False, the span is tagged "BOLD" instead.
        """
        cur_type = self.type_combos[self.adder.current_hierarchy_level].get()
        if cur_type.strip() != u"" and cur_type != self.SELECT_TYPE:
            # A type was picked in the combobox: annotate the text selection.
            first = "sel.first"
            last = "sel.last"
            # Combobox entries may carry a " (x or Shift+x)" shortcut hint
            # after the type (see update_level): keep the first token only.
            value = cur_type.split()[0]
        elif self.wish_to_add is not None:
            value, first, last = self.wish_to_add
        else:
            return
        
        # NOTE(review): text_select is assigned but never read in this method.
        text_select = False
        try:
            text_select = True
            # Resolve the symbolic selection bounds to "line.column" indices.
            if first == "sel.first":
                first = self.text.index("sel.first")
            if last == "sel.last":
                last = self.text.index("sel.last")
            
        except tkinter.TclError: # no selection
            return
        
        # The flag is raised from here on, even if no annotation ends up
        # being added or changed below.
        self.doc_is_modified = True
        if self.adder.current_hierarchy_level == 0:
            pos = (self.position2charindex(first), self.position2charindex(last))
            # Annotations of the same type that already cover the whole span.
            greater = [annot for annot in self.current_annotations if annot.lb<=pos[0] and annot.ub>=pos[1] and value==annot.value]
            tag = Tag(value, pos[0], pos[1])
            tag.levels = [value]
            if tag not in self.current_annotations and len(greater)==0:
                self.text.tag_add(value, first, last)
                self.adder.current_annotation = tag
                # Tree insertion index: number of annotations iterated before
                # the first one starting after the new tag.
                # NOTE(review): assumes current_annotations iterates in
                # document order -- confirm.
                index = 0
                for annot in self.current_annotations:
                    if annot.lb > tag.lb:
                        break
                    index += 1
                key = u"{}".format(tag) # TODO: PY2
                item = self.tree.insert(self.tree_ids[self.annotation_name], index, text=u'{0} "{1}" [{2}:{3}]'.format(value, self.doc.content[pos[0] : pos[1]], pos[0], pos[1]))
                self.treeitem2annot[item] = tag
                self.annot2treeitems[self.annotation_name][key] = item
                self.current_annotations.add(tag)
                # The same entry is also put on top of the "history" subtree.
                item2 = self.tree.insert(self.tree_ids["history"], 0, text=u'{0} "{1}" [{2}:{3}]'.format(value, self.doc.content[pos[0] : pos[1]], pos[0], pos[1]))
                self.treeitem2annot[item2] = tag
                self.annot2treeitems["history"][key] = item2
                # Link the annotation tree item to its history counterpart.
                self.ner2history[item] = item2
                self.adder.current_annotation = tag
            
            self.text.tag_remove("BOLD",  "1.0", 'end')
            # Put the first combobox back on its first entry.
            self.type_combos[0].current(0)
        else:
            lb = self.position2charindex(first)
            ub = self.position2charindex(last)
            # Sub-levels are only handled for requests coming through
            # wish_to_add; a combobox pick at a deeper level changes nothing.
            if self.wish_to_add is not None:
                for annot in self.current_annotations:
                    # Build the levels lazily from the dotted value.
                    if annot.levels == []:
                        annot.levels = annot.value.split(u".")
                    # Same first level and same span as the current annotation.
                    if annot.getLevel(0) == self.adder.current_annotation.getLevel(0) and annot.lb == lb and annot.ub == ub:
                        tree_item_str = self.locate_tree_item()
                        # NOTE(review): tree_item is never used afterwards.
                        tree_item =  self.tree.item(tree_item_str)
                        # NOTE(review): the tagset check is made on the value
                        # held before the new level is set -- confirm intent.
                        if check_in_tagset(annot.getValue(), self.tagset):
                            annot.setLevel(self.adder.current_hierarchy_level, self.wish_to_add[0])
                            new_text = u'{0} "{1}" [{2}:{3}]'.format(annot.getValue(), self.doc.content[annot.lb : annot.ub], lb, ub)
                            self.tree.item(tree_item_str, text=new_text)
                            # Replace the history entry of this annotation by
                            # a fresh one on top of the history subtree.
                            prev_item = self.ner2history.get(tree_item_str)
                            if prev_item:
                                self.tree.delete(prev_item)
                                del self.treeitem2annot[prev_item]
                            new_item = self.tree.insert(self.tree_ids["history"], 0, text=new_text)
                            self.ner2history[tree_item_str] = new_item
                            self.treeitem2annot[new_item] = self.treeitem2annot[tree_item_str]
                            self.annot2treeitems["history"][u"{}".format(Tag(annot.getLevel(0), lb, ub))] = new_item # TODO: PY2
        if remove_focus:
            self.wish_to_add = None
            self.adder.current_annotation = None
            self.adder.current_hierarchy_level = 0
            self.update_level()
        else:
            # Keep the span highlighted so that it can be refined further.
            self.text.tag_add("BOLD", first, last)
    
    def click(self, event):
        """
        Handle a click in the text widget: select the annotation under the
        mouse pointer, if any.

        Clicking repeatedly on the same set of overlapping annotations cycles
        through them (``self.annotations`` / ``self.annotations_tick``). The
        selected annotation is highlighted with the "BOLD" tag and selected
        in the tree; when no annotation can be located in the tree, the
        selection is cleared.

        Args:
            event: the event; its ``widget``, ``x`` and ``y`` attributes are
                used to find the text index under the pointer.
        """
        if self.doc is None or self.adder is None:
            return
        
        self.text.tag_remove("BOLD",  "1.0", 'end')
        # Remember the previous selection to detect a change at the end.
        prev_selection = self.adder.current_annotation
        self.adder.current_annotation = None
        self.wish_to_add = None
        # Text index ("line.column") under the mouse pointer.
        index = event.widget.index("@{0},{1}".format(event.x, event.y))
        # NOTE(review): names is computed (minus "sel") but not used below.
        names = list(self.text.tag_names(index))
        charindex = self.position2charindex(index)
        try:
            names.remove("sel")
        except ValueError:
            pass
        
        # Every annotation whose span contains the clicked character
        # (both bounds included).
        annotations = [annotation for annotation in self.current_annotations if annotation.lb <= charindex and charindex <= annotation.ub]
        
        # A different set of candidates restarts the cycle; otherwise the
        # next candidate of the same set is picked, wrapping around.
        if annotations != self.annotations:
            self.annotations = annotations
            self.annotations_tick = -1
        self.annotations_tick += 1
        if self.annotations_tick >= len(self.annotations):
            self.annotations_tick = 0
        if len(self.annotations) > 0:
            curr_annot = self.annotations[self.annotations_tick]
            ci2p = self.charindex2position
            self.text.tag_add("BOLD", ci2p(curr_annot.lb), ci2p(curr_annot.ub))
            # NOTE(review): the type tag is removed and immediately re-added
            # on the same range; the purpose is not evident from this method.
            self.text.tag_remove(curr_annot.value, ci2p(curr_annot.lb), ci2p(curr_annot.ub))
            self.text.tag_add(curr_annot.value, ci2p(curr_annot.lb), ci2p(curr_annot.ub))
            self.adder.current_annotation = curr_annot
        
        # Mirror the selection in the tree, or clear everything when the
        # annotation has no tree item.
        tree_item = self.locate_tree_item()
        if tree_item is not None:
            self.tree.selection_set(tree_item)
            self.tree.focus(tree_item)
            self.select_from_tree()
            self.tree.see(tree_item)
        else:
            self.unselect()
        # Go back to the first hierarchy level when nothing was clicked or
        # when the selected annotation changed.
        if len(annotations) == 0 or prev_selection != self.adder.current_annotation:
            self.adder.current_hierarchy_level = 0
            self.update_level()
    
    def locate_tree_item(self):
        """
        Return the tree item that displays the current annotation.

        The bounds are taken from ``self.wish_to_add`` when it is set, from
        the current annotation otherwise. The children of the first root of
        the tree are searched for a text starting with the first level of
        the current annotation and ending with "[lb:ub]".

        Returns:
            the identifier of the first matching item, or None when there is
            no current annotation or no item matches.
        """
        selected = self.adder.current_annotation
        if selected is None:
            return None

        if self.wish_to_add:
            lb = self.position2charindex(self.wish_to_add[1])
            ub = self.position2charindex(self.wish_to_add[2])
        else:
            lb, ub = selected.lb, selected.ub

        annotation_root = self.tree.get_children()[0]
        prefix = self.adder.current_annotation.getLevel(0)
        suffix = "[{}:{}]".format(lb, ub)
        for candidate in self.tree.get_children(annotation_root):
            label = self.tree.item(candidate)["text"]
            if label.startswith(prefix) and label.endswith(suffix):
                return candidate
        return None
    
    def select_from_tree(self, event=None):
        """
        Select in the text the annotation of the item selected in the tree.

        The span of the annotation is tagged "BOLD", the annotation becomes
        the current one, any pending ``wish_to_add`` is dropped and the text
        is scrolled to the end of the span. Nothing happens when the tree has
        no selected item or when the selected item has no parent.

        Args:
            event: unused, allows the method to be used as an event callback.
        """
        selection = self.tree.selection()
        # The selection can be empty (e.g. after the selected item has been
        # deleted): indexing it would raise an IndexError.
        if not selection:
            return
        item = selection[0]
        # Items without a parent carry no annotation.
        if not self.tree.parent(item):
            return

        annot = self.treeitem2annot[item]
        lb_str = self.charindex2position(annot.lb)
        ub_str = self.charindex2position(annot.ub)

        self.text.tag_remove("BOLD", "1.0", 'end')
        self.text.tag_add("BOLD", lb_str, ub_str)
        self.adder.current_annotation = annot
        self.wish_to_add = None

        # Move the insertion cursor to the end of the span and show it.
        self.text.mark_set("insert", ub_str)
        self.text.see("insert")
    
    def unselect(self, event=None):
        """
        Clear the current selection.

        The "BOLD" highlighting and the pending ``wish_to_add`` are dropped.
        When an adder is available, its current annotation and hierarchy
        level are reset and the comboboxes are refreshed.

        Args:
            event: unused, allows the method to be used as an event callback.
        """
        self.text.tag_remove("BOLD",  "1.0", 'end')
        self.wish_to_add = None

        adder = self.adder
        if not adder:
            return
        adder.current_annotation = None
        adder.current_hierarchy_level = 0
        self.update_level()
    
    def delete(self, event):
        """
        Delete the current annotation from the text, the tree and the
        current annotations.

        The highlighting of the annotation type is only removed from the text
        when no other annotation of the same value covers the same span.
        Nothing is deleted when there is no current annotation.

        Args:
            event: unused, allows the method to be used as an event callback.
        """
        self.text.tag_remove("BOLD",  "1.0", 'end')

        if self.adder.current_annotation is None: return

        value = self.adder.current_annotation.getValue()
        lb = self.adder.current_annotation.lb
        ub = self.adder.current_annotation.ub
        # Annotations identical to the current one (same span, same value).
        matching = [a for a in self.current_annotations if a.lb==lb and a.ub==ub and a.getValue()==value]
        # Other annotations of the same value covering the span. Members of
        # "matching" are already filtered out here, so nothing more has to be
        # removed from this list afterwards.
        greater = [a for a in self.current_annotations if a.lb<=lb and a.ub>=ub and a.getValue()==value and a not in matching]
        for annotation in matching:
            if not greater:
                self.text.tag_remove(self.adder.current_annotation.getLevel(0), self.charindex2position(annotation.lb), self.charindex2position(annotation.ub))
            # Tree items are registered under the string form of a tag made
            # of the first level and the span.
            tag = Tag(self.adder.current_annotation.getLevel(0), annotation.lb, annotation.ub)
            key = u"{}".format(tag) # TODO: PY2
            # Remove the item from every subtree that references it.
            for v in self.annot2treeitems.values():
                item = v.get(key, None)
                if item is not None:
                    self.tree.delete(item)
                    if tag in self.current_annotations:
                        self.current_annotations.remove(self.adder.current_annotation)
                    del v[key]
                    del self.treeitem2annot[item]

        # NOTE(review): the annotation may already have been removed in the
        # loop above; this relies on remove() accepting an absent element --
        # confirm against the Annotation class.
        self.current_annotations.remove(self.adder.current_annotation)
        self.adder.current_annotation = None
        self.adder.current_hierarchy_level = 0
        self.update_level()
        self.doc_is_modified = True
    
    def delete_all(self, event):
        """
        Delete the current annotation on every occurrence of its text.

        Every occurrence of the text covered by the current annotation is
        looked up in the document (not restricted to whole words) and
        ``delete`` is called for a tag of the same value on each of them.
        The selection state is then reset and the document is flagged as
        modified, whether or not an annotation was selected.

        Args:
            event: forwarded to ``delete``.
        """
        selected = self.adder.current_annotation
        if selected is not None:
            label = selected.value
            surface = self.doc.content[selected.lb:selected.ub]
            for occurrence in find_occurrences(surface, self.doc.content, whole_word=False):
                # delete() works on the current annotation of the adder.
                self.adder.current_annotation = Tag(label, occurrence.start(), occurrence.end())
                self.delete(event)

        self.adder.current_annotation = None
        self.adder.current_hierarchy_level = 0
        self.update_level()
        self.text.tag_remove("BOLD",  "1.0", 'end')
        self.doc_is_modified = True
    
    def position2charindex(self, position):
        """
        Convert a "line.column" text index into a character offset.

        The offset is the sum of the first ``line`` entries of
        ``self.lines_lengths`` plus the column.

        Args:
            position: a string made of two integers separated by a dot.

        Returns:
            the character offset as an integer.
        """
        line_number, column = map(int, position.split("."))
        preceding_lengths = self.lines_lengths[:line_number]
        return sum(preceding_lengths) + column
    
    def charindex2position(self, charindex):
        """
        Convert a character offset into a "line.column" text index.

        Lines are numbered from 1: the lengths stored in
        ``self.lines_lengths`` are consumed, starting at index 1, as long as
        the offset lies beyond the end of the line.

        Args:
            charindex: the character offset as an integer.

        Returns:
            the text index as a "line.column" string.
        """
        line_sizes = self.lines_lengths
        consumed = 0
        line_number = 1
        while True:
            line_end = consumed + line_sizes[line_number]
            if line_end > charindex:
                break
            consumed = line_end
            line_number += 1
        return u"{0}.{1}".format(line_number, charindex - consumed)
    
    def tab(self, event=None):
        """
        Move the adder one level up and refresh the type comboboxes.

        Args:
            event: unused, allows the method to be used as an event callback.
        """
        adder = self.adder
        adder.up_one_level()
        self.update_level()
    
    def shift_tab(self, event=None):
        """
        Move the adder one level down and refresh the type comboboxes.

        Args:
            event: unused, allows the method to be used as an event callback.
        """
        adder = self.adder
        adder.down_one_level()
        self.update_level()
    
    def update_level(self):
        """
        Refresh the type comboboxes for the current hierarchy level.

        Every combobox but the one of the current level is disabled. The
        combobox of the current level is made read-only and filled with the
        types reachable in the shortcut trie from the levels of the current
        annotation, each one followed by its keyboard shortcut.
        """
        adder = self.adder
        for depth in range(adder.max_depth()):
            combo = self.type_combos[depth]
            if depth != adder.current_hierarchy_level:
                combo.configure(state=tkinter.DISABLED)
                continue

            current = adder.current_annotation
            levels = [] if current is None else current.levels
            subtrie = adder.shortcut_trie.goto(levels)
            # The NUL entry of a trie node holds the shortcut, not a type.
            entries = [
                key + " ({0} or Shift+{0})".format(subtrie[key][NUL])
                for key in sorted(key for key in subtrie if key != NUL)
            ]
            combo["values"] = [self.SELECT_TYPE] + entries
            combo.configure(state="readonly")
    
    def add_document(self, document):
        """
        Add a document to the corpus tree and select it.

        When a document is loaded and the corpus already holds a document of
        the same name, no new entry is created: the existing entry is
        selected instead.

        Args:
            document: the document to add; only its ``name`` is read here.

        Returns:
            True if a new entry was created, False if the document was
            already in the corpus.
        """
        found = self.doc is not None and any(document.name == doc.name for doc in self.corpus_documents)
        if found:
            # corpus_doc2id maps a document name to its tree item, while
            # corpus_id2doc maps a tree item to its document: the item of
            # the requested document has to be read from the former.
            item = self.corpus_doc2id[document.name]
        else:
            item = self.corpus_tree.insert("", len(self.corpus_tree.get_children()), text=document.name)
            self.corpus_id2doc[item] = document
            self.corpus_doc2id[document.name] = item
            self.corpus_documents.append(document)

        self.corpus_tree.selection_set(item)
        self.corpus_tree.focus(item)
        self.corpus_tree.see(item)
        return not found
    
    def load_document(self, event=None, same_doc=False):
        """
        Display the document selected in the corpus tree.

        The document is (re)loaded when no document is loaded yet, when the
        selected document has another name than the loaded one, or when
        ``same_doc`` is True. Loading rebuilds the annotation and history
        subtrees, resets the annotation bookkeeping, replaces the content of
        the text widget and re-creates the annotations stored in the
        document under ``self.annotation_name``.

        Args:
            event: unused, allows the method to be used as an event callback.
            same_doc: force a reload even if the selected document is the
                one already loaded.
        """
        try:
            selection = self.corpus_tree.selection()[0]
        except IndexError:
            # Nothing is selected in the corpus tree.
            return
        
        document = self.corpus_id2doc[selection]
        if self.doc is None or (document.name != self.doc.name or same_doc):
            # Store pending modifications in the document being left.
            if self.doc is not None and self.doc_is_modified:
                update_annotations(self.doc, self.annotation_name, self.current_annotations.annotations)
            
            self.doc = document
            
            # Drop the subtrees built for the previous document.
            previous_tree = self.tree_ids.get(self.annotation_name, None)
            if previous_tree:
                self.tree.delete(previous_tree)
            previous_tree = self.tree_ids.get("history", None)
            if previous_tree:
                self.tree.delete(previous_tree)
            
            # The subtrees are only re-created once an annotation name is set.
            if self.annotation_name is not None:
                self.tree_ids[self.annotation_name] = self.tree.insert("", len(self.tree_ids)+1, text=self.annotation_name)
                self.tree_ids["history"] = self.tree.insert("", len(self.tree_ids)+1, text="history")
                self.annot2treeitems[self.annotation_name] = {}
            self.annot2treeitems["history"] = {}
            self.treeitem2annot = {}
            self.position2annots = {}
            self.current_annotations = Annotation("CurrentAnnotations")
            if self.adder is not None:
                self.adder.current_annotation = None
            self.wish_to_add = None
            # Length of every line, newline included. The leading 0 makes the
            # list index match the 1-based line numbers of the text widget
            # (see position2charindex and charindex2position).
            self.lines_lengths = [0] + [len(line)+1 for line in self.doc.content.split(u"\n")]
            
            # The widget is made editable only while its content is replaced.
            self.text.configure(state="normal")
            for tag_name in self.text.tag_names():
                self.text.tag_remove(tag_name, "1.0", "end")
            self.text.delete("1.0", "end")
            self.text.insert("end", self.doc.content)
            self.text.tag_remove("BOLD",  "1.0", 'end')
            self.text.configure(state="disabled")
            
            if self.doc.annotation(self.annotation_name):
                annots = self.doc.annotation(self.annotation_name).get_reference_annotations()
                for nth_annot, annot in enumerate(annots):
                    # Levels are the dot-separated parts of the value; the
                    # text is tagged with the first one.
                    annot.levels = annot.value.split(u".")
                    self.add_tag(annot.levels[0], annot.lb, annot.ub)
                    item = self.tree.insert(self.tree_ids[self.annotation_name], len(self.annot2treeitems[self.annotation_name])+1, text=u'{0} "{1}" [{2}:{3}]'.format(annot.value, self.doc.content[annot.lb : annot.ub], annot.lb, annot.ub))
                    self.annot2treeitems[self.annotation_name][u"{}".format(annot)] = item # TODO: PY2
                    annot.ids[self.annotation_name] = item
                    self.treeitem2annot[item] = annot
                    self.current_annotations.add(annot)
                    # The first level is not in the shortcut trie: try
                    # another separator to split the value into levels.
                    # NOTE(review): the levels are set on the nth annotation
                    # of the document, assumed to be "annot" -- confirm.
                    if self.adder.shortcut_trie.goto(annot.levels[0]) is None:
                        separator = find_potential_separator(annot.value)
                        if separator is not None:
                            splitted = annot.value.split(separator)
                            for depth, type_to_add in enumerate(splitted,0):
                                self.doc.annotation(self.annotation_name)[nth_annot].setLevel(depth, type_to_add)
                        else:
                            self.doc.annotation(self.annotation_name)[nth_annot].setLevel(0, annot.value)
        # NOTE(review): the flag is reset even when nothing was reloaded
        # (same document selected again), in which case pending
        # modifications have not been stored -- confirm that this is wanted.
        self.doc_is_modified = False
    
    def load_tagset(self, filename):
        """
        Load a tagset file and rebuild the annotation widgets for it.

        The file is read as UTF-8, one tag per line. Whatever follows a "#"
        on a line is a comment, and empty lines are skipped. The file name
        without its extension becomes the annotation name. The toolbar
        (one label and one combobox per hierarchy level) and the tree are
        rebuilt, a color is given to every first-level type, and the
        selected document is reloaded.

        Args:
            filename: path to the tagset file.
        """
        # Store pending modifications before the annotation name changes.
        if self.doc and self.doc_is_modified:
            update_annotations(self.doc, self.annotation_name, self.current_annotations.annotations)

        tagset_name = os.path.splitext(os.path.basename(filename))[0]
        # Plain "r": the "U" flag is rejected by open() since Python 3.11,
        # and line endings are stripped below anyway.
        with codecs.open(filename, "r", "utf-8") as tagset_file:
            lines = [line.strip() for line in tagset_file]
        # Remove comments, then the whitespace that separated the tag from
        # its comment, so that no tag ends with blanks.
        tagset = [line.split(u"#", 1)[0].strip() for line in lines]
        tagset = [tag for tag in tagset if tag != u""]

        self.spare_colors = self.SPARE_COLORS_DEFAULT[:]
        self.annotation_name = tagset_name
        self.tagset = set(tagset)

        # Rebuild the toolbar from scratch, starting with the first level.
        for combo in self.type_combos:
            combo.destroy()
        self.type_combos = []
        for add_type_lbl in self.add_type_lbls:
            add_type_lbl.destroy()
        self.add_type_lbls = [ttk.Label(self.toolbar, text="add type:")]
        self.add_type_lbls[0].pack(side="left")

        for child in self.tree.get_children():
            self.tree.delete(child)
        self.tree_ids[self.annotation_name] = self.tree.insert("", len(self.tree_ids)+1, text=self.annotation_name)
        self.tree_ids["history"] = self.tree.insert("", len(self.tree_ids)+1, text="history")
        self.annot2treeitems[self.annotation_name] = {}

        self.type_combos.append(ttk.Combobox(self.toolbar))
        self.type_combos[0]["values"] = [self.SELECT_TYPE]
        self.type_combos[0].bind("<<ComboboxSelected>>", self.add_annotation)
        self.type_combos[0].pack(side="left")

        self.adder = Adder2.from_tagset(tagset)
        for depth in range(self.adder.max_depth()):
            # The widgets of the first level exist already; each deeper level
            # gets its own label and combobox, so that the label shown next
            # to a combobox is the one of its level and no spare widget is
            # left unpacked.
            if depth >= len(self.type_combos):
                label = ttk.Label(self.toolbar, text="add {0}type:".format("sub"*(depth)))
                label.pack(side="left")
                self.add_type_lbls.append(label)
                combo = ttk.Combobox(self.toolbar)
                combo["values"] = [self.SELECT_TYPE]
                combo.bind("<<ComboboxSelected>>", self.add_annotation)
                combo.pack(side="left")
                self.type_combos.append(combo)

            combo = self.type_combos[depth]
            for tag in sorted(set(t[depth] for t in self.adder.levels if len(t) > depth)):
                combo["values"] = list(combo["values"]) + [tag]
                if depth == 0:
                    # First-level types are the ones highlighted in the
                    # text: take a spare color, or a random one when none
                    # is left.
                    if len(self.spare_colors) > 0:
                        self.color = self.spare_colors.pop()
                    else:
                        self.color = random_color()
                    self.text.tag_configure(tag, **self.color)
        self.update_level()
        # Forget the loaded document so that load_document reloads the
        # selected one with the new tagset.
        self.doc = None
        self.load_document()
    
    def load_tagset_gui(self, event=None):
        """
        Ask the user for a tagset file and load it.

        The file dialog opens in the "resources/tagsets" folder of the SEM
        data directory. Nothing is loaded when no file name is returned.

        Args:
            event: unused, allows the method to be used as an event callback.
        """
        tagsets_dir = os.path.join(sem.SEM_DATA_DIR, "resources", "tagsets")
        chosen = tkinter.filedialog.askopenfilename(
            filetypes=[("text files", ".txt"), ("All files", ".*")],
            initialdir=tagsets_dir
        )
        if len(chosen) > 0:
            self.load_tagset(chosen)
    
    def find_in_text(self, event=None):
        """
        Delegate the search to ``self.search``.

        Args:
            event: forwarded as the ``event`` keyword argument.
        """
        search = self.search
        search.find_in_text(event=event)
    
    def load_pipeline(self, event=None):
        """
        Open a dialog to choose the workflow to load as the pipeline.

        The dialog holds a language selector, a master selector and two
        buttons. "load workflow" loads the selected workflow into
        ``self.pipeline`` and closes the dialog; "cancel" only closes it.
        In both cases the tag-document button is enabled if a pipeline is
        available when the dialog closes.

        Args:
            event: unused, allows the method to be used as an event callback.
        """
        dialog = tkinter.Toplevel()
        resources_dir = os.path.join(sem.SEM_DATA_DIR, "resources")
        master_selector = SemTkMasterSelector(dialog, resources_dir)
        lang_selector = SemTkLangSelector(dialog, resources_dir)
        lang_selector.master_selector = master_selector
        # Each selector returns the next free row as first element.
        next_row, _ = lang_selector.grid(row=0, column=0)
        next_row, _ = master_selector.grid(row=next_row, column=0)

        def close_dialog(event=None):
            if self.pipeline is not None:
                self.tag_document_btn.configure(state=tkinter.NORMAL)
            dialog.destroy()

        def load_selected(event=None):
            workflow_path = master_selector.workflow()
            loaded, _, _, _ = sem.modules.tagger.load_master(workflow_path)
            self.pipeline = loaded
            close_dialog()

        ttk.Button(dialog, text="load workflow", command=load_selected).grid(row=next_row, column=0)
        ttk.Button(dialog, text="cancel", command=close_dialog).grid(row=next_row, column=1)
    
    def tag_document(self, event=None):
        """
        Run the loaded pipeline on the current document and display the
        result.

        Every annotation of the document is replaced by an annotation of the
        same name built from its reference annotations, then the document is
        reloaded. Nothing is done when no pipeline is loaded.

        Args:
            event: unused, allows the method to be used as an event callback.
        """
        pipeline = self.pipeline
        if pipeline is None:
            return

        document = self.doc
        pipeline.process_document(document)
        self.doc_is_modified = True
        for name in document.annotations:
            produced = document.annotation(name)
            reference = Annotation(produced.name, annotations=produced.get_reference_annotations())
            document.add_annotation(reference)
        self.current_annotations = document.annotation(self.annotation_name)
        # Force the reload: the document itself has not changed.
        self.load_document(same_doc=True)

# Command-line interface: this module registers itself as a SEM sub-command.
_subparsers = sem.argument_subparsers

# The sub-command is named after this file, without its extension.
parser = _subparsers.add_parser(os.path.splitext(os.path.basename(__file__))[0], description="An annotation tool for SEM.")

parser.add_argument("-d", "--documents", nargs="*",
                    help="Documents to load at startup.")
parser.add_argument("-t", "--tagset",
                    help="The tagset to load at startup.")
parser.add_argument("-l", "--log", dest="log_level", choices=("DEBUG","INFO","WARNING","ERROR","CRITICAL"), default="WARNING",
                    help="Increase log level (default: %(default)s)")

def main(args):
    """
    Launch the annotation tool in a new window titled "SEM".

    Args:
        args: the parsed command-line arguments; ``log_level``,
            ``documents`` and ``tagset`` are read.
    """
    root = tkinter.Tk()
    root.title("SEM")
    tool = AnnotationTool(root, args.log_level, documents=args.documents, tagset=args.tagset)
    tool.pack(expand=1, fill="both")
    root.mainloop()