import nltk
from nltk import word_tokenize
from nltk import WordNetLemmatizer
from collections import Counter
from nltk import NaiveBayesClassifier, classify

def Process(data):
  lemmatizer = WordNetLemmatizer()
  return [lemmatizer.lemmatize(word.lower()) for word in word_tokenize(unicode(sentence,errors='ignore'))]
 

def Features_Extraction(text, setting):
  if setting=='bow':
   # Bow means bag-of-words
    return {word: count for word, count in Counter(Process(text)).items() if not word in stop}
  else:
    return {word: True for word in Process(text) if not word in stop}
    
features = [(Features_Extraction(email, 'bow'), label) for (email, label)
in emails]

def training_Model (Features, samples):
  Size = int(len(Features) * samples)
  training , testing = Features[:Size], Features[Size:]
  print ('Training = ' + str(len(training)) + ' emails')
  print ('Testing = ' + str(len(testing)) + ' emails')
  
classifier = NaiveBayesClassifier.train(training)

def evaluate(training, tesing, classifier):
  print ('Training Accuracy is ' + str(classify.accuracy(classifier,train_set)))
  print ('Testing Accuracy i ' + str(classify.accuracy(classifier,test_set)))