import argparse
import sys
sys.path.append("../")

import cv2
import numpy as np

import face_detection_utilities as fdu

import model.myVGG as vgg
import speech_recognition as sr

windowsName = 'Preview Screen'

parser = argparse.ArgumentParser(description='Live emotion recognition from the webcam')
args = parser.parse_args()  # no command-line options are defined yet
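
# Size (width, height) of the face crop that is fed to the network.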
FACE_SHAPE = (48, 48)

model = vgg.VGG_16('my_model_weights_83.h5')
#model = vgg.VGG_16()

emo = ['Angry', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
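# The label order above must match the order of the model's output units.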

def speechRecognition():
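    """Prompt the user to press 'y', record one utterance from the microphone,
    and return the Google Speech Recognition transcript, or None on failure."""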
    # Obtain one utterance from the microphone.
    print("Press 'y' to start~")
    if input() != 'y':
        return None
    r = sr.Recognizer()
    with sr.Microphone() as source:
        print("Say something!")
        audio = r.listen(source)
    # Recognize the speech using Google Speech Recognition.
    try:
        # For testing purposes we use the default API key; to use your own key,
        # call `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`.
        recContent = r.recognize_google(audio)
        print("Speech Recognition thinks you said " + recContent)
        return recContent
    except sr.UnknownValueError:
        print("Could not understand audio")
    except sr.RequestError as e:
        print("Could not request results from Google Speech Recognition service; {0}".format(e))
    return None

def refreshFrame(frame, faceCoordinates):
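    """Draw a box around the detected face (if any) and refresh the preview window."""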
    if faceCoordinates is not None:
        fdu.drawFace(frame, faceCoordinates)
    cv2.imshow(windowsName, frame)
    cv2.waitKey(10)  # give HighGUI a moment to actually render the frame


def showScreenAndDetect(capture):
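    """Grab webcam frames until a face has been found in five of them, classify
    each face crop, and return the emotion with the highest accumulated score."""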
    print("Detecting face...")
    cnt = 5
    while cnt:
        flag, frame = capture.read()
        if not flag:
            continue  # camera failed to deliver a frame; try again
        faceCoordinates = fdu.getFaceCoordinates(frame)
        refreshFrame(frame, faceCoordinates)
        
        if faceCoordinates is not None:
            cnt -= 1
            face_img = fdu.preprocess(frame, faceCoordinates, face_shape=FACE_SHAPE)
            #cv2.imshow(windowsName, face_img)

            input_img = np.expand_dims(face_img, axis=0)
            input_img = np.expand_dims(input_img, axis=0)
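            # input_img now has shape (1, 1, 48, 48): batch and channel axes
            # added for the (presumably channels-first) Keras model.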

            result = model.predict(input_img)[0]
            if cnt == 4:  # first detected face: start the running total
                tot_result = result
            else:
                tot_result += result
            index = np.argmax(result)
            print('Frame', 5 - cnt, ':', emo[index], 'prob:', result[index])
    index = np.argmax(tot_result)
    print('Final decision:', emo[index], 'avg prob:', tot_result[index] / 5)
    return emo[index]

def getCameraStreaming():
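    """Open the default webcam and return the capture object; exit on failure."""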
    capture = cv2.VideoCapture(0)
    if not capture.isOpened():
        print("Failed to capture video streaming")
        sys.exit(1)
    else:
        print("Succeeded to capture video streaming")
        
    return capture

def main():
    '''
    Open the webcam and the preview window, then repeatedly: record an
    utterance, detect the speaker's emotion from the video stream, and print
    the transcript decorated with a matching emoticon.
    '''
    print("Enter main() function")
    
    capture = getCameraStreaming()

    cv2.startWindowThread()
    cv2.namedWindow(windowsName, cv2.WINDOW_NORMAL)
    cv2.setWindowProperty(windowsName, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
    
    while True:
        recContent = speechRecognition()
        if recContent is not None:
            emotion = showScreenAndDetect(capture)
            # Map the detected emotion to an emoticon (Neutral falls through
            # to a blank).
            emoji = {
                'Angry':    ' >:O',
                'Fear':     ' :-S',
                'Happy':    ' :-D',
                'Sad':      " :'(",
                'Surprise': ' :-O',
            }.get(emotion, ' ')
            print("Output result: " + recContent + emoji)

if __name__ == '__main__':
    main()