__author__ = 'Santanu Pattanayak'

import numpy as np
np.random.seed(1000)

import os
import cv2
import pandas as pd
import time
import warnings
warnings.filterwarnings("ignore")
import joblib  # replaces the deprecated sklearn.externals.joblib (removed in scikit-learn >= 0.23)
from sklearn.model_selection import KFold
from sklearn.metrics import cohen_kappa_score
import keras
from keras.models import Model
from keras.layers import Dense, Dropout, GlobalAveragePooling2D
from keras.callbacks import EarlyStopping, ModelCheckpoint, CSVLogger
from keras import optimizers
from keras.applications.inception_v3 import InceptionV3
from keras.applications.resnet50 import ResNet50
from keras.applications.vgg16 import VGG16
import h5py
import argparse
import json

def get_im_cv2(path, dim=224):
    # Read an image from disk (OpenCV returns BGR channel order) and resize it.
    # The interpolation flag must be passed by keyword; positionally it would be
    # interpreted as the dst argument of cv2.resize.
    img = cv2.imread(path)
    resized = cv2.resize(img, (dim, dim), interpolation=cv2.INTER_LINEAR)
    return resized
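
# For example (hypothetical file name), an InceptionV3-sized read:
# img = get_im_cv2('sample.jpeg', dim=299)   # -> uint8 array of shape (299, 299, 3)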

# Pre-process the images with the mean subtraction used by the ImageNet
# pre-trained models (Caffe-style B, G, R channel means, which matches the
# BGR order that cv2.imread returns)
def pre_process(img):
    img = img.astype(np.float32)           # avoid uint8 wrap-around on subtraction
    img[:, :, 0] = img[:, :, 0] - 103.939  # B mean
    img[:, :, 1] = img[:, :, 1] - 116.779  # G mean
    img[:, :, 2] = img[:, :, 2] - 123.68   # R mean
    return img
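
# A minimal sanity check of the transform above (assuming a dummy uint8 image).
# keras.applications.resnet50.preprocess_input applies the same Caffe-style mean
# subtraction, but also flips RGB -> BGR, which is unnecessary here because
# cv2.imread already returns BGR.
#
# dummy = np.full((4, 4, 3), 128, dtype=np.uint8)
# out = pre_process(dummy)
# assert np.allclose(out[:, :, 0], 128 - 103.939)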
	   

class DataGenerator(keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self,files,labels,batch_size=32,n_classes=5,dim=(224,224,3),shuffle=True):
        'Initialization'
        self.labels = labels
        self.files = files
        self.batch_size = batch_size
        self.n_classes = n_classes
        self.dim = dim
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.files) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Find list of files to be processed in the batch
        list_files = [self.files[k] for k in indexes]
        labels    = [self.labels[k] for k in indexes] 

        # Generate data
        X, y = self.__data_generation(list_files,labels)

        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.files))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self,list_files,labels):
        'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
        # Initialization
        X = np.empty((len(list_files),self.dim[0],self.dim[1],self.dim[2]))
        y = np.empty((len(list_files)),dtype=int)

        # Generate data: read, resize and mean-subtract each image in the batch;
        # labels stay integer class ids because the model is trained as a regressor
        for i,f in enumerate(list_files):
            img = get_im_cv2(f,dim=self.dim[0])
            img = pre_process(img)
            X[i,] = img
            y[i,] = labels[i]
        return X,y
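
# A minimal usage sketch of the generator (hypothetical file names):
#
# gen = DataGenerator(['img_0.jpeg', 'img_1.jpeg'], [0, 3], batch_size=2,
#                     n_classes=5, dim=(224, 224, 3), shuffle=False)
# X, y = gen[0]   # X.shape == (2, 224, 224, 3), y.shape == (2,)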



class TransferLearning:


	def __init__(self):
		parser = argparse.ArgumentParser(description='Process the inputs')
		parser.add_argument('--path',help='image directory')
		parser.add_argument('--class_folders',help='class images folder names')
		parser.add_argument('--dim',type=int,help='Image dimensions to process')
		parser.add_argument('--lr',type=float,help='learning rate',default=1e-4)
		parser.add_argument('--batch_size',type=int,help='batch size')
		parser.add_argument('--epochs',type=int,help='no of epochs to train')
		parser.add_argument('--initial_layers_to_freeze',type=int,help='the initial layers to freeze')
		parser.add_argument('--model',help='Standard Model to load',default='InceptionV3')
		parser.add_argument('--folds',type=int,help='num of cross validation folds',default=5)
		parser.add_argument('--mode',help='train or validation',default='train')
		parser.add_argument('--model_save_dest',help='dict with model paths')
		parser.add_argument('--outdir',help='output directory')
		
		args = parser.parse_args()
		self.path = args.path
		self.class_folders = json.loads(args.class_folders)
		self.dim  = int(args.dim)
		self.lr   = float(args.lr)
		self.batch_size = int(args.batch_size)
		self.epochs =  int(args.epochs)
		self.initial_layers_to_freeze = int(args.initial_layers_to_freeze)
		self.model = args.model
		self.folds = int(args.folds)
		self.mode = args.mode
		self.model_save_dest = args.model_save_dest
		self.outdir = args.outdir
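
	# Example invocation (hypothetical script name and paths):
	#
	# python TransferLearning_reg.py --path '/data/diabetic/' \
	#     --class_folders '["class0","class1","class2","class3","class4"]' \
	#     --dim 224 --lr 1e-4 --batch_size 16 --epochs 40 \
	#     --initial_layers_to_freeze 10 --model InceptionV3 --folds 5 \
	#     --mode train --outdir '/output/'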
	
	
	def get_im_cv2(self,path,dim=224):
		# Read an image (BGR) and resize; interpolation must be a keyword argument
		img = cv2.imread(path)
		resized = cv2.resize(img, (dim,dim), interpolation=cv2.INTER_LINEAR)
		return resized

	# Pre-process the images with the Caffe-style B, G, R ImageNet channel means,
	# consistent with the BGR order returned by cv2.imread
	def pre_process(self,img):
		img = img.astype(np.float32)       # avoid uint8 wrap-around on subtraction
		img[:,:,0] = img[:,:,0] - 103.939  # B mean
		img[:,:,1] = img[:,:,1] - 116.779  # G mean
		img[:,:,2] = img[:,:,2] - 123.68   # R mean
		return img
	   
	# Function to build the lists of image files and labels from the train/validation directories
	def read_data(self,class_folders,path,num_class,dim,train_val='train'):
		labels = []
		file_list = []
		for c in class_folders:
			# os.path.join handles paths with or without a trailing separator
			path_class = os.path.join(path, train_val, c)
			files = os.listdir(path_class)
			files = [os.path.join(path_class, f) for f in files]
			file_list += files
			# Class folders are expected to end in the class id, e.g. 'class0' -> 0
			labels += len(files)*[int(c.split('class')[1])]

		return file_list,labels
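
	# Expected directory layout (hypothetical):
	#
	#   <path>/train/class0/*.jpeg ... <path>/train/class4/*.jpeg
	#   <path>/validation/class0/*.jpeg ... <path>/validation/class4/*.jpeg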
		
	def inception_pseudo(self,dim=224,freeze_layers=30,full_freeze='N'):
		model = InceptionV3(weights='imagenet',include_top=False)
		x = model.output
		x = GlobalAveragePooling2D()(x)
		x = Dense(512, activation='relu')(x)
		x = Dropout(0.5)(x)
		x = Dense(512, activation='relu')(x)
		x = Dropout(0.5)(x)
		out = Dense(1)(x)
		model_final = Model(inputs=model.input,outputs=out)
		if full_freeze != 'N':
			for layer in model.layers[0:freeze_layers]:
				layer.trainable = False
		return model_final

	# ResNet50 Model for transfer Learning 
	def resnet_pseudo(self,dim=224,freeze_layers=10,full_freeze='N'):
		model = ResNet50(weights='imagenet',include_top=False)
		x = model.output
		x = GlobalAveragePooling2D()(x)
		x = Dense(512, activation='relu')(x)
		x = Dropout(0.5)(x)
		x = Dense(512, activation='relu')(x)
		x = Dropout(0.5)(x)
		out = Dense(1)(x)
		model_final = Model(inputs=model.input,outputs=out)
		if full_freeze != 'N':
			for layer in model.layers[0:freeze_layers]:
				layer.trainable = False
		return model_final

	# VGG16 Model for transfer Learning 

	def VGG16_pseudo(self,dim=224,freeze_layers=10,full_freeze='N'):
		model = VGG16(weights='imagenet',include_top=False)
		x = model.output
		x = GlobalAveragePooling2D()(x)
		x = Dense(512, activation='relu')(x)
		x = Dropout(0.5)(x)
		x = Dense(512, activation='relu')(x)
		x = Dropout(0.5)(x)
		out = Dense(1)(x)
		model_final = Model(inputs=model.input,outputs=out)
		if full_freeze != 'N':
			for layer in model.layers[0:freeze_layers]:
				layer.trainable = False
		return model_final
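
	# All three backbones share the same regression head: GlobalAveragePooling2D
	# followed by two Dense(512)/Dropout(0.5) blocks and a single linear unit.
	# The ordinal severity classes are treated as a continuous target trained with
	# MSE; predictions are rounded back to class ids in inference_validation.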


	def train_model(self,file_list,labels,n_fold=5,batch_size=16,epochs=40,dim=224,lr=1e-5,model='ResNet50'):
		model_save_dest = {}
		k = 0
		kf = KFold(n_splits=n_fold, random_state=0, shuffle=True)

		for train_index,test_index in kf.split(file_list):


			k += 1
			file_list = np.array(file_list)
			labels   = np.array(labels)
			train_files,train_labels  = file_list[train_index],labels[train_index]
			val_files,val_labels  = file_list[test_index],labels[test_index]
			
			if model == 'ResNet50':
				model_final = self.resnet_pseudo(dim=dim,freeze_layers=10,full_freeze='N')

			if model == 'VGG16':
				model_final = self.VGG16_pseudo(dim=dim,freeze_layers=10,full_freeze='N')

			if model == 'InceptionV3':
				model_final = self.inception_pseudo(dim=dim,freeze_layers=10,full_freeze='N')
				
			adam = optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
			model_final.compile(optimizer=adam, loss=["mse"],metrics=['mse'])
			reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.50,patience=3, min_lr=0.000001)
			early = EarlyStopping(monitor='val_loss', patience=10, mode='min', verbose=1)
			logger = CSVLogger('keras-5fold-run-01-v1-epochs_ib.log', separator=',', append=False)
			# Checkpoint file holding the best (lowest val_loss) model of this fold
			model_name = 'keras-5fold-run-01-v1-fold-' + str('%02d' % k) + '.check'
			checkpoint = ModelCheckpoint(model_name,
								monitor='val_loss', mode='min',
								save_best_only=True,
								verbose=1)
			callbacks = [reduce_lr,early,checkpoint,logger]
			train_gen = DataGenerator(train_files,train_labels,batch_size=batch_size,n_classes=len(self.class_folders),dim=(self.dim,self.dim,3),shuffle=True)
			val_gen = DataGenerator(val_files,val_labels,batch_size=batch_size,n_classes=len(self.class_folders),dim=(self.dim,self.dim,3),shuffle=False)
			model_final.fit_generator(train_gen,epochs=epochs,verbose=1,validation_data=val_gen,callbacks=callbacks)
			del model_final
			# Strip the saved optimizer state so the checkpoint reloads cleanly
			f = h5py.File(model_name, 'r+')
			if 'optimizer_weights' in f:
				del f['optimizer_weights']
			f.close()
			model_final = keras.models.load_model(model_name)
			model_name1 = f'{self.outdir}/{model}___{k}'
			model_final.save(model_name1)
			model_save_dest[k] = model_name1
				
		return model_save_dest
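
	# A minimal sketch of driving train_model directly, assuming an already
	# constructed TransferLearning instance tl (hypothetical paths):
	#
	# files, labels = tl.read_data(['class0','class1'], '/data/', 2, 224, 'train')
	# save_dest = tl.train_model(files, labels, n_fold=5, batch_size=16,
	#                            epochs=40, dim=224, lr=1e-5, model='InceptionV3')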

	# Hold out dataset validation function

	def inference_validation(self,test_X,test_y,model_save_dest,n_class=5,folds=5):
		print(test_X.shape,test_y.shape)
		pred = np.zeros(test_X.shape[0])
		# Average the regression outputs of the models from all the folds
		for k in range(1,folds + 1):
			print(f'running inference on fold: {k}')
			model = keras.models.load_model(model_save_dest[k])
			pred = pred + model.predict(test_X)[:,0]
		pred = pred/float(folds)
		# Round the averaged score to the nearest class id and clamp to the valid range
		pred_class = np.clip(np.round(pred).astype(int), 0, n_class - 1)
		act_class = test_y
		accuracy = np.mean(pred_class == act_class)
		kappa = cohen_kappa_score(pred_class,act_class,weights='quadratic')
		return pred_class,accuracy,kappa
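
	# Note: quadratic Cohen's kappa penalises a miss in proportion to the squared
	# distance between classes, so predicting 3 for a true 4 costs far less than
	# predicting 0, which suits ordinal severity grades.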
	
	def main(self):
		start_time = time.time()
		self.num_class = len(self.class_folders)
		if self.mode == 'train':
			print("Data Processing..")
			file_list,labels = self.read_data(self.class_folders,self.path,self.num_class,self.dim,train_val='train')
			print(len(file_list),len(labels))
			print(labels[0],labels[-1])
			self.model_save_dest = self.train_model(file_list,labels,n_fold=self.folds,batch_size=self.batch_size,
                                                        epochs=self.epochs,dim=self.dim,lr=self.lr,model=self.model)
			joblib.dump(self.model_save_dest,f'{self.outdir}/model_dict.pkl')
			print("Model saved to dest:",self.model_save_dest)
		else:
			model_save_dest = joblib.load(self.model_save_dest)
			print('Models loaded from:',model_save_dest)
			# Run inference on the hold-out validation set
			test_files,test_y = self.read_data(self.class_folders,self.path,self.num_class,self.dim,train_val='validation')
			test_X = []
			for f in test_files:
				img = self.get_im_cv2(f)
				img = self.pre_process(img)
				test_X.append(img)
			test_X = np.array(test_X)
			test_y = np.array(test_y)
			print(test_X.shape)
			print(len(test_y))
			pred_class,accuracy,kappa = self.inference_validation(test_X,test_y,model_save_dest,n_class=self.num_class,folds=self.folds)
			results_df = pd.DataFrame()
			results_df['file_name'] = test_files
			results_df['target'] = test_y
			results_df['prediction'] = pred_class
			results_df.to_csv(f'{self.outdir}/val_results_reg.csv',index=False)
			
			print("-----------------------------------------------------")
			print("Kappa score:", kappa)
			print("accuracy:", accuracy) 
			print("End of training")
			print("-----------------------------------------------------")
			print("Processing Time",time.time() - start_time,' secs')
		
if __name__ == "__main__":
	obj = TransferLearning()
	obj.main()