# Module author metadata; spelled as the conventional ``__author__`` dunder.
__author__ = 'Santanu Pattanayak'

import numpy as np
np.random.seed(1000)

import os
import glob
import cv2
import datetime
import pandas as pd
import time
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import KFold
from sklearn.metrics import cohen_kappa_score
from keras.models import Sequential,Model
from keras.layers.core import Dense, Dropout, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers import GlobalMaxPooling2D,GlobalAveragePooling2D
from keras.optimizers import SGD
from keras.callbacks import EarlyStopping
from keras.utils import np_utils
from sklearn.metrics import log_loss
import keras
from keras import __version__ as keras_version
from keras.applications.inception_v3 import InceptionV3
from keras.applications.resnet50 import ResNet50
from keras.applications.vgg16 import VGG16
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers 
from keras.callbacks import EarlyStopping, ModelCheckpoint, CSVLogger, Callback
from keras.applications.resnet50 import preprocess_input
import h5py
import argparse
from sklearn.externals import joblib
import json


class TransferLearning:


	def __init__(self):
		"""Parse the command-line options and store them as instance attributes.

		Options without a default (--path, --class_folders, --dim, --batch_size,
		--epochs, --initial_layers_to_freeze, --outdir) are mandatory; argparse
		reports a missing one with a usage message instead of letting a later
		conversion of ``None`` fail with an opaque TypeError.

		--class_folders must be a JSON list of folder names,
		e.g. '["class0","class1"]'.
		"""
		parser = argparse.ArgumentParser(description='Process the inputs')
		parser.add_argument('--path',required=True,help='image directory')
		parser.add_argument('--class_folders',required=True,help='class images folder names')
		parser.add_argument('--dim',type=int,required=True,help='Image dimensions to process')
		parser.add_argument('--lr',type=float,help='learning rate',default=1e-4)
		parser.add_argument('--batch_size',type=int,required=True,help='batch size')
		parser.add_argument('--epochs',type=int,required=True,help='no of epochs to train')
		parser.add_argument('--initial_layers_to_freeze',type=int,required=True,help='the initial layers to freeze')
		parser.add_argument('--model',help='Standard Model to load',default='InceptionV3')
		parser.add_argument('--folds',type=int,help='num of cross validation folds',default=5)
		parser.add_argument('--outdir',required=True,help='output directory')

		args = parser.parse_args()

		# The folder names arrive as a JSON-encoded list on the command line.
		try:
			class_folders = json.loads(args.class_folders)
		except ValueError:
			parser.error('--class_folders must be valid JSON, e.g. \'["class0","class1"]\'')
		if not isinstance(class_folders, list):
			# A bare JSON string would otherwise be iterated character by character.
			parser.error('--class_folders must be a JSON list of folder names')

		# argparse already converted the numeric options via ``type=``.
		self.path = args.path
		self.class_folders = class_folders
		self.dim = args.dim
		self.lr = args.lr
		self.batch_size = args.batch_size
		self.epochs = args.epochs
		self.initial_layers_to_freeze = args.initial_layers_to_freeze
		self.model = args.model
		self.folds = args.folds
		self.outdir = args.outdir
	
	
	def get_im_cv2(self,path,dim=224):
		"""Load the image stored at ``path`` and resize it to ``dim`` x ``dim``.

		Parameters
		----------
		path : path of the image file to read with ``cv2.imread``.
		dim  : edge length in pixels of the square output image.

		Returns the resized image array produced by ``cv2.resize``.

		Raises IOError when OpenCV cannot read the file.
		"""
		img = cv2.imread(path)
		if img is None:
			# cv2.imread reports an unreadable or missing file by returning None;
			# fail here with the offending path instead of inside cv2.resize.
			raise IOError('Could not read image: ' + str(path))
		# The third positional parameter of cv2.resize is ``dst``, so the
		# interpolation flag has to be passed by keyword.
		return cv2.resize(img, (dim,dim), interpolation=cv2.INTER_LINEAR)

	# Pre Process the Images based on the ImageNet pre-trained model Image transformation
	def pre_process(self,img):
		img[:,:,0] = img[:,:,0] - 103.939
		img[:,:,1] = img[:,:,0] - 116.779
		img[:,:,2] = img[:,:,0] - 123.68
		return img
	   
	# Function to build X, y in numpy format based on the train/validation datasets
	def read_data(self,class_folders,path,num_class,dim,train_val='train'):
		"""Load every image below ``path/train_val/<class folder>`` into arrays.

		Parameters
		----------
		class_folders : iterable of folder names of the form 'class<N>'; the
		                integer after 'class' is used as the label.
		path          : root directory holding the ``train_val`` sub-directory.
		num_class     : number of classes for the one-hot encoding.
		dim           : edge length every image is resized to.
		train_val     : name of the sub-directory to read ('train' by default).

		Returns
		-------
		(X, y) : the stacked pre-processed images and the one-hot label matrix.
		"""
		print(train_val)
		images,labels = [],[]
		for c in class_folders:
			path_class = os.path.join(path,str(train_val),str(c))
			# The label is encoded in the folder name, so parse it once per folder.
			label = int(c.split('class')[1])
			# os.listdir order is arbitrary; sorting keeps the sample order
			# reproducible between runs.
			for f in sorted(os.listdir(path_class)):
				# Pass ``dim`` through so the requested size is actually applied.
				img = self.get_im_cv2(os.path.join(path_class,f),dim)
				images.append(self.pre_process(img))
				labels.append(label)
		labels = keras.utils.np_utils.to_categorical(np.array(labels),num_class)
		return np.array(images),labels
		
	def inception_pseudo(self,dim=224,freeze_layers=30,full_freeze='N',num_class=5):
		"""Build an InceptionV3-based classifier for transfer learning.

		The ImageNet-weighted InceptionV3 base (without its top) is followed by
		global average pooling, two Dense(512, relu) + Dropout(0.5) stages and a
		softmax output layer.

		Parameters
		----------
		dim           : accepted for interface compatibility; not used, the base
		                network is created without a fixed input shape.
		freeze_layers : number of leading base layers to freeze.
		full_freeze   : freezing is applied only when this is not 'N'.
		num_class     : number of units of the softmax output (default 5).

		Returns the assembled, uncompiled Keras ``Model``.
		"""
		model = InceptionV3(weights='imagenet',include_top=False)
		x = model.output
		x = GlobalAveragePooling2D()(x)
		x = Dense(512, activation='relu')(x)
		x = Dropout(0.5)(x)
		x = Dense(512, activation='relu')(x)
		x = Dropout(0.5)(x)
		out = Dense(num_class,activation='softmax')(x)
		# Use the ``inputs``/``outputs`` keyword pair; ``input`` is the legacy spelling.
		model_final = Model(inputs=model.input,outputs=out)
		if full_freeze != 'N':
			for layer in model.layers[0:freeze_layers]:
				layer.trainable = False
		return model_final

	# ResNet50 Model for transfer Learning 
	def resnet_pseudo(self,dim=224,freeze_layers=10,full_freeze='N',num_class=5):
		"""Build a ResNet50-based classifier for transfer learning.

		The ImageNet-weighted ResNet50 base (without its top) is followed by
		global average pooling, two Dense(512, relu) + Dropout(0.5) stages and a
		softmax output layer.

		Parameters
		----------
		dim           : accepted for interface compatibility; not used, the base
		                network is created without a fixed input shape.
		freeze_layers : number of leading base layers to freeze.
		full_freeze   : freezing is applied only when this is not 'N'.
		num_class     : number of units of the softmax output (default 5).

		Returns the assembled, uncompiled Keras ``Model``.
		"""
		model = ResNet50(weights='imagenet',include_top=False)
		x = model.output
		x = GlobalAveragePooling2D()(x)
		x = Dense(512, activation='relu')(x)
		x = Dropout(0.5)(x)
		x = Dense(512, activation='relu')(x)
		x = Dropout(0.5)(x)
		out = Dense(num_class,activation='softmax')(x)
		# Use the ``inputs``/``outputs`` keyword pair; ``input`` is the legacy spelling.
		model_final = Model(inputs=model.input,outputs=out)
		if full_freeze != 'N':
			for layer in model.layers[0:freeze_layers]:
				layer.trainable = False
		return model_final

	# VGG16 Model for transfer Learning 

	def VGG16_pseudo(self,dim=224,freeze_layers=10,full_freeze='N',num_class=5):
		"""Build a VGG16-based classifier for transfer learning.

		The ImageNet-weighted VGG16 base (without its top) is followed by global
		average pooling, two Dense(512, relu) + Dropout(0.5) stages and a
		softmax output layer.

		Parameters
		----------
		dim           : accepted for interface compatibility; not used, the base
		                network is created without a fixed input shape.
		freeze_layers : number of leading base layers to freeze.
		full_freeze   : freezing is applied only when this is not 'N'.
		num_class     : number of units of the softmax output (default 5).

		Returns the assembled, uncompiled Keras ``Model``.
		"""
		model = VGG16(weights='imagenet',include_top=False)
		x = model.output
		x = GlobalAveragePooling2D()(x)
		x = Dense(512, activation='relu')(x)
		x = Dropout(0.5)(x)
		x = Dense(512, activation='relu')(x)
		x = Dropout(0.5)(x)
		out = Dense(num_class,activation='softmax')(x)
		# Use the ``inputs``/``outputs`` keyword pair; ``input`` is the legacy spelling.
		model_final = Model(inputs=model.input,outputs=out)
		if full_freeze != 'N':
			for layer in model.layers[0:freeze_layers]:
				layer.trainable = False
		return model_final


	def train_model(self,train_X,train_y,n_fold=5,batch_size=16,epochs=40,dim=224,lr=1e-5,model='ResNet50'):
		"""Train one network per cross-validation fold and save each of them.

		Parameters
		----------
		train_X, train_y : image array and one-hot label matrix, indexed by the
		                   KFold split indices.
		n_fold           : number of cross-validation folds.
		batch_size       : batch size fed by the augmenting generator.
		epochs           : maximum number of epochs per fold.
		dim              : image dimension forwarded to the model builder.
		lr               : Adam learning rate.
		model            : architecture name: 'ResNet50', 'VGG16' or
		                   'InceptionV3' (matched case-insensitively).

		Returns
		-------
		dict mapping the fold number (1..n_fold) to the path of the saved model.

		Raises
		------
		ValueError if ``model`` is not one of the supported architectures.
		"""
		# Resolve the builder once, case-insensitively. The previous exact
		# comparison against 'Resnet50' never matched the default 'ResNet50',
		# leaving the fold without a freshly built network.
		builders = {
			'resnet50': self.resnet_pseudo,
			'vgg16': self.VGG16_pseudo,
			'inceptionv3': self.inception_pseudo,
		}
		build_model = builders.get(str(model).lower())
		if build_model is None:
			raise ValueError("Unknown model '%s'; expected ResNet50, VGG16 or InceptionV3" % (model,))

		model_save_dest = {}
		kf = KFold(n_splits=n_fold, random_state=0, shuffle=True)

		for k,(train_index,test_index) in enumerate(kf.split(train_X),1):
			X_train,X_test = train_X[train_index],train_X[test_index]
			y_train,y_test = train_y[train_index],train_y[test_index]

			# NOTE(review): full_freeze='N' means the builders freeze no layers,
			# so the parsed --initial_layers_to_freeze option is not applied
			# here -- confirm whether that is intended.
			model_final = build_model(dim=dim,freeze_layers=10,full_freeze='N')

			datagen = ImageDataGenerator(
					horizontal_flip = True,
					vertical_flip = True,
					width_shift_range = 0.1,
					height_shift_range = 0.1,
					channel_shift_range=0,
					zoom_range = 0.2,
					rotation_range = 20)

			adam = optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
			model_final.compile(optimizer=adam, loss=["categorical_crossentropy"],metrics=['accuracy'])
			reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.50,
					patience=3, min_lr=0.000001)

			# Checkpoint file of this fold. The name (including the fold index
			# offset by one) is kept as before and built once so the callback and
			# the reload below cannot drift apart.
			model_name = 'kera1-5fold-run-01-v1-fold-' + str('%02d' % (k + 1)) + '-run-' + str('%02d' % (1 + 1)) + '.check'

			callbacks = [
					EarlyStopping(monitor='val_loss', patience=10, mode='min', verbose=1),
					CSVLogger('keras-5fold-run-01-v1-epochs_ib.log', separator=',', append=False),
					reduce_lr,
					ModelCheckpoint(
							model_name,
							monitor='val_loss', mode='min',
							save_best_only=True,
							verbose=1)]

			# Whole number of batches needed to cover the training split once.
			steps = int(np.ceil(X_train.shape[0]/float(batch_size)))

			# The class weights are hard-coded for five classes (labels 0-4).
			model_final.fit_generator(datagen.flow(X_train,y_train, batch_size=batch_size),
					steps_per_epoch=steps,epochs=epochs,verbose=1,
					validation_data=(X_test,y_test),callbacks=callbacks,
					class_weight={0:0.012,1:0.12,2:0.058,3:0.36,4:0.43})

			del model_final
			# Drop the stored optimizer state from the best checkpoint before
			# reloading it; the context manager closes the file even on error.
			with h5py.File(model_name, 'r+') as f:
				if 'optimizer_weights' in f:
					del f['optimizer_weights']
			model_final = keras.models.load_model(model_name)
			model_name1 = self.outdir + str(model) + '___' + str(k)
			model_final.save(model_name1)
			model_save_dest[k] = model_name1

		return model_save_dest

	# Hold out dataset validation function

	def inference_validation(self,test_X,test_y,model_save_dest,n_class=5,folds=5):
		"""Score the averaged predictions of the fold models on a hold-out set.

		Each model stored under keys 1..folds of ``model_save_dest`` is loaded
		and its predictions for ``test_X`` are summed, then divided by ``folds``.

		Returns
		-------
		(pred_class, accuracy, kappa) : predicted class indices, the fraction of
		samples whose predicted class equals the argmax of ``test_y``, and the
		quadratic-weighted Cohen kappa between predictions and actual classes.
		"""
		n_samples = len(test_X)
		summed = np.zeros((n_samples,n_class))

		# Fold keys are 1-based.
		for fold_id in range(1,folds + 1):
			fold_model = keras.models.load_model(model_save_dest[fold_id])
			summed = summed + fold_model.predict(test_X)

		averaged = summed/(1.0*folds)
		pred_class = np.argmax(averaged,axis=1)
		act_class = np.argmax(test_y,axis=1)

		hits = np.sum([pred_class == act_class])
		accuracy = hits*1.0/n_samples
		kappa = cohen_kappa_score(pred_class,act_class,weights='quadratic')
		return pred_class,accuracy,kappa
	
	def main(self):
		"""Run the pipeline: load data, train per fold, validate, report.

		Reads the 'train' split, trains the fold models, evaluates their
		averaged predictions on the 'validation' split, dumps the fold -> model
		path dictionary to ``<outdir>dict_model.pkl`` and prints the kappa
		score, the accuracy and the elapsed time.
		"""
		began = time.time()
		print('Data Processing..')
		self.num_class = len(self.class_folders)

		# Arguments shared by both read_data calls.
		data_args = (self.class_folders,self.path,self.num_class,self.dim)

		train_X,train_y = self.read_data(*data_args,train_val='train')
		train_opts = dict(n_fold=self.folds,batch_size=self.batch_size,
				epochs=self.epochs,dim=self.dim,lr=self.lr,model=self.model)
		self.model_save_dest = self.train_model(train_X,train_y,**train_opts)
		print("Model saved to dest:",self.model_save_dest)

		test_X,test_y = self.read_data(*data_args,train_val='validation')
		_,accuracy,kappa = self.inference_validation(test_X,test_y,self.model_save_dest,
				n_class=self.num_class,folds=self.folds)
		joblib.dump(self.model_save_dest,self.outdir  + "dict_model.pkl")

		rule = "-----------------------------------------------------"
		print(rule)
		print("Kappa score:", kappa)
		print("accuracy:", accuracy)
		print("End of training")
		print(rule)
		print("Processing Time",time.time() - began,' secs')
		
if __name__ == "__main__":
	# Script entry point: build the trainer from the command-line options
	# and run the complete train / validate pipeline.
	trainer = TransferLearning()
	trainer.main()