python source code of didineuralmodel

"""Didi nerual network model implemented by using tensorflow"""

import sys
import os
sys.path.insert(0, os.path.abspath('..'))

import tensorflow as tf
import numpy as np
import logging
from bokeh.util.logconfig import level
import sys
from utility.tfbasemodel import TFModel
from preprocess.preparedata import PrepareData
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from evaluation.sklearnmape import mean_absolute_percentage_error
from utility.earlystopmonitor import EarlyStopMonitor

class DididNeuralNetowrk(TFModel, PrepareData, EarlyStopMonitor):
    def __init__(self):
        TFModel.__init__(self)
        PrepareData.__init__(self)
        EarlyStopMonitor.__init__(self)
        self.num_steps = 30000
        self.batch_size = 128
        self.early_stopping_rounds = None
        self.summaries_dir = './logs/didi'
        self.dropout= 0.9
        self.usedFeatures = [101,102,103,104,105,     #106,107, 
                             201, 204, #205,206, 203,
                             301,
                             401,402,
                             501,502,503,      #504,505,506,507,
                            601,602,603,604,605,606,
                             8801,8802
                             ]
        self.train_validation_foldid = -2 #0.415
#         self.train_validation_foldid = -1 #0.425
#         self.train_validation_foldid = -3 #0.430
        logging.getLogger().addHandler(logging.FileHandler('logs/didnerual.log', mode='w'))
        return
    def add_visualize_node(self):
        # Merge all the summaries and write them out to /tmp/mnist_logs (by default)
        self.merged = tf.merge_all_summaries()
        self.train_writer = tf.train.SummaryWriter(self.summaries_dir+ '/train',
                                        self.graph)
        self.test_writer = tf.train.SummaryWriter(self.summaries_dir + '/test')

        return
    def get_input(self):
        # Input data.
        # Load the training, validation and test data into constants that are
        # attached to the graph.
        self.x_train, self.y_train,self.x_validation,self.y_validation = self.get_train_validationset()
        self.x_train, self.y_train,self.x_validation,self.y_validation = self.x_train.as_matrix(), self.y_train.as_matrix().reshape((-1,1)),\
                                                                         self.x_validation.as_matrix(),self.y_validation.as_matrix().reshape((-1,1))
#         self.x_train, self.y_train,self.x_validation,self.y_validation = self.x_train.astype(np.float32), self.y_train.astype(np.float32),\
#                                                                          self.x_validation.astype(np.float32),self.y_validation.astype(np.float32)
        sc = MinMaxScaler()
        sc.fit(self.x_train)
        self.x_train= sc.transform(self.x_train)
        self.x_validation= sc.transform(self.x_validation)
        
        self.inputlayer_num = len(self.get_used_features())
        self.outputlayer_num = 1
        
        # Input placehoolders
        with tf.name_scope('input'):
            self.x = tf.placeholder(tf.float32, [None, self.inputlayer_num], name='x-input')
            self.y_true = tf.placeholder(tf.float32, [None, self.outputlayer_num ], name='y-input')
        self.keep_prob = tf.placeholder(tf.float32, name='drop_out')
        
        return
    def add_inference_node(self):
        #output node self.pred
        hidden1 = self.nn_layer(self.x, 500, 'layer1')
        dropped = self.dropout_layer(hidden1)
        
        hidden1 = self.nn_layer(dropped, 300, 'layer2')
        dropped = self.dropout_layer(hidden1)
        
        self.y_pred = self.nn_layer(dropped, self.outputlayer_num , 'layer3')
        return
    def add_loss_node(self):
        #output node self.loss
 
        self.__add_mape_loss()
#         self.__add_mse_loss()
        return
    def __add_mse_loss(self):
        with tf.name_scope('loss'):
            diff = tf.square(self.y_true - self.y_pred)
            with tf.name_scope('mse'):
                self.loss = tf.reduce_mean(diff)
            tf.scalar_summary('mse', self.loss)
        return
    def __add_mape_loss(self):
        with tf.name_scope('loss'):
            diff = tf.abs((self.y_true - self.y_pred)/self.y_true)
            with tf.name_scope('mape'):
                self.loss = tf.reduce_mean(tf.cast(diff, tf.float32))
            tf.scalar_summary('loss', self.loss)
        return
    def add_optimizer_node(self):
        #output node self.train_step
        with tf.name_scope('train'):
            self.train_step = tf.train.AdamOptimizer(5.0e-4).minimize(self.loss)
        return
    def add_accuracy_node(self):
        #output node self.accuracy
        with tf.name_scope('evaluationmetrics'):
            with tf.name_scope('error_square'):
                error_square = tf.abs((self.y_true - self.y_pred)/self.y_true)
            with tf.name_scope('mape'):
                self.accuracy = tf.reduce_mean(tf.cast(error_square, tf.float32))
            tf.scalar_summary('mape', self.accuracy)
        return
    def add_evalmetrics_node(self):
        self.add_accuracy_node()
        return
    def feed_dict(self,feed_type):
        """Make a TensorFlow feed_dict: maps data onto Tensor placeholders."""
        if feed_type == "train":
            xs, ys = self.get_next_batch(self.x_train, self.y_train, self.batch_size)
            k = self.dropout
            return {self.x: xs, self.y_true: ys, self.keep_prob: k}
        if feed_type == "validation":
            xs, ys = self.x_validation, self.y_validation
            k = 1.0
            return {self.x: xs, self.y_true: ys, self.keep_prob: k}
        if feed_type == "validation_wholetrain":
            xs, ys = self.x_train, self.y_train
            k = 1.0
            return {self.x: xs, self.y_true: ys, self.keep_prob: k}
        # Now we are feeding test data into the neural network
        if feed_type == "test":
            xs= self.x_test
            k = 1.0
            return {self.x: xs, self.keep_prob: k}

    def run_graph(self):
        logging.debug("computeGraph")
        with tf.Session(graph=self.graph) as sess:
            tf.initialize_all_variables().run()
            logging.debug("Initialized")
            for step in range(1, self.num_steps + 1):
                summary, _ , train_loss, train_metrics= sess.run([self.merged, self.train_step, self.loss, self.accuracy], feed_dict=self.feed_dict("train"))
                self.train_writer.add_summary(summary, step)
                
                if step % 100 == 0:
                    summary, validation_loss, validation_metrics = sess.run([self.merged, self.loss, self.accuracy], feed_dict=self.feed_dict("validation"))
                    self.test_writer.add_summary(summary, step)
#                     loss_train = sess.run(self.loss, feed_dict=self.feed_dict("validation_wholetrain"))
                    logging.info("Step {}/{}, train/test: {:.3f}/{:.3f}, train/test loss: {:.3f}/{:.3f}".format(step, self.num_steps, train_metrics, validation_metrics,\
                                                                                                                train_loss, validation_loss))
                    if self.get_stop_decisision(step, -validation_metrics):
                        logging.info("stop here due to early stopping")
                        return 
    
#                     y_pred = sess.run(self.y_pred, feed_dict=self.feed_dict("validation"))
#                     logging.info("validation mape :{:.3f}".format(mean_absolute_percentage_error(self.y_validation.reshape(-1), y_pred.reshape(-1))))
        return


if __name__ == "__main__":   
    obj= DididNeuralNetowrk()
    obj.run()