# -*- coding: utf-8 -*-
"""

Here are the sequence generators
including LSTM generator and Hawkes generator

@author: hongyuan
"""

import pickle
import time
import numpy
import theano
from theano import sandbox
import theano.tensor as tensor
import os
#import scipy.io
from collections import defaultdict
from theano.tensor.shared_randomstreams import RandomStreams
import utils
import struct

dtype=theano.config.floatX

class HawkesGen(object):
    '''
    here is the sequence generator using Hawkes process
    '''
    def __init__(self, settings):
        '''
        we follow the definition of multivariate Hawkes process
        mu is the base intensity and
        alpha is the effect matrix and
        delta is the decay matrix
        we randomly sample mu, alpha, delta
        '''
        self.args = settings['args']
        self.sum_for_time = settings['sum_for_time']
        numpy.random.seed(
            settings['seed_random']
        )
        print "initializing ... "
        if settings['path_pre_train'] == None:
            self.dim_process = settings['dim_process']
            self.mu = numpy.float32(
                numpy.random.uniform(
                    low=0.0, high=1.0,
                    size=(self.dim_process,)
                )
            )
            self.alpha = numpy.float32(
                numpy.random.uniform(
                    low=0.0, high=1.0,
                    size=(self.dim_process, self.dim_process)
                )
            )
            self.delta = numpy.float32(
                numpy.random.uniform(
                    low=10.0, high=20.0,
                    size=(self.dim_process, self.dim_process)
                )
            )
        else:
            path_pre_train = os.path.abspath(
                settings['path_pre_train']
            )
            with open(path_pre_train, 'rb') as f:
                model_pre_train = pickle.load(f)
            self.dim_process = model_pre_train['dim_process']
            self.mu = model_pre_train['mu']
            self.alpha = model_pre_train['alpha']
            self.delta = model_pre_train['delta']
        #self.intensity = numpy.copy(self.mu)
        self.name = 'HawkesGen'
        #
        self.intensity = numpy.copy(self.mu)
        self.one_seq = []
        #self.flag_continue = True
        self.cnt_total_event = numpy.int32(len(self.one_seq))
        print "done "
        #
    #
    def set_params(self):
        print "set the params for missing data experiments ... "
        self.dim_process = numpy.int32(4)
        self.mu = numpy.float32(
            numpy.ones((self.dim_process, ))
        )
        self.alpha = numpy.float32(
            numpy.array(
                [
                    [0.0, 1.0, 0.0, 0.0],
                    [0.0, 0.0, 1.0, 0.0],
                    [0.0, 0.0, 0.0, 0.0],
                    [0.0, 0.0, 0.0, 0.0]
                ]
            )
        )
        self.delta = numpy.float32(
            numpy.array(
                [
                    [1.0, 1.0, 1.0, 1.0],
                    [1.0, 1.0, 1.0, 1.0],
                    [1.0, 1.0, 1.0, 1.0],
                    [1.0, 1.0, 1.0, 1.0]
                ]
            )
        )
    #
    def set_args(self, dict_args):
        self.args = dict_args
    #
    #
    def save_model(self, file_save):
        print "saving model of generator ... "
        model_dict = {
            'mu': numpy.copy(self.mu),
            'alpha': numpy.copy(self.alpha),
            'delta': numpy.copy(self.delta),
            'dim_process': self.dim_process,
            'name': self.name,
            'args': self.args
        }
        with open(file_save, 'wb') as f:
            pickle.dump(model_dict, f)
    #
    def restart_sequence(self):
        # clear the events memory and reset starting time is 0
        self.intensity = numpy.copy(self.mu)
        self.one_seq = []
        #self.flag_continue = True
        self.cnt_total_event = numpy.int32(len(self.one_seq))
        #
    #
    #
    def compute_intensity_given_past(self, time_current):
        # compute the intensity of current time
        # given the past events
        # initiliaze with mu
        # we do not neet to check
        # if time_current exceeds the sequence
        # since it is automatically garanteed
        self.intensity = numpy.copy(self.mu)
        for event in self.one_seq:
            time_since_start = event['time_since_start']
            #if time_current > time_since_start:
                # if this event is counted as * past event *
            type_event = event['type_event']
            change_time = time_current - time_since_start
            decay_frac = numpy.exp(
                -self.delta[:, type_event] * change_time
            )
            # self.intensity += self.alpha[:, idx_to_occur]
            self.intensity += numpy.copy(
                self.alpha[:, type_event] * decay_frac
            )
        # intensity computation is finished
    #
    #
    #
    # we can try using another method to sample data, which is quicker
    # we can first sample a point with rate \sum \lambda
    # and then sample the type based on the \lambda_k
    #
    def sample_time_given_type(self, type_event):
        # type_event is the type of event for which we want to sample the time
        # it is k in our model formulation in paper
        time_current = numpy.float32(0.0)
        if len(self.one_seq) > 0:
            time_current = self.one_seq[-1]['time_since_start']
        #
        self.compute_intensity_given_past(time_current)
        intensity_hazard = numpy.copy(
            self.intensity[type_event]
        )
        #
        u = 1.5
        while u >= 1.0:
            E = numpy.random.exponential(
                scale=1.0, size=None
            )
            U = numpy.random.uniform(
                low=0.0, high=1.0, size=None
            )
            time_current += E / intensity_hazard
            self.compute_intensity_given_past(time_current)
            u = U * intensity_hazard / self.intensity[type_event]
            # this snippet below is for adaptive thining
            # it can speed things up
            # by decreasing upper bound
            # but it is closed when data is randomly generated at the beginning of this project
            intensity_hazard = numpy.copy(
                self.intensity[type_event]
            )
        #
        return time_current
        #
    #
    #
    def sample_time_for_all_type(self):
        # type_event is the type of event for which we want to sample the time
        # it is k in our model formulation in paper
        time_current = numpy.float32(0.0)
        if len(self.one_seq) > 0:
            time_current = self.one_seq[-1]['time_since_start']
        #
        self.compute_intensity_given_past(time_current)
        intensity_hazard = numpy.sum(self.intensity)
        #
        u = 1.5
        while u >= 1.0:
            E = numpy.random.exponential(
                scale=1.0, size=None
            )
            U = numpy.random.uniform(
                low=0.0, high=1.0, size=None
            )
            time_current += E / intensity_hazard
            self.compute_intensity_given_past(time_current)
            u = U * intensity_hazard / numpy.sum(self.intensity)
            # this snippet below is for adaptive thining
            # it can speed things up
            # by decreasing upper bound
            # but it is toggled off when data is randomly generated at the beginning of this project
            intensity_hazard = numpy.sum(self.intensity)
        #
        return time_current
        #
    #
    #
    def sample_one_event_sep(self):
        time_of_happen = numpy.zeros(
            (self.dim_process,), dtype=dtype
        )
        for type_event in range(self.dim_process):
            # sample one event using "thinning algorithm"
            time_of_happen[type_event] = numpy.copy(
                self.sample_time_given_type(
                    type_event
                )
            )
        #
        time_since_start_new = numpy.min(time_of_happen)
        type_event_new = numpy.argmin(time_of_happen)
        return time_since_start_new, type_event_new
    #
    #
    def sample_one_event_tog(self):
        time_since_start_new = self.sample_time_for_all_type()
        self.compute_intensity_given_past(
            time_since_start_new
        )
        prob = self.intensity / numpy.sum(self.intensity)
        type_event_new = numpy.random.choice(
            range(self.dim_process), p = prob
        )
        return time_since_start_new, numpy.int32(type_event_new)
    #
    #
    def sample_one_event(self):
        if self.sum_for_time:
            return self.sample_one_event_tog()
        else:
            return self.sample_one_event_sep()
    #
    #
    def gen_one_seq(self, max_len):
        self.restart_sequence()
        #Liiniger (2009), p. 28, describes a "thinning algorithm":
        #generate one event of each type, take the minimum,
        #and discard the others.
        #Details found in my paper write-up
        #
        #max_len is a pre-sampled value to set the length of seq
        # initialize the seq
        time_since_start = numpy.float32(0.0)
        time_since_start_each_event = numpy.zeros(
            (self.dim_process,), dtype=dtype
        )
        #
        for idx_event in range(max_len):
            time_since_start_new, type_event_new = self.sample_one_event()
            self.cnt_total_event += 1
            #
            # update sequence
            time_since_last_event = time_since_start_new - time_since_start
            time_since_start = time_since_start_new
            time_since_last_same_event = time_since_start - time_since_start_each_event[type_event_new]
            time_since_start_each_event[type_event_new] = time_since_start
            self.one_seq.append(
                {
                    'idx_event': self.cnt_total_event,
                    'type_event': type_event_new,
                    'time_since_start': time_since_start,
                    'time_since_last_event': time_since_last_event,
                    'time_since_last_same_event': time_since_last_same_event
                }
            )
            #
        #
        #
        #
    #
    #
    def gen_seqs(self, settings):
        #
        #print settings
        num_seqs = settings['num_seqs']
        #
        self.list_seqs = []
        cnt_seqs = 0
        #for idx_seq in range(num_seqs):
        while cnt_seqs < num_seqs:
            #
            max_len = numpy.int32(
                round(
                    numpy.random.uniform(
                        low=settings['min_len'],
                        high=settings['max_len']
                    )
                )
            )
            #
            self.gen_one_seq(max_len)
            self.list_seqs.append(self.one_seq)
            cnt_seqs += 1
            if cnt_seqs % 10 == 9:
                print "idx seq of gen : ", (cnt_seqs, self.name)
                print "total number of seqs : ", num_seqs
    #
    #
    def print_some(self):
        print "printing some seqs ... "
        for idx_seq in range(10):
            print "the id of this seq is : ", idx_seq
            seq = self.list_seqs[idx_seq]
            list_events = []
            list_time = []
            list_dtime = []
            list_items = []
            for event_item in seq:
                list_events.append(event_item['type_event'])
                list_time.append(
                    round(event_item['time_since_start'], 4)
                )
                list_dtime.append(
                    round(event_item['time_since_last_event'], 4)
                )
                list_items.append(
                    (
                        event_item['type_event'],
                        round(
                            event_item['time_since_last_event'], 4
                        )
                    )
                )
            print "the events, time and diff time for : ", idx_seq
            print list_events
            print list_time
            print list_dtime
            print "the list of items is : "
            print list_items
    #
    #
    def save_seqs(self, file_save):
        with open(file_save, 'wb') as f:
            pickle.dump(self.list_seqs, f)


class HawkesInhibGen(object):
    '''
    here is the sequence generator using Hawkes process with inhibition
    '''
    def __init__(self, settings):
        '''
        we follow the definition of multivariate Hawkes process
        mu is the base intensity and
        alpha is the effect matrix and
        delta is the decay matrix
        we randomly sample mu, alpha, delta
        '''
        print "initializing ... "
        self.args = settings['args']
        self.sum_for_time = settings['sum_for_time']
        numpy.random.seed(
            settings['seed_random']
        )
        if settings['path_pre_train'] == None:
            self.dim_process = settings['dim_process']
            self.mu = numpy.float32(
                numpy.random.uniform(
                    low = -1.0,
                    high = 1.0,
                    size = (self.dim_process,)
                )
            )
            self.alpha = numpy.float32(
                numpy.random.uniform(
                    low = -1.0,
                    high = 1.0,
                    size = (self.dim_process, self.dim_process)
                )
            )
            self.delta = numpy.float32(
                numpy.random.uniform(
                    low=10.0, high=20.0,
                    size=(self.dim_process, self.dim_process)
                )
            )
        else:
            path_pre_train = os.path.abspath(
                settings['path_pre_train']
            )
            with open(path_pre_train, 'rb') as f:
                model_pre_train = pickle.load(f)
            self.dim_process = model_pre_train['dim_process']
            self.mu = model_pre_train['mu']
            self.alpha = model_pre_train['alpha']
            self.delta = model_pre_train['delta']
        #self.intensity = numpy.copy(self.mu)
        self.name = 'HawkesInhibGen'
        #
        self.intensity_tilde = numpy.copy(self.mu)
        self.intensity = numpy.log(
            numpy.float32(1.0) + numpy.exp(
                self.intensity_tilde
            )
        )
        #
        self.intensity_tilde_ub = None
        self.intensity_ub = None
        #
        self.one_seq = []
        #self.flag_continue = True
        self.cnt_total_event = numpy.int32(len(self.one_seq))
        print "done "
        #
        #
    #
    #
    def set_args(self, dict_args):
        self.args = dict_args
    #
    #
    def soft_relu(self, x):
        return numpy.log(numpy.float32(1.0)+numpy.exp(x))
    #
    def hard_relu(self, x):
        return numpy.float32(0.5) * (x + numpy.abs(x) )
        #
    #
    def save_model(self, file_save):
        print "saving model of generator ... "
        model_dict = {
            'mu': numpy.copy(self.mu),
            'alpha': numpy.copy(self.alpha),
            'delta': numpy.copy(self.delta),
            'dim_process': self.dim_process,
            'name': self.name,
            'args': self.args
        }
        with open(file_save, 'wb') as f:
            pickle.dump(model_dict, f)
    #
    def restart_sequence(self):
        # clear the events memory and reset starting time is 0
        self.intensity_tilde = numpy.copy(self.mu)
        self.intensity = self.soft_relu(self.intensity_tilde)
        #
        self.intensity_tilde_ub = None
        self.intensity_ub = None
        #
        self.one_seq = []
        #self.flag_continue = True
        self.cnt_total_event = numpy.int32(len(self.one_seq))
        #
    #
    #
    def compute_intensity_given_past(self, time_current):
        # compute the intensity of current time
        # given the past events
        # initiliaze with mu
        self.intensity_tilde = numpy.copy(self.mu)
        for event in self.one_seq:
            time_since_start = event['time_since_start']
            #if time_current > time_since_start:
                # if this event is counted as * past event *
            type_event = event['type_event']
            change_time = time_current - time_since_start
            decay_frac = numpy.exp(
                -self.delta[:, type_event] * change_time
            )
            # self.intensity += self.alpha[:, idx_to_occur]
            self.intensity_tilde += numpy.copy(
                self.alpha[:, type_event] * decay_frac
            )
        self.intensity = self.soft_relu(
            self.intensity_tilde
        )
        # intensity computation is finished
    #
    #
    def compute_intensity_upper_bound(self, time_current):
        # compute the upper bound of intensity
        # at the current time
        self.intensity_tilde_ub = numpy.copy(
            self.mu
        )
        # to speed up, this mu is not taken relu
        # but it is still a upper bound
        #self.hard_relu(
        #    self.mu
        #)
        for event in self.one_seq:
            time_since_start = event['time_since_start']
            #if time_current > time_since_start:
            type_event = event['type_event']
            change_time = time_current - time_since_start
            decay_frac = numpy.exp(
                -self.delta[:, type_event] * change_time
            )
            self.intensity_tilde_ub += numpy.copy(
                self.hard_relu(
                    self.alpha[:, type_event]
                ) * decay_frac
            )
        self.intensity_ub = self.soft_relu(
            self.intensity_tilde_ub
        )
    #
    #
    def sample_time_given_type(self, type_event):
        # type_event is the type of event for which we want to sample the time
        # it is the little k in our model formulation in paper
        time_current = numpy.float32(0.0)
        if len(self.one_seq) > 0:
            time_current = self.one_seq[-1]['time_since_start']
        #
        #self.compute_intensity(time_current)
        self.compute_intensity_upper_bound(time_current)
        #
        intensity_hazard = numpy.copy(
            self.intensity_ub[type_event]
        )
        #
        u = 1.5
        while u >= 1.0:
            E = numpy.random.exponential(
                scale=1.0, size=None
            )
            U = numpy.random.uniform(
                low=0.0, high=1.0, size=None
            )
            time_current += ( E / intensity_hazard )
            self.compute_intensity_given_past(time_current)
            u = U * intensity_hazard / self.intensity[type_event]
            # for adaptive thinning,
            # decrease the upper bound
            # this is not used at the beginning of the project
            # it is only used for sampling given pre-trained models
            self.compute_intensity_upper_bound(time_current)
            intensity_hazard = numpy.copy(
                self.intensity_ub[type_event]
            )
        #
        return time_current
    #
    #
    def sample_time_for_all_type(self):
        # type_event is the type of event for which we want to sample the time
        # it is the little k in our model formulation in paper
        time_current = numpy.float32(0.0)
        if len(self.one_seq) > 0:
            time_current = self.one_seq[-1]['time_since_start']
        #
        #self.compute_intensity(time_current)
        self.compute_intensity_upper_bound(time_current)
        intensity_hazard = numpy.sum(self.intensity_ub)
        #
        u = 1.5
        while u >= 1.0:
            E = numpy.random.exponential(
                scale=1.0, size=None
            )
            U = numpy.random.uniform(
                low=0.0, high=1.0, size=None
            )
            time_current += ( E / intensity_hazard )
            self.compute_intensity_given_past(time_current)
            u = U * intensity_hazard / numpy.sum(self.intensity)
            # for adaptive thinning,
            # decrease the upper bound
            # this is not used at the beginning of the project
            # it is only used for sampling given pre-trained models
            '''
            self.compute_intensity_upper_bound(time_current)
            intensity_hazard = numpy.sum(self.intensity_ub)
            '''
        return time_current
    #
    #
    def sample_one_event_sep(self):
        time_of_happen = numpy.zeros(
            (self.dim_process,), dtype=dtype
        )
        for type_event in range(self.dim_process):
            # sample one event using "thinning algorithm"
            time_of_happen[type_event] = numpy.copy(
                self.sample_time_given_type(
                    type_event
                )
            )
        #
        time_since_start_new = numpy.min(time_of_happen)
        type_event_new = numpy.argmin(time_of_happen)
        return time_since_start_new, type_event_new
    #
    #
    def sample_one_event_tog(self):
        time_since_start_new = self.sample_time_for_all_type()
        self.compute_intensity_given_past(
            time_since_start_new
        )
        prob = self.intensity / numpy.sum(self.intensity)
        type_event_new = numpy.random.choice(
            range(self.dim_process), p = prob
        )
        return time_since_start_new, numpy.int32(type_event_new)
    #
    #
    def sample_one_event(self):
        if self.sum_for_time:
            return self.sample_one_event_tog()
        else:
            return self.sample_one_event_sep()
    #
    #
    def gen_one_seq(self, max_len):
        self.restart_sequence()
        '''
        Liiniger (2009), p. 28, describes a "thinning algorithm":
        generate one event of each type, take the minimum,
        and discard the others.
        Details found in my paper write-up
        #
        max_len is a pre-sampled value to set the length of seq
        '''
        # initialize the seq
        time_since_start = numpy.float32(0.0)
        time_since_start_each_event = numpy.zeros(
            (self.dim_process,), dtype=dtype
        )
        #
        for idx_event in range(max_len):
            time_since_start_new, type_event_new = self.sample_one_event()
            self.cnt_total_event += 1
            #
            # update sequence
            time_since_last_event = time_since_start_new - time_since_start
            time_since_start = time_since_start_new
            time_since_last_same_event = time_since_start - time_since_start_each_event[type_event_new]
            time_since_start_each_event[type_event_new] = time_since_start
            self.one_seq.append(
                {
                    'idx_event': self.cnt_total_event,
                    'type_event': type_event_new,
                    'time_since_start': time_since_start,
                    'time_since_last_event': time_since_last_event,
                    'time_since_last_same_event': time_since_last_same_event
                }
            )
            #
        #
    #
    #
    def gen_seqs(self, settings):
        #
        #print settings
        num_seqs = settings['num_seqs']
        #
        self.list_seqs = []
        cnt_seqs = 0
        #for idx_seq in range(num_seqs):
        while cnt_seqs < num_seqs:
            #
            max_len = numpy.int32(
                round(
                    numpy.random.uniform(
                        low=settings['min_len'],
                        high=settings['max_len']
                    )
                )
            )
            #
            self.gen_one_seq(max_len)
            self.list_seqs.append(self.one_seq)
            cnt_seqs += 1
            if cnt_seqs % 10 == 9:
                print "idx seq of gen : ", (cnt_seqs, self.name)
                print "total number of seqs : ", num_seqs
    #
    #
    def print_some(self):
        print "printing some seqs ... "
        for idx_seq in range(10):
            print "the id of this seq is : ", idx_seq
            seq = self.list_seqs[idx_seq]
            list_events = []
            list_time = []
            list_dtime = []
            list_items = []
            for event_item in seq:
                list_events.append(event_item['type_event'])
                list_time.append(
                    round(event_item['time_since_start'], 4)
                )
                list_dtime.append(
                    round(event_item['time_since_last_event'], 4)
                )
                list_items.append(
                    (
                        event_item['type_event'],
                        round(
                            event_item['time_since_last_event'], 4
                        )
                    )
                )
            print "the events, time and diff time for : ", idx_seq
            print list_events
            print list_time
            print list_dtime
            print "the list of items is : "
            print list_items
    #
    #
    def save_seqs(self, file_save):
        with open(file_save, 'wb') as f:
            pickle.dump(self.list_seqs, f)


class NeuralHawkesCTLSTM(object):
    '''
    here is the sequence generator
    using Neural Hawkes process with continuous-time LSTM
    '''
    def __init__(self, settings):
        #
        print "initializing generator ... "
        self.args = settings['args']
        self.sum_for_time = settings['sum_for_time']
        self.dim_float = numpy.int32(32)
        if settings['path_pre_train'] == None:
            print "random parameters ... "
            self.dim_process = settings['dim_process']
            self.dim_model = settings['dim_LSTM']
            self.dim_time = self.dim_float
            #
            numpy.random.seed(
                settings['seed_random']
            )
            #
            #self.scale = numpy.float32(
            #    numpy.random.uniform(
            #        low = 1e-3, high = 2.0,
            #        size = (self.dim_process, )
            #    )
            #)
            self.scale = numpy.float32(
                numpy.ones( (self.dim_process, ) )
            )
            #
            self.W_alpha = numpy.float32(
                numpy.random.uniform(
                    low = -1.0,
                    high = 1.0,
                    size = (self.dim_model, self.dim_process)
                )
            )
            self.Emb_event = numpy.float32(
                numpy.random.uniform(
                    low = -1.0,
                    high = 1.0,
                    size = (
                        self.dim_process + numpy.int32(1),
                        self.dim_model
                    )
                )
            )
            self.W_recur = numpy.float32(
                numpy.random.uniform(
                    low = -1.0,
                    high = 1.0,
                    size = (
                        2 * self.dim_model,
                        7 * self.dim_model
                    )
                )
            )
            self.b_recur = numpy.float32(
                numpy.random.uniform(
                    low = -1.0,
                    high = 1.0,
                    size = (7 * self.dim_model, )
                )
            )
        else:
            print "read pretrained model ... "
            path_pre_train = os.path.abspath(
                settings['path_pre_train']
            )
            with open(path_pre_train, 'rb') as f:
                model_pre_train = pickle.load(f)
            self.dim_process = model_pre_train['dim_process']
            self.dim_model = model_pre_train['dim_model']
            self.dim_time = model_pre_train['dim_time']
            #
            self.scale = model_pre_train['scale']
            self.W_alpha = model_pre_train['W_alpha']
            self.Emb_event = model_pre_train['Emb_event']
            self.W_recur = model_pre_train['W_recur']
            self.b_recur = model_pre_train['b_recur']
            #
        #
        #self.intensity = numpy.copy(self.mu)
        self.name = 'NeuralHawkesGenCTLSTM'
        #
        self.intensity_tilde = None
        self.intensity = None
        #
        self.intensity_tilde_ub = None
        self.intensity_ub = None
        #
        self.one_seq = []
        # initialization for LSTM states
        self.one_seq.append(
            {
                'idx_event': numpy.int32(0),
                'type_event': self.dim_process,
                'time_since_start': numpy.float32(0.0),
                'time_since_last_event': numpy.float32(0.0),
                'time_since_last_same_event': numpy.float32(0.0)
            }
        )
        #self.hidden_t = numpy.zeros(
        #    (self.dim_model, ), dtype = dtype
        #)
        self.cell_t = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        self.cell_target = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        self.cell_decay = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        self.gate_output = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        #self.flag_continue = True
        self.cnt_total_event = numpy.int32(len(self.one_seq) )
        print "initialization done "
        #
    #
    def set_args(self, dict_args):
        self.args = dict_args
    #
    def soft_relu(self, x):
        return numpy.log(numpy.float32(1.0)+numpy.exp(x))
    #
    def soft_relu_scale(self, x):
        # last dim of x is dim_process
        x /= self.scale
        y = numpy.log(numpy.float32(1.0)+numpy.exp(x))
        y *= self.scale
        return y
    #
    def hard_relu(self, x):
        return numpy.float32(0.5) * (x + numpy.abs(x) )
        #
    #
    def save_model(self, file_save):
        print "saving model of generator ... "
        model_dict = {
            'scale': numpy.copy(self.scale),
            'W_alpha': numpy.copy(self.W_alpha),
            'Emb_event': numpy.copy(self.Emb_event),
            'W_recur': numpy.copy(self.W_recur),
            'b_recur': numpy.copy(self.b_recur),
            'dim_process': self.dim_process,
            'dim_model': self.dim_model,
            'dim_time': self.dim_time,
            'dim_float': self.dim_float,
            'name': self.name,
            'args': self.args
        }
        with open(file_save, 'wb') as f:
            pickle.dump(model_dict, f)
    #
    def restart_sequence(self):
        # clear the events memory and reset starting time is 0
        self.intensity_tilde = None
        self.intensity = None
        #
        self.intensity_tilde_ub = None
        self.intensity_ub = None
        #
        self.one_seq = []
        # initialization for LSTM states
        self.one_seq.append(
            {
                'idx_event': numpy.int32(0),
                'type_event': self.dim_process,
                'time_since_start': numpy.float32(0.0),
                'time_since_last_event': numpy.float32(0.0),
                'time_since_last_same_event': numpy.float32(0.0)
            }
        )
        #self.hidden_t = numpy.zeros(
        #    (self.dim_model, ), dtype = dtype
        #)
        self.cell_t = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        self.cell_target = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        self.cell_decay = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        self.gate_output = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        #self.flag_continue = True
        self.cnt_total_event = numpy.int32(len(self.one_seq) )
        #
    #
    #
    def sigmoid(self, x):
        return 1 / (1+numpy.exp(-x))
    #
    #
    def compute_hidden_states(self):
        # every time it is called,
        # it computes the new hidden states of the LSTM
        # it gets the last event in the sequence
        # which is generated at t_(rec(t))
        # and compute its hidden states
        # Note : for this event, we get its type
        # and time elapsed since last event
        # that is to say, this func is different than
        # rnn_unit in models
        # THERE : event, time_since_this_event_to_next
        # so first update, and then decay
        # HERE : time_since_last_event, event
        # so first decay, and then update
        # Note : this should be called
        # after one event is generated and appended
        # so the state is updated accordingly
        #TODO: decay
        cell_t_after_decay = self.cell_target + (
            self.cell_t - self.cell_target
        ) * numpy.exp(
            -self.cell_decay * self.one_seq[-1][
                'time_since_last_event'
            ]
        )
        hidden_t_after_decay = self.gate_output * numpy.tanh(
            cell_t_after_decay
        )
        #TODO: update
        emb_event_t = self.Emb_event[
            self.one_seq[-1]['type_event'], :
        ]
        post_transform = numpy.dot(
            numpy.concatenate(
                (emb_event_t, hidden_t_after_decay),
                axis = 0
            ), self.W_recur
        ) + self.b_recur
        #
        gate_input = self.sigmoid(
            post_transform[:self.dim_model]
        )
        gate_forget = self.sigmoid(
            post_transform[self.dim_model:2*self.dim_model]
        )
        gate_output = self.sigmoid(
            post_transform[2*self.dim_model:3*self.dim_model]
        )
        gate_pre_c = numpy.tanh(
            post_transform[3*self.dim_model:4*self.dim_model]
        )
        # 2 -- input_bar and forget_bar gates
        gate_input_target = self.sigmoid(
            post_transform[4*self.dim_model:5*self.dim_model]
        )
        gate_forget_target = self.sigmoid(
            post_transform[5*self.dim_model:6*self.dim_model]
        )
        # cell memory decay
        cell_decay = self.soft_relu(
            post_transform[6*self.dim_model:]
        )
        #
        cell_t = gate_forget * cell_t_after_decay + gate_input * gate_pre_c
        cell_target = gate_forget_target * self.cell_target + gate_input_target * gate_pre_c
        #
        self.cell_t = numpy.copy(cell_t)
        self.cell_target = numpy.copy(cell_target)
        self.cell_decay = numpy.copy(cell_decay)
        self.gate_output = numpy.copy(gate_output)
        #
        #
    #
    #
    def compute_intensity_given_past(self, time_current):
        # compute the intensity of current time
        # given the past events
        time_recent = self.one_seq[-1]['time_since_start']
        #
        cell_t_after_decay = self.cell_target + (
            self.cell_t - self.cell_target
        ) * numpy.exp(
            -self.cell_decay * (
                time_current - time_recent
            )
        )
        hidden_t_after_decay = self.gate_output * numpy.tanh(
            cell_t_after_decay
        )
        #
        self.intensity_tilde = numpy.dot(
            hidden_t_after_decay, self.W_alpha
        )
        self.intensity = self.soft_relu_scale(
            self.intensity_tilde
        )
        # intensity computation is finished
        #
    #
    #
    def compute_intensity_upper_bound(self, time_current):
        # compute the upper bound of intensity
        # at the current time
        # Note : this is very tricky !!!
        # in decomposable process, finding upper bound is easy
        # see B.3 in NIPS paper
        # but in neural model
        # it is not a combo of POSITIVE decreasing funcs
        # So how to do this?
        # we find the functon is a sum of temrs
        # some terms are decreasing, we keep them
        # some terms are increasing, we get their upper-limit
        #
        # In detail, we compose it to 4 parts :
        # (dc = c-c_target)
        # w + dc -  increasing
        # w + dc +  decreasing
        # w - dc -  decreasing
        # w - dc +  increasing
        #
        time_recent = self.one_seq[-1]['time_since_start']
        #
        cell_gap = self.cell_t - self.cell_target
        cell_gap_matrix = numpy.outer(
            cell_gap, numpy.ones(
                (self.dim_process, ), dtype=dtype
            )
        )
        # dim * dim_process
        index_increasing_0 = (cell_gap_matrix > 0.0) & (self.W_alpha < 0.0)
        index_increasing_1 = (cell_gap_matrix < 0.0) & (self.W_alpha > 0.0)
        #
        cell_gap_matrix[
            index_increasing_0
        ] = numpy.float32(0.0)
        cell_gap_matrix[
            index_increasing_1
        ] = numpy.float32(0.0)
        #
        cell_t_after_decay = numpy.outer(
            self.cell_target, numpy.ones(
                (self.dim_process, ), dtype=dtype
            )
        ) + cell_gap_matrix * numpy.exp(
            -numpy.outer(
                self.cell_decay, numpy.ones(
                    (self.dim_process, ), dtype=dtype
                )
            ) * (
                time_current - time_recent
            )
        )
        hidden_t_after_decay = numpy.outer(
            self.gate_output, numpy.ones(
                (self.dim_process, ), dtype=dtype
            )
        ) * numpy.tanh(cell_t_after_decay)
        #
        self.intensity_tilde_ub = numpy.sum(
            hidden_t_after_decay * self.W_alpha,
            axis=0
        )
        self.intensity_ub = self.soft_relu_scale(
            self.intensity_tilde_ub
        )
        #
        # intensity computation is finished
    #
    #
    def sample_time_given_type(self, type_event):
        # type_event is the type of event for which we want to sample the time
        # it is the little k in our model formulation in paper
        time_current = numpy.float32(0.0)
        if len(self.one_seq) > 0:
            time_current = self.one_seq[-1]['time_since_start']
        #
        #self.compute_intensity(time_current)
        self.compute_intensity_upper_bound(time_current)
        intensity_hazard = numpy.copy(
            self.intensity_ub[type_event]
        )
        #
        u = 1.5
        while u >= 1.0:
            #print "type is : ", type_event
            E = numpy.random.exponential(
                scale=1.0, size=None
            )
            U = numpy.random.uniform(
                low=0.0, high=1.0, size=None
            )
            #print "E U time_current : "
            #print E, U, time_current
            #print "intensity hazard is : "
            #print intensity_hazard
            time_current += (E / intensity_hazard)
            self.compute_intensity_given_past(time_current)
            u = U * intensity_hazard / self.intensity[type_event]
            #print "new time_current and u : "
            #print time_current, u
            #print "intensity and upper bound is : "
            #print self.intensity
            #print self.intensity_ub
            # use adaptive thinning algorithm
            # that is, decreasing the upper bound
            # to make the sampling quicker
            # use adaptive method by
            # toggling on the following block
            '''
            self.compute_intensity_upper_bound(
                time_current
            )
            intensity_hazard = numpy.copy(
                self.intensity_ub[type_event]
            )
            '''
        return time_current
        #
    #
    #
    def sample_time_for_all_type(self):
        # type_event is the type of event for which we want to sample the time
        # it is the little k in our model formulation in paper
        time_current = numpy.float32(0.0)
        if len(self.one_seq) > 0:
            time_current = self.one_seq[-1]['time_since_start']
        #
        #self.compute_intensity(time_current)
        self.compute_intensity_upper_bound(time_current)
        intensity_hazard = numpy.sum(self.intensity_ub)
        #
        u = 1.5
        while u >= 1.0:
            #print "type is : ", type_event
            E = numpy.random.exponential(
                scale=1.0, size=None
            )
            U = numpy.random.uniform(
                low=0.0, high=1.0, size=None
            )
            #print "E U time_current : "
            #print E, U, time_current
            #print "intensity hazard is : "
            #print intensity_hazard
            time_current += (E / intensity_hazard)
            self.compute_intensity_given_past(time_current)
            u = U * intensity_hazard / numpy.sum(self.intensity)
            #print "new time_current and u : "
            #print time_current, u
            #print "intensity and upper bound is : "
            #print self.intensity
            #print self.intensity_ub
            # use adaptive thinning algorithm
            # that is, decreasing the upper bound
            # to make the sampling quicker
            # use adaptive method by
            # toggling on the following block
            '''
            self.compute_intensity_upper_bound(
                time_current
            )
            intensity_hazard = numpy.sum(self.intensity_ub)
            '''
        return time_current
        #
    #
    #
    def sample_one_event_sep(self):
        time_of_happen = numpy.zeros(
            (self.dim_process,), dtype=dtype
        )
        for type_event in range(self.dim_process):
            # sample one event using "thinning algorithm"
            time_of_happen[type_event] = numpy.copy(
                self.sample_time_given_type(
                    type_event
                )
            )
        #
        time_since_start_new = numpy.min(time_of_happen)
        type_event_new = numpy.argmin(time_of_happen)
        return time_since_start_new, type_event_new
    #
    #
    def sample_one_event_tog(self):
        time_since_start_new = self.sample_time_for_all_type()
        self.compute_intensity_given_past(
            time_since_start_new
        )
        prob = self.intensity / numpy.sum(self.intensity)
        type_event_new = numpy.random.choice(
            range(self.dim_process), p = prob
        )
        return time_since_start_new, numpy.int32(type_event_new)
    #
    #
    def sample_one_event(self):
        if self.sum_for_time:
            return self.sample_one_event_tog()
        else:
            return self.sample_one_event_sep()
    #
    #
    def gen_one_seq(self, max_len):
        self.restart_sequence()
        '''
        Liiniger (2009), p. 28, describes a "thinning algorithm":
        generate one event of each type, take the minimum,
        and discard the others.
        Details found in NIPS 17 Appendix
        max_len is a pre-sampled value to set the length of seq
        '''
        # initialize the seq
        time_since_start = numpy.float32(0.0)
        time_since_start_each_event = numpy.zeros(
            (self.dim_process,), dtype=dtype
        )
        #
        for idx_event in range(max_len):
            #
            # compute the hidden states
            # of the most recent event in sequence
            self.compute_hidden_states()
            #
            time_since_start_new, type_event_new = self.sample_one_event()
            self.cnt_total_event += 1
            #
            # update sequence
            time_since_last_event = time_since_start_new - time_since_start
            time_since_start = time_since_start_new
            time_since_last_same_event = time_since_start - time_since_start_each_event[type_event_new]
            time_since_start_each_event[type_event_new] = time_since_start
            self.one_seq.append(
                {
                    'idx_event': self.cnt_total_event,
                    'type_event': type_event_new,
                    'time_since_start': time_since_start,
                    'time_since_last_event': time_since_last_event,
                    'time_since_last_same_event': time_since_last_same_event
                }
            )
            #
        # throw away the BOS item
        # at the head of the sequence
        self.one_seq.pop(0)
        #
    #
    #
    def gen_seqs(self, settings):
        #print settings
        print "generating sequences ... "
        num_seqs = settings['num_seqs']
        #
        self.list_seqs = []
        cnt_seqs = 0
        #for idx_seq in range(num_seqs):
        while cnt_seqs < num_seqs:
            #
            max_len = numpy.int32(
                round(
                    numpy.random.uniform(
                        low=settings['min_len'],
                        high=settings['max_len']
                    )
                )
            )
            #
            self.gen_one_seq(max_len)
            self.list_seqs.append(self.one_seq)
            cnt_seqs += 1
            if cnt_seqs % 10 == 9:
                print "idx seq of gen : ", (cnt_seqs, self.name)
                print "total number of seqs : ", num_seqs
    #
    #
    def print_some(self):
        print "printing some seqs ... "
        for idx_seq in range(10):
            print "the id of this seq is : ", idx_seq
            seq = self.list_seqs[idx_seq]
            list_events = []
            list_time = []
            list_dtime = []
            list_items = []
            for event_item in seq:
                list_events.append(event_item['type_event'])
                list_time.append(
                    round(event_item['time_since_start'], 4)
                )
                list_dtime.append(
                    round(event_item['time_since_last_event'], 4)
                )
                list_items.append(
                    (
                        event_item['type_event'],
                        round(
                            event_item['time_since_last_event'], 4
                        )
                    )
                )
            print "the events, time and diff time for : ", idx_seq
            print list_events
            print list_time
            print list_dtime
            print "the list of items is : "
            print list_items
    #
    #
    def save_seqs(self, file_save):
        with open(file_save, 'wb') as f:
            pickle.dump(self.list_seqs, f)
#
#
#
#
#
#
# deprecated generators
# TODO: modules below are deprecated
# they are models that we tried over this project
# most of them work, better than Hawkes baseline
# but still lose to our neural Hawkes with continuous-time LSTM
# most of them keep the decomposable structure of Hawkes
# and try to use neural networks to parametrize it
#
#

class NeuralHawkesGen(object):
    '''
    here is the sequence generator using Neural Hawkes process
    '''
    def __init__(self, settings):
        #
        self.dim_process = settings['dim_process']
        self.dim_model = settings['dim_LSTM']
        #
        self.dim_float = numpy.int32(32)
        self.dim_time = self.dim_float
        #
        self.args = settings['args']
        numpy.random.seed(
            settings['seed_random']
        )
        self.mu = numpy.float32(
            numpy.random.uniform(
                low = -1.0,
                high = 1.0,
                size = (self.dim_process,)
            )
        )
        #
        self.delta = numpy.float32(
            numpy.random.uniform(
                low=10.0, high=20.0,
                size=(self.dim_model, self.dim_process)
            )
        )
        #
        self.W_alpha = numpy.float32(
            numpy.random.uniform(
                low = -1.0,
                high = 1.0,
                size = (self.dim_model, self.dim_process)
            )
        )
        self.Emb_event = numpy.float32(
            numpy.random.uniform(
                low = -1.0,
                high = 1.0,
                size = (
                    self.dim_process + numpy.int32(1),
                    self.dim_model
                )
            )
        )
        self.Emb_time = numpy.float32(
            numpy.random.uniform(
                low = -1.0,
                high = 1.0,
                size = (
                    self.dim_time, self.dim_model
                )
            )
        )
        self.W_recur = numpy.float32(
            numpy.random.uniform(
                low = -1.0,
                high = 1.0,
                size = (
                    3 * self.dim_model,
                    4 * self.dim_model
                )
            )
        )
        self.b_recur = numpy.float32(
            numpy.random.uniform(
                low = -1.0,
                high = 1.0,
                size = (4*self.dim_model, )
            )
        )
        #
        #self.intensity = numpy.copy(self.mu)
        self.name = 'NeuralHawkesGen'
        #
        self.intensity_tilde = None
        self.intensity = None
        #
        self.intensity_tilde_ub = None
        self.intensity_ub = None
        #
        self.one_seq = []
        # initialization for LSTM states
        self.one_seq.append(
            {
                'idx_event': numpy.int32(0),
                'type_event': self.dim_process,
                'time_since_start': numpy.float32(0.0),
                'time_since_last_event': numpy.float32(0.0),
                'time_since_last_same_event': numpy.float32(0.0)
            }
        )
        self.hidden_t = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        self.cell_t = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        #self.flag_continue = True
        self.cnt_total_event = numpy.int32(len(self.one_seq))
        #
    #
    def soft_relu(self, x):
        return numpy.log(numpy.float32(1.0)+numpy.exp(x))
    #
    def hard_relu(self, x):
        return numpy.float32(0.5) * (x + numpy.abs(x) )
        #
    #
    def save_model(self, file_save):
        print "saving model of generator ... "
        model_dict = {
            'mu': numpy.copy(self.mu),
            'delta': numpy.copy(self.delta),
            'W_alpha': numpy.copy(self.W_alpha),
            'Emb_event': numpy.copy(self.Emb_event),
            'Emb_time': numpy.copy(self.Emb_time),
            'W_recur': numpy.copy(self.W_recur),
            'b_recur': numpy.copy(self.b_recur),
            'dim_process': self.dim_process,
            'dim_model': self.dim_model,
            'dim_time': self.dim_time,
            'dim_float': self.dim_float,
            'name': self.name,
            'args': self.args
        }
        with open(file_save, 'wb') as f:
            pickle.dump(model_dict, f)
    #
    def restart_sequence(self):
        # clear the events memory and reset starting time is 0
        self.intensity_tilde = None
        self.intensity = None
        #
        self.intensity_tilde_ub = None
        self.intensity_ub = None
        #
        self.one_seq = []
        #
        self.one_seq.append(
            {
                'idx_event': numpy.int32(0),
                'type_event': self.dim_process,
                'time_since_start': numpy.float32(0.0),
                'time_since_last_event': numpy.float32(0.0),
                'time_since_last_same_event': numpy.float32(0.0)
            }
        )
        self.hidden_t = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        self.cell_t = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        #self.flag_continue = True
        self.cnt_total_event = numpy.int32(len(self.one_seq))
        #
    #
    #
    #
    def float32_to_bit(self, float_input):
        '''
        input a number in float, convert it to float32
        get its 32-bit representations
        '''
        float32_input = numpy.float32(float_input)
        str_input = ''.join(bin(ord(c)).replace('0b', '').rjust(8, '0') for c in struct.pack('!f', float32_input))
        bit_input = numpy.zeros(
            (self.dim_float,), dtype=dtype
        )
        assert(self.dim_float == len(str_input))
        for idx, item_in_input in enumerate(str_input):
            bit_input[idx] = numpy.float32(item_in_input)
        return numpy.copy(bit_input)
    #
    #
    def sigmoid(self, x):
        return 1 / (1+numpy.exp(-x))
    #
    #
    def compute_hidden_states(self):
        # every time it is called,
        # it computes the new hidden states of the LSTM
        # it gets the last event in the sequence
        # which is generated at t_(rec(t))
        # and compute its hidden states
        emb_event_t = self.Emb_event[
            self.one_seq[-1]['type_event'], :
        ]
        emb_time_t = numpy.dot(
            self.float32_to_bit(
                self.one_seq[-1]['time_since_last_event']
            ),
            self.Emb_time
        )
        post_transform = numpy.dot(
            numpy.concatenate(
                (emb_event_t, emb_time_t, self.hidden_t),
                axis = 0
            ),
            self.W_recur
        ) + self.b_recur
        #
        gate_input = self.sigmoid(
            post_transform[:self.dim_model]
        )
        gate_forget = self.sigmoid(
            post_transform[self.dim_model:2*self.dim_model]
        )
        gate_output = self.sigmoid(
            post_transform[2*self.dim_model:3*self.dim_model]
        )
        gate_pre_c = numpy.tanh(
            post_transform[3*self.dim_model:]
        )
        #
        cell_t_new = gate_forget * self.cell_t + gate_input * gate_pre_c
        hidden_t_new = gate_output * numpy.tanh(cell_t_new)
        self.hidden_t = numpy.copy(hidden_t_new)
        self.cell_t = numpy.copy(cell_t_new)
        #
    #
    #
    def compute_intensity_given_past(self, time_current):
        # compute the intensity of current time
        # given the past events
        #
        time_recent = self.one_seq[-1]['time_since_start']
        #
        hidden_with_time = numpy.exp(
            -self.delta * (
                time_current - time_recent
            )
        ) * self.hidden_t[:, None]
        # (self.dim_model, self.dim_process)
        # self.W_alpha (self.dim_model, self.dim_process)
        self.intensity_tilde = numpy.sum(
            self.W_alpha * hidden_with_time,
            axis = 0
        ) + self.mu
        #
        self.intensity = self.soft_relu(
            self.intensity_tilde
        )
        # intensity computation is finished
    #
    #
    def compute_intensity_upper_bound(self, time_current):
        # compute the upper bound of intensity
        # at the current time
        time_recent = self.one_seq[-1]['time_since_start']
        #
        hidden_with_time = numpy.exp(
            -self.delta * (
                time_current - time_recent
            )
        ) * self.hidden_t[:, None]
        # (self.dim_model, self.dim_process)
        # self.W_alpha (self.dim_model, self.dim_process)
        self.intensity_tilde_ub = numpy.sum(
            self.hard_relu(
                self.W_alpha * hidden_with_time
            ),
            axis = 0
        ) + self.hard_relu(self.mu)
        #
        self.intensity_ub = self.soft_relu(
            self.intensity_tilde_ub
        )
        # intensity computation is finished
    #
    #
    def sample_time_given_type(self, type_event):
        # type_event is the type of event for which we want to sample the time
        # it is the little k in our model formulation in paper
        time_current = numpy.float32(0.0)
        if len(self.one_seq) > 0:
            time_current = self.one_seq[-1]['time_since_start']
        #
        #self.compute_intensity(time_current)
        self.compute_intensity_upper_bound(time_current)
        #
        intensity_hazard = numpy.copy(
            self.intensity_ub[type_event]
        )
        #
        u = 1.5
        while u >= 1.0:
            E = numpy.random.exponential(
                scale=1.0, size=None
            )
            U = numpy.random.uniform(
                low=0.0, high=1.0, size=None
            )
            time_current += E / intensity_hazard
            self.compute_intensity_given_past(time_current)
            u = U * intensity_hazard / self.intensity[type_event]
        #
        return time_current
        #
    #
    #
    def gen_one_seq(self, max_len):
        self.restart_sequence()
        '''
        Liiniger (2009), p. 28, describes a "thinning algorithm":
        generate one event of each type, take the minimum,
        and discard the others.
        Details found in my paper write-up
        #
        max_len is a pre-sampled value to set the length of seq
        '''
        # initialize the seq
        time_since_start = numpy.float32(0.0)
        time_since_start_each_event = numpy.zeros(
            (self.dim_process,), dtype=dtype
        )
        #
        for idx_event in range(max_len):
            time_of_happen = numpy.zeros(
                (self.dim_process,), dtype=dtype
            )
            #
            # compute the hidden states
            # of the most recent event in sequence
            self.compute_hidden_states()
            #
            for type_event in range(self.dim_process):
                # sample one event using "thinning algorithm"
                time_of_happen[type_event] = numpy.copy(
                    self.sample_time_given_type(
                        type_event
                    )
                )
            #
            time_since_start_new = numpy.min(time_of_happen)
            type_event_new = numpy.argmin(time_of_happen)
            self.cnt_total_event += 1
            #
            # update sequence
            time_since_last_event = time_since_start_new - time_since_start
            time_since_start = time_since_start_new
            time_since_last_same_event = time_since_start - time_since_start_each_event[type_event_new]
            time_since_start_each_event[type_event_new] = time_since_start
            self.one_seq.append(
                {
                    'idx_event': self.cnt_total_event,
                    'type_event': type_event_new,
                    'time_since_start': time_since_start,
                    'time_since_last_event': time_since_last_event,
                    'time_since_last_same_event': time_since_last_same_event
                }
            )
            #
        # throw away the BOS item
        # at the head of the sequence
        self.one_seq.pop(0)
        #
    #
    #
    def gen_seqs(self, settings):
        #
        #print settings
        num_seqs = settings['num_seqs']
        #
        self.list_seqs = []
        cnt_seqs = 0
        #for idx_seq in range(num_seqs):
        while cnt_seqs < num_seqs:
            #
            max_len = numpy.int32(
                round(
                    numpy.random.uniform(
                        low=settings['min_len'],
                        high=settings['max_len']
                    )
                )
            )
            #
            self.gen_one_seq(max_len)
            self.list_seqs.append(self.one_seq)
            cnt_seqs += 1
            if cnt_seqs % 10 == 9:
                print "idx seq of gen : ", (cnt_seqs, self.name)
                print "total number of seqs : ", num_seqs
    #
    #
    def print_some(self):
        print "printing some seqs ... "
        for idx_seq in range(10):
            print "the id of this seq is : ", idx_seq
            seq = self.list_seqs[idx_seq]
            list_events, list_time = [], []
            for event_item in seq:
                list_events.append(event_item['type_event'])
                list_time.append(
                    round(event_item['time_since_start'], 4)
                )
            print list_events
            print list_time
    #
    def save_seqs(self, file_save):
        with open(file_save, 'wb') as f:
            pickle.dump(self.list_seqs, f)


class GeneralizedNeuralHawkesGen(object):
    '''
    here is the sequence generator using Neural Hawkes process
    '''
    def __init__(self, settings):
        #
        self.dim_process = settings['dim_process']
        self.dim_model = settings['dim_LSTM']
        #
        self.dim_float = numpy.int32(32)
        self.dim_time = self.dim_float
        #
        self.args = settings['args']
        numpy.random.seed(
            settings['seed_random']
        )
        self.mu = numpy.float32(
            numpy.random.uniform(
                low = -1.0,
                high = 1.0,
                size = (self.dim_process,)
            )
        )
        #
        self.W_delta = numpy.float32(
            numpy.random.uniform(
                low = -1.0, high= 1.0,
                size=(
                    self.dim_model, self.dim_model,
                    self.dim_process
                )
            )
        )
        #
        self.W_alpha = numpy.float32(
            numpy.random.uniform(
                low = -1.0,
                high = 1.0,
                size = (self.dim_model, self.dim_process)
            )
        )
        self.Emb_event = numpy.float32(
            numpy.random.uniform(
                low = -1.0,
                high = 1.0,
                size = (
                    self.dim_process + numpy.int32(1),
                    self.dim_model
                )
            )
        )
        self.Emb_time = numpy.float32(
            numpy.random.uniform(
                low = -1.0,
                high = 1.0,
                size = (
                    self.dim_time, self.dim_model
                )
            )
        )
        self.W_recur = numpy.float32(
            numpy.random.uniform(
                low = -1.0,
                high = 1.0,
                size = (
                    3 * self.dim_model,
                    4 * self.dim_model
                )
            )
        )
        self.b_recur = numpy.float32(
            numpy.random.uniform(
                low = -1.0,
                high = 1.0,
                size = (4*self.dim_model, )
            )
        )
        #
        #self.intensity = numpy.copy(self.mu)
        self.name = 'GeneralizedNeuralHawkesGen'
        #
        self.intensity_tilde = None
        self.intensity = None
        #
        self.intensity_tilde_ub = None
        self.intensity_ub = None
        #
        self.one_seq = []
        # initialization for LSTM states
        self.one_seq.append(
            {
                'idx_event': numpy.int32(0),
                'type_event': self.dim_process,
                'time_since_start': numpy.float32(0.0),
                'time_since_last_event': numpy.float32(0.0),
                'time_since_last_same_event': numpy.float32(0.0)
            }
        )
        self.hidden_t = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        self.cell_t = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        #self.flag_continue = True
        self.cnt_total_event = numpy.int32(len(self.one_seq))
        #
    #
    def soft_relu(self, x):
        return numpy.log(numpy.float32(1.0)+numpy.exp(x))
    #
    def hard_relu(self, x):
        return numpy.float32(0.5) * (x + numpy.abs(x) )
        #
    #
    def save_model(self, file_save):
        print "saving model of generator ... "
        model_dict = {
            'mu': numpy.copy(self.mu),
            'W_delta': numpy.copy(self.W_delta),
            'W_alpha': numpy.copy(self.W_alpha),
            'Emb_event': numpy.copy(self.Emb_event),
            'Emb_time': numpy.copy(self.Emb_time),
            'W_recur': numpy.copy(self.W_recur),
            'b_recur': numpy.copy(self.b_recur),
            'dim_process': self.dim_process,
            'dim_model': self.dim_model,
            'dim_time': self.dim_time,
            'dim_float': self.dim_float,
            'name': self.name,
            'args': self.args
        }
        with open(file_save, 'wb') as f:
            pickle.dump(model_dict, f)
    #
    def restart_sequence(self):
        # clear the events memory and reset starting time is 0
        self.intensity_tilde = None
        self.intensity = None
        #
        self.intensity_tilde_ub = None
        self.intensity_ub = None
        #
        self.one_seq = []
        #
        self.one_seq.append(
            {
                'idx_event': numpy.int32(0),
                'type_event': self.dim_process,
                'time_since_start': numpy.float32(0.0),
                'time_since_last_event': numpy.float32(0.0),
                'time_since_last_same_event': numpy.float32(0.0)
            }
        )
        self.hidden_t = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        self.cell_t = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        #self.flag_continue = True
        self.cnt_total_event = numpy.int32(len(self.one_seq))
        #
    #
    #
    #
    def float32_to_bit(self, float_input):
        '''
        input a number in float, convert it to float32
        get its 32-bit representations
        '''
        float32_input = numpy.float32(float_input)
        str_input = ''.join(bin(ord(c)).replace('0b', '').rjust(8, '0') for c in struct.pack('!f', float32_input))
        bit_input = numpy.zeros(
            (self.dim_float,), dtype=dtype
        )
        assert(self.dim_float == len(str_input))
        for idx, item_in_input in enumerate(str_input):
            bit_input[idx] = numpy.float32(item_in_input)
        return numpy.copy(bit_input)
    #
    #
    def sigmoid(self, x):
        return 1 / (1+numpy.exp(-x))
    #
    #
    def compute_hidden_states(self):
        # every time it is called,
        # it computes the new hidden states of the LSTM
        # it gets the last event in the sequence
        # which is generated at t_(rec(t))
        # and compute its hidden states
        emb_event_t = self.Emb_event[
            self.one_seq[-1]['type_event'], :
        ]
        emb_time_t = numpy.dot(
            self.float32_to_bit(
                self.one_seq[-1]['time_since_last_event']
            ),
            self.Emb_time
        )
        post_transform = numpy.dot(
            numpy.concatenate(
                (emb_event_t, emb_time_t, self.hidden_t),
                axis = 0
            ),
            self.W_recur
        ) + self.b_recur
        #
        gate_input = self.sigmoid(
            post_transform[:self.dim_model]
        )
        gate_forget = self.sigmoid(
            post_transform[self.dim_model:2*self.dim_model]
        )
        gate_output = self.sigmoid(
            post_transform[2*self.dim_model:3*self.dim_model]
        )
        gate_pre_c = numpy.tanh(
            post_transform[3*self.dim_model:]
        )
        #
        cell_t_new = gate_forget * self.cell_t + gate_input * gate_pre_c
        hidden_t_new = gate_output * numpy.tanh(cell_t_new)
        self.hidden_t = numpy.copy(hidden_t_new)
        self.cell_t = numpy.copy(cell_t_new)
        #
    #
    #
    def compute_intensity_given_past(self, time_current):
        # compute the intensity of current time
        # given the past events
        #
        time_recent = self.one_seq[-1]['time_since_start']
        #
        delta = self.soft_relu(
            numpy.tensordot(
                self.hidden_t, self.W_delta, (0, 0)
            )
        )
        #
        hidden_with_time = numpy.exp(
            -delta * (
                time_current - time_recent
            )
        ) * self.hidden_t[:, None]
        # (self.dim_model, self.dim_process)
        # self.W_alpha (self.dim_model, self.dim_process)
        self.intensity_tilde = numpy.sum(
            self.W_alpha * hidden_with_time,
            axis = 0
        ) + self.mu
        #
        self.intensity = self.soft_relu(
            self.intensity_tilde
        )
        # intensity computation is finished
    #
    def compute_intensity_upper_bound(self, time_current):
        # compute the upper bound of intensity
        # at the current time
        time_recent = self.one_seq[-1]['time_since_start']
        #
        delta = self.soft_relu(
            numpy.tensordot(
                self.hidden_t, self.W_delta, (0, 0)
            )
        )
        #
        hidden_with_time = numpy.exp(
            -delta * (
                time_current - time_recent
            )
        ) * self.hidden_t[:, None]
        # (self.dim_model, self.dim_process)
        # self.W_alpha (self.dim_model, self.dim_process)
        self.intensity_tilde_ub = numpy.sum(
            self.hard_relu(
                self.W_alpha * hidden_with_time
            ),
            axis = 0
        ) + self.hard_relu(self.mu)
        #
        self.intensity_ub = self.soft_relu(
            self.intensity_tilde_ub
        )
        # intensity computation is finished
    #
    #
    def sample_time_given_type(self, type_event):
        # type_event is the type of event for which we want to sample the time
        # it is the little k in our model formulation in paper
        time_current = numpy.float32(0.0)
        if len(self.one_seq) > 0:
            time_current = self.one_seq[-1]['time_since_start']
        #
        #self.compute_intensity(time_current)
        self.compute_intensity_upper_bound(time_current)
        #
        intensity_hazard = numpy.copy(
            self.intensity_ub[type_event]
        )
        #
        u = 1.5
        while u >= 1.0:
            E = numpy.random.exponential(
                scale=1.0, size=None
            )
            U = numpy.random.uniform(
                low=0.0, high=1.0, size=None
            )
            time_current += E / intensity_hazard
            self.compute_intensity_given_past(time_current)
            u = U * intensity_hazard / self.intensity[type_event]
        #
        return time_current
        #
    #
    #
    def gen_one_seq(self, max_len):
        self.restart_sequence()
        '''
        Liiniger (2009), p. 28, describes a "thinning algorithm":
        generate one event of each type, take the minimum,
        and discard the others.
        Details found in my paper write-up
        #
        max_len is a pre-sampled value to set the length of seq
        '''
        # initialize the seq
        time_since_start = numpy.float32(0.0)
        time_since_start_each_event = numpy.zeros(
            (self.dim_process,), dtype=dtype
        )
        #
        for idx_event in range(max_len):
            time_of_happen = numpy.zeros(
                (self.dim_process,), dtype=dtype
            )
            #
            # compute the hidden states
            # of the most recent event in sequence
            self.compute_hidden_states()
            #
            for type_event in range(self.dim_process):
                # sample one event using "thinning algorithm"
                time_of_happen[type_event] = numpy.copy(
                    self.sample_time_given_type(
                        type_event
                    )
                )
            #
            time_since_start_new = numpy.min(time_of_happen)
            type_event_new = numpy.argmin(time_of_happen)
            self.cnt_total_event += 1
            #
            # update sequence
            time_since_last_event = time_since_start_new - time_since_start
            time_since_start = time_since_start_new
            time_since_last_same_event = time_since_start - time_since_start_each_event[type_event_new]
            time_since_start_each_event[type_event_new] = time_since_start
            self.one_seq.append(
                {
                    'idx_event': self.cnt_total_event,
                    'type_event': type_event_new,
                    'time_since_start': time_since_start,
                    'time_since_last_event': time_since_last_event,
                    'time_since_last_same_event': time_since_last_same_event
                }
            )
            #
        # throw away the BOS item
        # at the head of the sequence
        self.one_seq.pop(0)
        #
    #
    #
    def gen_seqs(self, settings):
        #
        #print settings
        num_seqs = settings['num_seqs']
        #
        self.list_seqs = []
        cnt_seqs = 0
        #for idx_seq in range(num_seqs):
        while cnt_seqs < num_seqs:
            #
            max_len = numpy.int32(
                round(
                    numpy.random.uniform(
                        low=settings['min_len'],
                        high=settings['max_len']
                    )
                )
            )
            #
            self.gen_one_seq(max_len)
            self.list_seqs.append(self.one_seq)
            cnt_seqs += 1
            if cnt_seqs % 10 == 9:
                print "idx seq of gen : ", (cnt_seqs, self.name)
                print "total number of seqs : ", num_seqs
    #
    #
    def print_some(self):
        print "printing some seqs ... "
        for idx_seq in range(10):
            print "the id of this seq is : ", idx_seq
            seq = self.list_seqs[idx_seq]
            list_events, list_time = [], []
            for event_item in seq:
                list_events.append(event_item['type_event'])
                list_time.append(
                    round(event_item['time_since_start'], 4)
                )
            print list_events
            print list_time
    #
    def save_seqs(self, file_save):
        with open(file_save, 'wb') as f:
            pickle.dump(self.list_seqs, f)


class NeuralHawkesAdaptiveBaseGen(object):
    '''
    here is the sequence generator using Neural Hawkes process
    '''
    def __init__(self, settings):
        #
        self.dim_process = settings['dim_process']
        self.dim_model = settings['dim_LSTM']
        #
        self.dim_float = numpy.int32(32)
        self.dim_time = self.dim_float
        #
        self.args = settings['args']
        numpy.random.seed(
            settings['seed_random']
        )
        self.W_mu = numpy.float32(
            numpy.random.uniform(
                low = -1.0, high = 1.0,
                size = (
                    self.dim_model, self.dim_process
                )
            )
        )
        #
        self.W_delta = numpy.float32(
            numpy.random.uniform(
                low = -1.0, high = 1.0,
                size=(
                    self.dim_model, self.dim_model,
                    self.dim_process
                )
            )
        )
        #
        self.W_alpha = numpy.float32(
            numpy.random.uniform(
                low = -1.0,
                high = 1.0,
                size = (self.dim_model, self.dim_process)
            )
        )
        self.Emb_event = numpy.float32(
            numpy.random.uniform(
                low = -1.0,
                high = 1.0,
                size = (
                    self.dim_process + numpy.int32(1),
                    self.dim_model
                )
            )
        )
        self.Emb_time = numpy.float32(
            numpy.random.uniform(
                low = -1.0,
                high = 1.0,
                size = (
                    self.dim_time, self.dim_model
                )
            )
        )
        self.W_recur = numpy.float32(
            numpy.random.uniform(
                low = -1.0,
                high = 1.0,
                size = (
                    3 * self.dim_model,
                    4 * self.dim_model
                )
            )
        )
        self.b_recur = numpy.float32(
            numpy.random.uniform(
                low = -1.0,
                high = 1.0,
                size = (4*self.dim_model, )
            )
        )
        #
        #self.intensity = numpy.copy(self.mu)
        self.name = 'AdaptiveNeuralHawkesGen'
        #
        self.intensity_tilde = None
        self.intensity = None
        #
        self.intensity_tilde_ub = None
        self.intensity_ub = None
        #
        self.one_seq = []
        # initialization for LSTM states
        self.one_seq.append(
            {
                'idx_event': numpy.int32(0),
                'type_event': self.dim_process,
                'time_since_start': numpy.float32(0.0),
                'time_since_last_event': numpy.float32(0.0),
                'time_since_last_same_event': numpy.float32(0.0)
            }
        )
        self.hidden_t = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        self.cell_t = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        #self.flag_continue = True
        self.cnt_total_event = numpy.int32(len(self.one_seq))
        #
    #
    def soft_relu(self, x):
        return numpy.log(numpy.float32(1.0)+numpy.exp(x))
    #
    def hard_relu(self, x):
        return numpy.float32(0.5) * (x + numpy.abs(x) )
        #
    #
    def save_model(self, file_save):
        print "saving model of generator ... "
        model_dict = {
            'W_mu': numpy.copy(self.W_mu),
            'W_delta': numpy.copy(self.W_delta),
            'W_alpha': numpy.copy(self.W_alpha),
            'Emb_event': numpy.copy(self.Emb_event),
            'Emb_time': numpy.copy(self.Emb_time),
            'W_recur': numpy.copy(self.W_recur),
            'b_recur': numpy.copy(self.b_recur),
            'dim_process': self.dim_process,
            'dim_model': self.dim_model,
            'dim_time': self.dim_time,
            'dim_float': self.dim_float,
            'name': self.name,
            'args': self.args
        }
        with open(file_save, 'wb') as f:
            pickle.dump(model_dict, f)
    #
    def restart_sequence(self):
        # clear the events memory and reset starting time is 0
        self.intensity_tilde = None
        self.intensity = None
        #
        self.intensity_tilde_ub = None
        self.intensity_ub = None
        #
        self.one_seq = []
        #
        self.one_seq.append(
            {
                'idx_event': numpy.int32(0),
                'type_event': self.dim_process,
                'time_since_start': numpy.float32(0.0),
                'time_since_last_event': numpy.float32(0.0),
                'time_since_last_same_event': numpy.float32(0.0)
            }
        )
        self.hidden_t = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        self.cell_t = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        #self.flag_continue = True
        self.cnt_total_event = numpy.int32(len(self.one_seq))
        #
    #
    #
    #
    def float32_to_bit(self, float_input):
        '''
        input a number in float, convert it to float32
        get its 32-bit representations
        '''
        float32_input = numpy.float32(float_input)
        str_input = ''.join(bin(ord(c)).replace('0b', '').rjust(8, '0') for c in struct.pack('!f', float32_input))
        bit_input = numpy.zeros(
            (self.dim_float,), dtype=dtype
        )
        assert(self.dim_float == len(str_input))
        for idx, item_in_input in enumerate(str_input):
            bit_input[idx] = numpy.float32(item_in_input)
        return numpy.copy(bit_input)
    #
    #
    def sigmoid(self, x):
        return 1 / (1+numpy.exp(-x))
    #
    #
    def compute_hidden_states(self):
        # every time it is called,
        # it computes the new hidden states of the LSTM
        # it gets the last event in the sequence
        # which is generated at t_(rec(t))
        # and compute its hidden states
        emb_event_t = self.Emb_event[
            self.one_seq[-1]['type_event'], :
        ]
        emb_time_t = numpy.dot(
            self.float32_to_bit(
                self.one_seq[-1]['time_since_last_event']
            ),
            self.Emb_time
        )
        post_transform = numpy.dot(
            numpy.concatenate(
                (emb_event_t, emb_time_t, self.hidden_t),
                axis = 0
            ),
            self.W_recur
        ) + self.b_recur
        #
        gate_input = self.sigmoid(
            post_transform[:self.dim_model]
        )
        gate_forget = self.sigmoid(
            post_transform[self.dim_model:2*self.dim_model]
        )
        gate_output = self.sigmoid(
            post_transform[2*self.dim_model:3*self.dim_model]
        )
        gate_pre_c = numpy.tanh(
            post_transform[3*self.dim_model:]
        )
        #
        cell_t_new = gate_forget * self.cell_t + gate_input * gate_pre_c
        hidden_t_new = gate_output * numpy.tanh(cell_t_new)
        self.hidden_t = numpy.copy(hidden_t_new)
        self.cell_t = numpy.copy(cell_t_new)
        #
    #
    #
    def compute_intensity_given_past(self, time_current):
        # compute the intensity of current time
        # given the past events
        #
        time_recent = self.one_seq[-1]['time_since_start']
        #
        delta = self.soft_relu(
            numpy.tensordot(
                self.hidden_t, self.W_delta, (0, 0)
            )
        )
        #
        hidden_with_time = numpy.exp(
            -delta * (
                time_current - time_recent
            )
        ) * self.hidden_t[:, None]
        # (self.dim_model, self.dim_process)
        # self.W_alpha (self.dim_model, self.dim_process)
        self.intensity_tilde = numpy.sum(
            self.W_alpha * hidden_with_time,
            axis = 0
        ) + numpy.dot(
            self.hidden_t, self.W_mu
        )
        #
        self.intensity = self.soft_relu(
            self.intensity_tilde
        )
        # intensity computation is finished
    #
    def compute_intensity_upper_bound(self, time_current):
        # compute the upper bound of intensity
        # at the current time
        time_recent = self.one_seq[-1]['time_since_start']
        #
        delta = self.soft_relu(
            numpy.tensordot(
                self.hidden_t, self.W_delta, (0, 0)
            )
        )
        #
        hidden_with_time = numpy.exp(
            -delta * (
                time_current - time_recent
            )
        ) * self.hidden_t[:, None]
        # (self.dim_model, self.dim_process)
        # self.W_alpha (self.dim_model, self.dim_process)
        self.intensity_tilde_ub = numpy.sum(
            self.hard_relu(
                self.W_alpha * hidden_with_time
            ),
            axis = 0
        ) + self.hard_relu(
            numpy.dot(
                self.hidden_t, self.W_mu
            )
        )
        #
        self.intensity_ub = self.soft_relu(
            self.intensity_tilde_ub
        )
        # intensity computation is finished
    #
    #
    def sample_time_given_type(self, type_event):
        # type_event is the type of event for which we want to sample the time
        # it is the little k in our model formulation in paper
        time_current = numpy.float32(0.0)
        if len(self.one_seq) > 0:
            time_current = self.one_seq[-1]['time_since_start']
        #
        #self.compute_intensity(time_current)
        self.compute_intensity_upper_bound(time_current)
        #
        intensity_hazard = numpy.copy(
            self.intensity_ub[type_event]
        )
        #
        u = 1.5
        while u >= 1.0:
            E = numpy.random.exponential(
                scale=1.0, size=None
            )
            U = numpy.random.uniform(
                low=0.0, high=1.0, size=None
            )
            time_current += E / intensity_hazard
            self.compute_intensity_given_past(time_current)
            u = U * intensity_hazard / self.intensity[type_event]
        #
        return time_current
        #
    #
    #
    def gen_one_seq(self, max_len):
        self.restart_sequence()
        '''
        Liiniger (2009), p. 28, describes a "thinning algorithm":
        generate one event of each type, take the minimum,
        and discard the others.
        Details found in my paper write-up
        #
        max_len is a pre-sampled value to set the length of seq
        '''
        # initialize the seq
        time_since_start = numpy.float32(0.0)
        time_since_start_each_event = numpy.zeros(
            (self.dim_process,), dtype=dtype
        )
        #
        for idx_event in range(max_len):
            time_of_happen = numpy.zeros(
                (self.dim_process,), dtype=dtype
            )
            #
            # compute the hidden states
            # of the most recent event in sequence
            self.compute_hidden_states()
            #
            for type_event in range(self.dim_process):
                # sample one event using "thinning algorithm"
                time_of_happen[type_event] = numpy.copy(
                    self.sample_time_given_type(
                        type_event
                    )
                )
            #
            time_since_start_new = numpy.min(time_of_happen)
            type_event_new = numpy.argmin(time_of_happen)
            self.cnt_total_event += 1
            #
            # update sequence
            time_since_last_event = time_since_start_new - time_since_start
            time_since_start = time_since_start_new
            time_since_last_same_event = time_since_start - time_since_start_each_event[type_event_new]
            time_since_start_each_event[type_event_new] = time_since_start
            self.one_seq.append(
                {
                    'idx_event': self.cnt_total_event,
                    'type_event': type_event_new,
                    'time_since_start': time_since_start,
                    'time_since_last_event': time_since_last_event,
                    'time_since_last_same_event': time_since_last_same_event
                }
            )
            #
        # throw away the BOS item
        # at the head of the sequence
        self.one_seq.pop(0)
        #
    #
    #
    def gen_seqs(self, settings):
        #
        #print settings
        num_seqs = settings['num_seqs']
        #
        self.list_seqs = []
        cnt_seqs = 0
        #for idx_seq in range(num_seqs):
        while cnt_seqs < num_seqs:
            #
            max_len = numpy.int32(
                round(
                    numpy.random.uniform(
                        low=settings['min_len'],
                        high=settings['max_len']
                    )
                )
            )
            #
            self.gen_one_seq(max_len)
            self.list_seqs.append(self.one_seq)
            cnt_seqs += 1
            if cnt_seqs % 10 == 9:
                print "idx seq of gen : ", (cnt_seqs, self.name)
                print "total number of seqs : ", num_seqs
    #
    #
    def print_some(self):
        print "printing some seqs ... "
        for idx_seq in range(10):
            print "the id of this seq is : ", idx_seq
            seq = self.list_seqs[idx_seq]
            list_events, list_time = [], []
            for event_item in seq:
                list_events.append(event_item['type_event'])
                list_time.append(
                    round(event_item['time_since_start'], 4)
                )
            print list_events
            print list_time
    #
    def save_seqs(self, file_save):
        with open(file_save, 'wb') as f:
            pickle.dump(self.list_seqs, f)


class NeuralHawkesAdaptiveBaseGen_time(object):
    '''
    here is the sequence generator using Neural Hawkes process
    '''
    def __init__(self, settings):
        #
        print "initializing generator ... "
        self.args = settings['args']
        self.dim_float = numpy.int32(32)
        if settings['path_pre_train'] == None:
            print "random parameters ... "
            self.dim_process = settings['dim_process']
            self.dim_model = settings['dim_LSTM']
            #
            self.dim_time = self.dim_float
            numpy.random.seed(
                settings['seed_random']
            )
            self.W_mu = numpy.float32(
                numpy.random.uniform(
                    low = -1.0, high = 1.0,
                    size = (
                        self.dim_model, self.dim_process
                    )
                )
            )
            #
            self.W_delta = numpy.float32(
                numpy.random.uniform(
                    low = -1.0, high = 1.0,
                    size=(
                        self.dim_model, self.dim_model,
                        self.dim_process
                    )
                )
            )
            #
            self.W_alpha = numpy.float32(
                numpy.random.uniform(
                    low = -1.0,
                    high = 1.0,
                    size = (self.dim_model, self.dim_process)
                )
            )
            self.Emb_event = numpy.float32(
                numpy.random.uniform(
                    low = -1.0,
                    high = 1.0,
                    size = (
                        self.dim_process + numpy.int32(1),
                        self.dim_model
                    )
                )
            )
            self.Emb_time = numpy.float32(
                numpy.random.uniform(
                    low = -1.0,
                    high = 1.0,
                    size = (
                        self.dim_time+numpy.int32(1),
                        self.dim_model
                    )
                )
            )
            self.Threshold_time = numpy.float32(
                numpy.random.uniform(
                    low = 0.0, high = 1.0,
                    size = (self.dim_time, )
                )
            )
            self.W_recur = numpy.float32(
                numpy.random.uniform(
                    low = -1.0,
                    high = 1.0,
                    size = (
                        3 * self.dim_model,
                        4 * self.dim_model
                    )
                )
            )
            self.b_recur = numpy.float32(
                numpy.random.uniform(
                    low = -1.0,
                    high = 1.0,
                    size = (4*self.dim_model, )
                )
            )
        else:
            print "read pretrained model ... "
            path_pre_train = os.path.abspath(
                settings['path_pre_train']
            )
            with open(path_pre_train, 'rb') as f:
                model_pre_train = pickle.load(f)
            self.dim_process = model_pre_train['dim_process']
            self.dim_model = model_pre_train['dim_model']
            self.dim_time = model_pre_train['dim_time']
            #
            self.W_mu = model_pre_train['W_mu']
            self.W_delta = model_pre_train['W_delta']
            self.W_alpha = model_pre_train['W_alpha']
            self.Emb_event = model_pre_train['Emb_event']
            self.Emb_time = model_pre_train['Emb_time']
            self.Threshold_time = model_pre_train['Threshold_time']
            self.W_recur = model_pre_train['W_recur']
            self.b_recur = model_pre_train['b_recur']
            #
        #
        #self.intensity = numpy.copy(self.mu)
        self.name = 'AdaptiveNeuralHawkesGen_time'
        #
        self.intensity_tilde = None
        self.intensity = None
        #
        self.intensity_tilde_ub = None
        self.intensity_ub = None
        #
        self.one_seq = []
        # initialization for LSTM states
        self.one_seq.append(
            {
                'idx_event': numpy.int32(0),
                'type_event': self.dim_process,
                'time_since_start': numpy.float32(0.0),
                'time_since_last_event': numpy.float32(0.0),
                'time_since_last_same_event': numpy.float32(0.0)
            }
        )
        self.hidden_t = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        self.cell_t = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        #self.flag_continue = True
        self.cnt_total_event = numpy.int32(len(self.one_seq))
        print "initialization done "
        #
    #
    def soft_relu(self, x):
        return numpy.log(numpy.float32(1.0)+numpy.exp(x))
    #
    def hard_relu(self, x):
        return numpy.float32(0.5) * (x + numpy.abs(x) )
        #
    #
    def save_model(self, file_save):
        print "saving model of generator ... "
        model_dict = {
            'W_mu': numpy.copy(self.W_mu),
            'W_delta': numpy.copy(self.W_delta),
            'W_alpha': numpy.copy(self.W_alpha),
            'Emb_event': numpy.copy(self.Emb_event),
            'Emb_time': numpy.copy(self.Emb_time),
            'W_recur': numpy.copy(self.W_recur),
            'b_recur': numpy.copy(self.b_recur),
            'dim_process': self.dim_process,
            'dim_model': self.dim_model,
            'dim_time': self.dim_time,
            'dim_float': self.dim_float,
            'name': self.name,
            'args': self.args
        }
        with open(file_save, 'wb') as f:
            pickle.dump(model_dict, f)
    #
    def restart_sequence(self):
        # clear the events memory and reset starting time is 0
        self.intensity_tilde = None
        self.intensity = None
        #
        self.intensity_tilde_ub = None
        self.intensity_ub = None
        #
        self.one_seq = []
        #
        self.one_seq.append(
            {
                'idx_event': numpy.int32(0),
                'type_event': self.dim_process,
                'time_since_start': numpy.float32(0.0),
                'time_since_last_event': numpy.float32(0.0),
                'time_since_last_same_event': numpy.float32(0.0)
            }
        )
        self.hidden_t = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        self.cell_t = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        #self.flag_continue = True
        self.cnt_total_event = numpy.int32(len(self.one_seq))
        #
    #
    #
    #
    def float32_to_bit(self, float_input):
        '''
        input a number in float, convert it to float32
        get its 32-bit representations
        '''
        float32_input = numpy.float32(float_input)
        str_input = ''.join(bin(ord(c)).replace('0b', '').rjust(8, '0') for c in struct.pack('!f', float32_input))
        bit_input = numpy.zeros(
            (self.dim_float,), dtype=dtype
        )
        assert(self.dim_float == len(str_input))
        for idx, item_in_input in enumerate(str_input):
            bit_input[idx] = numpy.float32(item_in_input)
        return numpy.copy(bit_input)
    #
    #
    def sigmoid(self, x):
        return 1 / (1+numpy.exp(-x))
    #
    #
    def compute_hidden_states(self):
        # every time it is called,
        # it computes the new hidden states of the LSTM
        # it gets the last event in the sequence
        # which is generated at t_(rec(t))
        # and compute its hidden states
        emb_event_t = self.Emb_event[
            self.one_seq[-1]['type_event'], :
        ]
        #
        time_rep_t = self.hard_relu(
            self.one_seq[-1]['time_since_last_event'] - self.Threshold_time
        )
        time_rep_t = numpy.concatenate(
            (
                time_rep_t,
                self.one_seq[-1][
                    'time_since_last_event'
                ][None]
            ), axis = 0
        )
        emb_time_t = numpy.dot(
            time_rep_t, self.Emb_time
        )
        #
        post_transform = numpy.dot(
            numpy.concatenate(
                (emb_event_t, emb_time_t, self.hidden_t),
                axis = 0
            ),
            self.W_recur
        ) + self.b_recur
        #
        gate_input = self.sigmoid(
            post_transform[:self.dim_model]
        )
        gate_forget = self.sigmoid(
            post_transform[self.dim_model:2*self.dim_model]
        )
        gate_output = self.sigmoid(
            post_transform[2*self.dim_model:3*self.dim_model]
        )
        gate_pre_c = numpy.tanh(
            post_transform[3*self.dim_model:]
        )
        #
        cell_t_new = gate_forget * self.cell_t + gate_input * gate_pre_c
        hidden_t_new = gate_output * numpy.tanh(cell_t_new)
        self.hidden_t = numpy.copy(hidden_t_new)
        self.cell_t = numpy.copy(cell_t_new)
        #
    #
    #
    def compute_intensity_given_past(self, time_current):
        # compute the intensity of current time
        # given the past events
        #
        time_recent = self.one_seq[-1]['time_since_start']
        #
        delta = self.soft_relu(
            numpy.tensordot(
                self.hidden_t, self.W_delta, (0, 0)
            )
        )
        #
        hidden_with_time = numpy.exp(
            -delta * (
                time_current - time_recent
            )
        ) * self.hidden_t[:, None]
        # (self.dim_model, self.dim_process)
        # self.W_alpha (self.dim_model, self.dim_process)
        self.intensity_tilde = numpy.sum(
            self.W_alpha * hidden_with_time,
            axis = 0
        ) + numpy.dot(
            self.hidden_t, self.W_mu
        )
        #
        self.intensity = self.soft_relu(
            self.intensity_tilde
        )
        # intensity computation is finished
    #
    def compute_intensity_upper_bound(self, time_current):
        # compute the upper bound of intensity
        # at the current time
        time_recent = self.one_seq[-1]['time_since_start']
        #
        delta = self.soft_relu(
            numpy.tensordot(
                self.hidden_t, self.W_delta, (0, 0)
            )
        )
        #
        hidden_with_time = numpy.exp(
            -delta * (
                time_current - time_recent
            )
        ) * self.hidden_t[:, None]
        # (self.dim_model, self.dim_process)
        # self.W_alpha (self.dim_model, self.dim_process)
        self.intensity_tilde_ub = numpy.sum(
            self.hard_relu(
                self.W_alpha * hidden_with_time
            ),
            axis = 0
        ) + numpy.dot(
            self.hidden_t, self.W_mu
        )
        # this part is time-invariant so
        # we do not need to take its hard_relu
        #self.hard_relu(
        #    numpy.dot(
        #        self.hidden_t, self.W_mu
        #    )
        #)
        #
        self.intensity_ub = self.soft_relu(
            self.intensity_tilde_ub
        )
        # intensity computation is finished
    #
    #
    def sample_time_given_type(self, type_event):
        # type_event is the type of event for which we want to sample the time
        # it is the little k in our model formulation in paper
        time_current = numpy.float32(0.0)
        if len(self.one_seq) > 0:
            time_current = self.one_seq[-1]['time_since_start']
        #
        #self.compute_intensity(time_current)
        self.compute_intensity_upper_bound(time_current)
        #
        intensity_hazard = numpy.copy(
            self.intensity_ub[type_event]
        )
        #
        u = 1.5
        while u >= 1.0:
            #print "type is : ", type_event
            E = numpy.random.exponential(
                scale=1.0, size=None
            )
            U = numpy.random.uniform(
                low=0.0, high=1.0, size=None
            )
            #print "E U time_current : "
            #print E, U, time_current
            #print "intensity hazard is : "
            #print intensity_hazard
            time_current += (E / intensity_hazard)
            self.compute_intensity_given_past(time_current)
            u = U * intensity_hazard / self.intensity[type_event]
            #print "new time_current and u : "
            #print time_current, u
            #print "intensity and upper bound is : "
            #print self.intensity
            #print self.intensity_ub
            # use adaptive thinning algorithm
            # that is, decreasing the upper bound
            # to make the sampling quicker
            self.compute_intensity_upper_bound(
                time_current
            )
            intensity_hazard = numpy.copy(
                self.intensity_ub[type_event]
            )
        #
        return time_current
        #
    #
    #
    def gen_one_seq(self, max_len):
        self.restart_sequence()
        '''
        Liiniger (2009), p. 28, describes a "thinning algorithm":
        generate one event of each type, take the minimum,
        and discard the others.
        Details found in my paper write-up
        #
        max_len is a pre-sampled value to set the length of seq
        '''
        # initialize the seq
        time_since_start = numpy.float32(0.0)
        time_since_start_each_event = numpy.zeros(
            (self.dim_process,), dtype=dtype
        )
        #
        for idx_event in range(max_len):
            time_of_happen = numpy.zeros(
                (self.dim_process,), dtype=dtype
            )
            #
            # compute the hidden states
            # of the most recent event in sequence
            self.compute_hidden_states()
            #
            for type_event in range(self.dim_process):
                # sample one event using "thinning algorithm"
                time_of_happen[type_event] = numpy.copy(
                    self.sample_time_given_type(
                        type_event
                    )
                )
            #
            time_since_start_new = numpy.min(time_of_happen)
            type_event_new = numpy.argmin(time_of_happen)
            self.cnt_total_event += 1
            #
            # update sequence
            time_since_last_event = time_since_start_new - time_since_start
            time_since_start = time_since_start_new
            time_since_last_same_event = time_since_start - time_since_start_each_event[type_event_new]
            time_since_start_each_event[type_event_new] = time_since_start
            self.one_seq.append(
                {
                    'idx_event': self.cnt_total_event,
                    'type_event': type_event_new,
                    'time_since_start': time_since_start,
                    'time_since_last_event': time_since_last_event,
                    'time_since_last_same_event': time_since_last_same_event
                }
            )
            #
        # throw away the BOS item
        # at the head of the sequence
        self.one_seq.pop(0)
        #
    #
    #
    def gen_seqs(self, settings):
        #
        #print settings
        print "generating sequences ... "
        num_seqs = settings['num_seqs']
        #
        self.list_seqs = []
        cnt_seqs = 0
        #for idx_seq in range(num_seqs):
        while cnt_seqs < num_seqs:
            #
            max_len = numpy.int32(
                round(
                    numpy.random.uniform(
                        low=settings['min_len'],
                        high=settings['max_len']
                    )
                )
            )
            #
            self.gen_one_seq(max_len)
            self.list_seqs.append(self.one_seq)
            cnt_seqs += 1
            if cnt_seqs % 10 == 9:
                print "idx seq of gen : ", (cnt_seqs, self.name)
                print "total number of seqs : ", num_seqs
    #
    #
    def print_some(self):
        print "printing some seqs ... "
        for idx_seq in range(10):
            print "the id of this seq is : ", idx_seq
            seq = self.list_seqs[idx_seq]
            list_events = []
            list_time = []
            list_dtime = []
            list_items = []
            for event_item in seq:
                list_events.append(event_item['type_event'])
                list_time.append(
                    round(event_item['time_since_start'], 4)
                )
                list_dtime.append(
                    round(event_item['time_since_last_event'], 4)
                )
                list_items.append(
                    (
                        event_item['type_event'],
                        round(
                            event_item['time_since_last_event'], 4
                        )
                    )
                )
            print "the events, time and diff time for : ", idx_seq
            print list_events
            print list_time
            print list_dtime
            print "the list of items is : "
            print list_items
    #
    #
    def save_seqs(self, file_save):
        with open(file_save, 'wb') as f:
            pickle.dump(self.list_seqs, f)



class NeuralHawkesAdaptiveBaseGen_time_scale(object):
    '''
    here is the sequence generator using Neural Hawkes process
    '''
    def __init__(self, settings):
        #
        print "initializing generator ... "
        self.args = settings['args']
        self.dim_float = numpy.int32(32)
        if settings['path_pre_train'] == None:
            print "random parameters ... "
            self.dim_process = settings['dim_process']
            self.dim_model = settings['dim_LSTM']
            self.dim_time = self.dim_float
            #
            numpy.random.seed(
                settings['seed_random']
            )
            #
            #self.scale = numpy.float32(
            #    numpy.random.uniform(
            #        low = 1e-3, high = 2.0,
            #        size = (self.dim_process, )
            #    )
            #)
            self.scale = numpy.float32(
                numpy.ones( (self.dim_process, ) )
            )
            #
            self.W_mu = numpy.float32(
                numpy.random.uniform(
                    low = -1.0, high = 1.0,
                    size = (
                        self.dim_model, self.dim_process
                    )
                )
            )
            #
            self.W_delta = numpy.float32(
                numpy.random.uniform(
                    low = -1.0, high = 1.0,
                    size=(
                        self.dim_model, self.dim_model,
                        self.dim_process
                    )
                )
            )
            #
            self.W_alpha = numpy.float32(
                numpy.random.uniform(
                    low = -1.0,
                    high = 1.0,
                    size = (self.dim_model, self.dim_process)
                )
            )
            self.Emb_event = numpy.float32(
                numpy.random.uniform(
                    low = -1.0,
                    high = 1.0,
                    size = (
                        self.dim_process + numpy.int32(1),
                        self.dim_model
                    )
                )
            )
            self.Emb_time = numpy.float32(
                numpy.random.uniform(
                    low = -1.0,
                    high = 1.0,
                    size = (
                        self.dim_time+numpy.int32(1),
                        self.dim_model
                    )
                )
            )
            self.Threshold_time = numpy.float32(
                numpy.random.uniform(
                    low = 0.0, high = 1.0,
                    size = (self.dim_time, )
                )
            )
            self.W_recur = numpy.float32(
                numpy.random.uniform(
                    low = -1.0,
                    high = 1.0,
                    size = (
                        3 * self.dim_model,
                        4 * self.dim_model
                    )
                )
            )
            self.b_recur = numpy.float32(
                numpy.random.uniform(
                    low = -1.0,
                    high = 1.0,
                    size = (4*self.dim_model, )
                )
            )
        else:
            print "read pretrained model ... "
            path_pre_train = os.path.abspath(
                settings['path_pre_train']
            )
            with open(path_pre_train, 'rb') as f:
                model_pre_train = pickle.load(f)
            self.dim_process = model_pre_train['dim_process']
            self.dim_model = model_pre_train['dim_model']
            self.dim_time = model_pre_train['dim_time']
            #
            self.scale = model_pre_train['scale']
            self.W_mu = model_pre_train['W_mu']
            self.W_delta = model_pre_train['W_delta']
            self.W_alpha = model_pre_train['W_alpha']
            self.Emb_event = model_pre_train['Emb_event']
            self.Emb_time = model_pre_train['Emb_time']
            self.Threshold_time = model_pre_train['Threshold_time']
            self.W_recur = model_pre_train['W_recur']
            self.b_recur = model_pre_train['b_recur']
            #
        #
        #self.intensity = numpy.copy(self.mu)
        self.name = 'AdaptiveNeuralHawkesGen_time_scale'
        #
        self.intensity_tilde = None
        self.intensity = None
        #
        self.intensity_tilde_ub = None
        self.intensity_ub = None
        #
        self.one_seq = []
        # initialization for LSTM states
        self.one_seq.append(
            {
                'idx_event': numpy.int32(0),
                'type_event': self.dim_process,
                'time_since_start': numpy.float32(0.0),
                'time_since_last_event': numpy.float32(0.0),
                'time_since_last_same_event': numpy.float32(0.0)
            }
        )
        self.hidden_t = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        self.cell_t = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        #self.flag_continue = True
        self.cnt_total_event = numpy.int32(len(self.one_seq))
        print "initialization done "
        #
    #
    def soft_relu(self, x):
        return numpy.log(numpy.float32(1.0)+numpy.exp(x))
    #
    def soft_relu_scale(self, x):
        # last dim of x is dim_process
        x /= self.scale
        y = numpy.log(numpy.float32(1.0)+numpy.exp(x))
        y *= self.scale
        return y
    #
    def hard_relu(self, x):
        return numpy.float32(0.5) * (x + numpy.abs(x) )
        #
    #
    def save_model(self, file_save):
        print "saving model of generator ... "
        model_dict = {
            'scale': numpy.copy(self.scale),
            'W_mu': numpy.copy(self.W_mu),
            'W_delta': numpy.copy(self.W_delta),
            'W_alpha': numpy.copy(self.W_alpha),
            'Emb_event': numpy.copy(self.Emb_event),
            'Emb_time': numpy.copy(self.Emb_time),
            'Threshold_time': numpy.copy(self.Threshold_time),
            'W_recur': numpy.copy(self.W_recur),
            'b_recur': numpy.copy(self.b_recur),
            'dim_process': self.dim_process,
            'dim_model': self.dim_model,
            'dim_time': self.dim_time,
            'dim_float': self.dim_float,
            'name': self.name,
            'args': self.args
        }
        with open(file_save, 'wb') as f:
            pickle.dump(model_dict, f)
    #
    def restart_sequence(self):
        # clear the events memory and reset starting time is 0
        self.intensity_tilde = None
        self.intensity = None
        #
        self.intensity_tilde_ub = None
        self.intensity_ub = None
        #
        self.one_seq = []
        #
        self.one_seq.append(
            {
                'idx_event': numpy.int32(0),
                'type_event': self.dim_process,
                'time_since_start': numpy.float32(0.0),
                'time_since_last_event': numpy.float32(0.0),
                'time_since_last_same_event': numpy.float32(0.0)
            }
        )
        self.hidden_t = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        self.cell_t = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        #self.flag_continue = True
        self.cnt_total_event = numpy.int32(len(self.one_seq))
        #
    #
    #
    #
    def float32_to_bit(self, float_input):
        '''
        input a number in float, convert it to float32
        get its 32-bit representations
        '''
        float32_input = numpy.float32(float_input)
        str_input = ''.join(bin(ord(c)).replace('0b', '').rjust(8, '0') for c in struct.pack('!f', float32_input))
        bit_input = numpy.zeros(
            (self.dim_float,), dtype=dtype
        )
        assert(self.dim_float == len(str_input))
        for idx, item_in_input in enumerate(str_input):
            bit_input[idx] = numpy.float32(item_in_input)
        return numpy.copy(bit_input)
    #
    #
    def sigmoid(self, x):
        return 1 / (1+numpy.exp(-x))
    #
    #
    def compute_hidden_states(self):
        # every time it is called,
        # it computes the new hidden states of the LSTM
        # it gets the last event in the sequence
        # which is generated at t_(rec(t))
        # and compute its hidden states
        emb_event_t = self.Emb_event[
            self.one_seq[-1]['type_event'], :
        ]
        #
        time_rep_t = self.hard_relu(
            self.one_seq[-1]['time_since_last_event'] - self.Threshold_time
        )
        time_rep_t = numpy.concatenate(
            (
                time_rep_t,
                self.one_seq[-1][
                    'time_since_last_event'
                ][None]
            ), axis = 0
        )
        emb_time_t = numpy.dot(
            time_rep_t, self.Emb_time
        )
        #
        post_transform = numpy.dot(
            numpy.concatenate(
                (emb_event_t, emb_time_t, self.hidden_t),
                axis = 0
            ),
            self.W_recur
        ) + self.b_recur
        #
        gate_input = self.sigmoid(
            post_transform[:self.dim_model]
        )
        gate_forget = self.sigmoid(
            post_transform[self.dim_model:2*self.dim_model]
        )
        gate_output = self.sigmoid(
            post_transform[2*self.dim_model:3*self.dim_model]
        )
        gate_pre_c = numpy.tanh(
            post_transform[3*self.dim_model:]
        )
        #
        cell_t_new = gate_forget * self.cell_t + gate_input * gate_pre_c
        hidden_t_new = gate_output * numpy.tanh(cell_t_new)
        self.hidden_t = numpy.copy(hidden_t_new)
        self.cell_t = numpy.copy(cell_t_new)
        #
    #
    #
    def compute_intensity_given_past(self, time_current):
        # compute the intensity of current time
        # given the past events
        #
        time_recent = self.one_seq[-1]['time_since_start']
        #
        delta = self.soft_relu(
            numpy.tensordot(
                self.hidden_t, self.W_delta, (0, 0)
            )
        )
        #
        hidden_with_time = numpy.exp(
            -delta * (
                time_current - time_recent
            )
        ) * self.hidden_t[:, None]
        # (self.dim_model, self.dim_process)
        # self.W_alpha (self.dim_model, self.dim_process)
        self.intensity_tilde = numpy.sum(
            self.W_alpha * hidden_with_time,
            axis = 0
        ) + numpy.dot(
            self.hidden_t, self.W_mu
        )
        #
        self.intensity = self.soft_relu_scale(
            self.intensity_tilde
        )
        # intensity computation is finished
    #
    def compute_intensity_upper_bound(self, time_current):
        # compute the upper bound of intensity
        # at the current time
        time_recent = self.one_seq[-1]['time_since_start']
        #
        delta = self.soft_relu(
            numpy.tensordot(
                self.hidden_t, self.W_delta, (0, 0)
            )
        )
        #
        hidden_with_time = numpy.exp(
            -delta * (
                time_current - time_recent
            )
        ) * self.hidden_t[:, None]
        # (self.dim_model, self.dim_process)
        # self.W_alpha (self.dim_model, self.dim_process)
        self.intensity_tilde_ub = numpy.sum(
            self.hard_relu(
                self.W_alpha * hidden_with_time
            ),
            axis = 0
        ) + numpy.dot(
            self.hidden_t, self.W_mu
        )
        # this part is time-invariant so
        # we do not need to take its hard_relu
        #self.hard_relu(
        #    numpy.dot(
        #        self.hidden_t, self.W_mu
        #    )
        #)
        #
        self.intensity_ub = self.soft_relu_scale(
            self.intensity_tilde_ub
        )
        # intensity computation is finished
    #
    #
    def sample_time_given_type(self, type_event):
        # type_event is the type of event for which we want to sample the time
        # it is the little k in our model formulation in paper
        time_current = numpy.float32(0.0)
        if len(self.one_seq) > 0:
            time_current = self.one_seq[-1]['time_since_start']
        #
        #self.compute_intensity(time_current)
        self.compute_intensity_upper_bound(time_current)
        intensity_hazard = numpy.copy(
            self.intensity_ub[type_event]
        )
        #
        u = 1.5
        while u >= 1.0:
            #print "type is : ", type_event
            E = numpy.random.exponential(
                scale=1.0, size=None
            )
            U = numpy.random.uniform(
                low=0.0, high=1.0, size=None
            )
            #print "E U time_current : "
            #print E, U, time_current
            #print "intensity hazard is : "
            #print intensity_hazard
            time_current += (E / intensity_hazard)
            self.compute_intensity_given_past(time_current)
            u = U * intensity_hazard / self.intensity[type_event]
            #print "new time_current and u : "
            #print time_current, u
            #print "intensity and upper bound is : "
            #print self.intensity
            #print self.intensity_ub
            # use adaptive thinning algorithm
            # that is, decreasing the upper bound
            # to make the sampling quicker
            # use adaptive method by
            # toggling on the following block
            '''
            self.compute_intensity_upper_bound(
                time_current
            )
            intensity_hazard = numpy.copy(
                self.intensity_ub[type_event]
            )
            '''
        return time_current
        #
    #
    #
    def gen_one_seq(self, max_len):
        self.restart_sequence()
        '''
        Liiniger (2009), p. 28, describes a "thinning algorithm":
        generate one event of each type, take the minimum,
        and discard the others.
        Details found in my paper write-up
        max_len is a pre-sampled value to set the length of seq
        '''
        # initialize the seq
        time_since_start = numpy.float32(0.0)
        time_since_start_each_event = numpy.zeros(
            (self.dim_process,), dtype=dtype
        )
        #
        for idx_event in range(max_len):
            time_of_happen = numpy.zeros(
                (self.dim_process,), dtype=dtype
            )
            #
            # compute the hidden states
            # of the most recent event in sequence
            self.compute_hidden_states()
            #
            for type_event in range(self.dim_process):
                # sample one event using "thinning algorithm"
                time_of_happen[type_event] = numpy.copy(
                    self.sample_time_given_type(
                        type_event
                    )
                )
            #
            time_since_start_new = numpy.min(time_of_happen)
            type_event_new = numpy.argmin(time_of_happen)
            self.cnt_total_event += 1
            #
            # update sequence
            time_since_last_event = time_since_start_new - time_since_start
            time_since_start = time_since_start_new
            time_since_last_same_event = time_since_start - time_since_start_each_event[type_event_new]
            time_since_start_each_event[type_event_new] = time_since_start
            self.one_seq.append(
                {
                    'idx_event': self.cnt_total_event,
                    'type_event': type_event_new,
                    'time_since_start': time_since_start,
                    'time_since_last_event': time_since_last_event,
                    'time_since_last_same_event': time_since_last_same_event
                }
            )
            #
        # throw away the BOS item
        # at the head of the sequence
        self.one_seq.pop(0)
        #
    #
    #
    def gen_seqs(self, settings):
        #
        #print settings
        print "generating sequences ... "
        num_seqs = settings['num_seqs']
        #
        self.list_seqs = []
        cnt_seqs = 0
        #for idx_seq in range(num_seqs):
        while cnt_seqs < num_seqs:
            #
            max_len = numpy.int32(
                round(
                    numpy.random.uniform(
                        low=settings['min_len'],
                        high=settings['max_len']
                    )
                )
            )
            #
            self.gen_one_seq(max_len)
            self.list_seqs.append(self.one_seq)
            cnt_seqs += 1
            if cnt_seqs % 10 == 9:
                print "idx seq of gen : ", (cnt_seqs, self.name)
                print "total number of seqs : ", num_seqs
    #
    #
    def print_some(self):
        print "printing some seqs ... "
        for idx_seq in range(10):
            print "the id of this seq is : ", idx_seq
            seq = self.list_seqs[idx_seq]
            list_events = []
            list_time = []
            list_dtime = []
            list_items = []
            for event_item in seq:
                list_events.append(event_item['type_event'])
                list_time.append(
                    round(event_item['time_since_start'], 4)
                )
                list_dtime.append(
                    round(event_item['time_since_last_event'], 4)
                )
                list_items.append(
                    (
                        event_item['type_event'],
                        round(
                            event_item['time_since_last_event'], 4
                        )
                    )
                )
            print "the events, time and diff time for : ", idx_seq
            print list_events
            print list_time
            print list_dtime
            print "the list of items is : "
            print list_items
    #
    #
    def save_seqs(self, file_save):
        with open(file_save, 'wb') as f:
            pickle.dump(self.list_seqs, f)



class NeuralHawkesAdaptiveBaseGen_time_scale_reduce(
    object
):
    '''
    here is the sequence generator
    using Neural Hawkes process with reduced decay param
    '''
    def __init__(self, settings):
        #
        print "initializing generator ... "
        self.args = settings['args']
        self.dim_float = numpy.int32(32)
        if settings['path_pre_train'] == None:
            print "random parameters ... "
            self.dim_process = settings['dim_process']
            self.dim_model = settings['dim_LSTM']
            self.dim_time = self.dim_float
            #
            numpy.random.seed(
                settings['seed_random']
            )
            #
            #self.scale = numpy.float32(
            #    numpy.random.uniform(
            #        low = 1e-3, high = 2.0,
            #        size = (self.dim_process, )
            #    )
            #)
            self.scale = numpy.float32(
                numpy.ones( (self.dim_process, ) )
            )
            #
            self.W_mu = numpy.float32(
                numpy.random.uniform(
                    low = -1.0, high = 1.0,
                    size = (
                        self.dim_model, self.dim_process
                    )
                )
            )
            #
            self.W_delta = numpy.float32(
                numpy.random.uniform(
                    low = -1.0, high = 1.0,
                    size=(
                        self.dim_model,
                        self.dim_model
                    )
                )
            )
            #
            self.W_alpha = numpy.float32(
                numpy.random.uniform(
                    low = -1.0,
                    high = 1.0,
                    size = (self.dim_model, self.dim_process)
                )
            )
            self.Emb_event = numpy.float32(
                numpy.random.uniform(
                    low = -1.0,
                    high = 1.0,
                    size = (
                        self.dim_process + numpy.int32(1),
                        self.dim_model
                    )
                )
            )
            self.Emb_time = numpy.float32(
                numpy.random.uniform(
                    low = -1.0,
                    high = 1.0,
                    size = (
                        self.dim_time+numpy.int32(1),
                        self.dim_model
                    )
                )
            )
            self.Threshold_time = numpy.float32(
                numpy.random.uniform(
                    low = 0.0, high = 1.0,
                    size = (self.dim_time, )
                )
            )
            self.W_recur = numpy.float32(
                numpy.random.uniform(
                    low = -1.0,
                    high = 1.0,
                    size = (
                        3 * self.dim_model,
                        4 * self.dim_model
                    )
                )
            )
            self.b_recur = numpy.float32(
                numpy.random.uniform(
                    low = -1.0,
                    high = 1.0,
                    size = (4*self.dim_model, )
                )
            )
        else:
            print "read pretrained model ... "
            path_pre_train = os.path.abspath(
                settings['path_pre_train']
            )
            with open(path_pre_train, 'rb') as f:
                model_pre_train = pickle.load(f)
            self.dim_process = model_pre_train['dim_process']
            self.dim_model = model_pre_train['dim_model']
            self.dim_time = model_pre_train['dim_time']
            #
            self.scale = model_pre_train['scale']
            self.W_mu = model_pre_train['W_mu']
            self.W_delta = model_pre_train['W_delta']
            self.W_alpha = model_pre_train['W_alpha']
            self.Emb_event = model_pre_train['Emb_event']
            self.Emb_time = model_pre_train['Emb_time']
            self.Threshold_time = model_pre_train['Threshold_time']
            self.W_recur = model_pre_train['W_recur']
            self.b_recur = model_pre_train['b_recur']
            #
        #
        #self.intensity = numpy.copy(self.mu)
        self.name = 'AdaptiveNeuralHawkesGen_time_scale_reduce'
        #
        self.intensity_tilde = None
        self.intensity = None
        #
        self.intensity_tilde_ub = None
        self.intensity_ub = None
        #
        self.one_seq = []
        # initialization for LSTM states
        self.one_seq.append(
            {
                'idx_event': numpy.int32(0),
                'type_event': self.dim_process,
                'time_since_start': numpy.float32(0.0),
                'time_since_last_event': numpy.float32(0.0),
                'time_since_last_same_event': numpy.float32(0.0)
            }
        )
        self.hidden_t = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        self.cell_t = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        #self.flag_continue = True
        self.cnt_total_event = numpy.int32(len(self.one_seq) )
        print "initialization done "
        #
    #
    def soft_relu(self, x):
        return numpy.log(numpy.float32(1.0)+numpy.exp(x))
    #
    def soft_relu_scale(self, x):
        # last dim of x is dim_process
        x /= self.scale
        y = numpy.log(numpy.float32(1.0)+numpy.exp(x))
        y *= self.scale
        return y
    #
    def hard_relu(self, x):
        return numpy.float32(0.5) * (x + numpy.abs(x) )
        #
    #
    def save_model(self, file_save):
        print "saving model of generator ... "
        model_dict = {
            'scale': numpy.copy(self.scale),
            'W_mu': numpy.copy(self.W_mu),
            'W_delta': numpy.copy(self.W_delta),
            'W_alpha': numpy.copy(self.W_alpha),
            'Emb_event': numpy.copy(self.Emb_event),
            'Emb_time': numpy.copy(self.Emb_time),
            'Threshold_time': numpy.copy(self.Threshold_time),
            'W_recur': numpy.copy(self.W_recur),
            'b_recur': numpy.copy(self.b_recur),
            'dim_process': self.dim_process,
            'dim_model': self.dim_model,
            'dim_time': self.dim_time,
            'dim_float': self.dim_float,
            'name': self.name,
            'args': self.args
        }
        with open(file_save, 'wb') as f:
            pickle.dump(model_dict, f)
    #
    def restart_sequence(self):
        # clear the events memory and reset starting time is 0
        self.intensity_tilde = None
        self.intensity = None
        #
        self.intensity_tilde_ub = None
        self.intensity_ub = None
        #
        self.one_seq = []
        #
        self.one_seq.append(
            {
                'idx_event': numpy.int32(0),
                'type_event': self.dim_process,
                'time_since_start': numpy.float32(0.0),
                'time_since_last_event': numpy.float32(0.0),
                'time_since_last_same_event': numpy.float32(0.0)
            }
        )
        self.hidden_t = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        self.cell_t = numpy.zeros(
            (self.dim_model, ), dtype = dtype
        )
        #self.flag_continue = True
        self.cnt_total_event = numpy.int32(len(self.one_seq))
        #
    #
    #
    #
    def float32_to_bit(self, float_input):
        '''
        input a number in float, convert it to float32
        get its 32-bit representations
        '''
        float32_input = numpy.float32(float_input)
        str_input = ''.join(bin(ord(c)).replace('0b', '').rjust(8, '0') for c in struct.pack('!f', float32_input))
        bit_input = numpy.zeros(
            (self.dim_float,), dtype=dtype
        )
        assert(self.dim_float == len(str_input))
        for idx, item_in_input in enumerate(str_input):
            bit_input[idx] = numpy.float32(item_in_input)
        return numpy.copy(bit_input)
    #
    #
    def sigmoid(self, x):
        return 1 / (1+numpy.exp(-x))
    #
    #
    def compute_hidden_states(self):
        # every time it is called,
        # it computes the new hidden states of the LSTM
        # it gets the last event in the sequence
        # which is generated at t_(rec(t))
        # and compute its hidden states
        emb_event_t = self.Emb_event[
            self.one_seq[-1]['type_event'], :
        ]
        #
        time_rep_t = self.hard_relu(
            self.one_seq[-1]['time_since_last_event'] - self.Threshold_time
        )
        time_rep_t = numpy.concatenate(
            (
                time_rep_t,
                self.one_seq[-1][
                    'time_since_last_event'
                ][None]
            ), axis = 0
        )
        emb_time_t = numpy.dot(
            time_rep_t, self.Emb_time
        )
        #
        post_transform = numpy.dot(
            numpy.concatenate(
                (emb_event_t, emb_time_t, self.hidden_t),
                axis = 0
            ),
            self.W_recur
        ) + self.b_recur
        #
        gate_input = self.sigmoid(
            post_transform[:self.dim_model]
        )
        gate_forget = self.sigmoid(
            post_transform[self.dim_model:2*self.dim_model]
        )
        gate_output = self.sigmoid(
            post_transform[2*self.dim_model:3*self.dim_model]
        )
        gate_pre_c = numpy.tanh(
            post_transform[3*self.dim_model:]
        )
        #
        cell_t_new = gate_forget * self.cell_t + gate_input * gate_pre_c
        hidden_t_new = gate_output * numpy.tanh(cell_t_new)
        self.hidden_t = numpy.copy(hidden_t_new)
        self.cell_t = numpy.copy(cell_t_new)
        #
    #
    #
    def compute_intensity_given_past(self, time_current):
        # compute the intensity of current time
        # given the past events
        time_recent = self.one_seq[-1]['time_since_start']
        # W_delta : dim_model * dim_model
        delta = self.soft_relu(
            numpy.dot(
                self.hidden_t, self.W_delta
            )
        )
        # dim_model
        hidden_with_time = numpy.exp(
            -delta * (
                time_current - time_recent
            )
        ) * self.hidden_t
        # dim_model
        # self.W_alpha (self.dim_model, self.dim_process)
        self.intensity_tilde = numpy.dot(
            hidden_with_time, self.W_alpha
        ) + numpy.dot(
            self.hidden_t, self.W_mu
        )
        #
        self.intensity = self.soft_relu_scale(
            self.intensity_tilde
        )
        # intensity computation is finished
    #
    #
    def compute_intensity_upper_bound(self, time_current):
        # compute the upper bound of intensity
        # at the current time
        time_recent = self.one_seq[-1]['time_since_start']
        #
        delta = self.soft_relu(
            numpy.dot(
                self.hidden_t, self.W_delta
            )
        )
        #
        hidden_with_time = numpy.exp(
            -delta * (
                time_current - time_recent
            )
        ) * self.hidden_t
        # hidden_with_time : dim_model
        self.intensity_tilde_ub = numpy.sum(
            self.hard_relu(
                self.W_alpha * hidden_with_time[:, None]
            ),
            axis = 0
        ) + numpy.dot(
            self.hidden_t, self.W_mu
        )
        # this part is time-invariant so
        # we do not need to take its hard_relu
        #self.hard_relu(
        #    numpy.dot(
        #        self.hidden_t, self.W_mu
        #    )
        #)
        #
        self.intensity_ub = self.soft_relu_scale(
            self.intensity_tilde_ub
        )
        # intensity computation is finished
    #
    #
    def sample_time_given_type(self, type_event):
        # type_event is the type of event for which we want to sample the time
        # it is the little k in our model formulation in paper
        time_current = numpy.float32(0.0)
        if len(self.one_seq) > 0:
            time_current = self.one_seq[-1]['time_since_start']
        #
        #self.compute_intensity(time_current)
        self.compute_intensity_upper_bound(time_current)
        intensity_hazard = numpy.copy(
            self.intensity_ub[type_event]
        )
        #
        u = 1.5
        while u >= 1.0:
            #print "type is : ", type_event
            E = numpy.random.exponential(
                scale=1.0, size=None
            )
            U = numpy.random.uniform(
                low=0.0, high=1.0, size=None
            )
            #print "E U time_current : "
            #print E, U, time_current
            #print "intensity hazard is : "
            #print intensity_hazard
            time_current += (E / intensity_hazard)
            self.compute_intensity_given_past(time_current)
            u = U * intensity_hazard / self.intensity[type_event]
            #print "new time_current and u : "
            #print time_current, u
            #print "intensity and upper bound is : "
            #print self.intensity
            #print self.intensity_ub
            # use adaptive thinning algorithm
            # that is, decreasing the upper bound
            # to make the sampling quicker
            # use adaptive method by
            # toggling on the following block
            '''
            self.compute_intensity_upper_bound(
                time_current
            )
            intensity_hazard = numpy.copy(
                self.intensity_ub[type_event]
            )
            '''
        return time_current
        #
    #
    #
    def gen_one_seq(self, max_len):
        self.restart_sequence()
        '''
        Liiniger (2009), p. 28, describes a "thinning algorithm":
        generate one event of each type, take the minimum,
        and discard the others.
        Details found in my paper write-up
        max_len is a pre-sampled value to set the length of seq
        '''
        # initialize the seq
        time_since_start = numpy.float32(0.0)
        time_since_start_each_event = numpy.zeros(
            (self.dim_process,), dtype=dtype
        )
        #
        for idx_event in range(max_len):
            time_of_happen = numpy.zeros(
                (self.dim_process,), dtype=dtype
            )
            #
            # compute the hidden states
            # of the most recent event in sequence
            self.compute_hidden_states()
            #
            for type_event in range(self.dim_process):
                # sample one event using "thinning algorithm"
                time_of_happen[type_event] = numpy.copy(
                    self.sample_time_given_type(
                        type_event
                    )
                )
            #
            time_since_start_new = numpy.min(time_of_happen)
            type_event_new = numpy.argmin(time_of_happen)
            self.cnt_total_event += 1
            #
            # update sequence
            time_since_last_event = time_since_start_new - time_since_start
            time_since_start = time_since_start_new
            time_since_last_same_event = time_since_start - time_since_start_each_event[type_event_new]
            time_since_start_each_event[type_event_new] = time_since_start
            self.one_seq.append(
                {
                    'idx_event': self.cnt_total_event,
                    'type_event': type_event_new,
                    'time_since_start': time_since_start,
                    'time_since_last_event': time_since_last_event,
                    'time_since_last_same_event': time_since_last_same_event
                }
            )
            #
        # throw away the BOS item
        # at the head of the sequence
        self.one_seq.pop(0)
        #
    #
    #
    def gen_seqs(self, settings):
        #
        #print settings
        print "generating sequences ... "
        num_seqs = settings['num_seqs']
        #
        self.list_seqs = []
        cnt_seqs = 0
        #for idx_seq in range(num_seqs):
        while cnt_seqs < num_seqs:
            #
            max_len = numpy.int32(
                round(
                    numpy.random.uniform(
                        low=settings['min_len'],
                        high=settings['max_len']
                    )
                )
            )
            #
            self.gen_one_seq(max_len)
            self.list_seqs.append(self.one_seq)
            cnt_seqs += 1
            if cnt_seqs % 10 == 9:
                print "idx seq of gen : ", (cnt_seqs, self.name)
                print "total number of seqs : ", num_seqs
    #
    #
    def print_some(self):
        print "printing some seqs ... "
        for idx_seq in range(10):
            print "the id of this seq is : ", idx_seq
            seq = self.list_seqs[idx_seq]
            list_events = []
            list_time = []
            list_dtime = []
            list_items = []
            for event_item in seq:
                list_events.append(event_item['type_event'])
                list_time.append(
                    round(event_item['time_since_start'], 4)
                )
                list_dtime.append(
                    round(event_item['time_since_last_event'], 4)
                )
                list_items.append(
                    (
                        event_item['type_event'],
                        round(
                            event_item['time_since_last_event'], 4
                        )
                    )
                )
            print "the events, time and diff time for : ", idx_seq
            print list_events
            print list_time
            print list_dtime
            print "the list of items is : "
            print list_items
    #
    #
    def save_seqs(self, file_save):
        with open(file_save, 'wb') as f:
            pickle.dump(self.list_seqs, f)



#
#