Python keras.backend.mean() Examples

The following are 30 code examples of keras.backend.mean(), collected from open-source projects. The source file and project for each example are noted above it. You may also want to check out all available functions/classes of the module keras.backend, or try the search function.
Example #1
Source File: 7_visualize_filters.py    From deep-learning-note with MIT License
def generate_pattern(layer_name, filter_index, size=150):
    # filter visualization function
    layer_output = model.get_layer(layer_name).output
    loss = K.mean(layer_output[:, :, :, filter_index])
    grads = K.gradients(loss, model.input)[0]
    grads /= (K.sqrt(K.mean(K.square(grads))) + 1e-5)
    iterate = K.function([model.input], [loss, grads])
    input_img_data = np.random.random((1, size, size, 3)) * 20 + 128.
    
    step = 1
    for _ in range(40):
        loss_value, grads_value = iterate([input_img_data])
        input_img_data += grads_value * step
    
    img = input_img_data[0]
    return deprocess_image(img) 
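For context, the excerpt relies on a global `model` and a `deprocess_image` helper defined elsewhere in the source file. A minimal driver, assuming a pre-trained VGG16 backbone and the standard deprocess_image post-processing this script follows (both are assumptions, not part of the excerpt):

import numpy as np
from keras import backend as K
from keras.applications import VGG16

model = VGG16(weights='imagenet', include_top=False)

def deprocess_image(x):
    # center on 0 with std 0.1, then map into the displayable [0, 255] range
    x -= x.mean()
    x /= (x.std() + 1e-5)
    x *= 0.1
    x += 0.5
    x = np.clip(x, 0, 1)
    return np.clip(x * 255, 0, 255).astype('uint8')

img = generate_pattern('block3_conv1', 0)  # pattern that excites filter 0 of block3_conv1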
Example #2
Source File: qrnn.py    From typhon with MIT License
def posterior_mean(self, x):
        r"""
        Computes the posterior mean by computing the first moment of the
        estimated posterior CDF.

        Arguments:

            x(np.array): Array of shape `(n, m)` containing `n` inputs for which
                         to predict the posterior mean.
        Returns:

            Array containing the posterior means for the provided inputs.
        """
        y_pred, qs = self.cdf(x)
        mus = y_pred[-1] - np.trapz(qs, x=y_pred)
        return mus 
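The first-moment trick above follows from integration by parts: E[y] = y_max - ∫ F(y) dy when the CDF F runs from 0 to 1 over the predicted quantiles. A plain-NumPy sanity check on an assumed standard-normal example (not from the source):

import numpy as np
from scipy.stats import norm

# quantile fractions and the matching quantiles of a standard normal
qs = np.linspace(0.01, 0.99, 99)
y_pred = norm.ppf(qs)

# E[y] = y_max - integral of the CDF, the same quantity posterior_mean computes
mu = y_pred[-1] - np.trapz(qs, x=y_pred)
print(mu)  # approximately 0.0, the mean of the standard normal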
Example #3
Source File: model.py    From dataiku-contrib with Apache License 2.0
def rpn_class_loss_graph(rpn_match, rpn_class_logits):
    """RPN anchor classifier loss.
    rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive,
               -1=negative, 0=neutral anchor.
    rpn_class_logits: [batch, anchors, 2]. RPN classifier logits for FG/BG.
    """
    # Squeeze last dim to simplify
    rpn_match = tf.squeeze(rpn_match, -1)
    # Get anchor classes. Convert the -1/+1 match to 0/1 values.
    anchor_class = K.cast(K.equal(rpn_match, 1), tf.int32)
    # Positive and Negative anchors contribute to the loss,
    # but neutral anchors (match value = 0) don't.
    indices = tf.where(K.not_equal(rpn_match, 0))
    # Pick rows that contribute to the loss and filter out the rest.
    rpn_class_logits = tf.gather_nd(rpn_class_logits, indices)
    anchor_class = tf.gather_nd(anchor_class, indices)
    # Cross entropy loss
    loss = K.sparse_categorical_crossentropy(target=anchor_class,
                                             output=rpn_class_logits,
                                             from_logits=True)
    loss = K.switch(tf.size(loss) > 0, K.mean(loss), tf.constant(0.0))
    return loss 
Example #4
Source File: neural_network.py    From CalibrationNN with GNU General Public License v3.0
def test_helper(func, exponent, layer, lr, dropout_first, dropout_middle, 
                dropout_last, alpha, prefix='SWO GBP ', postfix='',
                with_comparison=False):
    print('Test %s, %s, %s, %s, %s, %s, %s' % (exponent, layer, lr, dropout_first,
                                               dropout_middle, dropout_last, alpha))
    model = func(exponent=exponent, lr=lr, layers=layer, 
                 dropout_first=dropout_first, dropout_middle=dropout_middle,
                 dropout_last=dropout_last, prefix=prefix, postfix=postfix, 
                 alpha=alpha)
    model.train(200)
    val_loss = np.mean(model.history['history']['val_loss'][-5:])
    
#    if with_comparison:
#        swo = inst.get_swaptiongen(inst.hullwhite_analytic)
#        _, values = swo.compare_history(model, dates=dates)
#        
    
    return (val_loss, layer, exponent, lr, dropout_first, dropout_middle, 
            dropout_last, alpha) 
Example #5
Source File: model_loss.py    From speech_separation with MIT License
def audio_discriminate_loss2(gamma=0.1, beta=2*0.1, num_speaker=2):
    def loss_func(S_true, S_pred, gamma=gamma, beta=beta, num_speaker=num_speaker):
        sum_mtr = K.zeros_like(S_true[:,:,:,:,0])
        for i in range(num_speaker):
            sum_mtr += K.square(S_true[:,:,:,:,i]-S_pred[:,:,:,:,i])
            for j in range(num_speaker):
                if i != j:
                    sum_mtr -= gamma*(K.square(S_true[:,:,:,:,i]-S_pred[:,:,:,:,j]))

        for i in range(num_speaker):
            for j in range(i+1,num_speaker):
                #sum_mtr -= beta*K.square(S_pred[:,:,:,i]-S_pred[:,:,:,j])
                #sum_mtr += beta*K.square(S_true[:,:,:,:,i]-S_true[:,:,:,:,j])
                pass
        #sum = K.sum(K.maximum(K.flatten(sum_mtr),0))

        loss = K.mean(K.flatten(sum_mtr))

        return loss
    return loss_func 
Example #6
Source File: chapter_06_001.py    From Python-Deep-Learning-SE with MIT License
def sampling(args: tuple):
    """
    Reparameterization trick by sampling z from unit Gaussian
    :param args: (tensor, tensor) mean and log of variance of q(z|x)
    :returns tensor: sampled latent vector z
    """

    # unpack the input tuple
    z_mean, z_log_var = args

    # mini-batch size
    mb_size = K.shape(z_mean)[0]

    # latent space size
    dim = K.int_shape(z_mean)[1]

    # random normal vector with mean=0 and std=1.0
    epsilon = K.random_normal(shape=(mb_size, dim))

    return z_mean + K.exp(0.5 * z_log_var) * epsilon 
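In the source this function is attached to the encoder through a Lambda layer. A sketch of the typical wiring (the input and latent sizes here are assumptions):

from keras.layers import Input, Dense, Lambda

inputs = Input(shape=(784,))
h = Dense(256, activation='relu')(inputs)
z_mean = Dense(2)(h)
z_log_var = Dense(2)(h)

# z ~ q(z|x) via the reparameterization trick defined above
z = Lambda(sampling)([z_mean, z_log_var])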
Example #7
Source File: wgan_gp.py    From Keras-GAN with MIT License
def gradient_penalty_loss(self, y_true, y_pred, averaged_samples):
        """
        Computes gradient penalty based on prediction and weighted real / fake samples
        """
        gradients = K.gradients(y_pred, averaged_samples)[0]
        # compute the euclidean norm by squaring ...
        gradients_sqr = K.square(gradients)
        #   ... summing over the rows ...
        gradients_sqr_sum = K.sum(gradients_sqr,
                                  axis=np.arange(1, len(gradients_sqr.shape)))
        #   ... and sqrt
        gradient_l2_norm = K.sqrt(gradients_sqr_sum)
        # compute lambda * (1 - ||grad||)^2 still for each single sample
        gradient_penalty = K.square(1 - gradient_l2_norm)
        # return the mean as loss over all the batch samples
        return K.mean(gradient_penalty) 
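The averaged_samples argument is a random convex combination of a real batch and a fake batch. In the same repository this is produced by a small custom merge layer along these lines (a sketch; the fixed batch size of 32 is an assumption):

from keras.layers.merge import _Merge
import keras.backend as K

class RandomWeightedAverage(_Merge):
    """Interpolate between real and fake samples with a per-sample random weight."""
    def _merge_function(self, inputs):
        alpha = K.random_uniform((32, 1, 1, 1))  # one weight per sample in the batch
        return (alpha * inputs[0]) + ((1 - alpha) * inputs[1])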
Example #8
Source File: model.py    From PanopticSegmentation with MIT License
def rpn_class_loss_graph(rpn_match, rpn_class_logits):
    """RPN anchor classifier loss.

    rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive,
               -1=negative, 0=neutral anchor.
    rpn_class_logits: [batch, anchors, 2]. RPN classifier logits for FG/BG.
    """
    # Squeeze last dim to simplify
    rpn_match = tf.squeeze(rpn_match, -1)
    # Get anchor classes. Convert the -1/+1 match to 0/1 values.
    anchor_class = K.cast(K.equal(rpn_match, 1), tf.int32)
    # Positive and Negative anchors contribute to the loss,
    # but neutral anchors (match value = 0) don't.
    indices = tf.where(K.not_equal(rpn_match, 0))
    # Pick rows that contribute to the loss and filter out the rest.
    rpn_class_logits = tf.gather_nd(rpn_class_logits, indices)
    anchor_class = tf.gather_nd(anchor_class, indices)
    # Cross entropy loss
    loss = K.sparse_categorical_crossentropy(target=anchor_class,
                                             output=rpn_class_logits,
                                             from_logits=True)
    loss = K.switch(tf.size(loss) > 0, K.mean(loss), tf.constant(0.0))
    return loss 
Example #9
Source File: breakout_dqn.py    From reinforcement-learning with MIT License
def optimizer(self):
        a = K.placeholder(shape=(None,), dtype='int32')
        y = K.placeholder(shape=(None,), dtype='float32')

        py_x = self.model.output

        a_one_hot = K.one_hot(a, self.action_size)
        q_value = K.sum(py_x * a_one_hot, axis=1)
        error = K.abs(y - q_value)

        quadratic_part = K.clip(error, 0.0, 1.0)
        linear_part = error - quadratic_part
        loss = K.mean(0.5 * K.square(quadratic_part) + linear_part)

        optimizer = RMSprop(lr=0.00025, epsilon=0.01)
        updates = optimizer.get_updates(self.model.trainable_weights, [], loss)
        train = K.function([self.model.input, a, y], [loss], updates=updates)

        return train

    # approximate the Q function with a convolutional neural network:
    # the state is the input, the Q-value of each action is the output
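The clip-and-split above is the Huber loss with delta = 1: quadratic for small TD errors, linear for large ones, which keeps gradients bounded. A quick NumPy check of the equivalence (the reference implementation is an assumption, added for illustration):

import numpy as np

def huber(error, delta=1.0):
    # reference Huber loss: quadratic inside |error| <= delta, linear outside
    return np.where(np.abs(error) <= delta,
                    0.5 * error ** 2,
                    delta * (np.abs(error) - 0.5 * delta))

e = np.array([-3.0, -0.5, 0.2, 2.0])
quadratic = np.clip(np.abs(e), 0.0, 1.0)
linear = np.abs(e) - quadratic
print(0.5 * quadratic ** 2 + linear)  # element-wise equal to huber(e)
print(huber(e))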
Example #10
Source File: breakout_dueling_ddqn.py    From reinforcement-learning with MIT License
def optimizer(self):
        a = K.placeholder(shape=(None, ), dtype='int32')
        y = K.placeholder(shape=(None, ), dtype='float32')

        py_x = self.model.output

        a_one_hot = K.one_hot(a, self.action_size)
        q_value = K.sum(py_x * a_one_hot, axis=1)
        error = K.abs(y - q_value)

        quadratic_part = K.clip(error, 0.0, 1.0)
        linear_part = error - quadratic_part
        loss = K.mean(0.5 * K.square(quadratic_part) + linear_part)

        optimizer = RMSprop(lr=0.00025, epsilon=0.01)
        updates = optimizer.get_updates(self.model.trainable_weights, [], loss)
        train = K.function([self.model.input, a, y], [loss], updates=updates)

        return train

    # approximate the Q function with a convolutional neural network:
    # the state is the input, the Q-value of each action is the output;
    # the dueling network's Q-value is the sum of the advantage and the state value
Example #11
Source File: bigan.py    From Keras-BiGAN with MIT License
def prepareSamples(self, cnum=0, num=1000):  # 8x8 images, bottom row is constant

        try:
            os.mkdir("Results/Samples-c" + str(cnum))
        except OSError:
            pass  # directory already exists

        im = self.im.get_class(cnum)
        e = self.GAN.E.predict(im, batch_size = BATCH_SIZE * k_images)

        mean = np.mean(e, axis = 0)
        std = np.std(e, axis = 0)

        n = noise(num)
        nc = nClass(num, mean, std)

        im = self.GAN.G.predict([n, nc], batch_size = BATCH_SIZE)

        for i in range(im.shape[0]):

            x = Image.fromarray(np.uint8(im[i]*255), mode = 'RGB')

            x.save("Results/Samples-c" + str(cnum) + "/im ("+str(i+1)+").png") 
Example #12
Source File: qrnn.py    From typhon with MIT License
def predict(self, x):
        r"""
        Predict quantiles of the conditional distribution P(y|x).

        Forward propagates the inputs in `x` through the network to
        obtain the predicted quantiles `y`.

        Arguments:

            x(np.array): Array of shape `(n, m)` containing `n` m-dimensional inputs
                         for which to predict the conditional quantiles.

        Returns:

             Array of shape `(n, k)` with the columns corresponding to the k
             quantiles of the network.

        """
        predictions = np.stack(
            [m.predict((x - self.x_mean) / self.x_sigma) for m in self.models])
        return np.mean(predictions, axis=0) 
Example #13
Source File: layer_normalization.py    From keras-utility-layer-collection with MIT License
def call(self, x):
        mean = K.mean(x, axis=-1)
        std = K.std(x, axis=-1)

        if len(x.shape) == 3:
            mean = K.permute_dimensions(
                K.repeat(mean, x.shape.as_list()[-1]),
                [0,2,1]
            )
            std = K.permute_dimensions(
                K.repeat(std, x.shape.as_list()[-1]),
                [0,2,1] 
            )
            
        elif len(x.shape) == 2:
            mean = K.reshape(
                K.repeat_elements(mean, x.shape.as_list()[-1], 0),
                (-1, x.shape.as_list()[-1])
            )
            std = K.reshape(
                K.repeat_elements(std, x.shape.as_list()[-1], 0),
                (-1, x.shape.as_list()[-1])
            )
        
        return self._g * (x - mean) / (std + self._epsilon) + self._b 
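The repeat/permute bookkeeping above only serves to broadcast the per-feature statistics back to the input shape. With keepdims=True the same normalization collapses to a few lines; a sketch of the equivalent computation (not from the source):

def layer_norm(x, g, b, epsilon=1e-6):
    # identical statistics, broadcast via keepdims instead of repeat/permute
    mean = K.mean(x, axis=-1, keepdims=True)
    std = K.std(x, axis=-1, keepdims=True)
    return g * (x - mean) / (std + epsilon) + b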
Example #14
Source File: attack_utils.py    From blackbox-attacks with MIT License
def gen_adv_loss(logits, y, loss='logloss', mean=False):
    """
    Generate the loss function.
    """

    if loss == 'training':
        # use the model's output instead of the true labels to avoid
        # label leaking at training time
        y = K.cast(K.equal(logits, K.max(logits, 1, keepdims=True)), "float32")
        y = y / K.sum(y, 1, keepdims=True)
        out = K.categorical_crossentropy(y, logits, from_logits=True)
    elif loss == 'logloss':
        out = K.categorical_crossentropy(y, logits, from_logits=True)
    else:
        raise ValueError("Unknown loss: {}".format(loss))

    if mean:
        out = K.mean(out)
    # else:
    #     out = K.sum(out)
    return out 
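A loss built this way typically feeds a gradient computation for crafting adversarial examples. A hedged FGSM-style sketch of that use (`model`, `x`, `y`, and `eps` are assumptions, not part of the excerpt):

import keras.backend as K

# assumed: `model` maps inputs to logits, `y` holds one-hot labels,
# `x` is the symbolic model input, and `eps` is the perturbation budget
logits = model(x)
adv_loss = gen_adv_loss(logits, y, loss='logloss', mean=True)
grad = K.gradients(adv_loss, x)[0]
x_adv = K.stop_gradient(x + eps * K.sign(grad))  # one FGSM step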
Example #15
Source File: utls.py    From MBLLEN with Apache License 2.0
def bright_AB(y_true, y_pred):
            return K.abs(K.mean(y_true[:,:,:,:3])-K.mean(y_pred[:,:,:,:3])) 
Example #16
Source File: utls.py    From MBLLEN with Apache License 2.0
def bright_mae(y_true, y_pred):
    return K.mean(K.abs(y_pred[:,:,:,:3] - y_true[:,:,:,:3])) 
Example #17
Source File: VAE2.py    From navbot with MIT License
def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(K.shape(z_mean)[0], Z_DIM), mean=0., stddev=1.)
    return z_mean + K.exp(z_log_var / 2) * epsilon 
Example #18
Source File: conv_vae.py    From Projects with MIT License
def _sampling(self,args):
        '''
        sampling function for embedding layer
        '''
        z_mean,z_log_var = args
        epsilon = K.random_normal(shape=K.shape(z_mean),mean=self.eps_mean,
                                  stddev=self.eps_std)
        return z_mean + K.exp(z_log_var) * epsilon 
Example #19
Source File: conv_vae.py    From Projects with MIT License
def _vae_loss(self,input,output):
        '''
        loss function for variational autoencoder
        '''
        input_flat = K.flatten(input)
        output_flat = K.flatten(output)
        xent_loss = self.image_size[0] * self.image_size[1] \
                    * objectives.binary_crossentropy(input_flat,output_flat)
        kl_loss = - 0.5 * K.mean(1 + self.z_log_var - K.square(self.z_mean) 
                  - K.exp(self.z_log_var), axis=-1)
        return xent_loss + kl_loss 
Example #20
Source File: loss.py    From DeepDeblur with MIT License
def identity_loss(y_true, y_pred):
    return K.mean(y_pred - y_true, axis=0)
    #return (y_pred - y_true) 
Example #21
Source File: DeepLearn_cornet.py    From DeepLearn with MIT License
def cor(self,y1, y2, lamda):
        y1_mean = K.mean(y1, axis=0)
        y1_centered = y1 - y1_mean
        y2_mean = K.mean(y2, axis=0)
        y2_centered = y2 - y2_mean
        corr_nr = K.sum(y1_centered * y2_centered, axis=0)
        corr_dr1 = K.sqrt(T.sum(y1_centered * y1_centered, axis=0) + 1e-8)
        corr_dr2 = K.sqrt(T.sum(y2_centered * y2_centered, axis=0) + 1e-8)
        corr_dr = corr_dr1 * corr_dr2
        corr = corr_nr / corr_dr
        return K.sum(corr) * lamda 
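Note that corr_dr1 and corr_dr2 call T.sum, i.e. theano.tensor: this excerpt assumes the Theano backend. The quantity itself is the per-column Pearson correlation, which can be sanity-checked in NumPy (toy data, assumed shapes):

import numpy as np

y1 = np.random.randn(100, 4)
y2 = 0.5 * y1 + 0.1 * np.random.randn(100, 4)

y1c = y1 - y1.mean(axis=0)
y2c = y2 - y2.mean(axis=0)
corr = (y1c * y2c).sum(axis=0) / np.sqrt(
    ((y1c ** 2).sum(axis=0) + 1e-8) * ((y2c ** 2).sum(axis=0) + 1e-8))
# each entry matches np.corrcoef(y1[:, i], y2[:, i])[0, 1]
print(corr)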
Example #22
Source File: breakout_ddqn.py    From reinforcement-learning with MIT License
def optimizer(self):
        a = K.placeholder(shape=(None, ), dtype='int32')
        y = K.placeholder(shape=(None, ), dtype='float32')

        py_x = self.model.output

        a_one_hot = K.one_hot(a, self.action_size)
        q_value = K.sum(py_x * a_one_hot, axis=1)
        error = K.abs(y - q_value)

        quadratic_part = K.clip(error, 0.0, 1.0)
        linear_part = error - quadratic_part
        loss = K.mean(0.5 * K.square(quadratic_part) + linear_part)

        optimizer = RMSprop(lr=0.00025, epsilon=0.01)
        updates = optimizer.get_updates(self.model.trainable_weights, [], loss)
        train = K.function([self.model.input, a, y], [loss], updates=updates)

        return train

    # approximate the Q function with a convolutional neural network:
    # the state is the input, the Q-value of each action is the output
Example #23
Source File: ae_model.py    From Pix2Pose with MIT License
def call(self,x):
        y_pred=x[0]
        y_recont_gt=x[1] 
        y_prob_pred=tf.squeeze(x[2],axis=3) 
        y_prob_gt=x[3]
        visible = tf.cast(y_prob_gt > 0.5,y_pred.dtype)
        visible = tf.squeeze(visible,axis=3) 
        #generate transformed values using sym
        if(len(self.sym)>1):
            #if(True):
            for sym_id,transform in enumerate(self.sym): #3x3 matrix
                tf_mat=tf.convert_to_tensor(transform,y_recont_gt.dtype)
                y_gt_transformed = tf.transpose(tf.matmul(tf_mat,tf.transpose(tf.reshape(y_recont_gt,[-1,3]))))
                y_gt_transformed = tf.reshape(y_gt_transformed,[-1,128,128,3])
                loss_xyz_temp = K.sum(K.abs(y_gt_transformed-y_pred),axis=3)/3
                loss_sum=K.sum(loss_xyz_temp,axis=[1,2])
                if(sym_id>0):
                    loss_sums = tf.concat([loss_sums,tf.expand_dims(loss_sum,axis=0)],axis=0)
                    loss_xyzs=  tf.concat([loss_xyzs,tf.expand_dims(loss_xyz_temp,axis=0)],axis=0)
                else:
                    loss_sums = tf.expand_dims(loss_sum,axis=0) 
                    loss_xyzs = tf.expand_dims(loss_xyz_temp,axis=0)
            
            min_values = tf.reduce_min(loss_sums,axis=0,keepdims=True) 
            loss_switch = tf.cast(tf.equal(loss_sums,min_values),y_pred.dtype)
            loss_xyz = tf.expand_dims(tf.expand_dims(loss_switch,axis=2),axis=3)*loss_xyzs
            loss_xyz = K.sum(loss_xyz,axis=0) 
        else:
            loss_xyz = K.sum(K.abs(y_recont_gt-y_pred),axis=3)/3
        prob_loss = K.square(y_prob_pred-K.minimum(loss_xyz,1)) 
        loss_invisible = (1-visible)*loss_xyz
        loss_visible = visible*loss_xyz
        loss = loss_visible*3 + loss_invisible+ 0.5*prob_loss 
        loss = K.mean(loss,axis=[1,2])
        return loss 
Example #24
Source File: keras_utils.py    From timeception with GNU General Public License v3.0
def map_charades(y_true, y_pred):
    """
    Returns mAP
    """
    m_aps = []

    tf_one = tf.constant(1, dtype=tf.float32)

    n_classes = y_pred.shape[1]
    for oc_i in range(n_classes):
        pred_row = y_pred[:, oc_i]
        sorted_idxs = tf_framework.argsort(-pred_row)
        true_row = y_true[:, oc_i]
        true_row = tf.map_fn(lambda i: true_row[i], sorted_idxs, dtype=np.float32)
        tp_bool = tf.equal(true_row, tf_one)
        tp = tf.cast(tp_bool, dtype=np.float32)
        fp = K.reverse(tp, axes=0)
        n_pos = tf.reduce_sum(tp)
        f_pcs = tf.cumsum(fp)
        t_pcs = tf.cumsum(tp)
        s = f_pcs + t_pcs

        s = tf.cast(s, tf.float32)
        t_pcs = tf.cast(t_pcs, tf.float32)
        tp_float = tf.cast(tp_bool, np.float32)

        prec = t_pcs / s
        avg_prec = prec * tp_float

        n_pos = tf.cast(n_pos, tf.float32)
        avg_prec = avg_prec / n_pos
        avg_prec = tf.expand_dims(avg_prec, axis=0)
        m_aps.append(avg_prec)

    m_aps = K.concatenate(m_aps, axis=0)
    mAP = K.mean(m_aps)
    return mAP

# endregion

# region Callbacks 
Example #25
Source File: keras_utils.py    From deep-smoke-machine with BSD 3-Clause "New" or "Revised" License
def map_charades(y_true, y_pred):
    """
    Returns mAP
    """
    m_aps = []

    tf_one = tf.constant(1, dtype=tf.float32)

    n_classes = y_pred.shape[1]
    for oc_i in range(n_classes):
        pred_row = y_pred[:, oc_i]
        sorted_idxs = tf_framework.argsort(-pred_row)
        true_row = y_true[:, oc_i]
        true_row = tf.map_fn(lambda i: true_row[i], sorted_idxs, dtype=np.float32)
        tp_bool = tf.equal(true_row, tf_one)
        tp = tf.cast(tp_bool, dtype=np.float32)
        fp = K.reverse(tp, axes=0)
        n_pos = tf.reduce_sum(tp)
        f_pcs = tf.cumsum(fp)
        t_pcs = tf.cumsum(tp)
        s = f_pcs + t_pcs

        s = tf.cast(s, tf.float32)
        t_pcs = tf.cast(t_pcs, tf.float32)
        tp_float = tf.cast(tp_bool, np.float32)

        prec = t_pcs / s
        avg_prec = prec * tp_float

        n_pos = tf.cast(n_pos, tf.float32)
        avg_prec = avg_prec / n_pos
        avg_prec = tf.expand_dims(avg_prec, axis=0)
        m_aps.append(avg_prec)

    m_aps = K.concatenate(m_aps, axis=0)
    mAP = K.mean(m_aps)
    return mAP

# endregion

# region Callbacks 
Example #26
Source File: qrnn.py    From typhon with MIT License
def skewed_absolute_error(y_true, y_pred, tau):
    """
    The quantile loss function for a given quantile tau:

    L(y_true, y_pred) = (tau - I(y_true < y_pred)) * (y_true - y_pred)

    Where I is the indicator function.
    """
    dy = y_pred - y_true
    return K.mean((1.0 - tau) * K.relu(dy) + tau * K.relu(-dy), axis=-1) 
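Numerically, tau = 0.9 makes under-prediction nine times as expensive as over-prediction, which is what pushes the network output toward the 90th percentile. A small NumPy illustration (the values are assumptions):

import numpy as np

tau = 0.9
dy = np.array([-1.0, 1.0])  # under-prediction, then over-prediction
loss = (1.0 - tau) * np.maximum(dy, 0) + tau * np.maximum(-dy, 0)
print(loss)  # [0.9 0.1]: predicting too low costs 9x more than predicting too high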
Example #27
Source File: model.py    From deepchem with MIT License
def _buildEncoder(self, x, latent_rep_size, max_length, epsilon_std=0.01):
    h = Convolution1D(9, 9, activation='relu', name='conv_1')(x)
    h = Convolution1D(9, 9, activation='relu', name='conv_2')(h)
    h = Convolution1D(10, 11, activation='relu', name='conv_3')(h)
    h = Flatten(name='flatten_1')(h)
    h = Dense(435, activation='relu', name='dense_1')(h)

    def sampling(args):
      z_mean_, z_log_var_ = args
      batch_size = K.shape(z_mean_)[0]
      epsilon = K.random_normal(
          shape=(batch_size, latent_rep_size), mean=0., stddev=epsilon_std)
      return z_mean_ + K.exp(z_log_var_ / 2) * epsilon

    z_mean = Dense(latent_rep_size, name='z_mean', activation='linear')(h)
    z_log_var = Dense(latent_rep_size, name='z_log_var', activation='linear')(h)

    def vae_loss(x, x_decoded_mean):
      x = K.flatten(x)
      x_decoded_mean = K.flatten(x_decoded_mean)
      xent_loss = max_length * objectives.binary_crossentropy(x, x_decoded_mean)
      kl_loss = -0.5 * K.mean(
          1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
      return xent_loss + kl_loss

    return (vae_loss, Lambda(
        sampling, output_shape=(latent_rep_size,),
        name='lambda')([z_mean, z_log_var])) 
Example #28
Source File: cartpole_a2c.py    From reinforcement-learning-kr with MIT License
def critic_optimizer(self):
        target = K.placeholder(shape=[None, ])

        loss = K.mean(K.square(target - self.critic.output))

        optimizer = Adam(lr=self.critic_lr)
        updates = optimizer.get_updates(self.critic.trainable_weights, [], loss)
        train = K.function([self.critic.input, target], [], updates=updates)

        return train

    # update the policy network and the value network at every timestep
Example #29
Source File: breakout_a3c.py    From reinforcement-learning-kr with MIT License
def critic_optimizer(self):
        discounted_prediction = K.placeholder(shape=(None,))

        value = self.critic.output

        # use the squared error [return - value] as the loss
        loss = K.mean(K.square(discounted_prediction - value))

        optimizer = RMSprop(lr=self.critic_lr, rho=0.99, epsilon=0.01)
        updates = optimizer.get_updates(self.critic.trainable_weights, [],loss)
        train = K.function([self.critic.input, discounted_prediction],
                           [loss], updates=updates)
        return train 
Example #30
Source File: breakout_dqn.py    From reinforcement-learning-kr with MIT License
def optimizer(self):
        a = K.placeholder(shape=(None,), dtype='int32')
        y = K.placeholder(shape=(None,), dtype='float32')

        prediction = self.model.output

        a_one_hot = K.one_hot(a, self.action_size)
        q_value = K.sum(prediction * a_one_hot, axis=1)
        error = K.abs(y - q_value)

        quadratic_part = K.clip(error, 0.0, 1.0)
        linear_part = error - quadratic_part
        loss = K.mean(0.5 * K.square(quadratic_part) + linear_part)

        optimizer = RMSprop(lr=0.00025, epsilon=0.01)
        updates = optimizer.get_updates(self.model.trainable_weights, [], loss)
        train = K.function([self.model.input, a, y], [loss], updates=updates)

        return train

    # build a neural network that takes the state as input and outputs the Q-value of each action