Python keras.backend.sum() Examples

The following are 30 code examples of keras.backend.sum(), drawn from open-source projects. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module keras.backend, or try the search function.
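Before the project examples, here is a minimal sketch of what K.sum() computes: it reduces over all axes by default, over a single axis when axis is given, and keeps the reduced axis as size 1 when keepdims=True (assuming a Keras backend where K.eval() materializes a tensor):

import numpy as np
from keras import backend as K

x = K.variable(np.arange(6.0).reshape(2, 3))
print(K.eval(K.sum(x)))                         # 15.0 -- sum over every element
print(K.eval(K.sum(x, axis=1)))                 # [ 3. 12.] -- per-row sums
print(K.eval(K.sum(x, axis=1, keepdims=True)))  # [[ 3.] [12.]] -- rank preserved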
Example #1
Source File: reinforce_agent.py    From reinforcement-learning-kr with MIT License
def optimizer(self):
        action = K.placeholder(shape=[None, 5])
        discounted_rewards = K.placeholder(shape=[None, ])
        
        # compute the cross-entropy loss
        action_prob = K.sum(action * self.model.output, axis=1)
        cross_entropy = K.log(action_prob) * discounted_rewards
        loss = -K.sum(cross_entropy)
        
        # build a train function that updates the policy network
        optimizer = Adam(lr=self.learning_rate)
        updates = optimizer.get_updates(self.model.trainable_weights,[],
                                        loss)
        train = K.function([self.model.input, action, discounted_rewards], [],
                           updates=updates)

        return train

    # select an action with the policy network
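The returned K.function takes a single list of input arrays. A hypothetical end-of-episode update (agent, states, actions and discounted_rewards are stand-ins for objects built elsewhere in the script; actions is one-hot over the 5 actions):

train = agent.optimizer()
# one policy-gradient step over the episode batch
train([states, actions, discounted_rewards])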
Example #2
Source File: losses.py    From FasterRCNN_KERAS with Apache License 2.0
def rpn_loss_regr(num_anchors):
	def rpn_loss_regr_fixed_num(y_true, y_pred):
		if K.image_dim_ordering() == 'th':
			x = y_true[:, 4 * num_anchors:, :, :] - y_pred
			x_abs = K.abs(x)
			# cast the boolean mask so it can be used in arithmetic,
			# as in the TensorFlow branch below
			x_bool = K.cast(K.less_equal(x_abs, 1.0), 'float32')
			return lambda_rpn_regr * K.sum(
				y_true[:, :4 * num_anchors, :, :] * (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / K.sum(epsilon + y_true[:, :4 * num_anchors, :, :])
		else:
			x = y_true[:, :, :, 4 * num_anchors:] - y_pred
			x_abs = K.abs(x)
			x_bool = K.cast(K.less_equal(x_abs, 1.0), tf.float32)

			return lambda_rpn_regr * K.sum(
				y_true[:, :, :, :4 * num_anchors] * (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / K.sum(epsilon + y_true[:, :, :, :4 * num_anchors])

	return rpn_loss_regr_fixed_num 
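The x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5) expression is the smooth-L1 loss used for box regression in Fast(er) R-CNN. A small NumPy sketch of the same piecewise function, just to make the two branches explicit:

import numpy as np

def smooth_l1(x):
    # 0.5 * x^2 where |x| <= 1, |x| - 0.5 elsewhere
    x_abs = np.abs(x)
    return np.where(x_abs <= 1.0, 0.5 * x * x, x_abs - 0.5)

print(smooth_l1(np.array([-2.0, -0.5, 0.0, 0.5, 2.0])))
# [1.5   0.125 0.    0.125 1.5  ]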
Example #3
Source File: reinforce_agent.py    From 2019-OSS-Summer-RL with MIT License
def optimizer(self):
        action = K.placeholder(shape=[None, 5])
        discounted_rewards = K.placeholder(shape=[None, ])
        
        # compute the cross-entropy loss
        action_prob = K.sum(action * self.model.output, axis=1)
        cross_entropy = K.log(action_prob) * discounted_rewards
        loss = -K.sum(cross_entropy)
        
        # build a train function that updates the policy network
        optimizer = Adam(lr=self.learning_rate)
        updates = optimizer.get_updates(self.model.trainable_weights,[],
                                        loss)
        train = K.function([self.model.input, action, discounted_rewards], [],
                           updates=updates)

        return train

    # select an action with the policy network
Example #4
Source File: wgan_gp.py    From Keras-GAN with MIT License
def gradient_penalty_loss(self, y_true, y_pred, averaged_samples):
        """
        Computes gradient penalty based on prediction and weighted real / fake samples
        """
        gradients = K.gradients(y_pred, averaged_samples)[0]
        # compute the euclidean norm by squaring ...
        gradients_sqr = K.square(gradients)
        #   ... summing over the rows ...
        gradients_sqr_sum = K.sum(gradients_sqr,
                                  axis=np.arange(1, len(gradients_sqr.shape)))
        #   ... and sqrt
        gradient_l2_norm = K.sqrt(gradients_sqr_sum)
        # compute lambda * (1 - ||grad||)^2 still for each single sample
        gradient_penalty = K.square(1 - gradient_l2_norm)
        # return the mean as loss over all the batch samples
        return K.mean(gradient_penalty) 
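Keras loss functions only receive (y_true, y_pred), so the extra averaged_samples argument has to be bound before the model is compiled. A sketch of the usual wiring with functools.partial, where interpolated_img is assumed to be the random weighted average of real and fake batches built elsewhere in the model:

from functools import partial

partial_gp_loss = partial(self.gradient_penalty_loss,
                          averaged_samples=interpolated_img)
partial_gp_loss.__name__ = 'gradient_penalty'  # Keras expects losses to have a name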
Example #5
Source File: cartpole_a3c.py    From reinforcement-learning with MIT License
def actor_optimizer(self):
        action = K.placeholder(shape=(None, self.action_size))
        advantages = K.placeholder(shape=(None, ))

        policy = self.actor.output

        good_prob = K.sum(action * policy, axis=1)
        eligibility = K.log(good_prob + 1e-10) * K.stop_gradient(advantages)
        loss = -K.sum(eligibility)

        entropy = K.sum(policy * K.log(policy + 1e-10), axis=1)
        # reduce to a scalar so the combined objective is a scalar
        # (the breakout A3C example below does the same)
        entropy = K.sum(entropy)

        actor_loss = loss + 0.01*entropy

        optimizer = Adam(lr=self.actor_lr)
        updates = optimizer.get_updates(self.actor.trainable_weights, [], actor_loss)
        train = K.function([self.actor.input, action, advantages], [], updates=updates)
        return train

    # make loss function for Value approximation 
Example #6
Source File: model_loss.py    From speech_separation with MIT License
def audio_discriminate_loss2(gamma=0.1,beta = 2*0.1,num_speaker=2):
    def loss_func(S_true,S_pred,gamma=gamma,beta=beta,num_speaker=num_speaker):
        sum_mtr = K.zeros_like(S_true[:,:,:,:,0])
        for i in range(num_speaker):
            sum_mtr += K.square(S_true[:,:,:,:,i]-S_pred[:,:,:,:,i])
            for j in range(num_speaker):
                if i != j:
                    sum_mtr -= gamma*(K.square(S_true[:,:,:,:,i]-S_pred[:,:,:,:,j]))

        # the beta term between predictions is disabled in this version;
        # the original (commented-out) form is kept for reference:
        # for i in range(num_speaker):
        #     for j in range(i+1, num_speaker):
        #         sum_mtr -= beta*K.square(S_pred[:,:,:,:,i]-S_pred[:,:,:,:,j])
        #         sum_mtr += beta*K.square(S_true[:,:,:,:,i]-S_true[:,:,:,:,j])
        # sum = K.sum(K.maximum(K.flatten(sum_mtr), 0))

        loss = K.mean(K.flatten(sum_mtr))

        return loss
    return loss_func 
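Because audio_discriminate_loss2 is a factory, gamma, beta and num_speaker are frozen when the closure is created. A hypothetical compile step (model is a stand-in for the separation network):

model.compile(optimizer='adam',
              loss=audio_discriminate_loss2(gamma=0.1, num_speaker=2))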
Example #7
Source File: breakout_dueling_ddqn.py    From reinforcement-learning with MIT License
def optimizer(self):
        a = K.placeholder(shape=(None, ), dtype='int32')
        y = K.placeholder(shape=(None, ), dtype='float32')

        py_x = self.model.output

        a_one_hot = K.one_hot(a, self.action_size)
        q_value = K.sum(py_x * a_one_hot, axis=1)
        error = K.abs(y - q_value)

        quadratic_part = K.clip(error, 0.0, 1.0)
        linear_part = error - quadratic_part
        loss = K.mean(0.5 * K.square(quadratic_part) + linear_part)

        optimizer = RMSprop(lr=0.00025, epsilon=0.01)
        updates = optimizer.get_updates(self.model.trainable_weights, [], loss)
        train = K.function([self.model.input, a, y], [loss], updates=updates)

        return train

    # approximate Q function using Convolution Neural Network
    # state is input and Q Value of each action is output of network
    # dueling network's Q Value is sum of advantages and state value 
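The quadratic/linear split above is exactly the Huber loss with delta = 1, which keeps gradients bounded for large TD errors. A quick NumPy check of the identity:

import numpy as np

def huber(error, delta=1.0):
    return np.where(np.abs(error) <= delta,
                    0.5 * error ** 2,
                    delta * (np.abs(error) - 0.5 * delta))

error = np.linspace(-3.0, 3.0, 7)
quadratic = np.clip(np.abs(error), 0.0, 1.0)
linear = np.abs(error) - quadratic
assert np.allclose(0.5 * quadratic ** 2 + linear, huber(error))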
Example #8
Source File: breakout_a3c.py    From reinforcement-learning with MIT License
def actor_optimizer(self):
        action = K.placeholder(shape=[None, self.action_size])
        advantages = K.placeholder(shape=[None, ])

        policy = self.actor.output

        good_prob = K.sum(action * policy, axis=1)
        eligibility = K.log(good_prob + 1e-10) * advantages
        actor_loss = -K.sum(eligibility)

        entropy = K.sum(policy * K.log(policy + 1e-10), axis=1)
        entropy = K.sum(entropy)

        loss = actor_loss + 0.01*entropy
        optimizer = RMSprop(lr=self.actor_lr, rho=0.99, epsilon=0.01)
        updates = optimizer.get_updates(self.actor.trainable_weights, [], loss)
        train = K.function([self.actor.input, action, advantages], [loss], updates=updates)

        return train

    # make loss function for Value approximation 
Example #9
Source File: reversing_gan.py    From gandlf with MIT License
def reverse_generator(generator, X_sample, y_sample, title):
    """Gradient descent to map images back to their latent vectors."""

    latent_vec = np.random.normal(size=(1, 100))

    # Function for figuring out how to bump the input.
    target = K.placeholder()
    loss = K.sum(K.square(generator.outputs[0] - target))
    grad = K.gradients(loss, generator.inputs[0])[0]
    update_fn = K.function(generator.inputs + [target], [grad])

    # Repeatedly apply the update rule.
    xs = []
    for i in range(60):
        print('%d: latent_vec mean=%f, std=%f'
              % (i, np.mean(latent_vec), np.std(latent_vec)))
        xs.append(generator.predict_on_batch([latent_vec, y_sample]))
        for _ in range(10):
            update_vec = update_fn([latent_vec, y_sample, X_sample])[0]
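            # `update_rate` is a module-level step-size constant defined
            # elsewhere in the original script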
            latent_vec -= update_vec * update_rate

    # Plots the samples.
    xs = np.concatenate(xs, axis=0)
    plot_as_gif(xs, X_sample, title) 
Example #10
Source File: reversing_gan.py    From gandlf with MIT License
def build_generator():
    """Builds the big generator model."""

    latent = keras.layers.Input((100,), name='latent')

    image_class = keras.layers.Input((10,), dtype='float32',
                                     name='image_class')
    d = keras.layers.Dense(100)(image_class)
    merged = keras.layers.merge([latent, d], mode='sum')

    hidden = keras.layers.Dense(512)(merged)
    hidden = keras.layers.LeakyReLU()(hidden)

    hidden = keras.layers.Dense(512)(hidden)
    hidden = keras.layers.LeakyReLU()(hidden)

    output_layer = keras.layers.Dense(28 * 28,
        activation='tanh')(hidden)
    fake_image = keras.layers.Reshape((28, 28, 1))(output_layer)

    return keras.models.Model(input=[latent, image_class],
                              output=fake_image) 
Example #11
Source File: attack_utils.py    From blackbox-attacks with MIT License
def gen_adv_loss(logits, y, loss='logloss', mean=False):
    """
    Generate the loss function.
    """

    if loss == 'training':
        # use the model's output instead of the true labels to avoid
        # label leaking at training time
        y = K.cast(K.equal(logits, K.max(logits, 1, keepdims=True)), "float32")
        y = y / K.sum(y, 1, keepdims=True)
        out = K.categorical_crossentropy(y, logits, from_logits=True)
    elif loss == 'logloss':
        out = K.categorical_crossentropy(y, logits, from_logits=True)
    else:
        raise ValueError("Unknown loss: {}".format(loss))

    if mean:
        out = K.mean(out)
    # else:
    #     out = K.sum(out)
    return out 
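A hypothetical call, where logits is the model's pre-softmax output tensor and y is a one-hot label tensor of matching shape:

adv_loss = gen_adv_loss(logits, y, loss='logloss', mean=True)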
Example #12
Source File: attack_utils.py    From blackbox-attacks with MIT License
def gen_adv_loss(logits, y, loss='logloss', mean=False):
    """
    Generate the loss function.
    """

    if loss == 'training':
        # use the model's output instead of the true labels to avoid
        # label leaking at training time
        y = K.cast(K.equal(logits, K.max(logits, 1, keepdims=True)), "float32")
        y = y / K.sum(y, 1, keepdims=True)
        out = K.categorical_crossentropy(y, logits, from_logits=True)
    elif loss == 'logloss':
        # out = K.categorical_crossentropy(y, logits, from_logits=True)
        out = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y)
        out = tf.reduce_mean(out)
    else:
        raise ValueError("Unknown loss: {}".format(loss))

    if mean:
        out = tf.reduce_mean(out)
    # else:
    #     out = K.sum(out)
    return out 
Example #13
Source File: contrib.py    From steppy-toolkit with MIT License
def call(self, x, mask=None):
        # computes a probability distribution over the timesteps
        # uses 'max trick' for numerical stability
        # reshape is done to avoid issue with Tensorflow
        # and 1-dimensional weights
        logits = K.dot(x, self.W)
        x_shape = K.shape(x)
        logits = K.reshape(logits, (x_shape[0], x_shape[1]))
        ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))

        # masked timesteps have zero weight
        if mask is not None:
            mask = K.cast(mask, K.floatx())
            ai = ai * mask
        att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
        weighted_input = x * K.expand_dims(att_weights)
        result = K.sum(weighted_input, axis=1)
        if self.return_attention:
            return [result, att_weights]
        return result 
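The same masked-softmax pooling rendered in NumPy on toy shapes, to make the broadcasting explicit (all names here are illustrative):

import numpy as np

x = np.random.rand(2, 4, 3)                       # (batch, timesteps, features)
logits = np.random.rand(2, 4)
mask = np.array([[1, 1, 1, 0],
                 [1, 1, 0, 0]], dtype='float32')  # zero out padded timesteps

ai = np.exp(logits - logits.max(axis=-1, keepdims=True)) * mask
att_weights = ai / (ai.sum(axis=1, keepdims=True) + 1e-7)
pooled = (x * att_weights[..., None]).sum(axis=1)
print(pooled.shape)                               # (2, 3)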
Example #14
Source File: attention.py    From keras-utility-layer-collection with MIT License
def step(self, x, states):  
        h = states[0]
        # states[1] necessary?
        
        # comes from the constants
        X_static = states[-2]
        # equals K.dot(static_x, self._W1) + self._b2 with X.shape=[bs, L, static_input_dim]
        total_x_static_prod = states[-1]

        # expand dims to add the vector which is only valid for this time step
        # to total_x_prod which is valid for all time steps
        hw = K.expand_dims(K.dot(h, self._W2), 1)
        additive_atn = total_x_static_prod + hw
        attention = K.softmax(K.dot(additive_atn, self._V), axis=1)
        static_x_weighted = K.sum(attention * X_static, [1])
        
        x = K.dot(K.concatenate([x, static_x_weighted], 1), self._W3) + self._b3

        h, new_states = self.layer.cell.call(x, states[:-2])
        
        # append attention to the states to "smuggle" it out of the RNN wrapper
        attention = K.squeeze(attention, -1)
        h = K.concatenate([h, attention])

        return h, new_states 
Example #15
Source File: attention.py    From keras-utility-layer-collection with MIT License
def step(self, x, states):   
        h = states[0]
        # states[1] necessary?

        # equals K.dot(X, self._W1) + self._b2 with X.shape=[bs, T, input_dim]
        total_x_prod = states[-1]
        # comes from the constants (equals the input sequence)
        X = states[-2]
        
        # expand dims to add the vector which is only valid for this time step
        # to total_x_prod which is valid for all time steps
        hw = K.expand_dims(K.dot(h, self._W2), 1)
        additive_atn = total_x_prod + hw
        attention = K.softmax(K.dot(additive_atn, self._V), axis=1)
        x_weighted = K.sum(attention * X, [1])

        x = K.dot(K.concatenate([x, x_weighted], 1), self._W3) + self._b3
        
        h, new_states = self.layer.cell.call(x, states[:-2])
        
        return h, new_states 
Example #16
Source File: breakout_dqn.py    From reinforcement-learning with MIT License
def optimizer(self):
        a = K.placeholder(shape=(None,), dtype='int32')
        y = K.placeholder(shape=(None,), dtype='float32')

        py_x = self.model.output

        a_one_hot = K.one_hot(a, self.action_size)
        q_value = K.sum(py_x * a_one_hot, axis=1)
        error = K.abs(y - q_value)

        quadratic_part = K.clip(error, 0.0, 1.0)
        linear_part = error - quadratic_part
        loss = K.mean(0.5 * K.square(quadratic_part) + linear_part)

        optimizer = RMSprop(lr=0.00025, epsilon=0.01)
        updates = optimizer.get_updates(self.model.trainable_weights, [], loss)
        train = K.function([self.model.input, a, y], [loss], updates=updates)

        return train

    # approximate Q function using Convolution Neural Network
    # state is input and Q Value of each action is output of network 
Example #17
Source File: DeepLearn_cornet.py    From DeepLearn with MIT License
def cor(self,y1, y2, lamda):
        y1_mean = K.mean(y1, axis=0)
        y1_centered = y1 - y1_mean
        y2_mean = K.mean(y2, axis=0)
        y2_centered = y2 - y2_mean
        corr_nr = K.sum(y1_centered * y2_centered, axis=0)
        # K.sum keeps this backend-agnostic (the original used theano.tensor's T.sum)
        corr_dr1 = K.sqrt(K.sum(y1_centered * y1_centered, axis=0) + 1e-8)
        corr_dr2 = K.sqrt(K.sum(y2_centered * y2_centered, axis=0) + 1e-8)
        corr_dr = corr_dr1 * corr_dr2
        corr = corr_nr / corr_dr
        return K.sum(corr) * lamda 
Example #18
Source File: BBalpha_dropout.py    From Dropout_BBalpha with MIT License
def bbalpha_softmax_cross_entropy_with_mc_logits(alpha):
    alpha = K.cast_to_floatx(alpha)
    def loss(y_true, mc_logits):
        # log(p_ij), p_ij = softmax(logit_ij)
        #assert mc_logits.ndim == 3
        mc_log_softmax = mc_logits - K.max(mc_logits, axis=2, keepdims=True)
        mc_log_softmax = mc_log_softmax - K.log(K.sum(K.exp(mc_log_softmax), axis=2, keepdims=True))
        mc_ll = K.sum(y_true * mc_log_softmax, -1)  # N x K
        K_mc = mc_ll.get_shape().as_list()[1]	# only for tensorflow
        return - 1. / alpha * (logsumexp(alpha * mc_ll, 1) + K.log(1.0 / K_mc))
    return loss


###################################################################
# the model 
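A hypothetical compile step, assuming the model stacks K Monte-Carlo logit samples into an output of shape (N, K, D) and alpha = 0.5 is the chosen BB-alpha divergence parameter:

model.compile(optimizer='adam',
              loss=bbalpha_softmax_cross_entropy_with_mc_logits(alpha=0.5))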
Example #19
Source File: BBalpha_dropout.py    From Dropout_BBalpha with MIT License
def test_MC_dropout(model, X, Y):
    pred = model.predict(X)  # N x K x D
    pred = np.mean(pred, 1)
    acc = np.mean(np.argmax(pred, axis=-1) == np.argmax(Y, axis=-1))
    ll = np.sum(np.log(np.sum(pred * Y, -1)))
    return acc, ll 
Example #20
Source File: BBalpha_dropout.py    From Dropout_BBalpha with MIT License
def logsumexp(x, axis=None):
    x_max = K.max(x, axis=axis, keepdims=True)
    return K.log(K.sum(K.exp(x - x_max), axis=axis, keepdims=True)) + x_max 
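The max-shift is what makes this numerically stable; a NumPy illustration of why the naive form fails for large inputs:

import numpy as np

x = np.array([1000.0, 1000.0])
print(np.log(np.sum(np.exp(x))))                  # inf: exp(1000) overflows
x_max = x.max()
print(np.log(np.sum(np.exp(x - x_max))) + x_max)  # 1000.6931..., stable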
Example #21
Source File: grad_cam.py    From Emotion with MIT License
def compile_gradient_function(input_model, category_index, layer_name):
    model = Sequential()
    model.add(input_model)

    num_classes = model.output_shape[1]
    target_layer = lambda x: target_category_loss(x, category_index, num_classes)
    model.add(Lambda(target_layer,
                     output_shape = target_category_loss_output_shape))

    loss = K.sum(model.layers[-1].output)
    conv_output = model.layers[0].get_layer(layer_name).output
    gradients = normalize(K.gradients(loss, conv_output)[0])
    gradient_function = K.function([model.layers[0].input, K.learning_phase()],
                                                    [conv_output, gradients])
    return gradient_function 
Example #22
Source File: grad_cam.py    From face_classification with MIT License
def compile_gradient_function(input_model, category_index, layer_name):
    model = Sequential()
    model.add(input_model)

    num_classes = model.output_shape[1]
    target_layer = lambda x: target_category_loss(x, category_index, num_classes)
    model.add(Lambda(target_layer,
                     output_shape=target_category_loss_output_shape))

    loss = K.sum(model.layers[-1].output)
    conv_output = model.layers[0].get_layer(layer_name).output
    gradients = normalize(K.gradients(loss, conv_output)[0])
    gradient_function = K.function([model.layers[0].input, K.learning_phase()],
                                   [conv_output, gradients])
    return gradient_function 
Example #23
Source File: XRMB_CNN_17.06.v2.py    From DeepLearn with MIT License
def sum_corr(model):
    view1 = np.load("MFCC_Test.npy")
    view2 = np.load("XRMB_Test.npy")
    x = project(model,[view1,np.zeros_like(view2)])
    y = project(model,[np.zeros_like(view1),view2])
    print ("test correlation")
    corr = 0
    for i in range(0,len(x[0])):
        x1 = x[:,i] - (np.ones(len(x))*(sum(x[:,i])/len(x)))
        x2 = y[:,i] - (np.ones(len(y))*(sum(y[:,i])/len(y)))
        nr = sum(x1 * x2)/(math.sqrt(sum(x1*x1))*math.sqrt(sum(x2*x2)))
        corr+=nr
    print (corr) 
Example #24
Source File: layers.py    From DeepLearn with MIT License
def call(self, x, mask=None):
        h1 = x[0]
        h2 = x[1]    
        dif = K.sum(K.abs(h1-h2),axis=1)  
        h = K.exp(-dif)
        # clip for numerical stability, then give the output an explicit
        # (batch, 1) shape; -1 works with both Theano and TensorFlow backends
        h = K.clip(h, 1e-7, 1.0-1e-7)
        h = K.reshape(h, (-1, 1))
        return h 
Example #25
Source File: model_abcnn.py    From DeepLearn with MIT License
def compute_euclidean_match_score(l_r):
    l, r = l_r
    denominator = 1. + K.sqrt(
        -2 * K.batch_dot(l, r, axes=[2, 2]) +
        K.expand_dims(K.sum(K.square(l), axis=2), 2) +
        K.expand_dims(K.sum(K.square(r), axis=2), 1)
    )
    denominator = K.maximum(denominator, K.epsilon())
    return 1. / denominator 
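The batch_dot expression computes all pairwise squared distances at once via the expansion ||l - r||^2 = ||l||^2 - 2<l, r> + ||r||^2. A NumPy check of that identity on toy shapes:

import numpy as np

l = np.random.rand(2, 5, 3)   # (batch, timesteps_l, dim)
r = np.random.rand(2, 7, 3)   # (batch, timesteps_r, dim)

d2 = (-2 * np.einsum('bik,bjk->bij', l, r)
      + (l ** 2).sum(-1)[:, :, None]
      + (r ** 2).sum(-1)[:, None, :])
ref = ((l[:, :, None, :] - r[:, None, :, :]) ** 2).sum(-1)
assert np.allclose(d2, ref)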
Example #26
Source File: DeepLearn_cornet.py    From DeepLearn with MIT License
def sum_corr(model):
    view1 = np.load("test_v1.npy")
    view2 = np.load("test_v2.npy")
    x = project(model,[view1,np.zeros_like(view2)])
    y = project(model,[np.zeros_like(view1),view2])
    print("test correlation")
    corr = 0
    for i in range(0,len(x[0])):
        x1 = x[:,i] - (np.ones(len(x))*(sum(x[:,i])/len(x)))
        x2 = y[:,i] - (np.ones(len(y))*(sum(y[:,i])/len(y)))
        nr = sum(x1 * x2)/(math.sqrt(sum(x1*x1))*math.sqrt(sum(x2*x2)))
        corr+=nr
    print(corr)
Example #27
Source File: ae_model.py    From Pix2Pose with MIT License
def call(self,x):
        y_pred=x[0]
        y_recont_gt=x[1] 
        y_prob_pred=tf.squeeze(x[2],axis=3) 
        y_prob_gt=x[3]
        visible = tf.cast(y_prob_gt > 0.5,y_pred.dtype)
        visible = tf.squeeze(visible,axis=3) 
        #generate transformed values using sym
        if(len(self.sym)>1):
            #if(True):
            for sym_id,transform in enumerate(self.sym): #3x3 matrix
                tf_mat=tf.convert_to_tensor(transform,y_recont_gt.dtype)
                y_gt_transformed = tf.transpose(tf.matmul(tf_mat,tf.transpose(tf.reshape(y_recont_gt,[-1,3]))))
                y_gt_transformed = tf.reshape(y_gt_transformed,[-1,128,128,3])
                loss_xyz_temp = K.sum(K.abs(y_gt_transformed-y_pred),axis=3)/3
                loss_sum=K.sum(loss_xyz_temp,axis=[1,2])
                if(sym_id > 0):
                    loss_sums = tf.concat([loss_sums, tf.expand_dims(loss_sum, axis=0)], axis=0)
                    loss_xyzs = tf.concat([loss_xyzs, tf.expand_dims(loss_xyz_temp, axis=0)], axis=0)
                else:
                    loss_sums = tf.expand_dims(loss_sum, axis=0)
                    loss_xyzs = tf.expand_dims(loss_xyz_temp, axis=0)
            
            min_values = tf.reduce_min(loss_sums,axis=0,keepdims=True) 
            loss_switch = tf.cast(tf.equal(loss_sums,min_values),y_pred.dtype)
            loss_xyz = tf.expand_dims(tf.expand_dims(loss_switch,axis=2),axis=3)*loss_xyzs
            loss_xyz = K.sum(loss_xyz,axis=0) 
        else:
            loss_xyz = K.sum(K.abs(y_recont_gt-y_pred),axis=3)/3
        prob_loss = K.square(y_prob_pred-K.minimum(loss_xyz,1)) 
        loss_invisible = (1-visible)*loss_xyz
        loss_visible = visible*loss_xyz
        loss = loss_visible*3 + loss_invisible+ 0.5*prob_loss 
        loss = K.mean(loss,axis=[1,2])
        return loss 
Example #28
Source File: breakout_dueling_ddqn.py    From reinforcement-learning with MIT License
def build_model(self):
        input = Input(shape=self.state_size)
        shared = Conv2D(32, (8, 8), strides=(4, 4), activation='relu')(input)
        shared = Conv2D(64, (4, 4), strides=(2, 2), activation='relu')(shared)
        shared = Conv2D(64, (3, 3), strides=(1, 1), activation='relu')(shared)
        flatten = Flatten()(shared)

        # network separate state value and advantages
        advantage_fc = Dense(512, activation='relu')(flatten)
        advantage = Dense(self.action_size)(advantage_fc)
        # subtract the per-sample mean over actions (dueling aggregation);
        # axis=1 keeps samples in the batch independent of one another
        advantage = Lambda(lambda a: a - K.mean(a, axis=1, keepdims=True),
                           output_shape=(self.action_size,))(advantage)

        value_fc = Dense(512, activation='relu')(flatten)
        value = Dense(1)(value_fc)
        value = Lambda(lambda s: K.expand_dims(s[:, 0], -1),
                       output_shape=(self.action_size,))(value)

        # network merged and make Q Value
        q_value = merge([value, advantage], mode='sum')
        model = Model(inputs=input, outputs=q_value)
        model.summary()

        return model

    # after some time interval update the target model to be same with model 
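merge(mode='sum') is Keras 1 API; it relies on the state value, whose real shape is (None, 1), broadcasting across the (None, action_size) advantages. Under Keras 2 the same broadcasted sum can be written as a Lambda; a sketch, not the project's original code:

from keras.layers import Lambda

# Q(s, a) = V(s) + A'(s, a), with V broadcast across the action dimension
q_value = Lambda(lambda va: va[0] + va[1],
                 output_shape=(self.action_size,))([value, advantage])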
Example #29
Source File: attention.py    From Document-Classifier-LSTM with MIT License
def call(self, x):
        uit = dot_product(x, self.W)

        if self.bias:
            uit += self.b

        uit = K.tanh(uit)
        ait = dot_product(uit, self.u)

        a = K.softmax(ait)
        a = K.expand_dims(a)
        weighted_input = x * a

        return K.sum(weighted_input, axis=1) 
Example #30
Source File: breakout_ddqn.py    From reinforcement-learning with MIT License
def optimizer(self):
        a = K.placeholder(shape=(None, ), dtype='int32')
        y = K.placeholder(shape=(None, ), dtype='float32')

        py_x = self.model.output

        a_one_hot = K.one_hot(a, self.action_size)
        q_value = K.sum(py_x * a_one_hot, axis=1)
        error = K.abs(y - q_value)

        quadratic_part = K.clip(error, 0.0, 1.0)
        linear_part = error - quadratic_part
        loss = K.mean(0.5 * K.square(quadratic_part) + linear_part)

        optimizer = RMSprop(lr=0.00025, epsilon=0.01)
        updates = optimizer.get_updates(self.model.trainable_weights, [], loss)
        train = K.function([self.model.input, a, y], [loss], updates=updates)

        return train

    # approximate Q function using Convolution Neural Network
    # state is input and Q Value of each action is output of network