"""TF1 Q-network heads for (distributional) DQN on Atari.

`model` is the plain DQN convnet from Mnih et al. (Nature 2015);
`dueling_model` is the dueling architecture of Wang et al. (2016).
Both can emit per-action atom logits (C51-style) when `distributed=True`,
in which case the output is reshaped to [batch, num_actions, atoms].

NOTE(review): uses `tf.contrib.layers`, so this file targets TensorFlow 1.x.
"""
import tensorflow as tf
import tensorflow.contrib.layers as layers


def layer_norm_fn(x, relu=True):
    """Apply layer normalization (with learned scale/center), optionally ReLU.

    Args:
        x: input tensor.
        relu: if True, apply `tf.nn.relu` after normalization.

    Returns:
        The normalized (and optionally rectified) tensor.
    """
    x = layers.layer_norm(x, scale=True, center=True)
    if relu:
        x = tf.nn.relu(x)
    return x


def _atari_convnet(img_in):
    """Shared 3-layer DQN convnet (Nature 2015 architecture), flattened output."""
    out = img_in
    with tf.variable_scope("convnet"):
        out = layers.convolution2d(out, num_outputs=32, kernel_size=8, stride=4, activation_fn=tf.nn.relu)
        out = layers.convolution2d(out, num_outputs=64, kernel_size=4, stride=2, activation_fn=tf.nn.relu)
        out = layers.convolution2d(out, num_outputs=64, kernel_size=3, stride=1, activation_fn=tf.nn.relu)
    return layers.flatten(out)


def model(img_in, num_actions, scope, reuse=False, layer_norm=False, distributed=False, atoms=51):
    """Plain DQN head, as described in
    https://storage.googleapis.com/deepmind-data/assets/papers/DeepMindNature14236Paper.pdf

    Args:
        img_in: input image tensor (batch of stacked frames).
        num_actions: number of discrete actions.
        scope: variable scope name for all created variables.
        reuse: passed to `tf.variable_scope` to share variables.
        layer_norm: if True, layer-normalize the hidden FC layer.
        distributed: if True, emit C51-style atom logits per action.
        atoms: number of distribution atoms (only used when `distributed`).

    Returns:
        [batch, num_actions * atoms] logits, reshaped to
        [batch, num_actions, atoms] when `distributed` is True.
    """
    print("create default model: distributed? ", distributed, "atoms", atoms)
    with tf.variable_scope(scope, reuse=reuse):
        conv_out = _atari_convnet(img_in)
        with tf.variable_scope("action_value"):
            value_out = layers.fully_connected(conv_out, num_outputs=512, activation_fn=None)
            if layer_norm:
                value_out = layer_norm_fn(value_out, relu=True)
            else:
                value_out = tf.nn.relu(value_out)
            value_out = layers.fully_connected(value_out, num_outputs=num_actions * atoms, activation_fn=None)
            if distributed:
                value_out = tf.reshape(value_out, [-1, num_actions, atoms])
        # NOTE: tf.shape returns a symbolic tensor in graph mode, so this
        # prints an op, not concrete numbers.
        print("output shape:", tf.shape(value_out))
        return value_out


def dueling_model(img_in, num_actions, scope, reuse=False, layer_norm=False, distributed=False, atoms=51):
    """Dueling DQN head, as described in https://arxiv.org/abs/1511.06581

    Splits the convnet features into a state-value stream and an
    action-advantage stream, recombining them with the mean-advantage
    baseline. Same arguments and output shapes as `model`.

    NOTE(review): in the non-distributed branch the state stream still has
    `atoms` outputs and the advantage mean is taken over the flat
    `num_actions * atoms` logits — this looks like it was intended to use
    width 1 / `num_actions` when `distributed` is False; confirm against the
    training code before relying on that path.
    """
    print("create dueling model: distributed? ", distributed, "atoms", atoms)
    with tf.variable_scope(scope, reuse=reuse):
        conv_out = _atari_convnet(img_in)

        # State-value stream V(s).
        with tf.variable_scope("state_value"):
            state_hidden = layers.fully_connected(conv_out, num_outputs=512, activation_fn=None)
            if layer_norm:
                state_hidden = layer_norm_fn(state_hidden, relu=True)
            else:
                state_hidden = tf.nn.relu(state_hidden)
            state_score = layers.fully_connected(state_hidden, num_outputs=atoms, activation_fn=None)

        # Action-advantage stream A(s, a).
        with tf.variable_scope("action_value"):
            actions_hidden = layers.fully_connected(conv_out, num_outputs=512, activation_fn=None)
            if layer_norm:
                actions_hidden = layer_norm_fn(actions_hidden, relu=True)
            else:
                actions_hidden = tf.nn.relu(actions_hidden)
            action_scores = layers.fully_connected(actions_hidden, num_outputs=num_actions * atoms, activation_fn=None)

        if distributed:
            # Center advantages across actions per atom, then broadcast-add V(s).
            atoms_action_adv = tf.reshape(action_scores, [-1, num_actions, atoms])
            atoms_action_adv_mean = tf.reduce_mean(atoms_action_adv, 1)
            atoms_action_adv = atoms_action_adv - tf.expand_dims(atoms_action_adv_mean, 1)
            atoms_scores = tf.expand_dims(state_score, 1)
            print("output shape:", tf.shape(atoms_scores))
            return tf.add(atoms_scores, atoms_action_adv)
        else:
            action_scores_mean = tf.reduce_mean(action_scores, 1)
            action_scores = action_scores - tf.expand_dims(action_scores_mean, 1)
            print("output shape:", tf.shape(action_scores))
            return state_score + action_scores