import argparse

import tensorflow as tf

import environments
from agent import PPOAgent
from policy import *


def print_summary(ep_count, rew):
    """Print a one-line summary for a finished episode."""
    print("Episode: %s. Reward: %s" % (ep_count, rew))


def start(env):
    """Run a trained PPO agent in render mode, forever.

    Restores the latest checkpoint from ``models/<env>/``; if restoring
    fails, falls back to freshly initialized variables. Then loops over
    episodes, rendering each step and printing the per-episode reward and
    the running mean reward.

    Args:
        env: environment name (string); also the model directory name
            under ``models/``.
    """
    MASTER_NAME = "master-0"
    tf.reset_default_graph()
    with tf.Session() as session:
        with tf.variable_scope(MASTER_NAME) as scope:
            env_opts = environments.get_env_options(env, False)
            policy = get_policy(env_opts, session)
            master_agent = PPOAgent(policy, session, MASTER_NAME, env_opts)
        # Restore the most recent checkpoint. The restore calls live INSIDE
        # the try-block so that a missing/corrupt checkpoint triggers the
        # from-scratch fallback instead of crashing.
        try:
            checkpoint = tf.train.latest_checkpoint("models/%s/" % env)
            saver = tf.train.import_meta_graph(checkpoint + ".meta")
            saver.restore(session, checkpoint)
        except Exception:
            print("Failed to restore model, starting from scratch")
            session.run(tf.global_variables_initializer())
        producer = environments.EnvironmentProducer(env, False)
        # Distinct name: `env` (the string) was previously shadowed by the
        # environment object, which obscured the "models/%s" usage above.
        environment = producer.get_new_environment()
        episode_count = 0
        cum_rew = 0
        while True:
            terminal = False
            s0 = environment.reset()
            cur_hidden_state = master_agent.get_init_hidden_state()
            episode_count += 1
            cur_rew = 0
            while not terminal:
                environment.render()
                # Deterministic (strict) action sample; carries the
                # recurrent hidden state across steps.
                action, h_out = master_agent.get_strict_sample(s0, cur_hidden_state)
                cur_hidden_state = h_out
                s0, r, terminal, _ = environment.step(action)
                cum_rew += r
                cur_rew += r
            print("Ep: %s, cur_reward: %s reward: %s" % (episode_count, cur_rew, cum_rew / episode_count))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=('Parallel PPO'))
    parser.add_argument('-env', type=str, help='Env name')
    args = parser.parse_args()
    start(**vars(args))