"""Hyperparameter containers for distributed RL training.

Each *Params subclass extends the base ``Params`` signature with settings for
one component (env / memory / model / agent); ``Options`` bundles one instance
of each for consumption by the rest of the project.
"""
import os
import shutil

import numpy as np
import torch
import torch.nn as nn
from tensorboardX import SummaryWriter

from utils.random_process import OrnsteinUhlenbeckProcess


CONFIGS = [
    # agent_type, env_type, game, memory_type, model_type
    ["dqn", "atari", "pong", "shared", "dqn-cnn"],  # 0
    # add custom configs ...
]


class Params(object):
    """Base training configuration shared by all *Params subclasses."""

    def __init__(self):
        # training signature
        self.machine = "daim"        # "machine_id"
        self.timestamp = "19061800"  # "yymmdd##"
        # training configuration
        self.mode = 1                # 1(train) | 2(test model_file)
        self.config = 0              # index into CONFIGS
        self.gpu_ind = 0             # learner will be using device('cuda:gpu_ind')

        (self.agent_type,
         self.env_type,
         self.game,
         self.memory_type,
         self.model_type) = CONFIGS[self.config]

        self.seed = 100
        self.render = False          # whether render the window from the original envs or not
        self.visualize = True        # whether do online plotting and stuff or not

        self.num_envs_per_actor = 1  # NOTE: must be 1 for envs that don't have parallel support
        self.num_actors = 8
        self.num_learners = 1

        # prefix for saving models&logs
        self.refs = self.machine + "_" + self.timestamp
        self.root_dir = os.getcwd()

        # model files
        # NOTE: will save the current model to model_name
        self.model_name = self.root_dir + "/models/" + self.refs + ".pth"
        # NOTE: will load pretrained model_file if not None
        self.model_file = None  # self.root_dir + "/models/{TODO:FILL_IN_PRETAINED_MODEL_FILE}.pth"
        if self.mode == 2:
            # NOTE: so only need to change self.mode to 2 to test the current training
            self.model_file = self.model_name
            assert self.model_file is not None, "Pre-Trained model is None, Testing aborted!!!"
            self.visualize = False

        # logging configs
        self.log_dir = self.root_dir + "/logs/" + self.refs + "/"


class EnvParams(Params):
    """State-preprocessing settings for the environment wrappers."""

    def __init__(self):
        super(EnvParams, self).__init__()

        # for preprocessing the states before outputing from env
        if "mlp" in self.model_type:    # low dim inputs, no preprocessing or resizing
            self.state_cha = 1          # NOTE: equals hist_len
            self.state_hei = 1          # NOTE: always 1 for mlp's
            self.state_wid = None       # depends on the env
        elif "cnn" in self.model_type:  # raw image inputs, need to resize or crop to this step_size
            self.state_cha = 4          # NOTE: equals hist_len
            self.state_hei = 84
            self.state_wid = 84

        if self.env_type == "atari":
            self.early_stop = 12500     # TODO: check Rainbow


class MemoryParams(Params):
    """Replay-memory settings (size, state dtype, prioritization)."""

    def __init__(self):
        super(MemoryParams, self).__init__()

        if self.memory_type == "shared":
            if self.agent_type == "dqn":
                self.memory_size = 50000
            elif self.agent_type == "ddpg":
                self.memory_size = 50000

        # dtype for states
        if "mlp" in self.model_type:
            # self.dtype = torch.float32 # somehow passing in dtype causes error in mp
            self.tensortype = torch.FloatTensor
        elif "cnn" in self.model_type:  # save image as byte to save space
            # self.dtype = torch.uint8 # somehow passing in dtype causes error in mp
            self.tensortype = torch.ByteTensor

        # TODO: not completed for now: prioritized experience replay
        self.enable_per = False
        if self.enable_per:
            self.priority_exponent = 0.5  # TODO: rainbow: 0.5, distributed: 0.6
            self.priority_weight = 0.4


class ModelParams(Params):
    """Model construction settings (currently all inherited from Params)."""

    def __init__(self):
        super(ModelParams, self).__init__()
        # NOTE: the devices cannot be passed into the processes this way
        # if 'discrete' in self.model_type:
        #     self.model_device = torch.device('cpu')
        # if 'continuous' in self.model_type:
        #     self.model_device = torch.device('cpu')


class AgentParams(Params):
    """Per-agent training hyperparameters (losses, optimizers, schedules)."""

    def __init__(self):
        super(AgentParams, self).__init__()

        if self.agent_type == "dqn":
            # criteria and optimizer
            self.value_criteria = nn.MSELoss()
            # self.optim = torch.optim.RMSprop  # alternative; Adam is the one in effect
            self.optim = torch.optim.Adam
            # generic hyperparameters
            self.num_tasks = 1          # NOTE: always put main task at last
            self.steps = 500000         # max #iterations
            self.gamma = 0.99
            self.clip_grad = np.inf     # 40.#100
            self.lr = 1e-4              # 2.5e-4/4.
            self.lr_decay = False
            self.weight_decay = 0.
            self.actor_sync_freq = 100  # sync global_model to actor's local_model every this many steps
            # logger configs
            self.logger_freq = 15       # log every this many secs
            self.actor_freq = 250       # push & reset local actor stats every this many actor steps
            self.learner_freq = 100     # push & reset local learner stats every this many learner steps
            self.evaluator_freq = 30    # eval every this many secs
            self.evaluator_nepisodes = 2  # eval for this many episodes
            # TODO:
            self.tester_nepisodes = 50
            # off-policy specifics
            self.learn_start = 5000     # start update params after this many steps
            self.batch_size = 128
            self.target_model_update = 250
            self.nstep = 5              # NOTE: looks this many steps ahead
            # dqn specifics
            self.enable_double = False  # True#False
            self.eps = 0.4
            self.eps_alpha = 7
        elif self.agent_type == "ddpg":
            # criteria and optimizer
            self.value_criteria = nn.MSELoss()
            self.optim = torch.optim.Adam
            # generic hyperparameters
            self.num_tasks = 1          # NOTE: always put main task at last
            self.steps = 500000         # max #iterations
            self.gamma = 0.99
            self.clip_grad = 40.
            self.lr = 1e-4
            self.lr_decay = False
            self.weight_decay = 0.
            self.actor_sync_freq = 400  # sync global_model to actor's local_model every this many steps
            # logger configs
            self.logger_freq = 15       # log every this many secs
            self.actor_freq = 2500      # push & reset local actor stats every this many actor steps
            self.learner_freq = 1000    # push & reset local learner stats every this many learner steps
            self.evaluator_freq = 60    # eval every this many secs
            self.evaluator_nepisodes = 2  # eval for this many episodes
            # TODO:
            self.tester_nepisodes = 50
            # off-policy specifics
            self.learn_start = 250      # start update params after this many steps
            self.batch_size = 64
            self.target_model_update = 1e-3
            self.nstep = 5              # NOTE: looks this many steps ahead
            # ddpg specifics
            self.random_process = OrnsteinUhlenbeckProcess


class Options(Params):
    """Aggregated settings object consumed by the rest of the project.

    NOTE: the component params are class attributes, instantiated once at
    import time and shared by every Options instance.
    """

    env_params = EnvParams()
    memory_params = MemoryParams()
    model_params = ModelParams()
    agent_params = AgentParams()