Python chainer.optimizers.RMSpropGraves() Examples

The following are 8 code examples of chainer.optimizers.RMSpropGraves(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module chainer.optimizers.
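Before the project examples, here is a minimal sketch of how RMSpropGraves is typically wired into a Chainer training step. The TinyMLP model and the synthetic data are illustrative assumptions, not taken from any of the projects below; the optimizer calls (setup, update) follow the standard Chainer API, and the hyperparameters mirror the DQN examples further down.

import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import optimizers

# Toy regression model (illustrative; not from the projects below).
class TinyMLP(chainer.Chain):
    def __init__(self):
        super(TinyMLP, self).__init__()
        with self.init_scope():
            self.l1 = L.Linear(4, 16)
            self.l2 = L.Linear(16, 1)

    def __call__(self, x):
        return self.l2(F.relu(self.l1(x)))

model = TinyMLP()

# Hyperparameters as in the DQN examples below.
optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95,
                                     momentum=0.95, eps=0.01)
optimizer.setup(model)

# One gradient step on synthetic data.
x = np.random.rand(8, 4).astype(np.float32)
t = np.random.rand(8, 1).astype(np.float32)
loss = F.mean_squared_error(model(x), t)
model.cleargrads()
loss.backward()
optimizer.update()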
Example #1
Source File: dqn_agent.py    From DQN-chainer with MIT License
def __init__(self, n_history, n_act):
    print("Initializing DQN...")
    self.step = 0  # number of steps for which the DQN has been updated
    self.n_act = n_act
    self.n_history = n_history  # number of observations used to construct a single state

    print("Model Building")
    self.model = ActionValue(n_history, n_act).to_gpu()
    self.model_target = copy.deepcopy(self.model)

    print("Initializing Optimizer")
    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.01)
    self.optimizer.setup(self.model)

    # History data: D = [s, a, r, s_dash, end_episode_flag]
    # self.data_size and self.img_size are class attributes defined elsewhere in the source file.
    hs = self.n_history
    ims = self.img_size
    self.replay_buffer = [np.zeros((self.data_size, hs, ims, ims), dtype=np.uint8),
                          np.zeros(self.data_size, dtype=np.uint8),
                          np.zeros((self.data_size, 1), dtype=np.float32),
                          np.zeros((self.data_size, hs, ims, ims), dtype=np.uint8),
                          np.zeros((self.data_size, 1), dtype=bool)]  # np.bool is removed in NumPy >= 1.24
Example #2
Source File: dqn_agent_cpu.py    From DQN-chainer with MIT License
def __init__(self, n_history, n_act):
    print("Initializing DQN...")
    self.step = 0  # number of steps for which the DQN has been updated
    self.n_act = n_act
    self.n_history = n_history  # number of observations used to construct a single state

    print("Model Building")
    self.model = ActionValue(n_history, n_act)
    self.model_target = copy.deepcopy(self.model)

    print("Initializing Optimizer")
    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.01)
    self.optimizer.setup(self.model)

    # History data: D = [s, a, r, s_dash, end_episode_flag]
    # self.data_size and self.img_size are class attributes defined elsewhere in the source file.
    hs = self.n_history
    ims = self.img_size
    self.replay_buffer = [np.zeros((self.data_size, hs, ims, ims), dtype=np.uint8),
                          np.zeros(self.data_size, dtype=np.uint8),
                          np.zeros((self.data_size, 1), dtype=np.float32),
                          np.zeros((self.data_size, hs, ims, ims), dtype=np.uint8),
                          np.zeros((self.data_size, 1), dtype=bool)]  # np.bool is removed in NumPy >= 1.24
Example #3
Source File: test_pretrained_models.py    From chainerrl with MIT License
def _test_load_dqn(self, gpu):
    q_func = links.Sequence(
        links.NatureDQNHead(),
        L.Linear(512, 4),
        DiscreteActionValue)

    opt = optimizers.RMSpropGraves(
        lr=2.5e-4, alpha=0.95, momentum=0.0, eps=1e-2)
    opt.setup(q_func)

    rbuf = replay_buffer.ReplayBuffer(100)

    explorer = explorers.LinearDecayEpsilonGreedy(
        start_epsilon=1.0, end_epsilon=0.1,
        decay_steps=10 ** 6,
        random_action_func=lambda: np.random.randint(4))

    agent = agents.DQN(q_func, opt, rbuf, gpu=gpu, gamma=0.99,
                       explorer=explorer, replay_start_size=50,
                       target_update_interval=10 ** 4,
                       clip_delta=True,
                       update_interval=4,
                       batch_accumulator='sum',
                       phi=lambda x: x)

    model, exists = download_model("DQN", "BreakoutNoFrameskip-v4",
                                   model_type=self.pretrained_type)
    agent.load(model)
    if os.environ.get('CHAINERRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED'):
        assert exists
Example #4
Source File: test_optimizers_by_linear_model.py    From chainer with MIT License
def create(self):
    return optimizers.RMSpropGraves(0.1)
Example #5
Source File: dqn_agent_nips.py    From DQN-chainer with MIT License
def __init__(self, enable_controller=[0, 3, 4]):
    self.num_of_actions = len(enable_controller)
    self.enable_controller = enable_controller  # default setting: "Pong"

    print("Initializing DQN...")
    # Initialization for Chainer 1.1.0 or older:
    #     print("CUDA init")
    #     cuda.init()

    print("Model Building")
    self.model = FunctionSet(
        l1=F.Convolution2D(4, 16, ksize=8, stride=4, wscale=np.sqrt(2)),
        l2=F.Convolution2D(16, 32, ksize=4, stride=2, wscale=np.sqrt(2)),
        l3=F.Linear(2592, 256),
        q_value=F.Linear(256, self.num_of_actions,
                         initialW=np.zeros((self.num_of_actions, 256),
                                           dtype=np.float32))
    ).to_gpu()

    print("Initializing Optimizer")
    self.optimizer = optimizers.RMSpropGraves(lr=0.0002, alpha=0.3, momentum=0.2)
    self.optimizer.setup(self.model.collect_parameters())  # Chainer 1.x API; newer versions take the model directly

    # History data: D = [s, a, r, s_dash, end_episode_flag]
    # self.data_size is a class attribute defined elsewhere in the source file.
    self.D = [np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
              np.zeros(self.data_size, dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.int8),
              np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=bool)]  # np.bool is removed in NumPy >= 1.24
Example #6
Source File: dqn_agent_nature.py    From DQN-chainer with MIT License
def __init__(self, enable_controller=[0, 3, 4]):
    self.num_of_actions = len(enable_controller)
    self.enable_controller = enable_controller  # default setting: "Pong"

    print("Initializing DQN...")
    # Initialization for Chainer 1.1.0 or older:
    #     print("CUDA init")
    #     cuda.init()

    print("Model Building")
    self.model = FunctionSet(
        l1=F.Convolution2D(4, 32, ksize=8, stride=4, nobias=False, wscale=np.sqrt(2)),
        l2=F.Convolution2D(32, 64, ksize=4, stride=2, nobias=False, wscale=np.sqrt(2)),
        l3=F.Convolution2D(64, 64, ksize=3, stride=1, nobias=False, wscale=np.sqrt(2)),
        l4=F.Linear(3136, 512, wscale=np.sqrt(2)),
        q_value=F.Linear(512, self.num_of_actions,
                         initialW=np.zeros((self.num_of_actions, 512),
                                           dtype=np.float32))
    ).to_gpu()

    self.model_target = copy.deepcopy(self.model)

    print("Initializing Optimizer")
    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.0001)
    self.optimizer.setup(self.model.collect_parameters())  # Chainer 1.x API; newer versions take the model directly

    # History data: D = [s, a, r, s_dash, end_episode_flag]
    # self.data_size is a class attribute defined elsewhere in the source file.
    self.D = [np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
              np.zeros(self.data_size, dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.int8),
              np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=bool)]  # np.bool is removed in NumPy >= 1.24
Example #7
Source File: dern.py    From der-network with MIT License
def setup_optimizer(self):
    optimizer = optimizers.RMSpropGraves(
        lr=self.args.start_lr, alpha=0.95, momentum=0.9, eps=1e-08)
    optimizer.setup(self)  # self is the model: the enclosing class is itself a Chain in the original source
    optimizer.add_hook(chainer.optimizer.GradientClipping(self.args.grad_clip))
    return optimizer
Example #8
Source File: q_net.py    From deel with MIT License
def __init__(self, use_gpu, enable_controller, dim):
    self.use_gpu = use_gpu
    self.num_of_actions = len(enable_controller)
    self.enable_controller = enable_controller
    self.dim = dim

    print("Initializing Q-Network...")

    hidden_dim = 256
    self.model = FunctionSet(
        l4=F.Linear(self.dim * self.hist_size, hidden_dim, wscale=np.sqrt(2)),
        q_value=F.Linear(hidden_dim, self.num_of_actions,
                         initialW=np.zeros((self.num_of_actions, hidden_dim),
                                           dtype=np.float32))
    )
    if self.use_gpu >= 0:
        self.model.to_gpu()

    self.model_target = copy.deepcopy(self.model)

    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.0001)
    self.optimizer.setup(self.model.collect_parameters())  # Chainer 1.x API

    # History data: D = [s, a, r, s_dash, end_episode_flag]
    # self.data_size and self.hist_size are class attributes defined elsewhere in the source file.
    self.d = [np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.uint8),
              np.zeros(self.data_size, dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.int8),
              np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=bool)]  # np.bool is removed in NumPy >= 1.24
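Several of the examples above (#1, #2, #5, #6, #8) preallocate the replay buffer as a list of NumPy arrays laid out as D = [s, a, r, s_dash, end_episode_flag]. A sketch of how a transition would typically be written into such a buffer; the store_transition helper, the capacity, and the circular-index arithmetic are illustrative assumptions, not code from these projects:

import numpy as np

data_size = 10 ** 5      # illustrative capacity
hist_size, dim = 4, 84 * 84

# Same layout as D in the examples: [s, a, r, s_dash, end_episode_flag]
D = [np.zeros((data_size, hist_size, dim), dtype=np.uint8),
     np.zeros(data_size, dtype=np.uint8),
     np.zeros((data_size, 1), dtype=np.int8),
     np.zeros((data_size, hist_size, dim), dtype=np.uint8),
     np.zeros((data_size, 1), dtype=bool)]

def store_transition(D, step, s, a, r, s_dash, done):
    # Overwrite the oldest entry once the buffer is full (circular indexing).
    idx = step % data_size
    D[0][idx] = s
    D[1][idx] = a
    D[2][idx] = r
    D[3][idx] = s_dash
    D[4][idx] = done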