Python gym.utils.seeding.np_random() Examples

The following are 30 code examples of gym.utils.seeding.np_random(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module gym.utils.seeding, or try the search function.
Example #1
Source File: blackjack.py    From rl_algorithms with MIT License 7 votes vote down vote up
def _step(self, action):
        """Apply one player decision and return ``(obs, reward, done, info)``.

        A truthy ``action`` is a hit: one card is drawn for the player, and the
        episode ends with reward -1 only if the hand busts (otherwise reward 0
        and the episode continues). A falsy ``action`` is a stick: the dealer
        draws until its hand sums to at least 17, the episode ends, and the
        reward is ``cmp`` of the two scores, raised to 1.5 when
        ``self.natural`` is set and the player wins with a natural.
        """
        assert self.action_space.contains(action)

        if not action:
            # Stick: let the dealer play out its hand, then compare scores.
            while sum_hand(self.dealer) < 17:
                self.dealer.append(draw_card(self.np_random))
            outcome = cmp(score(self.player), score(self.dealer))
            if self.natural and is_natural(self.player) and outcome == 1:
                outcome = 1.5  # bonus payout for winning with a natural
            return self._get_obs(), outcome, True, {}

        # Hit: add one card to the player's hand.
        self.player.append(draw_card(self.np_random))
        if is_bust(self.player):
            return self._get_obs(), -1, True, {}
        return self._get_obs(), 0, False, {}
Example #2
Source File: kellycoinflip.py    From DRL_DeliveryDuel with MIT License 6 votes vote down vote up
def step(self, action):
        """Play one betting round and return ``(obs, reward, done, info)``.

        ``action`` is divided by 100 (presumably pennies to dollars -- confirm
        with the action-space definition) and capped at the current wealth.
        The episode ends with reward 0.0 once wealth is below 1e-6, or with
        reward equal to the remaining wealth once no rounds are left.
        Otherwise a coin is flipped, wealth and the win/loss counters are
        updated, and the round yields reward 0.0.
        """
        # A bet larger than the bankroll is treated as betting everything.
        bet = min(action / 100.0, self.wealth)

        if self.wealth < 0.000001:
            return self._get_obs(), 0.0, True, {}

        if self.rounds == 0:
            payout = self.wealth
            return self._get_obs(), payout, True, {}

        self.rounds -= 1
        won = flip(self.edge, self.np_random)
        self.roundsElapsed += 1
        if won:
            # Winnings are clipped at the configured wealth ceiling.
            self.wealth = min(self.maxWealth, self.wealth + bet)
            self.maxEverWealth = max(self.wealth, self.maxEverWealth)
            self.wins += 1
        else:
            self.wealth = self.wealth - bet
            self.losses += 1
        return self._get_obs(), 0.0, False, {}
Example #3
Source File: kellycoinflip.py    From DRL_DeliveryDuel with MIT License 6 votes vote down vote up
def step(self, action):
        """Play one betting round and return ``(obs, reward, done, info)``.

        The bet arrives in pennies and is converted to dollars; a bet above
        the current wealth is treated as betting everything. The episode ends
        with reward 0.0 once wealth is below 1e-6, or with reward equal to the
        remaining wealth once no rounds are left. Otherwise a coin is flipped
        and the bet is added to (capped at ``self.maxWealth``) or subtracted
        from the wealth, with reward 0.0 for the round.
        """
        bet = min(action / 100.0, self.wealth)

        if self.wealth < 0.000001:
            return self._get_obs(), 0.0, True, {}

        if self.rounds == 0:
            payout = self.wealth
            return self._get_obs(), payout, True, {}

        self.rounds -= 1
        if flip(self.edge, self.np_random):
            self.wealth = min(self.maxWealth, self.wealth + bet)
        else:
            self.wealth = self.wealth - bet
        return self._get_obs(), 0.0, False, {}
Example #4
Source File: memorize_digits.py    From DRL_DeliveryDuel with MIT License 6 votes vote down vote up
def step(self, action):
        """Score the guess for the current digit, draw a new one and render it.

        The reward is +1 when ``action`` equals the stored digit and -1
        otherwise (including when no digit is stored yet, i.e. it is -1).
        Once more than 20 steps have elapsed, the episode ends whenever a
        ``randint(low=0, high=5)`` draw equals 0. A new digit is then drawn
        and painted into a fresh observation image.

        Returns ``(obs, reward, done, info)``.
        """
        self.step_n += 1
        reward = -1
        done = False
        if self.digit != -1:
            if self.digit == action:
                reward = +1
            # The termination draw is only made after step 20 (short-circuit).
            done = self.step_n > 20 and 0 == self.np_random.randint(low=0, high=5)
        self.digit = self.np_random.randint(low=0, high=10)

        # Background-filled frame.
        frame = np.zeros((FIELD_H, FIELD_W, 3), dtype=np.uint8)
        frame[:, :, :] = self.color_bg

        # 6x6 glyph: cells marked 42 in the template get the digit colour.
        glyph = np.zeros((6, 6, 3), dtype=np.uint8)
        glyph[:] = self.color_bg
        mask = self.bogus_mnist[self.digit] == 42
        glyph[mask] = self.color_digit

        y, x = self.digit_y, self.digit_x
        frame[y - 3:y + 3, x - 3:x + 3] = glyph
        self.last_obs = frame
        return frame, reward, done, {}
Example #5
Source File: tic_tac_toe_env.py    From BERT with Apache License 2.0 6 votes vote down vote up
def __init__(self, strict=False):
    """Set up spaces and reward range, then seed and reset the environment.

    Args:
        strict: stored on ``self.strict``; its effect is defined by the
            methods that read it, which are not part of this block.
    """
    self.strict = strict

    # NOTE: metadata and spec are not set here.
    self.reward_range = (-1.0, 1.0)

    # One discrete action per board position (9 in total).
    self.action_space = spaces.Discrete(9)

    # Observations are 3x3 integer grids with entries between -1 and 1.
    board_shape = (3, 3)
    self.observation_space = spaces.Box(
        low=-1, high=1, shape=board_shape, dtype=np.int64)

    # The generator slot must exist before seed() is called.
    self.np_random = None
    self.seed()

    # Game-state slots must exist before reset() is called.
    self.board_state = None
    self.done = False
    self.reset()
Example #6
Source File: blackjack.py    From DRL_DeliveryDuel with MIT License 6 votes vote down vote up
def step(self, action):
        """Apply one player decision and return ``(obs, reward, done, info)``.

        A truthy ``action`` is a hit: one card is drawn for the player, and the
        episode ends with reward -1 only if the hand busts (otherwise reward 0
        and the episode continues). A falsy ``action`` is a stick: the dealer
        draws until its hand sums to at least 17, the episode ends, and the
        reward is ``cmp`` of the two scores, raised to 1.5 when
        ``self.natural`` is set and the player wins with a natural.
        """
        assert self.action_space.contains(action)

        if not action:
            # Stick: let the dealer play out its hand, then compare scores.
            while sum_hand(self.dealer) < 17:
                self.dealer.append(draw_card(self.np_random))
            outcome = cmp(score(self.player), score(self.dealer))
            if self.natural and is_natural(self.player) and outcome == 1:
                outcome = 1.5  # bonus payout for winning with a natural
            return self._get_obs(), outcome, True, {}

        # Hit: add one card to the player's hand.
        self.player.append(draw_card(self.np_random))
        if is_bust(self.player):
            return self._get_obs(), -1, True, {}
        return self._get_obs(), 0, False, {}
Example #7
Source File: block_pushing.py    From c-swm with MIT License 5 votes vote down vote up
def seed(self, seed=None):
        """Install a generator from ``seeding.np_random`` and report its seed.

        The generator is stored on ``self.np_random``; the return value is a
        one-element list holding the seed reported by ``seeding.np_random``.
        """
        rng, used_seed = seeding.np_random(seed)
        self.np_random = rng
        return [used_seed]
Example #8
Source File: hotter_colder.py    From DRL_DeliveryDuel with MIT License 5 votes vote down vote up
def seed(self, seed=None):
        """Install a generator from ``seeding.np_random`` and report its seed.

        The generator is stored on ``self.np_random``; the return value is a
        one-element list holding the seed reported by ``seeding.np_random``.
        """
        rng, used_seed = seeding.np_random(seed)
        self.np_random = rng
        return [used_seed]
Example #9
Source File: hotter_colder.py    From DRL_DeliveryDuel with MIT License 5 votes vote down vote up
def reset(self):
        """Draw a new hidden number and clear the guess counter.

        The number is sampled with ``uniform(-self.range, self.range)``.
        Returns the initial observation, which is always 0.
        """
        bound = self.range
        self.number = self.np_random.uniform(-bound, bound)
        self.guess_count = self.observation = 0
        return self.observation
Example #10
Source File: kellycoinflip.py    From DRL_DeliveryDuel with MIT License 5 votes vote down vote up
def flip(edge, np_random):
    """Return whether a single ``np_random.uniform()`` draw is below ``edge``."""
    draw = np_random.uniform()
    return draw < edge
Example #11
Source File: atari_env.py    From DRL_DeliveryDuel with MIT License 5 votes vote down vote up
def seed(self, seed=None):
        """Seed the environment's generator and the emulator, then load the ROM.

        Returns a two-element list: the seed reported by
        ``seeding.np_random`` and the derived seed handed to the emulator.
        """
        rng, primary_seed = seeding.np_random(seed)
        self.np_random = rng

        # The derived seed is passed as a uint but checked as an int
        # elsewhere, so it has to stay below 2**31.
        emulator_seed = seeding.hash_seed(primary_seed + 1) % 2**31

        # Empirically, the seed must be set before the ROM is loaded.
        self.ale.setInt(b'random_seed', emulator_seed)
        self.ale.loadROM(self.game_path)
        return [primary_seed, emulator_seed]
Example #12
Source File: blackjack.py    From DRL_DeliveryDuel with MIT License 5 votes vote down vote up
def seed(self, seed=None):
        """Install a generator from ``seeding.np_random`` and report its seed.

        The generator is stored on ``self.np_random``; the return value is a
        one-element list holding the seed reported by ``seeding.np_random``.
        """
        rng, used_seed = seeding.np_random(seed)
        self.np_random = rng
        return [used_seed]
Example #13
Source File: blackjack.py    From DRL_DeliveryDuel with MIT License 5 votes vote down vote up
def reset(self):
        """Deal new hands (dealer first, then player) and return the observation."""
        rng = self.np_random
        self.dealer = draw_hand(rng)
        self.player = draw_hand(rng)
        return self._get_obs()
Example #14
Source File: roulette.py    From DRL_DeliveryDuel with MIT License 5 votes vote down vote up
def step(self, action):
        """Spin the wheel for one bet and return ``(obs, reward, done, info)``.

        Action ``self.n - 1`` ends the episode with zero reward. Any other
        action triggers a draw from ``randint(0, self.n - 1)``:

        * draw and action both 0: reward ``self.n - 2.0``;
        * both non-zero with matching parity: reward 1.0;
        * anything else: reward -1.0.

        The observation is always 0.
        """
        assert self.action_space.contains(action)

        last_action = self.n - 1
        if action == last_action:
            # observation, reward, done, info
            return 0, 0, True, {}

        # N.B. np.random.randint draws from [A, B) while random.randint
        # draws from [A, B].
        spin = self.np_random.randint(0, self.n - 1)
        if spin == 0 or action == 0:
            # A zero only pays out when both the draw and the bet are zero.
            reward = self.n - 2.0 if spin == action else -1.0
        elif spin % 2 == action % 2:
            reward = 1.0
        else:
            reward = -1.0
        return 0, reward, False, {}
Example #15
Source File: block_pushing.py    From c-swm with MIT License 5 votes vote down vote up
def __init__(self, width=5, height=5, render_type='cubes', num_objects=5,
                 seed=None):
        """Configure the grid, its objects and spaces, then seed and reset.

        Args:
            width: stored on ``self.width``; second dimension of the
                observation shape.
            height: stored on ``self.height``; third dimension of the
                observation shape.
            render_type: stored on ``self.render_type``.
            num_objects: number of objects; there are four actions per object.
            seed: forwarded to ``self.seed``.
        """
        self.width, self.height = width, height
        self.render_type = render_type

        self.num_objects = num_objects
        # Each object can be moved in four directions (N, E, S, W).
        self.num_actions = 4 * num_objects

        # At least nine colours are always requested.
        self.colors = utils.get_colors(num_colors=max(9, num_objects))

        # Slots that are not populated yet.
        self.np_random = None
        self.game = None
        self.target = None

        # Objects start at a position outside the environment, which makes
        # collision resolution easier.
        self.objects = [[-1, -1] for _ in range(num_objects)]

        # When True, collisions are checked and two objects may not occupy
        # the same position.
        self.collisions = True

        self.action_space = spaces.Discrete(self.num_actions)
        obs_shape = (3, width, height)
        self.observation_space = spaces.Box(
            low=0, high=1, shape=obs_shape, dtype=np.float32)

        self.seed(seed)
        self.reset()
Example #16
Source File: quadrotor2d_slungload.py    From reinmav-gym with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def reset(self):
		"""Sample a fresh 9-element state uniformly from [-1.0, 1.0) and return a copy.

		Also prints ``reset`` to standard output.
		"""
		print("reset")
		sample = self.np_random.uniform(low=-1.0, high=1.0, size=(9,))
		self.state = np.array(sample)
		return np.array(self.state)
Example #17
Source File: quadrotor3d_slungload.py    From reinmav-gym with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def reset(self):
		"""Sample a fresh 16-element state uniformly from [-1.0, 1.0) and return a copy.

		Also prints ``reset`` to standard output.
		"""
		print("reset")
		sample = self.np_random.uniform(low=-1.0, high=1.0, size=(16,))
		self.state = np.array(sample)
		return np.array(self.state)
Example #18
Source File: atari_env.py    From DRL_DeliveryDuel with MIT License 5 votes vote down vote up
def step(self, a):
        """Repeat the chosen emulator action for one frameskip window.

        ``a`` indexes ``self._action_set``. When ``self.frameskip`` is an int
        the action is repeated that many times; otherwise the repeat count is
        drawn with ``randint(frameskip[0], frameskip[1])``. Rewards from the
        individual emulator calls are summed.

        Returns ``(obs, reward, done, info)`` where ``info`` carries the
        emulator's life count under ``"ale.lives"``.
        """
        action = self._action_set[a]

        skip = self.frameskip
        if isinstance(skip, int):
            repeats = skip
        else:
            repeats = self.np_random.randint(skip[0], skip[1])

        total = 0.0
        for _ in range(repeats):
            total += self.ale.act(action)

        ob = self._get_obs()
        info = {"ale.lives": self.ale.lives()} if False else None
        return ob, total, self.ale.game_over(), {"ale.lives": self.ale.lives()}
Example #19
Source File: kellycoinflip.py    From DRL_DeliveryDuel with MIT License 5 votes vote down vote up
def seed(self, seed=None):
        """Install a generator from ``seeding.np_random`` and report its seed.

        The generator is stored on ``self.np_random``; the return value is a
        one-element list holding the seed reported by ``seeding.np_random``.
        """
        rng, used_seed = seeding.np_random(seed)
        self.np_random = rng
        return [used_seed]
Example #20
Source File: acrobot.py    From DRL_DeliveryDuel with MIT License 5 votes vote down vote up
def reset(self):
        """Sample a new 4-element state from [-0.1, 0.1) and return the observation."""
        initial_state = self.np_random.uniform(low=-0.1, high=0.1, size=(4,))
        self.state = initial_state
        return self._get_ob()
Example #21
Source File: acrobot.py    From DRL_DeliveryDuel with MIT License 5 votes vote down vote up
def seed(self, seed=None):
        """Install a generator from ``seeding.np_random`` and report its seed.

        The generator is stored on ``self.np_random``; the return value is a
        one-element list holding the seed reported by ``seeding.np_random``.
        """
        rng, used_seed = seeding.np_random(seed)
        self.np_random = rng
        return [used_seed]
Example #22
Source File: cartpole.py    From DRL_DeliveryDuel with MIT License 5 votes vote down vote up
def reset(self):
        """Sample a new 4-element state from [-0.05, 0.05) and return a copy of it.

        Also clears ``self.steps_beyond_done``.
        """
        initial_state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
        self.state = initial_state
        self.steps_beyond_done = None
        return np.array(self.state)
Example #23
Source File: cartpole.py    From DRL_DeliveryDuel with MIT License 5 votes vote down vote up
def seed(self, seed=None):
        """Install a generator from ``seeding.np_random`` and report its seed.

        The generator is stored on ``self.np_random``; the return value is a
        one-element list holding the seed reported by ``seeding.np_random``.
        """
        rng, used_seed = seeding.np_random(seed)
        self.np_random = rng
        return [used_seed]
Example #24
Source File: continuous_mountain_car.py    From DRL_DeliveryDuel with MIT License 5 votes vote down vote up
def reset(self):
        """Start a new episode and return a copy of the initial state.

        The state is a two-element array: a first component drawn uniformly
        from [-0.6, -0.4) and a second component of 0.
        """
        first_component = self.np_random.uniform(low=-0.6, high=-0.4)
        self.state = np.array([first_component, 0])
        return np.array(self.state)

#    def get_state(self):
#        return self.state 
Example #25
Source File: mountain_car.py    From DRL_DeliveryDuel with MIT License 5 votes vote down vote up
def reset(self):
        """Start a new episode and return a copy of the initial state.

        The state is a two-element array: a first component drawn uniformly
        from [-0.6, -0.4) and a second component of 0.
        """
        first_component = self.np_random.uniform(low=-0.6, high=-0.4)
        self.state = np.array([first_component, 0])
        return np.array(self.state)
Example #26
Source File: mountain_car.py    From DRL_DeliveryDuel with MIT License 5 votes vote down vote up
def seed(self, seed=None):
        """Install a generator from ``seeding.np_random`` and report its seed.

        The generator is stored on ``self.np_random``; the return value is a
        one-element list holding the seed reported by ``seeding.np_random``.
        """
        rng, used_seed = seeding.np_random(seed)
        self.np_random = rng
        return [used_seed]
Example #27
Source File: pendulum.py    From DRL_DeliveryDuel with MIT License 5 votes vote down vote up
def reset(self):
        """Sample a new two-element state and return the observation.

        The first component is drawn from [-pi, pi) and the second from
        [-1, 1). ``self.last_u`` is cleared.
        """
        bounds = np.array([np.pi, 1])
        self.state = self.np_random.uniform(low=-bounds, high=bounds)
        self.last_u = None
        return self._get_obs()
Example #28
Source File: pendulum.py    From DRL_DeliveryDuel with MIT License 5 votes vote down vote up
def seed(self, seed=None):
        """Install a generator from ``seeding.np_random`` and report its seed.

        The generator is stored on ``self.np_random``; the return value is a
        one-element list holding the seed reported by ``seeding.np_random``.
        """
        rng, used_seed = seeding.np_random(seed)
        self.np_random = rng
        return [used_seed]
Example #29
Source File: bipedal_walker.py    From DRL_DeliveryDuel with MIT License 5 votes vote down vote up
def _generate_clouds(self):
        """Rebuild ``self.cloud_poly`` with randomly placed five-point clouds.

        One cloud is created per 20 units of ``TERRAIN_LENGTH``. Each entry is
        ``(poly, x1, x2)``: the list of ``(x, y)`` vertices plus the smallest
        and largest vertex x-coordinate.
        """
        # Sorry for the clouds, couldn't resist
        self.cloud_poly = []
        for _ in range(TERRAIN_LENGTH//20):
            centre_x = self.np_random.uniform(0, TERRAIN_LENGTH)*TERRAIN_STEP
            centre_y = VIEWPORT_H/SCALE*3/4

            vertices = []
            for a in range(5):
                # Draw order per vertex: x jitter first, then y jitter.
                vx = centre_x+15*TERRAIN_STEP*math.sin(3.14*2*a/5)+self.np_random.uniform(0,5*TERRAIN_STEP)
                vy = centre_y+ 5*TERRAIN_STEP*math.cos(3.14*2*a/5)+self.np_random.uniform(0,5*TERRAIN_STEP)
                vertices.append((vx, vy))

            xs = [vertex[0] for vertex in vertices]
            self.cloud_poly.append((vertices, min(xs), max(xs)))
Example #30
Source File: car_racing.py    From DRL_DeliveryDuel with MIT License 5 votes vote down vote up
def seed(self, seed=None):
        """Install a generator from ``seeding.np_random`` and report its seed.

        The generator is stored on ``self.np_random``; the return value is a
        one-element list holding the seed reported by ``seeding.np_random``.
        """
        rng, used_seed = seeding.np_random(seed)
        self.np_random = rng
        return [used_seed]