Python gym.utils.seeding.np_random() Examples

The following are 30 code examples of gym.utils.seeding.np_random(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module gym.utils.seeding, or try the search function.

Example #1

Source File: blackjack.py From rl_algorithms with MIT License

7 votes

def _step(self, action):
    """Apply one player decision and return ``(obs, reward, done, info)``.

    A truthy ``action`` is a "hit": one card is drawn for the player and
    the episode ends with reward -1 only if ``is_bust`` reports a bust.
    A falsy ``action`` is a "stick": the dealer draws while
    ``sum_hand(self.dealer)`` is below 17, the two scores are compared
    with ``cmp`` and the episode ends.
    """
    assert self.action_space.contains(action)

    if not action:
        # Stick: play out the dealer's hand, then score both hands.
        while sum_hand(self.dealer) < 17:
            self.dealer.append(draw_card(self.np_random))
        reward = cmp(score(self.player), score(self.dealer))
        # A winning natural is bumped to 1.5 when ``self.natural`` is set.
        if self.natural and is_natural(self.player) and reward == 1:
            reward = 1.5
        return self._get_obs(), reward, True, {}

    # Hit: add a card to the player's hand and report the outcome.
    self.player.append(draw_card(self.np_random))
    busted = bool(is_bust(self.player))
    reward = -1 if busted else 0
    return self._get_obs(), reward, busted, {}

Example #2

Source File: kellycoinflip.py From DRL_DeliveryDuel with MIT License

6 votes

def step(self, action):
    """Place one bet and return ``(obs, reward, done, info)``.

    ``action`` is divided by 100 and capped at the current wealth. The
    episode ends with reward 0.0 when wealth is below 0.000001, and with
    the current wealth as reward when no rounds are left. Otherwise one
    coin is flipped, the bookkeeping counters are updated and the
    episode continues with reward 0.0.
    """
    bet = min(action / 100.0, self.wealth)

    # Terminal case 1: effectively broke.
    if self.wealth < 0.000001:
        return self._get_obs(), 0.0, True, {}

    # Terminal case 2: no rounds remaining, pay out what is left.
    if self.rounds == 0:
        reward = self.wealth
        return self._get_obs(), reward, True, {}

    # Regular round: consume it, flip, then settle the bet.
    self.rounds -= 1
    won = flip(self.edge, self.np_random)
    self.roundsElapsed += 1
    if won:
        self.wealth = min(self.maxWealth, self.wealth + bet)
        self.maxEverWealth = max(self.wealth, self.maxEverWealth)
        self.wins += 1
    else:
        self.wealth = self.wealth - bet
        self.losses += 1
    return self._get_obs(), 0.0, False, {}

Example #3

Source File: kellycoinflip.py From DRL_DeliveryDuel with MIT License

6 votes

def step(self, action):
    """Place one bet and return ``(obs, reward, done, info)``.

    The episode is over when wealth is below 0.000001 (reward 0.0) or
    when no rounds are left (reward is the current wealth). Otherwise a
    coin is flipped and the stake is added to or removed from wealth.
    """
    # Convert from pennies to dollars.
    stake = action / 100.0
    # Betting more than one owns is treated as betting everything.
    if stake > self.wealth:
        stake = self.wealth

    done, reward = True, 0.0
    if self.wealth < 0.000001:
        pass  # broke: terminal with zero reward
    elif self.rounds == 0:
        reward = self.wealth
    else:
        done = False
        self.rounds -= 1
        if flip(self.edge, self.np_random):
            self.wealth = min(self.maxWealth, self.wealth + stake)
        else:
            self.wealth = self.wealth - stake
    return self._get_obs(), reward, done, {}

Example #4

Source File: memorize_digits.py From DRL_DeliveryDuel with MIT License

6 votes

def step(self, action):
    """Score ``action`` against the current digit and show a new one.

    Returns ``(obs, reward, done, info)``. The reward is +1 when the
    action equals the digit currently held in ``self.digit`` and -1
    otherwise (including while ``self.digit`` is -1). After more than
    20 steps, each scored step ends the episode when a draw from
    ``randint(0, 5)`` comes up 0.
    """
    self.step_n += 1
    reward, done = -1, False
    if self.digit != -1:
        if self.digit == action:
            reward = +1
        # The termination draw is only made once step_n exceeds 20.
        done = self.step_n > 20 and 0 == self.np_random.randint(low=0, high=5)

    # Pick the digit for the next observation.
    self.digit = self.np_random.randint(low=0, high=10)

    # Background-coloured frame.
    frame = np.zeros((FIELD_H, FIELD_W, 3), dtype=np.uint8)
    frame[:, :, :] = self.color_bg

    # 6x6 glyph: background everywhere, digit colour where the template is 42.
    glyph = np.zeros((6, 6, 3), dtype=np.uint8)
    glyph[:] = self.color_bg
    glyph_mask = self.bogus_mnist[self.digit] == 42
    glyph[glyph_mask] = self.color_digit

    # Paste the glyph centred on (digit_y, digit_x).
    rows = slice(self.digit_y - 3, self.digit_y + 3)
    cols = slice(self.digit_x - 3, self.digit_x + 3)
    frame[rows, cols] = glyph

    self.last_obs = frame
    return frame, reward, done, {}

Example #5

Source File: tic_tac_toe_env.py From BERT with Apache License 2.0

6 votes

def __init__(self, strict=False):
    """Configure spaces, seed the RNG and start the first game."""
    self.strict = strict

    # Declared reward interval. (What about metadata and spec?)
    self.reward_range = (-1.0, 1.0)

    # Observation space -- a 3x3 integer grid bounded by -1 and 1; this
    # hopefully does what we need.
    board_shape = (3, 3)
    self.observation_space = spaces.Box(
        low=-1, high=1, shape=board_shape, dtype=np.int64)

    # Action space -- 9 positions that we can choose to mark.
    self.action_space = spaces.Discrete(9)

    # Declare the RNG attribute, then let seed() fill it in.
    self.np_random = None
    self.seed()

    # Declare the game state, then let reset() start the game.
    self.board_state, self.done = None, False
    self.reset()

Example #6

Source File: blackjack.py From DRL_DeliveryDuel with MIT License

6 votes

def step(self, action):
    """Play one blackjack decision and return ``(obs, reward, done, info)``.

    Truthy ``action`` means hit (draw a player card), falsy means stick
    (the dealer draws while ``sum_hand(self.dealer)`` is below 17 and
    the hands are then scored against each other).
    """
    assert self.action_space.contains(action)
    info = {}

    if action:
        # Hit: add a card to the player's hand; a bust loses immediately.
        self.player.append(draw_card(self.np_random))
        done = bool(is_bust(self.player))
        reward = -1 if done else 0
    else:
        # Stick: play out the dealer's hand, then score.
        done = True
        while sum_hand(self.dealer) < 17:
            self.dealer.append(draw_card(self.np_random))
        reward = cmp(score(self.player), score(self.dealer))
        natural_win = (
            self.natural and is_natural(self.player) and reward == 1
        )
        if natural_win:
            reward = 1.5

    return self._get_obs(), reward, done, info

Example #7

Source File: block_pushing.py From c-swm with MIT License

5 votes

def seed(self, seed=None):
    """Install a generator built by ``seeding.np_random(seed)``.

    Returns a one-element list with the seed value that call reported.
    """
    rng, used_seed = seeding.np_random(seed)
    self.np_random = rng
    return [used_seed]

Example #8

Source File: hotter_colder.py From DRL_DeliveryDuel with MIT License

5 votes

def seed(self, seed=None):
    """Store the generator from ``seeding.np_random(seed)`` on the env.

    Returns the seed reported by that call, wrapped in a list.
    """
    generator, effective_seed = seeding.np_random(seed)
    self.np_random = generator
    return [effective_seed]

Example #9

Source File: hotter_colder.py From DRL_DeliveryDuel with MIT License

5 votes

def reset(self):
    """Draw a new target number and clear the guess bookkeeping.

    The target is sampled with ``uniform(-self.range, self.range)``.
    Returns the initial observation, which is 0.
    """
    bound = self.range
    self.number = self.np_random.uniform(-bound, bound)
    self.guess_count = self.observation = 0
    return self.observation

Example #10

Source File: kellycoinflip.py From DRL_DeliveryDuel with MIT License

5 votes

def flip(edge, np_random):
    """Return whether a fresh ``np_random.uniform()`` draw is below ``edge``."""
    draw = np_random.uniform()
    return draw < edge

Example #11

Source File: atari_env.py From DRL_DeliveryDuel with MIT License

5 votes

def seed(self, seed=None):
    """Seed the numpy generator and the emulator, then load the ROM.

    Returns ``[seed1, seed2]``: the seed reported by
    ``seeding.np_random`` and the derived seed handed to ``self.ale``.
    """
    rng, primary_seed = seeding.np_random(seed)
    self.np_random = rng

    # Derive a second seed. It is passed as a uint but checked as an int
    # elsewhere, so it has to stay below 2**31.
    emulator_seed = seeding.hash_seed(primary_seed + 1) % 2**31

    # Empirically, the emulator must be seeded before the ROM is loaded.
    self.ale.setInt(b'random_seed', emulator_seed)
    self.ale.loadROM(self.game_path)
    return [primary_seed, emulator_seed]

Example #12

Source File: blackjack.py From DRL_DeliveryDuel with MIT License

5 votes

def seed(self, seed=None):
    """Replace ``self.np_random`` using ``seeding.np_random(seed)``.

    Returns a list containing the single seed value reported back.
    """
    new_rng, reported_seed = seeding.np_random(seed)
    self.np_random = new_rng
    return [reported_seed]

Example #13

Source File: blackjack.py From DRL_DeliveryDuel with MIT License

5 votes

def reset(self):
    """Deal fresh hands (dealer first, then player) and return the observation."""
    rng = self.np_random
    dealer_hand = draw_hand(rng)
    player_hand = draw_hand(rng)
    self.dealer, self.player = dealer_hand, player_hand
    return self._get_obs()

Example #14

Source File: roulette.py From DRL_DeliveryDuel with MIT License

5 votes

def step(self, action):
    """Spin once for ``action`` and return ``(obs, reward, done, info)``.

    The last action (``self.n - 1``) ends the episode with reward 0.
    Otherwise a number is drawn from ``[0, self.n - 1)``. Matching on 0
    pays ``self.n - 2.0``, matching parity with both values non-zero
    pays 1.0, and anything else costs 1.0. The observation is always 0.
    """
    assert self.action_space.contains(action)

    walk_away = self.n - 1
    if action == walk_away:
        # observation, reward, done, info
        return 0, 0, True, {}

    # N.B. np.random.randint draws from [A, B) while random.randint
    # draws from [A, B].
    spin = self.np_random.randint(0, walk_away)

    if spin == action == 0:
        return 0, self.n - 2.0, False, {}

    both_nonzero = spin != 0 and action != 0
    if both_nonzero and spin % 2 == action % 2:
        return 0, 1.0, False, {}

    return 0, -1.0, False, {}

Example #15

Source File: block_pushing.py From c-swm with MIT License

5 votes

def __init__(self, width=5, height=5, render_type='cubes', num_objects=5,
             seed=None):
    """Store the grid configuration, build the spaces, then seed and reset."""
    self.width, self.height = width, height
    self.render_type = render_type

    self.num_objects = num_objects
    # Each object can be moved N, E, S or W.
    self.num_actions = 4 * self.num_objects

    palette_size = max(9, self.num_objects)
    self.colors = utils.get_colors(num_colors=palette_size)

    # Declared here, with no value assigned yet.
    self.np_random = self.game = self.target = None

    # Initialize to a position outside of the env for easier collision
    # resolution.
    self.objects = [[-1, -1] for _ in range(self.num_objects)]

    # If True, then check for collisions and don't allow two objects to
    # occupy the same position.
    self.collisions = True

    self.action_space = spaces.Discrete(self.num_actions)
    obs_shape = (3, self.width, self.height)
    self.observation_space = spaces.Box(
        low=0, high=1, shape=obs_shape, dtype=np.float32)

    self.seed(seed)
    self.reset()

Example #16

Source File: quadrotor2d_slungload.py From reinmav-gym with BSD 3-Clause "New" or "Revised" License

5 votes

def reset(self):
    """Sample a new 9-element state uniformly from [-1, 1) and return a copy.

    Prints ``reset`` to stdout on every call.
    """
    print("reset")
    sample = self.np_random.uniform(low=-1.0, high=1.0, size=(9,))
    self.state = np.array(sample)
    # Return a separate array, not the stored state object itself.
    return np.array(self.state)

Example #17

Source File: quadrotor3d_slungload.py From reinmav-gym with BSD 3-Clause "New" or "Revised" License

5 votes

def reset(self):
    """Sample a new 16-element state uniformly from [-1, 1) and return a copy.

    Prints ``reset`` to stdout on every call.
    """
    print("reset")
    sample = self.np_random.uniform(low=-1.0, high=1.0, size=(16,))
    self.state = np.array(sample)
    # Return a separate array, not the stored state object itself.
    return np.array(self.state)

Example #18

Source File: atari_env.py From DRL_DeliveryDuel with MIT License

5 votes

def step(self, a):
    """Repeat the chosen emulator action and return ``(ob, reward, done, info)``.

    ``a`` indexes ``self._action_set``. An int ``self.frameskip`` is the
    repeat count; otherwise the count is drawn with
    ``randint(frameskip[0], frameskip[1])``. The reward is the sum of
    the per-frame rewards, and ``info`` carries the remaining lives
    under ``"ale.lives"``.
    """
    action = self._action_set[a]

    skip = self.frameskip
    if isinstance(skip, int):
        repeats = skip
    else:
        repeats = self.np_random.randint(skip[0], skip[1])

    # Plain accumulation loop: keeps the float additions in call order.
    total = 0.0
    for _ in range(repeats):
        total += self.ale.act(action)

    ob = self._get_obs()
    done = self.ale.game_over()
    info = {"ale.lives": self.ale.lives()}
    return ob, total, done, info

Example #19

Source File: kellycoinflip.py From DRL_DeliveryDuel with MIT License

5 votes

def seed(self, seed=None):
    """Build the env's generator via ``seeding.np_random(seed)``.

    Returns ``[seed]`` where ``seed`` is the value reported by that call.
    """
    rng, actual_seed = seeding.np_random(seed)
    self.np_random = rng
    return [actual_seed]

Example #20

Source File: acrobot.py From DRL_DeliveryDuel with MIT License

5 votes

def reset(self):
    """Draw a 4-element state uniformly from [-0.1, 0.1) and return the observation."""
    spread = 0.1
    self.state = self.np_random.uniform(low=-spread, high=spread, size=(4,))
    return self._get_ob()

Example #21

Source File: acrobot.py From DRL_DeliveryDuel with MIT License

5 votes

def seed(self, seed=None):
    """Assign ``self.np_random`` from ``seeding.np_random(seed)``.

    Returns the reported seed as a single-item list.
    """
    generator, seed_used = seeding.np_random(seed)
    self.np_random = generator
    return [seed_used]

Example #22

Source File: cartpole.py From DRL_DeliveryDuel with MIT License

5 votes

def reset(self):
    """Draw a 4-element state uniformly from [-0.05, 0.05) and return a copy.

    Also clears ``self.steps_beyond_done``.
    """
    initial = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
    self.state = initial
    self.steps_beyond_done = None
    # Return a separate array, not the stored state object itself.
    return np.array(initial)

Example #23

Source File: cartpole.py From DRL_DeliveryDuel with MIT License

5 votes

def seed(self, seed=None):
    """Create the env's generator with ``seeding.np_random(seed)``.

    Returns a list whose only element is the seed that call reported.
    """
    rng, resolved_seed = seeding.np_random(seed)
    self.np_random = rng
    return [resolved_seed]

Example #24

Source File: continuous_mountain_car.py From DRL_DeliveryDuel with MIT License

5 votes

def reset(self):
    """Start from a uniform draw in [-0.6, -0.4) paired with 0.

    Stores the two-element state on ``self.state`` and returns a copy.
    """
    first = self.np_random.uniform(low=-0.6, high=-0.4)
    self.state = np.array([first, 0])
    # Return a separate array, not the stored state object itself.
    return np.array(self.state)

#    def get_state(self):
#        return self.state

Example #25

Source File: mountain_car.py From DRL_DeliveryDuel with MIT License

5 votes

def reset(self):
    """Start from a uniform draw in [-0.6, -0.4) paired with 0.

    Stores the two-element state on ``self.state`` and returns a copy.
    """
    first = self.np_random.uniform(low=-0.6, high=-0.4)
    fresh_state = np.array([first, 0])
    self.state = fresh_state
    # Return a separate array, not the stored state object itself.
    return np.array(fresh_state)

Example #26

Source File: mountain_car.py From DRL_DeliveryDuel with MIT License

5 votes

def seed(self, seed=None):
    """Set ``self.np_random`` to the generator from ``seeding.np_random(seed)``.

    Returns the accompanying seed value inside a list.
    """
    new_rng, seed_value = seeding.np_random(seed)
    self.np_random = new_rng
    return [seed_value]

Example #27

Source File: pendulum.py From DRL_DeliveryDuel with MIT License

5 votes

def reset(self):
    """Draw a two-element state uniformly within ``[-pi, pi) x [-1, 1)``.

    Clears ``self.last_u`` and returns ``self._get_obs()``.
    """
    bound = np.array([np.pi, 1])
    self.state = self.np_random.uniform(low=-bound, high=bound)
    self.last_u = None
    return self._get_obs()

Example #28

Source File: pendulum.py From DRL_DeliveryDuel with MIT License

5 votes

def seed(self, seed=None):
    """Re-create ``self.np_random`` through ``seeding.np_random(seed)``.

    Returns a single-element list with the seed that call reported.
    """
    generator, chosen_seed = seeding.np_random(seed)
    self.np_random = generator
    return [chosen_seed]

Example #29

Source File: bipedal_walker.py From DRL_DeliveryDuel with MIT License

5 votes

def _generate_clouds(self):
    """Fill ``self.cloud_poly`` with randomly jittered five-point polygons.

    One polygon is generated per 20 units of ``TERRAIN_LENGTH``. Each
    entry is ``(points, min_x, max_x)``.
    """
    # Sorry for the clouds, couldn't resist
    self.cloud_poly = []
    for _ in range(TERRAIN_LENGTH // 20):
        x = self.np_random.uniform(0, TERRAIN_LENGTH) * TERRAIN_STEP
        y = VIEWPORT_H / SCALE * 3 / 4

        points = []
        for a in range(5):
            # Five points around an ellipse; 3.14 is kept as written.
            angle = 3.14 * 2 * a / 5
            # The x jitter is drawn before the y jitter for every point.
            px = (x + 15 * TERRAIN_STEP * math.sin(angle)
                  + self.np_random.uniform(0, 5 * TERRAIN_STEP))
            py = (y + 5 * TERRAIN_STEP * math.cos(angle)
                  + self.np_random.uniform(0, 5 * TERRAIN_STEP))
            points.append((px, py))

        xs = [pt[0] for pt in points]
        self.cloud_poly.append((points, min(xs), max(xs)))

Example #30

Source File: car_racing.py From DRL_DeliveryDuel with MIT License

5 votes

def seed(self, seed=None):
    """Attach the generator returned by ``seeding.np_random(seed)``.

    Returns the seed value from that call as a one-item list.
    """
    rng, final_seed = seeding.np_random(seed)
    self.np_random = rng
    return [final_seed]