Python gym.spec() Examples

The following are 30 code examples of gym.spec(), drawn from open-source projects. The source file, project, and license are noted above each example. You may also want to check out all available functions/classes of the gym module.
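Before diving into the examples, here is a minimal sketch of the gym.spec() call itself (classic gym API; the printed values are illustrative): it looks up the registered EnvSpec for an environment id, whose attributes describe the registration and whose make() method instantiates the environment.

import gym

spec = gym.spec('CartPole-v1')    # look up the registered EnvSpec
print(spec.id)                    # 'CartPole-v1'
print(spec.max_episode_steps)     # 500 for CartPole-v1

# spec.make() builds the bare environment; unlike gym.make(), it does
# not add the TimeLimit wrapper implied by max_episode_steps.
env = spec.make()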
Example #1
Source File: wrappers_test.py    From agents with Apache License 2.0
def test_batch_env(self):
    """Vary the observation spec and step the environment."""
    obs_spec = collections.OrderedDict({
        'obs1': array_spec.ArraySpec((1,), np.int32),
        'obs2': array_spec.ArraySpec((2,), np.int32),
    })

    action_spec = array_spec.BoundedArraySpec((), np.int32, -10, 10)

    # Generate a random py environment with the given batch size.
    batch_size = 4
    env = random_py_environment.RandomPyEnvironment(
        obs_spec, action_spec=action_spec, batch_size=batch_size)

    env = wrappers.FlattenObservationsWrapper(env)
    time_step = env.step(
        array_spec.sample_bounded_spec(action_spec, np.random.RandomState()))

    expected_shape = self._get_expected_shape(obs_spec, obs_spec.keys())
    self.assertEqual(time_step.observation.shape,
                     (batch_size, expected_shape[0]))
    self.assertEqual(
        env.observation_spec(),
        array_spec.ArraySpec(
            shape=expected_shape, dtype=np.int32, name='packed_observations')) 
Example #2
Source File: gym_wrapper_test.py    From agents with Apache License 2.0
def test_wrapped_cartpole_specs(self):
    # Note we use spec.make on gym envs to avoid getting a TimeLimit wrapper on
    # the environment.
    cartpole_env = gym.spec('CartPole-v1').make()
    env = gym_wrapper.GymWrapper(cartpole_env)

    action_spec = env.action_spec()
    self.assertEqual((), action_spec.shape)
    self.assertEqual(0, action_spec.minimum)
    self.assertEqual(1, action_spec.maximum)

    observation_spec = env.observation_spec()
    self.assertEqual((4,), observation_spec.shape)
    self.assertEqual(np.float32, observation_spec.dtype)
    high = np.array([
        4.8,
        np.finfo(np.float32).max, 2 / 15.0 * math.pi,
        np.finfo(np.float32).max
    ])
    np.testing.assert_array_almost_equal(-high, observation_spec.minimum)
    np.testing.assert_array_almost_equal(high, observation_spec.maximum) 
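The comment in Example #2 is worth unpacking: in classic gym releases, gym.make() wraps the environment in a TimeLimit wrapper whenever the spec declares max_episode_steps, while spec.make() returns the unwrapped environment. A quick sketch of the difference (behavior as in pre-0.22 gym):

import gym
from gym.wrappers import TimeLimit

wrapped = gym.make('CartPole-v1')        # TimeLimit around CartPoleEnv
bare = gym.spec('CartPole-v1').make()    # bare CartPoleEnv

print(isinstance(wrapped, TimeLimit))    # True
print(isinstance(bare, TimeLimit))       # False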
Example #3
Source File: gym_wrapper_test.py    From agents with Apache License 2.0
def test_spec_from_gym_space_when_simplify_box_bounds_false(self):
    # testing on gym.spaces.Dict which makes recursive calls to
    # _spec_from_gym_space
    box_space = gym.spaces.Box(-1.0, 1.0, (2,))
    dict_space = gym.spaces.Dict({'box1': box_space, 'box2': box_space})
    spec = gym_wrapper.spec_from_gym_space(
        dict_space, simplify_box_bounds=False)

    self.assertEqual((2,), spec['box1'].shape)
    self.assertEqual((2,), spec['box2'].shape)
    self.assertEqual(np.float32, spec['box1'].dtype)
    self.assertEqual(np.float32, spec['box2'].dtype)
    self.assertEqual('box1', spec['box1'].name)
    self.assertEqual('box2', spec['box2'].name)
    np.testing.assert_array_equal(np.array([-1, -1], dtype=int),
                                  spec['box1'].minimum)
    np.testing.assert_array_equal(np.array([1, 1], dtype=int),
                                  spec['box1'].maximum)
    np.testing.assert_array_equal(np.array([-1, -1], dtype=int),
                                  spec['box2'].minimum)
    np.testing.assert_array_equal(np.array([1, 1], dtype=int),
                                  spec['box2'].maximum)
Example #4
Source File: wrappers_test.py    From agents with Apache License 2.0
def test_limit_duration_wrapped_env_forwards_calls(self):
    cartpole_env = gym.spec('CartPole-v1').make()
    env = gym_wrapper.GymWrapper(cartpole_env)
    env = wrappers.TimeLimit(env, 10)

    action_spec = env.action_spec()
    self.assertEqual((), action_spec.shape)
    self.assertEqual(0, action_spec.minimum)
    self.assertEqual(1, action_spec.maximum)

    observation_spec = env.observation_spec()
    self.assertEqual((4,), observation_spec.shape)
    high = np.array([
        4.8,
        np.finfo(np.float32).max, 2 / 15.0 * math.pi,
        np.finfo(np.float32).max
    ])
    np.testing.assert_array_almost_equal(-high, observation_spec.minimum)
    np.testing.assert_array_almost_equal(high, observation_spec.maximum) 
Example #5
Source File: wrappers_test.py    From agents with Apache License 2.0
def test_with_varying_observation_specs(
      self, observation_keys, observation_shapes, observation_dtypes):
    """Vary the observation spec and step the environment."""
    obs_spec = collections.OrderedDict()
    for idx, key in enumerate(observation_keys):
      obs_spec[key] = array_spec.ArraySpec(observation_shapes[idx],
                                           observation_dtypes)
    action_spec = array_spec.BoundedArraySpec((), np.int32, -10, 10)

    env = random_py_environment.RandomPyEnvironment(
        obs_spec, action_spec=action_spec)
    env = wrappers.FlattenObservationsWrapper(env)
    time_step = env.step(
        array_spec.sample_bounded_spec(action_spec, np.random.RandomState()))
    # Check that all observations returned from the environment are packed
    # into one dimension.
    expected_shape = self._get_expected_shape(obs_spec, obs_spec.keys())
    self.assertEqual(time_step.observation.shape, expected_shape)
    self.assertEqual(
        env.observation_spec(),
        array_spec.ArraySpec(
            shape=expected_shape,
            dtype=observation_dtypes,
            name='packed_observations')) 
Example #6
Source File: wrappers_test.py    From agents with Apache License 2.0
def test_with_varying_observation_specs(
      self, observation_keys, observation_shapes, observation_dtypes):
    """Vary the observation spec and step the environment."""
    obs_spec = collections.OrderedDict()
    for idx, key in enumerate(observation_keys):
      obs_spec[key] = array_spec.ArraySpec(observation_shapes[idx],
                                           observation_dtypes)
    action_spec = array_spec.BoundedArraySpec((), np.int32, -10, 10)

    env = random_py_environment.RandomPyEnvironment(
        obs_spec, action_spec=action_spec)
    env = MockGoalReplayEnvWrapper(env)
    random_action = array_spec.sample_bounded_spec(action_spec,
                                                   np.random.RandomState())
    time_step = env.step(random_action)
    self.assertIsInstance(time_step.observation, dict)
    self.assertEqual(time_step.observation.keys(),
                     env.observation_spec().keys())
    time_step = env.reset()
    self.assertIsInstance(time_step.observation, dict)
    self.assertEqual(time_step.observation.keys(),
                     env.observation_spec().keys()) 
Example #7
Source File: multiprocessing_env.py    From universe with MIT License
def configure(self, n=1, pool_size=None, episode_limit=None):
        self.n = n
        self.envs = [self.spec.make() for _ in range(self.n)]

        if pool_size is None:
            pool_size = min(len(self.envs), multiprocessing.cpu_count() - 1)
            pool_size = max(1, pool_size)

        self.worker_n = []
        m = (self.n + pool_size - 1) // pool_size  # ceil(n / pool_size) envs per worker
        for i in range(0, self.n, m):
            envs = self.envs[i:i+m]
            self.worker_n.append(Worker(envs, i))

        if episode_limit is not None:
            self._episode_id.episode_limit = episode_limit 
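The partitioning arithmetic in configure() is ceiling division: each worker receives m = ceil(n / pool_size) environments, so the last worker may receive fewer. A standalone sketch of the same chunking with toy values (no Worker class needed):

n, pool_size = 10, 3
m = (n + pool_size - 1) // pool_size          # ceil(10 / 3) == 4
chunks = [list(range(i, min(i + m, n))) for i in range(0, n, m)]
print(chunks)                                 # [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]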
Example #8
Source File: gym_core.py    From universe with MIT License
def __init__(self, env, gym_core_id=None):
        super(GymCoreAction, self).__init__(env)

        if gym_core_id is None:
            # self.spec is None while inside make(), so gym_core_id has to
            # be passed explicitly there. This branch is hit when the
            # wrapper is instantiated by hand.
            gym_core_id = self.spec._kwargs['gym_core_id']

        spec = gym.spec(gym_core_id)
        raw_action_space = gym_core_action_space(gym_core_id)

        self._actions = raw_action_space.actions
        self.action_space = gym_spaces.Discrete(len(self._actions))

        if spec._entry_point.startswith('gym.envs.atari:'):
            self.key_state = translator.AtariKeyState(gym.make(gym_core_id))
        else:
            self.key_state = None 
Example #9
Source File: classic_atari.py    From vel with MIT License
def specification(self) -> EnvSpec:
        """ Return environment specification """
        return gym.spec(self.envname) 
Example #10
Source File: gym_wrapper_test.py    From agents with Apache License 2.0
def test_wrapped_cartpole_reset(self):
    cartpole_env = gym.spec('CartPole-v1').make()
    env = gym_wrapper.GymWrapper(cartpole_env)

    first_time_step = env.reset()
    self.assertTrue(first_time_step.is_first())
    self.assertEqual(0.0, first_time_step.reward)
    self.assertEqual(1.0, first_time_step.discount)
    self.assertEqual((4,), first_time_step.observation.shape)
    self.assertEqual(np.float32, first_time_step.observation.dtype) 
Example #11
Source File: gym_wrapper_test.py    From agents with Apache License 2.0
def test_wrapped_cartpole_final(self):
    cartpole_env = gym.spec('CartPole-v1').make()
    env = gym_wrapper.GymWrapper(cartpole_env)
    time_step = env.reset()

    while not time_step.is_last():
      time_step = env.step(np.array(1, dtype=np.int32))

    self.assertTrue(time_step.is_last())
    self.assertNotEqual(None, time_step.reward)
    self.assertEqual(0.0, time_step.discount)
    self.assertEqual((4,), time_step.observation.shape) 
Example #12
Source File: gym_wrapper_test.py    From agents with Apache License 2.0
def test_get_info(self):
    cartpole_env = gym.spec('CartPole-v1').make()
    env = gym_wrapper.GymWrapper(cartpole_env)
    self.assertEqual(None, env.get_info())
    env.reset()
    self.assertEqual(None, env.get_info())
    env.step(np.array(0, dtype=np.int32))
    self.assertEqual({}, env.get_info()) 
Example #13
Source File: gym_wrapper_test.py    From agents with Apache License 2.0
def test_automatic_reset_after_create(self):
    cartpole_env = gym.spec('CartPole-v1').make()
    env = gym_wrapper.GymWrapper(cartpole_env)

    first_time_step = env.step(0)
    self.assertTrue(first_time_step.is_first()) 
Example #14
Source File: gym_wrapper_test.py    From agents with Apache License 2.0
def test_automatic_reset_after_done(self):
    cartpole_env = gym.spec('CartPole-v1').make()
    env = gym_wrapper.GymWrapper(cartpole_env)
    time_step = env.reset()

    while not time_step.is_last():
      time_step = env.step(np.array(1, dtype=np.int32))

    self.assertTrue(time_step.is_last())
    first_time_step = env.step(0)
    self.assertTrue(first_time_step.is_first()) 
Example #15
Source File: gym_wrapper_test.py    From agents with Apache License 2.0
def test_automatic_reset_after_done_not_using_reset_directly(self):
    cartpole_env = gym.spec('CartPole-v1').make()
    env = gym_wrapper.GymWrapper(cartpole_env)
    time_step = env.step(1)

    while not time_step.is_last():
      time_step = env.step(np.array(1, dtype=np.int32))

    self.assertTrue(time_step.is_last())
    first_time_step = env.step(0)
    self.assertTrue(first_time_step.is_first()) 
Example #16
Source File: multiprocessing_env.py    From universe with MIT License
def __init__(self, env_id):
        self.worker_n = None

        # Pull the relevant info from a transient env instance
        self.spec = gym.spec(env_id)
        env = self.spec.make()

        current_metadata = self.metadata
        self.metadata = env.metadata.copy()
        self.metadata.update(current_metadata)

        self.action_space = env.action_space
        self.observation_space = env.observation_space
        self.reward_range = env.reward_range 
Example #17
Source File: gym_wrapper_test.py    From agents with Apache License 2.0
def test_obs_dtype(self):
    cartpole_env = gym.spec('CartPole-v1').make()
    env = gym_wrapper.GymWrapper(cartpole_env)
    time_step = env.reset()
    self.assertEqual(env.observation_spec().dtype, time_step.observation.dtype) 
Example #18
Source File: wrappers_test.py    From agents with Apache License 2.0
def test_default_batch_properties(self):
    cartpole_env = gym.spec('CartPole-v1').make()
    env = gym_wrapper.GymWrapper(cartpole_env)
    self.assertFalse(env.batched)
    self.assertEqual(env.batch_size, None)
    wrap_env = wrappers.PyEnvironmentBaseWrapper(env)
    self.assertEqual(wrap_env.batched, env.batched)
    self.assertEqual(wrap_env.batch_size, env.batch_size) 
Example #19
Source File: mujoco.py    From vel with MIT License
def specification(self) -> EnvSpec:
        """ Return environment specification """
        return gym.spec(self.envname) 
Example #20
Source File: classic_atari.py    From vel with MIT License
def env_maker(environment_id):
    """ Create a relatively raw atari environment """
    env = gym.make(environment_id)
    assert 'NoFrameskip' in env.spec.id

    # Wait for between 1 and 30 rounds doing nothing on start
    env = NoopResetEnv(env, noop_max=30)

    # Do the same action for k steps. Return max of last 2 frames. Return sum of rewards
    env = MaxAndSkipEnv(env, skip=4)

    return env 
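A hypothetical call for illustration, assuming the baselines-style NoopResetEnv and MaxAndSkipEnv wrappers are importable and the Atari ROMs are installed; the id must be a NoFrameskip variant or the assert fires:

env = env_maker('BreakoutNoFrameskip-v4')    # passes the 'NoFrameskip' assert
print(env.spec.id)                           # 'BreakoutNoFrameskip-v4'

# env_maker('Breakout-v4') would fail the assert, since that id already
# applies frame skipping inside the emulator wrapper.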
Example #21
Source File: wrappers_test.py    From agents with Apache License 2.0
def test_with_varying_observation_filters(self, observations_to_keep):
    """Vary the observations to save from the environment."""
    obs_spec = collections.OrderedDict({
        'obs1': array_spec.ArraySpec((1,), np.int32),
        'obs2': array_spec.ArraySpec((2,), np.int32),
        'obs3': array_spec.ArraySpec((3,), np.int32)
    })

    observations_to_keep = np.array([observations_to_keep]).flatten()
    action_spec = array_spec.BoundedArraySpec((), np.int32, -10, 10)

    env = random_py_environment.RandomPyEnvironment(
        obs_spec, action_spec=action_spec)
    # Create the wrapper with the list of observations to keep before packing
    # them into one dimension.
    env = wrappers.FlattenObservationsWrapper(
        env, observations_whitelist=observations_to_keep)
    time_step = env.step(
        array_spec.sample_bounded_spec(action_spec, np.random.RandomState()))
    # The expected shape is the sum of the observation lengths in the
    # observation spec, after filtering by the observations_to_keep list.
    expected_shape = self._get_expected_shape(obs_spec, observations_to_keep)
    # Check the shape of observations returned from stepping the environment,
    # and additionally check the environment spec.
    self.assertEqual(time_step.observation.shape, expected_shape)
    self.assertEqual(
        env.observation_spec(),
        array_spec.ArraySpec(
            shape=expected_shape, dtype=np.int32, name='packed_observations')) 
Example #22
Source File: envs.py    From feudal_networks with MIT License
def create_env(env_id, client_id, remotes, **kwargs):
    spec = gym.spec(env_id)

    if spec.tags.get('feudal', False):
        return create_feudal_env(env_id, client_id, remotes, **kwargs)
    elif spec.tags.get('flashgames', False):
        return create_flash_env(env_id, client_id, remotes, **kwargs)
    elif spec.tags.get('atari', False) and spec.tags.get('vnc', False):
        return create_vncatari_env(env_id, client_id, remotes, **kwargs)
    else:
        # Assume atari.
        assert "." not in env_id  # universe environments have dots in names.
        return create_atari_env(env_id) 
Example #23
Source File: wrappers_test.py    From agents with Apache License 2.0
def _get_expected_shape(self, observation, observations_to_keep):
    """Gets the expected shape of a flattened observation nest."""
    # The expected shape is the sum of the observation lengths in the
    # observation spec.  A multi-dimensional observation is flattened, so its
    # length is the product of its shape, e.g. two arrays of shapes (3, 3) and
    # (2, 3) flatten to lengths 9 and 6, for a total length of 15.
    expected_shape = 0
    for obs in observations_to_keep:
      expected_shape += np.prod(observation[obs].shape)
    return (expected_shape,) 
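The docstring's arithmetic is easy to verify directly with numpy: a (3, 3) array flattens to length 9 and a (2, 3) array to length 6, for a combined length of 15.

import numpy as np

shapes = [(3, 3), (2, 3)]
total = sum(int(np.prod(s)) for s in shapes)
print(total)    # 15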
Example #24
Source File: wrappers_test.py    From agents with Apache License 2.0
def test_observation_spec_changed(self):
    cartpole_env = gym.spec('CartPole-v1').make()
    env = gym_wrapper.GymWrapper(cartpole_env)
    obs_shape = env.observation_spec().shape

    history_env = wrappers.HistoryWrapper(env, 3)
    self.assertEqual((3,) + obs_shape, history_env.observation_spec().shape) 
Example #25
Source File: wrappers_test.py    From agents with Apache License 2.0
def test_observation_spec_changed_with_action(self):
    cartpole_env = gym.spec('CartPole-v1').make()
    env = gym_wrapper.GymWrapper(cartpole_env)
    obs_shape = env.observation_spec().shape
    action_shape = env.action_spec().shape

    history_env = wrappers.HistoryWrapper(env, 3, include_actions=True)
    self.assertEqual((3,) + obs_shape,
                     history_env.observation_spec()['observation'].shape)
    self.assertEqual((3,) + action_shape,
                     history_env.observation_spec()['action'].shape) 
Example #26
Source File: envs.py    From FeatureControlHRL with MIT License
def create_env(env_id, client_id, remotes, **kwargs):
    spec = gym.spec(env_id)

    if spec.tags.get('flashgames', False):
        return create_flash_env(env_id, client_id, remotes, **kwargs)
    elif spec.tags.get('atari', False) and spec.tags.get('vnc', False):
        return create_vncatari_env(env_id, client_id, remotes, **kwargs)
    else:
        # Assume atari.
        assert "." not in env_id  # universe environments have dots in names.
        return create_atari_env(env_id) 
Example #27
Source File: classic_control.py    From vel with MIT License
def specification(self) -> EnvSpec:
        """ Return environment specification """
        return gym.spec(self.envname) 
Example #28
Source File: suite_atari.py    From agents with Apache License 2.0
def load(
    environment_name: Text,
    discount: types.Float = 1.0,
    max_episode_steps: Optional[types.Int] = None,
    gym_env_wrappers: Sequence[
        types.GymEnvWrapper] = DEFAULT_ATARI_GYM_WRAPPERS,
    env_wrappers: Sequence[types.PyEnvWrapper] = (),
    spec_dtype_map: Optional[Dict[gym.Space, np.dtype]] = None
) -> py_environment.PyEnvironment:
  """Loads the selected environment and wraps it with the specified wrappers."""
  if spec_dtype_map is None:
    spec_dtype_map = {gym.spaces.Box: np.uint8}

  gym_spec = gym.spec(environment_name)
  gym_env = gym_spec.make()

  if max_episode_steps is None and gym_spec.max_episode_steps is not None:
    max_episode_steps = gym_spec.max_episode_steps

  return suite_gym.wrap_env(
      gym_env,
      discount=discount,
      max_episode_steps=max_episode_steps,
      gym_env_wrappers=gym_env_wrappers,
      time_limit_wrapper=atari_wrappers.AtariTimeLimit,
      env_wrappers=env_wrappers,
      spec_dtype_map=spec_dtype_map,
      auto_reset=False) 
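Typical usage of this loader, assuming TF-Agents and the Atari ROMs are installed ('PongNoFrameskip-v4' is a standard registered id; the exact observation shape depends on the default wrapper settings):

from tf_agents.environments import suite_atari

env = suite_atari.load('PongNoFrameskip-v4')
time_step = env.reset()
print(time_step.observation.shape)    # e.g. (84, 84, 1) after preprocessing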
Example #29
Source File: vnc_core_env.py    From universe with MIT License
def __init__(self, gym_core_id, fps=60, vnc_pixels=True):
        super(GymCoreSyncEnv, self).__init__(gym_core_id, fps=fps)
        # Metadata has already been cloned
        self.metadata['semantics.async'] = False

        self.gym_core_id = gym_core_id
        self.vnc_pixels = vnc_pixels

        if not vnc_pixels:
            self._core_env = gym.spec(gym_core_id).make()
        else:
            self._core_env = None 
Example #30
Source File: __init__.py    From universe with MIT License
def WrappedGymCoreEnv(gym_core_id, fps=None, rewarder_observation=False):
    # Don't need to store the ID on the instance; it'll be retrieved
    # directly from the spec
    env = wrap(envs.VNCEnv(fps=fps))
    if rewarder_observation:
        env = GymCoreObservation(env, gym_core_id=gym_core_id)
    return env