python source code of test_categorical_mlp

garage-master
- src
  - garage
    - replay_buffer
      - replay_buffer.py
      - her_replay_buffer.py
      - __init__.py
      - path_buffer.py
    - plotter
      - plotter.py
      - __init__.py
    - np
      - algos
        rl_algorithm.py
        meta_rl_algorithm.py
        cem.py
        nop.py
        cma_es.py
        __init__.py
      - policies
        fixed_policy.py
        policy.py
        __init__.py
        scripted_policy.py
      - embeddings
        encoder.py
        __init__.py
      - q_functions
        __init__.py
        q_function.py
      - _functions.py
      - exploration_policies
        exploration_policy.py
        epsilon_greedy_policy.py
        add_ornstein_uhlenbeck_noise.py
        __init__.py
        add_gaussian_noise.py
      - baselines
        linear_multi_feature_baseline.py
        linear_feature_baseline.py
        baseline.py
        __init__.py
        zero_baseline.py
      - __init__.py
      - optimizers
        minibatch_dataset.py
        __init__.py
    - tf
      - plotter
        plotter.py
        __init__.py
      - algos
        ddpg.py
        rl2.py
        vpg.py
        td3.py
        npo.py
        ppo.py
        tnpg.py
        trpo.py
        te.py
        rl2ppo.py
        te_npo.py
        reps.py
        _rl2npo.py
        rl2trpo.py
        __init__.py
        dqn.py
        erwr.py
        te_ppo.py
      - policies
        categorical_gru_policy.py
        discrete_qf_derived_policy.py
        gaussian_mlp_policy.py
        policy.py
        categorical_lstm_policy.py
        gaussian_lstm_policy.py
        categorical_cnn_policy.py
        gaussian_gru_policy.py
        task_embedding_policy.py
        __init__.py
        uniform_control_policy.py
        continuous_mlp_policy.py
        categorical_mlp_policy.py
        gaussian_mlp_task_embedding_policy.py
      - distributions
        diagonal_gaussian.py
        bernoulli.py
        distribution.py
        __init__.py
        recurrent_categorical.py
        categorical.py
        recurrent_diagonal_gaussian.py
      - embeddings
        encoder.py
        gaussian_mlp_encoder.py
        __init__.py
      - q_functions
        discrete_mlp_q_function.py
        continuous_mlp_q_function.py
        discrete_cnn_q_function.py
        __init__.py
        continuous_cnn_q_function.py
        q_function.py
      - models
        mlp_model.py
        gaussian_mlp_model.py
        module.py
        gaussian_cnn_model.py
        mlp_dueling_model.py
        sequential.py
        lstm_model.py
        gru_model.py
        normalized_input_mlp_model.py
        cnn_mlp_merge_model.py
        categorical_lstm_model.py
        cnn.py
        model.py
        lstm.py
        categorical_mlp_model.py
        categorical_cnn_model.py
        __init__.py
        cnn_model.py
        parameter.py
        mlp.py
        gru.py
        mlp_merge_model.py
        gaussian_lstm_model.py
        cnn_model_max_pooling.py
        categorical_gru_model.py
        gaussian_gru_model.py
      - misc
        __init__.py
        tensor_utils.py
      - _functions.py
      - baselines
        gaussian_mlp_baseline.py
        continuous_mlp_baseline.py
        __init__.py
        gaussian_cnn_baseline.py
      - __init__.py
      - regressors
        gaussian_cnn_regressor_model.py
        regressor.py
        bernoulli_mlp_regressor.py
        gaussian_mlp_regressor_model.py
        gaussian_mlp_regressor.py
        categorical_mlp_regressor_model.py
        __init__.py
        categorical_mlp_regressor.py
      - optimizers
        conjugate_gradient_optimizer.py
        penalty_lbfgs_optimizer.py
        lbfgs_optimizer.py
        __init__.py
        utils.py
        first_order_optimizer.py
      - samplers
        __init__.py
        worker.py
    - misc
      - __init__.py
      - tensor_utils.py
    - _functions.py
    - _dtypes.py
    - torch
      - algos
        maml.py
        ddpg.py
        bc.py
        maml_ppo.py
        maml_vpg.py
        vpg.py
        pearl.py
        ppo.py
        maml_trpo.py
        mtsac.py
        trpo.py
        sac.py
        __init__.py
      - policies
        tanh_gaussian_mlp_policy.py
        gaussian_mlp_policy.py
        policy.py
        stochastic_policy.py
        context_conditioned_policy.py
        deterministic_mlp_policy.py
        __init__.py
      - value_functions
        gaussian_mlp_value_function.py
        value_function.py
        __init__.py
      - distributions
        tanh_normal.py
        __init__.py
      - embeddings
        mlp_encoder.py
        __init__.py
      - q_functions
        continuous_mlp_q_function.py
        __init__.py
      - _functions.py
      - __init__.py
      - optimizers
        conjugate_gradient_optimizer.py
        optimizer_wrapper.py
        __init__.py
        differentiable_sgd.py
      - modules
        mlp_module.py
        multi_headed_mlp_module.py
        gaussian_mlp_module.py
        __init__.py
    - __init__.py
    - sampler
      - sampler.py
      - local_sampler.py
      - multiprocessing_sampler.py
      - env_update.py
      - vec_worker.py
      - __init__.py
      - worker_factory.py
      - utils.py
      - worker.py
      - default_worker.py
      - ray_sampler.py
    - experiment
      - task_sampler.py
      - local_tf_runner.py
      - deterministic.py
      - local_runner.py
      - experiment.py
      - __init__.py
      - meta_evaluator.py
      - snapshotter.py
    - envs
      - point_env.py
      - dm_control
        dm_control_viewer.py
        __init__.py
        dm_control_env.py
      - mujoco
        half_cheetah_env_meta_base.py
        half_cheetah_dir_env.py
        half_cheetah_vel_env.py
        __init__.py
      - grid_world_env.py
      - step.py
      - garage_env.py
      - normalized_env.py
      - __init__.py
      - task_onehot_wrapper.py
      - multi_env_wrapper.py
      - wrappers
        episodic_life.py
        atari_env.py
        clip_reward.py
        max_and_skip.py
        pixel_observation.py
        stack_frames.py
        noop.py
        grayscale.py
        resize.py
        __init__.py
        fire_reset.py
      - bullet
        __init__.py
        bullet_env.py
      - env_spec.py
- Makefile
- examples
  - step_bullet_kuka_env.py
  - np
    - cem_cartpole.py
    - cma_es_cartpole.py
  - jupyter
    - custom_env.ipynb
  - step_dm_control_env.py
  - sim_policy.py
  - tf
    - ppo_memorize_digits.py
    - rl2_ppo_halfcheetah.py
    - reps_gym_cartpole.py
    - trpo_cartpole.py
    - te_ppo_point.py
    - rl2_ppo_metaworld_ml1_push.py
    - trpo_gym_tf_cartpole.py
    - trpo_cartpole_bullet.py
    - vpg_cartpole.py
    - ppo_pendulum.py
    - trpo_gym_tf_cartpole_pretrained.py
    - multi_env_ppo.py
    - trpo_swimmer_ray_sampler.py
    - her_ddpg_fetchreach.py
    - te_ppo_metaworld_mt10.py
    - trpo_cartpole_recurrent.py
    - dqn_pong.py
    - td3_pendulum.py
    - rl2_ppo_metaworld_ml10.py
    - trpo_cubecrash.py
    - erwr_cartpole.py
    - ddpg_pendulum.py
    - rl2_ppo_metaworld_ml10_meta_test.py
    - te_ppo_metaworld_ml1_push.py
    - rl2_ppo_metaworld_ml45.py
    - multi_env_trpo.py
    - rl2_ppo_halfcheetah_meta_test.py
    - trpo_swimmer.py
    - te_ppo_metaworld_mt50.py
    - rl2_trpo_halfcheetah.py
    - dqn_cartpole.py
    - resume_training.py
  - step_env.py
  - torch
    - bc_point_deterministic_policy.py
    - pearl_half_cheetah_vel.py
    - trpo_pendulum.py
    - maml_ppo_half_cheetah_dir.py
    - sac_half_cheetah_batch.py
    - pearl_metaworld_ml10.py
    - mtsac_metaworld_mt10.py
    - mttrpo_metaworld_mt50.py
    - maml_vpg_half_cheetah_dir.py
    - ppo_pendulum.py
    - maml_trpo_metaworld_ml45.py
    - bc_point.py
    - mtppo_metaworld_mt50.py
    - mttrpo_metaworld_mt10.py
    - maml_trpo_metaworld_ml1_push.py
    - vpg_pendulum.py
    - mtsac_metaworld_mt50.py
    - ddpg_pendulum.py
    - trpo_pendulum_ray_sampler.py
    - pearl_metaworld_ml1_push.py
    - pearl_metaworld_ml45.py
    - mttrpo_metaworld_ml1_push.py
    - maml_trpo_metaworld_ml10.py
    - maml_trpo_half_cheetah_dir.py
    - mtppo_metaworld_mt10.py
    - mtppo_metaworld_ml1_push.py
    - resume_training.py
    - mtsac_metaworld_ml1_pick_place.py
- readthedocs.yml
- .pre-commit-config.yaml
- LICENSE
- CONTRIBUTING.md
- .editorconfig
- CHANGELOG.md
- .pylintrc
- setup.py
- benchmarks
  - src
    - garage_benchmarks
      - parameters.py
      - run_benchmarks.py
      - experiments
        algos
        ppo_garage_pytorch.py
        vpg_garage_pytorch.py
        ddpg_garage_tf.py
        trpo_garage_pytorch.py
        her_garage_tf.py
        trpo_garage_tf.py
        td3_garage_tf.py
        __init__.py
        ppo_garage_tf.py
        vpg_garage_tf.py
        policies
        categorical_gru_policy.py
        gaussian_mlp_policy.py
        categorical_lstm_policy.py
        gaussian_lstm_policy.py
        categorical_cnn_policy.py
        gaussian_gru_policy.py
        __init__.py
        continuous_mlp_policy.py
        categorical_mlp_policy.py
        q_functions
        continuous_mlp_q_function.py
        __init__.py
        baselines
        gaussian_mlp_baseline.py
        continuous_mlp_baseline.py
        __init__.py
        gaussian_cnn_baseline.py
        __init__.py
      - __init__.py
      - benchmark_algos.py
      - benchmark_q_functions.py
      - benchmark_policies.py
      - helper.py
      - benchmark_baselines.py
      - benchmark_auto.py
  - setup.py
  - README.md
- docker
  - entrypoint-headless.sh
  - entrypoint-runtime.sh
  - Dockerfile
- VERSION
- setup.cfg
- .travis.yml
- README.md
- tests
  - fixtures
    - algos
      - dummy_tf_algo.py
      - dummy_algo.py
      - __init__.py
    - policies
      - dummy_recurrent_policy.py
      - dummy_policy.py
      - __init__.py
    - distributions
      - dummy_distribution.py
      - __init__.py
    - q_functions
      - __init__.py
      - simple_q_function.py
    - models
      - simple_cnn_model_with_max_pooling.py
      - simple_gru_model.py
      - simple_gaussian_lstm_model.py
      - simple_cnn_model.py
      - simple_mlp_model.py
      - simple_mlp_merge_model.py
      - simple_gaussian_mlp_model.py
      - simple_lstm_model.py
      - simple_categorical_gru_model.py
      - simple_categorical_lstm_model.py
      - __init__.py
      - simple_categorical_mlp_model.py
      - simple_gaussian_cnn_model.py
      - simple_gaussian_gru_model.py
    - tf
      - algos
        dummy_off_policy_algo.py
      - __init__.py
    - logger.py
    - __init__.py
    - fixtures.py
    - regressors
      - __init__.py
      - simple_mlp_regressor.py
      - simple_gaussian_cnn_regressor.py
      - simple_gaussian_mlp_regressor.py
    - sampler
      - ray_fixtures.py
      - __init__.py
    - experiment
      - fixture_experiment.py
      - __init__.py
    - envs
      - dummy
        dummy_discrete_pixel_env_baselines.py
        dummy_dict_env.py
        dummy_multitask_box_env.py
        dummy_reward_box_env.py
        dummy_discrete_2d_env.py
        __init__.py
        dummy_box_env.py
        dummy_discrete_pixel_env.py
        base.py
        dummy_discrete_env.py
      - __init__.py
      - wrappers
        reshape_observation.py
        __init__.py
  - wrappers.py
  - quirks.py
  - mock.py
  - __init__.py
  - garage
    - replay_buffer
      - test_her_replay_buffer.py
      - test_path_buffer.py
      - __init__.py
    - np
      - algos
        test_cma_es.py
        test_cem.py
        __init__.py
      - policies
        test_fixed_policy.py
        test_scripted_policy.py
      - exploration_strategies
        test_epsilon_greedy_policy.py
        test_add_gaussian_noise.py
      - __init__.py
    - tf
      - algos
        test_reps.py
        test_erwr.py
        test_tnpg.py
        test_rl2trpo.py
        test_ddpg.py
        test_te.py
        test_npo.py
        test_trpo.py
        test_ppo.py
        __init__.py
        test_rl2ppo.py
        test_td3.py
        test_dqn.py
        test_vpg.py
      - policies
        test_categorical_cnn_policy.py
        test_continuous_mlp_policy.py
        test_gaussian_lstm_policy.py
        test_gaussian_mlp_task_embedding_policy.py
        test_categorical_lstm_policy.py
        test_policies.py
        test_gaussian_mlp_policy.py
        test_gaussian_policies.py
        test_categorical_gru_policy.py
        test_categorical_mlp_policy.py
        test_qf_derived_policy.py
        __init__.py
        test_gaussian_gru_policy.py
        test_categorical_policies.py
      - distributions
        test_diagonal_gaussian.py
      - embeddings
        __init__.py
        test_gaussian_mlp_encoder.py
      - q_functions
        test_continuous_mlp_q_function.py
        test_discrete_cnn_q_function.py
        test_continuous_cnn_q_function.py
        __init__.py
        test_discrete_mlp_q_function.py
      - models
        test_gru_model.py
        test_gaussian_gru_model.py
        test_cnn.py
        test_gaussian_mlp_model.py
        test_categorical_mlp_model.py
        test_parameter.py
        test_gaussian_lstm_model.py
        test_cnn_model.py
        test_model.py
        test_cnn_mlp_merge_model.py
        test_gru.py
        __init__.py
        test_lstm.py
        test_categorical_lstm_model.py
        test_gaussian_cnn_model.py
        test_mlp.py
        test_categorical_gru_model.py
        test_mlp_model.py
        test_mlp_concat.py
        test_lstm_model.py
        test_categorical_cnn_model.py
      - misc
        __init__.py
        test_tensor_utils.py
      - baselines
        test_gaussian_mlp_baseline.py
        test_gaussian_cnn_baseline.py
        test_continuous_mlp_baseline.py
        __init__.py
        test_baselines.py
      - __init__.py
      - regressors
        test_categorical_mlp_regressor.py
        __init__.py
        test_gaussian_mlp_regressor.py
        test_bernoulli_mlp_regressor.py
      - experiment
        test_local_tf_runner.py
        __init__.py
      - optimizers
        test_conjugate_gradient_optimizer.py
        __init__.py
      - samplers
        test_tf_worker.py
        test_ray_batched_sampler_tf.py
        test_task_embedding_worker.py
        __init__.py
      - envs
        test_base.py
        __init__.py
    - misc
      - __init__.py
      - test_tensor_utils.py
    - .pylintrc
    - torch
      - algos
        test_maml_trpo.py
        test_bc.py
        test_ddpg.py
        test_maml_ppo.py
        test_trpo.py
        test_ppo.py
        test_maml_vpg.py
        test_mtsac.py
        __init__.py
        test_sac.py
        test_maml.py
        test_pearl_worker.py
        test_vpg.py
        test_pearl.py
      - policies
        test_gaussian_mlp_policy.py
        __init__.py
        test_context_conditioned_policy.py
        test_tanh_gaussian_mlp_policy.py
        test_deterministic_mlp_policy.py
      - distributions
        test_tanh_normal_dist.py
      - q_functions
        test_continuous_mlp_q_function.py
      - test_functions.py
      - __init__.py
      - optimizers
        test_differentiable_sgd.py
        test_torch_conjugate_gradient_optimizer.py
      - modules
        test_gaussian_mlp_module.py
        test_multi_headed_mlp_module.py
        test_mlp_module.py
    - test_functions.py
    - __init__.py
    - sampler
      - test_utils.py
      - test_multiprocessing_sampler.py
      - test_ray_batched_sampler.py
      - test_vec_worker.py
      - __init__.py
      - test_sampler.py
      - test_rl2_worker.py
      - test_local_sampler.py
    - experiment
      - test_local_runner.py
      - test_deterministic.py
      - test_experiment.py
      - test_meta_evaluator.py
      - test_resume.py
      - test_snapshotter.py
      - test_snapshotter_integration.py
      - __init__.py
      - test_task_sampler.py
    - test_dtypes.py
    - envs
      - test_task_onehot_wrapper.py
      - dm_control
        test_dm_control_tf_policy.py
        __init__.py
        test_dm_control_env.py
      - box2d
        parser
        __init__.py
      - test_half_cheetah_meta_envs.py
      - test_normalized_gym.py
      - test_garage_env.py
      - test_rl2_env.py
      - test_grid_world_env.py
      - test_normalized_env.py
      - test_point_env.py
      - test_multi_env_wrapper.py
      - __init__.py
      - test_env_spec.py
      - wrappers
        test_stack_frames_env.py
        test_episodic_life.py
        test_pixel_observation_wrapper.py
        test_atari_env.py
        test_noop.py
        test_clip_reward.py
        test_fire_reset.py
        test_grayscale_env.py
        __init__.py
        test_resize_env.py
        test_max_and_skip.py
      - bullet
        test_bullet_env.py
        __init__.py
  - helpers.py
  - integration_tests
    - test_sigint.py
    - __init__.py
    - test_examples.py
- scripts
  - travisci
    - check_no_deps_changed.sh
    - check_docs_only.sh
    - check_precommit.sh
  - setup_macos.sh
  - setup_linux.sh
  - garage
  - setup_colab.sh
  - check_commit_message
- CODEOWNERS
- .codecov.yml
- .mergify.yml
- .gitignore
- docs
  - Makefile
  - user
    - docker.md
    - pixel_observations.md
    - save_load_resume_exp.md
    - benchmarking.md
    - experiments.rst
    - implement_env.rst
    - monitor_experiments_with_tensorboard.md
    - testing.md
    - implement_algo.rst
    - writing_documentation.md
    - training_a_policy.md
    - installation.rst
  - index.md
  - requirements.txt
  - conf.py
  - _static
    - theme_overrides.css
  - autoapi_templates
    - python
      - module.rst
- .dockerignore
- MANIFEST.in

import pickle

import numpy as np
import pytest
import tensorflow as tf
import tensorflow_probability as tfp

from garage.tf.models import CategoricalMLPModel
from tests.fixtures import TfGraphTestCase


class TestCategoricalMLPModel(TfGraphTestCase):
    """Tests for building, normalizing and pickling CategoricalMLPModel."""

    def setup_method(self):
        """Prepare a 5-feature input placeholder and a one-row observation."""
        super().setup_method()
        placeholder = tf.compat.v1.placeholder(tf.float32, shape=(None, 5))
        self.input_var = placeholder
        self.obs = np.ones((1, 5))

    def test_dist(self):
        """The built network exposes a OneHotCategorical distribution."""
        network = CategoricalMLPModel(output_dim=1).build(self.input_var)
        assert isinstance(network.dist, tfp.distributions.OneHotCategorical)

    @pytest.mark.parametrize('output_dim', [1, 2, 5, 10])
    def test_output_normalized(self, output_dim):
        """The probabilities for a single observation sum to one.

        Args:
            output_dim (int): Output dimension of the model; also used as
                the width of the input placeholder fed in this test.

        """
        model = CategoricalMLPModel(output_dim=output_dim)
        feed_shape = (None, output_dim)
        obs_ph = tf.compat.v1.placeholder(tf.float32, shape=feed_shape)
        ones_obs = np.ones((1, output_dim))
        dist = model.build(obs_ph).dist

        session = tf.compat.v1.get_default_session()
        # Only one row is fed, so summing every entry of `probs` gives the
        # total probability mass of that single distribution.
        total_mass = session.run(tf.reduce_sum(dist.probs),
                                 feed_dict={obs_ph: ones_obs})
        assert np.isclose(total_mass, 1.0)

    # yapf: disable
    @pytest.mark.parametrize('output_dim, hidden_sizes', [
        (1, (1, )),
        (1, (2, )),
        (2, (3, )),
        (2, (1, 1)),
        (3, (2, 2)),
    ])
    # yapf: enable
    def test_is_pickleable(self, output_dim, hidden_sizes):
        """A pickled and restored model yields identical probabilities.

        Args:
            output_dim (int): Output dimension of the model.
            hidden_sizes (tuple[int]): Hidden layer sizes of the model.

        """
        model_kwargs = dict(output_dim=output_dim,
                            hidden_sizes=hidden_sizes,
                            hidden_nonlinearity=None,
                            hidden_w_init=tf.ones_initializer(),
                            output_w_init=tf.ones_initializer())
        model = CategoricalMLPModel(**model_kwargs)
        dist = model.build(self.input_var).dist

        # Overwrite the first hidden layer's bias with ones before pickling,
        # so the comparison below also covers a value loaded after build.
        scope_name = 'CategoricalMLPModel/mlp'
        with tf.compat.v1.variable_scope(scope_name, reuse=True):
            bias = tf.compat.v1.get_variable('hidden_0/bias')
        ones_value = tf.ones_like(bias).eval()
        bias.load(ones_value)

        feed = {self.input_var: self.obs}
        output1 = self.sess.run(dist.probs, feed_dict=feed)

        serialized = pickle.dumps(model)
        # Rebuild the unpickled model inside a brand-new graph and session.
        with tf.compat.v1.Session(graph=tf.Graph()) as sess:
            input_var = tf.compat.v1.placeholder(tf.float32, shape=(None, 5))
            restored = pickle.loads(serialized)
            dist2 = restored.build(input_var).dist
            output2 = sess.run(dist2.probs, feed_dict={input_var: self.obs})

            assert np.array_equal(output1, output2)