# Python source code of enjoy-adv

""" DQN - Test-time attacks

============ Sample usage ============
No attack, testing a DQN model of Breakout trained without parameter noise:
$> python3 enjoy-adv.py --env Breakout --model-dir \
           ./data/Breakout/model-100 --video ./Breakout.mp4

No attack, testing a DQN model of Breakout trained with parameter noise
(NoisyNet implementation):
$> python3 enjoy-adv.py --env Breakout --noisy \
           --model-dir ./data/Breakout/model-173000 --video ./Breakout.mp4

Whitebox FGSM attack, testing a DQN model of Breakout
trained without parameter noise:
$> python3 enjoy-adv.py --env Breakout --model-dir \
        ./data/Breakout/model-173000 --attack fgsm --video ./Breakout.mp4

Whitebox FGSM attack, testing a DQN model of Breakout
trained with parameter noise (NoisyNet implementation):
$> python3 enjoy-adv.py --env Breakout --noisy --model-dir \
        ./data/Breakout/model-173000 --attack fgsm --video ./Breakout.mp4

Blackbox FGSM attack, testing a DQN model of Breakout
trained without parameter noise:
$> python3 enjoy-adv.py --env Breakout --model-dir \
    ./data/Breakout/model-173000 --attack fgsm --blackbox \
    --model-dir2 ./data/Breakout/model-173000-2 --video ./Breakout.mp4

Blackbox FGSM attack, testing a DQN model of Breakout
trained with parameter noise (NoisyNet implementation),
replica model trained without parameter noise:
$> python3 enjoy-adv.py --env Breakout --noisy \
    --model-dir ./data/Breakout/model-173000 --attack fgsm --blackbox \
    --model-dir2 ./data/Breakout/model2-173000-2 --video ./Breakout.mp4

Blackbox FGSM attack, testing a DQN model of Breakout
trained with parameter noise (NoisyNet implementation),
replica model trained with parameter noise:
$> python3 enjoy-adv.py --env Breakout --noisy --model-dir \
    ./data/Breakout/model-173000 --attack fgsm --blackbox \
    --model-dir2 ./data/Breakout/model2-173000 --noisy2 --video ./Breakout.mp4

"""

import argparse
import gym
import os
import numpy as np

from gym.monitoring import VideoRecorder

import rlattack.common.tf_util as U

from rlattack import deepq
from rlattack.common.misc_util import (
    boolean_flag,
    SimpleMonitor,
)
from rlattack.common.atari_wrappers_deprecated import wrap_dqn


# V: imports#
import tensorflow as tf
import cv2
from collections import deque
from model import model, dueling_model
from statistics import statistics


class DQNModel:
    """
    Wrapper around a Q-network that lives in its own graph and session.

    Every instance creates a private ``tf.Graph`` and ``tf.Session`` so that
    several models can be loaded at the same time (needed for blackbox
    attacks).  The Q-graph is built in ``__init__``; the adversarial-example
    (FGSM) graph is built on demand by ``craft_adv``.
    """

    def __init__(self, env, dueling, noisy, fname):
        """
        Build the action function and optionally restore saved weights.

        env     -- environment; its observation space shape and number of
                   actions size the network
        dueling -- if true use ``dueling_model`` as q_func, otherwise ``model``
        noisy   -- forwarded unchanged to the ``deepq`` graph builders
        fname   -- checkpoint path given to ``Saver.restore``; ``None`` skips
                   restoring
        """
        self.env = env
        self.dueling = dueling
        self.noisy = noisy
        self.g = tf.Graph()

        with self.g.as_default():
            self.act = deepq.build_act_enjoy(
                make_obs_ph=self._make_obs_input,
                q_func=self._q_func(),
                num_actions=env.action_space.n,
                noisy=noisy
            )
            # Created inside the graph context so it belongs to this graph.
            self.saver = tf.train.Saver()
        self.sess = tf.Session(graph=self.g)

        if fname is None:
            return
        print('Loading Model...')
        self.saver.restore(self.sess, fname)

    def _make_obs_input(self, name):
        """Return a ``U.Uint8Input`` shaped like the env's observations."""
        return U.Uint8Input(self.env.observation_space.shape, name=name)

    def _q_func(self):
        """Return the q-function builder selected by ``self.dueling``."""
        if self.dueling:
            return dueling_model
        return model

    def get_act(self):
        """Return the action function built in ``__init__``."""
        return self.act

    def get_session(self):
        """Return the session bound to this model's graph."""
        return self.sess

    def craft_adv(self):
        """
        Build and return the adversarial-observation function.

        The function is built with this model's session and graph installed
        as defaults; epsilon is fixed at 1/255.
        """
        with self.sess.as_default(), self.g.as_default():
            return deepq.build_adv(
                make_obs_tf=self._make_obs_input,
                q_func=self._q_func(),
                num_actions=self.env.action_space.n,
                epsilon=1.0 / 255.0,
                noisy=self.noisy,
            )


def parse_args():
    """
    Parse the command-line arguments of the script.

    Returns the ``argparse.Namespace`` produced by ``parse_args()`` with the
    attributes ``env``, ``model_dir``, ``video``, ``stochastic``, ``dueling``,
    ``model_dir2``, ``attack``, ``noisy``, ``noisy2`` and ``blackbox``.
    Only ``--env`` is marked as required by the parser.
    """
    parser = argparse.ArgumentParser("Run an already learned DQN model.")
    # Environment
    parser.add_argument("--env", type=str, required=True,
                        help="name of the game")
    parser.add_argument("--model-dir", type=str, default=None,
                        help="load model from this directory. ")
    parser.add_argument("--video", type=str, default=None,
                        help="Path to mp4 file where the \
                        video of first episode will be recorded.")
    boolean_flag(parser, "stochastic", default=True,
                 help="whether or not to use stochastic \
                 actions according to models eps value")
    boolean_flag(parser, "dueling", default=False,
                 help="whether or not to use dueling model")
    # V: Attack Arguments#
    parser.add_argument("--model-dir2", type=str, default=None,
                        help="load adversarial model from \
                        this directory (blackbox attacks). ")
    parser.add_argument("--attack", type=str, default=None,
                        help="Method to attack the model.")
    # The three flags below used to share one copy-pasted help string, so
    # ``--help`` described --noisy2 and --blackbox incorrectly.
    boolean_flag(parser, "noisy", default=False,
                 help="whether or not the model uses NoisyNetwork")
    boolean_flag(parser, "noisy2", default=False,
                 help="whether or not the adversarial model "
                      "(--model-dir2) uses NoisyNetwork")
    boolean_flag(parser, "blackbox", default=False,
                 help="whether or not to run a blackbox attack "
                      "(adversarial examples are crafted on the "
                      "model loaded from --model-dir2)")

    return parser.parse_args()


def make_env(game_name):
    """
    Create the environment for ``game_name``.

    The gym id is ``game_name`` with ``NoFrameskip-v4`` appended; the
    resulting environment is wrapped in ``SimpleMonitor`` and then passed
    through ``wrap_dqn``.
    """
    env_id = game_name + "NoFrameskip-v4"
    monitored = SimpleMonitor(gym.make(env_id))
    return wrap_dqn(monitored)


def play(env, act, craft_adv_obs, craft_adv_obs2, stochastic, video_path,
         attack, m_target, m_adv):
    """
    Run the model in ``env`` forever, optionally attacking every observation.

    env            -- environment to step; ``info["rewards"]`` returned by
                      ``env.step`` is used as the list of finished-episode
                      rewards (presumably filled in by SimpleMonitor)
    act            -- action function of the target model
    craft_adv_obs  -- function producing an adversarial observation; only
                      called when ``attack`` is not ``None``
    craft_adv_obs2 -- accepted for interface compatibility, currently unused
    stochastic     -- forwarded to ``act`` and ``craft_adv_obs``
    video_path     -- mp4 path for recording the first episode, or ``None``
                      to disable recording
    attack         -- ``None`` plays normally; any other value enables the
                      attack branch
    m_target       -- model whose session is default while ``act`` runs
                      during an attack
    m_adv          -- model whose session is default while the adversarial
                      observation is crafted

    The loop never terminates on its own.  After each finished episode the
    reward and episode number are printed; when at least one attacked move
    was made, the percentage of moves on which the attack changed the chosen
    action is printed as well.
    """
    num_episodes = 0
    num_moves = 0
    num_transfer = 0

    video_recorder = VideoRecorder(
        env, video_path, enabled=video_path is not None)
    obs = env.reset()
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()

        # V: Attack #
        if attack is not None:
            # Craft adv. examples
            with m_adv.get_session().as_default():
                adv_obs = \
                    craft_adv_obs(np.array(obs)[None],
                                  stochastic_adv=stochastic)[0]
            with m_target.get_session().as_default():
                # Action actually played (on the perturbed observation) ...
                action = act(np.array(adv_obs)[None],
                             stochastic=stochastic)[0]
                # ... and the action for the clean observation, used only
                # to count how often the attack changes the decision.
                action2 = act(np.array(obs)[None], stochastic=stochastic)[0]
                num_moves += 1
                if action != action2:
                    num_transfer += 1
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]

        obs, _, done, info = env.step(action)
        if done:
            obs = env.reset()

        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print('Reward: ' + str(info["rewards"][-1]))
            num_episodes = len(info["rewards"])
            print('Episode: ' + str(num_episodes))
            # num_moves is only incremented in the attack branch, so it is
            # 0 when playing without an attack; dividing unconditionally
            # raised ZeroDivisionError at the end of the first episode.
            if num_moves > 0:
                success = float(num_transfer) / num_moves * 100.0
                print("Percentage of successful attacks: " + str(success))
            num_moves = 0
            num_transfer = 0


if __name__ == '__main__':
    args = parse_args()
    # Both directory options default to None, and os.path.join(None, ...)
    # fails with an opaque TypeError, so reject missing values up front.
    if args.model_dir is None:
        raise SystemExit("error: --model-dir is required")
    if args.blackbox and args.model_dir2 is None:
        raise SystemExit("error: --blackbox requires --model-dir2")

    env = make_env(args.env)
    g1 = tf.Graph()
    g2 = tf.Graph()
    # Target model, restored from <model-dir>/saved.
    with g1.as_default():
        m1 = DQNModel(env, args.dueling, args.noisy,
                      os.path.join(args.model_dir, "saved"))
    if args.blackbox:
        # Blackbox: adversarial examples are crafted on a second model (m2)
        # restored from <model-dir2>/saved and played against m1.
        with g2.as_default():
            m2 = DQNModel(env, args.dueling, args.noisy2,
                          os.path.join(args.model_dir2, "saved"))
            with m2.get_session().as_default():
                craft_adv_obs = m2.craft_adv()
            with m1.get_session().as_default():
                craft_adv_obs2 = m1.craft_adv()
                play(env, m1.get_act(), craft_adv_obs, craft_adv_obs2,
                     args.stochastic, args.video, args.attack, m1, m2)
    else:
        # Whitebox / no attack: m1 is both the target and the model used
        # for crafting.  play() runs with m1's session as default.
        with m1.get_session().as_default():
            craft_adv_obs = m1.craft_adv()
            play(env, m1.get_act(), craft_adv_obs, None, args.stochastic,
                 args.video, args.attack, m1, m1)