python source code of emulator

# -*- coding: utf-8 -*-
# Emulator code inspired by:
# https://github.com/Jabberwockyll/deep_rl_ale/
import numpy as np
from ale_python_interface import ALEInterface
import cv2
#from skimage.transform import resize
#from skimage.color import rgb2gray
import sys
import random
import utils.logger
logger = utils.logger.getLogger('emulator')

#import matplotlib.pyplot as plt

IMG_SCALE = 255.0
IMG_SIZE_X = 84
IMG_SIZE_Y = 84
NR_IMAGES = 4
ACTION_REPEAT = 4
MAX_START_WAIT = 30
FRAMES_IN_POOL = 2
BLEND_METHOD = 'max_pool'
RANDOM_PLAY_STEPS = 1000000

class Emulator:
    def __init__(self, rom_path, rom_name, visualize, actor_id, rseed, single_life_episodes = False):
        
        self.ale = ALEInterface()

        self.ale.setInt("random_seed", rseed * (actor_id +1))

        # For fuller control on explicit action repeat (>= ALE 0.5.0) 
        self.ale.setFloat("repeat_action_probability", 0.0)
        
        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt("frame_skip", 1)
        self.ale.setBool("color_averaging", False)
        self.ale.loadROM(rom_path + "/" + rom_name + ".bin")
        self.legal_actions = self.ale.getMinimalActionSet()        
        self.screen_width,self.screen_height = self.ale.getScreenDims()
        #self.ale.setBool('display_screen', True)
        
        # Processed historcal frames that will be fed in to the network 
        # (i.e., four 84x84 images)
        self.screen_images_processed = np.zeros((IMG_SIZE_X, IMG_SIZE_Y, 
            NR_IMAGES)) 
        self.rgb_screen = np.zeros((self.screen_height,self.screen_width, 3), dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height,self.screen_width,1), dtype=np.uint8)

        self.frame_pool = np.empty((2, self.screen_height, self.screen_width))
        self.current = 0
        self.lives = self.ale.lives()

        self.visualize = visualize
        self.visualize_processed = False
        self.windowname = rom_name + ' ' + str(actor_id)
        if self.visualize:
            logger.debug("Opening emulator window...")
            #from skimage import io
            #io.use_plugin('qt')
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)
            logger.debug("Emulator window opened")
            
        if self.visualize_processed:
            logger.debug("Opening processed frame window...")
            cv2.startWindowThread()
            logger.debug("Processed frame window opened")
            cv2.namedWindow(self.windowname + "_processed")
            
        self.single_life_episodes = single_life_episodes

    def get_screen_image(self):
        """ Add screen (luminance) to frame pool """
        # [screen_image, screen_image_rgb] = [self.ale.getScreenGrayscale(), 
        #     self.ale.getScreenRGB()]
        self.ale.getScreenGrayscale(self.gray_screen)
        self.ale.getScreenRGB(self.rgb_screen)
        self.frame_pool[self.current] = np.squeeze(self.gray_screen)
        self.current = (self.current + 1) % FRAMES_IN_POOL
        return self.rgb_screen

    def new_game(self):
        """ Restart game """
        self.ale.reset_game()
        self.lives = self.ale.lives()

        if MAX_START_WAIT < 0:
            logger.debug("Cannot time travel yet.")
            sys.exit()
        elif MAX_START_WAIT > 0:
            wait = random.randint(0, MAX_START_WAIT)
        else:
            wait = 0
        for _ in xrange(wait):
            self.ale.act(self.legal_actions[0])

    def process_frame_pool(self):
        """ Preprocess frame pool """
        
        img = None
        if BLEND_METHOD == "max_pool":
            img = np.amax(self.frame_pool, axis=0)
        
        #img resize(img[:210, :], (84, 84))
        img = cv2.resize(img[:210, :], (84, 84), 
            interpolation=cv2.INTER_LINEAR)
        
        img = img.astype(np.float32)
        img *= (1.0/255.0)
        
        return img
        # Reduce height to 210, if not so
        #cropped_img = img[:210, :]
        # Downsample to 110x84
        #down_sampled_img = resize(cropped_img, (84, 84))
        
        # Crop to 84x84 playing area
        #stackable_image = down_sampled_img[:, 26:110]
        #return stackable_image

    def action_repeat(self, a):
        """ Repeat action and grab screen into frame pool """
        reward = 0
        for i in xrange(ACTION_REPEAT):
            reward += self.ale.act(self.legal_actions[a])
            new_screen_image_rgb = self.get_screen_image()
        return reward, new_screen_image_rgb

    def get_reshaped_state(self, state):
        return np.reshape(state, 
            (1, IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES))
        #return np.reshape(self.screen_images_processed, 
        #    (1, IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES))

    def get_initial_state(self):
        """ Get the initial state """
        self.new_game()
        for step in xrange(NR_IMAGES):
            reward, new_screen_image_rgb = self.action_repeat(0)
            self.screen_images_processed[:, :, step] = self.process_frame_pool()
            self.show_screen(new_screen_image_rgb)
        if self.is_terminal():
            MAX_START_WAIT -= 1
            return self.get_initial_state()
        return np.copy(self.screen_images_processed) #get_reshaped_state()      

    def next(self, action):
        """ Get the next state, reward, and game over signal """
        reward, new_screen_image_rgb = self.action_repeat(np.argmax(action))
        self.screen_images_processed[:, :, 0:3] = \
            self.screen_images_processed[:, :, 1:4]
        self.screen_images_processed[:, :, 3] = self.process_frame_pool()
        self.show_screen(new_screen_image_rgb)
        terminal = self.is_terminal()
        self.lives = self.ale.lives()
        return np.copy(self.screen_images_processed), reward, terminal #get_reshaped_state(), reward, terminal
    
    def show_screen(self, image):
        """ Show visuals for raw and processed images """
        if self.visualize:
            #io.imshow(image[:210, :], fancy=True)
            cv2.imshow(self.windowname, image[:210, :])
        if self.visualize_processed:
            #io.imshow(self.screen_images_processed[:, :, 3], fancy=True)
            cv2.imshow(self.windowname + "_processed", self.screen_images_processed[:, :, 3])
            
    def is_terminal(self):
        if self.single_life_episodes:
            return (self.is_over() or (self.lives > self.ale.lives()))
        else:
            return self.is_over()

    def is_over(self):
        return self.ale.game_over()


if __name__ == "__main__":
    emulator = Emulator("../atari_roms", "breakout", True, 0, 1)
    emulator.get_initial_state()
    n_actions = len(emulator.legal_actions)
    one_hot_action = np.zeros(n_actions)
    for _ in xrange(RANDOM_PLAY_STEPS):
        one_hot_action[random.randint(0, n_actions - 1)] = 1
        _, _, over = emulator.next(one_hot_action, False)
        if over:
            break
        one_hot_action.fill(0)