# Python source code of cued_catch.

# Copyright 2018 the pycolab Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Position yourself to catch blocks based on a visual cue.

This game proceeds in two phases. The first phase is a "programming phase",
where the player sees each of the four visual cues (green blocks at the bottom
of the game board) paired randomly with either of two additional visual cues
(larger green blocks just above the cues, called "ball symbols"). These pairings
tell the player what actions they should take in the second phase of the game.

In the second phase of the game, the player must repeatedly move itself up or
down to position itself in front of either of two blocks: a yellow block or a
cyan block. These blocks approach the player from right to left. If the player
"catches" the correct block, it receives a point. The correct block is indicated
by the visual cue shown as the blocks begin to approach the player. If the cue
was paired with the left "ball symbol" during the programming phase, the player
should catch the yellow block; otherwise it should catch the cyan block.

Each episode of "Cued Catch" starts with a different mapping from cues to
blocks.  The player must learn to remember these associations in order to play
the game successfully.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import curses
import random
import sys

from pycolab import ascii_art
from pycolab import human_ui
from pycolab import things as plab_things
from pycolab.prefab_parts import sprites as prefab_sprites


# ASCII art for the game board. Not too representative: there is usually some
# cue showing on some part of the board.
#
# The board is 7 rows by 12 columns. 'P' marks the player sprite, and 'a' and
# 'b' mark where the top and bottom ball sprites start (and later return to).
# `CueDrape` paints its phase markers on rows 1-2, its ball symbols on rows 3-4,
# and its cues on the last two rows.
GAME_ART = [
    '            ',
    '   P    a   ',
    '        b   ',
    '            ',
    '            ',
    '            ',
    '            ',
]


if __name__ == '__main__':  # Library users should never see these flags.
  parser = argparse.ArgumentParser(
      epilog=('NOTE: Default options configure the game as the agents in the '
              'paper played it. These settings may not be much fun for humans, '
              'though.'),
      description='Play Cued Catch.')

  # Timing of the two kinds of cue presentation, and the episode length.
  parser.add_argument(
      '--initial_cue_duration', type=int, default=10, metavar='t',
      help='Programming cue duration.')
  parser.add_argument(
      '--cue_duration', type=int, default=10, metavar='t',
      help='Query cue duration.')
  parser.add_argument(
      '--num_trials', type=int, default=100, metavar='K',
      help='Number of trials per episode.')

  # Control condition: with this flag set, no long term memory is required.
  parser.add_argument(
      '--always_show_ball_symbol', action='store_true',
      help='Control case: show ball symbols during trials.')

  # Noisy rewards, for experiments that require noise-tolerant memory.
  parser.add_argument(
      '--reward_sigma', type=float, default=0.0, metavar='s',
      help='Stddev for noise to add to ball-catch rewards.')

  # Delayed rewards, for experiments that require very long term memory.
  parser.add_argument(
      '--reward_free_trials', type=int, default=40, metavar='K',
      help='Provide no reward for the first K trials')

  FLAGS = parser.parse_args()


# Foreground colours for the CursesUi, keyed by board character. They are purely
# cosmetic: only human players ever get to see them.
COLOURS = {
    ' ': (0, 0, 0),        # Background (black).
    'P': (999, 999, 999),  # The player, i.e. you.
    'Q': (0, 999, 0),      # Cue blocks.
    'a': (999, 999, 0),    # The top ball.
    'b': (0, 999, 999),    # The bottom ball.
}


def make_game(initial_cue_duration, cue_duration, num_trials,
              always_show_ball_symbol=False,
              reward_sigma=0.0,
              reward_free_trials=0):
  """Builds a game of Cued Catch.

  Args:
    initial_cue_duration: passed to `CueDrape`; how many timesteps each cue is
        shown for during the first ("programming") phase.
    cue_duration: passed to `CueDrape`; how many timesteps the cue is shown for
        in each second-phase trial.
    num_trials: passed to `CueDrape`; number of trials in the second phase.
    always_show_ball_symbol: passed to `CueDrape`; whether to show the ball
        symbol alongside the cue during trials.
    reward_sigma: passed to `PlayerSprite`; standard deviation of the noise
        added to ball-catch rewards (0.0 means no noise).
    reward_free_trials: passed to `PlayerSprite`; number of trials that must
        elapse before any reward can be earned.

  Returns:
    whatever `ascii_art.ascii_art_to_game` builds from `GAME_ART` and the
    sprite and drape classes in this module.
  """
  # The player and the cue drape need extra constructor arguments, so they are
  # wrapped in `Partial`s; the balls take only the standard ones.
  player = ascii_art.Partial(
      PlayerSprite,
      reward_sigma=reward_sigma,
      reward_free_trials=reward_free_trials)
  cue_drape = ascii_art.Partial(
      CueDrape,
      initial_cue_duration, cue_duration, num_trials, always_show_ball_symbol)

  return ascii_art.ascii_art_to_game(
      art=GAME_ART,
      what_lies_beneath=' ',
      sprites={'P': player, 'a': BallSprite, 'b': BallSprite},
      drapes={'Q': cue_drape},
      update_schedule=['P', 'a', 'b', 'Q'])


class PlayerSprite(prefab_sprites.MazeWalker):
  """The catcher: a `Sprite` that slides up and down to intercept balls."""

  def __init__(self, corner, position, character,
               reward_sigma=0.0, reward_free_trials=0):
    """Constructs a PlayerSprite.

    The sprite is confined to the board and treats no character as impassable.

    Args:
      corner: standard `Sprite` constructor parameter.
      position: standard `Sprite` constructor parameter.
      character: standard `Sprite` constructor parameter.
      reward_sigma: standard deviation of the Gaussian noise added to the
          reward handed out when the balls reach the player's column. A value
          of 0.0 disables the noise.
      reward_free_trials: how many trials must go by before any reward can be
          earned.
    """
    super(PlayerSprite, self).__init__(
        corner, position, character, impassable='', confined_to_board=True)
    self._trials_till_reward = reward_free_trials
    self._reward_sigma = reward_sigma

  def update(self, actions, board, layers, backdrop, things, the_plot):
    """Moves the player (or quits), then hands out this step's reward."""
    # The player only ever shuttles between rows 1 and 2.
    row = self.virtual_position.row
    if actions in (0, 4):                # Quit the game.
      the_plot.terminate_episode()
    elif actions == 1 and row > 1:       # Up, if there's room.
      self._north(board, the_plot)
    elif actions == 2 and row < 2:       # Down, if there's room.
      self._south(board, the_plot)
    else:                                # No-op, or a move we can't make.
      self._stay(board, the_plot)

    # Work out where we stand relative to the ball we're meant to catch.
    target = things['a' if the_plot.get('which_ball') == 'top' else 'b']
    caught = self.position == target.position
    ball_in_our_column = self.position.col == target.position.col
    rewards_enabled = self._trials_till_reward <= 0

    if not self._reward_sigma:
      # Noise-free rewards: one point for a catch, nothing otherwise.
      the_plot.add_reward(int(caught and rewards_enabled))
    elif ball_in_our_column and rewards_enabled:
      # Noisy rewards: the noise is applied whether or not we caught the ball.
      the_plot.add_reward(
          float(caught) +
          random.normalvariate(mu=0, sigma=self._reward_sigma))
    else:
      the_plot.add_reward(0)

    # A trial "counts" when the ball draws level with us; tick off one
    # reward-free trial if any remain.
    if ball_in_our_column and not rewards_enabled:
      self._trials_till_reward -= 1


class BallSprite(plab_things.Sprite):
  """A `Sprite` for one of the balls that travel leftward toward the player."""

  def __init__(self, corner, position, character):
    """Constructs an initially invisible ball and remembers where it started."""
    super(BallSprite, self).__init__(corner, position, character)
    # We stay hidden until the programming phase is over.
    self._visible = False
    # Each trial begins with the ball back where the game art placed it.
    self._start_position = position

  def update(self, actions, board, layers, backdrop, things, the_plot):
    """Advances the ball one column left, or restarts it once past the player."""
    if the_plot.get('programming_complete'):
      # The initial programming cues have all been shown: time to appear.
      self._visible = True

      here = self.position
      if here.col >= things['P'].position.col:
        # Still level with or to the right of the player: keep approaching.
        self._position = self.Position(here.row, here.col - 1)
      else:
        # We've passed the player. Go back to the start, and let the cue drape
        # know that it's time to pick a new correct ball.
        self._position = self._start_position
        the_plot['last_ball_reset'] = the_plot.frame


class CueDrape(plab_things.Drape):
  """Shows the cue-to-ball pairings, then picks correct balls and shows cues.

  The cue drape goes through two phases.

  In the first phase, it presents each of the four cues serially along with a
  symbol that indicates whether the top ball or the bottom ball is the correct
  choice for that cue. (The symbol does not resemble one of the balls.) During
  this phase, no balls appear. Agent actions can move the player but accomplish
  nothing else. Each associational cue presentation lasts for a number of
  timesteps controlled by the `initial_cue_duration` constructor argument.

  Once all four cues have been shown in this way, the second phase presents a
  sequence of `num_trials` fixed-length trials. In each trial, one of the four
  cues is shown for `cue_duration` timesteps, and the two balls advance toward
  the player from the right-hand side of the screen. The agent must position the
  player to "catch" the ball that matches the cue shown at the beginning of the
  trial.

  The two phases can also be visually distinguished by the presence of some
  additional markers on the board.

  Layout of the curtain: rows 1-2 hold the phase markers, rows 3-4 hold the
  ball symbol, and the last two rows hold the cue.

  This drape communicates with the other game entities through the plot: it
  sets `programming_complete` and `which_ball`, and reads `last_ball_reset`.
  """

  _NUM_CUES = 4  # Must divide 12 evenly and be divisible by 2. So, 2, 4, 6, 12.

  def __init__(self, curtain, character,
               initial_cue_duration,
               cue_duration,
               num_trials,
               always_show_ball_symbol):
    """Initialise a CueDrape.

    Args:
      curtain: standard `Drape` constructor parameter.
      character: standard `Drape` constructor parameter.
      initial_cue_duration: number of timesteps each cue is shown for during
          the first phase.
      cue_duration: number of timesteps the cue is shown for in each trial of
          the second phase.
      num_trials: number of trials in the second phase.
      always_show_ball_symbol: if true, the ball symbol paired with the cue is
          shown alongside the cue during second-phase trials.
    """
    super(CueDrape, self).__init__(curtain, character)

    self._initial_cue_duration = initial_cue_duration
    self._cue_duration = cue_duration
    self._num_trials_left = num_trials
    self._always_show_ball_symbol = always_show_ball_symbol

    # Assign balls to each of the cues: a random ordering of a list that is
    # half 'top' and half 'bottom', indexed by cue number.
    self._cues_to_balls = random.sample(
        ['top'] * (self._NUM_CUES // 2) + ['bottom'] * (self._NUM_CUES // 2),
        self._NUM_CUES)

    self._phase = 'first'
    # State for first phase: number of timesteps left in the phase.
    self._first_phase_tick = self._NUM_CUES * self._initial_cue_duration
    # State for second phase, initialised to bogus values.
    self._second_phase_cue_choice = -1
    self._second_phase_tick = -1
    self._second_phase_last_reset = -float('inf')

  def update(self, actions, board, layers, backdrop, things, the_plot):
    """Redraws the phase markers, then runs the current phase's update."""
    # Show the agent which phase we're in.
    self._show_phase_cue(self._phase)
    # Do phase-specific update.
    if self._phase == 'first':
      self._do_first_phase(the_plot)
    elif self._phase == 'second':
      self._do_second_phase(the_plot)

  ## Phase-specific updates.

  def _do_first_phase(self, the_plot):
    """Shows the current cue/ball-symbol pairing; ends the phase when done."""
    # Iterate through showing each of the cues. Because the tick counts down,
    # cues are presented from the highest-numbered cue down to cue 0.
    self._first_phase_tick -= 1  # Decrement number of steps left in this phase.
    cue = self._first_phase_tick // self._initial_cue_duration
    self._show_ball_symbol(self._cues_to_balls[cue])
    self._show_cue(cue)
    # End of phase? Move on to the next phase.
    if self._first_phase_tick <= 0:
      self._phase = 'second'
      the_plot['programming_complete'] = True
      self._second_phase_reset(the_plot)

  def _do_second_phase(self, the_plot):
    """Starts a new trial if the balls have reset; shows or hides the cue."""
    self._show_ball_symbol('neither')  # Clear ball symbol.
    # Reset ourselves if the balls have moved beyond the player. Until a ball
    # has reset for the first time the plot holds no 'last_ball_reset' entry;
    # `None` can't be ordered against a number in Python 3, so treat a missing
    # entry explicitly as "no reset yet".
    last_ball_reset = the_plot.get('last_ball_reset')
    if (last_ball_reset is not None and
        last_ball_reset > self._second_phase_last_reset):
      self._second_phase_reset(the_plot)
    # Show the cue if it's still visible in this trial.
    if self._second_phase_tick > 0:
      self._show_cue(self._second_phase_cue_choice)
      if self._always_show_ball_symbol:
        self._show_ball_symbol(
            self._cues_to_balls[self._second_phase_cue_choice])
    else:
      self._show_cue(None)
      self._show_ball_symbol(None)
    # Countdown second phase clock.
    self._second_phase_tick -= 1

  def _second_phase_reset(self, the_plot):
    """Picks the cue (and so the correct ball) for a new trial."""
    self._second_phase_cue_choice = random.randrange(self._NUM_CUES)
    # Tell the player sprite which ball earns the reward in this trial.
    the_plot['which_ball'] = self._cues_to_balls[self._second_phase_cue_choice]
    self._second_phase_tick = self._cue_duration
    self._second_phase_last_reset = the_plot.frame
    # Terminate if we've run out of trials.
    if self._num_trials_left <= 0: the_plot.terminate_episode()
    self._num_trials_left -= 1

  ## Display helpers

  def _show_phase_cue(self, phase):
    """Draws markers at both ends of rows 1-2 during the first phase only."""
    self.curtain[1:3, :] = False
    if phase == 'first':
      self.curtain[1:3, 0:2] = True
      self.curtain[1:3, -2:] = True
    # No cue for the second phase.

  def _show_ball_symbol(self, ball):
    """Draws the ball symbol on rows 3-4.

    Args:
      ball: 'top' fills the six leftmost columns, 'bottom' fills the six
          rightmost columns; any other value just clears the symbol.
    """
    self.curtain[3:5, :] = False
    if ball == 'top':
      self.curtain[3:5, 0:6] = True
    elif ball == 'bottom':
      self.curtain[3:5, -6:] = True

  def _show_cue(self, cue=None):
    """Draws cue number `cue` on the last two rows.

    Args:
      cue: index of the cue to show. `None`, or an index outside
          `[0, _NUM_CUES)`, just clears the cue area.
    """
    self.curtain[-2:, :] = False
    # `None` must be checked separately: `0 <= None` raises a TypeError in
    # Python 3 (in Python 2 it was merely False).
    if cue is None or not 0 <= cue < self._NUM_CUES: return
    # Each cue occupies its own equal-width slice of the board.
    width = self.curtain.shape[1] // self._NUM_CUES
    left = cue * width
    right = left + width
    self.curtain[-2:, left:right] = True


def main(argv):
  """Plays one game of Cued Catch in the terminal, as configured by `FLAGS`."""
  del argv  # Unused: the flags were already parsed at module level.

  # Build a cued_catch game from the command-line settings.
  game = make_game(
      initial_cue_duration=FLAGS.initial_cue_duration,
      cue_duration=FLAGS.cue_duration,
      num_trials=FLAGS.num_trials,
      always_show_ball_symbol=FLAGS.always_show_ball_symbol,
      reward_sigma=FLAGS.reward_sigma,
      reward_free_trials=FLAGS.reward_free_trials)

  # Arrow keys move the player and 'q'/'Q' quits. The -1 key yields action 3,
  # which the player sprite treats as "do nothing".
  key_bindings = {
      curses.KEY_UP: 1,
      curses.KEY_DOWN: 2,
      -1: 3,
      'q': 4,
      'Q': 4,
  }
  ui = human_ui.CursesUi(
      keys_to_actions=key_bindings, delay=200, colour_fg=COLOURS)

  # Let the game begin!
  ui.play(game)


if __name__ == '__main__':
  main(sys.argv)