# coding=utf-8
# Copyright 2020 The Tensor2Robot Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Lint as python3
"""T2RModels for duck pose prediction task."""

from typing import Optional, Tuple, Text

from absl import logging
import gin
import numpy as np
from six.moves import range
from tensor2robot.layers import vision_layers
from tensor2robot.models import abstract_model
from tensor2robot.models import critic_model
from tensor2robot.models import regression_model
from tensor2robot.preprocessors import abstract_preprocessor
from tensor2robot.research.dql_grasping_lib import tf_modules
from tensor2robot.utils import tensorspec_utils
import tensorflow.compat.v1 as tf  # tf
from tensorflow.contrib import framework
from tensorflow.contrib import layers

TensorSpec = tensorspec_utils.ExtendedTensorSpec  # pylint: disable=invalid-name
TRAIN = tf.estimator.ModeKeys.TRAIN
PREDICT = tf.estimator.ModeKeys.PREDICT


class DefaultPoseEnvContinuousPreprocessor(
    abstract_preprocessor.AbstractPreprocessor):
  """The default pose env preprocessor.

  This preprocessor simply converts all images from uint8 to float32.
  """

  def get_in_feature_specification(self, mode):
    """See base class.

    The incoming image is stored as uint8; everything else passes through
    the model's own feature specification unchanged.
    """
    feature_spec = tensorspec_utils.TensorSpecStruct()
    # Same shape/name/data_format as the model spec, but uint8 on the wire.
    feature_spec['state/image'] = TensorSpec(
        shape=self._model_feature_specification_fn(mode).state.image.shape,
        dtype=tf.uint8,
        name=self._model_feature_specification_fn(mode).state.image.name,
        data_format=self._model_feature_specification_fn(
            mode).state.image.data_format)
    feature_spec['action/pose'] = self._model_feature_specification_fn(
        mode).action.pose
    return feature_spec

  def get_in_label_specification(self, mode):
    """See base class."""
    return tensorspec_utils.flatten_spec_structure(
        self._model_label_specification_fn(mode))

  def get_out_feature_specification(self, mode):
    """See base class."""
    return tensorspec_utils.flatten_spec_structure(
        self._model_feature_specification_fn(mode))

  def get_out_label_specification(self, mode):
    """See base class."""
    return tensorspec_utils.flatten_spec_structure(
        self._model_label_specification_fn(mode))

  def _preprocess_fn(
      self,
      features,
      labels,
      mode,
  ):
    """See base class.

    Converts the uint8 input image to float32 in [0, 1]; labels pass
    through untouched.
    """
    features.state.image = tf.image.convert_image_dtype(features.state.image,
                                                        tf.float32)
    return features, labels


@gin.configurable
class PoseEnvContinuousMCModel(critic_model.CriticModel):
  """Continuous MC model for Pose Env."""

  def get_action_specification(self):
    # 2-D continuous pose action.
    pose_spec = TensorSpec(shape=(2,), dtype=tf.float32, name='pose')
    return tensorspec_utils.TensorSpecStruct(pose=pose_spec)

  def get_state_specification(self):
    image_spec = TensorSpec(
        shape=(64, 64, 3),
        dtype=tf.float32,
        name='state/image',
        data_format='jpeg')
    return tensorspec_utils.TensorSpecStruct(image=image_spec)

  @property
  def default_preprocessor_cls(self):
    return DefaultPoseEnvContinuousPreprocessor

  def get_label_specification(self, mode):
    del mode
    # Scalar reward label.
    reward_spec = TensorSpec(shape=(), dtype=tf.float32, name='reward')
    return tensorspec_utils.TensorSpecStruct(reward=reward_spec)

  def _q_features(self, state, action, is_training=True, reuse=tf.AUTO_REUSE):
    """Compute feature representation of state and action.

    This method is re-used for computing partial context features for RL^2
    algorithm.

    Args:
      state: Image observation tensor.
      action: Continuous-valued 1-D action tensor.
      is_training: Bool specifying whether this graph is being constructed
        for training or not.
      reuse: Bool or TF re-use ENUM.

    Returns:
      Q values of shape (num_batch, feature_size) where feature_size is
        h * w * 32.
    """
    # State-action embedding module.
    net = state
    channels = 32
    with tf.variable_scope('q_features', reuse=reuse):
      with framework.arg_scope(tf_modules.argscope(is_training=is_training)):
        for layer_index in range(3):
          net = layers.conv2d(net, channels, kernel_size=3)
          logging.info('conv%d %s', layer_index, net.get_shape())
        # Embed the action and broadcast it spatially over the conv map.
        action_context = layers.fully_connected(action, channels)
        _, h, w, _ = net.shape.as_list()
        num_batch_net = tf.shape(net)[0]
        num_batch_context = tf.shape(action_context)[0]
        # assume num_batch_context >= num_batch_net
        # (tiles each state to pair it with multiple candidate actions).
        net = tf.tile(net, [num_batch_context // num_batch_net, 1, 1, 1])
        action_context = tf.reshape(action_context,
                                    [num_batch_context, 1, 1, channels])
        action_context = tf.tile(action_context, [1, h, w, 1])
        net += action_context
        net = tf.layers.flatten(net)
    return net

  def q_func(self,
             features,
             scope,
             mode,
             config=None,
             params=None,
             reuse=tf.AUTO_REUSE):
    """Q-value network: conv state-action features followed by an MLP.

    Args:
      features: Spec structure with `state.image` and `action.pose`.
      scope: String variable scope for the network.
      mode: (ModeKeys) training / eval / predict mode.
      config: Unused estimator config.
      params: Unused hyper-parameter dict.
      reuse: Bool or TF re-use ENUM for variable sharing.

    Returns:
      Dict with key 'q_predicted' mapping to a rank-1 tensor of Q values.
    """
    del params
    is_training = mode == TRAIN
    with tf.variable_scope(scope, reuse=reuse, use_resource=True):
      image = tf.image.convert_image_dtype(features.state.image, tf.float32)
      net = self._q_features(
          image, features.action.pose, is_training=is_training, reuse=reuse)
      net = layers.stack(net, layers.fully_connected, [100, 100])
      # Final linear head: no activation/normalization on the Q estimate.
      net = layers.fully_connected(
          net,
          num_outputs=1,
          normalizer_fn=None,
          weights_regularizer=None,
          activation_fn=None)
    return {'q_predicted': tf.squeeze(net, 1)}

  def pack_features(self, state, context, timestep, actions):
    """Packs a single observation and candidate actions for inference."""
    del context, timestep
    # Add a leading batch dimension of 1 to the single observation.
    batch_obs = np.expand_dims(state, 0)
    return tensorspec_utils.TensorSpecStruct(state=batch_obs, action=actions)


class DefaultPoseEnvRegressionPreprocessor(
    abstract_preprocessor.AbstractPreprocessor):
  """The default pose env preprocessor.

  This preprocessor simply converts all images from uint8 to float32.
  """

  def get_in_feature_specification(self, mode):
    """See base class.

    The incoming state image is stored as uint8.
    """
    feature_spec = tensorspec_utils.TensorSpecStruct()
    feature_spec['state'] = TensorSpec(
        shape=self._model_feature_specification_fn(mode).state.shape,
        dtype=tf.uint8,
        name=self._model_feature_specification_fn(mode).state.name,
        data_format=self._model_feature_specification_fn(
            mode).state.data_format)
    return feature_spec

  def get_in_label_specification(self, mode):
    """See base class."""
    return tensorspec_utils.flatten_spec_structure(
        self._model_label_specification_fn(mode))

  def get_out_feature_specification(self, mode):
    """See base class."""
    return tensorspec_utils.flatten_spec_structure(
        self._model_feature_specification_fn(mode))

  def get_out_label_specification(self, mode):
    """See base class."""
    return tensorspec_utils.flatten_spec_structure(
        self._model_label_specification_fn(mode))

  def _preprocess_fn(
      self,
      features,
      labels,
      mode
  ):
    """See base class.

    Converts the uint8 state image to float32 in [0, 1].
    """
    features.state = tf.image.convert_image_dtype(features.state, tf.float32)
    return features, labels


@gin.configurable
class PoseEnvRegressionModel(regression_model.RegressionModel):
  """Continuous regression output model for Pose Env."""

  @property
  def default_preprocessor_cls(self):
    return DefaultPoseEnvRegressionPreprocessor

  def get_feature_specification(self, mode):
    del mode
    state_spec = TensorSpec(
        shape=(64, 64, 3),
        dtype=tf.float32,
        name='state/image',
        data_format='jpeg')
    return tensorspec_utils.TensorSpecStruct(state=state_spec)

  def get_label_specification(self, mode):
    del mode
    # NOTE(review): the original wrote shape=(self._action_size) and
    # shape=(1) — parenthesized scalars, not tuples. tf.TensorShape treats a
    # single integer as one dimension, so spelling them as 1-tuples is
    # behavior-identical and unambiguous.
    target_spec = TensorSpec(
        shape=(self._action_size,), dtype=tf.float32, name='target_pose')
    reward_spec = TensorSpec(shape=(1,), dtype=tf.float32, name='reward')
    return tensorspec_utils.TensorSpecStruct(
        target_pose=target_spec, reward=reward_spec)

  def pack_features(self, state, context, timestep):
    """Packs a single observation into a batched feature structure."""
    del context, timestep
    # Add a leading batch dimension of 1 to the single observation.
    batch_obs = np.expand_dims(state, 0)
    return tensorspec_utils.TensorSpecStruct(state=batch_obs)

  @property
  def action_size(self):
    return self._action_size

  def get_config(self):
    """This model trains fairly quickly so evaluate frequently."""
    return tf.estimator.RunConfig(
        save_checkpoints_steps=2000, keep_checkpoint_max=5)

  def a_func(
      self,
      features,
      scope,
      mode,
      config=None,
      params=None,
      reuse=tf.AUTO_REUSE,
      context_fn=None,
  ):
    """A (state) regression function.

    This function can return a stochastic or a deterministic tensor.

    Args:
      features: This is the first item returned from the input_fn and parsed
        by tensorspec_utils.validate_and_pack. A spec_structure which
        fulfills the requirements of the self.get_feature_spefication.
      scope: String specifying variable scope.
      mode: (ModeKeys) Specifies if this is training, evaluation or
        prediction.
      config: Optional configuration object. Will receive what is passed to
        Estimator in config parameter, or the default config. Allows
        updating things in your model_fn based on configuration such as
        num_ps_replicas, or model_dir.
      params: An optional dict of hyper parameters that will be passed into
        input_fn and model_fn. Keys are names of parameters, values are
        basic python types. There are reserved keys for TPUEstimator,
        including 'batch_size'.
      reuse: Whether or not to reuse variables under variable scope 'scope'.
      context_fn: Optional python function that takes in features and returns
        new features of same shape. For merging information like in RL^2.

    Returns:
      outputs: A {key: Tensor} mapping. The key 'action' is required.
    """
    del config
    is_training = mode == TRAIN
    image = tf.image.convert_image_dtype(features.state, tf.float32)
    with tf.variable_scope(scope, reuse=reuse, use_resource=True):
      with tf.variable_scope('state_features', reuse=reuse,
                             use_resource=True):
        feature_points, end_points = vision_layers.BuildImagesToFeaturesModel(
            image,
            is_training=is_training,
            normalizer_fn=layers.layer_norm)
      del end_points
      if context_fn:
        feature_points = context_fn(feature_points)
      estimated_pose, _ = vision_layers.BuildImageFeaturesToPoseModel(
          feature_points, num_outputs=self._action_size)
    return {'inference_output': estimated_pose,
            'state_features': feature_points}

  def loss_fn(self, labels, inference_outputs, mode, params=None):
    """Reward-weighted MSE between predicted and target pose."""
    del mode
    return tf.losses.mean_squared_error(
        labels=labels.target_pose,
        predictions=inference_outputs['inference_output'],
        weights=labels.reward)