python source code of TIN

# --------------------------------------------------------
# Tensorflow TIN
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.slim import arg_scope
from tensorflow.contrib.slim.python.slim.nets import resnet_utils
from tensorflow.contrib.slim.python.slim.nets import resnet_v1
from tensorflow.contrib.layers.python.layers import layers
from tensorflow.contrib.layers.python.layers import regularizers
from tensorflow.python.ops import nn_ops
from tensorflow.contrib.layers.python.layers import initializers
from tensorflow.python.framework import ops

from ult.config_vcoco import cfg
from ult.visualization import draw_bounding_boxes_HOI

import numpy as np
import ipdb

def resnet_arg_scope(is_training=True,
                     weight_decay=cfg.TRAIN.WEIGHT_DECAY,
                     batch_norm_decay=0.997,
                     batch_norm_epsilon=1e-5,
                     batch_norm_scale=True):
    batch_norm_params = {
        'is_training': False,
        'decay': batch_norm_decay,
        'epsilon': batch_norm_epsilon,
        'scale': batch_norm_scale,
        'trainable': False,
        'updates_collections': ops.GraphKeys.UPDATE_OPS
    }
    with arg_scope(
        [slim.conv2d, slim.fully_connected],
        weights_regularizer = tf.contrib.layers.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY),
        weights_initializer = slim.variance_scaling_initializer(),
        biases_regularizer  = tf.contrib.layers.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY), 
        biases_initializer  = tf.constant_initializer(0.0),
        trainable           = is_training,
        activation_fn       = tf.nn.relu,
        normalizer_fn       = slim.batch_norm,
        normalizer_params   = batch_norm_params):
        with arg_scope([slim.batch_norm], **batch_norm_params) as arg_sc:
            return arg_sc



class ResNet50():
    def __init__(self):
        self.visualize = {}
        self.intermediate = {}
        self.predictions = {}
        self.score_summaries = {}
        self.event_summaries = {}
        self.train_summaries = []
        self.losses = {}

        self.image       = tf.placeholder(tf.float32, shape=[1, None, None, 3], name = 'image')
        self.spatial     = tf.placeholder(tf.float32, shape=[None, 64, 64, 3], name = 'sp') # Pattern.reshape( num_pos_neg, 64, 64, 3) 
        self.Hsp_boxes   = tf.placeholder(tf.float32, shape=[None, 5], name = 'Hsp_boxes')
        self.O_boxes     = tf.placeholder(tf.float32, shape=[None, 5], name = 'O_boxes') #Object_augmented[:num_pos].reshape(num_pos, 5)
        self.gt_class_H  = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_H')
        self.gt_class_HO = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_HO')
        self.gt_class_sp = tf.placeholder(tf.float32, shape=[None, 29], name = 'gt_class_sp')
        self.Mask_HO     = tf.placeholder(tf.float32, shape=[None, 29], name = 'HO_mask')
        self.Mask_H      = tf.placeholder(tf.float32, shape=[None, 29], name = 'H_mask')
        self.Mask_sp     = tf.placeholder(tf.float32, shape=[None, 29], name = 'sp_mask')
        self.gt_binary_label = tf.placeholder(tf.float32, shape=[None, 2], name = 'gt_binary_label')
        self.H_num       = tf.placeholder(tf.int32)
        self.HO_weight   = np.array([3.3510249, 3.4552405, 4.0257854, 0.0, 4.088436, 
                                    3.4370995, 3.85842, 4.637334, 3.5487218, 3.536237, 
                                    2.5578923, 3.342811, 3.8897269, 4.70686, 3.3952892, 
                                    3.9706533, 4.504736, 0.0, 1.4873443, 3.700363, 
                                    4.1058283, 3.6298118, 0.0, 6.490651, 5.0808263, 
                                    1.520838, 3.3888445, 0.0, 3.9899964], dtype = 'float32').reshape(1,29)
        self.H_weight   = np.array([4.0984106, 4.102459, 4.0414762, 4.060745, 4.0414762, 
                                    3.9768186, 4.23686, 5.3542085, 3.723717, 3.4699364, 
                                    2.4587274, 3.7167964, 4.08836, 5.050695, 3.9077065, 
                                    4.534647, 3.4699364, 2.9466882, 1.8585607, 3.9433942, 
                                    3.9433942, 4.3523254, 3.8368235, 6.4963055, 5.138182, 
                                    1.7807873, 4.080392, 1.9544303, 4.5761204],dtype = 'float32').reshape(1,29)
        self.binary_weight = np.array([1.0986122886681098, 0.4054651081081644], dtype = 'float32').reshape(1,2)
        self.num_classes = 29
        self.num_binary  = 2 # existence (0 or 1) of HOI
        self.num_fc      = 1024
        self.scope       = 'resnet_v1_50'
        self.stride      = [16, ]
        self.lr          = tf.placeholder(tf.float32)
        if tf.__version__ == '1.1.0':
            self.blocks     = [resnet_utils.Block('block1', resnet_v1.bottleneck,[(256,   64, 1)] * 2 + [(256,   64, 2)]),
                               resnet_utils.Block('block2', resnet_v1.bottleneck,[(512,  128, 1)] * 3 + [(512,  128, 2)]),
                               resnet_utils.Block('block3', resnet_v1.bottleneck,[(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
                               resnet_utils.Block('block4', resnet_v1.bottleneck,[(2048, 512, 1)] * 3),
                               resnet_utils.Block('block5', resnet_v1.bottleneck,[(2048, 512, 1)] * 3)]
        else:
            from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block
            self.blocks = [resnet_v1_block('block1', base_depth=64,  num_units=3, stride=2),
                           resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
                           resnet_v1_block('block3', base_depth=256, num_units=6, stride=1),
                           resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
                           resnet_v1_block('block5', base_depth=512, num_units=3, stride=1)]

    def build_base(self):
        with tf.variable_scope(self.scope, self.scope):
            net = resnet_utils.conv2d_same(self.image, 64, 7, stride=2, scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', scope='pool1')

        return net

    def image_to_head(self, is_training):
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net    = self.build_base()
            net, _ = resnet_v1.resnet_v1(net,
                                         self.blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                         global_pool=False,
                                         include_root_block=False,
                                         scope=self.scope)
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            head, _ = resnet_v1.resnet_v1(net,
                                          self.blocks[cfg.RESNET.FIXED_BLOCKS:-2], # - (Restore_flag - 3)
                                          global_pool=False,
                                          include_root_block=False,
                                          scope=self.scope)
        return head


    def sp_to_head(self):
        with tf.variable_scope(self.scope, self.scope):
            conv1_sp      = slim.conv2d(self.spatial[:,:,:,0:2], 64, [5, 5], padding='VALID', scope='conv1_sp')
            pool1_sp      = slim.max_pool2d(conv1_sp, [2, 2], scope='pool1_sp')
            conv2_sp      = slim.conv2d(pool1_sp,     32, [5, 5], padding='VALID', scope='conv2_sp')
            pool2_sp      = slim.max_pool2d(conv2_sp, [2, 2], scope='pool2_sp')
            pool2_flat_sp = slim.flatten(pool2_sp)

        return pool2_flat_sp #Pattern.reshape( num_pos_neg, 64, 64, 2) 


    def res5(self, pool5_H, pool5_O, sp, is_training, name):
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):

            fc7_H, _ = resnet_v1.resnet_v1(pool5_H,
                                           self.blocks[-2:-1],
                                           global_pool=False,
                                           include_root_block=False,
                                           reuse=False,
                                           scope=self.scope)

            fc7_H = tf.reduce_mean(fc7_H, axis=[1, 2])


            fc7_O, _ = resnet_v1.resnet_v1(pool5_O,
                                       self.blocks[-1:],
                                       global_pool=False,
                                       include_root_block=False,
                                       reuse=False,
                                       scope=self.scope)

            fc7_O = tf.reduce_mean(fc7_O, axis=[1, 2])
        
        return fc7_H, fc7_O

    def head_to_tail(self, fc7_H, fc7_O, pool5_SH, pool5_SO, sp, is_training, name):
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):

            fc7_SH = tf.reduce_mean(pool5_SH, axis=[1, 2])
            fc7_SO = tf.reduce_mean(pool5_SO, axis=[1, 2])

            Concat_SH     = tf.concat([fc7_H[:self.H_num,:], fc7_SH], 1)

            fc8_SH        = slim.fully_connected(Concat_SH, self.num_fc, scope='fc8_SH')
            fc8_SH        = slim.dropout(fc8_SH, keep_prob=0.5, is_training=is_training, scope='dropout8_SH')
            fc9_SH        = slim.fully_connected(fc8_SH, self.num_fc, scope='fc9_SH')
            fc9_SH        = slim.dropout(fc9_SH, keep_prob=0.5, is_training=is_training, scope='dropout9_SH')  

            Concat_SO     = tf.concat([fc7_O, fc7_SO], 1)

            fc8_SO        = slim.fully_connected(Concat_SO, self.num_fc, scope='fc8_SO')
            fc8_SO        = slim.dropout(fc8_SO, keep_prob=0.5, is_training=is_training, scope='dropout8_SO')
            fc9_SO        = slim.fully_connected(fc8_SO, self.num_fc, scope='fc9_SO')
            fc9_SO        = slim.dropout(fc9_SO,    keep_prob=0.5, is_training=is_training, scope='dropout9_SO')  

            Concat_SHsp   = tf.concat([fc7_H, sp], 1)
            Concat_SHsp   = slim.fully_connected(Concat_SHsp, self.num_fc, scope='Concat_SHsp')
            Concat_SHsp   = slim.dropout(Concat_SHsp, keep_prob=0.5, is_training=is_training, scope='dropout6_SHsp')
            fc7_SHsp      = slim.fully_connected(Concat_SHsp, self.num_fc, scope='fc7_SHsp')
            fc7_SHsp      = slim.dropout(fc7_SHsp,  keep_prob=0.5, is_training=is_training, scope='dropout7_SHsp')

        return fc9_SH, fc9_SO, fc7_SHsp, fc7_SH, fc7_SO

    # binary discriminator for 0/1 classification of interaction, fc7_H, fc7_SH, fc7_O, fc7_SO, sp
    def binary_discriminator(self, fc7_H, fc7_O, fc7_SH, fc7_SO, sp, is_training, name):
        with tf.variable_scope(name) as scope:

            conv1_pose_map      = slim.conv2d(self.spatial[:,:,:,2:], 32, [5, 5], padding='VALID', scope='conv1_pose_map')
            pool1_pose_map      = slim.max_pool2d(conv1_pose_map, [2, 2], scope='pool1_pose_map')
            conv2_pose_map      = slim.conv2d(pool1_pose_map,     16, [5, 5], padding='VALID', scope='conv2_pose_map')
            pool2_pose_map      = slim.max_pool2d(conv2_pose_map, [2, 2], scope='pool2_pose_map')
            pool2_flat_pose_map = slim.flatten(pool2_pose_map)

            # fc7_H + sp---fc1024---fc8_binary_1
            fc_binary_1    = tf.concat([fc7_H, sp, pool2_flat_pose_map], 1) # pos + neg
            fc8_binary_1     = slim.fully_connected(fc_binary_1, 1024, scope = 'fc8_binary_1') # 1024? we should try
            fc8_binary_1     = slim.dropout(fc8_binary_1, keep_prob = cfg.TRAIN_DROP_OUT_BINARY, is_training = is_training, scope = 'dropout8_binary_1') # [pos + neg,1024]

            # fc7_O + fc7_SH + fc7_SO---fc1024---fc8_binary_2
            fc_binary_2    = tf.concat([fc7_O, fc7_SH], 1) # pos
            fc_binary_2    = tf.concat([fc_binary_2, fc7_SO], 1) # pos
            fc8_binary_2     = slim.fully_connected(fc_binary_2, 1024, scope = 'fc8_binary_2')
            fc8_binary_2     = slim.dropout(fc8_binary_2, keep_prob = cfg.TRAIN_DROP_OUT_BINARY, is_training = is_training, scope = 'dropout8_binary_2') # [pos,1024]
            fc8_binary_2   = tf.concat([fc8_binary_2, fc8_binary_1[self.H_num:,:]], 0) # copy the neg part of fc8_binary_1 and concat

            # fc8_binary_1 + fc8_binary_2---fc1024---fc9_binary
            fc8_binary     = tf.concat([fc8_binary_1, fc8_binary_2], 1)
            fc9_binary     = slim.fully_connected(fc8_binary, 1024, scope = 'fc9_binary')
            fc9_binary     = slim.dropout(fc9_binary, keep_prob = cfg.TRAIN_DROP_OUT_BINARY, is_training = is_training, scope = 'dropout9_binary')
        return fc9_binary

    def crop_pool_layer(self, bottom, rois, name):
        with tf.variable_scope(name) as scope:

            batch_ids    = tf.squeeze(tf.slice(rois, [0, 0], [-1, 1], name="batch_id"), [1])
            bottom_shape = tf.shape(bottom)
            height       = (tf.to_float(bottom_shape[1]) - 1.) * np.float32(self.stride[0])
            width        = (tf.to_float(bottom_shape[2]) - 1.) * np.float32(self.stride[0])
            x1 = tf.slice(rois, [0, 1], [-1, 1], name="x1") / width
            y1 = tf.slice(rois, [0, 2], [-1, 1], name="y1") / height
            x2 = tf.slice(rois, [0, 3], [-1, 1], name="x2") / width
            y2 = tf.slice(rois, [0, 4], [-1, 1], name="y2") / height

            bboxes        = tf.stop_gradient(tf.concat([y1, x1, y2, x2], axis=1))
            if cfg.RESNET.MAX_POOL:
                pre_pool_size = cfg.POOLING_SIZE * 2
                crops = tf.image.crop_and_resize(bottom, bboxes, tf.to_int32(batch_ids), [pre_pool_size, pre_pool_size], name="crops")
                crops = slim.max_pool2d(crops, [2, 2], padding='SAME')
            else:
                crops = tf.image.crop_and_resize(bottom, bboxes, tf.to_int32(batch_ids), [cfg.POOLING_SIZE, cfg.POOLING_SIZE], name="crops")
        return crops

    def attention_pool_layer_H(self, bottom, fc7_H, is_training, name):
        with tf.variable_scope(name) as scope:

            fc1         = slim.fully_connected(fc7_H, 512, scope='fc1_b')
            fc1         = slim.dropout(fc1, keep_prob=0.8, is_training=is_training, scope='dropout1_b')
            fc1         = tf.reshape(fc1, [tf.shape(fc1)[0], 1, 1, tf.shape(fc1)[1]])
            att         = tf.reduce_mean(tf.multiply(bottom, fc1), 3, keep_dims=True)
        return att


    def attention_norm_H(self, att, name):
        with tf.variable_scope(name) as scope:

            att         = tf.transpose(att, [0, 3, 1, 2])
            att_shape   = tf.shape(att)
            att         = tf.reshape(att, [att_shape[0], att_shape[1], -1])
            att         = tf.nn.softmax(att)
            att         = tf.reshape(att, att_shape)
            att         = tf.transpose(att, [0, 2, 3, 1])
        return att

    def attention_pool_layer_O(self, bottom, fc7_O, is_training, name):
        with tf.variable_scope(name) as scope:

            fc1         = slim.fully_connected(fc7_O, 512, scope='fc1_b')
            fc1         = slim.dropout(fc1, keep_prob=0.8, is_training=is_training, scope='dropout1_b')
            fc1         = tf.reshape(fc1, [tf.shape(fc1)[0], 1, 1, tf.shape(fc1)[1]])
            att         = tf.reduce_mean(tf.multiply(bottom, fc1), 3, keep_dims=True)
        return att


    def attention_norm_O(self, att, name):
        with tf.variable_scope(name) as scope:

            att         = tf.transpose(att, [0, 3, 1, 2])
            att_shape   = tf.shape(att)
            att         = tf.reshape(att, [att_shape[0], att_shape[1], -1])
            att         = tf.nn.softmax(att)
            att         = tf.reshape(att, att_shape)
            att         = tf.transpose(att, [0, 2, 3, 1])
        return att

    def region_classification(self, fc7_H, fc7_O, fc7_SHsp, is_training, initializer, name):
        with tf.variable_scope(name) as scope:
            cls_score_H  = slim.fully_connected(fc7_H, self.num_classes, 
                                               weights_initializer=initializer,
                                               trainable=is_training,
                                               activation_fn=None, scope='cls_score_H')
            cls_prob_H   = tf.nn.sigmoid(cls_score_H, name='cls_prob_H') 
            tf.reshape(cls_prob_H, [1, self.num_classes])   

            cls_score_O  = slim.fully_connected(fc7_O, self.num_classes, 
                                               weights_initializer=initializer,
                                               trainable=is_training,
                                               activation_fn=None, scope='cls_score_O')
            cls_prob_O  = tf.nn.sigmoid(cls_score_O, name='cls_prob_O') 
            tf.reshape(cls_prob_O, [1, self.num_classes]) 

            cls_score_sp = slim.fully_connected(fc7_SHsp, self.num_classes, 
                                               weights_initializer=initializer,
                                               trainable=is_training,
                                               activation_fn=None, scope='cls_score_sp')
            cls_prob_sp  = tf.nn.sigmoid(cls_score_sp, name='cls_prob_sp') 
            tf.reshape(cls_prob_sp, [1, self.num_classes]) 


            self.predictions["cls_score_H"] = cls_score_H
            self.predictions["cls_prob_H"]  = cls_prob_H
            self.predictions["cls_score_O"] = cls_score_O
            self.predictions["cls_prob_O"]  = cls_prob_O
            self.predictions["cls_score_sp"] = cls_score_sp
            self.predictions["cls_prob_sp"]  = cls_prob_sp

            self.predictions["cls_prob_HO_final"]  = cls_prob_sp * (cls_prob_O + cls_prob_H)

        return cls_prob_H, cls_prob_O, cls_prob_sp

    def binary_classification(self, fc9_binary, is_training, initializer, name):
        with tf.variable_scope(name) as scope:
            cls_score_binary = slim.fully_connected(fc9_binary, self.num_binary, 
                                               weights_initializer=initializer,
                                               trainable=is_training,
                                               activation_fn=None, scope='cls_score_binary')
            cls_prob_binary  = tf.nn.sigmoid(cls_score_binary, name='cls_prob_binary') 
            tf.reshape(cls_prob_binary, [1, self.num_binary])

            self.predictions["cls_score_binary"]  = cls_score_binary
            self.predictions["cls_prob_binary"]  = cls_prob_binary
        return cls_prob_binary

    def bottleneck(self, bottom, is_training, name, reuse=False):
        with tf.variable_scope(name) as scope:

            if reuse:
                scope.reuse_variables()

            head_bottleneck = slim.conv2d(bottom, 1024, [1, 1], scope=name)

        return head_bottleneck



    def build_network(self, is_training):
        initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)

        # ResNet Backbone
        head       = self.image_to_head(is_training)
        sp         = self.sp_to_head()
        pool5_H    = self.crop_pool_layer(head, self.Hsp_boxes, 'Crop_H')
        pool5_O    = self.crop_pool_layer(head, self.O_boxes,   'Crop_O')

        fc7_H, fc7_O = self.res5(pool5_H, pool5_O, sp, is_training, 'res5')

        head_phi = slim.conv2d(head, 512, [1, 1], scope='head_phi')

        head_g   = slim.conv2d(head, 512, [1, 1], scope='head_g')

        Att_H      = self.attention_pool_layer_H(head_phi, fc7_H[:self.H_num,:], is_training, 'Att_H')
        Att_H      = self.attention_norm_H(Att_H, 'Norm_Att_H')
        att_head_H = tf.multiply(head_g, Att_H)

        Att_O      = self.attention_pool_layer_O(head_phi, fc7_O, is_training, 'Att_O')
        Att_O      = self.attention_norm_O(Att_O, 'Norm_Att_O')
        att_head_O = tf.multiply(head_g, Att_O)

        pool5_SH     = self.bottleneck(att_head_H, is_training, 'bottleneck', False)
        pool5_SO     = self.bottleneck(att_head_O, is_training, 'bottleneck', True)

        fc9_SH, fc9_SO, fc7_SHsp, fc7_SH, fc7_SO = self.head_to_tail(fc7_H, fc7_O, pool5_SH, pool5_SO, sp, is_training, 'fc_HO')
        fc9_binary = self.binary_discriminator(fc7_H, fc7_O, fc7_SH, fc7_SO, sp, is_training, 'fc_binary')

        cls_prob_H, cls_prob_O, cls_prob_sp = self.region_classification(fc9_SH, fc9_SO, fc7_SHsp, is_training, initializer, 'classification')
        cls_prob_binary = self.binary_classification(fc9_binary, is_training, initializer, 'binary_classification')

        self.score_summaries.update(self.predictions)

        self.visualize["attention_map_H"] = (Att_H - tf.reduce_min(Att_H[0,:,:,:])) / tf.reduce_max((Att_H[0,:,:,:] - tf.reduce_min(Att_H[0,:,:,:])))
        self.visualize["attention_map_O"] = (Att_O - tf.reduce_min(Att_O[0,:,:,:])) / tf.reduce_max((Att_O[0,:,:,:] - tf.reduce_min(Att_O[0,:,:,:])))
        return cls_prob_H, cls_prob_O, cls_prob_sp, cls_prob_binary


    def create_architecture(self, is_training):

        cls_prob_H, cls_prob_O, cls_prob_sp, cls_prob_binary = self.build_network(is_training)

        for var in tf.trainable_variables():
            self.train_summaries.append(var)

        self.add_loss()
        layers_to_output = {}
        layers_to_output.update(self.losses)

        val_summaries = []
        with tf.device("/cpu:0"):
            val_summaries.append(self.add_gt_image_summary_H())
            val_summaries.append(self.add_gt_image_summary_HO())
            tf.summary.image('ATTENTION_MAP_H',  self.visualize["attention_map_H"], max_outputs=1)
            tf.summary.image('ATTENTION_MAP_O',  self.visualize["attention_map_O"], max_outputs=1)
            for key, var in self.event_summaries.items():
                val_summaries.append(tf.summary.scalar(key, var))
            # for key, var in self.score_summaries.items():
            #     self.add_score_summary(key, var)
            # for var in self.train_summaries:
            #     self.add_train_summary(var)
        
        val_summaries.append(tf.summary.scalar('lr', self.lr))
        self.summary_op     = tf.summary.merge_all()
        self.summary_op_val = tf.summary.merge(val_summaries)

        return layers_to_output


    def add_loss(self):

        with tf.variable_scope('LOSS') as scope:
            cls_score_H  = self.predictions["cls_score_H"]
            cls_score_O  = self.predictions["cls_score_O"]
            cls_score_sp = self.predictions["cls_score_sp"]
            cls_score_binary = self.predictions["cls_score_binary"]

            cls_score_H_with_weight = tf.multiply(cls_score_H, self.H_weight)
            cls_score_O_with_weight = tf.multiply(cls_score_O, self.HO_weight)
            # cls_score_sp_with_weight = tf.multiply(cls_score_sp, self.HO_weight)
            cls_score_binary_with_weight = tf.multiply(cls_score_binary, self.binary_weight)

            label_H      = self.gt_class_H
            label_HO     = self.gt_class_HO
            label_sp     = self.gt_class_sp
            label_binary = self.gt_binary_label

            H_mask       = self.Mask_H
            HO_mask      = self.Mask_HO
            sp_mask      = self.Mask_sp

            H_cross_entropy  = tf.reduce_mean(tf.multiply(tf.nn.sigmoid_cross_entropy_with_logits(labels = label_H,  logits = cls_score_H_with_weight),   H_mask))
            HO_cross_entropy = tf.reduce_mean(tf.multiply(tf.nn.sigmoid_cross_entropy_with_logits(labels = label_HO, logits = cls_score_O_with_weight),  HO_mask))
            sp_cross_entropy = tf.reduce_mean(tf.multiply(tf.nn.sigmoid_cross_entropy_with_logits(labels = label_sp, logits = cls_score_sp), sp_mask))
            binary_cross_entropy = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels = label_binary, logits = cls_score_binary_with_weight))


            self.losses['H_cross_entropy']  = H_cross_entropy
            self.losses['HO_cross_entropy'] = HO_cross_entropy
            self.losses['sp_cross_entropy'] = sp_cross_entropy
            self.losses['binary_cross_entropy'] = binary_cross_entropy

            loss = 2 * H_cross_entropy + HO_cross_entropy + sp_cross_entropy + binary_cross_entropy

            self.losses['total_loss'] = loss
            self.event_summaries.update(self.losses)

        return loss


    def add_gt_image_summary_H(self):

        image = tf.py_func(draw_bounding_boxes_HOI, 
                      [tf.reverse(self.image+cfg.PIXEL_MEANS, axis=[-1]), self.Hsp_boxes, self.gt_class_H],
                      tf.float32, name="gt_boxes_H")
        return tf.summary.image('GROUND_TRUTH_H', image)

    def add_gt_image_summary_HO(self):

        image = tf.py_func(draw_bounding_boxes_HOI, 
                      [tf.reverse(self.image+cfg.PIXEL_MEANS, axis=[-1]), self.O_boxes, self.gt_class_HO],
                      tf.float32, name="gt_boxes_HO")
        return tf.summary.image('GROUND_TRUTH_HO)', image)


    def add_score_summary(self, key, tensor):
        tf.summary.histogram('SCORE/' + tensor.op.name + '/' + key + '/scores', tensor)


    def add_train_summary(self, var):
        tf.summary.histogram('TRAIN/' + var.op.name, var)



    def train_step(self, sess, blobs, lr, train_op):
        feed_dict = {self.image: blobs['image'], self.Hsp_boxes: blobs['Hsp_boxes'],
                     self.O_boxes: blobs['O_boxes'], self.gt_class_H: blobs['gt_class_H'], 
                     self.gt_class_HO: blobs['gt_class_HO'], self.Mask_H: blobs['Mask_H'],
                     self.Mask_HO: blobs['Mask_HO'], self.spatial:blobs['sp'],
                     self.lr: lr, self.Mask_sp: blobs['Mask_sp'],
                     self.gt_class_sp: blobs['gt_class_sp'], self.H_num: blobs['H_num'],
                     self.gt_binary_label: blobs['binary_label']}
        
        loss_cls_H, loss_cls_HO, loss, _ = sess.run([self.losses['H_cross_entropy'],
                                                     self.losses['HO_cross_entropy'],
                                                     self.losses['total_loss'],
                                                     train_op],
                                                     feed_dict=feed_dict)
        return loss_cls_H, loss_cls_HO, loss

    def train_step_with_summary(self, sess, blobs, lr, train_op):
        feed_dict = {self.image: blobs['image'], self.Hsp_boxes: blobs['Hsp_boxes'],
                     self.O_boxes: blobs['O_boxes'], self.gt_class_H: blobs['gt_class_H'], 
                     self.gt_class_HO: blobs['gt_class_HO'], self.Mask_H: blobs['Mask_H'],
                     self.Mask_HO: blobs['Mask_HO'], self.spatial:blobs['sp'],
                     self.lr: lr, self.Mask_sp: blobs['Mask_sp'],
                     self.gt_class_sp: blobs['gt_class_sp'], self.H_num: blobs['H_num'],
                     self.gt_binary_label: blobs['binary_label']}

        loss_cls_H, loss_cls_HO, loss, summary, _ = sess.run([self.losses['H_cross_entropy'],
                                                              self.losses['HO_cross_entropy'],
                                                              self.losses['total_loss'],
                                                              self.summary_op,
                                                              train_op],
                                                              feed_dict=feed_dict)
        return loss_cls_H, loss_cls_HO, loss, summary
    
    def test_image_H(self, sess, image, blobs):
        feed_dict = {self.image: image, self.Hsp_boxes: blobs['H_boxes'], self.H_num: blobs['H_num']}

        cls_prob_H = sess.run([self.predictions["cls_prob_H"]], feed_dict=feed_dict)

        return cls_prob_H

    
    def test_image_HO(self, sess, image, blobs):
        feed_dict = {self.image: image, self.Hsp_boxes: blobs['H_boxes'], self.O_boxes: blobs['O_boxes'], self.spatial: blobs['sp'], self.H_num: blobs['H_num']}

        cls_prob_HO, cls_prob_binary = sess.run([self.predictions["cls_prob_HO_final"],self.predictions["cls_prob_binary"]], feed_dict=feed_dict)

        return cls_prob_HO, cls_prob_binary