Python nets.resnet_v1.resnet_arg_scope() Examples

The following are 30 code examples of nets.resnet_v1.resnet_arg_scope(). You can go to the original project or source file by following the links above each example, or check out all available functions/classes of the module nets.resnet_v1.
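Before diving into the examples, here is a minimal, hypothetical sketch of the canonical usage pattern most of them follow (it assumes a TF 1.x environment with tf.contrib.slim and the slim nets package on the path): resnet_arg_scope() supplies the weight-regularization and batch-norm defaults that the resnet_v1 network builders expect, and is entered via slim.arg_scope().

import tensorflow as tf
import tensorflow.contrib.slim as slim
from nets import resnet_v1

# Minimal usage sketch (TF 1.x assumed): enter the arg scope, then build
# the network inside it so conv/batch-norm layers pick up its defaults.
images = tf.placeholder(tf.float32, [None, 224, 224, 3])
with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=1e-4)):
    logits, end_points = resnet_v1.resnet_v1_50(
        images, num_classes=1000, is_training=False)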
Example #1
Source File: resnet_v1_101.py    From vehicle-triplet-reid with MIT License
def endpoints(image, is_training):
    if image.get_shape().ndims != 4:
        raise ValueError('Input must be of size [batch, height, width, 3]')

    image = image - tf.constant(_RGB_MEAN, dtype=tf.float32, shape=(1,1,1,3))

    with tf.contrib.slim.arg_scope(resnet_arg_scope(batch_norm_decay=0.9, weight_decay=0.0)):
        _, endpoints = resnet_v1_101(image, num_classes=None, is_training=is_training, global_pool=True)

    endpoints['model_output'] = endpoints['global_pool'] = tf.reduce_mean(
        endpoints['resnet_v1_101/block4'], [1, 2], name='pool5', keep_dims=False)

    return endpoints, 'resnet_v1_101' 
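For context, a minimal sketch of how an endpoints() helper like the one above might be driven (hypothetical input size; assumes TF 1.x):

import tensorflow as tf

# Hypothetical driver for the endpoints() helper above (TF 1.x assumed;
# 256x128 is just an illustrative person re-id input size).
images = tf.placeholder(tf.float32, [None, 256, 128, 3], name='images')
eps, scope_name = endpoints(images, is_training=False)
embedding = eps['model_output']  # [batch, 2048] after global average pooling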
Example #2
Source File: resnet_v1_50.py    From vehicle-triplet-reid with MIT License
def endpoints(image, is_training):
    if image.get_shape().ndims != 4:
        raise ValueError('Input must be of size [batch, height, width, 3]')

    image = image - tf.constant(_RGB_MEAN, dtype=tf.float32, shape=(1,1,1,3))

    with tf.contrib.slim.arg_scope(resnet_arg_scope(batch_norm_decay=0.9, weight_decay=0.0)):
        _, endpoints = resnet_v1_50(image, num_classes=None, is_training=is_training, global_pool=True)

    endpoints['model_output'] = endpoints['global_pool'] = tf.reduce_mean(
        endpoints['resnet_v1_50/block4'], [1, 2], name='pool5', keep_dims=False)

    return endpoints, 'resnet_v1_50' 
Example #3
Source File: rf_benchmark.py    From receptive_field with Apache License 2.0
def _resnet_rf(csv_writer=None):
  """Computes RF and associated parameters for resnet models.

  The computed values are written to stdout.

  Args:
    csv_writer: A CSV writer for RF parameters, which is used if it is not None.
  """
  for model_type in _SUPPORTED_RESNET_VARIANTS:
    arg_sc = resnet_v1.resnet_arg_scope()
    _process_model_rf_and_flops(model_type, csv_writer, arg_sc) 
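Note that under TF 1.x slim, resnet_arg_scope() simply returns a scope dictionary mapping internal op keys to their default kwargs, which is why it can be captured in a variable (arg_sc above) and passed around before being entered. A quick, hypothetical way to inspect it:

from nets import resnet_v1

# resnet_arg_scope() returns a plain dict of {op key: default kwargs};
# printing it shows which layer types it configures.
arg_sc = resnet_v1.resnet_arg_scope(weight_decay=1e-4)
for op_key, op_kwargs in arg_sc.items():
    print(op_key, sorted(op_kwargs))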
Example #4
Source File: resnet_v1_101.py    From triplet-reid with MIT License
def endpoints(image, is_training):
    if image.get_shape().ndims != 4:
        raise ValueError('Input must be of size [batch, height, width, 3]')

    image = image - tf.constant(_RGB_MEAN, dtype=tf.float32, shape=(1,1,1,3))

    with tf.contrib.slim.arg_scope(resnet_arg_scope(batch_norm_decay=0.9, weight_decay=0.0)):
        _, endpoints = resnet_v1_101(image, num_classes=None, is_training=is_training, global_pool=True)

    endpoints['model_output'] = endpoints['global_pool'] = tf.reduce_mean(
        endpoints['resnet_v1_101/block4'], [1, 2], name='pool5')

    return endpoints, 'resnet_v1_101' 
Example #5
Source File: resnet_v1_50.py    From triplet-reid with MIT License
def endpoints(image, is_training):
    if image.get_shape().ndims != 4:
        raise ValueError('Input must be of size [batch, height, width, 3]')

    image = image - tf.constant(_RGB_MEAN, dtype=tf.float32, shape=(1,1,1,3))

    with tf.contrib.slim.arg_scope(resnet_arg_scope(batch_norm_decay=0.9, weight_decay=0.0)):
        _, endpoints = resnet_v1_50(image, num_classes=None, is_training=is_training, global_pool=True)

    endpoints['model_output'] = endpoints['global_pool'] = tf.reduce_mean(
        endpoints['resnet_v1_50/block4'], [1, 2], name='pool5')

    return endpoints, 'resnet_v1_50' 
Example #6
Source File: delf_v1.py    From g-tensorflow-models with Apache License 2.0
def GetAttentionPrelogit(
      self,
      images,
      weight_decay=0.0001,
      attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
      attention_type=_SUPPORTED_ATTENTION_TYPES[0],
      kernel=1,
      training_resnet=False,
      training_attention=False,
      reuse=False,
      use_batch_norm=True):
    """Constructs attention model on resnet_v1_50.

    Args:
      images: A tensor of size [batch, height, width, channels].
      weight_decay: The coefficient for the weight decay regularizer.
      attention_nonlinear: Type of non-linearity on top of the attention
        function.
      attention_type: Type of the attention structure.
      kernel: Convolutional kernel to use in attention layers (e.g., [3, 3]).
      training_resnet: Whether or not the Resnet blocks from the model are in
        training mode.
      training_attention: Whether or not the attention part of the model is
        in training mode.
      reuse: Whether or not the layer and its variables should be reused.
      use_batch_norm: Whether or not to use batch normalization.

    Returns:
      prelogits: A tensor of size [batch, 1, 1, channels].
      attention_prob: Attention score after the non-linearity.
      attention_score: Attention score before the non-linearity.
      feature_map: Features extracted from the model, which are not
        l2-normalized.
      end_points: Set of activations for external use.
    """
    # Construct Resnet50 features.
    with slim.arg_scope(
        resnet_v1.resnet_arg_scope(use_batch_norm=use_batch_norm)):
      _, end_points = self.GetResnet50Subnetwork(
          images, is_training=training_resnet, reuse=reuse)

    feature_map = end_points[self._target_layer_type]

    # Construct attention subnetwork on top of features.
    with slim.arg_scope(
        resnet_v1.resnet_arg_scope(
            weight_decay=weight_decay, use_batch_norm=use_batch_norm)):
      with slim.arg_scope([slim.batch_norm], is_training=training_attention):
        (prelogits, attention_prob, attention_score,
         end_points) = self._GetAttentionSubnetwork(
             feature_map,
             end_points,
             attention_nonlinear=attention_nonlinear,
             attention_type=attention_type,
             kernel=kernel,
             reuse=reuse)

    return prelogits, attention_prob, attention_score, feature_map, end_points 
Example #7
Source File: ssd_resnet_v1_fpn_feature_extractor.py    From Elphas with Apache License 2.0
def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: depth multiplier is not supported.
    """
    if self._depth_multiplier != 1.0:
      raise ValueError('Depth multiplier not supported.')

    preprocessed_inputs = shape_utils.check_min_image_dim(
        129, preprocessed_inputs)

    with tf.variable_scope(
        self._resnet_scope_name, reuse=self._reuse_weights) as scope:
      with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        _, image_features = self._resnet_base_fn(
            inputs=ops.pad_to_multiple(preprocessed_inputs,
                                       self._pad_to_multiple),
            num_classes=None,
            is_training=self._is_training and self._batch_norm_trainable,
            global_pool=False,
            output_stride=None,
            store_non_strided_activations=True,
            scope=scope)
      image_features = self._filter_features(image_features)
      last_feature_map = image_features['block4']
    with tf.variable_scope(self._fpn_scope_name, reuse=self._reuse_weights):
      with slim.arg_scope(self._conv_hyperparams):
        for i in range(5, 7):
          last_feature_map = slim.conv2d(
              last_feature_map,
              num_outputs=256,
              kernel_size=[3, 3],
              stride=2,
              padding='SAME',
              scope='block{}'.format(i))
          image_features['bottomup_{}'.format(i)] = last_feature_map
        feature_maps = feature_map_generators.fpn_top_down_feature_maps(
            [
                image_features[key] for key in
                ['block2', 'block3', 'block4', 'bottomup_5', 'bottomup_6']
            ],
            depth=256,
            scope='top_down_features')
    return feature_maps.values() 
Example #8
Source File: delf_v1.py    From object_detection_with_tensorflow with MIT License
def GetAttentionPrelogit(
      self,
      images,
      weight_decay=0.0001,
      attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
      attention_type=_SUPPORTED_ATTENTION_TYPES[0],
      kernel=1,
      training_resnet=False,
      training_attention=False,
      reuse=False,
      use_batch_norm=True):
    """Constructs attention model on resnet_v1_50.

    Args:
      images: A tensor of size [batch, height, width, channels].
      weight_decay: The coefficient for the weight decay regularizer.
      attention_nonlinear: Type of non-linearity on top of the attention
        function.
      attention_type: Type of the attention structure.
      kernel: Convolutional kernel to use in attention layers (e.g., [3, 3]).
      training_resnet: Whether or not the Resnet blocks from the model are in
        training mode.
      training_attention: Whether or not the attention part of the model is
        in training mode.
      reuse: Whether or not the layer and its variables should be reused.
      use_batch_norm: Whether or not to use batch normalization.

    Returns:
      prelogits: A tensor of size [batch, 1, 1, channels].
      attention_prob: Attention score after the non-linearity.
      attention_score: Attention score before the non-linearity.
      feature_map: Features extracted from the model, which are not
        l2-normalized.
      end_points: Set of activations for external use.
    """
    # Construct Resnet50 features.
    with slim.arg_scope(
        resnet_v1.resnet_arg_scope(use_batch_norm=use_batch_norm)):
      _, end_points = self.GetResnet50Subnetwork(
          images, is_training=training_resnet, reuse=reuse)

    feature_map = end_points[self._target_layer_type]

    # Construct attention subnetwork on top of features.
    with slim.arg_scope(
        resnet_v1.resnet_arg_scope(
            weight_decay=weight_decay, use_batch_norm=use_batch_norm)):
      with slim.arg_scope([slim.batch_norm], is_training=training_attention):
        (prelogits, attention_prob, attention_score,
         end_points) = self._GetAttentionSubnetwork(
             feature_map,
             end_points,
             attention_nonlinear=attention_nonlinear,
             attention_type=attention_type,
             kernel=kernel,
             reuse=reuse)

    return prelogits, attention_prob, attention_score, feature_map, end_points 
Example #9
Source File: delf_v1.py    From object_detection_with_tensorflow with MIT License
def _GetAttentionModel(
      self,
      images,
      num_classes,
      weight_decay=0.0001,
      attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
      attention_type=_SUPPORTED_ATTENTION_TYPES[0],
      kernel=1,
      training_resnet=False,
      training_attention=False,
      reuse=False):
    """Constructs attention model on resnet_v1_50.

    Args:
      images: A tensor of size [batch, height, width, channels].
      num_classes: The number of output classes.
      weight_decay: The coefficient for the weight decay regularizer.
      attention_nonlinear: Type of non-linearity on top of the attention
        function.
      attention_type: Type of the attention structure.
      kernel: Convolutional kernel to use in attention layers (e.g., [3, 3]).
      training_resnet: Whether or not the Resnet blocks from the model are in
        training mode.
      training_attention: Whether or not the attention part of the model is in
        training mode.
      reuse: Whether or not the layer and its variables should be reused.

    Returns:
      logits: A tensor of size [batch, num_classes].
      attention_prob: Attention score after the non-linearity.
      attention_score: Attention score before the non-linearity.
      feature_map: Features extracted from the model, which are not
        l2-normalized.
    """

    attention_feat, attention_prob, attention_score, feature_map, _ = (
        self.GetAttentionPrelogit(
            images,
            weight_decay,
            attention_nonlinear=attention_nonlinear,
            attention_type=attention_type,
            kernel=kernel,
            training_resnet=training_resnet,
            training_attention=training_attention,
            reuse=reuse))
    with slim.arg_scope(
        resnet_v1.resnet_arg_scope(
            weight_decay=weight_decay, batch_norm_scale=True)):
      with slim.arg_scope([slim.batch_norm], is_training=training_attention):
        with tf.variable_scope(
            _ATTENTION_VARIABLE_SCOPE, values=[attention_feat], reuse=reuse):
          logits = slim.conv2d(
              attention_feat,
              num_classes, [1, 1],
              activation_fn=None,
              normalizer_fn=None,
              scope='logits')
          logits = tf.squeeze(logits, [1, 2], name='spatial_squeeze')
    return logits, attention_prob, attention_score, feature_map 
Example #10
Source File: ssd_resnet_v1_ppn_feature_extractor.py    From MAX-Object-Detector with Apache License 2.0
def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: depth multiplier is not supported.
    """
    if self._depth_multiplier != 1.0:
      raise ValueError('Depth multiplier not supported.')

    preprocessed_inputs = shape_utils.check_min_image_dim(
        129, preprocessed_inputs)

    with tf.variable_scope(
        self._resnet_scope_name, reuse=self._reuse_weights) as scope:
      with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          with slim.arg_scope(
              [resnet_v1.bottleneck],
              use_bounded_activations=self._use_bounded_activations):
            _, activations = self._resnet_base_fn(
                inputs=ops.pad_to_multiple(preprocessed_inputs,
                                           self._pad_to_multiple),
                num_classes=None,
                is_training=None,
                global_pool=False,
                output_stride=None,
                store_non_strided_activations=True,
                scope=scope)

      with slim.arg_scope(self._conv_hyperparams_fn()):
        feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
            base_feature_map_depth=self._base_feature_map_depth,
            num_layers=self._num_layers,
            image_features={
                'image_features': self._filter_features(activations)['block3']
            })
    return feature_maps.values() 
Example #11
Source File: ssd_resnet_v1_ppn_feature_extractor.py    From multilabel-image-classification-tensorflow with MIT License
def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: depth multiplier is not supported.
    """
    if self._depth_multiplier != 1.0:
      raise ValueError('Depth multiplier not supported.')

    preprocessed_inputs = shape_utils.check_min_image_dim(
        129, preprocessed_inputs)

    with tf.variable_scope(
        self._resnet_scope_name, reuse=self._reuse_weights) as scope:
      with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          with slim.arg_scope(
              [resnet_v1.bottleneck],
              use_bounded_activations=self._use_bounded_activations):
            _, activations = self._resnet_base_fn(
                inputs=ops.pad_to_multiple(preprocessed_inputs,
                                           self._pad_to_multiple),
                num_classes=None,
                is_training=None,
                global_pool=False,
                output_stride=None,
                store_non_strided_activations=True,
                scope=scope)

      with slim.arg_scope(self._conv_hyperparams_fn()):
        feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
            base_feature_map_depth=self._base_feature_map_depth,
            num_layers=self._num_layers,
            image_features={
                'image_features': self._filter_features(activations)['block3']
            })
    return feature_maps.values() 
Example #12
Source File: delf_v1.py    From g-tensorflow-models with Apache License 2.0
def _GetAttentionModel(
      self,
      images,
      num_classes,
      weight_decay=0.0001,
      attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
      attention_type=_SUPPORTED_ATTENTION_TYPES[0],
      kernel=1,
      training_resnet=False,
      training_attention=False,
      reuse=False):
    """Constructs attention model on resnet_v1_50.

    Args:
      images: A tensor of size [batch, height, width, channels].
      num_classes: The number of output classes.
      weight_decay: The coefficient for the weight decay regularizer.
      attention_nonlinear: Type of non-linearity on top of the attention
        function.
      attention_type: Type of the attention structure.
      kernel: Convolutional kernel to use in attention layers (e.g., [3, 3]).
      training_resnet: Whether or not the Resnet blocks from the model are in
        training mode.
      training_attention: Whether or not the attention part of the model is in
        training mode.
      reuse: Whether or not the layer and its variables should be reused.

    Returns:
      logits: A tensor of size [batch, num_classes].
      attention_prob: Attention score after the non-linearity.
      attention_score: Attention score before the non-linearity.
      feature_map: Features extracted from the model, which are not
        l2-normalized.
    """

    attention_feat, attention_prob, attention_score, feature_map, _ = (
        self.GetAttentionPrelogit(
            images,
            weight_decay,
            attention_nonlinear=attention_nonlinear,
            attention_type=attention_type,
            kernel=kernel,
            training_resnet=training_resnet,
            training_attention=training_attention,
            reuse=reuse))
    with slim.arg_scope(
        resnet_v1.resnet_arg_scope(
            weight_decay=weight_decay, batch_norm_scale=True)):
      with slim.arg_scope([slim.batch_norm], is_training=training_attention):
        with tf.variable_scope(
            _ATTENTION_VARIABLE_SCOPE, values=[attention_feat], reuse=reuse):
          logits = slim.conv2d(
              attention_feat,
              num_classes, [1, 1],
              activation_fn=None,
              normalizer_fn=None,
              scope='logits')
          logits = tf.squeeze(logits, [1, 2], name='spatial_squeeze')
    return logits, attention_prob, attention_score, feature_map 
Example #13
Source File: ssd_resnet_v1_ppn_feature_extractor.py    From g-tensorflow-models with Apache License 2.0
def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: depth multiplier is not supported.
    """
    if self._depth_multiplier != 1.0:
      raise ValueError('Depth multiplier not supported.')

    preprocessed_inputs = shape_utils.check_min_image_dim(
        129, preprocessed_inputs)

    with tf.variable_scope(
        self._resnet_scope_name, reuse=self._reuse_weights) as scope:
      with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          with slim.arg_scope(
              [resnet_v1.bottleneck],
              use_bounded_activations=self._use_bounded_activations):
            _, activations = self._resnet_base_fn(
                inputs=ops.pad_to_multiple(preprocessed_inputs,
                                           self._pad_to_multiple),
                num_classes=None,
                is_training=None,
                global_pool=False,
                output_stride=None,
                store_non_strided_activations=True,
                scope=scope)

      with slim.arg_scope(self._conv_hyperparams_fn()):
        feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
            base_feature_map_depth=self._base_feature_map_depth,
            num_layers=self._num_layers,
            image_features={
                'image_features': self._filter_features(activations)['block3']
            })
    return feature_maps.values() 
Example #14
Source File: ssd_resnet_v1_ppn_feature_extractor.py    From models with Apache License 2.0
def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: depth multiplier is not supported.
    """
    if self._depth_multiplier != 1.0:
      raise ValueError('Depth multiplier not supported.')

    preprocessed_inputs = shape_utils.check_min_image_dim(
        129, preprocessed_inputs)

    with tf.variable_scope(
        self._resnet_scope_name, reuse=self._reuse_weights) as scope:
      with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          with slim.arg_scope(
              [resnet_v1.bottleneck],
              use_bounded_activations=self._use_bounded_activations):
            _, activations = self._resnet_base_fn(
                inputs=ops.pad_to_multiple(preprocessed_inputs,
                                           self._pad_to_multiple),
                num_classes=None,
                is_training=None,
                global_pool=False,
                output_stride=None,
                store_non_strided_activations=True,
                scope=scope)

      with slim.arg_scope(self._conv_hyperparams_fn()):
        feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
            base_feature_map_depth=self._base_feature_map_depth,
            num_layers=self._num_layers,
            image_features={
                'image_features': self._filter_features(activations)['block3']
            })
    return list(feature_maps.values()) 
Example #15
Source File: ssd_resnet_v1_fpn_feature_extractor.py    From models with Apache License 2.0
def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        129, preprocessed_inputs)

    with tf.variable_scope(
        self._resnet_scope_name, reuse=self._reuse_weights) as scope:
      with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          _, image_features = self._resnet_base_fn(
              inputs=ops.pad_to_multiple(preprocessed_inputs,
                                         self._pad_to_multiple),
              num_classes=None,
              is_training=None,
              global_pool=False,
              output_stride=None,
              store_non_strided_activations=True,
              min_base_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              scope=scope)
          image_features = self._filter_features(image_features)
      depth_fn = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
      with slim.arg_scope(self._conv_hyperparams_fn()):
        with tf.variable_scope(self._fpn_scope_name,
                               reuse=self._reuse_weights):
          base_fpn_max_level = min(self._fpn_max_level, 5)
          feature_block_list = []
          for level in range(self._fpn_min_level, base_fpn_max_level + 1):
            feature_block_list.append('block{}'.format(level - 1))
          fpn_features = feature_map_generators.fpn_top_down_feature_maps(
              [(key, image_features[key]) for key in feature_block_list],
              depth=depth_fn(self._additional_layer_depth),
              use_native_resize_op=self._use_native_resize_op)
          feature_maps = []
          for level in range(self._fpn_min_level, base_fpn_max_level + 1):
            feature_maps.append(
                fpn_features['top_down_block{}'.format(level - 1)])
          last_feature_map = fpn_features['top_down_block{}'.format(
              base_fpn_max_level - 1)]
          # Construct coarse features
          for i in range(base_fpn_max_level, self._fpn_max_level):
            last_feature_map = slim.conv2d(
                last_feature_map,
                num_outputs=depth_fn(self._additional_layer_depth),
                kernel_size=[3, 3],
                stride=2,
                padding='SAME',
                scope='bottom_up_block{}'.format(i))
            feature_maps.append(last_feature_map)
    return feature_maps 
Example #16
Source File: delf_v1.py    From multilabel-image-classification-tensorflow with MIT License
def GetAttentionPrelogit(
      self,
      images,
      weight_decay=0.0001,
      attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
      attention_type=_SUPPORTED_ATTENTION_TYPES[0],
      kernel=1,
      training_resnet=False,
      training_attention=False,
      reuse=False,
      use_batch_norm=True):
    """Constructs attention model on resnet_v1_50.

    Args:
      images: A tensor of size [batch, height, width, channels].
      weight_decay: The coefficient for the weight decay regularizer.
      attention_nonlinear: Type of non-linearity on top of the attention
        function.
      attention_type: Type of the attention structure.
      kernel: Convolutional kernel to use in attention layers (e.g., [3, 3]).
      training_resnet: Whether or not the Resnet blocks from the model are in
        training mode.
      training_attention: Whether or not the attention part of the model is
        in training mode.
      reuse: Whether or not the layer and its variables should be reused.
      use_batch_norm: Whether or not to use batch normalization.

    Returns:
      prelogits: A tensor of size [batch, 1, 1, channels].
      attention_prob: Attention score after the non-linearity.
      attention_score: Attention score before the non-linearity.
      feature_map: Features extracted from the model, which are not
        l2-normalized.
      end_points: Set of activations for external use.
    """
    # Construct Resnet50 features.
    with slim.arg_scope(
        resnet_v1.resnet_arg_scope(use_batch_norm=use_batch_norm)):
      _, end_points = self.GetResnet50Subnetwork(
          images, is_training=training_resnet, reuse=reuse)

    feature_map = end_points[self._target_layer_type]

    # Construct attention subnetwork on top of features.
    with slim.arg_scope(
        resnet_v1.resnet_arg_scope(
            weight_decay=weight_decay, use_batch_norm=use_batch_norm)):
      with slim.arg_scope([slim.batch_norm], is_training=training_attention):
        (prelogits, attention_prob, attention_score,
         end_points) = self._GetAttentionSubnetwork(
             feature_map,
             end_points,
             attention_nonlinear=attention_nonlinear,
             attention_type=attention_type,
             kernel=kernel,
             reuse=reuse)

    return prelogits, attention_prob, attention_score, feature_map, end_points 
Example #17
Source File: delf_v1.py    From multilabel-image-classification-tensorflow with MIT License
def _GetAttentionModel(
      self,
      images,
      num_classes,
      weight_decay=0.0001,
      attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
      attention_type=_SUPPORTED_ATTENTION_TYPES[0],
      kernel=1,
      training_resnet=False,
      training_attention=False,
      reuse=False):
    """Constructs attention model on resnet_v1_50.

    Args:
      images: A tensor of size [batch, height, width, channels].
      num_classes: The number of output classes.
      weight_decay: The coefficient for the weight decay regularizer.
      attention_nonlinear: Type of non-linearity on top of the attention
        function.
      attention_type: Type of the attention structure.
      kernel: Convolutional kernel to use in attention layers (e.g., [3, 3]).
      training_resnet: Whether or not the Resnet blocks from the model are in
        training mode.
      training_attention: Whether or not the attention part of the model is in
        training mode.
      reuse: Whether or not the layer and its variables should be reused.

    Returns:
      logits: A tensor of size [batch, num_classes].
      attention_prob: Attention score after the non-linearity.
      attention_score: Attention score before the non-linearity.
      feature_map: Features extracted from the model, which are not
        l2-normalized.
    """

    attention_feat, attention_prob, attention_score, feature_map, _ = (
        self.GetAttentionPrelogit(
            images,
            weight_decay,
            attention_nonlinear=attention_nonlinear,
            attention_type=attention_type,
            kernel=kernel,
            training_resnet=training_resnet,
            training_attention=training_attention,
            reuse=reuse))
    with slim.arg_scope(
        resnet_v1.resnet_arg_scope(
            weight_decay=weight_decay, batch_norm_scale=True)):
      with slim.arg_scope([slim.batch_norm], is_training=training_attention):
        with tf.variable_scope(
            _ATTENTION_VARIABLE_SCOPE, values=[attention_feat], reuse=reuse):
          logits = slim.conv2d(
              attention_feat,
              num_classes, [1, 1],
              activation_fn=None,
              normalizer_fn=None,
              scope='logits')
          logits = tf.squeeze(logits, [1, 2], name='spatial_squeeze')
    return logits, attention_prob, attention_score, feature_map 
Example #18
Source File: ssd_resnet_v1_ppn_feature_extractor.py    From vehicle_counting_tensorflow with MIT License
def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: depth multiplier is not supported.
    """
    if self._depth_multiplier != 1.0:
      raise ValueError('Depth multiplier not supported.')

    preprocessed_inputs = shape_utils.check_min_image_dim(
        129, preprocessed_inputs)

    with tf.variable_scope(
        self._resnet_scope_name, reuse=self._reuse_weights) as scope:
      with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          with slim.arg_scope(
              [resnet_v1.bottleneck],
              use_bounded_activations=self._use_bounded_activations):
            _, activations = self._resnet_base_fn(
                inputs=ops.pad_to_multiple(preprocessed_inputs,
                                           self._pad_to_multiple),
                num_classes=None,
                is_training=None,
                global_pool=False,
                output_stride=None,
                store_non_strided_activations=True,
                scope=scope)

      with slim.arg_scope(self._conv_hyperparams_fn()):
        feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
            base_feature_map_depth=self._base_feature_map_depth,
            num_layers=self._num_layers,
            image_features={
                'image_features': self._filter_features(activations)['block3']
            })
    return feature_maps.values() 
Example #19
Source File: ssd_resnet_v1_fpn_feature_extractor.py    From Live-feed-object-device-identification-using-Tensorflow-and-OpenCV with Apache License 2.0
def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        129, preprocessed_inputs)

    with tf.variable_scope(
        self._resnet_scope_name, reuse=self._reuse_weights) as scope:
      with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          _, image_features = self._resnet_base_fn(
              inputs=ops.pad_to_multiple(preprocessed_inputs,
                                         self._pad_to_multiple),
              num_classes=None,
              is_training=None,
              global_pool=False,
              output_stride=None,
              store_non_strided_activations=True,
              min_base_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              scope=scope)
          image_features = self._filter_features(image_features)
      depth_fn = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
      with slim.arg_scope(self._conv_hyperparams_fn()):
        with tf.variable_scope(self._fpn_scope_name,
                               reuse=self._reuse_weights):
          base_fpn_max_level = min(self._fpn_max_level, 5)
          feature_block_list = []
          for level in range(self._fpn_min_level, base_fpn_max_level + 1):
            feature_block_list.append('block{}'.format(level - 1))
          fpn_features = feature_map_generators.fpn_top_down_feature_maps(
              [(key, image_features[key]) for key in feature_block_list],
              depth=depth_fn(self._additional_layer_depth))
          feature_maps = []
          for level in range(self._fpn_min_level, base_fpn_max_level + 1):
            feature_maps.append(
                fpn_features['top_down_block{}'.format(level - 1)])
          last_feature_map = fpn_features['top_down_block{}'.format(
              base_fpn_max_level - 1)]
          # Construct coarse features
          for i in range(base_fpn_max_level, self._fpn_max_level):
            last_feature_map = slim.conv2d(
                last_feature_map,
                num_outputs=depth_fn(self._additional_layer_depth),
                kernel_size=[3, 3],
                stride=2,
                padding='SAME',
                scope='bottom_up_block{}'.format(i))
            feature_maps.append(last_feature_map)
    return feature_maps 
Example #20
Source File: ssd_resnet_v1_ppn_feature_extractor.py    From Live-feed-object-device-identification-using-Tensorflow-and-OpenCV with Apache License 2.0
def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: depth multiplier is not supported.
    """
    if self._depth_multiplier != 1.0:
      raise ValueError('Depth multiplier not supported.')

    preprocessed_inputs = shape_utils.check_min_image_dim(
        129, preprocessed_inputs)

    with tf.variable_scope(
        self._resnet_scope_name, reuse=self._reuse_weights) as scope:
      with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          with slim.arg_scope(
              [resnet_v1.bottleneck],
              use_bounded_activations=self._use_bounded_activations):
            _, activations = self._resnet_base_fn(
                inputs=ops.pad_to_multiple(preprocessed_inputs,
                                           self._pad_to_multiple),
                num_classes=None,
                is_training=None,
                global_pool=False,
                output_stride=None,
                store_non_strided_activations=True,
                scope=scope)

      with slim.arg_scope(self._conv_hyperparams_fn()):
        feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
            base_feature_map_depth=self._base_feature_map_depth,
            num_layers=self._num_layers,
            image_features={
                'image_features': self._filter_features(activations)['block3']
            })
    return feature_maps.values() 
Example #21
Source File: model.py    From uai-sdk with Apache License 2.0
def model(images, weight_decay=1e-5, is_training=True):
    '''
    define the model; we use slim's implementation of resnet
    '''
    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, scope='resnet_v1_50')

    with tf.variable_scope('feature_fusion', values=list(end_points.values())):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [end_points['pool5'], end_points['pool4'],
                 end_points['pool3'], end_points['pool2']]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    c1_1 = slim.conv2d(tf.concat([g[i-1], f[i]], axis=-1), num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(i, h[i].shape, i, g[i].shape))

            # here we use a slightly different way for the regression part:
            # we first use a sigmoid to limit the regression range; the same
            # is done for the angle map
            F_score = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
            # 4 channel of axis aligned bbox and 1 channel rotation angle
            # Note: do not use FLAGS.text_scale here, as UAI Inference does not use FLAGS; it uses 512 instead
            geo_map = slim.conv2d(g[3], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) * 512
            angle_map = (slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) - 0.5) * np.pi/2 # angle is between [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry 
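A minimal, hypothetical sketch of how model() might be invoked at inference time (assumes TF 1.x and that mean_image_subtraction and unpool from the same file are importable):

import tensorflow as tf

# Hypothetical inference-time driver for model() above (TF 1.x assumed).
input_images = tf.placeholder(tf.float32, [None, 512, 512, 3], name='input_images')
f_score, f_geometry = model(input_images, is_training=False)
# f_score:    [batch, h/4, w/4, 1] text / non-text score map
# f_geometry: [batch, h/4, w/4, 5] four box distances plus a rotation angle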
Example #22
Source File: model.py    From EAST_ICPR with GNU General Public License v3.0
def model(images, weight_decay=1e-5, is_training=True):
    '''
    define the model; we use slim's implementation of resnet
    '''
    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, scope='resnet_v1_50')

    with tf.variable_scope('feature_fusion', values=list(end_points.values())):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [end_points['pool5'], end_points['pool4'],
                 end_points['pool3'], end_points['pool2']]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    c1_1 = slim.conv2d(tf.concat([g[i-1], f[i]], axis=-1), num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(i, h[i].shape, i, g[i].shape))

            # here we use a slightly different way for the regression part:
            # we first use a sigmoid to limit the regression range; the same
            # is done for the angle map
            F_score = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
            # 4 channel of axis aligned bbox and 1 channel rotation angle
            geo_map = slim.conv2d(g[3], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) - 0.5) * np.pi/2 # angle is between [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry 
Example #23
Source File: ssd_resnet_v1_ppn_feature_extractor.py    From BMW-TensorFlow-Training-GUI with Apache License 2.0
def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: depth multiplier is not supported.
    """
    if self._depth_multiplier != 1.0:
      raise ValueError('Depth multiplier not supported.')

    preprocessed_inputs = shape_utils.check_min_image_dim(
        129, preprocessed_inputs)

    with tf.variable_scope(
        self._resnet_scope_name, reuse=self._reuse_weights) as scope:
      with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          with slim.arg_scope(
              [resnet_v1.bottleneck],
              use_bounded_activations=self._use_bounded_activations):
            _, activations = self._resnet_base_fn(
                inputs=ops.pad_to_multiple(preprocessed_inputs,
                                           self._pad_to_multiple),
                num_classes=None,
                is_training=None,
                global_pool=False,
                output_stride=None,
                store_non_strided_activations=True,
                scope=scope)

      with slim.arg_scope(self._conv_hyperparams_fn()):
        feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
            base_feature_map_depth=self._base_feature_map_depth,
            num_layers=self._num_layers,
            image_features={
                'image_features': self._filter_features(activations)['block3']
            })
    return feature_maps.values() 
Example #24
Source File: ssd_resnet_v1_fpn_feature_extractor.py    From ros_tensorflow with Apache License 2.0
def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: depth multiplier is not supported.
    """
    if self._depth_multiplier != 1.0:
      raise ValueError('Depth multiplier not supported.')

    preprocessed_inputs = shape_utils.check_min_image_dim(
        129, preprocessed_inputs)

    with tf.variable_scope(
        self._resnet_scope_name, reuse=self._reuse_weights) as scope:
      with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          _, image_features = self._resnet_base_fn(
              inputs=ops.pad_to_multiple(preprocessed_inputs,
                                         self._pad_to_multiple),
              num_classes=None,
              is_training=None,
              global_pool=False,
              output_stride=None,
              store_non_strided_activations=True,
              scope=scope)
      image_features = self._filter_features(image_features)
      last_feature_map = image_features['block4']
    with tf.variable_scope(self._fpn_scope_name, reuse=self._reuse_weights):
      with slim.arg_scope(self._conv_hyperparams_fn()):
        for i in range(5, 7):
          last_feature_map = slim.conv2d(
              last_feature_map,
              num_outputs=256,
              kernel_size=[3, 3],
              stride=2,
              padding='SAME',
              scope='block{}'.format(i))
          image_features['bottomup_{}'.format(i)] = last_feature_map
        feature_maps = feature_map_generators.fpn_top_down_feature_maps(
            [
                image_features[key] for key in
                ['block2', 'block3', 'block4', 'bottomup_5', 'bottomup_6']
            ],
            depth=256,
            scope='top_down_features')
    return feature_maps.values() 
Example #25
Source File: ssd_resnet_v1_fpn_feature_extractor.py    From Gun-Detector with Apache License 2.0
def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: depth multiplier is not supported.
    """
    if self._depth_multiplier != 1.0:
      raise ValueError('Depth multiplier not supported.')

    preprocessed_inputs = shape_utils.check_min_image_dim(
        129, preprocessed_inputs)

    with tf.variable_scope(
        self._resnet_scope_name, reuse=self._reuse_weights) as scope:
      with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          _, image_features = self._resnet_base_fn(
              inputs=ops.pad_to_multiple(preprocessed_inputs,
                                         self._pad_to_multiple),
              num_classes=None,
              is_training=None,
              global_pool=False,
              output_stride=None,
              store_non_strided_activations=True,
              scope=scope)
      image_features = self._filter_features(image_features)
      last_feature_map = image_features['block4']
    with tf.variable_scope(self._fpn_scope_name, reuse=self._reuse_weights):
      with slim.arg_scope(self._conv_hyperparams_fn()):
        for i in range(5, 7):
          last_feature_map = slim.conv2d(
              last_feature_map,
              num_outputs=256,
              kernel_size=[3, 3],
              stride=2,
              padding='SAME',
              scope='block{}'.format(i))
          image_features['bottomup_{}'.format(i)] = last_feature_map
        feature_maps = feature_map_generators.fpn_top_down_feature_maps(
            [
                image_features[key] for key in
                ['block2', 'block3', 'block4', 'bottomup_5', 'bottomup_6']
            ],
            depth=256,
            scope='top_down_features')
    return feature_maps.values() 
Example #26
Source File: delf_v1.py    From Gun-Detector with Apache License 2.0
def _GetAttentionModel(
      self,
      images,
      num_classes,
      weight_decay=0.0001,
      attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
      attention_type=_SUPPORTED_ATTENTION_TYPES[0],
      kernel=1,
      training_resnet=False,
      training_attention=False,
      reuse=False):
    """Constructs attention model on resnet_v1_50.

    Args:
      images: A tensor of size [batch, height, width, channels].
      num_classes: The number of output classes.
      weight_decay: The coefficient for the weight decay regularizer.
      attention_nonlinear: Type of non-linearity on top of the attention
        function.
      attention_type: Type of the attention structure.
      kernel: Convolutional kernel to use in attention layers (e.g., [3, 3]).
      training_resnet: Whether or not the Resnet blocks from the model are in
        training mode.
      training_attention: Whether or not the attention part of the model is in
        training mode.
      reuse: Whether or not the layer and its variables should be reused.

    Returns:
      logits: A tensor of size [batch, num_classes].
      attention_prob: Attention score after the non-linearity.
      attention_score: Attention score before the non-linearity.
      feature_map: Features extracted from the model, which are not
        l2-normalized.
    """

    attention_feat, attention_prob, attention_score, feature_map, _ = (
        self.GetAttentionPrelogit(
            images,
            weight_decay,
            attention_nonlinear=attention_nonlinear,
            attention_type=attention_type,
            kernel=kernel,
            training_resnet=training_resnet,
            training_attention=training_attention,
            reuse=reuse))
    with slim.arg_scope(
        resnet_v1.resnet_arg_scope(
            weight_decay=weight_decay, batch_norm_scale=True)):
      with slim.arg_scope([slim.batch_norm], is_training=training_attention):
        with tf.variable_scope(
            _ATTENTION_VARIABLE_SCOPE, values=[attention_feat], reuse=reuse):
          logits = slim.conv2d(
              attention_feat,
              num_classes, [1, 1],
              activation_fn=None,
              normalizer_fn=None,
              scope='logits')
          logits = tf.squeeze(logits, [1, 2], name='spatial_squeeze')
    return logits, attention_prob, attention_score, feature_map 
Example #27
Source File: delf_v1.py    From Gun-Detector with Apache License 2.0
def GetAttentionPrelogit(
      self,
      images,
      weight_decay=0.0001,
      attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
      attention_type=_SUPPORTED_ATTENTION_TYPES[0],
      kernel=1,
      training_resnet=False,
      training_attention=False,
      reuse=False,
      use_batch_norm=True):
    """Constructs attention model on resnet_v1_50.

    Args:
      images: A tensor of size [batch, height, width, channels].
      weight_decay: The coefficient for the weight decay regularizer.
      attention_nonlinear: Type of non-linearity on top of the attention
        function.
      attention_type: Type of the attention structure.
      kernel: Convolutional kernel to use in attention layers (e.g., [3, 3]).
      training_resnet: Whether or not the Resnet blocks from the model are in
        training mode.
      training_attention: Whether or not the attention part of the model is
        in training mode.
      reuse: Whether or not the layer and its variables should be reused.
      use_batch_norm: Whether or not to use batch normalization.

    Returns:
      prelogits: A tensor of size [batch, 1, 1, channels].
      attention_prob: Attention score after the non-linearity.
      attention_score: Attention score before the non-linearity.
      feature_map: Features extracted from the model, which are not
        l2-normalized.
      end_points: Set of activations for external use.
    """
    # Construct Resnet50 features.
    with slim.arg_scope(
        resnet_v1.resnet_arg_scope(use_batch_norm=use_batch_norm)):
      _, end_points = self.GetResnet50Subnetwork(
          images, is_training=training_resnet, reuse=reuse)

    feature_map = end_points[self._target_layer_type]

    # Construct attention subnetwork on top of features.
    with slim.arg_scope(
        resnet_v1.resnet_arg_scope(
            weight_decay=weight_decay, use_batch_norm=use_batch_norm)):
      with slim.arg_scope([slim.batch_norm], is_training=training_attention):
        (prelogits, attention_prob, attention_score,
         end_points) = self._GetAttentionSubnetwork(
             feature_map,
             end_points,
             attention_nonlinear=attention_nonlinear,
             attention_type=attention_type,
             kernel=kernel,
             reuse=reuse)

    return prelogits, attention_prob, attention_score, feature_map, end_points 
Example #28
Source File: ssd_resnet_v1_fpn_feature_extractor.py    From Traffic-Rule-Violation-Detection-System with MIT License
def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: depth multiplier is not supported.
    """
    if self._depth_multiplier != 1.0:
      raise ValueError('Depth multiplier not supported.')

    preprocessed_inputs = shape_utils.check_min_image_dim(
        129, preprocessed_inputs)

    with tf.variable_scope(
        self._resnet_scope_name, reuse=self._reuse_weights) as scope:
      with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        _, image_features = self._resnet_base_fn(
            inputs=ops.pad_to_multiple(preprocessed_inputs,
                                       self._pad_to_multiple),
            num_classes=None,
            is_training=self._is_training and self._batch_norm_trainable,
            global_pool=False,
            output_stride=None,
            store_non_strided_activations=True,
            scope=scope)
      image_features = self._filter_features(image_features)
      last_feature_map = image_features['block4']
    with tf.variable_scope(self._fpn_scope_name, reuse=self._reuse_weights):
      with slim.arg_scope(self._conv_hyperparams):
        for i in range(5, 7):
          last_feature_map = slim.conv2d(
              last_feature_map,
              num_outputs=256,
              kernel_size=[3, 3],
              stride=2,
              padding='SAME',
              scope='block{}'.format(i))
          image_features['bottomup_{}'.format(i)] = last_feature_map
        feature_maps = feature_map_generators.fpn_top_down_feature_maps(
            [
                image_features[key] for key in
                ['block2', 'block3', 'block4', 'bottomup_5', 'bottomup_6']
            ],
            depth=256,
            scope='top_down_features')
    return feature_maps.values() 
Example #29
Source File: delf_v1.py    From yolo_v2 with Apache License 2.0
def _GetAttentionModel(
      self,
      images,
      num_classes,
      weight_decay=0.0001,
      attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
      attention_type=_SUPPORTED_ATTENTION_TYPES[0],
      kernel=1,
      training_resnet=False,
      training_attention=False,
      reuse=False):
    """Constructs attention model on resnet_v1_50.

    Args:
      images: A tensor of size [batch, height, width, channels].
      num_classes: The number of output classes.
      weight_decay: The coefficient for the weight decay regularizer.
      attention_nonlinear: Type of non-linearity on top of the attention
        function.
      attention_type: Type of the attention structure.
      kernel: Convolutional kernel to use in attention layers (e.g., [3, 3]).
      training_resnet: Whether or not the Resnet blocks from the model are in
        training mode.
      training_attention: Whether or not the attention part of the model is in
        training mode.
      reuse: Whether or not the layer and its variables should be reused.

    Returns:
      logits: A tensor of size [batch, num_classes].
      attention_prob: Attention score after the non-linearity.
      attention_score: Attention score before the non-linearity.
      feature_map: Features extracted from the model, which are not
        l2-normalized.
    """

    attention_feat, attention_prob, attention_score, feature_map, _ = (
        self.GetAttentionPrelogit(
            images,
            weight_decay,
            attention_nonlinear=attention_nonlinear,
            attention_type=attention_type,
            kernel=kernel,
            training_resnet=training_resnet,
            training_attention=training_attention,
            reuse=reuse))
    with slim.arg_scope(
        resnet_v1.resnet_arg_scope(
            weight_decay=weight_decay, batch_norm_scale=True)):
      with slim.arg_scope([slim.batch_norm], is_training=training_attention):
        with tf.variable_scope(
            _ATTENTION_VARIABLE_SCOPE, values=[attention_feat], reuse=reuse):
          logits = slim.conv2d(
              attention_feat,
              num_classes, [1, 1],
              activation_fn=None,
              normalizer_fn=None,
              scope='logits')
          logits = tf.squeeze(logits, [1, 2], name='spatial_squeeze')
    return logits, attention_prob, attention_score, feature_map 
Example #30
Source File: delf_v1.py    From yolo_v2 with Apache License 2.0
def GetAttentionPrelogit(
      self,
      images,
      weight_decay=0.0001,
      attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
      attention_type=_SUPPORTED_ATTENTION_TYPES[0],
      kernel=1,
      training_resnet=False,
      training_attention=False,
      reuse=False,
      use_batch_norm=True):
    """Constructs attention model on resnet_v1_50.

    Args:
      images: A tensor of size [batch, height, width, channels].
      weight_decay: The coefficient for the weight decay regularizer.
      attention_nonlinear: Type of non-linearity on top of the attention
        function.
      attention_type: Type of the attention structure.
      kernel: Convolutional kernel to use in attention layers (e.g., [3, 3]).
      training_resnet: Whether or not the Resnet blocks from the model are in
        training mode.
      training_attention: Whether or not the attention part of the model is
        in training mode.
      reuse: Whether or not the layer and its variables should be reused.
      use_batch_norm: Whether or not to use batch normalization.

    Returns:
      prelogits: A tensor of size [batch, 1, 1, channels].
      attention_prob: Attention score after the non-linearity.
      attention_score: Attention score before the non-linearity.
      feature_map: Features extracted from the model, which are not
        l2-normalized.
      end_points: Set of activations for external use.
    """
    # Construct Resnet50 features.
    with slim.arg_scope(
        resnet_v1.resnet_arg_scope(use_batch_norm=use_batch_norm)):
      _, end_points = self.GetResnet50Subnetwork(
          images, is_training=training_resnet, reuse=reuse)

    feature_map = end_points[self._target_layer_type]

    # Construct attention subnetwork on top of features.
    with slim.arg_scope(
        resnet_v1.resnet_arg_scope(
            weight_decay=weight_decay, use_batch_norm=use_batch_norm)):
      with slim.arg_scope([slim.batch_norm], is_training=training_attention):
        (prelogits, attention_prob, attention_score,
         end_points) = self._GetAttentionSubnetwork(
             feature_map,
             end_points,
             attention_nonlinear=attention_nonlinear,
             attention_type=attention_type,
             kernel=kernel,
             reuse=reuse)

    return prelogits, attention_prob, attention_score, feature_map, end_points