Python model_utils.FramePooling() Examples

The following are 3 code examples of model_utils.FramePooling(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module model_utils, or try the search function.
Example #1
Source File: frame_level_models.py    From Youtube-8M-WILLOW with Apache License 2.0 4 votes vote down vote up
def forward(self, reshaped_input):
        """Project frame features onto clusters and pool them over frames.

        Multiplies `reshaped_input` by a learned [feature_size, cluster_size]
        weight matrix, applies either batch normalisation or a learned bias,
        optionally applies a 'glu' or 'relu' non-linearity, reshapes the
        result to one row of activations per frame, and finally concatenates
        the L2-normalised average-pooled and max-pooled activations.

        Args:
          reshaped_input: 2-D tensor that is matrix-multiplied with the
            [feature_size, cluster_size] weights. Presumably shaped
            [batch_size * max_frames, feature_size] -- TODO confirm with the
            caller.

        Returns:
          The result of utils.FramePooling(..., 'average') and
          utils.FramePooling(..., 'max'), each L2-normalised along axis 1 and
          concatenated along axis 1.
        """
        feature_size = self.feature_size
        cluster_size = self.cluster_size
        add_batch_norm = self.add_batch_norm
        max_frames = self.max_frames
        is_training = self.is_training
        # NOTE(review): the original compared the activation *tensor* with
        # 'glu' / 'relu', so neither branch could ever run. The non-linearity
        # name is assumed to be stored on `self.activation` -- confirm against
        # the constructor. When the attribute is absent no non-linearity is
        # applied, which matches the previous effective behaviour.
        activation_type = getattr(self, "activation", None)

        init_stddev = 1 / math.sqrt(feature_size)

        cluster_weights = tf.get_variable(
            "cluster_weights",
            [feature_size, cluster_size],
            initializer=tf.random_normal_initializer(stddev=init_stddev))
        tf.summary.histogram("cluster_weights", cluster_weights)
        activation = tf.matmul(reshaped_input, cluster_weights)

        if add_batch_norm:
            activation = slim.batch_norm(
                activation,
                center=True,
                scale=True,
                is_training=is_training,
                scope="cluster_bn")
        else:
            # tf.get_variable needs an initializer object here;
            # tf.random_normal is an op that requires a shape and would raise.
            cluster_biases = tf.get_variable(
                "cluster_biases",
                [cluster_size],
                initializer=tf.random_normal_initializer(stddev=init_stddev))
            tf.summary.histogram("cluster_biases", cluster_biases)
            activation += cluster_biases

        # Width of the last axis after the optional non-linearity.
        output_size = cluster_size
        if activation_type == 'glu':
            # Gated linear unit: the first half of the columns carries the
            # values, the second half (through a sigmoid) gates them. Integer
            # division keeps this valid on Python 3; the gate slice is capped
            # at 2 * half so both halves match when cluster_size is odd.
            half = cluster_size // 2
            gates = tf.sigmoid(activation[:, half:2 * half])
            activation = tf.multiply(activation[:, :half], gates)
            output_size = half
        elif activation_type == 'relu':
            # The 'relu' option has always mapped to relu6 in this code.
            activation = tf.nn.relu6(activation)

        tf.summary.histogram("cluster_output", activation)

        # GLU halves the column count, so reshape with the actual width.
        activation = tf.reshape(activation, [-1, max_frames, output_size])

        avg_activation = utils.FramePooling(activation, 'average')
        avg_activation = tf.nn.l2_normalize(avg_activation, 1)

        max_activation = utils.FramePooling(activation, 'max')
        max_activation = tf.nn.l2_normalize(max_activation, 1)

        return tf.concat([avg_activation, max_activation], 1)
Example #2
Source File: xp_frame_level_models.py    From Y8M with Apache License 2.0 4 votes vote down vote up
def create_model(self, model_input, vocab_size, num_frames,
                   l2_penalty=1e-4, **unused_params):
    """Average-pools the frames and delegates to a video-level model.

    The frame-level input is reduced with utils.FramePooling(..., 'average').
    The pooled tensor is then handed to the video-level model class whose name
    is given by FLAGS.video_level_classifier_model.

    Args:
      model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of
                   input features; passed to utils.FramePooling.
      vocab_size: The number of classes in the dataset; forwarded unchanged to
                  the video-level model.
      num_frames: A vector of length 'batch' which indicates the number of
           frames for each video (before padding). Not used by this method.
      l2_penalty: Not used by this method.
      **unused_params: Forwarded unchanged to the video-level model.

    Returns:
      Whatever the selected video-level model's create_model returns.
    """
    # A hand-written "sum over frames / num_frames" average used to live here;
    # pooling is now delegated to utils.FramePooling.
    pooled_features = utils.FramePooling(model_input, 'average')

    pooled_shape = pooled_features.get_shape().as_list()
    logging.info('avg_pooled shape: {}'.format(pooled_shape))

    # Resolve the classifier class by name, then instantiate and run it.
    classifier_cls = getattr(video_level_models,
                             FLAGS.video_level_classifier_model)
    classifier = classifier_cls()
    return classifier.create_model(
        model_input=pooled_features,
        vocab_size=vocab_size,
        num_mixtures=2,
        **unused_params)
Example #3
Source File: xp_frame_level_models.py    From Y8M with Apache License 2.0 4 votes vote down vote up
def create_model(self, model_input, vocab_size, num_frames,
                   l2_penalty=1e-4, **unused_params):
    """Pools frame features by averaging, then runs a video-level classifier.

    Frame features are collapsed with utils.FramePooling(..., 'average') and
    the result is classified by the model class looked up on
    video_level_models under the name FLAGS.video_level_classifier_model.

    Args:
      model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of
                   input features; passed to utils.FramePooling.
      vocab_size: The number of classes in the dataset; passed through to the
                  video-level classifier.
      num_frames: A vector of length 'batch' which indicates the number of
           frames for each video (before padding). Ignored by this method.
      l2_penalty: Ignored by this method.
      **unused_params: Passed through to the video-level classifier.

    Returns:
      The value returned by the chosen video-level classifier's create_model.
    """
    # The earlier manual averaging (reduce_sum divided by num_frames) has been
    # replaced by the shared pooling helper.
    frame_average = utils.FramePooling(model_input, 'average')

    shape_message = 'avg_pooled shape: {}'.format(
        frame_average.get_shape().as_list())
    logging.info(shape_message)

    video_model_cls = getattr(video_level_models,
                              FLAGS.video_level_classifier_model)
    video_model = video_model_cls()
    return video_model.create_model(
        model_input=frame_average,
        vocab_size=vocab_size,
        num_mixtures=2,
        **unused_params)