python source code of pooling

# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
An encoder that pools over embeddings, as described in
https://arxiv.org/abs/1611.02344.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from pydoc import locate

import numpy as np
import tensorflow as tf

from seq2seq.encoders.encoder import Encoder, EncoderOutput


def position_encoding(sentence_size, embedding_size):
  """
  Position Encoding described in section 4.1 of
  End-To-End Memory Networks (https://arxiv.org/abs/1503.08895).

  Args:
    sentence_size: length of the sentence
    embedding_size: dimensionality of the embeddings

  Returns:
    A numpy array of shape [sentence_size, embedding_size] containing
    the fixed position encodings for each sentence position.
  """
  encoding = np.ones((sentence_size, embedding_size), dtype=np.float32)
  ls = sentence_size + 1
  le = embedding_size + 1
  for k in range(1, le):
    for j in range(1, ls):
      encoding[j-1, k-1] = (1.0 - j/float(ls)) - (
          k / float(le)) * (1. - 2. * j/float(ls))
  return encoding


def _create_position_embedding(embedding_dim, num_positions, lengths, maxlen):
  """Creates position embeddings.

  Args:
    embedding_dim: Dimensionality of the embeddings. An integer.
    num_positions: The number of positions to be embedded. For example,
      if you have inputs of length up to 100, this should be 100. An integer.
    lengths: The lengths of the inputs to create position embeddings for.
      An int32 tensor of shape `[batch_size]`.
    maxlen: The maximum length of the input sequence to create position
      embeddings for. An int32 tensor.

  Returns:
    A tensor of shape `[batch_size, maxlen, embedding_dim]` that contains
    embeddings for each position. All elements past `lengths` are zero.
  """
  # Create constant position encodings
  position_encodings = tf.constant(
      position_encoding(num_positions, embedding_dim),
      name="position_encoding")

  # Slice to size of current sequence
  pe_slice = position_encodings[:maxlen, :]
  # Replicate encodings for each element in the batch
  batch_size = tf.shape(lengths)[0]
  pe_batch = tf.tile([pe_slice], [batch_size, 1, 1])

  # Mask out positions that are padded
  positions_mask = tf.sequence_mask(
      lengths=lengths, maxlen=maxlen, dtype=tf.float32)
  positions_embed = pe_batch * tf.expand_dims(positions_mask, 2)

  return positions_embed

class PoolingEncoder(Encoder):
  """An encoder that pools over embeddings, as described in
  https://arxiv.org/abs/1611.02344. The encoder supports optional positions
  embeddings and a configurable pooling window.

  Params:
    dropout_keep_prob: Dropout keep probability applied to the embeddings.
    pooling_fn: The 1-d pooling function to use, e.g.
      `tensorflow.layers.average_pooling1d`.
    pool_size: The pooling window, passed as `pool_size` to
      the pooling function.
    strides: The stride during pooling, passed as `strides`
      the pooling function.
    position_embeddings.enable: If true, add position embeddings to the
      inputs before pooling.
    position_embeddings.combiner_fn: Function used to combine the
      position embeddings with the inputs. For example, `tensorflow.add`.
    position_embeddings.num_positions: Size of the position embedding matrix.
      This should be set to the maximum sequence length of the inputs.
  """

  def __init__(self, params, mode, name="pooling_encoder"):
    super(PoolingEncoder, self).__init__(params, mode, name)
    self._pooling_fn = locate(self.params["pooling_fn"])
    self._combiner_fn = locate(self.params["position_embeddings.combiner_fn"])

  @staticmethod
  def default_params():
    return {
        "dropout_keep_prob": 0.8,
        "pooling_fn": "tensorflow.layers.average_pooling1d",
        "pool_size": 5,
        "strides": 1,
        "position_embeddings.enable": True,
        "position_embeddings.combiner_fn": "tensorflow.multiply",
        "position_embeddings.num_positions": 100,
    }

  def encode(self, inputs, sequence_length):
    if self.params["position_embeddings.enable"]:
      positions_embed = _create_position_embedding(
          embedding_dim=inputs.get_shape().as_list()[-1],
          num_positions=self.params["position_embeddings.num_positions"],
          lengths=sequence_length,
          maxlen=tf.shape(inputs)[1])
      inputs = self._combiner_fn(inputs, positions_embed)

    # Apply dropout
    inputs = tf.contrib.layers.dropout(
        inputs=inputs,
        keep_prob=self.params["dropout_keep_prob"],
        is_training=self.mode == tf.contrib.learn.ModeKeys.TRAIN)

    outputs = self._pooling_fn(
        inputs=inputs,
        pool_size=self.params["pool_size"],
        strides=self.params["strides"],
        padding="SAME")

    # Final state is the average representation of the pooled embeddings
    final_state = tf.reduce_mean(outputs, 1)

    return EncoderOutput(
        outputs=outputs,
        final_state=final_state,
        attention_values=inputs,
        attention_values_length=sequence_length)