from keras.layers import Layer
from keras import backend as K


class Position_Embedding(Layer):
    """
    Computes sequence position information for Attention based models
    https://github.com/bojone/attention/blob/master/attention_keras.py

    # Arguments:
        A tensor with shape (batch_size, seq_len, word_size)
    # Returns:
        A position tensor with shape (batch_size, seq_len, position_size)
    """

    def __init__(self, size=None, mode='sum', **kwargs):
        self.size = size  # must be an even number
        self.mode = mode
        super(Position_Embedding, self).__init__(**kwargs)

    def call(self, x):
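        # In 'sum' mode the position embedding must have the same last
        # dimension as the input, so infer the size from x.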
        if (self.size is None) or (self.mode == 'sum'):
            self.size = int(x.shape[-1])
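        # Frequencies 1 / 10000^(2j / size) for j = 0 .. size/2 - 1,
        # following the sinusoidal encoding of "Attention Is All You Need".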
        position_j = 1. / K.pow(
            10000., 2 * K.arange(self.size // 2, dtype='float32') / self.size)
        position_j = K.expand_dims(position_j, 0)
        # K.arange does not support a dynamic length, so build the position
        # indices with a cumulative sum over a tensor of ones instead.
        position_i = K.cumsum(K.ones_like(x[:, :, 0]), 1) - 1
        position_i = K.expand_dims(position_i, 2)
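        # Outer product of positions and frequencies gives angles of shape
        # (batch_size, seq_len, size/2); the cosine and sine halves are then
        # concatenated along the last axis to form the full embedding.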
        position_ij = K.dot(position_i, position_j)
        position_ij = K.concatenate(
            [K.cos(position_ij), K.sin(position_ij)], 2)
        if self.mode == 'sum':
            return position_ij + x
        elif self.mode == 'concat':
            return K.concatenate([position_ij, x], 2)

    def compute_output_shape(self, input_shape):
        if self.mode == 'sum':
            return input_shape
        elif self.mode == 'concat':
            return (input_shape[0], input_shape[1], input_shape[2] + self.size)
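

# Minimal usage sketch (not part of the original source): the sequence length
# (50), vocabulary size (5000) and embedding dimension (128) below are
# illustrative assumptions, chosen only to show how the layer plugs into a
# Keras model.
if __name__ == '__main__':
    from keras.layers import Embedding, Input
    from keras.models import Model

    token_ids = Input(shape=(50,), dtype='int32')
    word_emb = Embedding(input_dim=5000, output_dim=128)(token_ids)
    # 'sum' mode keeps the shape (batch, 50, 128) and adds position information.
    pos_emb = Position_Embedding(mode='sum')(word_emb)
    model = Model(token_ids, pos_emb)
    model.summary()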