from keras.engine import Layer
from keras import backend as K


class Position_Embedding(Layer):
    """
    Computes sequence position information for Attention-based models.
    https://github.com/bojone/attention/blob/master/attention_keras.py

    # Arguments:
        A tensor with shape (batch_size, seq_len, word_size)
    # Returns:
        A position tensor with shape (batch_size, seq_len, position_size)
    """

    def __init__(self, size=None, mode='sum', **kwargs):
        self.size = size  # must be an even number
        self.mode = mode
        super(Position_Embedding, self).__init__(**kwargs)

    def call(self, x):
        # In 'sum' mode the encoding size must match the word embedding size.
        if (self.size is None) or (self.mode == 'sum'):
            self.size = int(x.shape[-1])
        # Inverse frequencies: 1 / 10000^(2j / size) for j = 0 .. size/2 - 1
        position_j = 1. / K.pow(10000.,
                                2 * K.arange(self.size / 2, dtype='float32') / self.size)
        position_j = K.expand_dims(position_j, 0)
        # K.arange does not support variable length, so build the position
        # indices 0 .. seq_len-1 with a cumulative sum over ones instead.
        position_i = K.cumsum(K.ones_like(x[:, :, 0]), 1) - 1
        position_i = K.expand_dims(position_i, 2)
        # Outer product of positions and inverse frequencies:
        # shape (batch_size, seq_len, size / 2)
        position_ij = K.dot(position_i, position_j)
        # Concatenate the cos and sin halves into the final
        # (batch_size, seq_len, size) positional encoding.
        position_ij = K.concatenate([K.cos(position_ij), K.sin(position_ij)], 2)
        if self.mode == 'sum':
            return position_ij + x
        elif self.mode == 'concat':
            return K.concatenate([position_ij, x], 2)

    def compute_output_shape(self, input_shape):
        if self.mode == 'sum':
            return input_shape
        elif self.mode == 'concat':
            return (input_shape[0], input_shape[1], input_shape[2] + self.size)
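
# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only): vocab_size, maxlen and embed_dim
# below are hypothetical placeholder hyperparameters, not part of the layer.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    from keras.models import Model
    from keras.layers import Input, Embedding

    vocab_size, maxlen, embed_dim = 10000, 100, 128  # placeholder values

    inp = Input(shape=(maxlen,), dtype='int32')
    emb = Embedding(vocab_size, embed_dim)(inp)  # (batch_size, seq_len, embed_dim)
    # mode='sum' adds the positional signal to the word embeddings,
    # keeping the output shape identical to the input shape.
    pos = Position_Embedding(mode='sum')(emb)
    model = Model(inputs=inp, outputs=pos)
    model.summary()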