"""Encoder for sentence classification with 1D convolutions and max-pooling.""" from typing import Dict, List, Tuple from typeguard import check_argument_types import tensorflow as tf from neuralmonkey.dataset import Dataset from neuralmonkey.decorators import tensor from neuralmonkey.model.feedable import FeedDict from neuralmonkey.model.parameterized import InitializerSpecs from neuralmonkey.model.model_part import ModelPart from neuralmonkey.model.stateful import Stateful from neuralmonkey.nn.utils import dropout from neuralmonkey.vocabulary import Vocabulary, pad_batch, sentence_mask from neuralmonkey.tf_utils import get_variable class SequenceCNNEncoder(ModelPart, Stateful): """Encoder processing a sequence using a CNN.""" # pylint: disable=too-many-arguments def __init__(self, name: str, vocabulary: Vocabulary, data_id: str, embedding_size: int, filters: List[Tuple[int, int]], max_input_len: int = None, dropout_keep_prob: float = 1.0, reuse: ModelPart = None, save_checkpoint: str = None, load_checkpoint: str = None, initializers: InitializerSpecs = None) -> None: """Create a new instance of the CNN sequence encoder. Based on: Yoon Kim: Convolutional Neural Networks for Sentence Classification (http://emnlp2014.org/papers/pdf/EMNLP2014181.pdf) Arguments: vocabulary: Input vocabulary data_id: Identifier of the data series fed to this encoder name: An unique identifier for this encoder max_input_len: Maximum length of an encoded sequence embedding_size: The size of the embedding vector assigned to each word filters: Specification of CNN filters. It is a list of tuples specifying the filter size and number of channels. dropout_keep_prob: The dropout keep probability (default 1.0) """ check_argument_types() ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint, initializers) self.vocabulary = vocabulary self.data_id = data_id self.max_input_len = max_input_len self.embedding_size = embedding_size self.dropout_keep_prob = dropout_keep_prob self.filters = filters @property def input_types(self) -> Dict[str, tf.DType]: return {self.data_id: tf.string} @property def input_shapes(self) -> Dict[str, tf.TensorShape]: return {self.data_id: tf.TensorShape([None, None])} @tensor def inputs(self) -> tf.Tensor: return self.vocabulary.strings_to_indices(self.input_tokens) @tensor def input_tokens(self) -> tf.Tensor: return self.dataset[self.data_id] @tensor def input_mask(self) -> tf.Tensor: return sentence_mask(self.inputs) @tensor def embedded_inputs(self) -> tf.Tensor: with tf.variable_scope("input_projection"): embedding_matrix = get_variable( "word_embeddings", [len(self.vocabulary), self.embedding_size], initializer=tf.variance_scaling_initializer( mode="fan_avg", distribution="uniform")) return dropout( tf.nn.embedding_lookup(embedding_matrix, self.inputs), self.dropout_keep_prob, self.train_mode) @tensor def output(self) -> tf.Tensor: pooled_outputs = [] for filter_size, num_filters in self.filters: with tf.variable_scope("conv-maxpool-%s" % filter_size): # Convolution Layer filter_shape = [filter_size, self.embedding_size, num_filters] w_filter = get_variable( "conv_W", filter_shape, initializer=tf.variance_scaling_initializer( mode="fan_avg", distribution="uniform")) b_filter = get_variable( "conv_bias", [num_filters], initializer=tf.zeros_initializer()) conv = tf.nn.conv1d( self.embedded_inputs, w_filter, stride=1, padding="VALID", name="conv") # Apply nonlinearity conv_relu = tf.nn.relu(tf.nn.bias_add(conv, b_filter)) # Max-pooling over the outputs pooled = 
tf.reduce_max(conv_relu, 1) pooled_outputs.append(pooled) # Combine all the pooled features return tf.concat(pooled_outputs, axis=1) def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict: """Populate the feed dictionary with the encoder inputs. Arguments: dataset: The dataset to use train: Boolean flag telling whether it is training time """ fd = ModelPart.feed_dict(self, dataset, train) sentences = dataset.get_series(self.data_id) fd[self.input_tokens] = pad_batch(list(sentences), self.max_input_len) return fd
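
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the module). In Neural Monkey,
# model parts like this one are normally instantiated from an INI
# configuration file rather than constructed directly; the Python call below
# is an assumption showing how the constructor arguments map onto the Kim
# (2014) setup. `vocabulary` stands for an already-built neuralmonkey
# Vocabulary object.
#
#     encoder = SequenceCNNEncoder(
#         name="cnn_encoder",
#         vocabulary=vocabulary,      # a neuralmonkey Vocabulary instance
#         data_id="source",           # data series holding the input tokens
#         embedding_size=300,
#         filters=[(3, 100), (4, 100), (5, 100)],  # (filter size, channels)
#         max_input_len=50,
#         dropout_keep_prob=0.5)
#
# With this configuration, `encoder.output` is a tensor of shape
# [batch, 300]: each filter width contributes one max-pooled 100-dimensional
# feature vector, and the three vectors are concatenated along axis 1.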