# -*- coding: utf-8 -*-
"""
@Author: Shaoweihua.Liu
@Contact: liushaoweihua@126.com
@Site: github.com/liushaoweihua
@File: models.py
@Time: 2020/3/3 10:37 AM
"""

# Some of this code comes from <bert4keras>:
# Author: Jianlin Su
# Github: https://github.com/bojone/bert4keras
# Site: kexue.fm
# Version: 0.2.5

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import keras
import numpy as np
import tensorflow as tf
import keras.backend as K
from keras_contrib.layers import CRF

# NOTE(review): Input, Lambda, Model, Conv1D, concatenate, LSTM, GRU,
# Bidirectional and build_bert_model are not imported explicitly anywhere in
# this file, so they presumably arrive through this star import -- confirm
# against bert.py before narrowing it.
from .bert import *


def set_gelu(version):
    """Register the chosen GELU implementation under the name "gelu".

    The function is stored in Keras' custom-object registry, so it is looked
    up whenever the string "gelu" is resolved through that registry.

    Args:
        version: "erf" or "tanh" (case-insensitive).

    Raises:
        AssertionError: if ``version`` is neither "erf" nor "tanh".
    """
    version = version.lower()
    # Kept as an assert so callers keep seeing the same exception type.
    assert version in ["erf", "tanh"], "gelu version must be erf or tanh"
    if version == "erf":
        keras.utils.get_custom_objects()["gelu"] = gelu_erf
    else:
        keras.utils.get_custom_objects()["gelu"] = gelu_tanh


def gelu_erf(x):
    """GELU computed directly with the error function."""
    return 0.5 * x * (1.0 + tf.math.erf(x / np.sqrt(2.0)))


def gelu_tanh(x):
    """GELU computed with the tanh approximation."""
    cdf = 0.5 * (1.0 + K.tanh(
        (np.sqrt(2 / np.pi) * (x + 0.044715 * K.pow(x, 3)))))
    return x * cdf


# Module-level side effect: the tanh approximation is registered on import.
set_gelu("tanh")


class NerBaseModel(object):
    """Base class for BERT NER models.

    Subclasses supply the task-specific layers by overriding
    ``_task_layers``; ``build`` wires BERT, those layers and a CRF output
    together.

    Inherits from ``object`` explicitly: the subclasses call
    ``super(Cls, self).__init__``, which fails on Python 2 old-style classes.
    """

    def __init__(self,
                 bert_config,
                 bert_checkpoint,
                 albert,
                 max_len,
                 numb_tags,
                 dropout_rate):
        """Store the configuration and load the BERT model.

        Args:
            bert_config: forwarded to ``build_bert_model``.
            bert_checkpoint: forwarded to ``build_bert_model``.
            albert: forwarded to ``build_bert_model`` as ``albert=``.
            max_len: length of the token and segment inputs.
            numb_tags: number of tags given to the CRF layer.
            dropout_rate: stored on the instance; within this file it is
                only read by ``NerRnnModel`` (as ``recurrent_dropout``).
        """
        self.bert_config = bert_config
        self.bert_checkpoint = bert_checkpoint
        self.albert = albert
        self.max_len = max_len
        self.numb_tags = numb_tags
        self.dropout_rate = dropout_rate
        self._build_bert_model()

    def _build_bert_model(self):
        """Load the BERT model and mark every one of its layers trainable."""
        self.bert_model = build_bert_model(
            self.bert_config,
            self.bert_checkpoint,
            albert=self.albert)
        for bert_layer in self.bert_model.layers:
            bert_layer.trainable = True

    def build(self):
        """Assemble and return the full NER model.

        Returns:
            A ``Model`` taking ``[token_input, segment_input]`` (each of
            shape ``(max_len,)`` per sample) and producing the CRF output.
        """
        x_in = Input(shape=(self.max_len,), name="Origin-Input-Token")
        s_in = Input(shape=(self.max_len,), name="Origin-Input-Segment")
        x = self.bert_model([x_in, s_in])
        # Drop the first position along the sequence axis (the layer is named
        # "Ignore-CLS"), so downstream layers see one step fewer.
        x = Lambda(lambda X: X[:, 1:], name="Ignore-CLS")(x)
        x = self._task_layers(x)
        x = CRF(self.numb_tags, sparse_target=True, name="CRF")(x)
        model = Model([x_in, s_in], x)
        return model

    def _task_layers(self, layer):
        """Apply the task-specific layers; must be overridden by subclasses.

        Args:
            layer: output of the "Ignore-CLS" slice.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError


class NerCnnModel(NerBaseModel):
    """BERT NER model with dilated-convolution task layers."""

    def __init__(self,
                 filters,
                 kernel_size,
                 blocks,
                 *args,
                 **kwargs):
        """Store the CNN settings and initialise the base model.

        Args:
            filters: number of filters of every ``Conv1D`` layer.
            kernel_size: kernel size of every ``Conv1D`` layer.
            blocks: number of convolution blocks applied to the input.
            *args, **kwargs: forwarded to ``NerBaseModel.__init__``.
        """
        super(NerCnnModel, self).__init__(*args, **kwargs)
        self.filters = filters
        self.kernel_size = kernel_size
        self.blocks = blocks

    def _task_layers(self, layer):
        """Run ``blocks`` convolution blocks on ``layer`` and join the results.

        Every block is applied to the same input ``layer`` and consists of
        three "same"-padded ``Conv1D`` layers with dilation rates 1, 1 and 2.
        The block outputs are concatenated along the last axis.

        Args:
            layer: input tensor for every block.

        Returns:
            The concatenated block outputs, or the single block output when
            there is exactly one block.
        """
        dilation_rates = (1, 1, 2)
        block_outputs = []
        for block_idx in range(self.blocks):
            prefix = "Idcnn-Block-%s-Layer-" % block_idx
            cnn = layer
            # Layer names are prefix + "1", "2", "3", in application order.
            for conv_idx, dilation_rate in enumerate(dilation_rates, 1):
                cnn = Conv1D(
                    self.filters,
                    self.kernel_size,
                    padding="same",
                    dilation_rate=dilation_rate,
                    name=prefix + str(conv_idx))(cnn)
            block_outputs.append(cnn)
        # Keras' concatenate rejects a one-element list in standalone
        # Keras 2.x, so a single block is returned as-is.
        if len(block_outputs) == 1:
            return block_outputs[0]
        return concatenate(block_outputs, axis=-1)


class NerRnnModel(NerBaseModel):
    """BERT NER model with stacked bidirectional RNN task layers."""

    def __init__(self,
                 cell_type,
                 units,
                 num_hidden_layers,
                 *args,
                 **kwargs):
        """Store the RNN settings and initialise the base model.

        Args:
            cell_type: "lstm" or "gru" (case-insensitive).
            units: number of units of every recurrent layer.
            num_hidden_layers: number of stacked bidirectional layers.
            *args, **kwargs: forwarded to ``NerBaseModel.__init__``.

        Raises:
            AssertionError: if ``cell_type`` is not "lstm" or "gru".
        """
        super(NerRnnModel, self).__init__(*args, **kwargs)
        self.cell_type = cell_type.lower()
        allowed_cell_type = ["lstm", "gru"]
        # Kept as an assert so callers keep seeing the same exception type.
        assert self.cell_type in allowed_cell_type, \
            "cell_type must be one of %s" % allowed_cell_type
        self.units = units
        self.num_hidden_layers = num_hidden_layers

    def _task_layers(self, layer):
        """Stack ``num_hidden_layers`` bidirectional recurrent layers.

        Each layer returns full sequences and uses ``dropout_rate`` as its
        recurrent dropout. Layers are named "Lstm-<i>" or "Gru-<i>".

        Args:
            layer: input tensor for the first recurrent layer.

        Returns:
            Output of the last recurrent layer (``layer`` itself when
            ``num_hidden_layers`` is 0).

        Raises:
            ValueError: if ``cell_type`` is neither "lstm" nor "gru"; this
                still guards the case where asserts are disabled.
        """
        if self.cell_type == "lstm":
            cell = LSTM
            cell_name = "Lstm"
        elif self.cell_type == "gru":
            cell = GRU
            cell_name = "Gru"
        else:
            raise ValueError("cell_type should be 'lstm' or 'gru'.")

        rnn = layer
        for layer_idx in range(self.num_hidden_layers):
            name = cell_name + "-%s" % layer_idx
            rnn = Bidirectional(
                cell(units=self.units,
                     return_sequences=True,
                     recurrent_dropout=self.dropout_rate),
                name=name)(rnn)
        return rnn