import tensorflow as tf

from .layers import EncoderLayer


@tf.keras.utils.register_keras_serializable()
class Encoder(tf.keras.layers.Layer):
    """Stack of ``EncoderLayer`` blocks on top of a scaled token embedding.

    The layer embeds its input, scales the embedding by ``sqrt(d_model)``,
    adds a fixed sinusoidal positional encoding, applies dropout and then
    runs the result through ``num_layers`` ``EncoderLayer`` instances.

    Args:
        num_layers: Number of ``EncoderLayer`` blocks to stack.
        d_model: Embedding width; also used to scale the embedding and to
            size the positional encoding table.
        num_heads: Forwarded to each ``EncoderLayer``.
        dff: Forwarded to each ``EncoderLayer``.
        input_vocab_size: ``input_dim`` of the embedding table.
        max_tokens: Number of positions in the precomputed positional
            encoding table.
        dropout_rate: Rate of the dropout applied after the positional
            encoding; also forwarded to each ``EncoderLayer``.
        **kwargs: Passed through to ``tf.keras.layers.Layer``.
    """

    def __init__(self, num_layers, d_model, num_heads, dff,
                 input_vocab_size, max_tokens, dropout_rate, **kwargs):
        super().__init__(**kwargs)

        # Keep every constructor argument so get_config() can reproduce the
        # layer exactly (num_heads and dff were previously not stored, which
        # made get_config() raise AttributeError).
        self.num_layers = num_layers
        self.d_model = d_model
        self.num_heads = num_heads
        self.dff = dff
        self.input_vocab_size = input_vocab_size
        self.max_tokens = max_tokens
        self.dropout_rate = dropout_rate

        self.embedding = tf.keras.layers.Embedding(input_vocab_size, d_model)
        # Constant table of shape (1, max_tokens, d_model); sliced in call().
        self.pos_encoding = self.positional_encoding(max_tokens, d_model)
        self.enc_layers = [
            EncoderLayer(d_model, num_heads, dff, dropout_rate)
            for _ in range(num_layers)
        ]
        self.dropout = tf.keras.layers.Dropout(dropout_rate)

    def call(self, x, training=None, mask=None):
        """Encode a batch of token ids.

        Args:
            x: Input fed to the embedding layer; axis 1 is treated as the
                sequence axis (presumably integer ids of shape
                ``(batch, seq_len)`` -- confirm against callers).
            training: Forwarded to dropout and to every encoder layer.
            mask: Forwarded unchanged to every encoder layer.

        Returns:
            The output of the last encoder layer.
        """
        seq_len = tf.shape(x)[1]

        x = self.embedding(x)
        # Cast to the embedding's dtype so the arithmetic does not fail with
        # a dtype mismatch when the layer runs under a non-float32 policy.
        x *= tf.math.sqrt(tf.cast(self.d_model, x.dtype))
        # NOTE: if seq_len exceeds max_tokens the slice is shorter than the
        # sequence and the addition below cannot broadcast.
        x += tf.cast(self.pos_encoding[:, :seq_len, :], x.dtype)
        x = self.dropout(x, training=training)

        for enc_layer in self.enc_layers:
            x = enc_layer(x, training=training, mask=mask)
        return x

    def positional_encoding(self, max_len, d_model):
        """Build the sinusoidal positional encoding table.

        Sines of the even-indexed angle columns and cosines of the
        odd-indexed angle columns are concatenated along the last axis
        (all sines first, then all cosines; they are not interleaved).

        Args:
            max_len: Number of positions to encode.
            d_model: Width of the encoding.

        Returns:
            A float32 tensor of shape ``(1, max_len, d_model)``.
        """
        positions = tf.range(max_len, dtype=tf.float32)[:, tf.newaxis]
        dims = tf.range(d_model, dtype=tf.float32)[tf.newaxis, :]
        angle_rads = self.get_angles(positions, dims, d_model)

        sines = tf.math.sin(angle_rads[:, 0::2])
        cosines = tf.math.cos(angle_rads[:, 1::2])

        pos_encoding = tf.concat([sines, cosines], axis=-1)
        return pos_encoding[tf.newaxis, ...]

    def get_angles(self, pos, i, d_model):
        """Return ``pos / 10000 ** (2 * (i // 2) / d_model)``.

        Args:
            pos: Position values (float tensor, broadcast against ``i``).
            i: Dimension indices (float tensor, broadcast against ``pos``).
            d_model: Width of the encoding, used to normalise the exponent.

        Returns:
            The angle for every (position, dimension) pair, in radians.
        """
        exponent = (2 * (i // 2)) / tf.cast(d_model, tf.float32)
        # Float base keeps the dtype of the power explicit.
        angle_rates = 1 / tf.pow(10000.0, exponent)
        return pos * angle_rates

    def get_config(self):
        """Return the constructor arguments needed to re-create the layer."""
        config = super().get_config()
        config.update({
            'num_layers': self.num_layers,
            'd_model': self.d_model,
            'num_heads': self.num_heads,
            'dff': self.dff,
            'input_vocab_size': self.input_vocab_size,
            'max_tokens': self.max_tokens,
            'dropout_rate': self.dropout_rate,
        })
        return config