# Source code for teras._src.layers.transformer.feedforward

import keras
from teras._src.api_export import teras_export
from teras._src.typing import ActivationType



[docs]
@teras_export("teras.layers.TransformerFeedForward")
class TransformerFeedForward(keras.layers.Layer):
    """
    Transformer Feed Forward layer as proposed in the original
    Transformers architecture, in the paper "Attention is all you need",
    with a slight addition of an optional `Dropout` layer.

    The layer applies, in order: an inner `Dense` projection to
    `hidden_dim` (with `activation`), a `Dropout` layer, and an outer
    `Dense` projection back to `embedding_dim`.

    Reference(s):
        https://arxiv.org/abs/1706.03762

    Args:
        embedding_dim: int, dimensionality of embeddings being used in
            the model. It is also the number of units of the outer
            (output) linear layer.
        hidden_dim: int, hidden dimensionality to use. By default,
            it is four times the `embedding_dim`.
        activation: str or callable, activation function to use for the
            inner linear layer. Defaults to "relu".
        dropout: float, dropout rate to use for the dropout layer
            that is applied in between the two linear layers.
            Defaults to 0., because the original transformer
            architecture doesn't employ a `Dropout` layer.
    """

    def __init__(self,
                 embedding_dim: int,
                 hidden_dim: int = None,
                 activation: ActivationType = "relu",
                 dropout: float = 0.,
                 **kwargs):
        super().__init__(**kwargs)
        self.embedding_dim = embedding_dim
        # An unspecified hidden size falls back to 4x the embedding size.
        self.hidden_dim = embedding_dim * 4 if hidden_dim is None else hidden_dim
        self.activation = activation
        self.dropout = dropout

        self.inner = keras.layers.Dense(self.hidden_dim,
                                        activation=self.activation,
                                        name="feedforward_inner")
        self.outer = keras.layers.Dense(self.embedding_dim,
                                        name="feedforward_outer")
        self.dropout_layer = keras.layers.Dropout(
                                        self.dropout,
                                        name="feedforward_dropout")

    def build(self, input_shape):
        """
        Build the inner and outer `Dense` sub-layers.

        Args:
            input_shape: shape of the inputs. The outer layer is built
                with the same leading dimensions and `hidden_dim` as
                its last dimension.
        """
        self.inner.build(input_shape)
        # `input_shape` is not guaranteed to be a tuple; slicing a list
        # yields a list, and `list + tuple` raises TypeError. Coerce the
        # leading dimensions to a tuple before appending the hidden size.
        hidden_shape = tuple(input_shape[:-1]) + (self.hidden_dim,)
        self.outer.build(hidden_shape)

    def call(self, inputs):
        """
        Apply inner projection, dropout and outer projection, in order.

        Args:
            inputs: input tensor passed to the inner `Dense` layer.

        Returns:
            The output of the outer `Dense` layer.
        """
        x = self.inner(inputs)
        x = self.dropout_layer(x)
        return self.outer(x)

    def get_config(self):
        """
        Return the configuration of the layer.

        Returns:
            dict, the base layer config extended with `embedding_dim`,
            `hidden_dim`, `activation` and `dropout`.
        """
        activation = self.activation
        # A callable is not plain config data; store its serialized form
        # instead. Strings and `None` are kept unchanged.
        if callable(activation):
            activation = keras.activations.serialize(activation)

        config = super().get_config()
        config.update({
            "embedding_dim": self.embedding_dim,
            "hidden_dim": self.hidden_dim,
            "activation": activation,
            "dropout": self.dropout
        })
        return config