Source code for teras._src.activations

from keras import ops, random
from teras._src.api_export import teras_export


# =========================== GLU ===========================
@teras_export("teras.activations.glu")
def glu(logits, axis: int = -1):
    """
    Gated linear unit (GLU) activation.

    The input is cut into two equal sections along `axis`. The first
    section carries the values, the second carries the gates; the gates
    are passed through a sigmoid and multiplied elementwise with the
    values.

    Args:
        logits: `Tensor`, tensor of logits. It is split into two equal
            sections along `axis`.
        axis: `int`, axis along which to apply glu activation.
            Defaults to -1.

    Returns:
        `Tensor`, the values section scaled by the sigmoid of the gates
        section.
    """
    halves = ops.split(logits, indices_or_sections=2, axis=axis)
    values, gates = halves
    gate_weights = ops.sigmoid(gates)
    return values * gate_weights
# =========================== GEGLU ===========================
@teras_export("teras.activations.geglu")
def geglu(logits, axis: int = -1):
    """
    GeGLU activation, a GLU variant that gates with GELU.

    The input is cut into two equal sections along `axis`. The first
    section carries the values, the second carries the gates; the gates
    are passed through `ops.gelu` and multiplied elementwise with the
    values.

    Args:
        logits: `Tensor`, tensor of logits. It is split into two equal
            sections along `axis`.
        axis: `int`, axis along which to apply geglu activation.
            Defaults to -1.

    Returns:
        `Tensor`, the values section scaled by the GELU of the gates
        section.
    """
    halves = ops.split(logits, indices_or_sections=2, axis=axis)
    values, gates = halves
    gate_weights = ops.gelu(gates)
    return values * gate_weights
# =========================== SPARSEMAX ===========================
@teras_export("teras.activations.sparsemax")
def sparsemax(logits, axis: int = -1):
    """
    Sparsemax activation function as proposed by T. Martins et al. in
    the paper,
    "From Softmax to Sparsemax: A Sparse Model of Attention and
    Multi-Label Classification"

    Reference(s):
        https://arxiv.org/abs/1602.02068

    Args:
        logits: `Tensor`, tensor of logits.
        axis: `int`, axis along which to apply the sparsemax
            activation. Defaults to -1.

    Returns:
        `Tensor` with the same shape as `logits`, holding
        `relu(logits - threshold)` where the threshold is computed
        independently for every slice along `axis`.
    """
    # `ops.top_k` always sorts along the last axis, so bring the
    # requested axis to the end, do all the work there, and move it
    # back before returning. For the default axis=-1 this is a no-op.
    z = ops.moveaxis(logits, axis, -1)
    K = ops.shape(z)[-1]

    # Ranks 1..K, kept 1-D so they broadcast against inputs of any rank.
    idx = ops.arange(1, K + 1, dtype=z.dtype)

    # Full descending sort of each slice, and its running sum.
    z_sorted, _ = ops.top_k(z, k=K)
    z_cumsum = ops.cumsum(z_sorted, axis=-1, dtype=z.dtype)

    # Count of ranks k that satisfy 1 + k * z_(k) > cumsum_k.
    # Rank 1 always satisfies it, so kz >= 1.
    kz = ops.sum(1 + (idx * z_sorted) > z_cumsum, axis=-1, keepdims=True)

    # subtract 1 from kz to bring indices in range [0, K),
    # instead of (0, K]
    selective_cumsum = ops.take_along_axis(z_cumsum, indices=kz - 1, axis=-1)
    threshold = (selective_cumsum - 1) / ops.cast(kz, selective_cumsum.dtype)

    p = ops.relu(z - threshold)
    # Restore the caller's axis layout.
    return ops.moveaxis(p, -1, axis)
# =========================== GUMBEL SOFTMAX ===========================
@teras_export("teras.activations.gumbel_softmax")
def gumbel_softmax(logits,
                   temperature: float = 0.2,
                   hard: bool = False,
                   seed: int = None):
    """
    Implementation of the Gumbel Softmax activation function
    proposed by Eric Jang et al. in the paper
    Categorical Reparameterization with Gumbel-Softmax

    Reference(s):
        https://arxiv.org/abs/1611.01144

    Args:
        logits: `Tensor` Input tensor of logits.
        temperature: `float`, default 0.2,
            Controls the sharpness or smoothness of the resulting
            probability distribution. A higher temperature value leads
            to a smoother and more uniform probability distribution.
            Conversely, a lower temperature value makes the
            distribution concentrated around the category with the
            highest probability.
        hard: `bool`, default `False`,
            Whether to return soft probabilities or hard one hot
            vectors.
        seed: int, seed to use for random sampling.

    Returns:
        `Tensor`. When `hard` is `False`, the softmax (over the last
        axis) of the Gumbel-perturbed logits. When `hard` is `True`,
        the one-hot encoding of the argmax of those probabilities over
        the last axis.
    """
    u = random.uniform(ops.shape(logits), minval=0, maxval=1, seed=seed)
    # The sampler's lower bound is inclusive, so `u` can be exactly 0,
    # which would give -log(-log(0)) = -inf noise. Clamp away from zero;
    # every non-zero sample is left unchanged. (The constant has no
    # effect if the sampling dtype cannot represent it, e.g. float16.)
    u = ops.maximum(u, 1e-20)
    gumbels = -ops.log(-ops.log(u))

    perturbed_logits = (logits + gumbels) / temperature
    probabilities = ops.nn.softmax(perturbed_logits)

    if hard:
        # NOTE: argmax + one_hot is not differentiable, so no gradient
        # flows back to `logits` through this branch.
        one_hot_labels = ops.one_hot(ops.argmax(probabilities, axis=-1),
                                     ops.shape(logits)[-1])
        return one_hot_labels
    return probabilities