diff --git a/activation/activation_kernels.cu b/activation/activation_kernels.cu
index 55e6596797010403c8f2d8cc4d2ebbcae1c75d7e..06311945aa17ce594a7cded8eb62b1534f18b90c 100644
--- a/activation/activation_kernels.cu
+++ b/activation/activation_kernels.cu
@@ -44,7 +44,7 @@ __device__ __forceinline__ T gelu_kernel(const T& x) {
   // https://github.com/pytorch/pytorch/blob/8ac9b20d4b090c213799e81acf48a55ea8d437d6/aten/src/ATen/native/cuda/ActivationGeluKernel.cu#L36-L38
   const float f = (float)x;
   constexpr float ALPHA = M_SQRT1_2;
-  return (T)(f * 0.5f * (1.0f + ::erf(f * ALPHA)));
+  return (T)(f * 0.5f * (1.0f + erf(f * ALPHA)));
 }
 
 template <typename T>
@@ -183,6 +183,7 @@ __global__ void activation_kernel(
 
 namespace vllm {
 
+
 template <typename T>
 __device__ __forceinline__ T gelu_new_kernel(const T& x) {
   const float x3 = (float)(x * x * x);
@@ -223,3 +224,21 @@ void gelu_quick(torch::Tensor& out, // [..., d]
 {
   LAUNCH_ACTIVATION_KERNEL(vllm::gelu_quick_kernel);
 }
+
+void gelu(torch::Tensor& out,    // [..., d]
+          torch::Tensor& input)  // [..., d]
+{
+  LAUNCH_ACTIVATION_KERNEL(vllm::gelu_kernel);
+}
+
+void gelu_tanh(torch::Tensor& out,    // [..., d]
+               torch::Tensor& input)  // [..., d]
+{
+  LAUNCH_ACTIVATION_KERNEL(vllm::gelu_tanh_kernel);
+}
+
+void silu(torch::Tensor& out,    // [..., d]
+          torch::Tensor& input)  // [..., d]
+{
+  LAUNCH_ACTIVATION_KERNEL(vllm::silu_kernel);
+}
\ No newline at end of file
diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py b/build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py
index 1c4f207354093c6ef83eb5d7f3a5a3b22b95d357..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 100644
--- a/build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py
+++ b/build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py
@@ -30,6 +30,20 @@ def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0)
     return out
 
 
+def gelu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.gelu(out, x)
+    return out
+
+def silu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.silu(out, x)
+    return out
+
+
+def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.gelu_tanh(out, x)
+    return out
+
+
 def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_fast(out, x)
     return out
@@ -47,11 +61,15 @@ def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None:
 
 __all__ = [
     "silu_and_mul",
+    "mul_and_silu",
     "gelu_and_mul",
     "gelu_tanh_and_mul",
     "fatrelu_and_mul",
     "gelu_fast",
     "gelu_new",
     "gelu_quick",
+    "gelu_tanh",
+    "silu",
+    "gelu",
     "layers",
 ]
diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc
index 5155b241dff8af4302230c3ae23518cb41efa185..3a6358b82d007fa92ac419a82b73a371a184992c 100644
Binary files a/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc and b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ
diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc
index 53b5508fec27cd0ece00b9b018694ba8da40c5ba..aa07da5459427811e64acc67e85be6a1a5d8109d 100644
Binary files a/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc and b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ
diff --git
a/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc index 7752cad4c2a06746b1a68c3637c7baef00bb5ddc..09398aaf4f3214cbf0c6b079dc7c7f6d2c12e109 100644 Binary files a/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc and b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_20250917153858.abi3.so b/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_20250917153858.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..707666b73feb1d1a677d21840923c0146c316f66 --- /dev/null +++ b/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_20250917153858.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:618cdba5f19eabc1f9c1d33e130ef03ab1b11b52f1e7b00b73f2a10d5cf1e62f +size 2773664 diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so b/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so deleted file mode 100755 index 7d5463c37b3f4a3dec8b15df1a13168019fb26e3..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:aee7c6869a9e318ad81cb84460c58ca0dac2dc85f4ed739b12fe57641f766332 -size 2546984 diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py b/build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py index 745e06b31cb5b9718d3b85236f4cc257459070d7..a24764a95a7a5490ca596cd418d5ce2c2591c906 100644 --- a/build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py +++ b/build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_be5bedb_dirty -ops = torch.ops._activation_be5bedb_dirty +from . import _activation_20250917153858 +ops = torch.ops._activation_20250917153858 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_activation_be5bedb_dirty::{op_name}" \ No newline at end of file + return f"_activation_20250917153858::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py b/build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py index 45b31181ffb80509a85d729a7f7ee86fc2cf014a..0aec9c95fa75e4d3ff699ce69fc6618798b179c1 100644 --- a/build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py +++ b/build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py @@ -23,6 +23,57 @@ class SiluAndMul(nn.Module): ops.silu_and_mul(out, x) return out +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + class MulAndSilu(nn.Module): """An activation function for SwiGLU. diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py b/build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py index 1c4f207354093c6ef83eb5d7f3a5a3b22b95d357..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 100644 --- a/build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py +++ b/build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py @@ -30,6 +30,20 @@ def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) return out +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_fast(out, x) return out @@ -47,11 +61,15 @@ def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: __all__ = [ "silu_and_mul", + "mul_and_silu", "gelu_and_mul", "gelu_tanh_and_mul", "fatrelu_and_mul", "gelu_fast", "gelu_new", "gelu_quick", + "gelu_tanh", + "silu", + "gelu", "layers", ] diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc index 4b1fcc2dcde514cab92d358380824ca24616cd0b..0c4d3787b1aeba2c506fc491aaa28cbb5dbf9ac6 100644 Binary files a/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc and b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc index 665e89cb27b58c9caff761de28b7f6574cc2140e..3aed458254d1ebba49b19df3d2984ea7ce30556f 100644 Binary files a/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc and b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc index 4602c567b14a674c4a56d0e1cf8ef073fbc50beb..4fe6da8188a01106d53124e5bcb3b53d1dc0e509 100644 Binary files a/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc and b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_20250917153858.abi3.so b/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_20250917153858.abi3.so new file mode 100755 index 
0000000000000000000000000000000000000000..b1d622e9f768e1d07dc670ad89deb0de15a8a46a --- /dev/null +++ b/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_20250917153858.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87ee9280b670b3323378c17d75ee7082f419987a568769fe8479bf509ee6c245 +size 2852232 diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so b/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so deleted file mode 100755 index 94c38d99b9593469317fe894be35b069017b493e..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f075a6e0d47a2d382d16291b1c5d7d1d98111e2bbc5891b14b627e3c1778b699 -size 2621536 diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py b/build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py index 745e06b31cb5b9718d3b85236f4cc257459070d7..a24764a95a7a5490ca596cd418d5ce2c2591c906 100644 --- a/build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py +++ b/build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_be5bedb_dirty -ops = torch.ops._activation_be5bedb_dirty +from . import _activation_20250917153858 +ops = torch.ops._activation_20250917153858 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_activation_be5bedb_dirty::{op_name}" \ No newline at end of file + return f"_activation_20250917153858::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/layers.py b/build/torch27-cxx11-cu126-x86_64-linux/activation/layers.py index 45b31181ffb80509a85d729a7f7ee86fc2cf014a..0aec9c95fa75e4d3ff699ce69fc6618798b179c1 100644 --- a/build/torch27-cxx11-cu126-x86_64-linux/activation/layers.py +++ b/build/torch27-cxx11-cu126-x86_64-linux/activation/layers.py @@ -23,6 +23,57 @@ class SiluAndMul(nn.Module): ops.silu_and_mul(out, x) return out +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + class MulAndSilu(nn.Module): """An activation function for SwiGLU. 
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/__init__.py b/build/torch27-cxx11-cu128-x86_64-linux/activation/__init__.py index 1c4f207354093c6ef83eb5d7f3a5a3b22b95d357..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 100644 --- a/build/torch27-cxx11-cu128-x86_64-linux/activation/__init__.py +++ b/build/torch27-cxx11-cu128-x86_64-linux/activation/__init__.py @@ -30,6 +30,20 @@ def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) return out +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_fast(out, x) return out @@ -47,11 +61,15 @@ def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: __all__ = [ "silu_and_mul", + "mul_and_silu", "gelu_and_mul", "gelu_tanh_and_mul", "fatrelu_and_mul", "gelu_fast", "gelu_new", "gelu_quick", + "gelu_tanh", + "silu", + "gelu", "layers", ] diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc index fe2206ed48c6e6b877620ac3db87af6ee49ddf07..09ba7d3df59ba0e6bb6f28483d8d9d066e736296 100644 Binary files a/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc and b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc index 6a940427d39d1a12a0806315d03b02bdfed65a3d..76b49d8e1d63e6bc3eab559ae97d3dd57281a675 100644 Binary files a/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc and b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc index 725246ac4c8d6c4374d8250ea67f759a871b1c38..13146c78d42a18877fe1041ac8469d766158775e 100644 Binary files a/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc and b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_20250917153858.abi3.so b/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_20250917153858.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..9830157016a530b7cfeac9d15d361a7c2cffeffd --- /dev/null +++ b/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_20250917153858.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28ca9a3e35c49ae49694d7c6c77f85f3664622cad9c857bf13dfbf3bc144ae1b +size 4127912 diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so b/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so deleted file mode 100755 index e5c17e44367c005d1c9f8d6b391be8d49079b2fc..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version 
https://git-lfs.github.com/spec/v1 -oid sha256:cc2406aa2fa09dd7bc1fd5e87cdcdf55edfc7e0853fad5f977e2500e08fa8899 -size 3565432 diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py b/build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py index 745e06b31cb5b9718d3b85236f4cc257459070d7..a24764a95a7a5490ca596cd418d5ce2c2591c906 100644 --- a/build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py +++ b/build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_be5bedb_dirty -ops = torch.ops._activation_be5bedb_dirty +from . import _activation_20250917153858 +ops = torch.ops._activation_20250917153858 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_activation_be5bedb_dirty::{op_name}" \ No newline at end of file + return f"_activation_20250917153858::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/layers.py b/build/torch27-cxx11-cu128-x86_64-linux/activation/layers.py index 45b31181ffb80509a85d729a7f7ee86fc2cf014a..0aec9c95fa75e4d3ff699ce69fc6618798b179c1 100644 --- a/build/torch27-cxx11-cu128-x86_64-linux/activation/layers.py +++ b/build/torch27-cxx11-cu128-x86_64-linux/activation/layers.py @@ -23,6 +23,57 @@ class SiluAndMul(nn.Module): ops.silu_and_mul(out, x) return out +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + class MulAndSilu(nn.Module): """An activation function for SwiGLU. 
diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/__init__.py b/build/torch28-cxx11-cu126-x86_64-linux/activation/__init__.py index 1c4f207354093c6ef83eb5d7f3a5a3b22b95d357..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 100644 --- a/build/torch28-cxx11-cu126-x86_64-linux/activation/__init__.py +++ b/build/torch28-cxx11-cu126-x86_64-linux/activation/__init__.py @@ -30,6 +30,20 @@ def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) return out +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_fast(out, x) return out @@ -47,11 +61,15 @@ def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: __all__ = [ "silu_and_mul", + "mul_and_silu", "gelu_and_mul", "gelu_tanh_and_mul", "fatrelu_and_mul", "gelu_fast", "gelu_new", "gelu_quick", + "gelu_tanh", + "silu", + "gelu", "layers", ] diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc index 5263d294bc5bc421b98d31436c896bbc244d0771..9b1754cfdb6ad5edfe73ae99dcd829df47bbbe92 100644 Binary files a/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc and b/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc index fb753a567265e3db8b71afceb9a4442139a6aea7..cb5b93c070c1bc3449aeddfd7bc67f3e73ce0671 100644 Binary files a/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc and b/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc index 6dd25df0a6c63b7315d2c0d9f4b3894ff1626fc8..d3c18f3d02cc0af239075a590f1f1232c7bb61f8 100644 Binary files a/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc and b/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_20250917153858.abi3.so b/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_20250917153858.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..2ffd19a1b43e107e6703a009dfa85619524754b9 --- /dev/null +++ b/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_20250917153858.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fcd47dd765bba10bb09f65388f6c1b357b117b2611c17bae5bf8214499a9e39 +size 2837224 diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so b/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so deleted file mode 100755 index 40900ff2070ff72eb665fdd5fd78f12d3a287cd9..0000000000000000000000000000000000000000 --- a/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version 
https://git-lfs.github.com/spec/v1 -oid sha256:c88e87951b92ea55313ef79a34d284cb2a23713d3bdafee735caa4fc955b9dcb -size 2610616 diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/_ops.py b/build/torch28-cxx11-cu126-x86_64-linux/activation/_ops.py index 745e06b31cb5b9718d3b85236f4cc257459070d7..a24764a95a7a5490ca596cd418d5ce2c2591c906 100644 --- a/build/torch28-cxx11-cu126-x86_64-linux/activation/_ops.py +++ b/build/torch28-cxx11-cu126-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_be5bedb_dirty -ops = torch.ops._activation_be5bedb_dirty +from . import _activation_20250917153858 +ops = torch.ops._activation_20250917153858 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_activation_be5bedb_dirty::{op_name}" \ No newline at end of file + return f"_activation_20250917153858::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/layers.py b/build/torch28-cxx11-cu126-x86_64-linux/activation/layers.py index 45b31181ffb80509a85d729a7f7ee86fc2cf014a..0aec9c95fa75e4d3ff699ce69fc6618798b179c1 100644 --- a/build/torch28-cxx11-cu126-x86_64-linux/activation/layers.py +++ b/build/torch28-cxx11-cu126-x86_64-linux/activation/layers.py @@ -23,6 +23,57 @@ class SiluAndMul(nn.Module): ops.silu_and_mul(out, x) return out +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + class MulAndSilu(nn.Module): """An activation function for SwiGLU. 
diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/__init__.py b/build/torch28-cxx11-cu128-x86_64-linux/activation/__init__.py index 1c4f207354093c6ef83eb5d7f3a5a3b22b95d357..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 100644 --- a/build/torch28-cxx11-cu128-x86_64-linux/activation/__init__.py +++ b/build/torch28-cxx11-cu128-x86_64-linux/activation/__init__.py @@ -30,6 +30,20 @@ def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) return out +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_fast(out, x) return out @@ -47,11 +61,15 @@ def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: __all__ = [ "silu_and_mul", + "mul_and_silu", "gelu_and_mul", "gelu_tanh_and_mul", "fatrelu_and_mul", "gelu_fast", "gelu_new", "gelu_quick", + "gelu_tanh", + "silu", + "gelu", "layers", ] diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc index aedb284c8147a243ebfc99ec94000b62ae672077..a7fd63365a953f7804b2a89b5dda50cd506a0fdc 100644 Binary files a/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc and b/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc index 7ae3e6d861e600db32e9024ae7db059642f35a3f..fe47bb82e8371e3dba3018517aec31b669970d04 100644 Binary files a/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc and b/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc index 51baab3cf4e592a2b8bed4cea0e9228a559b399d..232694fed7e1ea130e0cfcb18f219a62a996c206 100644 Binary files a/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc and b/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_20250917153858.abi3.so b/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_20250917153858.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..a86c4c4db41ceacc50bb8a05ab438c747a8ef0ab --- /dev/null +++ b/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_20250917153858.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e6d88c71eebabc842f6a566de7cfaf24d3d90a30572eae584a3b51dcb7e838e +size 4117000 diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so b/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so deleted file mode 100755 index 8b1ece63bdec0e63013816dae6bce9a87068f88e..0000000000000000000000000000000000000000 --- a/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version 
https://git-lfs.github.com/spec/v1 -oid sha256:cf784c7ab178c476fc6268efe820b1948c7c5b8f049c046c851b03067da5dd59 -size 3558616 diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/_ops.py b/build/torch28-cxx11-cu128-x86_64-linux/activation/_ops.py index 745e06b31cb5b9718d3b85236f4cc257459070d7..a24764a95a7a5490ca596cd418d5ce2c2591c906 100644 --- a/build/torch28-cxx11-cu128-x86_64-linux/activation/_ops.py +++ b/build/torch28-cxx11-cu128-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_be5bedb_dirty -ops = torch.ops._activation_be5bedb_dirty +from . import _activation_20250917153858 +ops = torch.ops._activation_20250917153858 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_activation_be5bedb_dirty::{op_name}" \ No newline at end of file + return f"_activation_20250917153858::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/layers.py b/build/torch28-cxx11-cu128-x86_64-linux/activation/layers.py index 45b31181ffb80509a85d729a7f7ee86fc2cf014a..0aec9c95fa75e4d3ff699ce69fc6618798b179c1 100644 --- a/build/torch28-cxx11-cu128-x86_64-linux/activation/layers.py +++ b/build/torch28-cxx11-cu128-x86_64-linux/activation/layers.py @@ -23,6 +23,57 @@ class SiluAndMul(nn.Module): ops.silu_and_mul(out, x) return out +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + class MulAndSilu(nn.Module): """An activation function for SwiGLU. 
diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/__init__.py b/build/torch28-cxx11-cu129-x86_64-linux/activation/__init__.py index 1c4f207354093c6ef83eb5d7f3a5a3b22b95d357..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 100644 --- a/build/torch28-cxx11-cu129-x86_64-linux/activation/__init__.py +++ b/build/torch28-cxx11-cu129-x86_64-linux/activation/__init__.py @@ -30,6 +30,20 @@ def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) return out +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_fast(out, x) return out @@ -47,11 +61,15 @@ def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: __all__ = [ "silu_and_mul", + "mul_and_silu", "gelu_and_mul", "gelu_tanh_and_mul", "fatrelu_and_mul", "gelu_fast", "gelu_new", "gelu_quick", + "gelu_tanh", + "silu", + "gelu", "layers", ] diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc index 01d30fced2b5392d0f6f4e6454cbe7d782a14daa..ed1db9c86882966d57ed36a0ed55bc4b2ca19321 100644 Binary files a/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc and b/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc index 75b0e5f83e10b053d8584f2607d9a9f3009d45dc..5241c54af2fe7946d1a0fd85a475d0d3ca40a4cf 100644 Binary files a/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc and b/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc index d6ed035d206ae523160771021be45010f234687e..f6d111cf4f598453f07c754bf3bce7d50cafbff8 100644 Binary files a/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc and b/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_20250917153858.abi3.so b/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_20250917153858.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..56bc6e0d6cb4f9b4e7260eab9be147746e14bd98 --- /dev/null +++ b/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_20250917153858.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3c1b86db31b04bd5fe75b0c9d6915ba2766a2456ea9bd1a20f2d75c4b1acf35 +size 4154880 diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so b/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so deleted file mode 100755 index 33fb245664d9daef5b07440b390db2c19ef404f1..0000000000000000000000000000000000000000 --- a/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version 
https://git-lfs.github.com/spec/v1 -oid sha256:9e7cca3169eea8cbd67c61706d102548e49aadc936f8c2943efef3e7c4c0ee0d -size 3592400 diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/_ops.py b/build/torch28-cxx11-cu129-x86_64-linux/activation/_ops.py index 745e06b31cb5b9718d3b85236f4cc257459070d7..a24764a95a7a5490ca596cd418d5ce2c2591c906 100644 --- a/build/torch28-cxx11-cu129-x86_64-linux/activation/_ops.py +++ b/build/torch28-cxx11-cu129-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_be5bedb_dirty -ops = torch.ops._activation_be5bedb_dirty +from . import _activation_20250917153858 +ops = torch.ops._activation_20250917153858 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_activation_be5bedb_dirty::{op_name}" \ No newline at end of file + return f"_activation_20250917153858::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/layers.py b/build/torch28-cxx11-cu129-x86_64-linux/activation/layers.py index 45b31181ffb80509a85d729a7f7ee86fc2cf014a..0aec9c95fa75e4d3ff699ce69fc6618798b179c1 100644 --- a/build/torch28-cxx11-cu129-x86_64-linux/activation/layers.py +++ b/build/torch28-cxx11-cu129-x86_64-linux/activation/layers.py @@ -23,6 +23,57 @@ class SiluAndMul(nn.Module): ops.silu_and_mul(out, x) return out +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + class MulAndSilu(nn.Module): """An activation function for SwiGLU. 
diff --git a/tests/__pycache__/__init__.cpython-312.pyc b/tests/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2adb755bcf665d377c47ff2914a4c8f8dea13e6b
Binary files /dev/null and b/tests/__pycache__/__init__.cpython-312.pyc differ
diff --git a/tests/kernels/__pycache__/__init__.cpython-312.pyc b/tests/kernels/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..eea82760251f78a09024315b7812a7ace2e92f8d
Binary files /dev/null and b/tests/kernels/__pycache__/__init__.cpython-312.pyc differ
diff --git a/tests/kernels/__pycache__/allclose_default.cpython-312.pyc b/tests/kernels/__pycache__/allclose_default.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..016b17f53666a29d0c0592fd9f3dbad5442646c1
Binary files /dev/null and b/tests/kernels/__pycache__/allclose_default.cpython-312.pyc differ
diff --git a/tests/kernels/__pycache__/test_activation.cpython-312-pytest-8.4.2.pyc b/tests/kernels/__pycache__/test_activation.cpython-312-pytest-8.4.2.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4011a3a634917ac91ff8eb7e3ba196e038bc8fe5
Binary files /dev/null and b/tests/kernels/__pycache__/test_activation.cpython-312-pytest-8.4.2.pyc differ
diff --git a/tests/kernels/__pycache__/utils.cpython-312.pyc b/tests/kernels/__pycache__/utils.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..326f9eb51bbf99ea441bd87d0911e2db67f55aa6
Binary files /dev/null and b/tests/kernels/__pycache__/utils.cpython-312.pyc differ
diff --git a/tests/kernels/test_activation.py b/tests/kernels/test_activation.py
index 740f6837597943625d18c4d714bda3a35958c747..684783db54e696d869e691fc12d58434f47cacc8 100644
--- a/tests/kernels/test_activation.py
+++ b/tests/kernels/test_activation.py
@@ -55,6 +55,14 @@ def gelu_and_mul(x: torch.Tensor, approximate: str) -> torch.Tensor:
     d = x.shape[-1] // 2
     return F.gelu(x[..., :d], approximate=approximate) * x[..., d:]
 
+def gelu(x: torch.Tensor) -> torch.Tensor:
+    return F.gelu(x)
+
+def gelu_tanh(x: torch.Tensor) -> torch.Tensor:
+    return F.gelu(x, approximate="tanh")
+
+def silu(x: torch.Tensor) -> torch.Tensor:
+    return F.silu(x)
 @pytest.mark.parametrize(
     "activation_name", ["silu_and_mul", "mul_and_silu", "gelu", "gelu_tanh", "fatrelu"]
 )
@@ -145,6 +153,24 @@ def test_act_and_mul(
             activation.ops.gelu_quick,
             activation.layers.QuickGELU,
         ),
+        (
+            gelu_tanh,
+            activation.gelu_tanh,
+            activation.ops.gelu_tanh,
+            activation.layers.GeluTanh,
+        ),
+        (
+            silu,
+            activation.silu,
+            activation.ops.silu,
+            activation.layers.Silu,
+        ),
+        (
+            gelu,
+            activation.gelu,
+            activation.ops.gelu,
+            activation.layers.Gelu
+        ),
     ],
 )
 @pytest.mark.parametrize("num_tokens", NUM_TOKENS)
diff --git a/torch-ext/activation/__init__.py b/torch-ext/activation/__init__.py
index 1c4f207354093c6ef83eb5d7f3a5a3b22b95d357..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 100644
--- a/torch-ext/activation/__init__.py
+++ b/torch-ext/activation/__init__.py
@@ -30,6 +30,20 @@ def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0)
     return out
 
 
+def gelu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.gelu(out, x)
+    return out
+
+def silu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.silu(out, x)
+    return out
+
+
+def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.gelu_tanh(out, x)
+    return out
+
+
 def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_fast(out, x)
     return out
@@ -47,11 +61,15 @@ def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None:
 
 __all__ = [
     "silu_and_mul",
+    "mul_and_silu",
     "gelu_and_mul",
     "gelu_tanh_and_mul",
     "fatrelu_and_mul",
     "gelu_fast",
     "gelu_new",
     "gelu_quick",
+    "gelu_tanh",
+    "silu",
+    "gelu",
     "layers",
 ]
diff --git a/torch-ext/activation/layers.py b/torch-ext/activation/layers.py
index 45b31181ffb80509a85d729a7f7ee86fc2cf014a..0aec9c95fa75e4d3ff699ce69fc6618798b179c1 100644
--- a/torch-ext/activation/layers.py
+++ b/torch-ext/activation/layers.py
@@ -23,6 +23,57 @@ class SiluAndMul(nn.Module):
         ops.silu_and_mul(out, x)
         return out
 
+class Silu(nn.Module):
+    """An activation function for SiLU.
+
+    The function computes x -> silu(x).
+
+    Shapes:
+        x: (num_tokens, d) or (batch_size, seq_len, d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor):
+        out = torch.empty_like(x)
+        ops.silu(out, x)
+        return out
+
+class Gelu(nn.Module):
+    """An activation function for GELU.
+
+    The function computes x -> gelu(x).
+
+    Shapes:
+        x: (num_tokens, d) or (batch_size, seq_len, d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor):
+        out = torch.empty_like(x)
+        ops.gelu(out, x)
+        return out
+
+class GeluTanh(nn.Module):
+    """An activation function for GELU with `tanh` approximation.
+
+    The function computes x -> gelu_tanh(x).
+
+    Shapes:
+        x: (num_tokens, d) or (batch_size, seq_len, d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor):
+        out = torch.empty_like(x)
+        ops.gelu_tanh(out, x)
+        return out
+
 
 class MulAndSilu(nn.Module):
     """An activation function for SwiGLU.
diff --git a/torch-ext/torch_binding.cpp b/torch-ext/torch_binding.cpp
index 321568290bf3b5d9d0eaa2dc9a98ae8111c34859..5da0bdfac63ed966be39618a714906b4e9ecc6e1 100644
--- a/torch-ext/torch_binding.cpp
+++ b/torch-ext/torch_binding.cpp
@@ -35,6 +35,18 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
   // Quick GELU implementation.
   ops.def("gelu_quick(Tensor! out, Tensor input) -> ()");
   ops.impl("gelu_quick", torch::kCUDA, &gelu_quick);
+
+  // GELU with `tanh` approximation.
+  ops.def("gelu_tanh(Tensor! out, Tensor input) -> ()");
+  ops.impl("gelu_tanh", torch::kCUDA, &gelu_tanh);
+
+  // SiLU implementation.
+  ops.def("silu(Tensor! out, Tensor input) -> ()");
+  ops.impl("silu", torch::kCUDA, &silu);
+
+  // GELU with none approximation.
+  ops.def("gelu(Tensor! out, Tensor input) -> ()");
+  ops.impl("gelu", torch::kCUDA, &gelu);
 }
 
 REGISTER_EXTENSION(TORCH_EXTENSION_NAME)
diff --git a/torch-ext/torch_binding.h b/torch-ext/torch_binding.h
index 3d7e28ae62da83fb2c18131f28a2e6d37878b8f5..98f1234ea5e78b6d2b964efbd438d5b267594f52 100644
--- a/torch-ext/torch_binding.h
+++ b/torch-ext/torch_binding.h
@@ -18,3 +18,9 @@ void gelu_new(torch::Tensor &out, torch::Tensor &input);
 void gelu_fast(torch::Tensor &out, torch::Tensor &input);
 
 void gelu_quick(torch::Tensor &out, torch::Tensor &input);
+
+void gelu_tanh(torch::Tensor &out, torch::Tensor &input);
+
+void silu(torch::Tensor &out, torch::Tensor &input);
+
+void gelu(torch::Tensor &out, torch::Tensor &input);
\ No newline at end of file
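
Usage sketch (editor's illustration, not part of the patch): the snippet below drives the Python-level wrappers and layers introduced by this diff. It assumes the extension has been built for the local torch/CUDA combination, is importable as `activation`, and that a CUDA device is available; the tensor shapes are arbitrary.

import torch
import activation
from activation.layers import Gelu, GeluTanh, Silu

x = torch.randn(16, 1024, dtype=torch.float16, device="cuda")

# Functional form: like the existing gelu_fast/gelu_new/gelu_quick wrappers,
# each new function writes into a caller-provided output tensor and returns it.
out = torch.empty_like(x)
activation.gelu(out, x)        # erf-based GELU ("none" approximation)
activation.gelu_tanh(out, x)   # tanh-approximated GELU
activation.silu(out, x)        # SiLU

# Module form: each layer allocates its output with torch.empty_like(x) and
# calls the corresponding op, so it accepts (num_tokens, d) or
# (batch_size, seq_len, d) inputs.
y = Gelu()(x)
z = GeluTanh()(x)
w = Silu()(x)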
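For a quick correctness check outside pytest, the eager PyTorch references added to tests/kernels/test_activation.py (F.gelu, F.gelu with approximate="tanh", F.silu) can be compared against the new kernels directly. The tolerances below are illustrative only, not the repository's actual test settings.

import torch
import torch.nn.functional as F
import activation

x = torch.randn(7, 512, dtype=torch.float32, device="cuda")

pairs = [
    (activation.gelu, F.gelu),                                        # approximate="none"
    (activation.gelu_tanh, lambda t: F.gelu(t, approximate="tanh")),  # tanh approximation
    (activation.silu, F.silu),
]

for kernel_fn, ref_fn in pairs:
    out = torch.empty_like(x)
    kernel_fn(out, x)
    # Assumed tolerances; the real tests rely on the repo's allclose defaults.
    torch.testing.assert_close(out, ref_fn(x), rtol=1e-3, atol=1e-3)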