diff --git a/activation/activation_kernels.cu b/activation/activation_kernels.cu
index 55e6596797010403c8f2d8cc4d2ebbcae1c75d7e..06311945aa17ce594a7cded8eb62b1534f18b90c 100644
--- a/activation/activation_kernels.cu
+++ b/activation/activation_kernels.cu
@@ -44,7 +44,7 @@ __device__ __forceinline__ T gelu_kernel(const T& x) {
   // https://github.com/pytorch/pytorch/blob/8ac9b20d4b090c213799e81acf48a55ea8d437d6/aten/src/ATen/native/cuda/ActivationGeluKernel.cu#L36-L38
   const float f = (float)x;
   constexpr float ALPHA = M_SQRT1_2;
-  return (T)(f * 0.5f * (1.0f + ::erf(f * ALPHA)));
+  return (T)(f * 0.5f * (1.0f + erf(f * ALPHA)));
 }
 
 template <typename T>
@@ -183,6 +183,7 @@ __global__ void activation_kernel(
 
 namespace vllm {
 
+
 template <typename T>
 __device__ __forceinline__ T gelu_new_kernel(const T& x) {
   const float x3 = (float)(x * x * x);
@@ -223,3 +224,21 @@ void gelu_quick(torch::Tensor& out, // [..., d]
 {
   LAUNCH_ACTIVATION_KERNEL(vllm::gelu_quick_kernel);
 }
+
+void gelu(torch::Tensor& out,    // [..., d]
+          torch::Tensor& input)  // [..., d]
+{
+  LAUNCH_ACTIVATION_KERNEL(vllm::gelu_kernel);
+}
+
+void gelu_tanh(torch::Tensor& out,    // [..., d]
+               torch::Tensor& input)  // [..., d]
+{
+  LAUNCH_ACTIVATION_KERNEL(vllm::gelu_tanh_kernel);
+}
+
+void silu(torch::Tensor& out,    // [..., d]
+          torch::Tensor& input)  // [..., d]
+{
+  LAUNCH_ACTIVATION_KERNEL(vllm::silu_kernel);
+}
\ No newline at end of file
diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py b/build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py
index 1c4f207354093c6ef83eb5d7f3a5a3b22b95d357..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 100644
--- a/build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py
+++ b/build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py
@@ -30,6 +30,20 @@ def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0)
     return out
 
 
+def gelu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.gelu(out, x)
+    return out
+
+def silu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.silu(out, x)
+    return out
+
+
+def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.gelu_tanh(out, x)
+    return out
+
+
 def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_fast(out, x)
     return out
@@ -47,11 +61,15 @@ def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None:
 
 __all__ = [
     "silu_and_mul",
+    "mul_and_silu",
     "gelu_and_mul",
     "gelu_tanh_and_mul",
     "fatrelu_and_mul",
     "gelu_fast",
     "gelu_new",
     "gelu_quick",
+    "gelu_tanh",
+    "silu",
+    "gelu",
     "layers",
 ]
diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc
index 5155b241dff8af4302230c3ae23518cb41efa185..3a6358b82d007fa92ac419a82b73a371a184992c 100644
Binary files a/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc and b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ
diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc
index 53b5508fec27cd0ece00b9b018694ba8da40c5ba..aa07da5459427811e64acc67e85be6a1a5d8109d 100644
Binary files a/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc and b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ
diff --git
a/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc index 7752cad4c2a06746b1a68c3637c7baef00bb5ddc..09398aaf4f3214cbf0c6b079dc7c7f6d2c12e109 100644 Binary files a/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc and b/build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_20250917153858.abi3.so b/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_20250917153858.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..707666b73feb1d1a677d21840923c0146c316f66 --- /dev/null +++ b/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_20250917153858.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:618cdba5f19eabc1f9c1d33e130ef03ab1b11b52f1e7b00b73f2a10d5cf1e62f +size 2773664 diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so b/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so deleted file mode 100755 index 7d5463c37b3f4a3dec8b15df1a13168019fb26e3..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:aee7c6869a9e318ad81cb84460c58ca0dac2dc85f4ed739b12fe57641f766332 -size 2546984 diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py b/build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py index 745e06b31cb5b9718d3b85236f4cc257459070d7..a24764a95a7a5490ca596cd418d5ce2c2591c906 100644 --- a/build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py +++ b/build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_be5bedb_dirty -ops = torch.ops._activation_be5bedb_dirty +from . import _activation_20250917153858 +ops = torch.ops._activation_20250917153858 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_activation_be5bedb_dirty::{op_name}" \ No newline at end of file + return f"_activation_20250917153858::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py b/build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py index 45b31181ffb80509a85d729a7f7ee86fc2cf014a..0aec9c95fa75e4d3ff699ce69fc6618798b179c1 100644 --- a/build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py +++ b/build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py @@ -23,6 +23,57 @@ class SiluAndMul(nn.Module): ops.silu_and_mul(out, x) return out +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). 
+ + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + class MulAndSilu(nn.Module): """An activation function for SwiGLU. diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py b/build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py index 1c4f207354093c6ef83eb5d7f3a5a3b22b95d357..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 100644 --- a/build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py +++ b/build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py @@ -30,6 +30,20 @@ def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) return out +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_fast(out, x) return out @@ -47,11 +61,15 @@ def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: __all__ = [ "silu_and_mul", + "mul_and_silu", "gelu_and_mul", "gelu_tanh_and_mul", "fatrelu_and_mul", "gelu_fast", "gelu_new", "gelu_quick", + "gelu_tanh", + "silu", + "gelu", "layers", ] diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc index 4b1fcc2dcde514cab92d358380824ca24616cd0b..0c4d3787b1aeba2c506fc491aaa28cbb5dbf9ac6 100644 Binary files a/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc and b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc index 665e89cb27b58c9caff761de28b7f6574cc2140e..3aed458254d1ebba49b19df3d2984ea7ce30556f 100644 Binary files a/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc and b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc index 4602c567b14a674c4a56d0e1cf8ef073fbc50beb..4fe6da8188a01106d53124e5bcb3b53d1dc0e509 100644 Binary files a/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc and b/build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_20250917153858.abi3.so b/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_20250917153858.abi3.so new file mode 100755 index 
0000000000000000000000000000000000000000..b1d622e9f768e1d07dc670ad89deb0de15a8a46a --- /dev/null +++ b/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_20250917153858.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87ee9280b670b3323378c17d75ee7082f419987a568769fe8479bf509ee6c245 +size 2852232 diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so b/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so deleted file mode 100755 index 94c38d99b9593469317fe894be35b069017b493e..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f075a6e0d47a2d382d16291b1c5d7d1d98111e2bbc5891b14b627e3c1778b699 -size 2621536 diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py b/build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py index 745e06b31cb5b9718d3b85236f4cc257459070d7..a24764a95a7a5490ca596cd418d5ce2c2591c906 100644 --- a/build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py +++ b/build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_be5bedb_dirty -ops = torch.ops._activation_be5bedb_dirty +from . import _activation_20250917153858 +ops = torch.ops._activation_20250917153858 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_activation_be5bedb_dirty::{op_name}" \ No newline at end of file + return f"_activation_20250917153858::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/layers.py b/build/torch27-cxx11-cu126-x86_64-linux/activation/layers.py index 45b31181ffb80509a85d729a7f7ee86fc2cf014a..0aec9c95fa75e4d3ff699ce69fc6618798b179c1 100644 --- a/build/torch27-cxx11-cu126-x86_64-linux/activation/layers.py +++ b/build/torch27-cxx11-cu126-x86_64-linux/activation/layers.py @@ -23,6 +23,57 @@ class SiluAndMul(nn.Module): ops.silu_and_mul(out, x) return out +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + class MulAndSilu(nn.Module): """An activation function for SwiGLU. 
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/__init__.py b/build/torch27-cxx11-cu128-x86_64-linux/activation/__init__.py index 1c4f207354093c6ef83eb5d7f3a5a3b22b95d357..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 100644 --- a/build/torch27-cxx11-cu128-x86_64-linux/activation/__init__.py +++ b/build/torch27-cxx11-cu128-x86_64-linux/activation/__init__.py @@ -30,6 +30,20 @@ def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) return out +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_fast(out, x) return out @@ -47,11 +61,15 @@ def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: __all__ = [ "silu_and_mul", + "mul_and_silu", "gelu_and_mul", "gelu_tanh_and_mul", "fatrelu_and_mul", "gelu_fast", "gelu_new", "gelu_quick", + "gelu_tanh", + "silu", + "gelu", "layers", ] diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc index fe2206ed48c6e6b877620ac3db87af6ee49ddf07..09ba7d3df59ba0e6bb6f28483d8d9d066e736296 100644 Binary files a/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc and b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc index 6a940427d39d1a12a0806315d03b02bdfed65a3d..76b49d8e1d63e6bc3eab559ae97d3dd57281a675 100644 Binary files a/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc and b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc index 725246ac4c8d6c4374d8250ea67f759a871b1c38..13146c78d42a18877fe1041ac8469d766158775e 100644 Binary files a/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc and b/build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_20250917153858.abi3.so b/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_20250917153858.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..9830157016a530b7cfeac9d15d361a7c2cffeffd --- /dev/null +++ b/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_20250917153858.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28ca9a3e35c49ae49694d7c6c77f85f3664622cad9c857bf13dfbf3bc144ae1b +size 4127912 diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so b/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so deleted file mode 100755 index e5c17e44367c005d1c9f8d6b391be8d49079b2fc..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version 
https://git-lfs.github.com/spec/v1 -oid sha256:cc2406aa2fa09dd7bc1fd5e87cdcdf55edfc7e0853fad5f977e2500e08fa8899 -size 3565432 diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py b/build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py index 745e06b31cb5b9718d3b85236f4cc257459070d7..a24764a95a7a5490ca596cd418d5ce2c2591c906 100644 --- a/build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py +++ b/build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_be5bedb_dirty -ops = torch.ops._activation_be5bedb_dirty +from . import _activation_20250917153858 +ops = torch.ops._activation_20250917153858 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_activation_be5bedb_dirty::{op_name}" \ No newline at end of file + return f"_activation_20250917153858::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/layers.py b/build/torch27-cxx11-cu128-x86_64-linux/activation/layers.py index 45b31181ffb80509a85d729a7f7ee86fc2cf014a..0aec9c95fa75e4d3ff699ce69fc6618798b179c1 100644 --- a/build/torch27-cxx11-cu128-x86_64-linux/activation/layers.py +++ b/build/torch27-cxx11-cu128-x86_64-linux/activation/layers.py @@ -23,6 +23,57 @@ class SiluAndMul(nn.Module): ops.silu_and_mul(out, x) return out +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + class MulAndSilu(nn.Module): """An activation function for SwiGLU. 
diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/__init__.py b/build/torch28-cxx11-cu126-x86_64-linux/activation/__init__.py index 1c4f207354093c6ef83eb5d7f3a5a3b22b95d357..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 100644 --- a/build/torch28-cxx11-cu126-x86_64-linux/activation/__init__.py +++ b/build/torch28-cxx11-cu126-x86_64-linux/activation/__init__.py @@ -30,6 +30,20 @@ def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) return out +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_fast(out, x) return out @@ -47,11 +61,15 @@ def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: __all__ = [ "silu_and_mul", + "mul_and_silu", "gelu_and_mul", "gelu_tanh_and_mul", "fatrelu_and_mul", "gelu_fast", "gelu_new", "gelu_quick", + "gelu_tanh", + "silu", + "gelu", "layers", ] diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc index 5263d294bc5bc421b98d31436c896bbc244d0771..9b1754cfdb6ad5edfe73ae99dcd829df47bbbe92 100644 Binary files a/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc and b/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc index fb753a567265e3db8b71afceb9a4442139a6aea7..cb5b93c070c1bc3449aeddfd7bc67f3e73ce0671 100644 Binary files a/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc and b/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc index 6dd25df0a6c63b7315d2c0d9f4b3894ff1626fc8..d3c18f3d02cc0af239075a590f1f1232c7bb61f8 100644 Binary files a/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc and b/build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_20250917153858.abi3.so b/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_20250917153858.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..2ffd19a1b43e107e6703a009dfa85619524754b9 --- /dev/null +++ b/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_20250917153858.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fcd47dd765bba10bb09f65388f6c1b357b117b2611c17bae5bf8214499a9e39 +size 2837224 diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so b/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so deleted file mode 100755 index 40900ff2070ff72eb665fdd5fd78f12d3a287cd9..0000000000000000000000000000000000000000 --- a/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version 
https://git-lfs.github.com/spec/v1 -oid sha256:c88e87951b92ea55313ef79a34d284cb2a23713d3bdafee735caa4fc955b9dcb -size 2610616 diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/_ops.py b/build/torch28-cxx11-cu126-x86_64-linux/activation/_ops.py index 745e06b31cb5b9718d3b85236f4cc257459070d7..a24764a95a7a5490ca596cd418d5ce2c2591c906 100644 --- a/build/torch28-cxx11-cu126-x86_64-linux/activation/_ops.py +++ b/build/torch28-cxx11-cu126-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_be5bedb_dirty -ops = torch.ops._activation_be5bedb_dirty +from . import _activation_20250917153858 +ops = torch.ops._activation_20250917153858 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_activation_be5bedb_dirty::{op_name}" \ No newline at end of file + return f"_activation_20250917153858::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/layers.py b/build/torch28-cxx11-cu126-x86_64-linux/activation/layers.py index 45b31181ffb80509a85d729a7f7ee86fc2cf014a..0aec9c95fa75e4d3ff699ce69fc6618798b179c1 100644 --- a/build/torch28-cxx11-cu126-x86_64-linux/activation/layers.py +++ b/build/torch28-cxx11-cu126-x86_64-linux/activation/layers.py @@ -23,6 +23,57 @@ class SiluAndMul(nn.Module): ops.silu_and_mul(out, x) return out +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + class MulAndSilu(nn.Module): """An activation function for SwiGLU. 
diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/__init__.py b/build/torch28-cxx11-cu128-x86_64-linux/activation/__init__.py index 1c4f207354093c6ef83eb5d7f3a5a3b22b95d357..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 100644 --- a/build/torch28-cxx11-cu128-x86_64-linux/activation/__init__.py +++ b/build/torch28-cxx11-cu128-x86_64-linux/activation/__init__.py @@ -30,6 +30,20 @@ def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) return out +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_fast(out, x) return out @@ -47,11 +61,15 @@ def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: __all__ = [ "silu_and_mul", + "mul_and_silu", "gelu_and_mul", "gelu_tanh_and_mul", "fatrelu_and_mul", "gelu_fast", "gelu_new", "gelu_quick", + "gelu_tanh", + "silu", + "gelu", "layers", ] diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc index aedb284c8147a243ebfc99ec94000b62ae672077..a7fd63365a953f7804b2a89b5dda50cd506a0fdc 100644 Binary files a/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc and b/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc index 7ae3e6d861e600db32e9024ae7db059642f35a3f..fe47bb82e8371e3dba3018517aec31b669970d04 100644 Binary files a/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc and b/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc index 51baab3cf4e592a2b8bed4cea0e9228a559b399d..232694fed7e1ea130e0cfcb18f219a62a996c206 100644 Binary files a/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc and b/build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_20250917153858.abi3.so b/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_20250917153858.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..a86c4c4db41ceacc50bb8a05ab438c747a8ef0ab --- /dev/null +++ b/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_20250917153858.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e6d88c71eebabc842f6a566de7cfaf24d3d90a30572eae584a3b51dcb7e838e +size 4117000 diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so b/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so deleted file mode 100755 index 8b1ece63bdec0e63013816dae6bce9a87068f88e..0000000000000000000000000000000000000000 --- a/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version 
https://git-lfs.github.com/spec/v1 -oid sha256:cf784c7ab178c476fc6268efe820b1948c7c5b8f049c046c851b03067da5dd59 -size 3558616 diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/_ops.py b/build/torch28-cxx11-cu128-x86_64-linux/activation/_ops.py index 745e06b31cb5b9718d3b85236f4cc257459070d7..a24764a95a7a5490ca596cd418d5ce2c2591c906 100644 --- a/build/torch28-cxx11-cu128-x86_64-linux/activation/_ops.py +++ b/build/torch28-cxx11-cu128-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_be5bedb_dirty -ops = torch.ops._activation_be5bedb_dirty +from . import _activation_20250917153858 +ops = torch.ops._activation_20250917153858 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_activation_be5bedb_dirty::{op_name}" \ No newline at end of file + return f"_activation_20250917153858::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/layers.py b/build/torch28-cxx11-cu128-x86_64-linux/activation/layers.py index 45b31181ffb80509a85d729a7f7ee86fc2cf014a..0aec9c95fa75e4d3ff699ce69fc6618798b179c1 100644 --- a/build/torch28-cxx11-cu128-x86_64-linux/activation/layers.py +++ b/build/torch28-cxx11-cu128-x86_64-linux/activation/layers.py @@ -23,6 +23,57 @@ class SiluAndMul(nn.Module): ops.silu_and_mul(out, x) return out +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + class MulAndSilu(nn.Module): """An activation function for SwiGLU. 
diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/__init__.py b/build/torch28-cxx11-cu129-x86_64-linux/activation/__init__.py index 1c4f207354093c6ef83eb5d7f3a5a3b22b95d357..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 100644 --- a/build/torch28-cxx11-cu129-x86_64-linux/activation/__init__.py +++ b/build/torch28-cxx11-cu129-x86_64-linux/activation/__init__.py @@ -30,6 +30,20 @@ def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) return out +def gelu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu(out, x) + return out + +def silu(out: torch.Tensor, x: torch.Tensor) -> None: + ops.silu(out, x) + return out + + +def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None: + ops.gelu_tanh(out, x) + return out + + def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_fast(out, x) return out @@ -47,11 +61,15 @@ def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: __all__ = [ "silu_and_mul", + "mul_and_silu", "gelu_and_mul", "gelu_tanh_and_mul", "fatrelu_and_mul", "gelu_fast", "gelu_new", "gelu_quick", + "gelu_tanh", + "silu", + "gelu", "layers", ] diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc b/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc index 01d30fced2b5392d0f6f4e6454cbe7d782a14daa..ed1db9c86882966d57ed36a0ed55bc4b2ca19321 100644 Binary files a/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc and b/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc b/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc index 75b0e5f83e10b053d8584f2607d9a9f3009d45dc..5241c54af2fe7946d1a0fd85a475d0d3ca40a4cf 100644 Binary files a/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc and b/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc b/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc index d6ed035d206ae523160771021be45010f234687e..f6d111cf4f598453f07c754bf3bce7d50cafbff8 100644 Binary files a/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc and b/build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_20250917153858.abi3.so b/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_20250917153858.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..56bc6e0d6cb4f9b4e7260eab9be147746e14bd98 --- /dev/null +++ b/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_20250917153858.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3c1b86db31b04bd5fe75b0c9d6915ba2766a2456ea9bd1a20f2d75c4b1acf35 +size 4154880 diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so b/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so deleted file mode 100755 index 33fb245664d9daef5b07440b390db2c19ef404f1..0000000000000000000000000000000000000000 --- a/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version 
https://git-lfs.github.com/spec/v1 -oid sha256:9e7cca3169eea8cbd67c61706d102548e49aadc936f8c2943efef3e7c4c0ee0d -size 3592400 diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/_ops.py b/build/torch28-cxx11-cu129-x86_64-linux/activation/_ops.py index 745e06b31cb5b9718d3b85236f4cc257459070d7..a24764a95a7a5490ca596cd418d5ce2c2591c906 100644 --- a/build/torch28-cxx11-cu129-x86_64-linux/activation/_ops.py +++ b/build/torch28-cxx11-cu129-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_be5bedb_dirty -ops = torch.ops._activation_be5bedb_dirty +from . import _activation_20250917153858 +ops = torch.ops._activation_20250917153858 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_activation_be5bedb_dirty::{op_name}" \ No newline at end of file + return f"_activation_20250917153858::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/layers.py b/build/torch28-cxx11-cu129-x86_64-linux/activation/layers.py index 45b31181ffb80509a85d729a7f7ee86fc2cf014a..0aec9c95fa75e4d3ff699ce69fc6618798b179c1 100644 --- a/build/torch28-cxx11-cu129-x86_64-linux/activation/layers.py +++ b/build/torch28-cxx11-cu129-x86_64-linux/activation/layers.py @@ -23,6 +23,57 @@ class SiluAndMul(nn.Module): ops.silu_and_mul(out, x) return out +class Silu(nn.Module): + """An activation function for SiLU. + + The function computes x -> silu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.silu(out, x) + return out + +class Gelu(nn.Module): + """An activation function for GELU. + + The function computes x -> gelu(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu(out, x) + return out + +class GeluTanh(nn.Module): + """An activation function for GELU with `tanh` approximation. + + The function computes x -> gelu_tanh(x). + + Shapes: + x: (num_tokens, d) or (batch_size, seq_len, d) + return: (num_tokens, d) or (batch_size, seq_len, d) + """ + + can_torch_compile: bool = True + + def forward(self, x: torch.Tensor): + out = torch.empty_like(x) + ops.gelu_tanh(out, x) + return out + class MulAndSilu(nn.Module): """An activation function for SwiGLU. 
diff --git a/tests/__pycache__/__init__.cpython-312.pyc b/tests/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2adb755bcf665d377c47ff2914a4c8f8dea13e6b
Binary files /dev/null and b/tests/__pycache__/__init__.cpython-312.pyc differ
diff --git a/tests/kernels/__pycache__/__init__.cpython-312.pyc b/tests/kernels/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..eea82760251f78a09024315b7812a7ace2e92f8d
Binary files /dev/null and b/tests/kernels/__pycache__/__init__.cpython-312.pyc differ
diff --git a/tests/kernels/__pycache__/allclose_default.cpython-312.pyc b/tests/kernels/__pycache__/allclose_default.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..016b17f53666a29d0c0592fd9f3dbad5442646c1
Binary files /dev/null and b/tests/kernels/__pycache__/allclose_default.cpython-312.pyc differ
diff --git a/tests/kernels/__pycache__/test_activation.cpython-312-pytest-8.4.2.pyc b/tests/kernels/__pycache__/test_activation.cpython-312-pytest-8.4.2.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4011a3a634917ac91ff8eb7e3ba196e038bc8fe5
Binary files /dev/null and b/tests/kernels/__pycache__/test_activation.cpython-312-pytest-8.4.2.pyc differ
diff --git a/tests/kernels/__pycache__/utils.cpython-312.pyc b/tests/kernels/__pycache__/utils.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..326f9eb51bbf99ea441bd87d0911e2db67f55aa6
Binary files /dev/null and b/tests/kernels/__pycache__/utils.cpython-312.pyc differ
diff --git a/tests/kernels/test_activation.py b/tests/kernels/test_activation.py
index 740f6837597943625d18c4d714bda3a35958c747..684783db54e696d869e691fc12d58434f47cacc8 100644
--- a/tests/kernels/test_activation.py
+++ b/tests/kernels/test_activation.py
@@ -55,6 +55,14 @@ def gelu_and_mul(x: torch.Tensor, approximate: str) -> torch.Tensor:
     d = x.shape[-1] // 2
     return F.gelu(x[..., :d], approximate=approximate) * x[..., d:]
 
+def gelu(x: torch.Tensor) -> torch.Tensor:
+    return F.gelu(x)
+
+def gelu_tanh(x: torch.Tensor) -> torch.Tensor:
+    return F.gelu(x, approximate="tanh")
+
+def silu(x: torch.Tensor) -> torch.Tensor:
+    return F.silu(x)
 @pytest.mark.parametrize(
     "activation_name", ["silu_and_mul", "mul_and_silu", "gelu", "gelu_tanh", "fatrelu"]
 )
@@ -145,6 +153,24 @@ def test_act_and_mul(
             activation.ops.gelu_quick,
             activation.layers.QuickGELU,
         ),
+        (
+            gelu_tanh,
+            activation.gelu_tanh,
+            activation.ops.gelu_tanh,
+            activation.layers.GeluTanh,
+        ),
+        (
+            silu,
+            activation.silu,
+            activation.ops.silu,
+            activation.layers.Silu,
+        ),
+        (
+            gelu,
+            activation.gelu,
+            activation.ops.gelu,
+            activation.layers.Gelu
+        ),
     ],
 )
 @pytest.mark.parametrize("num_tokens", NUM_TOKENS)
diff --git a/torch-ext/activation/__init__.py b/torch-ext/activation/__init__.py
index 1c4f207354093c6ef83eb5d7f3a5a3b22b95d357..1a9cd15a0a75f95c5ab956fb05c2a9860f218156 100644
--- a/torch-ext/activation/__init__.py
+++ b/torch-ext/activation/__init__.py
@@ -30,6 +30,20 @@ def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0)
     return out
 
 
+def gelu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.gelu(out, x)
+    return out
+
+def silu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.silu(out, x)
+    return out
+
+
+def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.gelu_tanh(out, x)
+    return out
+
+
 def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_fast(out, x)
     return out
@@ -47,11 +61,15 @@ def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None:
 
 __all__ = [
     "silu_and_mul",
+    "mul_and_silu",
     "gelu_and_mul",
     "gelu_tanh_and_mul",
     "fatrelu_and_mul",
     "gelu_fast",
     "gelu_new",
     "gelu_quick",
+    "gelu_tanh",
+    "silu",
+    "gelu",
     "layers",
 ]
diff --git a/torch-ext/activation/layers.py b/torch-ext/activation/layers.py
index 45b31181ffb80509a85d729a7f7ee86fc2cf014a..0aec9c95fa75e4d3ff699ce69fc6618798b179c1 100644
--- a/torch-ext/activation/layers.py
+++ b/torch-ext/activation/layers.py
@@ -23,6 +23,57 @@ class SiluAndMul(nn.Module):
         ops.silu_and_mul(out, x)
         return out
 
+class Silu(nn.Module):
+    """An activation function for SiLU.
+
+    The function computes x -> silu(x).
+
+    Shapes:
+        x: (num_tokens, d) or (batch_size, seq_len, d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor):
+        out = torch.empty_like(x)
+        ops.silu(out, x)
+        return out
+
+class Gelu(nn.Module):
+    """An activation function for GELU.
+
+    The function computes x -> gelu(x).
+
+    Shapes:
+        x: (num_tokens, d) or (batch_size, seq_len, d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor):
+        out = torch.empty_like(x)
+        ops.gelu(out, x)
+        return out
+
+class GeluTanh(nn.Module):
+    """An activation function for GELU with `tanh` approximation.
+
+    The function computes x -> gelu_tanh(x).
+
+    Shapes:
+        x: (num_tokens, d) or (batch_size, seq_len, d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor):
+        out = torch.empty_like(x)
+        ops.gelu_tanh(out, x)
+        return out
+
 
 class MulAndSilu(nn.Module):
     """An activation function for SwiGLU.
diff --git a/torch-ext/torch_binding.cpp b/torch-ext/torch_binding.cpp
index 321568290bf3b5d9d0eaa2dc9a98ae8111c34859..5da0bdfac63ed966be39618a714906b4e9ecc6e1 100644
--- a/torch-ext/torch_binding.cpp
+++ b/torch-ext/torch_binding.cpp
@@ -35,6 +35,18 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
   // Quick GELU implementation.
   ops.def("gelu_quick(Tensor! out, Tensor input) -> ()");
   ops.impl("gelu_quick", torch::kCUDA, &gelu_quick);
+
+  // GELU with `tanh` approximation.
+  ops.def("gelu_tanh(Tensor! out, Tensor input) -> ()");
+  ops.impl("gelu_tanh", torch::kCUDA, &gelu_tanh);
+
+  // SiLU implementation.
+  ops.def("silu(Tensor! out, Tensor input) -> ()");
+  ops.impl("silu", torch::kCUDA, &silu);
+
+  // GELU with none approximation.
+  ops.def("gelu(Tensor! out, Tensor input) -> ()");
+  ops.impl("gelu", torch::kCUDA, &gelu);
 }
 
 REGISTER_EXTENSION(TORCH_EXTENSION_NAME)
diff --git a/torch-ext/torch_binding.h b/torch-ext/torch_binding.h
index 3d7e28ae62da83fb2c18131f28a2e6d37878b8f5..98f1234ea5e78b6d2b964efbd438d5b267594f52 100644
--- a/torch-ext/torch_binding.h
+++ b/torch-ext/torch_binding.h
@@ -18,3 +18,9 @@ void gelu_new(torch::Tensor &out, torch::Tensor &input);
 void gelu_fast(torch::Tensor &out, torch::Tensor &input);
 
 void gelu_quick(torch::Tensor &out, torch::Tensor &input);
+
+void gelu_tanh(torch::Tensor &out, torch::Tensor &input);
+
+void silu(torch::Tensor &out, torch::Tensor &input);
+
+void gelu(torch::Tensor &out, torch::Tensor &input);
\ No newline at end of file
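
Usage sketch (editor's illustration, not part of the patch): the snippet below drives the Python-level wrappers and layers introduced by this diff. It assumes the extension has been built for the local torch/CUDA combination, is importable as `activation`, and that a CUDA device is available; the tensor shapes are arbitrary.

import torch
import activation
from activation.layers import Gelu, GeluTanh, Silu

x = torch.randn(16, 1024, dtype=torch.float16, device="cuda")

# Functional form: like the existing gelu_fast/gelu_new/gelu_quick wrappers,
# each new function writes into a caller-provided output tensor and returns it.
out = torch.empty_like(x)
activation.gelu(out, x)        # erf-based GELU ("none" approximation)
activation.gelu_tanh(out, x)   # tanh-approximated GELU
activation.silu(out, x)        # SiLU

# Module form: each layer allocates its output with torch.empty_like(x) and
# calls the corresponding op, so it accepts (num_tokens, d) or
# (batch_size, seq_len, d) inputs.
y = Gelu()(x)
z = GeluTanh()(x)
w = Silu()(x)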
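For a quick correctness check outside pytest, the eager PyTorch references added to tests/kernels/test_activation.py (F.gelu, F.gelu with approximate="tanh", F.silu) can be compared against the new kernels directly. The tolerances below are illustrative only, not the repository's actual test settings.

import torch
import torch.nn.functional as F
import activation

x = torch.randn(7, 512, dtype=torch.float32, device="cuda")

pairs = [
    (activation.gelu, F.gelu),                                        # approximate="none"
    (activation.gelu_tanh, lambda t: F.gelu(t, approximate="tanh")),  # tanh approximation
    (activation.silu, F.silu),
]

for kernel_fn, ref_fn in pairs:
    out = torch.empty_like(x)
    kernel_fn(out, x)
    # Assumed tolerances; the real tests rely on the repo's allclose defaults.
    torch.testing.assert_close(out, ref_fn(x), rtol=1e-3, atol=1e-3)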