[Mlir-commits] [mlir] [MLIR][NVVM] Add `nvvm.cos` OP (PR #193792)
Guray Ozen
llvmlistbot at llvm.org
Thu Apr 23 09:17:55 PDT 2026
https://github.com/grypp created https://github.com/llvm/llvm-project/pull/193792
Implement `nvvm.cos` with ftz flag
>From f883c2338c5a05d9a73cf3a25f322a814f971468 Mon Sep 17 00:00:00 2001
From: Guray Ozen <gozen at nvidia.com>
Date: Thu, 23 Apr 2026 17:59:06 +0200
Subject: [PATCH] [MLIR][NVVM] Add `nvvm.cos` OP
Implement `nvvm.cos` with ftz flag
---
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 29 +++++++++++++++++++
.../Dialect/LLVMIR/nvvm-transcendentals.mlir | 15 ++++++++++
.../Target/LLVMIR/nvvm/transcendentals.mlir | 15 ++++++++++
3 files changed, 59 insertions(+)
create mode 100644 mlir/test/Dialect/LLVMIR/nvvm-transcendentals.mlir
create mode 100644 mlir/test/Target/LLVMIR/nvvm/transcendentals.mlir
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index c892ee18166f2..360896db863c6 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -518,6 +518,35 @@ def NVVM_RcpApproxFtzF32Op : NVVM_IntrOp<"rcp.approx.ftz.f", [Pure], 1> {
let assemblyFormat = "$arg attr-dict `:` type($res)";
}
+//===----------------------------------------------------------------------===//
+// NVVM transcendental op definitions
+//===----------------------------------------------------------------------===//
+// PTX provides only fast approximations for sin/cos/lg2/ex2/tanh. Exposing
+// these as plain `nvvm.{sin,cos,lg2,ex2,tanh}` (without a `.approx` suffix)
+// keeps the NVVM dialect self-contained -- users don't need to reach for
+// `math.*` ops for something that already has a PTX instruction.
+
+def NVVM_CosOp : NVVM_Op<"cos", [Pure, SameOperandsAndResultType]> { // `nvvm.cos`: f32-only approximate cosine
+ let summary = "Cosine (fast approximation)";
+ let description = [{
+ Computes a fast approximation of the cosine of the input value (in
+ radians). Lowers to PTX `cos.approx{.ftz}.f32`. The `ftz` attribute,
+ when set, flushes subnormal inputs and results to sign-preserving zero.
+
+ For more information, see PTX ISA:
+ [cos](https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-cos)
+ }];
+ let arguments = (ins F32:$src, // input angle in radians (see description)
+ DefaultValuedAttr<BoolAttr, "false">:$ftz); // flush-to-zero flag; false when omitted
+ let results = (outs F32:$res); // approximate cosine of $src
+ let assemblyFormat = "$src attr-dict `:` type($src)"; // one type is spelled; $res is expected to share it via SameOperandsAndResultType
+ string llvmBuilder = [{
+ unsigned IID = $ftz ? llvm::Intrinsic::nvvm_cos_approx_ftz_f
+ : llvm::Intrinsic::nvvm_cos_approx_f;
+ $res = createIntrinsicCall(builder, IID, {$src});
+ }]; // translation: $ftz selects the ftz or non-ftz cos.approx intrinsic, called on $src
+}
+
//===----------------------------------------------------------------------===//
// NVVM redux op definitions
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/LLVMIR/nvvm-transcendentals.mlir b/mlir/test/Dialect/LLVMIR/nvvm-transcendentals.mlir
new file mode 100644
index 0000000000000..a30171dacb4f3
--- /dev/null
+++ b/mlir/test/Dialect/LLVMIR/nvvm-transcendentals.mlir
@@ -0,0 +1,15 @@
+// RUN: mlir-opt %s -split-input-file -verify-diagnostics | FileCheck %s
+
+// CHECK-LABEL: @nvvm_cos_f32
+func.func @nvvm_cos_f32(%x: f32) -> f32 { // round-trip of nvvm.cos with no attribute dictionary
+ // CHECK: nvvm.cos {{.*}} : f32
+ %cos = nvvm.cos %x : f32
+ return %cos : f32
+}
+
+// CHECK-LABEL: @nvvm_cos_ftz_f32
+func.func @nvvm_cos_ftz_f32(%x: f32) -> f32 { // round-trip of nvvm.cos carrying an explicit ftz = true
+ // CHECK: nvvm.cos {{.*}} {ftz = true} : f32
+ %cos = nvvm.cos %x {ftz = true} : f32
+ return %cos : f32
+}
diff --git a/mlir/test/Target/LLVMIR/nvvm/transcendentals.mlir b/mlir/test/Target/LLVMIR/nvvm/transcendentals.mlir
new file mode 100644
index 0000000000000..e11dab54cbb67
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/transcendentals.mlir
@@ -0,0 +1,15 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// CHECK-LABEL: @nvvm_cos(
+llvm.func @nvvm_cos(%arg0: f32) -> f32 { // ftz unset; the label ends in '(' so it cannot also match @nvvm_cos_ftz
+ // CHECK: call float @llvm.nvvm.cos.approx.f(float %{{.*}})
+ %0 = nvvm.cos %arg0 : f32
+ llvm.return %0 : f32
+}
+
+// CHECK-LABEL: @nvvm_cos_ftz
+llvm.func @nvvm_cos_ftz(%x: f32) -> f32 { // ftz = true must translate to the .ftz intrinsic variant
+ // CHECK: call float @llvm.nvvm.cos.approx.ftz.f(float %{{.*}})
+ %cos = nvvm.cos %x {ftz = true} : f32
+ llvm.return %cos : f32
+}
More information about the Mlir-commits
mailing list