[Mlir-commits] [mlir] [MLIR][NVVM] Add div Ops (PR #198744)
Varad Rahul Kamthe
llvmlistbot at llvm.org
Wed May 20 07:03:26 PDT 2026
https://github.com/varadk27 updated https://github.com/llvm/llvm-project/pull/198744
From e34c0f46a4994fe35e9f111fff569d89fe1db5ff Mon Sep 17 00:00:00 2001
From: Varad Rahul Kamthe <vkamthe at nvidia.com>
Date: Wed, 20 May 2026 09:48:42 +0000
Subject: [PATCH 1/2] Add div Ops
---
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 56 +++++++++++++++
mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 71 +++++++++++++++++++
mlir/test/Target/LLVMIR/nvvm/divf/divf.mlir | 57 +++++++++++++++
.../Target/LLVMIR/nvvm/divf/divf_invalid.mlir | 17 +++++
4 files changed, 201 insertions(+)
create mode 100644 mlir/test/Target/LLVMIR/nvvm/divf/divf.mlir
create mode 100644 mlir/test/Target/LLVMIR/nvvm/divf/divf_invalid.mlir
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 030c33526b16a..7931d4b9e07cb 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -6579,6 +6579,62 @@ def NVVM_SqrtApproxOp : NVVM_F32UnaryApproxOp<"sqrt.approx"> {
}];
}
+//===----------------------------------------------------------------------===//
+// NVVM div op definitions
+//===----------------------------------------------------------------------===//
+
+def NVVM_DivFOp
+ : NVVM_SingleResultIntrinsicOp<"divf", [Pure, SameOperandsAndResultType]> {
+ let summary = "Divide one value by another";
+ let description = [{
+ Divides lhs by rhs, stores result in res (`res = lhs / rhs`).
+
+ For more information, see PTX ISA:
+ [div](https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-div)
+ }];
+ let arguments = (ins AnyTypeOf<[F32, F64]>:$lhs, AnyTypeOf<[F32, F64]>:$rhs,
+ FPArithRoundingMode:$rnd, DefaultValuedAttr<BoolAttr, "false">:$ftz);
+ let results = (outs AnyTypeOf<[F32, F64]>:$res);
+ let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type($res)";
+ let hasVerifier = 1;
+}
+
+def NVVM_DivFApproxOp
+ : NVVM_SingleResultIntrinsicOp<"divf.approx", [Pure,
+ SameOperandsAndResultType]> {
+ let summary = "Fast, approximate divide";
+ let description = [{
+ Implements a fast approximation to divide, computed as `res = lhs * (1/rhs)`.
+ The maximum ulp error is 2 in the normal range.
+
+ For more information, see PTX ISA:
+ [div](https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-div)
+ }];
+ let arguments = (ins F32:$lhs, F32:$rhs,
+ DefaultValuedAttr<BoolAttr, "false">:$ftz);
+ let results = (outs F32:$res);
+ let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type($res)";
+}
+
+def NVVM_DivFFullOp
+ : NVVM_SingleResultIntrinsicOp<"divf.full", [Pure,
+ SameOperandsAndResultType]> {
+ let summary = "Full-range approximate divide";
+ let description = [{
+ Implements a relatively fast, full-range approximation that scales
+ operands to achieve better accuracy, but is not fully IEEE 754 compliant
+ and does not support rounding modifiers. The maximum ulp error is 2
+ across the full range of inputs.
+
+ For more information, see PTX ISA:
+ [div](https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-div)
+ }];
+ let arguments = (ins F32:$lhs, F32:$rhs,
+ DefaultValuedAttr<BoolAttr, "false">:$ftz);
+ let results = (outs F32:$res);
+ let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type($res)";
+}
+
//===----------------------------------------------------------------------===//
// NVVM tensormap.replace Op
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index aa9e05013eaed..8929eb9358f86 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -3349,6 +3349,16 @@ LogicalResult NVVM::SqrtOp::verify() {
return success();
}
+LogicalResult NVVM::DivFOp::verify() {
+ if (getRnd() == NVVM::FPRoundingMode::NONE)
+ return emitOpError("rounding mode cannot be None");
+
+ if (getRes().getType().isF64() && getFtz())
+ return emitOpError("FTZ is not supported for f64");
+
+ return success();
+}
+
/// Packs the given `field` into the `result`.
/// The `result` is 64-bits and each `field` can be 32-bits or narrower.
static llvm::Value *
@@ -3593,6 +3603,67 @@ SqrtApproxOp::getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
return {id, {mt.lookupValue(thisOp.getSrc())}};
}
+mlir::NVVM::IDArgPair
+DivFOp::getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+ llvm::IRBuilderBase &builder) {
+ auto thisOp = cast<NVVM::DivFOp>(op);
+ Type t = thisOp.getRes().getType();
+ NVVM::FPRoundingMode rndMode = thisOp.getRnd();
+ bool isFtz = thisOp.getFtz();
+
+ // RM is one of RN/RM/RP/RZ (verifier rejects NONE).
+ // Subtracting 1 maps RN=1..RZ=4 to 0..3.
+ unsigned rndIndex = static_cast<unsigned>(rndMode) - 1;
+
+ static constexpr llvm::Intrinsic::ID f32IDs[] = {
+ llvm::Intrinsic::nvvm_div_rn_f,
+ llvm::Intrinsic::nvvm_div_rm_f,
+ llvm::Intrinsic::nvvm_div_rp_f,
+ llvm::Intrinsic::nvvm_div_rz_f,
+ };
+ static constexpr llvm::Intrinsic::ID f32FTZIDs[] = {
+ llvm::Intrinsic::nvvm_div_rn_ftz_f,
+ llvm::Intrinsic::nvvm_div_rm_ftz_f,
+ llvm::Intrinsic::nvvm_div_rp_ftz_f,
+ llvm::Intrinsic::nvvm_div_rz_ftz_f,
+ };
+ static constexpr llvm::Intrinsic::ID f64IDs[] = {
+ llvm::Intrinsic::nvvm_div_rn_d,
+ llvm::Intrinsic::nvvm_div_rm_d,
+ llvm::Intrinsic::nvvm_div_rp_d,
+ llvm::Intrinsic::nvvm_div_rz_d,
+ };
+
+ llvm::Intrinsic::ID id =
+ t.isF32() ? (isFtz ? f32FTZIDs[rndIndex] : f32IDs[rndIndex])
+ : f64IDs[rndIndex];
+
+ return {id,
+ {mt.lookupValue(thisOp.getLhs()), mt.lookupValue(thisOp.getRhs())}};
+}
+
+mlir::NVVM::IDArgPair
+DivFApproxOp::getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+ llvm::IRBuilderBase &builder) {
+ auto thisOp = cast<NVVM::DivFApproxOp>(op);
+ llvm::Intrinsic::ID id = thisOp.getFtz()
+ ? llvm::Intrinsic::nvvm_div_approx_ftz_f
+ : llvm::Intrinsic::nvvm_div_approx_f;
+ return {id,
+ {mt.lookupValue(thisOp.getLhs()), mt.lookupValue(thisOp.getRhs())}};
+}
+
+mlir::NVVM::IDArgPair
+DivFFullOp::getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+ llvm::IRBuilderBase &builder) {
+ auto thisOp = cast<NVVM::DivFFullOp>(op);
+ // Naming quirk: int_nvvm_div_full has NO `_f` suffix (unlike approx).
+ llvm::Intrinsic::ID id = thisOp.getFtz() ? llvm::Intrinsic::nvvm_div_full_ftz
+ : llvm::Intrinsic::nvvm_div_full;
+ return {id,
+ {mt.lookupValue(thisOp.getLhs()), mt.lookupValue(thisOp.getRhs())}};
+}
+
mlir::NVVM::IDArgPair
PMEventOp::getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
llvm::IRBuilderBase &builder) {
diff --git a/mlir/test/Target/LLVMIR/nvvm/divf/divf.mlir b/mlir/test/Target/LLVMIR/nvvm/divf/divf.mlir
new file mode 100644
index 0000000000000..cb58bf5ab60b5
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/divf/divf.mlir
@@ -0,0 +1,57 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// f32 divf — all 8 forms (4 rounding modes × 2 ftz states).
+llvm.func @divf_f32(%a : f32, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @divf_f32(float %0, float %1) {
+ // CHECK: call float @llvm.nvvm.div.rn.f(float %{{.*}}, float %{{.*}})
+ // CHECK: call float @llvm.nvvm.div.rz.f(float %{{.*}}, float %{{.*}})
+ // CHECK: call float @llvm.nvvm.div.rm.f(float %{{.*}}, float %{{.*}})
+ // CHECK: call float @llvm.nvvm.div.rp.f(float %{{.*}}, float %{{.*}})
+ // CHECK: call float @llvm.nvvm.div.rn.ftz.f(float %{{.*}}, float %{{.*}})
+ // CHECK: call float @llvm.nvvm.div.rz.ftz.f(float %{{.*}}, float %{{.*}})
+ // CHECK: call float @llvm.nvvm.div.rm.ftz.f(float %{{.*}}, float %{{.*}})
+ // CHECK: call float @llvm.nvvm.div.rp.ftz.f(float %{{.*}}, float %{{.*}})
+ %r1 = nvvm.divf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f32
+ %r2 = nvvm.divf %r1, %r1 {rnd = #nvvm.fp_rnd_mode<rz>} : f32
+ %r3 = nvvm.divf %r2, %r2 {rnd = #nvvm.fp_rnd_mode<rm>} : f32
+ %r4 = nvvm.divf %r3, %r3 {rnd = #nvvm.fp_rnd_mode<rp>} : f32
+ %r5 = nvvm.divf %r4, %r4 {rnd = #nvvm.fp_rnd_mode<rn>, ftz = true} : f32
+ %r6 = nvvm.divf %r5, %r5 {rnd = #nvvm.fp_rnd_mode<rz>, ftz = true} : f32
+ %r7 = nvvm.divf %r6, %r6 {rnd = #nvvm.fp_rnd_mode<rm>, ftz = true} : f32
+ %r8 = nvvm.divf %r7, %r7 {rnd = #nvvm.fp_rnd_mode<rp>, ftz = true} : f32
+ llvm.return %r8 : f32
+}
+
+// f64 divf — all 4 forms (4 rounding modes, no ftz).
+llvm.func @divf_f64(%a : f64, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @divf_f64(double %0, double %1) {
+ // CHECK: call double @llvm.nvvm.div.rn.d(double %{{.*}}, double %{{.*}})
+ // CHECK: call double @llvm.nvvm.div.rz.d(double %{{.*}}, double %{{.*}})
+ // CHECK: call double @llvm.nvvm.div.rm.d(double %{{.*}}, double %{{.*}})
+ // CHECK: call double @llvm.nvvm.div.rp.d(double %{{.*}}, double %{{.*}})
+ %r1 = nvvm.divf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f64
+ %r2 = nvvm.divf %r1, %r1 {rnd = #nvvm.fp_rnd_mode<rz>} : f64
+ %r3 = nvvm.divf %r2, %r2 {rnd = #nvvm.fp_rnd_mode<rm>} : f64
+ %r4 = nvvm.divf %r3, %r3 {rnd = #nvvm.fp_rnd_mode<rp>} : f64
+ llvm.return %r4 : f64
+}
+
+// divf.approx — 2 forms.
+llvm.func @divf_approx(%a : f32, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @divf_approx(float %0, float %1) {
+ // CHECK: call float @llvm.nvvm.div.approx.f(float %{{.*}}, float %{{.*}})
+ // CHECK: call float @llvm.nvvm.div.approx.ftz.f(float %{{.*}}, float %{{.*}})
+ %r1 = nvvm.divf.approx %a, %b : f32
+ %r2 = nvvm.divf.approx %r1, %r1 {ftz = true} : f32
+ llvm.return %r2 : f32
+}
+
+// divf.full — 2 forms.
+llvm.func @divf_full(%a : f32, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @divf_full(float %0, float %1) {
+ // CHECK: call float @llvm.nvvm.div.full(float %{{.*}}, float %{{.*}})
+ // CHECK: call float @llvm.nvvm.div.full.ftz(float %{{.*}}, float %{{.*}})
+ %r1 = nvvm.divf.full %a, %b : f32
+ %r2 = nvvm.divf.full %r1, %r1 {ftz = true} : f32
+ llvm.return %r2 : f32
+}
diff --git a/mlir/test/Target/LLVMIR/nvvm/divf/divf_invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/divf/divf_invalid.mlir
new file mode 100644
index 0000000000000..617749ae75c40
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/divf/divf_invalid.mlir
@@ -0,0 +1,17 @@
+// RUN: mlir-translate --mlir-to-llvmir --split-input-file --verify-diagnostics %s
+
+// -----
+
+llvm.func @divf_invalid_no_rnd(%a : f32, %b : f32) -> f32 {
+ // expected-error @+1 {{rounding mode cannot be None}}
+ %0 = nvvm.divf %a, %b {rnd = #nvvm.fp_rnd_mode<none>} : f32
+ llvm.return %0 : f32
+}
+
+// -----
+
+llvm.func @divf_invalid_f64_ftz(%a : f64, %b : f64) -> f64 {
+ // expected-error @+1 {{FTZ is not supported for f64}}
+ %0 = nvvm.divf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz = true} : f64
+ llvm.return %0 : f64
+}
From 14e59fc03c3cdeead9a3be7db61e9c3f7a898ca9 Mon Sep 17 00:00:00 2001
From: Varad Rahul Kamthe <vkamthe at nvidia.com>
Date: Wed, 20 May 2026 14:02:33 +0000
Subject: [PATCH 2/2] Address review comments and design change
---
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 45 +------
mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 112 ++++++++++--------
mlir/test/Target/LLVMIR/nvvm/divf/divf.mlir | 12 +-
.../Target/LLVMIR/nvvm/divf/divf_invalid.mlir | 49 +++++++-
4 files changed, 120 insertions(+), 98 deletions(-)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 7931d4b9e07cb..f95402a6b7ef1 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -6589,52 +6589,19 @@ def NVVM_DivFOp
let description = [{
Divides lhs by rhs, stores result in res (`res = lhs / rhs`).
- For more information, see PTX ISA:
- [div](https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-div)
+ [For more information, see PTX ISA]
+ (https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-div)
}];
let arguments = (ins AnyTypeOf<[F32, F64]>:$lhs, AnyTypeOf<[F32, F64]>:$rhs,
- FPArithRoundingMode:$rnd, DefaultValuedAttr<BoolAttr, "false">:$ftz);
+ DefaultValuedAttr<FPArithRoundingMode, "FPRoundingMode::NONE">:$rnd,
+ DefaultValuedAttr<BoolAttr, "false">:$ftz,
+ DefaultValuedAttr<BoolAttr, "false">:$approx,
+ DefaultValuedAttr<BoolAttr, "false">:$full);
let results = (outs AnyTypeOf<[F32, F64]>:$res);
let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type($res)";
let hasVerifier = 1;
}
-def NVVM_DivFApproxOp
- : NVVM_SingleResultIntrinsicOp<"divf.approx", [Pure,
- SameOperandsAndResultType]> {
- let summary = "Fast, approximate divide";
- let description = [{
- Implements a fast approximation to divide, computed as `res = lhs * (1/rhs)`.
- The maximum ulp error is 2 in the normal range.
-
- For more information, see PTX ISA:
- [div](https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-div)
- }];
- let arguments = (ins F32:$lhs, F32:$rhs,
- DefaultValuedAttr<BoolAttr, "false">:$ftz);
- let results = (outs F32:$res);
- let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type($res)";
-}
-
-def NVVM_DivFFullOp
- : NVVM_SingleResultIntrinsicOp<"divf.full", [Pure,
- SameOperandsAndResultType]> {
- let summary = "Full-range approximate divide";
- let description = [{
- Implements a relatively fast, full-range approximation that scales
- operands to achieve better accuracy, but is not fully IEEE 754 compliant
- and does not support rounding modifiers. The maximum ulp error is 2
- across the full range of inputs.
-
- For more information, see PTX ISA:
- [div](https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-div)
- }];
- let arguments = (ins F32:$lhs, F32:$rhs,
- DefaultValuedAttr<BoolAttr, "false">:$ftz);
- let results = (outs F32:$res);
- let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type($res)";
-}
-
//===----------------------------------------------------------------------===//
// NVVM tensormap.replace Op
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 8929eb9358f86..fc4fd87f0937d 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -3350,10 +3350,33 @@ LogicalResult NVVM::SqrtOp::verify() {
}
LogicalResult NVVM::DivFOp::verify() {
- if (getRnd() == NVVM::FPRoundingMode::NONE)
- return emitOpError("rounding mode cannot be None");
+ bool isApprox = getApprox();
+ bool isFull = getFull();
+ bool isF64 = getRes().getType().isF64();
+ bool isFtz = getFtz();
+ NVVM::FPRoundingMode rndMode = getRnd();
+
+ // approx and full are mutually exclusive.
+ if (isApprox && isFull)
+ return emitOpError("'approx' and 'full' are mutually exclusive");
+
+ if (isApprox || isFull) {
+ // Non-rounded forms are f32-only.
+ if (isF64)
+ return emitOpError("'approx' and 'full' forms are f32-only");
+ // Non-rounded forms accept no rounding mode.
+ if (rndMode != NVVM::FPRoundingMode::NONE)
+ return emitOpError(
+ "'approx' and 'full' forms do not accept a rounding mode");
+ return success();
+ }
- if (getRes().getType().isF64() && getFtz())
+ // Rounded form below.
+ // Rounded form requires a rounding mode.
+ if (rndMode == NVVM::FPRoundingMode::NONE)
+ return emitOpError("rounding mode cannot be None for the rounded divide");
+ // Rounded f64 does not support ftz.
+ if (isF64 && isFtz)
return emitOpError("FTZ is not supported for f64");
return success();
@@ -3607,59 +3630,44 @@ mlir::NVVM::IDArgPair
DivFOp::getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
llvm::IRBuilderBase &builder) {
auto thisOp = cast<NVVM::DivFOp>(op);
- Type t = thisOp.getRes().getType();
- NVVM::FPRoundingMode rndMode = thisOp.getRnd();
bool isFtz = thisOp.getFtz();
- // RM is one of RN/RM/RP/RZ (verifier rejects NONE).
- // Subtracting 1 maps RN=1..RZ=4 to 0..3.
- unsigned rndIndex = static_cast<unsigned>(rndMode) - 1;
-
- static constexpr llvm::Intrinsic::ID f32IDs[] = {
- llvm::Intrinsic::nvvm_div_rn_f,
- llvm::Intrinsic::nvvm_div_rm_f,
- llvm::Intrinsic::nvvm_div_rp_f,
- llvm::Intrinsic::nvvm_div_rz_f,
- };
- static constexpr llvm::Intrinsic::ID f32FTZIDs[] = {
- llvm::Intrinsic::nvvm_div_rn_ftz_f,
- llvm::Intrinsic::nvvm_div_rm_ftz_f,
- llvm::Intrinsic::nvvm_div_rp_ftz_f,
- llvm::Intrinsic::nvvm_div_rz_ftz_f,
- };
- static constexpr llvm::Intrinsic::ID f64IDs[] = {
- llvm::Intrinsic::nvvm_div_rn_d,
- llvm::Intrinsic::nvvm_div_rm_d,
- llvm::Intrinsic::nvvm_div_rp_d,
- llvm::Intrinsic::nvvm_div_rz_d,
- };
-
- llvm::Intrinsic::ID id =
- t.isF32() ? (isFtz ? f32FTZIDs[rndIndex] : f32IDs[rndIndex])
- : f64IDs[rndIndex];
-
- return {id,
- {mt.lookupValue(thisOp.getLhs()), mt.lookupValue(thisOp.getRhs())}};
-}
+ llvm::Intrinsic::ID id;
-mlir::NVVM::IDArgPair
-DivFApproxOp::getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
- llvm::IRBuilderBase &builder) {
- auto thisOp = cast<NVVM::DivFApproxOp>(op);
- llvm::Intrinsic::ID id = thisOp.getFtz()
- ? llvm::Intrinsic::nvvm_div_approx_ftz_f
- : llvm::Intrinsic::nvvm_div_approx_f;
- return {id,
- {mt.lookupValue(thisOp.getLhs()), mt.lookupValue(thisOp.getRhs())}};
-}
+ if (thisOp.getApprox()) {
+ id = isFtz ? llvm::Intrinsic::nvvm_div_approx_ftz_f
+ : llvm::Intrinsic::nvvm_div_approx_f;
+ } else if (thisOp.getFull()) {
+ // Naming quirk: int_nvvm_div_full has no `_f` suffix (unlike approx).
+ id = isFtz ? llvm::Intrinsic::nvvm_div_full_ftz
+ : llvm::Intrinsic::nvvm_div_full;
+ } else {
+ // Rounded form — three 4-entry tables indexed by (rndMode - 1).
+ unsigned rndIndex = static_cast<unsigned>(thisOp.getRnd()) - 1;
+
+ static constexpr llvm::Intrinsic::ID f32IDs[] = {
+ llvm::Intrinsic::nvvm_div_rn_f,
+ llvm::Intrinsic::nvvm_div_rm_f,
+ llvm::Intrinsic::nvvm_div_rp_f,
+ llvm::Intrinsic::nvvm_div_rz_f,
+ };
+ static constexpr llvm::Intrinsic::ID f32FTZIDs[] = {
+ llvm::Intrinsic::nvvm_div_rn_ftz_f,
+ llvm::Intrinsic::nvvm_div_rm_ftz_f,
+ llvm::Intrinsic::nvvm_div_rp_ftz_f,
+ llvm::Intrinsic::nvvm_div_rz_ftz_f,
+ };
+ static constexpr llvm::Intrinsic::ID f64IDs[] = {
+ llvm::Intrinsic::nvvm_div_rn_d,
+ llvm::Intrinsic::nvvm_div_rm_d,
+ llvm::Intrinsic::nvvm_div_rp_d,
+ llvm::Intrinsic::nvvm_div_rz_d,
+ };
+ Type t = thisOp.getRes().getType();
+ id = t.isF32() ? (isFtz ? f32FTZIDs[rndIndex] : f32IDs[rndIndex])
+ : f64IDs[rndIndex];
+ }
-mlir::NVVM::IDArgPair
-DivFFullOp::getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
- llvm::IRBuilderBase &builder) {
- auto thisOp = cast<NVVM::DivFFullOp>(op);
- // Naming quirk: int_nvvm_div_full has NO `_f` suffix (unlike approx).
- llvm::Intrinsic::ID id = thisOp.getFtz() ? llvm::Intrinsic::nvvm_div_full_ftz
- : llvm::Intrinsic::nvvm_div_full;
return {id,
{mt.lookupValue(thisOp.getLhs()), mt.lookupValue(thisOp.getRhs())}};
}
diff --git a/mlir/test/Target/LLVMIR/nvvm/divf/divf.mlir b/mlir/test/Target/LLVMIR/nvvm/divf/divf.mlir
index cb58bf5ab60b5..4af14785e99a4 100644
--- a/mlir/test/Target/LLVMIR/nvvm/divf/divf.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/divf/divf.mlir
@@ -1,6 +1,6 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-// f32 divf — all 8 forms (4 rounding modes × 2 ftz states).
+// f32 divf — rounded, all 8 forms (4 rounding modes × 2 ftz states).
llvm.func @divf_f32(%a : f32, %b : f32) -> f32 {
// CHECK-LABEL: define float @divf_f32(float %0, float %1) {
// CHECK: call float @llvm.nvvm.div.rn.f(float %{{.*}}, float %{{.*}})
@@ -22,7 +22,7 @@ llvm.func @divf_f32(%a : f32, %b : f32) -> f32 {
llvm.return %r8 : f32
}
-// f64 divf — all 4 forms (4 rounding modes, no ftz).
+// f64 divf — rounded, all 4 forms (no ftz).
llvm.func @divf_f64(%a : f64, %b : f64) -> f64 {
// CHECK-LABEL: define double @divf_f64(double %0, double %1) {
// CHECK: call double @llvm.nvvm.div.rn.d(double %{{.*}}, double %{{.*}})
@@ -41,8 +41,8 @@ llvm.func @divf_approx(%a : f32, %b : f32) -> f32 {
// CHECK-LABEL: define float @divf_approx(float %0, float %1) {
// CHECK: call float @llvm.nvvm.div.approx.f(float %{{.*}}, float %{{.*}})
// CHECK: call float @llvm.nvvm.div.approx.ftz.f(float %{{.*}}, float %{{.*}})
- %r1 = nvvm.divf.approx %a, %b : f32
- %r2 = nvvm.divf.approx %r1, %r1 {ftz = true} : f32
+ %r1 = nvvm.divf %a, %b {approx = true} : f32
+ %r2 = nvvm.divf %r1, %r1 {approx = true, ftz = true} : f32
llvm.return %r2 : f32
}
@@ -51,7 +51,7 @@ llvm.func @divf_full(%a : f32, %b : f32) -> f32 {
// CHECK-LABEL: define float @divf_full(float %0, float %1) {
// CHECK: call float @llvm.nvvm.div.full(float %{{.*}}, float %{{.*}})
// CHECK: call float @llvm.nvvm.div.full.ftz(float %{{.*}}, float %{{.*}})
- %r1 = nvvm.divf.full %a, %b : f32
- %r2 = nvvm.divf.full %r1, %r1 {ftz = true} : f32
+ %r1 = nvvm.divf %a, %b {full = true} : f32
+ %r2 = nvvm.divf %r1, %r1 {full = true, ftz = true} : f32
llvm.return %r2 : f32
}
diff --git a/mlir/test/Target/LLVMIR/nvvm/divf/divf_invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/divf/divf_invalid.mlir
index 617749ae75c40..fcce8fe57c0f3 100644
--- a/mlir/test/Target/LLVMIR/nvvm/divf/divf_invalid.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/divf/divf_invalid.mlir
@@ -2,16 +2,63 @@
// -----
+// Rounded form: rnd cannot be None.
llvm.func @divf_invalid_no_rnd(%a : f32, %b : f32) -> f32 {
- // expected-error @+1 {{rounding mode cannot be None}}
+ // expected-error @+1 {{rounding mode cannot be None for the rounded divide}}
%0 = nvvm.divf %a, %b {rnd = #nvvm.fp_rnd_mode<none>} : f32
llvm.return %0 : f32
}
// -----
+// Rounded f64: FTZ not supported.
llvm.func @divf_invalid_f64_ftz(%a : f64, %b : f64) -> f64 {
// expected-error @+1 {{FTZ is not supported for f64}}
%0 = nvvm.divf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz = true} : f64
llvm.return %0 : f64
}
+
+// -----
+
+// approx and full are mutually exclusive.
+llvm.func @divf_invalid_approx_and_full(%a : f32, %b : f32) -> f32 {
+ // expected-error @+1 {{'approx' and 'full' are mutually exclusive}}
+ %0 = nvvm.divf %a, %b {approx = true, full = true} : f32
+ llvm.return %0 : f32
+}
+
+// -----
+
+// approx is f32-only.
+llvm.func @divf_invalid_approx_f64(%a : f64, %b : f64) -> f64 {
+ // expected-error @+1 {{'approx' and 'full' forms are f32-only}}
+ %0 = nvvm.divf %a, %b {approx = true} : f64
+ llvm.return %0 : f64
+}
+
+// -----
+
+// full is f32-only.
+llvm.func @divf_invalid_full_f64(%a : f64, %b : f64) -> f64 {
+ // expected-error @+1 {{'approx' and 'full' forms are f32-only}}
+ %0 = nvvm.divf %a, %b {full = true} : f64
+ llvm.return %0 : f64
+}
+
+// -----
+
+// approx does not accept a rounding mode.
+llvm.func @divf_invalid_approx_with_rnd(%a : f32, %b : f32) -> f32 {
+ // expected-error @+1 {{'approx' and 'full' forms do not accept a rounding mode}}
+ %0 = nvvm.divf %a, %b {approx = true, rnd = #nvvm.fp_rnd_mode<rn>} : f32
+ llvm.return %0 : f32
+}
+
+// -----
+
+// full does not accept a rounding mode.
+llvm.func @divf_invalid_full_with_rnd(%a : f32, %b : f32) -> f32 {
+ // expected-error @+1 {{'approx' and 'full' forms do not accept a rounding mode}}
+ %0 = nvvm.divf %a, %b {full = true, rnd = #nvvm.fp_rnd_mode<rn>} : f32
+ llvm.return %0 : f32
+}
More information about the Mlir-commits
mailing list