[Mlir-commits] [mlir] c618e11 - [MLIR][NVVM] Add `nvvm.divf` Op (#198744)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Mon May 25 23:40:01 PDT 2026
Author: Varad Rahul Kamthe
Date: 2026-05-26T12:09:56+05:30
New Revision: c618e112145fbbbe7ab14ab202f8626703920b96
URL: https://github.com/llvm/llvm-project/commit/c618e112145fbbbe7ab14ab202f8626703920b96
DIFF: https://github.com/llvm/llvm-project/commit/c618e112145fbbbe7ab14ab202f8626703920b96.diff
LOG: [MLIR][NVVM] Add `nvvm.divf` Op (#198744)
Adds the `nvvm.divf` NVVM dialect op covering all 16 PTX `div` forms via attribute-driven selection:
- Default (`approx = false, full = false`): IEEE-compliant rounded divide (`div.<RM>[.ftz].{f32,f64}`), 12 forms -- requires `rnd` ∈ `{rn, rm, rp, rz}`; supports f32 and f64 (f64 does not accept `ftz`).
- `approx = true`: fast hardware approximation (`div.approx[.ftz].f32`), 2 forms -- f32 only.
- `full = true`: full-range approximation (`div.full[.ftz].f32`), 2 forms -- f32 only.
Added:
mlir/test/Target/LLVMIR/nvvm/divf/divf.mlir
mlir/test/Target/LLVMIR/nvvm/divf/divf_invalid.mlir
Modified:
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 030c33526b16a..6230c0f91a865 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -6579,6 +6579,28 @@ def NVVM_SqrtApproxOp : NVVM_F32UnaryApproxOp<"sqrt.approx"> {
}];
}
+//===----------------------------------------------------------------------===//
+// NVVM div op definitions
+//===----------------------------------------------------------------------===//
+
+def NVVM_DivFOp
+ : NVVM_SingleResultIntrinsicOp<"divf", [Pure, SameOperandsAndResultType]> {
+ let summary = "Divide one value by another";
+ let description = [{
+ Divides lhs by rhs, stores result in res (`res = lhs / rhs`).
+
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-div)
+ }];
+ let arguments = (ins AnyTypeOf<[F32, F64]>:$lhs, AnyTypeOf<[F32, F64]>:$rhs,
+ DefaultValuedAttr<FPArithRoundingMode, "FPRoundingMode::NONE">:$rnd,
+ DefaultValuedAttr<BoolAttr, "false">:$ftz,
+ DefaultValuedAttr<BoolAttr, "false">:$approx,
+ DefaultValuedAttr<BoolAttr, "false">:$full);
+ let results = (outs AnyTypeOf<[F32, F64]>:$res);
+ let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type($res)";
+ let hasVerifier = 1;
+}
+
//===----------------------------------------------------------------------===//
// NVVM tensormap.replace Op
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index aa9e05013eaed..d246ce2651ef7 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -3349,6 +3349,34 @@ LogicalResult NVVM::SqrtOp::verify() {
return success();
}
+LogicalResult NVVM::DivFOp::verify() {
+ bool isApprox = getApprox();
+ bool isFull = getFull();
+ bool isF64 = getRes().getType().isF64();
+ bool isFtz = getFtz();
+ NVVM::FPRoundingMode rndMode = getRnd();
+
+ if (isApprox && isFull)
+ return emitOpError("'approx' and 'full' are mutually exclusive");
+
+ if (isApprox || isFull) {
+ if (isF64)
+ return emitOpError("'approx' and 'full' forms are f32-only");
+ if (rndMode != NVVM::FPRoundingMode::NONE)
+ return emitOpError(
+ "'approx' and 'full' forms do not accept a rounding mode");
+ return success();
+ }
+
+ // Rounded form below.
+ if (rndMode == NVVM::FPRoundingMode::NONE)
+ return emitOpError("rounding mode cannot be None for the rounded divide");
+ if (isF64 && isFtz)
+ return emitOpError("FTZ is not supported for f64");
+
+ return success();
+}
+
/// Packs the given `field` into the `result`.
/// The `result` is 64-bits and each `field` can be 32-bits or narrower.
static llvm::Value *
@@ -3593,6 +3621,53 @@ SqrtApproxOp::getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
return {id, {mt.lookupValue(thisOp.getSrc())}};
}
+mlir::NVVM::IDArgPair
+DivFOp::getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+ llvm::IRBuilderBase &builder) {
+ auto thisOp = cast<NVVM::DivFOp>(op);
+ bool isFtz = thisOp.getFtz();
+
+ llvm::Intrinsic::ID id;
+
+ if (thisOp.getApprox()) {
+ id = isFtz ? llvm::Intrinsic::nvvm_div_approx_ftz_f
+ : llvm::Intrinsic::nvvm_div_approx_f;
+ } else if (thisOp.getFull()) {
+ // Intrinsic Naming quirk: int_nvvm_div_full has no `_f` suffix (unlike
+ // approx).
+ id = isFtz ? llvm::Intrinsic::nvvm_div_full_ftz
+ : llvm::Intrinsic::nvvm_div_full;
+ } else {
+ // Rounded form — three 4-entry tables indexed by (rndMode - 1).
+ unsigned rndIndex = static_cast<unsigned>(thisOp.getRnd()) - 1;
+
+ static constexpr llvm::Intrinsic::ID f32IDs[] = {
+ llvm::Intrinsic::nvvm_div_rn_f,
+ llvm::Intrinsic::nvvm_div_rm_f,
+ llvm::Intrinsic::nvvm_div_rp_f,
+ llvm::Intrinsic::nvvm_div_rz_f,
+ };
+ static constexpr llvm::Intrinsic::ID f32FTZIDs[] = {
+ llvm::Intrinsic::nvvm_div_rn_ftz_f,
+ llvm::Intrinsic::nvvm_div_rm_ftz_f,
+ llvm::Intrinsic::nvvm_div_rp_ftz_f,
+ llvm::Intrinsic::nvvm_div_rz_ftz_f,
+ };
+ static constexpr llvm::Intrinsic::ID f64IDs[] = {
+ llvm::Intrinsic::nvvm_div_rn_d,
+ llvm::Intrinsic::nvvm_div_rm_d,
+ llvm::Intrinsic::nvvm_div_rp_d,
+ llvm::Intrinsic::nvvm_div_rz_d,
+ };
+ Type t = thisOp.getRes().getType();
+ id = t.isF32() ? (isFtz ? f32FTZIDs[rndIndex] : f32IDs[rndIndex])
+ : f64IDs[rndIndex];
+ }
+
+ return {id,
+ {mt.lookupValue(thisOp.getLhs()), mt.lookupValue(thisOp.getRhs())}};
+}
+
mlir::NVVM::IDArgPair
PMEventOp::getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
llvm::IRBuilderBase &builder) {
diff --git a/mlir/test/Target/LLVMIR/nvvm/divf/divf.mlir b/mlir/test/Target/LLVMIR/nvvm/divf/divf.mlir
new file mode 100644
index 0000000000000..4af14785e99a4
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/divf/divf.mlir
@@ -0,0 +1,57 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// f32 divf — rounded, all 8 forms (4 rounding modes × 2 ftz states).
+llvm.func @divf_f32(%a : f32, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @divf_f32(float %0, float %1) {
+ // CHECK: call float @llvm.nvvm.div.rn.f(float %{{.*}}, float %{{.*}})
+ // CHECK: call float @llvm.nvvm.div.rz.f(float %{{.*}}, float %{{.*}})
+ // CHECK: call float @llvm.nvvm.div.rm.f(float %{{.*}}, float %{{.*}})
+ // CHECK: call float @llvm.nvvm.div.rp.f(float %{{.*}}, float %{{.*}})
+ // CHECK: call float @llvm.nvvm.div.rn.ftz.f(float %{{.*}}, float %{{.*}})
+ // CHECK: call float @llvm.nvvm.div.rz.ftz.f(float %{{.*}}, float %{{.*}})
+ // CHECK: call float @llvm.nvvm.div.rm.ftz.f(float %{{.*}}, float %{{.*}})
+ // CHECK: call float @llvm.nvvm.div.rp.ftz.f(float %{{.*}}, float %{{.*}})
+ %r1 = nvvm.divf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f32
+ %r2 = nvvm.divf %r1, %r1 {rnd = #nvvm.fp_rnd_mode<rz>} : f32
+ %r3 = nvvm.divf %r2, %r2 {rnd = #nvvm.fp_rnd_mode<rm>} : f32
+ %r4 = nvvm.divf %r3, %r3 {rnd = #nvvm.fp_rnd_mode<rp>} : f32
+ %r5 = nvvm.divf %r4, %r4 {rnd = #nvvm.fp_rnd_mode<rn>, ftz = true} : f32
+ %r6 = nvvm.divf %r5, %r5 {rnd = #nvvm.fp_rnd_mode<rz>, ftz = true} : f32
+ %r7 = nvvm.divf %r6, %r6 {rnd = #nvvm.fp_rnd_mode<rm>, ftz = true} : f32
+ %r8 = nvvm.divf %r7, %r7 {rnd = #nvvm.fp_rnd_mode<rp>, ftz = true} : f32
+ llvm.return %r8 : f32
+}
+
+// f64 divf — rounded, all 4 forms (no ftz).
+llvm.func @divf_f64(%a : f64, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @divf_f64(double %0, double %1) {
+ // CHECK: call double @llvm.nvvm.div.rn.d(double %{{.*}}, double %{{.*}})
+ // CHECK: call double @llvm.nvvm.div.rz.d(double %{{.*}}, double %{{.*}})
+ // CHECK: call double @llvm.nvvm.div.rm.d(double %{{.*}}, double %{{.*}})
+ // CHECK: call double @llvm.nvvm.div.rp.d(double %{{.*}}, double %{{.*}})
+ %r1 = nvvm.divf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f64
+ %r2 = nvvm.divf %r1, %r1 {rnd = #nvvm.fp_rnd_mode<rz>} : f64
+ %r3 = nvvm.divf %r2, %r2 {rnd = #nvvm.fp_rnd_mode<rm>} : f64
+ %r4 = nvvm.divf %r3, %r3 {rnd = #nvvm.fp_rnd_mode<rp>} : f64
+ llvm.return %r4 : f64
+}
+
+// divf.approx — 2 forms.
+llvm.func @divf_approx(%a : f32, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @divf_approx(float %0, float %1) {
+ // CHECK: call float @llvm.nvvm.div.approx.f(float %{{.*}}, float %{{.*}})
+ // CHECK: call float @llvm.nvvm.div.approx.ftz.f(float %{{.*}}, float %{{.*}})
+ %r1 = nvvm.divf %a, %b {approx = true} : f32
+ %r2 = nvvm.divf %r1, %r1 {approx = true, ftz = true} : f32
+ llvm.return %r2 : f32
+}
+
+// divf.full — 2 forms.
+llvm.func @divf_full(%a : f32, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @divf_full(float %0, float %1) {
+ // CHECK: call float @llvm.nvvm.div.full(float %{{.*}}, float %{{.*}})
+ // CHECK: call float @llvm.nvvm.div.full.ftz(float %{{.*}}, float %{{.*}})
+ %r1 = nvvm.divf %a, %b {full = true} : f32
+ %r2 = nvvm.divf %r1, %r1 {full = true, ftz = true} : f32
+ llvm.return %r2 : f32
+}
diff --git a/mlir/test/Target/LLVMIR/nvvm/divf/divf_invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/divf/divf_invalid.mlir
new file mode 100644
index 0000000000000..fcce8fe57c0f3
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/divf/divf_invalid.mlir
@@ -0,0 +1,64 @@
+// RUN: mlir-translate --mlir-to-llvmir --split-input-file --verify-diagnostics %s
+
+// -----
+
+// Rounded form: rnd cannot be None.
+llvm.func @divf_invalid_no_rnd(%a : f32, %b : f32) -> f32 {
+ // expected-error @+1 {{rounding mode cannot be None for the rounded divide}}
+ %0 = nvvm.divf %a, %b {rnd = #nvvm.fp_rnd_mode<none>} : f32
+ llvm.return %0 : f32
+}
+
+// -----
+
+// Rounded f64: FTZ not supported.
+llvm.func @divf_invalid_f64_ftz(%a : f64, %b : f64) -> f64 {
+ // expected-error @+1 {{FTZ is not supported for f64}}
+ %0 = nvvm.divf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz = true} : f64
+ llvm.return %0 : f64
+}
+
+// -----
+
+// approx and full are mutually exclusive.
+llvm.func @divf_invalid_approx_and_full(%a : f32, %b : f32) -> f32 {
+ // expected-error @+1 {{'approx' and 'full' are mutually exclusive}}
+ %0 = nvvm.divf %a, %b {approx = true, full = true} : f32
+ llvm.return %0 : f32
+}
+
+// -----
+
+// approx is f32-only.
+llvm.func @divf_invalid_approx_f64(%a : f64, %b : f64) -> f64 {
+ // expected-error @+1 {{'approx' and 'full' forms are f32-only}}
+ %0 = nvvm.divf %a, %b {approx = true} : f64
+ llvm.return %0 : f64
+}
+
+// -----
+
+// full is f32-only.
+llvm.func @divf_invalid_full_f64(%a : f64, %b : f64) -> f64 {
+ // expected-error @+1 {{'approx' and 'full' forms are f32-only}}
+ %0 = nvvm.divf %a, %b {full = true} : f64
+ llvm.return %0 : f64
+}
+
+// -----
+
+// approx does not accept a rounding mode.
+llvm.func @divf_invalid_approx_with_rnd(%a : f32, %b : f32) -> f32 {
+ // expected-error @+1 {{'approx' and 'full' forms do not accept a rounding mode}}
+ %0 = nvvm.divf %a, %b {approx = true, rnd = #nvvm.fp_rnd_mode<rn>} : f32
+ llvm.return %0 : f32
+}
+
+// -----
+
+// full does not accept a rounding mode.
+llvm.func @divf_invalid_full_with_rnd(%a : f32, %b : f32) -> f32 {
+ // expected-error @+1 {{'approx' and 'full' forms do not accept a rounding mode}}
+ %0 = nvvm.divf %a, %b {full = true, rnd = #nvvm.fp_rnd_mode<rn>} : f32
+ llvm.return %0 : f32
+}
More information about the Mlir-commits
mailing list