[llvm] [NVPTX] Constant fold NVVM add/mul/div/fma (PR #152544)

Thu Aug 7 09:22:40 PDT 2025

https://github.com/LewisCrawford created https://github.com/llvm/llvm-project/pull/152544

Constant fold the NVVM intrinsics for add, mul, div, fma with specific rounding modes.

>From 53aad6bc75efb9e280998604bb1917e4ca7d2d44 Mon Sep 17 00:00:00 2001
From: Lewis Crawford <lcrawford at nvidia.com>
Date: Thu, 7 Aug 2025 16:19:02 +0000
Subject: [PATCH] [NVPTX] Constant fold NVVM add/mul/div/fma

Constant fold the NVVM intrinsics for add, mul, div, fma with specific
rounding modes.
---
 llvm/include/llvm/IR/NVVMIntrinsicUtils.h     | 172 +++
 llvm/lib/Analysis/ConstantFolding.cpp         | 178 ++++
 .../InstSimplify/const-fold-nvvm-add.ll       | 876 +++++++++++++++
 .../InstSimplify/const-fold-nvvm-div.ll       | 880 ++++++++++++++++
 .../InstSimplify/const-fold-nvvm-fma.ll       | 874 +++++++++++++++
 .../InstSimplify/const-fold-nvvm-mul.ll       | 994 ++++++++++++++++++
 6 files changed, 3974 insertions(+)
 create mode 100644 llvm/test/Transforms/InstSimplify/const-fold-nvvm-add.ll
 create mode 100644 llvm/test/Transforms/InstSimplify/const-fold-nvvm-div.ll
 create mode 100644 llvm/test/Transforms/InstSimplify/const-fold-nvvm-fma.ll
 create mode 100644 llvm/test/Transforms/InstSimplify/const-fold-nvvm-mul.ll

diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
index 11bfd733a8854..cc4929a1ff8da 100644
--- a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
+++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
@@ -414,6 +414,178 @@ inline DenormalMode GetNVVMDenormMode(bool ShouldFTZ) {
   return DenormalMode::getIEEE();
 }
 
+inline bool FAddShouldFTZ(Intrinsic::ID IntrinsicID) {
+  switch (IntrinsicID) { // Is this NVVM add a flush-to-zero (ftz) variant?
+  case Intrinsic::nvvm_add_rm_ftz_f:
+  case Intrinsic::nvvm_add_rn_ftz_f:
+  case Intrinsic::nvvm_add_rp_ftz_f:
+  case Intrinsic::nvvm_add_rz_ftz_f:
+    return true; // Only the float "_ftz_f" intrinsics request FTZ.
+
+  case Intrinsic::nvvm_add_rm_f:
+  case Intrinsic::nvvm_add_rn_f:
+  case Intrinsic::nvvm_add_rp_f:
+  case Intrinsic::nvvm_add_rz_f:
+  case Intrinsic::nvvm_add_rm_d:
+  case Intrinsic::nvvm_add_rn_d:
+  case Intrinsic::nvvm_add_rp_d:
+  case Intrinsic::nvvm_add_rz_d:
+    return false; // Non-ftz float variants and all double variants.
+  }
+  llvm_unreachable("Checking FTZ flag for invalid NVVM add intrinsic");
+}
+
+inline APFloat::roundingMode GetFAddRoundingMode(Intrinsic::ID IntrinsicID) {
+  switch (IntrinsicID) { // Map the rm/rn/rp/rz infix to an APFloat mode.
+  case Intrinsic::nvvm_add_rm_f:
+  case Intrinsic::nvvm_add_rm_d:
+  case Intrinsic::nvvm_add_rm_ftz_f:
+    return APFloat::rmTowardNegative; // rm
+  case Intrinsic::nvvm_add_rn_f:
+  case Intrinsic::nvvm_add_rn_d:
+  case Intrinsic::nvvm_add_rn_ftz_f:
+    return APFloat::rmNearestTiesToEven; // rn
+  case Intrinsic::nvvm_add_rp_f:
+  case Intrinsic::nvvm_add_rp_d:
+  case Intrinsic::nvvm_add_rp_ftz_f:
+    return APFloat::rmTowardPositive; // rp
+  case Intrinsic::nvvm_add_rz_f:
+  case Intrinsic::nvvm_add_rz_d:
+  case Intrinsic::nvvm_add_rz_ftz_f:
+    return APFloat::rmTowardZero; // rz
+  }
+  llvm_unreachable("Invalid FP intrinsic rounding mode for NVVM add");
+}
+
+inline bool FMulShouldFTZ(Intrinsic::ID IntrinsicID) {
+  switch (IntrinsicID) { // Is this NVVM mul a flush-to-zero (ftz) variant?
+  case Intrinsic::nvvm_mul_rm_ftz_f:
+  case Intrinsic::nvvm_mul_rn_ftz_f:
+  case Intrinsic::nvvm_mul_rp_ftz_f:
+  case Intrinsic::nvvm_mul_rz_ftz_f:
+    return true; // Only the float "_ftz_f" intrinsics request FTZ.
+
+  case Intrinsic::nvvm_mul_rm_f:
+  case Intrinsic::nvvm_mul_rn_f:
+  case Intrinsic::nvvm_mul_rp_f:
+  case Intrinsic::nvvm_mul_rz_f:
+  case Intrinsic::nvvm_mul_rm_d:
+  case Intrinsic::nvvm_mul_rn_d:
+  case Intrinsic::nvvm_mul_rp_d:
+  case Intrinsic::nvvm_mul_rz_d:
+    return false; // Non-ftz float variants and all double variants.
+  }
+  llvm_unreachable("Checking FTZ flag for invalid NVVM mul intrinsic");
+}
+
+inline APFloat::roundingMode GetFMulRoundingMode(Intrinsic::ID IntrinsicID) {
+  switch (IntrinsicID) { // Map the rm/rn/rp/rz infix to an APFloat mode.
+  case Intrinsic::nvvm_mul_rm_f:
+  case Intrinsic::nvvm_mul_rm_d:
+  case Intrinsic::nvvm_mul_rm_ftz_f:
+    return APFloat::rmTowardNegative; // rm
+  case Intrinsic::nvvm_mul_rn_f:
+  case Intrinsic::nvvm_mul_rn_d:
+  case Intrinsic::nvvm_mul_rn_ftz_f:
+    return APFloat::rmNearestTiesToEven; // rn
+  case Intrinsic::nvvm_mul_rp_f:
+  case Intrinsic::nvvm_mul_rp_d:
+  case Intrinsic::nvvm_mul_rp_ftz_f:
+    return APFloat::rmTowardPositive; // rp
+  case Intrinsic::nvvm_mul_rz_f:
+  case Intrinsic::nvvm_mul_rz_d:
+  case Intrinsic::nvvm_mul_rz_ftz_f:
+    return APFloat::rmTowardZero; // rz
+  }
+  llvm_unreachable("Invalid FP intrinsic rounding mode for NVVM mul");
+}
+
+inline bool FDivShouldFTZ(Intrinsic::ID IntrinsicID) {
+  switch (IntrinsicID) { // Is this NVVM div a flush-to-zero (ftz) variant?
+  case Intrinsic::nvvm_div_rm_ftz_f:
+  case Intrinsic::nvvm_div_rn_ftz_f:
+  case Intrinsic::nvvm_div_rp_ftz_f:
+  case Intrinsic::nvvm_div_rz_ftz_f:
+    return true; // Only the float "_ftz_f" intrinsics request FTZ.
+
+  case Intrinsic::nvvm_div_rm_f:
+  case Intrinsic::nvvm_div_rn_f:
+  case Intrinsic::nvvm_div_rp_f:
+  case Intrinsic::nvvm_div_rz_f:
+  case Intrinsic::nvvm_div_rm_d:
+  case Intrinsic::nvvm_div_rn_d:
+  case Intrinsic::nvvm_div_rp_d:
+  case Intrinsic::nvvm_div_rz_d:
+    return false; // Non-ftz float variants and all double variants.
+  }
+  llvm_unreachable("Checking FTZ flag for invalid NVVM div intrinsic");
+}
+
+inline APFloat::roundingMode GetFDivRoundingMode(Intrinsic::ID IntrinsicID) {
+  switch (IntrinsicID) { // Map the rm/rn/rp/rz infix to an APFloat mode.
+  case Intrinsic::nvvm_div_rm_f:
+  case Intrinsic::nvvm_div_rm_d:
+  case Intrinsic::nvvm_div_rm_ftz_f:
+    return APFloat::rmTowardNegative; // rm
+  case Intrinsic::nvvm_div_rn_f:
+  case Intrinsic::nvvm_div_rn_d:
+  case Intrinsic::nvvm_div_rn_ftz_f:
+    return APFloat::rmNearestTiesToEven; // rn
+  case Intrinsic::nvvm_div_rp_f:
+  case Intrinsic::nvvm_div_rp_d:
+  case Intrinsic::nvvm_div_rp_ftz_f:
+    return APFloat::rmTowardPositive; // rp
+  case Intrinsic::nvvm_div_rz_f:
+  case Intrinsic::nvvm_div_rz_d:
+  case Intrinsic::nvvm_div_rz_ftz_f:
+    return APFloat::rmTowardZero; // rz
+  }
+  llvm_unreachable("Invalid FP intrinsic rounding mode for NVVM div");
+}
+
+inline bool FMAShouldFTZ(Intrinsic::ID IntrinsicID) {
+  switch (IntrinsicID) { // Is this NVVM fma a flush-to-zero (ftz) variant?
+  case Intrinsic::nvvm_fma_rm_ftz_f:
+  case Intrinsic::nvvm_fma_rn_ftz_f:
+  case Intrinsic::nvvm_fma_rp_ftz_f:
+  case Intrinsic::nvvm_fma_rz_ftz_f:
+    return true; // Only the float "_ftz_f" intrinsics request FTZ.
+
+  case Intrinsic::nvvm_fma_rm_f:
+  case Intrinsic::nvvm_fma_rn_f:
+  case Intrinsic::nvvm_fma_rp_f:
+  case Intrinsic::nvvm_fma_rz_f:
+  case Intrinsic::nvvm_fma_rm_d:
+  case Intrinsic::nvvm_fma_rn_d:
+  case Intrinsic::nvvm_fma_rp_d:
+  case Intrinsic::nvvm_fma_rz_d:
+    return false; // Non-ftz float variants and all double variants.
+  }
+  llvm_unreachable("Checking FTZ flag for invalid NVVM fma intrinsic");
+}
+
+inline APFloat::roundingMode GetFMARoundingMode(Intrinsic::ID IntrinsicID) {
+  switch (IntrinsicID) { // Map the rm/rn/rp/rz infix to an APFloat mode.
+  case Intrinsic::nvvm_fma_rm_f:
+  case Intrinsic::nvvm_fma_rm_d:
+  case Intrinsic::nvvm_fma_rm_ftz_f:
+    return APFloat::rmTowardNegative; // rm
+  case Intrinsic::nvvm_fma_rn_f:
+  case Intrinsic::nvvm_fma_rn_d:
+  case Intrinsic::nvvm_fma_rn_ftz_f:
+    return APFloat::rmNearestTiesToEven; // rn
+  case Intrinsic::nvvm_fma_rp_f:
+  case Intrinsic::nvvm_fma_rp_d:
+  case Intrinsic::nvvm_fma_rp_ftz_f:
+    return APFloat::rmTowardPositive; // rp
+  case Intrinsic::nvvm_fma_rz_f:
+  case Intrinsic::nvvm_fma_rz_d:
+  case Intrinsic::nvvm_fma_rz_ftz_f:
+    return APFloat::rmTowardZero; // rz
+  }
+  llvm_unreachable("Invalid FP intrinsic rounding mode for NVVM fma");
+}
+
 } // namespace nvvm
 } // namespace llvm
 #endif // LLVM_IR_NVVMINTRINSICUTILS_H
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index dd98b62baca33..69fdd4f2b3e71 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1843,6 +1843,62 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::nvvm_sqrt_rn_ftz_f:
     return !Call->isStrictFP();
 
+  // NVVM add intrinsics with explicit rounding modes
+  case Intrinsic::nvvm_add_rm_d:
+  case Intrinsic::nvvm_add_rn_d:
+  case Intrinsic::nvvm_add_rp_d:
+  case Intrinsic::nvvm_add_rz_d:
+  case Intrinsic::nvvm_add_rm_f:
+  case Intrinsic::nvvm_add_rn_f:
+  case Intrinsic::nvvm_add_rp_f:
+  case Intrinsic::nvvm_add_rz_f:
+  case Intrinsic::nvvm_add_rm_ftz_f:
+  case Intrinsic::nvvm_add_rn_ftz_f:
+  case Intrinsic::nvvm_add_rp_ftz_f:
+  case Intrinsic::nvvm_add_rz_ftz_f:
+
+  // NVVM div intrinsics with explicit rounding modes
+  case Intrinsic::nvvm_div_rm_d:
+  case Intrinsic::nvvm_div_rn_d:
+  case Intrinsic::nvvm_div_rp_d:
+  case Intrinsic::nvvm_div_rz_d:
+  case Intrinsic::nvvm_div_rm_f:
+  case Intrinsic::nvvm_div_rn_f:
+  case Intrinsic::nvvm_div_rp_f:
+  case Intrinsic::nvvm_div_rz_f:
+  case Intrinsic::nvvm_div_rm_ftz_f:
+  case Intrinsic::nvvm_div_rn_ftz_f:
+  case Intrinsic::nvvm_div_rp_ftz_f:
+  case Intrinsic::nvvm_div_rz_ftz_f:
+
+  // NVVM mul intrinsics with explicit rounding modes
+  case Intrinsic::nvvm_mul_rm_d:
+  case Intrinsic::nvvm_mul_rn_d:
+  case Intrinsic::nvvm_mul_rp_d:
+  case Intrinsic::nvvm_mul_rz_d:
+  case Intrinsic::nvvm_mul_rm_f:
+  case Intrinsic::nvvm_mul_rn_f:
+  case Intrinsic::nvvm_mul_rp_f:
+  case Intrinsic::nvvm_mul_rz_f:
+  case Intrinsic::nvvm_mul_rm_ftz_f:
+  case Intrinsic::nvvm_mul_rn_ftz_f:
+  case Intrinsic::nvvm_mul_rp_ftz_f:
+  case Intrinsic::nvvm_mul_rz_ftz_f:
+
+  // NVVM fma intrinsics with explicit rounding modes
+  case Intrinsic::nvvm_fma_rm_d:
+  case Intrinsic::nvvm_fma_rn_d:
+  case Intrinsic::nvvm_fma_rp_d:
+  case Intrinsic::nvvm_fma_rz_d:
+  case Intrinsic::nvvm_fma_rm_f:
+  case Intrinsic::nvvm_fma_rn_f:
+  case Intrinsic::nvvm_fma_rp_f:
+  case Intrinsic::nvvm_fma_rz_f:
+  case Intrinsic::nvvm_fma_rm_ftz_f:
+  case Intrinsic::nvvm_fma_rn_ftz_f:
+  case Intrinsic::nvvm_fma_rp_ftz_f:
+  case Intrinsic::nvvm_fma_rz_ftz_f:
+
   // Sign operations are actually bitwise operations, they do not raise
   // exceptions even for SNANs.
   case Intrinsic::fabs:
@@ -3318,6 +3374,96 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
 
         return ConstantFP::get(Ty->getContext(), Res);
       }
+
+      case Intrinsic::nvvm_add_rm_f:
+      case Intrinsic::nvvm_add_rn_f:
+      case Intrinsic::nvvm_add_rp_f:
+      case Intrinsic::nvvm_add_rz_f:
+      case Intrinsic::nvvm_add_rm_d:
+      case Intrinsic::nvvm_add_rn_d:
+      case Intrinsic::nvvm_add_rp_d:
+      case Intrinsic::nvvm_add_rz_d:
+      case Intrinsic::nvvm_add_rm_ftz_f:
+      case Intrinsic::nvvm_add_rn_ftz_f:
+      case Intrinsic::nvvm_add_rp_ftz_f:
+      case Intrinsic::nvvm_add_rz_ftz_f: {
+
+        bool IsFTZ = nvvm::FAddShouldFTZ(IntrinsicID);
+        APFloat A = IsFTZ ? FTZPreserveSign(Op1V) : Op1V;
+        APFloat B = IsFTZ ? FTZPreserveSign(Op2V) : Op2V;
+
+        APFloat::roundingMode RoundMode =
+            nvvm::GetFAddRoundingMode(IntrinsicID);
+
+        APFloat Res = A;
+        APFloat::opStatus Status = Res.add(B, RoundMode);
+
+        if (!Res.isNaN() &&
+            (Status == APFloat::opOK || Status == APFloat::opInexact)) {
+          Res = IsFTZ ? FTZPreserveSign(Res) : Res;
+          return ConstantFP::get(Ty->getContext(), Res);
+        }
+        return nullptr;
+      }
+
+      case Intrinsic::nvvm_mul_rm_f:
+      case Intrinsic::nvvm_mul_rn_f:
+      case Intrinsic::nvvm_mul_rp_f:
+      case Intrinsic::nvvm_mul_rz_f:
+      case Intrinsic::nvvm_mul_rm_d:
+      case Intrinsic::nvvm_mul_rn_d:
+      case Intrinsic::nvvm_mul_rp_d:
+      case Intrinsic::nvvm_mul_rz_d:
+      case Intrinsic::nvvm_mul_rm_ftz_f:
+      case Intrinsic::nvvm_mul_rn_ftz_f:
+      case Intrinsic::nvvm_mul_rp_ftz_f:
+      case Intrinsic::nvvm_mul_rz_ftz_f: {
+
+        bool IsFTZ = nvvm::FMulShouldFTZ(IntrinsicID);
+        APFloat A = IsFTZ ? FTZPreserveSign(Op1V) : Op1V;
+        APFloat B = IsFTZ ? FTZPreserveSign(Op2V) : Op2V;
+
+        APFloat::roundingMode RoundMode =
+            nvvm::GetFMulRoundingMode(IntrinsicID);
+
+        APFloat Res = A;
+        APFloat::opStatus Status = Res.multiply(B, RoundMode);
+
+        if (!Res.isNaN() &&
+            (Status == APFloat::opOK || Status == APFloat::opInexact)) {
+          Res = IsFTZ ? FTZPreserveSign(Res) : Res;
+          return ConstantFP::get(Ty->getContext(), Res);
+        }
+        return nullptr;
+      }
+
+      case Intrinsic::nvvm_div_rm_f:
+      case Intrinsic::nvvm_div_rn_f:
+      case Intrinsic::nvvm_div_rp_f:
+      case Intrinsic::nvvm_div_rz_f:
+      case Intrinsic::nvvm_div_rm_d:
+      case Intrinsic::nvvm_div_rn_d:
+      case Intrinsic::nvvm_div_rp_d:
+      case Intrinsic::nvvm_div_rz_d:
+      case Intrinsic::nvvm_div_rm_ftz_f:
+      case Intrinsic::nvvm_div_rn_ftz_f:
+      case Intrinsic::nvvm_div_rp_ftz_f:
+      case Intrinsic::nvvm_div_rz_ftz_f: {
+        bool IsFTZ = nvvm::FDivShouldFTZ(IntrinsicID);
+        APFloat A = IsFTZ ? FTZPreserveSign(Op1V) : Op1V;
+        APFloat B = IsFTZ ? FTZPreserveSign(Op2V) : Op2V;
+        APFloat::roundingMode RoundMode =
+            nvvm::GetFDivRoundingMode(IntrinsicID);
+
+        APFloat Res = A;
+        APFloat::opStatus Status = Res.divide(B, RoundMode);
+        if (!Res.isNaN() &&
+            (Status == APFloat::opOK || Status == APFloat::opInexact)) {
+          Res = IsFTZ ? FTZPreserveSign(Res) : Res;
+          return ConstantFP::get(Ty->getContext(), Res);
+        }
+        return nullptr;
+      }
       }
 
       if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
@@ -3729,6 +3875,38 @@ static Constant *ConstantFoldScalarCall3(StringRef Name,
           V.fusedMultiplyAdd(C2, C3, APFloat::rmNearestTiesToEven);
           return ConstantFP::get(Ty->getContext(), V);
         }
+
+        case Intrinsic::nvvm_fma_rm_f:
+        case Intrinsic::nvvm_fma_rn_f:
+        case Intrinsic::nvvm_fma_rp_f:
+        case Intrinsic::nvvm_fma_rz_f:
+        case Intrinsic::nvvm_fma_rm_d:
+        case Intrinsic::nvvm_fma_rn_d:
+        case Intrinsic::nvvm_fma_rp_d:
+        case Intrinsic::nvvm_fma_rz_d:
+        case Intrinsic::nvvm_fma_rm_ftz_f:
+        case Intrinsic::nvvm_fma_rn_ftz_f:
+        case Intrinsic::nvvm_fma_rp_ftz_f:
+        case Intrinsic::nvvm_fma_rz_ftz_f: {
+          bool IsFTZ = nvvm::FMAShouldFTZ(IntrinsicID);
+          APFloat A = IsFTZ ? FTZPreserveSign(C1) : C1;
+          APFloat B = IsFTZ ? FTZPreserveSign(C2) : C2;
+          APFloat C = IsFTZ ? FTZPreserveSign(C3) : C3;
+
+          APFloat::roundingMode RoundMode =
+              nvvm::GetFMARoundingMode(IntrinsicID);
+
+          APFloat Res = A;
+          APFloat::opStatus Status = Res.fusedMultiplyAdd(B, C, RoundMode);
+
+          if (!Res.isNaN() &&
+              (Status == APFloat::opOK || Status == APFloat::opInexact)) {
+            Res = IsFTZ ? FTZPreserveSign(Res) : Res;
+            return ConstantFP::get(Ty->getContext(), Res);
+          }
+          return nullptr;
+        }
+
         case Intrinsic::amdgcn_cubeid:
         case Intrinsic::amdgcn_cubema:
         case Intrinsic::amdgcn_cubesc:
diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-add.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-add.ll
new file mode 100644
index 0000000000000..a3d87439d74cf
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-add.ll
@@ -0,0 +1,876 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instsimplify -march=nvptx64 --mcpu=sm_86 --mattr=+ptx72 -S | FileCheck %s
+
+; Check constant-folding for NVVM add intrinsics with different rounding modes
+
+;###############################################################
+;#                    Add(1.25, -2.0)                          #
+;###############################################################
+; Tests addition of two normal numbers (1.25 and -2.0) where the result
+; is exactly representable. All rounding modes produce the same result.
+
+define double @test_1_25_minus_2_rm_d() {
+; CHECK-LABEL: define double @test_1_25_minus_2_rm_d() {
+; CHECK-NEXT:    ret double -7.500000e-01
+;
+  %res = call double @llvm.nvvm.add.rm.d(double 1.25, double -2.0)
+  ret double %res
+}
+
+define double @test_1_25_minus_2_rn_d() {
+; CHECK-LABEL: define double @test_1_25_minus_2_rn_d() {
+; CHECK-NEXT:    ret double -7.500000e-01
+;
+  %res = call double @llvm.nvvm.add.rn.d(double 1.25, double -2.0)
+  ret double %res
+}
+
+define double @test_1_25_minus_2_rp_d() {
+; CHECK-LABEL: define double @test_1_25_minus_2_rp_d() {
+; CHECK-NEXT:    ret double -7.500000e-01
+;
+  %res = call double @llvm.nvvm.add.rp.d(double 1.25, double -2.0)
+  ret double %res
+}
+
+define double @test_1_25_minus_2_rz_d() {
+; CHECK-LABEL: define double @test_1_25_minus_2_rz_d() {
+; CHECK-NEXT:    ret double -7.500000e-01
+;
+  %res = call double @llvm.nvvm.add.rz.d(double 1.25, double -2.0)
+  ret double %res
+}
+
+define float @test_1_25_minus_2_rm_f() {
+; CHECK-LABEL: define float @test_1_25_minus_2_rm_f() {
+; CHECK-NEXT:    ret float -7.500000e-01
+;
+  %res = call float @llvm.nvvm.add.rm.f(float 1.25, float -2.0)
+  ret float %res
+}
+
+define float @test_1_25_minus_2_rn_f() {
+; CHECK-LABEL: define float @test_1_25_minus_2_rn_f() {
+; CHECK-NEXT:    ret float -7.500000e-01
+;
+  %res = call float @llvm.nvvm.add.rn.f(float 1.25, float -2.0)
+  ret float %res
+}
+
+define float @test_1_25_minus_2_rp_f() {
+; CHECK-LABEL: define float @test_1_25_minus_2_rp_f() {
+; CHECK-NEXT:    ret float -7.500000e-01
+;
+  %res = call float @llvm.nvvm.add.rp.f(float 1.25, float -2.0)
+  ret float %res
+}
+
+define float @test_1_25_minus_2_rz_f() {
+; CHECK-LABEL: define float @test_1_25_minus_2_rz_f() {
+; CHECK-NEXT:    ret float -7.500000e-01
+;
+  %res = call float @llvm.nvvm.add.rz.f(float 1.25, float -2.0)
+  ret float %res
+}
+
+define float @test_1_25_minus_2_rm_ftz_f() {
+; CHECK-LABEL: define float @test_1_25_minus_2_rm_ftz_f() {
+; CHECK-NEXT:    ret float -7.500000e-01
+;
+  %res = call float @llvm.nvvm.add.rm.ftz.f(float 1.25, float -2.0)
+  ret float %res
+}
+
+define float @test_1_25_minus_2_rn_ftz_f() {
+; CHECK-LABEL: define float @test_1_25_minus_2_rn_ftz_f() {
+; CHECK-NEXT:    ret float -7.500000e-01
+;
+  %res = call float @llvm.nvvm.add.rn.ftz.f(float 1.25, float -2.0)
+  ret float %res
+}
+
+define float @test_1_25_minus_2_rp_ftz_f() {
+; CHECK-LABEL: define float @test_1_25_minus_2_rp_ftz_f() {
+; CHECK-NEXT:    ret float -7.500000e-01
+;
+  %res = call float @llvm.nvvm.add.rp.ftz.f(float 1.25, float -2.0)
+  ret float %res
+}
+
+define float @test_1_25_minus_2_rz_ftz_f() {
+; CHECK-LABEL: define float @test_1_25_minus_2_rz_ftz_f() {
+; CHECK-NEXT:    ret float -7.500000e-01
+;
+  %res = call float @llvm.nvvm.add.rz.ftz.f(float 1.25, float -2.0)
+  ret float %res
+}
+
+;###############################################################
+;#                          Add(0.0, NaN)                      #
+;###############################################################
+; Tests addition of a zero with NaN.
+; The result is always NaN and the operation is not constant-folded.
+
+define double @test_zero_plus_nan_rm_d() {
+; CHECK-LABEL: define double @test_zero_plus_nan_rm_d() {
+; CHECK-NEXT:    [[RES:%.*]] = call double @llvm.nvvm.add.rm.d(double 0.000000e+00, double 0x7FF4444400000000)
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %res = call double @llvm.nvvm.add.rm.d(double 0.0, double 0x7ff4444400000000)
+  ret double %res
+}
+
+define double @test_zero_plus_nan_rn_d() {
+; CHECK-LABEL: define double @test_zero_plus_nan_rn_d() {
+; CHECK-NEXT:    [[RES:%.*]] = call double @llvm.nvvm.add.rn.d(double 0.000000e+00, double 0x7FF4444400000000)
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %res = call double @llvm.nvvm.add.rn.d(double 0.0, double 0x7ff4444400000000)
+  ret double %res
+}
+
+define double @test_zero_plus_nan_rp_d() {
+; CHECK-LABEL: define double @test_zero_plus_nan_rp_d() {
+; CHECK-NEXT:    [[RES:%.*]] = call double @llvm.nvvm.add.rp.d(double 0.000000e+00, double 0x7FF4444400000000)
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %res = call double @llvm.nvvm.add.rp.d(double 0.0, double 0x7ff4444400000000)
+  ret double %res
+}
+
+define double @test_zero_plus_nan_rz_d() {
+; CHECK-LABEL: define double @test_zero_plus_nan_rz_d() {
+; CHECK-NEXT:    [[RES:%.*]] = call double @llvm.nvvm.add.rz.d(double 0.000000e+00, double 0x7FF4444400000000)
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %res = call double @llvm.nvvm.add.rz.d(double 0.0, double 0x7ff4444400000000)
+  ret double %res
+}
+
+define float @test_zero_plus_nan_rm_f() {
+; CHECK-LABEL: define float @test_zero_plus_nan_rm_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.add.rm.f(float 0.000000e+00, float 0x7FFF444400000000)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.add.rm.f(float 0.0, float 0x7FFF444400000000)
+  ret float %res
+}
+
+define float @test_zero_plus_nan_rn_f() {
+; CHECK-LABEL: define float @test_zero_plus_nan_rn_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.add.rn.f(float 0.000000e+00, float 0x7FFF444400000000)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.add.rn.f(float 0.0, float 0x7FFF444400000000)
+  ret float %res
+}
+
+define float @test_zero_plus_nan_rp_f() {
+; CHECK-LABEL: define float @test_zero_plus_nan_rp_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.add.rp.f(float 0.000000e+00, float 0x7FFF444400000000)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.add.rp.f(float 0.0, float 0x7FFF444400000000)
+  ret float %res
+}
+
+define float @test_zero_plus_nan_rz_f() {
+; CHECK-LABEL: define float @test_zero_plus_nan_rz_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.add.rz.f(float 0.000000e+00, float 0x7FFF444400000000)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.add.rz.f(float 0.0, float 0x7FFF444400000000)
+  ret float %res
+}
+
+define float @test_zero_plus_nan_rm_ftz_f() {
+; CHECK-LABEL: define float @test_zero_plus_nan_rm_ftz_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.add.rm.ftz.f(float 0.000000e+00, float 0x7FFF444400000000)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.add.rm.ftz.f(float 0.0, float 0x7FFF444400000000)
+  ret float %res
+}
+
+define float @test_zero_plus_nan_rn_ftz_f() {
+; CHECK-LABEL: define float @test_zero_plus_nan_rn_ftz_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.add.rn.ftz.f(float 0.000000e+00, float 0x7FFF444400000000)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.add.rn.ftz.f(float 0.0, float 0x7FFF444400000000)
+  ret float %res
+}
+
+define float @test_zero_plus_nan_rp_ftz_f() {
+; CHECK-LABEL: define float @test_zero_plus_nan_rp_ftz_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.add.rp.ftz.f(float 0.000000e+00, float 0x7FFF444400000000)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.add.rp.ftz.f(float 0.0, float 0x7FFF444400000000)
+  ret float %res
+}
+
+define float @test_zero_plus_nan_rz_ftz_f() {
+; CHECK-LABEL: define float @test_zero_plus_nan_rz_ftz_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.add.rz.ftz.f(float 0.000000e+00, float 0x7FFF444400000000)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.add.rz.ftz.f(float 0.0, float 0x7FFF444400000000)
+  ret float %res
+}
+
+;###############################################################
+;#                Add(Subnormal, Subnormal) -> Normal          #
+;###############################################################
+; Tests addition of two positive subnormal numbers (2^-127)
+; - Without FTZ: The result is the sum of the subnormals (2^-126) - a normal number
+; - With FTZ: The inputs are flushed to zero, so the result is zero (despite the output being normal)
+
+define double @test_subnorm_plus_subnorm_to_normal_rm_d() {
+; CHECK-LABEL: define double @test_subnorm_plus_subnorm_to_normal_rm_d() {
+; CHECK-NEXT:    ret double 0x3810000000000000
+;
+  %res = call double @llvm.nvvm.add.rm.d(double 0x3800000000000000, double 0x3800000000000000)
+  ret double %res
+}
+
+define double @test_subnorm_plus_subnorm_to_normal_rn_d() {
+; CHECK-LABEL: define double @test_subnorm_plus_subnorm_to_normal_rn_d() {
+; CHECK-NEXT:    ret double 0x3810000000000000
+;
+  %res = call double @llvm.nvvm.add.rn.d(double 0x3800000000000000, double 0x3800000000000000)
+  ret double %res
+}
+
+define double @test_subnorm_plus_subnorm_to_normal_rp_d() {
+; CHECK-LABEL: define double @test_subnorm_plus_subnorm_to_normal_rp_d() {
+; CHECK-NEXT:    ret double 0x3810000000000000
+;
+  %res = call double @llvm.nvvm.add.rp.d(double 0x3800000000000000, double 0x3800000000000000)
+  ret double %res
+}
+
+define double @test_subnorm_plus_subnorm_to_normal_rz_d() {
+; CHECK-LABEL: define double @test_subnorm_plus_subnorm_to_normal_rz_d() {
+; CHECK-NEXT:    ret double 0x3810000000000000
+;
+  %res = call double @llvm.nvvm.add.rz.d(double 0x3800000000000000, double 0x3800000000000000)
+  ret double %res
+}
+
+define float @test_subnorm_plus_subnorm_to_normal_rm_f() {
+; CHECK-LABEL: define float @test_subnorm_plus_subnorm_to_normal_rm_f() {
+; CHECK-NEXT:    ret float 0x3810000000000000
+;
+  %res = call float @llvm.nvvm.add.rm.f(float 0x3800000000000000, float 0x3800000000000000)
+  ret float %res
+}
+
+define float @test_subnorm_plus_subnorm_to_normal_rn_f() {
+; CHECK-LABEL: define float @test_subnorm_plus_subnorm_to_normal_rn_f() {
+; CHECK-NEXT:    ret float 0x3810000000000000
+;
+  %res = call float @llvm.nvvm.add.rn.f(float 0x3800000000000000, float 0x3800000000000000)
+  ret float %res
+}
+
+define float @test_subnorm_plus_subnorm_to_normal_rp_f() {
+; CHECK-LABEL: define float @test_subnorm_plus_subnorm_to_normal_rp_f() {
+; CHECK-NEXT:    ret float 0x3810000000000000
+;
+  %res = call float @llvm.nvvm.add.rp.f(float 0x3800000000000000, float 0x3800000000000000)
+  ret float %res
+}
+
+define float @test_subnorm_plus_subnorm_to_normal_rz_f() {
+; CHECK-LABEL: define float @test_subnorm_plus_subnorm_to_normal_rz_f() {
+; CHECK-NEXT:    ret float 0x3810000000000000
+;
+  %res = call float @llvm.nvvm.add.rz.f(float 0x3800000000000000, float 0x3800000000000000)
+  ret float %res
+}
+
+define float @test_subnorm_plus_subnorm_to_normal_rm_ftz_f() {
+; CHECK-LABEL: define float @test_subnorm_plus_subnorm_to_normal_rm_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.add.rm.ftz.f(float 0x3800000000000000, float 0x3800000000000000)
+  ret float %res
+}
+
+define float @test_subnorm_plus_subnorm_to_normal_rn_ftz_f() {
+; CHECK-LABEL: define float @test_subnorm_plus_subnorm_to_normal_rn_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.add.rn.ftz.f(float 0x3800000000000000, float 0x3800000000000000)
+  ret float %res
+}
+
+define float @test_subnorm_plus_subnorm_to_normal_rp_ftz_f() {
+; CHECK-LABEL: define float @test_subnorm_plus_subnorm_to_normal_rp_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.add.rp.ftz.f(float 0x3800000000000000, float 0x3800000000000000)
+  ret float %res
+}
+
+define float @test_subnorm_plus_subnorm_to_normal_rz_ftz_f() {
+; CHECK-LABEL: define float @test_subnorm_plus_subnorm_to_normal_rz_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.add.rz.ftz.f(float 0x3800000000000000, float 0x3800000000000000)
+  ret float %res
+}
+
+;###############################################################
+;#                  Add(Normal, -Subnormal) -> Subnormal       #
+;###############################################################
+; Tests addition of 2^-126 (the smallest normal number) and -(2^-127).
+; - Without FTZ: The result is correctly computed as a subnormal (2^-127)
+; - With FTZ: The subnormal input is flushed to zero before the add, so
+;   the result is 2^-126. That result is a normal number, so it is not
+;   flushed again on output.
+
+define double @test_normal_minus_subnorm_to_subnorm_rm_d() {
+; CHECK-LABEL: define double @test_normal_minus_subnorm_to_subnorm_rm_d() {
+; CHECK-NEXT:    ret double 0x3800000000000000
+;
+  %res = call double @llvm.nvvm.add.rm.d(double 0x3810000000000000, double 0xB800000000000000)
+  ret double %res
+}
+
+define double @test_normal_minus_subnorm_to_subnorm_rn_d() {
+; CHECK-LABEL: define double @test_normal_minus_subnorm_to_subnorm_rn_d() {
+; CHECK-NEXT:    ret double 0x3800000000000000
+;
+  %res = call double @llvm.nvvm.add.rn.d(double 0x3810000000000000, double 0xB800000000000000)
+  ret double %res
+}
+
+define double @test_normal_minus_subnorm_to_subnorm_rp_d() {
+; CHECK-LABEL: define double @test_normal_minus_subnorm_to_subnorm_rp_d() {
+; CHECK-NEXT:    ret double 0x3800000000000000
+;
+  %res = call double @llvm.nvvm.add.rp.d(double 0x3810000000000000, double 0xB800000000000000)
+  ret double %res
+}
+
+define double @test_normal_minus_subnorm_to_subnorm_rz_d() {
+; CHECK-LABEL: define double @test_normal_minus_subnorm_to_subnorm_rz_d() {
+; CHECK-NEXT:    ret double 0x3800000000000000
+;
+  %res = call double @llvm.nvvm.add.rz.d(double 0x3810000000000000, double 0xB800000000000000)
+  ret double %res
+}
+
+define float @test_normal_minus_subnorm_to_subnorm_rm_f() {
+; CHECK-LABEL: define float @test_normal_minus_subnorm_to_subnorm_rm_f() {
+; CHECK-NEXT:    ret float 0x3800000000000000
+;
+  %res = call float @llvm.nvvm.add.rm.f(float 0x3810000000000000, float 0xB800000000000000)
+  ret float %res
+}
+
+define float @test_normal_minus_subnorm_to_subnorm_rn_f() {
+; CHECK-LABEL: define float @test_normal_minus_subnorm_to_subnorm_rn_f() {
+; CHECK-NEXT:    ret float 0x3800000000000000
+;
+  %res = call float @llvm.nvvm.add.rn.f(float 0x3810000000000000, float 0xB800000000000000)
+  ret float %res
+}
+
+define float @test_normal_minus_subnorm_to_subnorm_rp_f() {
+; CHECK-LABEL: define float @test_normal_minus_subnorm_to_subnorm_rp_f() {
+; CHECK-NEXT:    ret float 0x3800000000000000
+;
+  %res = call float @llvm.nvvm.add.rp.f(float 0x3810000000000000, float 0xB800000000000000)
+  ret float %res
+}
+
+define float @test_normal_minus_subnorm_to_subnorm_rz_f() {
+; CHECK-LABEL: define float @test_normal_minus_subnorm_to_subnorm_rz_f() {
+; CHECK-NEXT:    ret float 0x3800000000000000
+;
+  %res = call float @llvm.nvvm.add.rz.f(float 0x3810000000000000, float 0xB800000000000000)
+  ret float %res
+}
+
+define float @test_normal_minus_subnorm_to_subnorm_rm_ftz_f() {
+; CHECK-LABEL: define float @test_normal_minus_subnorm_to_subnorm_rm_ftz_f() {
+; CHECK-NEXT:    ret float 0x3810000000000000
+;
+  %res = call float @llvm.nvvm.add.rm.ftz.f(float 0x3810000000000000, float 0xB800000000000000)
+  ret float %res
+}
+
+define float @test_normal_minus_subnorm_to_subnorm_rn_ftz_f() {
+; CHECK-LABEL: define float @test_normal_minus_subnorm_to_subnorm_rn_ftz_f() {
+; CHECK-NEXT:    ret float 0x3810000000000000
+;
+  %res = call float @llvm.nvvm.add.rn.ftz.f(float 0x3810000000000000, float 0xB800000000000000)
+  ret float %res
+}
+
+define float @test_normal_minus_subnorm_to_subnorm_rp_ftz_f() {
+; CHECK-LABEL: define float @test_normal_minus_subnorm_to_subnorm_rp_ftz_f() {
+; CHECK-NEXT:    ret float 0x3810000000000000
+;
+  %res = call float @llvm.nvvm.add.rp.ftz.f(float 0x3810000000000000, float 0xB800000000000000)
+  ret float %res
+}
+
+define float @test_normal_minus_subnorm_to_subnorm_rz_ftz_f() {
+; CHECK-LABEL: define float @test_normal_minus_subnorm_to_subnorm_rz_ftz_f() {
+; CHECK-NEXT:    ret float 0x3810000000000000
+;
+  %res = call float @llvm.nvvm.add.rz.ftz.f(float 0x3810000000000000, float 0xB800000000000000)
+  ret float %res
+}
+
+;###############################################################
+;#                    Add(1.0, 2^(-25))                        #
+;###############################################################
+; Tests addition of 1.0 and 2^(-25) where the exact result falls between
+; 1.0 and 1.0 + 2^(-23):
+; - RN, RZ, RM: Return 1.0 (rounding toward nearest/zero/down)
+; - RP: Returns 1.0 + 2^(-23) (rounding up)
+
+define float @test_1_plus_ulp_rm_f() {
+; CHECK-LABEL: define float @test_1_plus_ulp_rm_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.add.rm.f(float 1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_1_plus_ulp_rn_f() {
+; CHECK-LABEL: define float @test_1_plus_ulp_rn_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.add.rn.f(float 1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_1_plus_ulp_rp_f() {
+; CHECK-LABEL: define float @test_1_plus_ulp_rp_f() {
+; CHECK-NEXT:    ret float 0x3FF0000020000000
+;
+  %res = call float @llvm.nvvm.add.rp.f(float 1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_1_plus_ulp_rz_f() {
+; CHECK-LABEL: define float @test_1_plus_ulp_rz_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.add.rz.f(float 1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_1_plus_ulp_rm_ftz_f() {
+; CHECK-LABEL: define float @test_1_plus_ulp_rm_ftz_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.add.rm.ftz.f(float 1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_1_plus_ulp_rn_ftz_f() {
+; CHECK-LABEL: define float @test_1_plus_ulp_rn_ftz_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.add.rn.ftz.f(float 1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_1_plus_ulp_rp_ftz_f() {
+; CHECK-LABEL: define float @test_1_plus_ulp_rp_ftz_f() {
+; CHECK-NEXT:    ret float 0x3FF0000020000000
+;
+  %res = call float @llvm.nvvm.add.rp.ftz.f(float 1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_1_plus_ulp_rz_ftz_f() {
+; CHECK-LABEL: define float @test_1_plus_ulp_rz_ftz_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.add.rz.ftz.f(float 1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+;###############################################################
+;#                    Add(1.0, 2^(-54))                        #
+;###############################################################
+; Tests addition of 1.0 and 2^(-54) where the exact result falls between
+; 1.0 and 1.0 + 2^(-52):
+; - RN, RZ, RM: Return 1.0 (rounding to nearest/zero/down)
+; - RP: Returns 1.0 + 2^(-52) (rounding up)
+
+define double @test_1_plus_ulp_rm_d() {
+; CHECK-LABEL: define double @test_1_plus_ulp_rm_d() {
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+  %res = call double @llvm.nvvm.add.rm.d(double 1.0, double 0x3C90000000000000)
+  ret double %res
+}
+
+define double @test_1_plus_ulp_rn_d() {
+; CHECK-LABEL: define double @test_1_plus_ulp_rn_d() {
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+  %res = call double @llvm.nvvm.add.rn.d(double 1.0, double 0x3C90000000000000)
+  ret double %res
+}
+
+define double @test_1_plus_ulp_rp_d() {
+; CHECK-LABEL: define double @test_1_plus_ulp_rp_d() {
+; CHECK-NEXT:    ret double 0x3FF0000000000001
+;
+  %res = call double @llvm.nvvm.add.rp.d(double 1.0, double 0x3C90000000000000)
+  ret double %res
+}
+
+define double @test_1_plus_ulp_rz_d() {
+; CHECK-LABEL: define double @test_1_plus_ulp_rz_d() {
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+  %res = call double @llvm.nvvm.add.rz.d(double 1.0, double 0x3C90000000000000)
+  ret double %res
+}
+
+;###############################################################
+;#                       Add(-1.0, 2^(-25))                    #
+;###############################################################
+; Tests addition of -1.0 and 2^(-25) where the exact result falls between
+; -1.0 and -1.0 + 2^(-24):
+; - RN, RM: Return -1.0 (rounding toward nearest/down)
+; - RZ, RP: Return -1.0 + 2^(-24) (rounding toward zero/up)
+
+define float @test_neg_1_plus_ulp_rm_f() {
+; CHECK-LABEL: define float @test_neg_1_plus_ulp_rm_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.add.rm.f(float -1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_neg_1_plus_ulp_rn_f() {
+; CHECK-LABEL: define float @test_neg_1_plus_ulp_rn_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.add.rn.f(float -1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_neg_1_plus_ulp_rp_f() {
+; CHECK-LABEL: define float @test_neg_1_plus_ulp_rp_f() {
+; CHECK-NEXT:    ret float 0xBFEFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.add.rp.f(float -1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_neg_1_plus_ulp_rz_f() {
+; CHECK-LABEL: define float @test_neg_1_plus_ulp_rz_f() {
+; CHECK-NEXT:    ret float 0xBFEFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.add.rz.f(float -1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_neg_1_plus_ulp_rm_ftz_f() {
+; CHECK-LABEL: define float @test_neg_1_plus_ulp_rm_ftz_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.add.rm.ftz.f(float -1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_neg_1_plus_ulp_rn_ftz_f() {
+; CHECK-LABEL: define float @test_neg_1_plus_ulp_rn_ftz_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.add.rn.ftz.f(float -1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_neg_1_plus_ulp_rp_ftz_f() {
+; CHECK-LABEL: define float @test_neg_1_plus_ulp_rp_ftz_f() {
+; CHECK-NEXT:    ret float 0xBFEFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.add.rp.ftz.f(float -1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_neg_1_plus_ulp_rz_ftz_f() {
+; CHECK-LABEL: define float @test_neg_1_plus_ulp_rz_ftz_f() {
+; CHECK-NEXT:    ret float 0xBFEFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.add.rz.ftz.f(float -1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+;###############################################################
+;#                       Add(-1.0, 2^(-54))                    #
+;###############################################################
+; Tests addition of -1.0 and 2^(-54) where the exact result falls between
+; -1.0 and -1.0 + 2^(-53):
+; - RN, RM: Return -1.0 (rounding toward nearest/down)
+; - RZ, RP: Return -1.0 + 2^(-53) (rounding toward zero/up)
+
+define double @test_neg_1_plus_ulp_rm_d() {
+; CHECK-LABEL: define double @test_neg_1_plus_ulp_rm_d() {
+; CHECK-NEXT:    ret double -1.000000e+00
+;
+  %res = call double @llvm.nvvm.add.rm.d(double -1.0, double 0x3C90000000000000)
+  ret double %res
+}
+
+define double @test_neg_1_plus_ulp_rn_d() {
+; CHECK-LABEL: define double @test_neg_1_plus_ulp_rn_d() {
+; CHECK-NEXT:    ret double -1.000000e+00
+;
+  %res = call double @llvm.nvvm.add.rn.d(double -1.0, double 0x3C90000000000000)
+  ret double %res
+}
+
+define double @test_neg_1_plus_ulp_rp_d() {
+; CHECK-LABEL: define double @test_neg_1_plus_ulp_rp_d() {
+; CHECK-NEXT:    ret double 0xBFEFFFFFFFFFFFFF
+;
+  %res = call double @llvm.nvvm.add.rp.d(double -1.0, double 0x3C90000000000000)
+  ret double %res
+}
+
+define double @test_neg_1_plus_ulp_rz_d() {
+; CHECK-LABEL: define double @test_neg_1_plus_ulp_rz_d() {
+; CHECK-NEXT:    ret double 0xBFEFFFFFFFFFFFFF
+;
+  %res = call double @llvm.nvvm.add.rz.d(double -1.0, double 0x3C90000000000000)
+  ret double %res
+}
+
+;###############################################################
+;#                    Add(1.0, -2^(-25))                       #
+;###############################################################
+; Tests addition of 1.0 and -2^(-25) where the exact result falls between
+; 1.0 and 1.0 - 2^(-24):
+; - RN, RP: Return 1.0 (rounding toward nearest/up)
+; - RZ, RM: Return 1.0 - 2^(-24) (rounding toward zero/down)
+
+define float @test_1_minus_ulp_rm_f() {
+; CHECK-LABEL: define float @test_1_minus_ulp_rm_f() {
+; CHECK-NEXT:    ret float 0x3FEFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.add.rm.f(float 1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_1_minus_ulp_rn_f() {
+; CHECK-LABEL: define float @test_1_minus_ulp_rn_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.add.rn.f(float 1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_1_minus_ulp_rp_f() {
+; CHECK-LABEL: define float @test_1_minus_ulp_rp_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.add.rp.f(float 1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_1_minus_ulp_rz_f() {
+; CHECK-LABEL: define float @test_1_minus_ulp_rz_f() {
+; CHECK-NEXT:    ret float 0x3FEFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.add.rz.f(float 1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_1_minus_ulp_rm_ftz_f() {
+; CHECK-LABEL: define float @test_1_minus_ulp_rm_ftz_f() {
+; CHECK-NEXT:    ret float 0x3FEFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.add.rm.ftz.f(float 1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_1_minus_ulp_rn_ftz_f() {
+; CHECK-LABEL: define float @test_1_minus_ulp_rn_ftz_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.add.rn.ftz.f(float 1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_1_minus_ulp_rp_ftz_f() {
+; CHECK-LABEL: define float @test_1_minus_ulp_rp_ftz_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.add.rp.ftz.f(float 1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_1_minus_ulp_rz_ftz_f() {
+; CHECK-LABEL: define float @test_1_minus_ulp_rz_ftz_f() {
+; CHECK-NEXT:    ret float 0x3FEFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.add.rz.ftz.f(float 1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+;###############################################################
+;#                    Add(1.0, -2^(-54))                       #
+;###############################################################
+; Tests addition of 1.0 and -2^(-54) where the exact result falls between
+; 1.0 and 1.0 - 2^(-53):
+; - RN, RP: Return 1.0 (rounding toward nearest/up)
+; - RZ, RM: Return 1.0 - 2^(-53) (rounding toward zero/down)
+
+define double @test_1_minus_ulp_rm_d() {
+; CHECK-LABEL: define double @test_1_minus_ulp_rm_d() {
+; CHECK-NEXT:    ret double 0x3FEFFFFFFFFFFFFF
+;
+  %res = call double @llvm.nvvm.add.rm.d(double 1.0, double 0xBC90000000000000)
+  ret double %res
+}
+
+define double @test_1_minus_ulp_rn_d() {
+; CHECK-LABEL: define double @test_1_minus_ulp_rn_d() {
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+  %res = call double @llvm.nvvm.add.rn.d(double 1.0, double 0xBC90000000000000)
+  ret double %res
+}
+
+define double @test_1_minus_ulp_rp_d() {
+; CHECK-LABEL: define double @test_1_minus_ulp_rp_d() {
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+  %res = call double @llvm.nvvm.add.rp.d(double 1.0, double 0xBC90000000000000)
+  ret double %res
+}
+
+define double @test_1_minus_ulp_rz_d() {
+; CHECK-LABEL: define double @test_1_minus_ulp_rz_d() {
+; CHECK-NEXT:    ret double 0x3FEFFFFFFFFFFFFF
+;
+  %res = call double @llvm.nvvm.add.rz.d(double 1.0, double 0xBC90000000000000)
+  ret double %res
+}
+
+;###############################################################
+;#                    Add(-1.0, -2^(-25))                      #
+;###############################################################
+; Tests addition of -1.0 and -2^(-25) where the exact result falls between
+; -1.0 and -1.0 - 2^(-23):
+; - RN, RZ, RP: Return -1.0 (rounding to nearest/zero/up)
+; - RM: Return -1.0 - 2^(-23) (rounding down)
+
+define float @test_neg_1_minus_ulp_rm_f() {
+; CHECK-LABEL: define float @test_neg_1_minus_ulp_rm_f() {
+; CHECK-NEXT:    ret float 0xBFF0000020000000
+;
+  %res = call float @llvm.nvvm.add.rm.f(float -1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_neg_1_minus_ulp_rn_f() {
+; CHECK-LABEL: define float @test_neg_1_minus_ulp_rn_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.add.rn.f(float -1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_neg_1_minus_ulp_rp_f() {
+; CHECK-LABEL: define float @test_neg_1_minus_ulp_rp_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.add.rp.f(float -1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_neg_1_minus_ulp_rz_f() {
+; CHECK-LABEL: define float @test_neg_1_minus_ulp_rz_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.add.rz.f(float -1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_neg_1_minus_ulp_rm_ftz_f() {
+; CHECK-LABEL: define float @test_neg_1_minus_ulp_rm_ftz_f() {
+; CHECK-NEXT:    ret float 0xBFF0000020000000
+;
+  %res = call float @llvm.nvvm.add.rm.ftz.f(float -1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_neg_1_minus_ulp_rn_ftz_f() {
+; CHECK-LABEL: define float @test_neg_1_minus_ulp_rn_ftz_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.add.rn.ftz.f(float -1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_neg_1_minus_ulp_rp_ftz_f() {
+; CHECK-LABEL: define float @test_neg_1_minus_ulp_rp_ftz_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.add.rp.ftz.f(float -1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_neg_1_minus_ulp_rz_ftz_f() {
+; CHECK-LABEL: define float @test_neg_1_minus_ulp_rz_ftz_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.add.rz.ftz.f(float -1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+;###############################################################
+;#                    Add(-1.0, -2^(-54))                      #
+;###############################################################
+; Tests addition of -1.0 and -2^(-54) where the exact result falls between
+; -1.0 and -1.0 - 2^(-52):
+; - RN, RZ, RP: Return -1.0 (rounding to nearest/zero/up)
+; - RM: Return -1.0 - 2^(-52) (rounding down)
+
+define double @test_neg_1_minus_ulp_rm_d() {
+; CHECK-LABEL: define double @test_neg_1_minus_ulp_rm_d() {
+; CHECK-NEXT:    ret double 0xBFF0000000000001
+;
+  %res = call double @llvm.nvvm.add.rm.d(double -1.0, double 0xBC90000000000000)
+  ret double %res
+}
+
+define double @test_neg_1_minus_ulp_rn_d() {
+; CHECK-LABEL: define double @test_neg_1_minus_ulp_rn_d() {
+; CHECK-NEXT:    ret double -1.000000e+00
+;
+  %res = call double @llvm.nvvm.add.rn.d(double -1.0, double 0xBC90000000000000)
+  ret double %res
+}
+
+define double @test_neg_1_minus_ulp_rp_d() {
+; CHECK-LABEL: define double @test_neg_1_minus_ulp_rp_d() {
+; CHECK-NEXT:    ret double -1.000000e+00
+;
+  %res = call double @llvm.nvvm.add.rp.d(double -1.0, double 0xBC90000000000000)
+  ret double %res
+}
+
+define double @test_neg_1_minus_ulp_rz_d() {
+; CHECK-LABEL: define double @test_neg_1_minus_ulp_rz_d() {
+; CHECK-NEXT:    ret double -1.000000e+00
+;
+  %res = call double @llvm.nvvm.add.rz.d(double -1.0, double 0xBC90000000000000)
+  ret double %res
+}
diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-div.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-div.ll
new file mode 100644
index 0000000000000..fab674cd9069b
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-div.ll
@@ -0,0 +1,880 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instsimplify -march=nvptx64 --mcpu=sm_86 --mattr=+ptx72 -S | FileCheck %s
+
+; Check constant-folding for NVVM divide intrinsics with different rounding modes
+
+;###############################################################
+;#                    Div(1.25, 2.0)                           #
+;###############################################################
+; Tests division of two normal numbers (1.25 by 2.0) where the result
+; is exactly representable. All rounding modes should produce the same result.
+
+define double @test_1_25_div_2_rm_d() {
+; CHECK-LABEL: define double @test_1_25_div_2_rm_d() {
+; CHECK-NEXT:    ret double 6.250000e-01
+;
+  %res = call double @llvm.nvvm.div.rm.d(double 1.25, double 2.0)
+  ret double %res
+}
+
+define double @test_1_25_div_2_rn_d() {
+; CHECK-LABEL: define double @test_1_25_div_2_rn_d() {
+; CHECK-NEXT:    ret double 6.250000e-01
+;
+  %res = call double @llvm.nvvm.div.rn.d(double 1.25, double 2.0)
+  ret double %res
+}
+
+define double @test_1_25_div_2_rp_d() {
+; CHECK-LABEL: define double @test_1_25_div_2_rp_d() {
+; CHECK-NEXT:    ret double 6.250000e-01
+;
+  %res = call double @llvm.nvvm.div.rp.d(double 1.25, double 2.0)
+  ret double %res
+}
+
+define double @test_1_25_div_2_rz_d() {
+; CHECK-LABEL: define double @test_1_25_div_2_rz_d() {
+; CHECK-NEXT:    ret double 6.250000e-01
+;
+  %res = call double @llvm.nvvm.div.rz.d(double 1.25, double 2.0)
+  ret double %res
+}
+
+define float @test_1_25_div_2_rm_f() {
+; CHECK-LABEL: define float @test_1_25_div_2_rm_f() {
+; CHECK-NEXT:    ret float 6.250000e-01
+;
+  %res = call float @llvm.nvvm.div.rm.f(float 1.25, float 2.0)
+  ret float %res
+}
+
+define float @test_1_25_div_2_rn_f() {
+; CHECK-LABEL: define float @test_1_25_div_2_rn_f() {
+; CHECK-NEXT:    ret float 6.250000e-01
+;
+  %res = call float @llvm.nvvm.div.rn.f(float 1.25, float 2.0)
+  ret float %res
+}
+
+define float @test_1_25_div_2_rp_f() {
+; CHECK-LABEL: define float @test_1_25_div_2_rp_f() {
+; CHECK-NEXT:    ret float 6.250000e-01
+;
+  %res = call float @llvm.nvvm.div.rp.f(float 1.25, float 2.0)
+  ret float %res
+}
+
+define float @test_1_25_div_2_rz_f() {
+; CHECK-LABEL: define float @test_1_25_div_2_rz_f() {
+; CHECK-NEXT:    ret float 6.250000e-01
+;
+  %res = call float @llvm.nvvm.div.rz.f(float 1.25, float 2.0)
+  ret float %res
+}
+
+define float @test_1_25_div_2_rm_ftz_f() {
+; CHECK-LABEL: define float @test_1_25_div_2_rm_ftz_f() {
+; CHECK-NEXT:    ret float 6.250000e-01
+;
+  %res = call float @llvm.nvvm.div.rm.ftz.f(float 1.25, float 2.0)
+  ret float %res
+}
+
+define float @test_1_25_div_2_rn_ftz_f() {
+; CHECK-LABEL: define float @test_1_25_div_2_rn_ftz_f() {
+; CHECK-NEXT:    ret float 6.250000e-01
+;
+  %res = call float @llvm.nvvm.div.rn.ftz.f(float 1.25, float 2.0)
+  ret float %res
+}
+
+define float @test_1_25_div_2_rp_ftz_f() {
+; CHECK-LABEL: define float @test_1_25_div_2_rp_ftz_f() {
+; CHECK-NEXT:    ret float 6.250000e-01
+;
+  %res = call float @llvm.nvvm.div.rp.ftz.f(float 1.25, float 2.0)
+  ret float %res
+}
+
+define float @test_1_25_div_2_rz_ftz_f() {
+; CHECK-LABEL: define float @test_1_25_div_2_rz_ftz_f() {
+; CHECK-NEXT:    ret float 6.250000e-01
+;
+  %res = call float @llvm.nvvm.div.rz.ftz.f(float 1.25, float 2.0)
+  ret float %res
+}
+
+;###############################################################
+;#                    Div(Subnormal, 1.0)                      #
+;###############################################################
+; Tests division of a subnormal number by 1.0 to verify FTZ behavior.
+; For float, we use 2^-149 (smallest subnormal).
+; For double, we use 2^-1074 (smallest subnormal).
+; Without FTZ, the result should be the subnormal number.
+; With FTZ, the result should be 0.0.
+
+define double @test_subnorm_div_1_rm_d() {
+; CHECK-LABEL: define double @test_subnorm_div_1_rm_d() {
+; CHECK-NEXT:    ret double 4.940660e-324
+;
+  %res = call double @llvm.nvvm.div.rm.d(double 0x0000000000000001, double 1.0)
+  ret double %res
+}
+
+define double @test_subnorm_div_1_rn_d() {
+; CHECK-LABEL: define double @test_subnorm_div_1_rn_d() {
+; CHECK-NEXT:    ret double 4.940660e-324
+;
+  %res = call double @llvm.nvvm.div.rn.d(double 0x0000000000000001, double 1.0)
+  ret double %res
+}
+
+define double @test_subnorm_div_1_rp_d() {
+; CHECK-LABEL: define double @test_subnorm_div_1_rp_d() {
+; CHECK-NEXT:    ret double 4.940660e-324
+;
+  %res = call double @llvm.nvvm.div.rp.d(double 0x0000000000000001, double 1.0)
+  ret double %res
+}
+
+define double @test_subnorm_div_1_rz_d() {
+; CHECK-LABEL: define double @test_subnorm_div_1_rz_d() {
+; CHECK-NEXT:    ret double 4.940660e-324
+;
+  %res = call double @llvm.nvvm.div.rz.d(double 0x0000000000000001, double 1.0)
+  ret double %res
+}
+
+define float @test_subnorm_div_1_rm_f() {
+; CHECK-LABEL: define float @test_subnorm_div_1_rm_f() {
+; CHECK-NEXT:    ret float 0x36A0000000000000
+;
+  %res = call float @llvm.nvvm.div.rm.f(float 0x36A0000000000000, float 1.0)
+  ret float %res
+}
+
+define float @test_subnorm_div_1_rn_f() {
+; CHECK-LABEL: define float @test_subnorm_div_1_rn_f() {
+; CHECK-NEXT:    ret float 0x36A0000000000000
+;
+  %res = call float @llvm.nvvm.div.rn.f(float 0x36A0000000000000, float 1.0)
+  ret float %res
+}
+
+define float @test_subnorm_div_1_rp_f() {
+; CHECK-LABEL: define float @test_subnorm_div_1_rp_f() {
+; CHECK-NEXT:    ret float 0x36A0000000000000
+;
+  %res = call float @llvm.nvvm.div.rp.f(float 0x36A0000000000000, float 1.0)
+  ret float %res
+}
+
+define float @test_subnorm_div_1_rz_f() {
+; CHECK-LABEL: define float @test_subnorm_div_1_rz_f() {
+; CHECK-NEXT:    ret float 0x36A0000000000000
+;
+  %res = call float @llvm.nvvm.div.rz.f(float 0x36A0000000000000, float 1.0)
+  ret float %res
+}
+
+define float @test_subnorm_div_1_rm_ftz_f() {
+; CHECK-LABEL: define float @test_subnorm_div_1_rm_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.div.rm.ftz.f(float 0x36A0000000000000, float 1.0)
+  ret float %res
+}
+
+define float @test_subnorm_div_1_rn_ftz_f() {
+; CHECK-LABEL: define float @test_subnorm_div_1_rn_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.div.rn.ftz.f(float 0x36A0000000000000, float 1.0)
+  ret float %res
+}
+
+define float @test_subnorm_div_1_rp_ftz_f() {
+; CHECK-LABEL: define float @test_subnorm_div_1_rp_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.div.rp.ftz.f(float 0x36A0000000000000, float 1.0)
+  ret float %res
+}
+
+define float @test_subnorm_div_1_rz_ftz_f() {
+; CHECK-LABEL: define float @test_subnorm_div_1_rz_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.div.rz.ftz.f(float 0x36A0000000000000, float 1.0)
+  ret float %res
+}
+
+;###############################################################
+;#                    Div(Normal, Normal) -> Subnormal         #
+;###############################################################
+; Tests division of two normal numbers that produces a subnormal result.
+; We divide the smallest normal float (2^-126 or 2^-1022 for doubles) by 2 to get 2^-127 (or 2^-1023),
+; which is subnormal. This tests the transition from normal to subnormal numbers.
+
+define double @test_normal_div_normal_to_subnorm_rm_d() {
+; CHECK-LABEL: define double @test_normal_div_normal_to_subnorm_rm_d() {
+; CHECK-NEXT:    ret double 0x8000000000000
+;
+  %res = call double @llvm.nvvm.div.rm.d(double 0x10000000000000, double 2.0)
+  ret double %res
+}
+
+define double @test_normal_div_normal_to_subnorm_rn_d() {
+; CHECK-LABEL: define double @test_normal_div_normal_to_subnorm_rn_d() {
+; CHECK-NEXT:    ret double 0x8000000000000
+;
+  %res = call double @llvm.nvvm.div.rn.d(double 0x10000000000000, double 2.0)
+  ret double %res
+}
+
+define double @test_normal_div_normal_to_subnorm_rp_d() {
+; CHECK-LABEL: define double @test_normal_div_normal_to_subnorm_rp_d() {
+; CHECK-NEXT:    ret double 0x8000000000000
+;
+  %res = call double @llvm.nvvm.div.rp.d(double 0x10000000000000, double 2.0)
+  ret double %res
+}
+
+define double @test_normal_div_normal_to_subnorm_rz_d() {
+; CHECK-LABEL: define double @test_normal_div_normal_to_subnorm_rz_d() {
+; CHECK-NEXT:    ret double 0x8000000000000
+;
+  %res = call double @llvm.nvvm.div.rz.d(double 0x10000000000000, double 2.0)
+  ret double %res
+}
+
+define float @test_normal_div_normal_to_subnorm_rm_f() {
+; CHECK-LABEL: define float @test_normal_div_normal_to_subnorm_rm_f() {
+; CHECK-NEXT:    ret float 0x3800000000000000
+;
+  %res = call float @llvm.nvvm.div.rm.f(float 0x3810000000000000, float 2.0)
+  ret float %res
+}
+
+define float @test_normal_div_normal_to_subnorm_rn_f() {
+; CHECK-LABEL: define float @test_normal_div_normal_to_subnorm_rn_f() {
+; CHECK-NEXT:    ret float 0x3800000000000000
+;
+  %res = call float @llvm.nvvm.div.rn.f(float 0x3810000000000000, float 2.0)
+  ret float %res
+}
+
+define float @test_normal_div_normal_to_subnorm_rp_f() {
+; CHECK-LABEL: define float @test_normal_div_normal_to_subnorm_rp_f() {
+; CHECK-NEXT:    ret float 0x3800000000000000
+;
+  %res = call float @llvm.nvvm.div.rp.f(float 0x3810000000000000, float 2.0)
+  ret float %res
+}
+
+define float @test_normal_div_normal_to_subnorm_rz_f() {
+; CHECK-LABEL: define float @test_normal_div_normal_to_subnorm_rz_f() {
+; CHECK-NEXT:    ret float 0x3800000000000000
+;
+  %res = call float @llvm.nvvm.div.rz.f(float 0x3810000000000000, float 2.0)
+  ret float %res
+}
+
+define float @test_normal_div_normal_to_subnorm_rm_ftz_f() {
+; CHECK-LABEL: define float @test_normal_div_normal_to_subnorm_rm_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.div.rm.ftz.f(float 0x3810000000000000, float 2.0)
+  ret float %res
+}
+
+define float @test_normal_div_normal_to_subnorm_rn_ftz_f() {
+; CHECK-LABEL: define float @test_normal_div_normal_to_subnorm_rn_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.div.rn.ftz.f(float 0x3810000000000000, float 2.0)
+  ret float %res
+}
+
+define float @test_normal_div_normal_to_subnorm_rp_ftz_f() {
+; CHECK-LABEL: define float @test_normal_div_normal_to_subnorm_rp_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.div.rp.ftz.f(float 0x3810000000000000, float 2.0)
+  ret float %res
+}
+
+define float @test_normal_div_normal_to_subnorm_rz_ftz_f() {
+; CHECK-LABEL: define float @test_normal_div_normal_to_subnorm_rz_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.div.rz.ftz.f(float 0x3810000000000000, float 2.0)
+  ret float %res
+}
+
+;###############################################################
+;#              Div( 4/3 + epsilon , 4/3 - epsilon)            #
+;###############################################################
+; Tests division of numbers just above and just below 4/3.
+; The result falls between 1.0 and 1.0 + 2^-23
+; - RZ, RM round to 1.0 (rounding towards zero/down)
+; - RN, RP rounds to 1.0 + 2^-23 (rounding towards nearest/up)
+
+define float @test_div_just_above_1_rm_f() {
+; CHECK-LABEL: define float @test_div_just_above_1_rm_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.div.rm.f(float 0x3FF5555560000000, float 0x3FF5555540000000)
+  ret float %res
+}
+
+define float @test_div_just_above_1_rn_f() {
+; CHECK-LABEL: define float @test_div_just_above_1_rn_f() {
+; CHECK-NEXT:    ret float 0x3FF0000020000000
+;
+  %res = call float @llvm.nvvm.div.rn.f(float 0x3FF5555560000000, float 0x3FF5555540000000)
+  ret float %res
+}
+
+define float @test_div_just_above_1_rp_f() {
+; CHECK-LABEL: define float @test_div_just_above_1_rp_f() {
+; CHECK-NEXT:    ret float 0x3FF0000020000000
+;
+  %res = call float @llvm.nvvm.div.rp.f(float 0x3FF5555560000000, float 0x3FF5555540000000)
+  ret float %res
+}
+
+define float @test_div_just_above_1_rz_f() {
+; CHECK-LABEL: define float @test_div_just_above_1_rz_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.div.rz.f(float 0x3FF5555560000000, float 0x3FF5555540000000)
+  ret float %res
+}
+
+define float @test_div_just_above_1_rm_ftz_f() {
+; CHECK-LABEL: define float @test_div_just_above_1_rm_ftz_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.div.rm.ftz.f(float 0x3FF5555560000000, float 0x3FF5555540000000)
+  ret float %res
+}
+
+define float @test_div_just_above_1_rn_ftz_f() {
+; CHECK-LABEL: define float @test_div_just_above_1_rn_ftz_f() {
+; CHECK-NEXT:    ret float 0x3FF0000020000000
+;
+  %res = call float @llvm.nvvm.div.rn.ftz.f(float 0x3FF5555560000000, float 0x3FF5555540000000)
+  ret float %res
+}
+
+define float @test_div_just_above_1_rp_ftz_f() {
+; CHECK-LABEL: define float @test_div_just_above_1_rp_ftz_f() {
+; CHECK-NEXT:    ret float 0x3FF0000020000000
+;
+  %res = call float @llvm.nvvm.div.rp.ftz.f(float 0x3FF5555560000000, float 0x3FF5555540000000)
+  ret float %res
+}
+
+define float @test_div_just_above_1_rz_ftz_f() {
+; CHECK-LABEL: define float @test_div_just_above_1_rz_ftz_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.div.rz.ftz.f(float 0x3FF5555560000000, float 0x3FF5555540000000)
+  ret float %res
+}
+
+;###############################################################
+;#               Div(4/3 + epsilon , 4/3 - epsilon)            #
+;###############################################################
+; Tests division of numbers just above and just below 4/3.
+; The result falls between 1.0 and 1.0 + 2^-52
+; - RZ, RM round to 1.0 (rounding towards zero/down)
+; - RN, RP round to 1.0 + 2^-52 (rounding towards nearest/up)
+
+define double @test_div_just_above_1_rm_d() {
+; CHECK-LABEL: define double @test_div_just_above_1_rm_d() {
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+  %res = call double @llvm.nvvm.div.rm.d(double 0x3FF5555555555555, double 0x3FF5555555555554)
+  ret double %res
+}
+
+define double @test_div_just_above_1_rn_d() {
+; CHECK-LABEL: define double @test_div_just_above_1_rn_d() {
+; CHECK-NEXT:    ret double 0x3FF0000000000001
+;
+  %res = call double @llvm.nvvm.div.rn.d(double 0x3FF5555555555555, double 0x3FF5555555555554)
+  ret double %res
+}
+
+define double @test_div_just_above_1_rp_d() {
+; CHECK-LABEL: define double @test_div_just_above_1_rp_d() {
+; CHECK-NEXT:    ret double 0x3FF0000000000001
+;
+  %res = call double @llvm.nvvm.div.rp.d(double 0x3FF5555555555555, double 0x3FF5555555555554)
+  ret double %res
+}
+
+define double @test_div_just_above_1_rz_d() {
+; CHECK-LABEL: define double @test_div_just_above_1_rz_d() {
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+  %res = call double @llvm.nvvm.div.rz.d(double 0x3FF5555555555555, double 0x3FF5555555555554)
+  ret double %res
+}
+
+;###############################################################
+;#            Div( -(4/3 + epsilon),  4/3 - epsilon  )         #
+;###############################################################
+; Tests division of numbers just below -4/3 and just below 4/3.
+; The result falls between -1.0 and -1.0 - 2^-23
+; - RZ, RP round to -1.0 (rounding towards zero/up)
+; - RN, RM round to -1.0 - 2^-23 (rounding towards nearest/down)
+
+define float @test_div_just_below_negative_1_rm_f() {
+; CHECK-LABEL: define float @test_div_just_below_negative_1_rm_f() {
+; CHECK-NEXT:    ret float 0xBFF0000020000000
+;
+  %res = call float @llvm.nvvm.div.rm.f(float 0xBFF5555560000000, float 0x3FF5555540000000)
+  ret float %res
+}
+
+define float @test_div_just_below_negative_1_rn_f() {
+; CHECK-LABEL: define float @test_div_just_below_negative_1_rn_f() {
+; CHECK-NEXT:    ret float 0xBFF0000020000000
+;
+  %res = call float @llvm.nvvm.div.rn.f(float 0xBFF5555560000000, float 0x3FF5555540000000)
+  ret float %res
+}
+
+define float @test_div_just_below_negative_1_rp_f() {
+; CHECK-LABEL: define float @test_div_just_below_negative_1_rp_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.div.rp.f(float 0xBFF5555560000000, float 0x3FF5555540000000)
+  ret float %res
+}
+
+define float @test_div_just_below_negative_1_rz_f() {
+; CHECK-LABEL: define float @test_div_just_below_negative_1_rz_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.div.rz.f(float 0xBFF5555560000000, float 0x3FF5555540000000)
+  ret float %res
+}
+
+define float @test_div_just_below_negative_1_rm_ftz_f() {
+; CHECK-LABEL: define float @test_div_just_below_negative_1_rm_ftz_f() {
+; CHECK-NEXT:    ret float 0xBFF0000020000000
+;
+  %res = call float @llvm.nvvm.div.rm.ftz.f(float 0xBFF5555560000000, float 0x3FF5555540000000)
+  ret float %res
+}
+
+define float @test_div_just_below_negative_1_rn_ftz_f() {
+; CHECK-LABEL: define float @test_div_just_below_negative_1_rn_ftz_f() {
+; CHECK-NEXT:    ret float 0xBFF0000020000000
+;
+  %res = call float @llvm.nvvm.div.rn.ftz.f(float 0xBFF5555560000000, float 0x3FF5555540000000)
+  ret float %res
+}
+
+define float @test_div_just_below_negative_1_rp_ftz_f() {
+; CHECK-LABEL: define float @test_div_just_below_negative_1_rp_ftz_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.div.rp.ftz.f(float 0xBFF5555560000000, float 0x3FF5555540000000)
+  ret float %res
+}
+
+define float @test_div_just_below_negative_1_rz_ftz_f() {
+; CHECK-LABEL: define float @test_div_just_below_negative_1_rz_ftz_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.div.rz.ftz.f(float 0xBFF5555560000000, float 0x3FF5555540000000)
+  ret float %res
+}
+
+;###############################################################
+;#            Div( -(4/3 + epsilon),  4/3 - epsilon  )         #
+;###############################################################
+; Tests division of numbers just below -4/3 and just below 4/3.
+; The result falls between -1.0 and -1.0 - 2^-52
+; - RZ, RP round to -1.0 (rounding towards zero/up)
+; - RN, RM round to -1.0 - 2^-52 (rounding towards nearest/down)
+
+define double @test_div_just_below_negative_1_rm_d() {
+; CHECK-LABEL: define double @test_div_just_below_negative_1_rm_d() {
+; CHECK-NEXT:    ret double 0xBFF0000000000001
+;
+  %res = call double @llvm.nvvm.div.rm.d(double 0xBFF5555555555555, double 0x3FF5555555555554)
+  ret double %res
+}
+
+define double @test_div_just_below_negative_1_rn_d() {
+; CHECK-LABEL: define double @test_div_just_below_negative_1_rn_d() {
+; CHECK-NEXT:    ret double 0xBFF0000000000001
+;
+  %res = call double @llvm.nvvm.div.rn.d(double 0xBFF5555555555555, double 0x3FF5555555555554)
+  ret double %res
+}
+
+define double @test_div_just_below_negative_1_rp_d() {
+; CHECK-LABEL: define double @test_div_just_below_negative_1_rp_d() {
+; CHECK-NEXT:    ret double -1.000000e+00
+;
+  %res = call double @llvm.nvvm.div.rp.d(double 0xBFF5555555555555, double 0x3FF5555555555554)
+  ret double %res
+}
+
+define double @test_div_just_below_negative_1_rz_d() {
+; CHECK-LABEL: define double @test_div_just_below_negative_1_rz_d() {
+; CHECK-NEXT:    ret double -1.000000e+00
+;
+  %res = call double @llvm.nvvm.div.rz.d(double 0xBFF5555555555555, double 0x3FF5555555555554)
+  ret double %res
+}
+
+;###############################################################
+;#                    Div(~4/3 , ~4/3 + epsilon)               #
+;###############################################################
+; Tests division of ~4/3 by a value just over 4/3
+; The exact result falls between 1.0 - 2^-23 ( = 0x3FEFFFFFC0000000)
+; and 1.0 - (2^-23 + 2^-24)  ( = 0x3FEFFFFFA0000000).
+; - RN, RZ, RM round to 1.0 - 2^-23 - 2^-24 (rounding towards nearest/zero/down)
+; - RP rounds to 1.0 - 2^-23 (rounding up)
+
+define float @test_div_just_below_1_rm_f() {
+; CHECK-LABEL: define float @test_div_just_below_1_rm_f() {
+; CHECK-NEXT:    ret float 0x3FEFFFFFA0000000
+;
+  %res = call float @llvm.nvvm.div.rm.f(float 0x3FF5555540000000, float 0x3FF5555580000000)
+  ret float %res
+}
+
+define float @test_div_just_below_1_rn_f() {
+; CHECK-LABEL: define float @test_div_just_below_1_rn_f() {
+; CHECK-NEXT:    ret float 0x3FEFFFFFA0000000
+;
+  %res = call float @llvm.nvvm.div.rn.f(float 0x3FF5555540000000, float 0x3FF5555580000000)
+  ret float %res
+}
+
+define float @test_div_just_below_1_rp_f() {
+; CHECK-LABEL: define float @test_div_just_below_1_rp_f() {
+; CHECK-NEXT:    ret float 0x3FEFFFFFC0000000
+;
+  %res = call float @llvm.nvvm.div.rp.f(float 0x3FF5555540000000, float 0x3FF5555580000000)
+  ret float %res
+}
+
+define float @test_div_just_below_1_rz_f() {
+; CHECK-LABEL: define float @test_div_just_below_1_rz_f() {
+; CHECK-NEXT:    ret float 0x3FEFFFFFA0000000
+;
+  %res = call float @llvm.nvvm.div.rz.f(float 0x3FF5555540000000, float 0x3FF5555580000000)
+  ret float %res
+}
+
+define float @test_div_just_below_1_rm_ftz_f() {
+; CHECK-LABEL: define float @test_div_just_below_1_rm_ftz_f() {
+; CHECK-NEXT:    ret float 0x3FEFFFFFA0000000
+;
+  %res = call float @llvm.nvvm.div.rm.ftz.f(float 0x3FF5555540000000, float 0x3FF5555580000000)
+  ret float %res
+}
+
+define float @test_div_just_below_1_rn_ftz_f() {
+; CHECK-LABEL: define float @test_div_just_below_1_rn_ftz_f() {
+; CHECK-NEXT:    ret float 0x3FEFFFFFA0000000
+;
+  %res = call float @llvm.nvvm.div.rn.ftz.f(float 0x3FF5555540000000, float 0x3FF5555580000000)
+  ret float %res
+}
+
+define float @test_div_just_below_1_rp_ftz_f() {
+; CHECK-LABEL: define float @test_div_just_below_1_rp_ftz_f() {
+; CHECK-NEXT:    ret float 0x3FEFFFFFC0000000
+;
+  %res = call float @llvm.nvvm.div.rp.ftz.f(float 0x3FF5555540000000, float 0x3FF5555580000000)
+  ret float %res
+}
+
+define float @test_div_just_below_1_rz_ftz_f() {
+; CHECK-LABEL: define float @test_div_just_below_1_rz_ftz_f() {
+; CHECK-NEXT:    ret float 0x3FEFFFFFA0000000
+;
+  %res = call float @llvm.nvvm.div.rz.ftz.f(float 0x3FF5555540000000, float 0x3FF5555580000000)
+  ret float %res
+}
+
+;###############################################################
+;#                    Div(~4/3 , ~4/3 + epsilon)               #
+;###############################################################
+; Tests division of ~4/3 by a value just over 4/3
+; The exact result falls between 1.0 - 2^-51 - 2^-52  ( = 0x3FEFFFFFFFFFFFFA)
+; and 1.0 - 2^-51 - 2^-53  ( = 0x3FEFFFFFFFFFFFFB).
+; - RN, RZ, RM round to 1.0 - 2^-51 - 2^-52 (rounding towards nearest/zero/down)
+; - RP rounds to 1.0 - 2^-51 - 2^-53 (rounding up)
+
+define double @test_div_just_below_1_rm_d() {
+; CHECK-LABEL: define double @test_div_just_below_1_rm_d() {
+; CHECK-NEXT:    ret double 0x3FEFFFFFFFFFFFFA
+;
+  %res = call double @llvm.nvvm.div.rm.d(double 0x3FF5555555555554, double 0x3FF5555555555558)
+  ret double %res
+}
+
+define double @test_div_just_below_1_rn_d() {
+; CHECK-LABEL: define double @test_div_just_below_1_rn_d() {
+; CHECK-NEXT:    ret double 0x3FEFFFFFFFFFFFFA
+;
+  %res = call double @llvm.nvvm.div.rn.d(double 0x3FF5555555555554, double 0x3FF5555555555558)
+  ret double %res
+}
+
+define double @test_div_just_below_1_rp_d() {
+; CHECK-LABEL: define double @test_div_just_below_1_rp_d() {
+; CHECK-NEXT:    ret double 0x3FEFFFFFFFFFFFFB
+;
+  %res = call double @llvm.nvvm.div.rp.d(double 0x3FF5555555555554, double 0x3FF5555555555558)
+  ret double %res
+}
+
+define double @test_div_just_below_1_rz_d() {
+; CHECK-LABEL: define double @test_div_just_below_1_rz_d() {
+; CHECK-NEXT:    ret double 0x3FEFFFFFFFFFFFFA
+;
+  %res = call double @llvm.nvvm.div.rz.d(double 0x3FF5555555555554, double 0x3FF5555555555558)
+  ret double %res
+}
+
+;###############################################################
+;#                   Div(-4/3, ~4/3 + epsilon)                 #
+;###############################################################
+; Tests division of ~-4/3 by a value just over 4/3
+; The exact result falls between -1.0 + 2^-23 ( = 0xBFEFFFFFC0000000)
+; and -1.0 + (2^-23 + 2^-24)  ( = 0xBFEFFFFFA0000000).
+; - RN, RZ, RP round to -1.0 + 2^-23 + 2^-24 (rounding towards nearest/zero/up)
+; - RM rounds to -1.0 + 2^-23 (rounding down)
+
+define float @test_div_just_above_negative_1_rm_f() {
+; CHECK-LABEL: define float @test_div_just_above_negative_1_rm_f() {
+; CHECK-NEXT:    ret float 0xBFEFFFFFC0000000
+;
+  %res = call float @llvm.nvvm.div.rm.f(float 0xBFF5555540000000, float 0x3FF5555580000000)
+  ret float %res
+}
+
+define float @test_div_just_above_negative_1_rn_f() {
+; CHECK-LABEL: define float @test_div_just_above_negative_1_rn_f() {
+; CHECK-NEXT:    ret float 0xBFEFFFFFA0000000
+;
+  %res = call float @llvm.nvvm.div.rn.f(float 0xBFF5555540000000, float 0x3FF5555580000000)
+  ret float %res
+}
+
+define float @test_div_just_above_negative_1_rp_f() {
+; CHECK-LABEL: define float @test_div_just_above_negative_1_rp_f() {
+; CHECK-NEXT:    ret float 0xBFEFFFFFA0000000
+;
+  %res = call float @llvm.nvvm.div.rp.f(float 0xBFF5555540000000, float 0x3FF5555580000000)
+  ret float %res
+}
+
+define float @test_div_just_above_negative_1_rz_f() {
+; CHECK-LABEL: define float @test_div_just_above_negative_1_rz_f() {
+; CHECK-NEXT:    ret float 0xBFEFFFFFA0000000
+;
+  %res = call float @llvm.nvvm.div.rz.f(float 0xBFF5555540000000, float 0x3FF5555580000000)
+  ret float %res
+}
+
+define float @test_div_just_above_negative_1_rm_ftz_f() {
+; CHECK-LABEL: define float @test_div_just_above_negative_1_rm_ftz_f() {
+; CHECK-NEXT:    ret float 0xBFEFFFFFC0000000
+;
+  %res = call float @llvm.nvvm.div.rm.ftz.f(float 0xBFF5555540000000, float 0x3FF5555580000000)
+  ret float %res
+}
+
+define float @test_div_just_above_negative_1_rn_ftz_f() {
+; CHECK-LABEL: define float @test_div_just_above_negative_1_rn_ftz_f() {
+; CHECK-NEXT:    ret float 0xBFEFFFFFA0000000
+;
+  %res = call float @llvm.nvvm.div.rn.ftz.f(float 0xBFF5555540000000, float 0x3FF5555580000000)
+  ret float %res
+}
+
+define float @test_div_just_above_negative_1_rp_ftz_f() {
+; CHECK-LABEL: define float @test_div_just_above_negative_1_rp_ftz_f() {
+; CHECK-NEXT:    ret float 0xBFEFFFFFA0000000
+;
+  %res = call float @llvm.nvvm.div.rp.ftz.f(float 0xBFF5555540000000, float 0x3FF5555580000000)
+  ret float %res
+}
+
+define float @test_div_just_above_negative_1_rz_ftz_f() {
+; CHECK-LABEL: define float @test_div_just_above_negative_1_rz_ftz_f() {
+; CHECK-NEXT:    ret float 0xBFEFFFFFA0000000
+;
+  %res = call float @llvm.nvvm.div.rz.ftz.f(float 0xBFF5555540000000, float 0x3FF5555580000000)
+  ret float %res
+}
+
+;###############################################################
+;#                   Div(~-4/3 , ~4/3 + epsilon)               #
+;###############################################################
+; Tests division of ~-4/3 by a value just over 4/3
+; The exact result falls between -1.0 + 2^-51 + 2^-52  ( = 0xBFEFFFFFFFFFFFFA)
+; and -1.0 + 2^-51 + 2^-53  ( = 0xBFEFFFFFFFFFFFFB).
+; - RN, RZ, RP round to -1.0 + 2^-51 + 2^-52 (rounding towards nearest/zero/up)
+; - RM rounds to -1.0 + 2^-51 + 2^-53 (rounding down)
+
+define double @test_div_just_above_negative_1_rm_d() {
+; CHECK-LABEL: define double @test_div_just_above_negative_1_rm_d() {
+; CHECK-NEXT:    ret double 0xBFEFFFFFFFFFFFFB
+;
+  %res = call double @llvm.nvvm.div.rm.d(double 0xBFF5555555555554, double 0x3FF5555555555558)
+  ret double %res
+}
+
+define double @test_div_just_above_negative_1_rn_d() {
+; CHECK-LABEL: define double @test_div_just_above_negative_1_rn_d() {
+; CHECK-NEXT:    ret double 0xBFEFFFFFFFFFFFFA
+;
+  %res = call double @llvm.nvvm.div.rn.d(double 0xBFF5555555555554, double 0x3FF5555555555558)
+  ret double %res
+}
+
+define double @test_div_just_above_negative_1_rp_d() {
+; CHECK-LABEL: define double @test_div_just_above_negative_1_rp_d() {
+; CHECK-NEXT:    ret double 0xBFEFFFFFFFFFFFFA
+;
+  %res = call double @llvm.nvvm.div.rp.d(double 0xBFF5555555555554, double 0x3FF5555555555558)
+  ret double %res
+}
+
+define double @test_div_just_above_negative_1_rz_d() {
+; CHECK-LABEL: define double @test_div_just_above_negative_1_rz_d() {
+; CHECK-NEXT:    ret double 0xBFEFFFFFFFFFFFFA
+;
+  %res = call double @llvm.nvvm.div.rz.d(double 0xBFF5555555555554, double 0x3FF5555555555558)
+  ret double %res
+}
+
+;###############################################################
+;#                    Div(NaN, NaN)                           #
+;###############################################################
+; Tests division of NaN by NaN to verify that constant folding is not performed
+; when the result would be NaN.
+
+define float @test_nan_div_nan_rm_f() {
+; CHECK-LABEL: define float @test_nan_div_nan_rm_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.div.rm.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.div.rm.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000)
+  ret float %res
+}
+
+define float @test_nan_div_nan_rn_f() {
+; CHECK-LABEL: define float @test_nan_div_nan_rn_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.div.rn.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.div.rn.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000)
+  ret float %res
+}
+
+define float @test_nan_div_nan_rp_f() {
+; CHECK-LABEL: define float @test_nan_div_nan_rp_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.div.rp.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.div.rp.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000)
+  ret float %res
+}
+
+define float @test_nan_div_nan_rz_f() {
+; CHECK-LABEL: define float @test_nan_div_nan_rz_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.div.rz.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.div.rz.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000)
+  ret float %res
+}
+
+define float @test_nan_div_nan_rm_ftz_f() {
+; CHECK-LABEL: define float @test_nan_div_nan_rm_ftz_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.div.rm.ftz.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.div.rm.ftz.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000)
+  ret float %res
+}
+
+define float @test_nan_div_nan_rn_ftz_f() {
+; CHECK-LABEL: define float @test_nan_div_nan_rn_ftz_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.div.rn.ftz.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.div.rn.ftz.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000)
+  ret float %res
+}
+
+define float @test_nan_div_nan_rp_ftz_f() {
+; CHECK-LABEL: define float @test_nan_div_nan_rp_ftz_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.div.rp.ftz.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.div.rp.ftz.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000)
+  ret float %res
+}
+
+define float @test_nan_div_nan_rz_ftz_f() {
+; CHECK-LABEL: define float @test_nan_div_nan_rz_ftz_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.div.rz.ftz.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.div.rz.ftz.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000)
+  ret float %res
+}
+
+define double @test_nan_div_nan_rm_d() {
+; CHECK-LABEL: define double @test_nan_div_nan_rm_d() {
+; CHECK-NEXT:    [[RES:%.*]] = call double @llvm.nvvm.div.rm.d(double 0x7FFFFFFFFFFFFFFF, double 0x7FFFFFFFFFFFFFFF)
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %res = call double @llvm.nvvm.div.rm.d(double 0x7FFFFFFFFFFFFFFF, double 0x7FFFFFFFFFFFFFFF)
+  ret double %res
+}
+
+define double @test_nan_div_nan_rn_d() {
+; CHECK-LABEL: define double @test_nan_div_nan_rn_d() {
+; CHECK-NEXT:    [[RES:%.*]] = call double @llvm.nvvm.div.rn.d(double 0x7FFFFFFFFFFFFFFF, double 0x7FFFFFFFFFFFFFFF)
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %res = call double @llvm.nvvm.div.rn.d(double 0x7FFFFFFFFFFFFFFF, double 0x7FFFFFFFFFFFFFFF)
+  ret double %res
+}
+
+define double @test_nan_div_nan_rp_d() {
+; CHECK-LABEL: define double @test_nan_div_nan_rp_d() {
+; CHECK-NEXT:    [[RES:%.*]] = call double @llvm.nvvm.div.rp.d(double 0x7FFFFFFFFFFFFFFF, double 0x7FFFFFFFFFFFFFFF)
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %res = call double @llvm.nvvm.div.rp.d(double 0x7FFFFFFFFFFFFFFF, double 0x7FFFFFFFFFFFFFFF)
+  ret double %res
+}
+
+define double @test_nan_div_nan_rz_d() {
+; CHECK-LABEL: define double @test_nan_div_nan_rz_d() {
+; CHECK-NEXT:    [[RES:%.*]] = call double @llvm.nvvm.div.rz.d(double 0x7FFFFFFFFFFFFFFF, double 0x7FFFFFFFFFFFFFFF)
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %res = call double @llvm.nvvm.div.rz.d(double 0x7FFFFFFFFFFFFFFF, double 0x7FFFFFFFFFFFFFFF)
+  ret double %res
+}
diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-fma.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-fma.ll
new file mode 100644
index 0000000000000..d52ff1ce66440
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-fma.ll
@@ -0,0 +1,874 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instsimplify -march=nvptx64 --mcpu=sm_86 --mattr=+ptx72 -S | FileCheck %s
+
+; Check constant-folding for NVVM FMA intrinsics with different rounding modes
+
+;###############################################################
+;#                    FMA(2.0, 3.0, 4.0)                       #
+;###############################################################
+; Tests FMA with regular numbers that produce a precise result
+
+define double @test_fma_2_3_4_rm_d() {
+; CHECK-LABEL: define double @test_fma_2_3_4_rm_d() {
+; CHECK-NEXT:    ret double 1.000000e+01
+;
+  %res = call double @llvm.nvvm.fma.rm.d(double 2.0, double 3.0, double 4.0)
+  ret double %res
+}
+
+define double @test_fma_2_3_4_rn_d() {
+; CHECK-LABEL: define double @test_fma_2_3_4_rn_d() {
+; CHECK-NEXT:    ret double 1.000000e+01
+;
+  %res = call double @llvm.nvvm.fma.rn.d(double 2.0, double 3.0, double 4.0)
+  ret double %res
+}
+
+define double @test_fma_2_3_4_rp_d() {
+; CHECK-LABEL: define double @test_fma_2_3_4_rp_d() {
+; CHECK-NEXT:    ret double 1.000000e+01
+;
+  %res = call double @llvm.nvvm.fma.rp.d(double 2.0, double 3.0, double 4.0)
+  ret double %res
+}
+
+define double @test_fma_2_3_4_rz_d() {
+; CHECK-LABEL: define double @test_fma_2_3_4_rz_d() {
+; CHECK-NEXT:    ret double 1.000000e+01
+;
+  %res = call double @llvm.nvvm.fma.rz.d(double 2.0, double 3.0, double 4.0)
+  ret double %res
+}
+
+define float @test_fma_2_3_4_rm_f() {
+; CHECK-LABEL: define float @test_fma_2_3_4_rm_f() {
+; CHECK-NEXT:    ret float 1.000000e+01
+;
+  %res = call float @llvm.nvvm.fma.rm.f(float 2.0, float 3.0, float 4.0)
+  ret float %res
+}
+
+define float @test_fma_2_3_4_rn_f() {
+; CHECK-LABEL: define float @test_fma_2_3_4_rn_f() {
+; CHECK-NEXT:    ret float 1.000000e+01
+;
+  %res = call float @llvm.nvvm.fma.rn.f(float 2.0, float 3.0, float 4.0)
+  ret float %res
+}
+
+define float @test_fma_2_3_4_rp_f() {
+; CHECK-LABEL: define float @test_fma_2_3_4_rp_f() {
+; CHECK-NEXT:    ret float 1.000000e+01
+;
+  %res = call float @llvm.nvvm.fma.rp.f(float 2.0, float 3.0, float 4.0)
+  ret float %res
+}
+
+define float @test_fma_2_3_4_rz_f() {
+; CHECK-LABEL: define float @test_fma_2_3_4_rz_f() {
+; CHECK-NEXT:    ret float 1.000000e+01
+;
+  %res = call float @llvm.nvvm.fma.rz.f(float 2.0, float 3.0, float 4.0)
+  ret float %res
+}
+
+define float @test_fma_2_3_4_rm_ftz_f() {
+; CHECK-LABEL: define float @test_fma_2_3_4_rm_ftz_f() {
+; CHECK-NEXT:    ret float 1.000000e+01
+;
+  %res = call float @llvm.nvvm.fma.rm.ftz.f(float 2.0, float 3.0, float 4.0)
+  ret float %res
+}
+
+define float @test_fma_2_3_4_rn_ftz_f() {
+; CHECK-LABEL: define float @test_fma_2_3_4_rn_ftz_f() {
+; CHECK-NEXT:    ret float 1.000000e+01
+;
+  %res = call float @llvm.nvvm.fma.rn.ftz.f(float 2.0, float 3.0, float 4.0)
+  ret float %res
+}
+
+define float @test_fma_2_3_4_rp_ftz_f() {
+; CHECK-LABEL: define float @test_fma_2_3_4_rp_ftz_f() {
+; CHECK-NEXT:    ret float 1.000000e+01
+;
+  %res = call float @llvm.nvvm.fma.rp.ftz.f(float 2.0, float 3.0, float 4.0)
+  ret float %res
+}
+
+define float @test_fma_2_3_4_rz_ftz_f() {
+; CHECK-LABEL: define float @test_fma_2_3_4_rz_ftz_f() {
+; CHECK-NEXT:    ret float 1.000000e+01
+;
+  %res = call float @llvm.nvvm.fma.rz.ftz.f(float 2.0, float 3.0, float 4.0)
+  ret float %res
+}
+
+;###############################################################
+;#                    FMA(Subnormal, 2.0, 0.0) -> Normal       #
+;###############################################################
+; Tests FMA with subnormal multiplier that produces a normal result
+; For float: 2^-127 * 2.0 = 2^-126 (smallest normal)
+; For double: 2^-1023 * 2.0 = 2^-1022 (smallest normal)
+; The FTZ variants should return 0.0, as they flush their input to zero.
+
+define double @test_fma_subnorm_to_norm_rm_d() {
+; CHECK-LABEL: define double @test_fma_subnorm_to_norm_rm_d() {
+; CHECK-NEXT:    ret double 0x10000000000000
+;
+  %res = call double @llvm.nvvm.fma.rm.d(double 0x0008000000000000, double 2.0, double 0.0)
+  ret double %res
+}
+
+define double @test_fma_subnorm_to_norm_rn_d() {
+; CHECK-LABEL: define double @test_fma_subnorm_to_norm_rn_d() {
+; CHECK-NEXT:    ret double 0x10000000000000
+;
+  %res = call double @llvm.nvvm.fma.rn.d(double 0x0008000000000000, double 2.0, double 0.0)
+  ret double %res
+}
+
+define double @test_fma_subnorm_to_norm_rp_d() {
+; CHECK-LABEL: define double @test_fma_subnorm_to_norm_rp_d() {
+; CHECK-NEXT:    ret double 0x10000000000000
+;
+  %res = call double @llvm.nvvm.fma.rp.d(double 0x0008000000000000, double 2.0, double 0.0)
+  ret double %res
+}
+
+define double @test_fma_subnorm_to_norm_rz_d() {
+; CHECK-LABEL: define double @test_fma_subnorm_to_norm_rz_d() {
+; CHECK-NEXT:    ret double 0x10000000000000
+;
+  %res = call double @llvm.nvvm.fma.rz.d(double 0x0008000000000000, double 2.0, double 0.0)
+  ret double %res
+}
+
+define float @test_fma_subnorm_to_norm_rm_f() {
+; CHECK-LABEL: define float @test_fma_subnorm_to_norm_rm_f() {
+; CHECK-NEXT:    ret float 0x3810000000000000
+;
+  %res = call float @llvm.nvvm.fma.rm.f(float 0x3800000000000000, float 2.0, float 0.0)
+  ret float %res
+}
+
+define float @test_fma_subnorm_to_norm_rn_f() {
+; CHECK-LABEL: define float @test_fma_subnorm_to_norm_rn_f() {
+; CHECK-NEXT:    ret float 0x3810000000000000
+;
+  %res = call float @llvm.nvvm.fma.rn.f(float 0x3800000000000000, float 2.0, float 0.0)
+  ret float %res
+}
+
+define float @test_fma_subnorm_to_norm_rp_f() {
+; CHECK-LABEL: define float @test_fma_subnorm_to_norm_rp_f() {
+; CHECK-NEXT:    ret float 0x3810000000000000
+;
+  %res = call float @llvm.nvvm.fma.rp.f(float 0x3800000000000000, float 2.0, float 0.0)
+  ret float %res
+}
+
+define float @test_fma_subnorm_to_norm_rz_f() {
+; CHECK-LABEL: define float @test_fma_subnorm_to_norm_rz_f() {
+; CHECK-NEXT:    ret float 0x3810000000000000
+;
+  %res = call float @llvm.nvvm.fma.rz.f(float 0x3800000000000000, float 2.0, float 0.0)
+  ret float %res
+}
+
+define float @test_fma_subnorm_to_norm_rm_ftz_f() {
+; CHECK-LABEL: define float @test_fma_subnorm_to_norm_rm_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rm.ftz.f(float 0x3800000000000000, float 2.0, float 0.0)
+  ret float %res
+}
+
+define float @test_fma_subnorm_to_norm_rn_ftz_f() {
+; CHECK-LABEL: define float @test_fma_subnorm_to_norm_rn_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rn.ftz.f(float 0x3800000000000000, float 2.0, float 0.0)
+  ret float %res
+}
+
+define float @test_fma_subnorm_to_norm_rp_ftz_f() {
+; CHECK-LABEL: define float @test_fma_subnorm_to_norm_rp_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rp.ftz.f(float 0x3800000000000000, float 2.0, float 0.0)
+  ret float %res
+}
+
+define float @test_fma_subnorm_to_norm_rz_ftz_f() {
+; CHECK-LABEL: define float @test_fma_subnorm_to_norm_rz_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rz.ftz.f(float 0x3800000000000000, float 2.0, float 0.0)
+  ret float %res
+}
+
+;###############################################################
+;#                    FMA(Normal, 0.5, 0.0) -> Subnormal       #
+;###############################################################
+; Tests FMA with normal inputs that produce a subnormal result
+; For float: 2^-126 * 0.5 = 2^-127 (subnormal)
+; For double: 2^-1022 * 0.5 = 2^-1023 (subnormal)
+; With FTZ mode, the subnormal output should be flushed to zero.
+
+define double @test_fma_norm_to_subnorm_rm_d() {
+; CHECK-LABEL: define double @test_fma_norm_to_subnorm_rm_d() {
+; CHECK-NEXT:    ret double 0x8000000000000
+;
+  %res = call double @llvm.nvvm.fma.rm.d(double 0x10000000000000, double 0.5, double 0.0)
+  ret double %res
+}
+
+define double @test_fma_norm_to_subnorm_rn_d() {
+; CHECK-LABEL: define double @test_fma_norm_to_subnorm_rn_d() {
+; CHECK-NEXT:    ret double 0x8000000000000
+;
+  %res = call double @llvm.nvvm.fma.rn.d(double 0x10000000000000, double 0.5, double 0.0)
+  ret double %res
+}
+
+define double @test_fma_norm_to_subnorm_rp_d() {
+; CHECK-LABEL: define double @test_fma_norm_to_subnorm_rp_d() {
+; CHECK-NEXT:    ret double 0x8000000000000
+;
+  %res = call double @llvm.nvvm.fma.rp.d(double 0x10000000000000, double 0.5, double 0.0)
+  ret double %res
+}
+
+define double @test_fma_norm_to_subnorm_rz_d() {
+; CHECK-LABEL: define double @test_fma_norm_to_subnorm_rz_d() {
+; CHECK-NEXT:    ret double 0x8000000000000
+;
+  %res = call double @llvm.nvvm.fma.rz.d(double 0x10000000000000, double 0.5, double 0.0)
+  ret double %res
+}
+
+define float @test_fma_norm_to_subnorm_rm_f() {
+; CHECK-LABEL: define float @test_fma_norm_to_subnorm_rm_f() {
+; CHECK-NEXT:    ret float 0x3800000000000000
+;
+  %res = call float @llvm.nvvm.fma.rm.f(float 0x3810000000000000, float 0.5, float 0.0)
+  ret float %res
+}
+
+define float @test_fma_norm_to_subnorm_rn_f() {
+; CHECK-LABEL: define float @test_fma_norm_to_subnorm_rn_f() {
+; CHECK-NEXT:    ret float 0x3800000000000000
+;
+  %res = call float @llvm.nvvm.fma.rn.f(float 0x3810000000000000, float 0.5, float 0.0)
+  ret float %res
+}
+
+define float @test_fma_norm_to_subnorm_rp_f() {
+; CHECK-LABEL: define float @test_fma_norm_to_subnorm_rp_f() {
+; CHECK-NEXT:    ret float 0x3800000000000000
+;
+  %res = call float @llvm.nvvm.fma.rp.f(float 0x3810000000000000, float 0.5, float 0.0)
+  ret float %res
+}
+
+define float @test_fma_norm_to_subnorm_rz_f() {
+; CHECK-LABEL: define float @test_fma_norm_to_subnorm_rz_f() {
+; CHECK-NEXT:    ret float 0x3800000000000000
+;
+  %res = call float @llvm.nvvm.fma.rz.f(float 0x3810000000000000, float 0.5, float 0.0)
+  ret float %res
+}
+
+define float @test_fma_norm_to_subnorm_rm_ftz_f() {
+; CHECK-LABEL: define float @test_fma_norm_to_subnorm_rm_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rm.ftz.f(float 0x3810000000000000, float 0.5, float 0.0)
+  ret float %res
+}
+
+define float @test_fma_norm_to_subnorm_rn_ftz_f() {
+; CHECK-LABEL: define float @test_fma_norm_to_subnorm_rn_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rn.ftz.f(float 0x3810000000000000, float 0.5, float 0.0)
+  ret float %res
+}
+
+define float @test_fma_norm_to_subnorm_rp_ftz_f() {
+; CHECK-LABEL: define float @test_fma_norm_to_subnorm_rp_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rp.ftz.f(float 0x3810000000000000, float 0.5, float 0.0)
+  ret float %res
+}
+
+define float @test_fma_norm_to_subnorm_rz_ftz_f() {
+; CHECK-LABEL: define float @test_fma_norm_to_subnorm_rz_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rz.ftz.f(float 0x3810000000000000, float 0.5, float 0.0)
+  ret float %res
+}
+
+;###############################################################
+;#                    FMA(NaN, 1.0, 0.0)                       #
+;###############################################################
+; Tests FMA with NaN input to verify that the instruction is preserved
+; since the result would be NaN. The instruction should not be folded.
+
+define double @test_fma_nan_rm_d() {
+; CHECK-LABEL: define double @test_fma_nan_rm_d() {
+; CHECK-NEXT:    [[RES:%.*]] = call double @llvm.nvvm.fma.rm.d(double 0x7FF8000000000000, double 1.000000e+00, double 0.000000e+00)
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %res = call double @llvm.nvvm.fma.rm.d(double 0x7FF8000000000000, double 1.0, double 0.0)
+  ret double %res
+}
+
+define double @test_fma_nan_rn_d() {
+; CHECK-LABEL: define double @test_fma_nan_rn_d() {
+; CHECK-NEXT:    [[RES:%.*]] = call double @llvm.nvvm.fma.rn.d(double 0x7FF8000000000000, double 1.000000e+00, double 0.000000e+00)
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %res = call double @llvm.nvvm.fma.rn.d(double 0x7FF8000000000000, double 1.0, double 0.0)
+  ret double %res
+}
+
+define double @test_fma_nan_rp_d() {
+; CHECK-LABEL: define double @test_fma_nan_rp_d() {
+; CHECK-NEXT:    [[RES:%.*]] = call double @llvm.nvvm.fma.rp.d(double 0x7FF8000000000000, double 1.000000e+00, double 0.000000e+00)
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %res = call double @llvm.nvvm.fma.rp.d(double 0x7FF8000000000000, double 1.0, double 0.0)
+  ret double %res
+}
+
+define double @test_fma_nan_rz_d() {
+; CHECK-LABEL: define double @test_fma_nan_rz_d() {
+; CHECK-NEXT:    [[RES:%.*]] = call double @llvm.nvvm.fma.rz.d(double 0x7FF8000000000000, double 1.000000e+00, double 0.000000e+00)
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %res = call double @llvm.nvvm.fma.rz.d(double 0x7FF8000000000000, double 1.0, double 0.0)
+  ret double %res
+}
+
+define float @test_fma_nan_rm_f() {
+; CHECK-LABEL: define float @test_fma_nan_rm_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.fma.rm.f(float 0x7FFC000000000000, float 1.000000e+00, float 0.000000e+00)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.fma.rm.f(float 0x7FFC000000000000, float 1.0, float 0.0)
+  ret float %res
+}
+
+define float @test_fma_nan_rn_f() {
+; CHECK-LABEL: define float @test_fma_nan_rn_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.fma.rn.f(float 0x7FFC000000000000, float 1.000000e+00, float 0.000000e+00)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.fma.rn.f(float 0x7FFC000000000000, float 1.0, float 0.0)
+  ret float %res
+}
+
+define float @test_fma_nan_rp_f() {
+; CHECK-LABEL: define float @test_fma_nan_rp_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.fma.rp.f(float 0x7FFC000000000000, float 1.000000e+00, float 0.000000e+00)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.fma.rp.f(float 0x7FFC000000000000, float 1.0, float 0.0)
+  ret float %res
+}
+
+define float @test_fma_nan_rz_f() {
+; CHECK-LABEL: define float @test_fma_nan_rz_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.fma.rz.f(float 0x7FFC000000000000, float 1.000000e+00, float 0.000000e+00)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.fma.rz.f(float 0x7FFC000000000000, float 1.0, float 0.0)
+  ret float %res
+}
+
+define float @test_fma_nan_rm_ftz_f() {
+; CHECK-LABEL: define float @test_fma_nan_rm_ftz_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.fma.rm.ftz.f(float 0x7FFC000000000000, float 1.000000e+00, float 0.000000e+00)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.fma.rm.ftz.f(float 0x7FFC000000000000, float 1.0, float 0.0)
+  ret float %res
+}
+
+define float @test_fma_nan_rn_ftz_f() {
+; CHECK-LABEL: define float @test_fma_nan_rn_ftz_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.fma.rn.ftz.f(float 0x7FFC000000000000, float 1.000000e+00, float 0.000000e+00)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.fma.rn.ftz.f(float 0x7FFC000000000000, float 1.0, float 0.0)
+  ret float %res
+}
+
+define float @test_fma_nan_rp_ftz_f() {
+; CHECK-LABEL: define float @test_fma_nan_rp_ftz_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.fma.rp.ftz.f(float 0x7FFC000000000000, float 1.000000e+00, float 0.000000e+00)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.fma.rp.ftz.f(float 0x7FFC000000000000, float 1.0, float 0.0)
+  ret float %res
+}
+
+define float @test_fma_nan_rz_ftz_f() {
+; CHECK-LABEL: define float @test_fma_nan_rz_ftz_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.fma.rz.ftz.f(float 0x7FFC000000000000, float 1.000000e+00, float 0.000000e+00)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.fma.rz.ftz.f(float 0x7FFC000000000000, float 1.0, float 0.0)
+  ret float %res
+}
+
+;###############################################################
+;#                    FMA(1.0, 1.0, 2^(-25))                   #
+;###############################################################
+; Tests FMA with 1.0 and 2^(-25) where different rounding modes produce different results.
+; The exact result falls between 1.0 and 1.0 + 2^(-23).
+; RM, RN, and RZ return 1.0, while RP returns 1.0 + 2^(-23) (0x3F800001).
+
+define float @test_fma_1_plus_ulp_rm_f() {
+; CHECK-LABEL: define float @test_fma_1_plus_ulp_rm_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rm.f(float 1.0, float 1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_fma_1_plus_ulp_rn_f() {
+; CHECK-LABEL: define float @test_fma_1_plus_ulp_rn_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rn.f(float 1.0, float 1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_fma_1_plus_ulp_rp_f() {
+; CHECK-LABEL: define float @test_fma_1_plus_ulp_rp_f() {
+; CHECK-NEXT:    ret float 0x3FF0000020000000
+;
+  %res = call float @llvm.nvvm.fma.rp.f(float 1.0, float 1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_fma_1_plus_ulp_rz_f() {
+; CHECK-LABEL: define float @test_fma_1_plus_ulp_rz_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rz.f(float 1.0, float 1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_fma_1_plus_ulp_rm_ftz_f() {
+; CHECK-LABEL: define float @test_fma_1_plus_ulp_rm_ftz_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rm.ftz.f(float 1.0, float 1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_fma_1_plus_ulp_rn_ftz_f() {
+; CHECK-LABEL: define float @test_fma_1_plus_ulp_rn_ftz_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rn.ftz.f(float 1.0, float 1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_fma_1_plus_ulp_rp_ftz_f() {
+; CHECK-LABEL: define float @test_fma_1_plus_ulp_rp_ftz_f() {
+; CHECK-NEXT:    ret float 0x3FF0000020000000
+;
+  %res = call float @llvm.nvvm.fma.rp.ftz.f(float 1.0, float 1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_fma_1_plus_ulp_rz_ftz_f() {
+; CHECK-LABEL: define float @test_fma_1_plus_ulp_rz_ftz_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rz.ftz.f(float 1.0, float 1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+;###############################################################
+;#                    FMA(1.0, 1.0, 2^(-54))                   #
+;###############################################################
+; Tests FMA with 1.0 and 2^(-54) where different rounding modes produce different results.
+; The exact result falls between 1.0 and 1.0 + 2^(-52).
+; - RN, RZ, RM: Return 1.0 (rounding to nearest/zero/down)
+; - RP: Returns 1.0 + 2^(-52) (rounding up)
+
+define double @test_fma_1_plus_ulp_rm_d() {
+; CHECK-LABEL: define double @test_fma_1_plus_ulp_rm_d() {
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+  %res = call double @llvm.nvvm.fma.rm.d(double 1.0, double 1.0, double 0x3C90000000000000)
+  ret double %res
+}
+
+define double @test_fma_1_plus_ulp_rn_d() {
+; CHECK-LABEL: define double @test_fma_1_plus_ulp_rn_d() {
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+  %res = call double @llvm.nvvm.fma.rn.d(double 1.0, double 1.0, double 0x3C90000000000000)
+  ret double %res
+}
+
+define double @test_fma_1_plus_ulp_rp_d() {
+; CHECK-LABEL: define double @test_fma_1_plus_ulp_rp_d() {
+; CHECK-NEXT:    ret double 0x3FF0000000000001
+;
+  %res = call double @llvm.nvvm.fma.rp.d(double 1.0, double 1.0, double 0x3C90000000000000)
+  ret double %res
+}
+
+define double @test_fma_1_plus_ulp_rz_d() {
+; CHECK-LABEL: define double @test_fma_1_plus_ulp_rz_d() {
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+  %res = call double @llvm.nvvm.fma.rz.d(double 1.0, double 1.0, double 0x3C90000000000000)
+  ret double %res
+}
+
+;###############################################################
+;#                    FMA(1.0, -1.0, 2^(-25))                  #
+;###############################################################
+; Tests FMA with -1.0 and 2^(-25) where different rounding modes produce different results.
+; The exact result falls between -1.0 and -1.0 + 2^(-24).
+; - RN, RM: Return -1.0 (rounding toward nearest/down)
+; - RZ, RP: Return -1.0 + 2^(-24) (rounding toward zero/up)
+
+define float @test_fma_neg_1_plus_ulp_rm_f() {
+; CHECK-LABEL: define float @test_fma_neg_1_plus_ulp_rm_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rm.f(float 1.0, float -1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_fma_neg_1_plus_ulp_rn_f() {
+; CHECK-LABEL: define float @test_fma_neg_1_plus_ulp_rn_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rn.f(float 1.0, float -1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_fma_neg_1_plus_ulp_rp_f() {
+; CHECK-LABEL: define float @test_fma_neg_1_plus_ulp_rp_f() {
+; CHECK-NEXT:    ret float 0xBFEFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fma.rp.f(float 1.0, float -1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_fma_neg_1_plus_ulp_rz_f() {
+; CHECK-LABEL: define float @test_fma_neg_1_plus_ulp_rz_f() {
+; CHECK-NEXT:    ret float 0xBFEFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fma.rz.f(float 1.0, float -1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_fma_neg_1_plus_ulp_rm_ftz_f() {
+; CHECK-LABEL: define float @test_fma_neg_1_plus_ulp_rm_ftz_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rm.ftz.f(float 1.0, float -1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_fma_neg_1_plus_ulp_rn_ftz_f() {
+; CHECK-LABEL: define float @test_fma_neg_1_plus_ulp_rn_ftz_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rn.ftz.f(float 1.0, float -1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_fma_neg_1_plus_ulp_rp_ftz_f() {
+; CHECK-LABEL: define float @test_fma_neg_1_plus_ulp_rp_ftz_f() {
+; CHECK-NEXT:    ret float 0xBFEFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fma.rp.ftz.f(float 1.0, float -1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+define float @test_fma_neg_1_plus_ulp_rz_ftz_f() {
+; CHECK-LABEL: define float @test_fma_neg_1_plus_ulp_rz_ftz_f() {
+; CHECK-NEXT:    ret float 0xBFEFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fma.rz.ftz.f(float 1.0, float -1.0, float 0x3E60000000000000)
+  ret float %res
+}
+
+;###############################################################
+;#                    FMA(1.0, -1.0, 2^(-54))                  #
+;###############################################################
+; Tests FMA with -1.0 and 2^(-54) where different rounding modes produce different results.
+; The exact result falls between -1.0 and -1.0 + 2^(-53).
+; - RN, RM: Return -1.0 (rounding toward nearest/down)
+; - RZ, RP: Return -1.0 + 2^(-53) (rounding toward zero/up)
+
+define double @test_fma_neg_1_plus_ulp_rm_d() {
+; CHECK-LABEL: define double @test_fma_neg_1_plus_ulp_rm_d() {
+; CHECK-NEXT:    ret double -1.000000e+00
+;
+  %res = call double @llvm.nvvm.fma.rm.d(double 1.0, double -1.0, double 0x3C90000000000000)
+  ret double %res
+}
+
+define double @test_fma_neg_1_plus_ulp_rn_d() {
+; CHECK-LABEL: define double @test_fma_neg_1_plus_ulp_rn_d() {
+; CHECK-NEXT:    ret double -1.000000e+00
+;
+  %res = call double @llvm.nvvm.fma.rn.d(double 1.0, double -1.0, double 0x3C90000000000000)
+  ret double %res
+}
+
+define double @test_fma_neg_1_plus_ulp_rp_d() {
+; CHECK-LABEL: define double @test_fma_neg_1_plus_ulp_rp_d() {
+; CHECK-NEXT:    ret double 0xBFEFFFFFFFFFFFFF
+;
+  %res = call double @llvm.nvvm.fma.rp.d(double 1.0, double -1.0, double 0x3C90000000000000)
+  ret double %res
+}
+
+define double @test_fma_neg_1_plus_ulp_rz_d() {
+; CHECK-LABEL: define double @test_fma_neg_1_plus_ulp_rz_d() {
+; CHECK-NEXT:    ret double 0xBFEFFFFFFFFFFFFF
+;
+  %res = call double @llvm.nvvm.fma.rz.d(double 1.0, double -1.0, double 0x3C90000000000000)
+  ret double %res
+}
+
+;###############################################################
+;#                    FMA(1.0, 1.0, -2^(-25))                  #
+;###############################################################
+; Tests FMA with 1.0 and -2^(-25) where different rounding modes produce different results.
+; The exact result falls between 1.0 - 2^(-24) and 1.0.
+; - RN, RP: Return 1.0 (rounding toward nearest/up)
+; - RZ, RM: Return 1.0 - 2^(-24) (rounding toward zero/down)
+
+define float @test_fma_1_minus_ulp_rm_f() {
+; CHECK-LABEL: define float @test_fma_1_minus_ulp_rm_f() {
+; CHECK-NEXT:    ret float 0x3FEFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fma.rm.f(float 1.0, float 1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_fma_1_minus_ulp_rn_f() {
+; CHECK-LABEL: define float @test_fma_1_minus_ulp_rn_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rn.f(float 1.0, float 1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_fma_1_minus_ulp_rp_f() {
+; CHECK-LABEL: define float @test_fma_1_minus_ulp_rp_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rp.f(float 1.0, float 1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_fma_1_minus_ulp_rz_f() {
+; CHECK-LABEL: define float @test_fma_1_minus_ulp_rz_f() {
+; CHECK-NEXT:    ret float 0x3FEFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fma.rz.f(float 1.0, float 1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_fma_1_minus_ulp_rm_ftz_f() {
+; CHECK-LABEL: define float @test_fma_1_minus_ulp_rm_ftz_f() {
+; CHECK-NEXT:    ret float 0x3FEFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fma.rm.ftz.f(float 1.0, float 1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_fma_1_minus_ulp_rn_ftz_f() {
+; CHECK-LABEL: define float @test_fma_1_minus_ulp_rn_ftz_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rn.ftz.f(float 1.0, float 1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_fma_1_minus_ulp_rp_ftz_f() {
+; CHECK-LABEL: define float @test_fma_1_minus_ulp_rp_ftz_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rp.ftz.f(float 1.0, float 1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_fma_1_minus_ulp_rz_ftz_f() {
+; CHECK-LABEL: define float @test_fma_1_minus_ulp_rz_ftz_f() {
+; CHECK-NEXT:    ret float 0x3FEFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fma.rz.ftz.f(float 1.0, float 1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+;###############################################################
+;#                    FMA(1.0, 1.0, -2^(-54))                  #
+;###############################################################
+; Tests FMA with 1.0 and -2^(-54) where different rounding modes produce different results.
+; The exact result falls between 1.0 - 2^(-53) and 1.0.
+; - RN, RP: Return 1.0 (rounding toward nearest/up)
+; - RZ, RM: Return 1.0 - 2^(-53) (rounding toward zero/down)
+
+define double @test_fma_1_minus_ulp_rm_d() {
+; CHECK-LABEL: define double @test_fma_1_minus_ulp_rm_d() {
+; CHECK-NEXT:    ret double 0x3FEFFFFFFFFFFFFF
+;
+  %res = call double @llvm.nvvm.fma.rm.d(double 1.0, double 1.0, double 0xBC90000000000000)
+  ret double %res
+}
+
+define double @test_fma_1_minus_ulp_rn_d() {
+; CHECK-LABEL: define double @test_fma_1_minus_ulp_rn_d() {
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+  %res = call double @llvm.nvvm.fma.rn.d(double 1.0, double 1.0, double 0xBC90000000000000)
+  ret double %res
+}
+
+define double @test_fma_1_minus_ulp_rp_d() {
+; CHECK-LABEL: define double @test_fma_1_minus_ulp_rp_d() {
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+  %res = call double @llvm.nvvm.fma.rp.d(double 1.0, double 1.0, double 0xBC90000000000000)
+  ret double %res
+}
+
+define double @test_fma_1_minus_ulp_rz_d() {
+; CHECK-LABEL: define double @test_fma_1_minus_ulp_rz_d() {
+; CHECK-NEXT:    ret double 0x3FEFFFFFFFFFFFFF
+;
+  %res = call double @llvm.nvvm.fma.rz.d(double 1.0, double 1.0, double 0xBC90000000000000)
+  ret double %res
+}
+
+;###############################################################
+;#                    FMA(1.0, -1.0, -2^(-25))                 #
+;###############################################################
+; Tests FMA with -1.0 and -2^(-25) where different rounding modes produce different results.
+; The exact result falls between -1.0 and -1.0 - 2^(-23).
+; - RN, RZ, RP: Return -1.0 (rounding to nearest/zero/up)
+; - RM: Return -1.0 - 2^(-23) (rounding down)
+
+define float @test_fma_neg_1_minus_ulp_rm_f() {
+; CHECK-LABEL: define float @test_fma_neg_1_minus_ulp_rm_f() {
+; CHECK-NEXT:    ret float 0xBFF0000020000000
+;
+  %res = call float @llvm.nvvm.fma.rm.f(float 1.0, float -1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_fma_neg_1_minus_ulp_rn_f() {
+; CHECK-LABEL: define float @test_fma_neg_1_minus_ulp_rn_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rn.f(float 1.0, float -1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_fma_neg_1_minus_ulp_rp_f() {
+; CHECK-LABEL: define float @test_fma_neg_1_minus_ulp_rp_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rp.f(float 1.0, float -1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_fma_neg_1_minus_ulp_rz_f() {
+; CHECK-LABEL: define float @test_fma_neg_1_minus_ulp_rz_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rz.f(float 1.0, float -1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_fma_neg_1_minus_ulp_rm_ftz_f() {
+; CHECK-LABEL: define float @test_fma_neg_1_minus_ulp_rm_ftz_f() {
+; CHECK-NEXT:    ret float 0xBFF0000020000000
+;
+  %res = call float @llvm.nvvm.fma.rm.ftz.f(float 1.0, float -1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_fma_neg_1_minus_ulp_rn_ftz_f() {
+; CHECK-LABEL: define float @test_fma_neg_1_minus_ulp_rn_ftz_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rn.ftz.f(float 1.0, float -1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_fma_neg_1_minus_ulp_rp_ftz_f() {
+; CHECK-LABEL: define float @test_fma_neg_1_minus_ulp_rp_ftz_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rp.ftz.f(float 1.0, float -1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+define float @test_fma_neg_1_minus_ulp_rz_ftz_f() {
+; CHECK-LABEL: define float @test_fma_neg_1_minus_ulp_rz_ftz_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.fma.rz.ftz.f(float 1.0, float -1.0, float 0xBE60000000000000)
+  ret float %res
+}
+
+;###############################################################
+;#                    FMA(1.0, -1.0, -2^(-54))                 #
+;###############################################################
+; Tests FMA with -1.0 and -2^(-54) where different rounding modes produce different results.
+; The exact result falls between -1.0 and -1.0 - 2^(-52).
+; - RN, RZ, RP: Return -1.0 (rounding to nearest/zero/up)
+; - RM: Return -1.0 - 2^(-52) (rounding down)
+
+define double @test_fma_neg_1_minus_ulp_rm_d() {
+; CHECK-LABEL: define double @test_fma_neg_1_minus_ulp_rm_d() {
+; CHECK-NEXT:    ret double 0xBFF0000000000001
+;
+  %res = call double @llvm.nvvm.fma.rm.d(double 1.0, double -1.0, double 0xBC90000000000000)
+  ret double %res
+}
+
+define double @test_fma_neg_1_minus_ulp_rn_d() {
+; CHECK-LABEL: define double @test_fma_neg_1_minus_ulp_rn_d() {
+; CHECK-NEXT:    ret double -1.000000e+00
+;
+  %res = call double @llvm.nvvm.fma.rn.d(double 1.0, double -1.0, double 0xBC90000000000000)
+  ret double %res
+}
+
+define double @test_fma_neg_1_minus_ulp_rp_d() {
+; CHECK-LABEL: define double @test_fma_neg_1_minus_ulp_rp_d() {
+; CHECK-NEXT:    ret double -1.000000e+00
+;
+  %res = call double @llvm.nvvm.fma.rp.d(double 1.0, double -1.0, double 0xBC90000000000000)
+  ret double %res
+}
+
+define double @test_fma_neg_1_minus_ulp_rz_d() {
+; CHECK-LABEL: define double @test_fma_neg_1_minus_ulp_rz_d() {
+; CHECK-NEXT:    ret double -1.000000e+00
+;
+  %res = call double @llvm.nvvm.fma.rz.d(double 1.0, double -1.0, double 0xBC90000000000000)
+  ret double %res
+}
diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-mul.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-mul.ll
new file mode 100644
index 0000000000000..12391e8bf0631
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-mul.ll
@@ -0,0 +1,994 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instsimplify -march=nvptx64 --mcpu=sm_86 --mattr=+ptx72 -S | FileCheck %s
+
+; Check constant-folding for NVVM multiply intrinsics with different rounding modes
+
+;###############################################################
+;#                    Mul(1.25, 2.0)                           #
+;###############################################################
+; Tests multiplication of two normal numbers (1.25 and 2.0) where the result
+; is exactly representable. All rounding modes should produce the same result.
+
+define double @test_1_25_times_2_rm_d() {
+; CHECK-LABEL: define double @test_1_25_times_2_rm_d() {
+; CHECK-NEXT:    ret double 2.500000e+00
+;
+  %res = call double @llvm.nvvm.mul.rm.d(double 1.25, double 2.0)
+  ret double %res
+}
+
+define double @test_1_25_times_2_rn_d() {
+; CHECK-LABEL: define double @test_1_25_times_2_rn_d() {
+; CHECK-NEXT:    ret double 2.500000e+00
+;
+  %res = call double @llvm.nvvm.mul.rn.d(double 1.25, double 2.0)
+  ret double %res
+}
+
+define double @test_1_25_times_2_rp_d() {
+; CHECK-LABEL: define double @test_1_25_times_2_rp_d() {
+; CHECK-NEXT:    ret double 2.500000e+00
+;
+  %res = call double @llvm.nvvm.mul.rp.d(double 1.25, double 2.0)
+  ret double %res
+}
+
+define double @test_1_25_times_2_rz_d() {
+; CHECK-LABEL: define double @test_1_25_times_2_rz_d() {
+; CHECK-NEXT:    ret double 2.500000e+00
+;
+  %res = call double @llvm.nvvm.mul.rz.d(double 1.25, double 2.0)
+  ret double %res
+}
+
+define float @test_1_25_times_2_rm_f() {
+; CHECK-LABEL: define float @test_1_25_times_2_rm_f() {
+; CHECK-NEXT:    ret float 2.500000e+00
+;
+  %res = call float @llvm.nvvm.mul.rm.f(float 1.25, float 2.0)
+  ret float %res
+}
+
+define float @test_1_25_times_2_rn_f() {
+; CHECK-LABEL: define float @test_1_25_times_2_rn_f() {
+; CHECK-NEXT:    ret float 2.500000e+00
+;
+  %res = call float @llvm.nvvm.mul.rn.f(float 1.25, float 2.0)
+  ret float %res
+}
+
+define float @test_1_25_times_2_rp_f() {
+; CHECK-LABEL: define float @test_1_25_times_2_rp_f() {
+; CHECK-NEXT:    ret float 2.500000e+00
+;
+  %res = call float @llvm.nvvm.mul.rp.f(float 1.25, float 2.0)
+  ret float %res
+}
+
+define float @test_1_25_times_2_rz_f() {
+; CHECK-LABEL: define float @test_1_25_times_2_rz_f() {
+; CHECK-NEXT:    ret float 2.500000e+00
+;
+  %res = call float @llvm.nvvm.mul.rz.f(float 1.25, float 2.0)
+  ret float %res
+}
+
+define float @test_1_25_times_2_rm_ftz_f() {
+; CHECK-LABEL: define float @test_1_25_times_2_rm_ftz_f() {
+; CHECK-NEXT:    ret float 2.500000e+00
+;
+  %res = call float @llvm.nvvm.mul.rm.ftz.f(float 1.25, float 2.0)
+  ret float %res
+}
+
+define float @test_1_25_times_2_rn_ftz_f() {
+; CHECK-LABEL: define float @test_1_25_times_2_rn_ftz_f() {
+; CHECK-NEXT:    ret float 2.500000e+00
+;
+  %res = call float @llvm.nvvm.mul.rn.ftz.f(float 1.25, float 2.0)
+  ret float %res
+}
+
+define float @test_1_25_times_2_rp_ftz_f() {
+; CHECK-LABEL: define float @test_1_25_times_2_rp_ftz_f() {
+; CHECK-NEXT:    ret float 2.500000e+00
+;
+  %res = call float @llvm.nvvm.mul.rp.ftz.f(float 1.25, float 2.0)
+  ret float %res
+}
+
+define float @test_1_25_times_2_rz_ftz_f() {
+; CHECK-LABEL: define float @test_1_25_times_2_rz_ftz_f() {
+; CHECK-NEXT:    ret float 2.500000e+00
+;
+  %res = call float @llvm.nvvm.mul.rz.ftz.f(float 1.25, float 2.0)
+  ret float %res
+}
+
+;###############################################################
+;#                    Mul(1.0, Subnormal)                      #
+;###############################################################
+; Tests multiplication of 1.0 by a subnormal number to verify FTZ behavior.
+; For float, we use 2^-149 (smallest subnormal float).
+; For double, we use 2^-1074 (smallest subnormal double).
+; Without FTZ, the result should be the subnormal number.
+; With FTZ, the result should be 0.0.
+
+define double @test_1_times_subnorm_rm_d() {
+; CHECK-LABEL: define double @test_1_times_subnorm_rm_d() {
+; CHECK-NEXT:    ret double 4.940660e-324
+;
+  %res = call double @llvm.nvvm.mul.rm.d(double 1.0, double 0x0000000000000001)
+  ret double %res
+}
+
+define double @test_1_times_subnorm_rn_d() {
+; CHECK-LABEL: define double @test_1_times_subnorm_rn_d() {
+; CHECK-NEXT:    ret double 4.940660e-324
+;
+  %res = call double @llvm.nvvm.mul.rn.d(double 1.0, double 0x0000000000000001)
+  ret double %res
+}
+
+define double @test_1_times_subnorm_rp_d() {
+; CHECK-LABEL: define double @test_1_times_subnorm_rp_d() {
+; CHECK-NEXT:    ret double 4.940660e-324
+;
+  %res = call double @llvm.nvvm.mul.rp.d(double 1.0, double 0x0000000000000001)
+  ret double %res
+}
+
+define double @test_1_times_subnorm_rz_d() {
+; CHECK-LABEL: define double @test_1_times_subnorm_rz_d() {
+; CHECK-NEXT:    ret double 4.940660e-324
+;
+  %res = call double @llvm.nvvm.mul.rz.d(double 1.0, double 0x0000000000000001)
+  ret double %res
+}
+
+define float @test_1_times_subnorm_rm_f() {
+; CHECK-LABEL: define float @test_1_times_subnorm_rm_f() {
+; CHECK-NEXT:    ret float 0x36A0000000000000
+;
+  %res = call float @llvm.nvvm.mul.rm.f(float 1.0, float 0x36A0000000000000)
+  ret float %res
+}
+
+define float @test_1_times_subnorm_rn_f() {
+; CHECK-LABEL: define float @test_1_times_subnorm_rn_f() {
+; CHECK-NEXT:    ret float 0x36A0000000000000
+;
+  %res = call float @llvm.nvvm.mul.rn.f(float 1.0, float 0x36A0000000000000)
+  ret float %res
+}
+
+define float @test_1_times_subnorm_rp_f() {
+; CHECK-LABEL: define float @test_1_times_subnorm_rp_f() {
+; CHECK-NEXT:    ret float 0x36A0000000000000
+;
+  %res = call float @llvm.nvvm.mul.rp.f(float 1.0, float 0x36A0000000000000)
+  ret float %res
+}
+
+define float @test_1_times_subnorm_rz_f() {
+; CHECK-LABEL: define float @test_1_times_subnorm_rz_f() {
+; CHECK-NEXT:    ret float 0x36A0000000000000
+;
+  %res = call float @llvm.nvvm.mul.rz.f(float 1.0, float 0x36A0000000000000)
+  ret float %res
+}
+
+define float @test_1_times_subnorm_rm_ftz_f() {
+; CHECK-LABEL: define float @test_1_times_subnorm_rm_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rm.ftz.f(float 1.0, float 0x36A0000000000000)
+  ret float %res
+}
+
+define float @test_1_times_subnorm_rn_ftz_f() {
+; CHECK-LABEL: define float @test_1_times_subnorm_rn_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rn.ftz.f(float 1.0, float 0x36A0000000000000)
+  ret float %res
+}
+
+define float @test_1_times_subnorm_rp_ftz_f() {
+; CHECK-LABEL: define float @test_1_times_subnorm_rp_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rp.ftz.f(float 1.0, float 0x36A0000000000000)
+  ret float %res
+}
+
+define float @test_1_times_subnorm_rz_ftz_f() {
+; CHECK-LABEL: define float @test_1_times_subnorm_rz_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rz.ftz.f(float 1.0, float 0x36A0000000000000)
+  ret float %res
+}
+
+;###############################################################
+;#                    Mul(1.0, -Subnormal)                     #
+;###############################################################
+; Tests multiplication of 1.0 by a negative subnormal number to verify FTZ behavior.
+; For float, we use -2^-149 (negative smallest subnormal).
+; For double, we use -2^-1074 (negative smallest subnormal).
+; Without FTZ, the result should be the negative subnormal number.
+; With FTZ, the result should be -0.0.
+
+define double @test_1_times_neg_subnorm_rm_d() {
+; CHECK-LABEL: define double @test_1_times_neg_subnorm_rm_d() {
+; CHECK-NEXT:    ret double -4.940660e-324
+;
+  %res = call double @llvm.nvvm.mul.rm.d(double 1.0, double 0x8000000000000001)
+  ret double %res
+}
+
+define double @test_1_times_neg_subnorm_rn_d() {
+; CHECK-LABEL: define double @test_1_times_neg_subnorm_rn_d() {
+; CHECK-NEXT:    ret double -4.940660e-324
+;
+  %res = call double @llvm.nvvm.mul.rn.d(double 1.0, double 0x8000000000000001)
+  ret double %res
+}
+
+define double @test_1_times_neg_subnorm_rp_d() {
+; CHECK-LABEL: define double @test_1_times_neg_subnorm_rp_d() {
+; CHECK-NEXT:    ret double -4.940660e-324
+;
+  %res = call double @llvm.nvvm.mul.rp.d(double 1.0, double 0x8000000000000001)
+  ret double %res
+}
+
+define double @test_1_times_neg_subnorm_rz_d() {
+; CHECK-LABEL: define double @test_1_times_neg_subnorm_rz_d() {
+; CHECK-NEXT:    ret double -4.940660e-324
+;
+  %res = call double @llvm.nvvm.mul.rz.d(double 1.0, double 0x8000000000000001)
+  ret double %res
+}
+
+define float @test_1_times_neg_subnorm_rm_f() {
+; CHECK-LABEL: define float @test_1_times_neg_subnorm_rm_f() {
+; CHECK-NEXT:    ret float 0xB6A0000000000000
+;
+  %res = call float @llvm.nvvm.mul.rm.f(float 1.0, float 0xB6A0000000000000)
+  ret float %res
+}
+
+define float @test_1_times_neg_subnorm_rn_f() {
+; CHECK-LABEL: define float @test_1_times_neg_subnorm_rn_f() {
+; CHECK-NEXT:    ret float 0xB6A0000000000000
+;
+  %res = call float @llvm.nvvm.mul.rn.f(float 1.0, float 0xB6A0000000000000)
+  ret float %res
+}
+
+define float @test_1_times_neg_subnorm_rp_f() {
+; CHECK-LABEL: define float @test_1_times_neg_subnorm_rp_f() {
+; CHECK-NEXT:    ret float 0xB6A0000000000000
+;
+  %res = call float @llvm.nvvm.mul.rp.f(float 1.0, float 0xB6A0000000000000)
+  ret float %res
+}
+
+define float @test_1_times_neg_subnorm_rz_f() {
+; CHECK-LABEL: define float @test_1_times_neg_subnorm_rz_f() {
+; CHECK-NEXT:    ret float 0xB6A0000000000000
+;
+  %res = call float @llvm.nvvm.mul.rz.f(float 1.0, float 0xB6A0000000000000)
+  ret float %res
+}
+
+define float @test_1_times_neg_subnorm_rm_ftz_f() {
+; CHECK-LABEL: define float @test_1_times_neg_subnorm_rm_ftz_f() {
+; CHECK-NEXT:    ret float -0.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rm.ftz.f(float 1.0, float 0xB6A0000000000000)
+  ret float %res
+}
+
+define float @test_1_times_neg_subnorm_rn_ftz_f() {
+; CHECK-LABEL: define float @test_1_times_neg_subnorm_rn_ftz_f() {
+; CHECK-NEXT:    ret float -0.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rn.ftz.f(float 1.0, float 0xB6A0000000000000)
+  ret float %res
+}
+
+define float @test_1_times_neg_subnorm_rp_ftz_f() {
+; CHECK-LABEL: define float @test_1_times_neg_subnorm_rp_ftz_f() {
+; CHECK-NEXT:    ret float -0.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rp.ftz.f(float 1.0, float 0xB6A0000000000000)
+  ret float %res
+}
+
+define float @test_1_times_neg_subnorm_rz_ftz_f() {
+; CHECK-LABEL: define float @test_1_times_neg_subnorm_rz_ftz_f() {
+; CHECK-NEXT:    ret float -0.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rz.ftz.f(float 1.0, float 0xB6A0000000000000)
+  ret float %res
+}
+
+;###############################################################
+;#                    Mul(Normal, Normal) -> Subnormal         #
+;###############################################################
+; Tests multiplication of two normal numbers that produces a subnormal result.
+; We multiply the smallest normal float (2^-126 = 0x3810000000000000) by 0.5 to get 2^-127,
+; which is subnormal. This tests the transition from normal to subnormal numbers.
+; For double precision, we reuse the same inputs since there is no FTZ variant; the result (2^-127) is still a normal double.
+
+define double @test_normal_times_normal_to_subnorm_rm_d() {
+; CHECK-LABEL: define double @test_normal_times_normal_to_subnorm_rm_d() {
+; CHECK-NEXT:    ret double 0x3800000000000000
+;
+  %res = call double @llvm.nvvm.mul.rm.d(double 0x3810000000000000, double 0.5)
+  ret double %res
+}
+
+define double @test_normal_times_normal_to_subnorm_rn_d() {
+; CHECK-LABEL: define double @test_normal_times_normal_to_subnorm_rn_d() {
+; CHECK-NEXT:    ret double 0x3800000000000000
+;
+  %res = call double @llvm.nvvm.mul.rn.d(double 0x3810000000000000, double 0.5)
+  ret double %res
+}
+
+define double @test_normal_times_normal_to_subnorm_rp_d() {
+; CHECK-LABEL: define double @test_normal_times_normal_to_subnorm_rp_d() {
+; CHECK-NEXT:    ret double 0x3800000000000000
+;
+  %res = call double @llvm.nvvm.mul.rp.d(double 0x3810000000000000, double 0.5)
+  ret double %res
+}
+
+define double @test_normal_times_normal_to_subnorm_rz_d() {
+; CHECK-LABEL: define double @test_normal_times_normal_to_subnorm_rz_d() {
+; CHECK-NEXT:    ret double 0x3800000000000000
+;
+  %res = call double @llvm.nvvm.mul.rz.d(double 0x3810000000000000, double 0.5)
+  ret double %res
+}
+
+define float @test_normal_times_normal_to_subnorm_rm_f() {
+; CHECK-LABEL: define float @test_normal_times_normal_to_subnorm_rm_f() {
+; CHECK-NEXT:    ret float 0x3800000000000000
+;
+  %res = call float @llvm.nvvm.mul.rm.f(float 0x3810000000000000, float 0.5)
+  ret float %res
+}
+
+define float @test_normal_times_normal_to_subnorm_rn_f() {
+; CHECK-LABEL: define float @test_normal_times_normal_to_subnorm_rn_f() {
+; CHECK-NEXT:    ret float 0x3800000000000000
+;
+  %res = call float @llvm.nvvm.mul.rn.f(float 0x3810000000000000, float 0.5)
+  ret float %res
+}
+
+define float @test_normal_times_normal_to_subnorm_rp_f() {
+; CHECK-LABEL: define float @test_normal_times_normal_to_subnorm_rp_f() {
+; CHECK-NEXT:    ret float 0x3800000000000000
+;
+  %res = call float @llvm.nvvm.mul.rp.f(float 0x3810000000000000, float 0.5)
+  ret float %res
+}
+
+define float @test_normal_times_normal_to_subnorm_rz_f() {
+; CHECK-LABEL: define float @test_normal_times_normal_to_subnorm_rz_f() {
+; CHECK-NEXT:    ret float 0x3800000000000000
+;
+  %res = call float @llvm.nvvm.mul.rz.f(float 0x3810000000000000, float 0.5)
+  ret float %res
+}
+
+define float @test_normal_times_normal_to_subnorm_rm_ftz_f() {
+; CHECK-LABEL: define float @test_normal_times_normal_to_subnorm_rm_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rm.ftz.f(float 0x3810000000000000, float 0.5)
+  ret float %res
+}
+
+define float @test_normal_times_normal_to_subnorm_rn_ftz_f() {
+; CHECK-LABEL: define float @test_normal_times_normal_to_subnorm_rn_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rn.ftz.f(float 0x3810000000000000, float 0.5)
+  ret float %res
+}
+
+define float @test_normal_times_normal_to_subnorm_rp_ftz_f() {
+; CHECK-LABEL: define float @test_normal_times_normal_to_subnorm_rp_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rp.ftz.f(float 0x3810000000000000, float 0.5)
+  ret float %res
+}
+
+define float @test_normal_times_normal_to_subnorm_rz_ftz_f() {
+; CHECK-LABEL: define float @test_normal_times_normal_to_subnorm_rz_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rz.ftz.f(float 0x3810000000000000, float 0.5)
+  ret float %res
+}
+
+;###############################################################
+;#                    Mul(2.0, NaN)                            #
+;###############################################################
+; Tests multiplication with NaN to verify that we do not fold these,
+; as host and device NaNs may be different.
+
+define double @test_2_times_nan_rm_d() {
+; CHECK-LABEL: define double @test_2_times_nan_rm_d() {
+; CHECK-NEXT:    [[RES:%.*]] = call double @llvm.nvvm.mul.rm.d(double 2.000000e+00, double 0x7FF4444400000000)
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %res = call double @llvm.nvvm.mul.rm.d(double 2.0, double 0x7FF4444400000000)
+  ret double %res
+}
+
+define double @test_2_times_nan_rn_d() {
+; CHECK-LABEL: define double @test_2_times_nan_rn_d() {
+; CHECK-NEXT:    [[RES:%.*]] = call double @llvm.nvvm.mul.rn.d(double 2.000000e+00, double 0x7FF4444400000000)
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %res = call double @llvm.nvvm.mul.rn.d(double 2.0, double 0x7FF4444400000000)
+  ret double %res
+}
+
+define double @test_2_times_nan_rp_d() {
+; CHECK-LABEL: define double @test_2_times_nan_rp_d() {
+; CHECK-NEXT:    [[RES:%.*]] = call double @llvm.nvvm.mul.rp.d(double 2.000000e+00, double 0x7FF4444400000000)
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %res = call double @llvm.nvvm.mul.rp.d(double 2.0, double 0x7FF4444400000000)
+  ret double %res
+}
+
+define double @test_2_times_nan_rz_d() {
+; CHECK-LABEL: define double @test_2_times_nan_rz_d() {
+; CHECK-NEXT:    [[RES:%.*]] = call double @llvm.nvvm.mul.rz.d(double 2.000000e+00, double 0x7FF4444400000000)
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %res = call double @llvm.nvvm.mul.rz.d(double 2.0, double 0x7FF4444400000000)
+  ret double %res
+}
+
+define float @test_2_times_nan_rm_f() {
+; CHECK-LABEL: define float @test_2_times_nan_rm_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.mul.rm.f(float 2.000000e+00, float 0x7FFF444400000000)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.mul.rm.f(float 2.0, float 0x7FFF444400000000)
+  ret float %res
+}
+
+define float @test_2_times_nan_rn_f() {
+; CHECK-LABEL: define float @test_2_times_nan_rn_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.mul.rn.f(float 2.000000e+00, float 0x7FFF444400000000)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.mul.rn.f(float 2.0, float 0x7FFF444400000000)
+  ret float %res
+}
+
+define float @test_2_times_nan_rp_f() {
+; CHECK-LABEL: define float @test_2_times_nan_rp_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.mul.rp.f(float 2.000000e+00, float 0x7FFF444400000000)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.mul.rp.f(float 2.0, float 0x7FFF444400000000)
+  ret float %res
+}
+
+define float @test_2_times_nan_rz_f() {
+; CHECK-LABEL: define float @test_2_times_nan_rz_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.mul.rz.f(float 2.000000e+00, float 0x7FFF444400000000)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.mul.rz.f(float 2.0, float 0x7FFF444400000000)
+  ret float %res
+}
+
+define float @test_2_times_nan_rm_ftz_f() {
+; CHECK-LABEL: define float @test_2_times_nan_rm_ftz_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.mul.rm.ftz.f(float 2.000000e+00, float 0x7FFF444400000000)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.mul.rm.ftz.f(float 2.0, float 0x7FFF444400000000)
+  ret float %res
+}
+
+define float @test_2_times_nan_rn_ftz_f() {
+; CHECK-LABEL: define float @test_2_times_nan_rn_ftz_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.mul.rn.ftz.f(float 2.000000e+00, float 0x7FFF444400000000)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.mul.rn.ftz.f(float 2.0, float 0x7FFF444400000000)
+  ret float %res
+}
+
+define float @test_2_times_nan_rp_ftz_f() {
+; CHECK-LABEL: define float @test_2_times_nan_rp_ftz_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.mul.rp.ftz.f(float 2.000000e+00, float 0x7FFF444400000000)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.mul.rp.ftz.f(float 2.0, float 0x7FFF444400000000)
+  ret float %res
+}
+
+define float @test_2_times_nan_rz_ftz_f() {
+; CHECK-LABEL: define float @test_2_times_nan_rz_ftz_f() {
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.nvvm.mul.rz.ftz.f(float 2.000000e+00, float 0x7FFF444400000000)
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %res = call float @llvm.nvvm.mul.rz.ftz.f(float 2.0, float 0x7FFF444400000000)
+  ret float %res
+}
+
+;###############################################################
+;#                    Mul(0.75, 4/3 + epsilon)                 #
+;###############################################################
+; Tests multiplication of 0.75 (3/4) by a value slightly above 4/3,
+; where different rounding modes produce different results.
+; The exact result would be 1.0, but since 4/3 cannot be exactly encoded
+; as a float, the calculated result falls between 1.0 and 1.0 + 2^-23.
+; - RN, RZ, RM round to 1.0 (rounding to nearest/zero/down)
+; - RP rounds to 1.0 + 2^-23 (rounding up)
+
+define float @test_mul_just_above_1_rm_f() {
+; CHECK-LABEL: define float @test_mul_just_above_1_rm_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rm.f(float 0.75, float 0x3FF5555560000000)
+  ret float %res
+}
+
+define float @test_mul_just_above_1_rn_f() {
+; CHECK-LABEL: define float @test_mul_just_above_1_rn_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rn.f(float 0.75, float 0x3FF5555560000000)
+  ret float %res
+}
+
+define float @test_mul_just_above_1_rp_f() {
+; CHECK-LABEL: define float @test_mul_just_above_1_rp_f() {
+; CHECK-NEXT:    ret float 0x3FF0000020000000
+;
+  %res = call float @llvm.nvvm.mul.rp.f(float 0.75, float 0x3FF5555560000000)
+  ret float %res
+}
+
+define float @test_mul_just_above_1_rz_f() {
+; CHECK-LABEL: define float @test_mul_just_above_1_rz_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rz.f(float 0.75, float 0x3FF5555560000000)
+  ret float %res
+}
+
+define float @test_mul_just_above_1_rm_ftz_f() {
+; CHECK-LABEL: define float @test_mul_just_above_1_rm_ftz_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rm.ftz.f(float 0.75, float 0x3FF5555560000000)
+  ret float %res
+}
+
+define float @test_mul_just_above_1_rn_ftz_f() {
+; CHECK-LABEL: define float @test_mul_just_above_1_rn_ftz_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rn.ftz.f(float 0.75, float 0x3FF5555560000000)
+  ret float %res
+}
+
+define float @test_mul_just_above_1_rp_ftz_f() {
+; CHECK-LABEL: define float @test_mul_just_above_1_rp_ftz_f() {
+; CHECK-NEXT:    ret float 0x3FF0000020000000
+;
+  %res = call float @llvm.nvvm.mul.rp.ftz.f(float 0.75, float 0x3FF5555560000000)
+  ret float %res
+}
+
+define float @test_mul_just_above_1_rz_ftz_f() {
+; CHECK-LABEL: define float @test_mul_just_above_1_rz_ftz_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rz.ftz.f(float 0.75, float 0x3FF5555560000000)
+  ret float %res
+}
+
+;###############################################################
+;#                    Mul(0.75, 4/3 + epsilon)                 #
+;###############################################################
+; Tests multiplication of 0.75 by a value slightly above 4/3,
+; where different rounding modes produce different results.
+; The exact result would be 1.0, but since 4/3 cannot be exactly encoded
+; as a double, the calculated result falls between 1.0 and 1.0 + 2^-52.
+; - RN, RZ, RM round to 1.0 (rounding to nearest/zero/down)
+; - RP rounds to 1.0 + 2^-52 (rounding up)
+
+define double @test_mul_just_above_1_rm_d() {
+; CHECK-LABEL: define double @test_mul_just_above_1_rm_d() {
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+  %res = call double @llvm.nvvm.mul.rm.d(double 0.75, double 0x3FF5555555555556)
+  ret double %res
+}
+
+define double @test_mul_just_above_1_rn_d() {
+; CHECK-LABEL: define double @test_mul_just_above_1_rn_d() {
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+  %res = call double @llvm.nvvm.mul.rn.d(double 0.75, double 0x3FF5555555555556)
+  ret double %res
+}
+
+define double @test_mul_just_above_1_rp_d() {
+; CHECK-LABEL: define double @test_mul_just_above_1_rp_d() {
+; CHECK-NEXT:    ret double 0x3FF0000000000001
+;
+  %res = call double @llvm.nvvm.mul.rp.d(double 0.75, double 0x3FF5555555555556)
+  ret double %res
+}
+
+define double @test_mul_just_above_1_rz_d() {
+; CHECK-LABEL: define double @test_mul_just_above_1_rz_d() {
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+  %res = call double @llvm.nvvm.mul.rz.d(double 0.75, double 0x3FF5555555555556)
+  ret double %res
+}
+
+;###############################################################
+;#                    Mul(-0.75, 4/3 + epsilon)                #
+;###############################################################
+; Tests multiplication of -0.75 by a value slightly above 4/3,
+; where different rounding modes produce different results.
+; The exact result would be -1.0, but since 4/3 cannot be exactly encoded
+; as a float, the calculated result falls between -1.0 and -1.0 - 2^-23.
+; - RN, RZ, RP round to -1.0 (rounding to nearest/zero/up)
+; - RM rounds to -1.0 - 2^-23 (rounding down)
+
+define float @test_mul_just_below_negative_1_rm_f() {
+; CHECK-LABEL: define float @test_mul_just_below_negative_1_rm_f() {
+; CHECK-NEXT:    ret float 0xBFF0000020000000
+;
+  %res = call float @llvm.nvvm.mul.rm.f(float -0.75, float 0x3FF5555560000000)
+  ret float %res
+}
+
+define float @test_mul_just_below_negative_1_rn_f() {
+; CHECK-LABEL: define float @test_mul_just_below_negative_1_rn_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rn.f(float -0.75, float 0x3FF5555560000000)
+  ret float %res
+}
+
+define float @test_mul_just_below_negative_1_rp_f() {
+; CHECK-LABEL: define float @test_mul_just_below_negative_1_rp_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rp.f(float -0.75, float 0x3FF5555560000000)
+  ret float %res
+}
+
+define float @test_mul_just_below_negative_1_rz_f() {
+; CHECK-LABEL: define float @test_mul_just_below_negative_1_rz_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rz.f(float -0.75, float 0x3FF5555560000000)
+  ret float %res
+}
+
+define float @test_mul_just_below_negative_1_rm_ftz_f() {
+; CHECK-LABEL: define float @test_mul_just_below_negative_1_rm_ftz_f() {
+; CHECK-NEXT:    ret float 0xBFF0000020000000
+;
+  %res = call float @llvm.nvvm.mul.rm.ftz.f(float -0.75, float 0x3FF5555560000000)
+  ret float %res
+}
+
+define float @test_mul_just_below_negative_1_rn_ftz_f() {
+; CHECK-LABEL: define float @test_mul_just_below_negative_1_rn_ftz_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rn.ftz.f(float -0.75, float 0x3FF5555560000000)
+  ret float %res
+}
+
+define float @test_mul_just_below_negative_1_rp_ftz_f() {
+; CHECK-LABEL: define float @test_mul_just_below_negative_1_rp_ftz_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rp.ftz.f(float -0.75, float 0x3FF5555560000000)
+  ret float %res
+}
+
+define float @test_mul_just_below_negative_1_rz_ftz_f() {
+; CHECK-LABEL: define float @test_mul_just_below_negative_1_rz_ftz_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rz.ftz.f(float -0.75, float 0x3FF5555560000000)
+  ret float %res
+}
+
+;###############################################################
+;#                    Mul(-0.75, 4/3 + epsilon)                #
+;###############################################################
+; Tests multiplication of -0.75 by a value slightly above 4/3,
+; where different rounding modes produce different results.
+; The exact result would be -1.0, but since 4/3 cannot be exactly encoded
+; as a double, the calculated result falls between -1.0 and -1.0 - 2^-52.
+; - RN, RZ, RP round to -1.0 (rounding to nearest/zero/up)
+; - RM rounds to -1.0 - 2^-52 (rounding down)
+
+define double @test_mul_just_below_negative_1_rm_d() {
+; CHECK-LABEL: define double @test_mul_just_below_negative_1_rm_d() {
+; CHECK-NEXT:    ret double 0xBFF0000000000001
+;
+  %res = call double @llvm.nvvm.mul.rm.d(double -0.75, double 0x3FF5555555555556)
+  ret double %res
+}
+
+define double @test_mul_just_below_negative_1_rn_d() {
+; CHECK-LABEL: define double @test_mul_just_below_negative_1_rn_d() {
+; CHECK-NEXT:    ret double -1.000000e+00
+;
+  %res = call double @llvm.nvvm.mul.rn.d(double -0.75, double 0x3FF5555555555556)
+  ret double %res
+}
+
+define double @test_mul_just_below_negative_1_rp_d() {
+; CHECK-LABEL: define double @test_mul_just_below_negative_1_rp_d() {
+; CHECK-NEXT:    ret double -1.000000e+00
+;
+  %res = call double @llvm.nvvm.mul.rp.d(double -0.75, double 0x3FF5555555555556)
+  ret double %res
+}
+
+define double @test_mul_just_below_negative_1_rz_d() {
+; CHECK-LABEL: define double @test_mul_just_below_negative_1_rz_d() {
+; CHECK-NEXT:    ret double -1.000000e+00
+;
+  %res = call double @llvm.nvvm.mul.rz.d(double -0.75, double 0x3FF5555555555556)
+  ret double %res
+}
+
+;###############################################################
+;#                   Mul(0.625, 1.6 + epsilon)                 #
+;###############################################################
+; Tests multiplication of 5/8 * ~8/5 with different rounding modes.
+; Multiply 0.625 (5/8) by a value very slightly above 8/5 = 1.6 + epsilon.
+; The exact result is between 1.0 and 1.0 + 2^-23
+; - RN, RP round to 1.0 + 2^-23 (rounding towards nearest/up)
+; - RZ, RM round to 1.0 (rounding towards zero/down)
+
+define float @test_mul_slightly_more_above_1_rm_f() {
+; CHECK-LABEL: define float @test_mul_slightly_more_above_1_rm_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rm.f(float 0x3FE4000000000000, float 0x3FF99999C0000000)
+  ret float %res
+}
+
+define float @test_mul_slightly_more_above_1_rn_f() {
+; CHECK-LABEL: define float @test_mul_slightly_more_above_1_rn_f() {
+; CHECK-NEXT:    ret float 0x3FF0000020000000
+;
+  %res = call float @llvm.nvvm.mul.rn.f(float 0x3FE4000000000000, float 0x3FF99999C0000000)
+  ret float %res
+}
+
+define float @test_mul_slightly_more_above_1_rp_f() {
+; CHECK-LABEL: define float @test_mul_slightly_more_above_1_rp_f() {
+; CHECK-NEXT:    ret float 0x3FF0000020000000
+;
+  %res = call float @llvm.nvvm.mul.rp.f(float 0x3FE4000000000000, float 0x3FF99999C0000000)
+  ret float %res
+}
+
+define float @test_mul_slightly_more_above_1_rz_f() {
+; CHECK-LABEL: define float @test_mul_slightly_more_above_1_rz_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rz.f(float 0x3FE4000000000000, float 0x3FF99999C0000000)
+  ret float %res
+}
+
+define float @test_mul_slightly_more_above_1_rm_ftz_f() {
+; CHECK-LABEL: define float @test_mul_slightly_more_above_1_rm_ftz_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rm.ftz.f(float 0x3FE4000000000000, float 0x3FF99999C0000000)
+  ret float %res
+}
+
+define float @test_mul_slightly_more_above_1_rn_ftz_f() {
+; CHECK-LABEL: define float @test_mul_slightly_more_above_1_rn_ftz_f() {
+; CHECK-NEXT:    ret float 0x3FF0000020000000
+;
+  %res = call float @llvm.nvvm.mul.rn.ftz.f(float 0x3FE4000000000000, float 0x3FF99999C0000000)
+  ret float %res
+}
+
+define float @test_mul_slightly_more_above_1_rp_ftz_f() {
+; CHECK-LABEL: define float @test_mul_slightly_more_above_1_rp_ftz_f() {
+; CHECK-NEXT:    ret float 0x3FF0000020000000
+;
+  %res = call float @llvm.nvvm.mul.rp.ftz.f(float 0x3FE4000000000000, float 0x3FF99999C0000000)
+  ret float %res
+}
+
+define float @test_mul_slightly_more_above_1_rz_ftz_f() {
+; CHECK-LABEL: define float @test_mul_slightly_more_above_1_rz_ftz_f() {
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rz.ftz.f(float 0x3FE4000000000000, float 0x3FF99999C0000000)
+  ret float %res
+}
+
+;###############################################################
+;#                    Mul(0.625, 1.6 + epsilon)                #
+;###############################################################
+; Tests multiplication of 5/8 * ~8/5 with different rounding modes.
+; Multiply 0.625 (5/8) by a value very slightly above 8/5 = 1.6 + epsilon.
+; The exact result is between 1.0 and 1.0 + 2^-52
+; - RN, RP round to 1.0 + 2^-52 (rounding towards nearest/up)
+; - RZ, RM round to 1.0 (rounding towards zero/down)
+
+define double @test_mul_slightly_more_above_1_rm_d() {
+; CHECK-LABEL: define double @test_mul_slightly_more_above_1_rm_d() {
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+  %res = call double @llvm.nvvm.mul.rm.d(double 0x3FE4000000000000, double 0x3FF999999999999B)
+  ret double %res
+}
+
+define double @test_mul_slightly_more_above_1_rn_d() {
+; CHECK-LABEL: define double @test_mul_slightly_more_above_1_rn_d() {
+; CHECK-NEXT:    ret double 0x3FF0000000000001
+;
+  %res = call double @llvm.nvvm.mul.rn.d(double 0x3FE4000000000000, double 0x3FF999999999999B)
+  ret double %res
+}
+
+define double @test_mul_slightly_more_above_1_rp_d() {
+; CHECK-LABEL: define double @test_mul_slightly_more_above_1_rp_d() {
+; CHECK-NEXT:    ret double 0x3FF0000000000001
+;
+  %res = call double @llvm.nvvm.mul.rp.d(double 0x3FE4000000000000, double 0x3FF999999999999B)
+  ret double %res
+}
+
+define double @test_mul_slightly_more_above_1_rz_d() {
+; CHECK-LABEL: define double @test_mul_slightly_more_above_1_rz_d() {
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+  %res = call double @llvm.nvvm.mul.rz.d(double 0x3FE4000000000000, double 0x3FF999999999999B)
+  ret double %res
+}
+
+;###############################################################
+;#                    Mul(0.625, -(1.6 + epsilon))             #
+;###############################################################
+; Tests multiplication of 5/8 * ~-8/5 with different rounding modes.
+; Multiply 0.625 (5/8) by a value very slightly below -8/5 = -(1.6 + epsilon).
+; The exact result is between -1.0 - 2^-23 and -1.0
+; - RN, RM round to -1.0 - 2^-23 (rounding towards nearest/down)
+; - RZ, RP round to -1.0 (rounding towards zero/up)
+
+define float @test_mul_slightly_more_below_negative_1_rm_f() {
+; CHECK-LABEL: define float @test_mul_slightly_more_below_negative_1_rm_f() {
+; CHECK-NEXT:    ret float 0xBFF0000020000000
+;
+  %res = call float @llvm.nvvm.mul.rm.f(float 0x3FE4000000000000, float 0xBFF99999C0000000)
+  ret float %res
+}
+
+define float @test_mul_slightly_more_below_negative_1_rn_f() {
+; CHECK-LABEL: define float @test_mul_slightly_more_below_negative_1_rn_f() {
+; CHECK-NEXT:    ret float 0xBFF0000020000000
+;
+  %res = call float @llvm.nvvm.mul.rn.f(float 0x3FE4000000000000, float 0xBFF99999C0000000)
+  ret float %res
+}
+
+define float @test_mul_slightly_more_below_negative_1_rp_f() {
+; CHECK-LABEL: define float @test_mul_slightly_more_below_negative_1_rp_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rp.f(float 0x3FE4000000000000, float 0xBFF99999C0000000)
+  ret float %res
+}
+
+define float @test_mul_slightly_more_below_negative_1_rz_f() {
+; CHECK-LABEL: define float @test_mul_slightly_more_below_negative_1_rz_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rz.f(float 0x3FE4000000000000, float 0xBFF99999C0000000)
+  ret float %res
+}
+
+define float @test_mul_slightly_more_below_negative_1_rm_ftz_f() {
+; CHECK-LABEL: define float @test_mul_slightly_more_below_negative_1_rm_ftz_f() {
+; CHECK-NEXT:    ret float 0xBFF0000020000000
+;
+  %res = call float @llvm.nvvm.mul.rm.ftz.f(float 0x3FE4000000000000, float 0xBFF99999C0000000)
+  ret float %res
+}
+
+define float @test_mul_slightly_more_below_negative_1_rn_ftz_f() {
+; CHECK-LABEL: define float @test_mul_slightly_more_below_negative_1_rn_ftz_f() {
+; CHECK-NEXT:    ret float 0xBFF0000020000000
+;
+  %res = call float @llvm.nvvm.mul.rn.ftz.f(float 0x3FE4000000000000, float 0xBFF99999C0000000)
+  ret float %res
+}
+
+define float @test_mul_slightly_more_below_negative_1_rp_ftz_f() {
+; CHECK-LABEL: define float @test_mul_slightly_more_below_negative_1_rp_ftz_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rp.ftz.f(float 0x3FE4000000000000, float 0xBFF99999C0000000)
+  ret float %res
+}
+
+define float @test_mul_slightly_more_below_negative_1_rz_ftz_f() {
+; CHECK-LABEL: define float @test_mul_slightly_more_below_negative_1_rz_ftz_f() {
+; CHECK-NEXT:    ret float -1.000000e+00
+;
+  %res = call float @llvm.nvvm.mul.rz.ftz.f(float 0x3FE4000000000000, float 0xBFF99999C0000000)
+  ret float %res
+}
+
+;###############################################################
+;#                    Mul(0.625, -(1.6 + epsilon))             #
+;###############################################################
+; Tests multiplication of 5/8 * ~-8/5 with different rounding modes.
+; Multiply 0.625 (5/8) by a value very slightly below -8/5 = -(1.6 + epsilon).
+; The exact result is between -1.0 - 2^-52 and -1.0
+; - RN, RM round to -1.0 - 2^-52 (rounding towards nearest/down)
+; - RZ, RP round to -1.0 (rounding towards zero/up)
+
+define double @test_mul_slightly_more_below_negative_1_rm_d() {
+; CHECK-LABEL: define double @test_mul_slightly_more_below_negative_1_rm_d() {
+; CHECK-NEXT:    ret double 0xBFF0000000000001
+;
+  %res = call double @llvm.nvvm.mul.rm.d(double 0x3FE4000000000000, double 0xBFF999999999999B)
+  ret double %res
+}
+
+define double @test_mul_slightly_more_below_negative_1_rn_d() {
+; CHECK-LABEL: define double @test_mul_slightly_more_below_negative_1_rn_d() {
+; CHECK-NEXT:    ret double 0xBFF0000000000001
+;
+  %res = call double @llvm.nvvm.mul.rn.d(double 0x3FE4000000000000, double 0xBFF999999999999B)
+  ret double %res
+}
+
+define double @test_mul_slightly_more_below_negative_1_rp_d() {
+; CHECK-LABEL: define double @test_mul_slightly_more_below_negative_1_rp_d() {
+; CHECK-NEXT:    ret double -1.000000e+00
+;
+  %res = call double @llvm.nvvm.mul.rp.d(double 0x3FE4000000000000, double 0xBFF999999999999B)
+  ret double %res
+}
+
+define double @test_mul_slightly_more_below_negative_1_rz_d() {
+; CHECK-LABEL: define double @test_mul_slightly_more_below_negative_1_rz_d() {
+; CHECK-NEXT:    ret double -1.000000e+00
+;
+  %res = call double @llvm.nvvm.mul.rz.d(double 0x3FE4000000000000, double 0xBFF999999999999B)
+  ret double %res
+}