[llvm] [NVPTX] Constant fold NVVM fmin and fmax (PR #121966)

Lewis Crawford via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 13 06:55:02 PST 2025


https://github.com/LewisCrawford updated https://github.com/llvm/llvm-project/pull/121966

>From 7cad862564c35f479825a7a32215fd7ece6219e1 Mon Sep 17 00:00:00 2001
From: Lewis Crawford <lcrawford at nvidia.com>
Date: Tue, 7 Jan 2025 17:17:19 +0000
Subject: [PATCH 1/6] [NVPTX] Constant fold NVVM fmin and fmax

Add constant-folding for nvvm float/double fmin + fmax intrinsics,
including all combinations of xorsign.abs, nan-propagation, and ftz.
---
 llvm/include/llvm/IR/NVVMIntrinsicUtils.h     |  51 +-
 llvm/lib/Analysis/ConstantFolding.cpp         | 134 +++-
 .../InstSimplify/const-fold-nvvm-fmin-fmax.ll | 614 ++++++++++++++++++
 3 files changed, 796 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Transforms/InstSimplify/const-fold-nvvm-fmin-fmax.ll

diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
index 8ca073ba822534..d533f944f90ff2 100644
--- a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
+++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
@@ -38,9 +38,8 @@ enum class TMAReductionOp : uint8_t {
   XOR = 7,
 };
 
-inline bool IntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
+inline bool FloatToIntIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
   switch (IntrinsicID) {
-  // Float to i32 / i64 conversion intrinsics:
   case Intrinsic::nvvm_f2i_rm_ftz:
   case Intrinsic::nvvm_f2i_rn_ftz:
   case Intrinsic::nvvm_f2i_rp_ftz:
@@ -171,6 +170,54 @@ IntrinsicGetRoundingMode(Intrinsic::ID IntrinsicID) {
   return APFloat::roundingMode::Invalid;
 }
 
+inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) {
+  switch (IntrinsicID) {
+  case Intrinsic::nvvm_fmax_ftz_f:
+  case Intrinsic::nvvm_fmax_ftz_nan_f:
+  case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
+  case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
+
+  case Intrinsic::nvvm_fmin_ftz_f:
+  case Intrinsic::nvvm_fmin_ftz_nan_f:
+  case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
+  case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
+    return true;
+  }
+  return false;
+}
+
+inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) {
+  switch (IntrinsicID) {
+  case Intrinsic::nvvm_fmax_ftz_nan_f:
+  case Intrinsic::nvvm_fmax_nan_f:
+  case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
+  case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
+
+  case Intrinsic::nvvm_fmin_ftz_nan_f:
+  case Intrinsic::nvvm_fmin_nan_f:
+  case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
+  case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
+    return true;
+  }
+  return false;
+}
+
+inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) {
+  switch (IntrinsicID) {
+  case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
+  case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
+  case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
+  case Intrinsic::nvvm_fmax_xorsign_abs_f:
+
+  case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
+  case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
+  case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
+  case Intrinsic::nvvm_fmin_xorsign_abs_f:
+    return true;
+  }
+  return false;
+}
+
 } // namespace nvvm
 } // namespace llvm
 #endif // LLVM_IR_NVVMINTRINSICUTILS_H
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 031d675c330ec4..75150ed97aa7b4 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1689,6 +1689,28 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::x86_avx512_cvttsd2usi64:
     return !Call->isStrictFP();
 
+  // NVVM FMax intrinsics
+  case Intrinsic::nvvm_fmax_d:
+  case Intrinsic::nvvm_fmax_f:
+  case Intrinsic::nvvm_fmax_ftz_f:
+  case Intrinsic::nvvm_fmax_ftz_nan_f:
+  case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
+  case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
+  case Intrinsic::nvvm_fmax_nan_f:
+  case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
+  case Intrinsic::nvvm_fmax_xorsign_abs_f:
+
+  // NVVM FMin intrinsics
+  case Intrinsic::nvvm_fmin_d:
+  case Intrinsic::nvvm_fmin_f:
+  case Intrinsic::nvvm_fmin_ftz_f:
+  case Intrinsic::nvvm_fmin_ftz_nan_f:
+  case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
+  case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
+  case Intrinsic::nvvm_fmin_nan_f:
+  case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
+  case Intrinsic::nvvm_fmin_xorsign_abs_f:
+
   // NVVM float/double to int32/uint32 conversion intrinsics
   case Intrinsic::nvvm_f2i_rm:
   case Intrinsic::nvvm_f2i_rn:
@@ -2432,7 +2454,7 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
         return ConstantInt::get(Ty, 0);
 
       APFloat::roundingMode RMode = nvvm::IntrinsicGetRoundingMode(IntrinsicID);
-      bool IsFTZ = nvvm::IntrinsicShouldFTZ(IntrinsicID);
+      bool IsFTZ = nvvm::FloatToIntIntrinsicShouldFTZ(IntrinsicID);
       bool IsSigned = nvvm::IntrinsicConvertsToSignedInteger(IntrinsicID);
 
       APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned);
@@ -2892,12 +2914,49 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
     case Intrinsic::minnum:
     case Intrinsic::maximum:
     case Intrinsic::minimum:
+    case Intrinsic::nvvm_fmax_d:
+    case Intrinsic::nvvm_fmin_d:
       // If one argument is undef, return the other argument.
       if (IsOp0Undef)
         return Operands[1];
       if (IsOp1Undef)
         return Operands[0];
       break;
+
+    case Intrinsic::nvvm_fmax_f:
+    case Intrinsic::nvvm_fmax_ftz_f:
+    case Intrinsic::nvvm_fmax_ftz_nan_f:
+    case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
+    case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
+    case Intrinsic::nvvm_fmax_nan_f:
+    case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
+    case Intrinsic::nvvm_fmax_xorsign_abs_f:
+
+    case Intrinsic::nvvm_fmin_f:
+    case Intrinsic::nvvm_fmin_ftz_f:
+    case Intrinsic::nvvm_fmin_ftz_nan_f:
+    case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
+    case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
+    case Intrinsic::nvvm_fmin_nan_f:
+    case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
+    case Intrinsic::nvvm_fmin_xorsign_abs_f:
+      // If one arg is undef, the other arg can be returned only if it is
+      // constant, as we may need to flush it to sign-preserving zero or
+      // canonicalize the NaN.
+      if (!IsOp0Undef && !IsOp1Undef)
+        break;
+      if (auto *Op = dyn_cast<ConstantFP>(Operands[IsOp0Undef ? 1 : 0])) {
+        if (Op->isNaN()) {
+          APInt NVCanonicalNaN(32, 0x7fffffff);
+          return ConstantFP::get(
+              Ty, APFloat(Ty->getFltSemantics(), NVCanonicalNaN));
+        }
+        if (nvvm::FMinFMaxShouldFTZ(IntrinsicID))
+          return ConstantFP::get(Ty, FTZPreserveSign(Op->getValueAPF()));
+        else
+          return Op;
+      }
+      break;
     }
   }
 
@@ -2955,6 +3014,79 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
         return ConstantFP::get(Ty->getContext(), minimum(Op1V, Op2V));
       case Intrinsic::maximum:
         return ConstantFP::get(Ty->getContext(), maximum(Op1V, Op2V));
+
+      case Intrinsic::nvvm_fmax_d:
+      case Intrinsic::nvvm_fmax_f:
+      case Intrinsic::nvvm_fmax_ftz_f:
+      case Intrinsic::nvvm_fmax_ftz_nan_f:
+      case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
+      case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
+      case Intrinsic::nvvm_fmax_nan_f:
+      case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
+      case Intrinsic::nvvm_fmax_xorsign_abs_f:
+
+      case Intrinsic::nvvm_fmin_d:
+      case Intrinsic::nvvm_fmin_f:
+      case Intrinsic::nvvm_fmin_ftz_f:
+      case Intrinsic::nvvm_fmin_ftz_nan_f:
+      case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
+      case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
+      case Intrinsic::nvvm_fmin_nan_f:
+      case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
+      case Intrinsic::nvvm_fmin_xorsign_abs_f: {
+
+        bool ShouldCanonicalizeNaNs = IntrinsicID != Intrinsic::nvvm_fmax_d &&
+                                      IntrinsicID != Intrinsic::nvvm_fmin_d;
+        bool IsFTZ = nvvm::FMinFMaxShouldFTZ(IntrinsicID);
+        bool IsNaNPropagating = nvvm::FMinFMaxPropagatesNaNs(IntrinsicID);
+        bool IsXorSignAbs = nvvm::FMinFMaxIsXorSignAbs(IntrinsicID);
+
+        APFloat A = IsFTZ ? FTZPreserveSign(Op1V) : Op1V;
+        APFloat B = IsFTZ ? FTZPreserveSign(Op2V) : Op2V;
+
+        bool XorSign = false;
+        if (IsXorSignAbs) {
+          XorSign = A.isNegative() ^ B.isNegative();
+          A = abs(A);
+          B = abs(B);
+        }
+
+        bool IsFMax = false;
+        switch (IntrinsicID) {
+        case Intrinsic::nvvm_fmax_d:
+        case Intrinsic::nvvm_fmax_f:
+        case Intrinsic::nvvm_fmax_ftz_f:
+        case Intrinsic::nvvm_fmax_ftz_nan_f:
+        case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
+        case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
+        case Intrinsic::nvvm_fmax_nan_f:
+        case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
+        case Intrinsic::nvvm_fmax_xorsign_abs_f:
+          IsFMax = true;
+          break;
+        }
+        APFloat Res = IsFMax ? maximum(A, B) : minimum(A, B);
+
+        if (ShouldCanonicalizeNaNs) {
+          APFloat NVCanonicalNaN(Res.getSemantics(), APInt(32, 0x7fffffff));
+          if (A.isNaN() && B.isNaN())
+            return ConstantFP::get(Ty, NVCanonicalNaN);
+          else if (IsNaNPropagating && (A.isNaN() || B.isNaN()))
+            return ConstantFP::get(Ty, NVCanonicalNaN);
+        }
+
+        if (A.isNaN() && B.isNaN())
+          return Operands[1];
+        else if (A.isNaN())
+          Res = B;
+        else if (B.isNaN())
+          Res = A;
+
+        if (IsXorSignAbs && XorSign != Res.isNegative())
+          Res.changeSign();
+
+        return ConstantFP::get(Ty->getContext(), Res);
+      }
       }
 
       if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-fmin-fmax.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-fmin-fmax.ll
new file mode 100644
index 00000000000000..ab277483dbba5a
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-fmin-fmax.ll
@@ -0,0 +1,614 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instsimplify -march=nvptx64 --mcpu=sm_86 --mattr=+ptx72 -S | FileCheck %s
+
+; Check constant-folding for NVVM fmin fmax intrinsics
+
+;###############################################################
+;#                    FMax(1.25, -2.0)                         #
+;###############################################################
+
+define double @test_fmax_1_25_neg_2_d() {
+; CHECK-LABEL: define double @test_fmax_1_25_neg_2_d() {
+; CHECK-NEXT:    ret double 1.250000e+00
+;
+  %res = call double @llvm.nvvm.fmax.d(double 1.25, double -2.0)
+  ret double %res
+}
+
+define float @test_fmax_1_25_neg_2_f() {
+; CHECK-LABEL: define float @test_fmax_1_25_neg_2_f() {
+; CHECK-NEXT:    ret float 1.250000e+00
+;
+  %res = call float @llvm.nvvm.fmax.f(float 1.25, float -2.0)
+  ret float %res
+}
+
+define float @test_fmax_1_25_neg_2_ftz_f() {
+; CHECK-LABEL: define float @test_fmax_1_25_neg_2_ftz_f() {
+; CHECK-NEXT:    ret float 1.250000e+00
+;
+  %res = call float @llvm.nvvm.fmax.ftz.f(float 1.25, float -2.0)
+  ret float %res
+}
+
+define float @test_fmax_1_25_neg_2_ftz_nan_f() {
+; CHECK-LABEL: define float @test_fmax_1_25_neg_2_ftz_nan_f() {
+; CHECK-NEXT:    ret float 1.250000e+00
+;
+  %res = call float @llvm.nvvm.fmax.ftz.nan.f(float 1.25, float -2.0)
+  ret float %res
+}
+
+define float @test_fmax_1_25_neg_2_ftz_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_1_25_neg_2_ftz_nan_xorsign_abs_f() {
+; CHECK-NEXT:    ret float -2.000000e+00
+;
+  %res = call float @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f(float 1.25, float -2.0)
+  ret float %res
+}
+
+define float @test_fmax_1_25_neg_2_ftz_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_1_25_neg_2_ftz_xorsign_abs_f() {
+; CHECK-NEXT:    ret float -2.000000e+00
+;
+  %res = call float @llvm.nvvm.fmax.ftz.xorsign.abs.f(float 1.25, float -2.0)
+  ret float %res
+}
+
+define float @test_fmax_1_25_neg_2_nan_f() {
+; CHECK-LABEL: define float @test_fmax_1_25_neg_2_nan_f() {
+; CHECK-NEXT:    ret float 1.250000e+00
+;
+  %res = call float @llvm.nvvm.fmax.nan.f(float 1.25, float -2.0)
+  ret float %res
+}
+
+define float @test_fmax_1_25_neg_2_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_1_25_neg_2_nan_xorsign_abs_f() {
+; CHECK-NEXT:    ret float -2.000000e+00
+;
+  %res = call float @llvm.nvvm.fmax.nan.xorsign.abs.f(float 1.25, float -2.0)
+  ret float %res
+}
+
+define float @test_fmax_1_25_neg_2_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_1_25_neg_2_xorsign_abs_f() {
+; CHECK-NEXT:    ret float -2.000000e+00
+;
+  %res = call float @llvm.nvvm.fmax.xorsign.abs.f(float 1.25, float -2.0)
+  ret float %res
+}
+
+;###############################################################
+;#                   FMax(+Subnormal, NaN)                     #
+;###############################################################
+
+define double @test_fmax_pos_subnorm_nan_d() {
+; CHECK-LABEL: define double @test_fmax_pos_subnorm_nan_d() {
+; CHECK-NEXT:    ret double 0x380FFFFFC0000000
+;
+  %res = call double @llvm.nvvm.fmax.d(double 0x380FFFFFC0000000, double 0x7fff444400000000)
+  ret double %res
+}
+
+define float @test_fmax_pos_subnorm_nan_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_nan_f() {
+; CHECK-NEXT:    ret float 0x380FFFFFC0000000
+;
+  %res = call float @llvm.nvvm.fmax.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+  ret float %res
+}
+
+define float @test_fmax_pos_subnorm_nan_ftz_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_nan_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.fmax.ftz.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+  ret float %res
+}
+
+define float @test_fmax_pos_subnorm_nan_ftz_nan_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_nan_ftz_nan_f() {
+; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fmax.ftz.nan.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+  ret float %res
+}
+
+define float @test_fmax_pos_subnorm_nan_ftz_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_nan_ftz_nan_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+  ret float %res
+}
+
+define float @test_fmax_pos_subnorm_nan_ftz_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_nan_ftz_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.fmax.ftz.xorsign.abs.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+  ret float %res
+}
+
+define float @test_fmax_pos_subnorm_nan_nan_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_nan_nan_f() {
+; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fmax.nan.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+  ret float %res
+}
+
+define float @test_fmax_pos_subnorm_nan_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_nan_nan_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fmax.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+  ret float %res
+}
+
+define float @test_fmax_pos_subnorm_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_nan_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0x380FFFFFC0000000
+;
+  %res = call float @llvm.nvvm.fmax.xorsign.abs.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+  ret float %res
+}
+
+;###############################################################
+;#                   FMax(subnorm, undef)                      #
+;###############################################################
+
+define double @test_fmax_subnorm_undef_d() {
+; CHECK-LABEL: define double @test_fmax_subnorm_undef_d() {
+; CHECK-NEXT:    ret double 0x380FFFFFC0000000
+;
+  %res = call double @llvm.nvvm.fmax.d(double 0x380FFFFFC0000000, double undef)
+  ret double %res
+}
+
+define float @test_fmax_subnorm_undef_f() {
+; CHECK-LABEL: define float @test_fmax_subnorm_undef_f() {
+; CHECK-NEXT:    ret float 0x380FFFFFC0000000
+;
+  %res = call float @llvm.nvvm.fmax.f(float 0x380FFFFFC0000000, float undef)
+  ret float %res
+}
+
+define float @test_fmax_subnorm_undef_ftz_f() {
+; CHECK-LABEL: define float @test_fmax_subnorm_undef_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.fmax.ftz.f(float 0x380FFFFFC0000000, float undef)
+  ret float %res
+}
+
+define float @test_fmax_subnorm_undef_ftz_nan_f() {
+; CHECK-LABEL: define float @test_fmax_subnorm_undef_ftz_nan_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.fmax.ftz.nan.f(float 0x380FFFFFC0000000, float undef)
+  ret float %res
+}
+
+define float @test_fmax_subnorm_undef_ftz_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_subnorm_undef_ftz_nan_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float undef)
+  ret float %res
+}
+
+define float @test_fmax_subnorm_undef_ftz_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_subnorm_undef_ftz_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.fmax.ftz.xorsign.abs.f(float 0x380FFFFFC0000000, float undef)
+  ret float %res
+}
+
+define float @test_fmax_subnorm_undef_nan_f() {
+; CHECK-LABEL: define float @test_fmax_subnorm_undef_nan_f() {
+; CHECK-NEXT:    ret float 0x380FFFFFC0000000
+;
+  %res = call float @llvm.nvvm.fmax.nan.f(float 0x380FFFFFC0000000, float undef)
+  ret float %res
+}
+
+define float @test_fmax_subnorm_undef_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_subnorm_undef_nan_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0x380FFFFFC0000000
+;
+  %res = call float @llvm.nvvm.fmax.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float undef)
+  ret float %res
+}
+
+define float @test_fmax_subnorm_undef_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_subnorm_undef_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0x380FFFFFC0000000
+;
+  %res = call float @llvm.nvvm.fmax.xorsign.abs.f(float 0x380FFFFFC0000000, float undef)
+  ret float %res
+}
+
+;###############################################################
+;#                      FMax(NaN, undef)                       #
+;###############################################################
+; Ensure we canonicalize the NaNs for f32
+
+define double @test_fmax_nan_undef_d() {
+; CHECK-LABEL: define double @test_fmax_nan_undef_d() {
+; CHECK-NEXT:    ret double 0x7FF4444400000000
+;
+  %res = call double @llvm.nvvm.fmax.d(double 0x7ff4444400000000, double undef)
+  ret double %res
+}
+
+define float @test_fmax_nan_undef_f() {
+; CHECK-LABEL: define float @test_fmax_nan_undef_f() {
+; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fmax.f(float 0x7fff444400000000, float undef)
+  ret float %res
+}
+
+define float @test_fmax_nan_undef_ftz_f() {
+; CHECK-LABEL: define float @test_fmax_nan_undef_ftz_f() {
+; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fmax.ftz.f(float 0x7fff444400000000, float undef)
+  ret float %res
+}
+
+define float @test_fmax_nan_undef_ftz_nan_f() {
+; CHECK-LABEL: define float @test_fmax_nan_undef_ftz_nan_f() {
+; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fmax.ftz.nan.f(float 0x7fff444400000000, float undef)
+  ret float %res
+}
+
+define float @test_fmax_nan_undef_ftz_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_nan_undef_ftz_nan_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f(float 0x7fff444400000000, float undef)
+  ret float %res
+}
+
+define float @test_fmax_nan_undef_ftz_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_nan_undef_ftz_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fmax.ftz.xorsign.abs.f(float 0x7ffff4ff00000000, float undef)
+  ret float %res
+}
+
+define float @test_fmax_nan_undef_nan_f() {
+; CHECK-LABEL: define float @test_fmax_nan_undef_nan_f() {
+; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fmax.nan.f(float 0x7fff444400000000, float undef)
+  ret float %res
+}
+
+define float @test_fmax_nan_undef_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_nan_undef_nan_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fmax.nan.xorsign.abs.f(float 0x7fff444400000000, float undef)
+  ret float %res
+}
+
+define float @test_fmax_nan_undef_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_nan_undef_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fmax.xorsign.abs.f(float 0x7fff444400000000, float undef)
+  ret float %res
+}
+
+;###############################################################
+;#                    FMin(1.25, -2.0)                         #
+;###############################################################
+
+define double @test_fmin_1_25_neg_2_d() {
+; CHECK-LABEL: define double @test_fmin_1_25_neg_2_d() {
+; CHECK-NEXT:    ret double -2.000000e+00
+;
+  %res = call double @llvm.nvvm.fmin.d(double 1.25, double -2.0)
+  ret double %res
+}
+
+define float @test_fmin_1_25_neg_2_f() {
+; CHECK-LABEL: define float @test_fmin_1_25_neg_2_f() {
+; CHECK-NEXT:    ret float -2.000000e+00
+;
+  %res = call float @llvm.nvvm.fmin.f(float 1.25, float -2.0)
+  ret float %res
+}
+
+define float @test_fmin_1_25_neg_2_ftz_f() {
+; CHECK-LABEL: define float @test_fmin_1_25_neg_2_ftz_f() {
+; CHECK-NEXT:    ret float -2.000000e+00
+;
+  %res = call float @llvm.nvvm.fmin.ftz.f(float 1.25, float -2.0)
+  ret float %res
+}
+
+define float @test_fmin_1_25_neg_2_ftz_nan_f() {
+; CHECK-LABEL: define float @test_fmin_1_25_neg_2_ftz_nan_f() {
+; CHECK-NEXT:    ret float -2.000000e+00
+;
+  %res = call float @llvm.nvvm.fmin.ftz.nan.f(float 1.25, float -2.0)
+  ret float %res
+}
+
+define float @test_fmin_1_25_neg_2_ftz_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_1_25_neg_2_ftz_nan_xorsign_abs_f() {
+; CHECK-NEXT:    ret float -1.250000e+00
+;
+  %res = call float @llvm.nvvm.fmin.ftz.nan.xorsign.abs.f(float 1.25, float -2.0)
+  ret float %res
+}
+
+define float @test_fmin_1_25_neg_2_ftz_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_1_25_neg_2_ftz_xorsign_abs_f() {
+; CHECK-NEXT:    ret float -1.250000e+00
+;
+  %res = call float @llvm.nvvm.fmin.ftz.xorsign.abs.f(float 1.25, float -2.0)
+  ret float %res
+}
+
+define float @test_fmin_1_25_neg_2_nan_f() {
+; CHECK-LABEL: define float @test_fmin_1_25_neg_2_nan_f() {
+; CHECK-NEXT:    ret float -2.000000e+00
+;
+  %res = call float @llvm.nvvm.fmin.nan.f(float 1.25, float -2.0)
+  ret float %res
+}
+
+define float @test_fmin_1_25_neg_2_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_1_25_neg_2_nan_xorsign_abs_f() {
+; CHECK-NEXT:    ret float -1.250000e+00
+;
+  %res = call float @llvm.nvvm.fmin.nan.xorsign.abs.f(float 1.25, float -2.0)
+  ret float %res
+}
+
+define float @test_fmin_1_25_neg_2_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_1_25_neg_2_xorsign_abs_f() {
+; CHECK-NEXT:    ret float -1.250000e+00
+;
+  %res = call float @llvm.nvvm.fmin.xorsign.abs.f(float 1.25, float -2.0)
+  ret float %res
+}
+
+;###############################################################
+;#                   FMin(+Subnormal, NaN)                     #
+;###############################################################
+
+define double @test_fmin_pos_subnorm_nan_d() {
+; CHECK-LABEL: define double @test_fmin_pos_subnorm_nan_d() {
+; CHECK-NEXT:    ret double 0x380FFFFFC0000000
+;
+  %res = call double @llvm.nvvm.fmin.d(double 0x380FFFFFC0000000, double 0x7fff444400000000)
+  ret double %res
+}
+
+define float @test_fmin_pos_subnorm_nan_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_nan_f() {
+; CHECK-NEXT:    ret float 0x380FFFFFC0000000
+;
+  %res = call float @llvm.nvvm.fmin.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+  ret float %res
+}
+
+define float @test_fmin_pos_subnorm_nan_ftz_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_nan_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.fmin.ftz.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+  ret float %res
+}
+
+define float @test_fmin_pos_subnorm_nan_ftz_nan_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_nan_ftz_nan_f() {
+; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fmin.ftz.nan.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+  ret float %res
+}
+
+define float @test_fmin_pos_subnorm_nan_ftz_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_nan_ftz_nan_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fmin.ftz.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+  ret float %res
+}
+
+define float @test_fmin_pos_subnorm_nan_ftz_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_nan_ftz_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.fmin.ftz.xorsign.abs.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+  ret float %res
+}
+
+define float @test_fmin_pos_subnorm_nan_nan_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_nan_nan_f() {
+; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fmin.nan.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+  ret float %res
+}
+
+define float @test_fmin_pos_subnorm_nan_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_nan_nan_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fmin.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+  ret float %res
+}
+
+define float @test_fmin_pos_subnorm_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_nan_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0x380FFFFFC0000000
+;
+  %res = call float @llvm.nvvm.fmin.xorsign.abs.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+  ret float %res
+}
+
+;###############################################################
+;#                   FMin(subnorm, undef)                      #
+;###############################################################
+
+define double @test_fmin_subnorm_undef_d() {
+; CHECK-LABEL: define double @test_fmin_subnorm_undef_d() {
+; CHECK-NEXT:    ret double 0x380FFFFFC0000000
+;
+  %res = call double @llvm.nvvm.fmin.d(double 0x380FFFFFC0000000, double undef)
+  ret double %res
+}
+
+define float @test_fmin_subnorm_undef_f() {
+; CHECK-LABEL: define float @test_fmin_subnorm_undef_f() {
+; CHECK-NEXT:    ret float 0x380FFFFFC0000000
+;
+  %res = call float @llvm.nvvm.fmin.f(float 0x380FFFFFC0000000, float undef)
+  ret float %res
+}
+
+define float @test_fmin_subnorm_undef_ftz_f() {
+; CHECK-LABEL: define float @test_fmin_subnorm_undef_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.fmin.ftz.f(float 0x380FFFFFC0000000, float undef)
+  ret float %res
+}
+
+define float @test_fmin_subnorm_undef_ftz_nan_f() {
+; CHECK-LABEL: define float @test_fmin_subnorm_undef_ftz_nan_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.fmin.ftz.nan.f(float 0x380FFFFFC0000000, float undef)
+  ret float %res
+}
+
+define float @test_fmin_subnorm_undef_ftz_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_subnorm_undef_ftz_nan_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.fmin.ftz.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float undef)
+  ret float %res
+}
+
+define float @test_fmin_subnorm_undef_ftz_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_subnorm_undef_ftz_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.fmin.ftz.xorsign.abs.f(float 0x380FFFFFC0000000, float undef)
+  ret float %res
+}
+
+define float @test_fmin_subnorm_undef_nan_f() {
+; CHECK-LABEL: define float @test_fmin_subnorm_undef_nan_f() {
+; CHECK-NEXT:    ret float 0x380FFFFFC0000000
+;
+  %res = call float @llvm.nvvm.fmin.nan.f(float 0x380FFFFFC0000000, float undef)
+  ret float %res
+}
+
+define float @test_fmin_subnorm_undef_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_subnorm_undef_nan_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0x380FFFFFC0000000
+;
+  %res = call float @llvm.nvvm.fmin.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float undef)
+  ret float %res
+}
+
+define float @test_fmin_subnorm_undef_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_subnorm_undef_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0x380FFFFFC0000000
+;
+  %res = call float @llvm.nvvm.fmin.xorsign.abs.f(float 0x380FFFFFC0000000, float undef)
+  ret float %res
+}
+
+;###############################################################
+;#                      FMin(NaN, undef)                       #
+;###############################################################
+; Ensure we canonicalize the NaNs for f32
+
+define double @test_fmin_nan_undef_d() {
+; CHECK-LABEL: define double @test_fmin_nan_undef_d() {
+; CHECK-NEXT:    ret double 0x7FF4444400000000
+;
+  %res = call double @llvm.nvvm.fmin.d(double 0x7ff4444400000000, double undef)
+  ret double %res
+}
+
+define float @test_fmin_nan_undef_f() {
+; CHECK-LABEL: define float @test_fmin_nan_undef_f() {
+; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fmin.f(float 0x7fff444400000000, float undef)
+  ret float %res
+}
+
+define float @test_fmin_nan_undef_ftz_f() {
+; CHECK-LABEL: define float @test_fmin_nan_undef_ftz_f() {
+; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fmin.ftz.f(float 0x7fff444400000000, float undef)
+  ret float %res
+}
+
+define float @test_fmin_nan_undef_ftz_nan_f() {
+; CHECK-LABEL: define float @test_fmin_nan_undef_ftz_nan_f() {
+; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fmin.ftz.nan.f(float 0x7fff444400000000, float undef)
+  ret float %res
+}
+
+define float @test_fmin_nan_undef_ftz_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_nan_undef_ftz_nan_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fmin.ftz.nan.xorsign.abs.f(float 0x7fff444400000000, float undef)
+  ret float %res
+}
+
+define float @test_fmin_nan_undef_ftz_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_nan_undef_ftz_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fmin.ftz.xorsign.abs.f(float 0x7ffff4ff00000000, float undef)
+  ret float %res
+}
+
+define float @test_fmin_nan_undef_nan_f() {
+; CHECK-LABEL: define float @test_fmin_nan_undef_nan_f() {
+; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fmin.nan.f(float 0x7fff444400000000, float undef)
+  ret float %res
+}
+
+define float @test_fmin_nan_undef_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_nan_undef_nan_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fmin.nan.xorsign.abs.f(float 0x7fff444400000000, float undef)
+  ret float %res
+}
+
+define float @test_fmin_nan_undef_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_nan_undef_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
+;
+  %res = call float @llvm.nvvm.fmin.xorsign.abs.f(float 0x7fff444400000000, float undef)
+  ret float %res
+}

>From 31d624c0ecf9e10c43a7bb2723ec5adcee4a0132 Mon Sep 17 00:00:00 2001
From: Lewis Crawford <lcrawford at nvidia.com>
Date: Wed, 8 Jan 2025 13:24:03 +0000
Subject: [PATCH 2/6] Make intrinsic helpers accept closed sets

Make all the helper functions in NVVMIntrinsicUtils.h explicitly
accept all valid intrinsics, and call llvm_unreachable for any
unexpected intrinsics.

Rename some f2i/d2i helpers to make it clearer their scope is for finite
groups of intrinsics, rather than all intrinsics.
---
 llvm/include/llvm/IR/NVVMIntrinsicUtils.h | 122 +++++++++++++++++++++-
 llvm/lib/Analysis/ConstantFolding.cpp     |   5 +-
 2 files changed, 122 insertions(+), 5 deletions(-)

diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
index d533f944f90ff2..7a76bcaaa30c7b 100644
--- a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
+++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
@@ -60,11 +60,53 @@ inline bool FloatToIntIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
   case Intrinsic::nvvm_f2ull_rp_ftz:
   case Intrinsic::nvvm_f2ull_rz_ftz:
     return true;
+
+  case Intrinsic::nvvm_f2i_rm:
+  case Intrinsic::nvvm_f2i_rn:
+  case Intrinsic::nvvm_f2i_rp:
+  case Intrinsic::nvvm_f2i_rz:
+
+  case Intrinsic::nvvm_f2ui_rm:
+  case Intrinsic::nvvm_f2ui_rn:
+  case Intrinsic::nvvm_f2ui_rp:
+  case Intrinsic::nvvm_f2ui_rz:
+
+  case Intrinsic::nvvm_d2i_rm:
+  case Intrinsic::nvvm_d2i_rn:
+  case Intrinsic::nvvm_d2i_rp:
+  case Intrinsic::nvvm_d2i_rz:
+
+  case Intrinsic::nvvm_d2ui_rm:
+  case Intrinsic::nvvm_d2ui_rn:
+  case Intrinsic::nvvm_d2ui_rp:
+  case Intrinsic::nvvm_d2ui_rz:
+
+  case Intrinsic::nvvm_f2ll_rm:
+  case Intrinsic::nvvm_f2ll_rn:
+  case Intrinsic::nvvm_f2ll_rp:
+  case Intrinsic::nvvm_f2ll_rz:
+
+  case Intrinsic::nvvm_f2ull_rm:
+  case Intrinsic::nvvm_f2ull_rn:
+  case Intrinsic::nvvm_f2ull_rp:
+  case Intrinsic::nvvm_f2ull_rz:
+
+  case Intrinsic::nvvm_d2ll_rm:
+  case Intrinsic::nvvm_d2ll_rn:
+  case Intrinsic::nvvm_d2ll_rp:
+  case Intrinsic::nvvm_d2ll_rz:
+
+  case Intrinsic::nvvm_d2ull_rm:
+  case Intrinsic::nvvm_d2ull_rn:
+  case Intrinsic::nvvm_d2ull_rp:
+  case Intrinsic::nvvm_d2ull_rz:
+    return false;
   }
+  llvm_unreachable("Checking FTZ flag for invalid f2i/d2i intrinsic");
   return false;
 }
 
-inline bool IntrinsicConvertsToSignedInteger(Intrinsic::ID IntrinsicID) {
+inline bool FloatToIntIntrinsicConvertsToSignedInt(Intrinsic::ID IntrinsicID) {
   switch (IntrinsicID) {
   // f2i
   case Intrinsic::nvvm_f2i_rm:
@@ -95,12 +137,44 @@ inline bool IntrinsicConvertsToSignedInteger(Intrinsic::ID IntrinsicID) {
   case Intrinsic::nvvm_d2ll_rp:
   case Intrinsic::nvvm_d2ll_rz:
     return true;
+
+  // f2ui
+  case Intrinsic::nvvm_f2ui_rm:
+  case Intrinsic::nvvm_f2ui_rm_ftz:
+  case Intrinsic::nvvm_f2ui_rn:
+  case Intrinsic::nvvm_f2ui_rn_ftz:
+  case Intrinsic::nvvm_f2ui_rp:
+  case Intrinsic::nvvm_f2ui_rp_ftz:
+  case Intrinsic::nvvm_f2ui_rz:
+  case Intrinsic::nvvm_f2ui_rz_ftz:
+  // d2ui
+  case Intrinsic::nvvm_d2ui_rm:
+  case Intrinsic::nvvm_d2ui_rn:
+  case Intrinsic::nvvm_d2ui_rp:
+  case Intrinsic::nvvm_d2ui_rz:
+  // f2ull
+  case Intrinsic::nvvm_f2ull_rm:
+  case Intrinsic::nvvm_f2ull_rm_ftz:
+  case Intrinsic::nvvm_f2ull_rn:
+  case Intrinsic::nvvm_f2ull_rn_ftz:
+  case Intrinsic::nvvm_f2ull_rp:
+  case Intrinsic::nvvm_f2ull_rp_ftz:
+  case Intrinsic::nvvm_f2ull_rz:
+  case Intrinsic::nvvm_f2ull_rz_ftz:
+  // d2ull
+  case Intrinsic::nvvm_d2ull_rm:
+  case Intrinsic::nvvm_d2ull_rn:
+  case Intrinsic::nvvm_d2ull_rp:
+  case Intrinsic::nvvm_d2ull_rz:
+    return false;
   }
+  llvm_unreachable(
+      "Checking invalid f2i/d2i intrinsic for signed int conversion");
   return false;
 }
 
 inline APFloat::roundingMode
-IntrinsicGetRoundingMode(Intrinsic::ID IntrinsicID) {
+GetFloatToIntRoundingMode(Intrinsic::ID IntrinsicID) {
   switch (IntrinsicID) {
   // RM:
   case Intrinsic::nvvm_f2i_rm:
@@ -166,7 +240,7 @@ IntrinsicGetRoundingMode(Intrinsic::ID IntrinsicID) {
   case Intrinsic::nvvm_d2ull_rz:
     return APFloat::rmTowardZero;
   }
-  llvm_unreachable("Invalid f2i/d2i rounding mode intrinsic");
+  llvm_unreachable("Checking rounding mode for invalid f2i/d2i intrinsic");
   return APFloat::roundingMode::Invalid;
 }
 
@@ -182,7 +256,21 @@ inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) {
   case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
   case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
     return true;
+
+  case Intrinsic::nvvm_fmax_d:
+  case Intrinsic::nvvm_fmax_f:
+  case Intrinsic::nvvm_fmax_nan_f:
+  case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
+  case Intrinsic::nvvm_fmax_xorsign_abs_f:
+
+  case Intrinsic::nvvm_fmin_d:
+  case Intrinsic::nvvm_fmin_f:
+  case Intrinsic::nvvm_fmin_nan_f:
+  case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
+  case Intrinsic::nvvm_fmin_xorsign_abs_f:
+    return false;
   }
+  llvm_unreachable("Checking FTZ flag for invalid fmin/fmax intrinsic");
   return false;
 }
 
@@ -198,7 +286,21 @@ inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) {
   case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
   case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
     return true;
+
+  case Intrinsic::nvvm_fmax_d:
+  case Intrinsic::nvvm_fmax_f:
+  case Intrinsic::nvvm_fmax_ftz_f:
+  case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
+  case Intrinsic::nvvm_fmax_xorsign_abs_f:
+
+  case Intrinsic::nvvm_fmin_d:
+  case Intrinsic::nvvm_fmin_f:
+  case Intrinsic::nvvm_fmin_ftz_f:
+  case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
+  case Intrinsic::nvvm_fmin_xorsign_abs_f:
+    return false;
   }
+  llvm_unreachable("Checking NaN flag for invalid fmin/fmax intrinsic");
   return false;
 }
 
@@ -214,7 +316,21 @@ inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) {
   case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
   case Intrinsic::nvvm_fmin_xorsign_abs_f:
     return true;
+
+  case Intrinsic::nvvm_fmax_d:
+  case Intrinsic::nvvm_fmax_f:
+  case Intrinsic::nvvm_fmax_ftz_f:
+  case Intrinsic::nvvm_fmax_ftz_nan_f:
+  case Intrinsic::nvvm_fmax_nan_f:
+
+  case Intrinsic::nvvm_fmin_d:
+  case Intrinsic::nvvm_fmin_f:
+  case Intrinsic::nvvm_fmin_ftz_f:
+  case Intrinsic::nvvm_fmin_ftz_nan_f:
+  case Intrinsic::nvvm_fmin_nan_f:
+    return false;
   }
+  llvm_unreachable("Checking XorSignAbs flag for invalid fmin/fmax intrinsic");
   return false;
 }
 
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 75150ed97aa7b4..e5079f292c048d 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -2453,9 +2453,10 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
       if (U.isNaN())
         return ConstantInt::get(Ty, 0);
 
-      APFloat::roundingMode RMode = nvvm::IntrinsicGetRoundingMode(IntrinsicID);
+      APFloat::roundingMode RMode =
+          nvvm::GetFloatToIntRoundingMode(IntrinsicID);
       bool IsFTZ = nvvm::FloatToIntIntrinsicShouldFTZ(IntrinsicID);
-      bool IsSigned = nvvm::IntrinsicConvertsToSignedInteger(IntrinsicID);
+      bool IsSigned = nvvm::FloatToIntIntrinsicConvertsToSignedInt(IntrinsicID);
 
       APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned);
       auto FloatToRound = IsFTZ ? FTZPreserveSign(U) : U;

>From 8012a90231c44a56f93f097c82e69d78183a62fa Mon Sep 17 00:00:00 2001
From: Lewis Crawford <lcrawford at nvidia.com>
Date: Wed, 8 Jan 2025 14:31:52 +0000
Subject: [PATCH 3/6] Refactor if condition for readability

---
 llvm/lib/Analysis/ConstantFolding.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index e5079f292c048d..8a51801e2d3bae 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -3036,8 +3036,8 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
       case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
       case Intrinsic::nvvm_fmin_xorsign_abs_f: {
 
-        bool ShouldCanonicalizeNaNs = IntrinsicID != Intrinsic::nvvm_fmax_d &&
-                                      IntrinsicID != Intrinsic::nvvm_fmin_d;
+        bool ShouldCanonicalizeNaNs = !(IntrinsicID == Intrinsic::nvvm_fmax_d ||
+                                        IntrinsicID == Intrinsic::nvvm_fmin_d);
         bool IsFTZ = nvvm::FMinFMaxShouldFTZ(IntrinsicID);
         bool IsNaNPropagating = nvvm::FMinFMaxPropagatesNaNs(IntrinsicID);
         bool IsXorSignAbs = nvvm::FMinFMaxIsXorSignAbs(IntrinsicID);

>From 20263eb490a50ffbde1c0ccbdef0018acae59c5a Mon Sep 17 00:00:00 2001
From: Lewis Crawford <lcrawford at nvidia.com>
Date: Thu, 9 Jan 2025 10:50:41 +0000
Subject: [PATCH 4/6] Rename FloatToInt intrinsics to FPToInteger

Try to avoid the connotation that FloatToInt was restricted to f2i
intrinsics, in the hopes that FPToInteger covers a broader range of
floating-point types (float + double), and both signed and unsigned
integers with both 32 and 64 bits (rather than just specifically
C-style floats and ints).
---
 llvm/include/llvm/IR/NVVMIntrinsicUtils.h | 6 +++---
 llvm/lib/Analysis/ConstantFolding.cpp     | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
index 7a76bcaaa30c7b..ce794e25736373 100644
--- a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
+++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
@@ -38,7 +38,7 @@ enum class TMAReductionOp : uint8_t {
   XOR = 7,
 };
 
-inline bool FloatToIntIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
+inline bool FPToIntegerIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
   switch (IntrinsicID) {
   case Intrinsic::nvvm_f2i_rm_ftz:
   case Intrinsic::nvvm_f2i_rn_ftz:
@@ -106,7 +106,7 @@ inline bool FloatToIntIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
   return false;
 }
 
-inline bool FloatToIntIntrinsicConvertsToSignedInt(Intrinsic::ID IntrinsicID) {
+inline bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID) {
   switch (IntrinsicID) {
   // f2i
   case Intrinsic::nvvm_f2i_rm:
@@ -174,7 +174,7 @@ inline bool FloatToIntIntrinsicConvertsToSignedInt(Intrinsic::ID IntrinsicID) {
 }
 
 inline APFloat::roundingMode
-GetFloatToIntRoundingMode(Intrinsic::ID IntrinsicID) {
+GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID) {
   switch (IntrinsicID) {
   // RM:
   case Intrinsic::nvvm_f2i_rm:
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 8a51801e2d3bae..6589653c14d93e 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -2454,9 +2454,9 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
         return ConstantInt::get(Ty, 0);
 
       APFloat::roundingMode RMode =
-          nvvm::GetFloatToIntRoundingMode(IntrinsicID);
-      bool IsFTZ = nvvm::FloatToIntIntrinsicShouldFTZ(IntrinsicID);
-      bool IsSigned = nvvm::FloatToIntIntrinsicConvertsToSignedInt(IntrinsicID);
+          nvvm::GetFPToIntegerRoundingMode(IntrinsicID);
+      bool IsFTZ = nvvm::FPToIntegerIntrinsicShouldFTZ(IntrinsicID);
+      bool IsSigned = nvvm::FPToIntegerIntrinsicResultIsSigned(IntrinsicID);
 
       APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned);
       auto FloatToRound = IsFTZ ? FTZPreserveSign(U) : U;

>From b778353a0e279d323525684a905aae82927812fa Mon Sep 17 00:00:00 2001
From: Lewis Crawford <lcrawford at nvidia.com>
Date: Fri, 10 Jan 2025 10:09:40 +0000
Subject: [PATCH 5/6] Add missing nan modifier to ftz.nan tests

The tests for fmax/fmin.ftz.nan were missing the .nan modifier,
so they were erroneously testing fmax/fmin.ftz twice.

This patch adds the missing modifier, and updates the expected
values to NaN where the instruction should propagate nan inputs.
---
 .../InstSimplify/const-fold-nvvm-fmin-fmax.ll | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-fmin-fmax.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-fmin-fmax.ll
index ab277483dbba5a..27cffeebb265b1 100644
--- a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-fmin-fmax.ll
+++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-fmin-fmax.ll
@@ -35,7 +35,7 @@ define float @test_fmax_1_25_neg_2_ftz_nan_f() {
 ; CHECK-LABEL: define float @test_fmax_1_25_neg_2_ftz_nan_f() {
 ; CHECK-NEXT:    ret float 1.250000e+00
 ;
-  %res = call float @llvm.nvvm.fmax.ftz.f(float 1.25, float -2.0)
+  %res = call float @llvm.nvvm.fmax.ftz.nan.f(float 1.25, float -2.0)
   ret float %res
 }
 
@@ -109,9 +109,9 @@ define float @test_fmax_pos_subnorm_nan_ftz_f() {
 
 define float @test_fmax_pos_subnorm_nan_ftz_nan_f() {
 ; CHECK-LABEL: define float @test_fmax_pos_subnorm_nan_ftz_nan_f() {
-; CHECK-NEXT:    ret float 0.000000e+00
+; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
 ;
-  %res = call float @llvm.nvvm.fmax.ftz.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+  %res = call float @llvm.nvvm.fmax.ftz.nan.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
   ret float %res
 }
 
@@ -187,7 +187,7 @@ define float @test_fmax_subnorm_undef_ftz_nan_f() {
 ; CHECK-LABEL: define float @test_fmax_subnorm_undef_ftz_nan_f() {
 ; CHECK-NEXT:    ret float 0.000000e+00
 ;
-  %res = call float @llvm.nvvm.fmax.ftz.f(float 0x380FFFFFC0000000, float undef)
+  %res = call float @llvm.nvvm.fmax.ftz.nan.f(float 0x380FFFFFC0000000, float undef)
   ret float %res
 }
 
@@ -264,7 +264,7 @@ define float @test_fmax_nan_undef_ftz_nan_f() {
 ; CHECK-LABEL: define float @test_fmax_nan_undef_ftz_nan_f() {
 ; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
 ;
-  %res = call float @llvm.nvvm.fmax.ftz.f(float 0x7fff444400000000, float undef)
+  %res = call float @llvm.nvvm.fmax.ftz.nan.f(float 0x7fff444400000000, float undef)
   ret float %res
 }
 
@@ -340,7 +340,7 @@ define float @test_fmin_1_25_neg_2_ftz_nan_f() {
 ; CHECK-LABEL: define float @test_fmin_1_25_neg_2_ftz_nan_f() {
 ; CHECK-NEXT:    ret float -2.000000e+00
 ;
-  %res = call float @llvm.nvvm.fmin.ftz.f(float 1.25, float -2.0)
+  %res = call float @llvm.nvvm.fmin.ftz.nan.f(float 1.25, float -2.0)
   ret float %res
 }
 
@@ -414,9 +414,9 @@ define float @test_fmin_pos_subnorm_nan_ftz_f() {
 
 define float @test_fmin_pos_subnorm_nan_ftz_nan_f() {
 ; CHECK-LABEL: define float @test_fmin_pos_subnorm_nan_ftz_nan_f() {
-; CHECK-NEXT:    ret float 0.000000e+00
+; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
 ;
-  %res = call float @llvm.nvvm.fmin.ftz.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+  %res = call float @llvm.nvvm.fmin.ftz.nan.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
   ret float %res
 }
 
@@ -492,7 +492,7 @@ define float @test_fmin_subnorm_undef_ftz_nan_f() {
 ; CHECK-LABEL: define float @test_fmin_subnorm_undef_ftz_nan_f() {
 ; CHECK-NEXT:    ret float 0.000000e+00
 ;
-  %res = call float @llvm.nvvm.fmin.ftz.f(float 0x380FFFFFC0000000, float undef)
+  %res = call float @llvm.nvvm.fmin.ftz.nan.f(float 0x380FFFFFC0000000, float undef)
   ret float %res
 }
 
@@ -569,7 +569,7 @@ define float @test_fmin_nan_undef_ftz_nan_f() {
 ; CHECK-LABEL: define float @test_fmin_nan_undef_ftz_nan_f() {
 ; CHECK-NEXT:    ret float 0x7FFFFFFFE0000000
 ;
-  %res = call float @llvm.nvvm.fmin.ftz.f(float 0x7fff444400000000, float undef)
+  %res = call float @llvm.nvvm.fmin.ftz.nan.f(float 0x7fff444400000000, float undef)
   ret float %res
 }
 

>From 5a02a34734db508c617be3b9525c4e62f7c0257a Mon Sep 17 00:00:00 2001
From: Lewis Crawford <lcrawford at nvidia.com>
Date: Mon, 13 Jan 2025 14:54:02 +0000
Subject: [PATCH 6/6] Add even more subnormal testing

---
 .../InstSimplify/const-fold-nvvm-fmin-fmax.ll | 156 +++++++++++++++++-
 1 file changed, 154 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-fmin-fmax.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-fmin-fmax.ll
index 27cffeebb265b1..74499d87bf2518 100644
--- a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-fmin-fmax.ll
+++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-fmin-fmax.ll
@@ -79,6 +79,82 @@ define float @test_fmax_1_25_neg_2_xorsign_abs_f() {
   ret float %res
 }
 
+;###############################################################
+;#                   FMax(+Subnormal, -Subnormal)              #
+;###############################################################
+
+define double @test_fmax_pos_subnorm_neg_subnorm_d() {
+; CHECK-LABEL: define double @test_fmax_pos_subnorm_neg_subnorm_d() {
+; CHECK-NEXT:    ret double 0x380FFFFFC0000000
+;
+  %res = call double @llvm.nvvm.fmax.d(double 0x380FFFFFC0000000, double 0xB80FFFFFC0000000)
+  ret double %res
+}
+
+define float @test_fmax_pos_subnorm_neg_subnorm_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_neg_subnorm_f() {
+; CHECK-NEXT:    ret float 0x380FFFFFC0000000
+;
+  %res = call float @llvm.nvvm.fmax.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+  ret float %res
+}
+
+define float @test_fmax_pos_subnorm_neg_subnorm_ftz_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_neg_subnorm_ftz_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.fmax.ftz.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+  ret float %res
+}
+
+define float @test_fmax_pos_subnorm_neg_subnorm_ftz_nan_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_neg_subnorm_ftz_nan_f() {
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %res = call float @llvm.nvvm.fmax.ftz.nan.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+  ret float %res
+}
+
+define float @test_fmax_pos_subnorm_neg_subnorm_ftz_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_neg_subnorm_ftz_nan_xorsign_abs_f() {
+; CHECK-NEXT:    ret float -0.000000e+00
+;
+  %res = call float @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+  ret float %res
+}
+
+define float @test_fmax_pos_subnorm_neg_subnorm_ftz_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_neg_subnorm_ftz_xorsign_abs_f() {
+; CHECK-NEXT:    ret float -0.000000e+00
+;
+  %res = call float @llvm.nvvm.fmax.ftz.xorsign.abs.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+  ret float %res
+}
+
+define float @test_fmax_pos_subnorm_neg_subnorm_nan_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_neg_subnorm_nan_f() {
+; CHECK-NEXT:    ret float 0x380FFFFFC0000000
+;
+  %res = call float @llvm.nvvm.fmax.nan.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+  ret float %res
+}
+
+define float @test_fmax_pos_subnorm_neg_subnorm_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_neg_subnorm_nan_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0xB80FFFFFC0000000
+;
+  %res = call float @llvm.nvvm.fmax.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+  ret float %res
+}
+
+define float @test_fmax_pos_subnorm_neg_subnorm_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_neg_subnorm_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0xB80FFFFFC0000000
+;
+  %res = call float @llvm.nvvm.fmax.xorsign.abs.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+  ret float %res
+}
+
 ;###############################################################
 ;#                   FMax(+Subnormal, NaN)                     #
 ;###############################################################
@@ -156,7 +232,7 @@ define float @test_fmax_pos_subnorm_nan_xorsign_abs_f() {
 }
 
 ;###############################################################
-;#                   FMax(subnorm, undef)                      #
+;#                   FMax(+Subnormal, undef)                   #
 ;###############################################################
 
 define double @test_fmax_subnorm_undef_d() {
@@ -384,6 +460,82 @@ define float @test_fmin_1_25_neg_2_xorsign_abs_f() {
   ret float %res
 }
 
+;###############################################################
+;#                   FMin(+Subnormal, -Subnormal)              #
+;###############################################################
+
+define double @test_fmin_pos_subnorm_neg_subnorm_d() {
+; CHECK-LABEL: define double @test_fmin_pos_subnorm_neg_subnorm_d() {
+; CHECK-NEXT:    ret double 0xB80FFFFFC0000000
+;
+  %res = call double @llvm.nvvm.fmin.d(double 0x380FFFFFC0000000, double 0xB80FFFFFC0000000)
+  ret double %res
+}
+
+define float @test_fmin_pos_subnorm_neg_subnorm_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_neg_subnorm_f() {
+; CHECK-NEXT:    ret float 0xB80FFFFFC0000000
+;
+  %res = call float @llvm.nvvm.fmin.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+  ret float %res
+}
+
+define float @test_fmin_pos_subnorm_neg_subnorm_ftz_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_neg_subnorm_ftz_f() {
+; CHECK-NEXT:    ret float -0.000000e+00
+;
+  %res = call float @llvm.nvvm.fmin.ftz.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+  ret float %res
+}
+
+define float @test_fmin_pos_subnorm_neg_subnorm_ftz_nan_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_neg_subnorm_ftz_nan_f() {
+; CHECK-NEXT:    ret float -0.000000e+00
+;
+  %res = call float @llvm.nvvm.fmin.ftz.nan.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+  ret float %res
+}
+
+define float @test_fmin_pos_subnorm_neg_subnorm_ftz_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_neg_subnorm_ftz_nan_xorsign_abs_f() {
+; CHECK-NEXT:    ret float -0.000000e+00
+;
+  %res = call float @llvm.nvvm.fmin.ftz.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+  ret float %res
+}
+
+define float @test_fmin_pos_subnorm_neg_subnorm_ftz_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_neg_subnorm_ftz_xorsign_abs_f() {
+; CHECK-NEXT:    ret float -0.000000e+00
+;
+  %res = call float @llvm.nvvm.fmin.ftz.xorsign.abs.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+  ret float %res
+}
+
+define float @test_fmin_pos_subnorm_neg_subnorm_nan_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_neg_subnorm_nan_f() {
+; CHECK-NEXT:    ret float 0xB80FFFFFC0000000
+;
+  %res = call float @llvm.nvvm.fmin.nan.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+  ret float %res
+}
+
+define float @test_fmin_pos_subnorm_neg_subnorm_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_neg_subnorm_nan_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0xB80FFFFFC0000000
+;
+  %res = call float @llvm.nvvm.fmin.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+  ret float %res
+}
+
+define float @test_fmin_pos_subnorm_neg_subnorm_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_neg_subnorm_xorsign_abs_f() {
+; CHECK-NEXT:    ret float 0xB80FFFFFC0000000
+;
+  %res = call float @llvm.nvvm.fmin.xorsign.abs.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+  ret float %res
+}
+
 ;###############################################################
 ;#                   FMin(+Subnormal, NaN)                     #
 ;###############################################################
@@ -461,7 +613,7 @@ define float @test_fmin_pos_subnorm_nan_xorsign_abs_f() {
 }
 
 ;###############################################################
-;#                   FMin(subnorm, undef)                      #
+;#                   FMin(+Subnormal, undef)                   #
 ;###############################################################
 
 define double @test_fmin_subnorm_undef_d() {



More information about the llvm-commits mailing list