[llvm] cea9244 - [NVPTX] Constant fold NVVM fmin and fmax (#121966)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 16 06:38:54 PST 2025
Author: Lewis Crawford
Date: 2025-01-16T14:38:51Z
New Revision: cea92446ac289dc013e6253cb84445981010d08a
URL: https://github.com/llvm/llvm-project/commit/cea92446ac289dc013e6253cb84445981010d08a
DIFF: https://github.com/llvm/llvm-project/commit/cea92446ac289dc013e6253cb84445981010d08a.diff
LOG: [NVPTX] Constant fold NVVM fmin and fmax (#121966)
Add constant-folding for nvvm float/double fmin + fmax intrinsics,
including all combinations of xorsign.abs, nan-propagation, and ftz.
Added:
llvm/test/Transforms/InstSimplify/const-fold-nvvm-fmin-fmax.ll
Modified:
llvm/include/llvm/IR/NVVMIntrinsicUtils.h
llvm/lib/Analysis/ConstantFolding.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
index 8ca073ba822534..ce794e25736373 100644
--- a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
+++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
@@ -38,9 +38,8 @@ enum class TMAReductionOp : uint8_t {
XOR = 7,
};
-inline bool IntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
+inline bool FPToIntegerIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
switch (IntrinsicID) {
- // Float to i32 / i64 conversion intrinsics:
case Intrinsic::nvvm_f2i_rm_ftz:
case Intrinsic::nvvm_f2i_rn_ftz:
case Intrinsic::nvvm_f2i_rp_ftz:
@@ -61,11 +60,53 @@ inline bool IntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
case Intrinsic::nvvm_f2ull_rp_ftz:
case Intrinsic::nvvm_f2ull_rz_ftz:
return true;
+
+ case Intrinsic::nvvm_f2i_rm:
+ case Intrinsic::nvvm_f2i_rn:
+ case Intrinsic::nvvm_f2i_rp:
+ case Intrinsic::nvvm_f2i_rz:
+
+ case Intrinsic::nvvm_f2ui_rm:
+ case Intrinsic::nvvm_f2ui_rn:
+ case Intrinsic::nvvm_f2ui_rp:
+ case Intrinsic::nvvm_f2ui_rz:
+
+ case Intrinsic::nvvm_d2i_rm:
+ case Intrinsic::nvvm_d2i_rn:
+ case Intrinsic::nvvm_d2i_rp:
+ case Intrinsic::nvvm_d2i_rz:
+
+ case Intrinsic::nvvm_d2ui_rm:
+ case Intrinsic::nvvm_d2ui_rn:
+ case Intrinsic::nvvm_d2ui_rp:
+ case Intrinsic::nvvm_d2ui_rz:
+
+ case Intrinsic::nvvm_f2ll_rm:
+ case Intrinsic::nvvm_f2ll_rn:
+ case Intrinsic::nvvm_f2ll_rp:
+ case Intrinsic::nvvm_f2ll_rz:
+
+ case Intrinsic::nvvm_f2ull_rm:
+ case Intrinsic::nvvm_f2ull_rn:
+ case Intrinsic::nvvm_f2ull_rp:
+ case Intrinsic::nvvm_f2ull_rz:
+
+ case Intrinsic::nvvm_d2ll_rm:
+ case Intrinsic::nvvm_d2ll_rn:
+ case Intrinsic::nvvm_d2ll_rp:
+ case Intrinsic::nvvm_d2ll_rz:
+
+ case Intrinsic::nvvm_d2ull_rm:
+ case Intrinsic::nvvm_d2ull_rn:
+ case Intrinsic::nvvm_d2ull_rp:
+ case Intrinsic::nvvm_d2ull_rz:
+ return false;
}
+ llvm_unreachable("Checking FTZ flag for invalid f2i/d2i intrinsic");
return false;
}
-inline bool IntrinsicConvertsToSignedInteger(Intrinsic::ID IntrinsicID) {
+inline bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID) {
switch (IntrinsicID) {
// f2i
case Intrinsic::nvvm_f2i_rm:
@@ -96,12 +137,44 @@ inline bool IntrinsicConvertsToSignedInteger(Intrinsic::ID IntrinsicID) {
case Intrinsic::nvvm_d2ll_rp:
case Intrinsic::nvvm_d2ll_rz:
return true;
+
+ // f2ui
+ case Intrinsic::nvvm_f2ui_rm:
+ case Intrinsic::nvvm_f2ui_rm_ftz:
+ case Intrinsic::nvvm_f2ui_rn:
+ case Intrinsic::nvvm_f2ui_rn_ftz:
+ case Intrinsic::nvvm_f2ui_rp:
+ case Intrinsic::nvvm_f2ui_rp_ftz:
+ case Intrinsic::nvvm_f2ui_rz:
+ case Intrinsic::nvvm_f2ui_rz_ftz:
+ // d2ui
+ case Intrinsic::nvvm_d2ui_rm:
+ case Intrinsic::nvvm_d2ui_rn:
+ case Intrinsic::nvvm_d2ui_rp:
+ case Intrinsic::nvvm_d2ui_rz:
+ // f2ull
+ case Intrinsic::nvvm_f2ull_rm:
+ case Intrinsic::nvvm_f2ull_rm_ftz:
+ case Intrinsic::nvvm_f2ull_rn:
+ case Intrinsic::nvvm_f2ull_rn_ftz:
+ case Intrinsic::nvvm_f2ull_rp:
+ case Intrinsic::nvvm_f2ull_rp_ftz:
+ case Intrinsic::nvvm_f2ull_rz:
+ case Intrinsic::nvvm_f2ull_rz_ftz:
+ // d2ull
+ case Intrinsic::nvvm_d2ull_rm:
+ case Intrinsic::nvvm_d2ull_rn:
+ case Intrinsic::nvvm_d2ull_rp:
+ case Intrinsic::nvvm_d2ull_rz:
+ return false;
}
+ llvm_unreachable(
+ "Checking invalid f2i/d2i intrinsic for signed int conversion");
return false;
}
inline APFloat::roundingMode
-IntrinsicGetRoundingMode(Intrinsic::ID IntrinsicID) {
+GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID) {
switch (IntrinsicID) {
// RM:
case Intrinsic::nvvm_f2i_rm:
@@ -167,10 +240,100 @@ IntrinsicGetRoundingMode(Intrinsic::ID IntrinsicID) {
case Intrinsic::nvvm_d2ull_rz:
return APFloat::rmTowardZero;
}
- llvm_unreachable("Invalid f2i/d2i rounding mode intrinsic");
+ llvm_unreachable("Checking rounding mode for invalid f2i/d2i intrinsic");
return APFloat::roundingMode::Invalid;
}
+inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) {
+ switch (IntrinsicID) {
+ case Intrinsic::nvvm_fmax_ftz_f:
+ case Intrinsic::nvvm_fmax_ftz_nan_f:
+ case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
+
+ case Intrinsic::nvvm_fmin_ftz_f:
+ case Intrinsic::nvvm_fmin_ftz_nan_f:
+ case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
+ return true;
+
+ case Intrinsic::nvvm_fmax_d:
+ case Intrinsic::nvvm_fmax_f:
+ case Intrinsic::nvvm_fmax_nan_f:
+ case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_xorsign_abs_f:
+
+ case Intrinsic::nvvm_fmin_d:
+ case Intrinsic::nvvm_fmin_f:
+ case Intrinsic::nvvm_fmin_nan_f:
+ case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmin_xorsign_abs_f:
+ return false;
+ }
+ llvm_unreachable("Checking FTZ flag for invalid fmin/fmax intrinsic");
+ return false;
+}
+
+inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) {
+ switch (IntrinsicID) {
+ case Intrinsic::nvvm_fmax_ftz_nan_f:
+ case Intrinsic::nvvm_fmax_nan_f:
+ case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
+
+ case Intrinsic::nvvm_fmin_ftz_nan_f:
+ case Intrinsic::nvvm_fmin_nan_f:
+ case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
+ return true;
+
+ case Intrinsic::nvvm_fmax_d:
+ case Intrinsic::nvvm_fmax_f:
+ case Intrinsic::nvvm_fmax_ftz_f:
+ case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_xorsign_abs_f:
+
+ case Intrinsic::nvvm_fmin_d:
+ case Intrinsic::nvvm_fmin_f:
+ case Intrinsic::nvvm_fmin_ftz_f:
+ case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
+ case Intrinsic::nvvm_fmin_xorsign_abs_f:
+ return false;
+ }
+ llvm_unreachable("Checking NaN flag for invalid fmin/fmax intrinsic");
+ return false;
+}
+
+inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) {
+ switch (IntrinsicID) {
+ case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_xorsign_abs_f:
+
+ case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
+ case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmin_xorsign_abs_f:
+ return true;
+
+ case Intrinsic::nvvm_fmax_d:
+ case Intrinsic::nvvm_fmax_f:
+ case Intrinsic::nvvm_fmax_ftz_f:
+ case Intrinsic::nvvm_fmax_ftz_nan_f:
+ case Intrinsic::nvvm_fmax_nan_f:
+
+ case Intrinsic::nvvm_fmin_d:
+ case Intrinsic::nvvm_fmin_f:
+ case Intrinsic::nvvm_fmin_ftz_f:
+ case Intrinsic::nvvm_fmin_ftz_nan_f:
+ case Intrinsic::nvvm_fmin_nan_f:
+ return false;
+ }
+ llvm_unreachable("Checking XorSignAbs flag for invalid fmin/fmax intrinsic");
+ return false;
+}
+
} // namespace nvvm
} // namespace llvm
#endif // LLVM_IR_NVVMINTRINSICUTILS_H
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index ecdc841a38d112..3e87ea0e90fd58 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1689,6 +1689,28 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::x86_avx512_cvttsd2usi64:
return !Call->isStrictFP();
+ // NVVM FMax intrinsics
+ case Intrinsic::nvvm_fmax_d:
+ case Intrinsic::nvvm_fmax_f:
+ case Intrinsic::nvvm_fmax_ftz_f:
+ case Intrinsic::nvvm_fmax_ftz_nan_f:
+ case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_nan_f:
+ case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_xorsign_abs_f:
+
+ // NVVM FMin intrinsics
+ case Intrinsic::nvvm_fmin_d:
+ case Intrinsic::nvvm_fmin_f:
+ case Intrinsic::nvvm_fmin_ftz_f:
+ case Intrinsic::nvvm_fmin_ftz_nan_f:
+ case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
+ case Intrinsic::nvvm_fmin_nan_f:
+ case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmin_xorsign_abs_f:
+
// NVVM float/double to int32/uint32 conversion intrinsics
case Intrinsic::nvvm_f2i_rm:
case Intrinsic::nvvm_f2i_rn:
@@ -2431,9 +2453,10 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
if (U.isNaN())
return ConstantInt::get(Ty, 0);
- APFloat::roundingMode RMode = nvvm::IntrinsicGetRoundingMode(IntrinsicID);
- bool IsFTZ = nvvm::IntrinsicShouldFTZ(IntrinsicID);
- bool IsSigned = nvvm::IntrinsicConvertsToSignedInteger(IntrinsicID);
+ APFloat::roundingMode RMode =
+ nvvm::GetFPToIntegerRoundingMode(IntrinsicID);
+ bool IsFTZ = nvvm::FPToIntegerIntrinsicShouldFTZ(IntrinsicID);
+ bool IsSigned = nvvm::FPToIntegerIntrinsicResultIsSigned(IntrinsicID);
APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned);
auto FloatToRound = IsFTZ ? FTZPreserveSign(U) : U;
@@ -2892,12 +2915,49 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
case Intrinsic::minnum:
case Intrinsic::maximum:
case Intrinsic::minimum:
+ case Intrinsic::nvvm_fmax_d:
+ case Intrinsic::nvvm_fmin_d:
// If one argument is undef, return the other argument.
if (IsOp0Undef)
return Operands[1];
if (IsOp1Undef)
return Operands[0];
break;
+
+ case Intrinsic::nvvm_fmax_f:
+ case Intrinsic::nvvm_fmax_ftz_f:
+ case Intrinsic::nvvm_fmax_ftz_nan_f:
+ case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_nan_f:
+ case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_xorsign_abs_f:
+
+ case Intrinsic::nvvm_fmin_f:
+ case Intrinsic::nvvm_fmin_ftz_f:
+ case Intrinsic::nvvm_fmin_ftz_nan_f:
+ case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
+ case Intrinsic::nvvm_fmin_nan_f:
+ case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmin_xorsign_abs_f:
+ // If one arg is undef, the other arg can be returned only if it is
+ // constant, as we may need to flush it to sign-preserving zero or
+ // canonicalize the NaN.
+ if (!IsOp0Undef && !IsOp1Undef)
+ break;
+ if (auto *Op = dyn_cast<ConstantFP>(Operands[IsOp0Undef ? 1 : 0])) {
+ if (Op->isNaN()) {
+ APInt NVCanonicalNaN(32, 0x7fffffff);
+ return ConstantFP::get(
+ Ty, APFloat(Ty->getFltSemantics(), NVCanonicalNaN));
+ }
+ if (nvvm::FMinFMaxShouldFTZ(IntrinsicID))
+ return ConstantFP::get(Ty, FTZPreserveSign(Op->getValueAPF()));
+ else
+ return Op;
+ }
+ break;
}
}
@@ -2955,6 +3015,79 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
return ConstantFP::get(Ty->getContext(), minimum(Op1V, Op2V));
case Intrinsic::maximum:
return ConstantFP::get(Ty->getContext(), maximum(Op1V, Op2V));
+
+ case Intrinsic::nvvm_fmax_d:
+ case Intrinsic::nvvm_fmax_f:
+ case Intrinsic::nvvm_fmax_ftz_f:
+ case Intrinsic::nvvm_fmax_ftz_nan_f:
+ case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_nan_f:
+ case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_xorsign_abs_f:
+
+ case Intrinsic::nvvm_fmin_d:
+ case Intrinsic::nvvm_fmin_f:
+ case Intrinsic::nvvm_fmin_ftz_f:
+ case Intrinsic::nvvm_fmin_ftz_nan_f:
+ case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
+ case Intrinsic::nvvm_fmin_nan_f:
+ case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmin_xorsign_abs_f: {
+
+ bool ShouldCanonicalizeNaNs = !(IntrinsicID == Intrinsic::nvvm_fmax_d ||
+ IntrinsicID == Intrinsic::nvvm_fmin_d);
+ bool IsFTZ = nvvm::FMinFMaxShouldFTZ(IntrinsicID);
+ bool IsNaNPropagating = nvvm::FMinFMaxPropagatesNaNs(IntrinsicID);
+ bool IsXorSignAbs = nvvm::FMinFMaxIsXorSignAbs(IntrinsicID);
+
+ APFloat A = IsFTZ ? FTZPreserveSign(Op1V) : Op1V;
+ APFloat B = IsFTZ ? FTZPreserveSign(Op2V) : Op2V;
+
+ bool XorSign = false;
+ if (IsXorSignAbs) {
+ XorSign = A.isNegative() ^ B.isNegative();
+ A = abs(A);
+ B = abs(B);
+ }
+
+ bool IsFMax = false;
+ switch (IntrinsicID) {
+ case Intrinsic::nvvm_fmax_d:
+ case Intrinsic::nvvm_fmax_f:
+ case Intrinsic::nvvm_fmax_ftz_f:
+ case Intrinsic::nvvm_fmax_ftz_nan_f:
+ case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_nan_f:
+ case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_xorsign_abs_f:
+ IsFMax = true;
+ break;
+ }
+ APFloat Res = IsFMax ? maximum(A, B) : minimum(A, B);
+
+ if (ShouldCanonicalizeNaNs) {
+ APFloat NVCanonicalNaN(Res.getSemantics(), APInt(32, 0x7fffffff));
+ if (A.isNaN() && B.isNaN())
+ return ConstantFP::get(Ty, NVCanonicalNaN);
+ else if (IsNaNPropagating && (A.isNaN() || B.isNaN()))
+ return ConstantFP::get(Ty, NVCanonicalNaN);
+ }
+
+ if (A.isNaN() && B.isNaN())
+ return Operands[1];
+ else if (A.isNaN())
+ Res = B;
+ else if (B.isNaN())
+ Res = A;
+
+ if (IsXorSignAbs && XorSign != Res.isNegative())
+ Res.changeSign();
+
+ return ConstantFP::get(Ty->getContext(), Res);
+ }
}
if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-fmin-fmax.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-fmin-fmax.ll
new file mode 100644
index 00000000000000..4ab6b3cf295bfe
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-fmin-fmax.ll
@@ -0,0 +1,918 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instsimplify -march=nvptx64 --mcpu=sm_86 --mattr=+ptx72 -S | FileCheck %s
+
+; Check constant-folding for NVVM fmin fmax intrinsics
+
+;###############################################################
+;# FMax(1.25, -2.0) #
+;###############################################################
+
+define double @test_fmax_1_25_neg_2_d() {
+; CHECK-LABEL: define double @test_fmax_1_25_neg_2_d() {
+; CHECK-NEXT: ret double 1.250000e+00
+;
+ %res = call double @llvm.nvvm.fmax.d(double 1.25, double -2.0)
+ ret double %res
+}
+
+define float @test_fmax_1_25_neg_2_f() {
+; CHECK-LABEL: define float @test_fmax_1_25_neg_2_f() {
+; CHECK-NEXT: ret float 1.250000e+00
+;
+ %res = call float @llvm.nvvm.fmax.f(float 1.25, float -2.0)
+ ret float %res
+}
+
+define float @test_fmax_1_25_neg_2_ftz_f() {
+; CHECK-LABEL: define float @test_fmax_1_25_neg_2_ftz_f() {
+; CHECK-NEXT: ret float 1.250000e+00
+;
+ %res = call float @llvm.nvvm.fmax.ftz.f(float 1.25, float -2.0)
+ ret float %res
+}
+
+define float @test_fmax_1_25_neg_2_ftz_nan_f() {
+; CHECK-LABEL: define float @test_fmax_1_25_neg_2_ftz_nan_f() {
+; CHECK-NEXT: ret float 1.250000e+00
+;
+ %res = call float @llvm.nvvm.fmax.ftz.nan.f(float 1.25, float -2.0)
+ ret float %res
+}
+
+define float @test_fmax_1_25_neg_2_ftz_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_1_25_neg_2_ftz_nan_xorsign_abs_f() {
+; CHECK-NEXT: ret float -2.000000e+00
+;
+ %res = call float @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f(float 1.25, float -2.0)
+ ret float %res
+}
+
+define float @test_fmax_1_25_neg_2_ftz_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_1_25_neg_2_ftz_xorsign_abs_f() {
+; CHECK-NEXT: ret float -2.000000e+00
+;
+ %res = call float @llvm.nvvm.fmax.ftz.xorsign.abs.f(float 1.25, float -2.0)
+ ret float %res
+}
+
+define float @test_fmax_1_25_neg_2_nan_f() {
+; CHECK-LABEL: define float @test_fmax_1_25_neg_2_nan_f() {
+; CHECK-NEXT: ret float 1.250000e+00
+;
+ %res = call float @llvm.nvvm.fmax.nan.f(float 1.25, float -2.0)
+ ret float %res
+}
+
+define float @test_fmax_1_25_neg_2_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_1_25_neg_2_nan_xorsign_abs_f() {
+; CHECK-NEXT: ret float -2.000000e+00
+;
+ %res = call float @llvm.nvvm.fmax.nan.xorsign.abs.f(float 1.25, float -2.0)
+ ret float %res
+}
+
+define float @test_fmax_1_25_neg_2_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_1_25_neg_2_xorsign_abs_f() {
+; CHECK-NEXT: ret float -2.000000e+00
+;
+ %res = call float @llvm.nvvm.fmax.xorsign.abs.f(float 1.25, float -2.0)
+ ret float %res
+}
+
+;###############################################################
+;# FMax(+Subnormal, 0.0) #
+;###############################################################
+
+define double @test_fmax_pos_subnorm_zero_d() {
+; CHECK-LABEL: define double @test_fmax_pos_subnorm_zero_d() {
+; CHECK-NEXT: ret double 0x380FFFFFC0000000
+;
+ %res = call double @llvm.nvvm.fmax.d(double 0x380FFFFFC0000000, double 0.0)
+ ret double %res
+}
+
+define float @test_fmax_pos_subnorm_zero_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_zero_f() {
+; CHECK-NEXT: ret float 0x380FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fmax.f(float 0x380FFFFFC0000000, float 0.0)
+ ret float %res
+}
+
+define float @test_fmax_pos_subnorm_zero_ftz_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_zero_ftz_f() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmax.ftz.f(float 0x380FFFFFC0000000, float 0.0)
+ ret float %res
+}
+
+define float @test_fmax_pos_subnorm_zero_ftz_nan_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_zero_ftz_nan_f() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmax.ftz.nan.f(float 0x380FFFFFC0000000, float 0.0)
+ ret float %res
+}
+
+define float @test_fmax_pos_subnorm_zero_ftz_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_zero_ftz_nan_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0.0)
+ ret float %res
+}
+
+define float @test_fmax_pos_subnorm_zero_ftz_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_zero_ftz_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmax.ftz.xorsign.abs.f(float 0x380FFFFFC0000000, float 0.0)
+ ret float %res
+}
+
+define float @test_fmax_pos_subnorm_zero_nan_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_zero_nan_f() {
+; CHECK-NEXT: ret float 0x380FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fmax.nan.f(float 0x380FFFFFC0000000, float 0.0)
+ ret float %res
+}
+
+define float @test_fmax_pos_subnorm_zero_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_zero_nan_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0x380FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fmax.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0.0)
+ ret float %res
+}
+
+define float @test_fmax_pos_subnorm_zero_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_zero_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0x380FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fmax.xorsign.abs.f(float 0x380FFFFFC0000000, float 0.0)
+ ret float %res
+}
+
+;###############################################################
+;# FMax(+Subnormal, -Subnormal) #
+;###############################################################
+
+define double @test_fmax_pos_subnorm_neg_subnorm_d() {
+; CHECK-LABEL: define double @test_fmax_pos_subnorm_neg_subnorm_d() {
+; CHECK-NEXT: ret double 0x380FFFFFC0000000
+;
+ %res = call double @llvm.nvvm.fmax.d(double 0x380FFFFFC0000000, double 0xB80FFFFFC0000000)
+ ret double %res
+}
+
+define float @test_fmax_pos_subnorm_neg_subnorm_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_neg_subnorm_f() {
+; CHECK-NEXT: ret float 0x380FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fmax.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+ ret float %res
+}
+
+define float @test_fmax_pos_subnorm_neg_subnorm_ftz_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_neg_subnorm_ftz_f() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmax.ftz.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+ ret float %res
+}
+
+define float @test_fmax_pos_subnorm_neg_subnorm_ftz_nan_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_neg_subnorm_ftz_nan_f() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmax.ftz.nan.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+ ret float %res
+}
+
+define float @test_fmax_pos_subnorm_neg_subnorm_ftz_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_neg_subnorm_ftz_nan_xorsign_abs_f() {
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+ ret float %res
+}
+
+define float @test_fmax_pos_subnorm_neg_subnorm_ftz_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_neg_subnorm_ftz_xorsign_abs_f() {
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmax.ftz.xorsign.abs.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+ ret float %res
+}
+
+define float @test_fmax_pos_subnorm_neg_subnorm_nan_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_neg_subnorm_nan_f() {
+; CHECK-NEXT: ret float 0x380FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fmax.nan.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+ ret float %res
+}
+
+define float @test_fmax_pos_subnorm_neg_subnorm_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_neg_subnorm_nan_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0xB80FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fmax.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+ ret float %res
+}
+
+define float @test_fmax_pos_subnorm_neg_subnorm_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_neg_subnorm_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0xB80FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fmax.xorsign.abs.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+ ret float %res
+}
+
+;###############################################################
+;# FMax(+Subnormal, NaN) #
+;###############################################################
+
+define double @test_fmax_pos_subnorm_nan_d() {
+; CHECK-LABEL: define double @test_fmax_pos_subnorm_nan_d() {
+; CHECK-NEXT: ret double 0x380FFFFFC0000000
+;
+ %res = call double @llvm.nvvm.fmax.d(double 0x380FFFFFC0000000, double 0x7fff444400000000)
+ ret double %res
+}
+
+define float @test_fmax_pos_subnorm_nan_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_nan_f() {
+; CHECK-NEXT: ret float 0x380FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fmax.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+ ret float %res
+}
+
+define float @test_fmax_pos_subnorm_nan_ftz_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_nan_ftz_f() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmax.ftz.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+ ret float %res
+}
+
+define float @test_fmax_pos_subnorm_nan_ftz_nan_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_nan_ftz_nan_f() {
+; CHECK-NEXT: ret float 0x7FFFFFFFE0000000
+;
+ %res = call float @llvm.nvvm.fmax.ftz.nan.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+ ret float %res
+}
+
+define float @test_fmax_pos_subnorm_nan_ftz_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_nan_ftz_nan_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0x7FFFFFFFE0000000
+;
+ %res = call float @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+ ret float %res
+}
+
+define float @test_fmax_pos_subnorm_nan_ftz_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_nan_ftz_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmax.ftz.xorsign.abs.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+ ret float %res
+}
+
+define float @test_fmax_pos_subnorm_nan_nan_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_nan_nan_f() {
+; CHECK-NEXT: ret float 0x7FFFFFFFE0000000
+;
+ %res = call float @llvm.nvvm.fmax.nan.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+ ret float %res
+}
+
+define float @test_fmax_pos_subnorm_nan_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_nan_nan_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0x7FFFFFFFE0000000
+;
+ %res = call float @llvm.nvvm.fmax.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+ ret float %res
+}
+
+define float @test_fmax_pos_subnorm_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_pos_subnorm_nan_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0x380FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fmax.xorsign.abs.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+ ret float %res
+}
+
+;###############################################################
+;# FMax(+Subnormal, undef) #
+;###############################################################
+
+define double @test_fmax_subnorm_undef_d() {
+; CHECK-LABEL: define double @test_fmax_subnorm_undef_d() {
+; CHECK-NEXT: ret double 0x380FFFFFC0000000
+;
+ %res = call double @llvm.nvvm.fmax.d(double 0x380FFFFFC0000000, double undef)
+ ret double %res
+}
+
+define float @test_fmax_subnorm_undef_f() {
+; CHECK-LABEL: define float @test_fmax_subnorm_undef_f() {
+; CHECK-NEXT: ret float 0x380FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fmax.f(float 0x380FFFFFC0000000, float undef)
+ ret float %res
+}
+
+define float @test_fmax_subnorm_undef_ftz_f() {
+; CHECK-LABEL: define float @test_fmax_subnorm_undef_ftz_f() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmax.ftz.f(float 0x380FFFFFC0000000, float undef)
+ ret float %res
+}
+
+define float @test_fmax_subnorm_undef_ftz_nan_f() {
+; CHECK-LABEL: define float @test_fmax_subnorm_undef_ftz_nan_f() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmax.ftz.nan.f(float 0x380FFFFFC0000000, float undef)
+ ret float %res
+}
+
+define float @test_fmax_subnorm_undef_ftz_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_subnorm_undef_ftz_nan_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float undef)
+ ret float %res
+}
+
+define float @test_fmax_subnorm_undef_ftz_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_subnorm_undef_ftz_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmax.ftz.xorsign.abs.f(float 0x380FFFFFC0000000, float undef)
+ ret float %res
+}
+
+define float @test_fmax_subnorm_undef_nan_f() {
+; CHECK-LABEL: define float @test_fmax_subnorm_undef_nan_f() {
+; CHECK-NEXT: ret float 0x380FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fmax.nan.f(float 0x380FFFFFC0000000, float undef)
+ ret float %res
+}
+
+define float @test_fmax_subnorm_undef_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_subnorm_undef_nan_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0x380FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fmax.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float undef)
+ ret float %res
+}
+
+define float @test_fmax_subnorm_undef_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_subnorm_undef_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0x380FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fmax.xorsign.abs.f(float 0x380FFFFFC0000000, float undef)
+ ret float %res
+}
+
+;###############################################################
+;# FMax(NaN, undef) #
+;###############################################################
+; Ensure we canonicalize the NaNs for f32
+
+define double @test_fmax_nan_undef_d() {
+; CHECK-LABEL: define double @test_fmax_nan_undef_d() {
+; CHECK-NEXT: ret double 0x7FF4444400000000
+;
+ %res = call double @llvm.nvvm.fmax.d(double 0x7ff4444400000000, double undef)
+ ret double %res
+}
+
+define float @test_fmax_nan_undef_f() {
+; CHECK-LABEL: define float @test_fmax_nan_undef_f() {
+; CHECK-NEXT: ret float 0x7FFFFFFFE0000000
+;
+ %res = call float @llvm.nvvm.fmax.f(float 0x7fff444400000000, float undef)
+ ret float %res
+}
+
+define float @test_fmax_nan_undef_ftz_f() {
+; CHECK-LABEL: define float @test_fmax_nan_undef_ftz_f() {
+; CHECK-NEXT: ret float 0x7FFFFFFFE0000000
+;
+ %res = call float @llvm.nvvm.fmax.ftz.f(float 0x7fff444400000000, float undef)
+ ret float %res
+}
+
+define float @test_fmax_nan_undef_ftz_nan_f() {
+; CHECK-LABEL: define float @test_fmax_nan_undef_ftz_nan_f() {
+; CHECK-NEXT: ret float 0x7FFFFFFFE0000000
+;
+ %res = call float @llvm.nvvm.fmax.ftz.nan.f(float 0x7fff444400000000, float undef)
+ ret float %res
+}
+
+define float @test_fmax_nan_undef_ftz_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_nan_undef_ftz_nan_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0x7FFFFFFFE0000000
+;
+ %res = call float @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f(float 0x7fff444400000000, float undef)
+ ret float %res
+}
+
+define float @test_fmax_nan_undef_ftz_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_nan_undef_ftz_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0x7FFFFFFFE0000000
+;
+ %res = call float @llvm.nvvm.fmax.ftz.xorsign.abs.f(float 0x7ffff4ff00000000, float undef)
+ ret float %res
+}
+
+define float @test_fmax_nan_undef_nan_f() {
+; CHECK-LABEL: define float @test_fmax_nan_undef_nan_f() {
+; CHECK-NEXT: ret float 0x7FFFFFFFE0000000
+;
+ %res = call float @llvm.nvvm.fmax.nan.f(float 0x7fff444400000000, float undef)
+ ret float %res
+}
+
+define float @test_fmax_nan_undef_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_nan_undef_nan_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0x7FFFFFFFE0000000
+;
+ %res = call float @llvm.nvvm.fmax.nan.xorsign.abs.f(float 0x7fff444400000000, float undef)
+ ret float %res
+}
+
+define float @test_fmax_nan_undef_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmax_nan_undef_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0x7FFFFFFFE0000000
+;
+ %res = call float @llvm.nvvm.fmax.xorsign.abs.f(float 0x7fff444400000000, float undef)
+ ret float %res
+}
+
+;###############################################################
+;# FMin(1.25, -2.0) #
+;###############################################################
+
+define double @test_fmin_1_25_neg_2_d() {
+; CHECK-LABEL: define double @test_fmin_1_25_neg_2_d() {
+; CHECK-NEXT: ret double -2.000000e+00
+;
+ %res = call double @llvm.nvvm.fmin.d(double 1.25, double -2.0)
+ ret double %res
+}
+
+define float @test_fmin_1_25_neg_2_f() {
+; CHECK-LABEL: define float @test_fmin_1_25_neg_2_f() {
+; CHECK-NEXT: ret float -2.000000e+00
+;
+ %res = call float @llvm.nvvm.fmin.f(float 1.25, float -2.0)
+ ret float %res
+}
+
+define float @test_fmin_1_25_neg_2_ftz_f() {
+; CHECK-LABEL: define float @test_fmin_1_25_neg_2_ftz_f() {
+; CHECK-NEXT: ret float -2.000000e+00
+;
+ %res = call float @llvm.nvvm.fmin.ftz.f(float 1.25, float -2.0)
+ ret float %res
+}
+
+define float @test_fmin_1_25_neg_2_ftz_nan_f() {
+; CHECK-LABEL: define float @test_fmin_1_25_neg_2_ftz_nan_f() {
+; CHECK-NEXT: ret float -2.000000e+00
+;
+ %res = call float @llvm.nvvm.fmin.ftz.nan.f(float 1.25, float -2.0)
+ ret float %res
+}
+
+define float @test_fmin_1_25_neg_2_ftz_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_1_25_neg_2_ftz_nan_xorsign_abs_f() {
+; CHECK-NEXT: ret float -1.250000e+00
+;
+ %res = call float @llvm.nvvm.fmin.ftz.nan.xorsign.abs.f(float 1.25, float -2.0)
+ ret float %res
+}
+
+define float @test_fmin_1_25_neg_2_ftz_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_1_25_neg_2_ftz_xorsign_abs_f() {
+; CHECK-NEXT: ret float -1.250000e+00
+;
+ %res = call float @llvm.nvvm.fmin.ftz.xorsign.abs.f(float 1.25, float -2.0)
+ ret float %res ; expected fold: -1.25 (smaller magnitude, negative because the operand signs differ)
+}
+
+define float @test_fmin_1_25_neg_2_nan_f() {
+; CHECK-LABEL: define float @test_fmin_1_25_neg_2_nan_f() {
+; CHECK-NEXT: ret float -2.000000e+00
+;
+ %res = call float @llvm.nvvm.fmin.nan.f(float 1.25, float -2.0)
+ ret float %res ; expected fold: -2.0; no NaN operand, so .nan has no visible effect
+}
+
+define float @test_fmin_1_25_neg_2_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_1_25_neg_2_nan_xorsign_abs_f() {
+; CHECK-NEXT: ret float -1.250000e+00
+;
+ %res = call float @llvm.nvvm.fmin.nan.xorsign.abs.f(float 1.25, float -2.0)
+ ret float %res ; expected fold: -1.25 (smaller magnitude, negative since exactly one operand is negative)
+}
+
+define float @test_fmin_1_25_neg_2_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_1_25_neg_2_xorsign_abs_f() {
+; CHECK-NEXT: ret float -1.250000e+00
+;
+ %res = call float @llvm.nvvm.fmin.xorsign.abs.f(float 1.25, float -2.0)
+ ret float %res ; expected fold: -1.25 rather than -2.0, consistent with comparing magnitudes and XOR-ing the signs
+}
+
+;###############################################################
+;# FMin(-Subnormal, 0.0) #
+;###############################################################
+
+define double @test_fmin_neg_subnorm_zero_d() {
+; CHECK-LABEL: define double @test_fmin_neg_subnorm_zero_d() {
+; CHECK-NEXT: ret double 0xB80FFFFFC0000000
+;
+ %res = call double @llvm.nvvm.fmin.d(double 0xB80FFFFFC0000000, double 0.0)
+ ret double %res ; expected fold: the negative operand, returned bit-for-bit
+}
+
+define float @test_fmin_neg_subnorm_zero_f() {
+; CHECK-LABEL: define float @test_fmin_neg_subnorm_zero_f() {
+; CHECK-NEXT: ret float 0xB80FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fmin.f(float 0xB80FFFFFC0000000, float 0.0)
+ ret float %res ; expected fold: the negative subnormal itself; without .ftz it is not flushed
+}
+
+define float @test_fmin_neg_subnorm_zero_ftz_f() {
+; CHECK-LABEL: define float @test_fmin_neg_subnorm_zero_ftz_f() {
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmin.ftz.f(float 0xB80FFFFFC0000000, float 0.0)
+ ret float %res ; expected fold: -0.0; with .ftz the negative subnormal shows up as a negative zero
+}
+
+define float @test_fmin_neg_subnorm_zero_ftz_nan_f() {
+; CHECK-LABEL: define float @test_fmin_neg_subnorm_zero_ftz_nan_f() {
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmin.ftz.nan.f(float 0xB80FFFFFC0000000, float 0.0)
+ ret float %res ; expected fold: -0.0 (subnormal flushed, sign kept); there is no NaN operand for .nan to act on
+}
+
+define float @test_fmin_neg_subnorm_zero_ftz_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_neg_subnorm_zero_ftz_nan_xorsign_abs_f() {
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmin.ftz.nan.xorsign.abs.f(float 0xB80FFFFFC0000000, float 0.0)
+ ret float %res ; expected fold: -0.0 (zero magnitude, negative because the operand signs differ)
+}
+
+define float @test_fmin_neg_subnorm_zero_ftz_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_neg_subnorm_zero_ftz_xorsign_abs_f() {
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmin.ftz.xorsign.abs.f(float 0xB80FFFFFC0000000, float 0.0)
+ ret float %res ; expected fold: -0.0 (zero magnitude, negative because the operand signs differ)
+}
+
+define float @test_fmin_neg_subnorm_zero_nan_f() {
+; CHECK-LABEL: define float @test_fmin_neg_subnorm_zero_nan_f() {
+; CHECK-NEXT: ret float 0xB80FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fmin.nan.f(float 0xB80FFFFFC0000000, float 0.0)
+ ret float %res ; expected fold: the negative subnormal, unflushed; there is no NaN operand for .nan to propagate
+}
+
+define float @test_fmin_neg_subnorm_zero_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_neg_subnorm_zero_nan_xorsign_abs_f() {
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmin.nan.xorsign.abs.f(float 0xB80FFFFFC0000000, float 0.0)
+ ret float %res ; expected fold: -0.0 even without .ftz: the smaller magnitude is 0.0 and the operand signs differ
+}
+
+define float @test_fmin_neg_subnorm_zero_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_neg_subnorm_zero_xorsign_abs_f() {
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmin.xorsign.abs.f(float 0xB80FFFFFC0000000, float 0.0)
+ ret float %res ; expected fold: -0.0; the zero operand has the smaller magnitude and the operand signs differ
+}
+
+;###############################################################
+;# FMin(+Subnormal, -Subnormal) #
+;###############################################################
+
+define double @test_fmin_pos_subnorm_neg_subnorm_d() {
+; CHECK-LABEL: define double @test_fmin_pos_subnorm_neg_subnorm_d() {
+; CHECK-NEXT: ret double 0xB80FFFFFC0000000
+;
+ %res = call double @llvm.nvvm.fmin.d(double 0x380FFFFFC0000000, double 0xB80FFFFFC0000000)
+ ret double %res ; expected fold: the negative operand, returned bit-for-bit
+}
+
+define float @test_fmin_pos_subnorm_neg_subnorm_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_neg_subnorm_f() {
+; CHECK-NEXT: ret float 0xB80FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fmin.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+ ret float %res ; expected fold: the negative subnormal; without .ftz it is not flushed
+}
+
+define float @test_fmin_pos_subnorm_neg_subnorm_ftz_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_neg_subnorm_ftz_f() {
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmin.ftz.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+ ret float %res ; expected fold: -0.0; both operands are subnormal and the result is the negative zero
+}
+
+define float @test_fmin_pos_subnorm_neg_subnorm_ftz_nan_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_neg_subnorm_ftz_nan_f() {
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmin.ftz.nan.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+ ret float %res ; expected fold: -0.0, same as the plain .ftz variant; there is no NaN operand for .nan to act on
+}
+
+define float @test_fmin_pos_subnorm_neg_subnorm_ftz_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_neg_subnorm_ftz_nan_xorsign_abs_f() {
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmin.ftz.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+ ret float %res ; expected fold: -0.0 (flushed magnitude, negative because the operand signs differ)
+}
+
+define float @test_fmin_pos_subnorm_neg_subnorm_ftz_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_neg_subnorm_ftz_xorsign_abs_f() {
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmin.ftz.xorsign.abs.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+ ret float %res ; expected fold: -0.0 (flushed magnitude, negative because the operand signs differ)
+}
+
+define float @test_fmin_pos_subnorm_neg_subnorm_nan_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_neg_subnorm_nan_f() {
+; CHECK-NEXT: ret float 0xB80FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fmin.nan.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+ ret float %res ; expected fold: the negative subnormal, unflushed; there is no NaN operand for .nan to propagate
+}
+
+define float @test_fmin_pos_subnorm_neg_subnorm_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_neg_subnorm_nan_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0xB80FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fmin.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+ ret float %res ; expected fold: the shared subnormal magnitude with a negative sign; not flushed without .ftz
+}
+
+define float @test_fmin_pos_subnorm_neg_subnorm_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_neg_subnorm_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0xB80FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fmin.xorsign.abs.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000)
+ ret float %res ; expected fold: the shared subnormal magnitude with a negative sign (operand signs differ)
+}
+
+;###############################################################
+;# FMin(+Subnormal, NaN) #
+;###############################################################
+
+define double @test_fmin_pos_subnorm_nan_d() {
+; CHECK-LABEL: define double @test_fmin_pos_subnorm_nan_d() {
+; CHECK-NEXT: ret double 0x380FFFFFC0000000
+;
+ %res = call double @llvm.nvvm.fmin.d(double 0x380FFFFFC0000000, double 0x7fff444400000000)
+ ret double %res ; expected fold: the non-NaN operand; this variant does not propagate the NaN
+}
+
+define float @test_fmin_pos_subnorm_nan_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_nan_f() {
+; CHECK-NEXT: ret float 0x380FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fmin.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+ ret float %res ; expected fold: the non-NaN operand, unflushed; without .nan the NaN is not propagated
+}
+
+define float @test_fmin_pos_subnorm_nan_ftz_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_nan_ftz_f() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmin.ftz.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+ ret float %res ; expected fold: +0.0; the NaN is not propagated and the subnormal result is flushed by .ftz
+}
+
+define float @test_fmin_pos_subnorm_nan_ftz_nan_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_nan_ftz_nan_f() {
+; CHECK-NEXT: ret float 0x7FFFFFFFE0000000
+;
+ %res = call float @llvm.nvvm.fmin.ftz.nan.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+ ret float %res ; expected fold: canonical f32 NaN; with .nan the NaN operand determines the result
+}
+
+define float @test_fmin_pos_subnorm_nan_ftz_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_nan_ftz_nan_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0x7FFFFFFFE0000000
+;
+ %res = call float @llvm.nvvm.fmin.ftz.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+ ret float %res ; expected fold: canonical f32 NaN; .nan still wins when combined with .ftz and xorsign.abs
+}
+
+define float @test_fmin_pos_subnorm_nan_ftz_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_nan_ftz_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmin.ftz.xorsign.abs.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+ ret float %res ; expected fold: +0.0; no .nan, so the subnormal is the result and .ftz flushes it
+}
+
+define float @test_fmin_pos_subnorm_nan_nan_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_nan_nan_f() {
+; CHECK-NEXT: ret float 0x7FFFFFFFE0000000
+;
+ %res = call float @llvm.nvvm.fmin.nan.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+ ret float %res ; expected fold: canonical f32 NaN; the input NaN payload (0x7fff4444...) is not preserved
+}
+
+define float @test_fmin_pos_subnorm_nan_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_nan_nan_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0x7FFFFFFFE0000000
+;
+ %res = call float @llvm.nvvm.fmin.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+ ret float %res ; expected fold: canonical f32 NaN; .nan propagation also applies to the xorsign.abs form
+}
+
+define float @test_fmin_pos_subnorm_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_pos_subnorm_nan_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0x380FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fmin.xorsign.abs.f(float 0x380FFFFFC0000000, float 0x7fff444400000000)
+ ret float %res ; expected fold: the positive subnormal; no .nan to propagate the NaN and no .ftz to flush
+}
+
+;###############################################################
+;# FMin(+Subnormal, undef) #
+;###############################################################
+
+define double @test_fmin_subnorm_undef_d() {
+; CHECK-LABEL: define double @test_fmin_subnorm_undef_d() {
+; CHECK-NEXT: ret double 0x380FFFFFC0000000
+;
+ %res = call double @llvm.nvvm.fmin.d(double 0x380FFFFFC0000000, double undef)
+ ret double %res ; expected fold: the defined operand, returned unchanged when the other one is undef
+}
+
+define float @test_fmin_subnorm_undef_f() {
+; CHECK-LABEL: define float @test_fmin_subnorm_undef_f() {
+; CHECK-NEXT: ret float 0x380FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fmin.f(float 0x380FFFFFC0000000, float undef)
+ ret float %res ; expected fold: the defined operand, unflushed, when the other one is undef
+}
+
+define float @test_fmin_subnorm_undef_ftz_f() {
+; CHECK-LABEL: define float @test_fmin_subnorm_undef_ftz_f() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmin.ftz.f(float 0x380FFFFFC0000000, float undef)
+ ret float %res ; expected fold: +0.0; the defined operand is still flushed by .ftz when the other one is undef
+}
+
+define float @test_fmin_subnorm_undef_ftz_nan_f() {
+; CHECK-LABEL: define float @test_fmin_subnorm_undef_ftz_nan_f() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmin.ftz.nan.f(float 0x380FFFFFC0000000, float undef)
+ ret float %res ; expected fold: +0.0; the flushed defined operand (undef is not treated as a NaN by .nan)
+}
+
+define float @test_fmin_subnorm_undef_ftz_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_subnorm_undef_ftz_nan_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmin.ftz.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float undef)
+ ret float %res ; expected fold: +0.0; the flushed defined operand, with its positive sign unchanged
+}
+
+define float @test_fmin_subnorm_undef_ftz_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_subnorm_undef_ftz_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %res = call float @llvm.nvvm.fmin.ftz.xorsign.abs.f(float 0x380FFFFFC0000000, float undef)
+ ret float %res ; expected fold: +0.0; the flushed defined operand, with its positive sign unchanged
+}
+
+define float @test_fmin_subnorm_undef_nan_f() {
+; CHECK-LABEL: define float @test_fmin_subnorm_undef_nan_f() {
+; CHECK-NEXT: ret float 0x380FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fmin.nan.f(float 0x380FFFFFC0000000, float undef)
+ ret float %res ; expected fold: the defined subnormal operand, unchanged (no .ftz, no NaN operand)
+}
+
+define float @test_fmin_subnorm_undef_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_subnorm_undef_nan_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0x380FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fmin.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float undef)
+ ret float %res ; expected fold: the defined subnormal operand, with magnitude and sign unchanged
+}
+
+define float @test_fmin_subnorm_undef_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_subnorm_undef_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0x380FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fmin.xorsign.abs.f(float 0x380FFFFFC0000000, float undef)
+ ret float %res ; expected fold: the defined subnormal operand, with magnitude and sign unchanged
+}
+
+;###############################################################
+;# FMin(NaN, undef) #
+;###############################################################
+; Ensure NaN results are canonicalized for f32; the f64 test keeps the input NaN bits.
+
+define double @test_fmin_nan_undef_d() {
+; CHECK-LABEL: define double @test_fmin_nan_undef_d() {
+; CHECK-NEXT: ret double 0x7FF4444400000000
+;
+ %res = call double @llvm.nvvm.fmin.d(double 0x7ff4444400000000, double undef)
+ ret double %res ; expected fold: the input NaN with its bits intact; the f64 result is not canonicalized
+}
+
+define float @test_fmin_nan_undef_f() {
+; CHECK-LABEL: define float @test_fmin_nan_undef_f() {
+; CHECK-NEXT: ret float 0x7FFFFFFFE0000000
+;
+ %res = call float @llvm.nvvm.fmin.f(float 0x7fff444400000000, float undef)
+ ret float %res ; expected fold: canonical f32 NaN, not the input payload, even without .nan
+}
+
+define float @test_fmin_nan_undef_ftz_f() {
+; CHECK-LABEL: define float @test_fmin_nan_undef_ftz_f() {
+; CHECK-NEXT: ret float 0x7FFFFFFFE0000000
+;
+ %res = call float @llvm.nvvm.fmin.ftz.f(float 0x7fff444400000000, float undef)
+ ret float %res ; expected fold: canonical f32 NaN; .ftz does not change the outcome for a NaN operand
+}
+
+define float @test_fmin_nan_undef_ftz_nan_f() {
+; CHECK-LABEL: define float @test_fmin_nan_undef_ftz_nan_f() {
+; CHECK-NEXT: ret float 0x7FFFFFFFE0000000
+;
+ %res = call float @llvm.nvvm.fmin.ftz.nan.f(float 0x7fff444400000000, float undef)
+ ret float %res ; expected fold: canonical f32 NaN, same as the variants without .nan
+}
+
+define float @test_fmin_nan_undef_ftz_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_nan_undef_ftz_nan_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0x7FFFFFFFE0000000
+;
+ %res = call float @llvm.nvvm.fmin.ftz.nan.xorsign.abs.f(float 0x7fff444400000000, float undef)
+ ret float %res ; expected fold: canonical f32 NaN with all three modifiers combined
+}
+
+define float @test_fmin_nan_undef_ftz_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_nan_undef_ftz_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0x7FFFFFFFE0000000
+;
+ %res = call float @llvm.nvvm.fmin.ftz.xorsign.abs.f(float 0x7ffff4ff00000000, float undef)
+ ret float %res ; expected fold: canonical f32 NaN; note this input NaN payload (0x7ffff4ff...) differs from the sibling tests
+}
+
+define float @test_fmin_nan_undef_nan_f() {
+; CHECK-LABEL: define float @test_fmin_nan_undef_nan_f() {
+; CHECK-NEXT: ret float 0x7FFFFFFFE0000000
+;
+ %res = call float @llvm.nvvm.fmin.nan.f(float 0x7fff444400000000, float undef)
+ ret float %res ; expected fold: canonical f32 NaN; the input payload (0x7fff4444...) is not preserved
+}
+
+define float @test_fmin_nan_undef_nan_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_nan_undef_nan_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0x7FFFFFFFE0000000
+;
+ %res = call float @llvm.nvvm.fmin.nan.xorsign.abs.f(float 0x7fff444400000000, float undef)
+ ret float %res ; expected fold: canonical f32 NaN for the .nan + xorsign.abs combination
+}
+
+define float @test_fmin_nan_undef_xorsign_abs_f() {
+; CHECK-LABEL: define float @test_fmin_nan_undef_xorsign_abs_f() {
+; CHECK-NEXT: ret float 0x7FFFFFFFE0000000
+;
+ %res = call float @llvm.nvvm.fmin.xorsign.abs.f(float 0x7fff444400000000, float undef)
+ ret float %res ; expected fold: canonical f32 NaN; the other operand is undef and the input payload is not kept
+}
More information about the llvm-commits
mailing list