[llvm] fd8ae2c - Add constant-folding for unary NVVM intrinsics (#141233)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 21 03:32:14 PDT 2025
Author: Lewis Crawford
Date: 2025-07-21T11:32:09+01:00
New Revision: fd8ae2cb76bd0a721eea12767c1a630d465b8495
URL: https://github.com/llvm/llvm-project/commit/fd8ae2cb76bd0a721eea12767c1a630d465b8495
DIFF: https://github.com/llvm/llvm-project/commit/fd8ae2cb76bd0a721eea12767c1a630d465b8495.diff
LOG: Add constant-folding for unary NVVM intrinsics (#141233)
Add support for constant-folding numerous NVVM unary arithmetic
intrinsics (including f, d, and ftz_f variants):
- nvvm.ceil.*
- nvvm.fabs.*
- nvvm.floor.*
- nvvm.rcp.*
- nvvm.round.*
- nvvm.saturate.*
- nvvm.sqrt.f
- nvvm.sqrt.rn.*
Added:
llvm/test/Transforms/InstSimplify/const-fold-nvvm-unary-arithmetic.ll
Modified:
llvm/include/llvm/IR/NVVMIntrinsicUtils.h
llvm/lib/Analysis/ConstantFolding.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
index 737610b73b081..0fd5de3b8ea42 100644
--- a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
+++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
@@ -112,7 +112,6 @@ inline bool FPToIntegerIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
return false;
}
llvm_unreachable("Checking FTZ flag for invalid f2i/d2i intrinsic");
- return false;
}
inline bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID) {
@@ -179,7 +178,6 @@ inline bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID) {
}
llvm_unreachable(
"Checking invalid f2i/d2i intrinsic for signed int conversion");
- return false;
}
inline APFloat::roundingMode
@@ -250,7 +248,6 @@ GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID) {
return APFloat::rmTowardZero;
}
llvm_unreachable("Checking rounding mode for invalid f2i/d2i intrinsic");
- return APFloat::roundingMode::Invalid;
}
inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) {
@@ -280,7 +277,6 @@ inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) {
return false;
}
llvm_unreachable("Checking FTZ flag for invalid fmin/fmax intrinsic");
- return false;
}
inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) {
@@ -310,7 +306,6 @@ inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) {
return false;
}
llvm_unreachable("Checking NaN flag for invalid fmin/fmax intrinsic");
- return false;
}
inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) {
@@ -340,7 +335,83 @@ inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) {
return false;
}
llvm_unreachable("Checking XorSignAbs flag for invalid fmin/fmax intrinsic");
- return false;
+}
+
+inline bool UnaryMathIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
+ switch (IntrinsicID) {
+ case Intrinsic::nvvm_ceil_ftz_f:
+ case Intrinsic::nvvm_fabs_ftz:
+ case Intrinsic::nvvm_floor_ftz_f:
+ case Intrinsic::nvvm_round_ftz_f:
+ case Intrinsic::nvvm_saturate_ftz_f:
+ case Intrinsic::nvvm_sqrt_rn_ftz_f:
+ return true;
+ case Intrinsic::nvvm_ceil_f:
+ case Intrinsic::nvvm_ceil_d:
+ case Intrinsic::nvvm_fabs:
+ case Intrinsic::nvvm_floor_f:
+ case Intrinsic::nvvm_floor_d:
+ case Intrinsic::nvvm_round_f:
+ case Intrinsic::nvvm_round_d:
+ case Intrinsic::nvvm_saturate_d:
+ case Intrinsic::nvvm_saturate_f:
+ case Intrinsic::nvvm_sqrt_f:
+ case Intrinsic::nvvm_sqrt_rn_d:
+ case Intrinsic::nvvm_sqrt_rn_f:
+ return false;
+ }
+ llvm_unreachable("Checking FTZ flag for invalid unary intrinsic");
+}
+
+inline bool RCPShouldFTZ(Intrinsic::ID IntrinsicID) {
+ switch (IntrinsicID) {
+ case Intrinsic::nvvm_rcp_rm_ftz_f:
+ case Intrinsic::nvvm_rcp_rn_ftz_f:
+ case Intrinsic::nvvm_rcp_rp_ftz_f:
+ case Intrinsic::nvvm_rcp_rz_ftz_f:
+ return true;
+ case Intrinsic::nvvm_rcp_rm_d:
+ case Intrinsic::nvvm_rcp_rm_f:
+ case Intrinsic::nvvm_rcp_rn_d:
+ case Intrinsic::nvvm_rcp_rn_f:
+ case Intrinsic::nvvm_rcp_rp_d:
+ case Intrinsic::nvvm_rcp_rp_f:
+ case Intrinsic::nvvm_rcp_rz_d:
+ case Intrinsic::nvvm_rcp_rz_f:
+ return false;
+ }
+ llvm_unreachable("Checking FTZ flag for invalid rcp intrinsic");
+}
+
+inline APFloat::roundingMode GetRCPRoundingMode(Intrinsic::ID IntrinsicID) {
+ switch (IntrinsicID) {
+ case Intrinsic::nvvm_rcp_rm_f:
+ case Intrinsic::nvvm_rcp_rm_d:
+ case Intrinsic::nvvm_rcp_rm_ftz_f:
+ return APFloat::rmTowardNegative;
+
+ case Intrinsic::nvvm_rcp_rn_f:
+ case Intrinsic::nvvm_rcp_rn_d:
+ case Intrinsic::nvvm_rcp_rn_ftz_f:
+ return APFloat::rmNearestTiesToEven;
+
+ case Intrinsic::nvvm_rcp_rp_f:
+ case Intrinsic::nvvm_rcp_rp_d:
+ case Intrinsic::nvvm_rcp_rp_ftz_f:
+ return APFloat::rmTowardPositive;
+
+ case Intrinsic::nvvm_rcp_rz_f:
+ case Intrinsic::nvvm_rcp_rz_d:
+ case Intrinsic::nvvm_rcp_rz_ftz_f:
+ return APFloat::rmTowardZero;
+ }
+ llvm_unreachable("Checking rounding mode for invalid rcp intrinsic");
+}
+
+inline DenormalMode GetNVVMDenromMode(bool ShouldFTZ) {
+ if (ShouldFTZ)
+ return DenormalMode::getPreserveSign();
+ return DenormalMode::getIEEE();
}
} // namespace nvvm
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 9c1c2c6e60f02..f5a88b6e0368e 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1801,6 +1801,44 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::nvvm_d2ull_rn:
case Intrinsic::nvvm_d2ull_rp:
case Intrinsic::nvvm_d2ull_rz:
+
+ // NVVM math intrinsics:
+ case Intrinsic::nvvm_ceil_d:
+ case Intrinsic::nvvm_ceil_f:
+ case Intrinsic::nvvm_ceil_ftz_f:
+
+ case Intrinsic::nvvm_fabs:
+ case Intrinsic::nvvm_fabs_ftz:
+
+ case Intrinsic::nvvm_floor_d:
+ case Intrinsic::nvvm_floor_f:
+ case Intrinsic::nvvm_floor_ftz_f:
+
+ case Intrinsic::nvvm_rcp_rm_d:
+ case Intrinsic::nvvm_rcp_rm_f:
+ case Intrinsic::nvvm_rcp_rm_ftz_f:
+ case Intrinsic::nvvm_rcp_rn_d:
+ case Intrinsic::nvvm_rcp_rn_f:
+ case Intrinsic::nvvm_rcp_rn_ftz_f:
+ case Intrinsic::nvvm_rcp_rp_d:
+ case Intrinsic::nvvm_rcp_rp_f:
+ case Intrinsic::nvvm_rcp_rp_ftz_f:
+ case Intrinsic::nvvm_rcp_rz_d:
+ case Intrinsic::nvvm_rcp_rz_f:
+ case Intrinsic::nvvm_rcp_rz_ftz_f:
+
+ case Intrinsic::nvvm_round_d:
+ case Intrinsic::nvvm_round_f:
+ case Intrinsic::nvvm_round_ftz_f:
+
+ case Intrinsic::nvvm_saturate_d:
+ case Intrinsic::nvvm_saturate_f:
+ case Intrinsic::nvvm_saturate_ftz_f:
+
+ case Intrinsic::nvvm_sqrt_f:
+ case Intrinsic::nvvm_sqrt_rn_d:
+ case Intrinsic::nvvm_sqrt_rn_f:
+ case Intrinsic::nvvm_sqrt_rn_ftz_f:
return !Call->isStrictFP();
// Sign operations are actually bitwise operations, they do not raise
@@ -1818,6 +1856,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::nearbyint:
case Intrinsic::rint:
case Intrinsic::canonicalize:
+
// Constrained intrinsics can be folded if FP environment is known
// to compiler.
case Intrinsic::experimental_constrained_fma:
@@ -1965,22 +2004,56 @@ inline bool llvm_fenv_testexcept() {
return false;
}
-static APFloat FTZPreserveSign(const APFloat &V) {
+static const APFloat FTZPreserveSign(const APFloat &V) {
if (V.isDenormal())
return APFloat::getZero(V.getSemantics(), V.isNegative());
return V;
}
-Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V,
- Type *Ty) {
+static const APFloat FlushToPositiveZero(const APFloat &V) {
+ if (V.isDenormal())
+ return APFloat::getZero(V.getSemantics(), false);
+ return V;
+}
+
+static const APFloat
+FlushWithDenormKind(const APFloat &V,
+ DenormalMode::DenormalModeKind DenormKind) {
+ assert(DenormKind != DenormalMode::DenormalModeKind::Invalid &&
+ DenormKind != DenormalMode::DenormalModeKind::Dynamic);
+ switch (DenormKind) {
+ case DenormalMode::DenormalModeKind::IEEE:
+ return V;
+ case DenormalMode::DenormalModeKind::PreserveSign:
+ return FTZPreserveSign(V);
+ case DenormalMode::DenormalModeKind::PositiveZero:
+ return FlushToPositiveZero(V);
+ default:
+ llvm_unreachable("Invalid denormal mode!");
+ }
+}
+
+Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V, Type *Ty,
+ DenormalMode DenormMode = DenormalMode::getIEEE()) {
+ if (!DenormMode.isValid() ||
+ DenormMode.Input == DenormalMode::DenormalModeKind::Dynamic ||
+ DenormMode.Output == DenormalMode::DenormalModeKind::Dynamic)
+ return nullptr;
+
llvm_fenv_clearexcept();
- double Result = NativeFP(V.convertToDouble());
+ auto Input = FlushWithDenormKind(V, DenormMode.Input);
+ double Result = NativeFP(Input.convertToDouble());
if (llvm_fenv_testexcept()) {
llvm_fenv_clearexcept();
return nullptr;
}
- return GetConstantFoldFPValue(Result, Ty);
+ Constant *Output = GetConstantFoldFPValue(Result, Ty);
+ if (DenormMode.Output == DenormalMode::DenormalModeKind::IEEE)
+ return Output;
+ const auto *CFP = static_cast<ConstantFP *>(Output);
+ const auto Res = FlushWithDenormKind(CFP->getValueAPF(), DenormMode.Output);
+ return ConstantFP::get(Ty->getContext(), Res);
}
#if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
@@ -2550,6 +2623,91 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
return ConstantFoldFP(atan, APF, Ty);
case Intrinsic::sqrt:
return ConstantFoldFP(sqrt, APF, Ty);
+
+ // NVVM Intrinsics:
+ case Intrinsic::nvvm_ceil_ftz_f:
+ case Intrinsic::nvvm_ceil_f:
+ case Intrinsic::nvvm_ceil_d:
+ return ConstantFoldFP(
+ ceil, APF, Ty,
+ nvvm::GetNVVMDenromMode(
+ nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
+
+ case Intrinsic::nvvm_fabs_ftz:
+ case Intrinsic::nvvm_fabs:
+ return ConstantFoldFP(
+ fabs, APF, Ty,
+ nvvm::GetNVVMDenromMode(
+ nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
+
+ case Intrinsic::nvvm_floor_ftz_f:
+ case Intrinsic::nvvm_floor_f:
+ case Intrinsic::nvvm_floor_d:
+ return ConstantFoldFP(
+ floor, APF, Ty,
+ nvvm::GetNVVMDenromMode(
+ nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
+
+ case Intrinsic::nvvm_rcp_rm_ftz_f:
+ case Intrinsic::nvvm_rcp_rn_ftz_f:
+ case Intrinsic::nvvm_rcp_rp_ftz_f:
+ case Intrinsic::nvvm_rcp_rz_ftz_f:
+ case Intrinsic::nvvm_rcp_rm_d:
+ case Intrinsic::nvvm_rcp_rm_f:
+ case Intrinsic::nvvm_rcp_rn_d:
+ case Intrinsic::nvvm_rcp_rn_f:
+ case Intrinsic::nvvm_rcp_rp_d:
+ case Intrinsic::nvvm_rcp_rp_f:
+ case Intrinsic::nvvm_rcp_rz_d:
+ case Intrinsic::nvvm_rcp_rz_f: {
+ APFloat::roundingMode RoundMode = nvvm::GetRCPRoundingMode(IntrinsicID);
+ bool IsFTZ = nvvm::RCPShouldFTZ(IntrinsicID);
+
+ auto Denominator = IsFTZ ? FTZPreserveSign(APF) : APF;
+ APFloat Res = APFloat::getOne(APF.getSemantics());
+ APFloat::opStatus Status = Res.divide(Denominator, RoundMode);
+
+ if (Status == APFloat::opOK || Status == APFloat::opInexact) {
+ if (IsFTZ)
+ Res = FTZPreserveSign(Res);
+ return ConstantFP::get(Ty->getContext(), Res);
+ }
+ return nullptr;
+ }
+
+ case Intrinsic::nvvm_round_ftz_f:
+ case Intrinsic::nvvm_round_f:
+ case Intrinsic::nvvm_round_d:
+ return ConstantFoldFP(
+ round, APF, Ty,
+ nvvm::GetNVVMDenromMode(
+ nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
+
+ case Intrinsic::nvvm_saturate_ftz_f:
+ case Intrinsic::nvvm_saturate_d:
+ case Intrinsic::nvvm_saturate_f: {
+ bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID);
+ auto V = IsFTZ ? FTZPreserveSign(APF) : APF;
+ if (V.isNegative() || V.isZero() || V.isNaN())
+ return ConstantFP::getZero(Ty);
+ APFloat One = APFloat::getOne(APF.getSemantics());
+ if (V > One)
+ return ConstantFP::get(Ty->getContext(), One);
+ return ConstantFP::get(Ty->getContext(), APF);
+ }
+
+ case Intrinsic::nvvm_sqrt_rn_ftz_f:
+ case Intrinsic::nvvm_sqrt_f:
+ case Intrinsic::nvvm_sqrt_rn_d:
+ case Intrinsic::nvvm_sqrt_rn_f:
+ if (APF.isNegative())
+ return nullptr;
+ return ConstantFoldFP(
+ sqrt, APF, Ty,
+ nvvm::GetNVVMDenromMode(
+ nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
+
+ // AMDGCN Intrinsics:
case Intrinsic::amdgcn_cos:
case Intrinsic::amdgcn_sin: {
double V = getValueAsDouble(Op);
diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-unary-arithmetic.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-unary-arithmetic.ll
new file mode 100644
index 0000000000000..75b850978b75a
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-unary-arithmetic.ll
@@ -0,0 +1,646 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=instsimplify -march=nvptx64 -S | FileCheck %s
+
+; Test constant-folding for various NVVM unary arithmetic intrinsics.
+
+;###############################################################
+;# Ceil #
+;###############################################################
+
+define double @test_ceil_d_1_25() {
+; CHECK-LABEL: define double @test_ceil_d_1_25() {
+; CHECK-NEXT: ret double 2.000000e+00
+;
+ %res = call double @llvm.nvvm.ceil.d(double 1.25)
+ ret double %res
+}
+
+define float @test_ceil_f_1_25() {
+; CHECK-LABEL: define float @test_ceil_f_1_25() {
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %res = call float @llvm.nvvm.ceil.f(float 1.25)
+ ret float %res
+}
+
+define float @test_ceil_ftz_f_1_25() {
+; CHECK-LABEL: define float @test_ceil_ftz_f_1_25() {
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %res = call float @llvm.nvvm.ceil.ftz.f(float 1.25)
+ ret float %res
+}
+
+define double @test_ceil_d_pos_subnorm() {
+; CHECK-LABEL: define double @test_ceil_d_pos_subnorm() {
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %res = call double @llvm.nvvm.ceil.d(double 0x380FFFFFC0000000)
+ ret double %res
+}
+
+define float @test_ceil_f_pos_subnorm() {
+; CHECK-LABEL: define float @test_ceil_f_pos_subnorm() {
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %res = call float @llvm.nvvm.ceil.f(float 0x380FFFFFC0000000)
+ ret float %res
+}
+
+define float @test_ceil_ftz_f_pos_subnorm() {
+; CHECK-LABEL: define float @test_ceil_ftz_f_pos_subnorm() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %res = call float @llvm.nvvm.ceil.ftz.f(float 0x380FFFFFC0000000)
+ ret float %res
+}
+
+;###############################################################
+;# FAbs #
+;###############################################################
+
+define float @test_fabs_neg_1_5() {
+; CHECK-LABEL: define float @test_fabs_neg_1_5() {
+; CHECK-NEXT: ret float 1.500000e+00
+;
+ %res = call float @llvm.nvvm.fabs(float -1.5)
+ ret float %res
+}
+
+define float @test_fabs_ftz_neg_1_5() {
+; CHECK-LABEL: define float @test_fabs_ftz_neg_1_5() {
+; CHECK-NEXT: ret float 1.500000e+00
+;
+ %res = call float @llvm.nvvm.fabs.ftz(float -1.5)
+ ret float %res
+}
+
+define float @test_fabs_1_25() {
+; CHECK-LABEL: define float @test_fabs_1_25() {
+; CHECK-NEXT: ret float 1.250000e+00
+;
+ %res = call float @llvm.nvvm.fabs(float 1.25)
+ ret float %res
+}
+
+define float @test_fabs_ftz_1_25() {
+; CHECK-LABEL: define float @test_fabs_ftz_1_25() {
+; CHECK-NEXT: ret float 1.250000e+00
+;
+ %res = call float @llvm.nvvm.fabs.ftz(float 1.25)
+ ret float %res
+}
+
+define float @test_fabs_neg_subnorm() {
+; CHECK-LABEL: define float @test_fabs_neg_subnorm() {
+; CHECK-NEXT: ret float 0x380FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fabs(float 0xB80FFFFFC0000000)
+ ret float %res
+}
+
+define float @test_fabs_ftz_neg_subnorm() {
+; CHECK-LABEL: define float @test_fabs_ftz_neg_subnorm() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %res = call float @llvm.nvvm.fabs.ftz(float 0xB80FFFFFC0000000)
+ ret float %res
+}
+
+define float @test_fabs_pos_subnorm() {
+; CHECK-LABEL: define float @test_fabs_pos_subnorm() {
+; CHECK-NEXT: ret float 0x380FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.fabs(float 0x380FFFFFC0000000)
+ ret float %res
+}
+
+define float @test_fabs_ftz_pos_subnorm() {
+; CHECK-LABEL: define float @test_fabs_ftz_pos_subnorm() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %res = call float @llvm.nvvm.fabs.ftz(float 0x380FFFFFC0000000)
+ ret float %res
+}
+
+
+;###############################################################
+;# Floor #
+;###############################################################
+
+define double @test_floor_d_1_25() {
+; CHECK-LABEL: define double @test_floor_d_1_25() {
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %res = call double @llvm.nvvm.floor.d(double 1.25)
+ ret double %res
+}
+
+define float @test_floor_f_1_25() {
+; CHECK-LABEL: define float @test_floor_f_1_25() {
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %res = call float @llvm.nvvm.floor.f(float 1.25)
+ ret float %res
+}
+
+define float @test_floor_ftz_f_1_25() {
+; CHECK-LABEL: define float @test_floor_ftz_f_1_25() {
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %res = call float @llvm.nvvm.floor.ftz.f(float 1.25)
+ ret float %res
+}
+
+define double @test_floor_d_neg_subnorm() {
+; CHECK-LABEL: define double @test_floor_d_neg_subnorm() {
+; CHECK-NEXT: ret double -1.000000e+00
+;
+ %res = call double @llvm.nvvm.floor.d(double 0xB80FFFFFC0000000)
+ ret double %res
+}
+
+define float @test_floor_f_neg_subnorm() {
+; CHECK-LABEL: define float @test_floor_f_neg_subnorm() {
+; CHECK-NEXT: ret float -1.000000e+00
+;
+ %res = call float @llvm.nvvm.floor.f(float 0xB80FFFFFC0000000)
+ ret float %res
+}
+
+define float @test_floor_ftz_f_neg_subnorm() {
+; CHECK-LABEL: define float @test_floor_ftz_f_neg_subnorm() {
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %res = call float @llvm.nvvm.floor.ftz.f(float 0xB80FFFFFC0000000)
+ ret float %res
+}
+
+;###############################################################
+;# Rcp #
+;###############################################################
+
+;+-------------------------------------------------------------+
+;| rcp_rm |
+;+-------------------------------------------------------------+
+define double @test_rcp_rm_d_0_5() {
+; CHECK-LABEL: define double @test_rcp_rm_d_0_5() {
+; CHECK-NEXT: ret double 2.000000e+00
+;
+ %res = call double @llvm.nvvm.rcp.rm.d(double 0.5)
+ ret double %res
+}
+
+define float @test_rcp_rm_f_0_5() {
+; CHECK-LABEL: define float @test_rcp_rm_f_0_5() {
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %res = call float @llvm.nvvm.rcp.rm.f(float 0.5)
+ ret float %res
+}
+
+define float @test_rcp_rm_ftz_f_0_5() {
+; CHECK-LABEL: define float @test_rcp_rm_ftz_f_0_5() {
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %res = call float @llvm.nvvm.rcp.rm.ftz.f(float 0.5)
+ ret float %res
+}
+
+define double @test_rcp_rm_d_neg_subnorm() {
+; CHECK-LABEL: define double @test_rcp_rm_d_neg_subnorm() {
+; CHECK-NEXT: ret double 0xC7D0000020000041
+;
+ %res = call double @llvm.nvvm.rcp.rm.d(double 0xB80FFFFFC0000000)
+ ret double %res
+}
+
+define float @test_rcp_rm_f_neg_subnorm() {
+; CHECK-LABEL: define float @test_rcp_rm_f_neg_subnorm() {
+; CHECK-NEXT: ret float 0xC7D0000040000000
+;
+ %res = call float @llvm.nvvm.rcp.rm.f(float 0xB80FFFFFC0000000)
+ ret float %res
+}
+
+define float @test_rcp_rm_ftz_f_neg_subnorm() {
+; CHECK-LABEL: define float @test_rcp_rm_ftz_f_neg_subnorm() {
+; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.rcp.rm.ftz.f(float 0xB80FFFFFC0000000)
+; CHECK-NEXT: ret float [[RES]]
+;
+ %res = call float @llvm.nvvm.rcp.rm.ftz.f(float 0xB80FFFFFC0000000)
+ ret float %res
+}
+
+;+-------------------------------------------------------------+
+;| rcp_rn |
+;+-------------------------------------------------------------+
+define double @test_rcp_rn_d_0_5() {
+; CHECK-LABEL: define double @test_rcp_rn_d_0_5() {
+; CHECK-NEXT: ret double 2.000000e+00
+;
+ %res = call double @llvm.nvvm.rcp.rn.d(double 0.5)
+ ret double %res
+}
+
+define float @test_rcp_rn_f_0_5() {
+; CHECK-LABEL: define float @test_rcp_rn_f_0_5() {
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %res = call float @llvm.nvvm.rcp.rn.f(float 0.5)
+ ret float %res
+}
+
+define float @test_rcp_rn_ftz_f_0_5() {
+; CHECK-LABEL: define float @test_rcp_rn_ftz_f_0_5() {
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %res = call float @llvm.nvvm.rcp.rn.ftz.f(float 0.5)
+ ret float %res
+}
+
+define double @test_rcp_rn_d_neg_subnorm() {
+; CHECK-LABEL: define double @test_rcp_rn_d_neg_subnorm() {
+; CHECK-NEXT: ret double 0xC7D0000020000040
+;
+ %res = call double @llvm.nvvm.rcp.rn.d(double 0xB80FFFFFC0000000)
+ ret double %res
+}
+
+define float @test_rcp_rn_f_neg_subnorm() {
+; CHECK-LABEL: define float @test_rcp_rn_f_neg_subnorm() {
+; CHECK-NEXT: ret float 0xC7D0000020000000
+;
+ %res = call float @llvm.nvvm.rcp.rn.f(float 0xB80FFFFFC0000000)
+ ret float %res
+}
+
+define float @test_rcp_rn_ftz_f_neg_subnorm() {
+; CHECK-LABEL: define float @test_rcp_rn_ftz_f_neg_subnorm() {
+; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.rcp.rn.ftz.f(float 0xB80FFFFFC0000000)
+; CHECK-NEXT: ret float [[RES]]
+;
+ %res = call float @llvm.nvvm.rcp.rn.ftz.f(float 0xB80FFFFFC0000000)
+ ret float %res
+}
+
+;+-------------------------------------------------------------+
+;| rcp_rp |
+;+-------------------------------------------------------------+
+define double @test_rcp_rp_d_0_5() {
+; CHECK-LABEL: define double @test_rcp_rp_d_0_5() {
+; CHECK-NEXT: ret double 2.000000e+00
+;
+ %res = call double @llvm.nvvm.rcp.rp.d(double 0.5)
+ ret double %res
+}
+
+define float @test_rcp_rp_f_0_5() {
+; CHECK-LABEL: define float @test_rcp_rp_f_0_5() {
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %res = call float @llvm.nvvm.rcp.rp.f(float 0.5)
+ ret float %res
+}
+
+define float @test_rcp_rp_ftz_f_0_5() {
+; CHECK-LABEL: define float @test_rcp_rp_ftz_f_0_5() {
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %res = call float @llvm.nvvm.rcp.rp.ftz.f(float 0.5)
+ ret float %res
+}
+
+define double @test_rcp_rp_d_neg_subnorm() {
+; CHECK-LABEL: define double @test_rcp_rp_d_neg_subnorm() {
+; CHECK-NEXT: ret double 0xC7D0000020000040
+;
+ %res = call double @llvm.nvvm.rcp.rp.d(double 0xB80FFFFFC0000000)
+ ret double %res
+}
+
+define float @test_rcp_rp_f_neg_subnorm() {
+; CHECK-LABEL: define float @test_rcp_rp_f_neg_subnorm() {
+; CHECK-NEXT: ret float 0xC7D0000020000000
+;
+ %res = call float @llvm.nvvm.rcp.rp.f(float 0xB80FFFFFC0000000)
+ ret float %res
+}
+
+define float @test_rcp_rp_ftz_f_neg_subnorm() {
+; CHECK-LABEL: define float @test_rcp_rp_ftz_f_neg_subnorm() {
+; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.rcp.rp.ftz.f(float 0xB80FFFFFC0000000)
+; CHECK-NEXT: ret float [[RES]]
+;
+ %res = call float @llvm.nvvm.rcp.rp.ftz.f(float 0xB80FFFFFC0000000)
+ ret float %res
+}
+
+;+-------------------------------------------------------------+
+;| rcp_rz |
+;+-------------------------------------------------------------+
+define double @test_rcp_rz_d_0_5() {
+; CHECK-LABEL: define double @test_rcp_rz_d_0_5() {
+; CHECK-NEXT: ret double 2.000000e+00
+;
+ %res = call double @llvm.nvvm.rcp.rz.d(double 0.5)
+ ret double %res
+}
+
+define float @test_rcp_rz_f_0_5() {
+; CHECK-LABEL: define float @test_rcp_rz_f_0_5() {
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %res = call float @llvm.nvvm.rcp.rz.f(float 0.5)
+ ret float %res
+}
+
+define float @test_rcp_rz_ftz_f_0_5() {
+; CHECK-LABEL: define float @test_rcp_rz_ftz_f_0_5() {
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %res = call float @llvm.nvvm.rcp.rz.ftz.f(float 0.5)
+ ret float %res
+}
+
+define double @test_rcp_rz_d_neg_subnorm() {
+; CHECK-LABEL: define double @test_rcp_rz_d_neg_subnorm() {
+; CHECK-NEXT: ret double 0xC7D0000020000040
+;
+ %res = call double @llvm.nvvm.rcp.rz.d(double 0xB80FFFFFC0000000)
+ ret double %res
+}
+
+define float @test_rcp_rz_f_neg_subnorm() {
+; CHECK-LABEL: define float @test_rcp_rz_f_neg_subnorm() {
+; CHECK-NEXT: ret float 0xC7D0000020000000
+;
+ %res = call float @llvm.nvvm.rcp.rz.f(float 0xB80FFFFFC0000000)
+ ret float %res
+}
+
+define float @test_rcp_rz_ftz_f_neg_subnorm() {
+; CHECK-LABEL: define float @test_rcp_rz_ftz_f_neg_subnorm() {
+; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.rcp.rz.ftz.f(float 0xB80FFFFFC0000000)
+; CHECK-NEXT: ret float [[RES]]
+;
+ %res = call float @llvm.nvvm.rcp.rz.ftz.f(float 0xB80FFFFFC0000000)
+ ret float %res
+}
+
+;###############################################################
+;# Round #
+;###############################################################
+
+define double @test_round_d_neg_1_5() {
+; CHECK-LABEL: define double @test_round_d_neg_1_5() {
+; CHECK-NEXT: ret double -2.000000e+00
+;
+ %res = call double @llvm.nvvm.round.d(double -1.5)
+ ret double %res
+}
+
+define float @test_round_f_neg_1_5() {
+; CHECK-LABEL: define float @test_round_f_neg_1_5() {
+; CHECK-NEXT: ret float -2.000000e+00
+;
+ %res = call float @llvm.nvvm.round.f(float -1.5)
+ ret float %res
+}
+
+define float @test_round_ftz_f_neg_1_5() {
+; CHECK-LABEL: define float @test_round_ftz_f_neg_1_5() {
+; CHECK-NEXT: ret float -2.000000e+00
+;
+ %res = call float @llvm.nvvm.round.ftz.f(float -1.5)
+ ret float %res
+}
+
+define double @test_round_d_neg_subnorm() {
+; CHECK-LABEL: define double @test_round_d_neg_subnorm() {
+; CHECK-NEXT: ret double -0.000000e+00
+;
+ %res = call double @llvm.nvvm.round.d(double 0xB80FFFFFC0000000)
+ ret double %res
+}
+
+define float @test_round_f_neg_subnorm() {
+; CHECK-LABEL: define float @test_round_f_neg_subnorm() {
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %res = call float @llvm.nvvm.round.f(float 0xB80FFFFFC0000000)
+ ret float %res
+}
+
+define float @test_round_ftz_f_neg_subnorm() {
+; CHECK-LABEL: define float @test_round_ftz_f_neg_subnorm() {
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %res = call float @llvm.nvvm.round.ftz.f(float 0xB80FFFFFC0000000)
+ ret float %res
+}
+
+;###############################################################
+;# Saturate #
+;###############################################################
+
+define double @test_saturate_d_1_25() {
+; CHECK-LABEL: define double @test_saturate_d_1_25() {
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %res = call double @llvm.nvvm.saturate.d(double 1.25)
+ ret double %res
+}
+
+define float @test_saturate_f_1_25() {
+; CHECK-LABEL: define float @test_saturate_f_1_25() {
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %res = call float @llvm.nvvm.saturate.f(float 1.25)
+ ret float %res
+}
+
+define float @test_saturate_ftz_f_1_25() {
+; CHECK-LABEL: define float @test_saturate_ftz_f_1_25() {
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %res = call float @llvm.nvvm.saturate.ftz.f(float 1.25)
+ ret float %res
+}
+
+define double @test_saturate_d_neg_1_25() {
+; CHECK-LABEL: define double @test_saturate_d_neg_1_25() {
+; CHECK-NEXT: ret double 0.000000e+00
+;
+ %res = call double @llvm.nvvm.saturate.d(double -1.25)
+ ret double %res
+}
+
+define float @test_saturate_f_neg_1_25() {
+; CHECK-LABEL: define float @test_saturate_f_neg_1_25() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %res = call float @llvm.nvvm.saturate.f(float -1.25)
+ ret float %res
+}
+
+define float @test_saturate_ftz_f_neg_1_25() {
+; CHECK-LABEL: define float @test_saturate_ftz_f_neg_1_25() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %res = call float @llvm.nvvm.saturate.ftz.f(float -1.25)
+ ret float %res
+}
+
+define double @test_saturate_d_0_5() {
+; CHECK-LABEL: define double @test_saturate_d_0_5() {
+; CHECK-NEXT: ret double 5.000000e-01
+;
+ %res = call double @llvm.nvvm.saturate.d(double 0.5)
+ ret double %res
+}
+
+define float @test_saturate_f_0_5() {
+; CHECK-LABEL: define float @test_saturate_f_0_5() {
+; CHECK-NEXT: ret float 5.000000e-01
+;
+ %res = call float @llvm.nvvm.saturate.f(float 0.5)
+ ret float %res
+}
+
+define float @test_saturate_ftz_f_0_5() {
+; CHECK-LABEL: define float @test_saturate_ftz_f_0_5() {
+; CHECK-NEXT: ret float 5.000000e-01
+;
+ %res = call float @llvm.nvvm.saturate.ftz.f(float 0.5)
+ ret float %res
+}
+
+define double @test_saturate_d_pos_subnorm() {
+; CHECK-LABEL: define double @test_saturate_d_pos_subnorm() {
+; CHECK-NEXT: ret double 0x380FFFFFC0000000
+;
+ %res = call double @llvm.nvvm.saturate.d(double 0x380FFFFFC0000000)
+ ret double %res
+}
+
+define float @test_saturate_f_pos_subnorm() {
+; CHECK-LABEL: define float @test_saturate_f_pos_subnorm() {
+; CHECK-NEXT: ret float 0x380FFFFFC0000000
+;
+ %res = call float @llvm.nvvm.saturate.f(float 0x380FFFFFC0000000)
+ ret float %res
+}
+
+define float @test_saturate_ftz_f_pos_subnorm() {
+; CHECK-LABEL: define float @test_saturate_ftz_f_pos_subnorm() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %res = call float @llvm.nvvm.saturate.ftz.f(float 0x380FFFFFC0000000)
+ ret float %res
+}
+
+;###############################################################
+;# Sqrt #
+;###############################################################
+
+define float @test_sqrt_f_4() {
+; CHECK-LABEL: define float @test_sqrt_f_4() {
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %res = call float @llvm.nvvm.sqrt.f(float 4.0)
+ ret float %res
+}
+
+define float @test_sqrt_rn_f_4() {
+; CHECK-LABEL: define float @test_sqrt_rn_f_4() {
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %res = call float @llvm.nvvm.sqrt.rn.f(float 4.0)
+ ret float %res
+}
+
+define double @test_sqrt_rn_d_4() {
+; CHECK-LABEL: define double @test_sqrt_rn_d_4() {
+; CHECK-NEXT: ret double 2.000000e+00
+;
+ %res = call double @llvm.nvvm.sqrt.rn.d(double 4.0)
+ ret double %res
+}
+
+define float @test_sqrt_rn_ftz_f_4() {
+; CHECK-LABEL: define float @test_sqrt_rn_ftz_f_4() {
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %res = call float @llvm.nvvm.sqrt.rn.ftz.f(float 4.0)
+ ret float %res
+}
+
+define float @test_sqrt_f_pos_subnorm() {
+; CHECK-LABEL: define float @test_sqrt_f_pos_subnorm() {
+; CHECK-NEXT: ret float 0x3BFFFFFFE0000000
+;
+ %res = call float @llvm.nvvm.sqrt.f(float 0x380FFFFFC0000000)
+ ret float %res
+}
+
+define float @test_sqrt_rn_f_pos_subnorm() {
+; CHECK-LABEL: define float @test_sqrt_rn_f_pos_subnorm() {
+; CHECK-NEXT: ret float 0x3BFFFFFFE0000000
+;
+ %res = call float @llvm.nvvm.sqrt.rn.f(float 0x380FFFFFC0000000)
+ ret float %res
+}
+
+define double @test_sqrt_rn_d_pos_subnorm() {
+; CHECK-LABEL: define double @test_sqrt_rn_d_pos_subnorm() {
+; CHECK-NEXT: ret double 0x3BFFFFFFDFFFFFF0
+;
+ %res = call double @llvm.nvvm.sqrt.rn.d(double 0x380FFFFFC0000000)
+ ret double %res
+}
+
+define float @test_sqrt_rn_ftz_f_pos_subnorm() {
+; CHECK-LABEL: define float @test_sqrt_rn_ftz_f_pos_subnorm() {
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %res = call float @llvm.nvvm.sqrt.rn.ftz.f(float 0x380FFFFFC0000000)
+ ret float %res
+}
+
+declare double @llvm.nvvm.ceil.d(double)
+declare float @llvm.nvvm.ceil.f(float)
+declare float @llvm.nvvm.ceil.ftz.f(float)
+
+declare float @llvm.nvvm.fabs(float)
+declare float @llvm.nvvm.fabs.ftz(float)
+
+declare double @llvm.nvvm.floor.d(double)
+declare float @llvm.nvvm.floor.f(float)
+declare float @llvm.nvvm.floor.ftz.f(float)
+
+declare double @llvm.nvvm.rcp.rm.d(double)
+declare float @llvm.nvvm.rcp.rm.f(float)
+declare float @llvm.nvvm.rcp.rm.ftz.f(float)
+declare double @llvm.nvvm.rcp.rn.d(double)
+declare float @llvm.nvvm.rcp.rn.f(float)
+declare float @llvm.nvvm.rcp.rn.ftz.f(float)
+declare double @llvm.nvvm.rcp.rp.d(double)
+declare float @llvm.nvvm.rcp.rp.f(float)
+declare float @llvm.nvvm.rcp.rp.ftz.f(float)
+declare double @llvm.nvvm.rcp.rz.d(double)
+declare float @llvm.nvvm.rcp.rz.f(float)
+declare float @llvm.nvvm.rcp.rz.ftz.f(float)
+
+declare double @llvm.nvvm.round.d(double)
+declare float @llvm.nvvm.round.f(float)
+declare float @llvm.nvvm.round.ftz.f(float)
+
+declare double @llvm.nvvm.saturate.d(double)
+declare float @llvm.nvvm.saturate.f(float)
+declare float @llvm.nvvm.saturate.ftz.f(float)
+
+declare float @llvm.nvvm.sqrt.f(float)
+declare double @llvm.nvvm.sqrt.rn.d(double)
+declare float @llvm.nvvm.sqrt.rn.f(float)
+declare float @llvm.nvvm.sqrt.rn.ftz.f(float)
More information about the llvm-commits
mailing list