[llvm] [NVPTX] Remove `UnsafeFPMath` uses (PR #151479)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 1 01:11:35 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-nvptx
Author: None (paperchalice)
<details>
<summary>Changes</summary>
Remove `UnsafeFPMath` in NVPTX part, it blocks some bugfixes related to clang and the ultimate goal is to remove `resetTargetOptions` method in `TargetMachine`, see FIXME in `resetTargetOptions`.
See also https://discourse.llvm.org/t/rfc-honor-pragmas-with-ffp-contract-fast
https://discourse.llvm.org/t/allowfpopfusion-vs-sdnodeflags-hasallowcontract
---
Patch is 63.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151479.diff
13 Files Affected:
- (modified) llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp (-5)
- (modified) llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h (-1)
- (modified) llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp (+4-23)
- (modified) llvm/lib/Target/NVPTX/NVPTXISelLowering.h (-1)
- (modified) llvm/lib/Target/NVPTX/NVPTXInstrInfo.td (+2-3)
- (modified) llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll (+4-4)
- (modified) llvm/test/CodeGen/NVPTX/f16-instructions.ll (+4-5)
- (modified) llvm/test/CodeGen/NVPTX/f16x2-instructions.ll (+4-5)
- (modified) llvm/test/CodeGen/NVPTX/f32x2-instructions.ll (+4-5)
- (modified) llvm/test/CodeGen/NVPTX/fast-math.ll (+55-96)
- (modified) llvm/test/CodeGen/NVPTX/fma-relu-fma-intrinsic.ll (+37-40)
- (modified) llvm/test/CodeGen/NVPTX/frem.ll (+268-265)
- (modified) llvm/test/CodeGen/NVPTX/sqrt-approx.ll (+30-31)
``````````diff
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 95abcded46485..cf9758c80c5c2 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -82,11 +82,6 @@ bool NVPTXDAGToDAGISel::allowFMA() const {
return TL->allowFMA(*MF, OptLevel);
}
-bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
- const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
- return TL->allowUnsafeFPMath(*MF);
-}
-
bool NVPTXDAGToDAGISel::doRsqrtOpt() const { return EnableRsqrtOpt; }
/// Select - Select instructions not customized! Used for
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 9e0f88e544980..dd05c4df8a3ee 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -44,7 +44,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
bool usePrecSqrtF32(const SDNode *N) const;
bool useF32FTZ() const;
bool allowFMA() const;
- bool allowUnsafeFPMath() const;
bool doRsqrtOpt() const;
NVPTXScopes Scopes{};
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 4fd362303b6e5..16a5cf71f44cd 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -125,10 +125,6 @@ NVPTXTargetLowering::getDivF32Level(const MachineFunction &MF,
if (UsePrecDivF32.getNumOccurrences() > 0)
return UsePrecDivF32;
- // Otherwise, use div.approx if fast math is enabled
- if (allowUnsafeFPMath(MF))
- return NVPTX::DivPrecisionLevel::Approx;
-
const SDNodeFlags Flags = N.getFlags();
if (Flags.hasApproximateFuncs())
return NVPTX::DivPrecisionLevel::Approx;
@@ -142,10 +138,6 @@ bool NVPTXTargetLowering::usePrecSqrtF32(const MachineFunction &MF,
if (UsePrecSqrtF32.getNumOccurrences() > 0)
return UsePrecSqrtF32;
- // Otherwise, use sqrt.approx if fast math is enabled
- if (allowUnsafeFPMath(MF))
- return false;
-
if (N) {
const SDNodeFlags Flags = N->getFlags();
if (Flags.hasApproximateFuncs())
@@ -2687,8 +2679,7 @@ static SDValue lowerROT(SDValue Op, SelectionDAG &DAG) {
SDLoc(Op), Opcode, DAG);
}
-static SDValue lowerFREM(SDValue Op, SelectionDAG &DAG,
- bool AllowUnsafeFPMath) {
+static SDValue lowerFREM(SDValue Op, SelectionDAG &DAG) {
// Lower (frem x, y) into (sub x, (mul (ftrunc (div x, y)) y)),
// i.e. "poor man's fmod()". When y is infinite, x is returned. This matches
// the semantics of LLVM's frem.
@@ -2705,7 +2696,7 @@ static SDValue lowerFREM(SDValue Op, SelectionDAG &DAG,
SDValue Sub = DAG.getNode(ISD::FSUB, DL, Ty, X, Mul,
Flags | SDNodeFlags::AllowContract);
- if (AllowUnsafeFPMath || Flags.hasNoInfs())
+ if (Flags.hasNoInfs())
return Sub;
// If Y is infinite, return X
@@ -2845,7 +2836,7 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::CTLZ:
return lowerCTLZCTPOP(Op, DAG);
case ISD::FREM:
- return lowerFREM(Op, DAG, allowUnsafeFPMath(DAG.getMachineFunction()));
+ return lowerFREM(Op, DAG);
default:
llvm_unreachable("Custom lowering not defined for operation");
@@ -4718,17 +4709,7 @@ bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
if (MF.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast)
return true;
- return allowUnsafeFPMath(MF);
-}
-
-bool NVPTXTargetLowering::allowUnsafeFPMath(const MachineFunction &MF) const {
- // Honor TargetOptions flags that explicitly say unsafe math is okay.
- if (MF.getTarget().Options.UnsafeFPMath)
- return true;
-
- // Allow unsafe math if unsafe-fp-math attribute explicitly says so.
- const Function &F = MF.getFunction();
- return F.getFnAttribute("unsafe-fp-math").getValueAsBool();
+ return false;
}
static bool isConstZero(const SDValue &Operand) {
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index cf72a1e6db89c..71c15695b1988 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -215,7 +215,6 @@ class NVPTXTargetLowering : public TargetLowering {
unsigned combineRepeatedFPDivisors() const override { return 2; }
bool allowFMA(MachineFunction &MF, CodeGenOptLevel OptLevel) const;
- bool allowUnsafeFPMath(const MachineFunction &MF) const;
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT) const override {
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 6000b40694763..020d42f217f75 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1181,9 +1181,8 @@ defm FMA_F64 : FMA<F64RT, allow_ftz = false>;
// sin/cos/tanh
class UnaryOpAllowsApproxFn<SDPatternOperator operator>
- : PatFrag<(ops node:$A),
- (operator node:$A), [{
- return allowUnsafeFPMath() || N->getFlags().hasApproximateFuncs();
+ : PatFrag<(ops node:$A), (operator node:$A), [{
+ return N->getFlags().hasApproximateFuncs();
}]>;
def SIN_APPROX_f32 :
diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll
index 80627a03354a0..e1d4ef1073a78 100644
--- a/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 --enable-unsafe-fp-math | FileCheck --check-prefixes=CHECK %s
-; RUN: %if ptxas-11.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 --enable-unsafe-fp-math | %ptxas-verify -arch=sm_80 %}
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 | FileCheck --check-prefixes=CHECK %s
+; RUN: %if ptxas-11.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 | %ptxas-verify -arch=sm_80 %}
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
@@ -22,7 +22,7 @@ define <2 x bfloat> @test_sin(<2 x bfloat> %a) #0 #1 {
; CHECK-NEXT: cvt.rn.bf16x2.f32 %r5, %r4, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: ret;
- %r = call <2 x bfloat> @llvm.sin.f16(<2 x bfloat> %a)
+ %r = call afn <2 x bfloat> @llvm.sin.f16(<2 x bfloat> %a)
ret <2 x bfloat> %r
}
@@ -41,7 +41,7 @@ define <2 x bfloat> @test_cos(<2 x bfloat> %a) #0 #1 {
; CHECK-NEXT: cvt.rn.bf16x2.f32 %r5, %r4, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: ret;
- %r = call <2 x bfloat> @llvm.cos.f16(<2 x bfloat> %a)
+ %r = call afn <2 x bfloat> @llvm.cos.f16(<2 x bfloat> %a)
ret <2 x bfloat> %r
}
diff --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
index 2b7e4184670c7..d4aec4f16f1ab 100644
--- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
@@ -886,8 +886,8 @@ define half @test_sqrt(half %a) #0 {
; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
-define half @test_sin(half %a) #0 #1 {
- %r = call half @llvm.sin.f16(half %a)
+define half @test_sin(half %a) #0 {
+ %r = call afn half @llvm.sin.f16(half %a)
ret half %r
}
@@ -900,8 +900,8 @@ define half @test_sin(half %a) #0 #1 {
; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
-define half @test_cos(half %a) #0 #1 {
- %r = call half @llvm.cos.f16(half %a)
+define half @test_cos(half %a) #0 {
+ %r = call afn half @llvm.cos.f16(half %a)
ret half %r
}
@@ -1183,4 +1183,3 @@ define <2 x half> @test_neg_f16x2(<2 x half> noundef %arg) #0 {
}
attributes #0 = { nounwind }
-attributes #1 = { "unsafe-fp-math" = "true" }
diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
index d4fcea320f3ad..991311f9492b9 100644
--- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
@@ -1674,7 +1674,7 @@ define <2 x half> @test_sqrt(<2 x half> %a) #0 {
; ret <2 x half> %r
;}
-define <2 x half> @test_sin(<2 x half> %a) #0 #1 {
+define <2 x half> @test_sin(<2 x half> %a) #0 {
; CHECK-LABEL: test_sin(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
@@ -1692,11 +1692,11 @@ define <2 x half> @test_sin(<2 x half> %a) #0 #1 {
; CHECK-NEXT: mov.b32 %r6, {%rs4, %rs3};
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-NEXT: ret;
- %r = call <2 x half> @llvm.sin.f16(<2 x half> %a)
+ %r = call afn <2 x half> @llvm.sin.f16(<2 x half> %a)
ret <2 x half> %r
}
-define <2 x half> @test_cos(<2 x half> %a) #0 #1 {
+define <2 x half> @test_cos(<2 x half> %a) #0 {
; CHECK-LABEL: test_cos(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
@@ -1714,7 +1714,7 @@ define <2 x half> @test_cos(<2 x half> %a) #0 #1 {
; CHECK-NEXT: mov.b32 %r6, {%rs4, %rs3};
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-NEXT: ret;
- %r = call <2 x half> @llvm.cos.f16(<2 x half> %a)
+ %r = call afn <2 x half> @llvm.cos.f16(<2 x half> %a)
ret <2 x half> %r
}
@@ -2330,4 +2330,3 @@ define void @test_store_2xhalf(ptr %p1, ptr %p2, <2 x half> %v) {
attributes #0 = { nounwind }
-attributes #1 = { "unsafe-fp-math" = "true" }
diff --git a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
index b84a0ec7155e2..1a73b22e83be7 100644
--- a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
@@ -1627,7 +1627,7 @@ define <2 x float> @test_sqrt(<2 x float> %a) #0 {
; ret <2 x float> %r
;}
-define <2 x float> @test_sin(<2 x float> %a) #0 #1 {
+define <2 x float> @test_sin(<2 x float> %a) #0 {
; CHECK-LABEL: test_sin(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
@@ -1640,11 +1640,11 @@ define <2 x float> @test_sin(<2 x float> %a) #0 #1 {
; CHECK-NEXT: sin.approx.f32 %r4, %r1;
; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3};
; CHECK-NEXT: ret;
- %r = call <2 x float> @llvm.sin(<2 x float> %a)
+ %r = call afn <2 x float> @llvm.sin(<2 x float> %a)
ret <2 x float> %r
}
-define <2 x float> @test_cos(<2 x float> %a) #0 #1 {
+define <2 x float> @test_cos(<2 x float> %a) #0 {
; CHECK-LABEL: test_cos(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
@@ -1657,7 +1657,7 @@ define <2 x float> @test_cos(<2 x float> %a) #0 #1 {
; CHECK-NEXT: cos.approx.f32 %r4, %r1;
; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3};
; CHECK-NEXT: ret;
- %r = call <2 x float> @llvm.cos(<2 x float> %a)
+ %r = call afn <2 x float> @llvm.cos(<2 x float> %a)
ret <2 x float> %r
}
@@ -2146,5 +2146,4 @@ define void @test_trunc_to_v2f16(<2 x float> %a, ptr %p) {
attributes #0 = { nounwind }
-attributes #1 = { "unsafe-fp-math" = "true" }
attributes #2 = { "denormal-fp-math"="preserve-sign" }
diff --git a/llvm/test/CodeGen/NVPTX/fast-math.ll b/llvm/test/CodeGen/NVPTX/fast-math.ll
index 5eda3a1e2dda1..8561c60a46948 100644
--- a/llvm/test/CodeGen/NVPTX/fast-math.ll
+++ b/llvm/test/CodeGen/NVPTX/fast-math.ll
@@ -22,7 +22,7 @@ define float @sqrt_div(float %a, float %b) {
ret float %t2
}
-define float @sqrt_div_fast(float %a, float %b) #0 {
+define float @sqrt_div_fast(float %a, float %b) {
; CHECK-LABEL: sqrt_div_fast(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
@@ -34,29 +34,25 @@ define float @sqrt_div_fast(float %a, float %b) #0 {
; CHECK-NEXT: div.approx.f32 %r4, %r2, %r3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
- %t1 = tail call float @llvm.sqrt.f32(float %a)
- %t2 = fdiv float %t1, %b
+ %t1 = tail call afn float @llvm.sqrt.f32(float %a)
+ %t2 = fdiv afn float %t1, %b
ret float %t2
}
-define float @sqrt_div_fast_ninf(float %a, float %b) #0 {
+define float @sqrt_div_fast_ninf(float %a, float %b) {
; CHECK-LABEL: sqrt_div_fast_ninf(
; CHECK: {
-; CHECK-NEXT: .reg .pred %p<2>;
-; CHECK-NEXT: .reg .b32 %r<7>;
+; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [sqrt_div_fast_ninf_param_0];
; CHECK-NEXT: sqrt.approx.f32 %r2, %r1;
-; CHECK-NEXT: abs.f32 %r3, %r1;
-; CHECK-NEXT: setp.lt.f32 %p1, %r3, 0f00800000;
-; CHECK-NEXT: selp.f32 %r4, 0f00000000, %r2, %p1;
-; CHECK-NEXT: ld.param.b32 %r5, [sqrt_div_fast_ninf_param_1];
-; CHECK-NEXT: div.approx.f32 %r6, %r4, %r5;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
+; CHECK-NEXT: ld.param.b32 %r3, [sqrt_div_fast_ninf_param_1];
+; CHECK-NEXT: div.approx.f32 %r4, %r2, %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
%t1 = tail call ninf afn float @llvm.sqrt.f32(float %a)
- %t2 = fdiv float %t1, %b
+ %t2 = fdiv afn float %t1, %b
ret float %t2
}
@@ -77,7 +73,7 @@ define float @sqrt_div_ftz(float %a, float %b) #1 {
ret float %t2
}
-define float @sqrt_div_fast_ftz(float %a, float %b) #0 #1 {
+define float @sqrt_div_fast_ftz(float %a, float %b) #1 {
; CHECK-LABEL: sqrt_div_fast_ftz(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<5>;
@@ -89,35 +85,32 @@ define float @sqrt_div_fast_ftz(float %a, float %b) #0 #1 {
; CHECK-NEXT: div.approx.ftz.f32 %r4, %r2, %r3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
- %t1 = tail call float @llvm.sqrt.f32(float %a)
- %t2 = fdiv float %t1, %b
+ %t1 = tail call afn float @llvm.sqrt.f32(float %a)
+ %t2 = fdiv afn float %t1, %b
ret float %t2
}
-define float @sqrt_div_fast_ftz_ninf(float %a, float %b) #0 #1 {
+define float @sqrt_div_fast_ftz_ninf(float %a, float %b) #1 {
; CHECK-LABEL: sqrt_div_fast_ftz_ninf(
; CHECK: {
-; CHECK-NEXT: .reg .pred %p<2>;
-; CHECK-NEXT: .reg .b32 %r<6>;
+; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [sqrt_div_fast_ftz_ninf_param_0];
-; CHECK-NEXT: setp.eq.ftz.f32 %p1, %r1, 0f00000000;
; CHECK-NEXT: sqrt.approx.ftz.f32 %r2, %r1;
-; CHECK-NEXT: selp.f32 %r3, 0f00000000, %r2, %p1;
-; CHECK-NEXT: ld.param.b32 %r4, [sqrt_div_fast_ftz_ninf_param_1];
-; CHECK-NEXT: div.approx.ftz.f32 %r5, %r3, %r4;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
+; CHECK-NEXT: ld.param.b32 %r3, [sqrt_div_fast_ftz_ninf_param_1];
+; CHECK-NEXT: div.approx.ftz.f32 %r4, %r2, %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
; CHECK-NEXT: ret;
%t1 = tail call ninf afn float @llvm.sqrt.f32(float %a)
- %t2 = fdiv float %t1, %b
+ %t2 = fdiv afn float %t1, %b
ret float %t2
}
; There are no fast-math or ftz versions of sqrt and div for f64. We use
; reciprocal(rsqrt(x)) for sqrt(x), and emit a vanilla divide.
-define double @sqrt_div_fast_ftz_f64(double %a, double %b) #0 #1 {
+define double @sqrt_div_fast_ftz_f64(double %a, double %b) #1 {
; CHECK-LABEL: sqrt_div_fast_ftz_f64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<5>;
@@ -134,22 +127,17 @@ define double @sqrt_div_fast_ftz_f64(double %a, double %b) #0 #1 {
ret double %t2
}
-define double @sqrt_div_fast_ftz_f64_ninf(double %a, double %b) #0 #1 {
+define double @sqrt_div_fast_ftz_f64_ninf(double %a, double %b) #1 {
; CHECK-LABEL: sqrt_div_fast_ftz_f64_ninf(
; CHECK: {
-; CHECK-NEXT: .reg .pred %p<2>;
-; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [sqrt_div_fast_ftz_f64_ninf_param_0];
-; CHECK-NEXT: abs.f64 %rd2, %rd1;
-; CHECK-NEXT: setp.lt.f64 %p1, %rd2, 0d0010000000000000;
-; CHECK-NEXT: rsqrt.approx.f64 %rd3, %rd1;
-; CHECK-NEXT: rcp.approx.ftz.f64 %rd4, %rd3;
-; CHECK-NEXT: selp.f64 %rd5, 0d0000000000000000, %rd4, %p1;
-; CHECK-NEXT: ld.param.b64 %rd6, [sqrt_div_fast_ftz_f64_ninf_param_1];
-; CHECK-NEXT: div.rn.f64 %rd7, %rd5, %rd6;
-; CHECK-NEXT: st.param.b64 [func_retval0], %rd7;
+; CHECK-NEXT: sqrt.rn.f64 %rd2, %rd1;
+; CHECK-NEXT: ld.param.b64 %rd3, [sqrt_div_fast_ftz_f64_ninf_param_1];
+; CHECK-NEXT: div.rn.f64 %rd4, %rd2, %rd3;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;
%t1 = tail call ninf afn double @llvm.sqrt.f64(double %a)
%t2 = fdiv double %t1, %b
@@ -172,7 +160,7 @@ define float @rsqrt(float %a) {
ret float %ret
}
-define float @rsqrt_fast(float %a) #0 {
+define float @rsqrt_fast(float %a) {
; CHECK-LABEL: rsqrt_fast(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
@@ -182,12 +170,12 @@ define float @rsqrt_fast(float %a) #0 {
; CHECK-NEXT: rsqrt.approx.f32 %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
- %b = tail call float @llvm.sqrt.f32(float %a)
- %ret = fdiv float 1.0, %b
+ %b = tail call afn float @llvm.sqrt.f32(float %a)
+ %ret = fdiv afn float 1.0, %b
ret float %ret
}
-define float @rsqrt_fast_ftz(float %a) #0 #1 {
+define float @rsqrt_fast_ftz(float %a) #1 {
; CHECK-LABEL: rsqrt_fast_ftz(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
@@ -197,8 +185,8 @@ define float @rsqrt_fast_ftz(float %a) #0 #1 {
; CHECK-NEXT: rsqrt.approx.ftz.f32 %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
- %b = tail call float @llvm.sqrt.f32(float %a)
- %ret = fdiv float 1.0, %b
+ %b = tail call afn float @llvm.sqrt.f32(float %a)
+ %ret = fdiv afn float 1.0, %b
ret float %ret
}
@@ -263,35 +251,7 @@ define float @fcos_approx_afn(float %a) {
ret float %r
}
-define float @fsin_approx(float %a) #0 {
-; CHECK-LABEL: fsin_approx(
-; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<3>;
-; CHECK-EMPTY:
-; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [fsin_approx_param_0];
-; CHECK-NEXT: sin.approx.f32 %r2, %r1;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
-; CHECK-NEXT: ret;
- %r = tail call float @llvm.sin.f32(float %a)
- ret float %r
-}
-
-define float @fcos_approx(float %a) #0 {
-; CHECK-LABEL: fcos_approx(
-; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<3>;
-; CHECK-EMPTY:
-; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [fcos_approx_param_0];
-; CHECK-NEXT: cos.approx.f32 %r2, %r1;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
-; CHECK-NEXT: ret;
- %r = tail call float @llvm.cos.f32(float %a)
- ret float %r
-}
-
-define float @fsin_approx_ftz(float %a) #0 #1 {
+define float @fsin_approx_ftz(float %a) #1 {
; CHECK-LABEL: fsin_approx_ftz(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
@@ -301,11 +261,11 @@ define float @fsin_approx_ftz(float %a) #0 #1 {
; CHECK-NEXT: sin.approx.ftz.f32 %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
- %r = tail call float @llvm.sin.f32(float %a)
+ %r = tail call afn float @llvm.sin.f32(float %a)
ret float %r
}
-define float @fcos_approx_ftz(float %a) #0 #1 {
+define float @fcos_approx_ftz(float %a) #1 {
; CHECK-LABEL: fcos_approx_ftz(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
@@ -315,7 +275,7 @@ define float @fcos_approx_ftz(float %a) #0 #1 {
; CHECK-NEXT: cos.approx.ftz.f32 %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
- %r = tail call float @llvm.cos.f32(float %a)
+ %r = tail call afn float @llvm.cos.f32(float %a)
ret float %r
}
@@ -423,7 +383,7 @@ define float @repeated_div_recip_allowed_ftz_sel(i1 %pred, float %a, float %b, f
ret float %w
}
-define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0 {
+define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) {
; CHECK-LABEL: repeated_div_fast(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
@@ -444,14 +404,14 @@ define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0
; CHECK-NEXT: selp.f32 %r8, %r7, %r6, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r8;
; CHECK-NEXT: ret;
- %x = fdiv float %a, %divisor
- %y = fdiv float %b, %divisor
- %z = fmul float %x, %y
+ %x = fdiv afn arcp float %a, %divisor
+ %y = fdiv afn arcp contract float %b, %divisor
+ %z = fmul cont...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/151479
More information about the llvm-commits
mailing list