[llvm] [NVPTX] Remove `UnsafeFPMath` uses (PR #151479)

via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 1 01:11:35 PDT 2025


llvmbot wrote:



@llvm/pr-subscribers-backend-nvptx

Author: None (paperchalice)

Changes:

Remove the `UnsafeFPMath` uses in the NVPTX backend. This global flag blocks some Clang-related bug fixes, and the ultimate goal is to remove the `resetTargetOptions` method from `TargetMachine` (see the FIXME in `resetTargetOptions`).
See also:
https://discourse.llvm.org/t/rfc-honor-pragmas-with-ffp-contract-fast
https://discourse.llvm.org/t/allowfpopfusion-vs-sdnodeflags-hasallowcontract
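
In short, approximate lowering is now driven by per-instruction fast-math flags rather than the global `UnsafeFPMath` target option or the `"unsafe-fp-math"` function attribute. A minimal IR sketch of the pattern the updated tests use (function name is illustrative only):

```llvm
; Before: approximation depended on a function-wide attribute, e.g.
;   attributes #1 = { "unsafe-fp-math"="true" }
; After: the request is carried on the instruction itself via the afn flag,
; which UnaryOpAllowsApproxFn checks via N->getFlags().hasApproximateFuncs().
define float @sin_approx(float %x) {
  %r = call afn float @llvm.sin.f32(float %x)  ; per the updated tests, selects sin.approx.f32
  ret float %r
}

declare float @llvm.sin.f32(float)
```
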

---

Patch is 63.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151479.diff


13 Files Affected:

- (modified) llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp (-5) 
- (modified) llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h (-1) 
- (modified) llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp (+4-23) 
- (modified) llvm/lib/Target/NVPTX/NVPTXISelLowering.h (-1) 
- (modified) llvm/lib/Target/NVPTX/NVPTXInstrInfo.td (+2-3) 
- (modified) llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll (+4-4) 
- (modified) llvm/test/CodeGen/NVPTX/f16-instructions.ll (+4-5) 
- (modified) llvm/test/CodeGen/NVPTX/f16x2-instructions.ll (+4-5) 
- (modified) llvm/test/CodeGen/NVPTX/f32x2-instructions.ll (+4-5) 
- (modified) llvm/test/CodeGen/NVPTX/fast-math.ll (+55-96) 
- (modified) llvm/test/CodeGen/NVPTX/fma-relu-fma-intrinsic.ll (+37-40) 
- (modified) llvm/test/CodeGen/NVPTX/frem.ll (+268-265) 
- (modified) llvm/test/CodeGen/NVPTX/sqrt-approx.ll (+30-31) 


``````````diff
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 95abcded46485..cf9758c80c5c2 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -82,11 +82,6 @@ bool NVPTXDAGToDAGISel::allowFMA() const {
   return TL->allowFMA(*MF, OptLevel);
 }
 
-bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
-  const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
-  return TL->allowUnsafeFPMath(*MF);
-}
-
 bool NVPTXDAGToDAGISel::doRsqrtOpt() const { return EnableRsqrtOpt; }
 
 /// Select - Select instructions not customized! Used for
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 9e0f88e544980..dd05c4df8a3ee 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -44,7 +44,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
   bool usePrecSqrtF32(const SDNode *N) const;
   bool useF32FTZ() const;
   bool allowFMA() const;
-  bool allowUnsafeFPMath() const;
   bool doRsqrtOpt() const;
 
   NVPTXScopes Scopes{};
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 4fd362303b6e5..16a5cf71f44cd 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -125,10 +125,6 @@ NVPTXTargetLowering::getDivF32Level(const MachineFunction &MF,
   if (UsePrecDivF32.getNumOccurrences() > 0)
     return UsePrecDivF32;
 
-  // Otherwise, use div.approx if fast math is enabled
-  if (allowUnsafeFPMath(MF))
-    return NVPTX::DivPrecisionLevel::Approx;
-
   const SDNodeFlags Flags = N.getFlags();
   if (Flags.hasApproximateFuncs())
     return NVPTX::DivPrecisionLevel::Approx;
@@ -142,10 +138,6 @@ bool NVPTXTargetLowering::usePrecSqrtF32(const MachineFunction &MF,
   if (UsePrecSqrtF32.getNumOccurrences() > 0)
     return UsePrecSqrtF32;
 
-  // Otherwise, use sqrt.approx if fast math is enabled
-  if (allowUnsafeFPMath(MF))
-    return false;
-
   if (N) {
     const SDNodeFlags Flags = N->getFlags();
     if (Flags.hasApproximateFuncs())
@@ -2687,8 +2679,7 @@ static SDValue lowerROT(SDValue Op, SelectionDAG &DAG) {
                      SDLoc(Op), Opcode, DAG);
 }
 
-static SDValue lowerFREM(SDValue Op, SelectionDAG &DAG,
-                         bool AllowUnsafeFPMath) {
+static SDValue lowerFREM(SDValue Op, SelectionDAG &DAG) {
   // Lower (frem x, y) into (sub x, (mul (ftrunc (div x, y)) y)),
   // i.e. "poor man's fmod()". When y is infinite, x is returned. This matches
   // the semantics of LLVM's frem.
@@ -2705,7 +2696,7 @@ static SDValue lowerFREM(SDValue Op, SelectionDAG &DAG,
   SDValue Sub = DAG.getNode(ISD::FSUB, DL, Ty, X, Mul,
                             Flags | SDNodeFlags::AllowContract);
 
-  if (AllowUnsafeFPMath || Flags.hasNoInfs())
+  if (Flags.hasNoInfs())
     return Sub;
 
   // If Y is infinite, return X
@@ -2845,7 +2836,7 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::CTLZ:
     return lowerCTLZCTPOP(Op, DAG);
   case ISD::FREM:
-    return lowerFREM(Op, DAG, allowUnsafeFPMath(DAG.getMachineFunction()));
+    return lowerFREM(Op, DAG);
 
   default:
     llvm_unreachable("Custom lowering not defined for operation");
@@ -4718,17 +4709,7 @@ bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
   if (MF.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast)
     return true;
 
-  return allowUnsafeFPMath(MF);
-}
-
-bool NVPTXTargetLowering::allowUnsafeFPMath(const MachineFunction &MF) const {
-  // Honor TargetOptions flags that explicitly say unsafe math is okay.
-  if (MF.getTarget().Options.UnsafeFPMath)
-    return true;
-
-  // Allow unsafe math if unsafe-fp-math attribute explicitly says so.
-  const Function &F = MF.getFunction();
-  return F.getFnAttribute("unsafe-fp-math").getValueAsBool();
+  return false;
 }
 
 static bool isConstZero(const SDValue &Operand) {
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index cf72a1e6db89c..71c15695b1988 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -215,7 +215,6 @@ class NVPTXTargetLowering : public TargetLowering {
   unsigned combineRepeatedFPDivisors() const override { return 2; }
 
   bool allowFMA(MachineFunction &MF, CodeGenOptLevel OptLevel) const;
-  bool allowUnsafeFPMath(const MachineFunction &MF) const;
 
   bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                   EVT) const override {
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 6000b40694763..020d42f217f75 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1181,9 +1181,8 @@ defm FMA_F64    : FMA<F64RT,    allow_ftz = false>;
 // sin/cos/tanh
 
 class UnaryOpAllowsApproxFn<SDPatternOperator operator>
-    : PatFrag<(ops node:$A),
-              (operator node:$A), [{
-  return allowUnsafeFPMath() || N->getFlags().hasApproximateFuncs();
+    : PatFrag<(ops node:$A), (operator node:$A), [{
+  return N->getFlags().hasApproximateFuncs();
 }]>;
 
 def SIN_APPROX_f32 :
diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll
index 80627a03354a0..e1d4ef1073a78 100644
--- a/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 --enable-unsafe-fp-math | FileCheck --check-prefixes=CHECK %s
-; RUN: %if ptxas-11.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 --enable-unsafe-fp-math | %ptxas-verify -arch=sm_80 %}
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 | FileCheck --check-prefixes=CHECK %s
+; RUN: %if ptxas-11.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 | %ptxas-verify -arch=sm_80 %}
 
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
@@ -22,7 +22,7 @@ define <2 x bfloat> @test_sin(<2 x bfloat> %a) #0 #1 {
 ; CHECK-NEXT:    cvt.rn.bf16x2.f32 %r5, %r4, %r2;
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r5;
 ; CHECK-NEXT:    ret;
-  %r = call <2 x bfloat> @llvm.sin.f16(<2 x bfloat> %a)
+  %r = call afn <2 x bfloat> @llvm.sin.f16(<2 x bfloat> %a)
   ret <2 x bfloat> %r
 }
 
@@ -41,7 +41,7 @@ define <2 x bfloat> @test_cos(<2 x bfloat> %a) #0 #1 {
 ; CHECK-NEXT:    cvt.rn.bf16x2.f32 %r5, %r4, %r2;
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r5;
 ; CHECK-NEXT:    ret;
-  %r = call <2 x bfloat> @llvm.cos.f16(<2 x bfloat> %a)
+  %r = call afn <2 x bfloat> @llvm.cos.f16(<2 x bfloat> %a)
   ret <2 x bfloat> %r
 }
 
diff --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
index 2b7e4184670c7..d4aec4f16f1ab 100644
--- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
@@ -886,8 +886,8 @@ define half @test_sqrt(half %a) #0 {
 ; CHECK:      cvt.rn.f16.f32  [[R:%rs[0-9]+]], [[RF]];
 ; CHECK:      st.param.b16    [func_retval0], [[R]];
 ; CHECK:      ret;
-define half @test_sin(half %a) #0 #1 {
-  %r = call half @llvm.sin.f16(half %a)
+define half @test_sin(half %a) #0 {
+  %r = call afn half @llvm.sin.f16(half %a)
   ret half %r
 }
 
@@ -900,8 +900,8 @@ define half @test_sin(half %a) #0 #1 {
 ; CHECK:      cvt.rn.f16.f32  [[R:%rs[0-9]+]], [[RF]];
 ; CHECK:      st.param.b16    [func_retval0], [[R]];
 ; CHECK:      ret;
-define half @test_cos(half %a) #0 #1 {
-  %r = call half @llvm.cos.f16(half %a)
+define half @test_cos(half %a) #0 {
+  %r = call afn half @llvm.cos.f16(half %a)
   ret half %r
 }
 
@@ -1183,4 +1183,3 @@ define <2 x half> @test_neg_f16x2(<2 x half> noundef %arg) #0 {
 }
 
 attributes #0 = { nounwind }
-attributes #1 = { "unsafe-fp-math" = "true" }
diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
index d4fcea320f3ad..991311f9492b9 100644
--- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
@@ -1674,7 +1674,7 @@ define <2 x half> @test_sqrt(<2 x half> %a) #0 {
 ;  ret <2 x half> %r
 ;}
 
-define <2 x half> @test_sin(<2 x half> %a) #0 #1 {
+define <2 x half> @test_sin(<2 x half> %a) #0 {
 ; CHECK-LABEL: test_sin(
 ; CHECK:       {
 ; CHECK-NEXT:    .reg .b16 %rs<5>;
@@ -1692,11 +1692,11 @@ define <2 x half> @test_sin(<2 x half> %a) #0 #1 {
 ; CHECK-NEXT:    mov.b32 %r6, {%rs4, %rs3};
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r6;
 ; CHECK-NEXT:    ret;
-  %r = call <2 x half> @llvm.sin.f16(<2 x half> %a)
+  %r = call afn <2 x half> @llvm.sin.f16(<2 x half> %a)
   ret <2 x half> %r
 }
 
-define <2 x half> @test_cos(<2 x half> %a) #0 #1 {
+define <2 x half> @test_cos(<2 x half> %a) #0 {
 ; CHECK-LABEL: test_cos(
 ; CHECK:       {
 ; CHECK-NEXT:    .reg .b16 %rs<5>;
@@ -1714,7 +1714,7 @@ define <2 x half> @test_cos(<2 x half> %a) #0 #1 {
 ; CHECK-NEXT:    mov.b32 %r6, {%rs4, %rs3};
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r6;
 ; CHECK-NEXT:    ret;
-  %r = call <2 x half> @llvm.cos.f16(<2 x half> %a)
+  %r = call afn <2 x half> @llvm.cos.f16(<2 x half> %a)
   ret <2 x half> %r
 }
 
@@ -2330,4 +2330,3 @@ define void @test_store_2xhalf(ptr %p1, ptr %p2, <2 x half> %v) {
 
 
 attributes #0 = { nounwind }
-attributes #1 = { "unsafe-fp-math" = "true" }
diff --git a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
index b84a0ec7155e2..1a73b22e83be7 100644
--- a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
@@ -1627,7 +1627,7 @@ define <2 x float> @test_sqrt(<2 x float> %a) #0 {
 ;  ret <2 x float> %r
 ;}
 
-define <2 x float> @test_sin(<2 x float> %a) #0 #1 {
+define <2 x float> @test_sin(<2 x float> %a) #0 {
 ; CHECK-LABEL: test_sin(
 ; CHECK:       {
 ; CHECK-NEXT:    .reg .b32 %r<5>;
@@ -1640,11 +1640,11 @@ define <2 x float> @test_sin(<2 x float> %a) #0 #1 {
 ; CHECK-NEXT:    sin.approx.f32 %r4, %r1;
 ; CHECK-NEXT:    st.param.v2.b32 [func_retval0], {%r4, %r3};
 ; CHECK-NEXT:    ret;
-  %r = call <2 x float> @llvm.sin(<2 x float> %a)
+  %r = call afn <2 x float> @llvm.sin(<2 x float> %a)
   ret <2 x float> %r
 }
 
-define <2 x float> @test_cos(<2 x float> %a) #0 #1 {
+define <2 x float> @test_cos(<2 x float> %a) #0 {
 ; CHECK-LABEL: test_cos(
 ; CHECK:       {
 ; CHECK-NEXT:    .reg .b32 %r<5>;
@@ -1657,7 +1657,7 @@ define <2 x float> @test_cos(<2 x float> %a) #0 #1 {
 ; CHECK-NEXT:    cos.approx.f32 %r4, %r1;
 ; CHECK-NEXT:    st.param.v2.b32 [func_retval0], {%r4, %r3};
 ; CHECK-NEXT:    ret;
-  %r = call <2 x float> @llvm.cos(<2 x float> %a)
+  %r = call afn <2 x float> @llvm.cos(<2 x float> %a)
   ret <2 x float> %r
 }
 
@@ -2146,5 +2146,4 @@ define void @test_trunc_to_v2f16(<2 x float> %a, ptr %p) {
 
 
 attributes #0 = { nounwind }
-attributes #1 = { "unsafe-fp-math" = "true" }
 attributes #2 = { "denormal-fp-math"="preserve-sign" }
diff --git a/llvm/test/CodeGen/NVPTX/fast-math.ll b/llvm/test/CodeGen/NVPTX/fast-math.ll
index 5eda3a1e2dda1..8561c60a46948 100644
--- a/llvm/test/CodeGen/NVPTX/fast-math.ll
+++ b/llvm/test/CodeGen/NVPTX/fast-math.ll
@@ -22,7 +22,7 @@ define float @sqrt_div(float %a, float %b) {
   ret float %t2
 }
 
-define float @sqrt_div_fast(float %a, float %b) #0 {
+define float @sqrt_div_fast(float %a, float %b) {
 ; CHECK-LABEL: sqrt_div_fast(
 ; CHECK:       {
 ; CHECK-NEXT:    .reg .b32 %r<5>;
@@ -34,29 +34,25 @@ define float @sqrt_div_fast(float %a, float %b) #0 {
 ; CHECK-NEXT:    div.approx.f32 %r4, %r2, %r3;
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
 ; CHECK-NEXT:    ret;
-  %t1 = tail call float @llvm.sqrt.f32(float %a)
-  %t2 = fdiv float %t1, %b
+  %t1 = tail call afn float @llvm.sqrt.f32(float %a)
+  %t2 = fdiv afn float %t1, %b
   ret float %t2
 }
 
-define float @sqrt_div_fast_ninf(float %a, float %b) #0 {
+define float @sqrt_div_fast_ninf(float %a, float %b) {
 ; CHECK-LABEL: sqrt_div_fast_ninf(
 ; CHECK:       {
-; CHECK-NEXT:    .reg .pred %p<2>;
-; CHECK-NEXT:    .reg .b32 %r<7>;
+; CHECK-NEXT:    .reg .b32 %r<5>;
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b32 %r1, [sqrt_div_fast_ninf_param_0];
 ; CHECK-NEXT:    sqrt.approx.f32 %r2, %r1;
-; CHECK-NEXT:    abs.f32 %r3, %r1;
-; CHECK-NEXT:    setp.lt.f32 %p1, %r3, 0f00800000;
-; CHECK-NEXT:    selp.f32 %r4, 0f00000000, %r2, %p1;
-; CHECK-NEXT:    ld.param.b32 %r5, [sqrt_div_fast_ninf_param_1];
-; CHECK-NEXT:    div.approx.f32 %r6, %r4, %r5;
-; CHECK-NEXT:    st.param.b32 [func_retval0], %r6;
+; CHECK-NEXT:    ld.param.b32 %r3, [sqrt_div_fast_ninf_param_1];
+; CHECK-NEXT:    div.approx.f32 %r4, %r2, %r3;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
 ; CHECK-NEXT:    ret;
   %t1 = tail call ninf afn float @llvm.sqrt.f32(float %a)
-  %t2 = fdiv float %t1, %b
+  %t2 = fdiv afn float %t1, %b
   ret float %t2
 }
 
@@ -77,7 +73,7 @@ define float @sqrt_div_ftz(float %a, float %b) #1 {
   ret float %t2
 }
 
-define float @sqrt_div_fast_ftz(float %a, float %b) #0 #1 {
+define float @sqrt_div_fast_ftz(float %a, float %b) #1 {
 ; CHECK-LABEL: sqrt_div_fast_ftz(
 ; CHECK:       {
 ; CHECK-NEXT:    .reg .b32 %r<5>;
@@ -89,35 +85,32 @@ define float @sqrt_div_fast_ftz(float %a, float %b) #0 #1 {
 ; CHECK-NEXT:    div.approx.ftz.f32 %r4, %r2, %r3;
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
 ; CHECK-NEXT:    ret;
-  %t1 = tail call float @llvm.sqrt.f32(float %a)
-  %t2 = fdiv float %t1, %b
+  %t1 = tail call afn float @llvm.sqrt.f32(float %a)
+  %t2 = fdiv afn float %t1, %b
   ret float %t2
 }
 
-define float @sqrt_div_fast_ftz_ninf(float %a, float %b) #0 #1 {
+define float @sqrt_div_fast_ftz_ninf(float %a, float %b) #1 {
 ; CHECK-LABEL: sqrt_div_fast_ftz_ninf(
 ; CHECK:       {
-; CHECK-NEXT:    .reg .pred %p<2>;
-; CHECK-NEXT:    .reg .b32 %r<6>;
+; CHECK-NEXT:    .reg .b32 %r<5>;
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b32 %r1, [sqrt_div_fast_ftz_ninf_param_0];
-; CHECK-NEXT:    setp.eq.ftz.f32 %p1, %r1, 0f00000000;
 ; CHECK-NEXT:    sqrt.approx.ftz.f32 %r2, %r1;
-; CHECK-NEXT:    selp.f32 %r3, 0f00000000, %r2, %p1;
-; CHECK-NEXT:    ld.param.b32 %r4, [sqrt_div_fast_ftz_ninf_param_1];
-; CHECK-NEXT:    div.approx.ftz.f32 %r5, %r3, %r4;
-; CHECK-NEXT:    st.param.b32 [func_retval0], %r5;
+; CHECK-NEXT:    ld.param.b32 %r3, [sqrt_div_fast_ftz_ninf_param_1];
+; CHECK-NEXT:    div.approx.ftz.f32 %r4, %r2, %r3;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
 ; CHECK-NEXT:    ret;
   %t1 = tail call ninf afn float @llvm.sqrt.f32(float %a)
-  %t2 = fdiv float %t1, %b
+  %t2 = fdiv afn float %t1, %b
   ret float %t2
 }
 
 ; There are no fast-math or ftz versions of sqrt and div for f64.  We use
 ; reciprocal(rsqrt(x)) for sqrt(x), and emit a vanilla divide.
 
-define double @sqrt_div_fast_ftz_f64(double %a, double %b) #0 #1 {
+define double @sqrt_div_fast_ftz_f64(double %a, double %b) #1 {
 ; CHECK-LABEL: sqrt_div_fast_ftz_f64(
 ; CHECK:       {
 ; CHECK-NEXT:    .reg .b64 %rd<5>;
@@ -134,22 +127,17 @@ define double @sqrt_div_fast_ftz_f64(double %a, double %b) #0 #1 {
   ret double %t2
 }
 
-define double @sqrt_div_fast_ftz_f64_ninf(double %a, double %b) #0 #1 {
+define double @sqrt_div_fast_ftz_f64_ninf(double %a, double %b) #1 {
 ; CHECK-LABEL: sqrt_div_fast_ftz_f64_ninf(
 ; CHECK:       {
-; CHECK-NEXT:    .reg .pred %p<2>;
-; CHECK-NEXT:    .reg .b64 %rd<8>;
+; CHECK-NEXT:    .reg .b64 %rd<5>;
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [sqrt_div_fast_ftz_f64_ninf_param_0];
-; CHECK-NEXT:    abs.f64 %rd2, %rd1;
-; CHECK-NEXT:    setp.lt.f64 %p1, %rd2, 0d0010000000000000;
-; CHECK-NEXT:    rsqrt.approx.f64 %rd3, %rd1;
-; CHECK-NEXT:    rcp.approx.ftz.f64 %rd4, %rd3;
-; CHECK-NEXT:    selp.f64 %rd5, 0d0000000000000000, %rd4, %p1;
-; CHECK-NEXT:    ld.param.b64 %rd6, [sqrt_div_fast_ftz_f64_ninf_param_1];
-; CHECK-NEXT:    div.rn.f64 %rd7, %rd5, %rd6;
-; CHECK-NEXT:    st.param.b64 [func_retval0], %rd7;
+; CHECK-NEXT:    sqrt.rn.f64 %rd2, %rd1;
+; CHECK-NEXT:    ld.param.b64 %rd3, [sqrt_div_fast_ftz_f64_ninf_param_1];
+; CHECK-NEXT:    div.rn.f64 %rd4, %rd2, %rd3;
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd4;
 ; CHECK-NEXT:    ret;
   %t1 = tail call ninf afn double @llvm.sqrt.f64(double %a)
   %t2 = fdiv double %t1, %b
@@ -172,7 +160,7 @@ define float @rsqrt(float %a) {
   ret float %ret
 }
 
-define float @rsqrt_fast(float %a) #0 {
+define float @rsqrt_fast(float %a) {
 ; CHECK-LABEL: rsqrt_fast(
 ; CHECK:       {
 ; CHECK-NEXT:    .reg .b32 %r<3>;
@@ -182,12 +170,12 @@ define float @rsqrt_fast(float %a) #0 {
 ; CHECK-NEXT:    rsqrt.approx.f32 %r2, %r1;
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
 ; CHECK-NEXT:    ret;
-  %b = tail call float @llvm.sqrt.f32(float %a)
-  %ret = fdiv float 1.0, %b
+  %b = tail call afn float @llvm.sqrt.f32(float %a)
+  %ret = fdiv afn float 1.0, %b
   ret float %ret
 }
 
-define float @rsqrt_fast_ftz(float %a) #0 #1 {
+define float @rsqrt_fast_ftz(float %a) #1 {
 ; CHECK-LABEL: rsqrt_fast_ftz(
 ; CHECK:       {
 ; CHECK-NEXT:    .reg .b32 %r<3>;
@@ -197,8 +185,8 @@ define float @rsqrt_fast_ftz(float %a) #0 #1 {
 ; CHECK-NEXT:    rsqrt.approx.ftz.f32 %r2, %r1;
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
 ; CHECK-NEXT:    ret;
-  %b = tail call float @llvm.sqrt.f32(float %a)
-  %ret = fdiv float 1.0, %b
+  %b = tail call afn float @llvm.sqrt.f32(float %a)
+  %ret = fdiv afn float 1.0, %b
   ret float %ret
 }
 
@@ -263,35 +251,7 @@ define float @fcos_approx_afn(float %a) {
   ret float %r
 }
 
-define float @fsin_approx(float %a) #0 {
-; CHECK-LABEL: fsin_approx(
-; CHECK:       {
-; CHECK-NEXT:    .reg .b32 %r<3>;
-; CHECK-EMPTY:
-; CHECK-NEXT:  // %bb.0:
-; CHECK-NEXT:    ld.param.b32 %r1, [fsin_approx_param_0];
-; CHECK-NEXT:    sin.approx.f32 %r2, %r1;
-; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
-; CHECK-NEXT:    ret;
-  %r = tail call float @llvm.sin.f32(float %a)
-  ret float %r
-}
-
-define float @fcos_approx(float %a) #0 {
-; CHECK-LABEL: fcos_approx(
-; CHECK:       {
-; CHECK-NEXT:    .reg .b32 %r<3>;
-; CHECK-EMPTY:
-; CHECK-NEXT:  // %bb.0:
-; CHECK-NEXT:    ld.param.b32 %r1, [fcos_approx_param_0];
-; CHECK-NEXT:    cos.approx.f32 %r2, %r1;
-; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
-; CHECK-NEXT:    ret;
-  %r = tail call float @llvm.cos.f32(float %a)
-  ret float %r
-}
-
-define float @fsin_approx_ftz(float %a) #0 #1 {
+define float @fsin_approx_ftz(float %a) #1 {
 ; CHECK-LABEL: fsin_approx_ftz(
 ; CHECK:       {
 ; CHECK-NEXT:    .reg .b32 %r<3>;
@@ -301,11 +261,11 @@ define float @fsin_approx_ftz(float %a) #0 #1 {
 ; CHECK-NEXT:    sin.approx.ftz.f32 %r2, %r1;
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
 ; CHECK-NEXT:    ret;
-  %r = tail call float @llvm.sin.f32(float %a)
+  %r = tail call afn float @llvm.sin.f32(float %a)
   ret float %r
 }
 
-define float @fcos_approx_ftz(float %a) #0 #1 {
+define float @fcos_approx_ftz(float %a) #1 {
 ; CHECK-LABEL: fcos_approx_ftz(
 ; CHECK:       {
 ; CHECK-NEXT:    .reg .b32 %r<3>;
@@ -315,7 +275,7 @@ define float @fcos_approx_ftz(float %a) #0 #1 {
 ; CHECK-NEXT:    cos.approx.ftz.f32 %r2, %r1;
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
 ; CHECK-NEXT:    ret;
-  %r = tail call float @llvm.cos.f32(float %a)
+  %r = tail call afn float @llvm.cos.f32(float %a)
   ret float %r
 }
 
@@ -423,7 +383,7 @@ define float @repeated_div_recip_allowed_ftz_sel(i1 %pred, float %a, float %b, f
   ret float %w
 }
 
-define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0 {
+define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) {
 ; CHECK-LABEL: repeated_div_fast(
 ; CHECK:       {
 ; CHECK-NEXT:    .reg .pred %p<2>;
@@ -444,14 +404,14 @@ define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0
 ; CHECK-NEXT:    selp.f32 %r8, %r7, %r6, %p1;
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r8;
 ; CHECK-NEXT:    ret;
-  %x = fdiv float %a, %divisor
-  %y = fdiv float %b, %divisor
-  %z = fmul float %x, %y
+  %x = fdiv afn arcp float %a, %divisor
+  %y = fdiv afn arcp contract float %b, %divisor
+  %z = fmul cont...
[truncated]

``````````



https://github.com/llvm/llvm-project/pull/151479


More information about the llvm-commits mailing list