[llvm] AMDGPU: Don't fold rootn(x, 1) to input for strictfp functions (PR #92595)

Fri May 17 13:29:36 PDT 2024

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

<details>
<summary>Changes</summary>

We need to insert a constrained canonicalize.

Depends #92594

---

Patch is 21.03 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/92595.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp (+11-10) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll (+52-59) 


``````````diff

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index faf04c3c7e709..47de1791dae31 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -1156,25 +1156,26 @@ bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
 
 bool AMDGPULibCalls::fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B,
                                 const FuncInfo &FInfo) {
-  // skip vector function
-  if (getVecSize(FInfo) != 1)
-    return false;
-
   Value *opr0 = FPOp->getOperand(0);
   Value *opr1 = FPOp->getOperand(1);
 
-  ConstantInt *CINT = dyn_cast<ConstantInt>(opr1);
-  if (!CINT) {
+  const APInt *CINT = nullptr;
+  if (!match(opr1, m_APIntAllowPoison(CINT)))
     return false;
-  }
+
+  Function *Parent = B.GetInsertBlock()->getParent();
+
   int ci_opr1 = (int)CINT->getSExtValue();
-  if (ci_opr1 == 1) {  // rootn(x, 1) = x
-    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n");
+  if (ci_opr1 == 1 && !Parent->hasFnAttribute(Attribute::StrictFP)) {
+    // rootn(x, 1) = x
+    //
+    // TODO: Insert constrained canonicalize for strictfp case.
+    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << '\n');
     replaceCall(FPOp, opr0);
     return true;
   }
 
-  Module *M = B.GetInsertBlock()->getModule();
+  Module *M = Parent->getParent();
   if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x)
     if (FunctionCallee FPExpr =
             getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll
index 2e64a3456c242..d75517cb26875 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll
@@ -342,8 +342,7 @@ define <2 x half> @test_rootn_v2f16_0(<2 x half> %x) {
 define <2 x half> @test_rootn_v2f16_1(<2 x half> %x) {
 ; CHECK-LABEL: define <2 x half> @test_rootn_v2f16_1(
 ; CHECK-SAME: <2 x half> [[X:%.*]]) {
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> [[X]], <2 x i32> <i32 1, i32 1>)
-; CHECK-NEXT:    ret <2 x half> [[CALL]]
+; CHECK-NEXT:    ret <2 x half> [[X]]
 ;
   %call = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> %x, <2 x i32> <i32 1, i32 1>)
   ret <2 x half> %call
@@ -352,8 +351,8 @@ define <2 x half> @test_rootn_v2f16_1(<2 x half> %x) {
 define <2 x half> @test_rootn_v2f16_2(<2 x half> %x) {
 ; CHECK-LABEL: define <2 x half> @test_rootn_v2f16_2(
 ; CHECK-SAME: <2 x half> [[X:%.*]]) {
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> [[X]], <2 x i32> <i32 2, i32 2>)
-; CHECK-NEXT:    ret <2 x half> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2SQRT:%.*]] = call <2 x half> @_Z4sqrtDv2_Dh(<2 x half> [[X]])
+; CHECK-NEXT:    ret <2 x half> [[__ROOTN2SQRT]]
 ;
   %call = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> %x, <2 x i32> <i32 2, i32 2>)
   ret <2 x half> %call
@@ -362,8 +361,8 @@ define <2 x half> @test_rootn_v2f16_2(<2 x half> %x) {
 define <2 x half> @test_rootn_v2f16_neg1(<2 x half> %x) {
 ; CHECK-LABEL: define <2 x half> @test_rootn_v2f16_neg1(
 ; CHECK-SAME: <2 x half> [[X:%.*]]) {
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> [[X]], <2 x i32> <i32 -1, i32 -1>)
-; CHECK-NEXT:    ret <2 x half> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, [[X]]
+; CHECK-NEXT:    ret <2 x half> [[__ROOTN2DIV]]
 ;
   %call = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> %x, <2 x i32> <i32 -1, i32 -1>)
   ret <2 x half> %call
@@ -372,8 +371,8 @@ define <2 x half> @test_rootn_v2f16_neg1(<2 x half> %x) {
 define <2 x half> @test_rootn_v2f16_neg2(<2 x half> %x) {
 ; CHECK-LABEL: define <2 x half> @test_rootn_v2f16_neg2(
 ; CHECK-SAME: <2 x half> [[X:%.*]]) {
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> [[X]], <2 x i32> <i32 -2, i32 -2>)
-; CHECK-NEXT:    ret <2 x half> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2RSQRT:%.*]] = call <2 x half> @_Z5rsqrtDv2_Dh(<2 x half> [[X]])
+; CHECK-NEXT:    ret <2 x half> [[__ROOTN2RSQRT]]
 ;
   %call = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> %x, <2 x i32> <i32 -2, i32 -2>)
   ret <2 x half> %call
@@ -512,7 +511,8 @@ define float @test_rootn_f32__y_1__strictfp(float %x) #1 {
 ; CHECK-LABEL: define float @test_rootn_f32__y_1__strictfp(
 ; CHECK-SAME: float [[X:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    ret float [[X]]
+; CHECK-NEXT:    [[CALL:%.*]] = tail call float @_Z5rootnfi(float [[X]], i32 1) #[[ATTR0]]
+; CHECK-NEXT:    ret float [[CALL]]
 ;
 entry:
   %call = tail call float @_Z5rootnfi(float %x, i32 1) #1
@@ -523,8 +523,7 @@ define <2 x float> @test_rootn_v2f32__y_1(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_1(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 1, i32 1>)
-; CHECK-NEXT:    ret <2 x float> [[CALL]]
+; CHECK-NEXT:    ret <2 x float> [[X]]
 ;
 entry:
   %call = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 1, i32 1>)
@@ -547,8 +546,7 @@ define <2 x float> @test_rootn_v2f32__y_1_undef(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_1_undef(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 1, i32 poison>)
-; CHECK-NEXT:    ret <2 x float> [[CALL]]
+; CHECK-NEXT:    ret <2 x float> [[X]]
 ;
 entry:
   %call = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 1, i32 poison>)
@@ -559,8 +557,7 @@ define <3 x float> @test_rootn_v3f32__y_1(<3 x float> %x) {
 ; CHECK-LABEL: define <3 x float> @test_rootn_v3f32__y_1(
 ; CHECK-SAME: <3 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <3 x float> @_Z5rootnDv3_fDv3_i(<3 x float> [[X]], <3 x i32> <i32 1, i32 1, i32 1>)
-; CHECK-NEXT:    ret <3 x float> [[CALL]]
+; CHECK-NEXT:    ret <3 x float> [[X]]
 ;
 entry:
   %call = tail call <3 x float> @_Z5rootnDv3_fDv3_i(<3 x float> %x, <3 x i32> <i32 1, i32 1, i32 1>)
@@ -571,8 +568,7 @@ define <3 x float> @test_rootn_v3f32__y_1_undef(<3 x float> %x) {
 ; CHECK-LABEL: define <3 x float> @test_rootn_v3f32__y_1_undef(
 ; CHECK-SAME: <3 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <3 x float> @_Z5rootnDv3_fDv3_i(<3 x float> [[X]], <3 x i32> <i32 1, i32 1, i32 poison>)
-; CHECK-NEXT:    ret <3 x float> [[CALL]]
+; CHECK-NEXT:    ret <3 x float> [[X]]
 ;
 entry:
   %call = tail call <3 x float> @_Z5rootnDv3_fDv3_i(<3 x float> %x, <3 x i32> <i32 1, i32 1, i32 poison>)
@@ -583,8 +579,7 @@ define <4 x float> @test_rootn_v4f32__y_1(<4 x float> %x) {
 ; CHECK-LABEL: define <4 x float> @test_rootn_v4f32__y_1(
 ; CHECK-SAME: <4 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <4 x float> @_Z5rootnDv4_fDv4_i(<4 x float> [[X]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
-; CHECK-NEXT:    ret <4 x float> [[CALL]]
+; CHECK-NEXT:    ret <4 x float> [[X]]
 ;
 entry:
   %call = tail call <4 x float> @_Z5rootnDv4_fDv4_i(<4 x float> %x, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
@@ -595,8 +590,7 @@ define <8 x float> @test_rootn_v8f32__y_1(<8 x float> %x) {
 ; CHECK-LABEL: define <8 x float> @test_rootn_v8f32__y_1(
 ; CHECK-SAME: <8 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <8 x float> @_Z5rootnDv8_fDv8_i(<8 x float> [[X]], <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>)
-; CHECK-NEXT:    ret <8 x float> [[CALL]]
+; CHECK-NEXT:    ret <8 x float> [[X]]
 ;
 entry:
   %call = tail call <8 x float> @_Z5rootnDv8_fDv8_i(<8 x float> %x, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>)
@@ -607,8 +601,7 @@ define <16 x float> @test_rootn_v16f32__y_1(<16 x float> %x) {
 ; CHECK-LABEL: define <16 x float> @test_rootn_v16f32__y_1(
 ; CHECK-SAME: <16 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <16 x float> @_Z5rootnDv16_fDv16_i(<16 x float> [[X]], <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>)
-; CHECK-NEXT:    ret <16 x float> [[CALL]]
+; CHECK-NEXT:    ret <16 x float> [[X]]
 ;
 entry:
   %call = tail call <16 x float> @_Z5rootnDv16_fDv16_i(<16 x float> %x, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>)
@@ -656,8 +649,8 @@ define <2 x float> @test_rootn_v2f32__y_2_flags(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_2_flags(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan nsz <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 2, i32 2>)
-; CHECK-NEXT:    ret <2 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2SQRT:%.*]] = call nnan nsz <2 x float> @_Z4sqrtDv2_f(<2 x float> [[X]])
+; CHECK-NEXT:    ret <2 x float> [[__ROOTN2SQRT]]
 ;
 entry:
   %call = tail call nnan nsz <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 2, i32 2>)
@@ -668,8 +661,8 @@ define <3 x float> @test_rootn_v3f32__y_2(<3 x float> %x) {
 ; CHECK-LABEL: define <3 x float> @test_rootn_v3f32__y_2(
 ; CHECK-SAME: <3 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <3 x float> @_Z5rootnDv3_fDv3_i(<3 x float> [[X]], <3 x i32> <i32 2, i32 2, i32 2>)
-; CHECK-NEXT:    ret <3 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2SQRT:%.*]] = call <3 x float> @_Z4sqrtDv3_f(<3 x float> [[X]])
+; CHECK-NEXT:    ret <3 x float> [[__ROOTN2SQRT]]
 ;
 entry:
   %call = tail call <3 x float> @_Z5rootnDv3_fDv3_i(<3 x float> %x, <3 x i32> <i32 2, i32 2, i32 2>)
@@ -680,8 +673,8 @@ define <3 x float> @test_rootn_v3f32__y_2_undef(<3 x float> %x) {
 ; CHECK-LABEL: define <3 x float> @test_rootn_v3f32__y_2_undef(
 ; CHECK-SAME: <3 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <3 x float> @_Z5rootnDv3_fDv3_i(<3 x float> [[X]], <3 x i32> <i32 2, i32 poison, i32 2>)
-; CHECK-NEXT:    ret <3 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2SQRT:%.*]] = call <3 x float> @_Z4sqrtDv3_f(<3 x float> [[X]])
+; CHECK-NEXT:    ret <3 x float> [[__ROOTN2SQRT]]
 ;
 entry:
   %call = tail call <3 x float> @_Z5rootnDv3_fDv3_i(<3 x float> %x, <3 x i32> <i32 2, i32 poison, i32 2>)
@@ -692,8 +685,8 @@ define <4 x float> @test_rootn_v4f32__y_2(<4 x float> %x) {
 ; CHECK-LABEL: define <4 x float> @test_rootn_v4f32__y_2(
 ; CHECK-SAME: <4 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <4 x float> @_Z5rootnDv4_fDv4_i(<4 x float> [[X]], <4 x i32> <i32 2, i32 2, i32 2, i32 2>)
-; CHECK-NEXT:    ret <4 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2SQRT:%.*]] = call <4 x float> @_Z4sqrtDv4_f(<4 x float> [[X]])
+; CHECK-NEXT:    ret <4 x float> [[__ROOTN2SQRT]]
 ;
 entry:
   %call = tail call <4 x float> @_Z5rootnDv4_fDv4_i(<4 x float> %x, <4 x i32> <i32 2, i32 2, i32 2, i32 2>)
@@ -704,8 +697,8 @@ define <8 x float> @test_rootn_v8f32__y_2(<8 x float> %x) {
 ; CHECK-LABEL: define <8 x float> @test_rootn_v8f32__y_2(
 ; CHECK-SAME: <8 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <8 x float> @_Z5rootnDv8_fDv8_i(<8 x float> [[X]], <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>)
-; CHECK-NEXT:    ret <8 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2SQRT:%.*]] = call <8 x float> @_Z4sqrtDv8_f(<8 x float> [[X]])
+; CHECK-NEXT:    ret <8 x float> [[__ROOTN2SQRT]]
 ;
 entry:
   %call = tail call <8 x float> @_Z5rootnDv8_fDv8_i(<8 x float> %x, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>)
@@ -716,8 +709,8 @@ define <16 x float> @test_rootn_v16f32__y_2(<16 x float> %x) {
 ; CHECK-LABEL: define <16 x float> @test_rootn_v16f32__y_2(
 ; CHECK-SAME: <16 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <16 x float> @_Z5rootnDv16_fDv16_i(<16 x float> [[X]], <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>)
-; CHECK-NEXT:    ret <16 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2SQRT:%.*]] = call <16 x float> @_Z4sqrtDv16_f(<16 x float> [[X]])
+; CHECK-NEXT:    ret <16 x float> [[__ROOTN2SQRT]]
 ;
 entry:
   %call = tail call <16 x float> @_Z5rootnDv16_fDv16_i(<16 x float> %x, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>)
@@ -740,8 +733,8 @@ define <2 x float> @test_rootn_v2f32__y_3(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_3(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 3, i32 3>)
-; CHECK-NEXT:    ret <2 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2CBRT:%.*]] = call <2 x float> @_Z4cbrtDv2_f(<2 x float> [[X]])
+; CHECK-NEXT:    ret <2 x float> [[__ROOTN2CBRT]]
 ;
 entry:
   %call = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 3, i32 3>)
@@ -764,8 +757,8 @@ define <2 x float> @test_rootn_v2f32__y_nonsplat_2_poison(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_nonsplat_2_poison(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 2, i32 poison>)
-; CHECK-NEXT:    ret <2 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2SQRT:%.*]] = call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[X]])
+; CHECK-NEXT:    ret <2 x float> [[__ROOTN2SQRT]]
 ;
 entry:
   %call = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 2, i32 poison>)
@@ -800,8 +793,8 @@ define <2 x float> @test_rootn_v2f32__y_neg1(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_neg1(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 -1, i32 -1>)
-; CHECK-NEXT:    ret <2 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    ret <2 x float> [[__ROOTN2DIV]]
 ;
 entry:
   %call = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 -1, i32 -1>)
@@ -812,8 +805,8 @@ define <3 x float> @test_rootn_v3f32__y_neg1(<3 x float> %x) {
 ; CHECK-LABEL: define <3 x float> @test_rootn_v3f32__y_neg1(
 ; CHECK-SAME: <3 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <3 x float> @_Z5rootnDv3_fDv3_i(<3 x float> [[X]], <3 x i32> <i32 -1, i32 -1, i32 -1>)
-; CHECK-NEXT:    ret <3 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    ret <3 x float> [[__ROOTN2DIV]]
 ;
 entry:
   %call = tail call <3 x float> @_Z5rootnDv3_fDv3_i(<3 x float> %x, <3 x i32> <i32 -1, i32 -1, i32 -1>)
@@ -824,8 +817,8 @@ define <3 x float> @test_rootn_v3f32__y_neg1_undef(<3 x float> %x) {
 ; CHECK-LABEL: define <3 x float> @test_rootn_v3f32__y_neg1_undef(
 ; CHECK-SAME: <3 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <3 x float> @_Z5rootnDv3_fDv3_i(<3 x float> [[X]], <3 x i32> <i32 -1, i32 -1, i32 poison>)
-; CHECK-NEXT:    ret <3 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    ret <3 x float> [[__ROOTN2DIV]]
 ;
 entry:
   %call = tail call <3 x float> @_Z5rootnDv3_fDv3_i(<3 x float> %x, <3 x i32> <i32 -1, i32 -1, i32 poison>)
@@ -836,8 +829,8 @@ define <4 x float> @test_rootn_v4f32__y_neg1(<4 x float> %x) {
 ; CHECK-LABEL: define <4 x float> @test_rootn_v4f32__y_neg1(
 ; CHECK-SAME: <4 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <4 x float> @_Z5rootnDv4_fDv4_i(<4 x float> [[X]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
-; CHECK-NEXT:    ret <4 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    ret <4 x float> [[__ROOTN2DIV]]
 ;
 entry:
   %call = tail call <4 x float> @_Z5rootnDv4_fDv4_i(<4 x float> %x, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
@@ -848,8 +841,8 @@ define <8 x float> @test_rootn_v8f32__y_neg1(<8 x float> %x) {
 ; CHECK-LABEL: define <8 x float> @test_rootn_v8f32__y_neg1(
 ; CHECK-SAME: <8 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <8 x float> @_Z5rootnDv8_fDv8_i(<8 x float> [[X]], <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>)
-; CHECK-NEXT:    ret <8 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    ret <8 x float> [[__ROOTN2DIV]]
 ;
 entry:
   %call = tail call <8 x float> @_Z5rootnDv8_fDv8_i(<8 x float> %x, <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>)
@@ -860,8 +853,8 @@ define <16 x float> @test_rootn_v16f32__y_neg1(<16 x float> %x) {
 ; CHECK-LABEL: define <16 x float> @test_rootn_v16f32__y_neg1(
 ; CHECK-SAME: <16 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <16 x float> @_Z5rootnDv16_fDv16_i(<16 x float> [[X]], <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>)
-; CHECK-NEXT:    ret <16 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <16 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    ret <16 x float> [[__ROOTN2DIV]]
 ;
 entry:
   %call = tail call <16 x float> @_Z5rootnDv16_fDv16_i(<16 x float> %x, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>)
@@ -932,8 +925,8 @@ define <2 x float> @test_rootn_v2f32__y_neg2(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_neg2(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 -2, i32 -2>)
-; CHECK-NEXT:    ret <2 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2RSQRT:%.*]] = call <2 x float> @_Z5rsqrtDv2_f(<2 x float> [[X]])
+; CHECK-NEXT:    ret <2 x float> [[__ROOTN2RSQRT]]
 ;
 entry:
   %call = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 -2, i32 -2>)
@@ -944,8 +937,8 @@ define <2 x float> @test_rootn_v2f32__y_neg2__flags(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_neg2__flags(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan nsz <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 -2, i32 -2>)
-; CHECK-NEXT:    ret <2 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2RSQRT:%.*]] = call nnan nsz <2 x float> @_Z5rsqrtDv2_f(<2 x float> [[X]])
+; CHECK-NEXT:    ret <2 x float> [[__ROOTN2RSQRT]]
 ;
 entry:
   %call = tail call nsz nnan <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 -2, i32 -2>)
@@ -956,8 +949,8 @@ define <2 x float> @test_rootn_v2f32__y_neg2__strictfp(<2 x float> %x) #1 {
 ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_neg2__strictfp(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/92595