[llvm] AMDGPU: Replace sqrt OpenCL libcalls with llvm.sqrt (PR #74197)

Sat Dec 2 06:55:22 PST 2023

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

<details>
<summary>Changes</summary>

The library implementation is just a wrapper around a call to the intrinsic, but loses metadata. Swap out the call site to the intrinsic so that the lowering can see the !fpmath metadata and fast math flags.

Since d56e0d07cc5ee8e334fd1ad403eef0b1a771384f, clang started placing !fpmath on OpenCL library sqrt calls. Also don't bother emitting native_sqrt anymore, it's just another wrapper around llvm.sqrt.

---

Patch is 24.56 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/74197.diff


3 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp (+3-29) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll (+38-38) 
- (modified) llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll (+2-3) 


``````````diff

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 5c66fd2b180f7..245c42af43483 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -87,9 +87,6 @@ class AMDGPULibCalls {
                               Constant *copr0, Constant *copr1);
   bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo);
 
-  // sqrt
-  bool fold_sqrt(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
-
   /// Insert a value to sincos function \p Fsincos. Returns (value of sin, value
   /// of cos, sincos call).
   std::tuple<Value *, Value *, Value *> insertSinCos(Value *Arg,
@@ -673,8 +670,6 @@ bool AMDGPULibCalls::fold(CallInst *CI) {
 
     // Specialized optimizations for each function call.
     //
-    // TODO: Handle other simple intrinsic wrappers. Sqrt.
-    //
     // TODO: Handle native functions
     switch (FInfo.getId()) {
     case AMDGPULibFunc::EI_EXP:
@@ -795,7 +790,9 @@ bool AMDGPULibCalls::fold(CallInst *CI) {
     case AMDGPULibFunc::EI_ROOTN:
       return fold_rootn(FPOp, B, FInfo);
     case AMDGPULibFunc::EI_SQRT:
-      return fold_sqrt(FPOp, B, FInfo);
+      // TODO: Allow with strictfp + constrained intrinsic
+      return tryReplaceLibcallWithSimpleIntrinsic(
+          B, CI, Intrinsic::sqrt, true, true, /*AllowStrictFP=*/false);
     case AMDGPULibFunc::EI_COS:
     case AMDGPULibFunc::EI_SIN:
       return fold_sincos(FPOp, B, FInfo);
@@ -1275,29 +1272,6 @@ bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic(
   return true;
 }
 
-// fold sqrt -> native_sqrt (x)
-bool AMDGPULibCalls::fold_sqrt(FPMathOperator *FPOp, IRBuilder<> &B,
-                               const FuncInfo &FInfo) {
-  if (!isUnsafeMath(FPOp))
-    return false;
-
-  if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
-      (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
-    Module *M = B.GetInsertBlock()->getModule();
-
-    if (FunctionCallee FPExpr = getNativeFunction(
-            M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
-      Value *opr0 = FPOp->getOperand(0);
-      LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
-                        << "sqrt(" << *opr0 << ")\n");
-      Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt");
-      replaceCall(FPOp, nval);
-      return true;
-    }
-  }
-  return false;
-}
-
 std::tuple<Value *, Value *, Value *>
 AMDGPULibCalls::insertSinCos(Value *Arg, FastMathFlags FMF, IRBuilder<> &B,
                              FunctionCallee Fsincos) {
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll
index d1a58a7a0148d..f5b6f2e170777 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll
@@ -27,7 +27,7 @@ declare <16 x half> @_Z4sqrtDv16_Dh(<16 x half>)
 define float @test_sqrt_f32(float %arg) {
 ; CHECK-LABEL: define float @test_sqrt_f32
 ; CHECK-SAME: (float [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]), !fpmath [[META0:![0-9]+]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @llvm.sqrt.f32(float [[ARG]]), !fpmath [[META0:![0-9]+]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call float @_Z4sqrtf(float %arg), !fpmath !0
@@ -37,7 +37,7 @@ define float @test_sqrt_f32(float %arg) {
 define <2 x float> @test_sqrt_v2f32(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]), !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg), !fpmath !0
@@ -47,7 +47,7 @@ define <2 x float> @test_sqrt_v2f32(<2 x float> %arg) {
 define <3 x float> @test_sqrt_v3f32(<3 x float> %arg) {
 ; CHECK-LABEL: define <3 x float> @test_sqrt_v3f32
 ; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> [[ARG]]), !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x float> @llvm.sqrt.v3f32(<3 x float> [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret <3 x float> [[SQRT]]
 ;
   %sqrt = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> %arg), !fpmath !0
@@ -57,7 +57,7 @@ define <3 x float> @test_sqrt_v3f32(<3 x float> %arg) {
 define <4 x float> @test_sqrt_v4f32(<4 x float> %arg) {
 ; CHECK-LABEL: define <4 x float> @test_sqrt_v4f32
 ; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> [[ARG]]), !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret <4 x float> [[SQRT]]
 ;
   %sqrt = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> %arg), !fpmath !0
@@ -67,7 +67,7 @@ define <4 x float> @test_sqrt_v4f32(<4 x float> %arg) {
 define <8 x float> @test_sqrt_v8f32(<8 x float> %arg) {
 ; CHECK-LABEL: define <8 x float> @test_sqrt_v8f32
 ; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> [[ARG]]), !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret <8 x float> [[SQRT]]
 ;
   %sqrt = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> %arg), !fpmath !0
@@ -77,7 +77,7 @@ define <8 x float> @test_sqrt_v8f32(<8 x float> %arg) {
 define <16 x float> @test_sqrt_v16f32(<16 x float> %arg) {
 ; CHECK-LABEL: define <16 x float> @test_sqrt_v16f32
 ; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> [[ARG]]), !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret <16 x float> [[SQRT]]
 ;
   %sqrt = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> %arg), !fpmath !0
@@ -87,7 +87,7 @@ define <16 x float> @test_sqrt_v16f32(<16 x float> %arg) {
 define float @test_sqrt_cr_f32(float %arg) {
 ; CHECK-LABEL: define float @test_sqrt_cr_f32
 ; CHECK-SAME: (float [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @llvm.sqrt.f32(float [[ARG]])
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call float @_Z4sqrtf(float %arg)
@@ -97,7 +97,7 @@ define float @test_sqrt_cr_f32(float %arg) {
 define <2 x float> @test_sqrt_cr_v2f32(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ARG]])
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg)
@@ -107,7 +107,7 @@ define <2 x float> @test_sqrt_cr_v2f32(<2 x float> %arg) {
 define <3 x float> @test_sqrt_cr_v3f32(<3 x float> %arg) {
 ; CHECK-LABEL: define <3 x float> @test_sqrt_cr_v3f32
 ; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x float> @llvm.sqrt.v3f32(<3 x float> [[ARG]])
 ; CHECK-NEXT:    ret <3 x float> [[SQRT]]
 ;
   %sqrt = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> %arg)
@@ -117,7 +117,7 @@ define <3 x float> @test_sqrt_cr_v3f32(<3 x float> %arg) {
 define <4 x float> @test_sqrt_cr_v4f32(<4 x float> %arg) {
 ; CHECK-LABEL: define <4 x float> @test_sqrt_cr_v4f32
 ; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
 ; CHECK-NEXT:    ret <4 x float> [[SQRT]]
 ;
   %sqrt = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> %arg)
@@ -127,7 +127,7 @@ define <4 x float> @test_sqrt_cr_v4f32(<4 x float> %arg) {
 define <8 x float> @test_sqrt_cr_v8f32(<8 x float> %arg) {
 ; CHECK-LABEL: define <8 x float> @test_sqrt_cr_v8f32
 ; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> [[ARG]])
 ; CHECK-NEXT:    ret <8 x float> [[SQRT]]
 ;
   %sqrt = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> %arg)
@@ -137,7 +137,7 @@ define <8 x float> @test_sqrt_cr_v8f32(<8 x float> %arg) {
 define <16 x float> @test_sqrt_cr_v16f32(<16 x float> %arg) {
 ; CHECK-LABEL: define <16 x float> @test_sqrt_cr_v16f32
 ; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[ARG]])
 ; CHECK-NEXT:    ret <16 x float> [[SQRT]]
 ;
   %sqrt = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> %arg)
@@ -147,7 +147,7 @@ define <16 x float> @test_sqrt_cr_v16f32(<16 x float> %arg) {
 define double @test_sqrt_f64(double %arg) {
 ; CHECK-LABEL: define double @test_sqrt_f64
 ; CHECK-SAME: (double [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call double @_Z4sqrtd(double [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call double @llvm.sqrt.f64(double [[ARG]])
 ; CHECK-NEXT:    ret double [[SQRT]]
 ;
   %sqrt = tail call double @_Z4sqrtd(double %arg)
@@ -157,7 +157,7 @@ define double @test_sqrt_f64(double %arg) {
 define <2 x double> @test_sqrt_v2f64(<2 x double> %arg) {
 ; CHECK-LABEL: define <2 x double> @test_sqrt_v2f64
 ; CHECK-SAME: (<2 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x double> @_Z4sqrtDv2_d(<2 x double> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[ARG]])
 ; CHECK-NEXT:    ret <2 x double> [[SQRT]]
 ;
   %sqrt = tail call <2 x double> @_Z4sqrtDv2_d(<2 x double> %arg)
@@ -167,7 +167,7 @@ define <2 x double> @test_sqrt_v2f64(<2 x double> %arg) {
 define <3 x double> @test_sqrt_v3f64(<3 x double> %arg) {
 ; CHECK-LABEL: define <3 x double> @test_sqrt_v3f64
 ; CHECK-SAME: (<3 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x double> @_Z4sqrtDv3_d(<3 x double> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x double> @llvm.sqrt.v3f64(<3 x double> [[ARG]])
 ; CHECK-NEXT:    ret <3 x double> [[SQRT]]
 ;
   %sqrt = tail call <3 x double> @_Z4sqrtDv3_d(<3 x double> %arg)
@@ -177,7 +177,7 @@ define <3 x double> @test_sqrt_v3f64(<3 x double> %arg) {
 define <4 x double> @test_sqrt_v4f64(<4 x double> %arg) {
 ; CHECK-LABEL: define <4 x double> @test_sqrt_v4f64
 ; CHECK-SAME: (<4 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x double> @_Z4sqrtDv4_d(<4 x double> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x double> @llvm.sqrt.v4f64(<4 x double> [[ARG]])
 ; CHECK-NEXT:    ret <4 x double> [[SQRT]]
 ;
   %sqrt = tail call <4 x double> @_Z4sqrtDv4_d(<4 x double> %arg)
@@ -187,7 +187,7 @@ define <4 x double> @test_sqrt_v4f64(<4 x double> %arg) {
 define <8 x double> @test_sqrt_v8f64(<8 x double> %arg) {
 ; CHECK-LABEL: define <8 x double> @test_sqrt_v8f64
 ; CHECK-SAME: (<8 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x double> @_Z4sqrtDv8_d(<8 x double> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x double> @llvm.sqrt.v8f64(<8 x double> [[ARG]])
 ; CHECK-NEXT:    ret <8 x double> [[SQRT]]
 ;
   %sqrt = tail call <8 x double> @_Z4sqrtDv8_d(<8 x double> %arg)
@@ -197,7 +197,7 @@ define <8 x double> @test_sqrt_v8f64(<8 x double> %arg) {
 define <16 x double> @test_sqrt_v16f64(<16 x double> %arg) {
 ; CHECK-LABEL: define <16 x double> @test_sqrt_v16f64
 ; CHECK-SAME: (<16 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x double> @_Z4sqrtDv16_d(<16 x double> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x double> @llvm.sqrt.v16f64(<16 x double> [[ARG]])
 ; CHECK-NEXT:    ret <16 x double> [[SQRT]]
 ;
   %sqrt = tail call <16 x double> @_Z4sqrtDv16_d(<16 x double> %arg)
@@ -207,7 +207,7 @@ define <16 x double> @test_sqrt_v16f64(<16 x double> %arg) {
 define half @test_sqrt_f16(half %arg) {
 ; CHECK-LABEL: define half @test_sqrt_f16
 ; CHECK-SAME: (half [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call half @_Z4sqrtDh(half [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call half @llvm.sqrt.f16(half [[ARG]])
 ; CHECK-NEXT:    ret half [[SQRT]]
 ;
   %sqrt = tail call half @_Z4sqrtDh(half %arg)
@@ -217,7 +217,7 @@ define half @test_sqrt_f16(half %arg) {
 define <2 x half> @test_sqrt_v2f16(<2 x half> %arg) {
 ; CHECK-LABEL: define <2 x half> @test_sqrt_v2f16
 ; CHECK-SAME: (<2 x half> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x half> @_Z4sqrtDv2_Dh(<2 x half> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x half> @llvm.sqrt.v2f16(<2 x half> [[ARG]])
 ; CHECK-NEXT:    ret <2 x half> [[SQRT]]
 ;
   %sqrt = tail call <2 x half> @_Z4sqrtDv2_Dh(<2 x half> %arg)
@@ -227,7 +227,7 @@ define <2 x half> @test_sqrt_v2f16(<2 x half> %arg) {
 define <3 x half> @test_sqrt_v3f16(<3 x half> %arg) {
 ; CHECK-LABEL: define <3 x half> @test_sqrt_v3f16
 ; CHECK-SAME: (<3 x half> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x half> @_Z4sqrtDv3_Dh(<3 x half> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x half> @llvm.sqrt.v3f16(<3 x half> [[ARG]])
 ; CHECK-NEXT:    ret <3 x half> [[SQRT]]
 ;
   %sqrt = tail call <3 x half> @_Z4sqrtDv3_Dh(<3 x half> %arg)
@@ -237,7 +237,7 @@ define <3 x half> @test_sqrt_v3f16(<3 x half> %arg) {
 define <4 x half> @test_sqrt_v4f16(<4 x half> %arg) {
 ; CHECK-LABEL: define <4 x half> @test_sqrt_v4f16
 ; CHECK-SAME: (<4 x half> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x half> @_Z4sqrtDv4_Dh(<4 x half> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x half> @llvm.sqrt.v4f16(<4 x half> [[ARG]])
 ; CHECK-NEXT:    ret <4 x half> [[SQRT]]
 ;
   %sqrt = tail call <4 x half> @_Z4sqrtDv4_Dh(<4 x half> %arg)
@@ -247,7 +247,7 @@ define <4 x half> @test_sqrt_v4f16(<4 x half> %arg) {
 define <8 x half> @test_sqrt_v8f16(<8 x half> %arg) {
 ; CHECK-LABEL: define <8 x half> @test_sqrt_v8f16
 ; CHECK-SAME: (<8 x half> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x half> @_Z4sqrtDv8_Dh(<8 x half> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x half> @llvm.sqrt.v8f16(<8 x half> [[ARG]])
 ; CHECK-NEXT:    ret <8 x half> [[SQRT]]
 ;
   %sqrt = tail call <8 x half> @_Z4sqrtDv8_Dh(<8 x half> %arg)
@@ -257,7 +257,7 @@ define <8 x half> @test_sqrt_v8f16(<8 x half> %arg) {
 define <16 x half> @test_sqrt_v16f16(<16 x half> %arg) {
 ; CHECK-LABEL: define <16 x half> @test_sqrt_v16f16
 ; CHECK-SAME: (<16 x half> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x half> @_Z4sqrtDv16_Dh(<16 x half> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x half> @llvm.sqrt.v16f16(<16 x half> [[ARG]])
 ; CHECK-NEXT:    ret <16 x half> [[SQRT]]
 ;
   %sqrt = tail call <16 x half> @_Z4sqrtDv16_Dh(<16 x half> %arg)
@@ -267,7 +267,7 @@ define <16 x half> @test_sqrt_v16f16(<16 x half> %arg) {
 define float @test_sqrt_f32_nobuiltin_callsite(float %arg) {
 ; CHECK-LABEL: define float @test_sqrt_f32_nobuiltin_callsite
 ; CHECK-SAME: (float [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR2:[0-9]+]], !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR3:[0-9]+]], !fpmath [[META0]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call float @_Z4sqrtf(float %arg) #0, !fpmath !0
@@ -277,7 +277,7 @@ define float @test_sqrt_f32_nobuiltin_callsite(float %arg) {
 define <2 x float> @test_sqrt_v2f32_nobuiltin_callsite(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_nobuiltin_callsite
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR2]], !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR3]], !fpmath [[META0]]
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0, !fpmath !0
@@ -287,7 +287,7 @@ define <2 x float> @test_sqrt_v2f32_nobuiltin_callsite(<2 x float> %arg) {
 define float @test_sqrt_cr_f32_nobuiltin_callsite(float %arg) {
 ; CHECK-LABEL: define float @test_sqrt_cr_f32_nobuiltin_callsite
 ; CHECK-SAME: (float [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR2]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR3]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call float @_Z4sqrtf(float %arg) #0
@@ -297,7 +297,7 @@ define float @test_sqrt_cr_f32_nobuiltin_callsite(float %arg) {
 define <2 x float> @test_sqrt_cr_v2f32_nobuiltin_callsite(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32_nobuiltin_callsite
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR2]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0
@@ -308,7 +308,7 @@ define <2 x float> @test_sqrt_cr_v2f32_nobuiltin_callsite(<2 x float> %arg) {
 define float @test_sqrt_f32_nobuiltins(float %arg) #1 {
 ; CHECK-LABEL: define float @test_sqrt_f32_nobuiltins
 ; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR2]], !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR3]], !fpmath [[META0]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call float @_Z4sqrtf(float %arg) #0, !fpmath !0
@@ -318,7 +318,7 @@ define float @test_sqrt_f32_nobuiltins(float %arg) #1 {
 define <2 x float> @test_sqrt_v2f32_nobuiltins(<2 x float> %arg) #1 {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_nobuiltins
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR2]], !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR3]], !fpmath [[META0]]
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0, !fpmath !0
@@ -328,7 +328,7 @@ define <2 x float> @test_sqrt_v2f32_nobuiltins(<2 x float> %arg) #1 {
 define float @test_sqrt_cr_f32_nobuiltins(float %arg) #1 {
 ; CHECK-LABEL: define float @test_sqrt_cr_f32_nobuiltins
 ; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR2]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR3]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call float @_Z4sqrtf(float %arg) #0
@@ -338,7 +338,7 @@ define float @test_sqrt_cr_f32_nobuiltins(float %arg) #1 {
 define <2 x float> @test_sqrt_cr_v2f32_nobuiltins(<2 x float> %arg) #1 {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32_nobuiltins
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR2]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0
@@ -348,7 +348,7 @@ define <2 x float> @test_sqrt_cr_v2f32_nobuiltins(<2 x float> %arg) #1 {
 define float @test_sqrt_f32_preserve_flags(float %arg) {
 ; CHECK-LABEL: define float @test_sqrt_f32_preserve_flags
 ; CHECK-SAME: (float [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan ninf float @_Z4sqrtf(float [[ARG]]), !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/74197