[llvm] daecc30 - AMDGPU: Replace sqrt OpenCL libcalls with llvm.sqrt (#74197)

Tue Jan 9 00:14:02 PST 2024

Author: Matt Arsenault
Date: 2024-01-09T15:13:58+07:00
New Revision: daecc303bb719ed63566fcb343afec169826f82c

URL: https://github.com/llvm/llvm-project/commit/daecc303bb719ed63566fcb343afec169826f82c
DIFF: https://github.com/llvm/llvm-project/commit/daecc303bb719ed63566fcb343afec169826f82c.diff

LOG: AMDGPU: Replace sqrt OpenCL libcalls with llvm.sqrt (#74197)

The library implementation is just a wrapper around a call to the
intrinsic, but loses metadata. Swap out the call site to the intrinsic
so that the lowering can see the !fpmath metadata and fast math flags.

Since d56e0d07cc5ee8e334fd1ad403eef0b1a771384f, clang started placing
!fpmath on OpenCL library sqrt calls. Also don't bother emitting
native_sqrt anymore, it's just another wrapper around llvm.sqrt.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
    llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll
    llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index f03e6b8915b134..1b2f74cf153bf8 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -87,9 +87,6 @@ class AMDGPULibCalls {
                               Constant *copr0, Constant *copr1);
   bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo);
 
-  // sqrt
-  bool fold_sqrt(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
-
   /// Insert a value to sincos function \p Fsincos. Returns (value of sin, value
   /// of cos, sincos call).
   std::tuple<Value *, Value *, Value *> insertSinCos(Value *Arg,
@@ -672,8 +669,6 @@ bool AMDGPULibCalls::fold(CallInst *CI) {
 
     // Specialized optimizations for each function call.
     //
-    // TODO: Handle other simple intrinsic wrappers. Sqrt.
-    //
     // TODO: Handle native functions
     switch (FInfo.getId()) {
     case AMDGPULibFunc::EI_EXP:
@@ -794,7 +789,9 @@ bool AMDGPULibCalls::fold(CallInst *CI) {
     case AMDGPULibFunc::EI_ROOTN:
       return fold_rootn(FPOp, B, FInfo);
     case AMDGPULibFunc::EI_SQRT:
-      return fold_sqrt(FPOp, B, FInfo);
+      // TODO: Allow with strictfp + constrained intrinsic
+      return tryReplaceLibcallWithSimpleIntrinsic(
+          B, CI, Intrinsic::sqrt, true, true, /*AllowStrictFP=*/false);
     case AMDGPULibFunc::EI_COS:
     case AMDGPULibFunc::EI_SIN:
       return fold_sincos(FPOp, B, FInfo);
@@ -1273,29 +1270,6 @@ bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic(
   return true;
 }
 
-// fold sqrt -> native_sqrt (x)
-bool AMDGPULibCalls::fold_sqrt(FPMathOperator *FPOp, IRBuilder<> &B,
-                               const FuncInfo &FInfo) {
-  if (!isUnsafeMath(FPOp))
-    return false;
-
-  if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
-      (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
-    Module *M = B.GetInsertBlock()->getModule();
-
-    if (FunctionCallee FPExpr = getNativeFunction(
-            M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
-      Value *opr0 = FPOp->getOperand(0);
-      LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
-                        << "sqrt(" << *opr0 << ")\n");
-      Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt");
-      replaceCall(FPOp, nval);
-      return true;
-    }
-  }
-  return false;
-}
-
 std::tuple<Value *, Value *, Value *>
 AMDGPULibCalls::insertSinCos(Value *Arg, FastMathFlags FMF, IRBuilder<> &B,
                              FunctionCallee Fsincos) {

diff  --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll
index 5b57778d5fdcdb..72f809b3e0607a 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll
@@ -27,7 +27,7 @@ declare <16 x half> @_Z4sqrtDv16_Dh(<16 x half>)
 define float @test_sqrt_f32(float %arg) {
 ; CHECK-LABEL: define float @test_sqrt_f32
 ; CHECK-SAME: (float [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]), !fpmath [[META0:![0-9]+]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @llvm.sqrt.f32(float [[ARG]]), !fpmath [[META0:![0-9]+]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call float @_Z4sqrtf(float %arg), !fpmath !0
@@ -37,7 +37,7 @@ define float @test_sqrt_f32(float %arg) {
 define <2 x float> @test_sqrt_v2f32(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]), !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg), !fpmath !0
@@ -47,7 +47,7 @@ define <2 x float> @test_sqrt_v2f32(<2 x float> %arg) {
 define <3 x float> @test_sqrt_v3f32(<3 x float> %arg) {
 ; CHECK-LABEL: define <3 x float> @test_sqrt_v3f32
 ; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> [[ARG]]), !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x float> @llvm.sqrt.v3f32(<3 x float> [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret <3 x float> [[SQRT]]
 ;
   %sqrt = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> %arg), !fpmath !0
@@ -57,7 +57,7 @@ define <3 x float> @test_sqrt_v3f32(<3 x float> %arg) {
 define <4 x float> @test_sqrt_v4f32(<4 x float> %arg) {
 ; CHECK-LABEL: define <4 x float> @test_sqrt_v4f32
 ; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> [[ARG]]), !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret <4 x float> [[SQRT]]
 ;
   %sqrt = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> %arg), !fpmath !0
@@ -67,7 +67,7 @@ define <4 x float> @test_sqrt_v4f32(<4 x float> %arg) {
 define <8 x float> @test_sqrt_v8f32(<8 x float> %arg) {
 ; CHECK-LABEL: define <8 x float> @test_sqrt_v8f32
 ; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> [[ARG]]), !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret <8 x float> [[SQRT]]
 ;
   %sqrt = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> %arg), !fpmath !0
@@ -77,7 +77,7 @@ define <8 x float> @test_sqrt_v8f32(<8 x float> %arg) {
 define <16 x float> @test_sqrt_v16f32(<16 x float> %arg) {
 ; CHECK-LABEL: define <16 x float> @test_sqrt_v16f32
 ; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> [[ARG]]), !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret <16 x float> [[SQRT]]
 ;
   %sqrt = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> %arg), !fpmath !0
@@ -87,7 +87,7 @@ define <16 x float> @test_sqrt_v16f32(<16 x float> %arg) {
 define float @test_sqrt_cr_f32(float %arg) {
 ; CHECK-LABEL: define float @test_sqrt_cr_f32
 ; CHECK-SAME: (float [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @llvm.sqrt.f32(float [[ARG]])
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call float @_Z4sqrtf(float %arg)
@@ -97,7 +97,7 @@ define float @test_sqrt_cr_f32(float %arg) {
 define <2 x float> @test_sqrt_cr_v2f32(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ARG]])
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg)
@@ -107,7 +107,7 @@ define <2 x float> @test_sqrt_cr_v2f32(<2 x float> %arg) {
 define <3 x float> @test_sqrt_cr_v3f32(<3 x float> %arg) {
 ; CHECK-LABEL: define <3 x float> @test_sqrt_cr_v3f32
 ; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x float> @llvm.sqrt.v3f32(<3 x float> [[ARG]])
 ; CHECK-NEXT:    ret <3 x float> [[SQRT]]
 ;
   %sqrt = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> %arg)
@@ -117,7 +117,7 @@ define <3 x float> @test_sqrt_cr_v3f32(<3 x float> %arg) {
 define <4 x float> @test_sqrt_cr_v4f32(<4 x float> %arg) {
 ; CHECK-LABEL: define <4 x float> @test_sqrt_cr_v4f32
 ; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
 ; CHECK-NEXT:    ret <4 x float> [[SQRT]]
 ;
   %sqrt = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> %arg)
@@ -127,7 +127,7 @@ define <4 x float> @test_sqrt_cr_v4f32(<4 x float> %arg) {
 define <8 x float> @test_sqrt_cr_v8f32(<8 x float> %arg) {
 ; CHECK-LABEL: define <8 x float> @test_sqrt_cr_v8f32
 ; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> [[ARG]])
 ; CHECK-NEXT:    ret <8 x float> [[SQRT]]
 ;
   %sqrt = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> %arg)
@@ -137,7 +137,7 @@ define <8 x float> @test_sqrt_cr_v8f32(<8 x float> %arg) {
 define <16 x float> @test_sqrt_cr_v16f32(<16 x float> %arg) {
 ; CHECK-LABEL: define <16 x float> @test_sqrt_cr_v16f32
 ; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[ARG]])
 ; CHECK-NEXT:    ret <16 x float> [[SQRT]]
 ;
   %sqrt = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> %arg)
@@ -147,7 +147,7 @@ define <16 x float> @test_sqrt_cr_v16f32(<16 x float> %arg) {
 define double @test_sqrt_f64(double %arg) {
 ; CHECK-LABEL: define double @test_sqrt_f64
 ; CHECK-SAME: (double [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call double @_Z4sqrtd(double [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call double @llvm.sqrt.f64(double [[ARG]])
 ; CHECK-NEXT:    ret double [[SQRT]]
 ;
   %sqrt = tail call double @_Z4sqrtd(double %arg)
@@ -157,7 +157,7 @@ define double @test_sqrt_f64(double %arg) {
 define <2 x double> @test_sqrt_v2f64(<2 x double> %arg) {
 ; CHECK-LABEL: define <2 x double> @test_sqrt_v2f64
 ; CHECK-SAME: (<2 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x double> @_Z4sqrtDv2_d(<2 x double> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[ARG]])
 ; CHECK-NEXT:    ret <2 x double> [[SQRT]]
 ;
   %sqrt = tail call <2 x double> @_Z4sqrtDv2_d(<2 x double> %arg)
@@ -167,7 +167,7 @@ define <2 x double> @test_sqrt_v2f64(<2 x double> %arg) {
 define <3 x double> @test_sqrt_v3f64(<3 x double> %arg) {
 ; CHECK-LABEL: define <3 x double> @test_sqrt_v3f64
 ; CHECK-SAME: (<3 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x double> @_Z4sqrtDv3_d(<3 x double> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x double> @llvm.sqrt.v3f64(<3 x double> [[ARG]])
 ; CHECK-NEXT:    ret <3 x double> [[SQRT]]
 ;
   %sqrt = tail call <3 x double> @_Z4sqrtDv3_d(<3 x double> %arg)
@@ -177,7 +177,7 @@ define <3 x double> @test_sqrt_v3f64(<3 x double> %arg) {
 define <4 x double> @test_sqrt_v4f64(<4 x double> %arg) {
 ; CHECK-LABEL: define <4 x double> @test_sqrt_v4f64
 ; CHECK-SAME: (<4 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x double> @_Z4sqrtDv4_d(<4 x double> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x double> @llvm.sqrt.v4f64(<4 x double> [[ARG]])
 ; CHECK-NEXT:    ret <4 x double> [[SQRT]]
 ;
   %sqrt = tail call <4 x double> @_Z4sqrtDv4_d(<4 x double> %arg)
@@ -187,7 +187,7 @@ define <4 x double> @test_sqrt_v4f64(<4 x double> %arg) {
 define <8 x double> @test_sqrt_v8f64(<8 x double> %arg) {
 ; CHECK-LABEL: define <8 x double> @test_sqrt_v8f64
 ; CHECK-SAME: (<8 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x double> @_Z4sqrtDv8_d(<8 x double> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x double> @llvm.sqrt.v8f64(<8 x double> [[ARG]])
 ; CHECK-NEXT:    ret <8 x double> [[SQRT]]
 ;
   %sqrt = tail call <8 x double> @_Z4sqrtDv8_d(<8 x double> %arg)
@@ -197,7 +197,7 @@ define <8 x double> @test_sqrt_v8f64(<8 x double> %arg) {
 define <16 x double> @test_sqrt_v16f64(<16 x double> %arg) {
 ; CHECK-LABEL: define <16 x double> @test_sqrt_v16f64
 ; CHECK-SAME: (<16 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x double> @_Z4sqrtDv16_d(<16 x double> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x double> @llvm.sqrt.v16f64(<16 x double> [[ARG]])
 ; CHECK-NEXT:    ret <16 x double> [[SQRT]]
 ;
   %sqrt = tail call <16 x double> @_Z4sqrtDv16_d(<16 x double> %arg)
@@ -207,7 +207,7 @@ define <16 x double> @test_sqrt_v16f64(<16 x double> %arg) {
 define half @test_sqrt_f16(half %arg) {
 ; CHECK-LABEL: define half @test_sqrt_f16
 ; CHECK-SAME: (half [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call half @_Z4sqrtDh(half [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call half @llvm.sqrt.f16(half [[ARG]])
 ; CHECK-NEXT:    ret half [[SQRT]]
 ;
   %sqrt = tail call half @_Z4sqrtDh(half %arg)
@@ -217,7 +217,7 @@ define half @test_sqrt_f16(half %arg) {
 define <2 x half> @test_sqrt_v2f16(<2 x half> %arg) {
 ; CHECK-LABEL: define <2 x half> @test_sqrt_v2f16
 ; CHECK-SAME: (<2 x half> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x half> @_Z4sqrtDv2_Dh(<2 x half> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x half> @llvm.sqrt.v2f16(<2 x half> [[ARG]])
 ; CHECK-NEXT:    ret <2 x half> [[SQRT]]
 ;
   %sqrt = tail call <2 x half> @_Z4sqrtDv2_Dh(<2 x half> %arg)
@@ -227,7 +227,7 @@ define <2 x half> @test_sqrt_v2f16(<2 x half> %arg) {
 define <3 x half> @test_sqrt_v3f16(<3 x half> %arg) {
 ; CHECK-LABEL: define <3 x half> @test_sqrt_v3f16
 ; CHECK-SAME: (<3 x half> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x half> @_Z4sqrtDv3_Dh(<3 x half> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x half> @llvm.sqrt.v3f16(<3 x half> [[ARG]])
 ; CHECK-NEXT:    ret <3 x half> [[SQRT]]
 ;
   %sqrt = tail call <3 x half> @_Z4sqrtDv3_Dh(<3 x half> %arg)
@@ -237,7 +237,7 @@ define <3 x half> @test_sqrt_v3f16(<3 x half> %arg) {
 define <4 x half> @test_sqrt_v4f16(<4 x half> %arg) {
 ; CHECK-LABEL: define <4 x half> @test_sqrt_v4f16
 ; CHECK-SAME: (<4 x half> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x half> @_Z4sqrtDv4_Dh(<4 x half> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x half> @llvm.sqrt.v4f16(<4 x half> [[ARG]])
 ; CHECK-NEXT:    ret <4 x half> [[SQRT]]
 ;
   %sqrt = tail call <4 x half> @_Z4sqrtDv4_Dh(<4 x half> %arg)
@@ -247,7 +247,7 @@ define <4 x half> @test_sqrt_v4f16(<4 x half> %arg) {
 define <8 x half> @test_sqrt_v8f16(<8 x half> %arg) {
 ; CHECK-LABEL: define <8 x half> @test_sqrt_v8f16
 ; CHECK-SAME: (<8 x half> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x half> @_Z4sqrtDv8_Dh(<8 x half> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x half> @llvm.sqrt.v8f16(<8 x half> [[ARG]])
 ; CHECK-NEXT:    ret <8 x half> [[SQRT]]
 ;
   %sqrt = tail call <8 x half> @_Z4sqrtDv8_Dh(<8 x half> %arg)
@@ -257,7 +257,7 @@ define <8 x half> @test_sqrt_v8f16(<8 x half> %arg) {
 define <16 x half> @test_sqrt_v16f16(<16 x half> %arg) {
 ; CHECK-LABEL: define <16 x half> @test_sqrt_v16f16
 ; CHECK-SAME: (<16 x half> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x half> @_Z4sqrtDv16_Dh(<16 x half> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x half> @llvm.sqrt.v16f16(<16 x half> [[ARG]])
 ; CHECK-NEXT:    ret <16 x half> [[SQRT]]
 ;
   %sqrt = tail call <16 x half> @_Z4sqrtDv16_Dh(<16 x half> %arg)
@@ -267,7 +267,7 @@ define <16 x half> @test_sqrt_v16f16(<16 x half> %arg) {
 define float @test_sqrt_f32_nobuiltin_callsite(float %arg) {
 ; CHECK-LABEL: define float @test_sqrt_f32_nobuiltin_callsite
 ; CHECK-SAME: (float [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR2:[0-9]+]], !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR3:[0-9]+]], !fpmath [[META0]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call float @_Z4sqrtf(float %arg) #0, !fpmath !0
@@ -277,7 +277,7 @@ define float @test_sqrt_f32_nobuiltin_callsite(float %arg) {
 define <2 x float> @test_sqrt_v2f32_nobuiltin_callsite(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_nobuiltin_callsite
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR2]], !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR3]], !fpmath [[META0]]
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0, !fpmath !0
@@ -287,7 +287,7 @@ define <2 x float> @test_sqrt_v2f32_nobuiltin_callsite(<2 x float> %arg) {
 define float @test_sqrt_cr_f32_nobuiltin_callsite(float %arg) {
 ; CHECK-LABEL: define float @test_sqrt_cr_f32_nobuiltin_callsite
 ; CHECK-SAME: (float [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR2]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR3]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call float @_Z4sqrtf(float %arg) #0
@@ -297,7 +297,7 @@ define float @test_sqrt_cr_f32_nobuiltin_callsite(float %arg) {
 define <2 x float> @test_sqrt_cr_v2f32_nobuiltin_callsite(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32_nobuiltin_callsite
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR2]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0
@@ -308,7 +308,7 @@ define <2 x float> @test_sqrt_cr_v2f32_nobuiltin_callsite(<2 x float> %arg) {
 define float @test_sqrt_f32_nobuiltins(float %arg) #1 {
 ; CHECK-LABEL: define float @test_sqrt_f32_nobuiltins
 ; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR2]], !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR3]], !fpmath [[META0]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call float @_Z4sqrtf(float %arg) #0, !fpmath !0
@@ -318,7 +318,7 @@ define float @test_sqrt_f32_nobuiltins(float %arg) #1 {
 define <2 x float> @test_sqrt_v2f32_nobuiltins(<2 x float> %arg) #1 {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_nobuiltins
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR2]], !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR3]], !fpmath [[META0]]
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0, !fpmath !0
@@ -328,7 +328,7 @@ define <2 x float> @test_sqrt_v2f32_nobuiltins(<2 x float> %arg) #1 {
 define float @test_sqrt_cr_f32_nobuiltins(float %arg) #1 {
 ; CHECK-LABEL: define float @test_sqrt_cr_f32_nobuiltins
 ; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR2]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR3]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call float @_Z4sqrtf(float %arg) #0
@@ -338,7 +338,7 @@ define float @test_sqrt_cr_f32_nobuiltins(float %arg) #1 {
 define <2 x float> @test_sqrt_cr_v2f32_nobuiltins(<2 x float> %arg) #1 {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32_nobuiltins
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR2]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0
@@ -348,7 +348,7 @@ define <2 x float> @test_sqrt_cr_v2f32_nobuiltins(<2 x float> %arg) #1 {
 define float @test_sqrt_f32_preserve_flags(float %arg) {
 ; CHECK-LABEL: define float @test_sqrt_f32_preserve_flags
 ; CHECK-SAME: (float [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan ninf float @_Z4sqrtf(float [[ARG]]), !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan ninf float @llvm.sqrt.f32(float [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call nnan ninf float @_Z4sqrtf(float %arg), !fpmath !0
@@ -358,7 +358,7 @@ define float @test_sqrt_f32_preserve_flags(float %arg) {
 define <2 x float> @test_sqrt_v2f32_preserve_flags(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_preserve_flags
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan nsz contract <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]), !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan nsz contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call contract nsz nnan <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg), !fpmath !0
@@ -368,7 +368,7 @@ define <2 x float> @test_sqrt_v2f32_preserve_flags(<2 x float> %arg) {
 define float @test_sqrt_f32_preserve_flags_md(float %arg) {
 ; CHECK-LABEL: define float @test_sqrt_f32_preserve_flags_md
 ; CHECK-SAME: (float [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan ninf float @_Z4sqrtf(float [[ARG]]), !fpmath [[META0]], !foo [[META1:![0-9]+]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan ninf float @llvm.sqrt.f32(float [[ARG]]), !fpmath [[META0]], !foo [[META1:![0-9]+]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call nnan ninf float @_Z4sqrtf(float %arg), !fpmath !0, !foo !1
@@ -378,7 +378,7 @@ define float @test_sqrt_f32_preserve_flags_md(float %arg) {
 define <2 x float> @test_sqrt_v2f32_preserve_flags_md(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_preserve_flags_md
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan nsz contract <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]), !fpmath [[META0]], !foo [[META1]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan nsz contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ARG]]), !fpmath [[META0]], !foo [[META1]]
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call contract nsz nnan <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg), !fpmath !0, !foo !1
@@ -388,7 +388,7 @@ define <2 x float> @test_sqrt_v2f32_preserve_flags_md(<2 x float> %arg) {
 define float @test_sqrt_cr_f32_preserve_flags(float %arg) {
 ; CHECK-LABEL: define float @test_sqrt_cr_f32_preserve_flags
 ; CHECK-SAME: (float [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call ninf contract float @_Z4sqrtf(float [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call ninf contract float @llvm.sqrt.f32(float [[ARG]])
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call ninf contract float @_Z4sqrtf(float %arg)
@@ -398,7 +398,7 @@ define float @test_sqrt_cr_f32_preserve_flags(float %arg) {
 define <2 x float> @test_sqrt_cr_v2f32_preserve_flags(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32_preserve_flags
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan nsz <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan nsz <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ARG]])
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call nnan nsz <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg)

diff  --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
index 87f69065c9fd59..731a88278e512c 100644
--- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
+++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
@@ -694,7 +694,7 @@ entry:
 }
 
 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sqrt
-; GCN-NATIVE: call fast float @_Z11native_sqrtf(float %tmp)
+; GCN-NATIVE: call fast float @llvm.sqrt.f32(float %tmp)
 define amdgpu_kernel void @test_use_native_sqrt(ptr addrspace(1) nocapture %a) {
 entry:
   %tmp = load float, ptr addrspace(1) %a, align 4
@@ -704,7 +704,7 @@ entry:
 }
 
 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64
-; GCN: call fast double @_Z4sqrtd(double %tmp)
+; GCN: call fast double @llvm.sqrt.f64(double %tmp)
 define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64(ptr addrspace(1) nocapture %a) {
 entry:
   %tmp = load double, ptr addrspace(1) %a, align 8
@@ -836,7 +836,6 @@ entry:
 }
 
 ; GCN-PRELINK: declare float @_Z4cbrtf(float) local_unnamed_addr #[[$NOUNWIND_READONLY:[0-9]+]]
-; GCN-PRELINK: declare float @_Z11native_sqrtf(float) local_unnamed_addr #[[$NOUNWIND_READONLY]]
 
 ; GCN-PRELINK-DAG: attributes #[[$NOUNWIND]] = { nounwind }
 ; GCN-PRELINK-DAG: attributes #[[$NOUNWIND_READONLY]] = { nofree nounwind memory(read) }