[llvm] AMDGPU: Replace sqrt OpenCL libcalls with llvm.sqrt (PR #74197)

Sat Dec 2 06:55:04 PST 2023

https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/74197

The library implementation is just a wrapper around a call to the intrinsic, but it loses metadata. Replace the call site with a direct call to the intrinsic so that the lowering can see the !fpmath metadata and fast-math flags.

Since d56e0d07cc5ee8e334fd1ad403eef0b1a771384f, clang has been placing !fpmath on OpenCL library sqrt calls. Also, don't bother emitting native_sqrt anymore; it's just another wrapper around llvm.sqrt.

From b5913a035d8b1689a8ea963e9b915efe5c7ea789 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sat, 2 Dec 2023 11:35:01 +0900
Subject: [PATCH] AMDGPU: Replace sqrt OpenCL libcalls with llvm.sqrt

The library implementation is just a wrapper around a call to the
intrinsic, but loses metadata. Swap out the call site to the intrinsic
so that the lowering can see the !fpmath metadata and fast math flags.

Since d56e0d07cc5ee8e334fd1ad403eef0b1a771384f, clang started placing
!fpmath on OpenCL library sqrt calls. Also don't bother emitting
native_sqrt anymore, it's just another wrapper around llvm.sqrt.
---
 llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp     | 32 +-------
 .../AMDGPU/amdgpu-simplify-libcall-sqrt.ll    | 76 +++++++++----------
 llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll |  5 +-
 3 files changed, 43 insertions(+), 70 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 5c66fd2b180f7..245c42af43483 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -87,9 +87,6 @@ class AMDGPULibCalls {
                               Constant *copr0, Constant *copr1);
   bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo);
 
-  // sqrt
-  bool fold_sqrt(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
-
   /// Insert a value to sincos function \p Fsincos. Returns (value of sin, value
   /// of cos, sincos call).
   std::tuple<Value *, Value *, Value *> insertSinCos(Value *Arg,
@@ -673,8 +670,6 @@ bool AMDGPULibCalls::fold(CallInst *CI) {
 
     // Specialized optimizations for each function call.
     //
-    // TODO: Handle other simple intrinsic wrappers. Sqrt.
-    //
     // TODO: Handle native functions
     switch (FInfo.getId()) {
     case AMDGPULibFunc::EI_EXP:
@@ -795,7 +790,9 @@ bool AMDGPULibCalls::fold(CallInst *CI) {
     case AMDGPULibFunc::EI_ROOTN:
       return fold_rootn(FPOp, B, FInfo);
     case AMDGPULibFunc::EI_SQRT:
-      return fold_sqrt(FPOp, B, FInfo);
+      // TODO: Allow with strictfp + constrained intrinsic
+      return tryReplaceLibcallWithSimpleIntrinsic(
+          B, CI, Intrinsic::sqrt, true, true, /*AllowStrictFP=*/false);
     case AMDGPULibFunc::EI_COS:
     case AMDGPULibFunc::EI_SIN:
       return fold_sincos(FPOp, B, FInfo);
@@ -1275,29 +1272,6 @@ bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic(
   return true;
 }
 
-// fold sqrt -> native_sqrt (x)
-bool AMDGPULibCalls::fold_sqrt(FPMathOperator *FPOp, IRBuilder<> &B,
-                               const FuncInfo &FInfo) {
-  if (!isUnsafeMath(FPOp))
-    return false;
-
-  if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
-      (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
-    Module *M = B.GetInsertBlock()->getModule();
-
-    if (FunctionCallee FPExpr = getNativeFunction(
-            M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
-      Value *opr0 = FPOp->getOperand(0);
-      LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
-                        << "sqrt(" << *opr0 << ")\n");
-      Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt");
-      replaceCall(FPOp, nval);
-      return true;
-    }
-  }
-  return false;
-}
-
 std::tuple<Value *, Value *, Value *>
 AMDGPULibCalls::insertSinCos(Value *Arg, FastMathFlags FMF, IRBuilder<> &B,
                              FunctionCallee Fsincos) {
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll
index d1a58a7a0148d..f5b6f2e170777 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll
@@ -27,7 +27,7 @@ declare <16 x half> @_Z4sqrtDv16_Dh(<16 x half>)
 define float @test_sqrt_f32(float %arg) {
 ; CHECK-LABEL: define float @test_sqrt_f32
 ; CHECK-SAME: (float [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]), !fpmath [[META0:![0-9]+]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @llvm.sqrt.f32(float [[ARG]]), !fpmath [[META0:![0-9]+]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call float @_Z4sqrtf(float %arg), !fpmath !0
@@ -37,7 +37,7 @@ define float @test_sqrt_f32(float %arg) {
 define <2 x float> @test_sqrt_v2f32(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]), !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg), !fpmath !0
@@ -47,7 +47,7 @@ define <2 x float> @test_sqrt_v2f32(<2 x float> %arg) {
 define <3 x float> @test_sqrt_v3f32(<3 x float> %arg) {
 ; CHECK-LABEL: define <3 x float> @test_sqrt_v3f32
 ; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> [[ARG]]), !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x float> @llvm.sqrt.v3f32(<3 x float> [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret <3 x float> [[SQRT]]
 ;
   %sqrt = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> %arg), !fpmath !0
@@ -57,7 +57,7 @@ define <3 x float> @test_sqrt_v3f32(<3 x float> %arg) {
 define <4 x float> @test_sqrt_v4f32(<4 x float> %arg) {
 ; CHECK-LABEL: define <4 x float> @test_sqrt_v4f32
 ; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> [[ARG]]), !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret <4 x float> [[SQRT]]
 ;
   %sqrt = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> %arg), !fpmath !0
@@ -67,7 +67,7 @@ define <4 x float> @test_sqrt_v4f32(<4 x float> %arg) {
 define <8 x float> @test_sqrt_v8f32(<8 x float> %arg) {
 ; CHECK-LABEL: define <8 x float> @test_sqrt_v8f32
 ; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> [[ARG]]), !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret <8 x float> [[SQRT]]
 ;
   %sqrt = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> %arg), !fpmath !0
@@ -77,7 +77,7 @@ define <8 x float> @test_sqrt_v8f32(<8 x float> %arg) {
 define <16 x float> @test_sqrt_v16f32(<16 x float> %arg) {
 ; CHECK-LABEL: define <16 x float> @test_sqrt_v16f32
 ; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> [[ARG]]), !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret <16 x float> [[SQRT]]
 ;
   %sqrt = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> %arg), !fpmath !0
@@ -87,7 +87,7 @@ define <16 x float> @test_sqrt_v16f32(<16 x float> %arg) {
 define float @test_sqrt_cr_f32(float %arg) {
 ; CHECK-LABEL: define float @test_sqrt_cr_f32
 ; CHECK-SAME: (float [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @llvm.sqrt.f32(float [[ARG]])
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call float @_Z4sqrtf(float %arg)
@@ -97,7 +97,7 @@ define float @test_sqrt_cr_f32(float %arg) {
 define <2 x float> @test_sqrt_cr_v2f32(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ARG]])
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg)
@@ -107,7 +107,7 @@ define <2 x float> @test_sqrt_cr_v2f32(<2 x float> %arg) {
 define <3 x float> @test_sqrt_cr_v3f32(<3 x float> %arg) {
 ; CHECK-LABEL: define <3 x float> @test_sqrt_cr_v3f32
 ; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x float> @llvm.sqrt.v3f32(<3 x float> [[ARG]])
 ; CHECK-NEXT:    ret <3 x float> [[SQRT]]
 ;
   %sqrt = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> %arg)
@@ -117,7 +117,7 @@ define <3 x float> @test_sqrt_cr_v3f32(<3 x float> %arg) {
 define <4 x float> @test_sqrt_cr_v4f32(<4 x float> %arg) {
 ; CHECK-LABEL: define <4 x float> @test_sqrt_cr_v4f32
 ; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
 ; CHECK-NEXT:    ret <4 x float> [[SQRT]]
 ;
   %sqrt = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> %arg)
@@ -127,7 +127,7 @@ define <4 x float> @test_sqrt_cr_v4f32(<4 x float> %arg) {
 define <8 x float> @test_sqrt_cr_v8f32(<8 x float> %arg) {
 ; CHECK-LABEL: define <8 x float> @test_sqrt_cr_v8f32
 ; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> [[ARG]])
 ; CHECK-NEXT:    ret <8 x float> [[SQRT]]
 ;
   %sqrt = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> %arg)
@@ -137,7 +137,7 @@ define <8 x float> @test_sqrt_cr_v8f32(<8 x float> %arg) {
 define <16 x float> @test_sqrt_cr_v16f32(<16 x float> %arg) {
 ; CHECK-LABEL: define <16 x float> @test_sqrt_cr_v16f32
 ; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[ARG]])
 ; CHECK-NEXT:    ret <16 x float> [[SQRT]]
 ;
   %sqrt = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> %arg)
@@ -147,7 +147,7 @@ define <16 x float> @test_sqrt_cr_v16f32(<16 x float> %arg) {
 define double @test_sqrt_f64(double %arg) {
 ; CHECK-LABEL: define double @test_sqrt_f64
 ; CHECK-SAME: (double [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call double @_Z4sqrtd(double [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call double @llvm.sqrt.f64(double [[ARG]])
 ; CHECK-NEXT:    ret double [[SQRT]]
 ;
   %sqrt = tail call double @_Z4sqrtd(double %arg)
@@ -157,7 +157,7 @@ define double @test_sqrt_f64(double %arg) {
 define <2 x double> @test_sqrt_v2f64(<2 x double> %arg) {
 ; CHECK-LABEL: define <2 x double> @test_sqrt_v2f64
 ; CHECK-SAME: (<2 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x double> @_Z4sqrtDv2_d(<2 x double> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[ARG]])
 ; CHECK-NEXT:    ret <2 x double> [[SQRT]]
 ;
   %sqrt = tail call <2 x double> @_Z4sqrtDv2_d(<2 x double> %arg)
@@ -167,7 +167,7 @@ define <2 x double> @test_sqrt_v2f64(<2 x double> %arg) {
 define <3 x double> @test_sqrt_v3f64(<3 x double> %arg) {
 ; CHECK-LABEL: define <3 x double> @test_sqrt_v3f64
 ; CHECK-SAME: (<3 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x double> @_Z4sqrtDv3_d(<3 x double> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x double> @llvm.sqrt.v3f64(<3 x double> [[ARG]])
 ; CHECK-NEXT:    ret <3 x double> [[SQRT]]
 ;
   %sqrt = tail call <3 x double> @_Z4sqrtDv3_d(<3 x double> %arg)
@@ -177,7 +177,7 @@ define <3 x double> @test_sqrt_v3f64(<3 x double> %arg) {
 define <4 x double> @test_sqrt_v4f64(<4 x double> %arg) {
 ; CHECK-LABEL: define <4 x double> @test_sqrt_v4f64
 ; CHECK-SAME: (<4 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x double> @_Z4sqrtDv4_d(<4 x double> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x double> @llvm.sqrt.v4f64(<4 x double> [[ARG]])
 ; CHECK-NEXT:    ret <4 x double> [[SQRT]]
 ;
   %sqrt = tail call <4 x double> @_Z4sqrtDv4_d(<4 x double> %arg)
@@ -187,7 +187,7 @@ define <4 x double> @test_sqrt_v4f64(<4 x double> %arg) {
 define <8 x double> @test_sqrt_v8f64(<8 x double> %arg) {
 ; CHECK-LABEL: define <8 x double> @test_sqrt_v8f64
 ; CHECK-SAME: (<8 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x double> @_Z4sqrtDv8_d(<8 x double> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x double> @llvm.sqrt.v8f64(<8 x double> [[ARG]])
 ; CHECK-NEXT:    ret <8 x double> [[SQRT]]
 ;
   %sqrt = tail call <8 x double> @_Z4sqrtDv8_d(<8 x double> %arg)
@@ -197,7 +197,7 @@ define <8 x double> @test_sqrt_v8f64(<8 x double> %arg) {
 define <16 x double> @test_sqrt_v16f64(<16 x double> %arg) {
 ; CHECK-LABEL: define <16 x double> @test_sqrt_v16f64
 ; CHECK-SAME: (<16 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x double> @_Z4sqrtDv16_d(<16 x double> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x double> @llvm.sqrt.v16f64(<16 x double> [[ARG]])
 ; CHECK-NEXT:    ret <16 x double> [[SQRT]]
 ;
   %sqrt = tail call <16 x double> @_Z4sqrtDv16_d(<16 x double> %arg)
@@ -207,7 +207,7 @@ define <16 x double> @test_sqrt_v16f64(<16 x double> %arg) {
 define half @test_sqrt_f16(half %arg) {
 ; CHECK-LABEL: define half @test_sqrt_f16
 ; CHECK-SAME: (half [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call half @_Z4sqrtDh(half [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call half @llvm.sqrt.f16(half [[ARG]])
 ; CHECK-NEXT:    ret half [[SQRT]]
 ;
   %sqrt = tail call half @_Z4sqrtDh(half %arg)
@@ -217,7 +217,7 @@ define half @test_sqrt_f16(half %arg) {
 define <2 x half> @test_sqrt_v2f16(<2 x half> %arg) {
 ; CHECK-LABEL: define <2 x half> @test_sqrt_v2f16
 ; CHECK-SAME: (<2 x half> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x half> @_Z4sqrtDv2_Dh(<2 x half> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x half> @llvm.sqrt.v2f16(<2 x half> [[ARG]])
 ; CHECK-NEXT:    ret <2 x half> [[SQRT]]
 ;
   %sqrt = tail call <2 x half> @_Z4sqrtDv2_Dh(<2 x half> %arg)
@@ -227,7 +227,7 @@ define <2 x half> @test_sqrt_v2f16(<2 x half> %arg) {
 define <3 x half> @test_sqrt_v3f16(<3 x half> %arg) {
 ; CHECK-LABEL: define <3 x half> @test_sqrt_v3f16
 ; CHECK-SAME: (<3 x half> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x half> @_Z4sqrtDv3_Dh(<3 x half> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x half> @llvm.sqrt.v3f16(<3 x half> [[ARG]])
 ; CHECK-NEXT:    ret <3 x half> [[SQRT]]
 ;
   %sqrt = tail call <3 x half> @_Z4sqrtDv3_Dh(<3 x half> %arg)
@@ -237,7 +237,7 @@ define <3 x half> @test_sqrt_v3f16(<3 x half> %arg) {
 define <4 x half> @test_sqrt_v4f16(<4 x half> %arg) {
 ; CHECK-LABEL: define <4 x half> @test_sqrt_v4f16
 ; CHECK-SAME: (<4 x half> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x half> @_Z4sqrtDv4_Dh(<4 x half> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x half> @llvm.sqrt.v4f16(<4 x half> [[ARG]])
 ; CHECK-NEXT:    ret <4 x half> [[SQRT]]
 ;
   %sqrt = tail call <4 x half> @_Z4sqrtDv4_Dh(<4 x half> %arg)
@@ -247,7 +247,7 @@ define <4 x half> @test_sqrt_v4f16(<4 x half> %arg) {
 define <8 x half> @test_sqrt_v8f16(<8 x half> %arg) {
 ; CHECK-LABEL: define <8 x half> @test_sqrt_v8f16
 ; CHECK-SAME: (<8 x half> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x half> @_Z4sqrtDv8_Dh(<8 x half> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x half> @llvm.sqrt.v8f16(<8 x half> [[ARG]])
 ; CHECK-NEXT:    ret <8 x half> [[SQRT]]
 ;
   %sqrt = tail call <8 x half> @_Z4sqrtDv8_Dh(<8 x half> %arg)
@@ -257,7 +257,7 @@ define <8 x half> @test_sqrt_v8f16(<8 x half> %arg) {
 define <16 x half> @test_sqrt_v16f16(<16 x half> %arg) {
 ; CHECK-LABEL: define <16 x half> @test_sqrt_v16f16
 ; CHECK-SAME: (<16 x half> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x half> @_Z4sqrtDv16_Dh(<16 x half> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x half> @llvm.sqrt.v16f16(<16 x half> [[ARG]])
 ; CHECK-NEXT:    ret <16 x half> [[SQRT]]
 ;
   %sqrt = tail call <16 x half> @_Z4sqrtDv16_Dh(<16 x half> %arg)
@@ -267,7 +267,7 @@ define <16 x half> @test_sqrt_v16f16(<16 x half> %arg) {
 define float @test_sqrt_f32_nobuiltin_callsite(float %arg) {
 ; CHECK-LABEL: define float @test_sqrt_f32_nobuiltin_callsite
 ; CHECK-SAME: (float [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR2:[0-9]+]], !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR3:[0-9]+]], !fpmath [[META0]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call float @_Z4sqrtf(float %arg) #0, !fpmath !0
@@ -277,7 +277,7 @@ define float @test_sqrt_f32_nobuiltin_callsite(float %arg) {
 define <2 x float> @test_sqrt_v2f32_nobuiltin_callsite(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_nobuiltin_callsite
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR2]], !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR3]], !fpmath [[META0]]
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0, !fpmath !0
@@ -287,7 +287,7 @@ define <2 x float> @test_sqrt_v2f32_nobuiltin_callsite(<2 x float> %arg) {
 define float @test_sqrt_cr_f32_nobuiltin_callsite(float %arg) {
 ; CHECK-LABEL: define float @test_sqrt_cr_f32_nobuiltin_callsite
 ; CHECK-SAME: (float [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR2]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR3]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call float @_Z4sqrtf(float %arg) #0
@@ -297,7 +297,7 @@ define float @test_sqrt_cr_f32_nobuiltin_callsite(float %arg) {
 define <2 x float> @test_sqrt_cr_v2f32_nobuiltin_callsite(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32_nobuiltin_callsite
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR2]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0
@@ -308,7 +308,7 @@ define <2 x float> @test_sqrt_cr_v2f32_nobuiltin_callsite(<2 x float> %arg) {
 define float @test_sqrt_f32_nobuiltins(float %arg) #1 {
 ; CHECK-LABEL: define float @test_sqrt_f32_nobuiltins
 ; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR2]], !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR3]], !fpmath [[META0]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call float @_Z4sqrtf(float %arg) #0, !fpmath !0
@@ -318,7 +318,7 @@ define float @test_sqrt_f32_nobuiltins(float %arg) #1 {
 define <2 x float> @test_sqrt_v2f32_nobuiltins(<2 x float> %arg) #1 {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_nobuiltins
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR2]], !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR3]], !fpmath [[META0]]
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0, !fpmath !0
@@ -328,7 +328,7 @@ define <2 x float> @test_sqrt_v2f32_nobuiltins(<2 x float> %arg) #1 {
 define float @test_sqrt_cr_f32_nobuiltins(float %arg) #1 {
 ; CHECK-LABEL: define float @test_sqrt_cr_f32_nobuiltins
 ; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR2]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR3]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call float @_Z4sqrtf(float %arg) #0
@@ -338,7 +338,7 @@ define float @test_sqrt_cr_f32_nobuiltins(float %arg) #1 {
 define <2 x float> @test_sqrt_cr_v2f32_nobuiltins(<2 x float> %arg) #1 {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32_nobuiltins
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR2]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0
@@ -348,7 +348,7 @@ define <2 x float> @test_sqrt_cr_v2f32_nobuiltins(<2 x float> %arg) #1 {
 define float @test_sqrt_f32_preserve_flags(float %arg) {
 ; CHECK-LABEL: define float @test_sqrt_f32_preserve_flags
 ; CHECK-SAME: (float [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan ninf float @_Z4sqrtf(float [[ARG]]), !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan ninf float @llvm.sqrt.f32(float [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call nnan ninf float @_Z4sqrtf(float %arg), !fpmath !0
@@ -358,7 +358,7 @@ define float @test_sqrt_f32_preserve_flags(float %arg) {
 define <2 x float> @test_sqrt_v2f32_preserve_flags(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_preserve_flags
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan nsz contract <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]), !fpmath [[META0]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan nsz contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call contract nsz nnan <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg), !fpmath !0
@@ -368,7 +368,7 @@ define <2 x float> @test_sqrt_v2f32_preserve_flags(<2 x float> %arg) {
 define float @test_sqrt_f32_preserve_flags_md(float %arg) {
 ; CHECK-LABEL: define float @test_sqrt_f32_preserve_flags_md
 ; CHECK-SAME: (float [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan ninf float @_Z4sqrtf(float [[ARG]]), !fpmath [[META0]], !foo [[META1:![0-9]+]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan ninf float @llvm.sqrt.f32(float [[ARG]]), !fpmath [[META0]], !foo [[META1:![0-9]+]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call nnan ninf float @_Z4sqrtf(float %arg), !fpmath !0, !foo !1
@@ -378,7 +378,7 @@ define float @test_sqrt_f32_preserve_flags_md(float %arg) {
 define <2 x float> @test_sqrt_v2f32_preserve_flags_md(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_preserve_flags_md
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan nsz contract <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]), !fpmath [[META0]], !foo [[META1]]
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan nsz contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ARG]]), !fpmath [[META0]], !foo [[META1]]
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call contract nsz nnan <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg), !fpmath !0, !foo !1
@@ -388,7 +388,7 @@ define <2 x float> @test_sqrt_v2f32_preserve_flags_md(<2 x float> %arg) {
 define float @test_sqrt_cr_f32_preserve_flags(float %arg) {
 ; CHECK-LABEL: define float @test_sqrt_cr_f32_preserve_flags
 ; CHECK-SAME: (float [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call ninf contract float @_Z4sqrtf(float [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call ninf contract float @llvm.sqrt.f32(float [[ARG]])
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call ninf contract float @_Z4sqrtf(float %arg)
@@ -398,7 +398,7 @@ define float @test_sqrt_cr_f32_preserve_flags(float %arg) {
 define <2 x float> @test_sqrt_cr_v2f32_preserve_flags(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32_preserve_flags
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan nsz <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]])
+; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan nsz <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ARG]])
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call nnan nsz <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg)
diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
index 87f69065c9fd5..731a88278e512 100644
--- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
+++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
@@ -694,7 +694,7 @@ entry:
 }
 
 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sqrt
-; GCN-NATIVE: call fast float @_Z11native_sqrtf(float %tmp)
+; GCN-NATIVE: call fast float @llvm.sqrt.f32(float %tmp)
 define amdgpu_kernel void @test_use_native_sqrt(ptr addrspace(1) nocapture %a) {
 entry:
   %tmp = load float, ptr addrspace(1) %a, align 4
@@ -704,7 +704,7 @@ entry:
 }
 
 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64
-; GCN: call fast double @_Z4sqrtd(double %tmp)
+; GCN: call fast double @llvm.sqrt.f64(double %tmp)
 define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64(ptr addrspace(1) nocapture %a) {
 entry:
   %tmp = load double, ptr addrspace(1) %a, align 8
@@ -836,7 +836,6 @@ entry:
 }
 
 ; GCN-PRELINK: declare float @_Z4cbrtf(float) local_unnamed_addr #[[$NOUNWIND_READONLY:[0-9]+]]
-; GCN-PRELINK: declare float @_Z11native_sqrtf(float) local_unnamed_addr #[[$NOUNWIND_READONLY]]
 
 ; GCN-PRELINK-DAG: attributes #[[$NOUNWIND]] = { nounwind }
 ; GCN-PRELINK-DAG: attributes #[[$NOUNWIND_READONLY]] = { nofree nounwind memory(read) }