[clang] 61c8af6 - AMDGPU: InstCombine amdgcn.sqrt.f16 to sqrt.f16

Wed Aug 23 17:30:49 PDT 2023

Author: Matt Arsenault
Date: 2023-08-23T20:30:40-04:00
New Revision: 61c8af67924b02c8f2cf871439c24650a0207f29

URL: https://github.com/llvm/llvm-project/commit/61c8af67924b02c8f2cf871439c24650a0207f29
DIFF: https://github.com/llvm/llvm-project/commit/61c8af67924b02c8f2cf871439c24650a0207f29.diff

LOG: AMDGPU: InstCombine amdgcn.sqrt.f16 to sqrt.f16

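There's nothing special about f16 sqrt handling: for half operands,
llvm.amdgcn.sqrt.f16 is identical to the generic llvm.sqrt.f16, so
InstCombine can rewrite the target intrinsic call to the generic one.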

https://reviews.llvm.org/D158090

Added: 
    

Modified: 
    clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
    llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
    llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
    llvm/test/Transforms/InstCombine/AMDGPU/rcp-contract-rsq.ll

Removed: 
    


################################################################################
diff  --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
index 4f7ac1673af37d..756f90b282a9a2 100644

--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -24,7 +24,7 @@ void test_rcp_f16(global half* out, half a)
 }
 
 // CHECK-LABEL: @test_sqrt_f16
-// CHECK: call half @llvm.amdgcn.sqrt.f16
+// CHECK: call half @llvm.sqrt.f16
 void test_sqrt_f16(global half* out, half a)
 {
   *out = __builtin_amdgcn_sqrth(a);

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index a0274aecfa3274..992f9964bf2e7d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -488,6 +488,14 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
       return IC.replaceInstUsesWith(II, QNaN);
     }
 
+    // f16 amdgcn.sqrt is identical to regular sqrt.
+    if (IID == Intrinsic::amdgcn_sqrt && Src->getType()->isHalfTy()) {
+      Function *NewDecl = Intrinsic::getDeclaration(
+          II.getModule(), Intrinsic::sqrt, {II.getType()});
+      II.setCalledFunction(NewDecl);
+      return &II;
+    }
+
     break;
   }
   case Intrinsic::amdgcn_log:

diff  --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 9413258d67b0ba..804283cc20cd6a 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -107,8 +107,7 @@ define double @test_constant_fold_sqrt_f64_undef() nounwind {
 
 define half @test_constant_fold_sqrt_f16_0() nounwind {
 ; CHECK-LABEL: @test_constant_fold_sqrt_f16_0(
-; CHECK-NEXT:    [[VAL:%.*]] = call half @llvm.amdgcn.sqrt.f16(half 0xH0000) #[[ATTR15:[0-9]+]]
-; CHECK-NEXT:    ret half [[VAL]]
+; CHECK-NEXT:    ret half 0xH0000
 ;
   %val = call half @llvm.amdgcn.sqrt.f16(half 0.0) nounwind readnone
   ret half %val
@@ -116,7 +115,7 @@ define half @test_constant_fold_sqrt_f16_0() nounwind {
 
 define float @test_constant_fold_sqrt_f32_0() nounwind {
 ; CHECK-LABEL: @test_constant_fold_sqrt_f32_0(
-; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 0.000000e+00) #[[ATTR15]]
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 0.000000e+00) #[[ATTR15:[0-9]+]]
 ; CHECK-NEXT:    ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.sqrt.f32(float 0.0) nounwind readnone
@@ -134,8 +133,7 @@ define double @test_constant_fold_sqrt_f64_0() nounwind {
 
 define half @test_constant_fold_sqrt_f16_neg0() nounwind {
 ; CHECK-LABEL: @test_constant_fold_sqrt_f16_neg0(
-; CHECK-NEXT:    [[VAL:%.*]] = call half @llvm.amdgcn.sqrt.f16(half 0xH8000) #[[ATTR15]]
-; CHECK-NEXT:    ret half [[VAL]]
+; CHECK-NEXT:    ret half 0xH8000
 ;
   %val = call half @llvm.amdgcn.sqrt.f16(half -0.0) nounwind readnone
   ret half %val
@@ -186,6 +184,42 @@ define double @test_constant_fold_sqrt_neg1() nounwind {
   ret double %val
 }
 
+define half @test_amdgcn_sqrt_f16(half %arg) {
+; CHECK-LABEL: @test_amdgcn_sqrt_f16(
+; CHECK-NEXT:    [[VAL:%.*]] = call half @llvm.sqrt.f16(half [[ARG:%.*]])
+; CHECK-NEXT:    ret half [[VAL]]
+;
+  %val = call half @llvm.amdgcn.sqrt.f16(half %arg)
+  ret half %val
+}
+
+define half @test_amdgcn_sqrt_f16_flags(half %arg) {
+; CHECK-LABEL: @test_amdgcn_sqrt_f16_flags(
+; CHECK-NEXT:    [[VAL:%.*]] = call nnan half @llvm.sqrt.f16(half [[ARG:%.*]])
+; CHECK-NEXT:    ret half [[VAL]]
+;
+  %val = call nnan half @llvm.amdgcn.sqrt.f16(half %arg)
+  ret half %val
+}
+
+define float @test_amdgcn_sqrt_f32(float %arg) {
+; CHECK-LABEL: @test_amdgcn_sqrt_f32(
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[ARG:%.*]])
+; CHECK-NEXT:    ret float [[VAL]]
+;
+  %val = call float @llvm.amdgcn.sqrt.f32(float %arg)
+  ret float %val
+}
+
+define double @test_amdgcn_sqrt_f64(double %arg) {
+; CHECK-LABEL: @test_amdgcn_sqrt_f64(
+; CHECK-NEXT:    [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double [[ARG:%.*]])
+; CHECK-NEXT:    ret double [[VAL]]
+;
+  %val = call double @llvm.amdgcn.sqrt.f64(double %arg)
+  ret double %val
+}
+
 ; --------------------------------------------------------------------
 ; llvm.amdgcn.rsq
 ; --------------------------------------------------------------------

diff  --git a/llvm/test/Transforms/InstCombine/AMDGPU/rcp-contract-rsq.ll b/llvm/test/Transforms/InstCombine/AMDGPU/rcp-contract-rsq.ll
index 0be2c7aa85b27b..ce0886333462be 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/rcp-contract-rsq.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/rcp-contract-rsq.ll
@@ -99,7 +99,7 @@ define half @amdgcn_rcp_amdgcn_sqrt_f16_contract(half %x) {
 define half @amdgcn_rcp_amdgcn_sqrt_f16_missing_contract0(half %x) {
 ; CHECK-LABEL: define half @amdgcn_rcp_amdgcn_sqrt_f16_missing_contract0
 ; CHECK-SAME: (half [[X:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[SQRT:%.*]] = call half @llvm.amdgcn.sqrt.f16(half [[X]])
+; CHECK-NEXT:    [[SQRT:%.*]] = call half @llvm.sqrt.f16(half [[X]])
 ; CHECK-NEXT:    [[RSQ:%.*]] = call contract half @llvm.amdgcn.rcp.f16(half [[SQRT]])
 ; CHECK-NEXT:    ret half [[RSQ]]
 ;
@@ -112,7 +112,7 @@ define half @amdgcn_rcp_amdgcn_sqrt_f16_missing_contract0(half %x) {
 define half @amdgcn_rcp_amdgcn_sqrt_f16_missing_contract1(half %x) {
 ; CHECK-LABEL: define half @amdgcn_rcp_amdgcn_sqrt_f16_missing_contract1
 ; CHECK-SAME: (half [[X:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[SQRT:%.*]] = call contract half @llvm.amdgcn.sqrt.f16(half [[X]])
+; CHECK-NEXT:    [[SQRT:%.*]] = call contract half @llvm.sqrt.f16(half [[X]])
 ; CHECK-NEXT:    [[RSQ:%.*]] = call half @llvm.amdgcn.rcp.f16(half [[SQRT]])
 ; CHECK-NEXT:    ret half [[RSQ]]
 ;
@@ -125,7 +125,7 @@ define half @amdgcn_rcp_amdgcn_sqrt_f16_missing_contract1(half %x) {
 define half @amdgcn_rcp_amdgcn_sqrt_f16_contract_multi_use(half %x, ptr %ptr) {
 ; CHECK-LABEL: define half @amdgcn_rcp_amdgcn_sqrt_f16_contract_multi_use
 ; CHECK-SAME: (half [[X:%.*]], ptr [[PTR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[SQRT:%.*]] = call contract half @llvm.amdgcn.sqrt.f16(half [[X]])
+; CHECK-NEXT:    [[SQRT:%.*]] = call contract half @llvm.sqrt.f16(half [[X]])
 ; CHECK-NEXT:    store half [[SQRT]], ptr [[PTR]], align 2
 ; CHECK-NEXT:    [[RSQ:%.*]] = call contract half @llvm.amdgcn.rcp.f16(half [[SQRT]])
 ; CHECK-NEXT:    ret half [[RSQ]]