[llvm] 61c8af6 - AMDGPU: InstCombine amdgcn.sqrt.f16 to sqrt.f16
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 23 17:30:50 PDT 2023
Author: Matt Arsenault
Date: 2023-08-23T20:30:40-04:00
New Revision: 61c8af67924b02c8f2cf871439c24650a0207f29
URL: https://github.com/llvm/llvm-project/commit/61c8af67924b02c8f2cf871439c24650a0207f29
DIFF: https://github.com/llvm/llvm-project/commit/61c8af67924b02c8f2cf871439c24650a0207f29.diff
LOG: AMDGPU: InstCombine amdgcn.sqrt.f16 to sqrt.f16
There's nothing special about f16 sqrt handling.
https://reviews.llvm.org/D158090
Added:
Modified:
clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
llvm/test/Transforms/InstCombine/AMDGPU/rcp-contract-rsq.ll
Removed:
################################################################################
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
index 4f7ac1673af37d..756f90b282a9a2 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -24,7 +24,7 @@ void test_rcp_f16(global half* out, half a)
}
// CHECK-LABEL: @test_sqrt_f16
-// CHECK: call half @llvm.amdgcn.sqrt.f16
+// CHECK: call half @llvm.sqrt.f16
void test_sqrt_f16(global half* out, half a)
{
*out = __builtin_amdgcn_sqrth(a);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index a0274aecfa3274..992f9964bf2e7d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -488,6 +488,14 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
return IC.replaceInstUsesWith(II, QNaN);
}
+ // f16 amdgcn.sqrt is identical to regular sqrt.
+ if (IID == Intrinsic::amdgcn_sqrt && Src->getType()->isHalfTy()) {
+ Function *NewDecl = Intrinsic::getDeclaration(
+ II.getModule(), Intrinsic::sqrt, {II.getType()});
+ II.setCalledFunction(NewDecl);
+ return &II;
+ }
+
break;
}
case Intrinsic::amdgcn_log:
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 9413258d67b0ba..804283cc20cd6a 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -107,8 +107,7 @@ define double @test_constant_fold_sqrt_f64_undef() nounwind {
define half @test_constant_fold_sqrt_f16_0() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_f16_0(
-; CHECK-NEXT: [[VAL:%.*]] = call half @llvm.amdgcn.sqrt.f16(half 0xH0000) #[[ATTR15:[0-9]+]]
-; CHECK-NEXT: ret half [[VAL]]
+; CHECK-NEXT: ret half 0xH0000
;
%val = call half @llvm.amdgcn.sqrt.f16(half 0.0) nounwind readnone
ret half %val
@@ -116,7 +115,7 @@ define half @test_constant_fold_sqrt_f16_0() nounwind {
define float @test_constant_fold_sqrt_f32_0() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_f32_0(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 0.000000e+00) #[[ATTR15]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 0.000000e+00) #[[ATTR15:[0-9]+]]
; CHECK-NEXT: ret float [[VAL]]
;
%val = call float @llvm.amdgcn.sqrt.f32(float 0.0) nounwind readnone
@@ -134,8 +133,7 @@ define double @test_constant_fold_sqrt_f64_0() nounwind {
define half @test_constant_fold_sqrt_f16_neg0() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_f16_neg0(
-; CHECK-NEXT: [[VAL:%.*]] = call half @llvm.amdgcn.sqrt.f16(half 0xH8000) #[[ATTR15]]
-; CHECK-NEXT: ret half [[VAL]]
+; CHECK-NEXT: ret half 0xH8000
;
%val = call half @llvm.amdgcn.sqrt.f16(half -0.0) nounwind readnone
ret half %val
@@ -186,6 +184,42 @@ define double @test_constant_fold_sqrt_neg1() nounwind {
ret double %val
}
+define half @test_amdgcn_sqrt_f16(half %arg) {
+; CHECK-LABEL: @test_amdgcn_sqrt_f16(
+; CHECK-NEXT: [[VAL:%.*]] = call half @llvm.sqrt.f16(half [[ARG:%.*]])
+; CHECK-NEXT: ret half [[VAL]]
+;
+ %val = call half @llvm.amdgcn.sqrt.f16(half %arg)
+ ret half %val
+}
+
+define half @test_amdgcn_sqrt_f16_flags(half %arg) {
+; CHECK-LABEL: @test_amdgcn_sqrt_f16_flags(
+; CHECK-NEXT: [[VAL:%.*]] = call nnan half @llvm.sqrt.f16(half [[ARG:%.*]])
+; CHECK-NEXT: ret half [[VAL]]
+;
+ %val = call nnan half @llvm.amdgcn.sqrt.f16(half %arg)
+ ret half %val
+}
+
+define float @test_amdgcn_sqrt_f32(float %arg) {
+; CHECK-LABEL: @test_amdgcn_sqrt_f32(
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[ARG:%.*]])
+; CHECK-NEXT: ret float [[VAL]]
+;
+ %val = call float @llvm.amdgcn.sqrt.f32(float %arg)
+ ret float %val
+}
+
+define double @test_amdgcn_sqrt_f64(double %arg) {
+; CHECK-LABEL: @test_amdgcn_sqrt_f64(
+; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double [[ARG:%.*]])
+; CHECK-NEXT: ret double [[VAL]]
+;
+ %val = call double @llvm.amdgcn.sqrt.f64(double %arg)
+ ret double %val
+}
+
; --------------------------------------------------------------------
; llvm.amdgcn.rsq
; --------------------------------------------------------------------
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/rcp-contract-rsq.ll b/llvm/test/Transforms/InstCombine/AMDGPU/rcp-contract-rsq.ll
index 0be2c7aa85b27b..ce0886333462be 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/rcp-contract-rsq.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/rcp-contract-rsq.ll
@@ -99,7 +99,7 @@ define half @amdgcn_rcp_amdgcn_sqrt_f16_contract(half %x) {
define half @amdgcn_rcp_amdgcn_sqrt_f16_missing_contract0(half %x) {
; CHECK-LABEL: define half @amdgcn_rcp_amdgcn_sqrt_f16_missing_contract0
; CHECK-SAME: (half [[X:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[SQRT:%.*]] = call half @llvm.amdgcn.sqrt.f16(half [[X]])
+; CHECK-NEXT: [[SQRT:%.*]] = call half @llvm.sqrt.f16(half [[X]])
; CHECK-NEXT: [[RSQ:%.*]] = call contract half @llvm.amdgcn.rcp.f16(half [[SQRT]])
; CHECK-NEXT: ret half [[RSQ]]
;
@@ -112,7 +112,7 @@ define half @amdgcn_rcp_amdgcn_sqrt_f16_missing_contract0(half %x) {
define half @amdgcn_rcp_amdgcn_sqrt_f16_missing_contract1(half %x) {
; CHECK-LABEL: define half @amdgcn_rcp_amdgcn_sqrt_f16_missing_contract1
; CHECK-SAME: (half [[X:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[SQRT:%.*]] = call contract half @llvm.amdgcn.sqrt.f16(half [[X]])
+; CHECK-NEXT: [[SQRT:%.*]] = call contract half @llvm.sqrt.f16(half [[X]])
; CHECK-NEXT: [[RSQ:%.*]] = call half @llvm.amdgcn.rcp.f16(half [[SQRT]])
; CHECK-NEXT: ret half [[RSQ]]
;
@@ -125,7 +125,7 @@ define half @amdgcn_rcp_amdgcn_sqrt_f16_missing_contract1(half %x) {
define half @amdgcn_rcp_amdgcn_sqrt_f16_contract_multi_use(half %x, ptr %ptr) {
; CHECK-LABEL: define half @amdgcn_rcp_amdgcn_sqrt_f16_contract_multi_use
; CHECK-SAME: (half [[X:%.*]], ptr [[PTR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[SQRT:%.*]] = call contract half @llvm.amdgcn.sqrt.f16(half [[X]])
+; CHECK-NEXT: [[SQRT:%.*]] = call contract half @llvm.sqrt.f16(half [[X]])
; CHECK-NEXT: store half [[SQRT]], ptr [[PTR]], align 2
; CHECK-NEXT: [[RSQ:%.*]] = call contract half @llvm.amdgcn.rcp.f16(half [[SQRT]])
; CHECK-NEXT: ret half [[RSQ]]
More information about the llvm-commits
mailing list