[llvm] e3547bc - AMDGPU: Assume llvm.amdgcn.exp2 and log are canonicalizing
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 15 06:55:32 PDT 2023
Author: Matt Arsenault
Date: 2023-06-15T09:55:15-04:00
New Revision: e3547bc21b3aeb99b684d4faede10238f1baadab
URL: https://github.com/llvm/llvm-project/commit/e3547bc21b3aeb99b684d4faede10238f1baadab
DIFF: https://github.com/llvm/llvm-project/commit/e3547bc21b3aeb99b684d4faede10238f1baadab.diff
LOG: AMDGPU: Assume llvm.amdgcn.exp2 and log are canonicalizing
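In SelectionDAG, log was only partially handled (the llvm.amdgcn.log intrinsic
was already treated as canonicalizing, but the AMDGPUISD::LOG node was not), and
both the log and exp2 intrinsics were missed for GlobalISel.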
Added:
Modified:
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir
llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 28a3b87630eea..ab7a543ed5fef 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -10459,6 +10459,8 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
case AMDGPUISD::RSQ_CLAMP:
case AMDGPUISD::RCP_LEGACY:
case AMDGPUISD::RCP_IFLAG:
+ case AMDGPUISD::LOG:
+ case AMDGPUISD::EXP:
case AMDGPUISD::DIV_SCALE:
case AMDGPUISD::DIV_FMAS:
case AMDGPUISD::DIV_FIXUP:
@@ -10566,6 +10568,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
case Intrinsic::amdgcn_rsq_legacy:
case Intrinsic::amdgcn_trig_preop:
case Intrinsic::amdgcn_log:
+ case Intrinsic::amdgcn_exp2:
return true;
default:
break;
@@ -10657,6 +10660,8 @@ bool SITargetLowering::isCanonicalized(Register Reg, MachineFunction &MF,
case Intrinsic::amdgcn_fmed3:
case Intrinsic::amdgcn_sin:
case Intrinsic::amdgcn_cos:
+ case Intrinsic::amdgcn_log:
+ case Intrinsic::amdgcn_exp2:
case Intrinsic::amdgcn_log_clamp:
case Intrinsic::amdgcn_rcp:
case Intrinsic::amdgcn_rcp_legacy:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir
index 733be82403cfd..4473904195dcc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir
@@ -328,3 +328,43 @@ body: |
%11:_(<2 x s16>) = G_FMINNUM_IEEE %qnan_undef_fcan, %14
$vgpr0 = COPY %11(<2 x s16>)
...
+
+---
+name: test_fcanonicalize_log
+tracksRegLiveness: true
+legalized: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_fcanonicalize_log
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), [[COPY]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.log), %0
+ %2:_(s32) = G_FCANONICALIZE %1
+ $vgpr0 = COPY %2(s32)
+...
+
+---
+name: test_fcanonicalize_exp2
+tracksRegLiveness: true
+legalized: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_fcanonicalize_exp2
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[COPY]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), %0
+ %2:_(s32) = G_FCANONICALIZE %1
+ $vgpr0 = COPY %2(s32)
+...
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
index 3c9174e8a8bb0..a4311491d1ea1 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
@@ -882,6 +882,16 @@ define float @v_test_canonicalize_amdgcn_log(float %a) {
ret float %canonicalized
}
+; GCN-LABEL: {{^}}v_test_canonicalize_amdgcn_exp2:
+; GCN: s_waitcnt
+; GCN-NEXT: v_exp_f32
+; GCN-NEXT: s_setpc_b64
+define float @v_test_canonicalize_amdgcn_exp2(float %a) {
+ %log = call float @llvm.amdgcn.exp2.f32(float %a)
+ %canonicalized = call float @llvm.canonicalize.f32(float %log)
+ ret float %canonicalized
+}
+
; Avoid failing the test on FreeBSD11.0 which will match the GCN-NOT: 1.0
; in the .amd_amdgpu_isa "amdgcn-unknown-freebsd11.0--gfx802" directive
; GCN: .amd_amdgpu_isa
@@ -911,6 +921,7 @@ declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #0
declare float @llvm.amdgcn.cubeid(float, float, float) #0
declare float @llvm.amdgcn.frexp.mant.f32(float) #0
declare float @llvm.amdgcn.log.f32(float) #0
+declare float @llvm.amdgcn.exp2.f32(float) #0
attributes #0 = { nounwind readnone }
attributes #1 = { "no-nans-fp-math"="true" }
More information about the llvm-commits mailing list