[llvm] 59162e3 - [AMDGPU] Skip buffer_wbl2 before atomic fence acquire
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 8 01:24:31 PST 2023
Author: Stanislav Mekhanoshin
Date: 2023-03-08T01:24:20-08:00
New Revision: 59162e38590fbe194e2f5dc11bcfc02bffeb75fc
URL: https://github.com/llvm/llvm-project/commit/59162e38590fbe194e2f5dc11bcfc02bffeb75fc
DIFF: https://github.com/llvm/llvm-project/commit/59162e38590fbe194e2f5dc11bcfc02bffeb75fc.diff
LOG: [AMDGPU] Skip buffer_wbl2 before atomic fence acquire
The memory models for gfx90a and gfx940 do not require a buffer_wbl2
before a fence with acquire ordering, but we currently insert the full
release sequence there. For acquire fences, insert only the wait instead.
Fixes: SWDEV-386785
Differential Revision: https://reviews.llvm.org/D145524
Added:
Modified:
llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 68e592b65845d..47b81d7f5bf8a 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -2209,8 +2209,13 @@ bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
bool Changed = false;
if (MOI.isAtomic()) {
- if (MOI.getOrdering() == AtomicOrdering::Acquire ||
- MOI.getOrdering() == AtomicOrdering::Release ||
+ if (MOI.getOrdering() == AtomicOrdering::Acquire)
+ Changed |= CC->insertWait(MI, MOI.getScope(), MOI.getOrderingAddrSpace(),
+ SIMemOp::LOAD | SIMemOp::STORE,
+ MOI.getIsCrossAddressSpaceOrdering(),
+ Position::BEFORE);
+
+ if (MOI.getOrdering() == AtomicOrdering::Release ||
MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
/// TODO: This relies on a barrier always generating a waitcnt
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
index 98406ab613eae..01428a968cad0 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
@@ -1335,14 +1335,12 @@ define amdgpu_kernel void @agent_acquire_fence() {
;
; GFX940-NOTTGSPLIT-LABEL: agent_acquire_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: agent_acquire_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
@@ -1641,14 +1639,12 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() {
;
; GFX940-NOTTGSPLIT-LABEL: agent_one_as_acquire_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: agent_one_as_acquire_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
@@ -1935,7 +1931,6 @@ define amdgpu_kernel void @system_acquire_fence() {
;
; GFX90A-NOTTGSPLIT-LABEL: system_acquire_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
@@ -1943,7 +1938,6 @@ define amdgpu_kernel void @system_acquire_fence() {
;
; GFX90A-TGSPLIT-LABEL: system_acquire_fence:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT: buffer_wbl2
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_invl2
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
@@ -1951,14 +1945,12 @@ define amdgpu_kernel void @system_acquire_fence() {
;
; GFX940-NOTTGSPLIT-LABEL: system_acquire_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: system_acquire_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
@@ -2255,7 +2247,6 @@ define amdgpu_kernel void @system_one_as_acquire_fence() {
;
; GFX90A-NOTTGSPLIT-LABEL: system_one_as_acquire_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
@@ -2263,7 +2254,6 @@ define amdgpu_kernel void @system_one_as_acquire_fence() {
;
; GFX90A-TGSPLIT-LABEL: system_one_as_acquire_fence:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT: buffer_wbl2
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_invl2
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
@@ -2271,14 +2261,12 @@ define amdgpu_kernel void @system_one_as_acquire_fence() {
;
; GFX940-NOTTGSPLIT-LABEL: system_one_as_acquire_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: system_one_as_acquire_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
More information about the llvm-commits mailing list