[llvm] 59162e3 - [AMDGPU] Skip buffer_wbl2 before atomic fence acquire

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 8 01:24:31 PST 2023


Author: Stanislav Mekhanoshin
Date: 2023-03-08T01:24:20-08:00
New Revision: 59162e38590fbe194e2f5dc11bcfc02bffeb75fc

URL: https://github.com/llvm/llvm-project/commit/59162e38590fbe194e2f5dc11bcfc02bffeb75fc
DIFF: https://github.com/llvm/llvm-project/commit/59162e38590fbe194e2f5dc11bcfc02bffeb75fc.diff

LOG: [AMDGPU] Skip buffer_wbl2 before atomic fence acquire

Memory models for gfx90a and gfx940 do not require buffer_wbl2
before the fence for acquire ordering, but we do insert the full
release.

Fixes: SWDEV-386785

Differential Revision: https://reviews.llvm.org/D145524

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
    llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 68e592b65845d..47b81d7f5bf8a 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -2209,8 +2209,13 @@ bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
   bool Changed = false;
 
   if (MOI.isAtomic()) {
-    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
-        MOI.getOrdering() == AtomicOrdering::Release ||
+    if (MOI.getOrdering() == AtomicOrdering::Acquire)
+      Changed |= CC->insertWait(MI, MOI.getScope(), MOI.getOrderingAddrSpace(),
+                                SIMemOp::LOAD | SIMemOp::STORE,
+                                MOI.getIsCrossAddressSpaceOrdering(),
+                                Position::BEFORE);
+
+    if (MOI.getOrdering() == AtomicOrdering::Release ||
         MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
         MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
       /// TODO: This relies on a barrier always generating a waitcnt

diff  --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
index 98406ab613eae..01428a968cad0 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
@@ -1335,14 +1335,12 @@ define amdgpu_kernel void @agent_acquire_fence() {
 ;
 ; GFX940-NOTTGSPLIT-LABEL: agent_acquire_fence:
 ; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
 ; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
 ; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
 ;
 ; GFX940-TGSPLIT-LABEL: agent_acquire_fence:
 ; GFX940-TGSPLIT:       ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
 ; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
 ; GFX940-TGSPLIT-NEXT:    s_endpgm
@@ -1641,14 +1639,12 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() {
 ;
 ; GFX940-NOTTGSPLIT-LABEL: agent_one_as_acquire_fence:
 ; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
 ; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
 ; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
 ;
 ; GFX940-TGSPLIT-LABEL: agent_one_as_acquire_fence:
 ; GFX940-TGSPLIT:       ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
 ; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
 ; GFX940-TGSPLIT-NEXT:    s_endpgm
@@ -1935,7 +1931,6 @@ define amdgpu_kernel void @system_acquire_fence() {
 ;
 ; GFX90A-NOTTGSPLIT-LABEL: system_acquire_fence:
 ; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
 ; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
 ; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
@@ -1943,7 +1938,6 @@ define amdgpu_kernel void @system_acquire_fence() {
 ;
 ; GFX90A-TGSPLIT-LABEL: system_acquire_fence:
 ; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
 ; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-TGSPLIT-NEXT:    buffer_invl2
 ; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
@@ -1951,14 +1945,12 @@ define amdgpu_kernel void @system_acquire_fence() {
 ;
 ; GFX940-NOTTGSPLIT-LABEL: system_acquire_fence:
 ; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
 ; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
 ; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
 ;
 ; GFX940-TGSPLIT-LABEL: system_acquire_fence:
 ; GFX940-TGSPLIT:       ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
 ; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
 ; GFX940-TGSPLIT-NEXT:    s_endpgm
@@ -2255,7 +2247,6 @@ define amdgpu_kernel void @system_one_as_acquire_fence() {
 ;
 ; GFX90A-NOTTGSPLIT-LABEL: system_one_as_acquire_fence:
 ; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
 ; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
 ; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
@@ -2263,7 +2254,6 @@ define amdgpu_kernel void @system_one_as_acquire_fence() {
 ;
 ; GFX90A-TGSPLIT-LABEL: system_one_as_acquire_fence:
 ; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
 ; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-TGSPLIT-NEXT:    buffer_invl2
 ; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
@@ -2271,14 +2261,12 @@ define amdgpu_kernel void @system_one_as_acquire_fence() {
 ;
 ; GFX940-NOTTGSPLIT-LABEL: system_one_as_acquire_fence:
 ; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
 ; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
 ; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
 ;
 ; GFX940-TGSPLIT-LABEL: system_one_as_acquire_fence:
 ; GFX940-TGSPLIT:       ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
 ; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
 ; GFX940-TGSPLIT-NEXT:    s_endpgm


        


More information about the llvm-commits mailing list