[llvm] 7631af3 - [AMDGPU] Skip generating cache invalidating instructions on AMDPAL

Piotr Sobczak via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 24 04:54:41 PDT 2020


Author: Piotr Sobczak
Date: 2020-04-24T13:53:44+02:00
New Revision: 7631af3af2799cdc8963d5c1d8f4261f6442b3ea

URL: https://github.com/llvm/llvm-project/commit/7631af3af2799cdc8963d5c1d8f4261f6442b3ea
DIFF: https://github.com/llvm/llvm-project/commit/7631af3af2799cdc8963d5c1d8f4261f6442b3ea.diff

LOG: [AMDGPU] Skip generating cache invalidating instructions on AMDPAL

Summary:
The frontend guarantees that coherent accesses have
the corresponding cache policy bits (glc, dlc) set.
Therefore, there is no need to emit extra instructions
that invalidate the cache.

Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D78800

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
    llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 242211d8423b..4e6c72ca20e2 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -254,6 +254,9 @@ class SICacheControl {
 
   IsaVersion IV;
 
+  /// Whether to insert cache invalidation instructions.
+  bool InsertCacheInv;
+
   SICacheControl(const GCNSubtarget &ST);
 
 public:
@@ -650,6 +653,7 @@ Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
 SICacheControl::SICacheControl(const GCNSubtarget &ST) {
   TII = ST.getInstrInfo();
   IV = getIsaVersion(ST.getCPU());
+  InsertCacheInv = !ST.isAmdPalOS();
 }
 
 /* static */
@@ -714,6 +718,9 @@ bool SIGfx6CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
                                                SIAtomicScope Scope,
                                                SIAtomicAddrSpace AddrSpace,
                                                Position Pos) const {
+  if (!InsertCacheInv)
+    return false;
+
   bool Changed = false;
 
   MachineBasicBlock &MBB = *MI->getParent();
@@ -852,6 +859,9 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
                                                SIAtomicScope Scope,
                                                SIAtomicAddrSpace AddrSpace,
                                                Position Pos) const {
+  if (!InsertCacheInv)
+    return false;
+
   bool Changed = false;
 
   MachineBasicBlock &MBB = *MI->getParent();
@@ -954,6 +964,9 @@ bool SIGfx10CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
                                                 SIAtomicScope Scope,
                                                 SIAtomicAddrSpace AddrSpace,
                                                 Position Pos) const {
+  if (!InsertCacheInv)
+    return false;
+
   bool Changed = false;
 
   MachineBasicBlock &MBB = *MI->getParent();

diff  --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll
index 431ca021a779..6fe24c1dfb94 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll
@@ -1,13 +1,15 @@
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx800 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN10 %s
 
 ; FUNC-LABEL: {{^}}system_acquire:
 ; GCN:        %bb.0
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NEXT:   buffer_wbinvl1{{$}}
+; GCN10:      s_waitcnt_vscnt null, 0x0
+; GCN-NOT:    buffer_wbinvl1{{$}}
 ; GCN:        s_endpgm
 define amdgpu_kernel void @system_acquire() {
 entry:
@@ -19,6 +21,7 @@ entry:
 ; GCN:        %bb.0
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10:      s_waitcnt_vscnt null, 0x0
 ; GCN:        s_endpgm
 define amdgpu_kernel void @system_release() {
 entry:
@@ -30,7 +33,8 @@ entry:
 ; GCN:        %bb.0
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN:        buffer_wbinvl1{{$}}
+; GCN10:      s_waitcnt_vscnt null, 0x0
+; GCN-NOT:    buffer_wbinvl1{{$}}
 ; GCN:        s_endpgm
 define amdgpu_kernel void @system_acq_rel() {
 entry:
@@ -42,7 +46,8 @@ entry:
 ; GCN:        %bb.0
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN:        buffer_wbinvl1{{$}}
+; GCN10:      s_waitcnt_vscnt null, 0x0
+; GCN-NOT:    buffer_wbinvl1{{$}}
 ; GCN:        s_endpgm
 define amdgpu_kernel void @system_seq_cst() {
 entry:
@@ -54,7 +59,8 @@ entry:
 ; GCN:        %bb.0
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0){{$}}
-; GCN-NEXT:   buffer_wbinvl1{{$}}
+; GCN10:      s_waitcnt_vscnt null, 0x0
+; GCN-NOT:    buffer_wbinvl1{{$}}
 ; GCN:        s_endpgm
 define amdgpu_kernel void @system_one_as_acquire() {
 entry:
@@ -66,6 +72,7 @@ entry:
 ; GCN:        %bb.0
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0){{$}}
+; GCN10:      s_waitcnt_vscnt null, 0x0
 ; GCN:        s_endpgm
 define amdgpu_kernel void @system_one_as_release() {
 entry:
@@ -77,7 +84,8 @@ entry:
 ; GCN:        %bb.0
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0){{$}}
-; GCN:        buffer_wbinvl1{{$}}
+; GCN10:      s_waitcnt_vscnt null, 0x0
+; GCN-NOT:    buffer_wbinvl1{{$}}
 ; GCN:        s_endpgm
 define amdgpu_kernel void @system_one_as_acq_rel() {
 entry:
@@ -89,7 +97,8 @@ entry:
 ; GCN:        %bb.0
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0){{$}}
-; GCN:        buffer_wbinvl1{{$}}
+; GCN10:      s_waitcnt_vscnt null, 0x0
+; GCN-NOT:    buffer_wbinvl1{{$}}
 ; GCN:        s_endpgm
 define amdgpu_kernel void @system_one_as_seq_cst() {
 entry:
@@ -181,7 +190,8 @@ entry:
 ; GCN:        %bb.0
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NEXT:   buffer_wbinvl1{{$}}
+; GCN10:      s_waitcnt_vscnt null, 0x0
+; GCN-NOT:    buffer_wbinvl1{{$}}
 ; GCN:        s_endpgm
 define amdgpu_kernel void @agent_acquire() {
 entry:
@@ -193,6 +203,7 @@ entry:
 ; GCN:        %bb.0
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10:      s_waitcnt_vscnt null, 0x0
 ; GCN:        s_endpgm
 define amdgpu_kernel void @agent_release() {
 entry:
@@ -204,7 +215,8 @@ entry:
 ; GCN:        %bb.0
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN:        buffer_wbinvl1{{$}}
+; GCN10:      s_waitcnt_vscnt null, 0x0
+; GCN-NOT:    buffer_wbinvl1{{$}}
 ; GCN:        s_endpgm
 define amdgpu_kernel void @agent_acq_rel() {
 entry:
@@ -216,7 +228,8 @@ entry:
 ; GCN:        %bb.0
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN:        buffer_wbinvl1{{$}}
+; GCN10:      s_waitcnt_vscnt null, 0x0
+; GCN-NOT:    buffer_wbinvl1{{$}}
 ; GCN:        s_endpgm
 define amdgpu_kernel void @agent_seq_cst() {
 entry:
@@ -228,7 +241,8 @@ entry:
 ; GCN:        %bb.0
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0){{$}}
-; GCN-NEXT:   buffer_wbinvl1{{$}}
+; GCN10:      s_waitcnt_vscnt null, 0x0
+; GCN-NOT:    buffer_wbinvl1{{$}}
 ; GCN:        s_endpgm
 define amdgpu_kernel void @agent_one_as_acquire() {
 entry:
@@ -240,6 +254,7 @@ entry:
 ; GCN:        %bb.0
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0){{$}}
+; GCN10:      s_waitcnt_vscnt null, 0x0
 ; GCN:        s_endpgm
 define amdgpu_kernel void @agent_one_as_release() {
 entry:
@@ -251,7 +266,8 @@ entry:
 ; GCN:        %bb.0
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0){{$}}
-; GCN:        buffer_wbinvl1{{$}}
+; GCN10:      s_waitcnt_vscnt null, 0x0
+; GCN-NOT:    buffer_wbinvl1{{$}}
 ; GCN:        s_endpgm
 define amdgpu_kernel void @agent_one_as_acq_rel() {
 entry:
@@ -263,7 +279,8 @@ entry:
 ; GCN:        %bb.0
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0){{$}}
-; GCN:        buffer_wbinvl1{{$}}
+; GCN10:      s_waitcnt_vscnt null, 0x0
+; GCN-NOT:    buffer_wbinvl1{{$}}
 ; GCN:        s_endpgm
 define amdgpu_kernel void @agent_one_as_seq_cst() {
 entry:
@@ -273,7 +290,9 @@ entry:
 
 ; FUNC-LABEL: {{^}}workgroup_acquire:
 ; GCN:        %bb.0
-; GCN-NOT:    s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN9-NOT:   s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10:      s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10:      s_waitcnt_vscnt null, 0x0
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_endpgm
 define amdgpu_kernel void @workgroup_acquire() {
@@ -284,7 +303,9 @@ entry:
 
 ; FUNC-LABEL: {{^}}workgroup_release:
 ; GCN:        %bb.0
-; GCN-NOT:    s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN9-NOT:   s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10:      s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10:      s_waitcnt_vscnt null, 0x0
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_endpgm
 define amdgpu_kernel void @workgroup_release() {
@@ -295,7 +316,9 @@ entry:
 
 ; FUNC-LABEL: {{^}}workgroup_acq_rel:
 ; GCN:        %bb.0
-; GCN-NOT:    s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN9-NOT:   s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10:      s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10:      s_waitcnt_vscnt null, 0x0
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_endpgm
 define amdgpu_kernel void @workgroup_acq_rel() {
@@ -307,6 +330,9 @@ entry:
 ; FUNC-LABEL: {{^}}workgroup_seq_cst:
 ; GCN:        %bb.0
 ; GCN-NOT:    s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN9-NOT:   s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10:      s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10:      s_waitcnt_vscnt null, 0x0
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_endpgm
 define amdgpu_kernel void @workgroup_seq_cst() {
@@ -317,7 +343,9 @@ entry:
 
 ; FUNC-LABEL: {{^}}workgroup_one_as_acquire:
 ; GCN:        %bb.0
-; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
+; GCN9-NOT:   s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10:      s_waitcnt vmcnt(0)
+; GCN10:      s_waitcnt_vscnt null, 0x0
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_endpgm
 define amdgpu_kernel void @workgroup_one_as_acquire() {
@@ -328,7 +356,9 @@ entry:
 
 ; FUNC-LABEL: {{^}}workgroup_one_as_release:
 ; GCN:        %bb.0
-; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
+; GCN9-NOT:   s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10:      s_waitcnt vmcnt(0)
+; GCN10:      s_waitcnt_vscnt null, 0x0
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_endpgm
 define amdgpu_kernel void @workgroup_one_as_release() {
@@ -339,7 +369,9 @@ entry:
 
 ; FUNC-LABEL: {{^}}workgroup_one_as_acq_rel:
 ; GCN:        %bb.0
-; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
+; GCN9-NOT:   s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10:      s_waitcnt vmcnt(0)
+; GCN10:      s_waitcnt_vscnt null, 0x0
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_endpgm
 define amdgpu_kernel void @workgroup_one_as_acq_rel() {
@@ -350,7 +382,9 @@ entry:
 
 ; FUNC-LABEL: {{^}}workgroup_one_as_seq_cst:
 ; GCN:        %bb.0
-; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
+; GCN9-NOT:   s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10:      s_waitcnt vmcnt(0)
+; GCN10:      s_waitcnt_vscnt null, 0x0
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_endpgm
 define amdgpu_kernel void @workgroup_one_as_seq_cst() {


        


More information about the llvm-commits mailing list