[llvm] 590dd73 - [AMDGPU] Make generating cache invalidating instructions optional

Piotr Sobczak via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 27 00:53:08 PDT 2020


Author: Piotr Sobczak
Date: 2020-07-27T09:24:11+02:00
New Revision: 590dd73c6ebdc9fe1314dfa5bda5c2367d866574

URL: https://github.com/llvm/llvm-project/commit/590dd73c6ebdc9fe1314dfa5bda5c2367d866574
DIFF: https://github.com/llvm/llvm-project/commit/590dd73c6ebdc9fe1314dfa5bda5c2367d866574.diff

LOG: [AMDGPU] Make generating cache invalidating instructions optional

Summary:
D78800 skipped generating cache invalidating instructions altogether
on AMDPAL. However, this is sometimes too restrictive - we want a
more flexible option to be able to toggle this behaviour on and off
while we work towards developing a correct implementation of the
alternative memory model.

Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, dexonsmith, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D84448

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
    llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 4e6c72ca20e2..21419aab1a43 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -47,6 +47,10 @@ using namespace llvm::AMDGPU;
 #define DEBUG_TYPE "si-memory-legalizer"
 #define PASS_NAME "SI Memory Legalizer"
 
+static cl::opt<bool> AmdgcnSkipCacheInvalidations(
+    "amdgcn-skip-cache-invalidations", cl::init(false), cl::Hidden,
+    cl::desc("Use this to skip inserting cache invalidating instructions."));
+
 namespace {
 
 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
@@ -254,7 +258,7 @@ class SICacheControl {
 
   IsaVersion IV;
 
-  /// Whether to insert cache invalidation instructions.
+  /// Whether to insert cache invalidating instructions.
   bool InsertCacheInv;
 
   SICacheControl(const GCNSubtarget &ST);
@@ -653,7 +657,7 @@ Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
 SICacheControl::SICacheControl(const GCNSubtarget &ST) {
   TII = ST.getInstrInfo();
   IV = getIsaVersion(ST.getCPU());
-  InsertCacheInv = !ST.isAmdPalOS();
+  InsertCacheInv = !AmdgcnSkipCacheInvalidations;
 }
 
 /* static */

diff  --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll
index 6fe24c1dfb94..b414c83374b8 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll
@@ -1,15 +1,23 @@
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN10 %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9,CACHE_INV %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9,CACHE_INV %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9,CACHE_INV %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9,CACHE_INV %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN10,CACHE_INV10 %s
+
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -amdgcn-skip-cache-invalidations -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9,SKIP_CACHE_INV %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -amdgcn-skip-cache-invalidations -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN10,SKIP_CACHE_INV %s
+
 
 ; FUNC-LABEL: {{^}}system_acquire:
 ; GCN:        %bb.0
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
 ; GCN10:      s_waitcnt_vscnt null, 0x0
-; GCN-NOT:    buffer_wbinvl1{{$}}
+; CACHE_INV:  buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
 ; GCN:        s_endpgm
 define amdgpu_kernel void @system_acquire() {
 entry:
@@ -34,7 +42,11 @@ entry:
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
 ; GCN10:      s_waitcnt_vscnt null, 0x0
-; GCN-NOT:    buffer_wbinvl1{{$}}
+; CACHE_INV:  buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
 ; GCN:        s_endpgm
 define amdgpu_kernel void @system_acq_rel() {
 entry:
@@ -47,7 +59,11 @@ entry:
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
 ; GCN10:      s_waitcnt_vscnt null, 0x0
-; GCN-NOT:    buffer_wbinvl1{{$}}
+; CACHE_INV:  buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
 ; GCN:        s_endpgm
 define amdgpu_kernel void @system_seq_cst() {
 entry:
@@ -60,7 +76,11 @@ entry:
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0){{$}}
 ; GCN10:      s_waitcnt_vscnt null, 0x0
-; GCN-NOT:    buffer_wbinvl1{{$}}
+; CACHE_INV:  buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
 ; GCN:        s_endpgm
 define amdgpu_kernel void @system_one_as_acquire() {
 entry:
@@ -85,7 +105,11 @@ entry:
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0){{$}}
 ; GCN10:      s_waitcnt_vscnt null, 0x0
-; GCN-NOT:    buffer_wbinvl1{{$}}
+; CACHE_INV:  buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
 ; GCN:        s_endpgm
 define amdgpu_kernel void @system_one_as_acq_rel() {
 entry:
@@ -98,7 +122,11 @@ entry:
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0){{$}}
 ; GCN10:      s_waitcnt_vscnt null, 0x0
-; GCN-NOT:    buffer_wbinvl1{{$}}
+; CACHE_INV:    buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
 ; GCN:        s_endpgm
 define amdgpu_kernel void @system_one_as_seq_cst() {
 entry:
@@ -191,7 +219,11 @@ entry:
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
 ; GCN10:      s_waitcnt_vscnt null, 0x0
-; GCN-NOT:    buffer_wbinvl1{{$}}
+; CACHE_INV:  buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
 ; GCN:        s_endpgm
 define amdgpu_kernel void @agent_acquire() {
 entry:
@@ -216,7 +248,11 @@ entry:
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
 ; GCN10:      s_waitcnt_vscnt null, 0x0
-; GCN-NOT:    buffer_wbinvl1{{$}}
+; CACHE_INV:  buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
 ; GCN:        s_endpgm
 define amdgpu_kernel void @agent_acq_rel() {
 entry:
@@ -229,7 +265,11 @@ entry:
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
 ; GCN10:      s_waitcnt_vscnt null, 0x0
-; GCN-NOT:    buffer_wbinvl1{{$}}
+; CACHE_INV:  buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
 ; GCN:        s_endpgm
 define amdgpu_kernel void @agent_seq_cst() {
 entry:
@@ -242,7 +282,11 @@ entry:
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0){{$}}
 ; GCN10:      s_waitcnt_vscnt null, 0x0
-; GCN-NOT:    buffer_wbinvl1{{$}}
+; CACHE_INV:  buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
 ; GCN:        s_endpgm
 define amdgpu_kernel void @agent_one_as_acquire() {
 entry:
@@ -267,7 +311,11 @@ entry:
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0){{$}}
 ; GCN10:      s_waitcnt_vscnt null, 0x0
-; GCN-NOT:    buffer_wbinvl1{{$}}
+; CACHE_INV:  buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
 ; GCN:        s_endpgm
 define amdgpu_kernel void @agent_one_as_acq_rel() {
 entry:
@@ -280,7 +328,11 @@ entry:
 ; GCN-NOT:    ATOMIC_FENCE
 ; GCN:        s_waitcnt vmcnt(0){{$}}
 ; GCN10:      s_waitcnt_vscnt null, 0x0
-; GCN-NOT:    buffer_wbinvl1{{$}}
+; CACHE_INV:  buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
 ; GCN:        s_endpgm
 define amdgpu_kernel void @agent_one_as_seq_cst() {
 entry:


        


More information about the llvm-commits mailing list