[llvm] 590dd73 - [AMDGPU] Make generating cache invalidating instructions optional
Piotr Sobczak via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 27 00:53:08 PDT 2020
Author: Piotr Sobczak
Date: 2020-07-27T09:24:11+02:00
New Revision: 590dd73c6ebdc9fe1314dfa5bda5c2367d866574
URL: https://github.com/llvm/llvm-project/commit/590dd73c6ebdc9fe1314dfa5bda5c2367d866574
DIFF: https://github.com/llvm/llvm-project/commit/590dd73c6ebdc9fe1314dfa5bda5c2367d866574.diff
LOG: [AMDGPU] Make generating cache invalidating instructions optional
Summary:
D78800 skipped generating cache invalidating instructions altogether
on AMDPAL. However, this is sometimes too restrictive - we want a
more flexible option to be able to toggle this behaviour on and off
while we work towards developing a correct implementation of the
alternative memory model.
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, dexonsmith, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D84448
Added:
Modified:
llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 4e6c72ca20e2..21419aab1a43 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -47,6 +47,10 @@ using namespace llvm::AMDGPU;
#define DEBUG_TYPE "si-memory-legalizer"
#define PASS_NAME "SI Memory Legalizer"
+static cl::opt<bool> AmdgcnSkipCacheInvalidations(
+ "amdgcn-skip-cache-invalidations", cl::init(false), cl::Hidden,
+ cl::desc("Use this to skip inserting cache invalidating instructions."));
+
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
@@ -254,7 +258,7 @@ class SICacheControl {
IsaVersion IV;
- /// Whether to insert cache invalidation instructions.
+ /// Whether to insert cache invalidating instructions.
bool InsertCacheInv;
SICacheControl(const GCNSubtarget &ST);
@@ -653,7 +657,7 @@ Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
SICacheControl::SICacheControl(const GCNSubtarget &ST) {
TII = ST.getInstrInfo();
IV = getIsaVersion(ST.getCPU());
- InsertCacheInv = !ST.isAmdPalOS();
+ InsertCacheInv = !AmdgcnSkipCacheInvalidations;
}
/* static */
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll
index 6fe24c1dfb94..b414c83374b8 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll
@@ -1,15 +1,23 @@
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN10 %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9,CACHE_INV %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9,CACHE_INV %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9,CACHE_INV %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9,CACHE_INV %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN10,CACHE_INV10 %s
+
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -amdgcn-skip-cache-invalidations -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9,SKIP_CACHE_INV %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -amdgcn-skip-cache-invalidations -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN10,SKIP_CACHE_INV %s
+
; FUNC-LABEL: {{^}}system_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN10: s_waitcnt_vscnt null, 0x0
-; GCN-NOT: buffer_wbinvl1{{$}}
+; CACHE_INV: buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
; GCN: s_endpgm
define amdgpu_kernel void @system_acquire() {
entry:
@@ -34,7 +42,11 @@ entry:
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN10: s_waitcnt_vscnt null, 0x0
-; GCN-NOT: buffer_wbinvl1{{$}}
+; CACHE_INV: buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
; GCN: s_endpgm
define amdgpu_kernel void @system_acq_rel() {
entry:
@@ -47,7 +59,11 @@ entry:
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN10: s_waitcnt_vscnt null, 0x0
-; GCN-NOT: buffer_wbinvl1{{$}}
+; CACHE_INV: buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
; GCN: s_endpgm
define amdgpu_kernel void @system_seq_cst() {
entry:
@@ -60,7 +76,11 @@ entry:
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN10: s_waitcnt_vscnt null, 0x0
-; GCN-NOT: buffer_wbinvl1{{$}}
+; CACHE_INV: buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_acquire() {
entry:
@@ -85,7 +105,11 @@ entry:
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN10: s_waitcnt_vscnt null, 0x0
-; GCN-NOT: buffer_wbinvl1{{$}}
+; CACHE_INV: buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_acq_rel() {
entry:
@@ -98,7 +122,11 @@ entry:
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN10: s_waitcnt_vscnt null, 0x0
-; GCN-NOT: buffer_wbinvl1{{$}}
+; CACHE_INV: buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_seq_cst() {
entry:
@@ -191,7 +219,11 @@ entry:
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN10: s_waitcnt_vscnt null, 0x0
-; GCN-NOT: buffer_wbinvl1{{$}}
+; CACHE_INV: buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
; GCN: s_endpgm
define amdgpu_kernel void @agent_acquire() {
entry:
@@ -216,7 +248,11 @@ entry:
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN10: s_waitcnt_vscnt null, 0x0
-; GCN-NOT: buffer_wbinvl1{{$}}
+; CACHE_INV: buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
; GCN: s_endpgm
define amdgpu_kernel void @agent_acq_rel() {
entry:
@@ -229,7 +265,11 @@ entry:
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN10: s_waitcnt_vscnt null, 0x0
-; GCN-NOT: buffer_wbinvl1{{$}}
+; CACHE_INV: buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
; GCN: s_endpgm
define amdgpu_kernel void @agent_seq_cst() {
entry:
@@ -242,7 +282,11 @@ entry:
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN10: s_waitcnt_vscnt null, 0x0
-; GCN-NOT: buffer_wbinvl1{{$}}
+; CACHE_INV: buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_acquire() {
entry:
@@ -267,7 +311,11 @@ entry:
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN10: s_waitcnt_vscnt null, 0x0
-; GCN-NOT: buffer_wbinvl1{{$}}
+; CACHE_INV: buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_acq_rel() {
entry:
@@ -280,7 +328,11 @@ entry:
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN10: s_waitcnt_vscnt null, 0x0
-; GCN-NOT: buffer_wbinvl1{{$}}
+; CACHE_INV: buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_seq_cst() {
entry:
More information about the llvm-commits
mailing list