[llvm] 7631af3 - [AMDGPU] Skip generating cache invalidating instructions on AMDPAL
Piotr Sobczak via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 24 04:54:41 PDT 2020
Author: Piotr Sobczak
Date: 2020-04-24T13:53:44+02:00
New Revision: 7631af3af2799cdc8963d5c1d8f4261f6442b3ea
URL: https://github.com/llvm/llvm-project/commit/7631af3af2799cdc8963d5c1d8f4261f6442b3ea
DIFF: https://github.com/llvm/llvm-project/commit/7631af3af2799cdc8963d5c1d8f4261f6442b3ea.diff
LOG: [AMDGPU] Skip generating cache invalidating instructions on AMDPAL
Summary:
The frontend guarantees that coherent accesses have the
corresponding cache policy bits (glc, dlc) set.
Therefore, there is no need for extra instructions
that invalidate the cache.
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D78800
Added:
Modified:
llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 242211d8423b..4e6c72ca20e2 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -254,6 +254,9 @@ class SICacheControl {
IsaVersion IV;
+ /// Whether to insert cache invalidation instructions.
+ bool InsertCacheInv;
+
SICacheControl(const GCNSubtarget &ST);
public:
@@ -650,6 +653,7 @@ Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
SICacheControl::SICacheControl(const GCNSubtarget &ST) {
TII = ST.getInstrInfo();
IV = getIsaVersion(ST.getCPU());
+ InsertCacheInv = !ST.isAmdPalOS();
}
/* static */
@@ -714,6 +718,9 @@ bool SIGfx6CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,
Position Pos) const {
+ if (!InsertCacheInv)
+ return false;
+
bool Changed = false;
MachineBasicBlock &MBB = *MI->getParent();
@@ -852,6 +859,9 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,
Position Pos) const {
+ if (!InsertCacheInv)
+ return false;
+
bool Changed = false;
MachineBasicBlock &MBB = *MI->getParent();
@@ -954,6 +964,9 @@ bool SIGfx10CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,
Position Pos) const {
+ if (!InsertCacheInv)
+ return false;
+
bool Changed = false;
MachineBasicBlock &MBB = *MI->getParent();
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll
index 431ca021a779..6fe24c1dfb94 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll
@@ -1,13 +1,15 @@
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx800 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN10 %s
; FUNC-LABEL: {{^}}system_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NEXT: buffer_wbinvl1{{$}}
+; GCN10: s_waitcnt_vscnt null, 0x0
+; GCN-NOT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_acquire() {
entry:
@@ -19,6 +21,7 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10: s_waitcnt_vscnt null, 0x0
; GCN: s_endpgm
define amdgpu_kernel void @system_release() {
entry:
@@ -30,7 +33,8 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: buffer_wbinvl1{{$}}
+; GCN10: s_waitcnt_vscnt null, 0x0
+; GCN-NOT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_acq_rel() {
entry:
@@ -42,7 +46,8 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: buffer_wbinvl1{{$}}
+; GCN10: s_waitcnt_vscnt null, 0x0
+; GCN-NOT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_seq_cst() {
entry:
@@ -54,7 +59,8 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
-; GCN-NEXT: buffer_wbinvl1{{$}}
+; GCN10: s_waitcnt_vscnt null, 0x0
+; GCN-NOT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_acquire() {
entry:
@@ -66,6 +72,7 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN10: s_waitcnt_vscnt null, 0x0
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_release() {
entry:
@@ -77,7 +84,8 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
-; GCN: buffer_wbinvl1{{$}}
+; GCN10: s_waitcnt_vscnt null, 0x0
+; GCN-NOT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_acq_rel() {
entry:
@@ -89,7 +97,8 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
-; GCN: buffer_wbinvl1{{$}}
+; GCN10: s_waitcnt_vscnt null, 0x0
+; GCN-NOT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_seq_cst() {
entry:
@@ -181,7 +190,8 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NEXT: buffer_wbinvl1{{$}}
+; GCN10: s_waitcnt_vscnt null, 0x0
+; GCN-NOT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_acquire() {
entry:
@@ -193,6 +203,7 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10: s_waitcnt_vscnt null, 0x0
; GCN: s_endpgm
define amdgpu_kernel void @agent_release() {
entry:
@@ -204,7 +215,8 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: buffer_wbinvl1{{$}}
+; GCN10: s_waitcnt_vscnt null, 0x0
+; GCN-NOT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_acq_rel() {
entry:
@@ -216,7 +228,8 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: buffer_wbinvl1{{$}}
+; GCN10: s_waitcnt_vscnt null, 0x0
+; GCN-NOT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_seq_cst() {
entry:
@@ -228,7 +241,8 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
-; GCN-NEXT: buffer_wbinvl1{{$}}
+; GCN10: s_waitcnt_vscnt null, 0x0
+; GCN-NOT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_acquire() {
entry:
@@ -240,6 +254,7 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN10: s_waitcnt_vscnt null, 0x0
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_release() {
entry:
@@ -251,7 +266,8 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
-; GCN: buffer_wbinvl1{{$}}
+; GCN10: s_waitcnt_vscnt null, 0x0
+; GCN-NOT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_acq_rel() {
entry:
@@ -263,7 +279,8 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
-; GCN: buffer_wbinvl1{{$}}
+; GCN10: s_waitcnt_vscnt null, 0x0
+; GCN-NOT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_seq_cst() {
entry:
@@ -273,7 +290,9 @@ entry:
; FUNC-LABEL: {{^}}workgroup_acquire:
; GCN: %bb.0
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10: s_waitcnt_vscnt null, 0x0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_acquire() {
@@ -284,7 +303,9 @@ entry:
; FUNC-LABEL: {{^}}workgroup_release:
; GCN: %bb.0
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10: s_waitcnt_vscnt null, 0x0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_release() {
@@ -295,7 +316,9 @@ entry:
; FUNC-LABEL: {{^}}workgroup_acq_rel:
; GCN: %bb.0
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10: s_waitcnt_vscnt null, 0x0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_acq_rel() {
@@ -307,6 +330,9 @@ entry:
; FUNC-LABEL: {{^}}workgroup_seq_cst:
; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10: s_waitcnt_vscnt null, 0x0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_seq_cst() {
@@ -317,7 +343,9 @@ entry:
; FUNC-LABEL: {{^}}workgroup_one_as_acquire:
; GCN: %bb.0
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10: s_waitcnt vmcnt(0)
+; GCN10: s_waitcnt_vscnt null, 0x0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_acquire() {
@@ -328,7 +356,9 @@ entry:
; FUNC-LABEL: {{^}}workgroup_one_as_release:
; GCN: %bb.0
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10: s_waitcnt vmcnt(0)
+; GCN10: s_waitcnt_vscnt null, 0x0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_release() {
@@ -339,7 +369,9 @@ entry:
; FUNC-LABEL: {{^}}workgroup_one_as_acq_rel:
; GCN: %bb.0
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10: s_waitcnt vmcnt(0)
+; GCN10: s_waitcnt_vscnt null, 0x0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_acq_rel() {
@@ -350,7 +382,9 @@ entry:
; FUNC-LABEL: {{^}}workgroup_one_as_seq_cst:
; GCN: %bb.0
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN10: s_waitcnt vmcnt(0)
+; GCN10: s_waitcnt_vscnt null, 0x0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_seq_cst() {
More information about the llvm-commits mailing list