[llvm] 2eb767c - AMDGPU: Scratch instructions are trivially disjoint from SMEM and buffer instructions (#65287)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 7 22:43:40 PDT 2023
Author: Nicolai Hähnle
Date: 2023-09-08T07:43:36+02:00
New Revision: 2eb767c9e1360bcb58b817d8eee36d616ec841af
URL: https://github.com/llvm/llvm-project/commit/2eb767c9e1360bcb58b817d8eee36d616ec841af
DIFF: https://github.com/llvm/llvm-project/commit/2eb767c9e1360bcb58b817d8eee36d616ec841af.diff
LOG: AMDGPU: Scratch instructions are trivially disjoint from SMEM and buffer instructions (#65287)
Scratch instructions are always in addrspace(5), which can only alias
with flat (and itself). SMEM and buffer instructions can never reference
that address space, so they are trivially disjoint from scratch instructions.
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/test/CodeGen/AMDGPU/schedule-addrspaces.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 5b553b1136b344..38b5e0114903cd 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3425,19 +3425,30 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
if (isMUBUF(MIb) || isMTBUF(MIb))
return checkInstOffsetsDoNotOverlap(MIa, MIb);
- return !isFLAT(MIb) && !isSMRD(MIb);
+ if (isFLAT(MIb))
+ return isFLATScratch(MIb);
+
+ return !isSMRD(MIb);
}
if (isSMRD(MIa)) {
if (isSMRD(MIb))
return checkInstOffsetsDoNotOverlap(MIa, MIb);
- return !isFLAT(MIb) && !isMUBUF(MIb) && !isMTBUF(MIb);
+ if (isFLAT(MIb))
+ return isFLATScratch(MIb);
+
+ return !isMUBUF(MIb) && !isMTBUF(MIb);
}
if (isFLAT(MIa)) {
- if (isFLAT(MIb))
+ if (isFLAT(MIb)) {
+ if ((isFLATScratch(MIa) && isFLATGlobal(MIb)) ||
+ (isFLATGlobal(MIa) && isFLATScratch(MIb)))
+ return true;
+
return checkInstOffsetsDoNotOverlap(MIa, MIb);
+ }
return false;
}
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-addrspaces.ll b/llvm/test/CodeGen/AMDGPU/schedule-addrspaces.ll
index 49e434e2dd30c3..29c82db6f8204e 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-addrspaces.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-addrspaces.ll
@@ -5,15 +5,14 @@ define amdgpu_gfx void @example(<4 x i32> inreg %rsrc, ptr addrspace(5) %src, i3
; CHECK-LABEL: example:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_add_nc_u32_e32 v3, 4, v0
+; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: scratch_load_b32 v2, v0, off
-; CHECK-NEXT: v_add_nc_u32_e32 v0, 4, v0
+; CHECK-NEXT: scratch_load_b32 v3, v3, off
; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: buffer_store_b32 v2, v1, s[4:7], 0 offen
-; CHECK-NEXT: scratch_load_b32 v0, v0, off
-; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: buffer_store_b32 v0, v1, s[4:7], 0 offen offset:4
+; CHECK-NEXT: buffer_store_b64 v[2:3], v1, s[4:7], 0 offen
; CHECK-NEXT: s_setpc_b64 s[30:31]
-;
+
%x0 = load i32, ptr addrspace(5) %src
call void @llvm.amdgcn.raw.buffer.store.i32(i32 %x0, <4 x i32> %rsrc, i32 %dst, i32 0, i32 0)
%src1 = getelementptr i8, ptr addrspace(5) %src, i32 4
More information about the llvm-commits mailing list