[PATCH] D92951: AMDGPU: If a store defines (alias) a load, it clobbers the load.
Changpeng Fang via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 9 11:00:29 PST 2020
cfang created this revision.
cfang added reviewers: rampitec, arsenm, kerbowa.
Herald added subscribers: hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl.
cfang requested review of this revision.
Herald added a subscriber: wdng.
Herald added a project: LLVM.
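If a store defines (i.e. must-aliases) the memory read by a load, the store clobbers that load, so the load's pointer operand must not be marked amdgpu.noclobber.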
Fixes: SWDEV-258915
https://reviews.llvm.org/D92951
Files:
llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
llvm/test/CodeGen/AMDGPU/store-clobbers-load.ll
llvm/test/CodeGen/AMDGPU/wave32.ll
Index: llvm/test/CodeGen/AMDGPU/wave32.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/wave32.ll
+++ llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -231,9 +231,9 @@
; GCN: ; %bb.{{[0-9]+}}: ; %.preheader
; GCN: BB{{.*}}:
+; GCN: global_store_dword
; GFX1032: s_or_b32 [[MASK0:s[0-9]+]], [[MASK0]], vcc_lo
; GFX1064: s_or_b64 [[MASK0:s\[[0-9:]+\]]], [[MASK0]], vcc
-; GCN: global_store_dword
; GFX1032: s_andn2_b32 [[MASK1:s[0-9]+]], [[MASK1]], exec_lo
; GFX1064: s_andn2_b64 [[MASK1:s\[[0-9:]+\]]], [[MASK1]], exec
; GFX1032: s_and_b32 [[MASK0]], [[MASK0]], exec_lo
@@ -249,10 +249,12 @@
; GFX1064: s_andn2_b64 exec, exec, [[ACC]]
; GCN: s_cbranch_execz
; GCN: BB{{.*}}:
-; GCN: s_load_dword [[LOAD:s[0-9]+]]
+
; GFX1032: s_or_b32 [[MASK1]], [[MASK1]], exec_lo
; GFX1064: s_or_b64 [[MASK1]], [[MASK1]], exec
-; GCN: s_cmp_lt_i32 [[LOAD]], 11
+; GCN: global_load_dword [[LOAD:v[0-9]+]]
+; GFX1032: v_cmp_gt_i32_e32 vcc_lo, 11, [[LOAD]]
+; GFX1064: v_cmp_gt_i32_e32 vcc, 11, [[LOAD]]
define amdgpu_kernel void @test_loop_with_if_else_break(i32 addrspace(1)* %arg) #0 {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
Index: llvm/test/CodeGen/AMDGPU/store-clobbers-load.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/store-clobbers-load.ll
@@ -0,0 +1,23 @@
+; RUN: opt -S --amdgpu-annotate-uniform < %s | FileCheck -check-prefix=OPT %s
+target datalayout = "A5"
+
+; "load vaddr" depends on the store, so we should not mark vaddr as amdgpu.noclobber.
+
+; OPT-LABEL: @store_clobbers_load(
+; OPT: %vaddr = bitcast [4 x i32] addrspace(5)* %alloca to <4 x i32> addrspace(5)*, !amdgpu.uniform !0
+; OPT-NEXT: %zero = load <4 x i32>, <4 x i32> addrspace(5)* %vaddr, align 16
+define amdgpu_kernel void @store_clobbers_load(i32 addrspace(1)* %out, i32 %index) {
+entry:
+ %alloca = alloca [4 x i32], addrspace(5)
+ %addr0 = bitcast [4 x i32] addrspace(5)* %alloca to i32 addrspace(5)*
+ store i32 0, i32 addrspace(5)* %addr0
+ %vaddr = bitcast [4 x i32] addrspace(5)* %alloca to <4 x i32> addrspace(5)*
+ %zero = load <4 x i32>, <4 x i32> addrspace(5)* %vaddr, align 16
+ %one = insertelement <4 x i32> %zero, i32 1, i32 1
+ %two = insertelement <4 x i32> %one, i32 2, i32 2
+ %three = insertelement <4 x i32> %two, i32 3, i32 3
+ store <4 x i32> %three, <4 x i32> addrspace(5)* %vaddr, align 16
+ %rslt = extractelement <4 x i32> %three, i32 %index
+ store i32 %rslt, i32 addrspace(1)* %out, align 4
+ ret void
+}
Index: llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -110,7 +110,9 @@
BasicBlock::iterator(Load) : BB->end();
auto Q = MDR->getPointerDependencyFrom(
MemoryLocation::getBeforeOrAfter(Ptr), true, StartIt, BB, Load);
- if (Q.isClobber() || Q.isUnknown())
+ if (Q.isClobber() || Q.isUnknown() ||
+ // Store defines the load and thus clobbers it.
+ (Q.isDef() && isa<StoreInst>(Q.getInst())))
return true;
}
return false;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D92951.310580.patch
Type: text/x-patch
Size: 3247 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20201209/a5795435/attachment.bin>
More information about the llvm-commits
mailing list