[llvm] 04bd5b5 - [AMDGPU] Fix not rescheduling without clustering
Austin Kerbow via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 7 11:17:40 PDT 2020
Author: Vang Thao
Date: 2020-08-07T11:15:58-07:00
New Revision: 04bd5b52862098945a8bb9449d6accc120304cb5
URL: https://github.com/llvm/llvm-project/commit/04bd5b52862098945a8bb9449d6accc120304cb5
DIFF: https://github.com/llvm/llvm-project/commit/04bd5b52862098945a8bb9449d6accc120304cb5.diff
LOG: [AMDGPU] Fix not rescheduling without clustering
Regions that should be rescheduled without memory op clustering are sometimes
skipped. RegionIdx is not incremented when iterating over regions that are
flagged to be skipped, causing the index to be incorrect for the regions
that follow.
Thanks to Vang Thao for discovering this bug!
Reviewed By: rampitec
Differential Revision: https://reviews.llvm.org/D85498
Added:
Modified:
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit-clustering.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index deed50b6db7d..c2feb0ce25f0 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -567,8 +567,10 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
SavedMutations.swap(Mutations);
for (auto Region : Regions) {
- if (Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx])
+ if (Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx]) {
+ ++RegionIdx;
continue;
+ }
RegionBegin = Region.first;
RegionEnd = Region.second;
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit-clustering.ll b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit-clustering.ll
index 884d0cbd4dbe..139669bbe6d0 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit-clustering.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit-clustering.ll
@@ -3,6 +3,9 @@
; Interleave loads and stores to fit into 9 VGPR limit.
; This requires to avoid load/store clustering.
+; Reschedule the second scheduling region without clustering while
+; the first region is skipped.
+
; GCN: global_load_dwordx4
; GCN: global_store_dwordx4
; GCN: global_load_dwordx4
@@ -12,10 +15,13 @@
; GCN: NumVgprs: {{[0-9]$}}
; GCN: ScratchSize: 0{{$}}
-define amdgpu_kernel void @load_store_max_9vgprs(<4 x i32> addrspace(1)* nocapture noalias readonly %arg, <4 x i32> addrspace(1)* nocapture noalias %arg1) #1 {
+define amdgpu_kernel void @load_store_max_9vgprs(<4 x i32> addrspace(1)* nocapture noalias readonly %arg, <4 x i32> addrspace(1)* nocapture noalias %arg1, i1 %cnd) #1 {
bb:
%id = call i32 @llvm.amdgcn.workitem.id.x()
%base = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i32 %id
+ br i1 %cnd, label %bb1, label %bb2
+
+bb1:
%tmp = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %base, i32 1
%tmp2 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp, align 4
%tmp3 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %base, i32 3
@@ -27,6 +33,9 @@ bb:
store <4 x i32> %tmp4, <4 x i32> addrspace(1)* %tmp7, align 4
%tmp8 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg1, i64 5
store <4 x i32> %tmp6, <4 x i32> addrspace(1)* %tmp8, align 4
+ br label %bb2
+
+bb2:
ret void
}
More information about the llvm-commits
mailing list