[PATCH] D85498: [AMDGPU] Fix not rescheduling without clustering

Austin Kerbow via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 6 21:10:00 PDT 2020


kerbowa created this revision.
kerbowa added reviewers: rampitec, vpykhtin, vangthao95.
Herald added subscribers: llvm-commits, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl, arsenm.
Herald added a project: LLVM.
kerbowa requested review of this revision.
Herald added a subscriber: wdng.

Regions that should be rescheduled without memory op clustering are sometimes
skipped. RegionIdx is not incremented when iterating over regions that are
flagged to be skipped, causing the index to be incorrect for the regions that
follow.

Thanks to Vang Thao for discovering this bug!


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D85498

Files:
  llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
  llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit-clustering.ll


Index: llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit-clustering.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit-clustering.ll
+++ llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit-clustering.ll
@@ -3,6 +3,9 @@
 ; Interleave loads and stores to fit into 9 VGPR limit.
 ; This requires to avoid load/store clustering.
 
+; Reschedule the second scheduling region without clustering while
+; the first region is skipped.
+
 ; GCN: global_load_dwordx4
 ; GCN: global_store_dwordx4
 ; GCN: global_load_dwordx4
@@ -12,10 +15,13 @@
 ; GCN: NumVgprs: {{[0-9]$}}
 ; GCN: ScratchSize: 0{{$}}
 
-define amdgpu_kernel void @load_store_max_9vgprs(<4 x i32> addrspace(1)* nocapture noalias readonly %arg, <4 x i32> addrspace(1)* nocapture noalias %arg1) #1 {
+define amdgpu_kernel void @load_store_max_9vgprs(<4 x i32> addrspace(1)* nocapture noalias readonly %arg, <4 x i32> addrspace(1)* nocapture noalias %arg1, i1 %cnd) #1 {
 bb:
   %id = call i32 @llvm.amdgcn.workitem.id.x()
   %base = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i32 %id
+  br i1 %cnd, label %bb1, label %bb2
+
+bb1:
   %tmp = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %base, i32 1
   %tmp2 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp, align 4
   %tmp3 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %base, i32 3
@@ -27,6 +33,9 @@
   store <4 x i32> %tmp4, <4 x i32> addrspace(1)* %tmp7, align 4
   %tmp8 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg1, i64 5
   store <4 x i32> %tmp6, <4 x i32> addrspace(1)* %tmp8, align 4
+  br label %bb2
+
+bb2:
   ret void
 }
 
Index: llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -567,8 +567,10 @@
       SavedMutations.swap(Mutations);
 
     for (auto Region : Regions) {
-      if (Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx])
+      if (Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx]) {
+        ++RegionIdx;
         continue;
+      }
 
       RegionBegin = Region.first;
       RegionEnd = Region.second;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D85498.283806.patch
Type: text/x-patch
Size: 2270 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200807/7fa48e7c/attachment.bin>


More information about the llvm-commits mailing list