[llvm] 36ef291 - [AMDGPU] Fix hang caused by VS_CNT handling at calls (#78318)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 17 02:24:33 PST 2024


Author: Jay Foad
Date: 2024-01-17T10:24:29Z
New Revision: 36ef291d632e3c0ac109e8e1b19ae09d39ac5496

URL: https://github.com/llvm/llvm-project/commit/36ef291d632e3c0ac109e8e1b19ae09d39ac5496
DIFF: https://github.com/llvm/llvm-project/commit/36ef291d632e3c0ac109e8e1b19ae09d39ac5496.diff

LOG: [AMDGPU] Fix hang caused by VS_CNT handling at calls (#78318)

Fix a potential hang introduced by #77439 and #77935. This line:

  setScoreUB(VS_CNT, getScoreLB(VS_CNT) + getWaitCountMax(VS_CNT));

could potentially set UB lower than it was before, which confused
SIInsertWaitcnts's fixed point algorithm.

This was only triggered by a STORE instruction with an implicit-def, which
seems odd but apparently happens for some spills.

Added: 
    llvm/test/CodeGen/AMDGPU/insert-waitcnts-hang.mir

Modified: 
    llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index acf9489b2d37727..2ae028477ac7e3f 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -293,7 +293,7 @@ class WaitcntBrackets {
   }
 
   void setStateOnFunctionEntryOrReturn() {
-    setScoreUB(VS_CNT, getScoreLB(VS_CNT) + getWaitCountMax(VS_CNT));
+    setScoreUB(VS_CNT, getScoreUB(VS_CNT) + getWaitCountMax(VS_CNT));
     PendingEvents |= WaitEventMaskForInst[VS_CNT];
   }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-hang.mir b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-hang.mir
new file mode 100644
index 000000000000000..993933b2b5c723b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-hang.mir
@@ -0,0 +1,50 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass si-insert-waitcnts %s -o - | FileCheck %s
+
+---
+name: test
+tracksRegLiveness: true
+stack:
+  - { id: 0, name: '', type: spill-slot, offset: 4, size: 40, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+machineFunctionInfo:
+  frameOffsetReg: '$sgpr33'
+body: |
+  ; CHECK-LABEL: name: test
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $vgpr0, $vgpr1, $vgpr31, $vgpr40, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_WAITCNT 0
+  ; CHECK-NEXT:   SCRATCH_STORE_DWORDX4_SADDR killed undef $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 :: (store (s128) into %stack.0, align 4, addrspace 5)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   liveins: $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $vgpr31, $vgpr40, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $vgpr0_vgpr1:0x000000000000000F
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   dead $sgpr30_sgpr31 = SI_CALL killed undef renamable $sgpr0_sgpr1, 0, csr_amdgpu
+  ; CHECK-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   liveins: $sgpr46, $vgpr40
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SETPC_B64_return undef $sgpr30_sgpr31
+  bb.0:
+    successors: %bb.1(0x80000000)
+    liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $vgpr0, $vgpr1, $vgpr31, $vgpr40, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+
+    SCRATCH_STORE_DWORDX4_SADDR killed undef $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 :: (store (s128) into %stack.0, align 4, addrspace 5)
+
+  bb.1:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $vgpr31, $vgpr40, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $vgpr0_vgpr1:0x000000000000000F
+
+    dead $sgpr30_sgpr31 = SI_CALL killed undef renamable $sgpr0_sgpr1, 0, csr_amdgpu
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+
+  bb.2:
+    liveins: $sgpr46, $vgpr40
+
+    S_SETPC_B64_return undef $sgpr30_sgpr31
+...


        


More information about the llvm-commits mailing list