[llvm] [AMDGPU] Fix VS_CNT overflow assertion (PR #77935)

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 12 06:44:05 PST 2024


https://github.com/jayfoad created https://github.com/llvm/llvm-project/pull/77935

Always set the upper bound for VS_CNT higher than the lower bound.
Before #77439 this code was only executed on function entry where the
lower bound was 0 so it was not a problem.

Fixes #77931


>From 2516434ebdd8af27aa897b83cb6bb9ad0e51e8ec Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Fri, 12 Jan 2024 14:39:58 +0000
Subject: [PATCH] [AMDGPU] Fix VS_CNT overflow assertion

Always set the upper bound for VS_CNT higher than the lower bound.
Before #77439 this code was only executed on function entry where the
lower bound was 0 so it was not a problem.

Fixes #77931
---
 llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp   |  2 +-
 .../CodeGen/AMDGPU/insert-waitcnts-crash.ll   | 60 +++++++++++++++++++
 2 files changed, 61 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll

diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 1f480c248154e3..99cf661cc46d3e 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -293,7 +293,7 @@ class WaitcntBrackets {
   }
 
   void setStateOnFunctionEntryOrReturn() {
-    setScoreUB(VS_CNT, getWaitCountMax(VS_CNT));
+    setScoreUB(VS_CNT, getScoreLB(VS_CNT) + getWaitCountMax(VS_CNT));
     PendingEvents |= WaitEventMaskForInst[VS_CNT];
   }
 
diff --git a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll
new file mode 100644
index 00000000000000..cfec77e68eae96
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -stop-after=si-insert-waitcnts -verify-machineinstrs < %s | FileCheck %s
+
+declare fastcc void @bar()
+
+define fastcc i32 @foo() {
+  ; CHECK-LABEL: name: foo
+  ; CHECK: bb.0 (%ir-block.0):
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr30, $sgpr31, $vgpr31, $vgpr40, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_WAITCNT 0
+  ; CHECK-NEXT:   $sgpr16 = S_MOV_B32 $sgpr33
+  ; CHECK-NEXT:   $sgpr33 = S_MOV_B32 $sgpr32
+  ; CHECK-NEXT:   $sgpr17 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; CHECK-NEXT:   $exec_lo = S_MOV_B32 killed $sgpr17
+  ; CHECK-NEXT:   $sgpr32 = frame-setup S_ADDK_I32 $sgpr32, 512, implicit-def dead $scc
+  ; CHECK-NEXT:   $vgpr40 = V_WRITELANE_B32 killed $sgpr16, 2, undef $vgpr40
+  ; CHECK-NEXT:   BUNDLE implicit-def $sgpr16_sgpr17, implicit-def $sgpr16, implicit-def $sgpr16_lo16, implicit-def $sgpr16_hi16, implicit-def $sgpr17, implicit-def $sgpr17_lo16, implicit-def $sgpr17_hi16, implicit-def $scc {
+  ; CHECK-NEXT:     $sgpr16_sgpr17 = S_GETPC_B64
+  ; CHECK-NEXT:     $sgpr16 = S_ADD_U32 internal $sgpr16, target-flags(amdgpu-gotprel32-lo) @bar + 4, implicit-def $scc
+  ; CHECK-NEXT:     $sgpr17 = S_ADDC_U32 internal $sgpr17, target-flags(amdgpu-gotprel32-hi) @bar + 12, implicit-def $scc, implicit internal $scc
+  ; CHECK-NEXT:   }
+  ; CHECK-NEXT:   S_WAITCNT_VSCNT undef $sgpr_null, 0
+  ; CHECK-NEXT:   BUFFER_GL0_INV implicit $exec
+  ; CHECK-NEXT:   BUFFER_GL1_INV implicit $exec
+  ; CHECK-NEXT:   renamable $sgpr16_sgpr17 = S_LOAD_DWORDX2_IMM killed renamable $sgpr16_sgpr17, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
+  ; CHECK-NEXT:   $vgpr40 = V_WRITELANE_B32 killed $sgpr30, 0, $vgpr40
+  ; CHECK-NEXT:   $vgpr40 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr40
+  ; CHECK-NEXT:   S_WAITCNT 49279
+  ; CHECK-NEXT:   dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, @bar, csr_amdgpu, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit killed $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   $vcc_lo = S_MOV_B32 $exec_lo
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   successors: %bb.2(0x04000000), %bb.1(0x7c000000)
+  ; CHECK-NEXT:   liveins: $vcc_lo, $vgpr40
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.DummyReturnBlock:
+  ; CHECK-NEXT:   liveins: $vgpr40
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $sgpr31 = V_READLANE_B32 $vgpr40, 1
+  ; CHECK-NEXT:   $sgpr30 = V_READLANE_B32 $vgpr40, 0
+  ; CHECK-NEXT:   $sgpr4 = V_READLANE_B32 $vgpr40, 2
+  ; CHECK-NEXT:   $sgpr5 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; CHECK-NEXT:   $exec_lo = S_MOV_B32 killed $sgpr5
+  ; CHECK-NEXT:   $sgpr32 = frame-destroy S_ADDK_I32 $sgpr32, -512, implicit-def dead $scc
+  ; CHECK-NEXT:   $sgpr33 = S_MOV_B32 killed $sgpr4
+  ; CHECK-NEXT:   S_WAITCNT 16240
+  ; CHECK-NEXT:   S_SETPC_B64_return undef $sgpr30_sgpr31, implicit undef $vgpr0
+  fence acquire
+  call fastcc void @bar()
+  br label %1
+
+1:
+  br label %1
+}



More information about the llvm-commits mailing list