[llvm] [WIP][AMDGPU] frame index elimination hit assertion for scavenged nonreg (PR #130287)

Pankaj Dwivedi via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 7 08:16:09 PST 2025


https://github.com/PankajDwivedi-25 updated https://github.com/llvm/llvm-project/pull/130287

>From 621043f059f62f24b5a12d6fd3108722c5f29558 Mon Sep 17 00:00:00 2001
From: Pankaj kumar divedi <Pankajkumar.divedi at amd.com>
Date: Fri, 7 Mar 2025 19:23:13 +0530
Subject: [PATCH 1/3] [WIP][AMDGPU] frame index elimination hit assertion for
 scavenged nonreg

---
 .../AMDGPU/fix-s-add-i32-fi-elimination.ll    | 63 +++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/fix-s-add-i32-fi-elimination.ll

diff --git a/llvm/test/CodeGen/AMDGPU/fix-s-add-i32-fi-elimination.ll b/llvm/test/CodeGen/AMDGPU/fix-s-add-i32-fi-elimination.ll
new file mode 100644
index 0000000000000..15f2d16caeb5a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fix-s-add-i32-fi-elimination.ll
@@ -0,0 +1,63 @@
+; RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs 2>&1 %s | FileCheck %s --check-prefix=ASSERTION
+
+; This test case hit the assertion below, when register scavenger is unable to find a valid register.
+; ASSERTION: Assertion `getReg().isPhysical() && "setIsRenamable should only be called on physical registers
+
+define amdgpu_gfx [13 x i32] @_sect_5() {
+bb:
+  %i = alloca [8 x { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, { <3 x float>, float, <3 x float>, float }, float, i32, i32, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, i32, i32, i32, i32, i32, i32, i32, i32 }], i32 0, align 16, addrspace(5)
+  %i1 = getelementptr [8 x { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, { <3 x float>, float, <3 x float>, float }, float, i32, i32, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, i32, i32, i32, i32, i32, i32, i32, i32 }], ptr addrspace(5) %i, i32 0, i32 0, i32 20
+  %i2 = getelementptr [8 x { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, { <3 x float>, float, <3 x float>, float }, float, i32, i32, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, i32, i32, i32, i32, i32, i32, i32, i32 }], ptr addrspace(5) %i, i32 0, i32 6, i32 20
+  br label %bb3
+
+bb3:                                              ; preds = %bb3, %bb
+  %i4 = phi i32 [ 1, %bb ], [ 0, %bb3 ]
+  %i5 = icmp eq i32 %i4, 0
+  %i6 = select i1 %i5, ptr addrspace(5) %i2, ptr addrspace(5) %i1
+  store i32 0, ptr addrspace(5) %i6, align 16
+  %i7 = getelementptr { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, ptr addrspace(5) %i1, i32 0, i32 1
+  %i8 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i7
+  store float 0.000000e+00, ptr addrspace(5) %i8, align 4
+  %i9 = getelementptr { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, ptr addrspace(5) %i1, i32 0, i32 2
+  %i10 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i9
+  store i32 0, ptr addrspace(5) %i10, align 8
+  %i11 = getelementptr { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, ptr addrspace(5) %i1, i32 0, i32 3
+  %i12 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i11
+  store i32 0, ptr addrspace(5) %i12, align 4
+  %i13 = getelementptr { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, ptr addrspace(5) %i1, i32 0, i32 4
+  %i14 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i13
+  store i32 0, ptr addrspace(5) %i14, align 16
+  %i15 = getelementptr { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, ptr addrspace(5) %i1, i32 0, i32 5
+  %i16 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i15
+  store i32 0, ptr addrspace(5) %i16, align 4
+  %i17 = getelementptr { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, ptr addrspace(5) %i1, i32 0, i32 6
+  %i18 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i17
+  store <2 x float> zeroinitializer, ptr addrspace(5) %i18, align 8
+  %i19 = getelementptr { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, ptr addrspace(5) %i1, i32 0, i32 7
+  %i20 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i19
+  store i32 0, ptr addrspace(5) %i20, align 16
+  %i21 = getelementptr { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, ptr addrspace(5) %i1, i32 0, i32 8
+  %i22 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i21
+  store <3 x float> zeroinitializer, ptr addrspace(5) %i22, align 16
+  %i23 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i1
+  store <3 x float> zeroinitializer, ptr addrspace(5) %i23, align 16
+  %i24 = getelementptr { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, { <3 x float>, float, <3 x float>, float }, float, i32, i32, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, i32, i32, i32, i32, i32, i32, i32, i32 }, ptr addrspace(5) %i, i32 0, i32 1
+  %i25 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i24
+  store i32 0, ptr addrspace(5) %i25, align 4
+  %i26 = getelementptr { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, { <3 x float>, float, <3 x float>, float }, float, i32, i32, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, i32, i32, i32, i32, i32, i32, i32, i32 }, ptr addrspace(5) %i, i32 0, i32 2
+  %i27 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i26
+  store i32 0, ptr addrspace(5) %i27, align 8
+  %i28 = getelementptr { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, { <3 x float>, float, <3 x float>, float }, float, i32, i32, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, i32, i32, i32, i32, i32, i32, i32, i32 }, ptr addrspace(5) %i, i32 0, i32 3
+  %i29 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i28
+  store i32 0, ptr addrspace(5) %i29, align 4
+  %i30 = getelementptr { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, { <3 x float>, float, <3 x float>, float }, float, i32, i32, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, i32, i32, i32, i32, i32, i32, i32, i32 }, ptr addrspace(5) %i, i32 0, i32 4
+  %i31 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i30
+  store i32 0, ptr addrspace(5) %i31, align 16
+  %i32 = getelementptr { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, { <3 x float>, float, <3 x float>, float }, float, i32, i32, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, i32, i32, i32, i32, i32, i32, i32, i32 }, ptr addrspace(5) %i, i32 0, i32 5
+  %i33 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i32
+  store i32 0, ptr addrspace(5) %i33, align 4
+  %i34 = getelementptr { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, { <3 x float>, float, <3 x float>, float }, float, i32, i32, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, i32, i32, i32, i32, i32, i32, i32, i32 }, ptr addrspace(5) %i, i32 0, i32 6
+  %i35 = select i1 %i5, ptr addrspace(5) null, ptr addrspace(5) %i34
+  store i32 0, ptr addrspace(5) %i35, align 8
+  br label %bb3
+}

>From 189962bbe6d2d002a6e4254827af5466adce111a Mon Sep 17 00:00:00 2001
From: Pankaj kumar divedi <Pankajkumar.divedi at amd.com>
Date: Fri, 7 Mar 2025 20:36:47 +0530
Subject: [PATCH 2/3] fall back to default handling if scavenger couldn't find
 valid reg

---
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp     |  2 +
 .../AMDGPU/fix-s-add-i32-fi-elimination.ll    | 82 ++++++++++++++++++-
 2 files changed, 80 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 924aa45559366..179ef1330408f 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2750,6 +2750,8 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
           TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
                                                  MI, /*RestoreAfter=*/false, 0,
                                                  /*AllowSpill=*/false);
+          if (!TmpReg.isValid())
+            break;
           DstReg = TmpReg;
         }
 
diff --git a/llvm/test/CodeGen/AMDGPU/fix-s-add-i32-fi-elimination.ll b/llvm/test/CodeGen/AMDGPU/fix-s-add-i32-fi-elimination.ll
index 15f2d16caeb5a..64368a7234148 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-s-add-i32-fi-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/fix-s-add-i32-fi-elimination.ll
@@ -1,9 +1,83 @@
-; RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs 2>&1 %s | FileCheck %s --check-prefix=ASSERTION
-
-; This test case hit the assertion below, when register scavenger is unable to find a valid register.
-; ASSERTION: Assertion `getReg().isPhysical() && "setIsRenamable should only be called on physical registers
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s
 
 define amdgpu_gfx [13 x i32] @_sect_5() {
+; CHECK-LABEL: _sect_5:
+; CHECK:       ; %bb.0: ; %bb
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-NEXT:    s_movk_i32 s1, 0xf4
+; CHECK-NEXT:    s_movk_i32 s2, 0xf8
+; CHECK-NEXT:    s_movk_i32 s3, 0xfc
+; CHECK-NEXT:    s_movk_i32 s34, 0x100
+; CHECK-NEXT:    v_mov_b32_e32 v1, v0
+; CHECK-NEXT:    s_movk_i32 s35, 0x104
+; CHECK-NEXT:    s_movk_i32 s36, 0x108
+; CHECK-NEXT:    s_movk_i32 s37, 0x110
+; CHECK-NEXT:    s_movk_i32 s38, 0x120
+; CHECK-NEXT:    s_add_i32 s0, s32, 0xf0
+; CHECK-NEXT:    s_add_i32 s1, s32, s1
+; CHECK-NEXT:    s_add_i32 s2, s32, s2
+; CHECK-NEXT:    s_add_i32 s3, s32, s3
+; CHECK-NEXT:    s_add_i32 s34, s32, s34
+; CHECK-NEXT:    s_add_i32 s35, s32, s35
+; CHECK-NEXT:    s_add_i32 s36, s32, s36
+; CHECK-NEXT:    s_add_i32 s37, s32, s37
+; CHECK-NEXT:    s_add_i32 s38, s32, s38
+; CHECK-NEXT:    s_or_b32 s39, s32, 4
+; CHECK-NEXT:    s_or_b32 s40, s32, 8
+; CHECK-NEXT:    s_or_b32 s41, s32, 12
+; CHECK-NEXT:    s_add_i32 s42, s32, 16
+; CHECK-NEXT:    s_add_i32 s43, s32, 20
+; CHECK-NEXT:    s_add_i32 s44, s32, 24
+; CHECK-NEXT:    s_mov_b32 s46, 1
+; CHECK-NEXT:    s_movk_i32 s45, 0x990
+; CHECK-NEXT:    s_mov_b32 s48, 0
+; CHECK-NEXT:  .LBB0_1: ; %bb3
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    s_cmp_eq_u32 s46, 0
+; CHECK-NEXT:    s_mov_b32 s49, s48
+; CHECK-NEXT:    s_mov_b32 s50, s48
+; CHECK-NEXT:    s_cselect_b32 s47, s45, 0xf0
+; CHECK-NEXT:    s_cselect_b32 s51, 0, s1
+; CHECK-NEXT:    s_cselect_b32 s55, 0, s35
+; CHECK-NEXT:    v_dual_mov_b32 v2, s48 :: v_dual_mov_b32 v3, s49
+; CHECK-NEXT:    s_cselect_b32 s52, 0, s2
+; CHECK-NEXT:    s_cselect_b32 s56, 0, s36
+; CHECK-NEXT:    s_cselect_b32 vcc_lo, 0, s43
+; CHECK-NEXT:    v_mov_b32_e32 v4, s50
+; CHECK-NEXT:    s_cselect_b32 s53, 0, s3
+; CHECK-NEXT:    s_cselect_b32 s54, 0, s34
+; CHECK-NEXT:    s_cselect_b32 s57, 0, s37
+; CHECK-NEXT:    s_cselect_b32 s58, 0, s38
+; CHECK-NEXT:    s_cselect_b32 s59, 0, s0
+; CHECK-NEXT:    s_cselect_b32 s60, 0, s39
+; CHECK-NEXT:    s_cselect_b32 s61, 0, s40
+; CHECK-NEXT:    s_cselect_b32 s62, 0, s41
+; CHECK-NEXT:    s_cselect_b32 s63, 0, s42
+; CHECK-NEXT:    s_cselect_b32 vcc_hi, 0, s44
+; CHECK-NEXT:    s_mov_b32 s46, s48
+; CHECK-NEXT:    s_add_i32 s47, s47, s32
+; CHECK-NEXT:    scratch_store_b32 off, v0, s51
+; CHECK-NEXT:    scratch_store_b32 off, v0, s52
+; CHECK-NEXT:    scratch_store_b32 off, v0, s53
+; CHECK-NEXT:    scratch_store_b32 off, v0, s54
+; CHECK-NEXT:    scratch_store_b32 off, v0, s55
+; CHECK-NEXT:    scratch_store_b64 off, v[0:1], s56
+; CHECK-NEXT:    scratch_store_b32 off, v0, s57
+; CHECK-NEXT:    scratch_store_b32 off, v0, s47
+; CHECK-NEXT:    scratch_store_b96 off, v[2:4], s58
+; CHECK-NEXT:    scratch_store_b96 off, v[2:4], s59
+; CHECK-NEXT:    scratch_store_b32 off, v0, s60
+; CHECK-NEXT:    scratch_store_b32 off, v0, s61
+; CHECK-NEXT:    scratch_store_b32 off, v0, s62
+; CHECK-NEXT:    scratch_store_b32 off, v0, s63
+; CHECK-NEXT:    scratch_store_b32 off, v0, vcc_lo
+; CHECK-NEXT:    scratch_store_b32 off, v0, vcc_hi
+; CHECK-NEXT:    s_mov_b32 vcc_lo, exec_lo
+; CHECK-NEXT:    s_cbranch_vccnz .LBB0_1
+; CHECK-NEXT:  ; %bb.2: ; %DummyReturnBlock
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
 bb:
   %i = alloca [8 x { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, { <3 x float>, float, <3 x float>, float }, float, i32, i32, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, i32, i32, i32, i32, i32, i32, i32, i32 }], i32 0, align 16, addrspace(5)
   %i1 = getelementptr [8 x { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, { <3 x float>, float, <3 x float>, float }, float, i32, i32, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, { i32, float, i32, i32, i32, i32, <2 x float>, i32, <3 x float>, <3 x float> }, i32, i32, i32, i32, i32, i32, i32, i32, i32 }], ptr addrspace(5) %i, i32 0, i32 0, i32 20

>From 7f3e9f8770e34d4f32f26fca1a0d396fbbb433b3 Mon Sep 17 00:00:00 2001
From: Pankaj kumar divedi <Pankajkumar.divedi at amd.com>
Date: Fri, 7 Mar 2025 21:45:10 +0530
Subject: [PATCH 3/3] address review

---
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 179ef1330408f..2181b5af7946e 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2750,7 +2750,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
           TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
                                                  MI, /*RestoreAfter=*/false, 0,
                                                  /*AllowSpill=*/false);
-          if (!TmpReg.isValid())
+          if (!TmpReg)
             break;
           DstReg = TmpReg;
         }



More information about the llvm-commits mailing list