[llvm] [AMDGPU] fix eliminateFrameIndex to use SGPR frame index (PR #178991)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 16 09:01:43 PST 2026
https://github.com/hjagasiaAMD updated https://github.com/llvm/llvm-project/pull/178991
>From ff1a452313414865ea5db7d0d59b790c06b184dc Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Fri, 30 Jan 2026 17:10:15 -0600
Subject: [PATCH 1/7] [AMDGPU] fix eliminateFrameIndex to use SGPR frame index
---
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 2 --
.../AMDGPU/eliminate-frame-index-select.mir | 35 +++++++++++++++++++
2 files changed, 35 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 96c2f6530fe4c..3df9a78a391bc 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -3257,8 +3257,6 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
if (!IsSALU)
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
.addReg(TmpResultReg, RegState::Kill);
- else
- ResultReg = TmpResultReg;
// If there were truly no free SGPRs, we need to undo everything.
if (!TmpScaledReg.isValid()) {
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
new file mode 100644
index 0000000000000..8ce4428a3f20b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
@@ -0,0 +1,35 @@
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs -mcpu=gfx1030 -run-pass prologepilog -o - %s | FileCheck %s
+# CHECK: $sgpr5 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+# CHECK: renamable $sgpr4 = S_CSELECT_B32 0, killed $sgpr5, implicit $scc
+--- |
+ %struct.wobble = type { %struct.quux }
+ %struct.quux = type { float, float, float }
+ define void @wobble() {
+ %alloca = alloca %struct.wobble, align 4, addrspace(5)
+ ret void
+ }
+...
+---
+name: wobble
+tracksRegLiveness: true
+stack:
+ - { id: 0, name: alloca, offset: 0, size: 12}
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
+ renamable $sgpr4 = S_MOV_B32 0
+ renamable $sgpr54 = S_ADD_I32 %stack.0.alloca, 4, implicit-def dead $scc
+ bb.1:
+ liveins: $sgpr4, $sgpr54:0x000000000000000F
+ S_CMP_EQ_U32 killed renamable $sgpr4, 0, implicit-def $scc
+ renamable $sgpr4 = S_CSELECT_B32 0, %stack.0.alloca, implicit $scc
+ renamable $sgpr5 = S_CSELECT_B32 4, renamable $sgpr54, implicit $scc
+ S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
+ S_BRANCH %bb.2
+ bb.2:
+ SI_RETURN
+...
>From adffe15966b7d5baccdc5403b1df3102e1c91ce6 Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Mon, 2 Feb 2026 16:30:51 -0600
Subject: [PATCH 2/7] [AMDGPU] Fix lit tests
---
.../AMDGPU/eliminate-frame-index-select.ll | 35 +++++++++++++++++++
.../AMDGPU/eliminate-frame-index-select.mir | 28 +++++++--------
2 files changed, 47 insertions(+), 16 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
new file mode 100644
index 0000000000000..f246ba57410e2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s
+; CHECK-LABEL: .LBB0_1:
+; CHECK: v_lshrrev_b32_e64 [[V:v[0-9]+]], 5, s33
+; CHECK: v_add_nc_u32_e32 [[V]], 12, [[V]]
+; CHECK: v_readfirstlane_b32 [[S:s[0-9]+]], [[V]]
+; CHECK: s_cselect_b32 {{s[0-9]+}}, 0, [[S]]
+
+%struct.wobble = type { %struct.quux }
+%struct.quux = type { float, float, float }
+
+declare %struct.wobble @foo(%struct.quux)
+
+define void @wobble() #0 {
+bb:
+ %alloca = alloca %struct.wobble, align 4, addrspace(5)
+ br label %bb1
+
+bb1: ; preds = %bb1, %bb
+ %phi = phi i32 [ 0, %bb ], [ 1, %bb1 ]
+ store i32 0, ptr addrspacecast (ptr addrspace(5) null to ptr), align 4
+ %getelementptr = getelementptr i8, ptr addrspace(5) %alloca, i32 4
+ %icmp = icmp eq i32 %phi, 0
+ %load = load float, ptr addrspace(5) null, align 2147483648
+ %load2 = load float, ptr addrspace(5) %alloca, align 4
+ %select = select i1 %icmp, float %load, float %load2
+ %insertvalue = insertvalue %struct.quux zeroinitializer, float %select, 0
+ %load3 = load float, ptr addrspace(5) inttoptr (i32 4 to ptr addrspace(5)), align 4
+ %load4 = load float, ptr addrspace(5) %getelementptr, align 4
+ %select5 = select i1 %icmp, float %load3, float %load4
+ %insertvalue6 = insertvalue %struct.quux %insertvalue, float %select5, 1
+ %call = call %struct.wobble @foo(%struct.quux %insertvalue6)
+ br label %bb1
+}
+
+attributes #0 = { "target-cpu"="gfx1030" }
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
index 8ce4428a3f20b..cbd6149b97d98 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
@@ -1,34 +1,30 @@
# RUN: llc -mtriple=amdgcn -verify-machineinstrs -mcpu=gfx1030 -run-pass prologepilog -o - %s | FileCheck %s
-# CHECK: $sgpr5 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
-# CHECK: renamable $sgpr4 = S_CSELECT_B32 0, killed $sgpr5, implicit $scc
---- |
- %struct.wobble = type { %struct.quux }
- %struct.quux = type { float, float, float }
- define void @wobble() {
- %alloca = alloca %struct.wobble, align 4, addrspace(5)
- ret void
- }
-...
+# CHECK: bb.1:
+# CHECK: [[V1:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+# CHECK: [[R1:\$sgpr[0-9]+]] = V_READFIRSTLANE_B32 [[V1]], implicit $exec
+# CHECK: S_CSELECT_B32 0, killed [[R1]], implicit $scc
+# CHECK: [[V2:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+# CHECK: [[R2:\$sgpr[0-9]+]] = V_READFIRSTLANE_B32 [[V2]], implicit $exec
+# CHECK: S_CSELECT_B32 killed [[R2]], 0, implicit $scc
---
-name: wobble
+name: test
tracksRegLiveness: true
stack:
- - { id: 0, name: alloca, offset: 0, size: 12}
+ - { id: 0, size: 12}
machineFunctionInfo:
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
frameOffsetReg: '$sgpr33'
stackPtrOffsetReg: '$sgpr32'
body: |
bb.0:
- renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
renamable $sgpr4 = S_MOV_B32 0
- renamable $sgpr54 = S_ADD_I32 %stack.0.alloca, 4, implicit-def dead $scc
+ renamable $sgpr54 = S_ADD_I32 %stack.0, 4, implicit-def dead $scc
bb.1:
liveins: $sgpr4, $sgpr54:0x000000000000000F
S_CMP_EQ_U32 killed renamable $sgpr4, 0, implicit-def $scc
- renamable $sgpr4 = S_CSELECT_B32 0, %stack.0.alloca, implicit $scc
+ renamable $sgpr4 = S_CSELECT_B32 0, %stack.0, implicit $scc
+ renamable $sgpr6 = S_CSELECT_B32 %stack.0, 0, implicit $scc
renamable $sgpr5 = S_CSELECT_B32 4, renamable $sgpr54, implicit $scc
- S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
S_BRANCH %bb.2
bb.2:
SI_RETURN
>From 20bd5d9d4e96806c37605f130af7286ee878a992 Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Wed, 4 Feb 2026 12:18:46 -0600
Subject: [PATCH 3/7] Update llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
index f246ba57410e2..d1b89a40d738b 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck %s
; CHECK-LABEL: .LBB0_1:
; CHECK: v_lshrrev_b32_e64 [[V:v[0-9]+]], 5, s33
; CHECK: v_add_nc_u32_e32 [[V]], 12, [[V]]
>From 6e65b09e97991848b5320769aefaa11ca4c5b735 Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Wed, 4 Feb 2026 12:19:03 -0600
Subject: [PATCH 4/7] Update llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
index d1b89a40d738b..0def34ac3fb6d 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
@@ -32,4 +32,4 @@ bb1: ; preds = %bb1, %bb
br label %bb1
}
-attributes #0 = { "target-cpu"="gfx1030" }
+attributes #0 = { nounwind }
>From ef5c1d8c6fb7f6e39d663da2f164b638ab8abc4f Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Fri, 6 Feb 2026 12:06:43 -0600
Subject: [PATCH 5/7] [AMDGPU] Fix lit tests
---
.../AMDGPU/eliminate-frame-index-select.mir | 33 ++++++++++++++++++-
1 file changed, 32 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
index cbd6149b97d98..3d5a842964b42 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
@@ -7,7 +7,7 @@
# CHECK: [[R2:\$sgpr[0-9]+]] = V_READFIRSTLANE_B32 [[V2]], implicit $exec
# CHECK: S_CSELECT_B32 killed [[R2]], 0, implicit $scc
---
-name: test
+name: test_s_cselect_b32
tracksRegLiveness: true
stack:
- { id: 0, size: 12}
@@ -17,6 +17,7 @@ machineFunctionInfo:
stackPtrOffsetReg: '$sgpr32'
body: |
bb.0:
+ renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
renamable $sgpr4 = S_MOV_B32 0
renamable $sgpr54 = S_ADD_I32 %stack.0, 4, implicit-def dead $scc
bb.1:
@@ -29,3 +30,33 @@ body: |
bb.2:
SI_RETURN
...
+# CHECK: bb.1:
+# CHECK: [[V0:\$vgpr[0-9]+]] = V_MOV_B32_e32 0, implicit $exec
+# CHECK: [[V1:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+# CHECK: [[V1]] = V_ADD_U32_e32 4, killed [[V1]], implicit $exec
+# CHECK: V_CNDMASK_B32_e32 killed [[V1]], killed [[V0]], implicit $vcc_lo, implicit $vcc_lo, implicit $exec
+---
+name: test_v_cndmask_e32
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 12}
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
+ renamable $sgpr4 = S_MOV_B32 0
+ renamable $sgpr54 = S_ADD_I32 %stack.0, 4, implicit-def dead $scc
+ bb.1:
+ liveins: $sgpr4, $sgpr54:0x000000000000000F
+ V_CMP_EQ_U32_e64 def $vcc_lo, killed $sgpr4, 0, implicit $exec
+ renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ renamable $vgpr0 = V_CNDMASK_B32_e32 %stack.0, killed $vgpr2, implicit $vcc_lo, implicit $vcc, implicit $exec
+ renamable $vgpr3 = V_MOV_B32_e32 4, implicit $exec
+ renamable $vgpr1 = V_CNDMASK_B32_e32 $sgpr54, killed $vgpr3, implicit $vcc_lo, implicit $vcc, implicit $exec
+ S_BRANCH %bb.2
+ bb.2:
+ SI_RETURN
+...
>From 8267ffe9ca48ecf0c5f51d4be7027eac5718ffd0 Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Mon, 9 Feb 2026 18:12:39 -0600
Subject: [PATCH 6/7] [AMDGPU] Avoid null regclass crash in eliminateFrameIndex for VOP3 FI operands & add test for V_CNDMASK_B32_e64
---
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 10 +++++-
.../AMDGPU/eliminate-frame-index-select.mir | 33 ++++++++++++-------
2 files changed, 31 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 3df9a78a391bc..0c67cac71e6ad 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -3077,7 +3077,15 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
if (!IsMUBUF && !MFI->isBottomOfStack()) {
// Convert to a swizzled stack address by scaling by the wave size.
// In an entry function/kernel the offset is already swizzled.
- bool IsSALU = isSGPRClass(TII->getRegClass(MI->getDesc(), FIOperandNum));
+ const TargetRegisterClass *FiRC =
+ TII->getRegClass(MI->getDesc(), FIOperandNum);
+ bool IsSALU = false;
+ // If FiRC is null, fall back to non-SALU handling to avoid crashing.
+ // Some instructions may not have regclass information for FI operands
+ // yet.
+ if (FiRC)
+ IsSALU = isSGPRClass(FiRC);
+
bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) &&
!MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
const TargetRegisterClass *RC = IsSALU && !LiveSCC
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
index 3d5a842964b42..e258cbac08a43 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
@@ -30,10 +30,9 @@ body: |
bb.2:
SI_RETURN
...
-# CHECK: bb.1:
+# CHECK: bb.0:
# CHECK: [[V0:\$vgpr[0-9]+]] = V_MOV_B32_e32 0, implicit $exec
# CHECK: [[V1:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
-# CHECK: [[V1]] = V_ADD_U32_e32 4, killed [[V1]], implicit $exec
# CHECK: V_CNDMASK_B32_e32 killed [[V1]], killed [[V0]], implicit $vcc_lo, implicit $vcc_lo, implicit $exec
---
name: test_v_cndmask_e32
@@ -46,17 +45,29 @@ machineFunctionInfo:
stackPtrOffsetReg: '$sgpr32'
body: |
bb.0:
- renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
- renamable $sgpr4 = S_MOV_B32 0
- renamable $sgpr54 = S_ADD_I32 %stack.0, 4, implicit-def dead $scc
- bb.1:
- liveins: $sgpr4, $sgpr54:0x000000000000000F
+ liveins: $sgpr4
V_CMP_EQ_U32_e64 def $vcc_lo, killed $sgpr4, 0, implicit $exec
renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec
renamable $vgpr0 = V_CNDMASK_B32_e32 %stack.0, killed $vgpr2, implicit $vcc_lo, implicit $vcc, implicit $exec
- renamable $vgpr3 = V_MOV_B32_e32 4, implicit $exec
- renamable $vgpr1 = V_CNDMASK_B32_e32 $sgpr54, killed $vgpr3, implicit $vcc_lo, implicit $vcc, implicit $exec
- S_BRANCH %bb.2
- bb.2:
+ SI_RETURN
+...
+# CHECK: bb.0:
+# CHECK: [[V0:\$vgpr[0-9]+]] = V_MOV_B32_e32 0, implicit $exec
+# CHECK: [[V1:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+# CHECK: V_CNDMASK_B32_e64 killed [[V1]], killed [[V0]], 0, 0, $sgpr4, implicit $exec
+---
+name: test_v_cndmask_e64
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 12}
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $sgpr4
+ renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ renamable $vgpr0 = V_CNDMASK_B32_e64 %stack.0, killed $vgpr2, 0, 0, $sgpr4, implicit $exec
SI_RETURN
...
>From 942644202a34bc564e673ce0146bb27d27494aef Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Mon, 16 Feb 2026 10:32:55 -0600
Subject: [PATCH 7/7] [AMDGPU] Avoid null regclass crash in eliminateFrameIndex for VOP3 FI operands
---
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 0c67cac71e6ad..6174dab19c952 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -3079,13 +3079,10 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
// In an entry function/kernel the offset is already swizzled.
const TargetRegisterClass *FiRC =
TII->getRegClass(MI->getDesc(), FIOperandNum);
- bool IsSALU = false;
// If FiRC is null, fall back to non-SALU handling to avoid crashing.
// Some instructions may not have regclass information for FI operands
// yet.
- if (FiRC)
- IsSALU = isSGPRClass(FiRC);
-
+ bool IsSALU = FiRC && isSGPRClass(FiRC);
bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) &&
!MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
const TargetRegisterClass *RC = IsSALU && !LiveSCC
More information about the llvm-commits mailing list