[llvm] [AMDGPU] fix eliminateFrameIndex to use SGPR frame index (PR #178991)

via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 16 09:01:43 PST 2026


https://github.com/hjagasiaAMD updated https://github.com/llvm/llvm-project/pull/178991

>From ff1a452313414865ea5db7d0d59b790c06b184dc Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Fri, 30 Jan 2026 17:10:15 -0600
Subject: [PATCH 1/7] [AMDGPU] fix eliminateFrameIndex to use SGPR frame index

---
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp     |  2 --
 .../AMDGPU/eliminate-frame-index-select.mir   | 35 +++++++++++++++++++
 2 files changed, 35 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir

diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 96c2f6530fe4c..3df9a78a391bc 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -3257,8 +3257,6 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
           if (!IsSALU)
             BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
                 .addReg(TmpResultReg, RegState::Kill);
-          else
-            ResultReg = TmpResultReg;
           // If there were truly no free SGPRs, we need to undo everything.
           if (!TmpScaledReg.isValid()) {
             BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
new file mode 100644
index 0000000000000..8ce4428a3f20b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
@@ -0,0 +1,35 @@
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs -mcpu=gfx1030 -run-pass prologepilog -o - %s | FileCheck %s
+# CHECK: $sgpr5 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+# CHECK: renamable $sgpr4 = S_CSELECT_B32 0, killed $sgpr5, implicit $scc
+--- |
+  %struct.wobble = type { %struct.quux }
+  %struct.quux = type { float, float, float }
+  define void @wobble() {
+    %alloca = alloca %struct.wobble, align 4, addrspace(5)
+    ret void
+  }
+...
+---
+name:            wobble
+tracksRegLiveness: true
+stack:
+  - { id: 0, name: alloca, offset: 0, size: 12}
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
+    renamable $sgpr4 = S_MOV_B32 0
+    renamable $sgpr54 = S_ADD_I32 %stack.0.alloca, 4, implicit-def dead $scc
+  bb.1:
+    liveins: $sgpr4, $sgpr54:0x000000000000000F
+    S_CMP_EQ_U32 killed renamable $sgpr4, 0, implicit-def $scc
+    renamable $sgpr4 = S_CSELECT_B32 0, %stack.0.alloca, implicit $scc
+    renamable $sgpr5 = S_CSELECT_B32 4, renamable $sgpr54, implicit $scc
+    S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
+    S_BRANCH %bb.2
+  bb.2:
+    SI_RETURN
+...

>From adffe15966b7d5baccdc5403b1df3102e1c91ce6 Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Mon, 2 Feb 2026 16:30:51 -0600
Subject: [PATCH 2/7] [AMDGPU] Fix lit tests

---
 .../AMDGPU/eliminate-frame-index-select.ll    | 35 +++++++++++++++++++
 .../AMDGPU/eliminate-frame-index-select.mir   | 28 +++++++--------
 2 files changed, 47 insertions(+), 16 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll

diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
new file mode 100644
index 0000000000000..f246ba57410e2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s
+; CHECK-LABEL: .LBB0_1:
+; CHECK: v_lshrrev_b32_e64 [[V:v[0-9]+]], 5, s33
+; CHECK: v_add_nc_u32_e32 [[V]], 12, [[V]]
+; CHECK: v_readfirstlane_b32 [[S:s[0-9]+]], [[V]]
+; CHECK: s_cselect_b32 {{s[0-9]+}}, 0, [[S]]
+
+%struct.wobble = type { %struct.quux }
+%struct.quux = type { float, float, float }
+
+declare %struct.wobble @foo(%struct.quux)
+
+define void @wobble() #0 {
+bb:
+  %alloca = alloca %struct.wobble, align 4, addrspace(5)
+  br label %bb1
+
+bb1:                                              ; preds = %bb1, %bb
+  %phi = phi i32 [ 0, %bb ], [ 1, %bb1 ]
+  store i32 0, ptr addrspacecast (ptr addrspace(5) null to ptr), align 4
+  %getelementptr = getelementptr i8, ptr addrspace(5) %alloca, i32 4
+  %icmp = icmp eq i32 %phi, 0
+  %load = load float, ptr addrspace(5) null, align 2147483648
+  %load2 = load float, ptr addrspace(5) %alloca, align 4
+  %select = select i1 %icmp, float %load, float %load2
+  %insertvalue = insertvalue %struct.quux zeroinitializer, float %select, 0
+  %load3 = load float, ptr addrspace(5) inttoptr (i32 4 to ptr addrspace(5)), align 4
+  %load4 = load float, ptr addrspace(5) %getelementptr, align 4
+  %select5 = select i1 %icmp, float %load3, float %load4
+  %insertvalue6 = insertvalue %struct.quux %insertvalue, float %select5, 1
+  %call = call %struct.wobble @foo(%struct.quux %insertvalue6)
+  br label %bb1
+}
+
+attributes #0 = { "target-cpu"="gfx1030" }
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
index 8ce4428a3f20b..cbd6149b97d98 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
@@ -1,34 +1,30 @@
 # RUN: llc -mtriple=amdgcn -verify-machineinstrs -mcpu=gfx1030 -run-pass prologepilog -o - %s | FileCheck %s
-# CHECK: $sgpr5 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
-# CHECK: renamable $sgpr4 = S_CSELECT_B32 0, killed $sgpr5, implicit $scc
---- |
-  %struct.wobble = type { %struct.quux }
-  %struct.quux = type { float, float, float }
-  define void @wobble() {
-    %alloca = alloca %struct.wobble, align 4, addrspace(5)
-    ret void
-  }
-...
+# CHECK: bb.1:
+# CHECK: [[V1:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+# CHECK: [[R1:\$sgpr[0-9]+]] = V_READFIRSTLANE_B32 [[V1]], implicit $exec
+# CHECK: S_CSELECT_B32 0, killed [[R1]], implicit $scc
+# CHECK: [[V2:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+# CHECK: [[R2:\$sgpr[0-9]+]] = V_READFIRSTLANE_B32 [[V2]], implicit $exec
+# CHECK: S_CSELECT_B32 killed [[R2]], 0, implicit $scc
 ---
-name:            wobble
+name:            test
 tracksRegLiveness: true
 stack:
-  - { id: 0, name: alloca, offset: 0, size: 12}
+  - { id: 0, size: 12}
 machineFunctionInfo:
   scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
   frameOffsetReg:  '$sgpr33'
   stackPtrOffsetReg: '$sgpr32'
 body:             |
   bb.0:
-    renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
     renamable $sgpr4 = S_MOV_B32 0
-    renamable $sgpr54 = S_ADD_I32 %stack.0.alloca, 4, implicit-def dead $scc
+    renamable $sgpr54 = S_ADD_I32 %stack.0, 4, implicit-def dead $scc
   bb.1:
     liveins: $sgpr4, $sgpr54:0x000000000000000F
     S_CMP_EQ_U32 killed renamable $sgpr4, 0, implicit-def $scc
-    renamable $sgpr4 = S_CSELECT_B32 0, %stack.0.alloca, implicit $scc
+    renamable $sgpr4 = S_CSELECT_B32 0, %stack.0, implicit $scc
+    renamable $sgpr6 = S_CSELECT_B32 %stack.0, 0, implicit $scc
     renamable $sgpr5 = S_CSELECT_B32 4, renamable $sgpr54, implicit $scc
-    S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
     S_BRANCH %bb.2
   bb.2:
     SI_RETURN

>From 20bd5d9d4e96806c37605f130af7286ee878a992 Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Wed, 4 Feb 2026 12:18:46 -0600
Subject: [PATCH 3/7] Update
 llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll

Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
 llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
index f246ba57410e2..d1b89a40d738b 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1030  < %s | FileCheck %s
 ; CHECK-LABEL: .LBB0_1:
 ; CHECK: v_lshrrev_b32_e64 [[V:v[0-9]+]], 5, s33
 ; CHECK: v_add_nc_u32_e32 [[V]], 12, [[V]]

>From 6e65b09e97991848b5320769aefaa11ca4c5b735 Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Wed, 4 Feb 2026 12:19:03 -0600
Subject: [PATCH 4/7] Update
 llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll

Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
 llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
index d1b89a40d738b..0def34ac3fb6d 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
@@ -32,4 +32,4 @@ bb1:                                              ; preds = %bb1, %bb
   br label %bb1
 }
 
-attributes #0 = { "target-cpu"="gfx1030" }
+attributes #0 = { nounwind }

>From ef5c1d8c6fb7f6e39d663da2f164b638ab8abc4f Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Fri, 6 Feb 2026 12:06:43 -0600
Subject: [PATCH 5/7] [AMDGPU] Fix lit tests

---
 .../AMDGPU/eliminate-frame-index-select.mir   | 33 ++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
index cbd6149b97d98..3d5a842964b42 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
@@ -7,7 +7,7 @@
 # CHECK: [[R2:\$sgpr[0-9]+]] = V_READFIRSTLANE_B32 [[V2]], implicit $exec
 # CHECK: S_CSELECT_B32 killed [[R2]], 0, implicit $scc
 ---
-name:            test
+name:            test_s_cselect_b32
 tracksRegLiveness: true
 stack:
   - { id: 0, size: 12}
@@ -17,6 +17,7 @@ machineFunctionInfo:
   stackPtrOffsetReg: '$sgpr32'
 body:             |
   bb.0:
+    renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
     renamable $sgpr4 = S_MOV_B32 0
     renamable $sgpr54 = S_ADD_I32 %stack.0, 4, implicit-def dead $scc
   bb.1:
@@ -29,3 +30,33 @@ body:             |
   bb.2:
     SI_RETURN
 ...
+# CHECK: bb.1:
+# CHECK: [[V0:\$vgpr[0-9]+]] = V_MOV_B32_e32 0, implicit $exec
+# CHECK: [[V1:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+# CHECK: [[V1]] = V_ADD_U32_e32 4, killed [[V1]], implicit $exec
+# CHECK: V_CNDMASK_B32_e32 killed [[V1]], killed [[V0]], implicit $vcc_lo, implicit $vcc_lo, implicit $exec
+---
+name:            test_v_cndmask_e32
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 12}
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
+    renamable $sgpr4 = S_MOV_B32 0
+    renamable $sgpr54 = S_ADD_I32 %stack.0, 4, implicit-def dead $scc
+  bb.1:
+    liveins: $sgpr4, $sgpr54:0x000000000000000F
+    V_CMP_EQ_U32_e64 def $vcc_lo, killed $sgpr4, 0, implicit $exec
+    renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    renamable $vgpr0 = V_CNDMASK_B32_e32 %stack.0, killed $vgpr2, implicit $vcc_lo, implicit $vcc, implicit $exec
+    renamable $vgpr3 = V_MOV_B32_e32 4, implicit $exec
+    renamable $vgpr1 = V_CNDMASK_B32_e32 $sgpr54, killed $vgpr3, implicit $vcc_lo, implicit $vcc, implicit $exec
+    S_BRANCH %bb.2
+  bb.2:
+    SI_RETURN
+...

>From 8267ffe9ca48ecf0c5f51d4be7027eac5718ffd0 Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Mon, 9 Feb 2026 18:12:39 -0600
Subject: [PATCH 6/7] [AMDGPU] Avoid null regclass crash in eliminateFrameIndex
 for VOP3 FI operands & add test for V_CNDMASK_B32_e64

---
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp     | 10 +++++-
 .../AMDGPU/eliminate-frame-index-select.mir   | 33 ++++++++++++-------
 2 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 3df9a78a391bc..0c67cac71e6ad 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -3077,7 +3077,15 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     if (!IsMUBUF && !MFI->isBottomOfStack()) {
       // Convert to a swizzled stack address by scaling by the wave size.
       // In an entry function/kernel the offset is already swizzled.
-      bool IsSALU = isSGPRClass(TII->getRegClass(MI->getDesc(), FIOperandNum));
+      const TargetRegisterClass *FiRC =
+          TII->getRegClass(MI->getDesc(), FIOperandNum);
+      bool IsSALU = false;
+      // If FiRC is null, fall back to non-SALU handling to avoid crashing.
+      // Some instructions may not have regclass information for FI operands
+      // yet.
+      if (FiRC)
+        IsSALU = isSGPRClass(FiRC);
+
       bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) &&
                      !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
       const TargetRegisterClass *RC = IsSALU && !LiveSCC
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
index 3d5a842964b42..e258cbac08a43 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
@@ -30,10 +30,9 @@ body:             |
   bb.2:
     SI_RETURN
 ...
-# CHECK: bb.1:
+# CHECK: bb.0:
 # CHECK: [[V0:\$vgpr[0-9]+]] = V_MOV_B32_e32 0, implicit $exec
 # CHECK: [[V1:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
-# CHECK: [[V1]] = V_ADD_U32_e32 4, killed [[V1]], implicit $exec
 # CHECK: V_CNDMASK_B32_e32 killed [[V1]], killed [[V0]], implicit $vcc_lo, implicit $vcc_lo, implicit $exec
 ---
 name:            test_v_cndmask_e32
@@ -46,17 +45,29 @@ machineFunctionInfo:
   stackPtrOffsetReg: '$sgpr32'
 body:             |
   bb.0:
-    renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
-    renamable $sgpr4 = S_MOV_B32 0
-    renamable $sgpr54 = S_ADD_I32 %stack.0, 4, implicit-def dead $scc
-  bb.1:
-    liveins: $sgpr4, $sgpr54:0x000000000000000F
+    liveins: $sgpr4
     V_CMP_EQ_U32_e64 def $vcc_lo, killed $sgpr4, 0, implicit $exec
     renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec
     renamable $vgpr0 = V_CNDMASK_B32_e32 %stack.0, killed $vgpr2, implicit $vcc_lo, implicit $vcc, implicit $exec
-    renamable $vgpr3 = V_MOV_B32_e32 4, implicit $exec
-    renamable $vgpr1 = V_CNDMASK_B32_e32 $sgpr54, killed $vgpr3, implicit $vcc_lo, implicit $vcc, implicit $exec
-    S_BRANCH %bb.2
-  bb.2:
+    SI_RETURN
+...
+# CHECK: bb.0:
+# CHECK: [[V0:\$vgpr[0-9]+]] = V_MOV_B32_e32 0, implicit $exec
+# CHECK: [[V1:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+# CHECK: V_CNDMASK_B32_e64 killed [[V1]], killed [[V0]], 0, 0, $sgpr4, implicit $exec
+---
+name:            test_v_cndmask_e64
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 12}
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    liveins: $sgpr4
+    renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    renamable $vgpr0 = V_CNDMASK_B32_e64 %stack.0, killed $vgpr2, 0, 0, $sgpr4, implicit $exec
     SI_RETURN
 ...

>From 942644202a34bc564e673ce0146bb27d27494aef Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Mon, 16 Feb 2026 10:32:55 -0600
Subject: [PATCH 7/7] [AMDGPU] Avoid null regclass crash in eliminateFrameIndex
 for VOP3 FI operands

---
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 0c67cac71e6ad..6174dab19c952 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -3079,13 +3079,10 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
       // In an entry function/kernel the offset is already swizzled.
       const TargetRegisterClass *FiRC =
           TII->getRegClass(MI->getDesc(), FIOperandNum);
-      bool IsSALU = false;
       // If FiRC is null, fall back to non-SALU handling to avoid crashing.
       // Some instructions may not have regclass information for FI operands
       // yet.
-      if (FiRC)
-        IsSALU = isSGPRClass(FiRC);
-
+      bool IsSALU = FiRC && isSGPRClass(FiRC);
       bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) &&
                      !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
       const TargetRegisterClass *RC = IsSALU && !LiveSCC



More information about the llvm-commits mailing list