[llvm] [AMDGPU] fix eliminateFrameIndex to use SGPR frame index (PR #178991)

via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 18 14:00:53 PST 2026


https://github.com/hjagasiaAMD updated https://github.com/llvm/llvm-project/pull/178991

>From ff1a452313414865ea5db7d0d59b790c06b184dc Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Fri, 30 Jan 2026 17:10:15 -0600
Subject: [PATCH 1/9] [AMDGPU] fix eliminateFrameIndex to use SGPR frame index

---
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp     |  2 --
 .../AMDGPU/eliminate-frame-index-select.mir   | 35 +++++++++++++++++++
 2 files changed, 35 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir

diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 96c2f6530fe4c..3df9a78a391bc 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -3257,8 +3257,6 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
           if (!IsSALU)
             BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
                 .addReg(TmpResultReg, RegState::Kill);
-          else
-            ResultReg = TmpResultReg;
           // If there were truly no free SGPRs, we need to undo everything.
           if (!TmpScaledReg.isValid()) {
             BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
new file mode 100644
index 0000000000000..8ce4428a3f20b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
@@ -0,0 +1,35 @@
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs -mcpu=gfx1030 -run-pass prologepilog -o - %s | FileCheck %s
+# CHECK: $sgpr5 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+# CHECK: renamable $sgpr4 = S_CSELECT_B32 0, killed $sgpr5, implicit $scc
+--- |
+  %struct.wobble = type { %struct.quux }
+  %struct.quux = type { float, float, float }
+  define void @wobble() {
+    %alloca = alloca %struct.wobble, align 4, addrspace(5)
+    ret void
+  }
+...
+---
+name:            wobble
+tracksRegLiveness: true
+stack:
+  - { id: 0, name: alloca, offset: 0, size: 12}
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
+    renamable $sgpr4 = S_MOV_B32 0
+    renamable $sgpr54 = S_ADD_I32 %stack.0.alloca, 4, implicit-def dead $scc
+  bb.1:
+    liveins: $sgpr4, $sgpr54:0x000000000000000F
+    S_CMP_EQ_U32 killed renamable $sgpr4, 0, implicit-def $scc
+    renamable $sgpr4 = S_CSELECT_B32 0, %stack.0.alloca, implicit $scc
+    renamable $sgpr5 = S_CSELECT_B32 4, renamable $sgpr54, implicit $scc
+    S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
+    S_BRANCH %bb.2
+  bb.2:
+    SI_RETURN
+...

>From adffe15966b7d5baccdc5403b1df3102e1c91ce6 Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Mon, 2 Feb 2026 16:30:51 -0600
Subject: [PATCH 2/9] [AMDGPU] Fix lit tests

---
 .../AMDGPU/eliminate-frame-index-select.ll    | 35 +++++++++++++++++++
 .../AMDGPU/eliminate-frame-index-select.mir   | 28 +++++++--------
 2 files changed, 47 insertions(+), 16 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll

diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
new file mode 100644
index 0000000000000..f246ba57410e2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s
+; CHECK-LABEL: .LBB0_1:
+; CHECK: v_lshrrev_b32_e64 [[V:v[0-9]+]], 5, s33
+; CHECK: v_add_nc_u32_e32 [[V]], 12, [[V]]
+; CHECK: v_readfirstlane_b32 [[S:s[0-9]+]], [[V]]
+; CHECK: s_cselect_b32 {{s[0-9]+}}, 0, [[S]]
+
+%struct.wobble = type { %struct.quux }
+%struct.quux = type { float, float, float }
+
+declare %struct.wobble @foo(%struct.quux)
+
+define void @wobble() #0 {
+bb:
+  %alloca = alloca %struct.wobble, align 4, addrspace(5)
+  br label %bb1
+
+bb1:                                              ; preds = %bb1, %bb
+  %phi = phi i32 [ 0, %bb ], [ 1, %bb1 ]
+  store i32 0, ptr addrspacecast (ptr addrspace(5) null to ptr), align 4
+  %getelementptr = getelementptr i8, ptr addrspace(5) %alloca, i32 4
+  %icmp = icmp eq i32 %phi, 0
+  %load = load float, ptr addrspace(5) null, align 2147483648
+  %load2 = load float, ptr addrspace(5) %alloca, align 4
+  %select = select i1 %icmp, float %load, float %load2
+  %insertvalue = insertvalue %struct.quux zeroinitializer, float %select, 0
+  %load3 = load float, ptr addrspace(5) inttoptr (i32 4 to ptr addrspace(5)), align 4
+  %load4 = load float, ptr addrspace(5) %getelementptr, align 4
+  %select5 = select i1 %icmp, float %load3, float %load4
+  %insertvalue6 = insertvalue %struct.quux %insertvalue, float %select5, 1
+  %call = call %struct.wobble @foo(%struct.quux %insertvalue6)
+  br label %bb1
+}
+
+attributes #0 = { "target-cpu"="gfx1030" }
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
index 8ce4428a3f20b..cbd6149b97d98 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
@@ -1,34 +1,30 @@
 # RUN: llc -mtriple=amdgcn -verify-machineinstrs -mcpu=gfx1030 -run-pass prologepilog -o - %s | FileCheck %s
-# CHECK: $sgpr5 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
-# CHECK: renamable $sgpr4 = S_CSELECT_B32 0, killed $sgpr5, implicit $scc
---- |
-  %struct.wobble = type { %struct.quux }
-  %struct.quux = type { float, float, float }
-  define void @wobble() {
-    %alloca = alloca %struct.wobble, align 4, addrspace(5)
-    ret void
-  }
-...
+# CHECK: bb.1:
+# CHECK: [[V1:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+# CHECK: [[R1:\$sgpr[0-9]+]] = V_READFIRSTLANE_B32 [[V1]], implicit $exec
+# CHECK: S_CSELECT_B32 0, killed [[R1]], implicit $scc
+# CHECK: [[V2:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+# CHECK: [[R2:\$sgpr[0-9]+]] = V_READFIRSTLANE_B32 [[V2]], implicit $exec
+# CHECK: S_CSELECT_B32 killed [[R2]], 0, implicit $scc
 ---
-name:            wobble
+name:            test
 tracksRegLiveness: true
 stack:
-  - { id: 0, name: alloca, offset: 0, size: 12}
+  - { id: 0, size: 12}
 machineFunctionInfo:
   scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
   frameOffsetReg:  '$sgpr33'
   stackPtrOffsetReg: '$sgpr32'
 body:             |
   bb.0:
-    renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
     renamable $sgpr4 = S_MOV_B32 0
-    renamable $sgpr54 = S_ADD_I32 %stack.0.alloca, 4, implicit-def dead $scc
+    renamable $sgpr54 = S_ADD_I32 %stack.0, 4, implicit-def dead $scc
   bb.1:
     liveins: $sgpr4, $sgpr54:0x000000000000000F
     S_CMP_EQ_U32 killed renamable $sgpr4, 0, implicit-def $scc
-    renamable $sgpr4 = S_CSELECT_B32 0, %stack.0.alloca, implicit $scc
+    renamable $sgpr4 = S_CSELECT_B32 0, %stack.0, implicit $scc
+    renamable $sgpr6 = S_CSELECT_B32 %stack.0, 0, implicit $scc
     renamable $sgpr5 = S_CSELECT_B32 4, renamable $sgpr54, implicit $scc
-    S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
     S_BRANCH %bb.2
   bb.2:
     SI_RETURN

>From 20bd5d9d4e96806c37605f130af7286ee878a992 Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Wed, 4 Feb 2026 12:18:46 -0600
Subject: [PATCH 3/9] Update
 llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll

Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
 llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
index f246ba57410e2..d1b89a40d738b 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1030  < %s | FileCheck %s
 ; CHECK-LABEL: .LBB0_1:
 ; CHECK: v_lshrrev_b32_e64 [[V:v[0-9]+]], 5, s33
 ; CHECK: v_add_nc_u32_e32 [[V]], 12, [[V]]

>From 6e65b09e97991848b5320769aefaa11ca4c5b735 Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Wed, 4 Feb 2026 12:19:03 -0600
Subject: [PATCH 4/9] Update
 llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll

Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
 llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
index d1b89a40d738b..0def34ac3fb6d 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
@@ -32,4 +32,4 @@ bb1:                                              ; preds = %bb1, %bb
   br label %bb1
 }
 
-attributes #0 = { "target-cpu"="gfx1030" }
+attributes #0 = { nounwind }

>From ef5c1d8c6fb7f6e39d663da2f164b638ab8abc4f Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Fri, 6 Feb 2026 12:06:43 -0600
Subject: [PATCH 5/9] [AMDGPU] Fix lit tests

---
 .../AMDGPU/eliminate-frame-index-select.mir   | 33 ++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
index cbd6149b97d98..3d5a842964b42 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
@@ -7,7 +7,7 @@
 # CHECK: [[R2:\$sgpr[0-9]+]] = V_READFIRSTLANE_B32 [[V2]], implicit $exec
 # CHECK: S_CSELECT_B32 killed [[R2]], 0, implicit $scc
 ---
-name:            test
+name:            test_s_cselect_b32
 tracksRegLiveness: true
 stack:
   - { id: 0, size: 12}
@@ -17,6 +17,7 @@ machineFunctionInfo:
   stackPtrOffsetReg: '$sgpr32'
 body:             |
   bb.0:
+    renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
     renamable $sgpr4 = S_MOV_B32 0
     renamable $sgpr54 = S_ADD_I32 %stack.0, 4, implicit-def dead $scc
   bb.1:
@@ -29,3 +30,33 @@ body:             |
   bb.2:
     SI_RETURN
 ...
+# CHECK: bb.1:
+# CHECK: [[V0:\$vgpr[0-9]+]] = V_MOV_B32_e32 0, implicit $exec
+# CHECK: [[V1:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+# CHECK: [[V1]] = V_ADD_U32_e32 4, killed [[V1]], implicit $exec
+# CHECK: V_CNDMASK_B32_e32 killed [[V1]], killed [[V0]], implicit $vcc_lo, implicit $vcc_lo, implicit $exec
+---
+name:            test_v_cndmask_e32
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 12}
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
+    renamable $sgpr4 = S_MOV_B32 0
+    renamable $sgpr54 = S_ADD_I32 %stack.0, 4, implicit-def dead $scc
+  bb.1:
+    liveins: $sgpr4, $sgpr54:0x000000000000000F
+    V_CMP_EQ_U32_e64 def $vcc_lo, killed $sgpr4, 0, implicit $exec
+    renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    renamable $vgpr0 = V_CNDMASK_B32_e32 %stack.0, killed $vgpr2, implicit $vcc_lo, implicit $vcc, implicit $exec
+    renamable $vgpr3 = V_MOV_B32_e32 4, implicit $exec
+    renamable $vgpr1 = V_CNDMASK_B32_e32 $sgpr54, killed $vgpr3, implicit $vcc_lo, implicit $vcc, implicit $exec
+    S_BRANCH %bb.2
+  bb.2:
+    SI_RETURN
+...

>From 8267ffe9ca48ecf0c5f51d4be7027eac5718ffd0 Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Mon, 9 Feb 2026 18:12:39 -0600
Subject: [PATCH 6/9] [AMDGPU] Avoid null regclass crash in eliminateFrameIndex
 for VOP3 FI operands & add test for V_CNDMASK_B32_e64

---
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp     | 10 +++++-
 .../AMDGPU/eliminate-frame-index-select.mir   | 33 ++++++++++++-------
 2 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 3df9a78a391bc..0c67cac71e6ad 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -3077,7 +3077,15 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     if (!IsMUBUF && !MFI->isBottomOfStack()) {
       // Convert to a swizzled stack address by scaling by the wave size.
       // In an entry function/kernel the offset is already swizzled.
-      bool IsSALU = isSGPRClass(TII->getRegClass(MI->getDesc(), FIOperandNum));
+      const TargetRegisterClass *FiRC =
+          TII->getRegClass(MI->getDesc(), FIOperandNum);
+      bool IsSALU = false;
+      // If FiRC is null, fall back to non-SALU handling to avoid crashing.
+      // Some instructions may not have regclass information for FI operands
+      // yet.
+      if (FiRC)
+        IsSALU = isSGPRClass(FiRC);
+
       bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) &&
                      !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
       const TargetRegisterClass *RC = IsSALU && !LiveSCC
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
index 3d5a842964b42..e258cbac08a43 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
@@ -30,10 +30,9 @@ body:             |
   bb.2:
     SI_RETURN
 ...
-# CHECK: bb.1:
+# CHECK: bb.0:
 # CHECK: [[V0:\$vgpr[0-9]+]] = V_MOV_B32_e32 0, implicit $exec
 # CHECK: [[V1:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
-# CHECK: [[V1]] = V_ADD_U32_e32 4, killed [[V1]], implicit $exec
 # CHECK: V_CNDMASK_B32_e32 killed [[V1]], killed [[V0]], implicit $vcc_lo, implicit $vcc_lo, implicit $exec
 ---
 name:            test_v_cndmask_e32
@@ -46,17 +45,29 @@ machineFunctionInfo:
   stackPtrOffsetReg: '$sgpr32'
 body:             |
   bb.0:
-    renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
-    renamable $sgpr4 = S_MOV_B32 0
-    renamable $sgpr54 = S_ADD_I32 %stack.0, 4, implicit-def dead $scc
-  bb.1:
-    liveins: $sgpr4, $sgpr54:0x000000000000000F
+    liveins: $sgpr4
     V_CMP_EQ_U32_e64 def $vcc_lo, killed $sgpr4, 0, implicit $exec
     renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec
     renamable $vgpr0 = V_CNDMASK_B32_e32 %stack.0, killed $vgpr2, implicit $vcc_lo, implicit $vcc, implicit $exec
-    renamable $vgpr3 = V_MOV_B32_e32 4, implicit $exec
-    renamable $vgpr1 = V_CNDMASK_B32_e32 $sgpr54, killed $vgpr3, implicit $vcc_lo, implicit $vcc, implicit $exec
-    S_BRANCH %bb.2
-  bb.2:
+    SI_RETURN
+...
+# CHECK: bb.0:
+# CHECK: [[V0:\$vgpr[0-9]+]] = V_MOV_B32_e32 0, implicit $exec
+# CHECK: [[V1:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+# CHECK: V_CNDMASK_B32_e64 killed [[V1]], killed [[V0]], 0, 0, $sgpr4, implicit $exec
+---
+name:            test_v_cndmask_e64
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 12}
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    liveins: $sgpr4
+    renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    renamable $vgpr0 = V_CNDMASK_B32_e64 %stack.0, killed $vgpr2, 0, 0, $sgpr4, implicit $exec
     SI_RETURN
 ...

>From 942644202a34bc564e673ce0146bb27d27494aef Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Mon, 16 Feb 2026 10:32:55 -0600
Subject: [PATCH 7/9] [AMDGPU] Avoid null regclass crash in eliminateFrameIndex
 for VOP3 FI operands

---
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 0c67cac71e6ad..6174dab19c952 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -3079,13 +3079,10 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
       // In an entry function/kernel the offset is already swizzled.
       const TargetRegisterClass *FiRC =
           TII->getRegClass(MI->getDesc(), FIOperandNum);
-      bool IsSALU = false;
       // If FiRC is null, fall back to non-SALU handling to avoid crashing.
       // Some instructions may not have regclass information for FI operands
       // yet.
-      if (FiRC)
-        IsSALU = isSGPRClass(FiRC);
-
+      bool IsSALU = FiRC && isSGPRClass(FiRC);
       bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) &&
                      !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
       const TargetRegisterClass *RC = IsSALU && !LiveSCC

>From ad3c56babd3d91ef79f404de33c9adee80be6016 Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Wed, 18 Feb 2026 12:59:00 -0600
Subject: [PATCH 8/9] [AMDGPU] Generate checks and comment for lit tests

---
 .../AMDGPU/eliminate-frame-index-select.ll    | 115 +++++++++++++++++-
 .../AMDGPU/eliminate-frame-index-select.mir   |  68 ++++++++---
 2 files changed, 163 insertions(+), 20 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
index 0def34ac3fb6d..a6b415a1860b6 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
@@ -1,16 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030  < %s | FileCheck %s
-; CHECK-LABEL: .LBB0_1:
-; CHECK: v_lshrrev_b32_e64 [[V:v[0-9]+]], 5, s33
-; CHECK: v_add_nc_u32_e32 [[V]], 12, [[V]]
-; CHECK: v_readfirstlane_b32 [[S:s[0-9]+]], [[V]]
-; CHECK: s_cselect_b32 {{s[0-9]+}}, 0, [[S]]
 
 %struct.wobble = type { %struct.quux }
 %struct.quux = type { float, float, float }
 
 declare %struct.wobble @foo(%struct.quux)
 
+; s_cselect_b32 does not allow vreg & should use the sreg frameindex generated
+; by v_readfirstlane_b32 in eliminateFrameIndex
 define void @wobble() #0 {
+; CHECK-LABEL: wobble:
+; CHECK:       ; %bb.0: ; %bb
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b32 s16, s33
+; CHECK-NEXT:    s_mov_b32 s33, s32
+; CHECK-NEXT:    s_or_saveexec_b32 s17, -1
+; CHECK-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
+; CHECK-NEXT:    s_mov_b32 exec_lo, s17
+; CHECK-NEXT:    v_writelane_b32 v43, s16, 15
+; CHECK-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
+; CHECK-NEXT:    v_mov_b32_e32 v40, v31
+; CHECK-NEXT:    v_mov_b32_e32 v41, 0
+; CHECK-NEXT:    s_addk_i32 s32, 0x400
+; CHECK-NEXT:    v_writelane_b32 v43, s30, 0
+; CHECK-NEXT:    v_writelane_b32 v43, s31, 1
+; CHECK-NEXT:    v_writelane_b32 v43, s34, 2
+; CHECK-NEXT:    v_writelane_b32 v43, s35, 3
+; CHECK-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; CHECK-NEXT:    v_writelane_b32 v43, s36, 4
+; CHECK-NEXT:    v_writelane_b32 v43, s37, 5
+; CHECK-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; CHECK-NEXT:    s_mov_b64 s[8:9], src_private_base
+; CHECK-NEXT:    v_mov_b32_e32 v42, s9
+; CHECK-NEXT:    v_writelane_b32 v43, s38, 6
+; CHECK-NEXT:    v_writelane_b32 v43, s39, 7
+; CHECK-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; CHECK-NEXT:    v_writelane_b32 v43, s48, 8
+; CHECK-NEXT:    v_writelane_b32 v43, s49, 9
+; CHECK-NEXT:    s_mov_b64 s[48:49], s[4:5]
+; CHECK-NEXT:    s_lshr_b32 s5, s33, 5
+; CHECK-NEXT:    s_mov_b32 s4, 0
+; CHECK-NEXT:    v_writelane_b32 v43, s50, 10
+; CHECK-NEXT:    s_mov_b32 s50, s15
+; CHECK-NEXT:    v_writelane_b32 v43, s51, 11
+; CHECK-NEXT:    s_mov_b32 s51, s14
+; CHECK-NEXT:    v_writelane_b32 v43, s52, 12
+; CHECK-NEXT:    s_mov_b32 s52, s13
+; CHECK-NEXT:    v_writelane_b32 v43, s53, 13
+; CHECK-NEXT:    s_mov_b32 s53, s12
+; CHECK-NEXT:    v_writelane_b32 v43, s54, 14
+; CHECK-NEXT:    s_add_i32 s54, s5, 16
+; CHECK-NEXT:    s_inst_prefetch 0x1
+; CHECK-NEXT:    .p2align 6
+; CHECK-NEXT:  .LBB0_1: ; %bb1
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    v_lshrrev_b32_e64 v0, 5, s33
+; CHECK-NEXT:    s_cmp_eq_u32 s4, 0
+; CHECK-NEXT:    flat_store_dword v[41:42], v41
+; CHECK-NEXT:    v_mov_b32_e32 v31, v40
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0
+; CHECK-NEXT:    v_add_nc_u32_e32 v0, 12, v0
+; CHECK-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; CHECK-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; CHECK-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; CHECK-NEXT:    s_mov_b32 s12, s53
+; CHECK-NEXT:    v_readfirstlane_b32 s5, v0
+; CHECK-NEXT:    s_mov_b32 s13, s52
+; CHECK-NEXT:    s_mov_b32 s14, s51
+; CHECK-NEXT:    s_mov_b32 s15, s50
+; CHECK-NEXT:    s_cselect_b32 s4, 0, s5
+; CHECK-NEXT:    s_cselect_b32 s5, 4, s54
+; CHECK-NEXT:    v_mov_b32_e32 v0, s4
+; CHECK-NEXT:    v_mov_b32_e32 v1, s5
+; CHECK-NEXT:    s_getpc_b64 s[4:5]
+; CHECK-NEXT:    s_add_u32 s4, s4, foo@gotpcrel32@lo+4
+; CHECK-NEXT:    s_addc_u32 s5, s5, foo@gotpcrel32@hi+12
+; CHECK-NEXT:    s_clause 0x1
+; CHECK-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen
+; CHECK-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
+; CHECK-NEXT:    s_mov_b64 s[4:5], s[48:49]
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; CHECK-NEXT:    s_mov_b32 s4, 1
+; CHECK-NEXT:    s_mov_b32 vcc_lo, exec_lo
+; CHECK-NEXT:    s_cbranch_vccnz .LBB0_1
+; CHECK-NEXT:  ; %bb.2: ; %DummyReturnBlock
+; CHECK-NEXT:    s_inst_prefetch 0x2
+; CHECK-NEXT:    s_clause 0x2 ; 12-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v42, off, s[0:3], s33
+; CHECK-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:4
+; CHECK-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:8
+; CHECK-NEXT:    v_readlane_b32 s54, v43, 14
+; CHECK-NEXT:    v_readlane_b32 s53, v43, 13
+; CHECK-NEXT:    v_readlane_b32 s52, v43, 12
+; CHECK-NEXT:    v_readlane_b32 s51, v43, 11
+; CHECK-NEXT:    v_readlane_b32 s50, v43, 10
+; CHECK-NEXT:    v_readlane_b32 s49, v43, 9
+; CHECK-NEXT:    v_readlane_b32 s48, v43, 8
+; CHECK-NEXT:    v_readlane_b32 s39, v43, 7
+; CHECK-NEXT:    v_readlane_b32 s38, v43, 6
+; CHECK-NEXT:    v_readlane_b32 s37, v43, 5
+; CHECK-NEXT:    v_readlane_b32 s36, v43, 4
+; CHECK-NEXT:    v_readlane_b32 s35, v43, 3
+; CHECK-NEXT:    v_readlane_b32 s34, v43, 2
+; CHECK-NEXT:    v_readlane_b32 s31, v43, 1
+; CHECK-NEXT:    v_readlane_b32 s30, v43, 0
+; CHECK-NEXT:    s_mov_b32 s32, s33
+; CHECK-NEXT:    v_readlane_b32 s4, v43, 15
+; CHECK-NEXT:    s_or_saveexec_b32 s5, -1
+; CHECK-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
+; CHECK-NEXT:    s_mov_b32 exec_lo, s5
+; CHECK-NEXT:    s_mov_b32 s33, s4
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
 bb:
   %alloca = alloca %struct.wobble, align 4, addrspace(5)
   br label %bb1
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
index e258cbac08a43..a4d3015968ecd 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
@@ -1,11 +1,7 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
 # RUN: llc -mtriple=amdgcn -verify-machineinstrs -mcpu=gfx1030 -run-pass prologepilog -o - %s | FileCheck %s
-# CHECK: bb.1:
-# CHECK: [[V1:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
-# CHECK: [[R1:\$sgpr[0-9]+]] = V_READFIRSTLANE_B32 [[V1]], implicit $exec
-# CHECK: S_CSELECT_B32 0, killed [[R1]], implicit $scc
-# CHECK: [[V2:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
-# CHECK: [[R2:\$sgpr[0-9]+]] = V_READFIRSTLANE_B32 [[V2]], implicit $exec
-# CHECK: S_CSELECT_B32 killed [[R2]], 0, implicit $scc
+# s_cselect_b32 does not allow vreg & should use the sreg frameindex generated
+# by v_readfirstlane_b32 in eliminateFrameIndex.
 ---
 name:            test_s_cselect_b32
 tracksRegLiveness: true
@@ -16,6 +12,36 @@ machineFunctionInfo:
   frameOffsetReg:  '$sgpr33'
   stackPtrOffsetReg: '$sgpr32'
 body:             |
+  ; CHECK-LABEL: name: test_s_cselect_b32
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr41
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+  ; CHECK-NEXT:   renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   renamable $sgpr4 = S_MOV_B32 0
+  ; CHECK-NEXT:   renamable $sgpr5 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+  ; CHECK-NEXT:   renamable $sgpr54 = S_ADD_I32 $sgpr5, 8, implicit-def dead $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr4, $sgpr54:0x000000000000000F
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CMP_EQ_U32 killed renamable $sgpr4, 0, implicit-def $scc
+  ; CHECK-NEXT:   $vgpr0 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 4, killed $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $sgpr5 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   renamable $sgpr4 = S_CSELECT_B32 0, killed $sgpr5, implicit $scc
+  ; CHECK-NEXT:   $vgpr0 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 4, killed $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   renamable $sgpr6 = S_CSELECT_B32 killed $sgpr4, 0, implicit $scc
+  ; CHECK-NEXT:   renamable $sgpr5 = S_CSELECT_B32 4, renamable $sgpr54, implicit $scc
+  ; CHECK-NEXT:   S_BRANCH %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+  ; CHECK-NEXT:   SI_RETURN
   bb.0:
     renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
     renamable $sgpr4 = S_MOV_B32 0
@@ -30,10 +56,8 @@ body:             |
   bb.2:
     SI_RETURN
 ...
-# CHECK: bb.0:
-# CHECK: [[V0:\$vgpr[0-9]+]] = V_MOV_B32_e32 0, implicit $exec
-# CHECK: [[V1:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
-# CHECK: V_CNDMASK_B32_e32 killed [[V1]], killed [[V0]], implicit $vcc_lo, implicit $vcc_lo, implicit $exec
+# ensure register constraints of V_CNDMASK_B32_e32 are respected by
+# eliminateFrameIndex.
 ---
 name:            test_v_cndmask_e32
 tracksRegLiveness: true
@@ -46,15 +70,22 @@ machineFunctionInfo:
 body:             |
   bb.0:
     liveins: $sgpr4
+    ; CHECK-LABEL: name: test_v_cndmask_e32
+    ; CHECK: liveins: $sgpr4
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $vcc_lo = V_CMP_EQ_U32_e64 killed $sgpr4, 0, implicit $exec
+    ; CHECK-NEXT: renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+    ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e32 killed $vgpr1, killed $vgpr2, implicit $vcc_lo, implicit $vcc_lo, implicit $exec
+    ; CHECK-NEXT: SI_RETURN
     V_CMP_EQ_U32_e64 def $vcc_lo, killed $sgpr4, 0, implicit $exec
     renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec
     renamable $vgpr0 = V_CNDMASK_B32_e32 %stack.0, killed $vgpr2, implicit $vcc_lo, implicit $vcc, implicit $exec
     SI_RETURN
 ...
-# CHECK: bb.0:
-# CHECK: [[V0:\$vgpr[0-9]+]] = V_MOV_B32_e32 0, implicit $exec
-# CHECK: [[V1:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
-# CHECK: V_CNDMASK_B32_e64 killed [[V1]], killed [[V0]], 0, 0, $sgpr4, implicit $exec
+# ensure register constraints of V_CNDMASK_B32_e64 are respected by
+# eliminateFrameIndex. V_CNDMASK_B32_e64 with stack operand hits crash due to
+# null register class for that operand, which needs a follow up fix.
 ---
 name:            test_v_cndmask_e64
 tracksRegLiveness: true
@@ -67,6 +98,13 @@ machineFunctionInfo:
 body:             |
   bb.0:
     liveins: $sgpr4
+    ; CHECK-LABEL: name: test_v_cndmask_e64
+    ; CHECK: liveins: $sgpr4
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+    ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 killed $vgpr1, killed $vgpr2, 0, 0, $sgpr4, implicit $exec
+    ; CHECK-NEXT: SI_RETURN
     renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec
     renamable $vgpr0 = V_CNDMASK_B32_e64 %stack.0, killed $vgpr2, 0, 0, $sgpr4, implicit $exec
     SI_RETURN

>From c6992ae5f59d91684027603e727029d73e76db3d Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Wed, 18 Feb 2026 15:44:52 -0600
Subject: [PATCH 9/9] [AMDGPU] Generate checks and comment for lit tests

---
 llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
index a4d3015968ecd..32fbafad72d4b 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
@@ -17,7 +17,7 @@ body:             |
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT:   liveins: $vgpr41
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
   ; CHECK-NEXT:   renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
   ; CHECK-NEXT:   renamable $sgpr4 = S_MOV_B32 0
   ; CHECK-NEXT:   renamable $sgpr5 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
@@ -40,7 +40,7 @@ body:             |
   ; CHECK-NEXT:   S_BRANCH %bb.2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
-  ; CHECK-NEXT:   $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+  ; CHECK-NEXT:   $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
   ; CHECK-NEXT:   SI_RETURN
   bb.0:
     renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec



More information about the llvm-commits mailing list