[llvm] [AMDGPU] fix eliminateFrameIndex to use SGPR frame index (PR #178991)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 18 14:00:53 PST 2026
https://github.com/hjagasiaAMD updated https://github.com/llvm/llvm-project/pull/178991
>From ff1a452313414865ea5db7d0d59b790c06b184dc Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Fri, 30 Jan 2026 17:10:15 -0600
Subject: [PATCH 1/9] [AMDGPU] fix eliminateFrameIndex to use SGPR frame index
---
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 2 --
.../AMDGPU/eliminate-frame-index-select.mir | 35 +++++++++++++++++++
2 files changed, 35 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 96c2f6530fe4c..3df9a78a391bc 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -3257,8 +3257,6 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
if (!IsSALU)
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
.addReg(TmpResultReg, RegState::Kill);
- else
- ResultReg = TmpResultReg;
// If there were truly no free SGPRs, we need to undo everything.
if (!TmpScaledReg.isValid()) {
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
new file mode 100644
index 0000000000000..8ce4428a3f20b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
@@ -0,0 +1,35 @@
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs -mcpu=gfx1030 -run-pass prologepilog -o - %s | FileCheck %s
+# CHECK: $sgpr5 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+# CHECK: renamable $sgpr4 = S_CSELECT_B32 0, killed $sgpr5, implicit $scc
+--- |
+ %struct.wobble = type { %struct.quux }
+ %struct.quux = type { float, float, float }
+ define void @wobble() {
+ %alloca = alloca %struct.wobble, align 4, addrspace(5)
+ ret void
+ }
+...
+---
+name: wobble
+tracksRegLiveness: true
+stack:
+ - { id: 0, name: alloca, offset: 0, size: 12}
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
+ renamable $sgpr4 = S_MOV_B32 0
+ renamable $sgpr54 = S_ADD_I32 %stack.0.alloca, 4, implicit-def dead $scc
+ bb.1:
+ liveins: $sgpr4, $sgpr54:0x000000000000000F
+ S_CMP_EQ_U32 killed renamable $sgpr4, 0, implicit-def $scc
+ renamable $sgpr4 = S_CSELECT_B32 0, %stack.0.alloca, implicit $scc
+ renamable $sgpr5 = S_CSELECT_B32 4, renamable $sgpr54, implicit $scc
+ S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
+ S_BRANCH %bb.2
+ bb.2:
+ SI_RETURN
+...
>From adffe15966b7d5baccdc5403b1df3102e1c91ce6 Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Mon, 2 Feb 2026 16:30:51 -0600
Subject: [PATCH 2/9] [AMDGPU] Fix lit tests
---
.../AMDGPU/eliminate-frame-index-select.ll | 35 +++++++++++++++++++
.../AMDGPU/eliminate-frame-index-select.mir | 28 +++++++--------
2 files changed, 47 insertions(+), 16 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
new file mode 100644
index 0000000000000..f246ba57410e2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s
+; CHECK-LABEL: .LBB0_1:
+; CHECK: v_lshrrev_b32_e64 [[V:v[0-9]+]], 5, s33
+; CHECK: v_add_nc_u32_e32 [[V]], 12, [[V]]
+; CHECK: v_readfirstlane_b32 [[S:s[0-9]+]], [[V]]
+; CHECK: s_cselect_b32 {{s[0-9]+}}, 0, [[S]]
+
+%struct.wobble = type { %struct.quux }
+%struct.quux = type { float, float, float }
+
+declare %struct.wobble @foo(%struct.quux)
+
+define void @wobble() #0 {
+bb:
+ %alloca = alloca %struct.wobble, align 4, addrspace(5)
+ br label %bb1
+
+bb1: ; preds = %bb1, %bb
+ %phi = phi i32 [ 0, %bb ], [ 1, %bb1 ]
+ store i32 0, ptr addrspacecast (ptr addrspace(5) null to ptr), align 4
+ %getelementptr = getelementptr i8, ptr addrspace(5) %alloca, i32 4
+ %icmp = icmp eq i32 %phi, 0
+ %load = load float, ptr addrspace(5) null, align 2147483648
+ %load2 = load float, ptr addrspace(5) %alloca, align 4
+ %select = select i1 %icmp, float %load, float %load2
+ %insertvalue = insertvalue %struct.quux zeroinitializer, float %select, 0
+ %load3 = load float, ptr addrspace(5) inttoptr (i32 4 to ptr addrspace(5)), align 4
+ %load4 = load float, ptr addrspace(5) %getelementptr, align 4
+ %select5 = select i1 %icmp, float %load3, float %load4
+ %insertvalue6 = insertvalue %struct.quux %insertvalue, float %select5, 1
+ %call = call %struct.wobble @foo(%struct.quux %insertvalue6)
+ br label %bb1
+}
+
+attributes #0 = { "target-cpu"="gfx1030" }
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
index 8ce4428a3f20b..cbd6149b97d98 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
@@ -1,34 +1,30 @@
# RUN: llc -mtriple=amdgcn -verify-machineinstrs -mcpu=gfx1030 -run-pass prologepilog -o - %s | FileCheck %s
-# CHECK: $sgpr5 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
-# CHECK: renamable $sgpr4 = S_CSELECT_B32 0, killed $sgpr5, implicit $scc
---- |
- %struct.wobble = type { %struct.quux }
- %struct.quux = type { float, float, float }
- define void @wobble() {
- %alloca = alloca %struct.wobble, align 4, addrspace(5)
- ret void
- }
-...
+# CHECK: bb.1:
+# CHECK: [[V1:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+# CHECK: [[R1:\$sgpr[0-9]+]] = V_READFIRSTLANE_B32 [[V1]], implicit $exec
+# CHECK: S_CSELECT_B32 0, killed [[R1]], implicit $scc
+# CHECK: [[V2:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+# CHECK: [[R2:\$sgpr[0-9]+]] = V_READFIRSTLANE_B32 [[V2]], implicit $exec
+# CHECK: S_CSELECT_B32 killed [[R2]], 0, implicit $scc
---
-name: wobble
+name: test
tracksRegLiveness: true
stack:
- - { id: 0, name: alloca, offset: 0, size: 12}
+ - { id: 0, size: 12}
machineFunctionInfo:
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
frameOffsetReg: '$sgpr33'
stackPtrOffsetReg: '$sgpr32'
body: |
bb.0:
- renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
renamable $sgpr4 = S_MOV_B32 0
- renamable $sgpr54 = S_ADD_I32 %stack.0.alloca, 4, implicit-def dead $scc
+ renamable $sgpr54 = S_ADD_I32 %stack.0, 4, implicit-def dead $scc
bb.1:
liveins: $sgpr4, $sgpr54:0x000000000000000F
S_CMP_EQ_U32 killed renamable $sgpr4, 0, implicit-def $scc
- renamable $sgpr4 = S_CSELECT_B32 0, %stack.0.alloca, implicit $scc
+ renamable $sgpr4 = S_CSELECT_B32 0, %stack.0, implicit $scc
+ renamable $sgpr6 = S_CSELECT_B32 %stack.0, 0, implicit $scc
renamable $sgpr5 = S_CSELECT_B32 4, renamable $sgpr54, implicit $scc
- S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
S_BRANCH %bb.2
bb.2:
SI_RETURN
>From 20bd5d9d4e96806c37605f130af7286ee878a992 Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Wed, 4 Feb 2026 12:18:46 -0600
Subject: [PATCH 3/9] Update
llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
index f246ba57410e2..d1b89a40d738b 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck %s
; CHECK-LABEL: .LBB0_1:
; CHECK: v_lshrrev_b32_e64 [[V:v[0-9]+]], 5, s33
; CHECK: v_add_nc_u32_e32 [[V]], 12, [[V]]
>From 6e65b09e97991848b5320769aefaa11ca4c5b735 Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Wed, 4 Feb 2026 12:19:03 -0600
Subject: [PATCH 4/9] Update
llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
index d1b89a40d738b..0def34ac3fb6d 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
@@ -32,4 +32,4 @@ bb1: ; preds = %bb1, %bb
br label %bb1
}
-attributes #0 = { "target-cpu"="gfx1030" }
+attributes #0 = { nounwind }
>From ef5c1d8c6fb7f6e39d663da2f164b638ab8abc4f Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Fri, 6 Feb 2026 12:06:43 -0600
Subject: [PATCH 5/9] [AMDGPU] Fix lit tests
---
.../AMDGPU/eliminate-frame-index-select.mir | 33 ++++++++++++++++++-
1 file changed, 32 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
index cbd6149b97d98..3d5a842964b42 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
@@ -7,7 +7,7 @@
# CHECK: [[R2:\$sgpr[0-9]+]] = V_READFIRSTLANE_B32 [[V2]], implicit $exec
# CHECK: S_CSELECT_B32 killed [[R2]], 0, implicit $scc
---
-name: test
+name: test_s_cselect_b32
tracksRegLiveness: true
stack:
- { id: 0, size: 12}
@@ -17,6 +17,7 @@ machineFunctionInfo:
stackPtrOffsetReg: '$sgpr32'
body: |
bb.0:
+ renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
renamable $sgpr4 = S_MOV_B32 0
renamable $sgpr54 = S_ADD_I32 %stack.0, 4, implicit-def dead $scc
bb.1:
@@ -29,3 +30,33 @@ body: |
bb.2:
SI_RETURN
...
+# CHECK: bb.1:
+# CHECK: [[V0:\$vgpr[0-9]+]] = V_MOV_B32_e32 0, implicit $exec
+# CHECK: [[V1:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+# CHECK: [[V1]] = V_ADD_U32_e32 4, killed [[V1]], implicit $exec
+# CHECK: V_CNDMASK_B32_e32 killed [[V1]], killed [[V0]], implicit $vcc_lo, implicit $vcc_lo, implicit $exec
+---
+name: test_v_cndmask_e32
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 12}
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
+ renamable $sgpr4 = S_MOV_B32 0
+ renamable $sgpr54 = S_ADD_I32 %stack.0, 4, implicit-def dead $scc
+ bb.1:
+ liveins: $sgpr4, $sgpr54:0x000000000000000F
+ V_CMP_EQ_U32_e64 def $vcc_lo, killed $sgpr4, 0, implicit $exec
+ renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ renamable $vgpr0 = V_CNDMASK_B32_e32 %stack.0, killed $vgpr2, implicit $vcc_lo, implicit $vcc, implicit $exec
+ renamable $vgpr3 = V_MOV_B32_e32 4, implicit $exec
+ renamable $vgpr1 = V_CNDMASK_B32_e32 $sgpr54, killed $vgpr3, implicit $vcc_lo, implicit $vcc, implicit $exec
+ S_BRANCH %bb.2
+ bb.2:
+ SI_RETURN
+...
>From 8267ffe9ca48ecf0c5f51d4be7027eac5718ffd0 Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Mon, 9 Feb 2026 18:12:39 -0600
Subject: [PATCH 6/9] [AMDGPU] Avoid null regclass crash in eliminateFrameIndex
for VOP3 FI operands & add test for V_CNDMASK_B32_e64
---
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 10 +++++-
.../AMDGPU/eliminate-frame-index-select.mir | 33 ++++++++++++-------
2 files changed, 31 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 3df9a78a391bc..0c67cac71e6ad 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -3077,7 +3077,15 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
if (!IsMUBUF && !MFI->isBottomOfStack()) {
// Convert to a swizzled stack address by scaling by the wave size.
// In an entry function/kernel the offset is already swizzled.
- bool IsSALU = isSGPRClass(TII->getRegClass(MI->getDesc(), FIOperandNum));
+ const TargetRegisterClass *FiRC =
+ TII->getRegClass(MI->getDesc(), FIOperandNum);
+ bool IsSALU = false;
+ // If FiRC is null, fall back to non-SALU handling to avoid crashing.
+ // Some instructions may not have regclass information for FI operands
+ // yet.
+ if (FiRC)
+ IsSALU = isSGPRClass(FiRC);
+
bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) &&
!MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
const TargetRegisterClass *RC = IsSALU && !LiveSCC
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
index 3d5a842964b42..e258cbac08a43 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
@@ -30,10 +30,9 @@ body: |
bb.2:
SI_RETURN
...
-# CHECK: bb.1:
+# CHECK: bb.0:
# CHECK: [[V0:\$vgpr[0-9]+]] = V_MOV_B32_e32 0, implicit $exec
# CHECK: [[V1:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
-# CHECK: [[V1]] = V_ADD_U32_e32 4, killed [[V1]], implicit $exec
# CHECK: V_CNDMASK_B32_e32 killed [[V1]], killed [[V0]], implicit $vcc_lo, implicit $vcc_lo, implicit $exec
---
name: test_v_cndmask_e32
@@ -46,17 +45,29 @@ machineFunctionInfo:
stackPtrOffsetReg: '$sgpr32'
body: |
bb.0:
- renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
- renamable $sgpr4 = S_MOV_B32 0
- renamable $sgpr54 = S_ADD_I32 %stack.0, 4, implicit-def dead $scc
- bb.1:
- liveins: $sgpr4, $sgpr54:0x000000000000000F
+ liveins: $sgpr4
V_CMP_EQ_U32_e64 def $vcc_lo, killed $sgpr4, 0, implicit $exec
renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec
renamable $vgpr0 = V_CNDMASK_B32_e32 %stack.0, killed $vgpr2, implicit $vcc_lo, implicit $vcc, implicit $exec
- renamable $vgpr3 = V_MOV_B32_e32 4, implicit $exec
- renamable $vgpr1 = V_CNDMASK_B32_e32 $sgpr54, killed $vgpr3, implicit $vcc_lo, implicit $vcc, implicit $exec
- S_BRANCH %bb.2
- bb.2:
+ SI_RETURN
+...
+# CHECK: bb.0:
+# CHECK: [[V0:\$vgpr[0-9]+]] = V_MOV_B32_e32 0, implicit $exec
+# CHECK: [[V1:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+# CHECK: V_CNDMASK_B32_e64 killed [[V1]], killed [[V0]], 0, 0, $sgpr4, implicit $exec
+---
+name: test_v_cndmask_e64
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 12}
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $sgpr4
+ renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ renamable $vgpr0 = V_CNDMASK_B32_e64 %stack.0, killed $vgpr2, 0, 0, $sgpr4, implicit $exec
SI_RETURN
...
>From 942644202a34bc564e673ce0146bb27d27494aef Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Mon, 16 Feb 2026 10:32:55 -0600
Subject: [PATCH 7/9] [AMDGPU] Avoid null regclass crash in eliminateFrameIndex
for VOP3 FI operands
---
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 0c67cac71e6ad..6174dab19c952 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -3079,13 +3079,10 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
// In an entry function/kernel the offset is already swizzled.
const TargetRegisterClass *FiRC =
TII->getRegClass(MI->getDesc(), FIOperandNum);
- bool IsSALU = false;
// If FiRC is null, fall back to non-SALU handling to avoid crashing.
// Some instructions may not have regclass information for FI operands
// yet.
- if (FiRC)
- IsSALU = isSGPRClass(FiRC);
-
+ bool IsSALU = FiRC && isSGPRClass(FiRC);
bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) &&
!MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
const TargetRegisterClass *RC = IsSALU && !LiveSCC
>From ad3c56babd3d91ef79f404de33c9adee80be6016 Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Wed, 18 Feb 2026 12:59:00 -0600
Subject: [PATCH 8/9] [AMDGPU] Generate checks and comment for lit tests
---
.../AMDGPU/eliminate-frame-index-select.ll | 115 +++++++++++++++++-
.../AMDGPU/eliminate-frame-index-select.mir | 68 ++++++++---
2 files changed, 163 insertions(+), 20 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
index 0def34ac3fb6d..a6b415a1860b6 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.ll
@@ -1,16 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck %s
-; CHECK-LABEL: .LBB0_1:
-; CHECK: v_lshrrev_b32_e64 [[V:v[0-9]+]], 5, s33
-; CHECK: v_add_nc_u32_e32 [[V]], 12, [[V]]
-; CHECK: v_readfirstlane_b32 [[S:s[0-9]+]], [[V]]
-; CHECK: s_cselect_b32 {{s[0-9]+}}, 0, [[S]]
%struct.wobble = type { %struct.quux }
%struct.quux = type { float, float, float }
declare %struct.wobble @foo(%struct.quux)
+; s_cselect_b32 does not accept a VGPR operand, so it must use the SGPR frame
+; index produced by v_readfirstlane_b32 in eliminateFrameIndex.
define void @wobble() #0 {
+; CHECK-LABEL: wobble:
+; CHECK: ; %bb.0: ; %bb
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_mov_b32 s16, s33
+; CHECK-NEXT: s_mov_b32 s33, s32
+; CHECK-NEXT: s_or_saveexec_b32 s17, -1
+; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
+; CHECK-NEXT: s_mov_b32 exec_lo, s17
+; CHECK-NEXT: v_writelane_b32 v43, s16, 15
+; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
+; CHECK-NEXT: v_mov_b32_e32 v40, v31
+; CHECK-NEXT: v_mov_b32_e32 v41, 0
+; CHECK-NEXT: s_addk_i32 s32, 0x400
+; CHECK-NEXT: v_writelane_b32 v43, s30, 0
+; CHECK-NEXT: v_writelane_b32 v43, s31, 1
+; CHECK-NEXT: v_writelane_b32 v43, s34, 2
+; CHECK-NEXT: v_writelane_b32 v43, s35, 3
+; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
+; CHECK-NEXT: v_writelane_b32 v43, s36, 4
+; CHECK-NEXT: v_writelane_b32 v43, s37, 5
+; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
+; CHECK-NEXT: s_mov_b64 s[8:9], src_private_base
+; CHECK-NEXT: v_mov_b32_e32 v42, s9
+; CHECK-NEXT: v_writelane_b32 v43, s38, 6
+; CHECK-NEXT: v_writelane_b32 v43, s39, 7
+; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
+; CHECK-NEXT: v_writelane_b32 v43, s48, 8
+; CHECK-NEXT: v_writelane_b32 v43, s49, 9
+; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5]
+; CHECK-NEXT: s_lshr_b32 s5, s33, 5
+; CHECK-NEXT: s_mov_b32 s4, 0
+; CHECK-NEXT: v_writelane_b32 v43, s50, 10
+; CHECK-NEXT: s_mov_b32 s50, s15
+; CHECK-NEXT: v_writelane_b32 v43, s51, 11
+; CHECK-NEXT: s_mov_b32 s51, s14
+; CHECK-NEXT: v_writelane_b32 v43, s52, 12
+; CHECK-NEXT: s_mov_b32 s52, s13
+; CHECK-NEXT: v_writelane_b32 v43, s53, 13
+; CHECK-NEXT: s_mov_b32 s53, s12
+; CHECK-NEXT: v_writelane_b32 v43, s54, 14
+; CHECK-NEXT: s_add_i32 s54, s5, 16
+; CHECK-NEXT: s_inst_prefetch 0x1
+; CHECK-NEXT: .p2align 6
+; CHECK-NEXT: .LBB0_1: ; %bb1
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: v_lshrrev_b32_e64 v0, 5, s33
+; CHECK-NEXT: s_cmp_eq_u32 s4, 0
+; CHECK-NEXT: flat_store_dword v[41:42], v41
+; CHECK-NEXT: v_mov_b32_e32 v31, v40
+; CHECK-NEXT: v_mov_b32_e32 v2, 0
+; CHECK-NEXT: v_add_nc_u32_e32 v0, 12, v0
+; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
+; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
+; CHECK-NEXT: s_mov_b32 s12, s53
+; CHECK-NEXT: v_readfirstlane_b32 s5, v0
+; CHECK-NEXT: s_mov_b32 s13, s52
+; CHECK-NEXT: s_mov_b32 s14, s51
+; CHECK-NEXT: s_mov_b32 s15, s50
+; CHECK-NEXT: s_cselect_b32 s4, 0, s5
+; CHECK-NEXT: s_cselect_b32 s5, 4, s54
+; CHECK-NEXT: v_mov_b32_e32 v0, s4
+; CHECK-NEXT: v_mov_b32_e32 v1, s5
+; CHECK-NEXT: s_getpc_b64 s[4:5]
+; CHECK-NEXT: s_add_u32 s4, s4, foo@gotpcrel32@lo+4
+; CHECK-NEXT: s_addc_u32 s5, s5, foo@gotpcrel32@hi+12
+; CHECK-NEXT: s_clause 0x1
+; CHECK-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen
+; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; CHECK-NEXT: s_mov_b32 s4, 1
+; CHECK-NEXT: s_mov_b32 vcc_lo, exec_lo
+; CHECK-NEXT: s_cbranch_vccnz .LBB0_1
+; CHECK-NEXT: ; %bb.2: ; %DummyReturnBlock
+; CHECK-NEXT: s_inst_prefetch 0x2
+; CHECK-NEXT: s_clause 0x2 ; 12-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33
+; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4
+; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8
+; CHECK-NEXT: v_readlane_b32 s54, v43, 14
+; CHECK-NEXT: v_readlane_b32 s53, v43, 13
+; CHECK-NEXT: v_readlane_b32 s52, v43, 12
+; CHECK-NEXT: v_readlane_b32 s51, v43, 11
+; CHECK-NEXT: v_readlane_b32 s50, v43, 10
+; CHECK-NEXT: v_readlane_b32 s49, v43, 9
+; CHECK-NEXT: v_readlane_b32 s48, v43, 8
+; CHECK-NEXT: v_readlane_b32 s39, v43, 7
+; CHECK-NEXT: v_readlane_b32 s38, v43, 6
+; CHECK-NEXT: v_readlane_b32 s37, v43, 5
+; CHECK-NEXT: v_readlane_b32 s36, v43, 4
+; CHECK-NEXT: v_readlane_b32 s35, v43, 3
+; CHECK-NEXT: v_readlane_b32 s34, v43, 2
+; CHECK-NEXT: v_readlane_b32 s31, v43, 1
+; CHECK-NEXT: v_readlane_b32 s30, v43, 0
+; CHECK-NEXT: s_mov_b32 s32, s33
+; CHECK-NEXT: v_readlane_b32 s4, v43, 15
+; CHECK-NEXT: s_or_saveexec_b32 s5, -1
+; CHECK-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
+; CHECK-NEXT: s_mov_b32 exec_lo, s5
+; CHECK-NEXT: s_mov_b32 s33, s4
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
bb:
%alloca = alloca %struct.wobble, align 4, addrspace(5)
br label %bb1
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
index e258cbac08a43..a4d3015968ecd 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
@@ -1,11 +1,7 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
# RUN: llc -mtriple=amdgcn -verify-machineinstrs -mcpu=gfx1030 -run-pass prologepilog -o - %s | FileCheck %s
-# CHECK: bb.1:
-# CHECK: [[V1:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
-# CHECK: [[R1:\$sgpr[0-9]+]] = V_READFIRSTLANE_B32 [[V1]], implicit $exec
-# CHECK: S_CSELECT_B32 0, killed [[R1]], implicit $scc
-# CHECK: [[V2:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
-# CHECK: [[R2:\$sgpr[0-9]+]] = V_READFIRSTLANE_B32 [[V2]], implicit $exec
-# CHECK: S_CSELECT_B32 killed [[R2]], 0, implicit $scc
+# s_cselect_b32 does not accept a VGPR operand, so it must use the SGPR frame
+# index produced by v_readfirstlane_b32 in eliminateFrameIndex.
---
name: test_s_cselect_b32
tracksRegLiveness: true
@@ -16,6 +12,36 @@ machineFunctionInfo:
frameOffsetReg: '$sgpr33'
stackPtrOffsetReg: '$sgpr32'
body: |
+ ; CHECK-LABEL: name: test_s_cselect_b32
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr41
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+ ; CHECK-NEXT: renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: renamable $sgpr4 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr5 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; CHECK-NEXT: renamable $sgpr54 = S_ADD_I32 $sgpr5, 8, implicit-def dead $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: liveins: $sgpr4, $sgpr54:0x000000000000000F
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CMP_EQ_U32 killed renamable $sgpr4, 0, implicit-def $scc
+ ; CHECK-NEXT: $vgpr0 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 4, killed $vgpr0, implicit $exec
+ ; CHECK-NEXT: $sgpr5 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; CHECK-NEXT: renamable $sgpr4 = S_CSELECT_B32 0, killed $sgpr5, implicit $scc
+ ; CHECK-NEXT: $vgpr0 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 4, killed $vgpr0, implicit $exec
+ ; CHECK-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; CHECK-NEXT: renamable $sgpr6 = S_CSELECT_B32 killed $sgpr4, 0, implicit $scc
+ ; CHECK-NEXT: renamable $sgpr5 = S_CSELECT_B32 4, renamable $sgpr54, implicit $scc
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; CHECK-NEXT: SI_RETURN
bb.0:
renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
renamable $sgpr4 = S_MOV_B32 0
@@ -30,10 +56,8 @@ body: |
bb.2:
SI_RETURN
...
-# CHECK: bb.0:
-# CHECK: [[V0:\$vgpr[0-9]+]] = V_MOV_B32_e32 0, implicit $exec
-# CHECK: [[V1:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
-# CHECK: V_CNDMASK_B32_e32 killed [[V1]], killed [[V0]], implicit $vcc_lo, implicit $vcc_lo, implicit $exec
+# Ensure register constraints of V_CNDMASK_B32_e32 are respected by
+# eliminateFrameIndex.
---
name: test_v_cndmask_e32
tracksRegLiveness: true
@@ -46,15 +70,22 @@ machineFunctionInfo:
body: |
bb.0:
liveins: $sgpr4
+ ; CHECK-LABEL: name: test_v_cndmask_e32
+ ; CHECK: liveins: $sgpr4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vcc_lo = V_CMP_EQ_U32_e64 killed $sgpr4, 0, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e32 killed $vgpr1, killed $vgpr2, implicit $vcc_lo, implicit $vcc_lo, implicit $exec
+ ; CHECK-NEXT: SI_RETURN
V_CMP_EQ_U32_e64 def $vcc_lo, killed $sgpr4, 0, implicit $exec
renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec
renamable $vgpr0 = V_CNDMASK_B32_e32 %stack.0, killed $vgpr2, implicit $vcc_lo, implicit $vcc, implicit $exec
SI_RETURN
...
-# CHECK: bb.0:
-# CHECK: [[V0:\$vgpr[0-9]+]] = V_MOV_B32_e32 0, implicit $exec
-# CHECK: [[V1:\$vgpr[0-9]+]] = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
-# CHECK: V_CNDMASK_B32_e64 killed [[V1]], killed [[V0]], 0, 0, $sgpr4, implicit $exec
+# Ensure register constraints of V_CNDMASK_B32_e64 are respected by
+# eliminateFrameIndex. V_CNDMASK_B32_e64 with a stack operand hits a crash due
+# to a null register class for that operand, which needs a follow-up fix.
---
name: test_v_cndmask_e64
tracksRegLiveness: true
@@ -67,6 +98,13 @@ machineFunctionInfo:
body: |
bb.0:
liveins: $sgpr4
+ ; CHECK-LABEL: name: test_v_cndmask_e64
+ ; CHECK: liveins: $sgpr4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 killed $vgpr1, killed $vgpr2, 0, 0, $sgpr4, implicit $exec
+ ; CHECK-NEXT: SI_RETURN
renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec
renamable $vgpr0 = V_CNDMASK_B32_e64 %stack.0, killed $vgpr2, 0, 0, $sgpr4, implicit $exec
SI_RETURN
>From c6992ae5f59d91684027603e727029d73e76db3d Mon Sep 17 00:00:00 2001
From: hjagasiaAMD <harsha.jagasia at amd.com>
Date: Wed, 18 Feb 2026 15:44:52 -0600
Subject: [PATCH 9/9] [AMDGPU] Generate checks and comment for lit tests
---
llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
index a4d3015968ecd..32fbafad72d4b 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-select.mir
@@ -17,7 +17,7 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr41
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
; CHECK-NEXT: renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: renamable $sgpr4 = S_MOV_B32 0
; CHECK-NEXT: renamable $sgpr5 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
@@ -40,7 +40,7 @@ body: |
; CHECK-NEXT: S_BRANCH %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; CHECK-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
; CHECK-NEXT: SI_RETURN
bb.0:
renamable $vgpr41 = V_MOV_B32_e32 0, implicit $exec
More information about the llvm-commits
mailing list