[llvm] [AMDGPU] S_SET_GPR_IDX_ON can be passed an immediate index (PR #125086)
Jon Chesterfield via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 30 08:23:02 PST 2025
https://github.com/JonChesterfield created https://github.com/llvm/llvm-project/pull/125086
Oversight found by the ISel fuzzing effort. The expansion code assumed the index operand is always a register, but in some cases it can be an immediate. TableGen's operand type for the instruction is SSrc_b32, i.e. either a register or an immediate is acceptable. Added the repro from the bug reporter as a test case; prior to this patch, LLVM asserts in getReg.
Fixes SWDEV-508589
>From 0965b0b28e3330a69d90e69e2d4adfe156cd7ad9 Mon Sep 17 00:00:00 2001
From: Jon Chesterfield <jonathanchesterfield at gmail.com>
Date: Thu, 30 Jan 2025 16:14:02 +0000
Subject: [PATCH] [AMDGPU] S_SET_GPR_IDX_ON can be passed an immediate index
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 3 +-
.../CodeGen/AMDGPU/copy-to-reg-frameindex.ll | 38 +++++++++++++++++++
2 files changed, 39 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/copy-to-reg-frameindex.ll
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 5727d14ec49e8ac..2c7665f5b8acfaf 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2366,11 +2366,10 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
assert(ST.useVGPRIndexMode());
Register VecReg = MI.getOperand(0).getReg();
bool IsUndef = MI.getOperand(1).isUndef();
- Register Idx = MI.getOperand(3).getReg();
Register SubReg = MI.getOperand(4).getImm();
MachineInstr *SetOn = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_ON))
- .addReg(Idx)
+ .add(MI.getOperand(3)) // Index
.addImm(AMDGPU::VGPRIndexMode::DST_ENABLE);
SetOn->getOperand(3).setIsUndef();
diff --git a/llvm/test/CodeGen/AMDGPU/copy-to-reg-frameindex.ll b/llvm/test/CodeGen/AMDGPU/copy-to-reg-frameindex.ll
new file mode 100644
index 000000000000000..d86f497aa5e13da
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/copy-to-reg-frameindex.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs | FileCheck %s
+
+define amdgpu_kernel void @copy_to_reg_frameindex(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: copy_to_reg_frameindex:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: ; implicit-def: $vgpr0
+; CHECK-NEXT: .LBB0_1: ; %loop
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: s_cmp_lt_u32 0, 16
+; CHECK-NEXT: s_set_gpr_idx_on 0, gpr_idx(DST)
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: s_set_gpr_idx_off
+; CHECK-NEXT: s_cbranch_scc1 .LBB0_1
+; CHECK-NEXT: ; %bb.2: ; %done
+; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: global_store_dword v1, v0, s[0:1]
+; CHECK-NEXT: s_endpgm
+entry:
+ %B = srem i32 %c, -1
+ %alloca = alloca [16 x i32], align 4, addrspace(5)
+ br label %loop
+
+loop:
+ %inc = phi i32 [ 0, %entry ], [ %inc.i, %loop ]
+ %ptr = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %inc
+ store i32 %inc, ptr addrspace(5) %ptr, align 4
+ %inc.i = add i32 %inc, %B
+ %cnd = icmp uge i32 %inc.i, 16
+ br i1 %cnd, label %done, label %loop
+
+done:
+ %tmp1 = load i32, ptr addrspace(5) %alloca, align 4
+ store i32 %tmp1, ptr addrspace(1) %out, align 4
+ ret void
+}
More information about the llvm-commits
mailing list