[llvm] AMDGPU: Add regression test for multiple frame index lowering (PR #140784)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue May 20 12:35:48 PDT 2025
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/140784
From 6f6f3b0a622e06139f64a53ccc12a404910fbebb Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Tue, 20 May 2025 20:07:55 +0200
Subject: [PATCH 1/2] AMDGPU: Add regression test for multiple frame index lowering
---
.../CodeGen/AMDGPU/frame-index-elimination.ll | 111 ++++++++++++++++++
1 file changed, 111 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
index ee62359cffc63..aea6329d56885 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
@@ -424,4 +424,115 @@ vector.body.i.i.i.i: ; preds = %.shuffle.then.i.i.i
ret void
}
+; Check that we do not produce a verifier error after prolog
+; epilog. alloca1 and alloca2 will lower to literals.
+
+; GCN-LABEL: {{^}}s_multiple_frame_indexes_literal_offsets:
+; GCN: s_load_dword [[ARG0:s[0-9]+]]
+; GCN: s_movk_i32 [[ALLOCA1:s[0-9]+]], 0x44
+; GCN: s_cmp_eq_u32 [[ARG0]], 0
+; GCN: s_cselect_b32 [[SELECT:s[0-9]+]], [[ALLOCA1]], 0x48
+; GCN: s_mov_b32 [[ALLOCA0:s[0-9]+]], 0
+; GCN: ; use [[SELECT]], [[ALLOCA0]]
+define amdgpu_kernel void @s_multiple_frame_indexes_literal_offsets(i32 inreg %arg0) #0 {
+ %alloca0 = alloca [17 x i32], align 8, addrspace(5)
+ %alloca1 = alloca i32, align 4, addrspace(5)
+ %alloca2 = alloca i32, align 4, addrspace(5)
+ %cmp = icmp eq i32 %arg0, 0
+ %select = select i1 %cmp, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca2
+ call void asm sideeffect "; use $0, $1","s,s"(ptr addrspace(5) %select, ptr addrspace(5) %alloca0)
+ ret void
+}
+
+; %alloca1 or alloca2 will lower to an inline constant, and one will
+; %be a literal, so we could fold both indexes into the instruction.
+
+; GCN-LABEL: {{^}}s_multiple_frame_indexes_one_imm_one_literal_offset:
+; GCN: s_load_dword [[ARG0:s[0-9]+]]
+; GCN: s_mov_b32 [[ALLOCA1:s[0-9]+]], 64
+; GCN: s_cmp_eq_u32 [[ARG0]], 0
+; GCN: s_cselect_b32 [[SELECT:s[0-9]+]], [[ALLOCA1]], 0x44
+; GCN: s_mov_b32 [[ALLOCA0:s[0-9]+]], 0
+; GCN: ; use [[SELECT]], [[ALLOCA0]]
+define amdgpu_kernel void @s_multiple_frame_indexes_one_imm_one_literal_offset(i32 inreg %arg0) #0 {
+ %alloca0 = alloca [16 x i32], align 8, addrspace(5)
+ %alloca1 = alloca i32, align 4, addrspace(5)
+ %alloca2 = alloca i32, align 4, addrspace(5)
+ %cmp = icmp eq i32 %arg0, 0
+ %select = select i1 %cmp, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca2
+ call void asm sideeffect "; use $0, $1","s,s"(ptr addrspace(5) %select, ptr addrspace(5) %alloca0)
+ ret void
+}
+
+; GCN-LABEL: {{^}}s_multiple_frame_indexes_imm_offsets:
+; GCN: s_load_dword [[ARG0:s[0-9]+]]
+; GCN: s_mov_b32 [[ALLOCA1:s[0-9]+]], 16
+; GCN: s_cmp_eq_u32 [[ARG0]], 0
+; GCN: s_cselect_b32 [[SELECT:s[0-9]+]], [[ALLOCA1]], 20
+; GCN: s_mov_b32 [[ALLOCA0:s[0-9]+]], 0
+; GCN: ; use [[SELECT]], [[ALLOCA0]]
+define amdgpu_kernel void @s_multiple_frame_indexes_imm_offsets(i32 inreg %arg0) #0 {
+ %alloca0 = alloca [4 x i32], align 8, addrspace(5)
+ %alloca1 = alloca i32, align 4, addrspace(5)
+ %alloca2 = alloca i32, align 4, addrspace(5)
+ %cmp = icmp eq i32 %arg0, 0
+ %select = select i1 %cmp, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca2
+ call void asm sideeffect "; use $0, $1","s,s"(ptr addrspace(5) %select, ptr addrspace(5) %alloca0)
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_multiple_frame_indexes_literal_offsets:
+; GCN: v_mov_b32_e32 [[ALLOCA1:v[0-9]+]], 0x48
+; GCN: v_mov_b32_e32 [[ALLOCA2:v[0-9]+]], 0x44
+; GCN: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[ALLOCA1]], [[ALLOCA2]], vcc
+; GCN: v_mov_b32_e32 [[ALLOCA0:v[0-9]+]], 0{{$}}
+; GCN: ; use [[SELECT]], [[ALLOCA0]]
+define amdgpu_kernel void @v_multiple_frame_indexes_literal_offsets() #0 {
+ %vgpr = call i32 @llvm.amdgcn.workitem.id.x()
+ %alloca0 = alloca [17 x i32], align 8, addrspace(5)
+ %alloca1 = alloca i32, align 4, addrspace(5)
+ %alloca2 = alloca i32, align 4, addrspace(5)
+ %cmp = icmp eq i32 %vgpr, 0
+ %select = select i1 %cmp, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca2
+ call void asm sideeffect "; use $0, $1","v,v"(ptr addrspace(5) %select, ptr addrspace(5) %alloca0)
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_multiple_frame_indexes_one_imm_one_literal_offset:
+; GCN: v_mov_b32_e32 [[ALLOCA1:v[0-9]+]], 0x44
+; GCN: v_mov_b32_e32 [[ALLOCA2:v[0-9]+]], 64
+; GCN: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[ALLOCA1]], [[ALLOCA2]], vcc
+; GCN: v_mov_b32_e32 [[ALLOCA0:v[0-9]+]], 0{{$}}
+; GCN: ; use [[SELECT]], [[ALLOCA0]]
+define amdgpu_kernel void @v_multiple_frame_indexes_one_imm_one_literal_offset() #0 {
+ %vgpr = call i32 @llvm.amdgcn.workitem.id.x()
+ %alloca0 = alloca [16 x i32], align 8, addrspace(5)
+ %alloca1 = alloca i32, align 4, addrspace(5)
+ %alloca2 = alloca i32, align 4, addrspace(5)
+ %cmp = icmp eq i32 %vgpr, 0
+ %select = select i1 %cmp, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca2
+ call void asm sideeffect "; use $0, $1","v,v"(ptr addrspace(5) %select, ptr addrspace(5) %alloca0)
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_multiple_frame_indexes_imm_offsets:
+; GCN: v_mov_b32_e32 [[ALLOCA1:v[0-9]+]], 12
+; GCN: v_mov_b32_e32 [[ALLOCA2:v[0-9]+]], 8
+; GCN: v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[ALLOCA1]], [[ALLOCA2]], vcc
+; GCN: v_mov_b32_e32 [[ALLOCA0:v[0-9]+]], 0{{$}}
+; GCN: ; use [[SELECT]], [[ALLOCA0]]
+define amdgpu_kernel void @v_multiple_frame_indexes_imm_offsets() #0 {
+ %vgpr = call i32 @llvm.amdgcn.workitem.id.x()
+ %alloca0 = alloca [2 x i32], align 8, addrspace(5)
+ %alloca1 = alloca i32, align 4, addrspace(5)
+ %alloca2 = alloca i32, align 4, addrspace(5)
+ %cmp = icmp eq i32 %vgpr, 0
+ %select = select i1 %cmp, ptr addrspace(5) %alloca1, ptr addrspace(5) %alloca2
+ call void asm sideeffect "; use $0, $1","v,v"(ptr addrspace(5) %select, ptr addrspace(5) %alloca0)
+ ret void
+}
+
attributes #0 = { nounwind }
From 64e4a0ccf67f52d7a8ed2f87846bd84f62ce118e Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Tue, 20 May 2025 21:35:40 +0200
Subject: [PATCH 2/2] Update llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
---
llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
index aea6329d56885..40cff44d6d3e6 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
@@ -445,7 +445,7 @@ define amdgpu_kernel void @s_multiple_frame_indexes_literal_offsets(i32 inreg %a
}
; %alloca1 or alloca2 will lower to an inline constant, and one will
-; %be a literal, so we could fold both indexes into the instruction.
+; be a literal, so we could fold both indexes into the instruction.
; GCN-LABEL: {{^}}s_multiple_frame_indexes_one_imm_one_literal_offset:
; GCN: s_load_dword [[ARG0:s[0-9]+]]
More information about the llvm-commits mailing list