[llvm] [WIP][AMDGPU] Improve the handling of `inreg` arguments (PR #133614)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 30 05:15:08 PDT 2025
================
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -o - %s | FileCheck %s
+
+; arg3 and arg4 end up in VGPRs (v0 and v1 when unpacked). They should be packed into lanes of v0 and extracted with v_readlane.
+define i32 @callee(<8 x i32> inreg %arg0, <8 x i32> inreg %arg1, <2 x i32> inreg %arg2, i32 inreg %arg3, i32 inreg %arg4) {
+; CHECK-LABEL: callee:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_readlane_b32 s0, v0, 1
+; CHECK-NEXT: v_readlane_b32 s1, v0, 0
+; CHECK-NEXT: s_add_i32 s1, s1, s0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: v_mov_b32_e32 v0, s1
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %add = add i32 %arg3, %arg4
+ ret i32 %add
+}
+
+define amdgpu_kernel void @kernel(ptr %p0, ptr %p1, ptr %p2, ptr %p3, ptr %p4, ptr %p) { ; kernel-side caller of @callee
+ %arg0 = load <8 x i32>, ptr %p0
+ %arg1 = load <8 x i32>, ptr %p1
+ %arg2 = load <2 x i32>, ptr %p2
+ %arg3 = load i32, ptr %p3
+ %arg4 = load i32, ptr %p4
+ %ret = call i32 @callee(<8 x i32> inreg %arg0, <8 x i32> inreg %arg1, <2 x i32> inreg %arg2, i32 inreg %arg3, i32 inreg %arg4) ; inreg is repeated so the call site's ABI attributes match @callee's signature
+ store i32 %ret, ptr %p
+ ret void
+}
----------------
arsenm wrote:
Also test with a func caller, and a tail call
https://github.com/llvm/llvm-project/pull/133614
More information about the llvm-commits
mailing list