[llvm] [AMDGPU] Split struct kernel arguments (PR #133786)
Yaxun Liu via llvm-commits
llvm-commits at lists.llvm.org
Sat Jun 14 13:39:39 PDT 2025
================
@@ -0,0 +1,144 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-split-kernel-arguments -amdgpu-enable-split-kernel-args < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-split-kernel-arguments -amdgpu-enable-split-kernel-args < %s > %t.ll
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %t.ll | FileCheck --check-prefix=GCN %s
+;
+; The LLVM IR is from the following HIP program:
+;
+; struct A {
+; int i;
+; char c;
+; long l;
+; int *p;
+; };
+
+; struct B {
+; char c;
+; A a1;
+; int i;
+; A a2;
+; };
+;
+; __global__ void test(int *out, int i, A a, char c, B b) {
+; *out = i + a.l + c + a.l + b.a1.c;
+; b.a2.p[2] = a.l + b.a2.c;
+;}
+;
+%struct.A = type { i32, i8, i64, ptr }
+%struct.B = type { i8, %struct.A, i32, %struct.A }
+
+; The "amdgpu-original-arg" function parameter attribute encodes how is the
+; argument split from the original kernel argument.
+;
+; Format: "amdgpu-original-arg"="OrigIndex:OrigOffset"
+; - OrigIndex: Index of the original kernel argument before splitting
+; - OrigOffset: Byte offset within the original argument
+
+define amdgpu_kernel void @_Z4testPii1Ac1B(
+; CHECK-LABEL: define amdgpu_kernel void @_Z4testPii1Ac1B(
+; CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]], i32 noundef [[I:%.*]], i64 "amdgpu-original-arg"="2:8" [[A_L:%.*]], i8 noundef [[C:%.*]], ptr "amdgpu-original-arg"="4:56" [[B_A2_P:%.*]], i8 "amdgpu-original-arg"="4:44" [[B_A2_C:%.*]], i8 "amdgpu-original-arg"="4:12" [[B_A1_C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[B_A2_P]] to ptr addrspace(1)
+; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[I]] to i64
+; CHECK-NEXT: [[CONV3:%.*]] = sext i8 [[C]] to i64
+; CHECK-NEXT: [[CONV8:%.*]] = sext i8 [[B_A1_C]] to i64
+; CHECK-NEXT: [[FACTOR:%.*]] = shl i64 [[A_L]], 1
+; CHECK-NEXT: [[ADD4:%.*]] = add nsw i64 [[CONV]], [[CONV3]]
+; CHECK-NEXT: [[ADD6:%.*]] = add i64 [[ADD4]], [[FACTOR]]
+; CHECK-NEXT: [[ADD9:%.*]] = add i64 [[ADD6]], [[CONV8]]
+; CHECK-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32
+; CHECK-NEXT: store i32 [[CONV10]], ptr addrspace(1) [[OUT]], align 4
+; CHECK-NEXT: [[B_A2_C_SEXT:%.*]] = sext i8 [[B_A2_C]] to i64
+; CHECK-NEXT: [[ADD14:%.*]] = add nsw i64 [[A_L]], [[B_A2_C_SEXT]]
+; CHECK-NEXT: [[CONV15:%.*]] = trunc i64 [[ADD14]] to i32
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP1]], i64 2
+; CHECK-NEXT: store i32 [[CONV15]], ptr addrspace(1) [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret void
+;
+ ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) %out,
----------------
yxsamliu wrote:
will do
https://github.com/llvm/llvm-project/pull/133786
More information about the llvm-commits
mailing list