[llvm] [AMDGPU] Split struct kernel arguments (PR #133786)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 31 17:31:11 PDT 2025
================
@@ -0,0 +1,120 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-split-kernel-arguments -amdgpu-enable-split-kernel-args -verify-machineinstrs < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-split-kernel-arguments -amdgpu-enable-split-kernel-args -verify-machineinstrs < %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs | FileCheck --check-prefix=GCN %s
+;
+; The LLVM IR is from the following HIP program:
+;
+; struct A {
+; int i;
+; char c;
+; long l;
+; int *p;
+; };
+
+; struct B {
+; char c;
+; A a1;
+; int i;
+; A a2;
+; };
+;
+; __global__ void test(int *out, int i, A a, char c, B b) {
+; *out = i + a.l + c + a.l + b.a1.c;
+; b.a2.p[2] = a.l + b.a2.c;
+;}
+;
+%struct.A = type { i32, i8, i64, ptr }
+%struct.B = type { i8, %struct.A, i32, %struct.A }
+
+define amdgpu_kernel void @_Z4testPii1Ac1B(ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) %out.coerce, i32 noundef %i, ptr addrspace(4) noundef readonly byref(%struct.A) align 8 captures(none) %0, i8 noundef %c, ptr addrspace(4) noundef readonly byref(%struct.B) align 8 captures(none) %1) {
+; CHECK-LABEL: define amdgpu_kernel void @_Z4testPii1Ac1B(
+; CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT_COERCE:%.*]], i32 noundef [[I:%.*]], i64 [[COERCE_SROA_1_0_COPYLOAD:%.*]], i8 noundef [[C:%.*]], ptr addrspace(1) [[COERCE1_SROA_318_0_COPYLOAD:%.*]], i8 [[COERCE1_SROA_217_0_COPYLOAD:%.*]], i8 [[COERCE1_SROA_1_0_COPYLOAD:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[COERCE1_SROA_318_0_COPYLOAD]] to ptr
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(1)
+; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[I]] to i64
+; CHECK-NEXT: [[CONV3:%.*]] = sext i8 [[C]] to i64
+; CHECK-NEXT: [[CONV8:%.*]] = sext i8 [[COERCE1_SROA_1_0_COPYLOAD]] to i64
+; CHECK-NEXT: [[FACTOR:%.*]] = shl i64 [[COERCE_SROA_1_0_COPYLOAD]], 1
+; CHECK-NEXT: [[ADD4:%.*]] = add nsw i64 [[CONV3]], [[CONV]]
+; CHECK-NEXT: [[ADD6:%.*]] = add i64 [[ADD4]], [[FACTOR]]
+; CHECK-NEXT: [[ADD9:%.*]] = add i64 [[ADD6]], [[CONV8]]
+; CHECK-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32
+; CHECK-NEXT: store i32 [[CONV10]], ptr addrspace(1) [[OUT_COERCE]], align 4
+; CHECK-NEXT: [[CONV13:%.*]] = sext i8 [[COERCE1_SROA_217_0_COPYLOAD]] to i64
+; CHECK-NEXT: [[ADD14:%.*]] = add nsw i64 [[COERCE_SROA_1_0_COPYLOAD]], [[CONV13]]
+; CHECK-NEXT: [[CONV15:%.*]] = trunc i64 [[ADD14]] to i32
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[TMP1]], i64 8
+; CHECK-NEXT: store i32 [[CONV15]], ptr addrspace(1) [[ARRAYIDX]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %coerce.sroa.1.0..sroa_idx = getelementptr inbounds nuw i8, ptr addrspace(4) %0, i64 8
+ %coerce.sroa.1.0.copyload = load i64, ptr addrspace(4) %coerce.sroa.1.0..sroa_idx, align 8
+ %coerce1.sroa.1.0..sroa_idx = getelementptr inbounds nuw i8, ptr addrspace(4) %1, i64 12
+ %coerce1.sroa.1.0.copyload = load i8, ptr addrspace(4) %coerce1.sroa.1.0..sroa_idx, align 4
+ %coerce1.sroa.217.0..sroa_idx = getelementptr inbounds nuw i8, ptr addrspace(4) %1, i64 44
+ %coerce1.sroa.217.0.copyload = load i8, ptr addrspace(4) %coerce1.sroa.217.0..sroa_idx, align 4
+ %coerce1.sroa.318.0..sroa_idx = getelementptr inbounds nuw i8, ptr addrspace(4) %1, i64 56
+ %coerce1.sroa.318.0.copyload = load ptr, ptr addrspace(4) %coerce1.sroa.318.0..sroa_idx, align 8
+ %2 = addrspacecast ptr %coerce1.sroa.318.0.copyload to ptr addrspace(1)
----------------
arsenm wrote:
Use named values
https://github.com/llvm/llvm-project/pull/133786
More information about the llvm-commits
mailing list