[llvm] [AMDGPU] Pre-commit tests for preload kernarg move to attributor (PR #123546)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 21 04:08:34 PST 2025
================
@@ -27,9 +79,34 @@ define amdgpu_kernel void @preload_block_count_x(ptr addrspace(1) %out) {
ret void
}
-define amdgpu_kernel void @no_free_sgprs_block_count_x(ptr addrspace(1) %out, i512) {
-; NO-PRELOAD-LABEL: define amdgpu_kernel void @no_free_sgprs_block_count_x(
-; NO-PRELOAD-SAME: ptr addrspace(1) [[OUT:%.*]], i512 [[TMP0:%.*]]) #[[ATTR0]] {
+define amdgpu_kernel void @preload_unused_arg_block_count_x(ptr addrspace(1) %out, i32 inreg) {
+; NO-PRELOAD-LABEL: define {{[^@]+}}@preload_unused_arg_block_count_x
+; NO-PRELOAD-SAME: (ptr addrspace(1) [[OUT:%.*]], i32 inreg [[TMP0:%.*]]) #[[ATTR0]] {
+; NO-PRELOAD-NEXT: [[PRELOAD_UNUSED_ARG_BLOCK_COUNT_X_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+; NO-PRELOAD-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[PRELOAD_UNUSED_ARG_BLOCK_COUNT_X_KERNARG_SEGMENT]], i64 0
+; NO-PRELOAD-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META0]]
+; NO-PRELOAD-NEXT: [[IMP_ARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+; NO-PRELOAD-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(4) [[IMP_ARG_PTR]], align 4
+; NO-PRELOAD-NEXT: store i32 [[LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4
+; NO-PRELOAD-NEXT: ret void
+;
+; PRELOAD-LABEL: define {{[^@]+}}@preload_unused_arg_block_count_x
+; PRELOAD-SAME: (ptr addrspace(1) inreg [[OUT:%.*]], i32 inreg [[TMP0:%.*]], i32 inreg "amdgpu-hidden-argument" [[_HIDDEN_BLOCK_COUNT_X:%.*]]) #[[ATTR0]] {
+; PRELOAD-NEXT: [[PRELOAD_UNUSED_ARG_BLOCK_COUNT_X_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+; PRELOAD-NEXT: [[IMP_ARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+; PRELOAD-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(4) [[IMP_ARG_PTR]], align 4
+; PRELOAD-NEXT: store i32 [[_HIDDEN_BLOCK_COUNT_X]], ptr addrspace(1) [[OUT]], align 4
+; PRELOAD-NEXT: ret void
+;
+ %imp_arg_ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+ %load = load i32, ptr addrspace(4) %imp_arg_ptr
+ store i32 %load, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @no_free_sgprs_block_count_x(ptr addrspace(1) %out, i512 inreg) {
----------------
arsenm wrote:
Probably should use something more normal than i512 for this, maybe <16 x i32>?
https://github.com/llvm/llvm-project/pull/123546
More information about the llvm-commits
mailing list