[llvm] [AMDGPU] Add an option to completely disable kernel argument preload (PR #153975)

via llvm-commits llvm-commits at lists.llvm.org
Sat Aug 16 12:25:55 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Shilei Tian (shiltian)

<details>
<summary>Changes</summary>

The existing `amdgpu-kernarg-preload-count` option can't be used as a switch to
turn kernel argument preloading off: setting it to 0 does not disable preloading.
This PR adds a separate option, `amdgpu-kernarg-preload`, to turn it off completely.

Fixes SWDEV-550147.

---
Full diff: https://github.com/llvm/llvm-project/pull/153975.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUPreloadKernelArguments.cpp (+8) 
- (added) llvm/test/CodeGen/AMDGPU/disable-preload-kernargs.ll (+29) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreloadKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreloadKernelArguments.cpp
index 984c1ee89309e..a386fe621a553 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPreloadKernelArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPreloadKernelArguments.cpp
@@ -37,6 +37,11 @@ static cl::opt<unsigned> KernargPreloadCount(
     "amdgpu-kernarg-preload-count",
     cl::desc("How many kernel arguments to preload onto SGPRs"), cl::init(0));
 
+static cl::opt<bool>
+    EnableKernargPreload("amdgpu-kernarg-preload",
+                         cl::desc("Enable preload kernel arguments to SGPRs"),
+                         cl::init(true));
+
 namespace {
 
 class AMDGPUPreloadKernelArgumentsLegacy : public ModulePass {
@@ -275,6 +280,9 @@ AMDGPUPreloadKernelArgumentsLegacy::AMDGPUPreloadKernelArgumentsLegacy(
     : ModulePass(ID), TM(TM) {}
 
 static bool markKernelArgsAsInreg(Module &M, const TargetMachine &TM) {
+  if (!EnableKernargPreload)
+    return false;
+
   SmallVector<Function *, 4> FunctionsToErase;
   bool Changed = false;
   for (auto &F : M) {
diff --git a/llvm/test/CodeGen/AMDGPU/disable-preload-kernargs.ll b/llvm/test/CodeGen/AMDGPU/disable-preload-kernargs.ll
new file mode 100644
index 0000000000000..75aaec6f1fa70
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/disable-preload-kernargs.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=amdgpu-preload-kernel-arguments -amdgpu-kernarg-preload=0 %s -o - | FileCheck -check-prefix=NO-PRELOAD %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=amdgpu-preload-kernel-arguments %s -o - | FileCheck -check-prefix=DEFAULT-PRELOAD %s
+
+@g1 = protected addrspace(1) externally_initialized global i16 0, align 2
+
+define amdgpu_kernel void @test_kernel_with_zero_kernel_arg() {
+; NO-PRELOAD-LABEL: define amdgpu_kernel void @test_kernel_with_zero_kernel_arg(
+; NO-PRELOAD-SAME: ) #[[ATTR0:[0-9]+]] {
+; NO-PRELOAD-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+; NO-PRELOAD-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 12
+; NO-PRELOAD-NEXT:    [[GROUP_SIZE_X:%.*]] = load i16, ptr addrspace(4) [[GEP]], align 2
+; NO-PRELOAD-NEXT:    store i16 [[GROUP_SIZE_X]], ptr addrspace(1) @g1, align 2
+; NO-PRELOAD-NEXT:    ret void
+;
+; DEFAULT-PRELOAD-LABEL: define amdgpu_kernel void @test_kernel_with_zero_kernel_arg(
+; DEFAULT-PRELOAD-SAME: i32 inreg "amdgpu-hidden-argument" [[_HIDDEN_BLOCK_COUNT_X:%.*]], i32 inreg "amdgpu-hidden-argument" [[_HIDDEN_BLOCK_COUNT_Y:%.*]], i32 inreg "amdgpu-hidden-argument" [[_HIDDEN_BLOCK_COUNT_Z:%.*]], i16 inreg "amdgpu-hidden-argument" [[_HIDDEN_GROUP_SIZE_X:%.*]]) #[[ATTR0:[0-9]+]] {
+; DEFAULT-PRELOAD-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+; DEFAULT-PRELOAD-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 12
+; DEFAULT-PRELOAD-NEXT:    [[GROUP_SIZE_X:%.*]] = load i16, ptr addrspace(4) [[GEP]], align 2
+; DEFAULT-PRELOAD-NEXT:    store i16 [[_HIDDEN_GROUP_SIZE_X]], ptr addrspace(1) @g1, align 2
+; DEFAULT-PRELOAD-NEXT:    ret void
+;
+  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+  %gep = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 12
+  %group_size_x = load i16, ptr addrspace(4) %gep
+  store i16 %group_size_x, ptr addrspace(1) @g1
+  ret void
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/153975


More information about the llvm-commits mailing list