[llvm] [AMDGPU] Enable serializing of allocated preload kernarg SGPRs info (PR #168374)

via llvm-commits llvm-commits at lists.llvm.org
Sat Nov 22 12:58:44 PST 2025


https://github.com/tyb0807 updated https://github.com/llvm/llvm-project/pull/168374

>From 63f624e85c46de437f018035e295ca336e3079ce Mon Sep 17 00:00:00 2001
From: tyb0807 <sontuan.vu at amd.com>
Date: Mon, 17 Nov 2025 08:11:40 -0600
Subject: [PATCH 1/4] [AMDGPU] Enable serializing of allocated preload kernarg
 SGPRs info

- Support serialization of the number of allocated preload kernarg SGPRs
- Support serialization of the first preload kernarg SGPR allocated

Together they make it possible to correctly reconstruct MIR that uses
preload kernarg SGPRs.
---
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  29 +++++
 .../Target/AMDGPU/SIMachineFunctionInfo.cpp   |  20 +++-
 .../lib/Target/AMDGPU/SIMachineFunctionInfo.h |   5 +
 .../AMDGPU/long-branch-reg-all-sgpr-used.ll   |   2 +
 .../AMDGPU/machine-function-info-after-pei.ll |   1 +
 ...ine-function-info-long-branch-reg-debug.ll |   1 +
 .../machine-function-info-long-branch-reg.ll  |   1 +
 .../AMDGPU/machine-function-info-no-ir.mir    |   4 +
 .../MIR/AMDGPU/machine-function-info.ll       |   4 +
 .../CodeGen/MIR/AMDGPU/preload-kernarg-mfi.ll | 108 ++++++++++++++++++
 10 files changed, 173 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-mfi.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 0346580ffa684..f0da640b08a01 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -2027,6 +2027,35 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
                              MFI->ArgInfo.WorkItemIDZ, 0, 0)))
     return true;
 
+  // Parse FirstKernArgPreloadReg separately, since it's a Register,
+  // not ArgDescriptor.
+  if (YamlMFI.ArgInfo && YamlMFI.ArgInfo->FirstKernArgPreloadReg) {
+    const auto &A = *YamlMFI.ArgInfo->FirstKernArgPreloadReg;
+
+    if (!A.IsRegister) {
+      const MemoryBuffer &Buffer =
+          *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());
+      Error =
+          SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 0,
+                       SourceMgr::DK_Error,
+                       "firstKernArgPreloadReg must be a register", "", {}, {});
+      return true;
+    }
+
+    Register Reg;
+    if (parseNamedRegisterReference(PFS, Reg, A.RegisterName.Value, Error)) {
+      SourceRange = A.RegisterName.SourceRange;
+      return true;
+    }
+
+    if (!AMDGPU::SGPR_32RegClass.contains(Reg))
+      return diagnoseRegisterClass(A.RegisterName);
+
+    MFI->ArgInfo.FirstKernArgPreloadReg = Reg;
+
+    MFI->NumUserSGPRs += YamlMFI.NumKernargPreloadSGPRs;
+  }
+
   if (ST.hasIEEEMode())
     MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
   if (ST.hasDX10ClampMode())
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index b398db4f7caff..a716a99ee98e6 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -696,7 +696,6 @@ convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
     return true;
   };
 
-  // TODO: Need to serialize kernarg preloads.
   bool Any = false;
   Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
   Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
@@ -718,6 +717,20 @@ convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
   Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
   Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);
 
+  // Write FirstKernArgPreloadReg separately, since it's a Register,
+  // not ArgDescriptor.
+  if (ArgInfo.FirstKernArgPreloadReg) {
+    Register Reg = ArgInfo.FirstKernArgPreloadReg;
+    if (Reg.isPhysical()) {
+      yaml::SIArgument SA = yaml::SIArgument::createArgument(true);
+      raw_string_ostream OS(SA.RegisterName.Value);
+      OS << printReg(Reg, &TRI);
+
+      AI.FirstKernArgPreloadReg = SA;
+      Any = true;
+    }
+  }
+
   if (Any)
     return AI;
 
@@ -750,7 +763,8 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
       Mode(MFI.getMode()), HasInitWholeWave(MFI.hasInitWholeWave()),
       IsWholeWaveFunction(MFI.isWholeWaveFunction()),
       DynamicVGPRBlockSize(MFI.getDynamicVGPRBlockSize()),
-      ScratchReservedForDynamicVGPRs(MFI.getScratchReservedForDynamicVGPRs()) {
+      ScratchReservedForDynamicVGPRs(MFI.getScratchReservedForDynamicVGPRs()),
+      NumKernargPreloadSGPRs(MFI.getNumKernargPreloadedSGPRs()) {
   for (Register Reg : MFI.getSGPRSpillPhysVGPRs())
     SpillPhysVGPRS.push_back(regToString(Reg, TRI));
 
@@ -799,6 +813,8 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
   ReturnsVoid = YamlMFI.ReturnsVoid;
   IsWholeWaveFunction = YamlMFI.IsWholeWaveFunction;
 
+  UserSGPRInfo.allocKernargPreloadSGPRs(YamlMFI.NumKernargPreloadSGPRs);
+
   if (YamlMFI.ScavengeFI) {
     auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
     if (!FIOrErr) {
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index ca3c35067a923..d901f4c216551 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -170,6 +170,7 @@ struct SIArgumentInfo {
   std::optional<SIArgument> DispatchID;
   std::optional<SIArgument> FlatScratchInit;
   std::optional<SIArgument> PrivateSegmentSize;
+  std::optional<SIArgument> FirstKernArgPreloadReg;
 
   std::optional<SIArgument> WorkGroupIDX;
   std::optional<SIArgument> WorkGroupIDY;
@@ -195,6 +196,7 @@ template <> struct MappingTraits<SIArgumentInfo> {
     YamlIO.mapOptional("dispatchID", AI.DispatchID);
     YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
     YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);
+    YamlIO.mapOptional("firstKernArgPreloadReg", AI.FirstKernArgPreloadReg);
 
     YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
     YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
@@ -305,6 +307,8 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
   unsigned DynamicVGPRBlockSize = 0;
   unsigned ScratchReservedForDynamicVGPRs = 0;
 
+  unsigned NumKernargPreloadSGPRs = 0;
+
   SIMachineFunctionInfo() = default;
   SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
                         const TargetRegisterInfo &TRI,
@@ -361,6 +365,7 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
     YamlIO.mapOptional("dynamicVGPRBlockSize", MFI.DynamicVGPRBlockSize, false);
     YamlIO.mapOptional("scratchReservedForDynamicVGPRs",
                        MFI.ScratchReservedForDynamicVGPRs, 0);
+    YamlIO.mapOptional("numKernargPreloadSGPRs", MFI.NumKernargPreloadSGPRs, 0);
     YamlIO.mapOptional("isWholeWaveFunction", MFI.IsWholeWaveFunction, false);
   }
 };
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll b/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll
index ed8bc9ca700a8..d1fae8ae92a2a 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll
@@ -48,6 +48,7 @@
 ; CHECK-NEXT:   hasInitWholeWave: false
 ; CHECK-NEXT:   dynamicVGPRBlockSize: 0
 ; CHECK-NEXT:   scratchReservedForDynamicVGPRs: 0
+; CHECK-NEXT:   numKernargPreloadSGPRs: 0
 ; CHECK-NEXT:   isWholeWaveFunction: false
 ; CHECK-NEXT: body:
   define amdgpu_kernel void @long_branch_used_all_sgprs(ptr addrspace(1) %arg, i32 %cnd) #0 {
@@ -320,6 +321,7 @@
 ; CHECK-NEXT:   hasInitWholeWave: false
 ; CHECK-NEXT:   dynamicVGPRBlockSize: 0
 ; CHECK-NEXT:   scratchReservedForDynamicVGPRs: 0
+; CHECK-NEXT:   numKernargPreloadSGPRs: 0
 ; CHECK-NEXT:   isWholeWaveFunction: false
 ; CHECK-NEXT: body:
   define amdgpu_kernel void @long_branch_high_num_sgprs_used(ptr addrspace(1) %arg, i32 %cnd) #0 {
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
index 68c3d1b2f2972..3e4eaf0a3cd98 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
@@ -48,6 +48,7 @@
 ; AFTER-PEI-NEXT: hasInitWholeWave: false
 ; AFTER-PEI-NEXT: dynamicVGPRBlockSize: 0
 ; AFTER-PEI-NEXT: scratchReservedForDynamicVGPRs: 0
+; AFTER-PEI-NEXT: numKernargPreloadSGPRs: 0
 ; AFTER-PEI-NEXT: isWholeWaveFunction: false
 ; AFTER-PEI-NEXT: body:
 define amdgpu_kernel void @scavenge_fi(ptr addrspace(1) %out, i32 %in) #0 {
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
index 55598ec70d953..2d820102e8706 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
@@ -48,6 +48,7 @@
 ; CHECK-NEXT: hasInitWholeWave: false
 ; CHECK-NEXT: dynamicVGPRBlockSize: 0
 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
+; CHECK-NEXT: numKernargPreloadSGPRs: 0
 ; CHECK-NEXT: isWholeWaveFunction: false
 ; CHECK-NEXT: body:
   define amdgpu_kernel void @uniform_long_forward_branch_debug(ptr addrspace(1) %arg, i32 %arg1) #0 !dbg !5 {
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
index 2326b2dc09b58..c949a3d94c6a3 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
@@ -48,6 +48,7 @@
 ; CHECK-NEXT: hasInitWholeWave: false
 ; CHECK-NEXT: dynamicVGPRBlockSize: 0
 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
+; CHECK-NEXT: numKernargPreloadSGPRs: 0
 ; CHECK-NEXT: isWholeWaveFunction: false
 ; CHECK-NEXT: body:
 define amdgpu_kernel void @uniform_long_forward_branch(ptr addrspace(1) %arg, i32 %arg1) #0 {
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
index 0cb9bc095bc50..87c3eb626ef0d 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
@@ -57,6 +57,7 @@
 # FULL-NEXT:  hasInitWholeWave: false
 # FULL-NEXT: dynamicVGPRBlockSize: 0
 # FULL-NEXT: scratchReservedForDynamicVGPRs: 0
+# FULL-NEXT: numKernargPreloadSGPRs: 0
 # FULL-NEXT: isWholeWaveFunction: false
 # FULL-NEXT: body:
 
@@ -167,6 +168,7 @@ body:             |
 # FULL-NEXT: hasInitWholeWave: false
 # FULL-NEXT: dynamicVGPRBlockSize: 0
 # FULL-NEXT: scratchReservedForDynamicVGPRs: 0
+# FULL-NEXT: numKernargPreloadSGPRs: 0
 # FULL-NEXT: isWholeWaveFunction: false
 # FULL-NEXT: body:
 
@@ -248,6 +250,7 @@ body:             |
 # FULL-NEXT: hasInitWholeWave: false
 # FULL-NEXT: dynamicVGPRBlockSize: 0
 # FULL-NEXT: scratchReservedForDynamicVGPRs: 0
+# FULL-NEXT: numKernargPreloadSGPRs: 0
 # FULL-NEXT: isWholeWaveFunction: false
 # FULL-NEXT: body:
 
@@ -330,6 +333,7 @@ body:             |
 # FULL-NEXT: hasInitWholeWave: false
 # FULL-NEXT: dynamicVGPRBlockSize: 0
 # FULL-NEXT: scratchReservedForDynamicVGPRs: 0
+# FULL-NEXT: numKernargPreloadSGPRs: 0
 # FULL-NEXT: isWholeWaveFunction: false
 # FULL-NEXT: body:
 
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
index ab4383b675243..ab3c0335f8ea9 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
@@ -58,6 +58,7 @@
 ; CHECK-NEXT: hasInitWholeWave: false
 ; CHECK-NEXT: dynamicVGPRBlockSize: 0
 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
+; CHECK-NEXT: numKernargPreloadSGPRs: 0
 ; CHECK-NEXT: isWholeWaveFunction: false
 ; CHECK-NEXT: body:
 define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
@@ -110,6 +111,7 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
 ; CHECK-NEXT: hasInitWholeWave: false
 ; CHECK-NEXT: dynamicVGPRBlockSize: 0
 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
+; CHECK-NEXT: numKernargPreloadSGPRs: 0
 ; CHECK-NEXT: isWholeWaveFunction: false
 ; CHECK-NEXT: body:
 define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) {
@@ -186,6 +188,7 @@ define amdgpu_ps void @gds_size_shader(i32 %arg0, i32 inreg %arg1) #5 {
 ; CHECK-NEXT: hasInitWholeWave: false
 ; CHECK-NEXT: dynamicVGPRBlockSize: 0
 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
+; CHECK-NEXT: numKernargPreloadSGPRs: 0
 ; CHECK-NEXT: isWholeWaveFunction: false
 ; CHECK-NEXT: body:
 define void @function() {
@@ -244,6 +247,7 @@ define void @function() {
 ; CHECK-NEXT: hasInitWholeWave: false
 ; CHECK-NEXT: dynamicVGPRBlockSize: 0
 ; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
+; CHECK-NEXT: numKernargPreloadSGPRs: 0
 ; CHECK-NEXT: isWholeWaveFunction: false
 ; CHECK-NEXT: body:
 define void @function_nsz() #0 {
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-mfi.ll b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-mfi.ll
new file mode 100644
index 0000000000000..de9a268805995
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-mfi.ll
@@ -0,0 +1,108 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -stop-after=amdgpu-isel %s -o - | FileCheck --check-prefix=MIR %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -stop-after=amdgpu-isel -o %t.mir %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -start-after=amdgpu-isel -verify-machineinstrs %t.mir -o - | FileCheck --check-prefix=ASM %s
+
+; Test that kernarg preloading information is correctly serialized to MIR and
+; can be round-tripped through MIR serialization/deserialization.
+
+; MIR-LABEL: name: kernarg_preload_single_arg
+; MIR: machineFunctionInfo:
+; MIR: argumentInfo:
+; MIR: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+; MIR: firstKernArgPreloadReg: { reg: '$sgpr8' }
+; MIR: numKernargPreloadSGPRs: 1
+
+; ASM-LABEL: kernarg_preload_single_arg:
+; ASM: .amdhsa_user_sgpr_kernarg_preload_length 1
+; ASM: .amdhsa_user_sgpr_kernarg_preload_offset 0
+define amdgpu_kernel void @kernarg_preload_single_arg(i32 inreg %arg0) {
+entry:
+  %val = add i32 %arg0, 1
+  store i32 %val, ptr addrspace(1) null
+  ret void
+}
+
+; MIR-LABEL: name: kernarg_preload_multiple_args_unaligned
+; MIR: machineFunctionInfo:
+; MIR: argumentInfo:
+; MIR: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+; MIR: firstKernArgPreloadReg: { reg: '$sgpr8' }
+; MIR: numKernargPreloadSGPRs: 5
+
+; ASM-LABEL: kernarg_preload_multiple_args_unaligned:
+; ASM: .amdhsa_user_sgpr_kernarg_preload_length 5
+; ASM: .amdhsa_user_sgpr_kernarg_preload_offset 0
+define amdgpu_kernel void @kernarg_preload_multiple_args_unaligned(i32 inreg %arg0, i64 inreg %arg1, i32 inreg %arg2) {
+entry:
+  %val = add i32 %arg0, %arg2
+  store i32 %val, ptr addrspace(1) null
+  ret void
+}
+
+; MIR-LABEL: name: kernarg_preload_multiple_args_aligned
+; MIR: machineFunctionInfo:
+; MIR: argumentInfo:
+; MIR: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+; MIR: firstKernArgPreloadReg: { reg: '$sgpr8' }
+; MIR: numKernargPreloadSGPRs: 4
+
+; ASM-LABEL: kernarg_preload_multiple_args_aligned:
+; ASM: .amdhsa_user_sgpr_kernarg_preload_length 4
+; ASM: .amdhsa_user_sgpr_kernarg_preload_offset 0
+define amdgpu_kernel void @kernarg_preload_multiple_args_aligned(i64 inreg %arg0, i32 inreg %arg1, i32 inreg %arg2) {
+entry:
+  %val = add i32 %arg1, %arg2
+  store i32 %val, ptr addrspace(1) null
+  ret void
+}
+
+; MIR-LABEL: name: kernarg_preload_with_ptr
+; MIR: machineFunctionInfo:
+; MIR: argumentInfo:
+; MIR: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+; MIR: firstKernArgPreloadReg: { reg: '$sgpr8' }
+; MIR: numKernargPreloadSGPRs: 2
+
+; ASM-LABEL: kernarg_preload_with_ptr:
+; ASM: .amdhsa_user_sgpr_kernarg_preload_length 2
+; ASM: .amdhsa_user_sgpr_kernarg_preload_offset 0
+define amdgpu_kernel void @kernarg_preload_with_ptr(ptr inreg %ptr) {
+entry:
+  %val = load i32, ptr %ptr
+  %add = add i32 %val, 1
+  store i32 %add, ptr %ptr
+  ret void
+}
+
+; MIR-LABEL: name: kernarg_no_preload
+; MIR: machineFunctionInfo:
+; MIR: argumentInfo:
+; MIR: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+; MIR-NOT: firstKernArgPreloadReg
+; MIR: numKernargPreloadSGPRs: 0
+
+; ASM-LABEL: kernarg_no_preload:
+; ASM: .amdhsa_user_sgpr_kernarg_preload_length 0
+define amdgpu_kernel void @kernarg_no_preload(i32 %arg0) {
+entry:
+  %val = add i32 %arg0, 1
+  store i32 %val, ptr addrspace(1) null
+  ret void
+}
+
+; MIR-LABEL: name: kernarg_preload_mixed
+; MIR: machineFunctionInfo:
+; MIR: argumentInfo:
+; MIR: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+; MIR: firstKernArgPreloadReg: { reg: '$sgpr8' }
+; MIR: numKernargPreloadSGPRs: 2
+
+; ASM-LABEL: kernarg_preload_mixed:
+; ASM: .amdhsa_user_sgpr_kernarg_preload_length 2
+define amdgpu_kernel void @kernarg_preload_mixed(i32 inreg %arg0, i32 inreg %arg1, i32 %arg2) {
+entry:
+  %val = add i32 %arg0, %arg1
+  %val2 = add i32 %val, %arg2
+  store i32 %val2, ptr addrspace(1) null
+  ret void
+}

>From 13a6ac79260dac26ce17a0a017e0383a1f8e8fb1 Mon Sep 17 00:00:00 2001
From: tyb0807 <sontuan.vu at amd.com>
Date: Wed, 19 Nov 2025 17:31:51 -0600
Subject: [PATCH 2/4] Address comments

---
 .../CodeGen/MIR/AMDGPU/preload-kernarg-mfi.ll | 146 ++++++++++++++++++
 1 file changed, 146 insertions(+)

diff --git a/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-mfi.ll b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-mfi.ll
index de9a268805995..d96dcfc52e1b0 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-mfi.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-mfi.ll
@@ -106,3 +106,149 @@ entry:
   store i32 %val2, ptr addrspace(1) null
   ret void
 }
+
+; MIR-LABEL: name: kernarg_preload_with_dispatch_ptr
+; MIR: machineFunctionInfo:
+; MIR: argumentInfo:
+; MIR: dispatchPtr: { reg: '$sgpr0_sgpr1' }
+; MIR: kernargSegmentPtr: { reg: '$sgpr2_sgpr3' }
+; MIR: firstKernArgPreloadReg: { reg: '$sgpr4' }
+; MIR: numKernargPreloadSGPRs: 2
+
+; ASM-LABEL: kernarg_preload_with_dispatch_ptr:
+; ASM: .amdhsa_user_sgpr_dispatch_ptr 1
+; ASM: .amdhsa_user_sgpr_kernarg_preload_length 2
+
+define amdgpu_kernel void @kernarg_preload_with_dispatch_ptr(i64 inreg %arg0) #0 {
+entry:
+  %val = add i64 %arg0, 1
+  store i64 %val, ptr addrspace(1) null
+  ret void
+}
+
+attributes #0 = { "amdgpu-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-dispatch-id" }
+
+; MIR-LABEL: name: kernarg_preload_with_queue_ptr
+; MIR: machineFunctionInfo:
+; MIR: argumentInfo:
+; MIR: queuePtr: { reg: '$sgpr0_sgpr1' }
+; MIR: kernargSegmentPtr: { reg: '$sgpr2_sgpr3' }
+; MIR: firstKernArgPreloadReg: { reg: '$sgpr4' }
+; MIR: numKernargPreloadSGPRs: 1
+
+; ASM-LABEL: kernarg_preload_with_queue_ptr:
+; ASM: .amdhsa_user_sgpr_queue_ptr 1
+; ASM: .amdhsa_user_sgpr_kernarg_preload_length 1
+
+define amdgpu_kernel void @kernarg_preload_with_queue_ptr(i32 inreg %arg0) #1 {
+entry:
+  %val = add i32 %arg0, 1
+  store i32 %val, ptr addrspace(1) null
+  ret void
+}
+
+attributes #1 = { "amdgpu-queue-ptr" "amdgpu-no-dispatch-ptr" "amdgpu-no-dispatch-id" }
+
+; MIR-LABEL: name: kernarg_preload_with_multiple_user_sgprs
+; MIR: machineFunctionInfo:
+; MIR: argumentInfo:
+; MIR: dispatchPtr: { reg: '$sgpr0_sgpr1' }
+; MIR: queuePtr: { reg: '$sgpr2_sgpr3' }
+; MIR: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+; MIR: dispatchID: { reg: '$sgpr6_sgpr7' }
+; MIR: firstKernArgPreloadReg: { reg: '$sgpr8' }
+; MIR: numKernargPreloadSGPRs: 2
+
+; ASM-LABEL: kernarg_preload_with_multiple_user_sgprs:
+; ASM: .amdhsa_user_sgpr_dispatch_ptr 1
+; ASM: .amdhsa_user_sgpr_queue_ptr 1
+; ASM: .amdhsa_user_sgpr_dispatch_id 1
+; ASM: .amdhsa_user_sgpr_kernarg_preload_length 2
+
+define amdgpu_kernel void @kernarg_preload_with_multiple_user_sgprs(i64 inreg %arg0) #2 {
+entry:
+  %val = add i64 %arg0, 1
+  store i64 %val, ptr addrspace(1) null
+  ret void
+}
+
+attributes #2 = { "amdgpu-dispatch-ptr" "amdgpu-queue-ptr" "amdgpu-dispatch-id" }
+
+; MIR-LABEL: name: kernarg_preload_without_user_sgprs
+; MIR: machineFunctionInfo:
+; MIR: argumentInfo:
+; MIR: kernargSegmentPtr: { reg: '$sgpr0_sgpr1' }
+; MIR: firstKernArgPreloadReg: { reg: '$sgpr2' }
+; MIR: numKernargPreloadSGPRs: 1
+
+; ASM-LABEL: kernarg_preload_without_user_sgprs:
+; ASM: .amdhsa_user_sgpr_kernarg_preload_length 1
+
+define amdgpu_kernel void @kernarg_preload_without_user_sgprs(i32 inreg %arg0) #3 {
+entry:
+  %val = add i32 %arg0, 1
+  store i32 %val, ptr addrspace(1) null
+  ret void
+}
+
+attributes #3 = { "amdgpu-no-queue-ptr" "amdgpu-no-dispatch-ptr" "amdgpu-no-dispatch-id" }
+
+; MIR-LABEL: name: kernarg_preload_max_args
+; MIR: machineFunctionInfo:
+; MIR: argumentInfo:
+; MIR: dispatchPtr: { reg: '$sgpr0_sgpr1' }
+; MIR: queuePtr: { reg: '$sgpr2_sgpr3' }
+; MIR: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+; MIR: dispatchID: { reg: '$sgpr6_sgpr7' }
+; MIR: firstKernArgPreloadReg: { reg: '$sgpr8' }
+; MIR: numKernargPreloadSGPRs: 8
+
+; ASM-LABEL: kernarg_preload_max_args:
+; ASM: .amdhsa_user_sgpr_kernarg_preload_length 8
+
+define amdgpu_kernel void @kernarg_preload_max_args(
+    i32 inreg %a0, i32 inreg %a1, i32 inreg %a2, i32 inreg %a3,
+    i32 inreg %a4, i32 inreg %a5, i32 inreg %a6, i32 inreg %a7,
+    i32 inreg %a8, i32 inreg %a9, i32 inreg %a10, i32 inreg %a11,
+    i32 inreg %a12, i32 inreg %a13, i32 inreg %a14, i32 inreg %a15) {
+entry:
+  ret void
+}
+
+; MIR-LABEL: name: kernarg_preload_mixed_inreg_and_stack
+; MIR: machineFunctionInfo:
+; MIR: argumentInfo:
+; MIR: firstKernArgPreloadReg: { reg: '$sgpr8' }
+; MIR: numKernargPreloadSGPRs: 2
+
+; ASM-LABEL: kernarg_preload_mixed_inreg_and_stack:
+; ASM: .amdhsa_user_sgpr_kernarg_preload_length 2
+
+define amdgpu_kernel void @kernarg_preload_mixed_inreg_and_stack(
+    i32 inreg %preload0,
+    i32 inreg %preload1,
+    i32 %stack0,
+    i32 %stack1) {
+entry:
+  %val = add i32 %preload0, %preload1
+  %val2 = add i32 %val, %stack0
+  %val3 = add i32 %val2, %stack1
+  store i32 %val3, ptr addrspace(1) null
+  ret void
+}
+
+; MIR-LABEL: name: kernarg_preload_vector_types
+; MIR: machineFunctionInfo:
+; MIR: argumentInfo:
+; MIR: firstKernArgPreloadReg: { reg: '$sgpr8' }
+; MIR: numKernargPreloadSGPRs: 4
+
+; ASM-LABEL: kernarg_preload_vector_types:
+; ASM: .amdhsa_user_sgpr_kernarg_preload_length 4
+
+define amdgpu_kernel void @kernarg_preload_vector_types(<4 x i32> inreg %vec) {
+entry:
+  %elem = extractelement <4 x i32> %vec, i32 0
+  store i32 %elem, ptr addrspace(1) null
+  ret void
+}

>From d6b6f6aa2afcafcbff7ab533be120797a7377b08 Mon Sep 17 00:00:00 2001
From: tyb0807 <sontuan.vu at amd.com>
Date: Thu, 20 Nov 2025 17:36:27 -0600
Subject: [PATCH 3/4] More tests + address comments

---
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 19 +++++++++++++------
 .../Target/AMDGPU/SIMachineFunctionInfo.cpp   | 14 +++++++-------
 ...d-kernarg-invalid-register-class-error.mir | 18 ++++++++++++++++++
 ...ad-kernarg-invalid-register-name-error.mir | 19 +++++++++++++++++++
 .../preload-kernarg-stack-type-error.mir      | 18 ++++++++++++++++++
 5 files changed, 75 insertions(+), 13 deletions(-)
 create mode 100644 llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-class-error.mir
 create mode 100644 llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-name-error.mir
 create mode 100644 llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-stack-type-error.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index f0da640b08a01..413b4bc5c4843 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -2030,15 +2030,23 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
   // Parse FirstKernArgPreloadReg separately, since it's a Register,
   // not ArgDescriptor.
   if (YamlMFI.ArgInfo && YamlMFI.ArgInfo->FirstKernArgPreloadReg) {
-    const auto &A = *YamlMFI.ArgInfo->FirstKernArgPreloadReg;
+    const yaml::SIArgument &A = *YamlMFI.ArgInfo->FirstKernArgPreloadReg;
 
     if (!A.IsRegister) {
+      // Stack arguments carry no RegisterName.SourceRange, so fall back to
+      // the start of the main buffer as the diagnostic location.
       const MemoryBuffer &Buffer =
           *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());
-      Error =
-          SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 0,
-                       SourceMgr::DK_Error,
-                       "firstKernArgPreloadReg must be a register", "", {}, {});
+      // Create a minimal valid source range
+      SMLoc Loc = SMLoc::getFromPointer(Buffer.getBufferStart());
+      SMRange Range(Loc, Loc);
+      
+      Error = SMDiagnostic(*PFS.SM, Loc, Buffer.getBufferIdentifier(), 1, 0,
+                           SourceMgr::DK_Error,
+                           "firstKernArgPreloadReg must be a register, not a stack location",
+                           "", {}, {});
+      
+      SourceRange = Range;
       return true;
     }
 
@@ -2052,7 +2060,6 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
       return diagnoseRegisterClass(A.RegisterName);
 
     MFI->ArgInfo.FirstKernArgPreloadReg = Reg;
-
     MFI->NumUserSGPRs += YamlMFI.NumKernargPreloadSGPRs;
   }
 
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index a716a99ee98e6..33e3b18a43a12 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -721,14 +721,14 @@ convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
   // not ArgDescriptor.
   if (ArgInfo.FirstKernArgPreloadReg) {
     Register Reg = ArgInfo.FirstKernArgPreloadReg;
-    if (Reg.isPhysical()) {
-      yaml::SIArgument SA = yaml::SIArgument::createArgument(true);
-      raw_string_ostream OS(SA.RegisterName.Value);
-      OS << printReg(Reg, &TRI);
+    assert(Reg.isPhysical() && "FirstKernArgPreloadReg must be a physical register");
 
-      AI.FirstKernArgPreloadReg = SA;
-      Any = true;
-    }
+    yaml::SIArgument SA = yaml::SIArgument::createArgument(true);
+    raw_string_ostream OS(SA.RegisterName.Value);
+    OS << printReg(Reg, &TRI);
+
+    AI.FirstKernArgPreloadReg = SA;
+    Any = true;
   }
 
   if (Any)
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-class-error.mir b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-class-error.mir
new file mode 100644
index 0000000000000..10391dbaa9ba8
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-class-error.mir
@@ -0,0 +1,18 @@
+# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -start-after=amdgpu-isel -verify-machineinstrs %s -o /dev/null 2>&1 | FileCheck %s
+
+---
+# CHECK: error: {{.*}} incorrect register class for field
+name: kernarg_preload_wrong_register_class
+tracksRegLiveness: true
+machineFunctionInfo:
+  explicitKernArgSize: 4
+  maxKernArgAlign: 4
+  numKernargPreloadSGPRs: 1
+  isEntryFunction: true
+  argumentInfo:
+    kernargSegmentPtr: { reg: '$sgpr0_sgpr1' }
+    firstKernArgPreloadReg: { reg: '$vgpr0' }  # ERROR: VGPR instead of SGPR
+body: |
+  bb.0:
+    S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-name-error.mir b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-name-error.mir
new file mode 100644
index 0000000000000..dbf739c41003b
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-name-error.mir
@@ -0,0 +1,19 @@
+# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -start-after=amdgpu-isel -verify-machineinstrs %s -o /dev/null 2>&1 | FileCheck %s
+
+---
+# CHECK: Invalid register name
+name: kernarg_preload_invalid_register_name
+tracksRegLiveness: true
+machineFunctionInfo:
+  explicitKernArgSize: 4
+  maxKernArgAlign: 4
+  numKernargPreloadSGPRs: 1
+  isEntryFunction: true
+  argumentInfo:
+    kernargSegmentPtr: { reg: '$sgpr0_sgpr1' }
+    firstKernArgPreloadReg: { reg: '$invalid_reg' }  # ERROR: Invalid register name
+body: |
+  bb.0:
+    S_ENDPGM 0
+...
+
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-stack-type-error.mir b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-stack-type-error.mir
new file mode 100644
index 0000000000000..de2d8f586c2c1
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-stack-type-error.mir
@@ -0,0 +1,18 @@
+# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -start-after=amdgpu-isel -verify-machineinstrs %s -o /dev/null 2>&1 | FileCheck %s
+
+---
+# CHECK: error: {{.*}} firstKernArgPreloadReg must be a register, not a stack location
+name: kernarg_preload_stack_argument
+tracksRegLiveness: true
+machineFunctionInfo:
+  explicitKernArgSize: 4
+  maxKernArgAlign: 4
+  numKernargPreloadSGPRs: 1
+  isEntryFunction: true
+  argumentInfo:
+    kernargSegmentPtr: { reg: '$sgpr0_sgpr1' }
+    firstKernArgPreloadReg: { offset: 0 }  # ERROR: Stack instead of register
+body: |
+  bb.0:
+    S_ENDPGM 0
+...

>From 227220351cbd1945a4bb04cea95816cdbdaf068a Mon Sep 17 00:00:00 2001
From: tyb0807 <sontuan.vu at amd.com>
Date: Sat, 22 Nov 2025 14:58:32 -0600
Subject: [PATCH 4/4] Format + review comments

---
 llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp       | 12 ++++++------
 llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp     |  3 ++-
 .../preload-kernarg-invalid-register-class-error.mir |  2 +-
 .../preload-kernarg-invalid-register-name-error.mir  |  2 +-
 .../MIR/AMDGPU/preload-kernarg-stack-type-error.mir  |  2 +-
 5 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 413b4bc5c4843..05e52d043c3b6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -2040,12 +2040,12 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
       // Create a minimal valid source range
       SMLoc Loc = SMLoc::getFromPointer(Buffer.getBufferStart());
       SMRange Range(Loc, Loc);
-      
-      Error = SMDiagnostic(*PFS.SM, Loc, Buffer.getBufferIdentifier(), 1, 0,
-                           SourceMgr::DK_Error,
-                           "firstKernArgPreloadReg must be a register, not a stack location",
-                           "", {}, {});
-      
+
+      Error = SMDiagnostic(
+          *PFS.SM, Loc, Buffer.getBufferIdentifier(), 1, 0, SourceMgr::DK_Error,
+          "firstKernArgPreloadReg must be a register, not a stack location", "",
+          {}, {});
+
       SourceRange = Range;
       return true;
     }
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 33e3b18a43a12..9abda275d7e42 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -721,7 +721,8 @@ convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
   // not ArgDescriptor.
   if (ArgInfo.FirstKernArgPreloadReg) {
     Register Reg = ArgInfo.FirstKernArgPreloadReg;
-    assert(Reg.isPhysical() && "FirstKernArgPreloadReg must be a physical register");
+    assert(Reg.isPhysical() &&
+           "FirstKernArgPreloadReg must be a physical register");
 
     yaml::SIArgument SA = yaml::SIArgument::createArgument(true);
     raw_string_ostream OS(SA.RegisterName.Value);
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-class-error.mir b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-class-error.mir
index 10391dbaa9ba8..e7c1740711952 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-class-error.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-class-error.mir
@@ -1,4 +1,4 @@
-# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -start-after=amdgpu-isel -verify-machineinstrs %s -o /dev/null 2>&1 | FileCheck %s
+# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=none -filetype=null %s 2>&1 | FileCheck %s
 
 ---
 # CHECK: error: {{.*}} incorrect register class for field
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-name-error.mir b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-name-error.mir
index dbf739c41003b..c74a437b5221c 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-name-error.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-invalid-register-name-error.mir
@@ -1,4 +1,4 @@
-# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -start-after=amdgpu-isel -verify-machineinstrs %s -o /dev/null 2>&1 | FileCheck %s
+# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=none -filetype=null %s 2>&1 | FileCheck %s
 
 ---
 # CHECK: Invalid register name
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-stack-type-error.mir b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-stack-type-error.mir
index de2d8f586c2c1..19076044170fd 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-stack-type-error.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/preload-kernarg-stack-type-error.mir
@@ -1,4 +1,4 @@
-# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -start-after=amdgpu-isel -verify-machineinstrs %s -o /dev/null 2>&1 | FileCheck %s
+# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=none -filetype=null %s 2>&1 | FileCheck %s
 
 ---
 # CHECK: error: {{.*}} firstKernArgPreloadReg must be a register, not a stack location



More information about the llvm-commits mailing list