[llvm] AMDGPU: Add amdgpu-num-agpr attribute to control AGPR allocation (PR #128034)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 24 03:33:57 PST 2025


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/128034

>From 288902beb48d921149898b6a15d3097b8d2450e9 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 19 Feb 2025 21:59:31 +0700
Subject: [PATCH 1/2] AMDGPU: Add amdgpu-num-agpr attribute to control AGPR
 allocation

This provides a range to decide how to subdivide the vector register
budget on gfx90a+. A single value declares the minimum AGPRs that
should be allocatable. Eventually this should replace amdgpu-no-agpr.

I want this primarily for testing AGPR allocation behavior. We should
have a heuristic that tries to detect a reasonable number of AGPRs to keep
allocatable.
---
 llvm/docs/AMDGPUUsage.rst                   |  16 +
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp   |  56 ++-
 llvm/test/CodeGen/AMDGPU/amdgpu-num-agpr.ll | 508 ++++++++++++++++++++
 3 files changed, 569 insertions(+), 11 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-num-agpr.ll

diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 9932074830866..d3feebe73d16a 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1707,6 +1707,22 @@ The AMDGPU backend supports the following LLVM IR attributes.
                                                       as hidden. Hidden arguments are managed by the compiler and are not part of
                                                       the explicit arguments supplied by the user.
 
+     "amdgpu-num-agpr"="min(,max)"                    Indicates a minimum and maximum range for the number of AGPRs to make
+                                                      available to allocate. The values will be rounded up to the next multiple
+                                                      of the allocation granularity (4). The minimum value is interpreted as the
+                                                      minimum number of AGPRs the function will require to allocate. If only one
+                                                      value is specified, it is interpreted as the minimum register budget.
+
+                                                      The values may be ignored if satisfying them would violate other allocation
+                                                      constraints.
+
+                                                      The behavior is undefined if a function which requires more AGPRs than the
+                                                      lower bound is reached through any function marked with a higher value of this
+                                                      attribute. A minimum value of 0 indicates the function does not require
+                                                      any AGPRs. A minimum of 0 is equivalent to "amdgpu-no-agpr".
+
+                                                      This is only relevant on targets with AGPRs which support accum_offset (gfx90a+).
+
      "amdgpu-sgpr-hazard-wait"                        Disabled SGPR hazard wait insertion if set to 0.
                                                       Exists for testing performance impact of SGPR hazard waits only.
 
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 924aa45559366..de1f31634d27a 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -572,9 +572,10 @@ MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg(
 std::pair<unsigned, unsigned>
 SIRegisterInfo::getMaxNumVectorRegs(const MachineFunction &MF) const {
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
-  unsigned MaxNumAGPRs = MaxNumVGPRs;
-  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
+  const unsigned MaxVectorRegs = ST.getMaxNumVGPRs(MF);
+
+  unsigned MaxNumVGPRs = MaxVectorRegs;
+  unsigned MaxNumAGPRs = 0;
 
   // On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
   // a wave may have up to 512 total vector registers combining together both
@@ -585,16 +586,49 @@ SIRegisterInfo::getMaxNumVectorRegs(const MachineFunction &MF) const {
   // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
   //       register file accordingly.
   if (ST.hasGFX90AInsts()) {
-    if (MFI->mayNeedAGPRs()) {
-      MaxNumVGPRs /= 2;
-      MaxNumAGPRs = MaxNumVGPRs;
+    unsigned MinNumAGPRs = 0;
+    const unsigned TotalNumAGPRs = AMDGPU::AGPR_32RegClass.getNumRegs();
+    const unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
+
+    const std::pair<unsigned, unsigned> DefaultNumAGPR = {~0u, ~0u};
+
+    // TODO: Replace amdgpu-no-agpr with amdgpu-num-agpr=0
+    // TODO: Move this logic into subtarget on IR function
+    //
+    // TODO: The lower bound should probably force the number of required
+    // registers up, overriding amdgpu-waves-per-eu.
+    std::tie(MinNumAGPRs, MaxNumAGPRs) = AMDGPU::getIntegerPairAttribute(
+        MF.getFunction(), "amdgpu-num-agpr", DefaultNumAGPR,
+        /*OnlyFirstRequired=*/true);
+
+    if (MinNumAGPRs == DefaultNumAGPR.first) {
+      // Default to splitting half the registers if AGPRs are required.
+
+      if (MFI->mayNeedAGPRs())
+        MinNumAGPRs = MaxNumAGPRs = MaxVectorRegs / 2;
+      else
+        MinNumAGPRs = 0;
     } else {
-      if (MaxNumVGPRs > TotalNumVGPRs) {
-        MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
-        MaxNumVGPRs = TotalNumVGPRs;
-      } else
-        MaxNumAGPRs = 0;
+      // Align to accum_offset's allocation granularity.
+      MinNumAGPRs = alignTo(MinNumAGPRs, 4);
+
+      MinNumAGPRs = std::min(MinNumAGPRs, TotalNumAGPRs);
     }
+
+    // Clamp values to be in bounds of our limits, and ensure min <= max.
+
+    MaxNumAGPRs = std::min(std::max(MinNumAGPRs, MaxNumAGPRs), MaxVectorRegs);
+    MinNumAGPRs = std::min(std::min(MinNumAGPRs, TotalNumAGPRs), MaxNumAGPRs);
+
+    MaxNumVGPRs = std::min(MaxVectorRegs - MinNumAGPRs, TotalNumVGPRs);
+    MaxNumAGPRs = std::min(MaxVectorRegs - MaxNumVGPRs, MaxNumAGPRs);
+
+    assert(MaxNumVGPRs + MaxNumAGPRs <= MaxVectorRegs &&
+           MaxNumAGPRs <= TotalNumAGPRs && MaxNumVGPRs <= TotalNumVGPRs &&
+           "invalid register counts");
+  } else if (ST.hasMAIInsts()) {
+    // On gfx908 the number of AGPRs always equals the number of VGPRs.
+    MaxNumAGPRs = MaxNumVGPRs = MaxVectorRegs;
   }
 
   return std::pair(MaxNumVGPRs, MaxNumAGPRs);
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-num-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-num-agpr.ll
new file mode 100644
index 0000000000000..8cad42eae37cd
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-num-agpr.ll
@@ -0,0 +1,508 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=null %s 2>&1 | FileCheck --implicit-check-not=warning -check-prefix=WARN %s
+
+; Check the effect that amdgpu-num-agpr has on register reservations.
+;
+; Asm clobbers will print a warning when they clobber reserved
+; registers, and should be uniquely identified in the message from the
+; !srcloc values.
+
+; The occupancy target warnings should be a side effect of violating
+; the register budget with asm.
+
+; WARN: warning: inline asm clobber list contains reserved registers: a0 at line 1
+define amdgpu_kernel void @min_num_agpr_0_0__amdgpu_no_agpr() #0 {
+  call void asm sideeffect "; clobber $0","~{a0}"(), !srcloc !{i32 1}
+  ret void
+}
+
+attributes #0 = { "amdgpu-waves-per-eu"="8,8" "amdgpu-flat-work-group-size"="64,64" "amdgpu-num-agpr"="0,0" "amdgpu-no-agpr" }
+
+; Check parse of single entry 0
+
+; WARN: warning: inline asm clobber list contains reserved registers: a0 at line 2
+define amdgpu_kernel void @min_num_agpr_0__amdgpu_no_agpr() #1 {
+  call void asm sideeffect "; clobber $0","~{a0}"(), !srcloc !{i32 2}
+  call void asm sideeffect "; clobber $0","~{v0}"(), !srcloc !{i32 2}
+  ret void
+}
+
+attributes #1 = { "amdgpu-waves-per-eu"="8,8" "amdgpu-flat-work-group-size"="64,64" "amdgpu-num-agpr"="0" "amdgpu-no-agpr" }
+
+
+; Undefined use
+define amdgpu_kernel void @min_num_agpr_1_1__amdgpu_no_agpr() #2 {
+  call void asm sideeffect "; clobber $0","~{a0}"(), !srcloc !{i32 3}
+  ret void
+}
+
+attributes #2 = { "amdgpu-waves-per-eu"="8,8" "amdgpu-flat-work-group-size"="64,64" "amdgpu-num-agpr"="1,1" "amdgpu-no-agpr" }
+
+; Check parse of single entry 4, interpreted as the minimum. Total budget is 64.
+; WARN: warning: <unknown>:0:0: failed to meet occupancy target given by 'amdgpu-waves-per-eu' in 'min_num_agpr_4__amdgpu_no_agpr': desired occupancy was 8, final occupancy is 7
+; WARN: warning: inline asm clobber list contains reserved registers: v60 at line 4
+define amdgpu_kernel void @min_num_agpr_4__amdgpu_no_agpr() #3 {
+  call void asm sideeffect "; clobber $0","~{a0}"(), !srcloc !{i32 4}
+  call void asm sideeffect "; clobber $0","~{a3}"(), !srcloc !{i32 4}
+  call void asm sideeffect "; clobber $0","~{v59}"(), !srcloc !{i32 4}
+  call void asm sideeffect "; clobber $0","~{v60}"(), !srcloc !{i32 4}
+  ret void
+}
+
+attributes #3 = { "amdgpu-waves-per-eu"="8,8" "amdgpu-flat-work-group-size"="64,64" "amdgpu-num-agpr"="4" "amdgpu-no-agpr" }
+
+
+; Allocation granularity requires rounding this to use 4 AGPRs, so the
+; top 4 VGPRs are unavailable. The maximum agpr count is also padded
+; up to the minimum of 4
+
+; WARN: warning: <unknown>:0:0: failed to meet occupancy target given by 'amdgpu-waves-per-eu' in 'min_num_agpr_occ8_1_1': desired occupancy was 8, final occupancy is 7
+; WARN: warning: inline asm clobber list contains reserved registers: a4 at line 5
+; WARN: warning: inline asm clobber list contains reserved registers: v60 at line 5
+; WARN: warning: inline asm clobber list contains reserved registers: v63 at line 5
+define amdgpu_kernel void @min_num_agpr_occ8_1_1() #4 {
+  call void asm sideeffect "; clobber $0","~{a3}"(), !srcloc !{i32 5}
+  call void asm sideeffect "; clobber $0","~{a4}"(), !srcloc !{i32 5}
+  call void asm sideeffect "; clobber $0","~{v59}"(), !srcloc !{i32 5}
+  call void asm sideeffect "; clobber $0","~{v60}"(), !srcloc !{i32 5}
+  call void asm sideeffect "; clobber $0","~{v63}"(), !srcloc !{i32 5}
+  ret void
+}
+
+attributes #4 = { "amdgpu-waves-per-eu"="8,8" "amdgpu-flat-work-group-size"="64,64" "amdgpu-num-agpr"="1,1" }
+
+
+; WARN: warning: <unknown>:0:0: failed to meet occupancy target given by 'amdgpu-waves-per-eu' in 'min_num_agpr_64_64__amdgpu_no_agpr': desired occupancy was 8, final occupancy is 7
+; WARN: warning: inline asm clobber list contains reserved registers: v0 at line 6
+define amdgpu_kernel void @min_num_agpr_64_64__amdgpu_no_agpr() #5 {
+  call void asm sideeffect "; clobber $0","~{a63}"(), !srcloc !{i32 6}
+  call void asm sideeffect "; clobber $0","~{v0}"(), !srcloc !{i32 6}
+  ret void
+}
+
+attributes #5 = { "amdgpu-waves-per-eu"="8,8" "amdgpu-flat-work-group-size"="64,64" "amdgpu-num-agpr"="64,64" "amdgpu-no-agpr" }
+
+; No free VGPRs
+; WARN: warning: inline asm clobber list contains reserved registers: v0 at line 7
+define amdgpu_kernel void @min_num_agpr_64_64() #6 {
+  call void asm sideeffect "; clobber $0","~{v0}"(), !srcloc !{i32 7}
+  call void asm sideeffect "; clobber $0","~{a0}"(), !srcloc !{i32 7}
+  ret void
+}
+
+attributes #6 = { "amdgpu-waves-per-eu"="8,8" "amdgpu-flat-work-group-size"="64,64" "amdgpu-num-agpr"="64,64" }
+
+; WARN: warning: <unknown>:0:0: failed to meet occupancy target given by 'amdgpu-waves-per-eu' in 'min_num_agpr_63_64': desired occupancy was 8, final occupancy is 7
+; WARN: warning: inline asm clobber list contains reserved registers: v0 at line 8
+; WARN: warning: inline asm clobber list contains reserved registers: v3 at line 8
+define amdgpu_kernel void @min_num_agpr_63_64() #7 {
+  call void asm sideeffect "; clobber $0","~{v0}"(), !srcloc !{i32 8}
+  call void asm sideeffect "; clobber $0","~{v3}"(), !srcloc !{i32 8}
+  call void asm sideeffect "; clobber $0","~{a59}"(), !srcloc !{i32 8}
+  call void asm sideeffect "; clobber $0","~{a60}"(), !srcloc !{i32 8}
+  call void asm sideeffect "; clobber $0","~{a0}"(), !srcloc !{i32 8}
+  ret void
+}
+
+attributes #7 = { "amdgpu-waves-per-eu"="8,8" "amdgpu-flat-work-group-size"="64,64" "amdgpu-num-agpr"="63,64" }
+
+
+; No-op value.
+define amdgpu_kernel void @min_num_agpr_occ8_0_64() #8 {
+  call void asm sideeffect "; clobber $0","~{v0}"(), !srcloc !{i32 9}
+  call void asm sideeffect "; clobber $0","~{v59}"(), !srcloc !{i32 9}
+  ret void
+}
+
+attributes #8 = { "amdgpu-waves-per-eu"="8,8" "amdgpu-flat-work-group-size"="64,64" "amdgpu-num-agpr"="0,64" }
+
+
+; Register budget 64
+; WARN: warning: <unknown>:0:0: failed to meet occupancy target given by 'amdgpu-waves-per-eu' in 'min_num_agpr_occ8_11_59': desired occupancy was 8, final occupancy is 7
+; WARN: warning: inline asm clobber list contains reserved registers: a12 at line 10
+; WARN: warning: inline asm clobber list contains reserved registers: v52 at line 10
+define amdgpu_kernel void @min_num_agpr_occ8_11_59() #9 {
+  call void asm sideeffect "; clobber $0","~{a11}"(), !srcloc !{i32 10}
+  call void asm sideeffect "; clobber $0","~{a12}"(), !srcloc !{i32 10}
+  call void asm sideeffect "; clobber $0","~{v51}"(), !srcloc !{i32 10}
+  call void asm sideeffect "; clobber $0","~{v52}"(), !srcloc !{i32 10}
+  ret void
+}
+
+attributes #9 = { "amdgpu-waves-per-eu"="8,8" "amdgpu-flat-work-group-size"="64,64" "amdgpu-num-agpr"="11,59" }
+
+
+; WARN: warning: <unknown>:0:0: failed to meet occupancy target given by 'amdgpu-waves-per-eu' in 'min_num_agpr_occ8_12_59': desired occupancy was 8, final occupancy is 7
+; WARN: warning: inline asm clobber list contains reserved registers: a12 at line 11
+; WARN: warning: inline asm clobber list contains reserved registers: v52 at line 11
+define amdgpu_kernel void @min_num_agpr_occ8_12_59() #10 {
+  call void asm sideeffect "; clobber $0","~{a11}"(), !srcloc !{i32 11}
+  call void asm sideeffect "; clobber $0","~{a12}"(), !srcloc !{i32 11}
+  call void asm sideeffect "; clobber $0","~{v51}"(), !srcloc !{i32 11}
+  call void asm sideeffect "; clobber $0","~{v52}"(), !srcloc !{i32 11}
+  ret void
+}
+
+attributes #10 = { "amdgpu-waves-per-eu"="8,8" "amdgpu-flat-work-group-size"="64,64" "amdgpu-num-agpr"="12,59" }
+
+
+; WARN: warning: <unknown>:0:0: failed to meet occupancy target given by 'amdgpu-waves-per-eu' in 'min_num_agpr_occ8_12_20': desired occupancy was 8, final occupancy is 7
+; WARN: warning: inline asm clobber list contains reserved registers: a12 at line 12
+; WARN: warning: inline asm clobber list contains reserved registers: v52 at line 12
+define amdgpu_kernel void @min_num_agpr_occ8_12_20() #11 {
+  call void asm sideeffect "; clobber $0","~{a11}"(), !srcloc !{i32 12}
+  call void asm sideeffect "; clobber $0","~{a12}"(), !srcloc !{i32 12}
+  call void asm sideeffect "; clobber $0","~{v51}"(), !srcloc !{i32 12}
+  call void asm sideeffect "; clobber $0","~{v52}"(), !srcloc !{i32 12}
+  ret void
+}
+
+attributes #11 = { "amdgpu-waves-per-eu"="8,8" "amdgpu-flat-work-group-size"="64,64" "amdgpu-num-agpr"="12,20" }
+
+
+; WARN: warning: inline asm clobber list contains reserved registers: a20 at line 13
+define amdgpu_kernel void @min_num_agpr_occ1_12_20() #12 {
+  call void asm sideeffect "; clobber $0","~{a12}"(), !srcloc !{i32 13}
+  call void asm sideeffect "; clobber $0","~{a19}"(), !srcloc !{i32 13}
+  call void asm sideeffect "; clobber $0","~{a20}"(), !srcloc !{i32 13}
+  call void asm sideeffect "; clobber $0","~{v0}"(), !srcloc !{i32 13}
+  call void asm sideeffect "; clobber $0","~{v20}"(), !srcloc !{i32 13}
+  call void asm sideeffect "; clobber $0","~{v255}"(), !srcloc !{i32 13}
+  ret void
+}
+
+attributes #12 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="64,64" "amdgpu-num-agpr"="12,20" }
+
+; WARN: warning: inline asm clobber list contains reserved registers: a20 at line 14
+define amdgpu_kernel void @min_num_agpr_occ1_13_20() #13 {
+  call void asm sideeffect "; clobber $0","~{a11}"(), !srcloc !{i32 14}
+  call void asm sideeffect "; clobber $0","~{a12}"(), !srcloc !{i32 14}
+  call void asm sideeffect "; clobber $0","~{a13}"(), !srcloc !{i32 14}
+  call void asm sideeffect "; clobber $0","~{a19}"(), !srcloc !{i32 14}
+  call void asm sideeffect "; clobber $0","~{a20}"(), !srcloc !{i32 14}
+  call void asm sideeffect "; clobber $0","~{v51}"(), !srcloc !{i32 14}
+  call void asm sideeffect "; clobber $0","~{v20}"(), !srcloc !{i32 14}
+  call void asm sideeffect "; clobber $0","~{v255}"(), !srcloc !{i32 14}
+  ret void
+}
+
+attributes #13 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="64,64" "amdgpu-num-agpr"="13,20" }
+
+
+; WARN: warning: <unknown>:0:0: failed to meet occupancy target given by 'amdgpu-waves-per-eu' in 'min_num_agpr_occ2_13_20': desired occupancy was 2, final occupancy is 1
+; WARN: warning: inline asm clobber list contains reserved registers: a16 at line 15
+; WARN: warning: inline asm clobber list contains reserved registers: a20 at line 15
+; WARN: warning: inline asm clobber list contains reserved registers: v240 at line 15
+define amdgpu_kernel void @min_num_agpr_occ2_13_20() #14 {
+  call void asm sideeffect "; clobber $0","~{a15}"(), !srcloc !{i32 15}
+  call void asm sideeffect "; clobber $0","~{a16}"(), !srcloc !{i32 15}
+  call void asm sideeffect "; clobber $0","~{a20}"(), !srcloc !{i32 15}
+
+  call void asm sideeffect "; clobber $0","~{v239}"(), !srcloc !{i32 15}
+  call void asm sideeffect "; clobber $0","~{v240}"(), !srcloc !{i32 15}
+
+  ret void
+}
+
+attributes #14 = { "amdgpu-waves-per-eu"="2,2" "amdgpu-flat-work-group-size"="64,64" "amdgpu-num-agpr"="13,20" }
+
+
+; Test maximum exceeds the hardware limit.
+define amdgpu_kernel void @min_num_agpr_occ1_13_257() #15 {
+  call void asm sideeffect "; clobber $0","~{a255}"(), !srcloc !{i32 16}
+  call void asm sideeffect "; clobber $0","~{v255}"(), !srcloc !{i32 16}
+  ret void
+}
+
+attributes #15 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="64,64" "amdgpu-num-agpr"="13,257" }
+
+
+; Test min and max exceeds the hardware limit.
+define amdgpu_kernel void @min_num_agpr_occ1_257_257() #16 {
+  call void asm sideeffect "; clobber $0","~{a255}"(), !srcloc !{i32 17}
+  call void asm sideeffect "; clobber $0","~{v255}"(), !srcloc !{i32 17}
+  ret void
+}
+
+attributes #16 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="64,64" "amdgpu-num-agpr"="257,257" }
+
+
+; Test round up hits the hardware limit
+define amdgpu_kernel void @min_num_agpr_occ1_255_255() #17 {
+  call void asm sideeffect "; clobber $0","~{a255}"(), !srcloc !{i32 18}
+  call void asm sideeffect "; clobber $0","~{v255}"(), !srcloc !{i32 18}
+  ret void
+}
+
+attributes #17 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="64,64" "amdgpu-num-agpr"="255,255" }
+
+
+; Test round up hits the hardware limit
+define amdgpu_kernel void @min_num_agpr_occ1_253_259() #18 {
+  call void asm sideeffect "; clobber $0","~{a255}"(), !srcloc !{i32 19}
+  call void asm sideeffect "; clobber $0","~{v255}"(), !srcloc !{i32 19}
+  ret void
+}
+
+attributes #18 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="64,64" "amdgpu-num-agpr"="253,259" }
+
+; With a minimum of 0, we are not required to allocate any AGPRs
+; WARN: warning: <unknown>:0:0: failed to meet occupancy target given by 'amdgpu-waves-per-eu' in 'min_num_agpr_occ4_0_64': desired occupancy was 4, final occupancy is 2
+; WARN: warning: inline asm clobber list contains reserved registers: a0 at line 20
+; WARN: warning: inline asm clobber list contains reserved registers: a63 at line 20
+; WARN: warning: inline asm clobber list contains reserved registers: a64 at line 20
+; WARN: warning: inline asm clobber list contains reserved registers: v128 at line 20
+define amdgpu_kernel void @min_num_agpr_occ4_0_64() #19 {
+  call void asm sideeffect "; clobber $0","~{a0}"(), !srcloc !{i32 20}
+  call void asm sideeffect "; clobber $0","~{a63}"(), !srcloc !{i32 20}
+  call void asm sideeffect "; clobber $0","~{a64}"(), !srcloc !{i32 20}
+  call void asm sideeffect "; clobber $0","~{v63}"(), !srcloc !{i32 20}
+  call void asm sideeffect "; clobber $0","~{v64}"(), !srcloc !{i32 20}
+  call void asm sideeffect "; clobber $0","~{v127}"(), !srcloc !{i32 20}
+  call void asm sideeffect "; clobber $0","~{v128}"(), !srcloc !{i32 20}
+  ret void
+}
+
+attributes #19 = { "amdgpu-waves-per-eu"="4,4" "amdgpu-flat-work-group-size"="64,64" "amdgpu-num-agpr"="0,64" }
+
+
+; With a non-0 minimum, we must allocate at least 4 AGPRs. The rest of
+; the budget is for VGPRs.
+; WARN: warning: <unknown>:0:0: failed to meet occupancy target given by 'amdgpu-waves-per-eu' in 'min_num_agpr_occ4_1_64': desired occupancy was 4, final occupancy is 2
+; WARN: warning: inline asm clobber list contains reserved registers: a4 at line 21
+; WARN: warning: inline asm clobber list contains reserved registers: a63 at line 21
+; WARN: warning: inline asm clobber list contains reserved registers: a64 at line 21
+; WARN: warning: inline asm clobber list contains reserved registers: v124 at line 21
+define amdgpu_kernel void @min_num_agpr_occ4_1_64() #20 {
+  call void asm sideeffect "; clobber $0","~{a0}"(), !srcloc !{i32 21}
+  call void asm sideeffect "; clobber $0","~{a3}"(), !srcloc !{i32 21}
+  call void asm sideeffect "; clobber $0","~{a4}"(), !srcloc !{i32 21}
+  call void asm sideeffect "; clobber $0","~{a63}"(), !srcloc !{i32 21}
+  call void asm sideeffect "; clobber $0","~{a64}"(), !srcloc !{i32 21}
+  call void asm sideeffect "; clobber $0","~{v63}"(), !srcloc !{i32 21}
+  call void asm sideeffect "; clobber $0","~{v64}"(), !srcloc !{i32 21}
+  call void asm sideeffect "; clobber $0","~{v123}"(), !srcloc !{i32 21}
+  call void asm sideeffect "; clobber $0","~{v124}"(), !srcloc !{i32 21}
+  ret void
+}
+
+attributes #20 = { "amdgpu-waves-per-eu"="4,4" "amdgpu-flat-work-group-size"="64,64" "amdgpu-num-agpr"="1,64" }
+
+; 128 vector registers
+; WARN: warning: <unknown>:0:0: failed to meet occupancy target given by 'amdgpu-waves-per-eu' in 'min_num_agpr_occ4_32_64': desired occupancy was 4, final occupancy is 3
+; WARN: warning: inline asm clobber list contains reserved registers: a32 at line 22
+; WARN: warning: inline asm clobber list contains reserved registers: v96 at line 22
+define amdgpu_kernel void @min_num_agpr_occ4_32_64() #21 {
+  call void asm sideeffect "; clobber $0","~{a31}"(), !srcloc !{i32 22}
+  call void asm sideeffect "; clobber $0","~{a32}"(), !srcloc !{i32 22}
+  call void asm sideeffect "; clobber $0","~{v95}"(), !srcloc !{i32 22}
+  call void asm sideeffect "; clobber $0","~{v96}"(), !srcloc !{i32 22}
+  ret void
+}
+
+attributes #21 = { "amdgpu-waves-per-eu"="4,4" "amdgpu-flat-work-group-size"="64,64" "amdgpu-num-agpr"="32,64" }
+
+; Evenly partition the 128 vector registers
+; WARN: warning: <unknown>:0:0: failed to meet occupancy target given by 'amdgpu-waves-per-eu' in 'min_num_agpr_occ4_64_64': desired occupancy was 4, final occupancy is 3
+; WARN: warning: inline asm clobber list contains reserved registers: a64 at line 23
+; WARN: warning: inline asm clobber list contains reserved registers: v64 at line 23
+define amdgpu_kernel void @min_num_agpr_occ4_64_64() #22 {
+  call void asm sideeffect "; clobber $0","~{a63}"(), !srcloc !{i32 23}
+  call void asm sideeffect "; clobber $0","~{a64}"(), !srcloc !{i32 23}
+  call void asm sideeffect "; clobber $0","~{v63}"(), !srcloc !{i32 23}
+  call void asm sideeffect "; clobber $0","~{v64}"(), !srcloc !{i32 23}
+  ret void
+}
+
+attributes #22 = { "amdgpu-waves-per-eu"="4,4" "amdgpu-flat-work-group-size"="64,64" "amdgpu-num-agpr"="64,64" }
+
+; We are not required to allocate any AGPRs, but they are available
+; with a budget of 512 vector registers. We are artificially limiting
+; to use 64.
+
+; WARN: warning: inline asm clobber list contains reserved registers: a64 at line 24
+define amdgpu_kernel void @min_num_agpr_occ1_0_64() #23 {
+  call void asm sideeffect "; clobber $0","~{a63}"(), !srcloc !{i32 24}
+  call void asm sideeffect "; clobber $0","~{a64}"(), !srcloc !{i32 24}
+  call void asm sideeffect "; clobber $0","~{v255}"(), !srcloc !{i32 24}
+  ret void
+}
+
+attributes #23 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="64,64" "amdgpu-num-agpr"="0,64" }
+
+; WARN: warning: inline asm clobber list contains reserved registers: a68 at line 25
+define amdgpu_kernel void @min_num_agpr_occ1_0_68() #24 {
+  call void asm sideeffect "; clobber $0","~{a67}"(), !srcloc !{i32 25}
+  call void asm sideeffect "; clobber $0","~{a68}"(), !srcloc !{i32 25}
+  call void asm sideeffect "; clobber $0","~{v255}"(), !srcloc !{i32 25}
+  ret void
+}
+
+attributes #24 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="64,64" "amdgpu-num-agpr"="0,68" }
+
+
+; The total vector register budget is 128, claim more than that for
+; the minimum AGPRs. This checks for an assertion.
+; WARN: warning: <unknown>:0:0: failed to meet occupancy target given by 'amdgpu-waves-per-eu' in 'min_num_agpr_occ10__min_agpr_129': desired occupancy was 8, final occupancy is 3
+; WARN: warning: inline asm clobber list contains reserved registers: a128 at line 26
+; WARN: warning: inline asm clobber list contains reserved registers: v0 at line 26
+define amdgpu_kernel void @min_num_agpr_occ10__min_agpr_129() #25 {
+  call void asm sideeffect "; clobber $0","~{a127}"(), !srcloc !{i32 26}
+  call void asm sideeffect "; clobber $0","~{a128}"(), !srcloc !{i32 26}
+  call void asm sideeffect "; clobber $0","~{v0}"(), !srcloc !{i32 26}
+  ret void
+}
+
+attributes #25 = { "amdgpu-waves-per-eu"="8,10" "amdgpu-flat-work-group-size"="1024,1024" "amdgpu-num-agpr"="129" }
+
+; Check for another assertion, request beyond the budget.
+
+; WARN: warning: <unknown>:0:0: failed to meet occupancy target given by 'amdgpu-waves-per-eu' in 'min_num_agpr_occ10__min_agpr_129_129': desired occupancy was 8, final occupancy is 3
+; WARN: warning: inline asm clobber list contains reserved registers: a128 at line 27
+; WARN: warning: inline asm clobber list contains reserved registers: v0 at line 27
+define amdgpu_kernel void @min_num_agpr_occ10__min_agpr_129_129() #26 {
+  call void asm sideeffect "; clobber $0","~{a127}"(), !srcloc !{i32 27}
+  call void asm sideeffect "; clobber $0","~{a128}"(), !srcloc !{i32 27}
+  call void asm sideeffect "; clobber $0","~{v0}"(), !srcloc !{i32 27}
+  ret void
+}
+
+attributes #26 = { "amdgpu-waves-per-eu"="8,10" "amdgpu-flat-work-group-size"="1024,1024" "amdgpu-num-agpr"="129,129" }
+
+; The total vector register budget is 128, claim all of it for AGPRs.
+
+; WARN: warning: <unknown>:0:0: failed to meet occupancy target given by 'amdgpu-waves-per-eu' in 'min_num_agpr_occ10__min_agpr_128': desired occupancy was 8, final occupancy is 3
+; WARN: warning: inline asm clobber list contains reserved registers: a128 at line 28
+; WARN: warning: inline asm clobber list contains reserved registers: v0 at line 28
+
+define amdgpu_kernel void @min_num_agpr_occ10__min_agpr_128() #27 {
+  call void asm sideeffect "; clobber $0","~{a127}"(), !srcloc !{i32 28}
+  call void asm sideeffect "; clobber $0","~{a128}"(), !srcloc !{i32 28}
+  call void asm sideeffect "; clobber $0","~{v0}"(), !srcloc !{i32 28}
+  ret void
+}
+
+attributes #27 = { "amdgpu-waves-per-eu"="8,10" "amdgpu-flat-work-group-size"="1024,1024" "amdgpu-num-agpr"="128" }
+
+; WARN: warning: <unknown>:0:0: failed to meet occupancy target given by 'amdgpu-waves-per-eu' in 'min_num_agpr_occ10__min_agpr_257': desired occupancy was 8, final occupancy is 3
+; WARN: warning: inline asm clobber list contains reserved registers: a128 at line 29
+; WARN: warning: inline asm clobber list contains reserved registers: v0 at line 29
+define amdgpu_kernel void @min_num_agpr_occ10__min_agpr_257() #28 {
+  call void asm sideeffect "; clobber $0","~{a127}"(), !srcloc !{i32 29}
+  call void asm sideeffect "; clobber $0","~{a128}"(), !srcloc !{i32 29}
+  call void asm sideeffect "; clobber $0","~{v0}"(), !srcloc !{i32 29}
+  ret void
+}
+
+attributes #28 = { "amdgpu-waves-per-eu"="8,10" "amdgpu-flat-work-group-size"="1024,1024" "amdgpu-num-agpr"="257" }
+
+; WARN: warning: <unknown>:0:0: failed to meet occupancy target given by 'amdgpu-waves-per-eu' in 'min_num_agpr_occ10__min_agpr_257_257': desired occupancy was 8, final occupancy is 3
+; WARN: warning: inline asm clobber list contains reserved registers: a128 at line 30
+; WARN: warning: inline asm clobber list contains reserved registers: v0 at line 30
+define amdgpu_kernel void @min_num_agpr_occ10__min_agpr_257_257() #29 {
+  call void asm sideeffect "; clobber $0","~{a127}"(), !srcloc !{i32 30}
+  call void asm sideeffect "; clobber $0","~{a128}"(), !srcloc !{i32 30}
+  call void asm sideeffect "; clobber $0","~{v0}"(), !srcloc !{i32 30}
+  ret void
+}
+
+attributes #29 = { "amdgpu-waves-per-eu"="8,10" "amdgpu-flat-work-group-size"="1024,1024" "amdgpu-num-agpr"="257,257" }
+
+
+; The total vector register budget is 96
+
+; WARN: warning: <unknown>:0:0: failed to meet occupancy target given by 'amdgpu-waves-per-eu' in 'min_num_agpr_occ5__min_agpr_8_256': desired occupancy was 5, final occupancy is 4
+; WARN: warning: inline asm clobber list contains reserved registers: v88 at line 31
+; WARN: warning: inline asm clobber list contains reserved registers: a8 at line 31
+define amdgpu_kernel void @min_num_agpr_occ5__min_agpr_8_256() #30 {
+  call void asm sideeffect "; clobber $0","~{v87}"(), !srcloc !{i32 31}
+  call void asm sideeffect "; clobber $0","~{v88}"(), !srcloc !{i32 31}
+  call void asm sideeffect "; clobber $0","~{a7}"(), !srcloc !{i32 31}
+  call void asm sideeffect "; clobber $0","~{a8}"(), !srcloc !{i32 31}
+  ret void
+}
+
+attributes #30 = { "amdgpu-waves-per-eu"="5,5" "amdgpu-flat-work-group-size"="1024,1024" "amdgpu-num-agpr"="8,256" }
+
+; The total vector register budget is 96
+; WARN: warning: <unknown>:0:0: failed to meet occupancy target given by 'amdgpu-waves-per-eu' in 'min_num_agpr_occ5__min_agpr_8': desired occupancy was 5, final occupancy is 4
+; WARN: warning: inline asm clobber list contains reserved registers: v88 at line 32
+; WARN: warning: inline asm clobber list contains reserved registers: a8 at line 32
+define amdgpu_kernel void @min_num_agpr_occ5__min_agpr_8() #31 {
+  call void asm sideeffect "; clobber $0","~{v87}"(), !srcloc !{i32 32}
+  call void asm sideeffect "; clobber $0","~{v88}"(), !srcloc !{i32 32}
+  call void asm sideeffect "; clobber $0","~{a7}"(), !srcloc !{i32 32}
+  call void asm sideeffect "; clobber $0","~{a8}"(), !srcloc !{i32 32}
+  ret void
+}
+
+; budget is 96
+; WARN: warning: inline asm clobber list contains reserved registers: v88 at line 33
+define amdgpu_kernel void @min_num_agpr_occ5__min_agpr_8_no_agpr_references() #31 {
+  call void asm sideeffect "; clobber $0","~{v87}"(), !srcloc !{i32 33}
+  call void asm sideeffect "; clobber $0","~{v88}"(), !srcloc !{i32 33}
+  ret void
+}
+
+attributes #31 = { "amdgpu-waves-per-eu"="5,5" "amdgpu-flat-work-group-size"="1024,1024" "amdgpu-num-agpr"="8" }
+
+
+; register budget 256
+; WARN: warning: <unknown>:0:0: failed to meet occupancy target given by 'amdgpu-waves-per-eu' in 'min_num_agpr_occ2__min_agpr_93': desired occupancy was 2, final occupancy is 1
+; WARN: warning: inline asm clobber list contains reserved registers: v160 at line 34
+; WARN: warning: inline asm clobber list contains reserved registers: a96 at line 34
+define amdgpu_kernel void @min_num_agpr_occ2__min_agpr_93() #33 {
+  call void asm sideeffect "; clobber $0","~{v159}"(), !srcloc !{i32 34}
+  call void asm sideeffect "; clobber $0","~{v160}"(), !srcloc !{i32 34}
+  call void asm sideeffect "; clobber $0","~{a95}"(), !srcloc !{i32 34}
+  call void asm sideeffect "; clobber $0","~{a96}"(), !srcloc !{i32 34}
+  ret void
+}
+
+attributes #33 = { "amdgpu-waves-per-eu"="2,2" "amdgpu-flat-work-group-size"="1,256" "amdgpu-num-agpr"="93" }
+
+; register budget 512, no warnings and fully allocated
+define amdgpu_kernel void @min_num_agpr_occ1__min_agpr_93() #34 {
+  call void asm sideeffect "; clobber $0","~{v255}"(), !srcloc !{i32 35}
+  call void asm sideeffect "; clobber $0","~{a255}"(), !srcloc !{i32 35}
+  ret void
+}
+
+attributes #34 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" "amdgpu-num-agpr"="93" }
+
+; register budget 512
+; WARN: warning: inline asm clobber list contains reserved registers: a96 at line 36
+define amdgpu_kernel void @min_num_agpr_occ1__min_agpr_93_93() #35 {
+  call void asm sideeffect "; clobber $0","~{v255}"(), !srcloc !{i32 36}
+  call void asm sideeffect "; clobber $0","~{a95}"(), !srcloc !{i32 36}
+  call void asm sideeffect "; clobber $0","~{a96}"(), !srcloc !{i32 36}
+  ret void
+}
+
+attributes #35 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" "amdgpu-num-agpr"="93,93" }
+
+; register budget 512, fully allocated and no warnings.
+define amdgpu_kernel void @min_num_agpr_occ1__min_agpr_256() #36 {
+  call void asm sideeffect "; clobber $0","~{v255}"(), !srcloc !{i32 37}
+  call void asm sideeffect "; clobber $0","~{a255}"(), !srcloc !{i32 37}
+  ret void
+}
+
+attributes #36 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" "amdgpu-num-agpr"="256" }
+
+; register budget 512, fully allocated and no warnings.
+define amdgpu_kernel void @min_num_agpr_occ1__min_agpr_256_256() #37 {
+  call void asm sideeffect "; clobber $0","~{v255}"(), !srcloc !{i32 38}
+  call void asm sideeffect "; clobber $0","~{a255}"(), !srcloc !{i32 38}
+  ret void
+}
+
+attributes #37 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" "amdgpu-num-agpr"="256,256" }
+
+; register budget 512, fully allocated and no warnings.
+define amdgpu_kernel void @occ1_min_agpr_no_attr() #38 {
+  call void asm sideeffect "; clobber $0","~{v255}"(), !srcloc !{i32 39}
+  call void asm sideeffect "; clobber $0","~{a255}"(), !srcloc !{i32 39}
+  ret void
+}
+
+attributes #38 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="256,256" }

>From c7000809185ff3a209b8d8ccac7a0736701ead14 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 20 Mar 2024 16:15:31 +0530
Subject: [PATCH 2/2] Add test for what happens when you violate the attribute

We hit the allocation error, but we probably should treat it as UB and only
emit a warning.
---
 .../AMDGPU/amdgpu-no-agprs-violations.ll      | 50 +++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-no-agprs-violations.ll

diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-no-agprs-violations.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-no-agprs-violations.ll
new file mode 100644
index 0000000000000..eab1cad80fbf1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-no-agprs-violations.ll
@@ -0,0 +1,50 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck -check-prefixes=CHECK,GFX908 %s
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s 2> %t.err | FileCheck -check-prefixes=CHECK,GFX90A %s
+; RUN: FileCheck -check-prefix=ERR < %t.err %s
+
+; Test undefined behavior where a function that was marked with
+; "amdgpu-num-agpr"="0" ends up needing AGPRs. There should be no asserts.
+
+; TODO: Should this be an error, or let UB happen?
+
+; ERR: error: <unknown>:0:0: no registers from class available to allocate in function 'kernel_illegal_agpr_use_asm'
+; ERR: error: <unknown>:0:0: no registers from class available to allocate in function 'func_illegal_agpr_use_asm'
+; ERR: error: <unknown>:0:0: no registers from class available to allocate in function 'kernel_calls_mfma.f32.32x32x1f32'
+
+; CHECK: {{^}}kernel_illegal_agpr_use_asm:
+; CHECK: ; use a0
+
+; CHECK: NumVgprs: 0
+; CHECK: NumAgprs: 1
+define amdgpu_kernel void @kernel_illegal_agpr_use_asm() #0 {
+  call void asm sideeffect "; use $0", "a"(i32 poison)
+  ret void
+}
+
+; CHECK: {{^}}func_illegal_agpr_use_asm:
+; CHECK: ; use a0
+
+; CHECK: NumVgprs: 0
+; CHECK: NumAgprs: 1
+define void @func_illegal_agpr_use_asm() #0 {
+  call void asm sideeffect "; use $0", "a"(i32 poison)
+  ret void
+}
+
+; CHECK-LABEL: {{^}}kernel_calls_mfma.f32.32x32x1f32:
+; CHECK: v_accvgpr_write_b32
+; CHECK: v_accvgpr_read_b32
+
+; GFX908: NumVgprs: 5
+; GFX90A: NumVgprs: 36
+; CHECK: NumAgprs: 32
+
+; GFX908: TotalNumVgprs: 32
+; GFX90A: TotalNumVgprs: 68
+define amdgpu_kernel void @kernel_calls_mfma.f32.32x32x1f32(ptr addrspace(1) %out, float %a, float %b, <32 x float> %c) #0 {
+  %result = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float %a, float %b, <32 x float> %c, i32 0, i32 0, i32 0)
+  store <32 x float> %result, ptr addrspace(1) %out
+  ret void
+}
+
+attributes #0 = { "amdgpu-num-agpr"="0" }



More information about the llvm-commits mailing list