[llvm] [AMDGPU] Fix computation of waves/EU maximum (PR #140921)

Lucas Ramirez via llvm-commits llvm-commits at lists.llvm.org
Wed May 21 10:33:08 PDT 2025


https://github.com/lucas-rami updated https://github.com/llvm/llvm-project/pull/140921

>From 46e79b451e06086de615112a1db342d7a7eda9c0 Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Wed, 21 May 2025 15:35:45 +0000
Subject: [PATCH 1/2] Fix handling of requested waves/EU upper bound

---
 llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp     | 18 +++++++++++-------
 .../CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll | 12 ++++++++++++
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 776cc6258dbcd..2131625959827 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -191,17 +191,21 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getEffectiveWavesPerEU(
       getOccupancyWithWorkGroupSizes(LDSBytes, FlatWorkGroupSizes).second};
   Default.first = std::min(Default.first, Default.second);
 
-  // Make sure requested minimum is less than requested maximum.
-  if (RequestedWavesPerEU.second &&
-      RequestedWavesPerEU.first > RequestedWavesPerEU.second)
+  // Make sure requested min is within the default range.
+  if (RequestedWavesPerEU.first < Default.first ||
+      RequestedWavesPerEU.first > Default.second)
     return Default;
 
-  // Make sure requested values do not violate subtarget's specifications and
-  // are compatible with values implied by minimum/maximum flat workgroup sizes.
-  if (RequestedWavesPerEU.first < Default.first ||
-      RequestedWavesPerEU.second > Default.second)
+  // When provided, make sure requested max is higher than min and does not
+  // violate target specification.
+  if (RequestedWavesPerEU.second &&
+      (RequestedWavesPerEU.first > RequestedWavesPerEU.second ||
+       RequestedWavesPerEU.second > getMaxWavesPerEU()))
     return Default;
 
+  // We cannot exceed maximum occupancy implied by flat workgroup size and LDS.
+  RequestedWavesPerEU.second =
+      std::min(RequestedWavesPerEU.second, Default.second);
   return RequestedWavesPerEU;
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll
index 4507fd5865989..eff424ae02c81 100644
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll
@@ -200,3 +200,15 @@ entry:
   ret void
 }
 attributes #10 = {"amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="2,2"}
+
+; Minimum 2 waves, maximum limited by LDS usage.
+; CHECK-LABEL: {{^}}empty_at_least_2_lds_limited:
+; CHECK: SGPRBlocks: 12
+; CHECK: VGPRBlocks: 12
+; CHECK: NumSGPRsForWavesPerEU: 102
+; CHECK: NumVGPRsForWavesPerEU: 49
+define amdgpu_kernel void @empty_at_least_2_lds_limited() #11 {
+entry:
+  ret void
+}
+attributes #11 = {"amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" "amdgpu-lds-size"="16384"}

>From f46849ad5a81a5e7993f7356d5647c520086b5dc Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Wed, 21 May 2025 17:32:52 +0000
Subject: [PATCH 2/2] Address comments

---
 llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp         | 14 +++++---------
 .../CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll     | 13 +++++++++++++
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 2131625959827..d095fc6cf9549 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -191,16 +191,12 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getEffectiveWavesPerEU(
       getOccupancyWithWorkGroupSizes(LDSBytes, FlatWorkGroupSizes).second};
   Default.first = std::min(Default.first, Default.second);
 
-  // Make sure requested min is within the default range.
+  // Make sure requested minimum is within the default range and does not exceed
+  // the requested maximum. The latter must not violate target specification.
   if (RequestedWavesPerEU.first < Default.first ||
-      RequestedWavesPerEU.first > Default.second)
-    return Default;
-
-  // When provided, make sure requested max is higher than min and does not
-  // violate target specification.
-  if (RequestedWavesPerEU.second &&
-      (RequestedWavesPerEU.first > RequestedWavesPerEU.second ||
-       RequestedWavesPerEU.second > getMaxWavesPerEU()))
+      RequestedWavesPerEU.first > Default.second ||
+      RequestedWavesPerEU.first > RequestedWavesPerEU.second ||
+      RequestedWavesPerEU.second > getMaxWavesPerEU())
     return Default;
 
   // We cannot exceed maximum occupancy implied by flat workgroup size and LDS.
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll
index eff424ae02c81..e9fe4f3c618c7 100644
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll
@@ -212,3 +212,16 @@ entry:
   ret void
 }
 attributes #11 = {"amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" "amdgpu-lds-size"="16384"}
+
+; Minimum 2 waves, maximum limited by LDS usage. Requested maximum within spec
+; but above achievable occupancy has no effect.
+; CHECK-LABEL: {{^}}empty_at_least_2_lds_limited_max_above_achievable:
+; CHECK: SGPRBlocks: 12
+; CHECK: VGPRBlocks: 12
+; CHECK: NumSGPRsForWavesPerEU: 102
+; CHECK: NumVGPRsForWavesPerEU: 49
+define amdgpu_kernel void @empty_at_least_2_lds_limited_max_above_achievable() #12 {
+entry:
+  ret void
+}
+attributes #12 = {"amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2,10" "amdgpu-lds-size"="16384"}



More information about the llvm-commits mailing list