[llvm] [AMDGPU] Fix computation of waves/EU maximum (PR #140921)
Lucas Ramirez via llvm-commits
llvm-commits at lists.llvm.org
Wed May 21 10:33:08 PDT 2025
https://github.com/lucas-rami updated https://github.com/llvm/llvm-project/pull/140921
>From 46e79b451e06086de615112a1db342d7a7eda9c0 Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Wed, 21 May 2025 15:35:45 +0000
Subject: [PATCH 1/2] Fix handling of requested waves/EU upper bound
---
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 18 +++++++++++-------
.../CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll | 12 ++++++++++++
2 files changed, 23 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 776cc6258dbcd..2131625959827 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -191,17 +191,21 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getEffectiveWavesPerEU(
getOccupancyWithWorkGroupSizes(LDSBytes, FlatWorkGroupSizes).second};
Default.first = std::min(Default.first, Default.second);
- // Make sure requested minimum is less than requested maximum.
- if (RequestedWavesPerEU.second &&
- RequestedWavesPerEU.first > RequestedWavesPerEU.second)
+ // Make sure requested min is within the default range.
+ if (RequestedWavesPerEU.first < Default.first ||
+ RequestedWavesPerEU.first > Default.second)
return Default;
- // Make sure requested values do not violate subtarget's specifications and
- // are compatible with values implied by minimum/maximum flat workgroup sizes.
- if (RequestedWavesPerEU.first < Default.first ||
- RequestedWavesPerEU.second > Default.second)
+ // When provided, make sure requested max is higher than min and does not
+ // violate target specification.
+ if (RequestedWavesPerEU.second &&
+ (RequestedWavesPerEU.first > RequestedWavesPerEU.second ||
+ RequestedWavesPerEU.second > getMaxWavesPerEU()))
return Default;
+ // We cannot exceed maximum occupancy implied by flat workgroup size and LDS.
+ RequestedWavesPerEU.second =
+ std::min(RequestedWavesPerEU.second, Default.second);
return RequestedWavesPerEU;
}
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll
index 4507fd5865989..eff424ae02c81 100644
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll
@@ -200,3 +200,15 @@ entry:
ret void
}
attributes #10 = {"amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="2,2"}
+
+; Minimum 2 waves, maximum limited by LDS usage.
+; CHECK-LABEL: {{^}}empty_at_least_2_lds_limited:
+; CHECK: SGPRBlocks: 12
+; CHECK: VGPRBlocks: 12
+; CHECK: NumSGPRsForWavesPerEU: 102
+; CHECK: NumVGPRsForWavesPerEU: 49
+define amdgpu_kernel void @empty_at_least_2_lds_limited() #11 {
+entry:
+ ret void
+}
+attributes #11 = {"amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" "amdgpu-lds-size"="16384"}
>From f46849ad5a81a5e7993f7356d5647c520086b5dc Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Wed, 21 May 2025 17:32:52 +0000
Subject: [PATCH 2/2] Address comments
---
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 14 +++++---------
.../CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll | 13 +++++++++++++
2 files changed, 18 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 2131625959827..d095fc6cf9549 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -191,16 +191,12 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getEffectiveWavesPerEU(
getOccupancyWithWorkGroupSizes(LDSBytes, FlatWorkGroupSizes).second};
Default.first = std::min(Default.first, Default.second);
- // Make sure requested min is within the default range.
+ // Make sure requested minimum is within the default range and not above the
+ // requested maximum. The latter must not violate target specification.
if (RequestedWavesPerEU.first < Default.first ||
- RequestedWavesPerEU.first > Default.second)
- return Default;
-
- // When provided, make sure requested max is higher than min and does not
- // violate target specification.
- if (RequestedWavesPerEU.second &&
- (RequestedWavesPerEU.first > RequestedWavesPerEU.second ||
- RequestedWavesPerEU.second > getMaxWavesPerEU()))
+ RequestedWavesPerEU.first > Default.second ||
+ RequestedWavesPerEU.first > RequestedWavesPerEU.second ||
+ RequestedWavesPerEU.second > getMaxWavesPerEU())
return Default;
// We cannot exceed maximum occupancy implied by flat workgroup size and LDS.
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll
index eff424ae02c81..e9fe4f3c618c7 100644
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll
@@ -212,3 +212,16 @@ entry:
ret void
}
attributes #11 = {"amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" "amdgpu-lds-size"="16384"}
+
+; Minimum 2 waves, maximum limited by LDS usage. Requested maximum within spec
+; but above achievable occupancy has no effect.
+; CHECK-LABEL: {{^}}empty_at_least_2_lds_limited_max_above_achievable:
+; CHECK: SGPRBlocks: 12
+; CHECK: VGPRBlocks: 12
+; CHECK: NumSGPRsForWavesPerEU: 102
+; CHECK: NumVGPRsForWavesPerEU: 49
+define amdgpu_kernel void @empty_at_least_2_lds_limited_max_above_achievable() #12 {
+entry:
+ ret void
+}
+attributes #12 = {"amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2,10" "amdgpu-lds-size"="16384"}
More information about the llvm-commits mailing list