[llvm] [AMDGPU] Allow 0 as min./max. of amdgpu-waves-per-eu (PR #138284)
Lucas Ramirez via llvm-commits
llvm-commits at lists.llvm.org
Fri May 2 07:22:34 PDT 2025
https://github.com/lucas-rami created https://github.com/llvm/llvm-project/pull/138284
Clang's attribute reference specifies, for the "amdgpu-waves-per-eu" attribute, that "_Passing 0, 0 as \<min\>, \<max\> implies the default behavior_". However, the backend currently treats a minimum or maximum of 0 as an invalid bound that makes the whole attribute invalid/ignored.
This patch makes the backend treat a bound of 0 as unspecified, as the documentation describes. In particular, it allows a user to specify a maximum desired number of waves/EU without specifying a minimum (e.g., "amdgpu-waves-per-eu"="0,4"), which is not currently possible.
The following equivalences hold (for $0 < N < \text{getMaxWavesPerEU()}$):
- "amdgpu-waves-per-eu"="N,0" is equivalent to "amdgpu-waves-per-eu"="N".
- "amdgpu-waves-per-eu"="0,0" is equivalent to providing no attribute.
From 2b4ef1fba8fc9bedb0be9fec725ecd33e0bbbae2 Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Fri, 2 May 2025 13:57:35 +0000
Subject: [PATCH] Allow 0 as min/max number of waves per EU
---
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 3 +-
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 24 ++++++++----
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 4 +-
.../AMDGPU/attr-amdgpu-waves-per-eu.ll | 38 +++++++++++++++++++
.../CodeGen/AMDGPU/propagate-waves-per-eu.ll | 28 +++++++-------
5 files changed, 72 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index b9ce8dc0c5cdb..0bbbe766968fc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1125,8 +1125,7 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
indicateOptimisticFixpoint();
};
- std::pair<unsigned, unsigned> MaxWavesPerEURange{
- 1U, InfoCache.getMaxWavesPerEU(*F)};
+ std::pair<unsigned, unsigned> MaxWavesPerEURange{0, 0};
// If the attribute exists, we will honor it if it is not the default.
if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 563605f964cc6..4212d97eb9404 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -191,17 +191,25 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getEffectiveWavesPerEU(
getOccupancyWithWorkGroupSizes(LDSBytes, FlatWorkGroupSizes).second};
Default.first = std::min(Default.first, Default.second);
- // Make sure requested minimum is less than requested maximum.
+ if (RequestedWavesPerEU.first) {
+ // Requested minimum must not violate subtarget's specifications.
+ if (RequestedWavesPerEU.first < Default.first)
+ return Default;
+ // Requested maximum must be no less than the requested minimum.
+ if (RequestedWavesPerEU.second &&
+ RequestedWavesPerEU.first > RequestedWavesPerEU.second)
+ return Default;
+ }
+ // Requested maximum must not violate subtarget's specifications.
if (RequestedWavesPerEU.second &&
- RequestedWavesPerEU.first > RequestedWavesPerEU.second)
- return Default;
-
- // Make sure requested values do not violate subtarget's specifications and
- // are compatible with values implied by minimum/maximum flat workgroup sizes.
- if (RequestedWavesPerEU.first < Default.first ||
RequestedWavesPerEU.second > Default.second)
return Default;
+ // Replace unspecified bounds in the request with the default bounds.
+ if (!RequestedWavesPerEU.first)
+ RequestedWavesPerEU.first = Default.first;
+ if (!RequestedWavesPerEU.second)
+ RequestedWavesPerEU.second = Default.second;
return RequestedWavesPerEU;
}
@@ -220,7 +228,7 @@ std::pair<unsigned, unsigned>
AMDGPUSubtarget::getWavesPerEU(std::pair<unsigned, unsigned> FlatWorkGroupSizes,
unsigned LDSBytes, const Function &F) const {
// Default minimum/maximum number of waves per execution unit.
- std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());
+ std::pair<unsigned, unsigned> Default(0, 0);
// Requested minimum/maximum number of waves per execution unit.
std::pair<unsigned, unsigned> Requested =
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 91fe2a69bc0b7..1cbb6a7b1ad43 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -119,7 +119,9 @@ class AMDGPUSubtarget {
/// Returns the target minimum/maximum number of waves per EU. This is based
/// on the minimum/maximum number of \p RequestedWavesPerEU and further
/// limited by the maximum achievable occupancy derived from the range of \p
- /// FlatWorkGroupSizes and number of \p LDSBytes per workgroup.
+ /// FlatWorkGroupSizes and number of \p LDSBytes per workgroup. A
+ /// minimum/maximum requested waves/EU value of 0 indicates an intent to not
+ /// restrict the corresponding bound.
std::pair<unsigned, unsigned>
getEffectiveWavesPerEU(std::pair<unsigned, unsigned> RequestedWavesPerEU,
std::pair<unsigned, unsigned> FlatWorkGroupSizes,
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll
index 4507fd5865989..d8827d0405295 100644
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll
@@ -200,3 +200,41 @@ entry:
ret void
}
attributes #10 = {"amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="2,2"}
+
+; At most 2 waves per execution unit.
+; CHECK-LABEL: {{^}}empty_at_most_2:
+; CHECK: SGPRBlocks: 12
+; CHECK: VGPRBlocks: 21
+; CHECK: NumSGPRsForWavesPerEU: 102
+; CHECK: NumVGPRsForWavesPerEU: 85
+define amdgpu_kernel void @empty_at_most_2() #11 {
+entry:
+ ret void
+}
+attributes #11 = {"amdgpu-waves-per-eu"="0,2"}
+
+; Exactly 1024 workitems (limits occupancy to 8) and at least 5 waves per execution unit.
+; "amdgpu-waves-per-eu"="5,0" should have the same effect as "amdgpu-waves-per-eu"="5".
+; CHECK-LABEL: {{^}}empty_workitems_exactly_1024_waves_at_least_5:
+; CHECK: SGPRBlocks: 8
+; CHECK: VGPRBlocks: 7
+; CHECK: NumSGPRsForWavesPerEU: 65
+; CHECK: NumVGPRsForWavesPerEU: 29
+define amdgpu_kernel void @empty_workitems_exactly_1024_waves_at_least_5() #12 {
+entry:
+ ret void
+}
+attributes #12 = {"amdgpu-waves-per-eu"="5,0" "amdgpu-flat-work-group-size"="1024,1024"}
+
+; Unrestricted number of waves per execution unit.
+; "amdgpu-waves-per-eu"="0,0" should have the same effect as not providing the attribute.
+; CHECK-LABEL: {{^}}empty_default_waves:
+; CHECK: SGPRBlocks: 0
+; CHECK: VGPRBlocks: 0
+; CHECK: NumSGPRsForWavesPerEU: 1
+; CHECK: NumVGPRsForWavesPerEU: 1
+define amdgpu_kernel void @empty_default_waves() #13 {
+entry:
+ ret void
+}
+attributes #13 = {"amdgpu-waves-per-eu"="0,0"}
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll b/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll
index ae114f3213d8f..967cc764ea19c 100644
--- a/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll
+++ b/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals --version 2
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck %s
-; Check propagation of amdgpu-flat-work-group-size attribute.
+; Check propagation of amdgpu-waves-per-eu attribute.
; Called from a single kernel with 1,8
define internal void @default_to_1_8_a() {
@@ -216,30 +216,30 @@ define internal i32 @bitcasted_function() {
ret i32 0
}
-define internal void @called_from_invalid_bounds_0() {
-; CHECK-LABEL: define internal void @called_from_invalid_bounds_0
-; CHECK-SAME: () #[[ATTR10:[0-9]+]] {
+define internal void @called_without_min_waves() {
+; CHECK-LABEL: define internal void @called_without_min_waves
+; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: ret void
;
ret void
}
-define internal void @called_from_invalid_bounds_1() {
-; CHECK-LABEL: define internal void @called_from_invalid_bounds_1
-; CHECK-SAME: () #[[ATTR10]] {
+define internal void @called_from_invalid_bounds() {
+; CHECK-LABEL: define internal void @called_from_invalid_bounds
+; CHECK-SAME: () #[[ATTR10:[0-9]+]] {
; CHECK-NEXT: ret void
;
ret void
}
-; Invalid range for amdgpu-waves-per-eu
-define amdgpu_kernel void @kernel_invalid_bounds_0_8() #9 {
-; CHECK-LABEL: define amdgpu_kernel void @kernel_invalid_bounds_0_8
+; amdgpu-waves-per-eu range only provides a maximum.
+define amdgpu_kernel void @kernel_0_8() #9 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_0_8
; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT: call void @called_from_invalid_bounds_0()
+; CHECK-NEXT: call void @called_without_min_waves()
; CHECK-NEXT: ret void
;
- call void @called_from_invalid_bounds_0()
+ call void @called_without_min_waves()
ret void
}
@@ -247,10 +247,10 @@ define amdgpu_kernel void @kernel_invalid_bounds_0_8() #9 {
define amdgpu_kernel void @kernel_invalid_bounds_1_123() #10 {
; CHECK-LABEL: define amdgpu_kernel void @kernel_invalid_bounds_1_123
; CHECK-SAME: () #[[ATTR11:[0-9]+]] {
-; CHECK-NEXT: call void @called_from_invalid_bounds_1()
+; CHECK-NEXT: call void @called_from_invalid_bounds()
; CHECK-NEXT: ret void
;
- call void @called_from_invalid_bounds_1()
+ call void @called_from_invalid_bounds()
ret void
}
More information about the llvm-commits
mailing list