[llvm] [AMDGPU] Handle unset/max flat workgroup size in waves/EU (PR #139955)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 14 13:20:43 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Nikolay Panchenko (npanchen)
<details>
<summary>Changes</summary>
When `amdgpu-flat-work-group-size` is either unset or set to the maximum allowed `[1, 1024]`, the attributor won't change the state. As a result, `getAAFor<AAAMDFlatWorkGroupSize>` later returns `{0,0}` and the compiler crashes on the `FlatWorkGroupSize != 0` assertion.
---
Full diff: https://github.com/llvm/llvm-project/pull/139955.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (+8-2)
- (added) llvm/test/CodeGen/AMDGPU/amdgpu-attributor-max-flat-wgs.ll (+35)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 433144a60d120..52774ff9277b0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1170,13 +1170,19 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
!AssumedGroupSize->isValidState())
return false;
+ unsigned MinFWGSize =
+ AssumedGroupSize->getAssumed().getLower().getZExtValue();
+ unsigned MaxFWGSize =
+ AssumedGroupSize->getAssumed().getUpper().getZExtValue();
+ if (MinFWGSize == 0 && MaxFWGSize == 0)
+ std::tie(MinFWGSize, MaxFWGSize) =
+ InfoCache.getDefaultFlatWorkGroupSize(*Func);
unsigned Min, Max;
std::tie(Min, Max) = InfoCache.getEffectiveWavesPerEU(
*Caller,
{CallerInfo->getAssumed().getLower().getZExtValue(),
CallerInfo->getAssumed().getUpper().getZExtValue() - 1},
- {AssumedGroupSize->getAssumed().getLower().getZExtValue(),
- AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});
+ {MinFWGSize, MaxFWGSize - 1});
ConstantRange CallerRange(APInt(32, Min), APInt(32, Max + 1));
IntegerRangeState CallerRangeState(CallerRange);
Change |= clampStateAndIndicateChange(this->getState(), CallerRangeState);
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-max-flat-wgs.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-max-flat-wgs.ll
new file mode 100644
index 0000000000000..680fbedead429
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-max-flat-wgs.ll
@@ -0,0 +1,35 @@
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx942 -passes=amdgpu-attributor %s | FileCheck %s
+
+; CHECK-LABEL: define internal fastcc void @call1(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]]
+define internal fastcc void @call1() #0 {
+ tail call fastcc void @call2()
+ ret void
+}
+
+; CHECK-LABEL: define internal fastcc void @call2(
+; CHECK-SAME: ) #[[ATTR0]]
+define internal fastcc void @call2() #1 {
+ tail call fastcc void @call5()
+ ret void
+}
+
+; CHECK-LABEL: define { ptr addrspace(1), ptr } @call3(
+; CHECK-SAME:) #[[ATTR0]]
+define { ptr addrspace(1), ptr } @call3() #2 {
+ tail call fastcc void @call5()
+ ret { ptr addrspace(1), ptr } zeroinitializer
+}
+
+; CHECK-LABEL: define internal fastcc void @call5(
+; CHECK-SAME: ) #[[ATTR0]]
+define internal fastcc void @call5() {
+ tail call fastcc void @call1()
+ ret void
+}
+
+attributes #0 = {"amdgpu-flat-work-group-size"="1, 1024" "target-cpu"="gfx942" }
+attributes #1 = {"amdgpu-flat-work-group-size"="1, 1024" "target-cpu"="gfx942" }
+attributes #2 = {"amdgpu-flat-work-group-size"="1, 256" "target-cpu"="gfx942" }
+
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx942" "uniform-work-group-size"="false" }
``````````
</details>
https://github.com/llvm/llvm-project/pull/139955
More information about the llvm-commits
mailing list