[llvm] [AMDGPU] Handle unset/max flat workgroup size in waves/EU (PR #139955)

via llvm-commits llvm-commits at lists.llvm.org
Wed May 14 13:20:43 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Nikolay Panchenko (npanchen)

<details>
<summary>Changes</summary>

When `amdgpu-flat-work-group-size` is either missing or set to the maximum allowed range `[1, 1024]`, the attributor won't change the state. As a result, `getAAFor<AAAMDFlatWorkGroupSize>` later returns `{0,0}` and the compiler crashes on the `FlatWorkGroupSize != 0` assertion.

---
Full diff: https://github.com/llvm/llvm-project/pull/139955.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (+8-2) 
- (added) llvm/test/CodeGen/AMDGPU/amdgpu-attributor-max-flat-wgs.ll (+35) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 433144a60d120..52774ff9277b0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1170,13 +1170,19 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
           !AssumedGroupSize->isValidState())
         return false;
 
+      unsigned MinFWGSize =
+          AssumedGroupSize->getAssumed().getLower().getZExtValue();
+      unsigned MaxFWGSize =
+          AssumedGroupSize->getAssumed().getUpper().getZExtValue();
+      if (MinFWGSize == 0 && MaxFWGSize == 0)
+        std::tie(MinFWGSize, MaxFWGSize) =
+            InfoCache.getDefaultFlatWorkGroupSize(*Func);
       unsigned Min, Max;
       std::tie(Min, Max) = InfoCache.getEffectiveWavesPerEU(
           *Caller,
           {CallerInfo->getAssumed().getLower().getZExtValue(),
            CallerInfo->getAssumed().getUpper().getZExtValue() - 1},
-          {AssumedGroupSize->getAssumed().getLower().getZExtValue(),
-           AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});
+          {MinFWGSize, MaxFWGSize - 1});
       ConstantRange CallerRange(APInt(32, Min), APInt(32, Max + 1));
       IntegerRangeState CallerRangeState(CallerRange);
       Change |= clampStateAndIndicateChange(this->getState(), CallerRangeState);
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-max-flat-wgs.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-max-flat-wgs.ll
new file mode 100644
index 0000000000000..680fbedead429
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-max-flat-wgs.ll
@@ -0,0 +1,35 @@
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx942 -passes=amdgpu-attributor %s | FileCheck %s
+
+; CHECK-LABEL: define internal fastcc void @call1(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]]
+define internal fastcc void @call1() #0 {
+  tail call fastcc void @call2()
+  ret void
+}
+
+; CHECK-LABEL: define internal fastcc void @call2(
+; CHECK-SAME: ) #[[ATTR0]]
+define internal fastcc void @call2() #1 {
+  tail call fastcc void @call5()
+  ret void
+}
+
+; CHECK-LABEL: define { ptr addrspace(1), ptr } @call3(
+; CHECK-SAME:) #[[ATTR0]]
+define { ptr addrspace(1), ptr } @call3() #2 {
+  tail call fastcc void @call5()
+  ret { ptr addrspace(1), ptr } zeroinitializer
+}
+
+; CHECK-LABEL: define internal fastcc void @call5(
+; CHECK-SAME: ) #[[ATTR0]]
+define internal fastcc void @call5() {
+  tail call fastcc void @call1()
+  ret void
+}
+
+attributes #0 = {"amdgpu-flat-work-group-size"="1, 1024" "target-cpu"="gfx942" }
+attributes #1 = {"amdgpu-flat-work-group-size"="1, 1024" "target-cpu"="gfx942" }
+attributes #2 = {"amdgpu-flat-work-group-size"="1, 256" "target-cpu"="gfx942" }
+
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx942" "uniform-work-group-size"="false" }

``````````

</details>


https://github.com/llvm/llvm-project/pull/139955


More information about the llvm-commits mailing list