[llvm] [WIP][AMDGPU][Attributor] Make `AAAMDFlatWorkGroupSize` honor existing attribute (PR #114357)

Wed Oct 30 22:05:20 PDT 2024

https://github.com/shiltian created https://github.com/llvm/llvm-project/pull/114357

If a function has the `amdgpu-flat-work-group-size` attribute, honor it in
`initialize` by taking its value directly, setting it as the known range, and
indicating a pessimistic fixed point so that the known range is propagated to
the assumed range; otherwise, `initialize` leaves the known range untouched.
We no longer clamp the known range (a real clamp, as opposed to the union-based
one in `IntegerRangeState`). Clamping can cause issues because the known range
acts as a "throttle" on the assumed range, so the assumed range can't be
widened properly in `updateImpl`. Another benefit of not touching the known
range in `initialize` is that, if we indicate a pessimistic fixed point in
`updateImpl`, the state is also invalid, so `manifest` will not be called.
Since we honor the attribute, we don't want any half-baked attribute added to
a function.

>From 2269a404739b6e8ce3d8ca7a7e99a2d8be72dc89 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Thu, 31 Oct 2024 00:56:09 -0400
Subject: [PATCH] [WIP][AMDGPU][Attributor] Make `AAAMDFlatWorkGroupSize` honor
 existing attribute

If a function has the `amdgpu-flat-work-group-size` attribute, honor it in
`initialize` by taking its value directly, setting it as the known range, and
indicating a pessimistic fixed point so that the known range is propagated to
the assumed range; otherwise, `initialize` leaves the known range untouched.
We no longer clamp the known range (a real clamp, as opposed to the union-based
one in `IntegerRangeState`). Clamping can cause issues because the known range
acts as a "throttle" on the assumed range, so the assumed range can't be
widened properly in `updateImpl`. Another benefit of not touching the known
range in `initialize` is that, if we indicate a pessimistic fixed point in
`updateImpl`, the state is also invalid, so `manifest` will not be called.
Since we honor the attribute, we don't want any half-baked attribute added to
a function.
---
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 65 +++++++++++++++++----
 1 file changed, 54 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 6a69b9d2bfc716..53e44bb5e094fd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -168,7 +168,15 @@ class AMDGPUInformationCache : public InformationCache {
     return ST.supportsGetDoorbellID();
   }
 
-  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
+  std::optional<std::pair<unsigned, unsigned>>
+  getFlatWorkGroupSizeAttr(const Function &F) const {
+    Attribute A = F.getFnAttribute("amdgpu-flat-work-group-size");
+    if (!A.isStringAttribute())
+      return std::nullopt;
+    return getFlatWorkGroupSizes(F);
+  }
+
+  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const {
     const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
     return ST.getFlatWorkGroupSizes(F);
   }
@@ -707,8 +715,7 @@ struct AAAMDSizeRangeAttribute
   /// See AbstractAttribute::trackStatistics()
   void trackStatistics() const override {}
 
-  template <class AttributeImpl>
-  ChangeStatus updateImplImpl(Attributor &A) {
+  template <class AttributeImpl> ChangeStatus updateImplImpl(Attributor &A) {
     ChangeStatus Change = ChangeStatus::UNCHANGED;
 
     auto CheckCallSite = [&](AbstractCallSite CS) {
@@ -728,12 +735,43 @@ struct AAAMDSizeRangeAttribute
     };
 
     bool AllCallSitesKnown = true;
-    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
+    if (!A.checkForAllCallSites(CheckCallSite, *this,
+                                /*RequireAllCallSites=*/true,
+                                AllCallSitesKnown))
       return indicatePessimisticFixpoint();
 
     return Change;
   }
 
+  /// Clamp the assumed range to the default value ([Min, Max]) and emit the
+  /// attribute if it is not same as default.
+  ChangeStatus
+  emitAttributeIfNotDefaultAfterClamp(Attributor &A,
+                                      std::pair<unsigned, unsigned> Default) {
+    auto [Min, Max] = Default;
+    unsigned Lower = getAssumed().getLower().getZExtValue();
+    unsigned Upper = getAssumed().getUpper().getZExtValue();
+
+    // Clamp the range to the default value.
+    if (Lower < Min)
+      Lower = Min;
+    if (Upper > Max + 1)
+      Upper = Max + 1;
+
+    // No manifest if the value is same as default after clamp.
+    if (Lower == Min && Upper == Max + 1)
+      return ChangeStatus::UNCHANGED;
+
+    Function *F = getAssociatedFunction();
+    LLVMContext &Ctx = F->getContext();
+    SmallString<10> Buffer;
+    raw_svector_ostream OS(Buffer);
+    OS << Lower << ',' << Upper - 1;
+    return A.manifestAttrs(getIRPosition(),
+                           {Attribute::get(Ctx, AttrName, OS.str())},
+                           /* ForceReplace=*/true);
+  }
+
   ChangeStatus emitAttributeIfNotDefault(Attributor &A, unsigned Min,
                                          unsigned Max) {
     // Don't add the attribute if it's the implied default.
@@ -767,11 +805,17 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
 
   void initialize(Attributor &A) override {
     Function *F = getAssociatedFunction();
+
     auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
-    unsigned MinGroupSize, MaxGroupSize;
-    std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
-    intersectKnown(
-        ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));
+    std::optional<std::pair<unsigned, unsigned>> Attr =
+        InfoCache.getFlatWorkGroupSizeAttr(*F);
+
+    if (Attr.has_value()) {
+      auto [Min, Max] = *Attr;
+      intersectKnown(ConstantRange(APInt(32, Min), APInt(32, Max + 1)));
+      indicatePessimisticFixpoint();
+      return;
+    }
 
     if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
       indicatePessimisticFixpoint();
@@ -788,9 +832,8 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
   ChangeStatus manifest(Attributor &A) override {
     Function *F = getAssociatedFunction();
     auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
-    unsigned Min, Max;
-    std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);
-    return emitAttributeIfNotDefault(A, Min, Max);
+    auto [Min, Max] = InfoCache.getMaximumFlatWorkGroupRange(*F);
+    return emitAttributeIfNotDefaultAfterClamp(A, {Min, Max});
   }
 
   /// See AbstractAttribute::getName()