[llvm] [Attributor][NFC] Performance improvements (PR #122923)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 14 07:51:45 PST 2025


https://github.com/macurtis-amd created https://github.com/llvm/llvm-project/pull/122923

` forallInterferingAccesses` is a hotspot and for large modules these changes make a measurable improvement in compilation time.

For kernel compilation of 519.clvleaf (SPEChpc 2021) I measured the following:
```
                    |   Measured times (s)   | Average | speedup
--------------------+------------------------+---------+---------
Baseline            | 33.268  33.332  33.275 |  33.292 |      0%
Cache "kernel"      | 30.543  30.339  30.607 |  30.496 |    9.2%
templatize callback | 30.981  30.97   30.964 |  30.972 |    7.5%
Both changes        | 29.284  29.201  29.053 |  29.179 |   14.1%
```

>From de00545b5ac23299257dc01d2df23f7a92742873 Mon Sep 17 00:00:00 2001
From: Matthew Curtis <macurtis at amd.com>
Date: Tue, 14 Jan 2025 08:13:10 -0600
Subject: [PATCH 1/2] [Attributor][NFC] cache "kernel" function attr

---
 llvm/include/llvm/Transforms/IPO/Attributor.h    |  9 +++++++++
 llvm/lib/Transforms/IPO/Attributor.cpp           |  6 ++++--
 llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 10 +++++-----
 3 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 28bce7b906652f..85893146997499 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1287,6 +1287,12 @@ struct InformationCache {
     return AG.getAnalysis<TargetLibraryAnalysis>(F);
   }
 
+  /// Return true if \p F has the "kernel" function attribute
+  bool isKernel(const Function &F) {
+    FunctionInfo &FI = getFunctionInfo(F);
+    return FI.IsKernel;
+  }
+
   /// Return true if \p Arg is involved in a must-tail call, thus the argument
   /// of the caller or callee.
   bool isInvolvedInMustTailCall(const Argument &Arg) {
@@ -1361,6 +1367,9 @@ struct InformationCache {
 
     /// Function contains a `musttail` call.
     bool ContainsMustTailCall;
+
+    /// Function has the `"kernel"` attribute
+    bool IsKernel;
   };
 
   /// A map type from functions to informatio about it.
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 116f419129a239..a93284926d684f 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -678,8 +678,8 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI,
   // kernel, values like allocas and shared memory are not accessible. We
   // implicitly check for this situation to avoid costly lookups.
   if (GoBackwardsCB && &ToFn != FromI.getFunction() &&
-      !GoBackwardsCB(*FromI.getFunction()) && ToFn.hasFnAttribute("kernel") &&
-      FromI.getFunction()->hasFnAttribute("kernel")) {
+      !GoBackwardsCB(*FromI.getFunction()) && A.getInfoCache().isKernel(ToFn) &&
+      A.getInfoCache().isKernel(*FromI.getFunction())) {
     LLVM_DEBUG(dbgs() << "[AA] assume kernel cannot be reached from within the "
                          "module; success\n";);
     return false;
@@ -3191,6 +3191,8 @@ void InformationCache::initializeInformationCache(const Function &CF,
   // initialize the cache eagerly which would look the same to the users.
   Function &F = const_cast<Function &>(CF);
 
+  FI.IsKernel = F.hasFnAttribute("kernel");
+
   // Walk all instructions to find interesting instructions that might be
   // queried by abstract attributes during their initialization or update.
   // This has to happen before we create attributes.
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index eb45df34771d32..2ac668a4014bc7 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1176,7 +1176,7 @@ struct AAPointerInfoImpl
     // TODO: Use reaching kernels from AAKernelInfo (or move it to
     // AAExecutionDomain) such that we allow scopes other than kernels as long
     // as the reaching kernels are disjoint.
-    bool InstInKernel = Scope.hasFnAttribute("kernel");
+    bool InstInKernel = A.getInfoCache().isKernel(Scope);
     bool ObjHasKernelLifetime = false;
     const bool UseDominanceReasoning =
         FindInterferingWrites && IsKnownNoRecurse;
@@ -1210,7 +1210,7 @@ struct AAPointerInfoImpl
       // If the alloca containing function is not recursive the alloca
       // must be dead in the callee.
       const Function *AIFn = AI->getFunction();
-      ObjHasKernelLifetime = AIFn->hasFnAttribute("kernel");
+      ObjHasKernelLifetime = A.getInfoCache().isKernel(*AIFn);
       bool IsKnownNoRecurse;
       if (AA::hasAssumedIRAttr<Attribute::NoRecurse>(
               A, this, IRPosition::function(*AIFn), DepClassTy::OPTIONAL,
@@ -1222,8 +1222,8 @@ struct AAPointerInfoImpl
       // as it is "dead" in the (unknown) callees.
       ObjHasKernelLifetime = HasKernelLifetime(GV, *GV->getParent());
       if (ObjHasKernelLifetime)
-        IsLiveInCalleeCB = [](const Function &Fn) {
-          return !Fn.hasFnAttribute("kernel");
+        IsLiveInCalleeCB = [&A](const Function &Fn) {
+          return !A.getInfoCache().isKernel(Fn);
         };
     }
 
@@ -1238,7 +1238,7 @@ struct AAPointerInfoImpl
       // If the object has kernel lifetime we can ignore accesses only reachable
       // by other kernels. For now we only skip accesses *in* other kernels.
       if (InstInKernel && ObjHasKernelLifetime && !AccInSameScope &&
-          AccScope->hasFnAttribute("kernel"))
+          A.getInfoCache().isKernel(*AccScope))
         return true;
 
       if (Exact && Acc.isMustAccess() && Acc.getRemoteInst() != &I) {

>From 02bb80446a42260e50e3f30b99523f0f7d61c574 Mon Sep 17 00:00:00 2001
From: Matthew Curtis <macurtis at amd.com>
Date: Tue, 14 Jan 2025 08:18:06 -0600
Subject: [PATCH 2/2] [Attributor][NFC] templatize
 PointerInfo::State::forallInterfering... callback

---
 llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 2ac668a4014bc7..46d6d66593b4e5 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -885,9 +885,8 @@ struct AA::PointerInfo::State : public AbstractState {
   AAPointerInfo::OffsetInfo ReturnedOffsets;
 
   /// See AAPointerInfo::forallInterferingAccesses.
-  bool forallInterferingAccesses(
-      AA::RangeTy Range,
-      function_ref<bool(const AAPointerInfo::Access &, bool)> CB) const {
+  template <typename F>
+  bool forallInterferingAccesses(AA::RangeTy Range, F CB) const {
     if (!isValidState() || !ReturnedOffsets.isUnassigned())
       return false;
 
@@ -906,10 +905,9 @@ struct AA::PointerInfo::State : public AbstractState {
   }
 
   /// See AAPointerInfo::forallInterferingAccesses.
-  bool forallInterferingAccesses(
-      Instruction &I,
-      function_ref<bool(const AAPointerInfo::Access &, bool)> CB,
-      AA::RangeTy &Range) const {
+  template <typename F>
+  bool forallInterferingAccesses(Instruction &I, F CB,
+                                 AA::RangeTy &Range) const {
     if (!isValidState() || !ReturnedOffsets.isUnassigned())
       return false;
 



More information about the llvm-commits mailing list