[llvm] [AMDGPU] Move LDS utilities from amdgpu-lower-module-lds pass to AMDGPUMemoryUtils (PR #88002)

via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 19 04:05:14 PDT 2024


https://github.com/skc7 updated https://github.com/llvm/llvm-project/pull/88002

>From 4e67582cfa296889549e240c9714ed990fdd0078 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Fri, 5 Apr 2024 13:23:48 +0530
Subject: [PATCH 1/4] [AMDGPU] Move LDS utilities from amdgpu-lower-module-lds
 pass to AMDGPUMemoryUtils

---
 .../AMDGPU/AMDGPULowerModuleLDSPass.cpp       | 186 +-----------------
 .../Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp | 176 +++++++++++++++++
 .../Target/AMDGPU/Utils/AMDGPUMemoryUtils.h   |  24 +++
 3 files changed, 201 insertions(+), 185 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index c8bf9dd39e389c..2c7163a7753725 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -212,6 +212,7 @@
 #define DEBUG_TYPE "amdgpu-lower-module-lds"
 
 using namespace llvm;
+using namespace AMDGPU;
 
 namespace {
 
@@ -234,17 +235,6 @@ cl::opt<LoweringKind> LoweringKindLoc(
         clEnumValN(LoweringKind::hybrid, "hybrid",
                    "Lower via mixture of above strategies")));
 
-bool isKernelLDS(const Function *F) {
-  // Some weirdness here. AMDGPU::isKernelCC does not call into
-  // AMDGPU::isKernel with the calling conv, it instead calls into
-  // isModuleEntryFunction which returns true for more calling conventions
-  // than AMDGPU::isKernel does. There's a FIXME on AMDGPU::isKernel.
-  // There's also a test that checks that the LDS lowering does not hit on
-  // a graphics shader, denoted amdgpu_ps, so stay with the limited case.
-  // Putting LDS in the name of the function to draw attention to this.
-  return AMDGPU::isKernel(F->getCallingConv());
-}
-
 template <typename T> std::vector<T> sortByName(std::vector<T> &&V) {
   llvm::sort(V.begin(), V.end(), [](const auto *L, const auto *R) {
     return L->getName() < R->getName();
@@ -305,183 +295,9 @@ class AMDGPULowerModuleLDS {
         Decl, {}, {OperandBundleDefT<Value *>("ExplicitUse", UseInstance)});
   }
 
-  static bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M) {
-    // Constants are uniqued within LLVM. A ConstantExpr referring to a LDS
-    // global may have uses from multiple different functions as a result.
-    // This pass specialises LDS variables with respect to the kernel that
-    // allocates them.
-
-    // This is semantically equivalent to (the unimplemented as slow):
-    // for (auto &F : M.functions())
-    //   for (auto &BB : F)
-    //     for (auto &I : BB)
-    //       for (Use &Op : I.operands())
-    //         if (constantExprUsesLDS(Op))
-    //           replaceConstantExprInFunction(I, Op);
-
-    SmallVector<Constant *> LDSGlobals;
-    for (auto &GV : M.globals())
-      if (AMDGPU::isLDSVariableToLower(GV))
-        LDSGlobals.push_back(&GV);
-
-    return convertUsersOfConstantsToInstructions(LDSGlobals);
-  }
-
 public:
   AMDGPULowerModuleLDS(const AMDGPUTargetMachine &TM_) : TM(TM_) {}
 
-  using FunctionVariableMap = DenseMap<Function *, DenseSet<GlobalVariable *>>;
-
-  using VariableFunctionMap = DenseMap<GlobalVariable *, DenseSet<Function *>>;
-
-  static void getUsesOfLDSByFunction(CallGraph const &CG, Module &M,
-                                     FunctionVariableMap &kernels,
-                                     FunctionVariableMap &functions) {
-
-    // Get uses from the current function, excluding uses by called functions
-    // Two output variables to avoid walking the globals list twice
-    for (auto &GV : M.globals()) {
-      if (!AMDGPU::isLDSVariableToLower(GV)) {
-        continue;
-      }
-
-      for (User *V : GV.users()) {
-        if (auto *I = dyn_cast<Instruction>(V)) {
-          Function *F = I->getFunction();
-          if (isKernelLDS(F)) {
-            kernels[F].insert(&GV);
-          } else {
-            functions[F].insert(&GV);
-          }
-        }
-      }
-    }
-  }
-
-  struct LDSUsesInfoTy {
-    FunctionVariableMap direct_access;
-    FunctionVariableMap indirect_access;
-  };
-
-  static LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M) {
-
-    FunctionVariableMap direct_map_kernel;
-    FunctionVariableMap direct_map_function;
-    getUsesOfLDSByFunction(CG, M, direct_map_kernel, direct_map_function);
-
-    // Collect variables that are used by functions whose address has escaped
-    DenseSet<GlobalVariable *> VariablesReachableThroughFunctionPointer;
-    for (Function &F : M.functions()) {
-      if (!isKernelLDS(&F))
-        if (F.hasAddressTaken(nullptr,
-                              /* IgnoreCallbackUses */ false,
-                              /* IgnoreAssumeLikeCalls */ false,
-                              /* IgnoreLLVMUsed */ true,
-                              /* IgnoreArcAttachedCall */ false)) {
-          set_union(VariablesReachableThroughFunctionPointer,
-                    direct_map_function[&F]);
-        }
-    }
-
-    auto functionMakesUnknownCall = [&](const Function *F) -> bool {
-      assert(!F->isDeclaration());
-      for (const CallGraphNode::CallRecord &R : *CG[F]) {
-        if (!R.second->getFunction()) {
-          return true;
-        }
-      }
-      return false;
-    };
-
-    // Work out which variables are reachable through function calls
-    FunctionVariableMap transitive_map_function = direct_map_function;
-
-    // If the function makes any unknown call, assume the worst case that it can
-    // access all variables accessed by functions whose address escaped
-    for (Function &F : M.functions()) {
-      if (!F.isDeclaration() && functionMakesUnknownCall(&F)) {
-        if (!isKernelLDS(&F)) {
-          set_union(transitive_map_function[&F],
-                    VariablesReachableThroughFunctionPointer);
-        }
-      }
-    }
-
-    // Direct implementation of collecting all variables reachable from each
-    // function
-    for (Function &Func : M.functions()) {
-      if (Func.isDeclaration() || isKernelLDS(&Func))
-        continue;
-
-      DenseSet<Function *> seen; // catches cycles
-      SmallVector<Function *, 4> wip{&Func};
-
-      while (!wip.empty()) {
-        Function *F = wip.pop_back_val();
-
-        // Can accelerate this by referring to transitive map for functions that
-        // have already been computed, with more care than this
-        set_union(transitive_map_function[&Func], direct_map_function[F]);
-
-        for (const CallGraphNode::CallRecord &R : *CG[F]) {
-          Function *ith = R.second->getFunction();
-          if (ith) {
-            if (!seen.contains(ith)) {
-              seen.insert(ith);
-              wip.push_back(ith);
-            }
-          }
-        }
-      }
-    }
-
-    // direct_map_kernel lists which variables are used by the kernel
-    // find the variables which are used through a function call
-    FunctionVariableMap indirect_map_kernel;
-
-    for (Function &Func : M.functions()) {
-      if (Func.isDeclaration() || !isKernelLDS(&Func))
-        continue;
-
-      for (const CallGraphNode::CallRecord &R : *CG[&Func]) {
-        Function *ith = R.second->getFunction();
-        if (ith) {
-          set_union(indirect_map_kernel[&Func], transitive_map_function[ith]);
-        } else {
-          set_union(indirect_map_kernel[&Func],
-                    VariablesReachableThroughFunctionPointer);
-        }
-      }
-    }
-
-    // Verify that we fall into one of 2 cases:
-    //    - All variables are absolute: this is a re-run of the pass
-    //      so we don't have anything to do.
-    //    - No variables are absolute.
-    std::optional<bool> HasAbsoluteGVs;
-    for (auto &Map : {direct_map_kernel, indirect_map_kernel}) {
-      for (auto &[Fn, GVs] : Map) {
-        for (auto *GV : GVs) {
-          bool IsAbsolute = GV->isAbsoluteSymbolRef();
-          if (HasAbsoluteGVs.has_value()) {
-            if (*HasAbsoluteGVs != IsAbsolute) {
-              report_fatal_error(
-                  "Module cannot mix absolute and non-absolute LDS GVs");
-            }
-          } else
-            HasAbsoluteGVs = IsAbsolute;
-        }
-      }
-    }
-
-    // If we only had absolute GVs, we have nothing to do, return an empty
-    // result.
-    if (HasAbsoluteGVs && *HasAbsoluteGVs)
-      return {FunctionVariableMap(), FunctionVariableMap()};
-
-    return {std::move(direct_map_kernel), std::move(indirect_map_kernel)};
-  }
-
   struct LDSVariableReplacement {
     GlobalVariable *SGV = nullptr;
     DenseMap<GlobalVariable *, Constant *> LDSVarsToConstantGEP;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
index 25e628e5cbc558..26b3819f7fd566 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
@@ -9,13 +9,16 @@
 #include "AMDGPUMemoryUtils.h"
 #include "AMDGPU.h"
 #include "AMDGPUBaseInfo.h"
+#include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/Operator.h"
 #include "llvm/IR/ReplaceConstant.h"
 
 #define DEBUG_TYPE "amdgpu-memory-utils"
@@ -65,6 +68,179 @@ bool isLDSVariableToLower(const GlobalVariable &GV) {
   return true;
 }
 
+bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M) {
+  // Constants are uniqued within LLVM. A ConstantExpr referring to a LDS
+  // global may have uses from multiple different functions as a result.
+  // This pass specialises LDS variables with respect to the kernel that
+  // allocates them.
+
+  // This is semantically equivalent to (the unimplemented as slow):
+  // for (auto &F : M.functions())
+  //   for (auto &BB : F)
+  //     for (auto &I : BB)
+  //       for (Use &Op : I.operands())
+  //         if (constantExprUsesLDS(Op))
+  //           replaceConstantExprInFunction(I, Op);
+
+  SmallVector<Constant *> LDSGlobals;
+  for (auto &GV : M.globals())
+    if (AMDGPU::isLDSVariableToLower(GV))
+      LDSGlobals.push_back(&GV);
+  return convertUsersOfConstantsToInstructions(LDSGlobals);
+}
+
+void getUsesOfLDSByFunction(CallGraph const &CG, Module &M,
+                            FunctionVariableMap &kernels,
+                            FunctionVariableMap &functions) {
+  // Get uses from the current function, excluding uses by called functions
+  // Two output variables to avoid walking the globals list twice
+  for (auto &GV : M.globals()) {
+    if (!AMDGPU::isLDSVariableToLower(GV)) {
+      continue;
+    }
+    for (User *V : GV.users()) {
+      if (auto *I = dyn_cast<Instruction>(V)) {
+        Function *F = I->getFunction();
+        if (isKernelLDS(F)) {
+          kernels[F].insert(&GV);
+        } else {
+          functions[F].insert(&GV);
+        }
+      }
+    }
+  }
+}
+
+bool isKernelLDS(const Function *F) {
+  // Some weirdness here. AMDGPU::isKernelCC does not call into
+  // AMDGPU::isKernel with the calling conv, it instead calls into
+  // isModuleEntryFunction which returns true for more calling conventions
+  // than AMDGPU::isKernel does. There's a FIXME on AMDGPU::isKernel.
+  // There's also a test that checks that the LDS lowering does not hit on
+  // a graphics shader, denoted amdgpu_ps, so stay with the limited case.
+  // Putting LDS in the name of the function to draw attention to this.
+  return AMDGPU::isKernel(F->getCallingConv());
+}
+
+LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M) {
+
+  FunctionVariableMap direct_map_kernel;
+  FunctionVariableMap direct_map_function;
+  getUsesOfLDSByFunction(CG, M, direct_map_kernel, direct_map_function);
+
+  // Collect variables that are used by functions whose address has escaped
+  DenseSet<GlobalVariable *> VariablesReachableThroughFunctionPointer;
+  for (Function &F : M.functions()) {
+    if (!isKernelLDS(&F))
+      if (F.hasAddressTaken(nullptr,
+                            /* IgnoreCallbackUses */ false,
+                            /* IgnoreAssumeLikeCalls */ false,
+                            /* IgnoreLLVMUsed */ true,
+                            /* IgnoreArcAttachedCall */ false)) {
+        set_union(VariablesReachableThroughFunctionPointer,
+                  direct_map_function[&F]);
+      }
+  }
+
+  auto functionMakesUnknownCall = [&](const Function *F) -> bool {
+    assert(!F->isDeclaration());
+    for (const CallGraphNode::CallRecord &R : *CG[F]) {
+      if (!R.second->getFunction()) {
+        return true;
+      }
+    }
+    return false;
+  };
+
+  // Work out which variables are reachable through function calls
+  FunctionVariableMap transitive_map_function = direct_map_function;
+
+  // If the function makes any unknown call, assume the worst case that it can
+  // access all variables accessed by functions whose address escaped
+  for (Function &F : M.functions()) {
+    if (!F.isDeclaration() && functionMakesUnknownCall(&F)) {
+      if (!isKernelLDS(&F)) {
+        set_union(transitive_map_function[&F],
+                  VariablesReachableThroughFunctionPointer);
+      }
+    }
+  }
+
+  // Direct implementation of collecting all variables reachable from each
+  // function
+  for (Function &Func : M.functions()) {
+    if (Func.isDeclaration() || isKernelLDS(&Func))
+      continue;
+
+    DenseSet<Function *> seen; // catches cycles
+    SmallVector<Function *, 4> wip{&Func};
+
+    while (!wip.empty()) {
+      Function *F = wip.pop_back_val();
+
+      // Can accelerate this by referring to transitive map for functions that
+      // have already been computed, with more care than this
+      set_union(transitive_map_function[&Func], direct_map_function[F]);
+
+      for (const CallGraphNode::CallRecord &R : *CG[F]) {
+        Function *ith = R.second->getFunction();
+        if (ith) {
+          if (!seen.contains(ith)) {
+            seen.insert(ith);
+            wip.push_back(ith);
+          }
+        }
+      }
+    }
+  }
+
+  // direct_map_kernel lists which variables are used by the kernel
+  // find the variables which are used through a function call
+  FunctionVariableMap indirect_map_kernel;
+
+  for (Function &Func : M.functions()) {
+    if (Func.isDeclaration() || !isKernelLDS(&Func))
+      continue;
+
+    for (const CallGraphNode::CallRecord &R : *CG[&Func]) {
+      Function *ith = R.second->getFunction();
+      if (ith) {
+        set_union(indirect_map_kernel[&Func], transitive_map_function[ith]);
+      } else {
+        set_union(indirect_map_kernel[&Func],
+                  VariablesReachableThroughFunctionPointer);
+      }
+    }
+  }
+
+  // Verify that we fall into one of 2 cases:
+  //    - All variables are absolute: this is a re-run of the pass
+  //      so we don't have anything to do.
+  //    - No variables are absolute.
+  std::optional<bool> HasAbsoluteGVs;
+  for (auto &Map : {direct_map_kernel, indirect_map_kernel}) {
+    for (auto &[Fn, GVs] : Map) {
+      for (auto *GV : GVs) {
+        bool IsAbsolute = GV->isAbsoluteSymbolRef();
+        if (HasAbsoluteGVs.has_value()) {
+          if (*HasAbsoluteGVs != IsAbsolute) {
+            report_fatal_error(
+                "Module cannot mix absolute and non-absolute LDS GVs");
+          }
+        } else
+          HasAbsoluteGVs = IsAbsolute;
+      }
+    }
+  }
+
+  // If we only had absolute GVs, we have nothing to do, return an empty
+  // result.
+  if (HasAbsoluteGVs && *HasAbsoluteGVs)
+    return {FunctionVariableMap(), FunctionVariableMap()};
+
+  return {std::move(direct_map_kernel), std::move(indirect_map_kernel)};
+}
+
 bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
   Instruction *DefInst = Def->getMemoryInst();
 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
index e42b27f8e09e14..a199b927a28b6c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
@@ -9,6 +9,9 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUMEMORYUTILS_H
 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUMEMORYUTILS_H
 
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+
 namespace llvm {
 
 struct Align;
@@ -19,14 +22,35 @@ class LoadInst;
 class MemoryDef;
 class MemorySSA;
 class Value;
+class Function;
+class CallGraph;
+class Module;
 
 namespace AMDGPU {
 
+using FunctionVariableMap = DenseMap<Function *, DenseSet<GlobalVariable *>>;
+using VariableFunctionMap = DenseMap<GlobalVariable *, DenseSet<Function *>>;
+
 Align getAlign(DataLayout const &DL, const GlobalVariable *GV);
 
 bool isDynamicLDS(const GlobalVariable &GV);
 bool isLDSVariableToLower(const GlobalVariable &GV);
 
+struct LDSUsesInfoTy {
+  FunctionVariableMap direct_access;
+  FunctionVariableMap indirect_access;
+};
+
+bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M);
+
+void getUsesOfLDSByFunction(CallGraph const &CG, Module &M,
+                            FunctionVariableMap &kernels,
+                            FunctionVariableMap &functions);
+
+bool isKernelLDS(const Function *F);
+
+LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M);
+
 /// Given a \p Def clobbering a load from \p Ptr according to the MSSA check
 /// if this is actually a memory update or an artificial clobber to facilitate
 /// ordering constraints.

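Not part of the patch itself: below is a minimal sketch of how code elsewhere in the AMDGPU backend could consume these utilities once they live in AMDGPUMemoryUtils. The include path, the helper name reportKernelLDSUses, and the diagnostic printing are illustrative assumptions; only the AMDGPU::* declarations come from the header diff above.

  // Sketch only: query per-kernel LDS usage through the relocated utilities.
  #include "Utils/AMDGPUMemoryUtils.h" // assumed include path inside lib/Target/AMDGPU
  #include "llvm/Analysis/CallGraph.h"
  #include "llvm/IR/Module.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;

  static void reportKernelLDSUses(Module &M) {
    // Rewrite ConstantExpr uses of LDS globals into instructions first, so
    // every remaining use is attributable to a single function.
    AMDGPU::eliminateConstantExprUsesOfLDSFromAllInstructions(M);

    CallGraph CG(M);
    AMDGPU::LDSUsesInfoTy Info = AMDGPU::getTransitiveUsesOfLDS(CG, M);

    for (Function &F : M.functions()) {
      if (F.isDeclaration() || !AMDGPU::isKernelLDS(&F))
        continue;
      // Both maps are keyed by kernel; lookup() returns an empty set if the
      // kernel touches no LDS at all.
      size_t NumDirect = Info.direct_access.lookup(&F).size();
      size_t NumIndirect = Info.indirect_access.lookup(&F).size();
      errs() << F.getName() << ": " << NumDirect << " direct / " << NumIndirect
             << " indirect LDS variable(s)\n";
    }
  }
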
>From b92b55524dc10cc871b045acbfc4730f25c38a75 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Tue, 9 Apr 2024 17:17:03 +0530
Subject: [PATCH 2/4] [AMDGPU] Move removeNoLdsKernelIdFromReachable from
 amdgpu-lower-module-lds pass to AMDGPUMemoryUtils

---
 .../Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp | 43 +++++++++++++++++++
 .../Target/AMDGPU/Utils/AMDGPUMemoryUtils.h   |  5 +++
 2 files changed, 48 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
index 26b3819f7fd566..624ef20e41c828 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
@@ -241,6 +241,49 @@ LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M) {
   return {std::move(direct_map_kernel), std::move(indirect_map_kernel)};
 }
 
+void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,
+                               StringRef FnAttr) {
+  KernelRoot->removeFnAttr(FnAttr);
+
+  SmallVector<Function *> Tmp({CG[KernelRoot]->getFunction()});
+  if (!Tmp.back())
+    return;
+
+  SmallPtrSet<Function *, 8> Visited;
+  bool SeenUnknownCall = false;
+
+  do {
+    Function *F = Tmp.pop_back_val();
+
+    for (auto &N : *CG[F]) {
+      if (!N.second)
+        continue;
+
+      Function *Callee = N.second->getFunction();
+      if (!Callee) {
+        if (!SeenUnknownCall) {
+          SeenUnknownCall = true;
+
+          // If we see any indirect calls, assume nothing about potential
+          // targets.
+          // TODO: This could be refined to possible LDS global users.
+          for (auto &N : *CG.getExternalCallingNode()) {
+            Function *PotentialCallee = N.second->getFunction();
+            if (!isKernelLDS(PotentialCallee))
+              PotentialCallee->removeFnAttr(FnAttr);
+          }
+
+          continue;
+        }
+      }
+
+      Callee->removeFnAttr(FnAttr);
+      if (Visited.insert(Callee).second)
+        Tmp.push_back(Callee);
+    }
+  } while (!Tmp.empty());
+}
+
 bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
   Instruction *DefInst = Def->getMemoryInst();
 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
index a199b927a28b6c..1df9c28e65888c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
@@ -51,6 +51,11 @@ bool isKernelLDS(const Function *F);
 
 LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M);
 
+/// Strip FnAttr attribute from any functions where we may have
+/// introduced its use.
+void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,
+                               StringRef FnAttr);
+
 /// Given a \p Def clobbering a load from \p Ptr according to the MSSA check
 /// if this is actually a memory update or an artificial clobber to facilitate
 /// ordering constraints.

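Also not part of the patch: a hedged sketch of the kind of call site that replaces the old removeNoLdsKernelIdFromReachable helper. The attribute string and the surrounding loop are assumptions inferred from the old helper's name; only removeFnAttrFromReachable and isKernelLDS come from the utilities above.

  #include "Utils/AMDGPUMemoryUtils.h" // assumed include path
  #include "llvm/ADT/StringRef.h"
  #include "llvm/Analysis/CallGraph.h"
  #include "llvm/IR/Module.h"

  using namespace llvm;

  // Hypothetical caller: clear a marker attribute on every kernel and on
  // everything reachable from it, the way the old LDS-specific helper did.
  static void stripMarkerFromKernels(Module &M, StringRef FnAttr) {
    CallGraph CG(M);
    for (Function &F : M.functions()) {
      if (F.isDeclaration() || !AMDGPU::isKernelLDS(&F))
        continue;
      AMDGPU::removeFnAttrFromReachable(CG, &F, FnAttr);
    }
  }

  // e.g. stripMarkerFromKernels(M, "amdgpu-no-lds-kernel-id");
  // (attribute name assumed from the original helper's name)
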
>From 973b1d9974da27a5af49a42ad855ba35c5122f5e Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Thu, 18 Apr 2024 10:53:08 +0530
Subject: [PATCH 3/4] [AMDGPU] Update removeFnAttrFromReachable

---
 .../Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp | 33 +++++++++----------
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
index 624ef20e41c828..dc68a49b99204d 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
@@ -245,21 +245,18 @@ void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,
                                StringRef FnAttr) {
   KernelRoot->removeFnAttr(FnAttr);
 
-  SmallVector<Function *> Tmp({CG[KernelRoot]->getFunction()});
-  if (!Tmp.back())
-    return;
-
+  SmallVector<Function *> WorkList({CG[KernelRoot]->getFunction()});
   SmallPtrSet<Function *, 8> Visited;
   bool SeenUnknownCall = false;
 
-  do {
-    Function *F = Tmp.pop_back_val();
+  while (!WorkList.empty()) {
+    Function *F = WorkList.pop_back_val();
 
-    for (auto &N : *CG[F]) {
-      if (!N.second)
+    for (auto &CallRecord : *CG[F]) {
+      if (!CallRecord.second)
         continue;
 
-      Function *Callee = N.second->getFunction();
+      Function *Callee = CallRecord.second->getFunction();
       if (!Callee) {
         if (!SeenUnknownCall) {
           SeenUnknownCall = true;
@@ -267,21 +264,21 @@ void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,
           // If we see any indirect calls, assume nothing about potential
           // targets.
           // TODO: This could be refined to possible LDS global users.
-          for (auto &N : *CG.getExternalCallingNode()) {
-            Function *PotentialCallee = N.second->getFunction();
+          for (auto &ExternalCallRecord : *CG.getExternalCallingNode()) {
+            Function *PotentialCallee =
+                ExternalCallRecord.second->getFunction();
+            assert(PotentialCallee);
             if (!isKernelLDS(PotentialCallee))
               PotentialCallee->removeFnAttr(FnAttr);
           }
-
-          continue;
         }
+      } else {
+        Callee->removeFnAttr(FnAttr);
+        if (Visited.insert(Callee).second)
+          WorkList.push_back(Callee);
       }
-
-      Callee->removeFnAttr(FnAttr);
-      if (Visited.insert(Callee).second)
-        Tmp.push_back(Callee);
     }
-  } while (!Tmp.empty());
+  }
 }
 
 bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {

>From 952591e1f15f862ec5a90f5e90c3bdbea098821a Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Fri, 19 Apr 2024 16:33:40 +0530
Subject: [PATCH 4/4] [AMDGPU] Update patch based on review:1

---
 .../Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp | 67 +++++++++----------
 1 file changed, 32 insertions(+), 35 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
index dc68a49b99204d..fdb6978c687926 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
@@ -89,23 +89,21 @@ bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M) {
   return convertUsersOfConstantsToInstructions(LDSGlobals);
 }
 
-void getUsesOfLDSByFunction(CallGraph const &CG, Module &M,
+void getUsesOfLDSByFunction(const CallGraph &CG, Module &M,
                             FunctionVariableMap &kernels,
-                            FunctionVariableMap &functions) {
-  // Get uses from the current function, excluding uses by called functions
+                            FunctionVariableMap &Functions) {
+  // Get uses from the current function, excluding uses by called Functions
   // Two output variables to avoid walking the globals list twice
   for (auto &GV : M.globals()) {
-    if (!AMDGPU::isLDSVariableToLower(GV)) {
+    if (!AMDGPU::isLDSVariableToLower(GV))
       continue;
-    }
     for (User *V : GV.users()) {
       if (auto *I = dyn_cast<Instruction>(V)) {
         Function *F = I->getFunction();
-        if (isKernelLDS(F)) {
+        if (isKernelLDS(F))
           kernels[F].insert(&GV);
-        } else {
-          functions[F].insert(&GV);
-        }
+        else
+          Functions[F].insert(&GV);
       }
     }
   }
@@ -124,9 +122,9 @@ bool isKernelLDS(const Function *F) {
 
 LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M) {
 
-  FunctionVariableMap direct_map_kernel;
-  FunctionVariableMap direct_map_function;
-  getUsesOfLDSByFunction(CG, M, direct_map_kernel, direct_map_function);
+  FunctionVariableMap DirectMapKernel;
+  FunctionVariableMap DirectMapFunction;
+  getUsesOfLDSByFunction(CG, M, DirectMapKernel, DirectMapFunction);
 
   // Collect variables that are used by functions whose address has escaped
   DenseSet<GlobalVariable *> VariablesReachableThroughFunctionPointer;
@@ -138,29 +136,28 @@ LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M) {
                             /* IgnoreLLVMUsed */ true,
                             /* IgnoreArcAttachedCall */ false)) {
         set_union(VariablesReachableThroughFunctionPointer,
-                  direct_map_function[&F]);
+                  DirectMapFunction[&F]);
       }
   }
 
-  auto functionMakesUnknownCall = [&](const Function *F) -> bool {
+  auto FunctionMakesUnknownCall = [&](const Function *F) -> bool {
     assert(!F->isDeclaration());
     for (const CallGraphNode::CallRecord &R : *CG[F]) {
-      if (!R.second->getFunction()) {
+      if (!R.second->getFunction())
         return true;
-      }
     }
     return false;
   };
 
   // Work out which variables are reachable through function calls
-  FunctionVariableMap transitive_map_function = direct_map_function;
+  FunctionVariableMap TransitiveMapFunction = DirectMapFunction;
 
   // If the function makes any unknown call, assume the worst case that it can
   // access all variables accessed by functions whose address escaped
   for (Function &F : M.functions()) {
-    if (!F.isDeclaration() && functionMakesUnknownCall(&F)) {
+    if (!F.isDeclaration() && FunctionMakesUnknownCall(&F)) {
       if (!isKernelLDS(&F)) {
-        set_union(transitive_map_function[&F],
+        set_union(TransitiveMapFunction[&F],
                   VariablesReachableThroughFunctionPointer);
       }
     }
@@ -173,41 +170,41 @@ LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M) {
       continue;
 
     DenseSet<Function *> seen; // catches cycles
-    SmallVector<Function *, 4> wip{&Func};
+    SmallVector<Function *, 4> wip = {&Func};
 
     while (!wip.empty()) {
       Function *F = wip.pop_back_val();
 
       // Can accelerate this by referring to transitive map for functions that
       // have already been computed, with more care than this
-      set_union(transitive_map_function[&Func], direct_map_function[F]);
+      set_union(TransitiveMapFunction[&Func], DirectMapFunction[F]);
 
       for (const CallGraphNode::CallRecord &R : *CG[F]) {
-        Function *ith = R.second->getFunction();
-        if (ith) {
-          if (!seen.contains(ith)) {
-            seen.insert(ith);
-            wip.push_back(ith);
+        Function *Ith = R.second->getFunction();
+        if (Ith) {
+          if (!seen.contains(Ith)) {
+            seen.insert(Ith);
+            wip.push_back(Ith);
           }
         }
       }
     }
   }
 
-  // direct_map_kernel lists which variables are used by the kernel
+  // DirectMapKernel lists which variables are used by the kernel
   // find the variables which are used through a function call
-  FunctionVariableMap indirect_map_kernel;
+  FunctionVariableMap IndirectMapKernel;
 
   for (Function &Func : M.functions()) {
     if (Func.isDeclaration() || !isKernelLDS(&Func))
       continue;
 
     for (const CallGraphNode::CallRecord &R : *CG[&Func]) {
-      Function *ith = R.second->getFunction();
-      if (ith) {
-        set_union(indirect_map_kernel[&Func], transitive_map_function[ith]);
+      Function *Ith = R.second->getFunction();
+      if (Ith) {
+        set_union(IndirectMapKernel[&Func], TransitiveMapFunction[Ith]);
       } else {
-        set_union(indirect_map_kernel[&Func],
+        set_union(IndirectMapKernel[&Func],
                   VariablesReachableThroughFunctionPointer);
       }
     }
@@ -218,7 +215,7 @@ LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M) {
   //      so we don't have anything to do.
   //    - No variables are absolute.
   std::optional<bool> HasAbsoluteGVs;
-  for (auto &Map : {direct_map_kernel, indirect_map_kernel}) {
+  for (auto &Map : {DirectMapKernel, IndirectMapKernel}) {
     for (auto &[Fn, GVs] : Map) {
       for (auto *GV : GVs) {
         bool IsAbsolute = GV->isAbsoluteSymbolRef();
@@ -238,14 +235,14 @@ LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M) {
   if (HasAbsoluteGVs && *HasAbsoluteGVs)
     return {FunctionVariableMap(), FunctionVariableMap()};
 
-  return {std::move(direct_map_kernel), std::move(indirect_map_kernel)};
+  return {std::move(DirectMapKernel), std::move(IndirectMapKernel)};
 }
 
 void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,
                                StringRef FnAttr) {
   KernelRoot->removeFnAttr(FnAttr);
 
-  SmallVector<Function *> WorkList({CG[KernelRoot]->getFunction()});
+  SmallVector<Function *> WorkList = {CG[KernelRoot]->getFunction()};
   SmallPtrSet<Function *, 8> Visited;
   bool SeenUnknownCall = false;
 

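For readers who prefer the end state over stacked diffs, this is the removeFnAttrFromReachable body after all four patches, reconstructed from the hunks above; the patches themselves remain the authoritative source.

  void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,
                                 StringRef FnAttr) {
    KernelRoot->removeFnAttr(FnAttr);

    SmallVector<Function *> WorkList = {CG[KernelRoot]->getFunction()};
    SmallPtrSet<Function *, 8> Visited;
    bool SeenUnknownCall = false;

    while (!WorkList.empty()) {
      Function *F = WorkList.pop_back_val();

      for (auto &CallRecord : *CG[F]) {
        if (!CallRecord.second)
          continue;

        Function *Callee = CallRecord.second->getFunction();
        if (!Callee) {
          if (!SeenUnknownCall) {
            SeenUnknownCall = true;

            // If we see any indirect calls, assume nothing about potential
            // targets.
            // TODO: This could be refined to possible LDS global users.
            for (auto &ExternalCallRecord : *CG.getExternalCallingNode()) {
              Function *PotentialCallee =
                  ExternalCallRecord.second->getFunction();
              assert(PotentialCallee);
              if (!isKernelLDS(PotentialCallee))
                PotentialCallee->removeFnAttr(FnAttr);
            }
          }
        } else {
          Callee->removeFnAttr(FnAttr);
          if (Visited.insert(Callee).second)
            WorkList.push_back(Callee);
        }
      }
    }
  }
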

