[llvm] f0e3b39 - [amdgpu][nfc] Move non-shared code out of LDSUtils

Jon Chesterfield via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 8 08:23:12 PST 2021


Author: Jon Chesterfield
Date: 2021-12-08T16:23:03Z
New Revision: f0e3b39a5d0168b30bae84703bbd23c820f41a83

URL: https://github.com/llvm/llvm-project/commit/f0e3b39a5d0168b30bae84703bbd23c820f41a83
DIFF: https://github.com/llvm/llvm-project/commit/f0e3b39a5d0168b30bae84703bbd23c820f41a83.diff

LOG: [amdgpu][nfc] Move non-shared code out of LDSUtils

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
    llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp
    llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
    llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index 12d6d35a69178..5310c542d3569 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -24,13 +24,6 @@
 // A possible future refinement is to specialise the structure per-kernel, so
 // that fields can be elided based on more expensive analysis.
 //
-// NOTE: Since this pass will directly pack LDS (assume large LDS) into a struct
-// type which would cause allocating huge memory for struct instance within
-// every kernel. Hence, before running this pass, it is advisable to run the
-// pass "amdgpu-replace-lds-use-with-pointer" which will replace LDS uses within
-// non-kernel functions by pointers and thereby minimizes the unnecessary per
-// kernel allocation of LDS memory.
-//
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
@@ -62,6 +55,20 @@ static cl::opt<bool> SuperAlignLDSGlobals(
 
 namespace {
 
+SmallPtrSet<GlobalValue *, 32> getUsedList(Module &M) {
+  SmallPtrSet<GlobalValue *, 32> UsedList; // Union of both queries below.
+
+  SmallVector<GlobalValue *, 32> TmpVec;
+  collectUsedGlobalVariables(M, TmpVec, /*CompilerUsed=*/true);
+  UsedList.insert(TmpVec.begin(), TmpVec.end());
+
+  TmpVec.clear(); // Reuse the scratch vector for the second query.
+  collectUsedGlobalVariables(M, TmpVec, /*CompilerUsed=*/false);
+  UsedList.insert(TmpVec.begin(), TmpVec.end());
+
+  return UsedList;
+}
+
 class AMDGPULowerModuleLDS : public ModulePass {
 
   static void removeFromUsedList(Module &M, StringRef Name,
@@ -158,7 +165,7 @@ class AMDGPULowerModuleLDS : public ModulePass {
   }
 
   bool runOnModule(Module &M) override {
-    UsedList = AMDGPU::getUsedList(M);
+    UsedList = getUsedList(M);
 
     bool Changed = processUsedLDS(M);
 

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp
index d55bf3917e9c1..d251dd46f1f87 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp
@@ -87,6 +87,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetOperations.h"
+#include "llvm/Analysis/CallGraph.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -110,6 +111,18 @@ using namespace llvm;
 
 namespace {
 
+namespace AMDGPU {
+/// Collect all the instructions to which user \p U belongs. \p U could be an
+/// instruction itself or a constant expression which is used within an
+/// instruction. If \p CollectKernelInsts is true, collect instructions only
+/// from kernels, otherwise collect instructions only from non-kernel functions.
+DenseMap<Function *, SmallPtrSet<Instruction *, 8>>
+getFunctionToInstsMap(User *U, bool CollectKernelInsts);
+
+SmallPtrSet<Function *, 8> collectNonKernelAccessorsOfLDS(GlobalVariable *GV);
+
+} // namespace AMDGPU
+
 class ReplaceLDSUseImpl {
   Module &M;
   LLVMContext &Ctx;
@@ -127,7 +140,8 @@ class ReplaceLDSUseImpl {
   // Collect LDS which requires their uses to be replaced by pointer.
   std::vector<GlobalVariable *> collectLDSRequiringPointerReplace() {
     // Collect LDS which requires module lowering.
-    std::vector<GlobalVariable *> LDSGlobals = AMDGPU::findVariablesToLower(M);
+    std::vector<GlobalVariable *> LDSGlobals =
+        llvm::AMDGPU::findVariablesToLower(M);
 
     // Remove LDS which don't qualify for replacement.
     llvm::erase_if(LDSGlobals, [&](GlobalVariable *GV) {
@@ -172,7 +186,7 @@ class ReplaceLDSUseImpl {
         AMDGPUAS::LOCAL_ADDRESS);
 
     LDSPointer->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
-    LDSPointer->setAlignment(AMDGPU::getAlign(DL, LDSPointer));
+    LDSPointer->setAlignment(llvm::AMDGPU::getAlign(DL, LDSPointer));
 
     // Mark that an associated LDS pointer is created for LDS.
     LDSToPointer[GV] = LDSPointer;
@@ -378,6 +392,184 @@ bool ReplaceLDSUseImpl::replaceLDSUse(GlobalVariable *GV) {
   return true;
 }
 
+namespace AMDGPU {
+
+// A helper class for collecting all reachable callees for each kernel defined
+// within the module.
+class CollectReachableCallees {
+  Module &M;
+  CallGraph CG; // Built from M in the constructor.
+  SmallPtrSet<CallGraphNode *, 8> AddressTakenFunctions;
+
+  // Collect defined non-kernel callees of the external calling node.
+  void collectAddressTakenFunctions() {
+    auto *ECNode = CG.getExternalCallingNode();
+
+    for (auto GI = ECNode->begin(), GE = ECNode->end(); GI != GE; ++GI) {
+      auto *CGN = GI->second;
+      auto *F = CGN->getFunction();
+      if (!F || F->isDeclaration() || llvm::AMDGPU::isKernelCC(F))
+        continue;
+      AddressTakenFunctions.insert(CGN);
+    }
+  }
+
+  // For the given kernel K, collect all functions reachable from it.
+  SmallPtrSet<Function *, 8> collectReachableCallees(Function *K) {
+    SmallPtrSet<Function *, 8> ReachableCallees;
+
+    // Call graph node which represents this kernel.
+    auto *KCGN = CG[K];
+
+    // Go through all call graph nodes reachable from the node representing this
+    // kernel and visit all their call sites. If a call site is direct, add the
+    // corresponding callee to the reachable callee set. If it is indirect,
+    // resolve it to the potential callees, i.e. collected address-taken
+    // functions whose function type matches the call, add them to the set, and
+    // repeat the process for the newly added potential callee nodes.
+    //
+    // FIXME: Need to handle bit-casted function pointers.
+    //
+    SmallVector<CallGraphNode *, 8> CGNStack(df_begin(KCGN), df_end(KCGN));
+    SmallPtrSet<CallGraphNode *, 8> VisitedCGNodes;
+    while (!CGNStack.empty()) {
+      auto *CGN = CGNStack.pop_back_val();
+
+      if (!VisitedCGNodes.insert(CGN).second) // Already processed.
+        continue;
+
+      // Ignore a call graph node which has no associated function or whose
+      // associated function is not a definition.
+      if (!CGN->getFunction() || CGN->getFunction()->isDeclaration())
+        continue;
+
+      for (auto GI = CGN->begin(), GE = CGN->end(); GI != GE; ++GI) {
+        auto *RCB = cast<CallBase>(GI->first.getValue());
+        auto *RCGN = GI->second;
+
+        if (auto *DCallee = RCGN->getFunction()) {
+          ReachableCallees.insert(DCallee);
+        } else if (RCB->isIndirectCall()) {
+          auto *RCBFTy = RCB->getFunctionType();
+          for (auto *ACGN : AddressTakenFunctions) {
+            auto *ACallee = ACGN->getFunction();
+            if (ACallee->getFunctionType() == RCBFTy) {
+              ReachableCallees.insert(ACallee);
+              CGNStack.append(df_begin(ACGN), df_end(ACGN));
+            }
+          }
+        }
+      }
+    }
+
+    return ReachableCallees;
+  }
+
+public:
+  explicit CollectReachableCallees(Module &M) : M(M), CG(CallGraph(M)) {
+    // Collect address taken functions.
+    collectAddressTakenFunctions();
+  }
+
+  void collectReachableCallees(
+      DenseMap<Function *, SmallPtrSet<Function *, 8>> &KernelToCallees) {
+    // Collect reachable callee set for each kernel defined in the module.
+    for (Function &F : M.functions()) {
+      if (!llvm::AMDGPU::isKernelCC(&F))
+        continue;
+      Function *K = &F;
+      KernelToCallees[K] = collectReachableCallees(K);
+    }
+  }
+};
+
+/// Collect reachable callees for each kernel defined in the module \p M and
+/// return the collected callees in \p KernelToCallees.
+void collectReachableCallees(
+    Module &M,
+    DenseMap<Function *, SmallPtrSet<Function *, 8>> &KernelToCallees) {
+  CollectReachableCallees CRC{M};
+  CRC.collectReachableCallees(KernelToCallees);
+}
+
+/// For the given LDS global \p GV, visit all its users and return the set of
+/// non-kernel functions within which \p GV is used. The set is empty when a
+/// global value is found among the (transitive) users of \p GV.
+SmallPtrSet<Function *, 8> collectNonKernelAccessorsOfLDS(GlobalVariable *GV) {
+  SmallPtrSet<Function *, 8> LDSAccessors;
+  SmallVector<User *, 8> UserStack(GV->users());
+  SmallPtrSet<User *, 8> VisitedUsers;
+
+  while (!UserStack.empty()) {
+    auto *U = UserStack.pop_back_val();
+
+    // `U` is already visited? continue to next one.
+    if (!VisitedUsers.insert(U).second)
+      continue;
+
+    // `U` is a global value using LDS, e.g. as its initializer. Ignore LDS.
+    if (isa<GlobalValue>(U))
+      return SmallPtrSet<Function *, 8>();
+
+    // Recursively explore constant users.
+    if (isa<Constant>(U)) {
+      append_range(UserStack, U->users());
+      continue;
+    }
+
+    // `U` should be an instruction, if it belongs to a non-kernel function F,
+    // then collect F.
+    Function *F = cast<Instruction>(U)->getFunction();
+    if (!llvm::AMDGPU::isKernelCC(F))
+      LDSAccessors.insert(F);
+  }
+
+  return LDSAccessors;
+}
+
+DenseMap<Function *, SmallPtrSet<Instruction *, 8>>
+getFunctionToInstsMap(User *U, bool CollectKernelInsts) {
+  DenseMap<Function *, SmallPtrSet<Instruction *, 8>> FunctionToInsts;
+  SmallVector<User *, 8> UserStack;
+  SmallPtrSet<User *, 8> VisitedUsers;
+
+  UserStack.push_back(U); // The walk starts at U itself, not at its users.
+
+  while (!UserStack.empty()) {
+    auto *UU = UserStack.pop_back_val();
+
+    if (!VisitedUsers.insert(UU).second) // Already visited.
+      continue;
+
+    if (isa<GlobalValue>(UU)) // Global values are not looked through.
+      continue;
+
+    if (isa<Constant>(UU)) { // Look through other constants to their users.
+      append_range(UserStack, UU->users());
+      continue;
+    }
+
+    auto *I = cast<Instruction>(UU); // Anything else must be an instruction.
+    Function *F = I->getFunction();
+    if (CollectKernelInsts) { // Keep only instructions inside kernels.
+      if (!llvm::AMDGPU::isKernelCC(F)) {
+        continue;
+      }
+    } else { // Keep only instructions inside non-kernel functions.
+      if (llvm::AMDGPU::isKernelCC(F)) {
+        continue;
+      }
+    }
+
+    FunctionToInsts.insert(std::make_pair(F, SmallPtrSet<Instruction *, 8>()));
+    FunctionToInsts[F].insert(I); // Record I under its parent function F.
+  }
+
+  return FunctionToInsts;
+}
+
+} // namespace AMDGPU
+
 // Entry-point function which interface ReplaceLDSUseImpl with outside of the
 // class.
 bool ReplaceLDSUseImpl::replaceLDSUse() {

diff  --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
index 2e4d83fbbc39c..4040dc5d8b2fd 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
@@ -15,7 +15,6 @@
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SetVector.h"
-#include "llvm/Analysis/CallGraph.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/ReplaceConstant.h"
 
@@ -25,175 +24,6 @@ namespace llvm {
 
 namespace AMDGPU {
 
-// An helper class for collecting all reachable callees for each kernel defined
-// within the module.
-class CollectReachableCallees {
-  Module &M;
-  CallGraph CG;
-  SmallPtrSet<CallGraphNode *, 8> AddressTakenFunctions;
-
-  // Collect all address taken functions within the module.
-  void collectAddressTakenFunctions() {
-    auto *ECNode = CG.getExternalCallingNode();
-
-    for (auto GI = ECNode->begin(), GE = ECNode->end(); GI != GE; ++GI) {
-      auto *CGN = GI->second;
-      auto *F = CGN->getFunction();
-      if (!F || F->isDeclaration() || AMDGPU::isKernelCC(F))
-        continue;
-      AddressTakenFunctions.insert(CGN);
-    }
-  }
-
-  // For given kernel, collect all its reachable non-kernel functions.
-  SmallPtrSet<Function *, 8> collectReachableCallees(Function *K) {
-    SmallPtrSet<Function *, 8> ReachableCallees;
-
-    // Call graph node which represents this kernel.
-    auto *KCGN = CG[K];
-
-    // Go through all call graph nodes reachable from the node representing this
-    // kernel, visit all their call sites, if the call site is direct, add
-    // corresponding callee to reachable callee set, if it is indirect, resolve
-    // the indirect call site to potential reachable callees, add them to
-    // reachable callee set, and repeat the process for the newly added
-    // potential callee nodes.
-    //
-    // FIXME: Need to handle bit-casted function pointers.
-    //
-    SmallVector<CallGraphNode *, 8> CGNStack(df_begin(KCGN), df_end(KCGN));
-    SmallPtrSet<CallGraphNode *, 8> VisitedCGNodes;
-    while (!CGNStack.empty()) {
-      auto *CGN = CGNStack.pop_back_val();
-
-      if (!VisitedCGNodes.insert(CGN).second)
-        continue;
-
-      // Ignore call graph node which does not have associated function or
-      // associated function is not a definition.
-      if (!CGN->getFunction() || CGN->getFunction()->isDeclaration())
-        continue;
-
-      for (auto GI = CGN->begin(), GE = CGN->end(); GI != GE; ++GI) {
-        auto *RCB = cast<CallBase>(GI->first.getValue());
-        auto *RCGN = GI->second;
-
-        if (auto *DCallee = RCGN->getFunction()) {
-          ReachableCallees.insert(DCallee);
-        } else if (RCB->isIndirectCall()) {
-          auto *RCBFTy = RCB->getFunctionType();
-          for (auto *ACGN : AddressTakenFunctions) {
-            auto *ACallee = ACGN->getFunction();
-            if (ACallee->getFunctionType() == RCBFTy) {
-              ReachableCallees.insert(ACallee);
-              CGNStack.append(df_begin(ACGN), df_end(ACGN));
-            }
-          }
-        }
-      }
-    }
-
-    return ReachableCallees;
-  }
-
-public:
-  explicit CollectReachableCallees(Module &M) : M(M), CG(CallGraph(M)) {
-    // Collect address taken functions.
-    collectAddressTakenFunctions();
-  }
-
-  void collectReachableCallees(
-      DenseMap<Function *, SmallPtrSet<Function *, 8>> &KernelToCallees) {
-    // Collect reachable callee set for each kernel defined in the module.
-    for (Function &F : M.functions()) {
-      if (!AMDGPU::isKernelCC(&F))
-        continue;
-      Function *K = &F;
-      KernelToCallees[K] = collectReachableCallees(K);
-    }
-  }
-};
-
-void collectReachableCallees(
-    Module &M,
-    DenseMap<Function *, SmallPtrSet<Function *, 8>> &KernelToCallees) {
-  CollectReachableCallees CRC{M};
-  CRC.collectReachableCallees(KernelToCallees);
-}
-
-SmallPtrSet<Function *, 8> collectNonKernelAccessorsOfLDS(GlobalVariable *GV) {
-  SmallPtrSet<Function *, 8> LDSAccessors;
-  SmallVector<User *, 8> UserStack(GV->users());
-  SmallPtrSet<User *, 8> VisitedUsers;
-
-  while (!UserStack.empty()) {
-    auto *U = UserStack.pop_back_val();
-
-    // `U` is already visited? continue to next one.
-    if (!VisitedUsers.insert(U).second)
-      continue;
-
-    // `U` is a global variable which is initialized with LDS. Ignore LDS.
-    if (isa<GlobalValue>(U))
-      return SmallPtrSet<Function *, 8>();
-
-    // Recursively explore constant users.
-    if (isa<Constant>(U)) {
-      append_range(UserStack, U->users());
-      continue;
-    }
-
-    // `U` should be an instruction, if it belongs to a non-kernel function F,
-    // then collect F.
-    Function *F = cast<Instruction>(U)->getFunction();
-    if (!AMDGPU::isKernelCC(F))
-      LDSAccessors.insert(F);
-  }
-
-  return LDSAccessors;
-}
-
-DenseMap<Function *, SmallPtrSet<Instruction *, 8>>
-getFunctionToInstsMap(User *U, bool CollectKernelInsts) {
-  DenseMap<Function *, SmallPtrSet<Instruction *, 8>> FunctionToInsts;
-  SmallVector<User *, 8> UserStack;
-  SmallPtrSet<User *, 8> VisitedUsers;
-
-  UserStack.push_back(U);
-
-  while (!UserStack.empty()) {
-    auto *UU = UserStack.pop_back_val();
-
-    if (!VisitedUsers.insert(UU).second)
-      continue;
-
-    if (isa<GlobalValue>(UU))
-      continue;
-
-    if (isa<Constant>(UU)) {
-      append_range(UserStack, UU->users());
-      continue;
-    }
-
-    auto *I = cast<Instruction>(UU);
-    Function *F = I->getFunction();
-    if (CollectKernelInsts) {
-      if (!AMDGPU::isKernelCC(F)) {
-        continue;
-      }
-    } else {
-      if (AMDGPU::isKernelCC(F)) {
-        continue;
-      }
-    }
-
-    FunctionToInsts.insert(std::make_pair(F, SmallPtrSet<Instruction *, 8>()));
-    FunctionToInsts[F].insert(I);
-  }
-
-  return FunctionToInsts;
-}
-
 bool isKernelCC(const Function *Func) {
   return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
 }
@@ -251,7 +81,12 @@ bool hasUserInstruction(const GlobalValue *GV) {
   return false;
 }
 
-bool shouldLowerLDSToStruct(const GlobalVariable &GV, const Function *F) {
+/// If \p F is null, \returns true if the LDS global \p GV requires lowering
+/// to a module LDS structure. If \p F is given, it must be a kernel, and the
+/// function \returns true if \p GV is directly used from that kernel and it
+/// is safe to replace its uses with a kernel LDS structure member.
+static bool shouldLowerLDSToStruct(const GlobalVariable &GV,
+                                   const Function *F) {
   // We are not interested in kernel LDS lowering for module LDS itself.
   if (F && GV.getName() == "llvm.amdgcn.module.lds")
     return false;
@@ -324,7 +159,7 @@ std::vector<GlobalVariable *> findVariablesToLower(Module &M,
       continue;
     }
     if (!isa<UndefValue>(GV.getInitializer())) {
-      // Initializers are unimplemented for local address space.
+      // Initializers are unimplemented for LDS address space.
       // Leave such variables in place for consistent error reporting.
       continue;
     }
@@ -342,20 +177,6 @@ std::vector<GlobalVariable *> findVariablesToLower(Module &M,
   return LocalVars;
 }
 
-SmallPtrSet<GlobalValue *, 32> getUsedList(Module &M) {
-  SmallPtrSet<GlobalValue *, 32> UsedList;
-
-  SmallVector<GlobalValue *, 32> TmpVec;
-  collectUsedGlobalVariables(M, TmpVec, true);
-  UsedList.insert(TmpVec.begin(), TmpVec.end());
-
-  TmpVec.clear();
-  collectUsedGlobalVariables(M, TmpVec, false);
-  UsedList.insert(TmpVec.begin(), TmpVec.end());
-
-  return UsedList;
-}
-
 } // end namespace AMDGPU
 
 } // end namespace llvm

diff  --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
index d1c9229bc3362..ddcaeed6809c4 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
@@ -22,24 +22,6 @@ class ConstantExpr;
 
 namespace AMDGPU {
 
-/// Collect reachable callees for each kernel defined in the module \p M and
-/// return collected callees at \p KernelToCallees.
-void collectReachableCallees(
-    Module &M,
-    DenseMap<Function *, SmallPtrSet<Function *, 8>> &KernelToCallees);
-
-/// For the given LDS global \p GV, visit all its users and collect all
-/// non-kernel functions within which \p GV is used and return collected list of
-/// such non-kernel functions.
-SmallPtrSet<Function *, 8> collectNonKernelAccessorsOfLDS(GlobalVariable *GV);
-
-/// Collect all the instructions where user \p U belongs to. \p U could be
-/// instruction itself or it could be a constant expression which is used within
-/// an instruction. If \p CollectKernelInsts is true, collect instructions only
-/// from kernels, otherwise collect instructions only from non-kernel functions.
-DenseMap<Function *, SmallPtrSet<Instruction *, 8>>
-getFunctionToInstsMap(User *U, bool CollectKernelInsts);
-
 bool isKernelCC(const Function *Func);
 
 Align getAlign(DataLayout const &DL, const GlobalVariable *GV);
@@ -48,18 +30,9 @@ Align getAlign(DataLayout const &DL, const GlobalVariable *GV);
 /// as an use within some instruction (either from kernel or from non-kernel).
 bool hasUserInstruction(const GlobalValue *GV);
 
-/// \returns true if an LDS global requires lowering to a module LDS structure
-/// if \p F is not given. If \p F is given it must be a kernel and function
-/// \returns true if an LDS global is directly used from that kernel and it
-/// is safe to replace its uses with a kernel LDS structure member.
-bool shouldLowerLDSToStruct(const GlobalVariable &GV,
-                            const Function *F = nullptr);
-
 std::vector<GlobalVariable *> findVariablesToLower(Module &M,
                                                    const Function *F = nullptr);
 
-SmallPtrSet<GlobalValue *, 32> getUsedList(Module &M);
-
 /// Replace all uses of constant \p C with instructions in \p F.
 void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F);
 } // end namespace AMDGPU


        


More information about the llvm-commits mailing list