[llvm] d70e7ea - [amdgpu][nfc] Extract more functions in LowerModuleLDS, mark more methods static
Jon Chesterfield via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 21 18:25:45 PDT 2023
Author: Jon Chesterfield
Date: 2023-03-22T01:25:28Z
New Revision: d70e7ea0d1e556037ef5b6874734cf51396b033b
URL: https://github.com/llvm/llvm-project/commit/d70e7ea0d1e556037ef5b6874734cf51396b033b
DIFF: https://github.com/llvm/llvm-project/commit/d70e7ea0d1e556037ef5b6874734cf51396b033b.diff
LOG: [amdgpu][nfc] Extract more functions in LowerModuleLDS, mark more methods static
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
Removed:
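
For context, the pattern applied throughout this NFC change can be sketched with a minimal, hypothetical example (the types and names below are invented stand-ins, not code from this commit): logic that previously ran inline in runOnModule moves into a static helper that receives everything it needs as explicit parameters, so it cannot read or mutate pass member state.

#include <set>

// Invented stand-ins for llvm::Module / llvm::GlobalVariable; illustration only.
struct Module {};
struct GlobalVariable {};

class ExamplePass {
  // After the extraction: a static helper with explicit inputs and outputs,
  // carrying no hidden dependence on pass members.
  static void partitionVariables(Module &M,
                                 std::set<GlobalVariable *> &ModuleScope,
                                 std::set<GlobalVariable *> &TableLookup) {
    // ... assign each variable to exactly one lowering strategy ...
    (void)M; (void)ModuleScope; (void)TableLookup;
  }

public:
  bool runOnModule(Module &M) {
    // Before the extraction this logic was written inline here.
    std::set<GlobalVariable *> ModuleScope, TableLookup;
    partitionVariables(M, ModuleScope, TableLookup);
    return true;
  }
};

Marking the helpers static makes the absence of hidden state explicit and lets the compiler diagnose any accidental use of `this`.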
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index 455d76b0cecd..274597cb5607 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -631,7 +631,7 @@ class AMDGPULowerModuleLDS : public ModulePass {
return tableKernelIndexCache[F];
}
- std::vector<Function *> assignLDSKernelIDToEachKernel(
+ static std::vector<Function *> assignLDSKernelIDToEachKernel(
Module *M, DenseSet<Function *> const &KernelsThatAllocateTableLDS) {
// Associate kernels in the set with an arbirary but reproducible order and
// annotate them with that order in metadata. This metadata is recognised by
@@ -680,168 +680,157 @@ class AMDGPULowerModuleLDS : public ModulePass {
return OrderedKernels;
}
- bool runOnModule(Module &M) override {
- LLVMContext &Ctx = M.getContext();
- CallGraph CG = CallGraph(M);
- bool Changed = superAlignLDSGlobals(M);
+ static void partitionVariablesIntoIndirectStrategies(
+ Module &M, LDSUsesInfoTy const &LDSUsesInfo,
+ VariableFunctionMap &LDSToKernelsThatNeedToAccessItIndirectly,
+ DenseSet<GlobalVariable *> &ModuleScopeVariables,
+ DenseSet<GlobalVariable *> &TableLookupVariables,
+ DenseSet<GlobalVariable *> &KernelAccessVariables) {
+
+ GlobalVariable *HybridModuleRoot =
+ LoweringKindLoc != LoweringKind::hybrid
+ ? nullptr
+ : chooseBestVariableForModuleStrategy(
+ M.getDataLayout(), LDSToKernelsThatNeedToAccessItIndirectly);
+
+ DenseSet<Function *> const EmptySet;
+ DenseSet<Function *> const &HybridModuleRootKernels =
+ HybridModuleRoot
+ ? LDSToKernelsThatNeedToAccessItIndirectly[HybridModuleRoot]
+ : EmptySet;
+
+ for (auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {
+ // Each iteration of this loop assigns exactly one global variable to
+ // exactly one of the implementation strategies.
- Changed |= eliminateConstantExprUsesOfLDSFromAllInstructions(M);
+ GlobalVariable *GV = K.first;
+ assert(AMDGPU::isLDSVariableToLower(*GV));
+ assert(K.second.size() != 0);
- Changed = true; // todo: narrow this down
+ switch (LoweringKindLoc) {
+ case LoweringKind::module:
+ ModuleScopeVariables.insert(GV);
+ break;
- // For each kernel, what variables does it access directly or through
- // callees
- LDSUsesInfoTy LDSUsesInfo = getTransitiveUsesOfLDS(CG, M);
-
- // For each variable accessed through callees, which kernels access it
- VariableFunctionMap LDSToKernelsThatNeedToAccessItIndirectly;
- for (auto &K : LDSUsesInfo.indirect_access) {
- Function *F = K.first;
- assert(isKernelLDS(F));
- for (GlobalVariable *GV : K.second) {
- LDSToKernelsThatNeedToAccessItIndirectly[GV].insert(F);
- }
- }
+ case LoweringKind::table:
+ TableLookupVariables.insert(GV);
+ break;
- // Partition variables accessed indirectly into the different strategies
- DenseSet<GlobalVariable *> ModuleScopeVariables;
- DenseSet<GlobalVariable *> TableLookupVariables;
- DenseSet<GlobalVariable *> KernelAccessVariables;
+ case LoweringKind::kernel:
+ if (K.second.size() == 1) {
+ KernelAccessVariables.insert(GV);
+ } else {
+ report_fatal_error(
+ "cannot lower LDS '" + GV->getName() +
+ "' to kernel access as it is reachable from multiple kernels");
+ }
+ break;
- {
- GlobalVariable *HybridModuleRoot =
- LoweringKindLoc != LoweringKind::hybrid
- ? nullptr
- : chooseBestVariableForModuleStrategy(
- M.getDataLayout(),
- LDSToKernelsThatNeedToAccessItIndirectly);
-
- DenseSet<Function *> const EmptySet;
- DenseSet<Function *> const &HybridModuleRootKernels =
- HybridModuleRoot
- ? LDSToKernelsThatNeedToAccessItIndirectly[HybridModuleRoot]
- : EmptySet;
-
- for (auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {
- // Each iteration of this loop assigns exactly one global variable to
- // exactly one of the implementation strategies.
-
- GlobalVariable *GV = K.first;
- assert(AMDGPU::isLDSVariableToLower(*GV));
- assert(K.second.size() != 0);
-
- switch (LoweringKindLoc) {
- case LoweringKind::module:
+ case LoweringKind::hybrid: {
+ if (GV == HybridModuleRoot) {
+ assert(K.second.size() != 1);
ModuleScopeVariables.insert(GV);
- break;
-
- case LoweringKind::table:
+ } else if (K.second.size() == 1) {
+ KernelAccessVariables.insert(GV);
+ } else if (set_is_subset(K.second, HybridModuleRootKernels)) {
+ ModuleScopeVariables.insert(GV);
+ } else {
TableLookupVariables.insert(GV);
- break;
-
- case LoweringKind::kernel:
- if (K.second.size() == 1) {
- KernelAccessVariables.insert(GV);
- } else {
- report_fatal_error(
- "cannot lower LDS '" + GV->getName() +
- "' to kernel access as it is reachable from multiple kernels");
- }
- break;
-
- case LoweringKind::hybrid: {
- if (GV == HybridModuleRoot) {
- assert(K.second.size() != 1);
- ModuleScopeVariables.insert(GV);
- } else if (K.second.size() == 1) {
- KernelAccessVariables.insert(GV);
- } else if (set_is_subset(K.second, HybridModuleRootKernels)) {
- ModuleScopeVariables.insert(GV);
- } else {
- TableLookupVariables.insert(GV);
- }
- break;
- }
}
+ break;
}
+ }
+ }
- // All LDS variables accessed indirectly have now been partitioned into
- // the distinct lowering strategies.
- assert(ModuleScopeVariables.size() + TableLookupVariables.size() +
- KernelAccessVariables.size() ==
- LDSToKernelsThatNeedToAccessItIndirectly.size());
+ // All LDS variables accessed indirectly have now been partitioned into
+ // the distinct lowering strategies.
+ assert(ModuleScopeVariables.size() + TableLookupVariables.size() +
+ KernelAccessVariables.size() ==
+ LDSToKernelsThatNeedToAccessItIndirectly.size());
+ }
+
+ static GlobalVariable *lowerModuleScopeStructVariables(
+ Module &M, DenseSet<GlobalVariable *> const &ModuleScopeVariables,
+ DenseSet<Function *> const &KernelsThatAllocateModuleLDS) {
+ // Create a struct to hold the ModuleScopeVariables
+ // Replace all uses of those variables from non-kernel functions with the
+ // new struct instance. Replace only the uses from kernel functions that will
+ // allocate this instance. That is a space optimisation - kernels that use a
+ // subset of the module scope struct and do not need to allocate it for
+ // indirect calls will only allocate the subset they use (they do so as part
+ // of the per-kernel lowering).
+ if (ModuleScopeVariables.empty()) {
+ return nullptr;
}
- // If the kernel accesses a variable that is going to be stored in the
- // module instance through a call then that kernel needs to allocate the
- // module instance
- DenseSet<Function *> KernelsThatAllocateModuleLDS =
- kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
- ModuleScopeVariables);
- DenseSet<Function *> KernelsThatAllocateTableLDS =
- kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
- TableLookupVariables);
+ LLVMContext &Ctx = M.getContext();
- GlobalVariable *MaybeModuleScopeStruct = nullptr;
- if (!ModuleScopeVariables.empty()) {
- LDSVariableReplacement ModuleScopeReplacement =
- createLDSVariableReplacement(M, "llvm.amdgcn.module.lds",
- ModuleScopeVariables);
- MaybeModuleScopeStruct = ModuleScopeReplacement.SGV;
- appendToCompilerUsed(M,
- {static_cast<GlobalValue *>(
- ConstantExpr::getPointerBitCastOrAddrSpaceCast(
- cast<Constant>(ModuleScopeReplacement.SGV),
- Type::getInt8PtrTy(Ctx)))});
-
- // module.lds will be allocated at zero in any kernel that allocates it
- recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0);
-
- // historic
- removeLocalVarsFromUsedLists(M, ModuleScopeVariables);
-
- // Replace all uses of module scope variable from non-kernel functions
- replaceLDSVariablesWithStruct(
- M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) {
- Instruction *I = dyn_cast<Instruction>(U.getUser());
- if (!I) {
- return false;
- }
- Function *F = I->getFunction();
- return !isKernelLDS(F);
- });
+ LDSVariableReplacement ModuleScopeReplacement =
+ createLDSVariableReplacement(M, "llvm.amdgcn.module.lds",
+ ModuleScopeVariables);
- // Replace uses of module scope variable from kernel functions that
- // allocate the module scope variable, otherwise leave them unchanged
- // Record on each kernel whether the module scope global is used by it
+ appendToCompilerUsed(M, {static_cast<GlobalValue *>(
+ ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ cast<Constant>(ModuleScopeReplacement.SGV),
+ Type::getInt8PtrTy(Ctx)))});
- LLVMContext &Ctx = M.getContext();
- IRBuilder<> Builder(Ctx);
+ // module.lds will be allocated at zero in any kernel that allocates it
+ recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0);
- for (Function &Func : M.functions()) {
- if (Func.isDeclaration() || !isKernelLDS(&Func))
- continue;
+ // historic
+ removeLocalVarsFromUsedLists(M, ModuleScopeVariables);
+
+ // Replace all uses of module scope variable from non-kernel functions
+ replaceLDSVariablesWithStruct(
+ M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) {
+ Instruction *I = dyn_cast<Instruction>(U.getUser());
+ if (!I) {
+ return false;
+ }
+ Function *F = I->getFunction();
+ return !isKernelLDS(F);
+ });
- if (KernelsThatAllocateModuleLDS.contains(&Func)) {
- replaceLDSVariablesWithStruct(
- M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) {
- Instruction *I = dyn_cast<Instruction>(U.getUser());
- if (!I) {
- return false;
- }
- Function *F = I->getFunction();
- return F == &Func;
- });
+ // Replace uses of module scope variable from kernel functions that
+ // allocate the module scope variable, otherwise leave them unchanged
+ // Record on each kernel whether the module scope global is used by it
- markUsedByKernel(Builder, &Func, ModuleScopeReplacement.SGV);
+ IRBuilder<> Builder(Ctx);
- } else {
- Func.addFnAttr("amdgpu-elide-module-lds");
- }
+ for (Function &Func : M.functions()) {
+ if (Func.isDeclaration() || !isKernelLDS(&Func))
+ continue;
+
+ if (KernelsThatAllocateModuleLDS.contains(&Func)) {
+ replaceLDSVariablesWithStruct(
+ M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) {
+ Instruction *I = dyn_cast<Instruction>(U.getUser());
+ if (!I) {
+ return false;
+ }
+ Function *F = I->getFunction();
+ return F == &Func;
+ });
+
+ markUsedByKernel(Builder, &Func, ModuleScopeReplacement.SGV);
+
+ } else {
+ Func.addFnAttr("amdgpu-elide-module-lds");
}
}
- // Create a struct for each kernel for the non-module-scope variables
+ return ModuleScopeReplacement.SGV;
+ }
+
+ static DenseMap<Function *, LDSVariableReplacement>
+ lowerKernelScopeStructVariables(
+ Module &M, LDSUsesInfoTy &LDSUsesInfo,
+ DenseSet<GlobalVariable *> const &ModuleScopeVariables,
+ DenseSet<Function *> const &KernelsThatAllocateModuleLDS,
+ GlobalVariable *MaybeModuleScopeStruct) {
+
+ // Create a struct for each kernel for the non-module-scope variables.
+
DenseMap<Function *, LDSVariableReplacement> KernelToReplacement;
for (Function &Func : M.functions()) {
if (Func.isDeclaration() || !isKernelLDS(&Func))
@@ -927,6 +916,55 @@ class AMDGPULowerModuleLDS : public ModulePass {
return I && I->getFunction() == &Func;
});
}
+ return KernelToReplacement;
+ }
+
+ bool runOnModule(Module &M) override {
+ CallGraph CG = CallGraph(M);
+ bool Changed = superAlignLDSGlobals(M);
+
+ Changed |= eliminateConstantExprUsesOfLDSFromAllInstructions(M);
+
+ Changed = true; // todo: narrow this down
+
+ // For each kernel, what variables does it access directly or through
+ // callees
+ LDSUsesInfoTy LDSUsesInfo = getTransitiveUsesOfLDS(CG, M);
+
+ // For each variable accessed through callees, which kernels access it
+ VariableFunctionMap LDSToKernelsThatNeedToAccessItIndirectly;
+ for (auto &K : LDSUsesInfo.indirect_access) {
+ Function *F = K.first;
+ assert(isKernelLDS(F));
+ for (GlobalVariable *GV : K.second) {
+ LDSToKernelsThatNeedToAccessItIndirectly[GV].insert(F);
+ }
+ }
+
+ DenseSet<GlobalVariable *> ModuleScopeVariables;
+ DenseSet<GlobalVariable *> TableLookupVariables;
+ DenseSet<GlobalVariable *> KernelAccessVariables;
+ partitionVariablesIntoIndirectStrategies(
+ M, LDSUsesInfo, LDSToKernelsThatNeedToAccessItIndirectly,
+ ModuleScopeVariables, TableLookupVariables, KernelAccessVariables);
+
+ // If the kernel accesses a variable that is going to be stored in the
+ // module instance through a call then that kernel needs to allocate the
+ // module instance
+ DenseSet<Function *> KernelsThatAllocateModuleLDS =
+ kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
+ ModuleScopeVariables);
+ DenseSet<Function *> KernelsThatAllocateTableLDS =
+ kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
+ TableLookupVariables);
+
+ GlobalVariable *MaybeModuleScopeStruct = lowerModuleScopeStructVariables(
+ M, ModuleScopeVariables, KernelsThatAllocateModuleLDS);
+
+ DenseMap<Function *, LDSVariableReplacement> KernelToReplacement =
+ lowerKernelScopeStructVariables(M, LDSUsesInfo, ModuleScopeVariables,
+ KernelsThatAllocateModuleLDS,
+ MaybeModuleScopeStruct);
// Lower zero cost accesses to the kernel instances just created
for (auto &GV : KernelAccessVariables) {
@@ -1132,7 +1170,7 @@ class AMDGPULowerModuleLDS : public ModulePass {
}
template <typename PredicateTy>
- void replaceLDSVariablesWithStruct(
+ static void replaceLDSVariablesWithStruct(
Module &M, DenseSet<GlobalVariable *> const &LDSVarsToTransformArg,
LDSVariableReplacement Replacement, PredicateTy Predicate) {
LLVMContext &Ctx = M.getContext();
@@ -1190,9 +1228,9 @@ class AMDGPULowerModuleLDS : public ModulePass {
}
}
- void refineUsesAlignmentAndAA(Value *Ptr, Align A, const DataLayout &DL,
- MDNode *AliasScope, MDNode *NoAlias,
- unsigned MaxDepth = 5) {
+ static void refineUsesAlignmentAndAA(Value *Ptr, Align A,
+ const DataLayout &DL, MDNode *AliasScope,
+ MDNode *NoAlias, unsigned MaxDepth = 5) {
if (!MaxDepth || (A == 1 && !AliasScope))
return;