[llvm] d70e7ea - [amdgpu][nfc] Extract more functions in LowerModuleLDS, mark more methods static
Jon Chesterfield via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 21 18:25:45 PDT 2023
Author: Jon Chesterfield
Date: 2023-03-22T01:25:28Z
New Revision: d70e7ea0d1e556037ef5b6874734cf51396b033b
URL: https://github.com/llvm/llvm-project/commit/d70e7ea0d1e556037ef5b6874734cf51396b033b
DIFF: https://github.com/llvm/llvm-project/commit/d70e7ea0d1e556037ef5b6874734cf51396b033b.diff
LOG: [amdgpu][nfc] Extract more functions in LowerModuleLDS, mark more methods static
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
Removed:
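
For context, the pattern applied throughout this NFC change can be sketched with a minimal, hypothetical example (the types and names below are invented stand-ins, not code from this commit): logic that previously ran inline in runOnModule moves into a static helper that receives everything it needs as explicit parameters, so it cannot read or mutate pass member state.

#include <set>

// Invented stand-ins for llvm::Module / llvm::GlobalVariable; illustration only.
struct Module {};
struct GlobalVariable {};

class ExamplePass {
  // After the extraction: a static helper with explicit inputs and outputs,
  // carrying no hidden dependence on pass members.
  static void partitionVariables(Module &M,
                                 std::set<GlobalVariable *> &ModuleScope,
                                 std::set<GlobalVariable *> &TableLookup) {
    // ... assign each variable to exactly one lowering strategy ...
    (void)M; (void)ModuleScope; (void)TableLookup;
  }

public:
  bool runOnModule(Module &M) {
    // Before the extraction this logic was written inline here.
    std::set<GlobalVariable *> ModuleScope, TableLookup;
    partitionVariables(M, ModuleScope, TableLookup);
    return true;
  }
};

Marking the helpers static makes the absence of hidden state explicit and lets the compiler diagnose any accidental use of `this`.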
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index 455d76b0cecd..274597cb5607 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -631,7 +631,7 @@ class AMDGPULowerModuleLDS : public ModulePass {
return tableKernelIndexCache[F];
}
- std::vector<Function *> assignLDSKernelIDToEachKernel(
+ static std::vector<Function *> assignLDSKernelIDToEachKernel(
Module *M, DenseSet<Function *> const &KernelsThatAllocateTableLDS) {
// Associate kernels in the set with an arbirary but reproducible order and
// annotate them with that order in metadata. This metadata is recognised by
@@ -680,168 +680,157 @@ class AMDGPULowerModuleLDS : public ModulePass {
return OrderedKernels;
}
- bool runOnModule(Module &M) override {
- LLVMContext &Ctx = M.getContext();
- CallGraph CG = CallGraph(M);
- bool Changed = superAlignLDSGlobals(M);
+ static void partitionVariablesIntoIndirectStrategies(
+ Module &M, LDSUsesInfoTy const &LDSUsesInfo,
+ VariableFunctionMap &LDSToKernelsThatNeedToAccessItIndirectly,
+ DenseSet<GlobalVariable *> &ModuleScopeVariables,
+ DenseSet<GlobalVariable *> &TableLookupVariables,
+ DenseSet<GlobalVariable *> &KernelAccessVariables) {
+
+ GlobalVariable *HybridModuleRoot =
+ LoweringKindLoc != LoweringKind::hybrid
+ ? nullptr
+ : chooseBestVariableForModuleStrategy(
+ M.getDataLayout(), LDSToKernelsThatNeedToAccessItIndirectly);
+
+ DenseSet<Function *> const EmptySet;
+ DenseSet<Function *> const &HybridModuleRootKernels =
+ HybridModuleRoot
+ ? LDSToKernelsThatNeedToAccessItIndirectly[HybridModuleRoot]
+ : EmptySet;
+
+ for (auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {
+ // Each iteration of this loop assigns exactly one global variable to
+ // exactly one of the implementation strategies.
- Changed |= eliminateConstantExprUsesOfLDSFromAllInstructions(M);
+ GlobalVariable *GV = K.first;
+ assert(AMDGPU::isLDSVariableToLower(*GV));
+ assert(K.second.size() != 0);
- Changed = true; // todo: narrow this down
+ switch (LoweringKindLoc) {
+ case LoweringKind::module:
+ ModuleScopeVariables.insert(GV);
+ break;
- // For each kernel, what variables does it access directly or through
- // callees
- LDSUsesInfoTy LDSUsesInfo = getTransitiveUsesOfLDS(CG, M);
-
- // For each variable accessed through callees, which kernels access it
- VariableFunctionMap LDSToKernelsThatNeedToAccessItIndirectly;
- for (auto &K : LDSUsesInfo.indirect_access) {
- Function *F = K.first;
- assert(isKernelLDS(F));
- for (GlobalVariable *GV : K.second) {
- LDSToKernelsThatNeedToAccessItIndirectly[GV].insert(F);
- }
- }
+ case LoweringKind::table:
+ TableLookupVariables.insert(GV);
+ break;
- // Partition variables accessed indirectly into the different strategies
- DenseSet<GlobalVariable *> ModuleScopeVariables;
- DenseSet<GlobalVariable *> TableLookupVariables;
- DenseSet<GlobalVariable *> KernelAccessVariables;
+ case LoweringKind::kernel:
+ if (K.second.size() == 1) {
+ KernelAccessVariables.insert(GV);
+ } else {
+ report_fatal_error(
+ "cannot lower LDS '" + GV->getName() +
+ "' to kernel access as it is reachable from multiple kernels");
+ }
+ break;
- {
- GlobalVariable *HybridModuleRoot =
- LoweringKindLoc != LoweringKind::hybrid
- ? nullptr
- : chooseBestVariableForModuleStrategy(
- M.getDataLayout(),
- LDSToKernelsThatNeedToAccessItIndirectly);
-
- DenseSet<Function *> const EmptySet;
- DenseSet<Function *> const &HybridModuleRootKernels =
- HybridModuleRoot
- ? LDSToKernelsThatNeedToAccessItIndirectly[HybridModuleRoot]
- : EmptySet;
-
- for (auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {
- // Each iteration of this loop assigns exactly one global variable to
- // exactly one of the implementation strategies.
-
- GlobalVariable *GV = K.first;
- assert(AMDGPU::isLDSVariableToLower(*GV));
- assert(K.second.size() != 0);
-
- switch (LoweringKindLoc) {
- case LoweringKind::module:
+ case LoweringKind::hybrid: {
+ if (GV == HybridModuleRoot) {
+ assert(K.second.size() != 1);
ModuleScopeVariables.insert(GV);
- break;
-
- case LoweringKind::table:
+ } else if (K.second.size() == 1) {
+ KernelAccessVariables.insert(GV);
+ } else if (set_is_subset(K.second, HybridModuleRootKernels)) {
+ ModuleScopeVariables.insert(GV);
+ } else {
TableLookupVariables.insert(GV);
- break;
-
- case LoweringKind::kernel:
- if (K.second.size() == 1) {
- KernelAccessVariables.insert(GV);
- } else {
- report_fatal_error(
- "cannot lower LDS '" + GV->getName() +
- "' to kernel access as it is reachable from multiple kernels");
- }
- break;
-
- case LoweringKind::hybrid: {
- if (GV == HybridModuleRoot) {
- assert(K.second.size() != 1);
- ModuleScopeVariables.insert(GV);
- } else if (K.second.size() == 1) {
- KernelAccessVariables.insert(GV);
- } else if (set_is_subset(K.second, HybridModuleRootKernels)) {
- ModuleScopeVariables.insert(GV);
- } else {
- TableLookupVariables.insert(GV);
- }
- break;
- }
}
+ break;
}
+ }
+ }
- // All LDS variables accessed indirectly have now been partitioned into
- // the distinct lowering strategies.
- assert(ModuleScopeVariables.size() + TableLookupVariables.size() +
- KernelAccessVariables.size() ==
- LDSToKernelsThatNeedToAccessItIndirectly.size());
+ // All LDS variables accessed indirectly have now been partitioned into
+ // the distinct lowering strategies.
+ assert(ModuleScopeVariables.size() + TableLookupVariables.size() +
+ KernelAccessVariables.size() ==
+ LDSToKernelsThatNeedToAccessItIndirectly.size());
+ }
+
+ static GlobalVariable *lowerModuleScopeStructVariables(
+ Module &M, DenseSet<GlobalVariable *> const &ModuleScopeVariables,
+ DenseSet<Function *> const &KernelsThatAllocateModuleLDS) {
+ // Create a struct to hold the ModuleScopeVariables
+ // Replace all uses of those variables from non-kernel functions with the
+ // new struct instance. Replace only the uses from kernel functions that will
+ // allocate this instance. That is a space optimisation - kernels that use a
+ // subset of the module scope struct and do not need to allocate it for
+ // indirect calls will only allocate the subset they use (they do so as part
+ // of the per-kernel lowering).
+ if (ModuleScopeVariables.empty()) {
+ return nullptr;
}
- // If the kernel accesses a variable that is going to be stored in the
- // module instance through a call then that kernel needs to allocate the
- // module instance
- DenseSet<Function *> KernelsThatAllocateModuleLDS =
- kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
- ModuleScopeVariables);
- DenseSet<Function *> KernelsThatAllocateTableLDS =
- kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
- TableLookupVariables);
+ LLVMContext &Ctx = M.getContext();
- GlobalVariable *MaybeModuleScopeStruct = nullptr;
- if (!ModuleScopeVariables.empty()) {
- LDSVariableReplacement ModuleScopeReplacement =
- createLDSVariableReplacement(M, "llvm.amdgcn.module.lds",
- ModuleScopeVariables);
- MaybeModuleScopeStruct = ModuleScopeReplacement.SGV;
- appendToCompilerUsed(M,
- {static_cast<GlobalValue *>(
- ConstantExpr::getPointerBitCastOrAddrSpaceCast(
- cast<Constant>(ModuleScopeReplacement.SGV),
- Type::getInt8PtrTy(Ctx)))});
-
- // module.lds will be allocated at zero in any kernel that allocates it
- recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0);
-
- // historic
- removeLocalVarsFromUsedLists(M, ModuleScopeVariables);
-
- // Replace all uses of module scope variable from non-kernel functions
- replaceLDSVariablesWithStruct(
- M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) {
- Instruction *I = dyn_cast<Instruction>(U.getUser());
- if (!I) {
- return false;
- }
- Function *F = I->getFunction();
- return !isKernelLDS(F);
- });
+ LDSVariableReplacement ModuleScopeReplacement =
+ createLDSVariableReplacement(M, "llvm.amdgcn.module.lds",
+ ModuleScopeVariables);
- // Replace uses of module scope variable from kernel functions that
- // allocate the module scope variable, otherwise leave them unchanged
- // Record on each kernel whether the module scope global is used by it
+ appendToCompilerUsed(M, {static_cast<GlobalValue *>(
+ ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ cast<Constant>(ModuleScopeReplacement.SGV),
+ Type::getInt8PtrTy(Ctx)))});
- LLVMContext &Ctx = M.getContext();
- IRBuilder<> Builder(Ctx);
+ // module.lds will be allocated at zero in any kernel that allocates it
+ recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0);
- for (Function &Func : M.functions()) {
- if (Func.isDeclaration() || !isKernelLDS(&Func))
- continue;
+ // historic
+ removeLocalVarsFromUsedLists(M, ModuleScopeVariables);
+
+ // Replace all uses of module scope variable from non-kernel functions
+ replaceLDSVariablesWithStruct(
+ M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) {
+ Instruction *I = dyn_cast<Instruction>(U.getUser());
+ if (!I) {
+ return false;
+ }
+ Function *F = I->getFunction();
+ return !isKernelLDS(F);
+ });
- if (KernelsThatAllocateModuleLDS.contains(&Func)) {
- replaceLDSVariablesWithStruct(
- M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) {
- Instruction *I = dyn_cast<Instruction>(U.getUser());
- if (!I) {
- return false;
- }
- Function *F = I->getFunction();
- return F == &Func;
- });
+ // Replace uses of module scope variable from kernel functions that
+ // allocate the module scope variable, otherwise leave them unchanged
+ // Record on each kernel whether the module scope global is used by it
- markUsedByKernel(Builder, &Func, ModuleScopeReplacement.SGV);
+ IRBuilder<> Builder(Ctx);
- } else {
- Func.addFnAttr("amdgpu-elide-module-lds");
- }
+ for (Function &Func : M.functions()) {
+ if (Func.isDeclaration() || !isKernelLDS(&Func))
+ continue;
+
+ if (KernelsThatAllocateModuleLDS.contains(&Func)) {
+ replaceLDSVariablesWithStruct(
+ M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) {
+ Instruction *I = dyn_cast<Instruction>(U.getUser());
+ if (!I) {
+ return false;
+ }
+ Function *F = I->getFunction();
+ return F == &Func;
+ });
+
+ markUsedByKernel(Builder, &Func, ModuleScopeReplacement.SGV);
+
+ } else {
+ Func.addFnAttr("amdgpu-elide-module-lds");
}
}
- // Create a struct for each kernel for the non-module-scope variables
+ return ModuleScopeReplacement.SGV;
+ }
+
+ static DenseMap<Function *, LDSVariableReplacement>
+ lowerKernelScopeStructVariables(
+ Module &M, LDSUsesInfoTy &LDSUsesInfo,
+ DenseSet<GlobalVariable *> const &ModuleScopeVariables,
+ DenseSet<Function *> const &KernelsThatAllocateModuleLDS,
+ GlobalVariable *MaybeModuleScopeStruct) {
+
+ // Create a struct for each kernel for the non-module-scope variables.
+
DenseMap<Function *, LDSVariableReplacement> KernelToReplacement;
for (Function &Func : M.functions()) {
if (Func.isDeclaration() || !isKernelLDS(&Func))
@@ -927,6 +916,55 @@ class AMDGPULowerModuleLDS : public ModulePass {
return I && I->getFunction() == &Func;
});
}
+ return KernelToReplacement;
+ }
+
+ bool runOnModule(Module &M) override {
+ CallGraph CG = CallGraph(M);
+ bool Changed = superAlignLDSGlobals(M);
+
+ Changed |= eliminateConstantExprUsesOfLDSFromAllInstructions(M);
+
+ Changed = true; // todo: narrow this down
+
+ // For each kernel, what variables does it access directly or through
+ // callees
+ LDSUsesInfoTy LDSUsesInfo = getTransitiveUsesOfLDS(CG, M);
+
+ // For each variable accessed through callees, which kernels access it
+ VariableFunctionMap LDSToKernelsThatNeedToAccessItIndirectly;
+ for (auto &K : LDSUsesInfo.indirect_access) {
+ Function *F = K.first;
+ assert(isKernelLDS(F));
+ for (GlobalVariable *GV : K.second) {
+ LDSToKernelsThatNeedToAccessItIndirectly[GV].insert(F);
+ }
+ }
+
+ DenseSet<GlobalVariable *> ModuleScopeVariables;
+ DenseSet<GlobalVariable *> TableLookupVariables;
+ DenseSet<GlobalVariable *> KernelAccessVariables;
+ partitionVariablesIntoIndirectStrategies(
+ M, LDSUsesInfo, LDSToKernelsThatNeedToAccessItIndirectly,
+ ModuleScopeVariables, TableLookupVariables, KernelAccessVariables);
+
+ // If the kernel accesses a variable that is going to be stored in the
+ // module instance through a call then that kernel needs to allocate the
+ // module instance
+ DenseSet<Function *> KernelsThatAllocateModuleLDS =
+ kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
+ ModuleScopeVariables);
+ DenseSet<Function *> KernelsThatAllocateTableLDS =
+ kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
+ TableLookupVariables);
+
+ GlobalVariable *MaybeModuleScopeStruct = lowerModuleScopeStructVariables(
+ M, ModuleScopeVariables, KernelsThatAllocateModuleLDS);
+
+ DenseMap<Function *, LDSVariableReplacement> KernelToReplacement =
+ lowerKernelScopeStructVariables(M, LDSUsesInfo, ModuleScopeVariables,
+ KernelsThatAllocateModuleLDS,
+ MaybeModuleScopeStruct);
// Lower zero cost accesses to the kernel instances just created
for (auto &GV : KernelAccessVariables) {
@@ -1132,7 +1170,7 @@ class AMDGPULowerModuleLDS : public ModulePass {
}
template <typename PredicateTy>
- void replaceLDSVariablesWithStruct(
+ static void replaceLDSVariablesWithStruct(
Module &M, DenseSet<GlobalVariable *> const &LDSVarsToTransformArg,
LDSVariableReplacement Replacement, PredicateTy Predicate) {
LLVMContext &Ctx = M.getContext();
@@ -1190,9 +1228,9 @@ class AMDGPULowerModuleLDS : public ModulePass {
}
}
- void refineUsesAlignmentAndAA(Value *Ptr, Align A, const DataLayout &DL,
- MDNode *AliasScope, MDNode *NoAlias,
- unsigned MaxDepth = 5) {
+ static void refineUsesAlignmentAndAA(Value *Ptr, Align A,
+ const DataLayout &DL, MDNode *AliasScope,
+ MDNode *NoAlias, unsigned MaxDepth = 5) {
if (!MaxDepth || (A == 1 && !AliasScope))
return;