[llvm] [AMDGPU] Introduce "amdgpu-sw-lower-lds" pass to lower LDS accesses to use device global memory. (PR #87265)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 19 04:10:44 PDT 2024
================
@@ -65,6 +68,219 @@ bool isLDSVariableToLower(const GlobalVariable &GV) {
return true;
}
+bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M) {
+ // Constants are uniqued within LLVM. A ConstantExpr referring to a LDS
+ // global may have uses from multiple different functions as a result.
+ // This pass specialises LDS variables with respect to the kernel that
+ // allocates them.
+
+ // This is semantically equivalent to (the unimplemented as slow):
+ // for (auto &F : M.functions())
+ // for (auto &BB : F)
+ // for (auto &I : BB)
+ // for (Use &Op : I.operands())
+ // if (constantExprUsesLDS(Op))
+ // replaceConstantExprInFunction(I, Op);
+
+ SmallVector<Constant *> LDSGlobals;
+ for (auto &GV : M.globals())
+ if (AMDGPU::isLDSVariableToLower(GV))
+ LDSGlobals.push_back(&GV);
+ return convertUsersOfConstantsToInstructions(LDSGlobals);
+}
+
+void getUsesOfLDSByFunction(CallGraph const &CG, Module &M,
+ FunctionVariableMap &kernels,
+ FunctionVariableMap &functions) {
+ // Get uses from the current function, excluding uses by called functions
+ // Two output variables to avoid walking the globals list twice
+ for (auto &GV : M.globals()) {
+ if (!AMDGPU::isLDSVariableToLower(GV)) {
+ continue;
+ }
+ for (User *V : GV.users()) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ Function *F = I->getFunction();
+ if (isKernelLDS(F)) {
+ kernels[F].insert(&GV);
+ } else {
+ functions[F].insert(&GV);
+ }
+ }
+ }
+ }
+}
+
+bool isKernelLDS(const Function *F) {
+ // Some weirdness here. AMDGPU::isKernelCC does not call into
+ // AMDGPU::isKernel with the calling conv, it instead calls into
+ // isModuleEntryFunction which returns true for more calling conventions
+ // than AMDGPU::isKernel does. There's a FIXME on AMDGPU::isKernel.
+ // There's also a test that checks that the LDS lowering does not hit on
+ // a graphics shader, denoted amdgpu_ps, so stay with the limited case.
+ // Putting LDS in the name of the function to draw attention to this.
+ return AMDGPU::isKernel(F->getCallingConv());
+}
+
+LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M) {
+
+ FunctionVariableMap direct_map_kernel;
+ FunctionVariableMap direct_map_function;
+ getUsesOfLDSByFunction(CG, M, direct_map_kernel, direct_map_function);
+
+ // Collect variables that are used by functions whose address has escaped
+ DenseSet<GlobalVariable *> VariablesReachableThroughFunctionPointer;
+ for (Function &F : M.functions()) {
+ if (!isKernelLDS(&F))
+ if (F.hasAddressTaken(nullptr,
+ /* IgnoreCallbackUses */ false,
+ /* IgnoreAssumeLikeCalls */ false,
+ /* IgnoreLLVMUsed */ true,
+ /* IgnoreArcAttachedCall */ false)) {
+ set_union(VariablesReachableThroughFunctionPointer,
+ direct_map_function[&F]);
+ }
+ }
+
+ auto functionMakesUnknownCall = [&](const Function *F) -> bool {
+ assert(!F->isDeclaration());
+ for (const CallGraphNode::CallRecord &R : *CG[F]) {
+ if (!R.second->getFunction()) {
+ return true;
+ }
+ }
+ return false;
+ };
+
+ // Work out which variables are reachable through function calls
+ FunctionVariableMap transitive_map_function = direct_map_function;
+
+ // If the function makes any unknown call, assume the worst case that it can
+ // access all variables accessed by functions whose address escaped
+ for (Function &F : M.functions()) {
+ if (!F.isDeclaration() && functionMakesUnknownCall(&F)) {
+ if (!isKernelLDS(&F)) {
+ set_union(transitive_map_function[&F],
+ VariablesReachableThroughFunctionPointer);
+ }
+ }
+ }
+
+ // Direct implementation of collecting all variables reachable from each
+ // function
+ for (Function &Func : M.functions()) {
+ if (Func.isDeclaration() || isKernelLDS(&Func))
+ continue;
+
+ DenseSet<Function *> seen; // catches cycles
+ SmallVector<Function *, 4> wip{&Func};
----------------
skc7 wrote:
Updated pre-requisite PR #88002 which has AMDGPUmemoryUtils changes. Changes here will be removed once #88002 gets merged.
https://github.com/llvm/llvm-project/pull/87265
More information about the llvm-commits
mailing list