[llvm] [AMDGPU][LTO] Introduce AMDGPUCloneModuleLDS (PR #89683)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu May 9 08:25:36 PDT 2024
================
@@ -0,0 +1,201 @@
+//===-- AMDGPUCloneModuleLDSPass.cpp ------------------------------*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The purpose of this pass is to ensure that the combined module contains
+// as many LDS global variables as there are kernels that (indirectly) access
+// them. As LDS variables behave like C++ static variables, it is important that
+// each partition contains a unique copy of the variable on a per kernel basis.
+// This representation also prepares the combined module to eliminate
+// cross-module dependencies of LDS variables.
+//
+// This pass operates as follows:
+// 1. Firstly, traverse the call graph from each kernel to determine the number
+// of kernels calling each device function.
+// 2. For each LDS global variable GV, determine the function F that defines it.
+// Collect its caller functions. Clone F and GV, and finally insert a
+// call/invoke instruction in each caller function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-clone-module-lds"
+
+static cl::opt<unsigned int> MaxCountForClonedFunctions(
+ "clone-lds-functions-max-count", cl::init(16), cl::Hidden,
+ cl::desc("Specify a limit to the number of clones of a function"));
+
+/// Return the function that defines \p GV, i.e. the parent function of an
+/// instruction that uses \p GV either directly or through a chain of
+/// operators wrapping it.
+///
+/// The search stops at the first instruction user popped from the worklist,
+/// so if \p GV is referenced from several functions only one of them is
+/// returned. Users that are neither instructions nor operators are ignored.
+/// It is a fatal error (llvm_unreachable) if no instruction user is found.
+///
+/// \param GV The global variable in question
+/// \return The function defining \p GV
+static Function *getFunctionDefiningGV(GlobalVariable &GV) {
+  SmallVector<User *> Worklist(GV.users());
+  while (!Worklist.empty()) {
+    User *U = Worklist.pop_back_val();
+    if (auto *Inst = dyn_cast<Instruction>(U))
+      return Inst->getFunction();
+    // Look through operators so that the instructions using them are
+    // eventually visited.
+    if (auto *Op = dyn_cast<Operator>(U))
+      append_range(Worklist, Op->users());
+  }
+  llvm_unreachable("GV must be used in a function.");
+}
+
+/// Replace all references to \p OldGV in \p NewF with \p NewGV
+/// \param OldGV The global variable to be replaced
+/// \param NewGV The global variable taking the place of \p OldGV
+/// \param NewF The function in which the replacement occurs
+static void replaceUsesOfWith(GlobalVariable *OldGV, GlobalVariable *NewGV,
+ Function *NewF) {
+ // ReplaceOperatorUses takes in an instruction Inst, which is assumed to
+ // contain OldGV as an operator, inserts an instruction corresponding to the
+ // OldGV-operand and updates Inst accordingly.
+ auto ReplaceOperatorUses = [&OldGV, &NewGV](Instruction *Inst) {
+ Inst->replaceUsesOfWith(OldGV, NewGV);
+ SmallVector<Value *, 8> Worklist(Inst->operands());
+ while (!Worklist.empty()) {
+ auto *V = Worklist.pop_back_val();
+ if (auto *I = dyn_cast<AddrSpaceCastOperator>(V)) {
+ auto *Cast = new AddrSpaceCastInst(NewGV, I->getType(), "", Inst);
+ Inst->replaceUsesOfWith(I, Cast);
+ } else if (auto *I = dyn_cast<GEPOperator>(V)) {
+ SmallVector<Value *, 8> Indices(I->indices());
+ auto *GEP = GetElementPtrInst::Create(NewGV->getValueType(), NewGV,
+ Indices, "", Inst);
+ Inst->replaceUsesOfWith(I, GEP);
+ }
+ }
+ };
+
+ // Collect all user instructions of OldGV using a Worklist algorithm.
+ // If a user is an operator, collect the instruction wrapping
+ // the operator.
+ SmallVector<Instruction *, 8> InstsToReplace;
+ SmallVector<User *, 8> UsersWorklist(OldGV->users());
+ while (!UsersWorklist.empty()) {
+ auto *U = UsersWorklist.pop_back_val();
+ if (auto *Inst = dyn_cast<Instruction>(U)) {
+ InstsToReplace.push_back(Inst);
+ } else if (auto *Op = dyn_cast<Operator>(U)) {
+ append_range(UsersWorklist, Op->users());
+ }
----------------
arsenm wrote:
I mean you can initialize a global variable to contain a pointer value (e.g. in llvm.used). Instructions / Operators are not the only possible users:
```
@llvm.used = appending global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @var1 to ptr)], section "llvm.metadata"
```
```
@lds = addrspace(3) global i32 0
@b = addrspace(1) global ptr addrspace(3) @lds
```
https://github.com/llvm/llvm-project/pull/89683
More information about the llvm-commits
mailing list