[llvm] 2c5f470 - [AMDGPU] Move LDS utilities from amdgpu-lower-module-lds pass to AMDGPUMemoryUtils (#88002)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 9 22:19:53 PDT 2024
Author: Chaitanya
Date: 2024-05-10T10:49:48+05:30
New Revision: 2c5f470da6ab313f7c6a1aa53fb40dbcbde338f1
URL: https://github.com/llvm/llvm-project/commit/2c5f470da6ab313f7c6a1aa53fb40dbcbde338f1
DIFF: https://github.com/llvm/llvm-project/commit/2c5f470da6ab313f7c6a1aa53fb40dbcbde338f1.diff
LOG: [AMDGPU] Move LDS utilities from amdgpu-lower-module-lds pass to AMDGPUMemoryUtils (#88002)
This moves some of the utility methods from amdgpu-lower-module-lds pass to AMDGPUMemoryUtils.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index c8bf9dd39e389..2c7163a775372 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -212,6 +212,7 @@
#define DEBUG_TYPE "amdgpu-lower-module-lds"
using namespace llvm;
+using namespace AMDGPU;
namespace {
@@ -234,17 +235,6 @@ cl::opt<LoweringKind> LoweringKindLoc(
clEnumValN(LoweringKind::hybrid, "hybrid",
"Lower via mixture of above strategies")));
-bool isKernelLDS(const Function *F) {
- // Some weirdness here. AMDGPU::isKernelCC does not call into
- // AMDGPU::isKernel with the calling conv, it instead calls into
- // isModuleEntryFunction which returns true for more calling conventions
- // than AMDGPU::isKernel does. There's a FIXME on AMDGPU::isKernel.
- // There's also a test that checks that the LDS lowering does not hit on
- // a graphics shader, denoted amdgpu_ps, so stay with the limited case.
- // Putting LDS in the name of the function to draw attention to this.
- return AMDGPU::isKernel(F->getCallingConv());
-}
-
template <typename T> std::vector<T> sortByName(std::vector<T> &&V) {
llvm::sort(V.begin(), V.end(), [](const auto *L, const auto *R) {
return L->getName() < R->getName();
@@ -305,183 +295,9 @@ class AMDGPULowerModuleLDS {
Decl, {}, {OperandBundleDefT<Value *>("ExplicitUse", UseInstance)});
}
- static bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M) {
- // Constants are uniqued within LLVM. A ConstantExpr referring to a LDS
- // global may have uses from multiple different functions as a result.
- // This pass specialises LDS variables with respect to the kernel that
- // allocates them.
-
- // This is semantically equivalent to (the unimplemented as slow):
- // for (auto &F : M.functions())
- // for (auto &BB : F)
- // for (auto &I : BB)
- // for (Use &Op : I.operands())
- // if (constantExprUsesLDS(Op))
- // replaceConstantExprInFunction(I, Op);
-
- SmallVector<Constant *> LDSGlobals;
- for (auto &GV : M.globals())
- if (AMDGPU::isLDSVariableToLower(GV))
- LDSGlobals.push_back(&GV);
-
- return convertUsersOfConstantsToInstructions(LDSGlobals);
- }
-
public:
AMDGPULowerModuleLDS(const AMDGPUTargetMachine &TM_) : TM(TM_) {}
- using FunctionVariableMap = DenseMap<Function *, DenseSet<GlobalVariable *>>;
-
- using VariableFunctionMap = DenseMap<GlobalVariable *, DenseSet<Function *>>;
-
- static void getUsesOfLDSByFunction(CallGraph const &CG, Module &M,
- FunctionVariableMap &kernels,
- FunctionVariableMap &functions) {
-
- // Get uses from the current function, excluding uses by called functions
- // Two output variables to avoid walking the globals list twice
- for (auto &GV : M.globals()) {
- if (!AMDGPU::isLDSVariableToLower(GV)) {
- continue;
- }
-
- for (User *V : GV.users()) {
- if (auto *I = dyn_cast<Instruction>(V)) {
- Function *F = I->getFunction();
- if (isKernelLDS(F)) {
- kernels[F].insert(&GV);
- } else {
- functions[F].insert(&GV);
- }
- }
- }
- }
- }
-
- struct LDSUsesInfoTy {
- FunctionVariableMap direct_access;
- FunctionVariableMap indirect_access;
- };
-
- static LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M) {
-
- FunctionVariableMap direct_map_kernel;
- FunctionVariableMap direct_map_function;
- getUsesOfLDSByFunction(CG, M, direct_map_kernel, direct_map_function);
-
- // Collect variables that are used by functions whose address has escaped
- DenseSet<GlobalVariable *> VariablesReachableThroughFunctionPointer;
- for (Function &F : M.functions()) {
- if (!isKernelLDS(&F))
- if (F.hasAddressTaken(nullptr,
- /* IgnoreCallbackUses */ false,
- /* IgnoreAssumeLikeCalls */ false,
- /* IgnoreLLVMUsed */ true,
- /* IgnoreArcAttachedCall */ false)) {
- set_union(VariablesReachableThroughFunctionPointer,
- direct_map_function[&F]);
- }
- }
-
- auto functionMakesUnknownCall = [&](const Function *F) -> bool {
- assert(!F->isDeclaration());
- for (const CallGraphNode::CallRecord &R : *CG[F]) {
- if (!R.second->getFunction()) {
- return true;
- }
- }
- return false;
- };
-
- // Work out which variables are reachable through function calls
- FunctionVariableMap transitive_map_function = direct_map_function;
-
- // If the function makes any unknown call, assume the worst case that it can
- // access all variables accessed by functions whose address escaped
- for (Function &F : M.functions()) {
- if (!F.isDeclaration() && functionMakesUnknownCall(&F)) {
- if (!isKernelLDS(&F)) {
- set_union(transitive_map_function[&F],
- VariablesReachableThroughFunctionPointer);
- }
- }
- }
-
- // Direct implementation of collecting all variables reachable from each
- // function
- for (Function &Func : M.functions()) {
- if (Func.isDeclaration() || isKernelLDS(&Func))
- continue;
-
- DenseSet<Function *> seen; // catches cycles
- SmallVector<Function *, 4> wip{&Func};
-
- while (!wip.empty()) {
- Function *F = wip.pop_back_val();
-
- // Can accelerate this by referring to transitive map for functions that
- // have already been computed, with more care than this
- set_union(transitive_map_function[&Func], direct_map_function[F]);
-
- for (const CallGraphNode::CallRecord &R : *CG[F]) {
- Function *ith = R.second->getFunction();
- if (ith) {
- if (!seen.contains(ith)) {
- seen.insert(ith);
- wip.push_back(ith);
- }
- }
- }
- }
- }
-
- // direct_map_kernel lists which variables are used by the kernel
- // find the variables which are used through a function call
- FunctionVariableMap indirect_map_kernel;
-
- for (Function &Func : M.functions()) {
- if (Func.isDeclaration() || !isKernelLDS(&Func))
- continue;
-
- for (const CallGraphNode::CallRecord &R : *CG[&Func]) {
- Function *ith = R.second->getFunction();
- if (ith) {
- set_union(indirect_map_kernel[&Func], transitive_map_function[ith]);
- } else {
- set_union(indirect_map_kernel[&Func],
- VariablesReachableThroughFunctionPointer);
- }
- }
- }
-
- // Verify that we fall into one of 2 cases:
- // - All variables are absolute: this is a re-run of the pass
- // so we don't have anything to do.
- // - No variables are absolute.
- std::optional<bool> HasAbsoluteGVs;
- for (auto &Map : {direct_map_kernel, indirect_map_kernel}) {
- for (auto &[Fn, GVs] : Map) {
- for (auto *GV : GVs) {
- bool IsAbsolute = GV->isAbsoluteSymbolRef();
- if (HasAbsoluteGVs.has_value()) {
- if (*HasAbsoluteGVs != IsAbsolute) {
- report_fatal_error(
- "Module cannot mix absolute and non-absolute LDS GVs");
- }
- } else
- HasAbsoluteGVs = IsAbsolute;
- }
- }
- }
-
- // If we only had absolute GVs, we have nothing to do, return an empty
- // result.
- if (HasAbsoluteGVs && *HasAbsoluteGVs)
- return {FunctionVariableMap(), FunctionVariableMap()};
-
- return {std::move(direct_map_kernel), std::move(indirect_map_kernel)};
- }
-
struct LDSVariableReplacement {
GlobalVariable *SGV = nullptr;
DenseMap<GlobalVariable *, Constant *> LDSVarsToConstantGEP;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
index 79c359a575545..239e0ee705729 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
@@ -9,13 +9,16 @@
#include "AMDGPUMemoryUtils.h"
#include "AMDGPU.h"
#include "AMDGPUBaseInfo.h"
+#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/ReplaceConstant.h"
#define DEBUG_TYPE "amdgpu-memory-utils"
@@ -26,7 +29,7 @@ namespace llvm {
namespace AMDGPU {
-Align getAlign(DataLayout const &DL, const GlobalVariable *GV) {
+Align getAlign(const DataLayout &DL, const GlobalVariable *GV) {
return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
GV->getValueType());
}
@@ -61,6 +64,216 @@ bool isLDSVariableToLower(const GlobalVariable &GV) {
return true;
}
+bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M) {
+ // Constants are uniqued within LLVM. A ConstantExpr referring to a LDS
+ // global may have uses from multiple different functions as a result.
+ // This pass specialises LDS variables with respect to the kernel that
+ // allocates them.
+
+ // This is semantically equivalent to (the unimplemented as slow):
+ // for (auto &F : M.functions())
+ // for (auto &BB : F)
+ // for (auto &I : BB)
+ // for (Use &Op : I.operands())
+ // if (constantExprUsesLDS(Op))
+ // replaceConstantExprInFunction(I, Op);
+
+ SmallVector<Constant *> LDSGlobals;
+ for (auto &GV : M.globals())
+ if (AMDGPU::isLDSVariableToLower(GV))
+ LDSGlobals.push_back(&GV);
+ return convertUsersOfConstantsToInstructions(LDSGlobals);
+}
+
+void getUsesOfLDSByFunction(const CallGraph &CG, Module &M,
+ FunctionVariableMap &kernels,
+ FunctionVariableMap &Functions) {
+ // Get uses from the current function, excluding uses by called Functions
+ // Two output variables to avoid walking the globals list twice
+ for (auto &GV : M.globals()) {
+ if (!AMDGPU::isLDSVariableToLower(GV))
+ continue;
+ for (User *V : GV.users()) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ Function *F = I->getFunction();
+ if (isKernelLDS(F))
+ kernels[F].insert(&GV);
+ else
+ Functions[F].insert(&GV);
+ }
+ }
+ }
+}
+
+bool isKernelLDS(const Function *F) {
+ // Some weirdness here. AMDGPU::isKernelCC does not call into
+ // AMDGPU::isKernel with the calling conv, it instead calls into
+ // isModuleEntryFunction which returns true for more calling conventions
+ // than AMDGPU::isKernel does. There's a FIXME on AMDGPU::isKernel.
+ // There's also a test that checks that the LDS lowering does not hit on
+ // a graphics shader, denoted amdgpu_ps, so stay with the limited case.
+ // Putting LDS in the name of the function to draw attention to this.
+ return AMDGPU::isKernel(F->getCallingConv());
+}
+
+LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M) {
+
+ FunctionVariableMap DirectMapKernel;
+ FunctionVariableMap DirectMapFunction;
+ getUsesOfLDSByFunction(CG, M, DirectMapKernel, DirectMapFunction);
+
+ // Collect variables that are used by functions whose address has escaped
+ DenseSet<GlobalVariable *> VariablesReachableThroughFunctionPointer;
+ for (Function &F : M.functions()) {
+ if (!isKernelLDS(&F))
+ if (F.hasAddressTaken(nullptr,
+ /* IgnoreCallbackUses */ false,
+ /* IgnoreAssumeLikeCalls */ false,
+ /* IgnoreLLVMUsed */ true,
+ /* IgnoreArcAttachedCall */ false)) {
+ set_union(VariablesReachableThroughFunctionPointer,
+ DirectMapFunction[&F]);
+ }
+ }
+
+ auto FunctionMakesUnknownCall = [&](const Function *F) -> bool {
+ assert(!F->isDeclaration());
+ for (const CallGraphNode::CallRecord &R : *CG[F]) {
+ if (!R.second->getFunction())
+ return true;
+ }
+ return false;
+ };
+
+ // Work out which variables are reachable through function calls
+ FunctionVariableMap TransitiveMapFunction = DirectMapFunction;
+
+ // If the function makes any unknown call, assume the worst case that it can
+ // access all variables accessed by functions whose address escaped
+ for (Function &F : M.functions()) {
+ if (!F.isDeclaration() && FunctionMakesUnknownCall(&F)) {
+ if (!isKernelLDS(&F)) {
+ set_union(TransitiveMapFunction[&F],
+ VariablesReachableThroughFunctionPointer);
+ }
+ }
+ }
+
+ // Direct implementation of collecting all variables reachable from each
+ // function
+ for (Function &Func : M.functions()) {
+ if (Func.isDeclaration() || isKernelLDS(&Func))
+ continue;
+
+ DenseSet<Function *> seen; // catches cycles
+ SmallVector<Function *, 4> wip = {&Func};
+
+ while (!wip.empty()) {
+ Function *F = wip.pop_back_val();
+
+ // Can accelerate this by referring to transitive map for functions that
+ // have already been computed, with more care than this
+ set_union(TransitiveMapFunction[&Func], DirectMapFunction[F]);
+
+ for (const CallGraphNode::CallRecord &R : *CG[F]) {
+ Function *Ith = R.second->getFunction();
+ if (Ith) {
+ if (!seen.contains(Ith)) {
+ seen.insert(Ith);
+ wip.push_back(Ith);
+ }
+ }
+ }
+ }
+ }
+
+ // DirectMapKernel lists which variables are used by the kernel
+ // find the variables which are used through a function call
+ FunctionVariableMap IndirectMapKernel;
+
+ for (Function &Func : M.functions()) {
+ if (Func.isDeclaration() || !isKernelLDS(&Func))
+ continue;
+
+ for (const CallGraphNode::CallRecord &R : *CG[&Func]) {
+ Function *Ith = R.second->getFunction();
+ if (Ith) {
+ set_union(IndirectMapKernel[&Func], TransitiveMapFunction[Ith]);
+ } else {
+ set_union(IndirectMapKernel[&Func],
+ VariablesReachableThroughFunctionPointer);
+ }
+ }
+ }
+
+ // Verify that we fall into one of 2 cases:
+ // - All variables are absolute: this is a re-run of the pass
+ // so we don't have anything to do.
+ // - No variables are absolute.
+ std::optional<bool> HasAbsoluteGVs;
+ for (auto &Map : {DirectMapKernel, IndirectMapKernel}) {
+ for (auto &[Fn, GVs] : Map) {
+ for (auto *GV : GVs) {
+ bool IsAbsolute = GV->isAbsoluteSymbolRef();
+ if (HasAbsoluteGVs.has_value()) {
+ if (*HasAbsoluteGVs != IsAbsolute) {
+ report_fatal_error(
+ "Module cannot mix absolute and non-absolute LDS GVs");
+ }
+ } else
+ HasAbsoluteGVs = IsAbsolute;
+ }
+ }
+ }
+
+ // If we only had absolute GVs, we have nothing to do, return an empty
+ // result.
+ if (HasAbsoluteGVs && *HasAbsoluteGVs)
+ return {FunctionVariableMap(), FunctionVariableMap()};
+
+ return {std::move(DirectMapKernel), std::move(IndirectMapKernel)};
+}
+
+void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,
+ StringRef FnAttr) {
+ KernelRoot->removeFnAttr(FnAttr);
+
+ SmallVector<Function *> WorkList = {CG[KernelRoot]->getFunction()};
+ SmallPtrSet<Function *, 8> Visited;
+ bool SeenUnknownCall = false;
+
+ while (!WorkList.empty()) {
+ Function *F = WorkList.pop_back_val();
+
+ for (auto &CallRecord : *CG[F]) {
+ if (!CallRecord.second)
+ continue;
+
+ Function *Callee = CallRecord.second->getFunction();
+ if (!Callee) {
+ if (!SeenUnknownCall) {
+ SeenUnknownCall = true;
+
+ // If we see any indirect calls, assume nothing about potential
+ // targets.
+ // TODO: This could be refined to possible LDS global users.
+ for (auto &ExternalCallRecord : *CG.getExternalCallingNode()) {
+ Function *PotentialCallee =
+ ExternalCallRecord.second->getFunction();
+ assert(PotentialCallee);
+ if (!isKernelLDS(PotentialCallee))
+ PotentialCallee->removeFnAttr(FnAttr);
+ }
+ }
+ } else {
+ Callee->removeFnAttr(FnAttr);
+ if (Visited.insert(Callee).second)
+ WorkList.push_back(Callee);
+ }
+ }
+ }
+}
+
bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
Instruction *DefInst = Def->getMemoryInst();
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
index e42b27f8e09e1..4d3ad328e1310 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
@@ -9,6 +9,9 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUMEMORYUTILS_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUMEMORYUTILS_H
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+
namespace llvm {
struct Align;
@@ -19,14 +22,40 @@ class LoadInst;
class MemoryDef;
class MemorySSA;
class Value;
+class Function;
+class CallGraph;
+class Module;
namespace AMDGPU {
-Align getAlign(DataLayout const &DL, const GlobalVariable *GV);
+using FunctionVariableMap = DenseMap<Function *, DenseSet<GlobalVariable *>>;
+using VariableFunctionMap = DenseMap<GlobalVariable *, DenseSet<Function *>>;
+
+Align getAlign(const DataLayout &DL, const GlobalVariable *GV);
bool isDynamicLDS(const GlobalVariable &GV);
bool isLDSVariableToLower(const GlobalVariable &GV);
+struct LDSUsesInfoTy {
+ FunctionVariableMap direct_access;
+ FunctionVariableMap indirect_access;
+};
+
+bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M);
+
+void getUsesOfLDSByFunction(const CallGraph &CG, Module &M,
+ FunctionVariableMap &kernels,
+ FunctionVariableMap &functions);
+
+bool isKernelLDS(const Function *F);
+
+LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M);
+
+/// Strip FnAttr attribute from any functions where we may have
+/// introduced its use.
+void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,
+ StringRef FnAttr);
+
/// Given a \p Def clobbering a load from \p Ptr according to the MSSA check
/// if this is actually a memory update or an artificial clobber to facilitate
/// ordering constraints.
More information about the llvm-commits
mailing list