[llvm] [AMDGPU] Move LDS utilities from amdgpu-lower-module-lds pass to AMDGPUMemoryUtils (PR #88002)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 19 04:05:14 PDT 2024
https://github.com/skc7 updated https://github.com/llvm/llvm-project/pull/88002
From 4e67582cfa296889549e240c9714ed990fdd0078 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Fri, 5 Apr 2024 13:23:48 +0530
Subject: [PATCH 1/4] [AMDGPU] Move LDS utilities from amdgpu-lower-module-lds
pass to AMDGPUMemoryUtils
---
.../AMDGPU/AMDGPULowerModuleLDSPass.cpp | 186 +-----------------
.../Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp | 176 +++++++++++++++++
.../Target/AMDGPU/Utils/AMDGPUMemoryUtils.h | 24 +++
3 files changed, 201 insertions(+), 185 deletions(-)
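
For context (not part of the patch itself): after this change the helpers are declared in the llvm::AMDGPU namespace exported by AMDGPUMemoryUtils.h, so callers such as the lowering pass reach them through that header rather than through file-local statics. A minimal consumer sketch follows; the include path and the exampleUse wrapper are illustrative assumptions, while the signatures and the direct_access/indirect_access fields match the declarations added in the header diff further down.

#include "Utils/AMDGPUMemoryUtils.h"   // assumed include path from the pass directory
#include "llvm/Analysis/CallGraph.h"

using namespace llvm;

static void exampleUse(Module &M) {
  CallGraph CG(M);
  // Rewrite ConstantExpr uses of LDS globals into instructions first.
  AMDGPU::eliminateConstantExprUsesOfLDSFromAllInstructions(M);
  // Collect direct and call-graph-transitive LDS uses per kernel/function.
  AMDGPU::LDSUsesInfoTy Uses = AMDGPU::getTransitiveUsesOfLDS(CG, M);
  for (auto &[F, GVs] : Uses.direct_access) {
    // isKernelLDS distinguishes true kernels from other entry functions.
    bool IsKernel = AMDGPU::isKernelLDS(F);
    (void)IsKernel;
    (void)GVs;
  }
}
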
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index c8bf9dd39e389c..2c7163a7753725 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -212,6 +212,7 @@
#define DEBUG_TYPE "amdgpu-lower-module-lds"
using namespace llvm;
+using namespace AMDGPU;
namespace {
@@ -234,17 +235,6 @@ cl::opt<LoweringKind> LoweringKindLoc(
clEnumValN(LoweringKind::hybrid, "hybrid",
"Lower via mixture of above strategies")));
-bool isKernelLDS(const Function *F) {
- // Some weirdness here. AMDGPU::isKernelCC does not call into
- // AMDGPU::isKernel with the calling conv, it instead calls into
- // isModuleEntryFunction which returns true for more calling conventions
- // than AMDGPU::isKernel does. There's a FIXME on AMDGPU::isKernel.
- // There's also a test that checks that the LDS lowering does not hit on
- // a graphics shader, denoted amdgpu_ps, so stay with the limited case.
- // Putting LDS in the name of the function to draw attention to this.
- return AMDGPU::isKernel(F->getCallingConv());
-}
-
template <typename T> std::vector<T> sortByName(std::vector<T> &&V) {
llvm::sort(V.begin(), V.end(), [](const auto *L, const auto *R) {
return L->getName() < R->getName();
@@ -305,183 +295,9 @@ class AMDGPULowerModuleLDS {
Decl, {}, {OperandBundleDefT<Value *>("ExplicitUse", UseInstance)});
}
- static bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M) {
- // Constants are uniqued within LLVM. A ConstantExpr referring to a LDS
- // global may have uses from multiple different functions as a result.
- // This pass specialises LDS variables with respect to the kernel that
- // allocates them.
-
- // This is semantically equivalent to (the unimplemented as slow):
- // for (auto &F : M.functions())
- // for (auto &BB : F)
- // for (auto &I : BB)
- // for (Use &Op : I.operands())
- // if (constantExprUsesLDS(Op))
- // replaceConstantExprInFunction(I, Op);
-
- SmallVector<Constant *> LDSGlobals;
- for (auto &GV : M.globals())
- if (AMDGPU::isLDSVariableToLower(GV))
- LDSGlobals.push_back(&GV);
-
- return convertUsersOfConstantsToInstructions(LDSGlobals);
- }
-
public:
AMDGPULowerModuleLDS(const AMDGPUTargetMachine &TM_) : TM(TM_) {}
- using FunctionVariableMap = DenseMap<Function *, DenseSet<GlobalVariable *>>;
-
- using VariableFunctionMap = DenseMap<GlobalVariable *, DenseSet<Function *>>;
-
- static void getUsesOfLDSByFunction(CallGraph const &CG, Module &M,
- FunctionVariableMap &kernels,
- FunctionVariableMap &functions) {
-
- // Get uses from the current function, excluding uses by called functions
- // Two output variables to avoid walking the globals list twice
- for (auto &GV : M.globals()) {
- if (!AMDGPU::isLDSVariableToLower(GV)) {
- continue;
- }
-
- for (User *V : GV.users()) {
- if (auto *I = dyn_cast<Instruction>(V)) {
- Function *F = I->getFunction();
- if (isKernelLDS(F)) {
- kernels[F].insert(&GV);
- } else {
- functions[F].insert(&GV);
- }
- }
- }
- }
- }
-
- struct LDSUsesInfoTy {
- FunctionVariableMap direct_access;
- FunctionVariableMap indirect_access;
- };
-
- static LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M) {
-
- FunctionVariableMap direct_map_kernel;
- FunctionVariableMap direct_map_function;
- getUsesOfLDSByFunction(CG, M, direct_map_kernel, direct_map_function);
-
- // Collect variables that are used by functions whose address has escaped
- DenseSet<GlobalVariable *> VariablesReachableThroughFunctionPointer;
- for (Function &F : M.functions()) {
- if (!isKernelLDS(&F))
- if (F.hasAddressTaken(nullptr,
- /* IgnoreCallbackUses */ false,
- /* IgnoreAssumeLikeCalls */ false,
- /* IgnoreLLVMUsed */ true,
- /* IgnoreArcAttachedCall */ false)) {
- set_union(VariablesReachableThroughFunctionPointer,
- direct_map_function[&F]);
- }
- }
-
- auto functionMakesUnknownCall = [&](const Function *F) -> bool {
- assert(!F->isDeclaration());
- for (const CallGraphNode::CallRecord &R : *CG[F]) {
- if (!R.second->getFunction()) {
- return true;
- }
- }
- return false;
- };
-
- // Work out which variables are reachable through function calls
- FunctionVariableMap transitive_map_function = direct_map_function;
-
- // If the function makes any unknown call, assume the worst case that it can
- // access all variables accessed by functions whose address escaped
- for (Function &F : M.functions()) {
- if (!F.isDeclaration() && functionMakesUnknownCall(&F)) {
- if (!isKernelLDS(&F)) {
- set_union(transitive_map_function[&F],
- VariablesReachableThroughFunctionPointer);
- }
- }
- }
-
- // Direct implementation of collecting all variables reachable from each
- // function
- for (Function &Func : M.functions()) {
- if (Func.isDeclaration() || isKernelLDS(&Func))
- continue;
-
- DenseSet<Function *> seen; // catches cycles
- SmallVector<Function *, 4> wip{&Func};
-
- while (!wip.empty()) {
- Function *F = wip.pop_back_val();
-
- // Can accelerate this by referring to transitive map for functions that
- // have already been computed, with more care than this
- set_union(transitive_map_function[&Func], direct_map_function[F]);
-
- for (const CallGraphNode::CallRecord &R : *CG[F]) {
- Function *ith = R.second->getFunction();
- if (ith) {
- if (!seen.contains(ith)) {
- seen.insert(ith);
- wip.push_back(ith);
- }
- }
- }
- }
- }
-
- // direct_map_kernel lists which variables are used by the kernel
- // find the variables which are used through a function call
- FunctionVariableMap indirect_map_kernel;
-
- for (Function &Func : M.functions()) {
- if (Func.isDeclaration() || !isKernelLDS(&Func))
- continue;
-
- for (const CallGraphNode::CallRecord &R : *CG[&Func]) {
- Function *ith = R.second->getFunction();
- if (ith) {
- set_union(indirect_map_kernel[&Func], transitive_map_function[ith]);
- } else {
- set_union(indirect_map_kernel[&Func],
- VariablesReachableThroughFunctionPointer);
- }
- }
- }
-
- // Verify that we fall into one of 2 cases:
- // - All variables are absolute: this is a re-run of the pass
- // so we don't have anything to do.
- // - No variables are absolute.
- std::optional<bool> HasAbsoluteGVs;
- for (auto &Map : {direct_map_kernel, indirect_map_kernel}) {
- for (auto &[Fn, GVs] : Map) {
- for (auto *GV : GVs) {
- bool IsAbsolute = GV->isAbsoluteSymbolRef();
- if (HasAbsoluteGVs.has_value()) {
- if (*HasAbsoluteGVs != IsAbsolute) {
- report_fatal_error(
- "Module cannot mix absolute and non-absolute LDS GVs");
- }
- } else
- HasAbsoluteGVs = IsAbsolute;
- }
- }
- }
-
- // If we only had absolute GVs, we have nothing to do, return an empty
- // result.
- if (HasAbsoluteGVs && *HasAbsoluteGVs)
- return {FunctionVariableMap(), FunctionVariableMap()};
-
- return {std::move(direct_map_kernel), std::move(indirect_map_kernel)};
- }
-
struct LDSVariableReplacement {
GlobalVariable *SGV = nullptr;
DenseMap<GlobalVariable *, Constant *> LDSVarsToConstantGEP;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
index 25e628e5cbc558..26b3819f7fd566 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
@@ -9,13 +9,16 @@
#include "AMDGPUMemoryUtils.h"
#include "AMDGPU.h"
#include "AMDGPUBaseInfo.h"
+#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/ReplaceConstant.h"
#define DEBUG_TYPE "amdgpu-memory-utils"
@@ -65,6 +68,179 @@ bool isLDSVariableToLower(const GlobalVariable &GV) {
return true;
}
+bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M) {
+ // Constants are uniqued within LLVM. A ConstantExpr referring to a LDS
+ // global may have uses from multiple different functions as a result.
+ // This pass specialises LDS variables with respect to the kernel that
+ // allocates them.
+
+ // This is semantically equivalent to (the unimplemented as slow):
+ // for (auto &F : M.functions())
+ // for (auto &BB : F)
+ // for (auto &I : BB)
+ // for (Use &Op : I.operands())
+ // if (constantExprUsesLDS(Op))
+ // replaceConstantExprInFunction(I, Op);
+
+ SmallVector<Constant *> LDSGlobals;
+ for (auto &GV : M.globals())
+ if (AMDGPU::isLDSVariableToLower(GV))
+ LDSGlobals.push_back(&GV);
+ return convertUsersOfConstantsToInstructions(LDSGlobals);
+}
+
+void getUsesOfLDSByFunction(CallGraph const &CG, Module &M,
+ FunctionVariableMap &kernels,
+ FunctionVariableMap &functions) {
+ // Get uses from the current function, excluding uses by called functions
+ // Two output variables to avoid walking the globals list twice
+ for (auto &GV : M.globals()) {
+ if (!AMDGPU::isLDSVariableToLower(GV)) {
+ continue;
+ }
+ for (User *V : GV.users()) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ Function *F = I->getFunction();
+ if (isKernelLDS(F)) {
+ kernels[F].insert(&GV);
+ } else {
+ functions[F].insert(&GV);
+ }
+ }
+ }
+ }
+}
+
+bool isKernelLDS(const Function *F) {
+ // Some weirdness here. AMDGPU::isKernelCC does not call into
+ // AMDGPU::isKernel with the calling conv, it instead calls into
+ // isModuleEntryFunction which returns true for more calling conventions
+ // than AMDGPU::isKernel does. There's a FIXME on AMDGPU::isKernel.
+ // There's also a test that checks that the LDS lowering does not hit on
+ // a graphics shader, denoted amdgpu_ps, so stay with the limited case.
+ // Putting LDS in the name of the function to draw attention to this.
+ return AMDGPU::isKernel(F->getCallingConv());
+}
+
+LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M) {
+
+ FunctionVariableMap direct_map_kernel;
+ FunctionVariableMap direct_map_function;
+ getUsesOfLDSByFunction(CG, M, direct_map_kernel, direct_map_function);
+
+ // Collect variables that are used by functions whose address has escaped
+ DenseSet<GlobalVariable *> VariablesReachableThroughFunctionPointer;
+ for (Function &F : M.functions()) {
+ if (!isKernelLDS(&F))
+ if (F.hasAddressTaken(nullptr,
+ /* IgnoreCallbackUses */ false,
+ /* IgnoreAssumeLikeCalls */ false,
+ /* IgnoreLLVMUsed */ true,
+ /* IgnoreArcAttachedCall */ false)) {
+ set_union(VariablesReachableThroughFunctionPointer,
+ direct_map_function[&F]);
+ }
+ }
+
+ auto functionMakesUnknownCall = [&](const Function *F) -> bool {
+ assert(!F->isDeclaration());
+ for (const CallGraphNode::CallRecord &R : *CG[F]) {
+ if (!R.second->getFunction()) {
+ return true;
+ }
+ }
+ return false;
+ };
+
+ // Work out which variables are reachable through function calls
+ FunctionVariableMap transitive_map_function = direct_map_function;
+
+ // If the function makes any unknown call, assume the worst case that it can
+ // access all variables accessed by functions whose address escaped
+ for (Function &F : M.functions()) {
+ if (!F.isDeclaration() && functionMakesUnknownCall(&F)) {
+ if (!isKernelLDS(&F)) {
+ set_union(transitive_map_function[&F],
+ VariablesReachableThroughFunctionPointer);
+ }
+ }
+ }
+
+ // Direct implementation of collecting all variables reachable from each
+ // function
+ for (Function &Func : M.functions()) {
+ if (Func.isDeclaration() || isKernelLDS(&Func))
+ continue;
+
+ DenseSet<Function *> seen; // catches cycles
+ SmallVector<Function *, 4> wip{&Func};
+
+ while (!wip.empty()) {
+ Function *F = wip.pop_back_val();
+
+ // Can accelerate this by referring to transitive map for functions that
+ // have already been computed, with more care than this
+ set_union(transitive_map_function[&Func], direct_map_function[F]);
+
+ for (const CallGraphNode::CallRecord &R : *CG[F]) {
+ Function *ith = R.second->getFunction();
+ if (ith) {
+ if (!seen.contains(ith)) {
+ seen.insert(ith);
+ wip.push_back(ith);
+ }
+ }
+ }
+ }
+ }
+
+ // direct_map_kernel lists which variables are used by the kernel
+ // find the variables which are used through a function call
+ FunctionVariableMap indirect_map_kernel;
+
+ for (Function &Func : M.functions()) {
+ if (Func.isDeclaration() || !isKernelLDS(&Func))
+ continue;
+
+ for (const CallGraphNode::CallRecord &R : *CG[&Func]) {
+ Function *ith = R.second->getFunction();
+ if (ith) {
+ set_union(indirect_map_kernel[&Func], transitive_map_function[ith]);
+ } else {
+ set_union(indirect_map_kernel[&Func],
+ VariablesReachableThroughFunctionPointer);
+ }
+ }
+ }
+
+ // Verify that we fall into one of 2 cases:
+ // - All variables are absolute: this is a re-run of the pass
+ // so we don't have anything to do.
+ // - No variables are absolute.
+ std::optional<bool> HasAbsoluteGVs;
+ for (auto &Map : {direct_map_kernel, indirect_map_kernel}) {
+ for (auto &[Fn, GVs] : Map) {
+ for (auto *GV : GVs) {
+ bool IsAbsolute = GV->isAbsoluteSymbolRef();
+ if (HasAbsoluteGVs.has_value()) {
+ if (*HasAbsoluteGVs != IsAbsolute) {
+ report_fatal_error(
+ "Module cannot mix absolute and non-absolute LDS GVs");
+ }
+ } else
+ HasAbsoluteGVs = IsAbsolute;
+ }
+ }
+ }
+
+ // If we only had absolute GVs, we have nothing to do, return an empty
+ // result.
+ if (HasAbsoluteGVs && *HasAbsoluteGVs)
+ return {FunctionVariableMap(), FunctionVariableMap()};
+
+ return {std::move(direct_map_kernel), std::move(indirect_map_kernel)};
+}
+
bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
Instruction *DefInst = Def->getMemoryInst();
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
index e42b27f8e09e14..a199b927a28b6c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
@@ -9,6 +9,9 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUMEMORYUTILS_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUMEMORYUTILS_H
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+
namespace llvm {
struct Align;
@@ -19,14 +22,35 @@ class LoadInst;
class MemoryDef;
class MemorySSA;
class Value;
+class Function;
+class CallGraph;
+class Module;
namespace AMDGPU {
+using FunctionVariableMap = DenseMap<Function *, DenseSet<GlobalVariable *>>;
+using VariableFunctionMap = DenseMap<GlobalVariable *, DenseSet<Function *>>;
+
Align getAlign(DataLayout const &DL, const GlobalVariable *GV);
bool isDynamicLDS(const GlobalVariable &GV);
bool isLDSVariableToLower(const GlobalVariable &GV);
+struct LDSUsesInfoTy {
+ FunctionVariableMap direct_access;
+ FunctionVariableMap indirect_access;
+};
+
+bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M);
+
+void getUsesOfLDSByFunction(CallGraph const &CG, Module &M,
+ FunctionVariableMap &kernels,
+ FunctionVariableMap &functions);
+
+bool isKernelLDS(const Function *F);
+
+LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M);
+
/// Given a \p Def clobbering a load from \p Ptr according to the MSSA check
/// if this is actually a memory update or an artificial clobber to facilitate
/// ordering constraints.
From b92b55524dc10cc871b045acbfc4730f25c38a75 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Tue, 9 Apr 2024 17:17:03 +0530
Subject: [PATCH 2/4] [AMDGPU] Move removeNoLdsKernelIdFromReachable from
amdgpu-lower-module-lds pass to AMDGPUMemoryUtils
---
.../Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp | 43 +++++++++++++++++++
.../Target/AMDGPU/Utils/AMDGPUMemoryUtils.h | 5 +++
2 files changed, 48 insertions(+)
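
For context (not part of the patch): the former removeNoLdsKernelIdFromReachable is generalised here into removeFnAttrFromReachable, which walks the call graph from a kernel root and strips a named attribute from every function it may reach, conservatively including all externally callable non-kernels once an indirect call is seen. A hedged sketch of a call site follows, assuming the same includes as the sketch above; the attribute string is only an illustrative assumption.

// Assumed usage; the lowering pass would pass whatever attribute it introduced.
static void stripLDSKernelIdAttr(CallGraph &CG, Function *KernelRoot) {
  AMDGPU::removeFnAttrFromReachable(CG, KernelRoot, "amdgpu-no-lds-kernel-id");
}
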
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
index 26b3819f7fd566..624ef20e41c828 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
@@ -241,6 +241,49 @@ LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M) {
return {std::move(direct_map_kernel), std::move(indirect_map_kernel)};
}
+void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,
+ StringRef FnAttr) {
+ KernelRoot->removeFnAttr(FnAttr);
+
+ SmallVector<Function *> Tmp({CG[KernelRoot]->getFunction()});
+ if (!Tmp.back())
+ return;
+
+ SmallPtrSet<Function *, 8> Visited;
+ bool SeenUnknownCall = false;
+
+ do {
+ Function *F = Tmp.pop_back_val();
+
+ for (auto &N : *CG[F]) {
+ if (!N.second)
+ continue;
+
+ Function *Callee = N.second->getFunction();
+ if (!Callee) {
+ if (!SeenUnknownCall) {
+ SeenUnknownCall = true;
+
+ // If we see any indirect calls, assume nothing about potential
+ // targets.
+ // TODO: This could be refined to possible LDS global users.
+ for (auto &N : *CG.getExternalCallingNode()) {
+ Function *PotentialCallee = N.second->getFunction();
+ if (!isKernelLDS(PotentialCallee))
+ PotentialCallee->removeFnAttr(FnAttr);
+ }
+
+ continue;
+ }
+ }
+
+ Callee->removeFnAttr(FnAttr);
+ if (Visited.insert(Callee).second)
+ Tmp.push_back(Callee);
+ }
+ } while (!Tmp.empty());
+}
+
bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
Instruction *DefInst = Def->getMemoryInst();
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
index a199b927a28b6c..1df9c28e65888c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
@@ -51,6 +51,11 @@ bool isKernelLDS(const Function *F);
LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M);
+/// Strip FnAttr attribute from any functions where we may have
+/// introduced its use.
+void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,
+ StringRef FnAttr);
+
/// Given a \p Def clobbering a load from \p Ptr according to the MSSA check
/// if this is actually a memory update or an artificial clobber to facilitate
/// ordering constraints.
From 973b1d9974da27a5af49a42ad855ba35c5122f5e Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Thu, 18 Apr 2024 10:53:08 +0530
Subject: [PATCH 3/4] [AMDGPU] Update removeFnAttrFromReachable
---
.../Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp | 33 +++++++++----------
1 file changed, 15 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
index 624ef20e41c828..dc68a49b99204d 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
@@ -245,21 +245,18 @@ void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,
StringRef FnAttr) {
KernelRoot->removeFnAttr(FnAttr);
- SmallVector<Function *> Tmp({CG[KernelRoot]->getFunction()});
- if (!Tmp.back())
- return;
-
+ SmallVector<Function *> WorkList({CG[KernelRoot]->getFunction()});
SmallPtrSet<Function *, 8> Visited;
bool SeenUnknownCall = false;
- do {
- Function *F = Tmp.pop_back_val();
+ while (!WorkList.empty()) {
+ Function *F = WorkList.pop_back_val();
- for (auto &N : *CG[F]) {
- if (!N.second)
+ for (auto &CallRecord : *CG[F]) {
+ if (!CallRecord.second)
continue;
- Function *Callee = N.second->getFunction();
+ Function *Callee = CallRecord.second->getFunction();
if (!Callee) {
if (!SeenUnknownCall) {
SeenUnknownCall = true;
@@ -267,21 +264,21 @@ void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,
// If we see any indirect calls, assume nothing about potential
// targets.
// TODO: This could be refined to possible LDS global users.
- for (auto &N : *CG.getExternalCallingNode()) {
- Function *PotentialCallee = N.second->getFunction();
+ for (auto &ExternalCallRecord : *CG.getExternalCallingNode()) {
+ Function *PotentialCallee =
+ ExternalCallRecord.second->getFunction();
+ assert(PotentialCallee);
if (!isKernelLDS(PotentialCallee))
PotentialCallee->removeFnAttr(FnAttr);
}
-
- continue;
}
+ } else {
+ Callee->removeFnAttr(FnAttr);
+ if (Visited.insert(Callee).second)
+ WorkList.push_back(Callee);
}
-
- Callee->removeFnAttr(FnAttr);
- if (Visited.insert(Callee).second)
- Tmp.push_back(Callee);
}
- } while (!Tmp.empty());
+ }
}
bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
From 952591e1f15f862ec5a90f5e90c3bdbea098821a Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Fri, 19 Apr 2024 16:33:40 +0530
Subject: [PATCH 4/4] [AMDGPU] Update patch based on review:1
---
.../Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp | 67 +++++++++----------
1 file changed, 32 insertions(+), 35 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
index dc68a49b99204d..fdb6978c687926 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
@@ -89,23 +89,21 @@ bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M) {
return convertUsersOfConstantsToInstructions(LDSGlobals);
}
-void getUsesOfLDSByFunction(CallGraph const &CG, Module &M,
+void getUsesOfLDSByFunction(const CallGraph &CG, Module &M,
FunctionVariableMap &kernels,
- FunctionVariableMap &functions) {
- // Get uses from the current function, excluding uses by called functions
+ FunctionVariableMap &Functions) {
+ // Get uses from the current function, excluding uses by called Functions
// Two output variables to avoid walking the globals list twice
for (auto &GV : M.globals()) {
- if (!AMDGPU::isLDSVariableToLower(GV)) {
+ if (!AMDGPU::isLDSVariableToLower(GV))
continue;
- }
for (User *V : GV.users()) {
if (auto *I = dyn_cast<Instruction>(V)) {
Function *F = I->getFunction();
- if (isKernelLDS(F)) {
+ if (isKernelLDS(F))
kernels[F].insert(&GV);
- } else {
- functions[F].insert(&GV);
- }
+ else
+ Functions[F].insert(&GV);
}
}
}
@@ -124,9 +122,9 @@ bool isKernelLDS(const Function *F) {
LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M) {
- FunctionVariableMap direct_map_kernel;
- FunctionVariableMap direct_map_function;
- getUsesOfLDSByFunction(CG, M, direct_map_kernel, direct_map_function);
+ FunctionVariableMap DirectMapKernel;
+ FunctionVariableMap DirectMapFunction;
+ getUsesOfLDSByFunction(CG, M, DirectMapKernel, DirectMapFunction);
// Collect variables that are used by functions whose address has escaped
DenseSet<GlobalVariable *> VariablesReachableThroughFunctionPointer;
@@ -138,29 +136,28 @@ LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M) {
/* IgnoreLLVMUsed */ true,
/* IgnoreArcAttachedCall */ false)) {
set_union(VariablesReachableThroughFunctionPointer,
- direct_map_function[&F]);
+ DirectMapFunction[&F]);
}
}
- auto functionMakesUnknownCall = [&](const Function *F) -> bool {
+ auto FunctionMakesUnknownCall = [&](const Function *F) -> bool {
assert(!F->isDeclaration());
for (const CallGraphNode::CallRecord &R : *CG[F]) {
- if (!R.second->getFunction()) {
+ if (!R.second->getFunction())
return true;
- }
}
return false;
};
// Work out which variables are reachable through function calls
- FunctionVariableMap transitive_map_function = direct_map_function;
+ FunctionVariableMap TransitiveMapFunction = DirectMapFunction;
// If the function makes any unknown call, assume the worst case that it can
// access all variables accessed by functions whose address escaped
for (Function &F : M.functions()) {
- if (!F.isDeclaration() && functionMakesUnknownCall(&F)) {
+ if (!F.isDeclaration() && FunctionMakesUnknownCall(&F)) {
if (!isKernelLDS(&F)) {
- set_union(transitive_map_function[&F],
+ set_union(TransitiveMapFunction[&F],
VariablesReachableThroughFunctionPointer);
}
}
@@ -173,41 +170,41 @@ LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M) {
continue;
DenseSet<Function *> seen; // catches cycles
- SmallVector<Function *, 4> wip{&Func};
+ SmallVector<Function *, 4> wip = {&Func};
while (!wip.empty()) {
Function *F = wip.pop_back_val();
// Can accelerate this by referring to transitive map for functions that
// have already been computed, with more care than this
- set_union(transitive_map_function[&Func], direct_map_function[F]);
+ set_union(TransitiveMapFunction[&Func], DirectMapFunction[F]);
for (const CallGraphNode::CallRecord &R : *CG[F]) {
- Function *ith = R.second->getFunction();
- if (ith) {
- if (!seen.contains(ith)) {
- seen.insert(ith);
- wip.push_back(ith);
+ Function *Ith = R.second->getFunction();
+ if (Ith) {
+ if (!seen.contains(Ith)) {
+ seen.insert(Ith);
+ wip.push_back(Ith);
}
}
}
}
}
- // direct_map_kernel lists which variables are used by the kernel
+ // DirectMapKernel lists which variables are used by the kernel
// find the variables which are used through a function call
- FunctionVariableMap indirect_map_kernel;
+ FunctionVariableMap IndirectMapKernel;
for (Function &Func : M.functions()) {
if (Func.isDeclaration() || !isKernelLDS(&Func))
continue;
for (const CallGraphNode::CallRecord &R : *CG[&Func]) {
- Function *ith = R.second->getFunction();
- if (ith) {
- set_union(indirect_map_kernel[&Func], transitive_map_function[ith]);
+ Function *Ith = R.second->getFunction();
+ if (Ith) {
+ set_union(IndirectMapKernel[&Func], TransitiveMapFunction[Ith]);
} else {
- set_union(indirect_map_kernel[&Func],
+ set_union(IndirectMapKernel[&Func],
VariablesReachableThroughFunctionPointer);
}
}
@@ -218,7 +215,7 @@ LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M) {
// so we don't have anything to do.
// - No variables are absolute.
std::optional<bool> HasAbsoluteGVs;
- for (auto &Map : {direct_map_kernel, indirect_map_kernel}) {
+ for (auto &Map : {DirectMapKernel, IndirectMapKernel}) {
for (auto &[Fn, GVs] : Map) {
for (auto *GV : GVs) {
bool IsAbsolute = GV->isAbsoluteSymbolRef();
@@ -238,14 +235,14 @@ LDSUsesInfoTy getTransitiveUsesOfLDS(CallGraph const &CG, Module &M) {
if (HasAbsoluteGVs && *HasAbsoluteGVs)
return {FunctionVariableMap(), FunctionVariableMap()};
- return {std::move(direct_map_kernel), std::move(indirect_map_kernel)};
+ return {std::move(DirectMapKernel), std::move(IndirectMapKernel)};
}
void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,
StringRef FnAttr) {
KernelRoot->removeFnAttr(FnAttr);
- SmallVector<Function *> WorkList({CG[KernelRoot]->getFunction()});
+ SmallVector<Function *> WorkList = {CG[KernelRoot]->getFunction()};
SmallPtrSet<Function *, 8> Visited;
bool SeenUnknownCall = false;