[llvm] [AMDGPU] SplitModule: Better handling of aliases & inline assembly (PR #106528)
Pierre van Houtryve via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 3 03:19:22 PDT 2024
https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/106528
>From 4b95883bffc3c1a711745f1ebdf48629a502f74c Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Thu, 29 Aug 2024 13:04:15 +0200
Subject: [PATCH 1/2] [AMDGPU] SplitModule: Better handling of aliases & inline
assembly
The CallGraph quickly gives up whenever a call isn't obvious.
This caused us to misidentify inline assembly and calls to aliases as indirect calls.
Add a fallback that iterates over the function to fix whatever has to be fixed if the CG isn't certain.
---
llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp | 106 ++++++++++++++----
.../AMDGPU/indirect-call-inline-asm.ll | 41 +++++++
.../kernels-alias-dependencies-debug.ll | 22 ++++
.../AMDGPU/kernels-alias-dependencies.ll | 16 +--
4 files changed, 153 insertions(+), 32 deletions(-)
create mode 100644 llvm/test/tools/llvm-split/AMDGPU/indirect-call-inline-asm.ll
create mode 100644 llvm/test/tools/llvm-split/AMDGPU/kernels-alias-dependencies-debug.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
index ca371a883b897e..91f99b85b24efc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
@@ -103,6 +103,15 @@ static cl::opt<bool> NoExternalizeGlobals(
cl::desc("disables externalization of global variable with local linkage; "
"may cause globals to be duplicated which increases binary size"));
+static cl::opt<bool> NoExternalizeOnAddrTaken(
+ "amdgpu-module-splitting-no-externalize-address-taken", cl::Hidden,
+ cl::desc(
+ "disables externalization of functions whose addresses are taken"));
+
+static cl::opt<bool> InlineAsmIsIndirectCall(
+ "amdgpu-module-splitting-inline-asm-is-indirect-call", cl::Hidden,
+ cl::desc("consider inline assembly as na indirect call"));
+
static cl::opt<std::string>
ModuleDotCfgOutput("amdgpu-module-splitting-print-module-dotcfg",
cl::Hidden,
@@ -501,6 +510,9 @@ void SplitGraph::buildGraph(CallGraph &CG) {
dbgs()
<< "[build graph] constructing graph representation of the input\n");
+ // FIXME(?): Is the callgraph really worth using if we have to iterate the
+ // function again whenever it fails to give us enough information?
+
// We build the graph by just iterating all functions in the module and
// working on their direct callees. At the end, all nodes should be linked
// together as expected.
@@ -511,29 +523,68 @@ void SplitGraph::buildGraph(CallGraph &CG) {
continue;
// Look at direct callees and create the necessary edges in the graph.
- bool HasIndirectCall = false;
- Node &N = getNode(Cache, Fn);
+ SetVector<const Function *> DirectCallees;
+ bool CallsExternal = false;
for (auto &CGEntry : *CG[&Fn]) {
auto *CGNode = CGEntry.second;
- auto *Callee = CGNode->getFunction();
- if (!Callee) {
- // TODO: Don't consider inline assembly as indirect calls.
- if (CGNode == CG.getCallsExternalNode())
- HasIndirectCall = true;
- continue;
- }
-
- if (!Callee->isDeclaration())
- createEdge(N, getNode(Cache, *Callee), EdgeKind::DirectCall);
+ if (auto *Callee = CGNode->getFunction()) {
+ if (!Callee->isDeclaration())
+ DirectCallees.insert(Callee);
+ } else if (CGNode == CG.getCallsExternalNode())
+ CallsExternal = true;
}
// Keep track of this function if it contains an indirect call and/or if it
// can be indirectly called.
- if (HasIndirectCall) {
- LLVM_DEBUG(dbgs() << "indirect call found in " << Fn.getName() << "\n");
- FnsWithIndirectCalls.push_back(&Fn);
+ if (CallsExternal) {
+ LLVM_DEBUG(dbgs() << " [!] callgraph is incomplete for " << Fn.getName()
+ << " - analyzing function\n");
+
+ bool HasIndirectCall = false;
+ for (const auto &BB : Fn) {
+ for (const auto &Inst : BB) {
+ // look at all calls without a direct callee.
+ if (const auto *CB = dyn_cast<CallBase>(&Inst);
+ CB && !CB->getCalledFunction()) {
+ // inline assembly can be ignored, unless InlineAsmIsIndirectCall is
+ // true.
+ if (CB->isInlineAsm()) {
+ if (InlineAsmIsIndirectCall)
+ HasIndirectCall = true;
+ LLVM_DEBUG(dbgs() << " found inline assembly\n");
+ continue;
+ }
+
+ // see through calls of aliases
+ const Value *CalledV = CB->getCalledOperand();
+ if (isa<GlobalAlias>(CalledV)) {
+ if (const auto *RealFn = dyn_cast<Function>(
+ CalledV->stripPointerCastsAndAliases());
+ RealFn && !RealFn->isDeclaration()) {
+ LLVM_DEBUG(dbgs()
+ << " resolved call to " << RealFn->getName()
+ << " in: " << Inst << '\n');
+ DirectCallees.insert(RealFn);
+ continue;
+ }
+ }
+
+ // everything else is handled conservatively.
+ HasIndirectCall = true;
+ }
+ }
+ }
+
+ if (HasIndirectCall) {
+ LLVM_DEBUG(dbgs() << " indirect call found\n");
+ FnsWithIndirectCalls.push_back(&Fn);
+ }
}
+ Node &N = getNode(Cache, Fn);
+ for (const auto *Callee : DirectCallees)
+ createEdge(N, getNode(Cache, *Callee), EdgeKind::DirectCall);
+
if (canBeIndirectlyCalled(Fn))
IndirectlyCallableFns.push_back(&Fn);
}
@@ -1337,13 +1388,23 @@ static void splitAMDGPUModule(
//
// Additionally, it guides partitioning to not duplicate this function if it's
// called directly at some point.
- for (auto &Fn : M) {
- if (Fn.hasAddressTaken()) {
- if (Fn.hasLocalLinkage()) {
- LLVM_DEBUG(dbgs() << "[externalize] " << Fn.getName()
- << " because its address is taken\n");
+ //
+ // TODO: Could we be smarter about this ? This makes all functions whose
+ // addresses are taken non-copyable. We should probably model this type of
+ // constraint in the graph and use it to guide splitting, instead of
+ // externalizing like this. Maybe non-copyable should really mean "keep one
+ // visible copy, then internalize all other copies" for some functions?
+ if (!NoExternalizeOnAddrTaken) {
+ for (auto &Fn : M) {
+ // TODO: Should aliases count? Probably not but they're so rare I'm not
+ // sure it's worth fixing.
+ if (Fn.hasAddressTaken()) {
+ if (Fn.hasLocalLinkage()) {
+ LLVM_DEBUG(dbgs() << "[externalize] " << Fn.getName()
+ << " because its address is taken\n");
+ }
+ externalize(Fn);
}
- externalize(Fn);
}
}
@@ -1379,7 +1440,8 @@ static void splitAMDGPUModule(
dbgs() << "[graph] nodes:\n";
for (const SplitGraph::Node *N : SG.nodes()) {
dbgs() << " - [" << N->getID() << "]: " << N->getName() << " "
- << (N->isGraphEntryPoint() ? "(entry)" : "") << "\n";
+ << (N->isGraphEntryPoint() ? "(entry)" : "") << " "
+ << (N->isNonCopyable() ? "(noncopyable)" : "") << "\n";
}
});
diff --git a/llvm/test/tools/llvm-split/AMDGPU/indirect-call-inline-asm.ll b/llvm/test/tools/llvm-split/AMDGPU/indirect-call-inline-asm.ll
new file mode 100644
index 00000000000000..5c7ddaeecd7044
--- /dev/null
+++ b/llvm/test/tools/llvm-split/AMDGPU/indirect-call-inline-asm.ll
@@ -0,0 +1,41 @@
+; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-no-externalize-address-taken
+; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
+; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
+
+; RUN: llvm-split -o %t_as_indirect %s -j 2 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-no-externalize-address-taken -amdgpu-module-splitting-inline-asm-is-indirect-call
+; RUN: llvm-dis -o - %t_as_indirect0 | FileCheck --check-prefix=CHECK-INDIRECT0 --implicit-check-not=define %s
+; RUN: llvm-dis -o - %t_as_indirect1 | FileCheck --check-prefix=CHECK-INDIRECT1 --implicit-check-not=define %s
+
+; CHECK0: define internal void @HelperB
+; CHECK0: define amdgpu_kernel void @B
+
+; CHECK1: define internal void @HelperA()
+; CHECK1: define amdgpu_kernel void @A()
+
+; CHECK-INDIRECT0: define internal void @HelperB
+; CHECK-INDIRECT0: define amdgpu_kernel void @B
+
+; CHECK-INDIRECT1: define internal void @HelperA()
+; CHECK-INDIRECT1: define internal void @HelperB()
+; CHECK-INDIRECT1: define amdgpu_kernel void @A()
+
+ at addrthief = global [2 x ptr] [ptr @HelperA, ptr @HelperB]
+
+define internal void @HelperA() {
+ ret void
+}
+
+define internal void @HelperB() {
+ ret void
+}
+
+define amdgpu_kernel void @A() {
+ call void asm sideeffect "v_mov_b32 v0, 7", "~{v0}"()
+ call void @HelperA()
+ ret void
+}
+
+define amdgpu_kernel void @B(ptr %out) {
+ call void @HelperB()
+ ret void
+}
diff --git a/llvm/test/tools/llvm-split/AMDGPU/kernels-alias-dependencies-debug.ll b/llvm/test/tools/llvm-split/AMDGPU/kernels-alias-dependencies-debug.ll
new file mode 100644
index 00000000000000..0bfa9150d9e90e
--- /dev/null
+++ b/llvm/test/tools/llvm-split/AMDGPU/kernels-alias-dependencies-debug.ll
@@ -0,0 +1,22 @@
+; REQUIRES: asserts
+
+; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa -debug -amdgpu-module-splitting-no-externalize-address-taken 2>&1 | FileCheck %s
+
+; CHECK: [!] callgraph is incomplete for A - analyzing function
+; CHECK-NEXT: resolved call to PerryThePlatypus in: call void @Perry()
+
+ at Perry = internal alias ptr(), ptr @PerryThePlatypus
+
+define internal void @PerryThePlatypus() {
+ ret void
+}
+
+define amdgpu_kernel void @A() {
+ call void @Perry()
+ ret void
+}
+
+define amdgpu_kernel void @B() {
+ call void @PerryThePlatypus()
+ ret void
+}
diff --git a/llvm/test/tools/llvm-split/AMDGPU/kernels-alias-dependencies.ll b/llvm/test/tools/llvm-split/AMDGPU/kernels-alias-dependencies.ll
index d7e84abd5f968d..ed160cceaf8f28 100644
--- a/llvm/test/tools/llvm-split/AMDGPU/kernels-alias-dependencies.ll
+++ b/llvm/test/tools/llvm-split/AMDGPU/kernels-alias-dependencies.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa
+; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-no-externalize-address-taken
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
@@ -7,18 +7,14 @@
; - B calls @PerryThePlatypus
; - C calls @Perry, an alias of @PerryThePlatypus
;
-; We should see through the alias and put B/C in the same
-; partition.
-;
-; Additionally, @PerryThePlatypus gets externalized as
-; the alias counts as taking its address.
+; We should see through the alias and treat C as-if it called PerryThePlatypus directly.
-; CHECK0: define amdgpu_kernel void @A
+; CHECK0: define internal void @PerryThePlatypus()
+; CHECK0: define amdgpu_kernel void @C
-; CHECK1: @Perry = internal alias ptr (), ptr @PerryThePlatypus
-; CHECK1: define hidden void @PerryThePlatypus()
+; CHECK1: define internal void @PerryThePlatypus()
+; CHECK1: define amdgpu_kernel void @A
; CHECK1: define amdgpu_kernel void @B
-; CHECK1: define amdgpu_kernel void @C
@Perry = internal alias ptr(), ptr @PerryThePlatypus
>From 3eb37305785b870e0d3c7ce8a23758bdb3ea6a0d Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Tue, 3 Sep 2024 12:19:11 +0200
Subject: [PATCH 2/2] fix typo
---
llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
index 91f99b85b24efc..37327e8747e9fe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
@@ -110,7 +110,7 @@ static cl::opt<bool> NoExternalizeOnAddrTaken(
static cl::opt<bool> InlineAsmIsIndirectCall(
"amdgpu-module-splitting-inline-asm-is-indirect-call", cl::Hidden,
- cl::desc("consider inline assembly as na indirect call"));
+ cl::desc("consider inline assembly as an indirect call"));
static cl::opt<std::string>
ModuleDotCfgOutput("amdgpu-module-splitting-print-module-dotcfg",
More information about the llvm-commits
mailing list