[llvm] [AMDGPU][SplitModule] Handle !callees metadata (PR #108802)
Pierre van Houtryve via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 6 22:59:55 PDT 2024
https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/108802
>From dc18a3bba3b510b90e705df88f429464080971df Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Mon, 16 Sep 2024 09:41:49 +0200
Subject: [PATCH 1/7] [AMDGPU][SplitModule] Cleanup CallsExternal Handling
- Don't treat inline ASM as indirect calls
- Remove alias testing, which was broken (only working by pure luck right now) and isn't needed anyway. GlobalOpt should take care of them for us.
---
llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp | 91 ++++++++++++++-----
.../AMDGPU/indirect-call-inline-asm.ll | 41 +++++++++
.../AMDGPU/kernels-alias-dependencies.ll | 41 ---------
.../AMDGPU/kernels-dependency-indirect.ll | 12 ---
4 files changed, 110 insertions(+), 75 deletions(-)
create mode 100644 llvm/test/tools/llvm-split/AMDGPU/indirect-call-inline-asm.ll
delete mode 100644 llvm/test/tools/llvm-split/AMDGPU/kernels-alias-dependencies.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
index e97a7f4e075f7f..e76fc937793669 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
@@ -43,6 +43,7 @@
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/User.h"
@@ -103,6 +104,15 @@ static cl::opt<bool> NoExternalizeGlobals(
cl::desc("disables externalization of global variable with local linkage; "
"may cause globals to be duplicated which increases binary size"));
+static cl::opt<bool> NoExternalizeOnAddrTaken(
+ "amdgpu-module-splitting-no-externalize-address-taken", cl::Hidden,
+ cl::desc(
+ "disables externalization of functions whose addresses are taken"));
+
+static cl::opt<bool> InlineAsmIsIndirectCall(
+ "amdgpu-module-splitting-inline-asm-is-indirect-call", cl::Hidden,
+ cl::desc("consider inline assembly as an indirect call"));
+
static cl::opt<std::string>
ModuleDotCfgOutput("amdgpu-module-splitting-print-module-dotcfg",
cl::Hidden,
@@ -482,6 +492,9 @@ void SplitGraph::buildGraph(CallGraph &CG) {
dbgs()
<< "[build graph] constructing graph representation of the input\n");
+ // FIXME(?): Is the callgraph really worth using if we have to iterate the
+ // function again whenever it fails to give us enough information?
+
// We build the graph by just iterating all functions in the module and
// working on their direct callees. At the end, all nodes should be linked
// together as expected.
@@ -492,29 +505,52 @@ void SplitGraph::buildGraph(CallGraph &CG) {
continue;
// Look at direct callees and create the necessary edges in the graph.
- bool HasIndirectCall = false;
- Node &N = getNode(Cache, Fn);
+ SetVector<const Function *> DirectCallees;
+ bool CallsExternal = false;
for (auto &CGEntry : *CG[&Fn]) {
auto *CGNode = CGEntry.second;
- auto *Callee = CGNode->getFunction();
- if (!Callee) {
- // TODO: Don't consider inline assembly as indirect calls.
- if (CGNode == CG.getCallsExternalNode())
- HasIndirectCall = true;
- continue;
- }
-
- if (!Callee->isDeclaration())
- createEdge(N, getNode(Cache, *Callee), EdgeKind::DirectCall);
+ if (auto *Callee = CGNode->getFunction()) {
+ if (!Callee->isDeclaration())
+ DirectCallees.insert(Callee);
+ } else if (CGNode == CG.getCallsExternalNode())
+ CallsExternal = true;
}
// Keep track of this function if it contains an indirect call and/or if it
// can be indirectly called.
- if (HasIndirectCall) {
- LLVM_DEBUG(dbgs() << "indirect call found in " << Fn.getName() << "\n");
- FnsWithIndirectCalls.push_back(&Fn);
+ if (CallsExternal) {
+ LLVM_DEBUG(dbgs() << " [!] callgraph is incomplete for " << Fn.getName()
+ << " - analyzing function\n");
+
+ bool HasIndirectCall = false;
+ for (const auto &Inst : instructions(Fn)) {
+ // look at all calls without a direct callee.
+ if (const auto *CB = dyn_cast<CallBase>(&Inst);
+ CB && !CB->getCalledFunction()) {
+ // inline assembly can be ignored, unless InlineAsmIsIndirectCall is
+ // true.
+ if (CB->isInlineAsm()) {
+ if (InlineAsmIsIndirectCall)
+ HasIndirectCall = true;
+ LLVM_DEBUG(dbgs() << " found inline assembly\n");
+ continue;
+ }
+
+ // everything else is handled conservatively.
+ HasIndirectCall = true;
+ }
+ }
+
+ if (HasIndirectCall) {
+ LLVM_DEBUG(dbgs() << " indirect call found\n");
+ FnsWithIndirectCalls.push_back(&Fn);
+ }
}
+ Node &N = getNode(Cache, Fn);
+ for (const auto *Callee : DirectCallees)
+ createEdge(N, getNode(Cache, *Callee), EdgeKind::DirectCall);
+
if (canBeIndirectlyCalled(Fn))
IndirectlyCallableFns.push_back(&Fn);
}
@@ -1326,13 +1362,23 @@ static void splitAMDGPUModule(
//
// Additionally, it guides partitioning to not duplicate this function if it's
// called directly at some point.
- for (auto &Fn : M) {
- if (Fn.hasAddressTaken()) {
- if (Fn.hasLocalLinkage()) {
- LLVM_DEBUG(dbgs() << "[externalize] " << Fn.getName()
- << " because its address is taken\n");
+ //
+ // TODO: Could we be smarter about this ? This makes all functions whose
+ // addresses are taken non-copyable. We should probably model this type of
+ // constraint in the graph and use it to guide splitting, instead of
+ // externalizing like this. Maybe non-copyable should really mean "keep one
+ // visible copy, then internalize all other copies" for some functions?
+ if (!NoExternalizeOnAddrTaken) {
+ for (auto &Fn : M) {
+ // TODO: Should aliases count? Probably not but they're so rare I'm not
+ // sure it's worth fixing.
+ if (Fn.hasAddressTaken()) {
+ if (Fn.hasLocalLinkage()) {
+ LLVM_DEBUG(dbgs() << "[externalize] " << Fn.getName()
+ << " because its address is taken\n");
+ }
+ externalize(Fn);
}
- externalize(Fn);
}
}
@@ -1368,7 +1414,8 @@ static void splitAMDGPUModule(
dbgs() << "[graph] nodes:\n";
for (const SplitGraph::Node *N : SG.nodes()) {
dbgs() << " - [" << N->getID() << "]: " << N->getName() << " "
- << (N->isGraphEntryPoint() ? "(entry)" : "") << "\n";
+ << (N->isGraphEntryPoint() ? "(entry)" : "") << " "
+ << (N->isNonCopyable() ? "(noncopyable)" : "") << "\n";
}
});
diff --git a/llvm/test/tools/llvm-split/AMDGPU/indirect-call-inline-asm.ll b/llvm/test/tools/llvm-split/AMDGPU/indirect-call-inline-asm.ll
new file mode 100644
index 00000000000000..5c7ddaeecd7044
--- /dev/null
+++ b/llvm/test/tools/llvm-split/AMDGPU/indirect-call-inline-asm.ll
@@ -0,0 +1,41 @@
+; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-no-externalize-address-taken
+; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
+; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
+
+; RUN: llvm-split -o %t_as_indirect %s -j 2 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-no-externalize-address-taken -amdgpu-module-splitting-inline-asm-is-indirect-call
+; RUN: llvm-dis -o - %t_as_indirect0 | FileCheck --check-prefix=CHECK-INDIRECT0 --implicit-check-not=define %s
+; RUN: llvm-dis -o - %t_as_indirect1 | FileCheck --check-prefix=CHECK-INDIRECT1 --implicit-check-not=define %s
+
+; CHECK0: define internal void @HelperB
+; CHECK0: define amdgpu_kernel void @B
+
+; CHECK1: define internal void @HelperA()
+; CHECK1: define amdgpu_kernel void @A()
+
+; CHECK-INDIRECT0: define internal void @HelperB
+; CHECK-INDIRECT0: define amdgpu_kernel void @B
+
+; CHECK-INDIRECT1: define internal void @HelperA()
+; CHECK-INDIRECT1: define internal void @HelperB()
+; CHECK-INDIRECT1: define amdgpu_kernel void @A()
+
+ at addrthief = global [2 x ptr] [ptr @HelperA, ptr @HelperB]
+
+define internal void @HelperA() {
+ ret void
+}
+
+define internal void @HelperB() {
+ ret void
+}
+
+define amdgpu_kernel void @A() {
+ call void asm sideeffect "v_mov_b32 v0, 7", "~{v0}"()
+ call void @HelperA()
+ ret void
+}
+
+define amdgpu_kernel void @B(ptr %out) {
+ call void @HelperB()
+ ret void
+}
diff --git a/llvm/test/tools/llvm-split/AMDGPU/kernels-alias-dependencies.ll b/llvm/test/tools/llvm-split/AMDGPU/kernels-alias-dependencies.ll
deleted file mode 100644
index d7e84abd5f968d..00000000000000
--- a/llvm/test/tools/llvm-split/AMDGPU/kernels-alias-dependencies.ll
+++ /dev/null
@@ -1,41 +0,0 @@
-; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa
-; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
-; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
-
-; 3 kernels:
-; - A calls nothing
-; - B calls @PerryThePlatypus
-; - C calls @Perry, an alias of @PerryThePlatypus
-;
-; We should see through the alias and put B/C in the same
-; partition.
-;
-; Additionally, @PerryThePlatypus gets externalized as
-; the alias counts as taking its address.
-
-; CHECK0: define amdgpu_kernel void @A
-
-; CHECK1: @Perry = internal alias ptr (), ptr @PerryThePlatypus
-; CHECK1: define hidden void @PerryThePlatypus()
-; CHECK1: define amdgpu_kernel void @B
-; CHECK1: define amdgpu_kernel void @C
-
- at Perry = internal alias ptr(), ptr @PerryThePlatypus
-
-define internal void @PerryThePlatypus() {
- ret void
-}
-
-define amdgpu_kernel void @A() {
- ret void
-}
-
-define amdgpu_kernel void @B() {
- call void @PerryThePlatypus()
- ret void
-}
-
-define amdgpu_kernel void @C() {
- call void @Perry()
- ret void
-}
diff --git a/llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-indirect.ll b/llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-indirect.ll
index 5be945bda48bf4..c2acb06d3e72e5 100644
--- a/llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-indirect.ll
+++ b/llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-indirect.ll
@@ -3,18 +3,6 @@
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s
-; We have 4 kernels:
-; - Each kernel has an internal helper
-; - @A and @B's helpers does an indirect call.
-;
-; We default to putting A/B in P0, alongside a copy
-; of all helpers who have their address taken.
-; The other kernels can still go into separate partitions.
-;
-; Note that dependency discovery shouldn't stop upon finding an
-; indirect call. HelperC/D should also end up in P0 as they
-; are dependencies of HelperB.
-
; CHECK0: define internal void @HelperD
; CHECK0: define amdgpu_kernel void @D
>From d5361ad4c4087241673758a96b1b99b0f886a8f0 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Wed, 25 Sep 2024 09:23:51 +0200
Subject: [PATCH 2/7] Comments + adapt inline asm test w/o CL opt
---
llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp | 19 ++++---------
.../AMDGPU/indirect-call-inline-asm-debug.ll | 28 +++++++++++++++++++
.../AMDGPU/indirect-call-inline-asm.ll | 11 --------
3 files changed, 34 insertions(+), 24 deletions(-)
create mode 100644 llvm/test/tools/llvm-split/AMDGPU/indirect-call-inline-asm-debug.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
index e76fc937793669..78218167e50108 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
@@ -109,10 +109,6 @@ static cl::opt<bool> NoExternalizeOnAddrTaken(
cl::desc(
"disables externalization of functions whose addresses are taken"));
-static cl::opt<bool> InlineAsmIsIndirectCall(
- "amdgpu-module-splitting-inline-asm-is-indirect-call", cl::Hidden,
- cl::desc("consider inline assembly as an indirect call"));
-
static cl::opt<std::string>
ModuleDotCfgOutput("amdgpu-module-splitting-print-module-dotcfg",
cl::Hidden,
@@ -519,8 +515,9 @@ void SplitGraph::buildGraph(CallGraph &CG) {
// Keep track of this function if it contains an indirect call and/or if it
// can be indirectly called.
if (CallsExternal) {
- LLVM_DEBUG(dbgs() << " [!] callgraph is incomplete for " << Fn.getName()
- << " - analyzing function\n");
+ LLVM_DEBUG(dbgs() << " [!] callgraph is incomplete for ";
+ Fn.printAsOperand(dbgs());
+ dbgs() << " - analyzing function\n");
bool HasIndirectCall = false;
for (const auto &Inst : instructions(Fn)) {
@@ -530,8 +527,6 @@ void SplitGraph::buildGraph(CallGraph &CG) {
// inline assembly can be ignored, unless InlineAsmIsIndirectCall is
// true.
if (CB->isInlineAsm()) {
- if (InlineAsmIsIndirectCall)
- HasIndirectCall = true;
LLVM_DEBUG(dbgs() << " found inline assembly\n");
continue;
}
@@ -1372,11 +1367,9 @@ static void splitAMDGPUModule(
for (auto &Fn : M) {
// TODO: Should aliases count? Probably not but they're so rare I'm not
// sure it's worth fixing.
- if (Fn.hasAddressTaken()) {
- if (Fn.hasLocalLinkage()) {
- LLVM_DEBUG(dbgs() << "[externalize] " << Fn.getName()
- << " because its address is taken\n");
- }
+ if (Fn.hasLocalLinkage() && Fn.hasAddressTaken()) {
+ LLVM_DEBUG(dbgs() << "[externalize] "; Fn.printAsOperand(dbgs());
+ dbgs() << " because its address is taken\n");
externalize(Fn);
}
}
diff --git a/llvm/test/tools/llvm-split/AMDGPU/indirect-call-inline-asm-debug.ll b/llvm/test/tools/llvm-split/AMDGPU/indirect-call-inline-asm-debug.ll
new file mode 100644
index 00000000000000..5b15e740f76b96
--- /dev/null
+++ b/llvm/test/tools/llvm-split/AMDGPU/indirect-call-inline-asm-debug.ll
@@ -0,0 +1,28 @@
+; REQUIRES: asserts
+
+; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-no-externalize-address-taken -debug-only=amdgpu-split-module 2>&1 | FileCheck %s
+
+; CHECK: [!] callgraph is incomplete for ptr @A - analyzing function
+; CHECK-NEXT: found inline assembly
+; CHECK-NOT: indirect call found
+
+ at addrthief = global [2 x ptr] [ptr @HelperA, ptr @HelperB]
+
+define internal void @HelperA() {
+ ret void
+}
+
+define internal void @HelperB() {
+ ret void
+}
+
+define amdgpu_kernel void @A() {
+ call void asm sideeffect "v_mov_b32 v0, 7", "~{v0}"()
+ call void @HelperA()
+ ret void
+}
+
+define amdgpu_kernel void @B(ptr %out) {
+ call void @HelperB()
+ ret void
+}
diff --git a/llvm/test/tools/llvm-split/AMDGPU/indirect-call-inline-asm.ll b/llvm/test/tools/llvm-split/AMDGPU/indirect-call-inline-asm.ll
index 5c7ddaeecd7044..13c30c9e45e808 100644
--- a/llvm/test/tools/llvm-split/AMDGPU/indirect-call-inline-asm.ll
+++ b/llvm/test/tools/llvm-split/AMDGPU/indirect-call-inline-asm.ll
@@ -2,23 +2,12 @@
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
-; RUN: llvm-split -o %t_as_indirect %s -j 2 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-no-externalize-address-taken -amdgpu-module-splitting-inline-asm-is-indirect-call
-; RUN: llvm-dis -o - %t_as_indirect0 | FileCheck --check-prefix=CHECK-INDIRECT0 --implicit-check-not=define %s
-; RUN: llvm-dis -o - %t_as_indirect1 | FileCheck --check-prefix=CHECK-INDIRECT1 --implicit-check-not=define %s
-
; CHECK0: define internal void @HelperB
; CHECK0: define amdgpu_kernel void @B
; CHECK1: define internal void @HelperA()
; CHECK1: define amdgpu_kernel void @A()
-; CHECK-INDIRECT0: define internal void @HelperB
-; CHECK-INDIRECT0: define amdgpu_kernel void @B
-
-; CHECK-INDIRECT1: define internal void @HelperA()
-; CHECK-INDIRECT1: define internal void @HelperB()
-; CHECK-INDIRECT1: define amdgpu_kernel void @A()
-
@addrthief = global [2 x ptr] [ptr @HelperA, ptr @HelperB]
define internal void @HelperA() {
>From b5e322fed6fca2fb35eae63620f9fd4c60d8e369 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Mon, 7 Oct 2024 07:44:57 +0200
Subject: [PATCH 3/7] Add break
---
llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
index 78218167e50108..a62c72d124825e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
@@ -533,6 +533,7 @@ void SplitGraph::buildGraph(CallGraph &CG) {
// everything else is handled conservatively.
HasIndirectCall = true;
+ break;
}
}
>From c2ea2112b7c1c69d30fe912f599147c648a0e594 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Thu, 29 Aug 2024 13:04:15 +0200
Subject: [PATCH 4/7] [AMDGPU][SplitModule] Handle !callees metadata
---
llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp | 29 +++++++-
.../kernels-dependency-indirect-callee-md.ll | 69 +++++++++++++++++++
2 files changed, 97 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-indirect-callee-md.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
index a62c72d124825e..1c260f23ca822b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
@@ -482,6 +482,27 @@ void SplitGraph::Node::visitAllDependencies(
}
}
+/// Checks if \p I has MD_callees and if it does, parse it and put the function
+/// in \p Callees.
+///
+/// \returns true if there was metadata and it was parsed correctly. false if
+/// there was no MD or if it contained unknown entries.
+static bool handleCalleesMD(const Instruction &I,
+ SmallVector<Function *> &Callees) {
+ auto *MD = I.getMetadata(LLVMContext::MD_callees);
+ if (!MD)
+ return false;
+
+ for (const auto &Op : MD->operands()) {
+ Function *Callee = mdconst::extract_or_null<Function>(Op);
+ if (!Callee)
+ return false;
+ Callees.push_back(Callee);
+ }
+
+ return true;
+}
+
void SplitGraph::buildGraph(CallGraph &CG) {
SplitModuleTimer SMT("buildGraph", "graph construction");
LLVM_DEBUG(
@@ -519,6 +540,8 @@ void SplitGraph::buildGraph(CallGraph &CG) {
Fn.printAsOperand(dbgs());
dbgs() << " - analyzing function\n");
+ SmallVector<Function *> KnownCallees;
+
bool HasIndirectCall = false;
for (const auto &Inst : instructions(Fn)) {
// look at all calls without a direct callee.
@@ -531,6 +554,9 @@ void SplitGraph::buildGraph(CallGraph &CG) {
continue;
}
+ if (handleCalleesMD(Inst, KnownCallees))
+ continue;
+
// everything else is handled conservatively.
HasIndirectCall = true;
break;
@@ -540,7 +566,8 @@ void SplitGraph::buildGraph(CallGraph &CG) {
if (HasIndirectCall) {
LLVM_DEBUG(dbgs() << " indirect call found\n");
FnsWithIndirectCalls.push_back(&Fn);
- }
+ } else if (!KnownCallees.empty())
+ DirectCallees.insert(KnownCallees.begin(), KnownCallees.end());
}
Node &N = getNode(Cache, Fn);
diff --git a/llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-indirect-callee-md.ll b/llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-indirect-callee-md.ll
new file mode 100644
index 00000000000000..f1ed02b2502a06
--- /dev/null
+++ b/llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-indirect-callee-md.ll
@@ -0,0 +1,69 @@
+; RUN: sed -s 's/_MD_/, !callees !{ptr @CallCandidate0}/' %s | llvm-split -o %t -j 3 -mtriple amdgcn-amd-amdhsa
+; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
+; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
+; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s
+
+; RUN: sed -s 's/_MD_//g' %s | llvm-split -o %t-nomd -j 3 -mtriple amdgcn-amd-amdhsa
+; RUN: llvm-dis -o - %t-nomd0 | FileCheck --check-prefix=CHECK-NOMD0 --implicit-check-not=define %s
+; RUN: llvm-dis -o - %t-nomd1 | FileCheck --check-prefix=CHECK-NOMD1 --implicit-check-not=define %s
+; RUN: llvm-dis -o - %t-nomd2 | FileCheck --check-prefix=CHECK-NOMD2 --implicit-check-not=define %s
+
+; CHECK0: define internal void @HelperC
+; CHECK0: define amdgpu_kernel void @C
+
+; CHECK1: define hidden void @CallCandidate1
+; CHECK1: define internal void @HelperB
+; CHECK1: define amdgpu_kernel void @B
+
+; CHECK2: define internal void @HelperA
+; CHECK2: define hidden void @CallCandidate0
+; CHECK2: define amdgpu_kernel void @A
+
+; CHECK-NOMD0: define internal void @HelperC
+; CHECK-NOMD0: define amdgpu_kernel void @C
+
+; CHECK-NOMD1: define internal void @HelperB
+; CHECK-NOMD1: define amdgpu_kernel void @B
+
+; CHECK-NOMD2: define internal void @HelperA
+; CHECK-NOMD2: define hidden void @CallCandidate0
+; CHECK-NOMD2: define hidden void @CallCandidate1
+; CHECK-NOMD2: define amdgpu_kernel void @A
+
+ at addrthief = global [2 x ptr] [ptr @CallCandidate0, ptr @CallCandidate1]
+
+define internal void @HelperA(ptr %call) {
+ call void %call() _MD_
+ ret void
+}
+
+define internal void @CallCandidate0() {
+ ret void
+}
+
+define internal void @CallCandidate1() {
+ ret void
+}
+
+define internal void @HelperB() {
+ ret void
+}
+
+define internal void @HelperC() {
+ ret void
+}
+
+define amdgpu_kernel void @A(ptr %call) {
+ call void @HelperA(ptr %call)
+ ret void
+}
+
+define amdgpu_kernel void @B() {
+ call void @HelperB()
+ ret void
+}
+
+define amdgpu_kernel void @C() {
+ call void @HelperC()
+ ret void
+}
>From fb5c4a8e87df9ce1412ed835d535b9e9189a41ff Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Mon, 16 Sep 2024 15:31:15 +0200
Subject: [PATCH 5/7] Comments, minor refactor
---
llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp | 39 +++++++++++---------
1 file changed, 21 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
index 1c260f23ca822b..995cfcbd73f7de 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
@@ -488,7 +488,7 @@ void SplitGraph::Node::visitAllDependencies(
/// \returns true if there was metadata and it was parsed correctly. false if
/// there was no MD or if it contained unknown entries.
static bool handleCalleesMD(const Instruction &I,
- SmallVector<Function *> &Callees) {
+ SmallVectorImpl<Function *> &Callees) {
auto *MD = I.getMetadata(LLVMContext::MD_callees);
if (!MD)
return false;
@@ -541,29 +541,32 @@ void SplitGraph::buildGraph(CallGraph &CG) {
dbgs() << " - analyzing function\n");
SmallVector<Function *> KnownCallees;
-
- bool HasIndirectCall = false;
+ bool HasUnknownIndirectCall = false;
for (const auto &Inst : instructions(Fn)) {
// look at all calls without a direct callee.
- if (const auto *CB = dyn_cast<CallBase>(&Inst);
- CB && !CB->getCalledFunction()) {
- // inline assembly can be ignored, unless InlineAsmIsIndirectCall is
- // true.
- if (CB->isInlineAsm()) {
- LLVM_DEBUG(dbgs() << " found inline assembly\n");
- continue;
- }
+ const auto *CB = dyn_cast<CallBase>(&Inst);
+ if (!CB || CB->getCalledFunction())
+ continue;
+
+ // inline assembly can be ignored, unless InlineAsmIsIndirectCall is
+ // true.
+ if (CB->isInlineAsm()) {
+ if (InlineAsmIsIndirectCall)
+ HasUnknownIndirectCall = true;
+ LLVM_DEBUG(dbgs() << " found inline assembly\n");
+ continue;
+ }
- if (handleCalleesMD(Inst, KnownCallees))
- continue;
+ if (handleCalleesMD(Inst, KnownCallees))
+ continue;
- // everything else is handled conservatively.
- HasIndirectCall = true;
- break;
- }
+ // Everything else is handled conservatively. If we fall into the
+ // conservative case don't bother analyzing further.
+ HasUnknownIndirectCall = true;
+ break;
}
- if (HasIndirectCall) {
+ if (HasUnknownIndirectCall) {
LLVM_DEBUG(dbgs() << " indirect call found\n");
FnsWithIndirectCalls.push_back(&Fn);
} else if (!KnownCallees.empty())
>From 28baeb8b293a8fbf33630c9b5f220442990c4de8 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Tue, 17 Sep 2024 07:18:26 +0200
Subject: [PATCH 6/7] Fixes
---
llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
index 995cfcbd73f7de..f1f7903562fbde 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
@@ -486,9 +486,11 @@ void SplitGraph::Node::visitAllDependencies(
/// in \p Callees.
///
/// \returns true if there was metadata and it was parsed correctly. false if
-/// there was no MD or if it contained unknown entries.
+/// there was no MD or if it contained unknown entries and parsing failed.
+/// If this returns false, \p Callees will contain incomplete information
+/// and must not be used.
static bool handleCalleesMD(const Instruction &I,
- SmallVectorImpl<Function *> &Callees) {
+ SetVector<Function *> &Callees) {
auto *MD = I.getMetadata(LLVMContext::MD_callees);
if (!MD)
return false;
@@ -497,7 +499,7 @@ static bool handleCalleesMD(const Instruction &I,
Function *Callee = mdconst::extract_or_null<Function>(Op);
if (!Callee)
return false;
- Callees.push_back(Callee);
+ Callees.insert(Callee);
}
return true;
@@ -540,7 +542,7 @@ void SplitGraph::buildGraph(CallGraph &CG) {
Fn.printAsOperand(dbgs());
dbgs() << " - analyzing function\n");
- SmallVector<Function *> KnownCallees;
+ SetVector<Function *> KnownCallees;
bool HasUnknownIndirectCall = false;
for (const auto &Inst : instructions(Fn)) {
// look at all calls without a direct callee.
@@ -559,6 +561,9 @@ void SplitGraph::buildGraph(CallGraph &CG) {
if (handleCalleesMD(Inst, KnownCallees))
continue;
+ // If we failed to parse any !callees MD, or some was missing,
+ // the entire KnownCallees list is now unreliable.
+ KnownCallees.clear();
// Everything else is handled conservatively. If we fall into the
// conservative case don't bother analyzing further.
>From 76fbf52e0e3ba4284812242ae06c2a4b09ff919e Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Mon, 7 Oct 2024 07:59:39 +0200
Subject: [PATCH 7/7] rebase
---
llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
index f1f7903562fbde..7d86e0c72bd075 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
@@ -553,8 +553,6 @@ void SplitGraph::buildGraph(CallGraph &CG) {
// inline assembly can be ignored, unless InlineAsmIsIndirectCall is
// true.
if (CB->isInlineAsm()) {
- if (InlineAsmIsIndirectCall)
- HasUnknownIndirectCall = true;
LLVM_DEBUG(dbgs() << " found inline assembly\n");
continue;
}
More information about the llvm-commits
mailing list