[llvm] [AMDGPU][Attributor] Add an option to turn on internalization (PR #108420)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 12 21:27:30 PDT 2024
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/108420
From 5c6ad5d372053ff3de9a30ecb22ebb2bf371dc28 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Thu, 12 Sep 2024 12:21:31 -0400
Subject: [PATCH] [AMDGPU][Attributor] Add an option to turn on internalization
---
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 34 +++++-
.../AMDGPU/indirect-call-internalization.ll | 104 ++++++++++++++++++
2 files changed, 135 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/indirect-call-internalization.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 687a7339da379d..ba178e70477514 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -38,6 +38,11 @@ static cl::opt<unsigned> IndirectCallSpecializationThreshold(
"A threshold controls whether an indirect call will be specialized"),
cl::init(3));
+static cl::opt<bool>
+ DisableInternalization("amdgpu-disable-internalization",
+ cl::desc("Disable function internalization."),
+ cl::Hidden, cl::init(true));
+
#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
enum ImplicitArgumentPositions {
@@ -1031,9 +1036,32 @@ static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
AMDGPUAttributorOptions Options) {
+ bool Changed = false;
+
+ DenseMap<Function *, Function *> InternalizedMap;
+ if (!DisableInternalization) {
+ auto IsCalled = [](Function &F) {
+ for (const User *U : F.users())
+ if (!isa<BlockAddress>(U))
+ return true;
+ return false;
+ };
+
+ SmallPtrSet<Function *, 16> InternalizeFns;
+ for (Function &F : M) {
+ if (F.isDeclaration() || AMDGPU::isEntryFunctionCC(F.getCallingConv()) ||
+ !IsCalled(F) || !Attributor::isInternalizable(F))
+ continue;
+ InternalizeFns.insert(&F);
+ }
+
+ Changed |=
+ Attributor::internalizeFunctions(InternalizeFns, InternalizedMap);
+ }
+
SetVector<Function *> Functions;
for (Function &F : M) {
- if (!F.isIntrinsic())
+ if (!F.isIntrinsic() && !InternalizedMap.lookup(&F))
Functions.insert(&F);
}
@@ -1094,8 +1122,8 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
}
}
- ChangeStatus Change = A.run();
- return Change == ChangeStatus::CHANGED;
+ Changed |= (A.run() == ChangeStatus::CHANGED);
+ return Changed;
}
class AMDGPUAttributorLegacy : public ModulePass {
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call-internalization.ll b/llvm/test/CodeGen/AMDGPU/indirect-call-internalization.ll
new file mode 100644
index 00000000000000..0457686c7994e7
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call-internalization.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck --check-prefixes=EXT %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor -amdgpu-disable-internalization=0 %s | FileCheck --check-prefixes=INT %s
+
+ at G = global i32 0, align 4
+
+define void @callee0() {
+entry:
+ store i32 0, ptr @G, align 4
+ ret void
+}
+
+define void @callee1() {
+entry:
+ store i32 1, ptr @G, align 4
+ ret void
+}
+
+define void @helper(ptr %fn) {
+entry:
+ call void %fn()
+ ret void
+}
+
+define amdgpu_kernel void @foo(i1 %val) {
+entry:
+ %fn = select i1 %val, ptr @callee0, ptr @callee1
+ call void @helper(ptr %fn)
+ ret void
+}
+; EXT-LABEL: define void @callee0(
+; EXT-SAME: ) #[[ATTR0:[0-9]+]] {
+; EXT-NEXT: [[ENTRY:.*:]]
+; EXT-NEXT: store i32 0, ptr @G, align 4
+; EXT-NEXT: ret void
+;
+;
+; EXT-LABEL: define void @callee1(
+; EXT-SAME: ) #[[ATTR0]] {
+; EXT-NEXT: [[ENTRY:.*:]]
+; EXT-NEXT: store i32 1, ptr @G, align 4
+; EXT-NEXT: ret void
+;
+;
+; EXT-LABEL: define void @helper(
+; EXT-SAME: ptr [[FN:%.*]]) #[[ATTR1:[0-9]+]] {
+; EXT-NEXT: [[ENTRY:.*:]]
+; EXT-NEXT: call void [[FN]]()
+; EXT-NEXT: ret void
+;
+;
+; EXT-LABEL: define amdgpu_kernel void @foo(
+; EXT-SAME: i1 [[VAL:%.*]]) #[[ATTR2:[0-9]+]] {
+; EXT-NEXT: [[ENTRY:.*:]]
+; EXT-NEXT: [[FN:%.*]] = select i1 [[VAL]], ptr @callee0, ptr @callee1
+; EXT-NEXT: call void @helper(ptr [[FN]])
+; EXT-NEXT: ret void
+;
+;
+; INT-LABEL: define void @callee0() {
+; INT-NEXT: [[ENTRY:.*:]]
+; INT-NEXT: store i32 0, ptr @G, align 4
+; INT-NEXT: ret void
+;
+;
+; INT-LABEL: define void @callee1() {
+; INT-NEXT: [[ENTRY:.*:]]
+; INT-NEXT: store i32 1, ptr @G, align 4
+; INT-NEXT: ret void
+;
+;
+; INT-LABEL: define private void @helper.internalized(
+; INT-SAME: ptr [[FN:%.*]]) #[[ATTR0:[0-9]+]] {
+; INT-NEXT: [[ENTRY:.*:]]
+; INT-NEXT: [[TMP0:%.*]] = icmp eq ptr [[FN]], @callee1
+; INT-NEXT: br i1 [[TMP0]], label %[[BB1:.*]], label %[[BB2:.*]]
+; INT: [[BB1]]:
+; INT-NEXT: call void @callee1()
+; INT-NEXT: br label %[[BB5:.*]]
+; INT: [[BB2]]:
+; INT-NEXT: br i1 true, label %[[BB3:.*]], label %[[BB4:.*]]
+; INT: [[BB3]]:
+; INT-NEXT: call void @callee0()
+; INT-NEXT: br label %[[BB5]]
+; INT: [[BB4]]:
+; INT-NEXT: unreachable
+; INT: [[BB5]]:
+; INT-NEXT: ret void
+;
+;
+; INT-LABEL: define void @helper(
+; INT-SAME: ptr [[FN:%.*]]) {
+; INT-NEXT: [[ENTRY:.*:]]
+; INT-NEXT: call void [[FN]]()
+; INT-NEXT: ret void
+;
+;
+; INT-LABEL: define amdgpu_kernel void @foo(
+; INT-SAME: i1 [[VAL:%.*]]) #[[ATTR0]] {
+; INT-NEXT: [[ENTRY:.*:]]
+; INT-NEXT: [[FN:%.*]] = select i1 [[VAL]], ptr @callee0, ptr @callee1
+; INT-NEXT: call void @helper.internalized(ptr [[FN]])
+; INT-NEXT: ret void
+;
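For readers who want the pattern outside the diff context, here is a minimal standalone sketch, assuming only the public Attributor API the patch uses; the helper name internalizeEligibleFunctions is illustrative, and the filtering is simplified to the declaration and isInternalizable checks (the patch additionally skips entry functions and uncalled functions):

// Minimal sketch of the Attributor internalization pattern the patch builds
// on; not the patch itself, names and filtering are simplified.
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/IPO/Attributor.h"

using namespace llvm;

static bool internalizeEligibleFunctions(Module &M) {
  // Maps each original function to its newly created private copy.
  DenseMap<Function *, Function *> InternalizedMap;
  SmallPtrSet<Function *, 16> InternalizeFns;

  for (Function &F : M) {
    // Only definitions the Attributor can safely clone are eligible.
    if (F.isDeclaration() || !Attributor::isInternalizable(F))
      continue;
    InternalizeFns.insert(&F);
  }

  // Creates a "<name>.internalized" copy with internal linkage for each
  // function in the set and records the original -> copy mapping.
  return Attributor::internalizeFunctions(InternalizeFns, InternalizedMap);
}

In the patch itself, the returned map is also consulted when building the worklist (!InternalizedMap.lookup(&F)), so the externally visible originals are skipped and only their private copies are optimized; that is what lets the indirect call in @helper.internalized be specialized in the INT checks above while @helper keeps its original body for external callers.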