[llvm] [TailDuplicator] Only duplicate the blocks containing computed gotos (PR #114990)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 24 07:21:57 PST 2025
https://github.com/DianQK updated https://github.com/llvm/llvm-project/pull/114990
>From ba44cbfb93632c140aa15317df4aa28992c30ff7 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Tue, 5 Nov 2024 21:41:32 +0800
Subject: [PATCH 1/2] Add a computed goto test
---
.../CodeGen/X86/tail-dup-computed-goto.mir | 255 ++++++++++++++++++
1 file changed, 255 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/tail-dup-computed-goto.mir
diff --git a/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir b/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir
new file mode 100644
index 00000000000000..437989548b4f66
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir
@@ -0,0 +1,255 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication -tail-dup-pred-size=1 -tail-dup-succ-size=1 %s -o - | FileCheck %s
+# Check that only the computed goto is not be restrict by tail-dup-pred-size and tail-dup-succ-size.
+--- |
+ declare i64 @f0()
+ declare i64 @f1()
+ declare i64 @f2()
+ declare i64 @f3()
+ declare i64 @f4()
+ declare i64 @f5()
+ @computed_goto.dispatch = external global [5 x ptr]
+ define void @computed_goto() { ret void }
+ define void @jump_table() { ret void }
+...
+---
+name: computed_goto
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: computed_goto
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f0, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rax
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY [[COPY]]
+ ; CHECK-NEXT: JMP_1 %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f1, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64 = COPY $rax
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY [[COPY2]]
+ ; CHECK-NEXT: JMP_1 %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f2, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr64 = COPY $rax
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr64 = COPY [[COPY4]]
+ ; CHECK-NEXT: JMP_1 %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f3, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr64 = COPY $rax
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:gr64 = COPY [[COPY6]]
+ ; CHECK-NEXT: JMP_1 %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f4, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:gr64 = COPY $rax
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:gr64 = COPY [[COPY8]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:gr64_nosp = PHI [[COPY1]], %bb.0, [[COPY9]], %bb.4, [[COPY7]], %bb.3, [[COPY5]], %bb.2, [[COPY3]], %bb.1
+ ; CHECK-NEXT: JMP64m $noreg, 8, [[PHI]], @computed_goto.dispatch, $noreg
+ bb.0:
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64pcrel32 target-flags(x86-plt) @f0, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %6:gr64 = COPY $rax
+ %0:gr64 = COPY %6
+ JMP_1 %bb.5
+
+ bb.1:
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64pcrel32 target-flags(x86-plt) @f1, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %10:gr64 = COPY $rax
+ %1:gr64 = COPY %10
+ JMP_1 %bb.5
+
+ bb.2:
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64pcrel32 target-flags(x86-plt) @f2, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %9:gr64 = COPY $rax
+ %2:gr64 = COPY %9
+ JMP_1 %bb.5
+
+ bb.3:
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64pcrel32 target-flags(x86-plt) @f3, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %8:gr64 = COPY $rax
+ %3:gr64 = COPY %8
+ JMP_1 %bb.5
+
+ bb.4:
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64pcrel32 target-flags(x86-plt) @f4, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %7:gr64 = COPY $rax
+ %4:gr64 = COPY %7
+
+ bb.5:
+ successors: %bb.1, %bb.2, %bb.3, %bb.4
+
+ %5:gr64_nosp = PHI %0, %bb.0, %4, %bb.4, %3, %bb.3, %2, %bb.2, %1, %bb.1
+ JMP64m $noreg, 8, %5, @computed_goto.dispatch, $noreg
+
+...
+---
+name: jump_table
+tracksRegLiveness: true
+jumpTable:
+ kind: block-address
+ entries:
+ - id: 0
+ blocks: [ '%bb.2', '%bb.3', '%bb.4', '%bb.5', '%bb.6' ]
+body: |
+ ; CHECK-LABEL: name: jump_table
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f0, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rax
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY [[COPY]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x1999999a), %bb.4(0x1999999a), %bb.5(0x1999999a), %bb.6(0x1999999a), %bb.7(0x1999999a)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:gr64 = PHI [[COPY1]], %bb.0, %3, %bb.7, %4, %bb.6, %5, %bb.5, %6, %bb.4, %7, %bb.3
+ ; CHECK-NEXT: [[DEC64r:%[0-9]+]]:gr64_nosp = DEC64r [[PHI]], implicit-def dead $eflags
+ ; CHECK-NEXT: JMP64m $noreg, 8, [[DEC64r]], %jump-table.0, $noreg :: (load (s64) from jump-table)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f1, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64 = COPY $rax
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY [[COPY2]]
+ ; CHECK-NEXT: JMP_1 %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f2, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr64 = COPY $rax
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr64 = COPY [[COPY4]]
+ ; CHECK-NEXT: JMP_1 %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f3, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr64 = COPY $rax
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:gr64 = COPY [[COPY6]]
+ ; CHECK-NEXT: JMP_1 %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f4, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:gr64 = COPY $rax
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:gr64 = COPY [[COPY8]]
+ ; CHECK-NEXT: JMP_1 %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f5, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:gr64 = COPY $rax
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:gr64 = COPY [[COPY10]]
+ ; CHECK-NEXT: JMP_1 %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.8:
+ bb.0:
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64pcrel32 target-flags(x86-plt) @f0, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %7:gr64 = COPY $rax
+ %0:gr64 = COPY %7
+
+ bb.1:
+ %1:gr64 = PHI %0, %bb.0, %6, %bb.6, %5, %bb.5, %4, %bb.4, %3, %bb.3, %2, %bb.2
+ %8:gr64_nosp = DEC64r %1, implicit-def dead $eflags
+
+ bb.8:
+ successors: %bb.2(0x1999999a), %bb.3(0x1999999a), %bb.4(0x1999999a), %bb.5(0x1999999a), %bb.6(0x1999999a)
+
+ JMP64m $noreg, 8, %8, %jump-table.0, $noreg :: (load (s64) from jump-table)
+
+ bb.2:
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64pcrel32 target-flags(x86-plt) @f1, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %13:gr64 = COPY $rax
+ %2:gr64 = COPY %13
+ JMP_1 %bb.1
+
+ bb.3:
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64pcrel32 target-flags(x86-plt) @f2, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %12:gr64 = COPY $rax
+ %3:gr64 = COPY %12
+ JMP_1 %bb.1
+
+ bb.4:
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64pcrel32 target-flags(x86-plt) @f3, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %11:gr64 = COPY $rax
+ %4:gr64 = COPY %11
+ JMP_1 %bb.1
+
+ bb.5:
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64pcrel32 target-flags(x86-plt) @f4, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %10:gr64 = COPY $rax
+ %5:gr64 = COPY %10
+ JMP_1 %bb.1
+
+ bb.6:
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64pcrel32 target-flags(x86-plt) @f5, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %9:gr64 = COPY $rax
+ %6:gr64 = COPY %9
+ JMP_1 %bb.1
+
+ bb.7:
+
+...
>From 3acf7e75add8e33f894bb18052ea7e9ffef6a8ca Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Fri, 24 Jan 2025 22:08:45 +0800
Subject: [PATCH 2/2] [TailDuplicator] Do not restrict the computed gotos
---
llvm/include/llvm/CodeGen/MachineInstr.h | 13 ++++-
llvm/lib/CodeGen/TailDuplicator.cpp | 12 ++++-
.../CodeGen/X86/tail-dup-computed-goto.mir | 48 +++++++++----------
3 files changed, 45 insertions(+), 28 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index 102b1eb07358e4..b26cabe801ee87 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -994,8 +994,17 @@ class MachineInstr
/// Return true if this is an indirect branch, such as a
/// branch through a register.
- bool isIndirectBranch(QueryType Type = AnyInBundle) const {
- return hasProperty(MCID::IndirectBranch, Type);
+ bool isIndirectBranch(QueryType Type = AnyInBundle,
+ bool IncludeJumpTable = true) const {
+ return hasProperty(MCID::IndirectBranch, Type) &&
+ (IncludeJumpTable || !llvm::any_of(operands(), [](const auto &Op) {
+ return Op.isJTI();
+ }));
+ }
+
+ bool isComputedGoto(QueryType Type = AnyInBundle) const {
+ // Jump tables are not considered computed gotos.
+ return isIndirectBranch(Type, /*IncludeJumpTable=*/false);
}
/// Return true if this is a branch which may fall
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
index 6c6d38462484a0..21f75458c90f35 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -601,8 +601,11 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
// that rearrange the predecessors of the indirect branch.
bool HasIndirectbr = false;
- if (!TailBB.empty())
+ bool HasComputedGoto = false;
+ if (!TailBB.empty()) {
HasIndirectbr = TailBB.back().isIndirectBranch();
+ HasComputedGoto = TailBB.back().isComputedGoto();
+ }
if (HasIndirectbr && PreRegAlloc)
MaxDuplicateCount = TailDupIndirectBranchSize;
@@ -660,7 +663,12 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
// Duplicating a BB which has both multiple predecessors and successors will
// may cause huge amount of PHI nodes. If we want to remove this limitation,
// we have to address https://github.com/llvm/llvm-project/issues/78578.
- if (TailBB.pred_size() > TailDupPredSize &&
+ // NB. This basically unfactors computed gotos that were factored early on in
+ // the compilation process to speed up edge based data flow. If we do not
+ // unfactor them again, it can seriously pessimize code with many computed
+ // jumps in the source code, such as interpreters. Therefore we do not
+ // restrict the computed gotos.
+ if (!HasComputedGoto && TailBB.pred_size() > TailDupPredSize &&
TailBB.succ_size() > TailDupSuccSize) {
// If TailBB or any of its successors contains a phi, we may have to add a
// large number of additional phis with additional incoming values.
diff --git a/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir b/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir
index 437989548b4f66..a472dc67d8d51c 100644
--- a/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir
+++ b/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir
@@ -18,59 +18,59 @@ tracksRegLiveness: true
body: |
; CHECK-LABEL: name: computed_goto
; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f0, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rax
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY [[COPY]]
- ; CHECK-NEXT: JMP_1 %bb.5
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_nosp = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_nosp = COPY [[COPY1]]
+ ; CHECK-NEXT: JMP64m $noreg, 8, [[COPY1]], @computed_goto.dispatch, $noreg
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f1, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64 = COPY $rax
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY [[COPY2]]
- ; CHECK-NEXT: JMP_1 %bb.5
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY $rax
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr64_nosp = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr64_nosp = COPY [[COPY4]]
+ ; CHECK-NEXT: JMP64m $noreg, 8, [[COPY4]], @computed_goto.dispatch, $noreg
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f2, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr64 = COPY $rax
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr64 = COPY [[COPY4]]
- ; CHECK-NEXT: JMP_1 %bb.5
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr64 = COPY $rax
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:gr64_nosp = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:gr64_nosp = COPY [[COPY7]]
+ ; CHECK-NEXT: JMP64m $noreg, 8, [[COPY7]], @computed_goto.dispatch, $noreg
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
- ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f3, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr64 = COPY $rax
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:gr64 = COPY [[COPY6]]
- ; CHECK-NEXT: JMP_1 %bb.5
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:gr64 = COPY $rax
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:gr64_nosp = COPY [[COPY9]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:gr64_nosp = COPY [[COPY10]]
+ ; CHECK-NEXT: JMP64m $noreg, 8, [[COPY10]], @computed_goto.dispatch, $noreg
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
- ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f4, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:gr64 = COPY $rax
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:gr64 = COPY [[COPY8]]
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.5:
- ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:gr64_nosp = PHI [[COPY1]], %bb.0, [[COPY9]], %bb.4, [[COPY7]], %bb.3, [[COPY5]], %bb.2, [[COPY3]], %bb.1
- ; CHECK-NEXT: JMP64m $noreg, 8, [[PHI]], @computed_goto.dispatch, $noreg
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:gr64 = COPY $rax
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:gr64_nosp = COPY [[COPY12]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:gr64_nosp = COPY [[COPY13]]
+ ; CHECK-NEXT: JMP64m $noreg, 8, [[COPY13]], @computed_goto.dispatch, $noreg
bb.0:
ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
CALL64pcrel32 target-flags(x86-plt) @f0, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
More information about the llvm-commits
mailing list