[llvm] [MachineOutliner] Preserve regmasks in calls to outlined functions (PR #120940)
Zhaoxuan Jiang via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 19 03:11:47 PST 2025
https://github.com/nocchijiang updated https://github.com/llvm/llvm-project/pull/120940
>From 5e5937cacaa6b6321143a83ad0358f61f8ab72fc Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan94 at gmail.com>
Date: Mon, 23 Dec 2024 15:57:06 +0800
Subject: [PATCH 1/4] [MachineOutliner] Preserve regmasks in calls to outlined
functions
When emitting calls to an outlined function, the register masks from the
outlined sequence are lost. The AArch64CollectLOH pass, which I plan to
move to PreEmitPass2 (which runs after MachineOutliner), relies on
accurate register masks. This patch ensures that regmasks are correctly
preserved in the outlined calls, maintaining the required accuracy for
subsequent passes. If the outlined sequence contains a single distinct
regmask, it is reused as-is on the call; if it contains several, they are
merged with a bitwise AND into a newly allocated regmask.
---
llvm/lib/CodeGen/MachineOutliner.cpp | 25 +++++
.../AArch64/machine-outliner-regmask.mir | 94 +++++++++++++++++++
2 files changed, 119 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/machine-outliner-regmask.mir
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index e3fe9494acef8..5ff79b64d63ba 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -1117,6 +1117,7 @@ bool MachineOutliner::outline(
// instruction. It also updates call site information for moved
// code.
SmallSet<Register, 2> UseRegs, DefRegs;
+ SmallPtrSet<const uint32_t *, 2> RegMasks;
// Copy over the defs in the outlined range.
// First inst in outlined range <-- Anything that's defined in this
// ... .. range has to be added as an
@@ -1130,6 +1131,12 @@ bool MachineOutliner::outline(
MachineInstr *MI = &*Iter;
SmallSet<Register, 2> InstrUseRegs;
for (MachineOperand &MOP : MI->operands()) {
+ // Collect all regmasks. Merge them in the end.
+ if (MOP.isRegMask()) {
+ RegMasks.insert(MOP.getRegMask());
+ continue;
+ }
+
// Skip over anything that isn't a register.
if (!MOP.isReg())
continue;
@@ -1153,6 +1160,24 @@ bool MachineOutliner::outline(
MI->getMF()->eraseAdditionalCallInfo(MI);
}
+ if (!RegMasks.empty()) {
+ if (RegMasks.size() == 1) {
+ CallInst->addOperand(
+ MachineOperand::CreateRegMask(*RegMasks.begin()));
+ } else {
+ uint32_t *RegMask = MF->allocateRegMask();
+ unsigned NumRegs =
+ MF->getSubtarget().getRegisterInfo()->getNumRegs();
+ unsigned Size = MachineOperand::getRegMaskSize(NumRegs);
+ memset(RegMask, UINT32_MAX, Size * sizeof(RegMask[0]));
+ for (const uint32_t *Mask : RegMasks) {
+ for (unsigned I = 0; I < Size; ++I)
+ RegMask[I] &= Mask[I];
+ }
+ CallInst->addOperand(MachineOperand::CreateRegMask(RegMask));
+ }
+ }
+
for (const Register &I : DefRegs)
// If it's a def, add it to the call instruction.
CallInst->addOperand(
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-regmask.mir b/llvm/test/CodeGen/AArch64/machine-outliner-regmask.mir
new file mode 100644
index 0000000000000..047a73f81dd2a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-regmask.mir
@@ -0,0 +1,94 @@
+# RUN: llc -mtriple=aarch64-apple-ios -run-pass=prologepilog -run-pass=machine-outliner %s -o - | FileCheck %s
+--- |
+ declare swiftcc void @bar()
+ declare void @baz(i32, i32, i32) #0
+
+ define void @test_same_regmask() #0 {
+ ret void
+ }
+ define void @test_different_regmasks() #0 {
+ ret void
+ }
+ define void @foo(i32, i32, i32, i32, i32, i32, i32, i32) #0 {
+ ret void
+ }
+
+...
+---
+name: foo
+tracksRegLiveness: true
+body: |
+ bb.0:
+ RET undef $lr
+
+
+...
+---
+name: test_same_regmask
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: test_same_regmask
+ ; CHECK: bb.1:
+ ; CHECK-NEXT: BL @OUTLINED_FUNCTION_1, csr_aarch64_aapcs
+
+ bb.0:
+ $sp = frame-setup SUBXri $sp, 16, 0
+
+ bb.1:
+ $w0 = MOVZWi 1, 0
+ $w1 = MOVZWi 2, 0
+ $w2 = MOVZWi 3, 0
+ BL @baz, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit-def $sp
+ $w0 = MOVZWi 1, 0
+ $w1 = MOVZWi 2, 0
+ $w2 = MOVZWi 3, 0
+ BL @baz, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit-def $sp
+ $sp = ADDXri $sp, 16, 0
+ RET undef $lr
+
+
+...
+---
+name: test_different_regmasks
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: test_different_regmasks
+ ; CHECK: bb.1:
+ ; CHECK-NEXT: BL @OUTLINED_FUNCTION_0, CustomRegMask($fp,$lr,$wzr,$wzr_hi,$xzr,$b8,$b9,$b10,$b11,$b12,$b13,$b14,$b15,$d8,$d9,$d10,$d11,$d12,$d13,$d14,$d15,$h8,$h9,$h10,$h11,$h12,$h13,$h14,$h15,$s8,$s9,$s10,$s11,$s12,$s13,$s14,$s15,$w19,$w20,$w22,$w23,$w24,$w25,$w26,$w27,$w28,$w29,$w30,$x19,$x20,$x22,$x23,$x24,$x25,$x26,$x27,$x28,$b8_hi,$b9_hi,$b10_hi,$b11_hi,$b12_hi,$b13_hi,$b14_hi,$b15_hi,$h8_hi,$h9_hi,$h10_hi,$h11_hi,$h12_hi,$h13_hi,$h14_hi,$h15_hi,$s8_hi,$s9_hi,$s10_hi,$s11_hi,$s12_hi,$s13_hi,$s14_hi,$s15_hi,$w19_hi,$w20_hi,$w22_hi,$w23_hi,$w24_hi,$w25_hi,$w26_hi,$w27_hi,$w28_hi,$w29_hi,$w30_hi,$d8_d9,$d9_d10,$d10_d11,$d11_d12,$d12_d13,$d13_d14,$d14_d15,$d8_d9_d10_d11,$d9_d10_d11_d12,$d10_d11_d12_d13,$d11_d12_d13_d14,$d12_d13_d14_d15,$d8_d9_d10,$d9_d10_d11,$d10_d11_d12,$d11_d12_d13,$d12_d13_d14,$d13_d14_d15,$x22_x23_x24_x25_x26_x27_x28_fp,$w22_w23,$w24_w25,$w26_w27,$w28_w29,$x28_fp,$x22_x23,$x24_x25,$x26_x27)
+
+ bb.0:
+ $sp = frame-setup SUBXri $sp, 16, 0
+
+ bb.1:
+ $w0 = MOVZWi 1, 0
+ $w1 = MOVZWi 2, 0
+ $w2 = MOVZWi 3, 0
+ $w3 = MOVZWi 4, 0
+ $w4 = MOVZWi 5, 0
+ $w5 = MOVZWi 6, 0
+ $w6 = MOVZWi 7, 0
+ $w7 = MOVZWi 8, 0
+ BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit killed $w3, implicit killed $w4, implicit killed $w5, implicit killed $w6, implicit killed $w7, implicit-def $sp
+ BL @bar, csr_aarch64_aapcs_swifterror, implicit-def dead $lr, implicit $sp, implicit-def $sp
+ $w0 = MOVZWi 1, 0
+ $w1 = MOVZWi 2, 0
+ $w2 = MOVZWi 3, 0
+ $w3 = MOVZWi 4, 0
+ $w4 = MOVZWi 5, 0
+ $w5 = MOVZWi 6, 0
+ $w6 = MOVZWi 7, 0
+ $w7 = MOVZWi 8, 0
+ BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit killed $w3, implicit killed $w4, implicit killed $w5, implicit killed $w6, implicit killed $w7, implicit-def $sp
+ BL @bar, csr_aarch64_aapcs_swifterror, implicit-def dead $lr, implicit $sp, implicit-def $sp
+ $w0 = MOVZWi 1, 0
+ $w1 = MOVZWi 2, 0
+ $w2 = MOVZWi 3, 0
+ $w3 = MOVZWi 4, 0
+ $w4 = MOVZWi 5, 0
+ $w5 = MOVZWi 6, 0
+ $w6 = MOVZWi 7, 0
+ $w7 = MOVZWi 8, 0
+ BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit killed $w3, implicit killed $w4, implicit killed $w5, implicit killed $w6, implicit killed $w7, implicit-def $sp
+ BL @bar, csr_aarch64_aapcs_swifterror, implicit-def dead $lr, implicit $sp, implicit-def $sp
+ $sp = ADDXri $sp, 16, 0
+ RET undef $lr
>From 18723beab1bebf851143d33767d87c0fc1c6d5cd Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan94 at gmail.com>
Date: Wed, 25 Dec 2024 08:53:50 +0800
Subject: [PATCH 2/4] add missing attribute
---
llvm/test/CodeGen/AArch64/machine-outliner-regmask.mir | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-regmask.mir b/llvm/test/CodeGen/AArch64/machine-outliner-regmask.mir
index 047a73f81dd2a..eb996794e88de 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-regmask.mir
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-regmask.mir
@@ -13,6 +13,7 @@
ret void
}
+ attributes #0 = { minsize }
...
---
name: foo
>From e837f7e0928e0205eed5a19b7ce20f0b36ba48b0 Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan94 at gmail.com>
Date: Fri, 27 Dec 2024 13:20:30 +0800
Subject: [PATCH 3/4] remove redundant braces
Co-authored-by: Ellis Hoag <ellis.sparky.hoag at gmail.com>
---
llvm/lib/CodeGen/MachineOutliner.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index 5ff79b64d63ba..c57977e11612f 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -1170,10 +1170,9 @@ bool MachineOutliner::outline(
MF->getSubtarget().getRegisterInfo()->getNumRegs();
unsigned Size = MachineOperand::getRegMaskSize(NumRegs);
memset(RegMask, UINT32_MAX, Size * sizeof(RegMask[0]));
- for (const uint32_t *Mask : RegMasks) {
+ for (const uint32_t *Mask : RegMasks)
for (unsigned I = 0; I < Size; ++I)
RegMask[I] &= Mask[I];
- }
CallInst->addOperand(MachineOperand::CreateRegMask(RegMask));
}
}
>From 5b5032b0a5284635a184317ebc64d1b295328a24 Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan94 at gmail.com>
Date: Wed, 19 Feb 2025 19:09:26 +0800
Subject: [PATCH 4/4] encapsulate regmask creation
---
llvm/include/llvm/CodeGen/MachineFunction.h | 2 ++
llvm/lib/CodeGen/MachineFunction.cpp | 8 ++++++++
llvm/lib/CodeGen/MachineOutliner.cpp | 13 ++++++-------
3 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index f1e595cde54e3..1dbd2edf96028 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -1148,6 +1148,8 @@ class LLVM_ABI MachineFunction {
/// Allocate and initialize a register mask with @p NumRegister bits.
uint32_t *allocateRegMask();
+ MutableArrayRef<uint32_t> allocateRegMaskArray();
+
ArrayRef<int> allocateShuffleMask(ArrayRef<int> Mask);
/// Allocate and construct an extra info structure for a `MachineInstr`.
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index 6e0342a763d15..0b4453dacf703 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -630,6 +630,14 @@ uint32_t *MachineFunction::allocateRegMask() {
return Mask;
}
+MutableArrayRef<uint32_t> MachineFunction::allocateRegMaskArray() {
+ unsigned NumRegs = getSubtarget().getRegisterInfo()->getNumRegs();
+ unsigned Size = MachineOperand::getRegMaskSize(NumRegs);
+ uint32_t *Mask = Allocator.Allocate<uint32_t>(Size);
+ memset(Mask, 0, Size * sizeof(Mask[0]));
+ return {Mask, Size};
+}
+
ArrayRef<int> MachineFunction::allocateShuffleMask(ArrayRef<int> Mask) {
int* AllocMask = Allocator.Allocate<int>(Mask.size());
copy(Mask, AllocMask);
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index c57977e11612f..ccaa34c3b84fe 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -78,6 +78,7 @@
#include "llvm/Support/SuffixTree.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <cstdint>
#include <tuple>
#include <vector>
@@ -1165,15 +1166,13 @@ bool MachineOutliner::outline(
CallInst->addOperand(
MachineOperand::CreateRegMask(*RegMasks.begin()));
} else {
- uint32_t *RegMask = MF->allocateRegMask();
- unsigned NumRegs =
- MF->getSubtarget().getRegisterInfo()->getNumRegs();
- unsigned Size = MachineOperand::getRegMaskSize(NumRegs);
- memset(RegMask, UINT32_MAX, Size * sizeof(RegMask[0]));
+ auto RegMask = MF->allocateRegMaskArray();
+ for (unsigned I = 0; I < RegMask.size(); ++I)
+ RegMask[I] = UINT32_MAX;
for (const uint32_t *Mask : RegMasks)
- for (unsigned I = 0; I < Size; ++I)
+ for (unsigned I = 0; I < RegMask.size(); ++I)
RegMask[I] &= Mask[I];
- CallInst->addOperand(MachineOperand::CreateRegMask(RegMask));
+ CallInst->addOperand(MachineOperand::CreateRegMask(RegMask.data()));
}
}
More information about the llvm-commits
mailing list