[clang] [clang][AArch64] Pass down stack clash protection options to LLVM/Backend (PR #68993)
Momchil Velikov via cfe-commits
cfe-commits at lists.llvm.org
Sat Oct 28 11:29:49 PDT 2023
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/68993
>From ed580b95157d7f423c5384fa2d51af00f1359a10 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Mon, 2 Oct 2023 14:46:27 +0100
Subject: [PATCH 1/3] [CFIFixup] Allow function prologues to span more than one
basic block
The CFIFixup pass assumes a function prologue is contained in a single
basic block. This assumption is broken with upcoming support for stack
probing (`-fstack-clash-protection`) in AArch64 - the emitted probing
sequence in a prologue may contain loops, i.e. more than one basic
block. The generated CFG is not arbitrary though:
* CFI instructions are outside of any loops
* for any two CFI instructions of the function prologue one dominates
and is post-dominated by the other
Thus, for the prologue CFI instructions, if one is
executed then all are executed, there is a total order of
executions, and the last instruction in that order can be considered
the end of the prologue for the purpose of inserting the initial
`.cfi_remember_state` directive.
That last instruction is found by finding the first block in the
post-order traversal which contains prologue CFI instructions.
---
llvm/lib/CodeGen/CFIFixup.cpp | 62 ++--
.../cfi-fixup-multi-block-prologue.mir | 308 ++++++++++++++++++
2 files changed, 347 insertions(+), 23 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir
diff --git a/llvm/lib/CodeGen/CFIFixup.cpp b/llvm/lib/CodeGen/CFIFixup.cpp
index 837dbd77d07361a..964a8d56511fa1b 100644
--- a/llvm/lib/CodeGen/CFIFixup.cpp
+++ b/llvm/lib/CodeGen/CFIFixup.cpp
@@ -10,20 +10,25 @@
// This pass inserts the necessary instructions to adjust for the inconsistency
// of the call-frame information caused by final machine basic block layout.
// The pass relies in constraints LLVM imposes on the placement of
-// save/restore points (cf. ShrinkWrap):
-// * there is a single basic block, containing the function prologue
+// save/restore points (cf. ShrinkWrap) and has certain preconditions about
+// placement of CFI instructions:
+// * for any two CFI instructions of the function prologue one dominates
+// and is post-dominated by the other
// * possibly multiple epilogue blocks, where each epilogue block is
// complete and self-contained, i.e. CSR restore instructions (and the
// corresponding CFI instructions are not split across two or more blocks.
-// * prologue and epilogue blocks are outside of any loops
-// Thus, during execution, at the beginning and at the end of each basic block
-// the function can be in one of two states:
+// * CFI instructions are not contained in any loops
+// Thus, during execution, at the beginning and at the end of each basic block,
+// following the prologue, the function can be in one of two states:
// - "has a call frame", if the function has executed the prologue, and
// has not executed any epilogue
// - "does not have a call frame", if the function has not executed the
// prologue, or has executed an epilogue
// which can be computed by a single RPO traversal.
+// The location of the prologue is determined by finding the first block in the
+// post-order traversal which contains CFI instructions.
+
// In order to accommodate backends which do not generate unwind info in
// epilogues we compute an additional property "strong no call frame on entry",
// which is set for the entry point of the function and for every block
@@ -85,10 +90,6 @@ static bool isPrologueCFIInstruction(const MachineInstr &MI) {
MI.getFlag(MachineInstr::FrameSetup);
}
-static bool containsPrologue(const MachineBasicBlock &MBB) {
- return llvm::any_of(MBB.instrs(), isPrologueCFIInstruction);
-}
-
static bool containsEpilogue(const MachineBasicBlock &MBB) {
return llvm::any_of(llvm::reverse(MBB), [](const auto &MI) {
return MI.getOpcode() == TargetOpcode::CFI_INSTRUCTION &&
@@ -96,6 +97,25 @@ static bool containsEpilogue(const MachineBasicBlock &MBB) {
});
}
+static MachineBasicBlock *
+findPrologueEnd(MachineFunction &MF, MachineBasicBlock::iterator &PrologueEnd) {
+ MachineBasicBlock *PrologueBlock = nullptr;
+ for (auto It = po_begin(&MF.front()), End = po_end(&MF.front()); It != End;
+ ++It) {
+ MachineBasicBlock *MBB = *It;
+ llvm::for_each(MBB->instrs(), [&](MachineInstr &MI) {
+ if (isPrologueCFIInstruction(MI)) {
+ PrologueBlock = MBB;
+ PrologueEnd = std::next(MI.getIterator());
+ }
+ });
+ if (PrologueBlock)
+ return PrologueBlock;
+ }
+
+ return nullptr;
+}
+
bool CFIFixup::runOnMachineFunction(MachineFunction &MF) {
const TargetFrameLowering &TFL = *MF.getSubtarget().getFrameLowering();
if (!TFL.enableCFIFixup(MF))
@@ -105,6 +125,14 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) {
if (NumBlocks < 2)
return false;
+ // Find the prologue and the point where we can issue the first
+ // `.cfi_remember_state`.
+
+ MachineBasicBlock::iterator PrologueEnd;
+ MachineBasicBlock *PrologueBlock = findPrologueEnd(MF, PrologueEnd);
+ if (PrologueBlock == nullptr)
+ return false;
+
struct BlockFlags {
bool Reachable : 1;
bool StrongNoFrameOnEntry : 1;
@@ -116,21 +144,15 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) {
BlockInfo[0].StrongNoFrameOnEntry = true;
// Compute the presence/absence of frame at each basic block.
- MachineBasicBlock *PrologueBlock = nullptr;
ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
for (MachineBasicBlock *MBB : RPOT) {
BlockFlags &Info = BlockInfo[MBB->getNumber()];
// Set to true if the current block contains the prologue or the epilogue,
// respectively.
- bool HasPrologue = false;
+ bool HasPrologue = MBB == PrologueBlock;
bool HasEpilogue = false;
- if (!PrologueBlock && !Info.HasFrameOnEntry && containsPrologue(*MBB)) {
- PrologueBlock = MBB;
- HasPrologue = true;
- }
-
if (Info.HasFrameOnEntry || HasPrologue)
HasEpilogue = containsEpilogue(*MBB);
@@ -149,9 +171,6 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) {
}
}
- if (!PrologueBlock)
- return false;
-
// Walk the blocks of the function in "physical" order.
// Every block inherits the frame state (as recorded in the unwind tables)
// of the previous block. If the intended frame state is different, insert
@@ -162,10 +181,7 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) {
// insert a `.cfi_remember_state`, in the case that the current block needs a
// `.cfi_restore_state`.
MachineBasicBlock *InsertMBB = PrologueBlock;
- MachineBasicBlock::iterator InsertPt = PrologueBlock->begin();
- for (MachineInstr &MI : *PrologueBlock)
- if (isPrologueCFIInstruction(MI))
- InsertPt = std::next(MI.getIterator());
+ MachineBasicBlock::iterator InsertPt = PrologueEnd;
assert(InsertPt != PrologueBlock->begin() &&
"Inconsistent notion of \"prologue block\"");
diff --git a/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir b/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir
new file mode 100644
index 000000000000000..31fa3832367becc
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir
@@ -0,0 +1,308 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -run-pass=cfi-fixup %s -o - | FileCheck %s
+--- |
+ source_filename = "cfi-fixup.ll"
+ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+ target triple = "aarch64-linux"
+
+ define i32 @f(i32 %x) #0 {
+ entry:
+ %p = alloca i8, i32 30000, align 1
+ switch i32 %x, label %if.end7 [
+ i32 0, label %return
+ i32 1, label %if.then2
+ i32 2, label %if.then5
+ ]
+
+ if.then2: ; preds = %entry
+ %call = tail call i32 @g1(i32 1)
+ %add = add nsw i32 %call, 1
+ br label %return
+
+ if.then5: ; preds = %entry
+ %call6 = tail call i32 @g0(i32 2)
+ %sub = sub nsw i32 1, %call6
+ br label %return
+
+ if.end7: ; preds = %entry
+ br label %return
+
+ return: ; preds = %if.end7, %if.then5, %if.then2, %entry
+ %retval.0 = phi i32 [ %add, %if.then2 ], [ %sub, %if.then5 ], [ 0, %if.end7 ], [ 1, %entry ]
+ ret i32 %retval.0
+ }
+
+ declare i32 @g1(i32)
+
+ declare i32 @g0(i32)
+
+ attributes #0 = { uwtable "probe-stack"="inline-asm" }
+
+...
+---
+name: f
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHCatchret: false
+hasEHScopes: false
+hasEHFunclets: false
+isOutlined: false
+debugInstrRef: false
+failsVerification: false
+tracksDebugUserValues: true
+registers: []
+liveins:
+ - { reg: '$w0', virtual-reg: '' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 30016
+ offsetAdjustment: 0
+ maxAlignment: 8
+ adjustsStack: true
+ hasCalls: true
+ stackProtector: ''
+ functionContext: ''
+ maxCallFrameSize: 0
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ localFrameSize: 30000
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack:
+ - { id: 0, name: p, type: default, offset: -30016, size: 30000, alignment: 1,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: -30000, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+ - { id: 1, name: '', type: spill-slot, offset: -8, size: 8, alignment: 8,
+ stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 2, name: '', type: spill-slot, offset: -16, size: 8, alignment: 8,
+ stack-id: default, callee-saved-register: '$fp', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+ hasRedZone: false
+body: |
+ ; CHECK-LABEL: name: f
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $w0, $lr, $fp
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store (s64) into %stack.2), (store (s64) into %stack.1)
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
+ ; CHECK-NEXT: $x9 = frame-setup SUBXri $sp, 7, 12
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w9, 28688
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.entry:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $x9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1, 12
+ ; CHECK-NEXT: $xzr = frame-setup SUBSXrx64 $sp, $x9, 24, implicit-def $nzcv
+ ; CHECK-NEXT: frame-setup STRXui $xzr, $sp, 0
+ ; CHECK-NEXT: frame-setup Bcc 1, %bb.1, implicit killed $nzcv
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.entry:
+ ; CHECK-NEXT: successors: %bb.6(0x20000000), %bb.3(0x60000000)
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $wsp
+ ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1328, 0
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 30016
+ ; CHECK-NEXT: CFI_INSTRUCTION remember_state
+ ; CHECK-NEXT: frame-setup STRXui $xzr, $sp, 0
+ ; CHECK-NEXT: CBZW renamable $w0, %bb.6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.entry:
+ ; CHECK-NEXT: successors: %bb.7(0x2aaaaaab), %bb.4(0x55555555)
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead $wzr = SUBSWri renamable $w0, 2, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.7, implicit killed $nzcv
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4.entry:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.8(0x40000000)
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 1, %bb.8, implicit killed $nzcv
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5.if.then2:
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: BL @g1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0
+ ; CHECK-NEXT: renamable $w0 = nsw ADDWri killed renamable $w0, 1, 0
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 7, 12
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 1344
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1328, 0
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+ ; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w30
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29
+ ; CHECK-NEXT: RET undef $lr, implicit killed $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: CFI_INSTRUCTION restore_state
+ ; CHECK-NEXT: CFI_INSTRUCTION remember_state
+ ; CHECK-NEXT: renamable $w0 = MOVZWi 1, 0
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 7, 12
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 1344
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1328, 0
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+ ; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w30
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29
+ ; CHECK-NEXT: RET undef $lr, implicit killed $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7.if.then5:
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: CFI_INSTRUCTION restore_state
+ ; CHECK-NEXT: CFI_INSTRUCTION remember_state
+ ; CHECK-NEXT: BL @g0, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0
+ ; CHECK-NEXT: renamable $w8 = MOVZWi 1, 0
+ ; CHECK-NEXT: $w0 = SUBWrs killed renamable $w8, killed renamable $w0, 0
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 7, 12
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 1344
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1328, 0
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+ ; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w30
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29
+ ; CHECK-NEXT: RET undef $lr, implicit killed $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.8.if.end7:
+ ; CHECK-NEXT: CFI_INSTRUCTION restore_state
+ ; CHECK-NEXT: $w0 = ORRWrs $wzr, $wzr, 0
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 7, 12
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 1344
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1328, 0
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+ ; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w30
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29
+ ; CHECK-NEXT: RET undef $lr, implicit killed $w0
+ bb.0.entry:
+ successors: %bb.1(0x80000000)
+ liveins: $w0, $lr, $fp
+
+ early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store (s64) into %stack.2), (store (s64) into %stack.1)
+ frame-setup CFI_INSTRUCTION def_cfa_offset 16
+ frame-setup CFI_INSTRUCTION offset $w30, -8
+ frame-setup CFI_INSTRUCTION offset $w29, -16
+ $x9 = frame-setup SUBXri $sp, 7, 12
+ frame-setup CFI_INSTRUCTION def_cfa $w9, 28688
+
+ bb.1.entry:
+ successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ liveins: $x9
+
+ $sp = frame-setup SUBXri $sp, 1, 12
+ $xzr = frame-setup SUBSXrx64 $sp, $x9, 24, implicit-def $nzcv
+ frame-setup STRXui $xzr, $sp, 0
+ frame-setup Bcc 1, %bb.1, implicit killed $nzcv
+
+ bb.2.entry:
+ successors: %bb.6(0x20000000), %bb.3(0x60000000)
+ liveins: $w0
+
+ frame-setup CFI_INSTRUCTION def_cfa_register $wsp
+ $sp = frame-setup SUBXri $sp, 1328, 0
+ frame-setup CFI_INSTRUCTION def_cfa_offset 30016
+ frame-setup STRXui $xzr, $sp, 0
+ CBZW renamable $w0, %bb.6
+
+ bb.3.entry:
+ successors: %bb.7(0x2aaaaaab), %bb.4(0x55555555)
+ liveins: $w0
+
+ dead $wzr = SUBSWri renamable $w0, 2, 0, implicit-def $nzcv
+ Bcc 0, %bb.7, implicit killed $nzcv
+
+ bb.4.entry:
+ successors: %bb.5(0x40000000), %bb.8(0x40000000)
+ liveins: $w0
+
+ dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv
+ Bcc 1, %bb.8, implicit killed $nzcv
+
+ bb.5.if.then2:
+ liveins: $w0
+
+ BL @g1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0
+ renamable $w0 = nsw ADDWri killed renamable $w0, 1, 0
+ $sp = frame-destroy ADDXri $sp, 7, 12
+ frame-destroy CFI_INSTRUCTION def_cfa_offset 1344
+ $sp = frame-destroy ADDXri $sp, 1328, 0
+ frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+ early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+ frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+ frame-destroy CFI_INSTRUCTION restore $w30
+ frame-destroy CFI_INSTRUCTION restore $w29
+ RET undef $lr, implicit killed $w0
+
+ bb.6:
+ renamable $w0 = MOVZWi 1, 0
+ $sp = frame-destroy ADDXri $sp, 7, 12
+ frame-destroy CFI_INSTRUCTION def_cfa_offset 1344
+ $sp = frame-destroy ADDXri $sp, 1328, 0
+ frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+ early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+ frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+ frame-destroy CFI_INSTRUCTION restore $w30
+ frame-destroy CFI_INSTRUCTION restore $w29
+ RET undef $lr, implicit killed $w0
+
+ bb.7.if.then5:
+ liveins: $w0
+
+ BL @g0, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0
+ renamable $w8 = MOVZWi 1, 0
+ $w0 = SUBWrs killed renamable $w8, killed renamable $w0, 0
+ $sp = frame-destroy ADDXri $sp, 7, 12
+ frame-destroy CFI_INSTRUCTION def_cfa_offset 1344
+ $sp = frame-destroy ADDXri $sp, 1328, 0
+ frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+ early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+ frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+ frame-destroy CFI_INSTRUCTION restore $w30
+ frame-destroy CFI_INSTRUCTION restore $w29
+ RET undef $lr, implicit killed $w0
+
+ bb.8.if.end7:
+ $w0 = ORRWrs $wzr, $wzr, 0
+ $sp = frame-destroy ADDXri $sp, 7, 12
+ frame-destroy CFI_INSTRUCTION def_cfa_offset 1344
+ $sp = frame-destroy ADDXri $sp, 1328, 0
+ frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+ early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+ frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+ frame-destroy CFI_INSTRUCTION restore $w30
+ frame-destroy CFI_INSTRUCTION restore $w29
+ RET undef $lr, implicit killed $w0
+
+...
>From c0c2083d623f56c3929c0101d8b1f1a85d0c888a Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Sat, 28 Oct 2023 13:33:28 +0100
Subject: [PATCH 2/3] Reverse iteration within a block when looking for
prologue CFI insns
---
llvm/lib/CodeGen/CFIFixup.cpp | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/CodeGen/CFIFixup.cpp b/llvm/lib/CodeGen/CFIFixup.cpp
index 964a8d56511fa1b..40a2a3a142e1758 100644
--- a/llvm/lib/CodeGen/CFIFixup.cpp
+++ b/llvm/lib/CodeGen/CFIFixup.cpp
@@ -99,20 +99,16 @@ static bool containsEpilogue(const MachineBasicBlock &MBB) {
static MachineBasicBlock *
findPrologueEnd(MachineFunction &MF, MachineBasicBlock::iterator &PrologueEnd) {
- MachineBasicBlock *PrologueBlock = nullptr;
for (auto It = po_begin(&MF.front()), End = po_end(&MF.front()); It != End;
++It) {
MachineBasicBlock *MBB = *It;
- llvm::for_each(MBB->instrs(), [&](MachineInstr &MI) {
- if (isPrologueCFIInstruction(MI)) {
- PrologueBlock = MBB;
- PrologueEnd = std::next(MI.getIterator());
- }
- });
- if (PrologueBlock)
- return PrologueBlock;
+ for (MachineInstr &MI : reverse(MBB->instrs())) {
+ if (!isPrologueCFIInstruction(MI))
+ continue;
+ PrologueEnd = std::next(MI.getIterator());
+ return MBB;
+ }
}
-
return nullptr;
}
>From 2440b603aa8307364a4897e6782a60c2738c5912 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Wed, 11 Oct 2023 17:22:51 +0100
Subject: [PATCH 3/3] [clang][AArch64] Pass down stack clash protection options
to LLVM/Backend
---
clang/lib/CodeGen/CodeGenModule.cpp | 12 +++++++++++-
clang/lib/Driver/ToolChains/Clang.cpp | 2 +-
clang/test/CodeGen/stack-clash-protection.c | 16 ++++++++++++----
3 files changed, 24 insertions(+), 6 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index b1a6683a66bd052..517021cb3c89bf1 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1076,6 +1076,16 @@ void CodeGenModule::Release() {
"sign-return-address-with-bkey", 1);
}
+ if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be) {
+ auto *InlineAsm = llvm::MDString::get(TheModule.getContext(), "inline-asm");
+ if (CodeGenOpts.StackClashProtector)
+ getModule().addModuleFlag(llvm::Module::Override, "probe-stack",
+ InlineAsm);
+ if (CodeGenOpts.StackProbeSize && CodeGenOpts.StackProbeSize != 4096)
+ getModule().addModuleFlag(llvm::Module::Min, "stack-probe-size",
+ CodeGenOpts.StackProbeSize);
+ }
+
if (!CodeGenOpts.MemoryProfileOutput.empty()) {
llvm::LLVMContext &Ctx = TheModule.getContext();
getModule().addModuleFlag(
@@ -2287,7 +2297,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
if ((!D || !D->hasAttr<NoUwtableAttr>()) && CodeGenOpts.UnwindTables)
B.addUWTableAttr(llvm::UWTableKind(CodeGenOpts.UnwindTables));
- if (CodeGenOpts.StackClashProtector)
+ if (CodeGenOpts.StackClashProtector && !getTarget().getTriple().isAArch64())
B.addAttribute("probe-stack", "inline-asm");
if (!hasUnwindExceptions(LangOpts))
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 43a92adbef64ba8..83a6e679fa19a6d 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -3507,7 +3507,7 @@ static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args,
return;
if (!EffectiveTriple.isX86() && !EffectiveTriple.isSystemZ() &&
- !EffectiveTriple.isPPC64())
+ !EffectiveTriple.isPPC64() && !EffectiveTriple.isAArch64())
return;
Args.addOptInFlag(CmdArgs, options::OPT_fstack_clash_protection,
diff --git a/clang/test/CodeGen/stack-clash-protection.c b/clang/test/CodeGen/stack-clash-protection.c
index 67571f5cdb2c14c..2f502ef453d42f4 100644
--- a/clang/test/CodeGen/stack-clash-protection.c
+++ b/clang/test/CodeGen/stack-clash-protection.c
@@ -1,10 +1,12 @@
// Check the correct function attributes are generated
-// RUN: %clang_cc1 -triple x86_64-linux -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s
-// RUN: %clang_cc1 -triple s390x-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s
-// RUN: %clang_cc1 -triple powerpc64le-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s
-// RUN: %clang_cc1 -triple powerpc64-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux -O0 -S -emit-llvm -o- %s -fstack-clash-protection -mstack-probe-size=8192 | FileCheck %s
+// RUN: %clang_cc1 -triple s390x-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection -mstack-probe-size=8192 | FileCheck %s
+// RUN: %clang_cc1 -triple powerpc64le-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection -mstack-probe-size=8192 | FileCheck %s
+// RUN: %clang_cc1 -triple powerpc64-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection -mstack-probe-size=8192 | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection -mstack-probe-size=8192 | FileCheck %s --check-prefixes CHECK-AARCH64
// CHECK: define{{.*}} void @large_stack() #[[A:.*]] {
+// CHECK-AARCH64: define{{.*}} void @large_stack() #[[A:.*]] {
void large_stack(void) {
volatile int stack[20000], i;
for (i = 0; i < sizeof(stack) / sizeof(int); ++i)
@@ -12,14 +14,20 @@ void large_stack(void) {
}
// CHECK: define{{.*}} void @vla({{.*}}) #[[A:.*]] {
+// CHECK-AARCH64: define{{.*}} void @vla({{.*}}) #[[A:.*]] {
void vla(int n) {
volatile int vla[n];
__builtin_memset(&vla[0], 0, 1);
}
// CHECK: define{{.*}} void @builtin_alloca({{.*}}) #[[A:.*]] {
+// CHECK-AARCH64: define{{.*}} void @builtin_alloca({{.*}}) #[[A:.*]] {
void builtin_alloca(int n) {
volatile void *mem = __builtin_alloca(n);
}
// CHECK: attributes #[[A]] = {{.*}} "probe-stack"="inline-asm"
+// CHECK-AARCH64-NOT: attributes #[[A]] = {{.*}} "probe-stack"
+
+// CHECK-AARCH64: !{i32 4, !"probe-stack", !"inline-asm"}
+// CHECK-AARCH64: !{i32 8, !"stack-probe-size", i32 8192}
More information about the cfe-commits
mailing list