[clang] [llvm] [clang][AArch64] Pass down stack clash protection options to LLVM/Backend (PR #68993)
Momchil Velikov via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 11 07:58:36 PST 2023
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/68993
>From bfd551c181b8325382247eab80544e69212121aa Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Sat, 11 Nov 2023 11:41:48 +0000
Subject: [PATCH 1/6] [AArch64] Refactor allocation of locals and stack
realignment
Factor out some stack allocation into a separate function. This patch
splits out the generic portion of a larger refactoring done as
a part of stack clash protection support.
---
.../Target/AArch64/AArch64FrameLowering.cpp | 114 +++++++++---------
.../lib/Target/AArch64/AArch64FrameLowering.h | 5 +
.../AArch64/framelayout-sve-basepointer.mir | 4 +-
.../framelayout-sve-fixed-width-access.mir | 2 +-
.../framelayout-sve-scavengingslot.mir | 4 +-
llvm/test/CodeGen/AArch64/framelayout-sve.mir | 55 ++++-----
...nging-call-disable-stackslot-scavenging.ll | 2 +-
.../AArch64/spill-stack-realignment.mir | 2 +-
llvm/test/CodeGen/AArch64/stack-guard-sve.ll | 4 +-
.../AArch64/sve-calling-convention-mixed.ll | 4 +-
.../CodeGen/AArch64/sve-fixed-length-fp128.ll | 4 +-
11 files changed, 105 insertions(+), 95 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 18e3aa2b0ecec86..5f617e3a176a16e 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -296,6 +296,7 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
static bool produceCompactUnwindFrame(MachineFunction &MF);
static bool needsWinCFI(const MachineFunction &MF);
static StackOffset getSVEStackSize(const MachineFunction &MF);
+static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);
/// Returns true if a homogeneous prolog or epilog code can be emitted
/// for the size optimization. If possible, a frame helper call is injected.
@@ -688,6 +689,44 @@ void AArch64FrameLowering::emitCalleeSavedSVERestores(
emitCalleeSavedRestores(MBB, MBBI, true);
}
+void AArch64FrameLowering::allocateStackSpace(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ bool NeedsRealignment, StackOffset AllocSize, bool NeedsWinCFI,
+ bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset) const {
+
+ if (!AllocSize)
+ return;
+
+ DebugLoc DL;
+ MachineFunction &MF = *MBB.getParent();
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ Register TargetReg =
+ NeedsRealignment ? findScratchNonCalleeSaveRegister(&MBB) : AArch64::SP;
+ // SUB Xd/SP, SP, AllocSize
+ emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
+ EmitCFI, InitialOffset);
+
+ if (NeedsRealignment) {
+ const int64_t MaxAlign = MFI.getMaxAlign().value();
+ const uint64_t AndMask = ~(MaxAlign - 1);
+ // AND SP, Xd, 0b11111...0000
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP)
+ .addReg(TargetReg, RegState::Kill)
+ .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
+ .setMIFlags(MachineInstr::FrameSetup);
+ AFI.setStackRealigned(true);
+
+ // No need for SEH instructions here; if we're realigning the stack,
+ // we've set a frame pointer and already finished the SEH prologue.
+ assert(!NeedsWinCFI);
+ }
+}
+
static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) {
switch (Reg.id()) {
default:
@@ -1774,7 +1813,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}
}
- StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {};
+ StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI;
// Process the SVE callee-saves to determine what space needs to be
@@ -1787,67 +1826,32 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
++MBBI;
CalleeSavesEnd = MBBI;
- AllocateBefore = StackOffset::getScalable(CalleeSavedSize);
- AllocateAfter = SVEStackSize - AllocateBefore;
+ SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
+ SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
}
// Allocate space for the callee saves (if any).
- emitFrameOffset(
- MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP, -AllocateBefore, TII,
- MachineInstr::FrameSetup, false, false, nullptr,
- EmitAsyncCFI && !HasFP && AllocateBefore,
- StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
+ StackOffset CFAOffset =
+ StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
+ allocateStackSpace(MBB, CalleeSavesBegin, false, SVECalleeSavesSize, false,
+ nullptr, EmitAsyncCFI && !HasFP, CFAOffset);
+ CFAOffset += SVECalleeSavesSize;
if (EmitAsyncCFI)
emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);
- // Finally allocate remaining SVE stack space.
- emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP,
- -AllocateAfter, TII, MachineInstr::FrameSetup, false, false,
- nullptr, EmitAsyncCFI && !HasFP && AllocateAfter,
- AllocateBefore + StackOffset::getFixed(
- (int64_t)MFI.getStackSize() - NumBytes));
-
- // Allocate space for the rest of the frame.
- if (NumBytes) {
- unsigned scratchSPReg = AArch64::SP;
-
- if (NeedsRealignment) {
- scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
- assert(scratchSPReg != AArch64::NoRegister);
- }
-
- // If we're a leaf function, try using the red zone.
- if (!canUseRedZone(MF)) {
- // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
- // the correct value here, as NumBytes also includes padding bytes,
- // which shouldn't be counted here.
- emitFrameOffset(
- MBB, MBBI, DL, scratchSPReg, AArch64::SP,
- StackOffset::getFixed(-NumBytes), TII, MachineInstr::FrameSetup,
- false, NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
- SVEStackSize +
- StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
- }
- if (NeedsRealignment) {
- assert(MFI.getMaxAlign() > Align(1));
- assert(scratchSPReg != AArch64::SP);
-
- // SUB X9, SP, NumBytes
- // -- X9 is temporary register, so shouldn't contain any live data here,
- // -- free to use. This is already produced by emitFrameOffset above.
- // AND SP, X9, 0b11111...0000
- uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
-
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
- .addReg(scratchSPReg, RegState::Kill)
- .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
- AFI->setStackRealigned(true);
-
- // No need for SEH instructions here; if we're realigning the stack,
- // we've set a frame pointer and already finished the SEH prologue.
- assert(!NeedsWinCFI);
- }
+ // Allocate space for the rest of the frame including SVE locals. Align the
+ // stack as necessary.
+ assert(!(canUseRedZone(MF) && NeedsRealignment) &&
+ "Cannot use redzone with stack realignment");
+ if (!canUseRedZone(MF)) {
+ // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
+ // the correct value here, as NumBytes also includes padding bytes,
+ // which shouldn't be counted here.
+ allocateStackSpace(MBB, CalleeSavesEnd, NeedsRealignment,
+ SVELocalsSize + StackOffset::getFixed(NumBytes),
+ NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
+ CFAOffset);
}
// If we need a base pointer, set it up here. It's whatever the value of the
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 147b5c181be5e53..f3313f3b53fffe0 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -150,6 +150,11 @@ class AArch64FrameLowering : public TargetFrameLowering {
MachineBasicBlock::iterator MBBI) const;
void emitCalleeSavedSVERestores(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) const;
+ void allocateStackSpace(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool NeedsRealignment, StackOffset AllocSize,
+ bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
+ StackOffset InitialOffset) const;
/// Emit target zero call-used regs.
void emitZeroCallUsedRegs(BitVector RegsToZero,
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir
index 623c0f240be4fd7..8d39b881395cdf6 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir
@@ -4,8 +4,8 @@
name: hasBasepointer
# CHECK-LABEL: name: hasBasepointer
# CHECK: bb.0:
-# CHECK: $sp = frame-setup ADDVL_XXI $sp, -1
-# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
# CHECK-NEXT: $x19 = ADDXri $sp, 0, 0
# CHECK: STRXui $x0, $x19, 0
tracksRegLiveness: true
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-fixed-width-access.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-fixed-width-access.mir
index e367a380f8ba9f0..35fd7ca77d5cf3e 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve-fixed-width-access.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve-fixed-width-access.mir
@@ -7,9 +7,9 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
+ ; CHECK-NEXT: sub sp, sp, #2064
; CHECK-NEXT: addvl sp, sp, #-32
; CHECK-NEXT: addvl sp, sp, #-28
- ; CHECK-NEXT: sub sp, sp, #2064
; CHECK-NEXT: ldr x8, [sp, #2048]
; CHECK-NEXT: addvl sp, sp, #31
; CHECK-NEXT: addvl sp, sp, #29
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir
index d54f67634d02a7b..680f9c335c250c5 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir
@@ -4,9 +4,9 @@
name: LateScavengingSlot
# CHECK-LABEL: name: LateScavengingSlot
# CHECK: bb.0:
-# CHECK: $sp = frame-setup ADDVL_XXI $sp, -1
-# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 8, 12
+# CHECK: $sp = frame-setup SUBXri $sp, 8, 12
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
# CHECK: STRXui killed $[[SCRATCH:x[0-9]+]], $sp, 0
# CHECK-NEXT: $[[SCRATCH]] = ADDVL_XXI $fp, -1
# CHECK-NEXT: STRXui $x0, killed $[[SCRATCH]], 0
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve.mir b/llvm/test/CodeGen/AArch64/framelayout-sve.mir
index 7c87587c6dc4e2c..213d7919e4a7270 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve.mir
@@ -60,10 +60,10 @@
# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
-# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
-# CHECK-NEXT: CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 32
@@ -77,7 +77,7 @@
# ASM-LABEL: test_allocate_sve:
# ASM: .cfi_def_cfa_offset 16
# ASM-NEXT: .cfi_offset w29, -16
-# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
+# ASM: .cfi_def_cfa_offset 32
# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 16 * VG
# ASM: .cfi_def_cfa wsp, 32
# ASM: .cfi_def_cfa_offset 16
@@ -87,7 +87,7 @@
#
# UNWINDINFO: DW_CFA_def_cfa_offset: +16
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
-# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
+# UNWINDINFO: DW_CFA_def_cfa_offset: +32
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
# UNWINDINFO: DW_CFA_def_cfa: reg31 +32
# UNWINDINFO: DW_CFA_def_cfa_offset: +16
@@ -125,10 +125,11 @@ body: |
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w20, -8
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w21, -16
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -32
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
-# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 48
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22
+
#
# CHECK-NEXT: $x20 = IMPLICIT_DEF
# CHECK-NEXT: $x21 = IMPLICIT_DEF
@@ -149,7 +150,7 @@ body: |
# ASM: .cfi_offset w20, -8
# ASM-NEXT: .cfi_offset w21, -16
# ASM-NEXT: .cfi_offset w29, -32
-# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 16 * VG
+# ASM: .cfi_def_cfa_offset 48
# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 16 * VG
#
# ASM: .cfi_def_cfa wsp, 48
@@ -164,7 +165,7 @@ body: |
# UNWINDINFO: DW_CFA_offset: reg20 -8
# UNWINDINFO-NEXT: DW_CFA_offset: reg21 -16
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -32
-# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
+# UNWINDINFO: DW_CFA_def_cfa_offset: +48
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
#
# UNWINDINFO: DW_CFA_def_cfa: reg31 +48
@@ -205,9 +206,9 @@ body: |
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 16
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
# CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 16, 0
-# CHECK-NEXT: $sp = ANDXri killed $[[TMP]]
+# CHECK-NEXT: $[[TMP]] = frame-setup ADDVL_XXI $[[TMP]], -2
+# CHECK-NEXT: $sp = frame-setup ANDXri killed $[[TMP]]
# CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16
# CHECK-NEXT: $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2
@@ -267,9 +268,9 @@ body: |
# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
-# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22
# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16
@@ -292,7 +293,7 @@ body: |
# ASM-LABEL: test_address_sve:
# ASM: .cfi_def_cfa_offset 16
# ASM-NEXT: .cfi_offset w29, -16
-# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
+# ASM: .cfi_def_cfa_offset 32
# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 24 * VG
#
# ASM: .cfi_def_cfa wsp, 32
@@ -302,7 +303,7 @@ body: |
#
# UNWINDINFO: DW_CFA_def_cfa_offset: +16
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
-# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
+# UNWINDINFO: DW_CFA_def_cfa_offset: +32
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
#
# UNWINDINFO: DW_CFA_def_cfa: reg31 +32
@@ -353,8 +354,8 @@ body: |
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 16
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
# CHECK-NEXT: STR_ZXI $z0, $fp, -1
# CHECK-NEXT: STR_ZXI $z1, $fp, -2
@@ -429,9 +430,9 @@ body: |
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
-# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22
# CHECK: $[[TMP:x[0-9]+]] = ADDVL_XXI $sp, 1
# CHECK-NEXT: $x0 = LDRXui killed $[[TMP]], 4
@@ -448,7 +449,7 @@ body: |
# ASM-LABEL: test_stack_arg_sve:
# ASM: .cfi_def_cfa_offset 16
# ASM-NEXT: .cfi_offset w29, -16
-# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+# ASM: .cfi_def_cfa_offset 32
# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 8 * VG
#
# ASM: .cfi_def_cfa wsp, 32
@@ -458,7 +459,7 @@ body: |
# UNWINDINFO: DW_CFA_def_cfa_offset: +16
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
-# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
+# UNWINDINFO: DW_CFA_def_cfa_offset: +32
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
#
# UNWINDINFO: DW_CFA_def_cfa: reg31 +32
@@ -640,8 +641,8 @@ body: |
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w19, -16
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -24
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -32
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
# CHECK-NEXT: $x19 = ADDXri $sp, 0, 0
# CHECK-NEXT: STRXui $xzr, $x19, 0
# CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0
@@ -863,9 +864,9 @@ body: |
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4d, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4e, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4f, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22
-# CHECK: $sp = frame-setup ADDVL_XXI $sp, -1
-# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22
# CHECK: $sp = frame-setup SUBXri $sp, 32, 0
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22
+# CHECK: $sp = frame-setup ADDVL_XXI $sp, -1
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22
# CHECK: $sp = frame-destroy ADDXri $sp, 32, 0
@@ -916,7 +917,7 @@ body: |
# ASM-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 32 - 48 * VG
# ASM-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 32 - 56 * VG
# ASM-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 32 - 64 * VG
-# ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 152 * VG
+# ASM: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 64 + 144 * VG
# ASM: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 64 + 152 * VG
#
# ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 152 * VG
@@ -950,7 +951,7 @@ body: |
# UNWINDINFO-NEXT: DW_CFA_expression: reg77 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -48, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
# UNWINDINFO-NEXT: DW_CFA_expression: reg78 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -56, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
# UNWINDINFO-NEXT: DW_CFA_expression: reg79 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -64, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
-# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +152, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
+# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +64, DW_OP_plus, DW_OP_consts +144, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +64, DW_OP_plus, DW_OP_consts +152, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
#
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +152, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
@@ -1031,9 +1032,9 @@ body: |
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
# CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 16, 0
-# CHECK-NEXT: $sp = ANDXri killed $[[TMP]]
+# CHECK-NEXT: $[[TMP]] = frame-setup ADDVL_XXI $[[TMP]], -1
+# CHECK-NEXT: $sp = frame-setup ANDXri killed $[[TMP]]
# CHECK: $sp = frame-destroy ADDVL_XXI $fp, -18
# CHECK-NEXT: $p15 = frame-destroy LDR_PXI $sp, 4
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll
index e4cd4d6c05c5ee3..45ca7844b065513 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll
@@ -20,8 +20,8 @@ define void @test_no_stackslot_scavenging(float %f) #0 {
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
; CHECK-NEXT: stp x30, x24, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
; CHECK-NEXT: //APP
; CHECK-NEXT: //NO_APP
diff --git a/llvm/test/CodeGen/AArch64/spill-stack-realignment.mir b/llvm/test/CodeGen/AArch64/spill-stack-realignment.mir
index 1b9411d07f433ab..f6fc627ac2d3d87 100644
--- a/llvm/test/CodeGen/AArch64/spill-stack-realignment.mir
+++ b/llvm/test/CodeGen/AArch64/spill-stack-realignment.mir
@@ -21,7 +21,7 @@ stack:
- { id: 1, size: 4, alignment: 4, local-offset: -68 }
# CHECK: body:
-# CHECK: $sp = ANDXri killed ${{x[0-9]+}}, 7865
+# CHECK: $sp = frame-setup ANDXri killed ${{x[0-9]+}}, 7865
# CHECK: STRSui $s0, $sp, 0
# CHECK: STRSui $s0, $fp, 7
body: |
diff --git a/llvm/test/CodeGen/AArch64/stack-guard-sve.ll b/llvm/test/CodeGen/AArch64/stack-guard-sve.ll
index 1672a7eb8739779..5acbb22bf1ab5a4 100644
--- a/llvm/test/CodeGen/AArch64/stack-guard-sve.ll
+++ b/llvm/test/CodeGen/AArch64/stack-guard-sve.ll
@@ -148,9 +148,9 @@ entry:
; CHECK-LABEL: local_stack_alloc:
; CHECK: mov x29, sp
-; CHECK: addvl sp, sp, #-2
; CHECK: sub sp, sp, #16, lsl #12
; CHECK: sub sp, sp, #16
+; CHECK: addvl sp, sp, #-2
; Stack guard is placed below the SVE stack area (and above all fixed-width objects)
; CHECK-DAG: add [[STACK_GUARD_SPILL_PART_LOC:x[0-9]+]], sp, #8, lsl #12
@@ -198,9 +198,9 @@ entry:
; CHECK-LABEL: local_stack_alloc_strong:
; CHECK: mov x29, sp
-; CHECK: addvl sp, sp, #-3
; CHECK: sub sp, sp, #16, lsl #12
; CHECK: sub sp, sp, #16
+; CHECK: addvl sp, sp, #-3
; Stack guard is placed at the top of the SVE stack area
; CHECK-DAG: ldr [[STACK_GUARD:x[0-9]+]], [{{x[0-9]+}}, :lo12:__stack_chk_guard]
diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
index b7505625cde9773..6738bddb8af3442 100644
--- a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
+++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
@@ -56,8 +56,8 @@ define float @foo2(ptr %x0, ptr %x1) nounwind {
; CHECK-LABEL: foo2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-4
; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: addvl sp, sp, #-4
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: fmov s0, #1.00000000
; CHECK-NEXT: add x8, sp, #16
@@ -699,8 +699,8 @@ define void @verify_all_operands_are_initialised() {
; CHECK-LABEL: verify_all_operands_are_initialised:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 8 * VG
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
index da7e772461e28bc..9d9d4a64a5d1f58 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
@@ -9,8 +9,8 @@ define void @fcvt_v4f64_v4f128(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill
; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: add x8, sp, #48
; CHECK-NEXT: mov x19, x1
@@ -59,8 +59,8 @@ define void @fcvt_v4f128_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill
; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: sub sp, sp, #128
+; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ldr q1, [x0, #64]
; CHECK-NEXT: ldr q0, [x0, #80]
; CHECK-NEXT: mov x19, x1
>From 93c4e5f7abceee7c4edd2eaf232a3184833c23de Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Sat, 11 Nov 2023 15:27:53 +0000
Subject: [PATCH 2/6] [CFIFixup] Precommit test ahead of multi-block prologues
support
---
.../cfi-fixup-multi-block-prologue.mir | 307 ++++++++++++++++++
1 file changed, 307 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir
diff --git a/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir b/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir
new file mode 100644
index 000000000000000..ddd9a9eaef55efb
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir
@@ -0,0 +1,307 @@
+# RUN: llc -run-pass=cfi-fixup %s -o - | FileCheck %s
+--- |
+ source_filename = "cfi-fixup.ll"
+ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+ target triple = "aarch64-linux"
+
+ define i32 @f(i32 %x) #0 {
+ entry:
+ %p = alloca i8, i32 30000, align 1
+ switch i32 %x, label %if.end7 [
+ i32 0, label %return
+ i32 1, label %if.then2
+ i32 2, label %if.then5
+ ]
+
+ if.then2: ; preds = %entry
+ %call = tail call i32 @g1(i32 1)
+ %add = add nsw i32 %call, 1
+ br label %return
+
+ if.then5: ; preds = %entry
+ %call6 = tail call i32 @g0(i32 2)
+ %sub = sub nsw i32 1, %call6
+ br label %return
+
+ if.end7: ; preds = %entry
+ br label %return
+
+ return: ; preds = %if.end7, %if.then5, %if.then2, %entry
+ %retval.0 = phi i32 [ %add, %if.then2 ], [ %sub, %if.then5 ], [ 0, %if.end7 ], [ 1, %entry ]
+ ret i32 %retval.0
+ }
+
+ declare i32 @g1(i32)
+
+ declare i32 @g0(i32)
+
+ attributes #0 = { uwtable "probe-stack"="inline-asm" }
+
+...
+---
+name: f
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHCatchret: false
+hasEHScopes: false
+hasEHFunclets: false
+isOutlined: false
+debugInstrRef: false
+failsVerification: false
+tracksDebugUserValues: true
+registers: []
+liveins:
+ - { reg: '$w0', virtual-reg: '' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 30016
+ offsetAdjustment: 0
+ maxAlignment: 8
+ adjustsStack: true
+ hasCalls: true
+ stackProtector: ''
+ functionContext: ''
+ maxCallFrameSize: 0
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ localFrameSize: 30000
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack:
+ - { id: 0, name: p, type: default, offset: -30016, size: 30000, alignment: 1,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: -30000, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+ - { id: 1, name: '', type: spill-slot, offset: -8, size: 8, alignment: 8,
+ stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 2, name: '', type: spill-slot, offset: -16, size: 8, alignment: 8,
+ stack-id: default, callee-saved-register: '$fp', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+ hasRedZone: false
+body: |
+ ; CHECK-LABEL: name: f
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $w0, $lr, $fp
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store (s64) into %stack.2), (store (s64) into %stack.1)
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
+ ; CHECK-NEXT: $x9 = frame-setup SUBXri $sp, 7, 12
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w9, 28688
+ ; CHECK-NEXT: CFI_INSTRUCTION remember_state
+
+ ; CHECK: bb.1.entry:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $x9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1, 12
+ ; CHECK-NEXT: $xzr = frame-setup SUBSXrx64 $sp, $x9, 24, implicit-def $nzcv
+ ; CHECK-NEXT: frame-setup STRXui $xzr, $sp, 0
+ ; CHECK-NEXT: frame-setup Bcc 1, %bb.1, implicit killed $nzcv
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.entry:
+ ; CHECK-NEXT: successors: %bb.6(0x20000000), %bb.3(0x60000000)
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $wsp
+ ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1328, 0
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 30016
+ ; CHECK-NEXT: frame-setup STRXui $xzr, $sp, 0
+ ; CHECK-NEXT: CBZW renamable $w0, %bb.6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.entry:
+ ; CHECK-NEXT: successors: %bb.7(0x2aaaaaab), %bb.4(0x55555555)
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead $wzr = SUBSWri renamable $w0, 2, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.7, implicit killed $nzcv
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4.entry:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.8(0x40000000)
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 1, %bb.8, implicit killed $nzcv
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5.if.then2:
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: BL @g1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0
+ ; CHECK-NEXT: renamable $w0 = nsw ADDWri killed renamable $w0, 1, 0
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 7, 12
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 1344
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1328, 0
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+ ; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w30
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29
+ ; CHECK-NEXT: RET undef $lr, implicit killed $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: CFI_INSTRUCTION restore_state
+ ; CHECK-NEXT: CFI_INSTRUCTION remember_state
+ ; CHECK-NEXT: renamable $w0 = MOVZWi 1, 0
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 7, 12
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 1344
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1328, 0
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+ ; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w30
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29
+ ; CHECK-NEXT: RET undef $lr, implicit killed $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7.if.then5:
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: CFI_INSTRUCTION restore_state
+ ; CHECK-NEXT: CFI_INSTRUCTION remember_state
+ ; CHECK-NEXT: BL @g0, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0
+ ; CHECK-NEXT: renamable $w8 = MOVZWi 1, 0
+ ; CHECK-NEXT: $w0 = SUBWrs killed renamable $w8, killed renamable $w0, 0
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 7, 12
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 1344
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1328, 0
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+ ; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w30
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29
+ ; CHECK-NEXT: RET undef $lr, implicit killed $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.8.if.end7:
+ ; CHECK-NEXT: CFI_INSTRUCTION restore_state
+ ; CHECK-NEXT: $w0 = ORRWrs $wzr, $wzr, 0
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 7, 12
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 1344
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1328, 0
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+ ; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w30
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29
+ ; CHECK-NEXT: RET undef $lr, implicit killed $w0
+ bb.0.entry:
+ successors: %bb.1(0x80000000)
+ liveins: $w0, $lr, $fp
+
+ early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store (s64) into %stack.2), (store (s64) into %stack.1)
+ frame-setup CFI_INSTRUCTION def_cfa_offset 16
+ frame-setup CFI_INSTRUCTION offset $w30, -8
+ frame-setup CFI_INSTRUCTION offset $w29, -16
+ $x9 = frame-setup SUBXri $sp, 7, 12
+ frame-setup CFI_INSTRUCTION def_cfa $w9, 28688
+
+ bb.1.entry:
+ successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ liveins: $x9
+
+ $sp = frame-setup SUBXri $sp, 1, 12
+ $xzr = frame-setup SUBSXrx64 $sp, $x9, 24, implicit-def $nzcv
+ frame-setup STRXui $xzr, $sp, 0
+ frame-setup Bcc 1, %bb.1, implicit killed $nzcv
+
+ bb.2.entry:
+ successors: %bb.6(0x20000000), %bb.3(0x60000000)
+ liveins: $w0
+
+ frame-setup CFI_INSTRUCTION def_cfa_register $wsp
+ $sp = frame-setup SUBXri $sp, 1328, 0
+ frame-setup CFI_INSTRUCTION def_cfa_offset 30016
+ frame-setup STRXui $xzr, $sp, 0
+ CBZW renamable $w0, %bb.6
+
+ bb.3.entry:
+ successors: %bb.7(0x2aaaaaab), %bb.4(0x55555555)
+ liveins: $w0
+
+ dead $wzr = SUBSWri renamable $w0, 2, 0, implicit-def $nzcv
+ Bcc 0, %bb.7, implicit killed $nzcv
+
+ bb.4.entry:
+ successors: %bb.5(0x40000000), %bb.8(0x40000000)
+ liveins: $w0
+
+ dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv
+ Bcc 1, %bb.8, implicit killed $nzcv
+
+ bb.5.if.then2:
+ liveins: $w0
+
+ BL @g1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0
+ renamable $w0 = nsw ADDWri killed renamable $w0, 1, 0
+ $sp = frame-destroy ADDXri $sp, 7, 12
+ frame-destroy CFI_INSTRUCTION def_cfa_offset 1344
+ $sp = frame-destroy ADDXri $sp, 1328, 0
+ frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+ early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+ frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+ frame-destroy CFI_INSTRUCTION restore $w30
+ frame-destroy CFI_INSTRUCTION restore $w29
+ RET undef $lr, implicit killed $w0
+
+ bb.6:
+ renamable $w0 = MOVZWi 1, 0
+ $sp = frame-destroy ADDXri $sp, 7, 12
+ frame-destroy CFI_INSTRUCTION def_cfa_offset 1344
+ $sp = frame-destroy ADDXri $sp, 1328, 0
+ frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+ early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+ frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+ frame-destroy CFI_INSTRUCTION restore $w30
+ frame-destroy CFI_INSTRUCTION restore $w29
+ RET undef $lr, implicit killed $w0
+
+ bb.7.if.then5:
+ liveins: $w0
+
+ BL @g0, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0
+ renamable $w8 = MOVZWi 1, 0
+ $w0 = SUBWrs killed renamable $w8, killed renamable $w0, 0
+ $sp = frame-destroy ADDXri $sp, 7, 12
+ frame-destroy CFI_INSTRUCTION def_cfa_offset 1344
+ $sp = frame-destroy ADDXri $sp, 1328, 0
+ frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+ early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+ frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+ frame-destroy CFI_INSTRUCTION restore $w30
+ frame-destroy CFI_INSTRUCTION restore $w29
+ RET undef $lr, implicit killed $w0
+
+ bb.8.if.end7:
+ $w0 = ORRWrs $wzr, $wzr, 0
+ $sp = frame-destroy ADDXri $sp, 7, 12
+ frame-destroy CFI_INSTRUCTION def_cfa_offset 1344
+ $sp = frame-destroy ADDXri $sp, 1328, 0
+ frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+ early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+ frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+ frame-destroy CFI_INSTRUCTION restore $w30
+ frame-destroy CFI_INSTRUCTION restore $w29
+ RET undef $lr, implicit killed $w0
+
+...
>From fd103b19f2cb301f8961c447a6ff5f355b66d5d0 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Mon, 2 Oct 2023 14:46:27 +0100
Subject: [PATCH 3/6] [CFIFixup] Allow function prologues to span more than one
basic block
The CFIFixup pass assumes a function prologue is contained in a single
basic block. This assumption is broken with upcoming support for stack
probing (`-fstack-clash-protection`) in AArch64 - the emitted probing
sequence in a prologue may contain loops, i.e. more than one basic
block. The generated CFG is not arbitrary though:
* CFI instructions are outside of any loops
* for any two CFI instructions of the function prologue one dominates
and is post-dominated by the other
Thus, for the prologue CFI instructions, if one is
executed then all are executed, there is a total order of
executions, and the last instruction in that order can be considered
the end of the prologue for the purpose of inserting the initial
`.cfi_remember_state` directive.
That last instruction is found by finding the first block in the
post-order traversal which contains prologue CFI instructions.
---
llvm/lib/CodeGen/CFIFixup.cpp | 62 ++++++++++++-------
.../cfi-fixup-multi-block-prologue.mir | 7 ++-
2 files changed, 43 insertions(+), 26 deletions(-)
diff --git a/llvm/lib/CodeGen/CFIFixup.cpp b/llvm/lib/CodeGen/CFIFixup.cpp
index 837dbd77d07361a..964a8d56511fa1b 100644
--- a/llvm/lib/CodeGen/CFIFixup.cpp
+++ b/llvm/lib/CodeGen/CFIFixup.cpp
@@ -10,20 +10,25 @@
// This pass inserts the necessary instructions to adjust for the inconsistency
// of the call-frame information caused by final machine basic block layout.
// The pass relies in constraints LLVM imposes on the placement of
-// save/restore points (cf. ShrinkWrap):
-// * there is a single basic block, containing the function prologue
+// save/restore points (cf. ShrinkWrap) and has certain preconditions about
+// placement of CFI instructions:
+// * for any two CFI instructions of the function prologue one dominates
+// and is post-dominated by the other
// * possibly multiple epilogue blocks, where each epilogue block is
// complete and self-contained, i.e. CSR restore instructions (and the
// corresponding CFI instructions are not split across two or more blocks.
-// * prologue and epilogue blocks are outside of any loops
-// Thus, during execution, at the beginning and at the end of each basic block
-// the function can be in one of two states:
+// * CFI instructions are not contained in any loops
+// Thus, during execution, at the beginning and at the end of each basic block,
+// following the prologue, the function can be in one of two states:
// - "has a call frame", if the function has executed the prologue, and
// has not executed any epilogue
// - "does not have a call frame", if the function has not executed the
// prologue, or has executed an epilogue
// which can be computed by a single RPO traversal.
+// The location of the prologue is determined by finding the first block in the
+// post-order traversal which contains CFI instructions.
+
// In order to accommodate backends which do not generate unwind info in
// epilogues we compute an additional property "strong no call frame on entry",
// which is set for the entry point of the function and for every block
@@ -85,10 +90,6 @@ static bool isPrologueCFIInstruction(const MachineInstr &MI) {
MI.getFlag(MachineInstr::FrameSetup);
}
-static bool containsPrologue(const MachineBasicBlock &MBB) {
- return llvm::any_of(MBB.instrs(), isPrologueCFIInstruction);
-}
-
static bool containsEpilogue(const MachineBasicBlock &MBB) {
return llvm::any_of(llvm::reverse(MBB), [](const auto &MI) {
return MI.getOpcode() == TargetOpcode::CFI_INSTRUCTION &&
@@ -96,6 +97,25 @@ static bool containsEpilogue(const MachineBasicBlock &MBB) {
});
}
+static MachineBasicBlock *
+findPrologueEnd(MachineFunction &MF, MachineBasicBlock::iterator &PrologueEnd) {
+ MachineBasicBlock *PrologueBlock = nullptr;
+ for (auto It = po_begin(&MF.front()), End = po_end(&MF.front()); It != End;
+ ++It) {
+ MachineBasicBlock *MBB = *It;
+ llvm::for_each(MBB->instrs(), [&](MachineInstr &MI) {
+ if (isPrologueCFIInstruction(MI)) {
+ PrologueBlock = MBB;
+ PrologueEnd = std::next(MI.getIterator());
+ }
+ });
+ if (PrologueBlock)
+ return PrologueBlock;
+ }
+
+ return nullptr;
+}
+
bool CFIFixup::runOnMachineFunction(MachineFunction &MF) {
const TargetFrameLowering &TFL = *MF.getSubtarget().getFrameLowering();
if (!TFL.enableCFIFixup(MF))
@@ -105,6 +125,14 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) {
if (NumBlocks < 2)
return false;
+ // Find the prologue and the point where we can issue the first
+ // `.cfi_remember_state`.
+
+ MachineBasicBlock::iterator PrologueEnd;
+ MachineBasicBlock *PrologueBlock = findPrologueEnd(MF, PrologueEnd);
+ if (PrologueBlock == nullptr)
+ return false;
+
struct BlockFlags {
bool Reachable : 1;
bool StrongNoFrameOnEntry : 1;
@@ -116,21 +144,15 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) {
BlockInfo[0].StrongNoFrameOnEntry = true;
// Compute the presence/absence of frame at each basic block.
- MachineBasicBlock *PrologueBlock = nullptr;
ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
for (MachineBasicBlock *MBB : RPOT) {
BlockFlags &Info = BlockInfo[MBB->getNumber()];
// Set to true if the current block contains the prologue or the epilogue,
// respectively.
- bool HasPrologue = false;
+ bool HasPrologue = MBB == PrologueBlock;
bool HasEpilogue = false;
- if (!PrologueBlock && !Info.HasFrameOnEntry && containsPrologue(*MBB)) {
- PrologueBlock = MBB;
- HasPrologue = true;
- }
-
if (Info.HasFrameOnEntry || HasPrologue)
HasEpilogue = containsEpilogue(*MBB);
@@ -149,9 +171,6 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) {
}
}
- if (!PrologueBlock)
- return false;
-
// Walk the blocks of the function in "physical" order.
// Every block inherits the frame state (as recorded in the unwind tables)
// of the previous block. If the intended frame state is different, insert
@@ -162,10 +181,7 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) {
// insert a `.cfi_remember_state`, in the case that the current block needs a
// `.cfi_restore_state`.
MachineBasicBlock *InsertMBB = PrologueBlock;
- MachineBasicBlock::iterator InsertPt = PrologueBlock->begin();
- for (MachineInstr &MI : *PrologueBlock)
- if (isPrologueCFIInstruction(MI))
- InsertPt = std::next(MI.getIterator());
+ MachineBasicBlock::iterator InsertPt = PrologueEnd;
assert(InsertPt != PrologueBlock->begin() &&
"Inconsistent notion of \"prologue block\"");
diff --git a/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir b/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir
index ddd9a9eaef55efb..31fa3832367becc 100644
--- a/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir
+++ b/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir
@@ -1,3 +1,4 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc -run-pass=cfi-fixup %s -o - | FileCheck %s
--- |
source_filename = "cfi-fixup.ll"
@@ -111,9 +112,8 @@ body: |
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
; CHECK-NEXT: $x9 = frame-setup SUBXri $sp, 7, 12
; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w9, 28688
- ; CHECK-NEXT: CFI_INSTRUCTION remember_state
-
- ; CHECK: bb.1.entry:
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.entry:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $x9
; CHECK-NEXT: {{ $}}
@@ -129,6 +129,7 @@ body: |
; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $wsp
; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1328, 0
; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 30016
+ ; CHECK-NEXT: CFI_INSTRUCTION remember_state
; CHECK-NEXT: frame-setup STRXui $xzr, $sp, 0
; CHECK-NEXT: CBZW renamable $w0, %bb.6
; CHECK-NEXT: {{ $}}
>From 8811203236b6fdfff19f305b304261737c1c3217 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Sat, 28 Oct 2023 13:33:28 +0100
Subject: [PATCH 4/6] Reverse iteration within a block when looking for
prologue CFI insns
---
llvm/lib/CodeGen/CFIFixup.cpp | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/CodeGen/CFIFixup.cpp b/llvm/lib/CodeGen/CFIFixup.cpp
index 964a8d56511fa1b..40a2a3a142e1758 100644
--- a/llvm/lib/CodeGen/CFIFixup.cpp
+++ b/llvm/lib/CodeGen/CFIFixup.cpp
@@ -99,20 +99,16 @@ static bool containsEpilogue(const MachineBasicBlock &MBB) {
static MachineBasicBlock *
findPrologueEnd(MachineFunction &MF, MachineBasicBlock::iterator &PrologueEnd) {
- MachineBasicBlock *PrologueBlock = nullptr;
for (auto It = po_begin(&MF.front()), End = po_end(&MF.front()); It != End;
++It) {
MachineBasicBlock *MBB = *It;
- llvm::for_each(MBB->instrs(), [&](MachineInstr &MI) {
- if (isPrologueCFIInstruction(MI)) {
- PrologueBlock = MBB;
- PrologueEnd = std::next(MI.getIterator());
- }
- });
- if (PrologueBlock)
- return PrologueBlock;
+ for (MachineInstr &MI : reverse(MBB->instrs())) {
+ if (!isPrologueCFIInstruction(MI))
+ continue;
+ PrologueEnd = std::next(MI.getIterator());
+ return MBB;
+ }
}
-
return nullptr;
}
>From ac04dc7894af4205a4903fcc5d6995d0aff8dfee Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Tue, 7 Nov 2023 14:08:26 +0000
Subject: [PATCH 5/6] Use simple reverse traversal of basic blocks
---
llvm/lib/CodeGen/CFIFixup.cpp | 29 ++++++++++++++++-------------
1 file changed, 16 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/CodeGen/CFIFixup.cpp b/llvm/lib/CodeGen/CFIFixup.cpp
index 40a2a3a142e1758..61888a42666524b 100644
--- a/llvm/lib/CodeGen/CFIFixup.cpp
+++ b/llvm/lib/CodeGen/CFIFixup.cpp
@@ -12,12 +12,14 @@
// The pass relies in constraints LLVM imposes on the placement of
// save/restore points (cf. ShrinkWrap) and has certain preconditions about
// placement of CFI instructions:
-// * for any two CFI instructions of the function prologue one dominates
-// and is post-dominated by the other
-// * possibly multiple epilogue blocks, where each epilogue block is
-// complete and self-contained, i.e. CSR restore instructions (and the
-// corresponding CFI instructions are not split across two or more blocks.
-// * CFI instructions are not contained in any loops
+// * For any two CFI instructions of the function prologue one dominates
+// and is post-dominated by the other.
+// * The function possibly contains multiple epilogue blocks, where each
+// epilogue block is complete and self-contained, i.e. CSR restore
+// instructions (and the corresponding CFI instructions)
+// are not split across two or more blocks.
+// * CFI instructions are not contained in any loops.
+
// Thus, during execution, at the beginning and at the end of each basic block,
// following the prologue, the function can be in one of two states:
// - "has a call frame", if the function has executed the prologue, and
@@ -27,7 +29,7 @@
// which can be computed by a single RPO traversal.
// The location of the prologue is determined by finding the first block in the
-// post-order traversal which contains CFI instructions.
+// reverse traversal which contains CFI instructions.
// In order to accommodate backends which do not generate unwind info in
// epilogues we compute an additional property "strong no call frame on entry",
@@ -99,14 +101,16 @@ static bool containsEpilogue(const MachineBasicBlock &MBB) {
static MachineBasicBlock *
findPrologueEnd(MachineFunction &MF, MachineBasicBlock::iterator &PrologueEnd) {
- for (auto It = po_begin(&MF.front()), End = po_end(&MF.front()); It != End;
- ++It) {
- MachineBasicBlock *MBB = *It;
- for (MachineInstr &MI : reverse(MBB->instrs())) {
+ // Even though we should theoretically traverse the blocks in post-order, we
+ // can't encode correctly cases where prologue blocks are not laid out in
+ // topological order. Then, assuming topological order, we can just traverse
+ // the function in reverse.
+ for (MachineBasicBlock &MBB : reverse(MF)) {
+ for (MachineInstr &MI : reverse(MBB.instrs())) {
if (!isPrologueCFIInstruction(MI))
continue;
PrologueEnd = std::next(MI.getIterator());
- return MBB;
+ return &MBB;
}
}
return nullptr;
@@ -123,7 +127,6 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) {
// Find the prologue and the point where we can issue the first
// `.cfi_remember_state`.
-
MachineBasicBlock::iterator PrologueEnd;
MachineBasicBlock *PrologueBlock = findPrologueEnd(MF, PrologueEnd);
if (PrologueBlock == nullptr)
>From fdb47f7bbff0eb395c6bb2c7f469a77d19f53a26 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Wed, 11 Oct 2023 17:22:51 +0100
Subject: [PATCH 6/6] [clang][AArch64] Pass down stack clash protection options
to LLVM/Backend
---
clang/lib/CodeGen/CodeGenModule.cpp | 12 +++++++++++-
clang/lib/Driver/ToolChains/Clang.cpp | 2 +-
clang/test/CodeGen/stack-clash-protection.c | 16 ++++++++++++----
3 files changed, 24 insertions(+), 6 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 4c7f516e308ca00..bc496852b86fba5 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1085,6 +1085,16 @@ void CodeGenModule::Release() {
"sign-return-address-with-bkey", 1);
}
+ if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be) {
+ auto *InlineAsm = llvm::MDString::get(TheModule.getContext(), "inline-asm");
+ if (CodeGenOpts.StackClashProtector)
+ getModule().addModuleFlag(llvm::Module::Override, "probe-stack",
+ InlineAsm);
+ if (CodeGenOpts.StackProbeSize && CodeGenOpts.StackProbeSize != 4096)
+ getModule().addModuleFlag(llvm::Module::Min, "stack-probe-size",
+ CodeGenOpts.StackProbeSize);
+ }
+
if (!CodeGenOpts.MemoryProfileOutput.empty()) {
llvm::LLVMContext &Ctx = TheModule.getContext();
getModule().addModuleFlag(
@@ -2296,7 +2306,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
if ((!D || !D->hasAttr<NoUwtableAttr>()) && CodeGenOpts.UnwindTables)
B.addUWTableAttr(llvm::UWTableKind(CodeGenOpts.UnwindTables));
- if (CodeGenOpts.StackClashProtector)
+ if (CodeGenOpts.StackClashProtector && !getTarget().getTriple().isAArch64())
B.addAttribute("probe-stack", "inline-asm");
if (!hasUnwindExceptions(LangOpts))
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 3b98c7ae6e6ec66..35133001f95c3f9 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -3507,7 +3507,7 @@ static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args,
return;
if (!EffectiveTriple.isX86() && !EffectiveTriple.isSystemZ() &&
- !EffectiveTriple.isPPC64())
+ !EffectiveTriple.isPPC64() && !EffectiveTriple.isAArch64())
return;
Args.addOptInFlag(CmdArgs, options::OPT_fstack_clash_protection,
diff --git a/clang/test/CodeGen/stack-clash-protection.c b/clang/test/CodeGen/stack-clash-protection.c
index 67571f5cdb2c14c..2f502ef453d42f4 100644
--- a/clang/test/CodeGen/stack-clash-protection.c
+++ b/clang/test/CodeGen/stack-clash-protection.c
@@ -1,10 +1,12 @@
// Check the correct function attributes are generated
-// RUN: %clang_cc1 -triple x86_64-linux -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s
-// RUN: %clang_cc1 -triple s390x-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s
-// RUN: %clang_cc1 -triple powerpc64le-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s
-// RUN: %clang_cc1 -triple powerpc64-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux -O0 -S -emit-llvm -o- %s -fstack-clash-protection -mstack-probe-size=8192 | FileCheck %s
+// RUN: %clang_cc1 -triple s390x-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection -mstack-probe-size=8192 | FileCheck %s
+// RUN: %clang_cc1 -triple powerpc64le-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection -mstack-probe-size=8192 | FileCheck %s
+// RUN: %clang_cc1 -triple powerpc64-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection -mstack-probe-size=8192 | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -O0 -S -emit-llvm -o- %s -fstack-clash-protection -mstack-probe-size=8192 | FileCheck %s --check-prefixes CHECK-AARCH64
// CHECK: define{{.*}} void @large_stack() #[[A:.*]] {
+// CHECK-AARCH64: define{{.*}} void @large_stack() #[[A:.*]] {
void large_stack(void) {
volatile int stack[20000], i;
for (i = 0; i < sizeof(stack) / sizeof(int); ++i)
@@ -12,14 +14,20 @@ void large_stack(void) {
}
// CHECK: define{{.*}} void @vla({{.*}}) #[[A:.*]] {
+// CHECK-AARCH64: define{{.*}} void @vla({{.*}}) #[[A:.*]] {
void vla(int n) {
volatile int vla[n];
__builtin_memset(&vla[0], 0, 1);
}
// CHECK: define{{.*}} void @builtin_alloca({{.*}}) #[[A:.*]] {
+// CHECK-AARCH64: define{{.*}} void @builtin_alloca({{.*}}) #[[A:.*]] {
void builtin_alloca(int n) {
volatile void *mem = __builtin_alloca(n);
}
// CHECK: attributes #[[A]] = {{.*}} "probe-stack"="inline-asm"
+// CHECK-AARCH64-NOT: attributes #[[A]] = {{.*}} "probe-stack"
+
+// CHECK-AARCH64: !{i32 4, !"probe-stack", !"inline-asm"}
+// CHECK-AARCH64: !{i32 8, !"stack-probe-size", i32 8192}
More information about the llvm-commits
mailing list