[llvm] [CFIFixup] Allow function prologues to span more than one basic block (PR #68984)

Momchil Velikov via llvm-commits llvm-commits at lists.llvm.org
Sat Nov 11 08:46:05 PST 2023


https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/68984

>From c55da9c253829610418e5fb22e2b0ecc3f1585b7 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Sat, 11 Nov 2023 15:27:53 +0000
Subject: [PATCH 1/4] [CFIFixup] Precommit test ahead of multi-block prologues
 support

---
 .../cfi-fixup-multi-block-prologue.mir        | 307 ++++++++++++++++++
 1 file changed, 307 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir

diff --git a/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir b/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir
new file mode 100644
index 000000000000000..ddd9a9eaef55efb
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir
@@ -0,0 +1,307 @@
+# RUN: llc -run-pass=cfi-fixup %s -o - | FileCheck %s
+--- |
+  source_filename = "cfi-fixup.ll"
+  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64-linux"
+
+  define i32 @f(i32 %x) #0 {
+  entry:
+    %p = alloca i8, i32 30000, align 1
+    switch i32 %x, label %if.end7 [
+      i32 0, label %return
+      i32 1, label %if.then2
+      i32 2, label %if.then5
+    ]
+
+  if.then2:                                         ; preds = %entry
+    %call = tail call i32 @g1(i32 1)
+    %add = add nsw i32 %call, 1
+    br label %return
+
+  if.then5:                                         ; preds = %entry
+    %call6 = tail call i32 @g0(i32 2)
+    %sub = sub nsw i32 1, %call6
+    br label %return
+
+  if.end7:                                          ; preds = %entry
+    br label %return
+
+  return:                                           ; preds = %if.end7, %if.then5, %if.then2, %entry
+    %retval.0 = phi i32 [ %add, %if.then2 ], [ %sub, %if.then5 ], [ 0, %if.end7 ], [ 1, %entry ]
+    ret i32 %retval.0
+  }
+
+  declare i32 @g1(i32)
+
+  declare i32 @g0(i32)
+
+  attributes #0 = { uwtable "probe-stack"="inline-asm" }
+
+...
+---
+name:            f
+alignment:       4
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHCatchret:   false
+hasEHScopes:     false
+hasEHFunclets:   false
+isOutlined:      false
+debugInstrRef:   false
+failsVerification: false
+tracksDebugUserValues: true
+registers:       []
+liveins:
+  - { reg: '$w0', virtual-reg: '' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       30016
+  offsetAdjustment: 0
+  maxAlignment:    8
+  adjustsStack:    true
+  hasCalls:        true
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  localFrameSize:  30000
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:
+  - { id: 0, name: p, type: default, offset: -30016, size: 30000, alignment: 1,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: -30000, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: -8, size: 8, alignment: 8,
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 2, name: '', type: spill-slot, offset: -16, size: 8, alignment: 8,
+      stack-id: default, callee-saved-register: '$fp', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+entry_values:    []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo:
+  hasRedZone:      false
+body:             |
+  ; CHECK-LABEL: name: f
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $w0, $lr, $fp
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store (s64) into %stack.2), (store (s64) into %stack.1)
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa_offset 16
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $w30, -8
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $w29, -16
+  ; CHECK-NEXT:   $x9 = frame-setup SUBXri $sp, 7, 12
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa $w9, 28688
+  ; CHECK-NEXT:   CFI_INSTRUCTION remember_state
+  
+  ; CHECK:      bb.1.entry:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $x9
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $sp = frame-setup SUBXri $sp, 1, 12
+  ; CHECK-NEXT:   $xzr = frame-setup SUBSXrx64 $sp, $x9, 24, implicit-def $nzcv
+  ; CHECK-NEXT:   frame-setup STRXui $xzr, $sp, 0
+  ; CHECK-NEXT:   frame-setup Bcc 1, %bb.1, implicit killed $nzcv
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.entry:
+  ; CHECK-NEXT:   successors: %bb.6(0x20000000), %bb.3(0x60000000)
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa_register $wsp
+  ; CHECK-NEXT:   $sp = frame-setup SUBXri $sp, 1328, 0
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa_offset 30016
+  ; CHECK-NEXT:   frame-setup STRXui $xzr, $sp, 0
+  ; CHECK-NEXT:   CBZW renamable $w0, %bb.6
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.entry:
+  ; CHECK-NEXT:   successors: %bb.7(0x2aaaaaab), %bb.4(0x55555555)
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   dead $wzr = SUBSWri renamable $w0, 2, 0, implicit-def $nzcv
+  ; CHECK-NEXT:   Bcc 0, %bb.7, implicit killed $nzcv
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4.entry:
+  ; CHECK-NEXT:   successors: %bb.5(0x40000000), %bb.8(0x40000000)
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv
+  ; CHECK-NEXT:   Bcc 1, %bb.8, implicit killed $nzcv
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.5.if.then2:
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   BL @g1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0
+  ; CHECK-NEXT:   renamable $w0 = nsw ADDWri killed renamable $w0, 1, 0
+  ; CHECK-NEXT:   $sp = frame-destroy ADDXri $sp, 7, 12
+  ; CHECK-NEXT:   frame-destroy CFI_INSTRUCTION def_cfa_offset 1344
+  ; CHECK-NEXT:   $sp = frame-destroy ADDXri $sp, 1328, 0
+  ; CHECK-NEXT:   frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+  ; CHECK-NEXT:   early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+  ; CHECK-NEXT:   frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+  ; CHECK-NEXT:   frame-destroy CFI_INSTRUCTION restore $w30
+  ; CHECK-NEXT:   frame-destroy CFI_INSTRUCTION restore $w29
+  ; CHECK-NEXT:   RET undef $lr, implicit killed $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.6:
+  ; CHECK-NEXT:   CFI_INSTRUCTION restore_state
+  ; CHECK-NEXT:   CFI_INSTRUCTION remember_state
+  ; CHECK-NEXT:   renamable $w0 = MOVZWi 1, 0
+  ; CHECK-NEXT:   $sp = frame-destroy ADDXri $sp, 7, 12
+  ; CHECK-NEXT:   frame-destroy CFI_INSTRUCTION def_cfa_offset 1344
+  ; CHECK-NEXT:   $sp = frame-destroy ADDXri $sp, 1328, 0
+  ; CHECK-NEXT:   frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+  ; CHECK-NEXT:   early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+  ; CHECK-NEXT:   frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+  ; CHECK-NEXT:   frame-destroy CFI_INSTRUCTION restore $w30
+  ; CHECK-NEXT:   frame-destroy CFI_INSTRUCTION restore $w29
+  ; CHECK-NEXT:   RET undef $lr, implicit killed $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.7.if.then5:
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   CFI_INSTRUCTION restore_state
+  ; CHECK-NEXT:   CFI_INSTRUCTION remember_state
+  ; CHECK-NEXT:   BL @g0, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0
+  ; CHECK-NEXT:   renamable $w8 = MOVZWi 1, 0
+  ; CHECK-NEXT:   $w0 = SUBWrs killed renamable $w8, killed renamable $w0, 0
+  ; CHECK-NEXT:   $sp = frame-destroy ADDXri $sp, 7, 12
+  ; CHECK-NEXT:   frame-destroy CFI_INSTRUCTION def_cfa_offset 1344
+  ; CHECK-NEXT:   $sp = frame-destroy ADDXri $sp, 1328, 0
+  ; CHECK-NEXT:   frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+  ; CHECK-NEXT:   early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+  ; CHECK-NEXT:   frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+  ; CHECK-NEXT:   frame-destroy CFI_INSTRUCTION restore $w30
+  ; CHECK-NEXT:   frame-destroy CFI_INSTRUCTION restore $w29
+  ; CHECK-NEXT:   RET undef $lr, implicit killed $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.8.if.end7:
+  ; CHECK-NEXT:   CFI_INSTRUCTION restore_state
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $wzr, 0
+  ; CHECK-NEXT:   $sp = frame-destroy ADDXri $sp, 7, 12
+  ; CHECK-NEXT:   frame-destroy CFI_INSTRUCTION def_cfa_offset 1344
+  ; CHECK-NEXT:   $sp = frame-destroy ADDXri $sp, 1328, 0
+  ; CHECK-NEXT:   frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+  ; CHECK-NEXT:   early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+  ; CHECK-NEXT:   frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+  ; CHECK-NEXT:   frame-destroy CFI_INSTRUCTION restore $w30
+  ; CHECK-NEXT:   frame-destroy CFI_INSTRUCTION restore $w29
+  ; CHECK-NEXT:   RET undef $lr, implicit killed $w0
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+    liveins: $w0, $lr, $fp
+
+    early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store (s64) into %stack.2), (store (s64) into %stack.1)
+    frame-setup CFI_INSTRUCTION def_cfa_offset 16
+    frame-setup CFI_INSTRUCTION offset $w30, -8
+    frame-setup CFI_INSTRUCTION offset $w29, -16
+    $x9 = frame-setup SUBXri $sp, 7, 12
+    frame-setup CFI_INSTRUCTION def_cfa $w9, 28688
+
+  bb.1.entry:
+    successors: %bb.2(0x40000000), %bb.1(0x40000000)
+    liveins: $x9
+
+    $sp = frame-setup SUBXri $sp, 1, 12
+    $xzr = frame-setup SUBSXrx64 $sp, $x9, 24, implicit-def $nzcv
+    frame-setup STRXui $xzr, $sp, 0
+    frame-setup Bcc 1, %bb.1, implicit killed $nzcv
+
+  bb.2.entry:
+    successors: %bb.6(0x20000000), %bb.3(0x60000000)
+    liveins: $w0
+
+    frame-setup CFI_INSTRUCTION def_cfa_register $wsp
+    $sp = frame-setup SUBXri $sp, 1328, 0
+    frame-setup CFI_INSTRUCTION def_cfa_offset 30016
+    frame-setup STRXui $xzr, $sp, 0
+    CBZW renamable $w0, %bb.6
+
+  bb.3.entry:
+    successors: %bb.7(0x2aaaaaab), %bb.4(0x55555555)
+    liveins: $w0
+
+    dead $wzr = SUBSWri renamable $w0, 2, 0, implicit-def $nzcv
+    Bcc 0, %bb.7, implicit killed $nzcv
+
+  bb.4.entry:
+    successors: %bb.5(0x40000000), %bb.8(0x40000000)
+    liveins: $w0
+
+    dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv
+    Bcc 1, %bb.8, implicit killed $nzcv
+
+  bb.5.if.then2:
+    liveins: $w0
+
+    BL @g1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0
+    renamable $w0 = nsw ADDWri killed renamable $w0, 1, 0
+    $sp = frame-destroy ADDXri $sp, 7, 12
+    frame-destroy CFI_INSTRUCTION def_cfa_offset 1344
+    $sp = frame-destroy ADDXri $sp, 1328, 0
+    frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+    early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+    frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+    frame-destroy CFI_INSTRUCTION restore $w30
+    frame-destroy CFI_INSTRUCTION restore $w29
+    RET undef $lr, implicit killed $w0
+
+  bb.6:
+    renamable $w0 = MOVZWi 1, 0
+    $sp = frame-destroy ADDXri $sp, 7, 12
+    frame-destroy CFI_INSTRUCTION def_cfa_offset 1344
+    $sp = frame-destroy ADDXri $sp, 1328, 0
+    frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+    early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+    frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+    frame-destroy CFI_INSTRUCTION restore $w30
+    frame-destroy CFI_INSTRUCTION restore $w29
+    RET undef $lr, implicit killed $w0
+
+  bb.7.if.then5:
+    liveins: $w0
+
+    BL @g0, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0
+    renamable $w8 = MOVZWi 1, 0
+    $w0 = SUBWrs killed renamable $w8, killed renamable $w0, 0
+    $sp = frame-destroy ADDXri $sp, 7, 12
+    frame-destroy CFI_INSTRUCTION def_cfa_offset 1344
+    $sp = frame-destroy ADDXri $sp, 1328, 0
+    frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+    early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+    frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+    frame-destroy CFI_INSTRUCTION restore $w30
+    frame-destroy CFI_INSTRUCTION restore $w29
+    RET undef $lr, implicit killed $w0
+
+  bb.8.if.end7:
+    $w0 = ORRWrs $wzr, $wzr, 0
+    $sp = frame-destroy ADDXri $sp, 7, 12
+    frame-destroy CFI_INSTRUCTION def_cfa_offset 1344
+    $sp = frame-destroy ADDXri $sp, 1328, 0
+    frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+    early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+    frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+    frame-destroy CFI_INSTRUCTION restore $w30
+    frame-destroy CFI_INSTRUCTION restore $w29
+    RET undef $lr, implicit killed $w0
+
+...

>From 88afc3def6c2dfa80f83ced58d010efd1a608999 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Mon, 2 Oct 2023 14:46:27 +0100
Subject: [PATCH 2/4] [CFIFixup] Allow function prologues to span more than one
 basic block

The CFIFixup pass assumes a function prologue is contained in a single
basic block. This assumption is broken with upcoming support for stack
probing (`-fstack-clash-protection`) in AArch64 - the emitted probing
sequence in a prologue may contain loops, i.e. more than one basic
block. The generated CFG is not arbitrary though:
 * CFI instructions are outside of any loops
 * for any two CFI instructions of the function prologue one dominates
   and is post-dominated by the other

Thus, for the prologue CFI instructions, if one is
executed then all are executed, there is a total order of
executions, and the last instruction in that order can be considered
the end of the prologoue for the purpose of inserting the initial
`.cfi_remember_state` directive.

That last instruction is found by finding the first block in the
post-order traversal which contains prologue CFI instructions.
---
 llvm/lib/CodeGen/CFIFixup.cpp                 | 62 ++++++++++++-------
 .../cfi-fixup-multi-block-prologue.mir        |  7 ++-
 2 files changed, 43 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/CodeGen/CFIFixup.cpp b/llvm/lib/CodeGen/CFIFixup.cpp
index 837dbd77d07361a..964a8d56511fa1b 100644
--- a/llvm/lib/CodeGen/CFIFixup.cpp
+++ b/llvm/lib/CodeGen/CFIFixup.cpp
@@ -10,20 +10,25 @@
 // This pass inserts the necessary  instructions to adjust for the inconsistency
 // of the call-frame information caused by final machine basic block layout.
 // The pass relies in constraints LLVM imposes on the placement of
-// save/restore points (cf. ShrinkWrap):
-// * there is a single basic block, containing the function prologue
+// save/restore points (cf. ShrinkWrap) and has certain preconditions about
+// placement of CFI instructions:
+// * for any two CFI instructions of the function prologue one dominates
+//   and is post-dominated by the other
 // * possibly multiple epilogue blocks, where each epilogue block is
 //   complete and self-contained, i.e. CSR restore instructions (and the
 //   corresponding CFI instructions are not split across two or more blocks.
-// * prologue and epilogue blocks are outside of any loops
-// Thus, during execution, at the beginning and at the end of each basic block
-// the function can be in one of two states:
+// * CFI instructions are not contained in any loops
+// Thus, during execution, at the beginning and at the end of each basic block,
+// following the prologue, the function can be in one of two states:
 //  - "has a call frame", if the function has executed the prologue, and
 //    has not executed any epilogue
 //  - "does not have a call frame", if the function has not executed the
 //    prologue, or has executed an epilogue
 // which can be computed by a single RPO traversal.
 
+// The location of the prologue is determined by finding the first block in the
+// post-order traversal which contains CFI instructions.
+
 // In order to accommodate backends which do not generate unwind info in
 // epilogues we compute an additional property "strong no call frame on entry",
 // which is set for the entry point of the function and for every block
@@ -85,10 +90,6 @@ static bool isPrologueCFIInstruction(const MachineInstr &MI) {
          MI.getFlag(MachineInstr::FrameSetup);
 }
 
-static bool containsPrologue(const MachineBasicBlock &MBB) {
-  return llvm::any_of(MBB.instrs(), isPrologueCFIInstruction);
-}
-
 static bool containsEpilogue(const MachineBasicBlock &MBB) {
   return llvm::any_of(llvm::reverse(MBB), [](const auto &MI) {
     return MI.getOpcode() == TargetOpcode::CFI_INSTRUCTION &&
@@ -96,6 +97,25 @@ static bool containsEpilogue(const MachineBasicBlock &MBB) {
   });
 }
 
+static MachineBasicBlock *
+findPrologueEnd(MachineFunction &MF, MachineBasicBlock::iterator &PrologueEnd) {
+  MachineBasicBlock *PrologueBlock = nullptr;
+  for (auto It = po_begin(&MF.front()), End = po_end(&MF.front()); It != End;
+       ++It) {
+    MachineBasicBlock *MBB = *It;
+    llvm::for_each(MBB->instrs(), [&](MachineInstr &MI) {
+      if (isPrologueCFIInstruction(MI)) {
+        PrologueBlock = MBB;
+        PrologueEnd = std::next(MI.getIterator());
+      }
+    });
+    if (PrologueBlock)
+      return PrologueBlock;
+  }
+
+  return nullptr;
+}
+
 bool CFIFixup::runOnMachineFunction(MachineFunction &MF) {
   const TargetFrameLowering &TFL = *MF.getSubtarget().getFrameLowering();
   if (!TFL.enableCFIFixup(MF))
@@ -105,6 +125,14 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) {
   if (NumBlocks < 2)
     return false;
 
+  // Find the prologue and the point where we can issue the first
+  // `.cfi_remember_state`.
+
+  MachineBasicBlock::iterator PrologueEnd;
+  MachineBasicBlock *PrologueBlock = findPrologueEnd(MF, PrologueEnd);
+  if (PrologueBlock == nullptr)
+    return false;
+
   struct BlockFlags {
     bool Reachable : 1;
     bool StrongNoFrameOnEntry : 1;
@@ -116,21 +144,15 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) {
   BlockInfo[0].StrongNoFrameOnEntry = true;
 
   // Compute the presence/absence of frame at each basic block.
-  MachineBasicBlock *PrologueBlock = nullptr;
   ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
   for (MachineBasicBlock *MBB : RPOT) {
     BlockFlags &Info = BlockInfo[MBB->getNumber()];
 
     // Set to true if the current block contains the prologue or the epilogue,
     // respectively.
-    bool HasPrologue = false;
+    bool HasPrologue = MBB == PrologueBlock;
     bool HasEpilogue = false;
 
-    if (!PrologueBlock && !Info.HasFrameOnEntry && containsPrologue(*MBB)) {
-      PrologueBlock = MBB;
-      HasPrologue = true;
-    }
-
     if (Info.HasFrameOnEntry || HasPrologue)
       HasEpilogue = containsEpilogue(*MBB);
 
@@ -149,9 +171,6 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) {
     }
   }
 
-  if (!PrologueBlock)
-    return false;
-
   // Walk the blocks of the function in "physical" order.
   // Every block inherits the frame state (as recorded in the unwind tables)
   // of the previous block. If the intended frame state is different, insert
@@ -162,10 +181,7 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) {
   // insert a `.cfi_remember_state`, in the case that the current block needs a
   // `.cfi_restore_state`.
   MachineBasicBlock *InsertMBB = PrologueBlock;
-  MachineBasicBlock::iterator InsertPt = PrologueBlock->begin();
-  for (MachineInstr &MI : *PrologueBlock)
-    if (isPrologueCFIInstruction(MI))
-      InsertPt = std::next(MI.getIterator());
+  MachineBasicBlock::iterator InsertPt = PrologueEnd;
 
   assert(InsertPt != PrologueBlock->begin() &&
          "Inconsistent notion of \"prologue block\"");
diff --git a/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir b/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir
index ddd9a9eaef55efb..31fa3832367becc 100644
--- a/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir
+++ b/llvm/test/CodeGen/AArch64/cfi-fixup-multi-block-prologue.mir
@@ -1,3 +1,4 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
 # RUN: llc -run-pass=cfi-fixup %s -o - | FileCheck %s
 --- |
   source_filename = "cfi-fixup.ll"
@@ -111,9 +112,8 @@ body:             |
   ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $w29, -16
   ; CHECK-NEXT:   $x9 = frame-setup SUBXri $sp, 7, 12
   ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa $w9, 28688
-  ; CHECK-NEXT:   CFI_INSTRUCTION remember_state
-  
-  ; CHECK:      bb.1.entry:
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1.entry:
   ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
   ; CHECK-NEXT:   liveins: $x9
   ; CHECK-NEXT: {{  $}}
@@ -129,6 +129,7 @@ body:             |
   ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa_register $wsp
   ; CHECK-NEXT:   $sp = frame-setup SUBXri $sp, 1328, 0
   ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa_offset 30016
+  ; CHECK-NEXT:   CFI_INSTRUCTION remember_state
   ; CHECK-NEXT:   frame-setup STRXui $xzr, $sp, 0
   ; CHECK-NEXT:   CBZW renamable $w0, %bb.6
   ; CHECK-NEXT: {{  $}}

>From 900f570784084c7f6adcfcf0f83cb4ddef4a8c13 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Sat, 28 Oct 2023 13:33:28 +0100
Subject: [PATCH 3/4] Reverse iteration within a block when looking for
 prologue CFI insns

---
 llvm/lib/CodeGen/CFIFixup.cpp | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/CodeGen/CFIFixup.cpp b/llvm/lib/CodeGen/CFIFixup.cpp
index 964a8d56511fa1b..40a2a3a142e1758 100644
--- a/llvm/lib/CodeGen/CFIFixup.cpp
+++ b/llvm/lib/CodeGen/CFIFixup.cpp
@@ -99,20 +99,16 @@ static bool containsEpilogue(const MachineBasicBlock &MBB) {
 
 static MachineBasicBlock *
 findPrologueEnd(MachineFunction &MF, MachineBasicBlock::iterator &PrologueEnd) {
-  MachineBasicBlock *PrologueBlock = nullptr;
   for (auto It = po_begin(&MF.front()), End = po_end(&MF.front()); It != End;
        ++It) {
     MachineBasicBlock *MBB = *It;
-    llvm::for_each(MBB->instrs(), [&](MachineInstr &MI) {
-      if (isPrologueCFIInstruction(MI)) {
-        PrologueBlock = MBB;
-        PrologueEnd = std::next(MI.getIterator());
-      }
-    });
-    if (PrologueBlock)
-      return PrologueBlock;
+    for (MachineInstr &MI : reverse(MBB->instrs())) {
+      if (!isPrologueCFIInstruction(MI))
+        continue;
+      PrologueEnd = std::next(MI.getIterator());
+      return MBB;
+    }
   }
-
   return nullptr;
 }
 

>From b53e545c88232e9244479acb70e3b6e1bba202b3 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Tue, 7 Nov 2023 14:08:26 +0000
Subject: [PATCH 4/4] Use simple reverse traversal of basic blocks

---
 llvm/lib/CodeGen/CFIFixup.cpp | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/CodeGen/CFIFixup.cpp b/llvm/lib/CodeGen/CFIFixup.cpp
index 40a2a3a142e1758..61888a42666524b 100644
--- a/llvm/lib/CodeGen/CFIFixup.cpp
+++ b/llvm/lib/CodeGen/CFIFixup.cpp
@@ -12,12 +12,14 @@
 // The pass relies in constraints LLVM imposes on the placement of
 // save/restore points (cf. ShrinkWrap) and has certain preconditions about
 // placement of CFI instructions:
-// * for any two CFI instructions of the function prologue one dominates
-//   and is post-dominated by the other
-// * possibly multiple epilogue blocks, where each epilogue block is
-//   complete and self-contained, i.e. CSR restore instructions (and the
-//   corresponding CFI instructions are not split across two or more blocks.
-// * CFI instructions are not contained in any loops
+// * For any two CFI instructions of the function prologue one dominates
+//   and is post-dominated by the other.
+// * The function possibly contains multiple epilogue blocks, where each
+//   epilogue block is complete and self-contained, i.e. CSR restore
+//   instructions (and the corresponding CFI instructions)
+//   are not split across two or more blocks.
+// * CFI instructions are not contained in any loops.
+
 // Thus, during execution, at the beginning and at the end of each basic block,
 // following the prologue, the function can be in one of two states:
 //  - "has a call frame", if the function has executed the prologue, and
@@ -27,7 +29,7 @@
 // which can be computed by a single RPO traversal.
 
 // The location of the prologue is determined by finding the first block in the
-// post-order traversal which contains CFI instructions.
+// reverse traversal which contains CFI instructions.
 
 // In order to accommodate backends which do not generate unwind info in
 // epilogues we compute an additional property "strong no call frame on entry",
@@ -99,14 +101,16 @@ static bool containsEpilogue(const MachineBasicBlock &MBB) {
 
 static MachineBasicBlock *
 findPrologueEnd(MachineFunction &MF, MachineBasicBlock::iterator &PrologueEnd) {
-  for (auto It = po_begin(&MF.front()), End = po_end(&MF.front()); It != End;
-       ++It) {
-    MachineBasicBlock *MBB = *It;
-    for (MachineInstr &MI : reverse(MBB->instrs())) {
+  // Even though we should theoretically traverse the blocks in post-order, we
+  // can't encode correctly cases where prologue blocks are not laid out in
+  // topological order. Then, assuming topological order, we can just traverse
+  // the function in reverse.
+  for (MachineBasicBlock &MBB : reverse(MF)) {
+    for (MachineInstr &MI : reverse(MBB.instrs())) {
       if (!isPrologueCFIInstruction(MI))
         continue;
       PrologueEnd = std::next(MI.getIterator());
-      return MBB;
+      return &MBB;
     }
   }
   return nullptr;
@@ -123,7 +127,6 @@ bool CFIFixup::runOnMachineFunction(MachineFunction &MF) {
 
   // Find the prologue and the point where we can issue the first
   // `.cfi_remember_state`.
-
   MachineBasicBlock::iterator PrologueEnd;
   MachineBasicBlock *PrologueBlock = findPrologueEnd(MF, PrologueEnd);
   if (PrologueBlock == nullptr)



More information about the llvm-commits mailing list