[llvm] [AArch64] Refactor allocation of locals and stack realignment (PR #72028)

Sat Nov 11 08:10:29 PST 2023

llvmbot wrote:




@llvm/pr-subscribers-backend-aarch64

Author: Momchil Velikov (momchil-velikov)

<details>
<summary>Changes</summary>

Factor out some stack allocation in a separate function. This patch splits out the generic portion of a larger refactoring done as a part of stack clash protection support.

---

Patch is 27.57 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/72028.diff


11 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.cpp (+59-55) 
- (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.h (+5) 
- (modified) llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir (+2-2) 
- (modified) llvm/test/CodeGen/AArch64/framelayout-sve-fixed-width-access.mir (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir (+2-2) 
- (modified) llvm/test/CodeGen/AArch64/framelayout-sve.mir (+28-27) 
- (modified) llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/spill-stack-realignment.mir (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/stack-guard-sve.ll (+2-2) 
- (modified) llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll (+2-2) 
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll (+2-2) 


``````````diff

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 18e3aa2b0ecec86..5f617e3a176a16e 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -296,6 +296,7 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
 static bool produceCompactUnwindFrame(MachineFunction &MF);
 static bool needsWinCFI(const MachineFunction &MF);
 static StackOffset getSVEStackSize(const MachineFunction &MF);
+static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);
 
 /// Returns true if a homogeneous prolog or epilog code can be emitted
 /// for the size optimization. If possible, a frame helper call is injected.
@@ -688,6 +689,44 @@ void AArch64FrameLowering::emitCalleeSavedSVERestores(
   emitCalleeSavedRestores(MBB, MBBI, true);
 }
 
+void AArch64FrameLowering::allocateStackSpace(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    bool NeedsRealignment, StackOffset AllocSize, bool NeedsWinCFI,
+    bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset) const {
+
+  if (!AllocSize)
+    return;
+
+  DebugLoc DL;
+  MachineFunction &MF = *MBB.getParent();
+  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+  AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  Register TargetReg =
+      NeedsRealignment ? findScratchNonCalleeSaveRegister(&MBB) : AArch64::SP;
+  // SUB Xd/SP, SP, AllocSize
+  emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII,
+                  MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
+                  EmitCFI, InitialOffset);
+
+  if (NeedsRealignment) {
+    const int64_t MaxAlign = MFI.getMaxAlign().value();
+    const uint64_t AndMask = ~(MaxAlign - 1);
+    // AND SP, Xd, 0b11111...0000
+    BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP)
+        .addReg(TargetReg, RegState::Kill)
+        .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
+        .setMIFlags(MachineInstr::FrameSetup);
+    AFI.setStackRealigned(true);
+
+    // No need for SEH instructions here; if we're realigning the stack,
+    // we've set a frame pointer and already finished the SEH prologue.
+    assert(!NeedsWinCFI);
+  }
+}
+
 static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) {
   switch (Reg.id()) {
   default:
@@ -1774,7 +1813,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
     }
   }
 
-  StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {};
+  StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
   MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI;
 
   // Process the SVE callee-saves to determine what space needs to be
@@ -1787,67 +1826,32 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
       ++MBBI;
     CalleeSavesEnd = MBBI;
 
-    AllocateBefore = StackOffset::getScalable(CalleeSavedSize);
-    AllocateAfter = SVEStackSize - AllocateBefore;
+    SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
+    SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
   }
 
   // Allocate space for the callee saves (if any).
-  emitFrameOffset(
-      MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP, -AllocateBefore, TII,
-      MachineInstr::FrameSetup, false, false, nullptr,
-      EmitAsyncCFI && !HasFP && AllocateBefore,
-      StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
+  StackOffset CFAOffset =
+      StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
+  allocateStackSpace(MBB, CalleeSavesBegin, false, SVECalleeSavesSize, false,
+                     nullptr, EmitAsyncCFI && !HasFP, CFAOffset);
+  CFAOffset += SVECalleeSavesSize;
 
   if (EmitAsyncCFI)
     emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);
 
-  // Finally allocate remaining SVE stack space.
-  emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP,
-                  -AllocateAfter, TII, MachineInstr::FrameSetup, false, false,
-                  nullptr, EmitAsyncCFI && !HasFP && AllocateAfter,
-                  AllocateBefore + StackOffset::getFixed(
-                                       (int64_t)MFI.getStackSize() - NumBytes));
-
-  // Allocate space for the rest of the frame.
-  if (NumBytes) {
-    unsigned scratchSPReg = AArch64::SP;
-
-    if (NeedsRealignment) {
-      scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
-      assert(scratchSPReg != AArch64::NoRegister);
-    }
-
-    // If we're a leaf function, try using the red zone.
-    if (!canUseRedZone(MF)) {
-      // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
-      // the correct value here, as NumBytes also includes padding bytes,
-      // which shouldn't be counted here.
-      emitFrameOffset(
-          MBB, MBBI, DL, scratchSPReg, AArch64::SP,
-          StackOffset::getFixed(-NumBytes), TII, MachineInstr::FrameSetup,
-          false, NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
-          SVEStackSize +
-              StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
-    }
-    if (NeedsRealignment) {
-      assert(MFI.getMaxAlign() > Align(1));
-      assert(scratchSPReg != AArch64::SP);
-
-      // SUB X9, SP, NumBytes
-      //   -- X9 is temporary register, so shouldn't contain any live data here,
-      //   -- free to use. This is already produced by emitFrameOffset above.
-      // AND SP, X9, 0b11111...0000
-      uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
-
-      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
-          .addReg(scratchSPReg, RegState::Kill)
-          .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
-      AFI->setStackRealigned(true);
-
-      // No need for SEH instructions here; if we're realigning the stack,
-      // we've set a frame pointer and already finished the SEH prologue.
-      assert(!NeedsWinCFI);
-    }
+  // Allocate space for the rest of the frame including SVE locals. Align the
+  // stack as necessary.
+  assert(!(canUseRedZone(MF) && NeedsRealignment) &&
+         "Cannot use redzone with stack realignment");
+  if (!canUseRedZone(MF)) {
+    // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
+    // the correct value here, as NumBytes also includes padding bytes,
+    // which shouldn't be counted here.
+    allocateStackSpace(MBB, CalleeSavesEnd, NeedsRealignment,
+                       SVELocalsSize + StackOffset::getFixed(NumBytes),
+                       NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
+                       CFAOffset);
   }
 
   // If we need a base pointer, set it up here. It's whatever the value of the
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 147b5c181be5e53..f3313f3b53fffe0 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -150,6 +150,11 @@ class AArch64FrameLowering : public TargetFrameLowering {
                                   MachineBasicBlock::iterator MBBI) const;
   void emitCalleeSavedSVERestores(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI) const;
+  void allocateStackSpace(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator MBBI,
+                          bool NeedsRealignment, StackOffset AllocSize,
+                          bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
+                          StackOffset InitialOffset) const;
 
   /// Emit target zero call-used regs.
   void emitZeroCallUsedRegs(BitVector RegsToZero,
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir
index 623c0f240be4fd7..8d39b881395cdf6 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir
@@ -4,8 +4,8 @@
 name: hasBasepointer
 # CHECK-LABEL: name: hasBasepointer
 # CHECK: bb.0:
-# CHECK:      $sp = frame-setup ADDVL_XXI $sp, -1
-# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK:           $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT:      $sp = frame-setup ADDVL_XXI $sp, -1
 # CHECK-NEXT: $x19 = ADDXri $sp, 0, 0
 # CHECK:      STRXui $x0, $x19, 0
 tracksRegLiveness: true
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-fixed-width-access.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-fixed-width-access.mir
index e367a380f8ba9f0..35fd7ca77d5cf3e 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve-fixed-width-access.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve-fixed-width-access.mir
@@ -7,9 +7,9 @@
   ; CHECK:       // %bb.0: // %entry
   ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
   ; CHECK-NEXT:    mov x29, sp
+  ; CHECK-NEXT:    sub sp, sp, #2064
   ; CHECK-NEXT:    addvl sp, sp, #-32
   ; CHECK-NEXT:    addvl sp, sp, #-28
-  ; CHECK-NEXT:    sub sp, sp, #2064
   ; CHECK-NEXT:    ldr x8, [sp, #2048]
   ; CHECK-NEXT:    addvl sp, sp, #31
   ; CHECK-NEXT:    addvl sp, sp, #29
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir
index d54f67634d02a7b..680f9c335c250c5 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir
@@ -4,9 +4,9 @@
 name: LateScavengingSlot
 # CHECK-LABEL: name: LateScavengingSlot
 # CHECK: bb.0:
-# CHECK:      $sp = frame-setup ADDVL_XXI $sp, -1
-# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 8, 12
+# CHECK:      $sp = frame-setup SUBXri $sp, 8, 12
 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
 # CHECK:      STRXui killed $[[SCRATCH:x[0-9]+]], $sp, 0
 # CHECK-NEXT: $[[SCRATCH]] = ADDVL_XXI $fp, -1
 # CHECK-NEXT: STRXui $x0, killed $[[SCRATCH]], 0
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve.mir b/llvm/test/CodeGen/AArch64/framelayout-sve.mir
index 7c87587c6dc4e2c..213d7919e4a7270 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve.mir
@@ -60,10 +60,10 @@
 # CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
-# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22
 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
-# CHECK-NEXT: CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22
 
 # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2
 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 32
@@ -77,7 +77,7 @@
 # ASM-LABEL: test_allocate_sve:
 # ASM:       .cfi_def_cfa_offset 16
 # ASM-NEXT:  .cfi_offset w29, -16
-# ASM:       .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
+# ASM:       .cfi_def_cfa_offset 32
 # ASM:       .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 16 * VG
 # ASM:       .cfi_def_cfa wsp, 32
 # ASM:       .cfi_def_cfa_offset 16
@@ -87,7 +87,7 @@
 #
 # UNWINDINFO:       DW_CFA_def_cfa_offset: +16
 # UNWINDINFO-NEXT:  DW_CFA_offset: reg29 -16
-# UNWINDINFO:       DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
+# UNWINDINFO:       DW_CFA_def_cfa_offset: +32 
 # UNWINDINFO:       DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
 # UNWINDINFO:       DW_CFA_def_cfa: reg31 +32
 # UNWINDINFO:       DW_CFA_def_cfa_offset: +16
@@ -125,10 +125,11 @@ body:             |
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w20, -8
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w21, -16
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -32
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
-# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22
 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 48
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22
+
 #
 # CHECK-NEXT: $x20 = IMPLICIT_DEF
 # CHECK-NEXT: $x21 = IMPLICIT_DEF
@@ -149,7 +150,7 @@ body:             |
 # ASM:       .cfi_offset w20, -8
 # ASM-NEXT:  .cfi_offset w21, -16
 # ASM-NEXT:  .cfi_offset w29, -32
-# ASM:       .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 16 * VG
+# ASM:       .cfi_def_cfa_offset 48
 # ASM:       .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 16 * VG
 #
 # ASM:       .cfi_def_cfa wsp, 48
@@ -164,7 +165,7 @@ body:             |
 # UNWINDINFO:       DW_CFA_offset: reg20 -8
 # UNWINDINFO-NEXT:  DW_CFA_offset: reg21 -16
 # UNWINDINFO-NEXT:  DW_CFA_offset: reg29 -32
-# UNWINDINFO:       DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
+# UNWINDINFO:       DW_CFA_def_cfa_offset: +48 
 # UNWINDINFO:       DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
 #
 # UNWINDINFO:       DW_CFA_def_cfa: reg31 +48
@@ -205,9 +206,9 @@ body:             |
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 16
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
 # CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 16, 0
-# CHECK-NEXT: $sp = ANDXri killed $[[TMP]]
+# CHECK-NEXT: $[[TMP]] = frame-setup ADDVL_XXI $[[TMP]], -2
+# CHECK-NEXT: $sp = frame-setup ANDXri killed $[[TMP]]
 # CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0
 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16
 # CHECK-NEXT: $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2
@@ -267,9 +268,9 @@ body:             |
 # CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
-# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22
 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22
 
 # CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16
@@ -292,7 +293,7 @@ body:             |
 # ASM-LABEL:  test_address_sve:
 # ASM:       .cfi_def_cfa_offset 16
 # ASM-NEXT:  .cfi_offset w29, -16
-# ASM:       .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
+# ASM:       .cfi_def_cfa_offset 32
 # ASM:       .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 24 * VG
 #
 # ASM:       .cfi_def_cfa wsp, 32
@@ -302,7 +303,7 @@ body:             |
 #
 # UNWINDINFO:       DW_CFA_def_cfa_offset: +16
 # UNWINDINFO-NEXT:  DW_CFA_offset: reg29 -16
-# UNWINDINFO:       DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
+# UNWINDINFO:       DW_CFA_def_cfa_offset: +32
 # UNWINDINFO:       DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
 #
 # UNWINDINFO:       DW_CFA_def_cfa: reg31 +32
@@ -353,8 +354,8 @@ body:             |
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 16
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
 
 # CHECK-NEXT: STR_ZXI $z0, $fp, -1
 # CHECK-NEXT: STR_ZXI $z1, $fp, -2
@@ -429,9 +430,9 @@ body:             |
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
 
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
-# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22
 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22
 # CHECK:      $[[TMP:x[0-9]+]] = ADDVL_XXI $sp, 1
 # CHECK-NEXT: $x0 = LDRXui killed $[[TMP]], 4
@@ -448,7 +449,7 @@ body:             |
 # ASM-LABEL: test_stack_arg_sve:
 # ASM:       .cfi_def_cfa_offset 16
 # ASM-NEXT:  .cfi_offset w29, -16
-# ASM:       .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+# ASM:       .cfi_def_cfa_offset 32
 # ASM:       .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 8 * VG
 #
 # ASM:       .cfi_def_cfa wsp, 32
@@ -458,7 +459,7 @@ body:             |
 
 # UNWINDINFO:      DW_CFA_def_cfa_offset: +16
 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
-# UNWINDINFO:      DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
+# UNWINDINFO:      DW_CFA_def_cfa_offset: +32
 # UNWINDINFO:      DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
 #
 # UNWINDINFO:      DW_CFA_def_cfa: reg31 +32
@@ -640,8 +641,8 @@ body:             |
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w19, -16
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -24
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -32
-# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
 # CHECK-NEXT: $x19 = ADDXri $sp, 0, 0
 # CHECK-NEXT: STRXui $xzr, $x19, 0
 # CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0
@@ -863,9 +864,9 @@ body:             |
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4d, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4e, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4f, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22
-# CHECK:      $sp = frame-setup ADDVL_XXI $sp, -1
-# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22
 # CHECK:      $sp = frame-setup SUBXri $sp, 32, 0
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22
+# CHECK:      $sp = frame-setup ADDVL_XXI $sp, -1
 # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22
 
 # CHECK:      $sp = frame-destroy ADDXri $sp, 32, 0
@@ -916,7 +917,7 @@ bod...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/72028