[llvm] 6e54fcc - [AArch64] Emit fewer CFI instructions for synchronous unwind tables

Igor Kudrin via llvm-commits llvm-commits at lists.llvm.org
Sat Jul 1 17:03:46 PDT 2023


Author: Igor Kudrin
Date: 2023-07-01T16:31:09-07:00
New Revision: 6e54fccede402c9ed0e8038aa258a99c5a2773e5

URL: https://github.com/llvm/llvm-project/commit/6e54fccede402c9ed0e8038aa258a99c5a2773e5
DIFF: https://github.com/llvm/llvm-project/commit/6e54fccede402c9ed0e8038aa258a99c5a2773e5.diff

LOG: [AArch64] Emit fewer CFI instructions for synchronous unwind tables

The instruction-precise, or asynchronous, unwind tables usually take up
much more space than the synchronous ones. If a user is concerned about
the load size of the program and does not need the features provided
with the asynchronous tables, the compiler should be able to generate
the more compact variant.

This patch changes the generation of CFI instructions for these cases so
that they all come in one chunk in the prolog; it emits only one
`.cfi_def_cfa*` instruction followed by `.cfi_offset` ones after all
stack adjustments and register spills, and avoids generating CFI
instructions in the epilog(s) as well as any other unneeded CFI
instructions such as `.cfi_remember_state` and `.cfi_restore_state`.
Effectively, it reverses the effects of D111411 and D114545 on functions
with the `uwtable(sync)` attribute. As a side effect, it also restores
the previous behavior for functions that have neither the `uwtable` nor
the `nounwind` attribute.

Differential Revision: https://reviews.llvm.org/D153098

Added: 
    llvm/test/CodeGen/AArch64/cfi-sync-async.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
    llvm/lib/Target/AArch64/AArch64FrameLowering.h
    llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll
    llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll
    llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
    llvm/test/CodeGen/AArch64/aarch64-fastcc-stackup.ll
    llvm/test/CodeGen/AArch64/aarch64-mops-consecutive.ll
    llvm/test/CodeGen/AArch64/aarch64-mops.ll
    llvm/test/CodeGen/AArch64/active_lane_mask.ll
    llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
    llvm/test/CodeGen/AArch64/addsub.ll
    llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll
    llvm/test/CodeGen/AArch64/arm64-custom-call-saved-reg.ll
    llvm/test/CodeGen/AArch64/arm64-fp128.ll
    llvm/test/CodeGen/AArch64/arm64-large-frame.ll
    llvm/test/CodeGen/AArch64/arm64-memset-inline.ll
    llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
    llvm/test/CodeGen/AArch64/arm64-neon-vector-list-spill.ll
    llvm/test/CodeGen/AArch64/arm64-patchpoint.ll
    llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
    llvm/test/CodeGen/AArch64/arm64-virtual_base.ll
    llvm/test/CodeGen/AArch64/cmp-select-sign.ll
    llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
    llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
    llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
    llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
    llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
    llvm/test/CodeGen/AArch64/i128-math.ll
    llvm/test/CodeGen/AArch64/isinf.ll
    llvm/test/CodeGen/AArch64/large-stack-cmp.ll
    llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll
    llvm/test/CodeGen/AArch64/machine-combiner.ll
    llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll
    llvm/test/CodeGen/AArch64/machine-outliner-throw2.ll
    llvm/test/CodeGen/AArch64/neg-imm.ll
    llvm/test/CodeGen/AArch64/peephole-and-tst.ll
    llvm/test/CodeGen/AArch64/ragreedy-csr.ll
    llvm/test/CodeGen/AArch64/regress-tblgen-chains.ll
    llvm/test/CodeGen/AArch64/shrink-wrap-byval-inalloca-preallocated.ll
    llvm/test/CodeGen/AArch64/sibling-call.ll
    llvm/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll
    llvm/test/CodeGen/AArch64/stack-protector-darwin-got.ll
    llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll
    llvm/test/CodeGen/AArch64/sve-alloca.ll
    llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
    llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-permute-zip-uzp-trn.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll
    llvm/test/CodeGen/AArch64/sve-fp-reduce.ll
    llvm/test/CodeGen/AArch64/sve-insert-vector.ll
    llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll
    llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll
    llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll
    llvm/test/CodeGen/AArch64/sve-trunc.ll
    llvm/test/CodeGen/AArch64/sve2p1-intrinsics-loads.ll
    llvm/test/CodeGen/AArch64/swifterror.ll
    llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 72d69a8cf0a65e..f13344a27bf8b3 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1364,6 +1364,27 @@ static void emitShadowCallStackEpilogue(const TargetInstrInfo &TII,
   }
 }
 
+// Define the current CFA rule to use the provided FP.
+static void emitDefineCFAWithFP(MachineFunction &MF, MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MBBI,
+                                const DebugLoc &DL, unsigned FixedObject) {
+  const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
+  const AArch64RegisterInfo *TRI = STI.getRegisterInfo();
+  const TargetInstrInfo *TII = STI.getInstrInfo();
+  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+
+  const int OffsetToFirstCalleeSaveFromFP =
+      AFI->getCalleeSaveBaseToFrameRecordOffset() -
+      AFI->getCalleeSavedStackSize();
+  Register FramePtr = TRI->getFrameRegister(MF);
+  unsigned Reg = TRI->getDwarfRegNum(FramePtr, true);
+  unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
+      nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
+  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+      .addCFIIndex(CFIIndex)
+      .setMIFlags(MachineInstr::FrameSetup);
+}
+
 void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                         MachineBasicBlock &MBB) const {
   MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -1375,6 +1396,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   MachineModuleInfo &MMI = MF.getMMI();
   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
   bool EmitCFI = AFI->needsDwarfUnwindInfo(MF);
+  bool EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
   bool HasFP = hasFP(MF);
   bool NeedsWinCFI = needsWinCFI(MF);
   bool HasWinCFI = false;
@@ -1535,7 +1557,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
     emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                     StackOffset::getFixed(-NumBytes), TII,
                     MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
-                    EmitCFI);
+                    EmitAsyncCFI);
     NumBytes = 0;
   } else if (HomPrologEpilog) {
     // Stack has been already adjusted.
@@ -1543,7 +1565,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   } else if (PrologueSaveSize != 0) {
     MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
         MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI,
-        EmitCFI);
+        EmitAsyncCFI);
     NumBytes -= PrologueSaveSize;
   }
   assert(NumBytes >= 0 && "Negative stack allocation size!?");
@@ -1604,25 +1626,14 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
         NeedsWinCFI = false;
       }
     }
-    if (EmitCFI) {
-      // Define the current CFA rule to use the provided FP.
-      const int OffsetToFirstCalleeSaveFromFP =
-          AFI->getCalleeSaveBaseToFrameRecordOffset() -
-          AFI->getCalleeSavedStackSize();
-      Register FramePtr = RegInfo->getFrameRegister(MF);
-      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
-      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
-          nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
-      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
-          .addCFIIndex(CFIIndex)
-          .setMIFlags(MachineInstr::FrameSetup);
-    }
+    if (EmitAsyncCFI)
+      emitDefineCFAWithFP(MF, MBB, MBBI, DL, FixedObject);
   }
 
   // Now emit the moves for whatever callee saved regs we have (including FP,
   // LR if those are saved). Frame instructions for SVE register are emitted
   // later, after the instruction which actually save SVE regs.
-  if (EmitCFI)
+  if (EmitAsyncCFI)
     emitCalleeSavedGPRLocations(MBB, MBBI);
 
   // Alignment is required for the parent frame, not the funclet
@@ -1766,16 +1777,16 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   emitFrameOffset(
       MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP, -AllocateBefore, TII,
       MachineInstr::FrameSetup, false, false, nullptr,
-      EmitCFI && !HasFP && AllocateBefore,
+      EmitAsyncCFI && !HasFP && AllocateBefore,
       StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
 
-  if (EmitCFI)
+  if (EmitAsyncCFI)
     emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);
 
   // Finally allocate remaining SVE stack space.
   emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP,
                   -AllocateAfter, TII, MachineInstr::FrameSetup, false, false,
-                  nullptr, EmitCFI && !HasFP && AllocateAfter,
+                  nullptr, EmitAsyncCFI && !HasFP && AllocateAfter,
                   AllocateBefore + StackOffset::getFixed(
                                        (int64_t)MFI.getStackSize() - NumBytes));
 
@@ -1796,7 +1807,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
       emitFrameOffset(
           MBB, MBBI, DL, scratchSPReg, AArch64::SP,
           StackOffset::getFixed(-NumBytes), TII, MachineInstr::FrameSetup,
-          false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
+          false, NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
           SVEStackSize +
               StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
     }
@@ -1858,6 +1869,23 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
       MBB.addLiveIn(AArch64::X1);
     }
   }
+
+  if (EmitCFI && !EmitAsyncCFI) {
+    if (HasFP) {
+      emitDefineCFAWithFP(MF, MBB, MBBI, DL, FixedObject);
+    } else {
+      StackOffset TotalSize =
+          SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
+      unsigned CFIIndex = MF.addFrameInst(createDefCFA(
+          *RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP, TotalSize,
+          /*LastAdjustmentWasScalable=*/false));
+      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+          .addCFIIndex(CFIIndex)
+          .setMIFlags(MachineInstr::FrameSetup);
+    }
+    emitCalleeSavedGPRLocations(MBB, MBBI);
+    emitCalleeSavedSVELocations(MBB, MBBI);
+  }
 }
 
 static void InsertReturnAddressAuth(MachineFunction &MF, MachineBasicBlock &MBB,
@@ -2229,6 +2257,11 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   }
 }
 
+bool AArch64FrameLowering::enableCFIFixup(MachineFunction &MF) const {
+  return TargetFrameLowering::enableCFIFixup(MF) &&
+         MF.getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo(MF);
+}
+
 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
 /// debug info.  It's the same as what we use for resolving the code-gen
 /// references for now.  FIXME: This can go wrong when references are

diff  --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index e8336651955299..147b5c181be5e5 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -35,6 +35,8 @@ class AArch64FrameLowering : public TargetFrameLowering {
   void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
 
+  bool enableCFIFixup(MachineFunction &MF) const override;
+
   bool canUseAsPrologue(const MachineBasicBlock &MBB) const override;
 
   StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll
index a46df80090167d..2d8a675c737c32 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll
@@ -31,13 +31,13 @@ define i32 @test_musttail_variadic_spill(i32 %arg0, ...) {
 ; CHECK-LABEL: test_musttail_variadic_spill:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #224
-; CHECK-NEXT:    .cfi_def_cfa_offset 224
 ; CHECK-NEXT:    stp x28, x27, [sp, #128] ; 16-byte Folded Spill
 ; CHECK-NEXT:    stp x26, x25, [sp, #144] ; 16-byte Folded Spill
 ; CHECK-NEXT:    stp x24, x23, [sp, #160] ; 16-byte Folded Spill
 ; CHECK-NEXT:    stp x22, x21, [sp, #176] ; 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #192] ; 16-byte Folded Spill
 ; CHECK-NEXT:    stp x29, x30, [sp, #208] ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 224
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    .cfi_offset w19, -24
@@ -103,13 +103,13 @@ define void @f_thunk(ptr %this, ...) {
 ; CHECK-LABEL: f_thunk:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #256
-; CHECK-NEXT:    .cfi_def_cfa_offset 256
 ; CHECK-NEXT:    stp x28, x27, [sp, #160] ; 16-byte Folded Spill
 ; CHECK-NEXT:    stp x26, x25, [sp, #176] ; 16-byte Folded Spill
 ; CHECK-NEXT:    stp x24, x23, [sp, #192] ; 16-byte Folded Spill
 ; CHECK-NEXT:    stp x22, x21, [sp, #208] ; 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #224] ; 16-byte Folded Spill
 ; CHECK-NEXT:    stp x29, x30, [sp, #240] ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 256
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    .cfi_offset w19, -24

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll b/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll
index 4a49bd7ad758bd..7fd71b26fa1ba7 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll
@@ -348,8 +348,8 @@ define dso_local void @trunc_from_larger_src_val(i64 %hold.4.lcssa, ptr %check17
 ; CHECK-LABEL: trunc_from_larger_src_val:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    str w0, [sp, #12]

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll b/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
index 3ed5db358e091f..a304ec09e88985 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
@@ -99,9 +99,9 @@ entry:
 ; CHECK: .cfi_startproc
 ;   Check that used callee-saved registers are saved
 ; CHECK: sub	sp, sp, #32
-; CHECK: .cfi_def_cfa_offset 32
 ; CHECK: stp	x30, x19, [sp, #16]
 ;   Check correctness of cfi pseudo-instructions
+; CHECK: .cfi_def_cfa_offset 32
 ; CHECK: .cfi_offset w19, -8
 ; CHECK: .cfi_offset w30, -16
 ;   Check correct access to arguments passed on the stack, through stack pointer
@@ -118,7 +118,6 @@ entry:
 ; CHECK-MACHO: .cfi_startproc
 ;   Check that used callee-saved registers are saved
 ; CHECK-MACHO: sub	sp, sp, #48
-; CHECK-MACHO:.cfi_def_cfa_offset 48
 ; CHECK-MACHO: stp	x20, x19, [sp, #16]
 ;   Check that the frame pointer is created:
 ; CHECK-MACHO: stp	x29, x30, [sp, #32]
@@ -182,18 +181,17 @@ entry:
 ; CHECK: .cfi_startproc
 ;   Check that used callee-saved registers are saved
 ; CHECK: stp	x29, x30, [sp, #-32]!
-; CHECK: .cfi_def_cfa_offset 32
 ;   Check that the frame pointer is created:
 ; CHECK: str	x19, [sp, #16]
 ; CHECK: mov	x29, sp
+;   Check the dynamic realignment of the stack pointer to a 128-byte boundary
+; CHECK: sub	x9, sp, #96
+; CHECK: and	sp, x9, #0xffffffffffffff80
 ;   Check correctness of cfi pseudo-instructions
 ; CHECK: .cfi_def_cfa w29, 32
 ; CHECK: .cfi_offset w19, -16
 ; CHECK: .cfi_offset w30, -24
 ; CHECK: .cfi_offset w29, -32
-;   Check the dynamic realignment of the stack pointer to a 128-byte boundary
-; CHECK: sub	x9, sp, #96
-; CHECK: and	sp, x9, #0xffffffffffffff80
 ;   Check correct access to arguments passed on the stack, through frame pointer
 ; CHECK: ldr	d[[DARG:[0-9]+]], [x29, #56]
 ; CHECK: ldr	w[[IARG:[0-9]+]], [x29, #40]
@@ -211,19 +209,18 @@ entry:
 ; CHECK-MACHO: .cfi_startproc
 ;   Check that used callee-saved registers are saved
 ; CHECK-MACHO: stp	x20, x19, [sp, #-32]!
-; CHECK-MACHO: .cfi_def_cfa_offset 32
 ;   Check that the frame pointer is created:
 ; CHECK-MACHO: stp	x29, x30, [sp, #16]
 ; CHECK-MACHO: add	x29, sp, #16
+;   Check the dynamic realignment of the stack pointer to a 128-byte boundary
+; CHECK-MACHO: sub	x9, sp, #96
+; CHECK-MACHO: and	sp, x9, #0xffffffffffffff80
 ;   Check correctness of cfi pseudo-instructions
 ; CHECK-MACHO: .cfi_def_cfa w29, 16
 ; CHECK-MACHO: .cfi_offset w30, -8
 ; CHECK-MACHO: .cfi_offset w29, -16
 ; CHECK-MACHO: .cfi_offset w19, -24
 ; CHECK-MACHO: .cfi_offset w20, -32
-;   Check the dynamic realignment of the stack pointer to a 128-byte boundary
-; CHECK-MACHO: sub	x9, sp, #96
-; CHECK-MACHO: and	sp, x9, #0xffffffffffffff80
 ;   Check correct access to arguments passed on the stack, through frame pointer
 ; CHECK-MACHO: ldr	d[[DARG:[0-9]+]], [x29, #32]
 ; CHECK-MACHO: ldr	w[[IARG:[0-9]+]], [x29, #20]
@@ -288,19 +285,18 @@ entry:
 ; CHECK: .cfi_startproc
 ;   Check that used callee-saved registers are saved
 ; CHECK: stp	x29, x30, [sp, #-32]!
-; CHECK: .cfi_def_cfa_offset 32
 ;   Check that the frame pointer is created:
 ; CHECK: stp	x20, x19, [sp, #16]
 ; CHECK: mov	x29, sp
+;   Check that space is reserved on the stack for the local variable,
+;   rounded up to a multiple of 16 to keep the stack pointer 16-byte aligned.
+; CHECK: sub	sp, sp, #16
 ;   Check correctness of cfi pseudo-instructions
 ; CHECK: .cfi_def_cfa w29, 32
 ; CHECK: .cfi_offset w19, -8
 ; CHECK: .cfi_offset w20, -16
 ; CHECK: .cfi_offset w30, -24
 ; CHECK: .cfi_offset w29, -32
-;   Check that space is reserved on the stack for the local variable,
-;   rounded up to a multiple of 16 to keep the stack pointer 16-byte aligned.
-; CHECK: sub	sp, sp, #16
 ;   Check correct access to arguments passed on the stack, through frame pointer
 ; CHECK: ldr	w[[IARG:[0-9]+]], [x29, #40]
 ;   Check correct reservation of 16-byte aligned VLA (size in w0) on stack
@@ -388,18 +384,10 @@ entry:
 ; CHECK: .cfi_startproc
 ;   Check that used callee-saved registers are saved
 ; CHECK: stp	x29, x30, [sp, #-48]!
-; CHECK: .cfi_def_cfa_offset 48
 ; CHECK: str	x21, [sp, #16]
 ; CHECK: stp	x20, x19, [sp, #32]
 ;   Check that the frame pointer is created:
 ; CHECK: mov	x29, sp
-;   Check correctness of cfi pseudo-instructions
-; CHECK: .cfi_def_cfa w29, 48
-; CHECK: .cfi_offset w19, -8
-; CHECK: .cfi_offset w20, -16
-; CHECK: .cfi_offset w21, -32
-; CHECK: .cfi_offset w30, -40
-; CHECK: .cfi_offset w29, -48
 ;   Check that the stack pointer gets re-aligned to 128
 ;   bytes & the base pointer (x19) gets initialized to
 ;   this 128-byte aligned area for local variables &
@@ -407,6 +395,13 @@ entry:
 ; CHECK: sub	x9, sp, #80
 ; CHECK: and	sp, x9, #0xffffffffffffff80
 ; CHECK: mov    x19, sp
+;   Check correctness of cfi pseudo-instructions
+; CHECK: .cfi_def_cfa w29, 48
+; CHECK: .cfi_offset w19, -8
+; CHECK: .cfi_offset w20, -16
+; CHECK: .cfi_offset w21, -32
+; CHECK: .cfi_offset w30, -40
+; CHECK: .cfi_offset w29, -48
 ;   Check correct access to arguments passed on the stack, through frame pointer
 ; CHECK: ldr	w[[IARG:[0-9]+]], [x29, #56]
 ;   Check correct reservation of 16-byte aligned VLA (size in w0) on stack
@@ -434,11 +429,17 @@ entry:
 ; CHECK-MACHO: .cfi_startproc
 ;   Check that used callee-saved registers are saved
 ; CHECK-MACHO: stp	x22, x21, [sp, #-48]!
-; CHECK-MACHO: .cfi_def_cfa_offset 48
 ; CHECK-MACHO: stp	x20, x19, [sp, #16]
 ;   Check that the frame pointer is created:
 ; CHECK-MACHO: stp	x29, x30, [sp, #32]
 ; CHECK-MACHO: add	x29, sp, #32
+;   Check that the stack pointer gets re-aligned to 128
+;   bytes & the base pointer (x19) gets initialized to
+;   this 128-byte aligned area for local variables &
+;   spill slots
+; CHECK-MACHO: sub	x9, sp, #80
+; CHECK-MACHO: and	sp, x9, #0xffffffffffffff80
+; CHECK-MACHO: mov    x19, sp
 ;   Check correctness of cfi pseudo-instructions
 ; CHECK-MACHO: .cfi_def_cfa w29, 16
 ; CHECK-MACHO: .cfi_offset w30, -8
@@ -447,13 +448,6 @@ entry:
 ; CHECK-MACHO: .cfi_offset w20, -32
 ; CHECK-MACHO: .cfi_offset w21, -40
 ; CHECK-MACHO: .cfi_offset w22, -48
-;   Check that the stack pointer gets re-aligned to 128
-;   bytes & the base pointer (x19) gets initialized to
-;   this 128-byte aligned area for local variables &
-;   spill slots
-; CHECK-MACHO: sub	x9, sp, #80
-; CHECK-MACHO: and	sp, x9, #0xffffffffffffff80
-; CHECK-MACHO: mov    x19, sp
 ;   Check correct access to arguments passed on the stack, through frame pointer
 ; CHECK-MACHO: ldr	w[[IARG:[0-9]+]], [x29, #20]
 ;   Check correct reservation of 16-byte aligned VLA (size in w0) on stack

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-fastcc-stackup.ll b/llvm/test/CodeGen/AArch64/aarch64-fastcc-stackup.ll
index 71f8a43a43852d..db5289ac4bdcaa 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-fastcc-stackup.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-fastcc-stackup.ll
@@ -21,8 +21,8 @@ define fastcc i64 @baz() {
 ; CHECK-LABEL: baz:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    mov x8, sp
 ; CHECK-NEXT:    mov x7, xzr

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-mops-consecutive.ll b/llvm/test/CodeGen/AArch64/aarch64-mops-consecutive.ll
index de6bbb351e61ca..89a1db79356896 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mops-consecutive.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mops-consecutive.ll
@@ -10,11 +10,10 @@ define void @consecutive() {
 ; CHECK-MOPS-LABEL: consecutive:
 ; CHECK-MOPS:       // %bb.0: // %entry
 ; CHECK-MOPS-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-MOPS-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-MOPS-NEXT:    .cfi_offset w30, -8
-; CHECK-MOPS-NEXT:    .cfi_offset w29, -16
 ; CHECK-MOPS-NEXT:    sub sp, sp, #2016
 ; CHECK-MOPS-NEXT:    .cfi_def_cfa_offset 2032
+; CHECK-MOPS-NEXT:    .cfi_offset w30, -8
+; CHECK-MOPS-NEXT:    .cfi_offset w29, -16
 ; CHECK-MOPS-NEXT:    mov w8, #1000
 ; CHECK-MOPS-NEXT:    add x9, sp, #8
 ; CHECK-MOPS-NEXT:    adrp x10, .LCPI0_0

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-mops.ll b/llvm/test/CodeGen/AArch64/aarch64-mops.ll
index 52610ceca481d4..0e7014ac8cbb3c 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mops.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mops.ll
@@ -690,8 +690,8 @@ define void @memset_size(ptr %dst, i64 %size, i32 %value) {
 ; GISel-WITHOUT-MOPS-O0-LABEL: memset_size:
 ; GISel-WITHOUT-MOPS-O0:       // %bb.0: // %entry
 ; GISel-WITHOUT-MOPS-O0-NEXT:    sub sp, sp, #32
-; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_def_cfa_offset 32
 ; GISel-WITHOUT-MOPS-O0-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_def_cfa_offset 32
 ; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_offset w30, -16
 ; GISel-WITHOUT-MOPS-O0-NEXT:    str x1, [sp, #8] // 8-byte Folded Spill
 ; GISel-WITHOUT-MOPS-O0-NEXT:    mov w1, w2
@@ -759,8 +759,8 @@ define void @memset_size_volatile(ptr %dst, i64 %size, i32 %value) {
 ; GISel-WITHOUT-MOPS-O0-LABEL: memset_size_volatile:
 ; GISel-WITHOUT-MOPS-O0:       // %bb.0: // %entry
 ; GISel-WITHOUT-MOPS-O0-NEXT:    sub sp, sp, #32
-; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_def_cfa_offset 32
 ; GISel-WITHOUT-MOPS-O0-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_def_cfa_offset 32
 ; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_offset w30, -16
 ; GISel-WITHOUT-MOPS-O0-NEXT:    str x1, [sp, #8] // 8-byte Folded Spill
 ; GISel-WITHOUT-MOPS-O0-NEXT:    mov w1, w2

diff  --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
index 09cb0aa28b104f..b1d5e2c4dca349 100644
--- a/llvm/test/CodeGen/AArch64/active_lane_mask.ll
+++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
@@ -152,10 +152,11 @@ define <vscale x 32 x i1> @lane_mask_nxv32i1_i32(i32 %index, i32 %TC) {
 ; CHECK-LABEL: lane_mask_nxv32i1_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    str p5, [sp, #6, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    index z0.s, #0, #1
 ; CHECK-NEXT:    mov z3.s, w0
 ; CHECK-NEXT:    mov z1.d, z0.d
@@ -175,7 +176,6 @@ define <vscale x 32 x i1> @lane_mask_nxv32i1_i32(i32 %index, i32 %TC) {
 ; CHECK-NEXT:    cmphi p3.s, p0/z, z4.s, z5.s
 ; CHECK-NEXT:    uqadd z5.s, z6.s, z3.s
 ; CHECK-NEXT:    incw z1.s, all, mul #4
-; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    cmphi p4.s, p0/z, z4.s, z5.s
 ; CHECK-NEXT:    uqadd z0.s, z0.s, z3.s
 ; CHECK-NEXT:    uqadd z1.s, z1.s, z3.s
@@ -187,7 +187,6 @@ define <vscale x 32 x i1> @lane_mask_nxv32i1_i32(i32 %index, i32 %TC) {
 ; CHECK-NEXT:    cmphi p4.s, p0/z, z4.s, z1.s
 ; CHECK-NEXT:    uqadd z0.s, z2.s, z3.s
 ; CHECK-NEXT:    uqadd z1.s, z6.s, z3.s
-; CHECK-NEXT:    str p5, [sp, #6, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    cmphi p5.s, p0/z, z4.s, z0.s
 ; CHECK-NEXT:    cmphi p0.s, p0/z, z4.s, z1.s
 ; CHECK-NEXT:    uzp1 p3.h, p3.h, p4.h
@@ -207,10 +206,13 @@ define <vscale x 32 x i1> @lane_mask_nxv32i1_i64(i64 %index, i64 %TC) {
 ; CHECK-LABEL: lane_mask_nxv32i1_i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    str p7, [sp, #4, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p6, [sp, #5, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p5, [sp, #6, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    index z0.d, #0, #1
 ; CHECK-NEXT:    mov z3.d, x0
 ; CHECK-NEXT:    mov z1.d, z0.d
@@ -236,16 +238,13 @@ define <vscale x 32 x i1> @lane_mask_nxv32i1_i64(i64 %index, i64 %TC) {
 ; CHECK-NEXT:    uqadd z6.d, z7.d, z3.d
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    incd z24.d, all, mul #4
-; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov z26.d, z5.d
 ; CHECK-NEXT:    cmphi p4.d, p0/z, z4.d, z6.d
 ; CHECK-NEXT:    uqadd z6.d, z24.d, z3.d
 ; CHECK-NEXT:    incd z25.d, all, mul #4
-; CHECK-NEXT:    str p5, [sp, #6, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    cmphi p5.d, p0/z, z4.d, z6.d
 ; CHECK-NEXT:    uqadd z6.d, z25.d, z3.d
 ; CHECK-NEXT:    incd z26.d, all, mul #4
-; CHECK-NEXT:    str p6, [sp, #5, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    cmphi p6.d, p0/z, z4.d, z6.d
 ; CHECK-NEXT:    uqadd z6.d, z26.d, z3.d
 ; CHECK-NEXT:    uzp1 p2.s, p2.s, p3.s
@@ -278,7 +277,6 @@ define <vscale x 32 x i1> @lane_mask_nxv32i1_i64(i64 %index, i64 %TC) {
 ; CHECK-NEXT:    cmphi p6.d, p0/z, z4.d, z1.d
 ; CHECK-NEXT:    uqadd z0.d, z25.d, z3.d
 ; CHECK-NEXT:    uqadd z1.d, z26.d, z3.d
-; CHECK-NEXT:    str p7, [sp, #4, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    cmphi p7.d, p0/z, z4.d, z0.d
 ; CHECK-NEXT:    cmphi p0.d, p0/z, z4.d, z1.d
 ; CHECK-NEXT:    uzp1 p5.s, p5.s, p6.s

diff  --git a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
index af760714d928e3..75fdac707b834e 100644
--- a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
+++ b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
@@ -50,8 +50,8 @@ define <4 x i32> @vec_add_const_add_const_extrause(<4 x i32> %arg) {
 ; CHECK-LABEL: vec_add_const_add_const_extrause:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    movi v1.4s, #8
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
@@ -127,8 +127,8 @@ define <4 x i32> @vec_add_const_sub_const_extrause(<4 x i32> %arg) {
 ; CHECK-LABEL: vec_add_const_sub_const_extrause:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    movi v1.4s, #8
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
@@ -206,8 +206,8 @@ define <4 x i32> @vec_add_const_const_sub_extrause(<4 x i32> %arg) {
 ; CHECK-LABEL: vec_add_const_const_sub_extrause:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    movi v1.4s, #8
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
@@ -283,8 +283,8 @@ define <4 x i32> @vec_sub_const_add_const_extrause(<4 x i32> %arg) {
 ; CHECK-LABEL: vec_sub_const_add_const_extrause:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    movi v1.4s, #8
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
@@ -360,8 +360,8 @@ define <4 x i32> @vec_sub_const_sub_const_extrause(<4 x i32> %arg) {
 ; CHECK-LABEL: vec_sub_const_sub_const_extrause:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    movi v1.4s, #8
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
@@ -439,8 +439,8 @@ define <4 x i32> @vec_sub_const_const_sub_extrause(<4 x i32> %arg) {
 ; CHECK-LABEL: vec_sub_const_const_sub_extrause:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    movi v1.4s, #8
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
@@ -519,8 +519,8 @@ define <4 x i32> @vec_const_sub_add_const_extrause(<4 x i32> %arg) {
 ; CHECK-LABEL: vec_const_sub_add_const_extrause:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    movi v1.4s, #8
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
@@ -599,8 +599,8 @@ define <4 x i32> @vec_const_sub_sub_const_extrause(<4 x i32> %arg) {
 ; CHECK-LABEL: vec_const_sub_sub_const_extrause:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    movi v1.4s, #8
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
@@ -678,8 +678,8 @@ define <4 x i32> @vec_const_sub_const_sub_extrause(<4 x i32> %arg) {
 ; CHECK-LABEL: vec_const_sub_const_sub_extrause:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    movi v1.4s, #8
 ; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s

diff  --git a/llvm/test/CodeGen/AArch64/addsub.ll b/llvm/test/CodeGen/AArch64/addsub.ll
index 430462fadefa3f..a483d9a8e97d36 100644
--- a/llvm/test/CodeGen/AArch64/addsub.ll
+++ b/llvm/test/CodeGen/AArch64/addsub.ll
@@ -674,8 +674,6 @@ define dso_local i32 @_extract_crng_crng() {
 ; CHECK-NEXT:    bl crng_reseed
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:  .LBB36_3: // %if.end
-; CHECK-NEXT:    .cfi_def_cfa wsp, 0
-; CHECK-NEXT:    .cfi_same_value w30
 ; CHECK-NEXT:    ret
 entry:
   br i1 icmp slt (ptr @_extract_crng_crng, ptr null), label %if.then, label %lor.lhs.false

diff  --git a/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll b/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll
index 407a8e2120ccc8..94e6a25aa73c15 100644
--- a/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll
+++ b/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll
@@ -452,8 +452,8 @@ define void @caller_in_memory() {
 ; CHECK-LABEL: caller_in_memory:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #96
-; CHECK-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    add x8, sp, #8
 ; CHECK-NEXT:    bl return_in_memory
@@ -496,8 +496,8 @@ define void @argument_in_memory() {
 ; CHECK-LABEL: argument_in_memory:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #96
-; CHECK-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    adrp x8, in_memory_store
 ; CHECK-NEXT:    add x8, x8, :lo12:in_memory_store

diff  --git a/llvm/test/CodeGen/AArch64/arm64-custom-call-saved-reg.ll b/llvm/test/CodeGen/AArch64/arm64-custom-call-saved-reg.ll
index 31a2c74d3bd15f..165f3d5bdb9815 100644
--- a/llvm/test/CodeGen/AArch64/arm64-custom-call-saved-reg.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-custom-call-saved-reg.ll
@@ -72,7 +72,6 @@ define dso_local void @callee() {
 ; CHECK-SAVED-X18: str x18, [sp
 
 ; CHECK-SAVED-ALL: str x18, [sp
-; CHECK-SAVED-ALL-NEXT: .cfi_def_cfa_offset
 ; CHECK-SAVED-ALL-NEXT: stp x15, x14, [sp
 ; CHECK-SAVED-ALL-NEXT: stp x13, x12, [sp
 ; CHECK-SAVED-ALL-NEXT: stp x11, x10, [sp

diff  --git a/llvm/test/CodeGen/AArch64/arm64-fp128.ll b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
index 9efe28b383c8df..61e64e219355fc 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
@@ -75,8 +75,8 @@ define dso_local void @test_fptosi() {
 ; CHECK-LABEL: test_fptosi:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    adrp x8, lhs
 ; CHECK-NEXT:    ldr q0, [x8, :lo12:lhs]
@@ -106,8 +106,8 @@ define dso_local void @test_fptoui() {
 ; CHECK-LABEL: test_fptoui:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    adrp x8, lhs
 ; CHECK-NEXT:    ldr q0, [x8, :lo12:lhs]
@@ -247,8 +247,8 @@ define dso_local i1 @test_setcc3() {
 ; CHECK-LABEL: test_setcc3:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    stp x30, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    adrp x8, lhs
@@ -342,8 +342,8 @@ define dso_local void @test_round() {
 ; CHECK-LABEL: test_round:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    adrp x8, lhs
 ; CHECK-NEXT:    ldr q0, [x8, :lo12:lhs]

diff  --git a/llvm/test/CodeGen/AArch64/arm64-large-frame.ll b/llvm/test/CodeGen/AArch64/arm64-large-frame.ll
index a5c0fe5ccb8b66..683b5a83651b75 100644
--- a/llvm/test/CodeGen/AArch64/arm64-large-frame.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-large-frame.ll
@@ -12,11 +12,8 @@ define void @test_bigframe() {
   %var3 = alloca i8, i32 20000000
 
 ; CHECK:      sub sp, sp, #4095, lsl #12          // =16773120
-; CHECK-NEXT: .cfi_def_cfa_offset 16773136
 ; CHECK-NEXT: sub sp, sp, #4095, lsl #12          // =16773120
-; CHECK-NEXT: .cfi_def_cfa_offset 33546256
 ; CHECK-NEXT: sub sp, sp, #1575, lsl #12          // =6451200
-; CHECK-NEXT: .cfi_def_cfa_offset 39997456
 ; CHECK-NEXT: sub sp, sp, #2576
 ; CHECK-NEXT: .cfi_def_cfa_offset 40000032
 
@@ -56,7 +53,6 @@ define void @test_mediumframe() {
   %var3 = alloca i8, i32 1000000
 
 ; CHECK:      sub sp, sp, #488, lsl #12           // =1998848
-; CHECK-NEXT: .cfi_def_cfa_offset 1998864
 ; CHECK-NEXT: sub sp, sp, #1168
 ; CHECK-NEXT: .cfi_def_cfa_offset 2000032
 

diff  --git a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll
index ef3fb33476a044..52b09885ebb1c5 100644
--- a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll
@@ -96,8 +96,8 @@ define void @bzero_12_stack() {
 ; CHECK-LABEL: bzero_12_stack:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    mov x0, sp
 ; CHECK-NEXT:    str wzr, [sp, #8]
@@ -116,8 +116,8 @@ define void @bzero_16_stack() {
 ; CHECK-LABEL: bzero_16_stack:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    stp xzr, x30, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    mov x0, sp
 ; CHECK-NEXT:    str xzr, [sp]
@@ -135,8 +135,8 @@ define void @bzero_20_stack() {
 ; CHECK-LABEL: bzero_20_stack:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    add x0, sp, #8
 ; CHECK-NEXT:    stp xzr, xzr, [sp, #8]
@@ -155,8 +155,8 @@ define void @bzero_26_stack() {
 ; CHECK-LABEL: bzero_26_stack:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    mov x0, sp
 ; CHECK-NEXT:    stp xzr, xzr, [sp]
@@ -176,8 +176,8 @@ define void @bzero_32_stack() {
 ; CHECK-LABEL: bzero_32_stack:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov x0, sp
@@ -196,8 +196,8 @@ define void @bzero_40_stack() {
 ; CHECK-LABEL: bzero_40_stack:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #64
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov x0, sp
@@ -217,8 +217,8 @@ define void @bzero_64_stack() {
 ; CHECK-LABEL: bzero_64_stack:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #80
-; CHECK-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov x0, sp
@@ -238,8 +238,8 @@ define void @bzero_72_stack() {
 ; CHECK-LABEL: bzero_72_stack:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #96
-; CHECK-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov x0, sp
@@ -260,8 +260,8 @@ define void @bzero_128_stack() {
 ; CHECK-LABEL: bzero_128_stack:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #144
-; CHECK-NEXT:    .cfi_def_cfa_offset 144
 ; CHECK-NEXT:    str x30, [sp, #128] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 144
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov x0, sp
@@ -283,8 +283,8 @@ define void @bzero_256_stack() {
 ; CHECK-LABEL: bzero_256_stack:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #272
-; CHECK-NEXT:    .cfi_def_cfa_offset 272
 ; CHECK-NEXT:    stp x29, x30, [sp, #256] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 272
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
@@ -346,8 +346,8 @@ define void @memset_12_stack() {
 ; CHECK-LABEL: memset_12_stack:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    mov x8, #-6148914691236517206
 ; CHECK-NEXT:    mov x0, sp
@@ -387,8 +387,8 @@ define void @memset_20_stack() {
 ; CHECK-LABEL: memset_20_stack:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    mov x8, #-6148914691236517206
 ; CHECK-NEXT:    add x0, sp, #8
@@ -408,8 +408,8 @@ define void @memset_26_stack() {
 ; CHECK-LABEL: memset_26_stack:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    mov x8, #-6148914691236517206
 ; CHECK-NEXT:    mov x0, sp
@@ -430,8 +430,8 @@ define void @memset_32_stack() {
 ; CHECK-LABEL: memset_32_stack:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    movi v0.16b, #170
 ; CHECK-NEXT:    mov x0, sp
@@ -450,8 +450,8 @@ define void @memset_40_stack() {
 ; CHECK-LABEL: memset_40_stack:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #64
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    movi v0.16b, #170
 ; CHECK-NEXT:    mov x8, #-6148914691236517206
@@ -472,8 +472,8 @@ define void @memset_64_stack() {
 ; CHECK-LABEL: memset_64_stack:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #80
-; CHECK-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    movi v0.16b, #170
 ; CHECK-NEXT:    mov x0, sp
@@ -493,8 +493,8 @@ define void @memset_72_stack() {
 ; CHECK-LABEL: memset_72_stack:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #96
-; CHECK-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    movi v0.16b, #170
 ; CHECK-NEXT:    mov x8, #-6148914691236517206
@@ -516,8 +516,8 @@ define void @memset_128_stack() {
 ; CHECK-LABEL: memset_128_stack:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #144
-; CHECK-NEXT:    .cfi_def_cfa_offset 144
 ; CHECK-NEXT:    str x30, [sp, #128] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 144
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    movi v0.16b, #170
 ; CHECK-NEXT:    mov x0, sp
@@ -539,8 +539,8 @@ define void @memset_256_stack() {
 ; CHECK-LABEL: memset_256_stack:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #272
-; CHECK-NEXT:    .cfi_def_cfa_offset 272
 ; CHECK-NEXT:    stp x29, x30, [sp, #256] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 272
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    movi v0.16b, #170

diff  --git a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
index 7cfd698167bde9..f77df93783b8cc 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
@@ -807,10 +807,10 @@ define <16 x i8> @srem16x8(<16 x i8> %A, <16 x i8> %B) {
 ; CHECK-LABEL: srem16x8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stp x26, x25, [sp, #-64]! // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -1171,10 +1171,10 @@ define <16 x i8> @urem16x8(<16 x i8> %A, <16 x i8> %B) {
 ; CHECK-LABEL: urem16x8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stp x26, x25, [sp, #-64]! // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -1469,8 +1469,8 @@ define <2 x float> @frem2f32(<2 x float> %A, <2 x float> %B) {
 ; CHECK-LABEL: frem2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #64
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
@@ -1498,8 +1498,8 @@ define <4 x float> @frem4f32(<4 x float> %A, <4 x float> %B) {
 ; CHECK-LABEL: frem4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #64
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    stp q0, q1, [sp, #16] // 32-byte Folded Spill
 ; CHECK-NEXT:    mov s0, v0.s[1]
@@ -1554,8 +1554,8 @@ define <2 x double> @frem2d64(<2 x double> %A, <2 x double> %B) {
 ; CHECK-LABEL: frem2d64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #64
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-NEXT:    mov d0, v0.d[1]
@@ -1712,4 +1712,3 @@ define <2 x double> @fmulx_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
         %val = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %lhs, <2 x double> %rhs)
         ret <2 x double> %val
 }
-

diff  --git a/llvm/test/CodeGen/AArch64/arm64-neon-vector-list-spill.ll b/llvm/test/CodeGen/AArch64/arm64-neon-vector-list-spill.ll
index 5a7cc89e1554e9..bb06778696c977 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-vector-list-spill.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-vector-list-spill.ll
@@ -15,8 +15,8 @@ define i32 @spill.DPairReg(ptr %arg1, i32 %arg2) {
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB0_2: // %if.then
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    stp x29, x30, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    mov x8, sp
@@ -53,8 +53,8 @@ define i16 @spill.DTripleReg(ptr %arg1, i32 %arg2) {
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB1_2: // %if.then
 ; CHECK-NEXT:    sub sp, sp, #64
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    stp x29, x30, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    mov x8, sp
@@ -91,8 +91,8 @@ define i16 @spill.DQuadReg(ptr %arg1, i32 %arg2) {
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB2_2: // %if.then
 ; CHECK-NEXT:    sub sp, sp, #80
-; CHECK-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    mov x8, sp
@@ -129,8 +129,8 @@ define i32 @spill.QPairReg(ptr %arg1, i32 %arg2) {
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB3_2: // %if.then
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    stp x29, x30, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    mov x8, sp
@@ -167,8 +167,8 @@ define float @spill.QTripleReg(ptr %arg1, i32 %arg2) {
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB4_2: // %if.then
 ; CHECK-NEXT:    sub sp, sp, #64
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    stp x29, x30, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    mov x8, sp
@@ -205,8 +205,8 @@ define i8 @spill.QQuadReg(ptr %arg1, i32 %arg2) {
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB5_2: // %if.then
 ; CHECK-NEXT:    sub sp, sp, #80
-; CHECK-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    mov x8, sp

diff  --git a/llvm/test/CodeGen/AArch64/arm64-patchpoint.ll b/llvm/test/CodeGen/AArch64/arm64-patchpoint.ll
index a493cd796ed4d3..c58f4b10290974 100644
--- a/llvm/test/CodeGen/AArch64/arm64-patchpoint.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-patchpoint.ll
@@ -27,7 +27,6 @@ entry:
 ;
 ; CHECK-LABEL: caller_meta_leaf
 ; CHECK:       sub sp, sp, #48
-; CHECK-NEXT: .cfi_def_cfa_offset 48
 ; CHECK-NEXT:  stp x29, x30, [sp, #32]
 ; CHECK-NEXT:  add x29, sp, #32
 ; CHECK:       Ltmp
@@ -83,4 +82,3 @@ entry:
 declare void @llvm.experimental.stackmap(i64, i32, ...)
 declare void @llvm.experimental.patchpoint.void(i64, i32, ptr, i32, ...)
 declare i64 @llvm.experimental.patchpoint.i64(i64, i32, ptr, i32, ...)
-

diff  --git a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
index c47f9926e936bf..8079c1306d9b6f 100644
--- a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
@@ -13,7 +13,6 @@ define i32 @foo(i32 %a, i32 %b) {
 ; ENABLE-NEXT:    b.ge LBB0_2
 ; ENABLE-NEXT:  ; %bb.1: ; %true
 ; ENABLE-NEXT:    sub sp, sp, #32
-; ENABLE-NEXT:    .cfi_def_cfa_offset 32
 ; ENABLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
 ; ENABLE-NEXT:    add x29, sp, #16
 ; ENABLE-NEXT:    .cfi_def_cfa w29, 16
@@ -26,15 +25,11 @@ define i32 @foo(i32 %a, i32 %b) {
 ; ENABLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; ENABLE-NEXT:    add sp, sp, #32
 ; ENABLE-NEXT:  LBB0_2: ; %false
-; ENABLE-NEXT:    .cfi_def_cfa wsp, 0
-; ENABLE-NEXT:    .cfi_same_value w30
-; ENABLE-NEXT:    .cfi_same_value w29
 ; ENABLE-NEXT:    ret
 ;
 ; DISABLE-LABEL: foo:
 ; DISABLE:       ; %bb.0:
 ; DISABLE-NEXT:    sub sp, sp, #32
-; DISABLE-NEXT:    .cfi_def_cfa_offset 32
 ; DISABLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
 ; DISABLE-NEXT:    add x29, sp, #16
 ; DISABLE-NEXT:    .cfi_def_cfa w29, 16
@@ -77,7 +72,6 @@ define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
 ; ENABLE-NEXT:    cbz w0, LBB1_4
 ; ENABLE-NEXT:  ; %bb.1: ; %for.body.preheader
 ; ENABLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
-; ENABLE-NEXT:    .cfi_def_cfa_offset 32
 ; ENABLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
 ; ENABLE-NEXT:    add x29, sp, #16
 ; ENABLE-NEXT:    .cfi_def_cfa w29, 16
@@ -99,18 +93,12 @@ define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
 ; ENABLE-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
 ; ENABLE-NEXT:    ret
 ; ENABLE-NEXT:  LBB1_4: ; %if.else
-; ENABLE-NEXT:    .cfi_def_cfa wsp, 0
-; ENABLE-NEXT:    .cfi_same_value w30
-; ENABLE-NEXT:    .cfi_same_value w29
-; ENABLE-NEXT:    .cfi_same_value w19
-; ENABLE-NEXT:    .cfi_same_value w20
 ; ENABLE-NEXT:    lsl w0, w1, #1
 ; ENABLE-NEXT:    ret
 ;
 ; DISABLE-LABEL: freqSaveAndRestoreOutsideLoop:
 ; DISABLE:       ; %bb.0: ; %entry
 ; DISABLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
-; DISABLE-NEXT:    .cfi_def_cfa_offset 32
 ; DISABLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
 ; DISABLE-NEXT:    add x29, sp, #16
 ; DISABLE-NEXT:    .cfi_def_cfa w29, 16
@@ -171,7 +159,6 @@ define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
 ; ENABLE-LABEL: freqSaveAndRestoreOutsideLoop2:
 ; ENABLE:       ; %bb.0: ; %entry
 ; ENABLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
-; ENABLE-NEXT:    .cfi_def_cfa_offset 32
 ; ENABLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
 ; ENABLE-NEXT:    add x29, sp, #16
 ; ENABLE-NEXT:    .cfi_def_cfa w29, 16
@@ -196,7 +183,6 @@ define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
 ; DISABLE-LABEL: freqSaveAndRestoreOutsideLoop2:
 ; DISABLE:       ; %bb.0: ; %entry
 ; DISABLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
-; DISABLE-NEXT:    .cfi_def_cfa_offset 32
 ; DISABLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
 ; DISABLE-NEXT:    add x29, sp, #16
 ; DISABLE-NEXT:    .cfi_def_cfa w29, 16
@@ -241,7 +227,6 @@ define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
 ; ENABLE-NEXT:    cbz w0, LBB3_4
 ; ENABLE-NEXT:  ; %bb.1: ; %for.body.preheader
 ; ENABLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
-; ENABLE-NEXT:    .cfi_def_cfa_offset 32
 ; ENABLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
 ; ENABLE-NEXT:    add x29, sp, #16
 ; ENABLE-NEXT:    .cfi_def_cfa w29, 16
@@ -264,18 +249,12 @@ define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
 ; ENABLE-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
 ; ENABLE-NEXT:    ret
 ; ENABLE-NEXT:  LBB3_4: ; %if.else
-; ENABLE-NEXT:    .cfi_def_cfa wsp, 0
-; ENABLE-NEXT:    .cfi_same_value w30
-; ENABLE-NEXT:    .cfi_same_value w29
-; ENABLE-NEXT:    .cfi_same_value w19
-; ENABLE-NEXT:    .cfi_same_value w20
 ; ENABLE-NEXT:    lsl w0, w1, #1
 ; ENABLE-NEXT:    ret
 ;
 ; DISABLE-LABEL: loopInfoSaveOutsideLoop:
 ; DISABLE:       ; %bb.0: ; %entry
 ; DISABLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
-; DISABLE-NEXT:    .cfi_def_cfa_offset 32
 ; DISABLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
 ; DISABLE-NEXT:    add x29, sp, #16
 ; DISABLE-NEXT:    .cfi_def_cfa w29, 16
@@ -571,9 +550,6 @@ define i32 @inlineAsm(i32 %cond, i32 %N) {
 ; ENABLE-NEXT:    ldp x20, x19, [sp], #16 ; 16-byte Folded Reload
 ; ENABLE-NEXT:    ret
 ; ENABLE-NEXT:  LBB7_4: ; %if.else
-; ENABLE-NEXT:    .cfi_def_cfa wsp, 0
-; ENABLE-NEXT:    .cfi_same_value w19
-; ENABLE-NEXT:    .cfi_same_value w20
 ; ENABLE-NEXT:    lsl w0, w1, #1
 ; ENABLE-NEXT:    ret
 ;
@@ -629,7 +605,6 @@ define i32 @callVariadicFunc(i32 %cond, i32 %N) {
 ; ENABLE-NEXT:    cbz w0, LBB8_2
 ; ENABLE-NEXT:  ; %bb.1: ; %if.then
 ; ENABLE-NEXT:    sub sp, sp, #64
-; ENABLE-NEXT:    .cfi_def_cfa_offset 64
 ; ENABLE-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
 ; ENABLE-NEXT:    add x29, sp, #48
 ; ENABLE-NEXT:    .cfi_def_cfa w29, 16
@@ -645,16 +620,12 @@ define i32 @callVariadicFunc(i32 %cond, i32 %N) {
 ; ENABLE-NEXT:    add sp, sp, #64
 ; ENABLE-NEXT:    ret
 ; ENABLE-NEXT:  LBB8_2: ; %if.else
-; ENABLE-NEXT:    .cfi_def_cfa wsp, 0
-; ENABLE-NEXT:    .cfi_same_value w30
-; ENABLE-NEXT:    .cfi_same_value w29
 ; ENABLE-NEXT:    lsl w0, w1, #1
 ; ENABLE-NEXT:    ret
 ;
 ; DISABLE-LABEL: callVariadicFunc:
 ; DISABLE:       ; %bb.0: ; %entry
 ; DISABLE-NEXT:    sub sp, sp, #64
-; DISABLE-NEXT:    .cfi_def_cfa_offset 64
 ; DISABLE-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
 ; DISABLE-NEXT:    add x29, sp, #48
 ; DISABLE-NEXT:    .cfi_def_cfa w29, 16
@@ -709,7 +680,6 @@ define i32 @noreturn(i8 signext %bad_thing) {
 ; ENABLE-NEXT:    ret
 ; ENABLE-NEXT:  LBB9_2: ; %if.abort
 ; ENABLE-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
-; ENABLE-NEXT:    .cfi_def_cfa_offset 16
 ; ENABLE-NEXT:    mov x29, sp
 ; ENABLE-NEXT:    .cfi_def_cfa w29, 16
 ; ENABLE-NEXT:    .cfi_offset w30, -8
@@ -719,7 +689,6 @@ define i32 @noreturn(i8 signext %bad_thing) {
 ; DISABLE-LABEL: noreturn:
 ; DISABLE:       ; %bb.0: ; %entry
 ; DISABLE-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
-; DISABLE-NEXT:    .cfi_def_cfa_offset 16
 ; DISABLE-NEXT:    mov x29, sp
 ; DISABLE-NEXT:    .cfi_def_cfa w29, 16
 ; DISABLE-NEXT:    .cfi_offset w30, -8
@@ -759,7 +728,6 @@ define void @infiniteloop() {
 ; ENABLE-LABEL: infiniteloop:
 ; ENABLE:       ; %bb.0: ; %entry
 ; ENABLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
-; ENABLE-NEXT:    .cfi_def_cfa_offset 32
 ; ENABLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
 ; ENABLE-NEXT:    add x29, sp, #16
 ; ENABLE-NEXT:    .cfi_def_cfa w29, 16
@@ -787,7 +755,6 @@ define void @infiniteloop() {
 ; DISABLE-LABEL: infiniteloop:
 ; DISABLE:       ; %bb.0: ; %entry
 ; DISABLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
-; DISABLE-NEXT:    .cfi_def_cfa_offset 32
 ; DISABLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
 ; DISABLE-NEXT:    add x29, sp, #16
 ; DISABLE-NEXT:    .cfi_def_cfa w29, 16
@@ -834,7 +801,6 @@ define void @infiniteloop2() {
 ; ENABLE-LABEL: infiniteloop2:
 ; ENABLE:       ; %bb.0: ; %entry
 ; ENABLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
-; ENABLE-NEXT:    .cfi_def_cfa_offset 32
 ; ENABLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
 ; ENABLE-NEXT:    add x29, sp, #16
 ; ENABLE-NEXT:    .cfi_def_cfa w29, 16
@@ -868,7 +834,6 @@ define void @infiniteloop2() {
 ; DISABLE-LABEL: infiniteloop2:
 ; DISABLE:       ; %bb.0: ; %entry
 ; DISABLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
-; DISABLE-NEXT:    .cfi_def_cfa_offset 32
 ; DISABLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
 ; DISABLE-NEXT:    add x29, sp, #16
 ; DISABLE-NEXT:    .cfi_def_cfa w29, 16
@@ -1010,20 +975,16 @@ define i32 @stack_realign(i32 %a, i32 %b, ptr %ptr1, ptr %ptr2) {
 ; ENABLE-NEXT:    b.ge LBB13_2
 ; ENABLE-NEXT:  ; %bb.1: ; %true
 ; ENABLE-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
-; ENABLE-NEXT:    .cfi_def_cfa_offset 16
 ; ENABLE-NEXT:    mov x29, sp
+; ENABLE-NEXT:    sub x1, sp, #16
+; ENABLE-NEXT:    and sp, x1, #0xffffffffffffffe0
 ; ENABLE-NEXT:    .cfi_def_cfa w29, 16
 ; ENABLE-NEXT:    .cfi_offset w30, -8
 ; ENABLE-NEXT:    .cfi_offset w29, -16
-; ENABLE-NEXT:    sub x1, sp, #16
-; ENABLE-NEXT:    and sp, x1, #0xffffffffffffffe0
 ; ENABLE-NEXT:    str w0, [sp]
 ; ENABLE-NEXT:    mov sp, x29
 ; ENABLE-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; ENABLE-NEXT:  LBB13_2: ; %false
-; ENABLE-NEXT:    .cfi_def_cfa wsp, 0
-; ENABLE-NEXT:    .cfi_same_value w30
-; ENABLE-NEXT:    .cfi_same_value w29
 ; ENABLE-NEXT:    str w8, [x2]
 ; ENABLE-NEXT:    str w9, [x3]
 ; ENABLE-NEXT:    ret
@@ -1031,13 +992,12 @@ define i32 @stack_realign(i32 %a, i32 %b, ptr %ptr1, ptr %ptr2) {
 ; DISABLE-LABEL: stack_realign:
 ; DISABLE:       ; %bb.0:
 ; DISABLE-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
-; DISABLE-NEXT:    .cfi_def_cfa_offset 16
 ; DISABLE-NEXT:    mov x29, sp
+; DISABLE-NEXT:    sub x9, sp, #16
+; DISABLE-NEXT:    and sp, x9, #0xffffffffffffffe0
 ; DISABLE-NEXT:    .cfi_def_cfa w29, 16
 ; DISABLE-NEXT:    .cfi_offset w30, -8
 ; DISABLE-NEXT:    .cfi_offset w29, -16
-; DISABLE-NEXT:    sub x9, sp, #16
-; DISABLE-NEXT:    and sp, x9, #0xffffffffffffffe0
 ; DISABLE-NEXT:    lsl w8, w0, w1
 ; DISABLE-NEXT:    lsl w9, w1, w0
 ; DISABLE-NEXT:    cmp w0, w1
@@ -1077,13 +1037,14 @@ define void @stack_realign2(i32 %a, i32 %b, ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr
 ; ENABLE-LABEL: stack_realign2:
 ; ENABLE:       ; %bb.0:
 ; ENABLE-NEXT:    stp x28, x27, [sp, #-96]! ; 16-byte Folded Spill
-; ENABLE-NEXT:    .cfi_def_cfa_offset 96
 ; ENABLE-NEXT:    stp x26, x25, [sp, #16] ; 16-byte Folded Spill
 ; ENABLE-NEXT:    stp x24, x23, [sp, #32] ; 16-byte Folded Spill
 ; ENABLE-NEXT:    stp x22, x21, [sp, #48] ; 16-byte Folded Spill
 ; ENABLE-NEXT:    stp x20, x19, [sp, #64] ; 16-byte Folded Spill
 ; ENABLE-NEXT:    stp x29, x30, [sp, #80] ; 16-byte Folded Spill
 ; ENABLE-NEXT:    add x29, sp, #80
+; ENABLE-NEXT:    sub x9, sp, #32
+; ENABLE-NEXT:    and sp, x9, #0xffffffffffffffe0
 ; ENABLE-NEXT:    .cfi_def_cfa w29, 16
 ; ENABLE-NEXT:    .cfi_offset w30, -8
 ; ENABLE-NEXT:    .cfi_offset w29, -16
@@ -1097,8 +1058,6 @@ define void @stack_realign2(i32 %a, i32 %b, ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr
 ; ENABLE-NEXT:    .cfi_offset w26, -80
 ; ENABLE-NEXT:    .cfi_offset w27, -88
 ; ENABLE-NEXT:    .cfi_offset w28, -96
-; ENABLE-NEXT:    sub x9, sp, #32
-; ENABLE-NEXT:    and sp, x9, #0xffffffffffffffe0
 ; ENABLE-NEXT:    add w8, w1, w0
 ; ENABLE-NEXT:    lsl w9, w0, w1
 ; ENABLE-NEXT:    lsl w10, w1, w0
@@ -1137,13 +1096,14 @@ define void @stack_realign2(i32 %a, i32 %b, ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr
 ; DISABLE-LABEL: stack_realign2:
 ; DISABLE:       ; %bb.0:
 ; DISABLE-NEXT:    stp x28, x27, [sp, #-96]! ; 16-byte Folded Spill
-; DISABLE-NEXT:    .cfi_def_cfa_offset 96
 ; DISABLE-NEXT:    stp x26, x25, [sp, #16] ; 16-byte Folded Spill
 ; DISABLE-NEXT:    stp x24, x23, [sp, #32] ; 16-byte Folded Spill
 ; DISABLE-NEXT:    stp x22, x21, [sp, #48] ; 16-byte Folded Spill
 ; DISABLE-NEXT:    stp x20, x19, [sp, #64] ; 16-byte Folded Spill
 ; DISABLE-NEXT:    stp x29, x30, [sp, #80] ; 16-byte Folded Spill
 ; DISABLE-NEXT:    add x29, sp, #80
+; DISABLE-NEXT:    sub x9, sp, #32
+; DISABLE-NEXT:    and sp, x9, #0xffffffffffffffe0
 ; DISABLE-NEXT:    .cfi_def_cfa w29, 16
 ; DISABLE-NEXT:    .cfi_offset w30, -8
 ; DISABLE-NEXT:    .cfi_offset w29, -16
@@ -1157,8 +1117,6 @@ define void @stack_realign2(i32 %a, i32 %b, ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr
 ; DISABLE-NEXT:    .cfi_offset w26, -80
 ; DISABLE-NEXT:    .cfi_offset w27, -88
 ; DISABLE-NEXT:    .cfi_offset w28, -96
-; DISABLE-NEXT:    sub x9, sp, #32
-; DISABLE-NEXT:    and sp, x9, #0xffffffffffffffe0
 ; DISABLE-NEXT:    add w8, w1, w0
 ; DISABLE-NEXT:    lsl w9, w0, w1
 ; DISABLE-NEXT:    lsl w10, w1, w0

diff  --git a/llvm/test/CodeGen/AArch64/arm64-virtual_base.ll b/llvm/test/CodeGen/AArch64/arm64-virtual_base.ll
index 853811d551ab2b..7d067772e4d84e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-virtual_base.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-virtual_base.ll
@@ -36,8 +36,8 @@ define void @Precompute_Patch_Values(ptr %Shape) {
 ; CHECK-LABEL: Precompute_Patch_Values:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    sub sp, sp, #400
-; CHECK-NEXT:    .cfi_def_cfa_offset 400
 ; CHECK-NEXT:    stp x28, x27, [sp, #384] ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 400
 ; CHECK-NEXT:    .cfi_offset w27, -8
 ; CHECK-NEXT:    .cfi_offset w28, -16
 ; CHECK-NEXT:    ldr q0, [x0, #272]

diff  --git a/llvm/test/CodeGen/AArch64/cfi-sync-async.ll b/llvm/test/CodeGen/AArch64/cfi-sync-async.ll
new file mode 100644
index 00000000000000..721a957d0e0444
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cfi-sync-async.ll
@@ -0,0 +1,240 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s
+
+declare i32 @g0(i32)
+declare i32 @g1(ptr)
+
+define i32 @foo_nounwind(i1 %cmp) nounwind {
+; CHECK-LABEL: foo_nounwind:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    sub sp, sp, #97, lsl #12 // =397312
+; CHECK-NEXT:    sub sp, sp, #2688
+; CHECK-NEXT:    tbz w0, #0, .LBB0_2
+; CHECK-NEXT:  // %bb.1: // %br1
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    bl g0
+; CHECK-NEXT:    add w0, w0, #1
+; CHECK-NEXT:    add sp, sp, #97, lsl #12 // =397312
+; CHECK-NEXT:    add sp, sp, #2688
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB0_2: // %br2
+; CHECK-NEXT:    mov w8, #42 // =0x2a
+; CHECK-NEXT:    mov x0, sp
+; CHECK-NEXT:    str w8, [sp]
+; CHECK-NEXT:    bl g1
+; CHECK-NEXT:    add sp, sp, #97, lsl #12 // =397312
+; CHECK-NEXT:    add sp, sp, #2688
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    b g0
+  %ptr = alloca i32, i32 100000, align 4
+  br i1 %cmp, label %br1, label %br2
+
+br1:
+  %call1 = call i32 @g0(i32 0)
+  %rv1 = add i32 %call1, 1
+  ret i32 %rv1
+
+br2:
+  store i32 42, ptr %ptr, align 4
+  %call2 = call i32 @g1(ptr %ptr)
+  %rv2 = tail call i32 @g0(i32 %call2)
+  ret i32 %rv2
+}
+
+define i32 @foo_default(i1 %cmp) {
+; CHECK-LABEL: foo_default:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    sub sp, sp, #97, lsl #12 // =397312
+; CHECK-NEXT:    sub sp, sp, #2688
+; CHECK-NEXT:    .cfi_def_cfa_offset 400016
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    tbz w0, #0, .LBB1_2
+; CHECK-NEXT:  // %bb.1: // %br1
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    bl g0
+; CHECK-NEXT:    add w0, w0, #1
+; CHECK-NEXT:    add sp, sp, #97, lsl #12 // =397312
+; CHECK-NEXT:    add sp, sp, #2688
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB1_2: // %br2
+; CHECK-NEXT:    mov w8, #42 // =0x2a
+; CHECK-NEXT:    mov x0, sp
+; CHECK-NEXT:    str w8, [sp]
+; CHECK-NEXT:    bl g1
+; CHECK-NEXT:    add sp, sp, #97, lsl #12 // =397312
+; CHECK-NEXT:    add sp, sp, #2688
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    b g0
+  %ptr = alloca i32, i32 100000, align 4
+  br i1 %cmp, label %br1, label %br2
+
+br1:
+  %call1 = call i32 @g0(i32 0)
+  %rv1 = add i32 %call1, 1
+  ret i32 %rv1
+
+br2:
+  store i32 42, ptr %ptr, align 4
+  %call2 = call i32 @g1(ptr %ptr)
+  %rv2 = tail call i32 @g0(i32 %call2)
+  ret i32 %rv2
+}
+
+define i32 @foo_uwtable(i1 %cmp) uwtable {
+; CHECK-LABEL: foo_uwtable:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    sub sp, sp, #97, lsl #12 // =397312
+; CHECK-NEXT:    .cfi_def_cfa_offset 397328
+; CHECK-NEXT:    sub sp, sp, #2688
+; CHECK-NEXT:    .cfi_def_cfa_offset 400016
+; CHECK-NEXT:    .cfi_remember_state
+; CHECK-NEXT:    tbz w0, #0, .LBB2_2
+; CHECK-NEXT:  // %bb.1: // %br1
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    bl g0
+; CHECK-NEXT:    add w0, w0, #1
+; CHECK-NEXT:    add sp, sp, #97, lsl #12 // =397312
+; CHECK-NEXT:    .cfi_def_cfa_offset 2704
+; CHECK-NEXT:    add sp, sp, #2688
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore w29
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB2_2: // %br2
+; CHECK-NEXT:    .cfi_restore_state
+; CHECK-NEXT:    mov w8, #42 // =0x2a
+; CHECK-NEXT:    mov x0, sp
+; CHECK-NEXT:    str w8, [sp]
+; CHECK-NEXT:    bl g1
+; CHECK-NEXT:    add sp, sp, #97, lsl #12 // =397312
+; CHECK-NEXT:    .cfi_def_cfa_offset 2704
+; CHECK-NEXT:    add sp, sp, #2688
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore w29
+; CHECK-NEXT:    b g0
+  %ptr = alloca i32, i32 100000, align 4
+  br i1 %cmp, label %br1, label %br2
+
+br1:
+  %call1 = call i32 @g0(i32 0)
+  %rv1 = add i32 %call1, 1
+  ret i32 %rv1
+
+br2:
+  store i32 42, ptr %ptr, align 4
+  %call2 = call i32 @g1(ptr %ptr)
+  %rv2 = tail call i32 @g0(i32 %call2)
+  ret i32 %rv2
+}
+
+define i32 @foo_uwtable_sync(i1 %cmp) uwtable(sync) {
+; CHECK-LABEL: foo_uwtable_sync:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    sub sp, sp, #97, lsl #12 // =397312
+; CHECK-NEXT:    sub sp, sp, #2688
+; CHECK-NEXT:    .cfi_def_cfa_offset 400016
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    tbz w0, #0, .LBB3_2
+; CHECK-NEXT:  // %bb.1: // %br1
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    bl g0
+; CHECK-NEXT:    add w0, w0, #1
+; CHECK-NEXT:    add sp, sp, #97, lsl #12 // =397312
+; CHECK-NEXT:    add sp, sp, #2688
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB3_2: // %br2
+; CHECK-NEXT:    mov w8, #42 // =0x2a
+; CHECK-NEXT:    mov x0, sp
+; CHECK-NEXT:    str w8, [sp]
+; CHECK-NEXT:    bl g1
+; CHECK-NEXT:    add sp, sp, #97, lsl #12 // =397312
+; CHECK-NEXT:    add sp, sp, #2688
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    b g0
+  %ptr = alloca i32, i32 100000, align 4
+  br i1 %cmp, label %br1, label %br2
+
+br1:
+  %call1 = call i32 @g0(i32 0)
+  %rv1 = add i32 %call1, 1
+  ret i32 %rv1
+
+br2:
+  store i32 42, ptr %ptr, align 4
+  %call2 = call i32 @g1(ptr %ptr)
+  %rv2 = tail call i32 @g0(i32 %call2)
+  ret i32 %rv2
+}
+
+define i32 @foo_uwtable_async(i1 %cmp) uwtable(async) {
+; CHECK-LABEL: foo_uwtable_async:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    sub sp, sp, #97, lsl #12 // =397312
+; CHECK-NEXT:    .cfi_def_cfa_offset 397328
+; CHECK-NEXT:    sub sp, sp, #2688
+; CHECK-NEXT:    .cfi_def_cfa_offset 400016
+; CHECK-NEXT:    .cfi_remember_state
+; CHECK-NEXT:    tbz w0, #0, .LBB4_2
+; CHECK-NEXT:  // %bb.1: // %br1
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    bl g0
+; CHECK-NEXT:    add w0, w0, #1
+; CHECK-NEXT:    add sp, sp, #97, lsl #12 // =397312
+; CHECK-NEXT:    .cfi_def_cfa_offset 2704
+; CHECK-NEXT:    add sp, sp, #2688
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore w29
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB4_2: // %br2
+; CHECK-NEXT:    .cfi_restore_state
+; CHECK-NEXT:    mov w8, #42 // =0x2a
+; CHECK-NEXT:    mov x0, sp
+; CHECK-NEXT:    str w8, [sp]
+; CHECK-NEXT:    bl g1
+; CHECK-NEXT:    add sp, sp, #97, lsl #12 // =397312
+; CHECK-NEXT:    .cfi_def_cfa_offset 2704
+; CHECK-NEXT:    add sp, sp, #2688
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore w29
+; CHECK-NEXT:    b g0
+  %ptr = alloca i32, i32 100000, align 4
+  br i1 %cmp, label %br1, label %br2
+
+br1:
+  %call1 = call i32 @g0(i32 0)
+  %rv1 = add i32 %call1, 1
+  ret i32 %rv1
+
+br2:
+  store i32 42, ptr %ptr, align 4
+  %call2 = call i32 @g1(ptr %ptr)
+  %rv2 = tail call i32 @g0(i32 %call2)
+  ret i32 %rv2
+}

diff  --git a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
index 185f847646b490..6ce10d1b56d994 100644
--- a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
@@ -173,8 +173,8 @@ define <4 x i32> @sign_4xi32_multi_use(<4 x i32> %a) {
 ; CHECK-LABEL: sign_4xi32_multi_use:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    movi v1.2d, #0xffffffffffffffff
 ; CHECK-NEXT:    cmlt v2.4s, v0.4s, #0

diff  --git a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
index ec1020b392ccea..15fabf37793c12 100644
--- a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
+++ b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
@@ -25,8 +25,8 @@ define i32 @test_return(ptr %p, i32 %oldval, i32 %newval) {
 ; OUTLINE-ATOMICS-LABEL: test_return:
 ; OUTLINE-ATOMICS:       ; %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
-; OUTLINE-ATOMICS-NEXT:    .cfi_def_cfa_offset 32
 ; OUTLINE-ATOMICS-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    .cfi_def_cfa_offset 32
 ; OUTLINE-ATOMICS-NEXT:    .cfi_offset w30, -8
 ; OUTLINE-ATOMICS-NEXT:    .cfi_offset w29, -16
 ; OUTLINE-ATOMICS-NEXT:    .cfi_offset w19, -24
@@ -75,8 +75,8 @@ define i1 @test_return_bool(ptr %value, i8 %oldValue, i8 %newValue) {
 ; OUTLINE-ATOMICS-LABEL: test_return_bool:
 ; OUTLINE-ATOMICS:       ; %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
-; OUTLINE-ATOMICS-NEXT:    .cfi_def_cfa_offset 32
 ; OUTLINE-ATOMICS-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    .cfi_def_cfa_offset 32
 ; OUTLINE-ATOMICS-NEXT:    .cfi_offset w30, -8
 ; OUTLINE-ATOMICS-NEXT:    .cfi_offset w29, -16
 ; OUTLINE-ATOMICS-NEXT:    .cfi_offset w19, -24
@@ -120,8 +120,8 @@ define void @test_conditional(ptr %p, i32 %oldval, i32 %newval) {
 ; OUTLINE-ATOMICS-LABEL: test_conditional:
 ; OUTLINE-ATOMICS:       ; %bb.0:
 ; OUTLINE-ATOMICS-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
-; OUTLINE-ATOMICS-NEXT:    .cfi_def_cfa_offset 32
 ; OUTLINE-ATOMICS-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    .cfi_def_cfa_offset 32
 ; OUTLINE-ATOMICS-NEXT:    .cfi_offset w30, -8
 ; OUTLINE-ATOMICS-NEXT:    .cfi_offset w29, -16
 ; OUTLINE-ATOMICS-NEXT:    .cfi_offset w19, -24
@@ -166,9 +166,9 @@ define i1 @test_conditional2(i32 %a, i32 %b, ptr %c) {
 ; CHECK-LABEL: test_conditional2:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    stp x22, x21, [sp, #-48]! ; 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    stp x20, x19, [sp, #16] ; 16-byte Folded Spill
 ; CHECK-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    .cfi_offset w19, -24
@@ -222,9 +222,9 @@ define i1 @test_conditional2(i32 %a, i32 %b, ptr %c) {
 ; OUTLINE-ATOMICS-LABEL: test_conditional2:
 ; OUTLINE-ATOMICS:       ; %bb.0: ; %entry
 ; OUTLINE-ATOMICS-NEXT:    stp x22, x21, [sp, #-48]! ; 16-byte Folded Spill
-; OUTLINE-ATOMICS-NEXT:    .cfi_def_cfa_offset 48
 ; OUTLINE-ATOMICS-NEXT:    stp x20, x19, [sp, #16] ; 16-byte Folded Spill
 ; OUTLINE-ATOMICS-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    .cfi_def_cfa_offset 48
 ; OUTLINE-ATOMICS-NEXT:    .cfi_offset w30, -8
 ; OUTLINE-ATOMICS-NEXT:    .cfi_offset w29, -16
 ; OUTLINE-ATOMICS-NEXT:    .cfi_offset w19, -24

diff  --git a/llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll b/llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
index 1044f65c95ef91..d92bbfd7a21d66 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
@@ -153,9 +153,6 @@ define i64 @test_or_unpredictable(i32 %a, i32 %b) {
 ; CHECK-NEXT:    bl _bar
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT:  LBB4_2: ; %common.ret
-; CHECK-NEXT:    .cfi_def_cfa wsp, 0
-; CHECK-NEXT:    .cfi_same_value w30
-; CHECK-NEXT:    .cfi_same_value w29
 ; CHECK-NEXT:    ret
 bb1:
   %0 = icmp eq i32 %a, 0
@@ -189,9 +186,6 @@ define i64 @test_and_unpredictable(i32 %a, i32 %b) {
 ; CHECK-NEXT:    bl _bar
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT:  LBB5_2: ; %common.ret
-; CHECK-NEXT:    .cfi_def_cfa wsp, 0
-; CHECK-NEXT:    .cfi_same_value w30
-; CHECK-NEXT:    .cfi_same_value w29
 ; CHECK-NEXT:    ret
 bb1:
   %0 = icmp ne i32 %a, 0
@@ -212,4 +206,3 @@ declare i64 @bar()
 !0 = !{!"branch_weights", i32 5128, i32 32}
 !1 = !{!"branch_weights", i32 1024, i32 4136}
 !2 = !{}
-

diff  --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
index 99fcf5e1265c2e..c81eb610609a6f 100644
--- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
@@ -353,9 +353,9 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) {
 ; CHECK-LABEL: utest_f64i64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
@@ -390,9 +390,9 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) {
 ; CHECK-LABEL: ustest_f64i64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
@@ -457,9 +457,9 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) {
 ; CHECK-LABEL: utest_f32i64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
@@ -495,9 +495,9 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) {
 ; CHECK-LABEL: ustest_f32i64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
@@ -575,9 +575,9 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
 ; CHECK-LABEL: utesth_f16i64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
@@ -613,9 +613,9 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) {
 ; CHECK-LABEL: ustest_f16i64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
@@ -978,9 +978,9 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
 ; CHECK-LABEL: utest_f64i64_mm:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
@@ -1014,9 +1014,9 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
 ; CHECK-LABEL: ustest_f64i64_mm:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
@@ -1075,9 +1075,9 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
 ; CHECK-LABEL: utest_f32i64_mm:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
@@ -1112,9 +1112,9 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
 ; CHECK-LABEL: ustest_f32i64_mm:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
@@ -1186,9 +1186,9 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
 ; CHECK-LABEL: utesth_f16i64_mm:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
@@ -1223,9 +1223,9 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
 ; CHECK-LABEL: ustest_f16i64_mm:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32

diff  --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index 39672d266702a0..147333b7d864a6 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -248,8 +248,8 @@ define <1 x i32> @test_signed_v1f128_v1i32(<1 x fp128> %f) {
 ; CHECK-LABEL: test_signed_v1f128_v1i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    adrp x8, .LCPI14_0
@@ -286,10 +286,10 @@ define <2 x i32> @test_signed_v2f128_v2i32(<2 x fp128> %f) {
 ; CHECK-LABEL: test_signed_v2f128_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #112
-; CHECK-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x22, x21, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -355,10 +355,10 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
 ; CHECK-LABEL: test_signed_v3f128_v3i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #128
-; CHECK-NEXT:    .cfi_def_cfa_offset 128
 ; CHECK-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x22, x21, [sp, #96] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 128
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -445,10 +445,10 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) {
 ; CHECK-LABEL: test_signed_v4f128_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #144
-; CHECK-NEXT:    .cfi_def_cfa_offset 144
 ; CHECK-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x22, x21, [sp, #112] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #128] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 144
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -808,12 +808,12 @@ define <2 x i100> @test_signed_v2f32_v2i100(<2 x float> %f) {
 ; CHECK-LABEL: test_signed_v2f32_v2i100:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #80
-; CHECK-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
 ; CHECK-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -874,12 +874,12 @@ define <2 x i128> @test_signed_v2f32_v2i128(<2 x float> %f) {
 ; CHECK-LABEL: test_signed_v2f32_v2i128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #80
-; CHECK-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
 ; CHECK-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -1080,7 +1080,6 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-LABEL: test_signed_v4f32_v4i100:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #128
-; CHECK-NEXT:    .cfi_def_cfa_offset 128
 ; CHECK-NEXT:    str d10, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp d9, d8, [sp, #40] // 16-byte Folded Spill
 ; CHECK-NEXT:    str x30, [sp, #56] // 8-byte Folded Spill
@@ -1088,6 +1087,7 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-NEXT:    stp x24, x23, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x22, x21, [sp, #96] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 128
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -1184,7 +1184,6 @@ define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-LABEL: test_signed_v4f32_v4i128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #128
-; CHECK-NEXT:    .cfi_def_cfa_offset 128
 ; CHECK-NEXT:    str d10, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp d9, d8, [sp, #40] // 16-byte Folded Spill
 ; CHECK-NEXT:    str x30, [sp, #56] // 8-byte Folded Spill
@@ -1192,6 +1191,7 @@ define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-NEXT:    stp x24, x23, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x22, x21, [sp, #96] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 128
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -1462,12 +1462,12 @@ define <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) {
 ; CHECK-LABEL: test_signed_v2f64_v2i100:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #80
-; CHECK-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
 ; CHECK-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -1528,12 +1528,12 @@ define <2 x i128> @test_signed_v2f64_v2i128(<2 x double> %f) {
 ; CHECK-LABEL: test_signed_v2f64_v2i128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #80
-; CHECK-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
 ; CHECK-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -1828,7 +1828,6 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) {
 ; CHECK-LABEL: test_signed_v4f16_v4i100:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #112
-; CHECK-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
 ; CHECK-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
@@ -1836,6 +1835,7 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) {
 ; CHECK-NEXT:    stp x24, x23, [sp, #64] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x22, x21, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -1935,7 +1935,6 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) {
 ; CHECK-LABEL: test_signed_v4f16_v4i128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #112
-; CHECK-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-NEXT:    str d10, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp d9, d8, [sp, #24] // 16-byte Folded Spill
 ; CHECK-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
@@ -1943,6 +1942,7 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) {
 ; CHECK-NEXT:    stp x24, x23, [sp, #64] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x22, x21, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -2560,7 +2560,6 @@ define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-LABEL: test_signed_v8f16_v8i100:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #192
-; CHECK-NEXT:    .cfi_def_cfa_offset 192
 ; CHECK-NEXT:    str d10, [sp, #64] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp d9, d8, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x29, x30, [sp, #96] // 16-byte Folded Spill
@@ -2569,6 +2568,7 @@ define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    stp x24, x23, [sp, #144] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x22, x21, [sp, #160] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #176] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 192
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -2757,7 +2757,6 @@ define <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) {
 ; CHECK-LABEL: test_signed_v8f16_v8i128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #192
-; CHECK-NEXT:    .cfi_def_cfa_offset 192
 ; CHECK-NEXT:    str d10, [sp, #64] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp d9, d8, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x29, x30, [sp, #96] // 16-byte Folded Spill
@@ -2766,6 +2765,7 @@ define <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) {
 ; CHECK-NEXT:    stp x24, x23, [sp, #144] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x22, x21, [sp, #160] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #176] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 192
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24

diff  --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index 25567e9e76c848..1dfd8a53149ee9 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -248,8 +248,8 @@ define <1 x i32> @test_unsigned_v1f128_v1i32(<1 x fp128> %f) {
 ; CHECK-LABEL: test_unsigned_v1f128_v1i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    adrp x8, .LCPI14_0
@@ -279,9 +279,9 @@ define <2 x i32> @test_unsigned_v2f128_v2i32(<2 x fp128> %f) {
 ; CHECK-LABEL: test_unsigned_v2f128_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #96
-; CHECK-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
@@ -332,9 +332,9 @@ define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) {
 ; CHECK-LABEL: test_unsigned_v3f128_v3i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #112
-; CHECK-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
@@ -400,9 +400,9 @@ define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) {
 ; CHECK-LABEL: test_unsigned_v4f128_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #128
-; CHECK-NEXT:    .cfi_def_cfa_offset 128
 ; CHECK-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 128
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
@@ -722,10 +722,10 @@ define <2 x i100> @test_unsigned_v2f32_v2i100(<2 x float> %f) {
 ; CHECK-LABEL: test_unsigned_v2f32_v2i100:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #64
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x30, x21, [sp, #32] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -774,10 +774,10 @@ define <2 x i128> @test_unsigned_v2f32_v2i128(<2 x float> %f) {
 ; CHECK-LABEL: test_unsigned_v2f32_v2i128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #64
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
@@ -947,12 +947,12 @@ define <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-LABEL: test_unsigned_v4f32_v4i100:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #112
-; CHECK-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-NEXT:    stp d9, d8, [sp, #32] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x30, x25, [sp, #48] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x24, x23, [sp, #64] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x22, x21, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -1031,12 +1031,12 @@ define <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-LABEL: test_unsigned_v4f32_v4i128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #112
-; CHECK-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-NEXT:    stp d9, d8, [sp, #32] // 16-byte Folded Spill
 ; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x24, x23, [sp, #64] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x22, x21, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -1262,10 +1262,10 @@ define <2 x i100> @test_unsigned_v2f64_v2i100(<2 x double> %f) {
 ; CHECK-LABEL: test_unsigned_v2f64_v2i100:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #64
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x30, x21, [sp, #32] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -1313,10 +1313,10 @@ define <2 x i128> @test_unsigned_v2f64_v2i128(<2 x double> %f) {
 ; CHECK-LABEL: test_unsigned_v2f64_v2i128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #64
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
@@ -1564,12 +1564,12 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) {
 ; CHECK-LABEL: test_unsigned_v4f16_v4i100:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #96
-; CHECK-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x30, x25, [sp, #32] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -1651,12 +1651,12 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) {
 ; CHECK-LABEL: test_unsigned_v4f16_v4i128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #96
-; CHECK-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -2162,7 +2162,6 @@ define <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-LABEL: test_unsigned_v8f16_v8i100:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #176
-; CHECK-NEXT:    .cfi_def_cfa_offset 176
 ; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x29, x30, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x28, x27, [sp, #96] // 16-byte Folded Spill
@@ -2170,6 +2169,7 @@ define <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    stp x24, x23, [sp, #128] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x22, x21, [sp, #144] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 176
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -2327,7 +2327,6 @@ define <8 x i128> @test_unsigned_v8f16_v8i128(<8 x half> %f) {
 ; CHECK-LABEL: test_unsigned_v8f16_v8i128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #176
-; CHECK-NEXT:    .cfi_def_cfa_offset 176
 ; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x29, x30, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x28, x27, [sp, #96] // 16-byte Folded Spill
@@ -2335,6 +2334,7 @@ define <8 x i128> @test_unsigned_v8f16_v8i128(<8 x half> %f) {
 ; CHECK-NEXT:    stp x24, x23, [sp, #128] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x22, x21, [sp, #144] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 176
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24

diff  --git a/llvm/test/CodeGen/AArch64/i128-math.ll b/llvm/test/CodeGen/AArch64/i128-math.ll
index 0b4a615e205689..a16c51541df152 100644
--- a/llvm/test/CodeGen/AArch64/i128-math.ll
+++ b/llvm/test/CodeGen/AArch64/i128-math.ll
@@ -401,8 +401,8 @@ define i128 @i128_saturating_mul(i128 %x, i128 %y) {
 ; CHECK-LABEL: i128_saturating_mul:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x30, [sp, #-32]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32

diff  --git a/llvm/test/CodeGen/AArch64/isinf.ll b/llvm/test/CodeGen/AArch64/isinf.ll
index 947e73cbfa3450..a4aa7b73e77bef 100644
--- a/llvm/test/CodeGen/AArch64/isinf.ll
+++ b/llvm/test/CodeGen/AArch64/isinf.ll
@@ -59,8 +59,8 @@ define i32 @replace_isinf_call_f128(fp128 %x) {
 ; CHECK-LABEL: replace_isinf_call_f128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    str q0, [sp]
 ; CHECK-NEXT:    ldrb w8, [sp, #15]

diff  --git a/llvm/test/CodeGen/AArch64/large-stack-cmp.ll b/llvm/test/CodeGen/AArch64/large-stack-cmp.ll
index 56094903efe7e3..12179d3c944d2e 100644
--- a/llvm/test/CodeGen/AArch64/large-stack-cmp.ll
+++ b/llvm/test/CodeGen/AArch64/large-stack-cmp.ll
@@ -5,16 +5,14 @@ define void @foo() {
 ; CHECK-LABEL: foo:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    stp x28, x27, [sp, #-32]! ; 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    sub sp, sp, #1, lsl #12 ; =4096
+; CHECK-NEXT:    sub sp, sp, #80
+; CHECK-NEXT:    .cfi_def_cfa_offset 4208
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    .cfi_offset w27, -24
 ; CHECK-NEXT:    .cfi_offset w28, -32
-; CHECK-NEXT:    sub sp, sp, #1, lsl #12 ; =4096
-; CHECK-NEXT:    .cfi_def_cfa_offset 4128
-; CHECK-NEXT:    sub sp, sp, #80
-; CHECK-NEXT:    .cfi_def_cfa_offset 4208
 ; CHECK-NEXT:    adds x8, sp, #1, lsl #12 ; =4096
 ; CHECK-NEXT:    cmn x8, #32
 ; CHECK-NEXT:    b.eq LBB0_2

diff  --git a/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll b/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll
index 5534568f72cd36..b3be71cc2bbf5c 100644
--- a/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll
+++ b/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll
@@ -13,8 +13,8 @@ define i32 @main() local_unnamed_addr #1 {
 ; CHECK-LABEL: main:
 ; CHECK:       // %bb.0: // %for.body.lr.ph.i.i.i.i.i.i63
 ; CHECK-NEXT:    sub sp, sp, #112
-; CHECK-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    bl _Z5setupv
 ; CHECK-NEXT:    movi v0.4s, #1

diff  --git a/llvm/test/CodeGen/AArch64/machine-combiner.ll b/llvm/test/CodeGen/AArch64/machine-combiner.ll
index ea7d102cce08ae..70a638857ce4a9 100644
--- a/llvm/test/CodeGen/AArch64/machine-combiner.ll
+++ b/llvm/test/CodeGen/AArch64/machine-combiner.ll
@@ -754,9 +754,9 @@ define double @reassociate_adds_from_calls() {
 ; CHECK-STD-LABEL: reassociate_adds_from_calls:
 ; CHECK-STD:       // %bb.0:
 ; CHECK-STD-NEXT:    str d10, [sp, #-32]! // 8-byte Folded Spill
-; CHECK-STD-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-STD-NEXT:    stp d9, d8, [sp, #8] // 16-byte Folded Spill
 ; CHECK-STD-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
+; CHECK-STD-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-STD-NEXT:    .cfi_offset w30, -8
 ; CHECK-STD-NEXT:    .cfi_offset b8, -16
 ; CHECK-STD-NEXT:    .cfi_offset b9, -24
@@ -779,9 +779,9 @@ define double @reassociate_adds_from_calls() {
 ; CHECK-UNSAFE-LABEL: reassociate_adds_from_calls:
 ; CHECK-UNSAFE:       // %bb.0:
 ; CHECK-UNSAFE-NEXT:    str d10, [sp, #-32]! // 8-byte Folded Spill
-; CHECK-UNSAFE-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-UNSAFE-NEXT:    stp d9, d8, [sp, #8] // 16-byte Folded Spill
 ; CHECK-UNSAFE-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
+; CHECK-UNSAFE-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-UNSAFE-NEXT:    .cfi_offset w30, -8
 ; CHECK-UNSAFE-NEXT:    .cfi_offset b8, -16
 ; CHECK-UNSAFE-NEXT:    .cfi_offset b9, -24
@@ -814,9 +814,9 @@ define double @already_reassociated() {
 ; CHECK-LABEL: already_reassociated:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str d10, [sp, #-32]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    stp d9, d8, [sp, #8] // 16-byte Folded Spill
 ; CHECK-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset b8, -16
 ; CHECK-NEXT:    .cfi_offset b9, -24
@@ -844,4 +844,3 @@ define double @already_reassociated() {
   %t2 = fadd double %t0, %t1
   ret double %t2
 }
-

diff  --git a/llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll b/llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll
index 60c73384107b97..1db0ce9fef73e0 100644
--- a/llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll
+++ b/llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll
@@ -9,8 +9,8 @@ define i32 @sink_load_and_copy(i32 %n) {
 ; CHECK-LABEL: sink_load_and_copy:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -63,8 +63,8 @@ define i32 @cant_sink_successive_call(i32 %n) {
 ; CHECK-LABEL: cant_sink_successive_call:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24
@@ -120,8 +120,8 @@ define i32 @cant_sink_successive_store(ptr nocapture readnone %store, i32 %n) {
 ; CHECK-LABEL: cant_sink_successive_store:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24

diff  --git a/llvm/test/CodeGen/AArch64/machine-outliner-throw2.ll b/llvm/test/CodeGen/AArch64/machine-outliner-throw2.ll
index a0792b0f309730..aa6e31d6ff21d7 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-throw2.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-throw2.ll
@@ -20,7 +20,6 @@ define void @_Z2f1v() minsize {
 ; NOOMIT-LABEL: _Z2f1v:
 ; NOOMIT:       // %bb.0: // %entry
 ; NOOMIT-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; NOOMIT-NEXT:    .cfi_def_cfa_offset 16
 ; NOOMIT-NEXT:    mov x29, sp
 ; NOOMIT-NEXT:    .cfi_def_cfa w29, 16
 ; NOOMIT-NEXT:    .cfi_offset w30, -8
@@ -47,7 +46,6 @@ define void @_Z2f2v() minsize {
 ; NOOMIT-LABEL: _Z2f2v:
 ; NOOMIT:       // %bb.0: // %entry
 ; NOOMIT-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; NOOMIT-NEXT:   .cfi_def_cfa_offset 16
 ; NOOMIT-NEXT:    mov x29, sp
 ; NOOMIT-NEXT:    .cfi_def_cfa w29, 16
 ; NOOMIT-NEXT:    .cfi_offset w30, -8

diff  --git a/llvm/test/CodeGen/AArch64/neg-imm.ll b/llvm/test/CodeGen/AArch64/neg-imm.ll
index 345713d4659f4d..baf1463058664f 100644
--- a/llvm/test/CodeGen/AArch64/neg-imm.ll
+++ b/llvm/test/CodeGen/AArch64/neg-imm.ll
@@ -9,8 +9,8 @@ define void @test(i32 %px) {
 ; CHECK-LABEL: test:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    str x30, [sp, #-32]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32

diff  --git a/llvm/test/CodeGen/AArch64/peephole-and-tst.ll b/llvm/test/CodeGen/AArch64/peephole-and-tst.ll
index 3b3ef4a7ec53ce..b47dd2ede5f9ea 100644
--- a/llvm/test/CodeGen/AArch64/peephole-and-tst.ll
+++ b/llvm/test/CodeGen/AArch64/peephole-and-tst.ll
@@ -151,8 +151,8 @@ define i64 @test_and3(i64 %x, i64 %y) {
 ; CHECK-LABEL: test_and3:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x30, [sp, #-32]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32

diff  --git a/llvm/test/CodeGen/AArch64/ragreedy-csr.ll b/llvm/test/CodeGen/AArch64/ragreedy-csr.ll
index 99f01883dbfb15..25c9a8ea24ac0b 100644
--- a/llvm/test/CodeGen/AArch64/ragreedy-csr.ll
+++ b/llvm/test/CodeGen/AArch64/ragreedy-csr.ll
@@ -27,8 +27,8 @@ define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly
 ; CHECK-NEXT:    b.ne LBB0_47
 ; CHECK-NEXT:  ; %bb.1: ; %if.end
 ; CHECK-NEXT:    sub sp, sp, #64
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:  Lloh0:
@@ -260,9 +260,6 @@ define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly
 ; CHECK-NEXT:    cbz w8, LBB0_43
 ; CHECK-NEXT:    b LBB0_12
 ; CHECK-NEXT:  LBB0_47:
-; CHECK-NEXT:    .cfi_def_cfa wsp, 0
-; CHECK-NEXT:    .cfi_same_value w30
-; CHECK-NEXT:    .cfi_same_value w29
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:    .loh AdrpLdrGot Lloh0, Lloh1

diff  --git a/llvm/test/CodeGen/AArch64/regress-tblgen-chains.ll b/llvm/test/CodeGen/AArch64/regress-tblgen-chains.ll
index ffe4f4d1bbd646..0fe61e38e916d8 100644
--- a/llvm/test/CodeGen/AArch64/regress-tblgen-chains.ll
+++ b/llvm/test/CodeGen/AArch64/regress-tblgen-chains.ll
@@ -16,7 +16,6 @@ define i64 @test_chains() {
 ; CHECK-LABEL: test_chains:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
 ; CHECK-NEXT:    add x29, sp, #16
 ; CHECK-NEXT:    .cfi_def_cfa w29, 16

diff  --git a/llvm/test/CodeGen/AArch64/shrink-wrap-byval-inalloca-preallocated.ll b/llvm/test/CodeGen/AArch64/shrink-wrap-byval-inalloca-preallocated.ll
index 23f28b1e9ce1ac..210c8701ea9292 100644
--- a/llvm/test/CodeGen/AArch64/shrink-wrap-byval-inalloca-preallocated.ll
+++ b/llvm/test/CodeGen/AArch64/shrink-wrap-byval-inalloca-preallocated.ll
@@ -24,8 +24,8 @@ define void @test_regular_pointers(ptr %a, ptr %b) {
 ; CHECK-NEXT:    b.gt LBB0_2
 ; CHECK-NEXT:  ; %bb.1: ; %then
 ; CHECK-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    .cfi_offset w19, -24
@@ -36,11 +36,6 @@ define void @test_regular_pointers(ptr %a, ptr %b) {
 ; CHECK-NEXT:    str xzr, [x19]
 ; CHECK-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
 ; CHECK-NEXT:  LBB0_2: ; %exit
-; CHECK-NEXT:    .cfi_def_cfa wsp, 0
-; CHECK-NEXT:    .cfi_same_value w30
-; CHECK-NEXT:    .cfi_same_value w29
-; CHECK-NEXT:    .cfi_same_value w19
-; CHECK-NEXT:    .cfi_same_value w20
 ; CHECK-NEXT:    ret
 entry:
   %l.a = load double, ptr %a, align 8
@@ -65,8 +60,8 @@ define void @test_byval_pointers(ptr %a, ptr byval(%struct.s) %b) {
 ; CHECK-LABEL: test_byval_pointers:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    .cfi_offset w19, -24
@@ -113,8 +108,8 @@ define void @test_inalloca_pointers(ptr %a, ptr inalloca(%struct.s) %b) {
 ; CHECK-LABEL: test_inalloca_pointers:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    .cfi_offset w19, -24
@@ -161,8 +156,8 @@ define void @test_preallocated_pointers(ptr %a, ptr preallocated(%struct.s) %b)
 ; CHECK-LABEL: test_preallocated_pointers:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    .cfi_offset w19, -24
@@ -203,4 +198,3 @@ then:
 exit:
   ret void
 }
-

diff  --git a/llvm/test/CodeGen/AArch64/sibling-call.ll b/llvm/test/CodeGen/AArch64/sibling-call.ll
index c7a48e3075671b..98323350bb3b34 100644
--- a/llvm/test/CodeGen/AArch64/sibling-call.ll
+++ b/llvm/test/CodeGen/AArch64/sibling-call.ll
@@ -26,8 +26,8 @@ define dso_local void @caller_to8_from0() {
 ; CHECK-LABEL: caller_to8_from0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    mov w8, #42
 ; CHECK-NEXT:    str x8, [sp]
@@ -58,8 +58,8 @@ define dso_local void @caller_to16_from8([8 x i64], i64 %a) {
 ; CHECK-LABEL: caller_to16_from8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    bl callee_stack16
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload

diff  --git a/llvm/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll b/llvm/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll
index 4982f56c5484a7..737df95f1cd23e 100644
--- a/llvm/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll
@@ -10,7 +10,6 @@ define i32 @test_stack_guard_remat2() ssp {
 ; CHECK-LABEL: test_stack_guard_remat2:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    sub sp, sp, #64
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
 ; CHECK-NEXT:    add x29, sp, #48
 ; CHECK-NEXT:    .cfi_def_cfa w29, 16

diff  --git a/llvm/test/CodeGen/AArch64/stack-protector-darwin-got.ll b/llvm/test/CodeGen/AArch64/stack-protector-darwin-got.ll
index 140f2f3e31e6b9..8d144c12ae3f1e 100644
--- a/llvm/test/CodeGen/AArch64/stack-protector-darwin-got.ll
+++ b/llvm/test/CodeGen/AArch64/stack-protector-darwin-got.ll
@@ -9,9 +9,9 @@ define void @test(ptr %a) #0 {
 ; CHECK-LABEL: test:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    sub sp, sp, #80
-; CHECK-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-NEXT:    stp x20, x19, [sp, #48] ; 16-byte Folded Spill
 ; CHECK-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    .cfi_offset w19, -24

diff  --git a/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll b/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll
index 1344ac54e149d0..e45c36bfaf10a3 100644
--- a/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll
+++ b/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll
@@ -161,8 +161,8 @@ define i1 @test_cross_bb(ptr addrspace(1) %a, i1 %external_cond) gc "statepoint-
 ; CHECK-LABEL: test_cross_bb:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    str x30, [sp, #-32]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
@@ -205,8 +205,8 @@ define void @test_attributes(ptr byval(%struct2) %s) gc "statepoint-example" {
 ; CHECK-LABEL: test_attributes:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    mov x18, xzr
 ; CHECK-NEXT:    ldr q0, [sp, #48]

diff  --git a/llvm/test/CodeGen/AArch64/sve-alloca.ll b/llvm/test/CodeGen/AArch64/sve-alloca.ll
index 6361b2ec763b8f..90eed07c242bf6 100644
--- a/llvm/test/CodeGen/AArch64/sve-alloca.ll
+++ b/llvm/test/CodeGen/AArch64/sve-alloca.ll
@@ -9,14 +9,8 @@ define void @foo(<vscale x 4 x i64> %dst, i1 %cond) {
 ; CHECK-NEXT:    tbz w0, #0, .LBB0_2
 ; CHECK-NEXT:  // %bb.1: // %if.then
 ; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    stp x28, x19, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov x29, sp
-; CHECK-NEXT:    .cfi_def_cfa w29, 32
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w28, -16
-; CHECK-NEXT:    .cfi_offset w30, -24
-; CHECK-NEXT:    .cfi_offset w29, -32
 ; CHECK-NEXT:    addvl sp, sp, #-18
 ; CHECK-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
@@ -46,6 +40,12 @@ define void @foo(<vscale x 4 x i64> %dst, i1 %cond) {
 ; CHECK-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
 ; CHECK-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
 ; CHECK-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    mov x19, sp
+; CHECK-NEXT:    .cfi_def_cfa w29, 32
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w28, -16
+; CHECK-NEXT:    .cfi_offset w30, -24
+; CHECK-NEXT:    .cfi_offset w29, -32
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 32 - 8 * VG
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 32 - 16 * VG
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 32 - 24 * VG
@@ -57,7 +57,6 @@ define void @foo(<vscale x 4 x i64> %dst, i1 %cond) {
 ; CHECK-NEXT:    rdvl x9, #2
 ; CHECK-NEXT:    mov x8, sp
 ; CHECK-NEXT:    add x9, x9, #15
-; CHECK-NEXT:    mov x19, sp
 ; CHECK-NEXT:    and x9, x9, #0xfffffffffffffff0
 ; CHECK-NEXT:    sub x8, x8, x9
 ; CHECK-NEXT:    and x0, x8, #0xffffffffffffffe0
@@ -99,19 +98,6 @@ define void @foo(<vscale x 4 x i64> %dst, i1 %cond) {
 ; CHECK-NEXT:    ldp x28, x19, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
 ; CHECK-NEXT:  .LBB0_2: // %if.end
-; CHECK-NEXT:    .cfi_def_cfa wsp, 0
-; CHECK-NEXT:    .cfi_same_value b8
-; CHECK-NEXT:    .cfi_same_value b9
-; CHECK-NEXT:    .cfi_same_value b10
-; CHECK-NEXT:    .cfi_same_value b11
-; CHECK-NEXT:    .cfi_same_value b12
-; CHECK-NEXT:    .cfi_same_value b13
-; CHECK-NEXT:    .cfi_same_value b14
-; CHECK-NEXT:    .cfi_same_value b15
-; CHECK-NEXT:    .cfi_same_value w19
-; CHECK-NEXT:    .cfi_same_value w28
-; CHECK-NEXT:    .cfi_same_value w30
-; CHECK-NEXT:    .cfi_same_value w29
 ; CHECK-NEXT:    ret
 entry:
   br i1 %cond, label %if.then, label %if.end

diff  --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
index 8b0ea9da945a2c..251e06bad004ba 100644
--- a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
+++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
@@ -450,11 +450,10 @@ define void @non_sve_caller_non_sve_callee_high_range()  {
 ; CHECK-LABEL: non_sve_caller_non_sve_callee_high_range:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w30, -8
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    movi d0, #0000000000000000
 ; CHECK-NEXT:    fmov s1, #1.00000000
 ; CHECK-NEXT:    fmov s2, #2.00000000
@@ -477,11 +476,10 @@ define void @non_sve_caller_high_range_non_sve_callee_high_range(float %f0, floa
 ; CHECK-LABEL: non_sve_caller_high_range_non_sve_callee_high_range:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w30, -8
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    movi d0, #0000000000000000
 ; CHECK-NEXT:    ld1w { z16.s }, p0/z, [x0]
@@ -509,11 +507,7 @@ define <vscale x 4 x float> @sve_caller_non_sve_callee_high_range(<vscale x 4 x
 ; CHECK-LABEL: sve_caller_non_sve_callee_high_range:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w30, -8
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-18
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 144 * VG
 ; CHECK-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
@@ -542,6 +536,10 @@ define <vscale x 4 x float> @sve_caller_non_sve_callee_high_range(<vscale x 4 x
 ; CHECK-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
 ; CHECK-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
 ; CHECK-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa8, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 168 * VG
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG
@@ -550,8 +548,6 @@ define <vscale x 4 x float> @sve_caller_non_sve_callee_high_range(<vscale x 4 x
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 16 - 48 * VG
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG
-; CHECK-NEXT:    addvl sp, sp, #-3
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa8, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 168 * VG
 ; CHECK-NEXT:    mov z25.d, z0.d
 ; CHECK-NEXT:    str z0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    movi d0, #0000000000000000
@@ -610,11 +606,7 @@ define <vscale x 4 x float> @sve_ret_caller_non_sve_callee_high_range()  {
 ; CHECK-LABEL: sve_ret_caller_non_sve_callee_high_range:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w30, -8
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-18
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 144 * VG
 ; CHECK-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
@@ -643,6 +635,10 @@ define <vscale x 4 x float> @sve_ret_caller_non_sve_callee_high_range()  {
 ; CHECK-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
 ; CHECK-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
 ; CHECK-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa0, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 160 * VG
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG
@@ -651,8 +647,6 @@ define <vscale x 4 x float> @sve_ret_caller_non_sve_callee_high_range()  {
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 16 - 48 * VG
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG
-; CHECK-NEXT:    addvl sp, sp, #-2
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa0, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 160 * VG
 ; CHECK-NEXT:    movi d0, #0000000000000000
 ; CHECK-NEXT:    fmov s1, #1.00000000
 ; CHECK-NEXT:    fmov s2, #2.00000000

diff  --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
index 3572885abc4ca4..e4448df84e47a0 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
@@ -7,10 +7,9 @@ define <4 x i32> @extract_v4i32_nxv16i32_12(<vscale x 16 x i32> %arg) {
 ; CHECK-LABEL: extract_v4i32_nxv16i32_12:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-4
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    st1w { z3.s }, p0, [sp, #3, mul vl]
 ; CHECK-NEXT:    st1w { z2.s }, p0, [sp, #2, mul vl]
@@ -28,10 +27,9 @@ define <8 x i16> @extract_v8i16_nxv32i16_8(<vscale x 32 x i16> %arg) {
 ; CHECK-LABEL: extract_v8i16_nxv32i16_8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    st1h { z1.h }, p0, [sp, #1, mul vl]
 ; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
@@ -47,10 +45,9 @@ define <4 x i16> @extract_v4i16_nxv32i16_8(<vscale x 32 x i16> %arg) {
 ; CHECK-LABEL: extract_v4i16_nxv32i16_8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-4
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    st1h { z3.h }, p0, [sp, #3, mul vl]
 ; CHECK-NEXT:    st1h { z2.h }, p0, [sp, #2, mul vl]
@@ -70,10 +67,9 @@ define <2 x i16> @extract_v2i16_nxv32i16_8(<vscale x 32 x i16> %arg) {
 ; CHECK-LABEL: extract_v2i16_nxv32i16_8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-8
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 64 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    mov x8, sp
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    add x8, x8, #32
@@ -101,10 +97,9 @@ define <2 x i64> @extract_v2i64_nxv8i64_8(<vscale x 8 x i64> %arg) {
 ; CHECK-LABEL: extract_v2i64_nxv8i64_8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-4
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    cnth x8
 ; CHECK-NEXT:    mov w9, #8
 ; CHECK-NEXT:    sub x8, x8, #2
@@ -129,10 +124,9 @@ define <4 x float> @extract_v4f32_nxv16f32_12(<vscale x 16 x float> %arg) {
 ; CHECK-LABEL: extract_v4f32_nxv16f32_12:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-4
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    st1w { z3.s }, p0, [sp, #3, mul vl]
 ; CHECK-NEXT:    st1w { z2.s }, p0, [sp, #2, mul vl]
@@ -150,10 +144,9 @@ define <2 x float> @extract_v2f32_nxv16f32_2(<vscale x 16 x float> %arg) {
 ; CHECK-LABEL: extract_v2f32_nxv16f32_2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
 ; CHECK-NEXT:    ldr d0, [sp, #8]
@@ -187,10 +180,9 @@ define <4 x i1> @extract_v4i1_nxv32i1_16(<vscale x 32 x i1> %arg) {
 ; CHECK-LABEL: extract_v4i1_nxv32i1_16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-8
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 64 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    mov x8, sp
 ; CHECK-NEXT:    ptrue p2.b
 ; CHECK-NEXT:    add x8, x8, #16
@@ -245,10 +237,9 @@ define <4 x i3> @extract_v4i3_nxv32i3_16(<vscale x 32 x i3> %arg) {
 ; CHECK-LABEL: extract_v4i3_nxv32i3_16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-8
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 64 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    mov x8, sp
 ; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    add x8, x8, #16
@@ -284,10 +275,9 @@ define <2 x i32> @extract_v2i32_nxv16i32_2(<vscale x 16 x i32> %arg) {
 ; CHECK-LABEL: extract_v2i32_nxv16i32_2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
 ; CHECK-NEXT:    ldr d0, [sp, #8]
@@ -302,10 +292,9 @@ define <4 x i64> @extract_v4i64_nxv8i64_0(<vscale x 8 x i64> %arg) {
 ; CHECK-LABEL: extract_v4i64_nxv8i64_0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    st1d { z1.d }, p0, [sp, #1, mul vl]
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll
index 5d5c59ae16fcff..bf69d2d1bbe7c3 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll
@@ -12,10 +12,10 @@ define dso_local void @func1(ptr %v1, ptr %v2, ptr %v3, ptr %v4, ptr %v5, ptr %v
 ; CHECK-LABEL: func1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stp x29, x25, [sp, #-64]! // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w21, -24

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll
index abeb88a0a317bb..2e4ef8fb64976a 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll
@@ -195,13 +195,12 @@ define void @test_rev_elts_fail(ptr %a) #1 {
 ; CHECK-LABEL: test_rev_elts_fail:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    sub x9, sp, #48
 ; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    and sp, x9, #0xffffffffffffffe0
 ; CHECK-NEXT:    .cfi_def_cfa w29, 16
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    sub x9, sp, #48
-; CHECK-NEXT:    and sp, x9, #0xffffffffffffffe0
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    mov z1.d, z0.d[2]
@@ -264,13 +263,12 @@ define void @test_revv8i32(ptr %a) #0 {
 ; CHECK-LABEL: test_revv8i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    sub x9, sp, #48
 ; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    and sp, x9, #0xffffffffffffffe0
 ; CHECK-NEXT:    .cfi_def_cfa w29, 16
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    sub x9, sp, #48
-; CHECK-NEXT:    and sp, x9, #0xffffffffffffffe0
 ; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    mov w8, v0.s[1]
@@ -382,13 +380,12 @@ define void @test_rev_fail(ptr %a) #1 {
 ; CHECK-LABEL: test_rev_fail:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    sub x9, sp, #48
 ; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    and sp, x9, #0xffffffffffffffe0
 ; CHECK-NEXT:    .cfi_def_cfa w29, 16
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    sub x9, sp, #48
-; CHECK-NEXT:    and sp, x9, #0xffffffffffffffe0
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    mov z1.h, z0.h[8]
@@ -448,13 +445,12 @@ define void @test_revv8i16v8i16(ptr %a, ptr %b, ptr %c) #1 {
 ; CHECK-LABEL: test_revv8i16v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    sub x9, sp, #48
 ; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    and sp, x9, #0xffffffffffffffe0
 ; CHECK-NEXT:    .cfi_def_cfa w29, 16
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    sub x9, sp, #48
-; CHECK-NEXT:    and sp, x9, #0xffffffffffffffe0
 ; CHECK-NEXT:    mov x8, sp
 ; CHECK-NEXT:    ldr q0, [x1]
 ; CHECK-NEXT:    orr x9, x8, #0x1e

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-zip-uzp-trn.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-zip-uzp-trn.ll
index 8387f6a47aea10..b29ac96c8a7f53 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-zip-uzp-trn.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-zip-uzp-trn.ll
@@ -132,13 +132,12 @@ define void @zip_v4f64(ptr %a, ptr %b) #0 {
 ; VBITS_EQ_512-LABEL: zip_v4f64:
 ; VBITS_EQ_512:       // %bb.0:
 ; VBITS_EQ_512-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; VBITS_EQ_512-NEXT:    .cfi_def_cfa_offset 16
+; VBITS_EQ_512-NEXT:    sub x9, sp, #48
 ; VBITS_EQ_512-NEXT:    mov x29, sp
+; VBITS_EQ_512-NEXT:    and sp, x9, #0xffffffffffffffe0
 ; VBITS_EQ_512-NEXT:    .cfi_def_cfa w29, 16
 ; VBITS_EQ_512-NEXT:    .cfi_offset w30, -8
 ; VBITS_EQ_512-NEXT:    .cfi_offset w29, -16
-; VBITS_EQ_512-NEXT:    sub x9, sp, #48
-; VBITS_EQ_512-NEXT:    and sp, x9, #0xffffffffffffffe0
 ; VBITS_EQ_512-NEXT:    ptrue p0.d, vl4
 ; VBITS_EQ_512-NEXT:    mov x8, sp
 ; VBITS_EQ_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
@@ -650,13 +649,12 @@ define void @zip_vscale2_4(ptr %a, ptr %b) #2 {
 ; CHECK-LABEL: zip_vscale2_4:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    sub x9, sp, #48
 ; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    and sp, x9, #0xffffffffffffffe0
 ; CHECK-NEXT:    .cfi_def_cfa w29, 16
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    sub x9, sp, #48
-; CHECK-NEXT:    and sp, x9, #0xffffffffffffffe0
 ; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    mov x8, sp
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll
index b3660cadba4d0e..dbb6820ff02d88 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll
@@ -910,13 +910,12 @@ define void @shuffle_ext_invalid(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-LABEL: shuffle_ext_invalid:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    sub x9, sp, #48
 ; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    and sp, x9, #0xffffffffffffffe0
 ; CHECK-NEXT:    .cfi_def_cfa w29, 16
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    sub x9, sp, #48
-; CHECK-NEXT:    and sp, x9, #0xffffffffffffffe0
 ; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    mov x8, sp
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]

diff  --git a/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll
index 2c91d1d34e8671..4c28228184d39a 100644
--- a/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll
@@ -43,10 +43,9 @@ define half @fadda_nxv6f16(<vscale x 6 x half> %v, half %s) {
 ; CHECK-LABEL: fadda_nxv6f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    mov w8, #32768 // =0x8000
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    ptrue p1.d
@@ -68,10 +67,9 @@ define half @fadda_nxv10f16(<vscale x 10 x half> %v, half %s) {
 ; CHECK-LABEL: fadda_nxv10f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-3
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    mov w8, #32768 // =0x8000
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    ptrue p1.d

diff  --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
index 6cb491050dc254..37ea51ba787193 100644
--- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -914,18 +914,17 @@ define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_0(<vscale x 16 x i1> %vec, <vsc
 ; CHECK-LABEL: insert_nxv1i1_nxv16i1_0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    punpklo p2.h, p0.b
-; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    punpkhi p0.h, p0.b
 ; CHECK-NEXT:    punpklo p3.h, p2.b
 ; CHECK-NEXT:    punpkhi p2.h, p2.b
 ; CHECK-NEXT:    punpklo p4.h, p3.b
 ; CHECK-NEXT:    punpkhi p3.h, p3.b
 ; CHECK-NEXT:    punpkhi p4.h, p4.b
-; CHECK-NEXT:    punpkhi p0.h, p0.b
 ; CHECK-NEXT:    uzp1 p1.d, p1.d, p4.d
 ; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    uzp1 p1.s, p1.s, p3.s
@@ -942,18 +941,17 @@ define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_1(<vscale x 16 x i1> %vec, <vsc
 ; CHECK-LABEL: insert_nxv1i1_nxv16i1_1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    punpklo p2.h, p0.b
-; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    punpkhi p0.h, p0.b
 ; CHECK-NEXT:    punpklo p3.h, p2.b
 ; CHECK-NEXT:    punpkhi p2.h, p2.b
 ; CHECK-NEXT:    punpklo p4.h, p3.b
 ; CHECK-NEXT:    punpkhi p3.h, p3.b
 ; CHECK-NEXT:    punpklo p4.h, p4.b
-; CHECK-NEXT:    punpkhi p0.h, p0.b
 ; CHECK-NEXT:    uzp1 p1.d, p4.d, p1.d
 ; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    uzp1 p1.s, p1.s, p3.s
@@ -970,18 +968,17 @@ define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_2(<vscale x 16 x i1> %vec, <vsc
 ; CHECK-LABEL: insert_nxv1i1_nxv16i1_2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    punpklo p2.h, p0.b
-; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    punpkhi p0.h, p0.b
 ; CHECK-NEXT:    punpklo p3.h, p2.b
 ; CHECK-NEXT:    punpkhi p2.h, p2.b
 ; CHECK-NEXT:    punpkhi p4.h, p3.b
 ; CHECK-NEXT:    punpklo p3.h, p3.b
 ; CHECK-NEXT:    punpkhi p4.h, p4.b
-; CHECK-NEXT:    punpkhi p0.h, p0.b
 ; CHECK-NEXT:    uzp1 p1.d, p1.d, p4.d
 ; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    uzp1 p1.s, p3.s, p1.s
@@ -998,18 +995,17 @@ define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_3(<vscale x 16 x i1> %vec, <vsc
 ; CHECK-LABEL: insert_nxv1i1_nxv16i1_3:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    punpklo p2.h, p0.b
-; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    punpkhi p0.h, p0.b
 ; CHECK-NEXT:    punpklo p3.h, p2.b
 ; CHECK-NEXT:    punpkhi p2.h, p2.b
 ; CHECK-NEXT:    punpkhi p4.h, p3.b
 ; CHECK-NEXT:    punpklo p3.h, p3.b
 ; CHECK-NEXT:    punpklo p4.h, p4.b
-; CHECK-NEXT:    punpkhi p0.h, p0.b
 ; CHECK-NEXT:    uzp1 p1.d, p4.d, p1.d
 ; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    uzp1 p1.s, p3.s, p1.s
@@ -1026,18 +1022,17 @@ define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_4(<vscale x 16 x i1> %vec, <vsc
 ; CHECK-LABEL: insert_nxv1i1_nxv16i1_4:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    punpklo p2.h, p0.b
-; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    punpkhi p0.h, p0.b
 ; CHECK-NEXT:    punpkhi p3.h, p2.b
 ; CHECK-NEXT:    punpklo p2.h, p2.b
 ; CHECK-NEXT:    punpklo p4.h, p3.b
 ; CHECK-NEXT:    punpkhi p3.h, p3.b
 ; CHECK-NEXT:    punpkhi p4.h, p4.b
-; CHECK-NEXT:    punpkhi p0.h, p0.b
 ; CHECK-NEXT:    uzp1 p1.d, p1.d, p4.d
 ; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    uzp1 p1.s, p1.s, p3.s
@@ -1054,18 +1049,17 @@ define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_5(<vscale x 16 x i1> %vec, <vsc
 ; CHECK-LABEL: insert_nxv1i1_nxv16i1_5:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    punpklo p2.h, p0.b
-; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    punpkhi p0.h, p0.b
 ; CHECK-NEXT:    punpkhi p3.h, p2.b
 ; CHECK-NEXT:    punpklo p2.h, p2.b
 ; CHECK-NEXT:    punpklo p4.h, p3.b
 ; CHECK-NEXT:    punpkhi p3.h, p3.b
 ; CHECK-NEXT:    punpklo p4.h, p4.b
-; CHECK-NEXT:    punpkhi p0.h, p0.b
 ; CHECK-NEXT:    uzp1 p1.d, p4.d, p1.d
 ; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    uzp1 p1.s, p1.s, p3.s
@@ -1082,18 +1076,17 @@ define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_6(<vscale x 16 x i1> %vec, <vsc
 ; CHECK-LABEL: insert_nxv1i1_nxv16i1_6:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    punpklo p2.h, p0.b
-; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    punpkhi p0.h, p0.b
 ; CHECK-NEXT:    punpkhi p3.h, p2.b
 ; CHECK-NEXT:    punpklo p2.h, p2.b
 ; CHECK-NEXT:    punpkhi p4.h, p3.b
 ; CHECK-NEXT:    punpklo p3.h, p3.b
 ; CHECK-NEXT:    punpkhi p4.h, p4.b
-; CHECK-NEXT:    punpkhi p0.h, p0.b
 ; CHECK-NEXT:    uzp1 p1.d, p1.d, p4.d
 ; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    uzp1 p1.s, p3.s, p1.s
@@ -1110,18 +1103,17 @@ define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_7(<vscale x 16 x i1> %vec, <vsc
 ; CHECK-LABEL: insert_nxv1i1_nxv16i1_7:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    punpklo p2.h, p0.b
-; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    punpkhi p0.h, p0.b
 ; CHECK-NEXT:    punpkhi p3.h, p2.b
 ; CHECK-NEXT:    punpklo p2.h, p2.b
 ; CHECK-NEXT:    punpkhi p4.h, p3.b
 ; CHECK-NEXT:    punpklo p3.h, p3.b
 ; CHECK-NEXT:    punpklo p4.h, p4.b
-; CHECK-NEXT:    punpkhi p0.h, p0.b
 ; CHECK-NEXT:    uzp1 p1.d, p4.d, p1.d
 ; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    uzp1 p1.s, p3.s, p1.s
@@ -1138,18 +1130,17 @@ define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_8(<vscale x 16 x i1> %vec, <vsc
 ; CHECK-LABEL: insert_nxv1i1_nxv16i1_8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    punpkhi p2.h, p0.b
-; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    punpklo p0.h, p0.b
 ; CHECK-NEXT:    punpklo p3.h, p2.b
 ; CHECK-NEXT:    punpkhi p2.h, p2.b
 ; CHECK-NEXT:    punpklo p4.h, p3.b
 ; CHECK-NEXT:    punpkhi p3.h, p3.b
 ; CHECK-NEXT:    punpkhi p4.h, p4.b
-; CHECK-NEXT:    punpklo p0.h, p0.b
 ; CHECK-NEXT:    uzp1 p1.d, p1.d, p4.d
 ; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    uzp1 p1.s, p1.s, p3.s
@@ -1166,18 +1157,17 @@ define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_9(<vscale x 16 x i1> %vec, <vsc
 ; CHECK-LABEL: insert_nxv1i1_nxv16i1_9:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    punpkhi p2.h, p0.b
-; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    punpklo p0.h, p0.b
 ; CHECK-NEXT:    punpklo p3.h, p2.b
 ; CHECK-NEXT:    punpkhi p2.h, p2.b
 ; CHECK-NEXT:    punpklo p4.h, p3.b
 ; CHECK-NEXT:    punpkhi p3.h, p3.b
 ; CHECK-NEXT:    punpklo p4.h, p4.b
-; CHECK-NEXT:    punpklo p0.h, p0.b
 ; CHECK-NEXT:    uzp1 p1.d, p4.d, p1.d
 ; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    uzp1 p1.s, p1.s, p3.s
@@ -1194,18 +1184,17 @@ define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_10(<vscale x 16 x i1> %vec, <vs
 ; CHECK-LABEL: insert_nxv1i1_nxv16i1_10:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    punpkhi p2.h, p0.b
-; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    punpklo p0.h, p0.b
 ; CHECK-NEXT:    punpklo p3.h, p2.b
 ; CHECK-NEXT:    punpkhi p2.h, p2.b
 ; CHECK-NEXT:    punpkhi p4.h, p3.b
 ; CHECK-NEXT:    punpklo p3.h, p3.b
 ; CHECK-NEXT:    punpkhi p4.h, p4.b
-; CHECK-NEXT:    punpklo p0.h, p0.b
 ; CHECK-NEXT:    uzp1 p1.d, p1.d, p4.d
 ; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    uzp1 p1.s, p3.s, p1.s
@@ -1222,18 +1211,17 @@ define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_11(<vscale x 16 x i1> %vec, <vs
 ; CHECK-LABEL: insert_nxv1i1_nxv16i1_11:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    punpkhi p2.h, p0.b
-; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    punpklo p0.h, p0.b
 ; CHECK-NEXT:    punpklo p3.h, p2.b
 ; CHECK-NEXT:    punpkhi p2.h, p2.b
 ; CHECK-NEXT:    punpkhi p4.h, p3.b
 ; CHECK-NEXT:    punpklo p3.h, p3.b
 ; CHECK-NEXT:    punpklo p4.h, p4.b
-; CHECK-NEXT:    punpklo p0.h, p0.b
 ; CHECK-NEXT:    uzp1 p1.d, p4.d, p1.d
 ; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    uzp1 p1.s, p3.s, p1.s
@@ -1250,18 +1238,17 @@ define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_12(<vscale x 16 x i1> %vec, <vs
 ; CHECK-LABEL: insert_nxv1i1_nxv16i1_12:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    punpkhi p2.h, p0.b
-; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    punpklo p0.h, p0.b
 ; CHECK-NEXT:    punpkhi p3.h, p2.b
 ; CHECK-NEXT:    punpklo p2.h, p2.b
 ; CHECK-NEXT:    punpklo p4.h, p3.b
 ; CHECK-NEXT:    punpkhi p3.h, p3.b
 ; CHECK-NEXT:    punpkhi p4.h, p4.b
-; CHECK-NEXT:    punpklo p0.h, p0.b
 ; CHECK-NEXT:    uzp1 p1.d, p1.d, p4.d
 ; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    uzp1 p1.s, p1.s, p3.s
@@ -1278,18 +1265,17 @@ define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_13(<vscale x 16 x i1> %vec, <vs
 ; CHECK-LABEL: insert_nxv1i1_nxv16i1_13:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    punpkhi p2.h, p0.b
-; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    punpklo p0.h, p0.b
 ; CHECK-NEXT:    punpkhi p3.h, p2.b
 ; CHECK-NEXT:    punpklo p2.h, p2.b
 ; CHECK-NEXT:    punpklo p4.h, p3.b
 ; CHECK-NEXT:    punpkhi p3.h, p3.b
 ; CHECK-NEXT:    punpklo p4.h, p4.b
-; CHECK-NEXT:    punpklo p0.h, p0.b
 ; CHECK-NEXT:    uzp1 p1.d, p4.d, p1.d
 ; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    uzp1 p1.s, p1.s, p3.s
@@ -1306,18 +1292,17 @@ define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_14(<vscale x 16 x i1> %vec, <vs
 ; CHECK-LABEL: insert_nxv1i1_nxv16i1_14:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    punpkhi p2.h, p0.b
-; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    punpklo p0.h, p0.b
 ; CHECK-NEXT:    punpkhi p3.h, p2.b
 ; CHECK-NEXT:    punpklo p2.h, p2.b
 ; CHECK-NEXT:    punpkhi p4.h, p3.b
 ; CHECK-NEXT:    punpklo p3.h, p3.b
 ; CHECK-NEXT:    punpkhi p4.h, p4.b
-; CHECK-NEXT:    punpklo p0.h, p0.b
 ; CHECK-NEXT:    uzp1 p1.d, p1.d, p4.d
 ; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    uzp1 p1.s, p3.s, p1.s
@@ -1334,18 +1319,17 @@ define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_15(<vscale x 16 x i1> %vec, <vs
 ; CHECK-LABEL: insert_nxv1i1_nxv16i1_15:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    punpkhi p2.h, p0.b
-; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    punpklo p0.h, p0.b
 ; CHECK-NEXT:    punpkhi p3.h, p2.b
 ; CHECK-NEXT:    punpklo p2.h, p2.b
 ; CHECK-NEXT:    punpkhi p4.h, p3.b
 ; CHECK-NEXT:    punpklo p3.h, p3.b
 ; CHECK-NEXT:    punpklo p4.h, p4.b
-; CHECK-NEXT:    punpklo p0.h, p0.b
 ; CHECK-NEXT:    uzp1 p1.d, p4.d, p1.d
 ; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    uzp1 p1.s, p3.s, p1.s

diff  --git a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll
index 2ebad78b9bbd8e..d54e6259a7429b 100644
--- a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll
@@ -19,10 +19,9 @@ define i8 @split_extract_32i8_idx(<vscale x 32 x i8> %a, i32 %idx) {
 ; CHECK-LABEL: split_extract_32i8_idx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    mov x8, #-1
 ; CHECK-NEXT:    mov w9, w0
 ; CHECK-NEXT:    ptrue p0.b
@@ -44,10 +43,9 @@ define i16 @split_extract_16i16_idx(<vscale x 16 x i16> %a, i32 %idx) {
 ; CHECK-LABEL: split_extract_16i16_idx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    mov x8, #-1
 ; CHECK-NEXT:    mov w9, w0
 ; CHECK-NEXT:    ptrue p0.h
@@ -69,10 +67,9 @@ define i32 @split_extract_8i32_idx(<vscale x 8 x i32> %a, i32 %idx) {
 ; CHECK-LABEL: split_extract_8i32_idx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    cnth x8
 ; CHECK-NEXT:    mov w9, w0
 ; CHECK-NEXT:    sub x8, x8, #1
@@ -94,10 +91,9 @@ define i64 @split_extract_8i64_idx(<vscale x 8 x i64> %a, i32 %idx) {
 ; CHECK-LABEL: split_extract_8i64_idx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-4
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    cnth x8
 ; CHECK-NEXT:    mov w9, w0
 ; CHECK-NEXT:    sub x8, x8, #1
@@ -141,10 +137,9 @@ define i16 @split_extract_16i16(<vscale x 16 x i16> %a) {
 ; CHECK-LABEL: split_extract_16i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    mov x8, #-1
 ; CHECK-NEXT:    mov w9, #128
 ; CHECK-NEXT:    ptrue p0.h
@@ -166,10 +161,9 @@ define i32 @split_extract_16i32(<vscale x 16 x i32> %a) {
 ; CHECK-LABEL: split_extract_16i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-4
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    mov x8, #-1
 ; CHECK-NEXT:    mov w9, #34464
 ; CHECK-NEXT:    movk w9, #1, lsl #16
@@ -194,10 +188,9 @@ define i64 @split_extract_4i64(<vscale x 4 x i64> %a) {
 ; CHECK-LABEL: split_extract_4i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    cntw x8
 ; CHECK-NEXT:    mov w9, #10
 ; CHECK-NEXT:    sub x8, x8, #1

diff  --git a/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll
index 2e4c960c2c4f34..0465da7e7093d0 100644
--- a/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll
@@ -20,10 +20,9 @@ define <vscale x 32 x i8> @split_insert_32i8_idx(<vscale x 32 x i8> %a, i8 %elt,
 ; CHECK-LABEL: split_insert_32i8_idx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    mov x8, #-1
 ; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    ptrue p0.b
@@ -46,10 +45,9 @@ define <vscale x 8 x float> @split_insert_8f32_idx(<vscale x 8 x float> %a, floa
 ; CHECK-LABEL: split_insert_8f32_idx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    cnth x8
 ; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    sub x8, x8, #1
@@ -72,10 +70,9 @@ define <vscale x 8 x i64> @split_insert_8i64_idx(<vscale x 8 x i64> %a, i64 %elt
 ; CHECK-LABEL: split_insert_8i64_idx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-4
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    cnth x8
 ; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    sub x8, x8, #1
@@ -135,10 +132,9 @@ define <vscale x 32 x i16> @split_insert_32i16(<vscale x 32 x i16> %a, i16 %elt)
 ; CHECK-LABEL: split_insert_32i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-4
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    mov x8, #-1
 ; CHECK-NEXT:    mov w9, #128
 ; CHECK-NEXT:    ptrue p0.h
@@ -166,10 +162,9 @@ define <vscale x 8 x i32> @split_insert_8i32(<vscale x 8 x i32> %a, i32 %elt) {
 ; CHECK-LABEL: split_insert_8i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    cnth x8
 ; CHECK-NEXT:    mov w9, #16960
 ; CHECK-NEXT:    movk w9, #15, lsl #16

diff  --git a/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll b/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
index 922320e84f83ec..ef0dbd81960986 100644
--- a/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
@@ -19,13 +19,12 @@ define i1 @andv_nxv64i1(<vscale x 64 x i1> %a) {
 ; CHECK-LABEL: andv_nxv64i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    and p1.b, p1/z, p1.b, p3.b
 ; CHECK-NEXT:    and p0.b, p0/z, p0.b, p2.b
-; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    ptrue p4.b
 ; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
 ; CHECK-NEXT:    nots p0.b, p4/z, p0.b

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll
index 1db3b26827a520..8712985b41588a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll
@@ -715,11 +715,11 @@ define void @uzp_v32i8(ptr %a, ptr %b) #0{
 ; CHECK-LABEL: uzp_v32i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #128
-; CHECK-NEXT:    .cfi_def_cfa_offset 128
 ; CHECK-NEXT:    stp d15, d14, [sp, #64] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp d13, d12, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp d11, d10, [sp, #96] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp d9, d8, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 128
 ; CHECK-NEXT:    .cfi_offset b8, -8
 ; CHECK-NEXT:    .cfi_offset b9, -16
 ; CHECK-NEXT:    .cfi_offset b10, -24

diff  --git a/llvm/test/CodeGen/AArch64/sve-trunc.ll b/llvm/test/CodeGen/AArch64/sve-trunc.ll
index 07a55ebb3cb74f..6869cd5d30ed6c 100644
--- a/llvm/test/CodeGen/AArch64/sve-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-trunc.ll
@@ -110,16 +110,16 @@ define <vscale x 16 x i1> @trunc_i64toi1_split3(<vscale x 16 x i64> %in) {
 ; CHECK-LABEL: trunc_i64toi1_split3:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    str p5, [sp, #6, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    and z7.d, z7.d, #0x1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    and z6.d, z6.d, #0x1
 ; CHECK-NEXT:    and z5.d, z5.d, #0x1
 ; CHECK-NEXT:    and z4.d, z4.d, #0x1
-; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    cmpne p1.d, p0/z, z7.d, #0
 ; CHECK-NEXT:    cmpne p2.d, p0/z, z6.d, #0
 ; CHECK-NEXT:    cmpne p3.d, p0/z, z5.d, #0
@@ -128,7 +128,6 @@ define <vscale x 16 x i1> @trunc_i64toi1_split3(<vscale x 16 x i64> %in) {
 ; CHECK-NEXT:    and z2.d, z2.d, #0x1
 ; CHECK-NEXT:    and z1.d, z1.d, #0x1
 ; CHECK-NEXT:    and z0.d, z0.d, #0x1
-; CHECK-NEXT:    str p5, [sp, #6, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    uzp1 p1.s, p2.s, p1.s
 ; CHECK-NEXT:    uzp1 p2.s, p4.s, p3.s
 ; CHECK-NEXT:    cmpne p3.d, p0/z, z3.d, #0
@@ -212,4 +211,3 @@ entry:
   store <vscale x 4 x i16> %1, <vscale x 4 x i16>* %ptr, align 2
   ret void
 }
-

diff  --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-loads.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-loads.ll
index 79d7e67232313b..827a3c28e1c983 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-loads.ll
@@ -137,11 +137,10 @@ define <vscale x 16 x i8> @ld1_x2_i8_z0_taken(target("aarch64.svcount") %pn, ptr
 ; CHECK-LABEL: ld1_x2_i8_z0_taken:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    ld1b { z2.b, z3.b }, pn8/z, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
@@ -288,11 +287,10 @@ define <vscale x 8 x i16> @ld1_x4_i16_z0_taken(target("aarch64.svcount") %pn, pt
 ; CHECK-LABEL: ld1_x4_i16_z0_taken:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    ld1h { z4.h - z7.h }, pn8/z, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
@@ -442,11 +440,10 @@ define <vscale x 4 x i32> @ldnt1_x2_i32_z0_taken(target("aarch64.svcount") %pn,
 ; CHECK-LABEL: ldnt1_x2_i32_z0_taken:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    ldnt1w { z2.s, z3.s }, pn8/z, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
@@ -593,11 +590,10 @@ define <vscale x 2 x i64> @ldnt1_x4_i64_z0_taken(target("aarch64.svcount") %pn,
 ; CHECK-LABEL: ldnt1_x4_i64_z0_taken:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    ldnt1d { z4.d - z7.d }, pn8/z, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload

diff  --git a/llvm/test/CodeGen/AArch64/swifterror.ll b/llvm/test/CodeGen/AArch64/swifterror.ll
index f89dd07b5c9893..d76fb959fd6cef 100644
--- a/llvm/test/CodeGen/AArch64/swifterror.ll
+++ b/llvm/test/CodeGen/AArch64/swifterror.ll
@@ -14,7 +14,6 @@ define float @foo(ptr swifterror %error_ptr_ref) {
 ; CHECK-APPLE-LABEL: foo:
 ; CHECK-APPLE:       ; %bb.0: ; %entry
 ; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
-; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-APPLE-NEXT:    mov x29, sp
 ; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
 ; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
@@ -31,7 +30,6 @@ define float @foo(ptr swifterror %error_ptr_ref) {
 ; CHECK-O0-AARCH64-LABEL: foo:
 ; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
 ; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
-; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-O0-AARCH64-NEXT:    mov x29, sp
 ; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
 ; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
@@ -74,7 +72,6 @@ define float @caller(ptr %error_ref) {
 ; CHECK-APPLE-AARCH64-LABEL: caller:
 ; CHECK-APPLE-AARCH64:       ; %bb.0: ; %entry
 ; CHECK-APPLE-AARCH64-NEXT:    sub sp, sp, #64
-; CHECK-APPLE-AARCH64-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-APPLE-AARCH64-NEXT:    stp x22, x21, [sp, #16] ; 16-byte Folded Spill
 ; CHECK-APPLE-AARCH64-NEXT:    stp x20, x19, [sp, #32] ; 16-byte Folded Spill
 ; CHECK-APPLE-AARCH64-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
@@ -106,7 +103,6 @@ define float @caller(ptr %error_ref) {
 ; CHECK-O0-AARCH64-LABEL: caller:
 ; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
 ; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #64
-; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-O0-AARCH64-NEXT:    stp x22, x21, [sp, #32] ; 16-byte Folded Spill
 ; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
 ; CHECK-O0-AARCH64-NEXT:    add x29, sp, #48
@@ -138,7 +134,6 @@ define float @caller(ptr %error_ref) {
 ; CHECK-APPLE-ARM64_32-LABEL: caller:
 ; CHECK-APPLE-ARM64_32:       ; %bb.0: ; %entry
 ; CHECK-APPLE-ARM64_32-NEXT:    sub sp, sp, #64
-; CHECK-APPLE-ARM64_32-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-APPLE-ARM64_32-NEXT:    stp x22, x21, [sp, #16] ; 16-byte Folded Spill
 ; CHECK-APPLE-ARM64_32-NEXT:    stp x20, x19, [sp, #32] ; 16-byte Folded Spill
 ; CHECK-APPLE-ARM64_32-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
@@ -170,9 +165,9 @@ define float @caller(ptr %error_ref) {
 ; CHECK-O0-ARM64_32-LABEL: caller:
 ; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
 ; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #64
-; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-O0-ARM64_32-NEXT:    stp x22, x21, [sp, #32] ; 16-byte Folded Spill
 ; CHECK-O0-ARM64_32-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -8
 ; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w29, -16
 ; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w21, -24
@@ -222,7 +217,6 @@ define float @caller2(ptr %error_ref) {
 ; CHECK-APPLE-AARCH64-LABEL: caller2:
 ; CHECK-APPLE-AARCH64:       ; %bb.0: ; %entry
 ; CHECK-APPLE-AARCH64-NEXT:    sub sp, sp, #80
-; CHECK-APPLE-AARCH64-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-APPLE-AARCH64-NEXT:    stp d9, d8, [sp, #16] ; 16-byte Folded Spill
 ; CHECK-APPLE-AARCH64-NEXT:    stp x22, x21, [sp, #32] ; 16-byte Folded Spill
 ; CHECK-APPLE-AARCH64-NEXT:    stp x20, x19, [sp, #48] ; 16-byte Folded Spill
@@ -265,7 +259,6 @@ define float @caller2(ptr %error_ref) {
 ; CHECK-O0-AARCH64-LABEL: caller2:
 ; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
 ; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #64
-; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-O0-AARCH64-NEXT:    stp x22, x21, [sp, #32] ; 16-byte Folded Spill
 ; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
 ; CHECK-O0-AARCH64-NEXT:    add x29, sp, #48
@@ -306,7 +299,6 @@ define float @caller2(ptr %error_ref) {
 ; CHECK-APPLE-ARM64_32-LABEL: caller2:
 ; CHECK-APPLE-ARM64_32:       ; %bb.0: ; %entry
 ; CHECK-APPLE-ARM64_32-NEXT:    sub sp, sp, #80
-; CHECK-APPLE-ARM64_32-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-APPLE-ARM64_32-NEXT:    stp d9, d8, [sp, #16] ; 16-byte Folded Spill
 ; CHECK-APPLE-ARM64_32-NEXT:    stp x22, x21, [sp, #32] ; 16-byte Folded Spill
 ; CHECK-APPLE-ARM64_32-NEXT:    stp x20, x19, [sp, #48] ; 16-byte Folded Spill
@@ -349,9 +341,9 @@ define float @caller2(ptr %error_ref) {
 ; CHECK-O0-ARM64_32-LABEL: caller2:
 ; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
 ; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #64
-; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-O0-ARM64_32-NEXT:    stp x22, x21, [sp, #32] ; 16-byte Folded Spill
 ; CHECK-O0-ARM64_32-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -8
 ; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w29, -16
 ; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w21, -24
@@ -416,7 +408,6 @@ define float @foo_if(ptr swifterror %error_ptr_ref, i32 %cc) {
 ; CHECK-APPLE-LABEL: foo_if:
 ; CHECK-APPLE:       ; %bb.0: ; %entry
 ; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
-; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-APPLE-NEXT:    mov x29, sp
 ; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
 ; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
@@ -439,7 +430,6 @@ define float @foo_if(ptr swifterror %error_ptr_ref, i32 %cc) {
 ; CHECK-O0-AARCH64-LABEL: foo_if:
 ; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
 ; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #32
-; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
 ; CHECK-O0-AARCH64-NEXT:    add x29, sp, #16
 ; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
@@ -468,8 +458,8 @@ define float @foo_if(ptr swifterror %error_ptr_ref, i32 %cc) {
 ; CHECK-O0-ARM64_32-LABEL: foo_if:
 ; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
 ; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #32
-; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-O0-ARM64_32-NEXT:    str x30, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -16
 ; CHECK-O0-ARM64_32-NEXT:    str x21, [sp, #8] ; 8-byte Folded Spill
 ; CHECK-O0-ARM64_32-NEXT:    cbz w0, LBB3_2
@@ -514,7 +504,6 @@ define float @foo_loop(ptr swifterror %error_ptr_ref, i32 %cc, float %cc2) {
 ; CHECK-APPLE-LABEL: foo_loop:
 ; CHECK-APPLE:       ; %bb.0: ; %entry
 ; CHECK-APPLE-NEXT:    stp d9, d8, [sp, #-48]! ; 16-byte Folded Spill
-; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-APPLE-NEXT:    stp x20, x19, [sp, #16] ; 16-byte Folded Spill
 ; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
 ; CHECK-APPLE-NEXT:    add x29, sp, #32
@@ -555,7 +544,6 @@ define float @foo_loop(ptr swifterror %error_ptr_ref, i32 %cc, float %cc2) {
 ; CHECK-O0-AARCH64-LABEL: foo_loop:
 ; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
 ; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #48
-; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
 ; CHECK-O0-AARCH64-NEXT:    add x29, sp, #32
 ; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
@@ -599,8 +587,8 @@ define float @foo_loop(ptr swifterror %error_ptr_ref, i32 %cc, float %cc2) {
 ; CHECK-O0-ARM64_32-LABEL: foo_loop:
 ; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
 ; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #48
-; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-O0-ARM64_32-NEXT:    str x30, [sp, #32] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -16
 ; CHECK-O0-ARM64_32-NEXT:    str s0, [sp, #16] ; 4-byte Folded Spill
 ; CHECK-O0-ARM64_32-NEXT:    str w0, [sp, #20] ; 4-byte Folded Spill
@@ -674,7 +662,6 @@ define void @foo_sret(ptr sret(%struct.S) %agg.result, i32 %val1, ptr swifterror
 ; CHECK-APPLE-LABEL: foo_sret:
 ; CHECK-APPLE:       ; %bb.0: ; %entry
 ; CHECK-APPLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
-; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
 ; CHECK-APPLE-NEXT:    add x29, sp, #16
 ; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
@@ -697,7 +684,6 @@ define void @foo_sret(ptr sret(%struct.S) %agg.result, i32 %val1, ptr swifterror
 ; CHECK-O0-AARCH64-LABEL: foo_sret:
 ; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
 ; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #32
-; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
 ; CHECK-O0-AARCH64-NEXT:    add x29, sp, #16
 ; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
@@ -722,8 +708,8 @@ define void @foo_sret(ptr sret(%struct.S) %agg.result, i32 %val1, ptr swifterror
 ; CHECK-O0-ARM64_32-LABEL: foo_sret:
 ; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
 ; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #32
-; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-O0-ARM64_32-NEXT:    str x30, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -16
 ; CHECK-O0-ARM64_32-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-O0-ARM64_32-NEXT:    str x8, [sp] ; 8-byte Folded Spill
@@ -758,7 +744,6 @@ define float @caller3(ptr %error_ref) {
 ; CHECK-APPLE-AARCH64-LABEL: caller3:
 ; CHECK-APPLE-AARCH64:       ; %bb.0: ; %entry
 ; CHECK-APPLE-AARCH64-NEXT:    sub sp, sp, #80
-; CHECK-APPLE-AARCH64-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-APPLE-AARCH64-NEXT:    stp x22, x21, [sp, #32] ; 16-byte Folded Spill
 ; CHECK-APPLE-AARCH64-NEXT:    stp x20, x19, [sp, #48] ; 16-byte Folded Spill
 ; CHECK-APPLE-AARCH64-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
@@ -792,7 +777,6 @@ define float @caller3(ptr %error_ref) {
 ; CHECK-O0-AARCH64-LABEL: caller3:
 ; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
 ; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #80
-; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-O0-AARCH64-NEXT:    stp x22, x21, [sp, #48] ; 16-byte Folded Spill
 ; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
 ; CHECK-O0-AARCH64-NEXT:    add x29, sp, #64
@@ -826,7 +810,6 @@ define float @caller3(ptr %error_ref) {
 ; CHECK-APPLE-ARM64_32-LABEL: caller3:
 ; CHECK-APPLE-ARM64_32:       ; %bb.0: ; %entry
 ; CHECK-APPLE-ARM64_32-NEXT:    sub sp, sp, #80
-; CHECK-APPLE-ARM64_32-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-APPLE-ARM64_32-NEXT:    stp x22, x21, [sp, #32] ; 16-byte Folded Spill
 ; CHECK-APPLE-ARM64_32-NEXT:    stp x20, x19, [sp, #48] ; 16-byte Folded Spill
 ; CHECK-APPLE-ARM64_32-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
@@ -860,9 +843,9 @@ define float @caller3(ptr %error_ref) {
 ; CHECK-O0-ARM64_32-LABEL: caller3:
 ; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
 ; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #80
-; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-O0-ARM64_32-NEXT:    stp x22, x21, [sp, #48] ; 16-byte Folded Spill
 ; CHECK-O0-ARM64_32-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -8
 ; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w29, -16
 ; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w21, -24
@@ -920,7 +903,6 @@ define float @foo_vararg(ptr swifterror %error_ptr_ref, ...) {
 ; CHECK-APPLE-AARCH64-LABEL: foo_vararg:
 ; CHECK-APPLE-AARCH64:       ; %bb.0: ; %entry
 ; CHECK-APPLE-AARCH64-NEXT:    sub sp, sp, #48
-; CHECK-APPLE-AARCH64-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-APPLE-AARCH64-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
 ; CHECK-APPLE-AARCH64-NEXT:    add x29, sp, #32
 ; CHECK-APPLE-AARCH64-NEXT:    .cfi_def_cfa w29, 16
@@ -948,7 +930,6 @@ define float @foo_vararg(ptr swifterror %error_ptr_ref, ...) {
 ; CHECK-O0-AARCH64-LABEL: foo_vararg:
 ; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
 ; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #48
-; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
 ; CHECK-O0-AARCH64-NEXT:    add x29, sp, #32
 ; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
@@ -985,7 +966,6 @@ define float @foo_vararg(ptr swifterror %error_ptr_ref, ...) {
 ; CHECK-APPLE-ARM64_32-LABEL: foo_vararg:
 ; CHECK-APPLE-ARM64_32:       ; %bb.0: ; %entry
 ; CHECK-APPLE-ARM64_32-NEXT:    sub sp, sp, #48
-; CHECK-APPLE-ARM64_32-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-APPLE-ARM64_32-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
 ; CHECK-APPLE-ARM64_32-NEXT:    add x29, sp, #32
 ; CHECK-APPLE-ARM64_32-NEXT:    .cfi_def_cfa w29, 16
@@ -1017,8 +997,8 @@ define float @foo_vararg(ptr swifterror %error_ptr_ref, ...) {
 ; CHECK-O0-ARM64_32-LABEL: foo_vararg:
 ; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
 ; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #48
-; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-O0-ARM64_32-NEXT:    str x30, [sp, #32] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -16
 ; CHECK-O0-ARM64_32-NEXT:    mov w8, #16
 ; CHECK-O0-ARM64_32-NEXT:    mov w0, w8
@@ -1087,7 +1067,6 @@ define float @caller4(ptr %error_ref) {
 ; CHECK-APPLE-AARCH64-LABEL: caller4:
 ; CHECK-APPLE-AARCH64:       ; %bb.0: ; %entry
 ; CHECK-APPLE-AARCH64-NEXT:    sub sp, sp, #96
-; CHECK-APPLE-AARCH64-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-APPLE-AARCH64-NEXT:    stp x22, x21, [sp, #48] ; 16-byte Folded Spill
 ; CHECK-APPLE-AARCH64-NEXT:    stp x20, x19, [sp, #64] ; 16-byte Folded Spill
 ; CHECK-APPLE-AARCH64-NEXT:    stp x29, x30, [sp, #80] ; 16-byte Folded Spill
@@ -1126,7 +1105,6 @@ define float @caller4(ptr %error_ref) {
 ; CHECK-O0-AARCH64-LABEL: caller4:
 ; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
 ; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #96
-; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-O0-AARCH64-NEXT:    stp x22, x21, [sp, #64] ; 16-byte Folded Spill
 ; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #80] ; 16-byte Folded Spill
 ; CHECK-O0-AARCH64-NEXT:    add x29, sp, #80
@@ -1174,7 +1152,6 @@ define float @caller4(ptr %error_ref) {
 ; CHECK-APPLE-ARM64_32-LABEL: caller4:
 ; CHECK-APPLE-ARM64_32:       ; %bb.0: ; %entry
 ; CHECK-APPLE-ARM64_32-NEXT:    sub sp, sp, #80
-; CHECK-APPLE-ARM64_32-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-APPLE-ARM64_32-NEXT:    stp x22, x21, [sp, #32] ; 16-byte Folded Spill
 ; CHECK-APPLE-ARM64_32-NEXT:    stp x20, x19, [sp, #48] ; 16-byte Folded Spill
 ; CHECK-APPLE-ARM64_32-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
@@ -1215,9 +1192,9 @@ define float @caller4(ptr %error_ref) {
 ; CHECK-O0-ARM64_32-LABEL: caller4:
 ; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
 ; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #80
-; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-O0-ARM64_32-NEXT:    stp x22, x21, [sp, #48] ; 16-byte Folded Spill
 ; CHECK-O0-ARM64_32-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -8
 ; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w29, -16
 ; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w21, -24
@@ -1293,7 +1270,6 @@ define float @tailcallswifterror(ptr swifterror %error_ptr_ref) {
 ; CHECK-APPLE-LABEL: tailcallswifterror:
 ; CHECK-APPLE:       ; %bb.0: ; %entry
 ; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
-; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-APPLE-NEXT:    mov x29, sp
 ; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
 ; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
@@ -1305,7 +1281,6 @@ define float @tailcallswifterror(ptr swifterror %error_ptr_ref) {
 ; CHECK-O0-AARCH64-LABEL: tailcallswifterror:
 ; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
 ; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
-; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-O0-AARCH64-NEXT:    mov x29, sp
 ; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
 ; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
@@ -1330,7 +1305,6 @@ define swiftcc float @tailcallswifterror_swiftcc(ptr swifterror %error_ptr_ref)
 ; CHECK-APPLE-LABEL: tailcallswifterror_swiftcc:
 ; CHECK-APPLE:       ; %bb.0: ; %entry
 ; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
-; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-APPLE-NEXT:    mov x29, sp
 ; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
 ; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
@@ -1342,7 +1316,6 @@ define swiftcc float @tailcallswifterror_swiftcc(ptr swifterror %error_ptr_ref)
 ; CHECK-O0-AARCH64-LABEL: tailcallswifterror_swiftcc:
 ; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
 ; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
-; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-O0-AARCH64-NEXT:    mov x29, sp
 ; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
 ; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
@@ -1368,7 +1341,6 @@ define swiftcc void @swifterror_clobber(ptr nocapture swifterror %err) {
 ; CHECK-APPLE-LABEL: swifterror_clobber:
 ; CHECK-APPLE:       ; %bb.0:
 ; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
-; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-APPLE-NEXT:    mov x29, sp
 ; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
 ; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
@@ -1384,7 +1356,6 @@ define swiftcc void @swifterror_clobber(ptr nocapture swifterror %err) {
 ; CHECK-O0-AARCH64-LABEL: swifterror_clobber:
 ; CHECK-O0-AARCH64:       ; %bb.0:
 ; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #32
-; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
 ; CHECK-O0-AARCH64-NEXT:    add x29, sp, #16
 ; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
@@ -1418,7 +1389,6 @@ define swiftcc void @swifterror_reg_clobber(ptr nocapture %err) {
 ; CHECK-APPLE-LABEL: swifterror_reg_clobber:
 ; CHECK-APPLE:       ; %bb.0:
 ; CHECK-APPLE-NEXT:    stp x22, x21, [sp, #-32]! ; 16-byte Folded Spill
-; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
 ; CHECK-APPLE-NEXT:    add x29, sp, #16
 ; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
@@ -1436,7 +1406,6 @@ define swiftcc void @swifterror_reg_clobber(ptr nocapture %err) {
 ; CHECK-O0-AARCH64-LABEL: swifterror_reg_clobber:
 ; CHECK-O0-AARCH64:       ; %bb.0:
 ; CHECK-O0-AARCH64-NEXT:    stp x22, x21, [sp, #-32]! ; 16-byte Folded Spill
-; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
 ; CHECK-O0-AARCH64-NEXT:    add x29, sp, #16
 ; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
@@ -1470,7 +1439,6 @@ define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, ptr s
 ; CHECK-APPLE-LABEL: params_in_reg:
 ; CHECK-APPLE:       ; %bb.0:
 ; CHECK-APPLE-NEXT:    sub sp, sp, #112
-; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-APPLE-NEXT:    stp x21, x28, [sp, #8] ; 16-byte Folded Spill
 ; CHECK-APPLE-NEXT:    stp x27, x26, [sp, #32] ; 16-byte Folded Spill
 ; CHECK-APPLE-NEXT:    stp x25, x24, [sp, #48] ; 16-byte Folded Spill
@@ -1532,7 +1500,6 @@ define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, ptr s
 ; CHECK-O0-AARCH64-LABEL: params_in_reg:
 ; CHECK-O0-AARCH64:       ; %bb.0:
 ; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #128
-; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa_offset 128
 ; CHECK-O0-AARCH64-NEXT:    str x20, [sp, #96] ; 8-byte Folded Spill
 ; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #112] ; 16-byte Folded Spill
 ; CHECK-O0-AARCH64-NEXT:    add x29, sp, #112
@@ -1590,8 +1557,8 @@ define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, ptr s
 ; CHECK-O0-ARM64_32-LABEL: params_in_reg:
 ; CHECK-O0-ARM64_32:       ; %bb.0:
 ; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #112
-; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-O0-ARM64_32-NEXT:    stp x20, x30, [sp, #96] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -8
 ; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w20, -16
 ; CHECK-O0-ARM64_32-NEXT:    str x21, [sp, #80] ; 8-byte Folded Spill
@@ -1651,7 +1618,6 @@ define swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_
 ; CHECK-APPLE-LABEL: params_and_return_in_reg:
 ; CHECK-APPLE:       ; %bb.0:
 ; CHECK-APPLE-NEXT:    sub sp, sp, #128
-; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 128
 ; CHECK-APPLE-NEXT:    stp x20, x28, [sp, #24] ; 16-byte Folded Spill
 ; CHECK-APPLE-NEXT:    stp x27, x26, [sp, #48] ; 16-byte Folded Spill
 ; CHECK-APPLE-NEXT:    stp x25, x24, [sp, #64] ; 16-byte Folded Spill
@@ -1742,7 +1708,6 @@ define swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_
 ; CHECK-O0-AARCH64-LABEL: params_and_return_in_reg:
 ; CHECK-O0-AARCH64:       ; %bb.0:
 ; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #272
-; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa_offset 272
 ; CHECK-O0-AARCH64-NEXT:    stp x28, x20, [sp, #240] ; 16-byte Folded Spill
 ; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #256] ; 16-byte Folded Spill
 ; CHECK-O0-AARCH64-NEXT:    add x29, sp, #256
@@ -1850,9 +1815,9 @@ define swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_
 ; CHECK-O0-ARM64_32-LABEL: params_and_return_in_reg:
 ; CHECK-O0-ARM64_32:       ; %bb.0:
 ; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #272
-; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 272
 ; CHECK-O0-ARM64_32-NEXT:    str x28, [sp, #240] ; 8-byte Folded Spill
 ; CHECK-O0-ARM64_32-NEXT:    stp x20, x30, [sp, #256] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 272
 ; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -8
 ; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w20, -16
 ; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w28, -32
@@ -1969,7 +1934,6 @@ define swiftcc void @tailcall_from_swifterror(ptr swifterror %error_ptr_ref) {
 ; CHECK-APPLE-LABEL: tailcall_from_swifterror:
 ; CHECK-APPLE:       ; %bb.0: ; %entry
 ; CHECK-APPLE-NEXT:    str x19, [sp, #-32]! ; 8-byte Folded Spill
-; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
 ; CHECK-APPLE-NEXT:    add x29, sp, #16
 ; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
@@ -1987,7 +1951,6 @@ define swiftcc void @tailcall_from_swifterror(ptr swifterror %error_ptr_ref) {
 ; CHECK-O0-AARCH64-LABEL: tailcall_from_swifterror:
 ; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
 ; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #32
-; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
 ; CHECK-O0-AARCH64-NEXT:    add x29, sp, #16
 ; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
@@ -2004,8 +1967,8 @@ define swiftcc void @tailcall_from_swifterror(ptr swifterror %error_ptr_ref) {
 ; CHECK-O0-ARM64_32-LABEL: tailcall_from_swifterror:
 ; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
 ; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #32
-; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-O0-ARM64_32-NEXT:    str x30, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -16
 ; CHECK-O0-ARM64_32-NEXT:    str x21, [sp, #8] ; 8-byte Folded Spill
 ; CHECK-O0-ARM64_32-NEXT:    mov x0, xzr
@@ -2026,7 +1989,6 @@ define swiftcc ptr @testAssign(ptr %error_ref) {
 ; CHECK-APPLE-LABEL: testAssign:
 ; CHECK-APPLE:       ; %bb.0: ; %entry
 ; CHECK-APPLE-NEXT:    sub sp, sp, #48
-; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-APPLE-NEXT:    stp x22, x21, [sp, #16] ; 16-byte Folded Spill
 ; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
 ; CHECK-APPLE-NEXT:    add x29, sp, #32
@@ -2046,7 +2008,6 @@ define swiftcc ptr @testAssign(ptr %error_ref) {
 ; CHECK-O0-AARCH64-LABEL: testAssign:
 ; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
 ; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #48
-; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-O0-AARCH64-NEXT:    stp x22, x21, [sp, #16] ; 16-byte Folded Spill
 ; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
 ; CHECK-O0-AARCH64-NEXT:    add x29, sp, #32
@@ -2069,9 +2030,9 @@ define swiftcc ptr @testAssign(ptr %error_ref) {
 ; CHECK-O0-ARM64_32-LABEL: testAssign:
 ; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
 ; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #48
-; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-O0-ARM64_32-NEXT:    stp x22, x21, [sp, #16] ; 16-byte Folded Spill
 ; CHECK-O0-ARM64_32-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -8
 ; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w29, -16
 ; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w21, -24

diff  --git a/llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir b/llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir
index 79d7f4a4200561..276ba08834a0a1 100644
--- a/llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir
+++ b/llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir
@@ -53,7 +53,6 @@ body:             |
   ; CHECK:   successors: %bb.1, %bb.2
   ; CHECK:   liveins: $q0, $q22, $q23, $q20, $q21, $q18, $q19, $q16, $q17, $q14, $q15, $q12, $q13, $q10, $q11, $q8, $q9, $lr, $fp
   ; CHECK:   $sp = frame-setup SUBXri $sp, 304, 0
-  ; CHECK:   frame-setup CFI_INSTRUCTION def_cfa_offset 304
   ; CHECK:   frame-setup STPQi killed $q23, killed $q22, $sp, 2 :: (store (s128) into %stack.19), (store (s128) into %stack.18)
   ; CHECK:   frame-setup STPQi killed $q21, killed $q20, $sp, 4 :: (store (s128) into %stack.17), (store (s128) into %stack.16)
   ; CHECK:   frame-setup STPQi killed $q19, killed $q18, $sp, 6 :: (store (s128) into %stack.15), (store (s128) into %stack.14)
@@ -63,6 +62,7 @@ body:             |
   ; CHECK:   frame-setup STPQi killed $q11, killed $q10, $sp, 14 :: (store (s128) into %stack.7), (store (s128) into %stack.6)
   ; CHECK:   frame-setup STPQi killed $q9, killed $q8, $sp, 16 :: (store (s128) into %stack.5), (store (s128) into %stack.4)
   ; CHECK:   frame-setup STPXi killed $fp, killed $lr, $sp, 36 :: (store (s64) into %stack.3), (store (s64) into %stack.2)
+  ; CHECK:   frame-setup CFI_INSTRUCTION def_cfa_offset 304
   ; CHECK:   frame-setup CFI_INSTRUCTION offset $w30, -8
   ; CHECK:   frame-setup CFI_INSTRUCTION offset $w29, -16
   ; CHECK:   frame-setup CFI_INSTRUCTION offset $b8, -32


        


More information about the llvm-commits mailing list