[clang] 9a5bc72 - [AArch64][SME] Remove SelectionDAG SME ABI lowering (#190950)

via cfe-commits cfe-commits at lists.llvm.org
Mon Apr 13 01:52:25 PDT 2026


Author: Benjamin Maxwell
Date: 2026-04-13T09:52:18+01:00
New Revision: 9a5bc720257024b44cf9f4c63741be53c4fba4c2

URL: https://github.com/llvm/llvm-project/commit/9a5bc720257024b44cf9f4c63741be53c4fba4c2
DIFF: https://github.com/llvm/llvm-project/commit/9a5bc720257024b44cf9f4c63741be53c4fba4c2.diff

LOG: [AArch64][SME] Remove SelectionDAG SME ABI lowering (#190950)

This patch removes the `-aarch64-new-sme-abi=<true/false>` option (which
has defaulted to "true" since LLVM 22) and removes the SelectionDAG
lowering for the SME ABI.

There should be no functional changes for the default path
(`-aarch64-new-sme-abi=true`).

Added: 
    

Modified: 
    clang/test/CodeGen/AArch64/sme-remarks.c
    llvm/docs/AArch64SME.rst
    llvm/lib/IR/Verifier.cpp
    llvm/lib/Target/AArch64/AArch64.h
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/lib/Target/AArch64/AArch64ISelLowering.h
    llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
    llvm/lib/Target/AArch64/AArch64SMEAttributes.cpp
    llvm/lib/Target/AArch64/AArch64SMEAttributes.h
    llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
    llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
    llvm/lib/Target/AArch64/AArch64TargetMachine.h
    llvm/lib/Target/AArch64/CMakeLists.txt
    llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
    llvm/test/CodeGen/AArch64/aarch64-sme-za-call-lowering.ll
    llvm/test/CodeGen/AArch64/sme-abi-save-call-remarks.ll
    llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
    llvm/test/CodeGen/AArch64/sme-dynamic-tls.ll
    llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
    llvm/test/CodeGen/AArch64/sme-lazy-save-windows.ll
    llvm/test/CodeGen/AArch64/sme-lazy-sve-nzcv-live.mir
    llvm/test/CodeGen/AArch64/sme-new-za-function.ll
    llvm/test/CodeGen/AArch64/sme-peephole-opts.ll
    llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
    llvm/test/CodeGen/AArch64/sme-za-control-flow.ll
    llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
    llvm/test/CodeGen/AArch64/sme-za-function-with-many-blocks.ll
    llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll
    llvm/test/CodeGen/AArch64/sme-zt0-state.ll
    llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
    llvm/test/Verifier/sme-attributes.ll
    llvm/unittests/Target/AArch64/SMEAttributesTest.cpp
    llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn

Removed: 
    llvm/lib/Target/AArch64/SMEABIPass.cpp
    llvm/test/CodeGen/AArch64/sme-new-zt0-function.ll


################################################################################
diff  --git a/clang/test/CodeGen/AArch64/sme-remarks.c b/clang/test/CodeGen/AArch64/sme-remarks.c
index f7a1f33f3372d..d220c8e076661 100644
--- a/clang/test/CodeGen/AArch64/sme-remarks.c
+++ b/clang/test/CodeGen/AArch64/sme-remarks.c
@@ -1,6 +1,4 @@
 // REQUIRES: aarch64-registered-target
-
-// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -mllvm -aarch64-new-sme-abi=false -Rpass-analysis=sme -verify=expected-sdag %s -S -o /dev/null
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sme -Rpass-analysis=sme -verify %s -S -o /dev/null %s
 
 void private_za_callee_a();
@@ -10,32 +8,26 @@ void private_za_callee_c();
 void test_za_merge_paths(int a) __arm_inout("za") {
   // expected-remark@+1 {{lazy save of ZA emitted in 'test_za_merge_paths'}}
   if (a != 0)
-    // expected-sdag-remark@+2 {{call from 'test_za_merge_paths' to 'unknown callee' sets up a lazy save for ZA}}
     // expected-remark@+1 {{call to 'private_za_callee_a' requires ZA save}}
     private_za_callee_a();
   else
-    // expected-sdag-remark@+2 {{call from 'test_za_merge_paths' to 'unknown callee' sets up a lazy save for ZA}}
     // expected-remark@+1 {{call to 'private_za_callee_b' requires ZA save}}
     private_za_callee_b();
-  // expected-sdag-remark@+3 {{call from 'test_za_merge_paths' to 'unknown callee' sets up a lazy save for ZA}}
-  /// The new lowering won't report this call as the save is already needed due
-  /// to the call to `private_za_callee_a/b()` calls on both paths to this call.
+  /// The analysis won't report this call as the save is already needed due to
+  /// the calls to `private_za_callee_a/b()` on both paths to this call.
   private_za_callee_c();
 }
 
 void test_lazy_save_multiple_paths(int a) __arm_inout("za") {
   // expected-remark@+1 {{lazy save of ZA emitted in 'test_lazy_save_multiple_paths'}}
   if (a != 0)
-    // expected-sdag-remark@+2 {{call from 'test_lazy_save_multiple_paths' to 'unknown callee' sets up a lazy save for ZA}}
     // expected-remark@+1 {{call to 'private_za_callee_a' requires ZA save}}
     private_za_callee_a();
   else {
-    // expected-sdag-remark@+2 {{call from 'test_lazy_save_multiple_paths' to 'unknown callee' sets up a lazy save for ZA}}
     // expected-remark@+1 {{call to 'private_za_callee_b' requires ZA save}}
     private_za_callee_b();
-    // expected-sdag-remark@+3 {{call from 'test_lazy_save_multiple_paths' to 'unknown callee' sets up a lazy save for ZA}}
-    /// The new lowering won't report this call as the save is already needed
-    /// due to the call to `private_za_callee_b()`.
+    /// The analysis won't report this call as the save is already needed due
+    /// to the call to `private_za_callee_b()`.
     private_za_callee_c();
   }
 }

diff  --git a/llvm/docs/AArch64SME.rst b/llvm/docs/AArch64SME.rst
index 327f9dcb232c1..d633dc2fbce2d 100644
--- a/llvm/docs/AArch64SME.rst
+++ b/llvm/docs/AArch64SME.rst
@@ -49,6 +49,10 @@ C/C++-level ACLE attributes:
 ``aarch64_expanded_pstate_za``
   is used for functions with ``__arm_new_za``
 
+``aarch64_zt0_undef``
+  Deprecated. Previously used internally to prevent spills/reloads of ZT0 in
+  some cases.
+
 Clang must ensure that the above attributes are added both to the
 function's declaration/definition as well as to their call-sites. This is
 important for calls to attributed function pointers, where no

diff  --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index a86e8fdb7d73a..d4ade9c7ce534 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -3122,9 +3122,6 @@ void Verifier::visitFunction(const Function &F) {
   Check(!Attrs.hasAttrSomewhere(Attribute::ElementType),
         "Attribute 'elementtype' can only be applied to a callsite.", &F);
 
-  Check(!Attrs.hasFnAttr("aarch64_zt0_undef"),
-        "Attribute 'aarch64_zt0_undef' can only be applied to a callsite.");
-
   if (Attrs.hasFnAttr(Attribute::Naked))
     for (const Argument &Arg : F.args())
       Check(Arg.use_empty(), "cannot use argument of naked function", &Arg);

diff  --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index 3d2838291d7e8..81f766080c82a 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -66,7 +66,6 @@ FunctionPass *createAArch64PostCoalescerPass();
 FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
 
 FunctionPass *createAArch64CollectLOHPass();
-FunctionPass *createSMEABIPass();
 FunctionPass *createSMEPeepholeOptPass();
 FunctionPass *createMachineSMEABIPass(CodeGenOptLevel);
 FunctionPass *createAArch64SRLTDefineSuperRegsPass();
@@ -145,8 +144,7 @@ void initializeAArch64StackTaggingPreRAPass(PassRegistry &);
 void initializeAArch64StorePairSuppressPass(PassRegistry&);
 void initializeFalkorHWPFFixPass(PassRegistry&);
 void initializeFalkorMarkStridedAccessesLegacyPass(PassRegistry&);
-void initializeLDTLSCleanupPass(PassRegistry&);
-void initializeSMEABIPass(PassRegistry &);
+void initializeLDTLSCleanupPass(PassRegistry &);
 void initializeSMEPeepholeOptPass(PassRegistry &);
 void initializeMachineSMEABIPass(PassRegistry &);
 void initializeAArch64SRLTDefineSuperRegsPass(PassRegistry &);

diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 96ccd38e34117..ece24767bdbb9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3260,136 +3260,6 @@ AArch64TargetLowering::EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const {
   return BB;
 }
 
-MachineBasicBlock *
-AArch64TargetLowering::EmitInitTPIDR2Object(MachineInstr &MI,
-                                            MachineBasicBlock *BB) const {
-  MachineFunction *MF = BB->getParent();
-  MachineFrameInfo &MFI = MF->getFrameInfo();
-  AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
-  TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
-  if (TPIDR2.Uses > 0) {
-    // Note: This case just needs to do `SVL << 48`. It is not implemented as we
-    // generally don't support big-endian SVE/SME.
-    if (!Subtarget->isLittleEndian())
-      reportFatalInternalError(
-          "TPIDR2 block initialization is not supported on big-endian targets");
-
-    const TargetInstrInfo *TII = Subtarget->getInstrInfo();
-    // Store buffer pointer and num_za_save_slices.
-    // Bytes 10-15 are implicitly zeroed.
-    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STPXi))
-        .addReg(MI.getOperand(0).getReg())
-        .addReg(MI.getOperand(1).getReg())
-        .addFrameIndex(TPIDR2.FrameIndex)
-        .addImm(0);
-  } else
-    MFI.RemoveStackObject(TPIDR2.FrameIndex);
-
-  BB->remove_instr(&MI);
-  return BB;
-}
-
-MachineBasicBlock *
-AArch64TargetLowering::EmitAllocateZABuffer(MachineInstr &MI,
-                                            MachineBasicBlock *BB) const {
-  MachineFunction *MF = BB->getParent();
-  MachineFrameInfo &MFI = MF->getFrameInfo();
-  AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
-  // TODO This function grows the stack with a subtraction, which doesn't work
-  // on Windows. Some refactoring to share the functionality in
-  // LowerWindowsDYNAMIC_STACKALLOC will be required once the Windows ABI
-  // supports SME
-  assert(!MF->getSubtarget<AArch64Subtarget>().isTargetWindows() &&
-         "Lazy ZA save is not yet supported on Windows");
-
-  TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
-
-  if (TPIDR2.Uses > 0) {
-    const TargetInstrInfo *TII = Subtarget->getInstrInfo();
-    MachineRegisterInfo &MRI = MF->getRegInfo();
-
-    // The SUBXrs below won't always be emitted in a form that accepts SP
-    // directly
-    Register SP = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
-    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), SP)
-        .addReg(AArch64::SP);
-
-    // Allocate a lazy-save buffer object of the size given, normally SVL * SVL
-    auto Size = MI.getOperand(1).getReg();
-    auto Dest = MI.getOperand(0).getReg();
-    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::MSUBXrrr), Dest)
-        .addReg(Size)
-        .addReg(Size)
-        .addReg(SP);
-    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
-            AArch64::SP)
-        .addReg(Dest);
-
-    // We have just allocated a variable sized object, tell this to PEI.
-    MFI.CreateVariableSizedObject(Align(16), nullptr);
-  }
-
-  BB->remove_instr(&MI);
-  return BB;
-}
-
-// TODO: Find a way to merge this with EmitAllocateZABuffer.
-MachineBasicBlock *
-AArch64TargetLowering::EmitAllocateSMESaveBuffer(MachineInstr &MI,
-                                                 MachineBasicBlock *BB) const {
-  MachineFunction *MF = BB->getParent();
-  MachineFrameInfo &MFI = MF->getFrameInfo();
-  AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
-  assert(!MF->getSubtarget<AArch64Subtarget>().isTargetWindows() &&
-         "Lazy ZA save is not yet supported on Windows");
-
-  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
-  if (FuncInfo->isSMESaveBufferUsed()) {
-    // Allocate a buffer object of the size given by MI.getOperand(1).
-    auto Size = MI.getOperand(1).getReg();
-    auto Dest = MI.getOperand(0).getReg();
-    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::SUBXrx64), AArch64::SP)
-        .addReg(AArch64::SP)
-        .addReg(Size)
-        .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0));
-    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), Dest)
-        .addReg(AArch64::SP);
-
-    // We have just allocated a variable sized object, tell this to PEI.
-    MFI.CreateVariableSizedObject(Align(16), nullptr);
-  } else
-    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF),
-            MI.getOperand(0).getReg());
-
-  BB->remove_instr(&MI);
-  return BB;
-}
-
-MachineBasicBlock *
-AArch64TargetLowering::EmitGetSMESaveSize(MachineInstr &MI,
-                                          MachineBasicBlock *BB) const {
-  // If the buffer is used, emit a call to __arm_sme_state_size()
-  MachineFunction *MF = BB->getParent();
-  AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
-  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
-  if (FuncInfo->isSMESaveBufferUsed()) {
-    RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE_SIZE;
-    const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
-    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::BL))
-        .addExternalSymbol(getLibcallName(LC))
-        .addReg(AArch64::X0, RegState::ImplicitDefine)
-        .addRegMask(TRI->getCallPreservedMask(*MF, getLibcallCallingConv(LC)));
-    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
-            MI.getOperand(0).getReg())
-        .addReg(AArch64::X0);
-  } else
-    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
-            MI.getOperand(0).getReg())
-        .addReg(AArch64::XZR);
-  BB->remove_instr(&MI);
-  return BB;
-}
-
 MachineBasicBlock *
 AArch64TargetLowering::EmitEntryPStateSM(MachineInstr &MI,
                                          MachineBasicBlock *BB) const {
@@ -3524,14 +3394,6 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
     MI.dump();
 #endif
     llvm_unreachable("Unexpected instruction for custom inserter!");
-  case AArch64::InitTPIDR2Obj:
-    return EmitInitTPIDR2Object(MI, BB);
-  case AArch64::AllocateZABuffer:
-    return EmitAllocateZABuffer(MI, BB);
-  case AArch64::AllocateSMESaveBuffer:
-    return EmitAllocateSMESaveBuffer(MI, BB);
-  case AArch64::GetSMESaveSize:
-    return EmitGetSMESaveSize(MI, BB);
   case AArch64::EntryPStateSM:
     return EmitEntryPStateSM(MI, BB);
   case AArch64::F128CSEL:
@@ -8748,81 +8610,6 @@ static bool isPassedInFPR(EVT VT) {
          (VT.isFloatingPoint() && !VT.isScalableVector());
 }
 
-static SDValue getZT0FrameIndex(MachineFrameInfo &MFI,
-                                AArch64FunctionInfo &FuncInfo,
-                                SelectionDAG &DAG) {
-  if (!FuncInfo.hasZT0SpillSlotIndex())
-    FuncInfo.setZT0SpillSlotIndex(MFI.CreateSpillStackObject(64, Align(16)));
-
-  return DAG.getFrameIndex(
-      FuncInfo.getZT0SpillSlotIndex(),
-      DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
-}
-
-// Emit a call to __arm_sme_save or __arm_sme_restore.
-static SDValue emitSMEStateSaveRestore(const AArch64TargetLowering &TLI,
-                                       SelectionDAG &DAG,
-                                       AArch64FunctionInfo *Info, SDLoc DL,
-                                       SDValue Chain, bool IsSave) {
-  MachineFunction &MF = DAG.getMachineFunction();
-  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
-  FuncInfo->setSMESaveBufferUsed();
-  TargetLowering::ArgListTy Args;
-  Args.emplace_back(
-      DAG.getCopyFromReg(Chain, DL, Info->getSMESaveBufferAddr(), MVT::i64),
-      PointerType::getUnqual(*DAG.getContext()));
-
-  RTLIB::Libcall LC =
-      IsSave ? RTLIB::SMEABI_SME_SAVE : RTLIB::SMEABI_SME_RESTORE;
-  RTLIB::LibcallImpl LCImpl = DAG.getLibcalls().getLibcallImpl(LC);
-  SDValue Callee =
-      DAG.getExternalSymbol(LCImpl, TLI.getPointerTy(DAG.getDataLayout()));
-  auto *RetTy = Type::getVoidTy(*DAG.getContext());
-  TargetLowering::CallLoweringInfo CLI(DAG);
-  CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
-      DAG.getLibcalls().getLibcallImplCallingConv(LCImpl), RetTy, Callee,
-      std::move(Args));
-  return TLI.LowerCallTo(CLI).second;
-}
-
-static SDValue emitRestoreZALazySave(SDValue Chain, SDLoc DL,
-                                     const AArch64TargetLowering &TLI,
-                                     const AArch64RegisterInfo &TRI,
-                                     AArch64FunctionInfo &FuncInfo,
-                                     SelectionDAG &DAG) {
-  // Conditionally restore the lazy save using a pseudo node.
-  RTLIB::Libcall LC = RTLIB::SMEABI_TPIDR2_RESTORE;
-  TPIDR2Object &TPIDR2 = FuncInfo.getTPIDR2Obj();
-
-  RTLIB::LibcallImpl LibcallImpl = DAG.getLibcalls().getLibcallImpl(LC);
-  SDValue RegMask = DAG.getRegisterMask(TRI.getCallPreservedMask(
-      DAG.getMachineFunction(),
-      DAG.getLibcalls().getLibcallImplCallingConv(LibcallImpl)));
-  SDValue RestoreRoutine = DAG.getTargetExternalSymbol(
-      LibcallImpl, TLI.getPointerTy(DAG.getDataLayout()));
-  SDValue TPIDR2_EL0 = DAG.getNode(
-      ISD::INTRINSIC_W_CHAIN, DL, MVT::i64, Chain,
-      DAG.getTargetConstant(Intrinsic::aarch64_sme_get_tpidr2, DL, MVT::i32));
-  // Copy the address of the TPIDR2 block into X0 before 'calling' the
-  // RESTORE_ZA pseudo.
-  SDValue Glue;
-  SDValue TPIDR2Block = DAG.getFrameIndex(
-      TPIDR2.FrameIndex,
-      DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
-  Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, TPIDR2Block, Glue);
-  Chain =
-      DAG.getNode(AArch64ISD::RESTORE_ZA, DL, MVT::Other,
-                  {Chain, TPIDR2_EL0, DAG.getRegister(AArch64::X0, MVT::i64),
-                   RestoreRoutine, RegMask, Chain.getValue(1)});
-  // Finally reset the TPIDR2_EL0 register to 0.
-  Chain = DAG.getNode(
-      ISD::INTRINSIC_VOID, DL, MVT::Other, Chain,
-      DAG.getTargetConstant(Intrinsic::aarch64_sme_set_tpidr2, DL, MVT::i32),
-      DAG.getConstant(0, DL, MVT::i64));
-  TPIDR2.Uses++;
-  return Chain;
-}
-
 SDValue AArch64TargetLowering::lowerEHPadEntry(SDValue Chain, SDLoc const &DL,
                                                SelectionDAG &DAG) const {
   assert(Chain.getOpcode() == ISD::EntryToken && "Unexpected Chain value");
@@ -8830,8 +8617,6 @@ SDValue AArch64TargetLowering::lowerEHPadEntry(SDValue Chain, SDLoc const &DL,
 
   MachineFunction &MF = DAG.getMachineFunction();
   auto &FuncInfo = *MF.getInfo<AArch64FunctionInfo>();
-  auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
-  const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
 
   SMEAttrs SMEFnAttrs = FuncInfo.getSMEFnAttrs();
 
@@ -8858,34 +8643,6 @@ SDValue AArch64TargetLowering::lowerEHPadEntry(SDValue Chain, SDLoc const &DL,
   else if (SMEFnAttrs.hasStreamingCompatibleInterface())
     Chain = changeStreamingMode(DAG, DL, /*Enable=*/true, Chain, Glue,
                                 AArch64SME::IfCallerIsStreaming);
-
-  if (getTM().useNewSMEABILowering())
-    return Chain;
-
-  if (SMEFnAttrs.hasAgnosticZAInterface()) {
-    // Restore full ZA
-    Chain = emitSMEStateSaveRestore(*this, DAG, &FuncInfo, DL, Chain,
-                                    /*IsSave=*/false);
-  } else if (SMEFnAttrs.hasZAState() || SMEFnAttrs.hasZT0State()) {
-    // SMSTART ZA
-    Chain = DAG.getNode(
-        AArch64ISD::SMSTART, DL, DAG.getVTList(MVT::Other, MVT::Glue), Chain,
-        DAG.getTargetConstant(int32_t(AArch64SVCR::SVCRZA), DL, MVT::i32));
-
-    // Restore ZT0
-    if (SMEFnAttrs.hasZT0State()) {
-      SDValue ZT0FrameIndex =
-          getZT0FrameIndex(MF.getFrameInfo(), FuncInfo, DAG);
-      Chain =
-          DAG.getNode(AArch64ISD::RESTORE_ZT, DL, DAG.getVTList(MVT::Other),
-                      {Chain, DAG.getConstant(0, DL, MVT::i32), ZT0FrameIndex});
-    }
-
-    // Restore ZA
-    if (SMEFnAttrs.hasZAState())
-      Chain = emitRestoreZALazySave(Chain, DL, *this, TRI, FuncInfo, DAG);
-  }
-
   return Chain;
 }
 
@@ -9300,91 +9057,38 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
   if (Subtarget->hasCustomCallingConv())
     Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
 
-  if (getTM().useNewSMEABILowering()) {
-    if (Subtarget->isTargetWindows() || hasInlineStackProbe(MF)) {
-      SDValue Size;
-      if (Attrs.hasZAState()) {
-        SDValue SVL = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
-                                  DAG.getConstant(1, DL, MVT::i32));
-        Size = DAG.getNode(ISD::MUL, DL, MVT::i64, SVL, SVL);
-      } else if (Attrs.hasAgnosticZAInterface()) {
-        RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE_SIZE;
-        RTLIB::LibcallImpl LCImpl = DAG.getLibcalls().getLibcallImpl(LC);
-
-        SDValue Callee =
-            DAG.getExternalSymbol(LCImpl, getPointerTy(DAG.getDataLayout()));
-        auto *RetTy = EVT(MVT::i64).getTypeForEVT(*DAG.getContext());
-        TargetLowering::CallLoweringInfo CLI(DAG);
-        CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
-            DAG.getLibcalls().getLibcallImplCallingConv(LCImpl), RetTy, Callee,
-            {});
-        std::tie(Size, Chain) = LowerCallTo(CLI);
-      }
-      if (Size) {
-        SDValue Buffer = DAG.getNode(
-            ISD::DYNAMIC_STACKALLOC, DL, DAG.getVTList(MVT::i64, MVT::Other),
-            {Chain, Size, DAG.getConstant(1, DL, MVT::i64)});
-        Chain = Buffer.getValue(1);
-
-        Register BufferPtr =
-            MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
-        Chain = DAG.getCopyToReg(Chain, DL, BufferPtr, Buffer);
-        Chain = DAG.getNode(AArch64ISD::SME_STATE_ALLOC, DL,
-                            DAG.getVTList(MVT::Other), Chain);
-        FuncInfo->setEarlyAllocSMESaveBuffer(BufferPtr);
-        MFI.CreateVariableSizedObject(Align(16), nullptr);
-      }
-    }
-  } else {
-    // Old SME ABI lowering (deprecated):
-    // Create a 16 Byte TPIDR2 object. The dynamic buffer
-    // will be expanded and stored in the static object later using a
-    // pseudonode.
+  if (Subtarget->isTargetWindows() || hasInlineStackProbe(MF)) {
+    SDValue Size;
     if (Attrs.hasZAState()) {
-      TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
-      TPIDR2.FrameIndex = MFI.CreateStackObject(16, Align(16), false);
       SDValue SVL = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
                                 DAG.getConstant(1, DL, MVT::i32));
-      SDValue Buffer;
-      if (!Subtarget->isTargetWindows() && !hasInlineStackProbe(MF)) {
-        Buffer = DAG.getNode(AArch64ISD::ALLOCATE_ZA_BUFFER, DL,
-                             DAG.getVTList(MVT::i64, MVT::Other), {Chain, SVL});
-      } else {
-        SDValue Size = DAG.getNode(ISD::MUL, DL, MVT::i64, SVL, SVL);
-        Buffer = DAG.getNode(ISD::DYNAMIC_STACKALLOC, DL,
-                             DAG.getVTList(MVT::i64, MVT::Other),
-                             {Chain, Size, DAG.getConstant(1, DL, MVT::i64)});
-        MFI.CreateVariableSizedObject(Align(16), nullptr);
-      }
-      SDValue NumZaSaveSlices = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
-                                            DAG.getConstant(1, DL, MVT::i32));
-      Chain = DAG.getNode(
-          AArch64ISD::INIT_TPIDR2OBJ, DL, DAG.getVTList(MVT::Other),
-          {/*Chain*/ Buffer.getValue(1), /*Buffer ptr*/ Buffer.getValue(0),
-           /*Num save slices*/ NumZaSaveSlices});
+      Size = DAG.getNode(ISD::MUL, DL, MVT::i64, SVL, SVL);
     } else if (Attrs.hasAgnosticZAInterface()) {
-      // Call __arm_sme_state_size().
-      SDValue BufferSize =
-          DAG.getNode(AArch64ISD::GET_SME_SAVE_SIZE, DL,
-                      DAG.getVTList(MVT::i64, MVT::Other), Chain);
-      Chain = BufferSize.getValue(1);
-      SDValue Buffer;
-      if (!Subtarget->isTargetWindows() && !hasInlineStackProbe(MF)) {
-        Buffer = DAG.getNode(AArch64ISD::ALLOC_SME_SAVE_BUFFER, DL,
-                             DAG.getVTList(MVT::i64, MVT::Other),
-                             {Chain, BufferSize});
-      } else {
-        // Allocate space dynamically.
-        Buffer = DAG.getNode(
-            ISD::DYNAMIC_STACKALLOC, DL, DAG.getVTList(MVT::i64, MVT::Other),
-            {Chain, BufferSize, DAG.getConstant(1, DL, MVT::i64)});
-        MFI.CreateVariableSizedObject(Align(16), nullptr);
-      }
-      // Copy the value to a virtual register, and save that in FuncInfo.
+      RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE_SIZE;
+      RTLIB::LibcallImpl LCImpl = DAG.getLibcalls().getLibcallImpl(LC);
+
+      SDValue Callee =
+          DAG.getExternalSymbol(LCImpl, getPointerTy(DAG.getDataLayout()));
+      auto *RetTy = EVT(MVT::i64).getTypeForEVT(*DAG.getContext());
+      TargetLowering::CallLoweringInfo CLI(DAG);
+      CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
+          DAG.getLibcalls().getLibcallImplCallingConv(LCImpl), RetTy, Callee,
+          {});
+      std::tie(Size, Chain) = LowerCallTo(CLI);
+    }
+    if (Size) {
+      SDValue Buffer = DAG.getNode(
+          ISD::DYNAMIC_STACKALLOC, DL, DAG.getVTList(MVT::i64, MVT::Other),
+          {Chain, Size, DAG.getConstant(1, DL, MVT::i64)});
+      Chain = Buffer.getValue(1);
+
       Register BufferPtr =
           MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
-      FuncInfo->setSMESaveBufferAddr(BufferPtr);
-      Chain = DAG.getCopyToReg(Buffer.getValue(1), DL, BufferPtr, Buffer);
+      Chain = DAG.getCopyToReg(Chain, DL, BufferPtr, Buffer);
+      Chain = DAG.getNode(AArch64ISD::SME_STATE_ALLOC, DL,
+                          DAG.getVTList(MVT::Other), Chain);
+      FuncInfo->setEarlyAllocSMESaveBuffer(BufferPtr);
+      MFI.CreateVariableSizedObject(Align(16), nullptr);
     }
   }
 
@@ -10118,18 +9822,12 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
       getSMECallAttrs(MF.getFunction(), getRuntimeLibcallsInfo(), CLI);
 
   std::optional<unsigned> ZAMarkerNode;
-  bool UseNewSMEABILowering = getTM().useNewSMEABILowering();
-
-  if (UseNewSMEABILowering) {
-    if (CallAttrs.requiresLazySave() ||
-        CallAttrs.requiresPreservingAllZAState())
-      ZAMarkerNode = AArch64ISD::REQUIRES_ZA_SAVE;
-    else if (CallAttrs.requiresPreservingZT0())
-      ZAMarkerNode = AArch64ISD::REQUIRES_ZT0_SAVE;
-    else if (CallAttrs.caller().hasZAState() ||
-             CallAttrs.caller().hasZT0State())
-      ZAMarkerNode = AArch64ISD::INOUT_ZA_USE;
-  }
+  if (CallAttrs.requiresLazySave() || CallAttrs.requiresPreservingAllZAState())
+    ZAMarkerNode = AArch64ISD::REQUIRES_ZA_SAVE;
+  else if (CallAttrs.requiresPreservingZT0())
+    ZAMarkerNode = AArch64ISD::REQUIRES_ZT0_SAVE;
+  else if (CallAttrs.caller().hasZAState() || CallAttrs.caller().hasZT0State())
+    ZAMarkerNode = AArch64ISD::INOUT_ZA_USE;
 
   if (IsTailCall) {
     // Check if it's really possible to do a tail call.
@@ -10203,33 +9901,6 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
     return R;
   };
 
-  bool RequiresLazySave = !UseNewSMEABILowering && CallAttrs.requiresLazySave();
-  bool RequiresSaveAllZA =
-      !UseNewSMEABILowering && CallAttrs.requiresPreservingAllZAState();
-  if (RequiresLazySave) {
-    TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
-    SDValue TPIDR2ObjAddr = DAG.getFrameIndex(
-        TPIDR2.FrameIndex,
-        DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
-    Chain = DAG.getNode(
-        ISD::INTRINSIC_VOID, DL, MVT::Other, Chain,
-        DAG.getTargetConstant(Intrinsic::aarch64_sme_set_tpidr2, DL, MVT::i32),
-        TPIDR2ObjAddr);
-    OptimizationRemarkEmitter ORE(&MF.getFunction());
-    ORE.emit([&]() {
-      auto R = CLI.CB ? OptimizationRemarkAnalysis("sme", "SMELazySaveZA",
-                                                   CLI.CB)
-                      : OptimizationRemarkAnalysis("sme", "SMELazySaveZA",
-                                                   &MF.getFunction());
-      return DescribeCallsite(R) << " sets up a lazy save for ZA";
-    });
-  } else if (RequiresSaveAllZA) {
-    assert(!CallAttrs.callee().hasSharedZAInterface() &&
-           "Cannot share state that may not exist");
-    Chain = emitSMEStateSaveRestore(*this, DAG, FuncInfo, DL, Chain,
-                                    /*IsSave=*/true);
-  }
-
   bool RequiresSMChange = CallAttrs.requiresSMChange();
   if (RequiresSMChange) {
     OptimizationRemarkEmitter ORE(&MF.getFunction());
@@ -10243,32 +9914,6 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
     });
   }
 
-  SDValue ZTFrameIdx;
-  MachineFrameInfo &MFI = MF.getFrameInfo();
-  bool ShouldPreserveZT0 =
-      !UseNewSMEABILowering && CallAttrs.requiresPreservingZT0();
-
-  // If the caller has ZT0 state which will not be preserved by the callee,
-  // spill ZT0 before the call.
-  if (ShouldPreserveZT0) {
-    ZTFrameIdx = getZT0FrameIndex(MFI, *FuncInfo, DAG);
-
-    Chain = DAG.getNode(AArch64ISD::SAVE_ZT, DL, DAG.getVTList(MVT::Other),
-                        {Chain, DAG.getConstant(0, DL, MVT::i32), ZTFrameIdx});
-  }
-
-  // If caller shares ZT0 but the callee is not shared ZA, we need to stop
-  // PSTATE.ZA before the call if there is no lazy-save active.
-  bool DisableZA =
-      !UseNewSMEABILowering && CallAttrs.requiresDisablingZABeforeCall();
-  assert((!DisableZA || !RequiresLazySave) &&
-         "Lazy-save should have PSTATE.SM=1 on entry to the function");
-
-  if (DisableZA)
-    Chain = DAG.getNode(
-        AArch64ISD::SMSTOP, DL, DAG.getVTList(MVT::Other, MVT::Glue), Chain,
-        DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32));
-
   // Adjust the stack pointer for the new arguments... and mark ZA uses.
   // These operations are automatically eliminated by the prolog/epilog pass
   assert((!IsSibCall || !ZAMarkerNode) && "ZA markers require CALLSEQ_START");
@@ -10742,27 +10387,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
         getSMToggleCondition(CallAttrs));
   }
 
-  if (!UseNewSMEABILowering &&
-      (RequiresLazySave || CallAttrs.requiresEnablingZAAfterCall()))
-    // Unconditionally resume ZA.
-    Result = DAG.getNode(
-        AArch64ISD::SMSTART, DL, DAG.getVTList(MVT::Other, MVT::Glue), Result,
-        DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32));
-
-  if (ShouldPreserveZT0)
-    Result =
-        DAG.getNode(AArch64ISD::RESTORE_ZT, DL, DAG.getVTList(MVT::Other),
-                    {Result, DAG.getConstant(0, DL, MVT::i32), ZTFrameIdx});
-
-  if (RequiresLazySave) {
-    Result = emitRestoreZALazySave(Result, DL, *this, *TRI, *FuncInfo, DAG);
-  } else if (RequiresSaveAllZA) {
-    Result = emitSMEStateSaveRestore(*this, DAG, FuncInfo, DL, Result,
-                                     /*IsSave=*/false);
-  }
-
-  if (RequiresSMChange || RequiresLazySave || ShouldPreserveZT0 ||
-      RequiresSaveAllZA) {
+  if (RequiresSMChange) {
     for (unsigned I = 0; I < InVals.size(); ++I) {
       // The smstart/smstop is chained as part of the call, but when the
       // resulting chain is discarded (which happens when the call is not part

diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 49ff76bb2f469..58efdd3e18fc0 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -184,18 +184,6 @@ class AArch64TargetLowering : public TargetLowering {
   MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB,
                                  unsigned Opcode, bool Op0IsDef) const;
   MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
-
-  // Note: The following group of functions are only used as part of the old SME
-  // ABI lowering. They will be removed once -aarch64-new-sme-abi=true is the
-  // default.
-  MachineBasicBlock *EmitInitTPIDR2Object(MachineInstr &MI,
-                                          MachineBasicBlock *BB) const;
-  MachineBasicBlock *EmitAllocateZABuffer(MachineInstr &MI,
-                                          MachineBasicBlock *BB) const;
-  MachineBasicBlock *EmitAllocateSMESaveBuffer(MachineInstr &MI,
-                                               MachineBasicBlock *BB) const;
-  MachineBasicBlock *EmitGetSMESaveSize(MachineInstr &MI,
-                                        MachineBasicBlock *BB) const;
   MachineBasicBlock *EmitEntryPStateSM(MachineInstr &MI,
                                        MachineBasicBlock *BB) const;
 

diff  --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 00e0c2511aaf0..60a5a978bb09a 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -37,11 +37,6 @@ struct AArch64FunctionInfo;
 class AArch64Subtarget;
 class MachineInstr;
 
-struct TPIDR2Object {
-  int FrameIndex = std::numeric_limits<int>::max();
-  unsigned Uses = 0;
-};
-
 /// Condition of signing the return address in a function.
 ///
 /// Corresponds to possible values of "sign-return-address" function attribute.
@@ -245,19 +240,6 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
   // support).
   Register EarlyAllocSMESaveBuffer = AArch64::NoRegister;
 
-  // Holds the spill slot for ZT0.
-  int ZT0SpillSlotIndex = std::numeric_limits<int>::max();
-
-  // Note: The following properties are only used for the old SME ABI lowering:
-  /// The frame-index for the TPIDR2 object used for lazy saves.
-  TPIDR2Object TPIDR2;
-  // Holds a pointer to a buffer that is large enough to represent
-  // all SME ZA state and any additional state required by the
-  // __arm_sme_save/restore support routines.
-  Register SMESaveBufferAddr = MCRegister::NoRegister;
-  // true if SMESaveBufferAddr is used.
-  bool SMESaveBufferUsed = false;
-
 public:
   AArch64FunctionInfo(const Function &F, const AArch64Subtarget *STI);
 
@@ -274,22 +256,6 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
     return EarlyAllocSMESaveBuffer;
   }
 
-  void setZT0SpillSlotIndex(int FI) { ZT0SpillSlotIndex = FI; }
-  int getZT0SpillSlotIndex() const {
-    assert(hasZT0SpillSlotIndex() && "ZT0 spill slot index not set!");
-    return ZT0SpillSlotIndex;
-  }
-  bool hasZT0SpillSlotIndex() const {
-    return ZT0SpillSlotIndex != std::numeric_limits<int>::max();
-  }
-
-  // Old SME ABI lowering state getters/setters:
-  Register getSMESaveBufferAddr() const { return SMESaveBufferAddr; };
-  void setSMESaveBufferAddr(Register Reg) { SMESaveBufferAddr = Reg; };
-  unsigned isSMESaveBufferUsed() const { return SMESaveBufferUsed; };
-  void setSMESaveBufferUsed(bool Used = true) { SMESaveBufferUsed = Used; };
-  TPIDR2Object &getTPIDR2Obj() { return TPIDR2; }
-
   void setPredicateRegForFillSpill(unsigned Reg) {
     PredicateRegForFillSpill = Reg;
   }

diff  --git a/llvm/lib/Target/AArch64/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/AArch64SMEAttributes.cpp
index 085c858820568..dd9f74df3609c 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEAttributes.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SMEAttributes.cpp
@@ -41,6 +41,9 @@ void SMEAttrs::validate() const {
 }
 
 SMEAttrs::SMEAttrs(const AttributeList &Attrs) {
+  // Note: 'aarch64_zt0_undef' was previously used (and subsequently removed).
+  // To avoid introducing any compatibility issues, don't reuse
+  // 'aarch64_zt0_undef' for another purpose.
   Bitmask = 0;
   if (Attrs.hasFnAttr("aarch64_pstate_sm_enabled"))
     Bitmask |= SM_Enabled;
@@ -50,8 +53,6 @@ SMEAttrs::SMEAttrs(const AttributeList &Attrs) {
     Bitmask |= SM_Body;
   if (Attrs.hasFnAttr("aarch64_za_state_agnostic"))
     Bitmask |= ZA_State_Agnostic;
-  if (Attrs.hasFnAttr("aarch64_zt0_undef"))
-    Bitmask |= ZT0_Undef;
   if (Attrs.hasFnAttr("aarch64_in_za"))
     Bitmask |= encodeZAState(StateValue::In);
   if (Attrs.hasFnAttr("aarch64_out_za"))
@@ -133,8 +134,7 @@ SMECallAttrs::SMECallAttrs(const CallBase &CB,
 
   // FIXME: We probably should not allow SME attributes on direct calls but
   // clang duplicates streaming mode attributes at each callsite.
-  assert((IsIndirect ||
-          ((Callsite.withoutPerCallsiteFlags() | CalledFn) == CalledFn)) &&
+  assert((IsIndirect || ((Callsite | CalledFn) == CalledFn)) &&
          "SME attributes at callsite do not match declaration");
 
   // An `invoke` of an agnostic ZA function may not return normally (it may

diff  --git a/llvm/lib/Target/AArch64/AArch64SMEAttributes.h b/llvm/lib/Target/AArch64/AArch64SMEAttributes.h
index 28c397e221fdc..a01333087594a 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEAttributes.h
+++ b/llvm/lib/Target/AArch64/AArch64SMEAttributes.h
@@ -43,12 +43,10 @@ class SMEAttrs {
     SM_Body = 1 << 2,         // aarch64_pstate_sm_body
     SME_ABI_Routine = 1 << 3, // Used for SME ABI routines to avoid lazy saves
     ZA_State_Agnostic = 1 << 4,
-    ZT0_Undef = 1 << 5, // Use to mark ZT0 as undef to avoid spills
     ZA_Shift = 6,
     ZA_Mask = 0b111 << ZA_Shift,
     ZT0_Shift = 9,
-    ZT0_Mask = 0b111 << ZT0_Shift,
-    CallSiteFlags_Mask = ZT0_Undef
+    ZT0_Mask = 0b111 << ZT0_Shift
   };
 
   SMEAttrs() = default;
@@ -134,7 +132,6 @@ class SMEAttrs {
   bool isPreservesZT0() const {
     return decodeZT0State(Bitmask) == StateValue::Preserved;
   }
-  bool hasUndefZT0() const { return Bitmask & ZT0_Undef; }
   bool sharesZT0() const {
     StateValue State = decodeZT0State(Bitmask);
     return State == StateValue::In || State == StateValue::Out ||
@@ -148,10 +145,6 @@ class SMEAttrs {
     return Merged;
   }
 
-  SMEAttrs withoutPerCallsiteFlags() const {
-    return (Bitmask & ~CallSiteFlags_Mask);
-  }
-
   bool operator==(SMEAttrs const &Other) const {
     return Bitmask == Other.Bitmask;
   }
@@ -197,8 +190,8 @@ class SMECallAttrs {
   }
 
   bool requiresPreservingZT0() const {
-    return caller().hasZT0State() && !callsite().hasUndefZT0() &&
-           !callee().sharesZT0() && !callee().hasAgnosticZAInterface();
+    return caller().hasZT0State() && !callee().sharesZT0() &&
+           !callee().hasAgnosticZAInterface();
   }
 
   bool requiresDisablingZABeforeCall() const {

diff  --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 905eed50dee9a..022fed6473486 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -26,10 +26,6 @@ def AArch64_cond_smstop  : SDNode<"AArch64ISD::COND_SMSTOP", SDTypeProfile<0, 3,
                              [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>,
                              [SDNPHasChain, SDNPSideEffect, SDNPVariadic,
                               SDNPOptInGlue, SDNPOutGlue]>;
-def AArch64_restore_za : SDNode<"AArch64ISD::RESTORE_ZA", SDTypeProfile<0, 3,
-                             [SDTCisInt<0>, SDTCisPtrTy<1>]>,
-                             [SDNPHasChain, SDNPSideEffect, SDNPVariadic,
-                              SDNPOptInGlue]>;
 def AArch64_restore_zt : SDNode<"AArch64ISD::RESTORE_ZT", SDTypeProfile<0, 2,
                                 [SDTCisInt<0>, SDTCisPtrTy<1>]>,
                                 [SDNPHasChain, SDNPSideEffect, SDNPMayLoad]>;
@@ -60,53 +56,16 @@ def AArch64_check_matching_vl
 def : Pat<(AArch64_check_matching_vl), (CHECK_MATCHING_VL_PSEUDO)>;
 
 //===----------------------------------------------------------------------===//
-// Old SME ABI lowering ISD nodes/pseudos (deprecated)
-//===----------------------------------------------------------------------===//
-
-def AArch64AllocateZABuffer : SDNode<"AArch64ISD::ALLOCATE_ZA_BUFFER", SDTypeProfile<1, 1,
-                              [SDTCisInt<0>, SDTCisInt<1>]>,
-                              [SDNPHasChain, SDNPSideEffect]>;
-let usesCustomInserter = 1, Defs = [SP], Uses = [SP] in {
-  def AllocateZABuffer : Pseudo<(outs GPR64sp:$dst), (ins GPR64:$size), []>, Sched<[WriteI]> {}
-}
-def : Pat<(i64 (AArch64AllocateZABuffer GPR64:$size)),
-          (AllocateZABuffer $size)>;
-
-def AArch64InitTPIDR2Obj  : SDNode<"AArch64ISD::INIT_TPIDR2OBJ", SDTypeProfile<0, 2,
-                              [SDTCisInt<0>, SDTCisInt<1>]>, [SDNPHasChain, SDNPMayStore]>;
-let usesCustomInserter = 1 in {
-  def InitTPIDR2Obj : Pseudo<(outs), (ins GPR64:$buffer, GPR64:$save_slices), [(AArch64InitTPIDR2Obj GPR64:$buffer, GPR64:$save_slices)]>, Sched<[WriteI]> {}
-}
-
-// Nodes to allocate a save buffer for SME.
-// Needed for __arm_agnostic("sme_za_state").
-def AArch64SMESaveSize : SDNode<"AArch64ISD::GET_SME_SAVE_SIZE", SDTypeProfile<1, 0,
-                               [SDTCisInt<0>]>, [SDNPHasChain]>;
-let usesCustomInserter = 1, Defs = [X0] in {
-  def GetSMESaveSize : Pseudo<(outs GPR64:$dst), (ins), []>, Sched<[]> {}
-}
-def : Pat<(i64 AArch64SMESaveSize), (GetSMESaveSize)>;
-
-def AArch64AllocateSMESaveBuffer : SDNode<"AArch64ISD::ALLOC_SME_SAVE_BUFFER", SDTypeProfile<1, 1,
-                                          [SDTCisInt<0>, SDTCisInt<1>]>, [SDNPHasChain]>;
-let usesCustomInserter = 1, Defs = [SP] in {
-  def AllocateSMESaveBuffer : Pseudo<(outs GPR64sp:$dst), (ins GPR64:$size), []>, Sched<[WriteI]> {}
-}
-def : Pat<(i64 (AArch64AllocateSMESaveBuffer GPR64:$size)),
-          (AllocateSMESaveBuffer $size)>;
-
-//===----------------------------------------------------------------------===//
-// New SME ABI lowering ISD nodes/pseudos (-aarch64-new-sme-abi)
+// SME ABI lowering ISD nodes/pseudos
 //===----------------------------------------------------------------------===//
 
 let hasSideEffects = 1, isMeta = 1 in {
   def InOutZAUsePseudo : Pseudo<(outs), (ins), []>, Sched<[]>;
   def RequiresZASavePseudo : Pseudo<(outs), (ins), []>, Sched<[]>;
   def RequiresZT0SavePseudo : Pseudo<(outs), (ins), []>, Sched<[]>;
+  def SMEStateAllocPseudo : Pseudo<(outs), (ins), []>, Sched<[]>;
 }
 
-def SMEStateAllocPseudo : Pseudo<(outs), (ins), []>, Sched<[]>;
-
 def CommitZASavePseudo
   : Pseudo<(outs),
            (ins GPR64:$tpidr2_el0, i1imm:$zero_za, i1imm:$zero_zt0,
@@ -320,10 +279,6 @@ def RestoreZAPseudo :
          (ins GPR64:$tpidr2_el0, GPR64sp:$tpidr2obj, i64imm:$restore_routine, variable_ops), []>,
          Sched<[]>;
 
-def : Pat<(AArch64_restore_za
-            (i64 GPR64:$tpidr2_el0), (i64 GPR64sp:$tpidr2obj), (i64 texternalsym:$restore_routine)),
-          (RestoreZAPseudo GPR64:$tpidr2_el0, GPR64sp:$tpidr2obj, texternalsym:$restore_routine)>;
-
 // Read and write TPIDR2_EL0
 def : Pat<(int_aarch64_sme_set_tpidr2 i64:$val),
           (MSR 0xde85, GPR64:$val)>;

diff  --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 37d893435ca9b..929001ed0ae6b 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -222,11 +222,6 @@ static cl::opt<bool>
                            cl::desc("Enable Machine Pipeliner for AArch64"),
                            cl::init(false), cl::Hidden);
 
-static cl::opt<bool>
-    EnableNewSMEABILowering("aarch64-new-sme-abi",
-                            cl::desc("Enable new lowering for the SME ABI"),
-                            cl::init(true), cl::Hidden);
-
 static cl::opt<bool> EnableSRLTSubregToRegMitigation(
     "aarch64-srlt-mitigate-sr2r",
     cl::desc("Enable SUBREG_TO_REG mitigation by adding 'implicit-def' for "
@@ -272,7 +267,6 @@ LLVMInitializeAArch64Target() {
   initializeFalkorMarkStridedAccessesLegacyPass(PR);
   initializeLDTLSCleanupPass(PR);
   initializeKCFIPass(PR);
-  initializeSMEABIPass(PR);
   initializeMachineSMEABIPass(PR);
   initializeAArch64SRLTDefineSuperRegsPass(PR);
   initializeSMEPeepholeOptPass(PR);
@@ -357,8 +351,7 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
                                computeDefaultCPU(TT, CPU), FS, Options,
                                getEffectiveRelocModel(TT, RM),
                                getEffectiveAArch64CodeModel(TT, CM, JIT), OL),
-      TLOF(createTLOF(getTargetTriple())), isLittle(LittleEndian),
-      UseNewSMEABILowering(EnableNewSMEABILowering) {
+      TLOF(createTLOF(getTargetTriple())), isLittle(LittleEndian) {
   initAsmInfo();
 
   if (TT.isOSBinFormatMachO()) {
@@ -693,13 +686,6 @@ void AArch64PassConfig::addIRPasses() {
     addPass(createInterleavedAccessPass());
   }
 
-  if (!EnableNewSMEABILowering) {
-    // Expand any functions marked with SME attributes which require special
-    // changes for the calling convention or that require the lazy-saving
-    // mechanism specified in the SME ABI.
-    addPass(createSMEABIPass());
-  }
-
   // Add Control Flow Guard checks.
   if (TM->getTargetTriple().isOSWindows()) {
     if (TM->getTargetTriple().isWindowsArm64EC())
@@ -802,7 +788,7 @@ bool AArch64PassConfig::addGlobalInstructionSelect() {
 }
 
 void AArch64PassConfig::addMachineSSAOptimization() {
-  if (TM->getOptLevel() != CodeGenOptLevel::None && EnableNewSMEABILowering)
+  if (TM->getOptLevel() != CodeGenOptLevel::None)
     addPass(createMachineSMEABIPass(TM->getOptLevel()));
 
   if (TM->getOptLevel() != CodeGenOptLevel::None && EnableSMEPeepholeOpt)
@@ -835,7 +821,7 @@ bool AArch64PassConfig::addILPOpts() {
 }
 
 void AArch64PassConfig::addPreRegAlloc() {
-  if (TM->getOptLevel() == CodeGenOptLevel::None && EnableNewSMEABILowering)
+  if (TM->getOptLevel() == CodeGenOptLevel::None)
     addPass(createMachineSMEABIPass(CodeGenOptLevel::None));
 
   // Change dead register definitions to refer to the zero register.

diff  --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/llvm/lib/Target/AArch64/AArch64TargetMachine.h
index 25ab66f36f8ec..209374553d297 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.h
@@ -79,15 +79,11 @@ class AArch64TargetMachine : public CodeGenTargetMachineImpl {
   size_t clearLinkerOptimizationHints(
       const SmallPtrSetImpl<MachineInstr *> &MIs) const override;
 
-  /// Returns true if the new SME ABI lowering should be used.
-  bool useNewSMEABILowering() const { return UseNewSMEABILowering; }
-
   /// Returns the optimisation level that enables GlobalISel.
   unsigned getEnableGlobalISelAtO() const;
 
 private:
   bool isLittle;
-  bool UseNewSMEABILowering;
 };
 
 // AArch64 little endian target machine.

diff  --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index 2fe554217c1ba..0c9a567278c56 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -88,7 +88,6 @@ add_llvm_target(AArch64CodeGen
   AArch64TargetMachine.cpp
   AArch64TargetObjectFile.cpp
   AArch64TargetTransformInfo.cpp
-  SMEABIPass.cpp
   SMEPeepholeOpt.cpp
   SVEIntrinsicOpts.cpp
   MachineSMEABIPass.cpp

diff  --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
index 406f9bdddaec0..4462af1ca306f 100644
--- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
+++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
@@ -1168,14 +1168,13 @@ INITIALIZE_PASS(MachineSMEABI, "aarch64-machine-sme-abi", "Machine SME ABI",
                 false, false)
 
 bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) {
-  Subtarget = &MF.getSubtarget<AArch64Subtarget>();
-
   AFI = MF.getInfo<AArch64FunctionInfo>();
   SMEAttrs SMEFnAttrs = AFI->getSMEFnAttrs();
   if (!SMEFnAttrs.hasZAState() && !SMEFnAttrs.hasZT0State() &&
       !SMEFnAttrs.hasAgnosticZAInterface())
     return false;
 
+  Subtarget = &MF.getSubtarget<AArch64Subtarget>();
   if (!Subtarget->hasSME() && !SMEFnAttrs.hasAgnosticZAInterface())
     return false;
 

diff  --git a/llvm/lib/Target/AArch64/SMEABIPass.cpp b/llvm/lib/Target/AArch64/SMEABIPass.cpp
deleted file mode 100644
index 4245afbbf6beb..0000000000000
--- a/llvm/lib/Target/AArch64/SMEABIPass.cpp
+++ /dev/null
@@ -1,195 +0,0 @@
-//===--------- SMEABI - SME  ABI-------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass implements parts of the the SME ABI, such as:
-// * Using the lazy-save mechanism before enabling the use of ZA.
-// * Setting up the lazy-save mechanism around invokes.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AArch64.h"
-#include "AArch64SMEAttributes.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicsAArch64.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/RuntimeLibcalls.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "aarch64-sme-abi"
-
-namespace {
-struct SMEABI : public FunctionPass {
-  static char ID; // Pass identification, replacement for typeid
-  SMEABI() : FunctionPass(ID) {}
-
-  bool runOnFunction(Function &F) override;
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<TargetPassConfig>();
-  }
-
-private:
-  bool updateNewStateFunctions(Module *M, Function *F, IRBuilder<> &Builder,
-                               SMEAttrs FnAttrs, const TargetLowering &TLI);
-};
-} // end anonymous namespace
-
-char SMEABI::ID = 0;
-static const char *name = "SME ABI Pass";
-INITIALIZE_PASS(SMEABI, DEBUG_TYPE, name, false, false)
-
-FunctionPass *llvm::createSMEABIPass() { return new SMEABI(); }
-
-//===----------------------------------------------------------------------===//
-// Utility functions
-//===----------------------------------------------------------------------===//
-
-// Utility function to emit a call to __arm_tpidr2_save and clear TPIDR2_EL0.
-void emitTPIDR2Save(Module *M, IRBuilder<> &Builder, const TargetLowering &TLI,
-                    bool ZT0IsUndef = false) {
-  auto &Ctx = M->getContext();
-  auto *TPIDR2SaveTy =
-      FunctionType::get(Builder.getVoidTy(), {}, /*IsVarArgs=*/false);
-  auto Attrs =
-      AttributeList().addFnAttribute(Ctx, "aarch64_pstate_sm_compatible");
-  RTLIB::Libcall LC = RTLIB::SMEABI_TPIDR2_SAVE;
-  FunctionCallee Callee =
-      M->getOrInsertFunction(TLI.getLibcallName(LC), TPIDR2SaveTy, Attrs);
-  CallInst *Call = Builder.CreateCall(Callee);
-
-  // If ZT0 is undefined (i.e. we're at the entry of a "new_zt0" function), mark
-  // that on the __arm_tpidr2_save call. This prevents an unnecessary spill of
-  // ZT0 that can occur before ZA is enabled.
-  if (ZT0IsUndef)
-    Call->addFnAttr(Attribute::get(Ctx, "aarch64_zt0_undef"));
-
-  Call->setCallingConv(TLI.getLibcallCallingConv(LC));
-
-  // A save to TPIDR2 should be followed by clearing TPIDR2_EL0.
-  Function *WriteIntr =
-      Intrinsic::getOrInsertDeclaration(M, Intrinsic::aarch64_sme_set_tpidr2);
-  Builder.CreateCall(WriteIntr->getFunctionType(), WriteIntr,
-                     Builder.getInt64(0));
-}
-
-/// This function generates code at the beginning and end of a function marked
-/// with either `aarch64_new_za` or `aarch64_new_zt0`.
-/// At the beginning of the function, the following code is generated:
-///  - Commit lazy-save if active   [Private-ZA Interface*]
-///  - Enable PSTATE.ZA             [Private-ZA Interface]
-///  - Zero ZA                      [Has New ZA State]
-///  - Zero ZT0                     [Has New ZT0 State]
-///
-/// * A function with new ZT0 state will not change ZA, so committing the
-/// lazy-save is not strictly necessary. However, the lazy-save mechanism
-/// may be active on entry to the function, with PSTATE.ZA set to 1. If
-/// the new ZT0 function calls a function that does not share ZT0, we will
-/// need to conditionally SMSTOP ZA before the call, setting PSTATE.ZA to 0.
-/// For this reason, it's easier to always commit the lazy-save at the
-/// beginning of the function regardless of whether it has ZA state.
-///
-/// At the end of the function, PSTATE.ZA is disabled if the function has a
-/// Private-ZA Interface. A function is considered to have a Private-ZA
-/// interface if it does not share ZA or ZT0.
-///
-bool SMEABI::updateNewStateFunctions(Module *M, Function *F,
-                                     IRBuilder<> &Builder, SMEAttrs FnAttrs,
-                                     const TargetLowering &TLI) {
-  LLVMContext &Context = F->getContext();
-  BasicBlock *OrigBB = &F->getEntryBlock();
-  Builder.SetInsertPoint(&OrigBB->front());
-
-  // Commit any active lazy-saves if this is a Private-ZA function. If the
-  // value read from TPIDR2_EL0 is not null on entry to the function then
-  // the lazy-saving scheme is active and we should call __arm_tpidr2_save
-  // to commit the lazy save.
-  if (FnAttrs.hasPrivateZAInterface()) {
-    // Create the new blocks for reading TPIDR2_EL0 & enabling ZA state.
-    auto *SaveBB = OrigBB->splitBasicBlockBefore(OrigBB->begin(), "save.za");
-    auto *PreludeBB = BasicBlock::Create(Context, "prelude", F, SaveBB);
-
-    // Read TPIDR2_EL0 in PreludeBB & branch to SaveBB if not 0.
-    Builder.SetInsertPoint(PreludeBB);
-    Function *TPIDR2Intr =
-        Intrinsic::getOrInsertDeclaration(M, Intrinsic::aarch64_sme_get_tpidr2);
-    auto *TPIDR2 = Builder.CreateCall(TPIDR2Intr->getFunctionType(), TPIDR2Intr,
-                                      {}, "tpidr2");
-    auto *Cmp = Builder.CreateCmp(ICmpInst::ICMP_NE, TPIDR2,
-                                  Builder.getInt64(0), "cmp");
-    Builder.CreateCondBr(Cmp, SaveBB, OrigBB);
-
-    // Create a call __arm_tpidr2_save, which commits the lazy save.
-    Builder.SetInsertPoint(&SaveBB->back());
-    emitTPIDR2Save(M, Builder, TLI, /*ZT0IsUndef=*/FnAttrs.isNewZT0());
-
-    // Enable pstate.za at the start of the function.
-    Builder.SetInsertPoint(&OrigBB->front());
-    Function *EnableZAIntr =
-        Intrinsic::getOrInsertDeclaration(M, Intrinsic::aarch64_sme_za_enable);
-    Builder.CreateCall(EnableZAIntr->getFunctionType(), EnableZAIntr);
-  }
-
-  if (FnAttrs.isNewZA()) {
-    Function *ZeroIntr =
-        Intrinsic::getOrInsertDeclaration(M, Intrinsic::aarch64_sme_zero);
-    Builder.CreateCall(ZeroIntr->getFunctionType(), ZeroIntr,
-                       Builder.getInt32(0xff));
-  }
-
-  if (FnAttrs.isNewZT0()) {
-    Function *ClearZT0Intr =
-        Intrinsic::getOrInsertDeclaration(M, Intrinsic::aarch64_sme_zero_zt);
-    Builder.CreateCall(ClearZT0Intr->getFunctionType(), ClearZT0Intr,
-                       {Builder.getInt32(0)});
-  }
-
-  if (FnAttrs.hasPrivateZAInterface()) {
-    // Before returning, disable pstate.za
-    for (BasicBlock &BB : *F) {
-      Instruction *T = BB.getTerminator();
-      if (!T || !isa<ReturnInst>(T))
-        continue;
-      Builder.SetInsertPoint(T);
-      Function *DisableZAIntr = Intrinsic::getOrInsertDeclaration(
-          M, Intrinsic::aarch64_sme_za_disable);
-      Builder.CreateCall(DisableZAIntr->getFunctionType(), DisableZAIntr);
-    }
-  }
-
-  F->addFnAttr("aarch64_expanded_pstate_za");
-  return true;
-}
-
-bool SMEABI::runOnFunction(Function &F) {
-  Module *M = F.getParent();
-  LLVMContext &Context = F.getContext();
-  IRBuilder<> Builder(Context);
-
-  if (F.isDeclaration() || F.hasFnAttribute("aarch64_expanded_pstate_za"))
-    return false;
-
-  const TargetMachine &TM =
-      getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
-  const TargetLowering &TLI = *TM.getSubtargetImpl(F)->getTargetLowering();
-
-  bool Changed = false;
-  SMEAttrs FnAttrs(F);
-  if (FnAttrs.isNewZA() || FnAttrs.isNewZT0())
-    Changed |= updateNewStateFunctions(M, &F, Builder, FnAttrs, TLI);
-
-  return Changed;
-}

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-sme-za-call-lowering.ll b/llvm/test/CodeGen/AArch64/aarch64-sme-za-call-lowering.ll
index 0b8645f66b5f3..d956a231ba922 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-sme-za-call-lowering.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-sme-za-call-lowering.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=aarch64 -aarch64-streaming-hazard-size=0 -mattr=+sme,+sve -aarch64-new-sme-abi -stop-before=aarch64-machine-sme-abi -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-BEFORE-SMEABI
-; RUN: llc -mtriple=aarch64 -aarch64-streaming-hazard-size=0 -mattr=+sme,+sve -aarch64-new-sme-abi -stop-after=aarch64-machine-sme-abi -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-AFTER-SMEABI
+; RUN: llc -mtriple=aarch64 -aarch64-streaming-hazard-size=0 -mattr=+sme,+sve -stop-before=aarch64-machine-sme-abi -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-BEFORE-SMEABI
+; RUN: llc -mtriple=aarch64 -aarch64-streaming-hazard-size=0 -mattr=+sme,+sve -stop-after=aarch64-machine-sme-abi -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-AFTER-SMEABI
 
 declare void @private_za_callee()
 declare void @shared_za_callee() "aarch64_inout_za"

diff  --git a/llvm/test/CodeGen/AArch64/sme-abi-save-call-remarks.ll b/llvm/test/CodeGen/AArch64/sme-abi-save-call-remarks.ll
index c3c76e3e803d0..c7e04aaadbc6a 100644
--- a/llvm/test/CodeGen/AArch64/sme-abi-save-call-remarks.ll
+++ b/llvm/test/CodeGen/AArch64/sme-abi-save-call-remarks.ll
@@ -1,5 +1,4 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64 -mattr=+sme2 --aarch64-new-sme-abi=false --pass-remarks-analysis=sme -o /dev/null < %s 2>&1 | FileCheck %s  --check-prefix=CHECK-SDAG
 ; RUN: llc -mtriple=aarch64 -mattr=+sme2 --pass-remarks-analysis=sme -o /dev/null < %s 2>&1 | FileCheck %s
 
 declare void @private_za_callee()
@@ -13,8 +12,6 @@ declare void @shared_za_zt0_callee() "aarch64_inout_za" "aarch64_inout_zt0"
 ; Note: These remarks are more useful with source debug info (which gives line numbers for `<unknown>:0:0`).
 
 define void @test_lazy_save_1_callee() nounwind "aarch64_inout_za" {
-; CHECK-SDAG: remark: <unknown>:0:0: call from 'test_lazy_save_1_callee' to 'private_za_callee' sets up a lazy save for ZA
-
 ; CHECK:      remark: <unknown>:0:0: lazy save of ZA emitted in 'test_lazy_save_1_callee'
 ; CHECK-NEXT: remark: <unknown>:0:0: call to 'private_za_callee' requires ZA save
   call void @private_za_callee()
@@ -22,9 +19,6 @@ define void @test_lazy_save_1_callee() nounwind "aarch64_inout_za" {
 }
 
 define void @test_lazy_save_2_callees() nounwind "aarch64_inout_za" {
-; CHECK-SDAG: remark: <unknown>:0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA
-; CHECK-SDAG: remark: <unknown>:0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA
-
 ; CHECK:      remark: <unknown>:0:0: lazy save of ZA emitted in 'test_lazy_save_2_callees'
 ; CHECK-NEXT: remark: <unknown>:0:0: call to 'private_za_callee' requires ZA save
   call void @private_za_callee()
@@ -33,8 +27,6 @@ define void @test_lazy_save_2_callees() nounwind "aarch64_inout_za" {
 }
 
 define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_inout_za" {
-; CHECK-SDAG: remark: <unknown>:0:0: call from 'test_lazy_save_expanded_intrinsic' to 'cosf' sets up a lazy save for ZA
-
 ; CHECK:      remark: <unknown>:0:0: lazy save of ZA emitted in 'test_lazy_save_expanded_intrinsic'
 ; CHECK-NEXT: remark: <unknown>:0:0: call to 'cosf' requires ZA save
   %res = call float @llvm.cos.f32(float %a)
@@ -42,10 +34,6 @@ define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_inou
 }
 
 define void @test_lazy_save_multiple_paths(i1 %a) "aarch64_inout_za" {
-; CHECK-SDAG: remark: <unknown>:0:0: call from 'test_lazy_save_multiple_paths' to 'private_za_callee_a' sets up a lazy save for ZA
-; CHECK-SDAG: remark: <unknown>:0:0: call from 'test_lazy_save_multiple_paths' to 'private_za_callee_b' sets up a lazy save for ZA
-; CHECK-SDAG: remark: <unknown>:0:0: call from 'test_lazy_save_multiple_paths' to 'private_za_callee_c' sets up a lazy save for ZA
-
 ;      CHECK: remark: <unknown>:0:0: lazy save of ZA emitted in 'test_lazy_save_multiple_paths'
 ; CHECK-NEXT: remark: <unknown>:0:0: call to 'private_za_callee_b' requires ZA save
 ; CHECK-NEXT: remark: <unknown>:0:0: call to 'private_za_callee_a' requires ZA save
@@ -67,8 +55,6 @@ if.end:
 
 define void @test_lazy_save_with_zt0() "aarch64_inout_za" "aarch64_inout_zt0"
 {
-; CHECK-SDAG: remark: <unknown>:0:0: call from 'test_lazy_save_with_zt0' to 'private_za_callee' sets up a lazy save for ZA
-
 ;      CHECK: remark: <unknown>:0:0: spill of ZT0 emitted in 'test_lazy_save_with_zt0'
 ; CHECK-NEXT: remark: <unknown>:0:0: call to 'shared_za_callee' requires ZT0 save
 ; CHECK-NEXT: remark: <unknown>:0:0: lazy save of ZA emitted in 'test_lazy_save_with_zt0'
@@ -80,8 +66,6 @@ define void @test_lazy_save_with_zt0() "aarch64_inout_za" "aarch64_inout_zt0"
 
 define void @test_lazy_save_with_zt0_reload() "aarch64_inout_za" "aarch64_inout_zt0"
 {
-; CHECK-SDAG: remark: <unknown>:0:0: call from 'test_lazy_save_with_zt0_reload' to 'private_za_callee' sets up a lazy save for ZA
-
 ;      CHECK: remark: <unknown>:0:0: spill of ZT0 emitted in 'test_lazy_save_with_zt0_reload'
 ; CHECK-NEXT: remark: <unknown>:0:0: call to 'shared_za_callee' requires ZT0 save
 ; CHECK-NEXT: remark: <unknown>:0:0: spill of ZT0 emitted in 'test_lazy_save_with_zt0_reload'
@@ -94,8 +78,6 @@ define void @test_lazy_save_with_zt0_reload() "aarch64_inout_za" "aarch64_inout_
 }
 
 define void @test_za_merge_paths(i1 %a) "aarch64_za_state_agnostic" {
-;; Note: The old lowering does not emit any remarks for agnostic ZA saves.
-
 ;      CHECK: remark: <unknown>:0:0: full save of ZA emitted in 'test_za_merge_paths'
 ; CHECK-NEXT: remark: <unknown>:0:0: call to 'private_za_callee_b' requires ZA save
 ; CHECK-NEXT: remark: <unknown>:0:0: call to 'private_za_callee_a' requires ZA save
@@ -111,7 +93,7 @@ if.end:
   br label %exit
 
 exit:
-  ; The new lowering won't report this call as the save is already needed due to
+  ; The remarks won't report this call as the save is already needed due to
   ; the call to `private_za_callee_*()` calls on both paths to this BB.
   call void @private_za_callee_c()
 
@@ -119,8 +101,6 @@ exit:
 }
 
 define void @test_lazy_save_function_ptr_callee(ptr %private_za_callee) nounwind "aarch64_inout_za" {
-; CHECK-SDAG: remark: <unknown>:0:0: call from 'test_lazy_save_function_ptr_callee' to 'unknown callee' sets up a lazy save for ZA
-
 ; CHECK:      remark: <unknown>:0:0: lazy save of ZA emitted in 'test_lazy_save_function_ptr_callee'
 ; CHECK-NEXT: remark: <unknown>:0:0: call requires ZA save
   call void %private_za_callee()

diff  --git a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
index 7a89879625632..e9d7971560474 100644
--- a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
+++ b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
@@ -1,6 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mattr=+sme2 < %s -aarch64-new-sme-abi=false | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-SDAG
-; RUN: llc -mattr=+sme2 < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK
+; RUN: llc -mattr=+sme2 < %s | FileCheck %s
 
 target triple = "aarch64"
 
@@ -10,10 +9,10 @@ declare i64 @agnostic_decl(i64) "aarch64_za_state_agnostic"
 
 ; No calls. Test that no buffer is allocated.
 define i64 @agnostic_caller_no_callees(ptr %ptr) nounwind "aarch64_za_state_agnostic" {
-; CHECK-COMMON-LABEL: agnostic_caller_no_callees:
-; CHECK-COMMON:       // %bb.0:
-; CHECK-COMMON-NEXT:    ldr x0, [x0]
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: agnostic_caller_no_callees:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr x0, [x0]
+; CHECK-NEXT:    ret
   %v = load i64, ptr %ptr
   ret i64 %v
 }
@@ -24,34 +23,6 @@ define i64 @agnostic_caller_no_callees(ptr %ptr) nounwind "aarch64_za_state_agno
 ; inserted for calls to non-agnostic functions and that the arg/result registers are
 ; preserved by the register allocator.
 define i64 @agnostic_caller_private_za_callee(i64 %v) nounwind "aarch64_za_state_agnostic" {
-; CHECK-SDAG-LABEL: agnostic_caller_private_za_callee:
-; CHECK-SDAG:       // %bb.0:
-; CHECK-SDAG-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    str x19, [sp, #16] // 8-byte Spill
-; CHECK-SDAG-NEXT:    mov x29, sp
-; CHECK-SDAG-NEXT:    mov x8, x0
-; CHECK-SDAG-NEXT:    bl __arm_sme_state_size
-; CHECK-SDAG-NEXT:    sub sp, sp, x0
-; CHECK-SDAG-NEXT:    mov x19, sp
-; CHECK-SDAG-NEXT:    mov x0, x19
-; CHECK-SDAG-NEXT:    bl __arm_sme_save
-; CHECK-SDAG-NEXT:    mov x0, x8
-; CHECK-SDAG-NEXT:    bl private_za_decl
-; CHECK-SDAG-NEXT:    mov x1, x0
-; CHECK-SDAG-NEXT:    mov x0, x19
-; CHECK-SDAG-NEXT:    bl __arm_sme_restore
-; CHECK-SDAG-NEXT:    mov x0, x19
-; CHECK-SDAG-NEXT:    bl __arm_sme_save
-; CHECK-SDAG-NEXT:    mov x0, x1
-; CHECK-SDAG-NEXT:    bl private_za_decl
-; CHECK-SDAG-NEXT:    mov x1, x0
-; CHECK-SDAG-NEXT:    mov x0, x19
-; CHECK-SDAG-NEXT:    bl __arm_sme_restore
-; CHECK-SDAG-NEXT:    mov x0, x1
-; CHECK-SDAG-NEXT:    mov sp, x29
-; CHECK-SDAG-NEXT:    ldr x19, [sp, #16] // 8-byte Reload
-; CHECK-SDAG-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ret
 ;
 ; CHECK-LABEL: agnostic_caller_private_za_callee:
 ; CHECK:       // %bb.0:
@@ -84,12 +55,12 @@ define i64 @agnostic_caller_private_za_callee(i64 %v) nounwind "aarch64_za_state
 ;
 ; Should not result in save/restore code.
 define i64 @agnostic_caller_agnostic_callee(i64 %v) nounwind "aarch64_za_state_agnostic" {
-; CHECK-COMMON-LABEL: agnostic_caller_agnostic_callee:
-; CHECK-COMMON:       // %bb.0:
-; CHECK-COMMON-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-COMMON-NEXT:    bl agnostic_decl
-; CHECK-COMMON-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: agnostic_caller_agnostic_callee:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    bl agnostic_decl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %res = call i64 @agnostic_decl(i64 %v)
   ret i64 %res
 }
@@ -98,58 +69,18 @@ define i64 @agnostic_caller_agnostic_callee(i64 %v) nounwind "aarch64_za_state_a
 ;
 ; Should not result in lazy-save or save of ZT0
 define i64 @shared_caller_agnostic_callee(i64 %v) nounwind "aarch64_inout_za" "aarch64_inout_zt0" {
-; CHECK-COMMON-LABEL: shared_caller_agnostic_callee:
-; CHECK-COMMON:       // %bb.0:
-; CHECK-COMMON-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-COMMON-NEXT:    bl agnostic_decl
-; CHECK-COMMON-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: shared_caller_agnostic_callee:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    bl agnostic_decl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %res = call i64 @agnostic_decl(i64 %v)
   ret i64 %res
 }
 
 ; agnostic-ZA + streaming -> private-ZA + non-streaming
 define i64 @streaming_agnostic_caller_nonstreaming_private_za_callee(i64 %v) nounwind "aarch64_za_state_agnostic" "aarch64_pstate_sm_enabled" {
-; CHECK-SDAG-LABEL: streaming_agnostic_caller_nonstreaming_private_za_callee:
-; CHECK-SDAG:       // %bb.0:
-; CHECK-SDAG-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    mov x8, x0
-; CHECK-SDAG-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    add x29, sp, #64
-; CHECK-SDAG-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    bl __arm_sme_state_size
-; CHECK-SDAG-NEXT:    sub sp, sp, x0
-; CHECK-SDAG-NEXT:    mov x20, sp
-; CHECK-SDAG-NEXT:    mov x0, x20
-; CHECK-SDAG-NEXT:    bl __arm_sme_save
-; CHECK-SDAG-NEXT:    smstop sm
-; CHECK-SDAG-NEXT:    mov x0, x8
-; CHECK-SDAG-NEXT:    bl private_za_decl
-; CHECK-SDAG-NEXT:    mov x1, x0
-; CHECK-SDAG-NEXT:    smstart sm
-; CHECK-SDAG-NEXT:    mov x0, x20
-; CHECK-SDAG-NEXT:    bl __arm_sme_restore
-; CHECK-SDAG-NEXT:    mov x0, x20
-; CHECK-SDAG-NEXT:    bl __arm_sme_save
-; CHECK-SDAG-NEXT:    smstop sm
-; CHECK-SDAG-NEXT:    mov x0, x1
-; CHECK-SDAG-NEXT:    bl private_za_decl
-; CHECK-SDAG-NEXT:    mov x1, x0
-; CHECK-SDAG-NEXT:    smstart sm
-; CHECK-SDAG-NEXT:    mov x0, x20
-; CHECK-SDAG-NEXT:    bl __arm_sme_restore
-; CHECK-SDAG-NEXT:    mov x0, x1
-; CHECK-SDAG-NEXT:    sub sp, x29, #64
-; CHECK-SDAG-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ret
 ;
 ; CHECK-LABEL: streaming_agnostic_caller_nonstreaming_private_za_callee:
 ; CHECK:       // %bb.0:
@@ -190,59 +121,6 @@ define i64 @streaming_agnostic_caller_nonstreaming_private_za_callee(i64 %v) nou
 
 ; agnostic-ZA + streaming-compatible -> private-ZA + non-streaming
 define i64 @streaming_compatible_agnostic_caller_nonstreaming_private_za_callee(i64 %v) nounwind "aarch64_za_state_agnostic" "aarch64_pstate_sm_compatible" {
-; CHECK-SDAG-LABEL: streaming_compatible_agnostic_caller_nonstreaming_private_za_callee:
-; CHECK-SDAG:       // %bb.0:
-; CHECK-SDAG-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    mov x8, x0
-; CHECK-SDAG-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    add x29, sp, #64
-; CHECK-SDAG-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    mrs x20, SVCR
-; CHECK-SDAG-NEXT:    bl __arm_sme_state_size
-; CHECK-SDAG-NEXT:    sub sp, sp, x0
-; CHECK-SDAG-NEXT:    mov x19, sp
-; CHECK-SDAG-NEXT:    mov x0, x19
-; CHECK-SDAG-NEXT:    bl __arm_sme_save
-; CHECK-SDAG-NEXT:    tbz w20, #0, .LBB5_2
-; CHECK-SDAG-NEXT:  // %bb.1:
-; CHECK-SDAG-NEXT:    smstop sm
-; CHECK-SDAG-NEXT:  .LBB5_2:
-; CHECK-SDAG-NEXT:    mov x0, x8
-; CHECK-SDAG-NEXT:    bl private_za_decl
-; CHECK-SDAG-NEXT:    mov x1, x0
-; CHECK-SDAG-NEXT:    tbz w20, #0, .LBB5_4
-; CHECK-SDAG-NEXT:  // %bb.3:
-; CHECK-SDAG-NEXT:    smstart sm
-; CHECK-SDAG-NEXT:  .LBB5_4:
-; CHECK-SDAG-NEXT:    mov x0, x19
-; CHECK-SDAG-NEXT:    bl __arm_sme_restore
-; CHECK-SDAG-NEXT:    mov x0, x19
-; CHECK-SDAG-NEXT:    bl __arm_sme_save
-; CHECK-SDAG-NEXT:    tbz w20, #0, .LBB5_6
-; CHECK-SDAG-NEXT:  // %bb.5:
-; CHECK-SDAG-NEXT:    smstop sm
-; CHECK-SDAG-NEXT:  .LBB5_6:
-; CHECK-SDAG-NEXT:    mov x0, x1
-; CHECK-SDAG-NEXT:    bl private_za_decl
-; CHECK-SDAG-NEXT:    mov x1, x0
-; CHECK-SDAG-NEXT:    tbz w20, #0, .LBB5_8
-; CHECK-SDAG-NEXT:  // %bb.7:
-; CHECK-SDAG-NEXT:    smstart sm
-; CHECK-SDAG-NEXT:  .LBB5_8:
-; CHECK-SDAG-NEXT:    mov x0, x19
-; CHECK-SDAG-NEXT:    bl __arm_sme_restore
-; CHECK-SDAG-NEXT:    mov x0, x1
-; CHECK-SDAG-NEXT:    sub sp, x29, #64
-; CHECK-SDAG-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ret
 ;
 ; CHECK-LABEL: streaming_compatible_agnostic_caller_nonstreaming_private_za_callee:
 ; CHECK:       // %bb.0:
@@ -295,30 +173,6 @@ declare i64 @many_args_private_za_callee(
 ; stack pointer before the call -- in this test the call to __arm_sme_save
 ; should occur _before_ the stack decrement.
 define i64  @test_many_callee_arguments(
-; CHECK-SDAG-LABEL: test_many_callee_arguments:
-; CHECK-SDAG:       // %bb.0:
-; CHECK-SDAG-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    str x19, [sp, #16] // 8-byte Spill
-; CHECK-SDAG-NEXT:    mov x29, sp
-; CHECK-SDAG-NEXT:    mov x8, x0
-; CHECK-SDAG-NEXT:    bl __arm_sme_state_size
-; CHECK-SDAG-NEXT:    sub sp, sp, x0
-; CHECK-SDAG-NEXT:    ldp x9, x10, [x29, #32]
-; CHECK-SDAG-NEXT:    mov x19, sp
-; CHECK-SDAG-NEXT:    mov x0, x19
-; CHECK-SDAG-NEXT:    bl __arm_sme_save
-; CHECK-SDAG-NEXT:    stp x9, x10, [sp, #-16]!
-; CHECK-SDAG-NEXT:    mov x0, x8
-; CHECK-SDAG-NEXT:    bl many_args_private_za_callee
-; CHECK-SDAG-NEXT:    add sp, sp, #16
-; CHECK-SDAG-NEXT:    mov x1, x0
-; CHECK-SDAG-NEXT:    mov x0, x19
-; CHECK-SDAG-NEXT:    bl __arm_sme_restore
-; CHECK-SDAG-NEXT:    mov x0, x1
-; CHECK-SDAG-NEXT:    mov sp, x29
-; CHECK-SDAG-NEXT:    ldr x19, [sp, #16] // 8-byte Reload
-; CHECK-SDAG-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ret
 ;
 ; CHECK-LABEL: test_many_callee_arguments:
 ; CHECK:       // %bb.0:
@@ -352,33 +206,33 @@ define i64  @test_many_callee_arguments(
 }
 
 define void @agnostic_za_buffer_alloc_with_stack_probes() nounwind "aarch64_za_state_agnostic" "probe-stack"="inline-asm" "stack-probe-size"="65536"{
-; CHECK-COMMON-LABEL: agnostic_za_buffer_alloc_with_stack_probes:
-; CHECK-COMMON:       // %bb.0:
-; CHECK-COMMON-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    str x19, [sp, #16] // 8-byte Spill
-; CHECK-COMMON-NEXT:    mov x29, sp
-; CHECK-COMMON-NEXT:    bl __arm_sme_state_size
-; CHECK-COMMON-NEXT:    mov x8, sp
-; CHECK-COMMON-NEXT:    sub x19, x8, x0
-; CHECK-COMMON-NEXT:  .LBB7_1: // =>This Inner Loop Header: Depth=1
-; CHECK-COMMON-NEXT:    sub sp, sp, #16, lsl #12 // =65536
-; CHECK-COMMON-NEXT:    cmp sp, x19
-; CHECK-COMMON-NEXT:    b.le .LBB7_3
-; CHECK-COMMON-NEXT:  // %bb.2: // in Loop: Header=BB7_1 Depth=1
-; CHECK-COMMON-NEXT:    ldr xzr, [sp]
-; CHECK-COMMON-NEXT:    b .LBB7_1
-; CHECK-COMMON-NEXT:  .LBB7_3:
-; CHECK-COMMON-NEXT:    mov sp, x19
-; CHECK-COMMON-NEXT:    ldr xzr, [sp]
-; CHECK-COMMON-NEXT:    mov x0, x19
-; CHECK-COMMON-NEXT:    bl __arm_sme_save
-; CHECK-COMMON-NEXT:    bl private_za
-; CHECK-COMMON-NEXT:    mov x0, x19
-; CHECK-COMMON-NEXT:    bl __arm_sme_restore
-; CHECK-COMMON-NEXT:    mov sp, x29
-; CHECK-COMMON-NEXT:    ldr x19, [sp, #16] // 8-byte Reload
-; CHECK-COMMON-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: agnostic_za_buffer_alloc_with_stack_probes:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT:    str x19, [sp, #16] // 8-byte Spill
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    bl __arm_sme_state_size
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    sub x19, x8, x0
+; CHECK-NEXT:  .LBB7_1: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536
+; CHECK-NEXT:    cmp sp, x19
+; CHECK-NEXT:    b.le .LBB7_3
+; CHECK-NEXT:  // %bb.2: // in Loop: Header=BB7_1 Depth=1
+; CHECK-NEXT:    ldr xzr, [sp]
+; CHECK-NEXT:    b .LBB7_1
+; CHECK-NEXT:  .LBB7_3:
+; CHECK-NEXT:    mov sp, x19
+; CHECK-NEXT:    ldr xzr, [sp]
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    bl __arm_sme_save
+; CHECK-NEXT:    bl private_za
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    bl __arm_sme_restore
+; CHECK-NEXT:    mov sp, x29
+; CHECK-NEXT:    ldr x19, [sp, #16] // 8-byte Reload
+; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
   call void @private_za()
   ret void
 }

diff  --git a/llvm/test/CodeGen/AArch64/sme-dynamic-tls.ll b/llvm/test/CodeGen/AArch64/sme-dynamic-tls.ll
index 87a63fed0546c..c5115d0ff0a60 100644
--- a/llvm/test/CodeGen/AArch64/sme-dynamic-tls.ll
+++ b/llvm/test/CodeGen/AArch64/sme-dynamic-tls.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=aarch64-unknown-linux-gnu -mattr=+sme -aarch64-new-sme-abi -relocation-model=pic < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu -mattr=+sme -relocation-model=pic < %s | FileCheck %s
 
 @x = external thread_local local_unnamed_addr global i32, align 4
 

diff  --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
index 188059baa6675..bbdda5fa8f484 100644
--- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
+++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
@@ -1,6 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64 -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme -aarch64-new-sme-abi=false < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-SDAG
-; RUN: llc -mtriple=aarch64 -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK
+; RUN: llc -mtriple=aarch64 -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme < %s | FileCheck %s
 
 declare void @private_za_callee()
 declare void @shared_za_callee() "aarch64_inout_za"
@@ -10,74 +9,38 @@ declare float @llvm.cos.f32(float)
 
 ; Test lazy-save mechanism for a single callee.
 define void @test_lazy_save_1_callee() nounwind "aarch64_inout_za" {
-; CHECK-COMMON-LABEL: test_lazy_save_1_callee:
-; CHECK-COMMON:       // %bb.0:
-; CHECK-COMMON-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    str x19, [sp, #16] // 8-byte Spill
-; CHECK-COMMON-NEXT:    mov x29, sp
-; CHECK-COMMON-NEXT:    sub sp, sp, #16
-; CHECK-COMMON-NEXT:    rdsvl x8, #1
-; CHECK-COMMON-NEXT:    mov x9, sp
-; CHECK-COMMON-NEXT:    msub x9, x8, x8, x9
-; CHECK-COMMON-NEXT:    mov sp, x9
-; CHECK-COMMON-NEXT:    sub x10, x29, #16
-; CHECK-COMMON-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, x10
-; CHECK-COMMON-NEXT:    bl private_za_callee
-; CHECK-COMMON-NEXT:    smstart za
-; CHECK-COMMON-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-COMMON-NEXT:    sub x0, x29, #16
-; CHECK-COMMON-NEXT:    cbnz x8, .LBB0_2
-; CHECK-COMMON-NEXT:  // %bb.1:
-; CHECK-COMMON-NEXT:    bl __arm_tpidr2_restore
-; CHECK-COMMON-NEXT:  .LBB0_2:
-; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-COMMON-NEXT:    mov sp, x29
-; CHECK-COMMON-NEXT:    ldr x19, [sp, #16] // 8-byte Reload
-; CHECK-COMMON-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: test_lazy_save_1_callee:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT:    str x19, [sp, #16] // 8-byte Spill
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    rdsvl x8, #1
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    msub x9, x8, x8, x9
+; CHECK-NEXT:    mov sp, x9
+; CHECK-NEXT:    sub x10, x29, #16
+; CHECK-NEXT:    stp x9, x8, [x29, #-16]
+; CHECK-NEXT:    msr TPIDR2_EL0, x10
+; CHECK-NEXT:    bl private_za_callee
+; CHECK-NEXT:    smstart za
+; CHECK-NEXT:    mrs x8, TPIDR2_EL0
+; CHECK-NEXT:    sub x0, x29, #16
+; CHECK-NEXT:    cbnz x8, .LBB0_2
+; CHECK-NEXT:  // %bb.1:
+; CHECK-NEXT:    bl __arm_tpidr2_restore
+; CHECK-NEXT:  .LBB0_2:
+; CHECK-NEXT:    msr TPIDR2_EL0, xzr
+; CHECK-NEXT:    mov sp, x29
+; CHECK-NEXT:    ldr x19, [sp, #16] // 8-byte Reload
+; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
   call void @private_za_callee()
   ret void
 }
 
 ; Test lazy-save mechanism for multiple callees.
 define void @test_lazy_save_2_callees() nounwind "aarch64_inout_za" {
-; CHECK-SDAG-LABEL: test_lazy_save_2_callees:
-; CHECK-SDAG:       // %bb.0:
-; CHECK-SDAG-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    mov x29, sp
-; CHECK-SDAG-NEXT:    sub sp, sp, #16
-; CHECK-SDAG-NEXT:    rdsvl x8, #1
-; CHECK-SDAG-NEXT:    mov x9, sp
-; CHECK-SDAG-NEXT:    msub x9, x8, x8, x9
-; CHECK-SDAG-NEXT:    mov sp, x9
-; CHECK-SDAG-NEXT:    sub x20, x29, #16
-; CHECK-SDAG-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x20
-; CHECK-SDAG-NEXT:    bl private_za_callee
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB1_2
-; CHECK-SDAG-NEXT:  // %bb.1:
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB1_2:
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x20
-; CHECK-SDAG-NEXT:    bl private_za_callee
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB1_4
-; CHECK-SDAG-NEXT:  // %bb.3:
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB1_4:
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    mov sp, x29
-; CHECK-SDAG-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ret
 ;
 ; CHECK-LABEL: test_lazy_save_2_callees:
 ; CHECK:       // %bb.0:
@@ -113,81 +76,38 @@ define void @test_lazy_save_2_callees() nounwind "aarch64_inout_za" {
 
 ; Test a call of an intrinsic that gets expanded to a library call.
 define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_inout_za" {
-; CHECK-COMMON-LABEL: test_lazy_save_expanded_intrinsic:
-; CHECK-COMMON:       // %bb.0:
-; CHECK-COMMON-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    str x19, [sp, #16] // 8-byte Spill
-; CHECK-COMMON-NEXT:    mov x29, sp
-; CHECK-COMMON-NEXT:    sub sp, sp, #16
-; CHECK-COMMON-NEXT:    rdsvl x8, #1
-; CHECK-COMMON-NEXT:    mov x9, sp
-; CHECK-COMMON-NEXT:    msub x9, x8, x8, x9
-; CHECK-COMMON-NEXT:    mov sp, x9
-; CHECK-COMMON-NEXT:    sub x10, x29, #16
-; CHECK-COMMON-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, x10
-; CHECK-COMMON-NEXT:    bl cosf
-; CHECK-COMMON-NEXT:    smstart za
-; CHECK-COMMON-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-COMMON-NEXT:    sub x0, x29, #16
-; CHECK-COMMON-NEXT:    cbnz x8, .LBB2_2
-; CHECK-COMMON-NEXT:  // %bb.1:
-; CHECK-COMMON-NEXT:    bl __arm_tpidr2_restore
-; CHECK-COMMON-NEXT:  .LBB2_2:
-; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-COMMON-NEXT:    mov sp, x29
-; CHECK-COMMON-NEXT:    ldr x19, [sp, #16] // 8-byte Reload
-; CHECK-COMMON-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: test_lazy_save_expanded_intrinsic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT:    str x19, [sp, #16] // 8-byte Spill
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    rdsvl x8, #1
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    msub x9, x8, x8, x9
+; CHECK-NEXT:    mov sp, x9
+; CHECK-NEXT:    sub x10, x29, #16
+; CHECK-NEXT:    stp x9, x8, [x29, #-16]
+; CHECK-NEXT:    msr TPIDR2_EL0, x10
+; CHECK-NEXT:    bl cosf
+; CHECK-NEXT:    smstart za
+; CHECK-NEXT:    mrs x8, TPIDR2_EL0
+; CHECK-NEXT:    sub x0, x29, #16
+; CHECK-NEXT:    cbnz x8, .LBB2_2
+; CHECK-NEXT:  // %bb.1:
+; CHECK-NEXT:    bl __arm_tpidr2_restore
+; CHECK-NEXT:  .LBB2_2:
+; CHECK-NEXT:    msr TPIDR2_EL0, xzr
+; CHECK-NEXT:    mov sp, x29
+; CHECK-NEXT:    ldr x19, [sp, #16] // 8-byte Reload
+; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
   %res = call float @llvm.cos.f32(float %a)
   ret float %res
 }
 
 ; Test a combination of streaming-compatible -> normal call with lazy-save.
 define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za" "aarch64_pstate_sm_compatible" {
-; CHECK-SDAG-LABEL: test_lazy_save_and_conditional_smstart:
-; CHECK-SDAG:       // %bb.0:
-; CHECK-SDAG-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    add x29, sp, #64
-; CHECK-SDAG-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    sub sp, sp, #16
-; CHECK-SDAG-NEXT:    rdsvl x8, #1
-; CHECK-SDAG-NEXT:    mov x9, sp
-; CHECK-SDAG-NEXT:    mrs x20, SVCR
-; CHECK-SDAG-NEXT:    msub x9, x8, x8, x9
-; CHECK-SDAG-NEXT:    mov sp, x9
-; CHECK-SDAG-NEXT:    sub x10, x29, #80
-; CHECK-SDAG-NEXT:    stp x9, x8, [x29, #-80]
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x10
-; CHECK-SDAG-NEXT:    tbz w20, #0, .LBB3_2
-; CHECK-SDAG-NEXT:  // %bb.1:
-; CHECK-SDAG-NEXT:    smstop sm
-; CHECK-SDAG-NEXT:  .LBB3_2:
-; CHECK-SDAG-NEXT:    bl private_za_callee
-; CHECK-SDAG-NEXT:    tbz w20, #0, .LBB3_4
-; CHECK-SDAG-NEXT:  // %bb.3:
-; CHECK-SDAG-NEXT:    smstart sm
-; CHECK-SDAG-NEXT:  .LBB3_4:
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #80
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB3_6
-; CHECK-SDAG-NEXT:  // %bb.5:
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB3_6:
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    sub sp, x29, #64
-; CHECK-SDAG-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ret
 ;
 ; CHECK-LABEL: test_lazy_save_and_conditional_smstart:
 ; CHECK:       // %bb.0:
@@ -240,58 +160,6 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za
 ; restore from it (since ZA is off on return). We could improve this case
 ; by turning ZA off before the final private ZA call.
 define void @test_lazy_save_mixed_shared_and_private_callees() "aarch64_new_za"
-; CHECK-SDAG-LABEL: test_lazy_save_mixed_shared_and_private_callees:
-; CHECK-SDAG:       // %bb.0: // %prelude
-; CHECK-SDAG-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    mov x29, sp
-; CHECK-SDAG-NEXT:    sub sp, sp, #16
-; CHECK-SDAG-NEXT:    .cfi_def_cfa w29, 32
-; CHECK-SDAG-NEXT:    .cfi_offset w19, -8
-; CHECK-SDAG-NEXT:    .cfi_offset w20, -16
-; CHECK-SDAG-NEXT:    .cfi_offset w30, -24
-; CHECK-SDAG-NEXT:    .cfi_offset w29, -32
-; CHECK-SDAG-NEXT:    rdsvl x8, #1
-; CHECK-SDAG-NEXT:    mov x9, sp
-; CHECK-SDAG-NEXT:    msub x9, x8, x8, x9
-; CHECK-SDAG-NEXT:    mov sp, x9
-; CHECK-SDAG-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    cbz x8, .LBB4_2
-; CHECK-SDAG-NEXT:  // %bb.1: // %save.za
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_save
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:  .LBB4_2:
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    sub x20, x29, #16
-; CHECK-SDAG-NEXT:    zero {za}
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x20
-; CHECK-SDAG-NEXT:    bl private_za_callee
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB4_4
-; CHECK-SDAG-NEXT:  // %bb.3:
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB4_4:
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    bl shared_za_callee
-; CHECK-SDAG-NEXT:    bl preserves_za_callee
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x20
-; CHECK-SDAG-NEXT:    bl private_za_callee
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB4_6
-; CHECK-SDAG-NEXT:  // %bb.5:
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB4_6:
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    smstop za
-; CHECK-SDAG-NEXT:    mov sp, x29
-; CHECK-SDAG-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ret
 ;
 ; CHECK-LABEL: test_lazy_save_mixed_shared_and_private_callees:
 ; CHECK:       // %bb.0:
@@ -347,89 +215,6 @@ define void @test_lazy_save_mixed_shared_and_private_callees() "aarch64_new_za"
 }
 
 define void @test_many_back2back_private_za_calls() "aarch64_inout_za" {
-; CHECK-SDAG-LABEL: test_many_back2back_private_za_calls:
-; CHECK-SDAG:       // %bb.0:
-; CHECK-SDAG-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    mov x29, sp
-; CHECK-SDAG-NEXT:    sub sp, sp, #16
-; CHECK-SDAG-NEXT:    .cfi_def_cfa w29, 32
-; CHECK-SDAG-NEXT:    .cfi_offset w19, -8
-; CHECK-SDAG-NEXT:    .cfi_offset w20, -16
-; CHECK-SDAG-NEXT:    .cfi_offset w30, -24
-; CHECK-SDAG-NEXT:    .cfi_offset w29, -32
-; CHECK-SDAG-NEXT:    rdsvl x8, #1
-; CHECK-SDAG-NEXT:    mov x9, sp
-; CHECK-SDAG-NEXT:    msub x9, x8, x8, x9
-; CHECK-SDAG-NEXT:    mov sp, x9
-; CHECK-SDAG-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-SDAG-NEXT:    bl shared_za_callee
-; CHECK-SDAG-NEXT:    sub x20, x29, #16
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x20
-; CHECK-SDAG-NEXT:    bl private_za_callee
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB5_2
-; CHECK-SDAG-NEXT:  // %bb.1:
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB5_2:
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x20
-; CHECK-SDAG-NEXT:    bl private_za_callee
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB5_4
-; CHECK-SDAG-NEXT:  // %bb.3:
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB5_4:
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x20
-; CHECK-SDAG-NEXT:    bl private_za_callee
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB5_6
-; CHECK-SDAG-NEXT:  // %bb.5:
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB5_6:
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x20
-; CHECK-SDAG-NEXT:    bl private_za_callee
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB5_8
-; CHECK-SDAG-NEXT:  // %bb.7:
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB5_8:
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x20
-; CHECK-SDAG-NEXT:    bl private_za_callee
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB5_10
-; CHECK-SDAG-NEXT:  // %bb.9:
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB5_10:
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x20
-; CHECK-SDAG-NEXT:    bl private_za_callee
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB5_12
-; CHECK-SDAG-NEXT:  // %bb.11:
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB5_12:
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    bl shared_za_callee
-; CHECK-SDAG-NEXT:    mov sp, x29
-; CHECK-SDAG-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ret
 ;
 ; CHECK-LABEL: test_many_back2back_private_za_calls:
 ; CHECK:       // %bb.0:
@@ -480,34 +265,34 @@ define void @test_many_back2back_private_za_calls() "aarch64_inout_za" {
 }
 
 define void @test_shared_private_shared() nounwind "aarch64_inout_za" {
-; CHECK-COMMON-LABEL: test_shared_private_shared:
-; CHECK-COMMON:       // %bb.0:
-; CHECK-COMMON-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    str x19, [sp, #16] // 8-byte Spill
-; CHECK-COMMON-NEXT:    mov x29, sp
-; CHECK-COMMON-NEXT:    sub sp, sp, #16
-; CHECK-COMMON-NEXT:    rdsvl x8, #1
-; CHECK-COMMON-NEXT:    mov x9, sp
-; CHECK-COMMON-NEXT:    msub x9, x8, x8, x9
-; CHECK-COMMON-NEXT:    mov sp, x9
-; CHECK-COMMON-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-COMMON-NEXT:    bl shared_za_callee
-; CHECK-COMMON-NEXT:    sub x8, x29, #16
-; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, x8
-; CHECK-COMMON-NEXT:    bl private_za_callee
-; CHECK-COMMON-NEXT:    smstart za
-; CHECK-COMMON-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-COMMON-NEXT:    sub x0, x29, #16
-; CHECK-COMMON-NEXT:    cbnz x8, .LBB6_2
-; CHECK-COMMON-NEXT:  // %bb.1:
-; CHECK-COMMON-NEXT:    bl __arm_tpidr2_restore
-; CHECK-COMMON-NEXT:  .LBB6_2:
-; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-COMMON-NEXT:    bl shared_za_callee
-; CHECK-COMMON-NEXT:    mov sp, x29
-; CHECK-COMMON-NEXT:    ldr x19, [sp, #16] // 8-byte Reload
-; CHECK-COMMON-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: test_shared_private_shared:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT:    str x19, [sp, #16] // 8-byte Spill
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    rdsvl x8, #1
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    msub x9, x8, x8, x9
+; CHECK-NEXT:    mov sp, x9
+; CHECK-NEXT:    stp x9, x8, [x29, #-16]
+; CHECK-NEXT:    bl shared_za_callee
+; CHECK-NEXT:    sub x8, x29, #16
+; CHECK-NEXT:    msr TPIDR2_EL0, x8
+; CHECK-NEXT:    bl private_za_callee
+; CHECK-NEXT:    smstart za
+; CHECK-NEXT:    mrs x8, TPIDR2_EL0
+; CHECK-NEXT:    sub x0, x29, #16
+; CHECK-NEXT:    cbnz x8, .LBB6_2
+; CHECK-NEXT:  // %bb.1:
+; CHECK-NEXT:    bl __arm_tpidr2_restore
+; CHECK-NEXT:  .LBB6_2:
+; CHECK-NEXT:    msr TPIDR2_EL0, xzr
+; CHECK-NEXT:    bl shared_za_callee
+; CHECK-NEXT:    mov sp, x29
+; CHECK-NEXT:    ldr x19, [sp, #16] // 8-byte Reload
+; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
   call void @shared_za_callee()
   call void @private_za_callee()
   call void @shared_za_callee()
@@ -515,12 +300,12 @@ define void @test_shared_private_shared() nounwind "aarch64_inout_za" {
 }
 
 define void @test_only_shared_za() nounwind "aarch64_inout_za" {
-; CHECK-COMMON-LABEL: test_only_shared_za:
-; CHECK-COMMON:       // %bb.0:
-; CHECK-COMMON-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-COMMON-NEXT:    bl shared_za_callee
-; CHECK-COMMON-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: test_only_shared_za:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    bl shared_za_callee
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   call void @shared_za_callee()
   ret void
 }
@@ -529,36 +314,36 @@ declare i64 @shared_za_callee_i64(i64) "aarch64_inout_za"
 declare i64 @private_za_callee_i64(i64)
 
 define i64 @test_shared_private_shared_i64(i64 %x) nounwind "aarch64_inout_za" {
-; CHECK-COMMON-LABEL: test_shared_private_shared_i64:
-; CHECK-COMMON:       // %bb.0:
-; CHECK-COMMON-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    str x19, [sp, #16] // 8-byte Spill
-; CHECK-COMMON-NEXT:    mov x29, sp
-; CHECK-COMMON-NEXT:    sub sp, sp, #16
-; CHECK-COMMON-NEXT:    rdsvl x8, #1
-; CHECK-COMMON-NEXT:    mov x9, sp
-; CHECK-COMMON-NEXT:    msub x9, x8, x8, x9
-; CHECK-COMMON-NEXT:    mov sp, x9
-; CHECK-COMMON-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-COMMON-NEXT:    bl shared_za_callee_i64
-; CHECK-COMMON-NEXT:    sub x8, x29, #16
-; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, x8
-; CHECK-COMMON-NEXT:    bl private_za_callee_i64
-; CHECK-COMMON-NEXT:    mov x1, x0
-; CHECK-COMMON-NEXT:    smstart za
-; CHECK-COMMON-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-COMMON-NEXT:    sub x0, x29, #16
-; CHECK-COMMON-NEXT:    cbnz x8, .LBB8_2
-; CHECK-COMMON-NEXT:  // %bb.1:
-; CHECK-COMMON-NEXT:    bl __arm_tpidr2_restore
-; CHECK-COMMON-NEXT:  .LBB8_2:
-; CHECK-COMMON-NEXT:    mov x0, x1
-; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-COMMON-NEXT:    bl shared_za_callee_i64
-; CHECK-COMMON-NEXT:    mov sp, x29
-; CHECK-COMMON-NEXT:    ldr x19, [sp, #16] // 8-byte Reload
-; CHECK-COMMON-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: test_shared_private_shared_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT:    str x19, [sp, #16] // 8-byte Spill
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    rdsvl x8, #1
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    msub x9, x8, x8, x9
+; CHECK-NEXT:    mov sp, x9
+; CHECK-NEXT:    stp x9, x8, [x29, #-16]
+; CHECK-NEXT:    bl shared_za_callee_i64
+; CHECK-NEXT:    sub x8, x29, #16
+; CHECK-NEXT:    msr TPIDR2_EL0, x8
+; CHECK-NEXT:    bl private_za_callee_i64
+; CHECK-NEXT:    mov x1, x0
+; CHECK-NEXT:    smstart za
+; CHECK-NEXT:    mrs x8, TPIDR2_EL0
+; CHECK-NEXT:    sub x0, x29, #16
+; CHECK-NEXT:    cbnz x8, .LBB8_2
+; CHECK-NEXT:  // %bb.1:
+; CHECK-NEXT:    bl __arm_tpidr2_restore
+; CHECK-NEXT:  .LBB8_2:
+; CHECK-NEXT:    mov x0, x1
+; CHECK-NEXT:    msr TPIDR2_EL0, xzr
+; CHECK-NEXT:    bl shared_za_callee_i64
+; CHECK-NEXT:    mov sp, x29
+; CHECK-NEXT:    ldr x19, [sp, #16] // 8-byte Reload
+; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
   %a = call i64 @shared_za_callee_i64(i64 %x)
   %b = call i64 @private_za_callee_i64(i64 %a)
   %c = call i64 @shared_za_callee_i64(i64 %b)
@@ -572,37 +357,6 @@ declare i64 @many_args_private_za_callee(
 ; stack pointer before the call -- in this test the lazy save should be setup
 ; before the stack decrement.
 define i64  @test_many_callee_arguments(
-; CHECK-SDAG-LABEL: test_many_callee_arguments:
-; CHECK-SDAG:       // %bb.0:
-; CHECK-SDAG-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    str x19, [sp, #16] // 8-byte Spill
-; CHECK-SDAG-NEXT:    mov x29, sp
-; CHECK-SDAG-NEXT:    sub sp, sp, #16
-; CHECK-SDAG-NEXT:    mov x8, sp
-; CHECK-SDAG-NEXT:    rdsvl x9, #1
-; CHECK-SDAG-NEXT:    msub x8, x9, x9, x8
-; CHECK-SDAG-NEXT:    mov sp, x8
-; CHECK-SDAG-NEXT:    ldp x10, x11, [x29, #32]
-; CHECK-SDAG-NEXT:    sub x12, x29, #16
-; CHECK-SDAG-NEXT:    stp x8, x9, [x29, #-16]
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x12
-; CHECK-SDAG-NEXT:    stp x10, x11, [sp, #-16]!
-; CHECK-SDAG-NEXT:    bl many_args_private_za_callee
-; CHECK-SDAG-NEXT:    add sp, sp, #16
-; CHECK-SDAG-NEXT:    mov x1, x0
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB9_2
-; CHECK-SDAG-NEXT:  // %bb.1:
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB9_2:
-; CHECK-SDAG-NEXT:    mov x0, x1
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    mov sp, x29
-; CHECK-SDAG-NEXT:    ldr x19, [sp, #16] // 8-byte Reload
-; CHECK-SDAG-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ret
 ;
 ; CHECK-LABEL: test_many_callee_arguments:
 ; CHECK:       // %bb.0:

diff  --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-windows.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-windows.ll
index 648cba57b95cf..6e2d2c0269757 100644
--- a/llvm/test/CodeGen/AArch64/sme-lazy-save-windows.ll
+++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-windows.ll
@@ -1,6 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=aarch64-windows-msvc -aarch64-streaming-hazard-size=0 -mattr=+sve,+sme < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-windows-msvc -aarch64-streaming-hazard-size=0 -mattr=+sve,+sme -aarch64-new-sme-abi < %s | FileCheck %s
 
 declare void @private_za_callee()
 declare void @shared_za_callee() "aarch64_inout_za"

diff  --git a/llvm/test/CodeGen/AArch64/sme-lazy-sve-nzcv-live.mir b/llvm/test/CodeGen/AArch64/sme-lazy-sve-nzcv-live.mir
index 9f33c0614cee0..9a2811bc1abef 100644
--- a/llvm/test/CodeGen/AArch64/sme-lazy-sve-nzcv-live.mir
+++ b/llvm/test/CodeGen/AArch64/sme-lazy-sve-nzcv-live.mir
@@ -1,5 +1,5 @@
 # RUN: llc -mtriple=aarch64 -mattr=+sve -mattr=+sme -run-pass=aarch64-machine-sme-abi -verify-machineinstrs %s -o - | FileCheck %s
-# RUN: llc -mtriple=aarch64 -mattr=+sve -mattr=+sme -aarch64-new-sme-abi %s -o - | FileCheck %s --check-prefix=CHECK-ASM
+# RUN: llc -mtriple=aarch64 -mattr=+sve -mattr=+sme %s -o - | FileCheck %s --check-prefix=CHECK-ASM
 
 # This tests the unfortunate case the status flags ($nzcv) are live at the point
 # we want to restore ZA. Currently, this is handled by saving them to a scratch

diff  --git a/llvm/test/CodeGen/AArch64/sme-new-za-function.ll b/llvm/test/CodeGen/AArch64/sme-new-za-function.ll
index 6995cfae8e459..bc32dc26228ea 100644
--- a/llvm/test/CodeGen/AArch64/sme-new-za-function.ll
+++ b/llvm/test/CodeGen/AArch64/sme-new-za-function.ll
@@ -1,32 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -O0 -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs -aarch64-new-sme-abi=false < %s | FileCheck %s --check-prefix=CHECK-SDAG
 ; RUN: llc -O0 -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
 
 declare void @shared_za_callee() "aarch64_inout_za"
 
 define void @private_za() "aarch64_new_za" {
-; CHECK-SDAG-LABEL: private_za:
-; CHECK-SDAG:       // %bb.0: // %prelude
-; CHECK-SDAG-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-SDAG-NEXT:    .cfi_offset w30, -16
-; CHECK-SDAG-NEXT:    rdsvl x8, #1
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    cbz x8, .LBB0_2
-; CHECK-SDAG-NEXT:    b .LBB0_1
-; CHECK-SDAG-NEXT:  .LBB0_1: // %save.za
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_save
-; CHECK-SDAG-NEXT:    mov x8, xzr
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x8
-; CHECK-SDAG-NEXT:    b .LBB0_2
-; CHECK-SDAG-NEXT:  .LBB0_2:
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    zero {za}
-; CHECK-SDAG-NEXT:    bl shared_za_callee
-; CHECK-SDAG-NEXT:    smstop za
-; CHECK-SDAG-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-SDAG-NEXT:    ret
-;
 ; CHECK-LABEL: private_za:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -52,48 +29,6 @@ define void @private_za() "aarch64_new_za" {
 
 ; Note: This test must run at -O0 as otherwise the multiple exits are optimized out.
 define i32 @private_za_multiple_exit(i32 %a, i32 %b, i64 %cond) "aarch64_new_za" {
-; CHECK-SDAG-LABEL: private_za_multiple_exit:
-; CHECK-SDAG:       // %bb.0: // %prelude
-; CHECK-SDAG-NEXT:    sub sp, sp, #32
-; CHECK-SDAG-NEXT:    str x30, [sp, #16] // 8-byte Spill
-; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-SDAG-NEXT:    .cfi_offset w30, -16
-; CHECK-SDAG-NEXT:    str x2, [sp] // 8-byte Spill
-; CHECK-SDAG-NEXT:    str w1, [sp, #8] // 4-byte Spill
-; CHECK-SDAG-NEXT:    str w0, [sp, #12] // 4-byte Spill
-; CHECK-SDAG-NEXT:    rdsvl x8, #1
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    cbz x8, .LBB1_2
-; CHECK-SDAG-NEXT:    b .LBB1_1
-; CHECK-SDAG-NEXT:  .LBB1_1: // %save.za
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_save
-; CHECK-SDAG-NEXT:    mov x8, xzr
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x8
-; CHECK-SDAG-NEXT:    b .LBB1_2
-; CHECK-SDAG-NEXT:  .LBB1_2: // %entry
-; CHECK-SDAG-NEXT:    ldr x8, [sp] // 8-byte Reload
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    zero {za}
-; CHECK-SDAG-NEXT:    subs x8, x8, #1
-; CHECK-SDAG-NEXT:    b.ne .LBB1_4
-; CHECK-SDAG-NEXT:    b .LBB1_3
-; CHECK-SDAG-NEXT:  .LBB1_3: // %if.else
-; CHECK-SDAG-NEXT:    ldr w8, [sp, #12] // 4-byte Reload
-; CHECK-SDAG-NEXT:    ldr w9, [sp, #8] // 4-byte Reload
-; CHECK-SDAG-NEXT:    add w0, w8, w9
-; CHECK-SDAG-NEXT:    smstop za
-; CHECK-SDAG-NEXT:    ldr x30, [sp, #16] // 8-byte Reload
-; CHECK-SDAG-NEXT:    add sp, sp, #32
-; CHECK-SDAG-NEXT:    ret
-; CHECK-SDAG-NEXT:  .LBB1_4: // %if.end
-; CHECK-SDAG-NEXT:    ldr w8, [sp, #12] // 4-byte Reload
-; CHECK-SDAG-NEXT:    ldr w9, [sp, #8] // 4-byte Reload
-; CHECK-SDAG-NEXT:    subs w0, w8, w9
-; CHECK-SDAG-NEXT:    smstop za
-; CHECK-SDAG-NEXT:    ldr x30, [sp, #16] // 8-byte Reload
-; CHECK-SDAG-NEXT:    add sp, sp, #32
-; CHECK-SDAG-NEXT:    ret
-;
 ; CHECK-LABEL: private_za_multiple_exit:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub sp, sp, #16
@@ -130,32 +65,6 @@ if.end:
 
 ; In simple cases like this we should omit all ZA setup.
 define i32 @private_za_trivially_does_not_use_za(i32 %x) "aarch64_new_za" {
-; CHECK-SDAG-LABEL: private_za_trivially_does_not_use_za:
-; CHECK-SDAG:       // %bb.0: // %prelude
-; CHECK-SDAG-NEXT:    sub sp, sp, #32
-; CHECK-SDAG-NEXT:    str x30, [sp, #16] // 8-byte Spill
-; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-SDAG-NEXT:    .cfi_offset w30, -16
-; CHECK-SDAG-NEXT:    str w0, [sp, #12] // 4-byte Spill
-; CHECK-SDAG-NEXT:    rdsvl x8, #1
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    cbz x8, .LBB2_2
-; CHECK-SDAG-NEXT:    b .LBB2_1
-; CHECK-SDAG-NEXT:  .LBB2_1: // %save.za
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_save
-; CHECK-SDAG-NEXT:    mov x8, xzr
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x8
-; CHECK-SDAG-NEXT:    b .LBB2_2
-; CHECK-SDAG-NEXT:  .LBB2_2:
-; CHECK-SDAG-NEXT:    ldr w8, [sp, #12] // 4-byte Reload
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    zero {za}
-; CHECK-SDAG-NEXT:    add w0, w8, w8
-; CHECK-SDAG-NEXT:    smstop za
-; CHECK-SDAG-NEXT:    ldr x30, [sp, #16] // 8-byte Reload
-; CHECK-SDAG-NEXT:    add sp, sp, #32
-; CHECK-SDAG-NEXT:    ret
-;
 ; CHECK-LABEL: private_za_trivially_does_not_use_za:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    add w0, w0, w0

diff  --git a/llvm/test/CodeGen/AArch64/sme-new-zt0-function.ll b/llvm/test/CodeGen/AArch64/sme-new-zt0-function.ll
deleted file mode 100644
index 94968ab4fd9ac..0000000000000
--- a/llvm/test/CodeGen/AArch64/sme-new-zt0-function.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: opt -S -mtriple=aarch64-linux-gnu -aarch64-sme-abi %s | FileCheck %s
-
-declare void @callee();
-
-define void @private_za() "aarch64_new_zt0" {
-  call void @callee()
-  ret void
-}
-
-; CHECK: call aarch64_sme_preservemost_from_x0 void @__arm_tpidr2_save() #[[TPIDR2_SAVE_CALL_ATTR:[0-9]+]]
-; CHECK: declare void @__arm_tpidr2_save() #[[TPIDR2_SAVE_DECL_ATTR:[0-9]+]]
-
-; CHECK: attributes #[[TPIDR2_SAVE_DECL_ATTR]] = { "aarch64_pstate_sm_compatible" }
-; CHECK: attributes #[[TPIDR2_SAVE_CALL_ATTR]] = { "aarch64_zt0_undef" }

diff  --git a/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll b/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll
index ea1341186ddfa..bdfddad32ff3a 100644
--- a/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll
+++ b/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-new-sme-abi -aarch64-streaming-hazard-size=0 -mattr=+sve,+sme2 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve,+sme2 < %s | FileCheck %s
 
 declare void @callee()
 declare void @callee_sm() "aarch64_pstate_sm_enabled"

diff  --git a/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll b/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
index c8915aac56084..26f289e9699b3 100644
--- a/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
@@ -1,6 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs -aarch64-new-sme-abi  < %s | FileCheck %s --check-prefix=CHECK-NEWLOWERING
 
 declare void @private_za_callee()
 
@@ -30,31 +29,6 @@ define void @disable_tailcallopt() "aarch64_inout_za" nounwind {
 ; CHECK-NEXT:    mov sp, x29
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
 ; CHECK-NEXT:    ret
-;
-; CHECK-NEWLOWERING-LABEL: disable_tailcallopt:
-; CHECK-NEWLOWERING:       // %bb.0:
-; CHECK-NEWLOWERING-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEWLOWERING-NEXT:    mov x29, sp
-; CHECK-NEWLOWERING-NEXT:    sub sp, sp, #16
-; CHECK-NEWLOWERING-NEXT:    rdsvl x8, #1
-; CHECK-NEWLOWERING-NEXT:    mov x9, sp
-; CHECK-NEWLOWERING-NEXT:    msub x9, x8, x8, x9
-; CHECK-NEWLOWERING-NEXT:    mov sp, x9
-; CHECK-NEWLOWERING-NEXT:    sub x10, x29, #16
-; CHECK-NEWLOWERING-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, x10
-; CHECK-NEWLOWERING-NEXT:    bl private_za_callee
-; CHECK-NEWLOWERING-NEXT:    smstart za
-; CHECK-NEWLOWERING-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-NEWLOWERING-NEXT:    sub x0, x29, #16
-; CHECK-NEWLOWERING-NEXT:    cbnz x8, .LBB0_2
-; CHECK-NEWLOWERING-NEXT:  // %bb.1:
-; CHECK-NEWLOWERING-NEXT:    bl __arm_tpidr2_restore
-; CHECK-NEWLOWERING-NEXT:  .LBB0_2:
-; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-NEWLOWERING-NEXT:    mov sp, x29
-; CHECK-NEWLOWERING-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEWLOWERING-NEXT:    ret
   tail call void @private_za_callee()
   ret void
 }
@@ -85,31 +59,6 @@ define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_inout_za" nounwind {
 ; CHECK-NEXT:    mov sp, x29
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
 ; CHECK-NEXT:    ret
-;
-; CHECK-NEWLOWERING-LABEL: f128_call_za:
-; CHECK-NEWLOWERING:       // %bb.0:
-; CHECK-NEWLOWERING-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEWLOWERING-NEXT:    mov x29, sp
-; CHECK-NEWLOWERING-NEXT:    sub sp, sp, #16
-; CHECK-NEWLOWERING-NEXT:    rdsvl x8, #1
-; CHECK-NEWLOWERING-NEXT:    mov x9, sp
-; CHECK-NEWLOWERING-NEXT:    msub x9, x8, x8, x9
-; CHECK-NEWLOWERING-NEXT:    mov sp, x9
-; CHECK-NEWLOWERING-NEXT:    sub x10, x29, #16
-; CHECK-NEWLOWERING-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, x10
-; CHECK-NEWLOWERING-NEXT:    bl __addtf3
-; CHECK-NEWLOWERING-NEXT:    smstart za
-; CHECK-NEWLOWERING-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-NEWLOWERING-NEXT:    sub x0, x29, #16
-; CHECK-NEWLOWERING-NEXT:    cbnz x8, .LBB1_2
-; CHECK-NEWLOWERING-NEXT:  // %bb.1:
-; CHECK-NEWLOWERING-NEXT:    bl __arm_tpidr2_restore
-; CHECK-NEWLOWERING-NEXT:  .LBB1_2:
-; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-NEWLOWERING-NEXT:    mov sp, x29
-; CHECK-NEWLOWERING-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEWLOWERING-NEXT:    ret
   %res = fadd fp128 %a, %b
   ret fp128 %res
 }

diff  --git a/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll b/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll
index aae1d3b756f4e..8068e11b37a65 100644
--- a/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll
+++ b/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll
@@ -1,51 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -aarch64-new-sme-abi=false < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-SDAG
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
 
 declare void @private_za_call()
 declare void @shared_za_call() "aarch64_inout_za"
 
 define void @private_za_loop(i32 %n) "aarch64_inout_za" nounwind {
-; CHECK-SDAG-LABEL: private_za_loop:
-; CHECK-SDAG:       // %bb.0: // %entry
-; CHECK-SDAG-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    mov x29, sp
-; CHECK-SDAG-NEXT:    sub sp, sp, #16
-; CHECK-SDAG-NEXT:    rdsvl x8, #1
-; CHECK-SDAG-NEXT:    mov x9, sp
-; CHECK-SDAG-NEXT:    msub x9, x8, x8, x9
-; CHECK-SDAG-NEXT:    mov sp, x9
-; CHECK-SDAG-NEXT:    cmp w0, #1
-; CHECK-SDAG-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-SDAG-NEXT:    b.lt .LBB0_5
-; CHECK-SDAG-NEXT:  // %bb.1: // %loop.preheader
-; CHECK-SDAG-NEXT:    mov w19, w0
-; CHECK-SDAG-NEXT:    sub x20, x29, #16
-; CHECK-SDAG-NEXT:    b .LBB0_3
-; CHECK-SDAG-NEXT:  .LBB0_2: // %loop
-; CHECK-SDAG-NEXT:    // in Loop: Header=BB0_3 Depth=1
-; CHECK-SDAG-NEXT:    subs w19, w19, #1
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    b.eq .LBB0_5
-; CHECK-SDAG-NEXT:  .LBB0_3: // %loop
-; CHECK-SDAG-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x20
-; CHECK-SDAG-NEXT:    bl private_za_call
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB0_2
-; CHECK-SDAG-NEXT:  // %bb.4: // %loop
-; CHECK-SDAG-NEXT:    // in Loop: Header=BB0_3 Depth=1
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:    b .LBB0_2
-; CHECK-SDAG-NEXT:  .LBB0_5: // %exit
-; CHECK-SDAG-NEXT:    mov sp, x29
-; CHECK-SDAG-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ret
-;
 ; CHECK-LABEL: private_za_loop:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
@@ -102,47 +61,6 @@ exit:
 
 ; FIXME: In the new lowering we could weight edges to avoid doing the lazy save in the loop.
 define void @private_za_loop_active_entry_and_exit(i32 %n) "aarch64_inout_za" nounwind {
-; CHECK-SDAG-LABEL: private_za_loop_active_entry_and_exit:
-; CHECK-SDAG:       // %bb.0: // %entry
-; CHECK-SDAG-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    mov x29, sp
-; CHECK-SDAG-NEXT:    sub sp, sp, #16
-; CHECK-SDAG-NEXT:    rdsvl x8, #1
-; CHECK-SDAG-NEXT:    mov x9, sp
-; CHECK-SDAG-NEXT:    mov w19, w0
-; CHECK-SDAG-NEXT:    msub x9, x8, x8, x9
-; CHECK-SDAG-NEXT:    mov sp, x9
-; CHECK-SDAG-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-SDAG-NEXT:    bl shared_za_call
-; CHECK-SDAG-NEXT:    cmp w19, #1
-; CHECK-SDAG-NEXT:    b.lt .LBB1_5
-; CHECK-SDAG-NEXT:  // %bb.1: // %loop.preheader
-; CHECK-SDAG-NEXT:    sub x20, x29, #16
-; CHECK-SDAG-NEXT:    b .LBB1_3
-; CHECK-SDAG-NEXT:  .LBB1_2: // %loop
-; CHECK-SDAG-NEXT:    // in Loop: Header=BB1_3 Depth=1
-; CHECK-SDAG-NEXT:    subs w19, w19, #1
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    b.eq .LBB1_5
-; CHECK-SDAG-NEXT:  .LBB1_3: // %loop
-; CHECK-SDAG-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x20
-; CHECK-SDAG-NEXT:    bl private_za_call
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB1_2
-; CHECK-SDAG-NEXT:  // %bb.4: // %loop
-; CHECK-SDAG-NEXT:    // in Loop: Header=BB1_3 Depth=1
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:    b .LBB1_2
-; CHECK-SDAG-NEXT:  .LBB1_5: // %exit
-; CHECK-SDAG-NEXT:    mov sp, x29
-; CHECK-SDAG-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    b shared_za_call
-;
 ; CHECK-LABEL: private_za_loop_active_entry_and_exit:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
@@ -201,22 +119,22 @@ exit:
 }
 
 define void @shared_za_loop(i32 %n) "aarch64_inout_za" nounwind {
-; CHECK-COMMON-LABEL: shared_za_loop:
-; CHECK-COMMON:       // %bb.0: // %entry
-; CHECK-COMMON-NEXT:    cmp w0, #1
-; CHECK-COMMON-NEXT:    b.lt .LBB2_4
-; CHECK-COMMON-NEXT:  // %bb.1: // %loop.preheader
-; CHECK-COMMON-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    mov w19, w0
-; CHECK-COMMON-NEXT:  .LBB2_2: // %loop
-; CHECK-COMMON-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-COMMON-NEXT:    bl shared_za_call
-; CHECK-COMMON-NEXT:    subs w19, w19, #1
-; CHECK-COMMON-NEXT:    b.ne .LBB2_2
-; CHECK-COMMON-NEXT:  // %bb.3:
-; CHECK-COMMON-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:  .LBB2_4: // %exit
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: shared_za_loop:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cmp w0, #1
+; CHECK-NEXT:    b.lt .LBB2_4
+; CHECK-NEXT:  // %bb.1: // %loop.preheader
+; CHECK-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    mov w19, w0
+; CHECK-NEXT:  .LBB2_2: // %loop
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    bl shared_za_call
+; CHECK-NEXT:    subs w19, w19, #1
+; CHECK-NEXT:    b.ne .LBB2_2
+; CHECK-NEXT:  // %bb.3:
+; CHECK-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:  .LBB2_4: // %exit
+; CHECK-NEXT:    ret
 entry:
   %cmpgt = icmp sgt i32 %n, 0
   br i1 %cmpgt, label %loop, label %exit
@@ -233,33 +151,33 @@ exit:
 }
 
 define void @cond_private_za_call(i1 %cond) "aarch64_inout_za" nounwind {
-; CHECK-COMMON-LABEL: cond_private_za_call:
-; CHECK-COMMON:       // %bb.0:
-; CHECK-COMMON-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    mov x29, sp
-; CHECK-COMMON-NEXT:    sub sp, sp, #16
-; CHECK-COMMON-NEXT:    rdsvl x8, #1
-; CHECK-COMMON-NEXT:    mov x9, sp
-; CHECK-COMMON-NEXT:    msub x9, x8, x8, x9
-; CHECK-COMMON-NEXT:    mov sp, x9
-; CHECK-COMMON-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-COMMON-NEXT:    tbz w0, #0, .LBB3_4
-; CHECK-COMMON-NEXT:  // %bb.1: // %private_za_call
-; CHECK-COMMON-NEXT:    sub x8, x29, #16
-; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, x8
-; CHECK-COMMON-NEXT:    bl private_za_call
-; CHECK-COMMON-NEXT:    smstart za
-; CHECK-COMMON-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-COMMON-NEXT:    sub x0, x29, #16
-; CHECK-COMMON-NEXT:    cbnz x8, .LBB3_3
-; CHECK-COMMON-NEXT:  // %bb.2: // %private_za_call
-; CHECK-COMMON-NEXT:    bl __arm_tpidr2_restore
-; CHECK-COMMON-NEXT:  .LBB3_3: // %private_za_call
-; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-COMMON-NEXT:  .LBB3_4: // %exit
-; CHECK-COMMON-NEXT:    mov sp, x29
-; CHECK-COMMON-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    b shared_za_call
+; CHECK-LABEL: cond_private_za_call:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    rdsvl x8, #1
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    msub x9, x8, x8, x9
+; CHECK-NEXT:    mov sp, x9
+; CHECK-NEXT:    stp x9, x8, [x29, #-16]
+; CHECK-NEXT:    tbz w0, #0, .LBB3_4
+; CHECK-NEXT:  // %bb.1: // %private_za_call
+; CHECK-NEXT:    sub x8, x29, #16
+; CHECK-NEXT:    msr TPIDR2_EL0, x8
+; CHECK-NEXT:    bl private_za_call
+; CHECK-NEXT:    smstart za
+; CHECK-NEXT:    mrs x8, TPIDR2_EL0
+; CHECK-NEXT:    sub x0, x29, #16
+; CHECK-NEXT:    cbnz x8, .LBB3_3
+; CHECK-NEXT:  // %bb.2: // %private_za_call
+; CHECK-NEXT:    bl __arm_tpidr2_restore
+; CHECK-NEXT:  .LBB3_3: // %private_za_call
+; CHECK-NEXT:    msr TPIDR2_EL0, xzr
+; CHECK-NEXT:  .LBB3_4: // %exit
+; CHECK-NEXT:    mov sp, x29
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    b shared_za_call
   br i1 %cond, label %private_za_call, label %exit
 
 private_za_call:
@@ -272,45 +190,6 @@ exit:
 }
 
 define void @mixed_shared_private_za_loop(ptr %cond) "aarch64_inout_za" nounwind {
-; CHECK-SDAG-LABEL: mixed_shared_private_za_loop:
-; CHECK-SDAG:       // %bb.0:
-; CHECK-SDAG-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    mov x29, sp
-; CHECK-SDAG-NEXT:    sub sp, sp, #16
-; CHECK-SDAG-NEXT:    rdsvl x8, #1
-; CHECK-SDAG-NEXT:    mov x9, sp
-; CHECK-SDAG-NEXT:    mov x19, x0
-; CHECK-SDAG-NEXT:    msub x9, x8, x8, x9
-; CHECK-SDAG-NEXT:    mov sp, x9
-; CHECK-SDAG-NEXT:    sub x20, x29, #16
-; CHECK-SDAG-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-SDAG-NEXT:    b .LBB4_2
-; CHECK-SDAG-NEXT:  .LBB4_1: // %loop
-; CHECK-SDAG-NEXT:    // in Loop: Header=BB4_2 Depth=1
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    ldrb w8, [x19]
-; CHECK-SDAG-NEXT:    tbz w8, #0, .LBB4_4
-; CHECK-SDAG-NEXT:  .LBB4_2: // %loop
-; CHECK-SDAG-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-SDAG-NEXT:    bl shared_za_call
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x20
-; CHECK-SDAG-NEXT:    bl private_za_call
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB4_1
-; CHECK-SDAG-NEXT:  // %bb.3: // %loop
-; CHECK-SDAG-NEXT:    // in Loop: Header=BB4_2 Depth=1
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:    b .LBB4_1
-; CHECK-SDAG-NEXT:  .LBB4_4: // %exit
-; CHECK-SDAG-NEXT:    bl shared_za_call
-; CHECK-SDAG-NEXT:    mov sp, x29
-; CHECK-SDAG-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ret
-;
 ; CHECK-LABEL: mixed_shared_private_za_loop:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
@@ -368,49 +247,6 @@ exit:
 
 
 define void @cond_clobber_followed_by_clobber(i1 %cond) "aarch64_inout_za" nounwind {
-; CHECK-SDAG-LABEL: cond_clobber_followed_by_clobber:
-; CHECK-SDAG:       // %bb.0:
-; CHECK-SDAG-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    str x19, [sp, #16] // 8-byte Spill
-; CHECK-SDAG-NEXT:    mov x29, sp
-; CHECK-SDAG-NEXT:    sub sp, sp, #16
-; CHECK-SDAG-NEXT:    rdsvl x8, #1
-; CHECK-SDAG-NEXT:    mov x9, sp
-; CHECK-SDAG-NEXT:    mov w19, w0
-; CHECK-SDAG-NEXT:    msub x9, x8, x8, x9
-; CHECK-SDAG-NEXT:    mov sp, x9
-; CHECK-SDAG-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-SDAG-NEXT:    bl shared_za_call
-; CHECK-SDAG-NEXT:    tbz w19, #0, .LBB5_4
-; CHECK-SDAG-NEXT:  // %bb.1: // %cond_clobber
-; CHECK-SDAG-NEXT:    sub x8, x29, #16
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x8
-; CHECK-SDAG-NEXT:    bl private_za_call
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB5_3
-; CHECK-SDAG-NEXT:  // %bb.2: // %cond_clobber
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB5_3: // %cond_clobber
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:  .LBB5_4: // %exit
-; CHECK-SDAG-NEXT:    sub x8, x29, #16
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x8
-; CHECK-SDAG-NEXT:    bl private_za_call
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB5_6
-; CHECK-SDAG-NEXT:  // %bb.5: // %exit
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB5_6: // %exit
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    mov sp, x29
-; CHECK-SDAG-NEXT:    ldr x19, [sp, #16] // 8-byte Reload
-; CHECK-SDAG-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    b shared_za_call
-;
 ; CHECK-LABEL: cond_clobber_followed_by_clobber:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
@@ -457,34 +293,34 @@ exit:
 }
 
 define void @conditionally_use_za(i1 %cond) "aarch64_inout_za" nounwind {
-; CHECK-COMMON-LABEL: conditionally_use_za:
-; CHECK-COMMON:       // %bb.0:
-; CHECK-COMMON-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    mov x29, sp
-; CHECK-COMMON-NEXT:    sub sp, sp, #16
-; CHECK-COMMON-NEXT:    rdsvl x8, #1
-; CHECK-COMMON-NEXT:    mov x9, sp
-; CHECK-COMMON-NEXT:    msub x9, x8, x8, x9
-; CHECK-COMMON-NEXT:    mov sp, x9
-; CHECK-COMMON-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-COMMON-NEXT:    tbz w0, #0, .LBB6_4
-; CHECK-COMMON-NEXT:  // %bb.1: // %use_za
-; CHECK-COMMON-NEXT:    bl shared_za_call
-; CHECK-COMMON-NEXT:    sub x8, x29, #16
-; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, x8
-; CHECK-COMMON-NEXT:    bl private_za_call
-; CHECK-COMMON-NEXT:    smstart za
-; CHECK-COMMON-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-COMMON-NEXT:    sub x0, x29, #16
-; CHECK-COMMON-NEXT:    cbnz x8, .LBB6_3
-; CHECK-COMMON-NEXT:  // %bb.2: // %use_za
-; CHECK-COMMON-NEXT:    bl __arm_tpidr2_restore
-; CHECK-COMMON-NEXT:  .LBB6_3: // %use_za
-; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-COMMON-NEXT:  .LBB6_4: // %exit
-; CHECK-COMMON-NEXT:    mov sp, x29
-; CHECK-COMMON-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: conditionally_use_za:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    rdsvl x8, #1
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    msub x9, x8, x8, x9
+; CHECK-NEXT:    mov sp, x9
+; CHECK-NEXT:    stp x9, x8, [x29, #-16]
+; CHECK-NEXT:    tbz w0, #0, .LBB6_4
+; CHECK-NEXT:  // %bb.1: // %use_za
+; CHECK-NEXT:    bl shared_za_call
+; CHECK-NEXT:    sub x8, x29, #16
+; CHECK-NEXT:    msr TPIDR2_EL0, x8
+; CHECK-NEXT:    bl private_za_call
+; CHECK-NEXT:    smstart za
+; CHECK-NEXT:    mrs x8, TPIDR2_EL0
+; CHECK-NEXT:    sub x0, x29, #16
+; CHECK-NEXT:    cbnz x8, .LBB6_3
+; CHECK-NEXT:  // %bb.2: // %use_za
+; CHECK-NEXT:    bl __arm_tpidr2_restore
+; CHECK-NEXT:  .LBB6_3: // %use_za
+; CHECK-NEXT:    msr TPIDR2_EL0, xzr
+; CHECK-NEXT:  .LBB6_4: // %exit
+; CHECK-NEXT:    mov sp, x29
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
   br i1 %cond, label %use_za, label %exit
 
 use_za:
@@ -498,37 +334,37 @@ exit:
 
 
 define void @diamond_mixed_za_merge_shared(i1 %cond) "aarch64_inout_za" nounwind {
-; CHECK-COMMON-LABEL: diamond_mixed_za_merge_shared:
-; CHECK-COMMON:       // %bb.0: // %entry
-; CHECK-COMMON-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    mov x29, sp
-; CHECK-COMMON-NEXT:    sub sp, sp, #16
-; CHECK-COMMON-NEXT:    rdsvl x8, #1
-; CHECK-COMMON-NEXT:    mov x9, sp
-; CHECK-COMMON-NEXT:    msub x9, x8, x8, x9
-; CHECK-COMMON-NEXT:    mov sp, x9
-; CHECK-COMMON-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-COMMON-NEXT:    tbz w0, #0, .LBB7_2
-; CHECK-COMMON-NEXT:  // %bb.1: // %then
-; CHECK-COMMON-NEXT:    bl shared_za_call
-; CHECK-COMMON-NEXT:    b .LBB7_5
-; CHECK-COMMON-NEXT:  .LBB7_2: // %else
-; CHECK-COMMON-NEXT:    sub x8, x29, #16
-; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, x8
-; CHECK-COMMON-NEXT:    bl private_za_call
-; CHECK-COMMON-NEXT:    smstart za
-; CHECK-COMMON-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-COMMON-NEXT:    sub x0, x29, #16
-; CHECK-COMMON-NEXT:    cbnz x8, .LBB7_4
-; CHECK-COMMON-NEXT:  // %bb.3: // %else
-; CHECK-COMMON-NEXT:    bl __arm_tpidr2_restore
-; CHECK-COMMON-NEXT:  .LBB7_4: // %else
-; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-COMMON-NEXT:  .LBB7_5: // %merge_shared
-; CHECK-COMMON-NEXT:    bl shared_za_call
-; CHECK-COMMON-NEXT:    mov sp, x29
-; CHECK-COMMON-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: diamond_mixed_za_merge_shared:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    rdsvl x8, #1
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    msub x9, x8, x8, x9
+; CHECK-NEXT:    mov sp, x9
+; CHECK-NEXT:    stp x9, x8, [x29, #-16]
+; CHECK-NEXT:    tbz w0, #0, .LBB7_2
+; CHECK-NEXT:  // %bb.1: // %then
+; CHECK-NEXT:    bl shared_za_call
+; CHECK-NEXT:    b .LBB7_5
+; CHECK-NEXT:  .LBB7_2: // %else
+; CHECK-NEXT:    sub x8, x29, #16
+; CHECK-NEXT:    msr TPIDR2_EL0, x8
+; CHECK-NEXT:    bl private_za_call
+; CHECK-NEXT:    smstart za
+; CHECK-NEXT:    mrs x8, TPIDR2_EL0
+; CHECK-NEXT:    sub x0, x29, #16
+; CHECK-NEXT:    cbnz x8, .LBB7_4
+; CHECK-NEXT:  // %bb.3: // %else
+; CHECK-NEXT:    bl __arm_tpidr2_restore
+; CHECK-NEXT:  .LBB7_4: // %else
+; CHECK-NEXT:    msr TPIDR2_EL0, xzr
+; CHECK-NEXT:  .LBB7_5: // %merge_shared
+; CHECK-NEXT:    bl shared_za_call
+; CHECK-NEXT:    mov sp, x29
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
 entry:
   br i1 %cond, label %then, label %else
 
@@ -547,48 +383,6 @@ merge_shared:
 
 
 define void @diamond_mixed_za_merge_private(i1 %cond) "aarch64_inout_za" nounwind {
-; CHECK-SDAG-LABEL: diamond_mixed_za_merge_private:
-; CHECK-SDAG:       // %bb.0: // %entry
-; CHECK-SDAG-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    mov x29, sp
-; CHECK-SDAG-NEXT:    sub sp, sp, #16
-; CHECK-SDAG-NEXT:    rdsvl x8, #1
-; CHECK-SDAG-NEXT:    mov x9, sp
-; CHECK-SDAG-NEXT:    msub x9, x8, x8, x9
-; CHECK-SDAG-NEXT:    mov sp, x9
-; CHECK-SDAG-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-SDAG-NEXT:    tbz w0, #0, .LBB8_2
-; CHECK-SDAG-NEXT:  // %bb.1: // %then
-; CHECK-SDAG-NEXT:    bl shared_za_call
-; CHECK-SDAG-NEXT:    b .LBB8_5
-; CHECK-SDAG-NEXT:  .LBB8_2: // %else
-; CHECK-SDAG-NEXT:    sub x8, x29, #16
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x8
-; CHECK-SDAG-NEXT:    bl private_za_call
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB8_4
-; CHECK-SDAG-NEXT:  // %bb.3: // %else
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB8_4: // %else
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:  .LBB8_5: // %merge_private_za
-; CHECK-SDAG-NEXT:    sub x8, x29, #16
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x8
-; CHECK-SDAG-NEXT:    bl private_za_call
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB8_7
-; CHECK-SDAG-NEXT:  // %bb.6: // %merge_private_za
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB8_7: // %merge_private_za
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    mov sp, x29
-; CHECK-SDAG-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ret
-;
 ; CHECK-LABEL: diamond_mixed_za_merge_private:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
@@ -639,56 +433,6 @@ merge_private_za:
 }
 
 define void @critical_edge_mixed_za(i1 %c1, i1 %c2) "aarch64_inout_za" nounwind {
-; CHECK-SDAG-LABEL: critical_edge_mixed_za:
-; CHECK-SDAG:       // %bb.0: // %entry
-; CHECK-SDAG-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    str x19, [sp, #16] // 8-byte Spill
-; CHECK-SDAG-NEXT:    mov x29, sp
-; CHECK-SDAG-NEXT:    sub sp, sp, #16
-; CHECK-SDAG-NEXT:    rdsvl x8, #1
-; CHECK-SDAG-NEXT:    mov x9, sp
-; CHECK-SDAG-NEXT:    mov w19, w1
-; CHECK-SDAG-NEXT:    msub x9, x8, x8, x9
-; CHECK-SDAG-NEXT:    mov sp, x9
-; CHECK-SDAG-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-SDAG-NEXT:    tbz w0, #0, .LBB9_5
-; CHECK-SDAG-NEXT:  // %bb.1: // %shared_path
-; CHECK-SDAG-NEXT:    bl shared_za_call
-; CHECK-SDAG-NEXT:    tbz w19, #0, .LBB9_8
-; CHECK-SDAG-NEXT:  .LBB9_2: // %exit_private
-; CHECK-SDAG-NEXT:    sub x8, x29, #16
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x8
-; CHECK-SDAG-NEXT:    bl private_za_call
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB9_4
-; CHECK-SDAG-NEXT:  // %bb.3: // %exit_private
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB9_4: // %exit_private
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    b .LBB9_9
-; CHECK-SDAG-NEXT:  .LBB9_5: // %private_path
-; CHECK-SDAG-NEXT:    sub x8, x29, #16
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x8
-; CHECK-SDAG-NEXT:    bl private_za_call
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB9_7
-; CHECK-SDAG-NEXT:  // %bb.6: // %private_path
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB9_7: // %private_path
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    tbnz w19, #0, .LBB9_2
-; CHECK-SDAG-NEXT:  .LBB9_8: // %exit_shared
-; CHECK-SDAG-NEXT:    bl shared_za_call
-; CHECK-SDAG-NEXT:  .LBB9_9: // %common.ret
-; CHECK-SDAG-NEXT:    mov sp, x29
-; CHECK-SDAG-NEXT:    ldr x19, [sp, #16] // 8-byte Reload
-; CHECK-SDAG-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ret
-;
 ; CHECK-LABEL: critical_edge_mixed_za:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
@@ -762,58 +506,58 @@ exit_shared:
 }
 
 define void @nested_cond_in_loop(i32 %n, i1 %cond) "aarch64_inout_za" nounwind {
-; CHECK-COMMON-LABEL: nested_cond_in_loop:
-; CHECK-COMMON:       // %bb.0: // %entry
-; CHECK-COMMON-NEXT:    stp x29, x30, [sp, #-48]! // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    mov x29, sp
-; CHECK-COMMON-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    sub sp, sp, #16
-; CHECK-COMMON-NEXT:    rdsvl x8, #1
-; CHECK-COMMON-NEXT:    mov x9, sp
-; CHECK-COMMON-NEXT:    msub x9, x8, x8, x9
-; CHECK-COMMON-NEXT:    mov sp, x9
-; CHECK-COMMON-NEXT:    cmp w0, #1
-; CHECK-COMMON-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-COMMON-NEXT:    b.lt .LBB10_8
-; CHECK-COMMON-NEXT:  // %bb.1: // %loop.preheader
-; CHECK-COMMON-NEXT:    mov w19, w1
-; CHECK-COMMON-NEXT:    mov w20, w0
-; CHECK-COMMON-NEXT:    mov w21, wzr
-; CHECK-COMMON-NEXT:    sub x22, x29, #16
-; CHECK-COMMON-NEXT:    b .LBB10_4
-; CHECK-COMMON-NEXT:  .LBB10_2: // %use_shared
-; CHECK-COMMON-NEXT:    // in Loop: Header=BB10_4 Depth=1
-; CHECK-COMMON-NEXT:    bl shared_za_call
-; CHECK-COMMON-NEXT:  .LBB10_3: // %latch
-; CHECK-COMMON-NEXT:    // in Loop: Header=BB10_4 Depth=1
-; CHECK-COMMON-NEXT:    add w21, w21, #1
-; CHECK-COMMON-NEXT:    cmp w21, w20
-; CHECK-COMMON-NEXT:    b.ge .LBB10_8
-; CHECK-COMMON-NEXT:  .LBB10_4: // %loop
-; CHECK-COMMON-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-COMMON-NEXT:    tbnz w19, #0, .LBB10_2
-; CHECK-COMMON-NEXT:  // %bb.5: // %use_private
-; CHECK-COMMON-NEXT:    // in Loop: Header=BB10_4 Depth=1
-; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, x22
-; CHECK-COMMON-NEXT:    bl private_za_call
-; CHECK-COMMON-NEXT:    smstart za
-; CHECK-COMMON-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-COMMON-NEXT:    sub x0, x29, #16
-; CHECK-COMMON-NEXT:    cbnz x8, .LBB10_7
-; CHECK-COMMON-NEXT:  // %bb.6: // %use_private
-; CHECK-COMMON-NEXT:    // in Loop: Header=BB10_4 Depth=1
-; CHECK-COMMON-NEXT:    bl __arm_tpidr2_restore
-; CHECK-COMMON-NEXT:  .LBB10_7: // %use_private
-; CHECK-COMMON-NEXT:    // in Loop: Header=BB10_4 Depth=1
-; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-COMMON-NEXT:    b .LBB10_3
-; CHECK-COMMON-NEXT:  .LBB10_8: // %exit
-; CHECK-COMMON-NEXT:    mov sp, x29
-; CHECK-COMMON-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ldp x29, x30, [sp], #48 // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: nested_cond_in_loop:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    stp x29, x30, [sp, #-48]! // 16-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    rdsvl x8, #1
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    msub x9, x8, x8, x9
+; CHECK-NEXT:    mov sp, x9
+; CHECK-NEXT:    cmp w0, #1
+; CHECK-NEXT:    stp x9, x8, [x29, #-16]
+; CHECK-NEXT:    b.lt .LBB10_8
+; CHECK-NEXT:  // %bb.1: // %loop.preheader
+; CHECK-NEXT:    mov w19, w1
+; CHECK-NEXT:    mov w20, w0
+; CHECK-NEXT:    mov w21, wzr
+; CHECK-NEXT:    sub x22, x29, #16
+; CHECK-NEXT:    b .LBB10_4
+; CHECK-NEXT:  .LBB10_2: // %use_shared
+; CHECK-NEXT:    // in Loop: Header=BB10_4 Depth=1
+; CHECK-NEXT:    bl shared_za_call
+; CHECK-NEXT:  .LBB10_3: // %latch
+; CHECK-NEXT:    // in Loop: Header=BB10_4 Depth=1
+; CHECK-NEXT:    add w21, w21, #1
+; CHECK-NEXT:    cmp w21, w20
+; CHECK-NEXT:    b.ge .LBB10_8
+; CHECK-NEXT:  .LBB10_4: // %loop
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    tbnz w19, #0, .LBB10_2
+; CHECK-NEXT:  // %bb.5: // %use_private
+; CHECK-NEXT:    // in Loop: Header=BB10_4 Depth=1
+; CHECK-NEXT:    msr TPIDR2_EL0, x22
+; CHECK-NEXT:    bl private_za_call
+; CHECK-NEXT:    smstart za
+; CHECK-NEXT:    mrs x8, TPIDR2_EL0
+; CHECK-NEXT:    sub x0, x29, #16
+; CHECK-NEXT:    cbnz x8, .LBB10_7
+; CHECK-NEXT:  // %bb.6: // %use_private
+; CHECK-NEXT:    // in Loop: Header=BB10_4 Depth=1
+; CHECK-NEXT:    bl __arm_tpidr2_restore
+; CHECK-NEXT:  .LBB10_7: // %use_private
+; CHECK-NEXT:    // in Loop: Header=BB10_4 Depth=1
+; CHECK-NEXT:    msr TPIDR2_EL0, xzr
+; CHECK-NEXT:    b .LBB10_3
+; CHECK-NEXT:  .LBB10_8: // %exit
+; CHECK-NEXT:    mov sp, x29
+; CHECK-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x29, x30, [sp], #48 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
 entry:
   %cmp = icmp sgt i32 %n, 0
   br i1 %cmp, label %loop, label %exit
@@ -840,46 +584,6 @@ exit:
 }
 
 define void @loop_with_external_entry(i1 %c1, i1 %c2) "aarch64_inout_za" nounwind {
-; CHECK-SDAG-LABEL: loop_with_external_entry:
-; CHECK-SDAG:       // %bb.0: // %entry
-; CHECK-SDAG-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    mov x29, sp
-; CHECK-SDAG-NEXT:    sub sp, sp, #16
-; CHECK-SDAG-NEXT:    rdsvl x8, #1
-; CHECK-SDAG-NEXT:    mov x9, sp
-; CHECK-SDAG-NEXT:    mov w19, w1
-; CHECK-SDAG-NEXT:    msub x9, x8, x8, x9
-; CHECK-SDAG-NEXT:    mov sp, x9
-; CHECK-SDAG-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-SDAG-NEXT:    tbz w0, #0, .LBB11_2
-; CHECK-SDAG-NEXT:  // %bb.1: // %init
-; CHECK-SDAG-NEXT:    bl shared_za_call
-; CHECK-SDAG-NEXT:  .LBB11_2: // %loop.preheader
-; CHECK-SDAG-NEXT:    sub x20, x29, #16
-; CHECK-SDAG-NEXT:    b .LBB11_4
-; CHECK-SDAG-NEXT:  .LBB11_3: // %loop
-; CHECK-SDAG-NEXT:    // in Loop: Header=BB11_4 Depth=1
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    tbz w19, #0, .LBB11_6
-; CHECK-SDAG-NEXT:  .LBB11_4: // %loop
-; CHECK-SDAG-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x20
-; CHECK-SDAG-NEXT:    bl private_za_call
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB11_3
-; CHECK-SDAG-NEXT:  // %bb.5: // %loop
-; CHECK-SDAG-NEXT:    // in Loop: Header=BB11_4 Depth=1
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:    b .LBB11_3
-; CHECK-SDAG-NEXT:  .LBB11_6: // %exit
-; CHECK-SDAG-NEXT:    mov sp, x29
-; CHECK-SDAG-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ret
-;
 ; CHECK-LABEL: loop_with_external_entry:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill

diff  --git a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
index 19ea1e47f84ff..6eb4de449aaa6 100644
--- a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
+++ b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
@@ -1,6 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -aarch64-new-sme-abi=false -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-SDAG
 
 ; A simple EH test case that corresponds to the following C++ source:
 ;
@@ -88,90 +87,6 @@ define void @za_with_raii(i1 %fail) "aarch64_inout_za" personality ptr @__gxx_pe
 ; CHECK-NEXT:    mov x0, x19
 ; CHECK-NEXT:    msr TPIDR2_EL0, x8
 ; CHECK-NEXT:    bl _Unwind_Resume
-;
-; CHECK-SDAG-LABEL: za_with_raii:
-; CHECK-SDAG:       .Lfunc_begin0:
-; CHECK-SDAG-NEXT:    .cfi_startproc
-; CHECK-SDAG-NEXT:    .cfi_personality 156, DW.ref.__gxx_personality_v0
-; CHECK-SDAG-NEXT:    .cfi_lsda 28, .Lexception0
-; CHECK-SDAG-NEXT:  // %bb.0:
-; CHECK-SDAG-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    mov x29, sp
-; CHECK-SDAG-NEXT:    sub sp, sp, #16
-; CHECK-SDAG-NEXT:    .cfi_def_cfa w29, 32
-; CHECK-SDAG-NEXT:    .cfi_offset w19, -8
-; CHECK-SDAG-NEXT:    .cfi_offset w20, -16
-; CHECK-SDAG-NEXT:    .cfi_offset w30, -24
-; CHECK-SDAG-NEXT:    .cfi_offset w29, -32
-; CHECK-SDAG-NEXT:    rdsvl x8, #1
-; CHECK-SDAG-NEXT:    mov x9, sp
-; CHECK-SDAG-NEXT:    msub x9, x8, x8, x9
-; CHECK-SDAG-NEXT:    mov sp, x9
-; CHECK-SDAG-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-SDAG-NEXT:    tbnz w0, #0, .LBB0_2
-; CHECK-SDAG-NEXT:  // %bb.1: // %return_normally
-; CHECK-SDAG-NEXT:    mov sp, x29
-; CHECK-SDAG-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    b shared_za_call
-; CHECK-SDAG-NEXT:  .LBB0_2: // %throw_exception
-; CHECK-SDAG-NEXT:    sub x20, x29, #16
-; CHECK-SDAG-NEXT:    mov w0, #8 // =0x8
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x20
-; CHECK-SDAG-NEXT:    bl __cxa_allocate_exception
-; CHECK-SDAG-NEXT:    mov x8, x0
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x9, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x9, .LBB0_4
-; CHECK-SDAG-NEXT:  // %bb.3: // %throw_exception
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB0_4: // %throw_exception
-; CHECK-SDAG-NEXT:    adrp x9, .L.str
-; CHECK-SDAG-NEXT:    add x9, x9, :lo12:.L.str
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    str x9, [x8]
-; CHECK-SDAG-NEXT:  .Ltmp0: // EH_LABEL
-; CHECK-SDAG-NEXT:    adrp x1, :got:typeinfo_for_char_const_ptr
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x20
-; CHECK-SDAG-NEXT:    mov x0, x8
-; CHECK-SDAG-NEXT:    ldr x1, [x1, :got_lo12:typeinfo_for_char_const_ptr]
-; CHECK-SDAG-NEXT:    mov x2, xzr
-; CHECK-SDAG-NEXT:    bl __cxa_throw
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB0_6
-; CHECK-SDAG-NEXT:  // %bb.5: // %throw_exception
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB0_6: // %throw_exception
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:  .Ltmp1: // EH_LABEL
-; CHECK-SDAG-NEXT:  // %bb.7: // %throw_fail
-; CHECK-SDAG-NEXT:  .LBB0_8: // %unwind_dtors
-; CHECK-SDAG-NEXT:  .Ltmp2: // EH_LABEL
-; CHECK-SDAG-NEXT:    mov x19, x0
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB0_10
-; CHECK-SDAG-NEXT:  // %bb.9: // %unwind_dtors
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB0_10: // %unwind_dtors
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    bl shared_za_call
-; CHECK-SDAG-NEXT:    mov x0, x19
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x20
-; CHECK-SDAG-NEXT:    bl _Unwind_Resume
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB0_12
-; CHECK-SDAG-NEXT:  // %bb.11: // %unwind_dtors
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB0_12: // %unwind_dtors
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
   br i1 %fail, label %throw_exception, label %return_normally
 
 throw_exception:
@@ -268,78 +183,6 @@ define void @try_catch() "aarch64_inout_za" personality ptr @__gxx_personality_v
 ; CHECK-NEXT:  .LBB1_8: // %catch
 ; CHECK-NEXT:    msr TPIDR2_EL0, xzr
 ; CHECK-NEXT:    b .LBB1_3
-;
-; CHECK-SDAG-LABEL: try_catch:
-; CHECK-SDAG:       .Lfunc_begin1:
-; CHECK-SDAG-NEXT:    .cfi_startproc
-; CHECK-SDAG-NEXT:    .cfi_personality 156, DW.ref.__gxx_personality_v0
-; CHECK-SDAG-NEXT:    .cfi_lsda 28, .Lexception1
-; CHECK-SDAG-NEXT:  // %bb.0:
-; CHECK-SDAG-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    str x19, [sp, #16] // 8-byte Spill
-; CHECK-SDAG-NEXT:    mov x29, sp
-; CHECK-SDAG-NEXT:    sub sp, sp, #16
-; CHECK-SDAG-NEXT:    .cfi_def_cfa w29, 32
-; CHECK-SDAG-NEXT:    .cfi_offset w19, -16
-; CHECK-SDAG-NEXT:    .cfi_offset w30, -24
-; CHECK-SDAG-NEXT:    .cfi_offset w29, -32
-; CHECK-SDAG-NEXT:    rdsvl x8, #1
-; CHECK-SDAG-NEXT:    mov x9, sp
-; CHECK-SDAG-NEXT:    msub x9, x8, x8, x9
-; CHECK-SDAG-NEXT:    mov sp, x9
-; CHECK-SDAG-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-SDAG-NEXT:  .Ltmp3: // EH_LABEL
-; CHECK-SDAG-NEXT:    sub x19, x29, #16
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x19
-; CHECK-SDAG-NEXT:    bl may_throw
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB1_2
-; CHECK-SDAG-NEXT:  // %bb.1:
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB1_2:
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:  .Ltmp4: // EH_LABEL
-; CHECK-SDAG-NEXT:  .LBB1_3: // %after_catch
-; CHECK-SDAG-NEXT:    mov sp, x29
-; CHECK-SDAG-NEXT:    ldr x19, [sp, #16] // 8-byte Reload
-; CHECK-SDAG-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    b shared_za_call
-; CHECK-SDAG-NEXT:  .LBB1_4: // %catch
-; CHECK-SDAG-NEXT:  .Ltmp5: // EH_LABEL
-; CHECK-SDAG-NEXT:    mov x1, x0
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB1_6
-; CHECK-SDAG-NEXT:  // %bb.5: // %catch
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB1_6: // %catch
-; CHECK-SDAG-NEXT:    mov x0, x1
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x19
-; CHECK-SDAG-NEXT:    bl __cxa_begin_catch
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB1_8
-; CHECK-SDAG-NEXT:  // %bb.7: // %catch
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB1_8: // %catch
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    bl shared_za_call
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x19
-; CHECK-SDAG-NEXT:    bl __cxa_end_catch
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB1_10
-; CHECK-SDAG-NEXT:  // %bb.9: // %catch
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB1_10: // %catch
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    b .LBB1_3
   invoke void @may_throw()
           to label %after_catch unwind label %catch
 
@@ -426,78 +269,6 @@ define void @try_catch_shared_za_callee() "aarch64_new_za" personality ptr @__gx
 ; CHECK-NEXT:    msr TPIDR2_EL0, xzr
 ; CHECK-NEXT:    smstop za
 ; CHECK-NEXT:    b .LBB2_3
-;
-; CHECK-SDAG-LABEL: try_catch_shared_za_callee:
-; CHECK-SDAG:       .Lfunc_begin2:
-; CHECK-SDAG-NEXT:    .cfi_startproc
-; CHECK-SDAG-NEXT:    .cfi_personality 156, DW.ref.__gxx_personality_v0
-; CHECK-SDAG-NEXT:    .cfi_lsda 28, .Lexception2
-; CHECK-SDAG-NEXT:  // %bb.0: // %prelude
-; CHECK-SDAG-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    str x19, [sp, #16] // 8-byte Spill
-; CHECK-SDAG-NEXT:    mov x29, sp
-; CHECK-SDAG-NEXT:    sub sp, sp, #16
-; CHECK-SDAG-NEXT:    .cfi_def_cfa w29, 32
-; CHECK-SDAG-NEXT:    .cfi_offset w19, -16
-; CHECK-SDAG-NEXT:    .cfi_offset w30, -24
-; CHECK-SDAG-NEXT:    .cfi_offset w29, -32
-; CHECK-SDAG-NEXT:    rdsvl x8, #1
-; CHECK-SDAG-NEXT:    mov x9, sp
-; CHECK-SDAG-NEXT:    msub x9, x8, x8, x9
-; CHECK-SDAG-NEXT:    mov sp, x9
-; CHECK-SDAG-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    cbz x8, .LBB2_2
-; CHECK-SDAG-NEXT:  // %bb.1: // %save.za
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_save
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:  .LBB2_2:
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    zero {za}
-; CHECK-SDAG-NEXT:  .Ltmp6: // EH_LABEL
-; CHECK-SDAG-NEXT:    bl shared_za_call
-; CHECK-SDAG-NEXT:  .Ltmp7: // EH_LABEL
-; CHECK-SDAG-NEXT:  .LBB2_3: // %exit
-; CHECK-SDAG-NEXT:    smstop za
-; CHECK-SDAG-NEXT:    mov sp, x29
-; CHECK-SDAG-NEXT:    ldr x19, [sp, #16] // 8-byte Reload
-; CHECK-SDAG-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ret
-; CHECK-SDAG-NEXT:  .LBB2_4: // %catch
-; CHECK-SDAG-NEXT:  .Ltmp8: // EH_LABEL
-; CHECK-SDAG-NEXT:    mov x1, x0
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    sub x19, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB2_6
-; CHECK-SDAG-NEXT:  // %bb.5: // %catch
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB2_6: // %catch
-; CHECK-SDAG-NEXT:    mov x0, x1
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x19
-; CHECK-SDAG-NEXT:    bl __cxa_begin_catch
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB2_8
-; CHECK-SDAG-NEXT:  // %bb.7: // %catch
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB2_8: // %catch
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    bl noexcept_shared_za_call
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x19
-; CHECK-SDAG-NEXT:    bl __cxa_end_catch
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB2_10
-; CHECK-SDAG-NEXT:  // %bb.9: // %catch
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB2_10: // %catch
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    b .LBB2_3
   invoke void @shared_za_call() #4
           to label %exit unwind label %catch
 catch:
@@ -566,46 +337,6 @@ define void @try_catch_shared_zt0_callee() "aarch64_inout_zt0" personality ptr @
 ; CHECK-NEXT:    smstop za
 ; CHECK-NEXT:    mov x0, x19
 ; CHECK-NEXT:    bl _Unwind_Resume
-;
-; CHECK-SDAG-LABEL: try_catch_shared_zt0_callee:
-; CHECK-SDAG:       .Lfunc_begin3:
-; CHECK-SDAG-NEXT:    .cfi_startproc
-; CHECK-SDAG-NEXT:    .cfi_personality 156, DW.ref.__gxx_personality_v0
-; CHECK-SDAG-NEXT:    .cfi_lsda 28, .Lexception3
-; CHECK-SDAG-NEXT:  // %bb.0:
-; CHECK-SDAG-NEXT:    sub sp, sp, #96
-; CHECK-SDAG-NEXT:    str x30, [sp, #64] // 8-byte Spill
-; CHECK-SDAG-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 96
-; CHECK-SDAG-NEXT:    .cfi_offset w19, -8
-; CHECK-SDAG-NEXT:    .cfi_offset w20, -16
-; CHECK-SDAG-NEXT:    .cfi_offset w30, -32
-; CHECK-SDAG-NEXT:  .Ltmp9: // EH_LABEL
-; CHECK-SDAG-NEXT:    mov x19, sp
-; CHECK-SDAG-NEXT:    str zt0, [x19]
-; CHECK-SDAG-NEXT:    smstop za
-; CHECK-SDAG-NEXT:    bl may_throw
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    ldr zt0, [x19]
-; CHECK-SDAG-NEXT:  .Ltmp10: // EH_LABEL
-; CHECK-SDAG-NEXT:  // %bb.1: // %return_normally
-; CHECK-SDAG-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldr x30, [sp, #64] // 8-byte Reload
-; CHECK-SDAG-NEXT:    add sp, sp, #96
-; CHECK-SDAG-NEXT:    ret
-; CHECK-SDAG-NEXT:  .LBB3_2: // %unwind_dtors
-; CHECK-SDAG-NEXT:  .Ltmp11: // EH_LABEL
-; CHECK-SDAG-NEXT:    mov x20, sp
-; CHECK-SDAG-NEXT:    mov x19, x0
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    ldr zt0, [x20]
-; CHECK-SDAG-NEXT:    bl shared_zt0_call
-; CHECK-SDAG-NEXT:    str zt0, [x20]
-; CHECK-SDAG-NEXT:    smstop za
-; CHECK-SDAG-NEXT:    mov x0, x19
-; CHECK-SDAG-NEXT:    bl _Unwind_Resume
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    ldr zt0, [x20]
   invoke void @may_throw()
           to label %return_normally unwind label %unwind_dtors
 
@@ -667,52 +398,6 @@ define void @try_catch_agnostic_za() "aarch64_za_state_agnostic" personality ptr
 ; CHECK-NEXT:    mov x0, x19
 ; CHECK-NEXT:    bl __arm_sme_restore
 ; CHECK-NEXT:    b .LBB4_1
-;
-; CHECK-SDAG-LABEL: try_catch_agnostic_za:
-; CHECK-SDAG:       .Lfunc_begin4:
-; CHECK-SDAG-NEXT:    .cfi_startproc
-; CHECK-SDAG-NEXT:    .cfi_personality 156, DW.ref.__gxx_personality_v0
-; CHECK-SDAG-NEXT:    .cfi_lsda 28, .Lexception4
-; CHECK-SDAG-NEXT:  // %bb.0:
-; CHECK-SDAG-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    str x19, [sp, #16] // 8-byte Spill
-; CHECK-SDAG-NEXT:    mov x29, sp
-; CHECK-SDAG-NEXT:    .cfi_def_cfa w29, 32
-; CHECK-SDAG-NEXT:    .cfi_offset w19, -16
-; CHECK-SDAG-NEXT:    .cfi_offset w30, -24
-; CHECK-SDAG-NEXT:    .cfi_offset w29, -32
-; CHECK-SDAG-NEXT:    bl __arm_sme_state_size
-; CHECK-SDAG-NEXT:    sub sp, sp, x0
-; CHECK-SDAG-NEXT:    mov x19, sp
-; CHECK-SDAG-NEXT:  .Ltmp12: // EH_LABEL
-; CHECK-SDAG-NEXT:    mov x0, x19
-; CHECK-SDAG-NEXT:    bl __arm_sme_save
-; CHECK-SDAG-NEXT:    bl may_throw
-; CHECK-SDAG-NEXT:    mov x0, x19
-; CHECK-SDAG-NEXT:    bl __arm_sme_restore
-; CHECK-SDAG-NEXT:  .Ltmp13: // EH_LABEL
-; CHECK-SDAG-NEXT:  .LBB4_1: // %exit
-; CHECK-SDAG-NEXT:    mov sp, x29
-; CHECK-SDAG-NEXT:    ldr x19, [sp, #16] // 8-byte Reload
-; CHECK-SDAG-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ret
-; CHECK-SDAG-NEXT:  .LBB4_2: // %catch
-; CHECK-SDAG-NEXT:  .Ltmp14: // EH_LABEL
-; CHECK-SDAG-NEXT:    mov x1, x0
-; CHECK-SDAG-NEXT:    mov x0, x19
-; CHECK-SDAG-NEXT:    bl __arm_sme_restore
-; CHECK-SDAG-NEXT:    mov x0, x19
-; CHECK-SDAG-NEXT:    bl __arm_sme_save
-; CHECK-SDAG-NEXT:    mov x0, x1
-; CHECK-SDAG-NEXT:    bl __cxa_begin_catch
-; CHECK-SDAG-NEXT:    mov x0, x19
-; CHECK-SDAG-NEXT:    bl __arm_sme_restore
-; CHECK-SDAG-NEXT:    mov x0, x19
-; CHECK-SDAG-NEXT:    bl __arm_sme_save
-; CHECK-SDAG-NEXT:    bl __cxa_end_catch
-; CHECK-SDAG-NEXT:    mov x0, x19
-; CHECK-SDAG-NEXT:    bl __arm_sme_restore
-; CHECK-SDAG-NEXT:    b .LBB4_1
   invoke void @may_throw()
           to label %exit unwind label %catch
 catch:
@@ -779,52 +464,6 @@ define void @try_catch_agnostic_za_invoke() "aarch64_za_state_agnostic" personal
 ; CHECK-NEXT:    mov x0, x19
 ; CHECK-NEXT:    bl __arm_sme_restore
 ; CHECK-NEXT:    b .LBB5_1
-;
-; CHECK-SDAG-LABEL: try_catch_agnostic_za_invoke:
-; CHECK-SDAG:       .Lfunc_begin5:
-; CHECK-SDAG-NEXT:    .cfi_startproc
-; CHECK-SDAG-NEXT:    .cfi_personality 156, DW.ref.__gxx_personality_v0
-; CHECK-SDAG-NEXT:    .cfi_lsda 28, .Lexception5
-; CHECK-SDAG-NEXT:  // %bb.0: // %entry
-; CHECK-SDAG-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    str x19, [sp, #16] // 8-byte Spill
-; CHECK-SDAG-NEXT:    mov x29, sp
-; CHECK-SDAG-NEXT:    .cfi_def_cfa w29, 32
-; CHECK-SDAG-NEXT:    .cfi_offset w19, -16
-; CHECK-SDAG-NEXT:    .cfi_offset w30, -24
-; CHECK-SDAG-NEXT:    .cfi_offset w29, -32
-; CHECK-SDAG-NEXT:    bl __arm_sme_state_size
-; CHECK-SDAG-NEXT:    sub sp, sp, x0
-; CHECK-SDAG-NEXT:    mov x19, sp
-; CHECK-SDAG-NEXT:  .Ltmp15: // EH_LABEL
-; CHECK-SDAG-NEXT:    mov x0, x19
-; CHECK-SDAG-NEXT:    bl __arm_sme_save
-; CHECK-SDAG-NEXT:    bl agnostic_za_call
-; CHECK-SDAG-NEXT:    mov x0, x19
-; CHECK-SDAG-NEXT:    bl __arm_sme_restore
-; CHECK-SDAG-NEXT:  .Ltmp16: // EH_LABEL
-; CHECK-SDAG-NEXT:  .LBB5_1: // %exit
-; CHECK-SDAG-NEXT:    mov sp, x29
-; CHECK-SDAG-NEXT:    ldr x19, [sp, #16] // 8-byte Reload
-; CHECK-SDAG-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ret
-; CHECK-SDAG-NEXT:  .LBB5_2: // %catch
-; CHECK-SDAG-NEXT:  .Ltmp17: // EH_LABEL
-; CHECK-SDAG-NEXT:    mov x1, x0
-; CHECK-SDAG-NEXT:    mov x0, x19
-; CHECK-SDAG-NEXT:    bl __arm_sme_restore
-; CHECK-SDAG-NEXT:    mov x0, x19
-; CHECK-SDAG-NEXT:    bl __arm_sme_save
-; CHECK-SDAG-NEXT:    mov x0, x1
-; CHECK-SDAG-NEXT:    bl __cxa_begin_catch
-; CHECK-SDAG-NEXT:    mov x0, x19
-; CHECK-SDAG-NEXT:    bl __arm_sme_restore
-; CHECK-SDAG-NEXT:    mov x0, x19
-; CHECK-SDAG-NEXT:    bl __arm_sme_save
-; CHECK-SDAG-NEXT:    bl __cxa_end_catch
-; CHECK-SDAG-NEXT:    mov x0, x19
-; CHECK-SDAG-NEXT:    bl __arm_sme_restore
-; CHECK-SDAG-NEXT:    b .LBB5_1
 entry:
   invoke void @agnostic_za_call() "aarch64_za_state_agnostic"
           to label %exit unwind label %catch
@@ -891,77 +530,6 @@ define void @try_catch_inout_za_agnostic_za_callee() "aarch64_inout_za" personal
 ; CHECK-NEXT:  .LBB6_6: // %catch
 ; CHECK-NEXT:    msr TPIDR2_EL0, xzr
 ; CHECK-NEXT:    b .LBB6_3
-;
-; CHECK-SDAG-LABEL: try_catch_inout_za_agnostic_za_callee:
-; CHECK-SDAG:       .Lfunc_begin6:
-; CHECK-SDAG-NEXT:    .cfi_startproc
-; CHECK-SDAG-NEXT:    .cfi_personality 156, DW.ref.__gxx_personality_v0
-; CHECK-SDAG-NEXT:    .cfi_lsda 28, .Lexception6
-; CHECK-SDAG-NEXT:  // %bb.0: // %entry
-; CHECK-SDAG-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    str x19, [sp, #16] // 8-byte Spill
-; CHECK-SDAG-NEXT:    mov x29, sp
-; CHECK-SDAG-NEXT:    sub sp, sp, #16
-; CHECK-SDAG-NEXT:    .cfi_def_cfa w29, 32
-; CHECK-SDAG-NEXT:    .cfi_offset w19, -16
-; CHECK-SDAG-NEXT:    .cfi_offset w30, -24
-; CHECK-SDAG-NEXT:    .cfi_offset w29, -32
-; CHECK-SDAG-NEXT:    rdsvl x8, #1
-; CHECK-SDAG-NEXT:    mov x9, sp
-; CHECK-SDAG-NEXT:    msub x9, x8, x8, x9
-; CHECK-SDAG-NEXT:    mov sp, x9
-; CHECK-SDAG-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-SDAG-NEXT:  .Ltmp18: // EH_LABEL
-; CHECK-SDAG-NEXT:    sub x19, x29, #16
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x19
-; CHECK-SDAG-NEXT:    bl agnostic_za_call
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB6_2
-; CHECK-SDAG-NEXT:  // %bb.1: // %entry
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB6_2: // %entry
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:  .Ltmp19: // EH_LABEL
-; CHECK-SDAG-NEXT:  .LBB6_3: // %exit
-; CHECK-SDAG-NEXT:    mov sp, x29
-; CHECK-SDAG-NEXT:    ldr x19, [sp, #16] // 8-byte Reload
-; CHECK-SDAG-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ret
-; CHECK-SDAG-NEXT:  .LBB6_4: // %catch
-; CHECK-SDAG-NEXT:  .Ltmp20: // EH_LABEL
-; CHECK-SDAG-NEXT:    mov x1, x0
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB6_6
-; CHECK-SDAG-NEXT:  // %bb.5: // %catch
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB6_6: // %catch
-; CHECK-SDAG-NEXT:    mov x0, x1
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x19
-; CHECK-SDAG-NEXT:    bl __cxa_begin_catch
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB6_8
-; CHECK-SDAG-NEXT:  // %bb.7: // %catch
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB6_8: // %catch
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x19
-; CHECK-SDAG-NEXT:    bl __cxa_end_catch
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB6_10
-; CHECK-SDAG-NEXT:  // %bb.9: // %catch
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB6_10: // %catch
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    b .LBB6_3
 entry:
   invoke void @agnostic_za_call()
           to label %exit unwind label %catch
@@ -1009,45 +577,6 @@ define void @try_catch_inout_zt0() "aarch64_inout_zt0" personality ptr @__gxx_pe
 ; CHECK-NEXT:    smstart za
 ; CHECK-NEXT:    ldr zt0, [x19]
 ; CHECK-NEXT:    b .LBB7_1
-;
-; CHECK-SDAG-LABEL: try_catch_inout_zt0:
-; CHECK-SDAG:       .Lfunc_begin7:
-; CHECK-SDAG-NEXT:    .cfi_startproc
-; CHECK-SDAG-NEXT:    .cfi_personality 156, DW.ref.__gxx_personality_v0
-; CHECK-SDAG-NEXT:    .cfi_lsda 28, .Lexception7
-; CHECK-SDAG-NEXT:  // %bb.0: // %entry
-; CHECK-SDAG-NEXT:    sub sp, sp, #80
-; CHECK-SDAG-NEXT:    stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 80
-; CHECK-SDAG-NEXT:    .cfi_offset w19, -8
-; CHECK-SDAG-NEXT:    .cfi_offset w30, -16
-; CHECK-SDAG-NEXT:  .Ltmp21: // EH_LABEL
-; CHECK-SDAG-NEXT:    mov x19, sp
-; CHECK-SDAG-NEXT:    str zt0, [x19]
-; CHECK-SDAG-NEXT:    smstop za
-; CHECK-SDAG-NEXT:    bl may_throw
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    ldr zt0, [x19]
-; CHECK-SDAG-NEXT:  .Ltmp22: // EH_LABEL
-; CHECK-SDAG-NEXT:  .LBB7_1: // %exit
-; CHECK-SDAG-NEXT:    ldp x30, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    add sp, sp, #80
-; CHECK-SDAG-NEXT:    ret
-; CHECK-SDAG-NEXT:  .LBB7_2: // %catch
-; CHECK-SDAG-NEXT:  .Ltmp23: // EH_LABEL
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    ldr zt0, [x19]
-; CHECK-SDAG-NEXT:    str zt0, [x19]
-; CHECK-SDAG-NEXT:    smstop za
-; CHECK-SDAG-NEXT:    bl __cxa_begin_catch
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    ldr zt0, [x19]
-; CHECK-SDAG-NEXT:    str zt0, [x19]
-; CHECK-SDAG-NEXT:    smstop za
-; CHECK-SDAG-NEXT:    bl __cxa_end_catch
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    ldr zt0, [x19]
-; CHECK-SDAG-NEXT:    b .LBB7_1
 entry:
   invoke void @may_throw()
           to label %exit unwind label %catch
@@ -1123,83 +652,6 @@ define void @try_catch_shared_za_callee_zt0_saved(ptr %callee) "aarch64_inout_za
 ; CHECK-NEXT:    mov x0, x20
 ; CHECK-NEXT:    msr TPIDR2_EL0, x8
 ; CHECK-NEXT:    bl _Unwind_Resume
-;
-; CHECK-SDAG-LABEL: try_catch_shared_za_callee_zt0_saved:
-; CHECK-SDAG:       .Lfunc_begin8:
-; CHECK-SDAG-NEXT:    .cfi_startproc
-; CHECK-SDAG-NEXT:    .cfi_personality 156, DW.ref.__gxx_personality_v0
-; CHECK-SDAG-NEXT:    .cfi_lsda 28, .Lexception8
-; CHECK-SDAG-NEXT:  // %bb.0:
-; CHECK-SDAG-NEXT:    stp x29, x30, [sp, #-48]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    mov x29, sp
-; CHECK-SDAG-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    sub sp, sp, #80
-; CHECK-SDAG-NEXT:    .cfi_def_cfa w29, 48
-; CHECK-SDAG-NEXT:    .cfi_offset w19, -8
-; CHECK-SDAG-NEXT:    .cfi_offset w20, -16
-; CHECK-SDAG-NEXT:    .cfi_offset w21, -24
-; CHECK-SDAG-NEXT:    .cfi_offset w22, -32
-; CHECK-SDAG-NEXT:    .cfi_offset w30, -40
-; CHECK-SDAG-NEXT:    .cfi_offset w29, -48
-; CHECK-SDAG-NEXT:    rdsvl x8, #1
-; CHECK-SDAG-NEXT:    mov x9, sp
-; CHECK-SDAG-NEXT:    mov x19, x0
-; CHECK-SDAG-NEXT:    msub x9, x8, x8, x9
-; CHECK-SDAG-NEXT:    mov sp, x9
-; CHECK-SDAG-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-SDAG-NEXT:  .Ltmp24: // EH_LABEL
-; CHECK-SDAG-NEXT:    sub x8, x29, #16
-; CHECK-SDAG-NEXT:    sub x20, x29, #80
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x8
-; CHECK-SDAG-NEXT:    str zt0, [x20]
-; CHECK-SDAG-NEXT:    bl may_throw
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    ldr zt0, [x20]
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB8_2
-; CHECK-SDAG-NEXT:  // %bb.1:
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB8_2:
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:  .Ltmp25: // EH_LABEL
-; CHECK-SDAG-NEXT:  // %bb.3: // %return_normally
-; CHECK-SDAG-NEXT:    mov sp, x29
-; CHECK-SDAG-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp x29, x30, [sp], #48 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ret
-; CHECK-SDAG-NEXT:  .LBB8_4: // %unwind_dtors
-; CHECK-SDAG-NEXT:  .Ltmp26: // EH_LABEL
-; CHECK-SDAG-NEXT:    sub x21, x29, #80
-; CHECK-SDAG-NEXT:    sub x22, x29, #16
-; CHECK-SDAG-NEXT:    mov x20, x0
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    ldr zt0, [x21]
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB8_6
-; CHECK-SDAG-NEXT:  // %bb.5: // %unwind_dtors
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB8_6: // %unwind_dtors
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    str zt0, [x21]
-; CHECK-SDAG-NEXT:    blr x19
-; CHECK-SDAG-NEXT:    ldr zt0, [x21]
-; CHECK-SDAG-NEXT:    mov x0, x20
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x22
-; CHECK-SDAG-NEXT:    str zt0, [x21]
-; CHECK-SDAG-NEXT:    bl _Unwind_Resume
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    ldr zt0, [x21]
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB8_8
-; CHECK-SDAG-NEXT:  // %bb.7: // %unwind_dtors
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB8_8: // %unwind_dtors
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
   invoke void @may_throw()
           to label %return_normally unwind label %unwind_dtors
 

diff  --git a/llvm/test/CodeGen/AArch64/sme-za-function-with-many-blocks.ll b/llvm/test/CodeGen/AArch64/sme-za-function-with-many-blocks.ll
index 0306b27cb17e1..01a1746866f4f 100644
--- a/llvm/test/CodeGen/AArch64/sme-za-function-with-many-blocks.ll
+++ b/llvm/test/CodeGen/AArch64/sme-za-function-with-many-blocks.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -aarch64-new-sme-abi < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 < %s | FileCheck %s
 
 ; This test case was generated by lowering mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul.mlir to LLVM IR.
 ; The actual contents of the function are not that important. The main interesting quality here is that many blocks

diff  --git a/llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll b/llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll
index 26fc39e271090..27082d9af93b3 100644
--- a/llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll
+++ b/llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll
@@ -1,5 +1,4 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -aarch64-new-sme-abi=false < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 < %s | FileCheck %s
 
 define i32 @no_tpidr2_save_required() "aarch64_inout_za" {

diff  --git a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll
index 4cbdca7d41aac..d3c3c111c205b 100644
--- a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll
+++ b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll
@@ -1,6 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -start-after=simplifycfg -enable-tail-merge=false -aarch64-new-sme-abi=false -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-SDAG
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s
 
 ;
 ; Private-ZA Callee
@@ -9,19 +8,19 @@
 ; Expect spill & fill of ZT0 around call
 ; Expect smstop/smstart za around call
 define void @zt0_in_caller_no_state_callee(ptr %callee) "aarch64_in_zt0" nounwind {
-; CHECK-COMMON-LABEL: zt0_in_caller_no_state_callee:
-; CHECK-COMMON:       // %bb.0:
-; CHECK-COMMON-NEXT:    sub sp, sp, #80
-; CHECK-COMMON-NEXT:    stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    mov x19, sp
-; CHECK-COMMON-NEXT:    str zt0, [x19]
-; CHECK-COMMON-NEXT:    smstop za
-; CHECK-COMMON-NEXT:    blr x0
-; CHECK-COMMON-NEXT:    smstart za
-; CHECK-COMMON-NEXT:    ldr zt0, [x19]
-; CHECK-COMMON-NEXT:    ldp x30, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    add sp, sp, #80
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: zt0_in_caller_no_state_callee:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #80
+; CHECK-NEXT:    stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    mov x19, sp
+; CHECK-NEXT:    str zt0, [x19]
+; CHECK-NEXT:    smstop za
+; CHECK-NEXT:    blr x0
+; CHECK-NEXT:    smstart za
+; CHECK-NEXT:    ldr zt0, [x19]
+; CHECK-NEXT:    ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #80
+; CHECK-NEXT:    ret
   call void %callee();
   ret void;
 }
@@ -30,36 +29,6 @@ define void @zt0_in_caller_no_state_callee(ptr %callee) "aarch64_in_zt0" nounwin
 ; Expect setup and restore lazy-save around call
 ; Expect smstart za after call
 define void @za_zt0_shared_caller_no_state_callee(ptr %callee) "aarch64_inout_za" "aarch64_in_zt0" nounwind {
-; CHECK-SDAG-LABEL: za_zt0_shared_caller_no_state_callee:
-; CHECK-SDAG:       // %bb.0:
-; CHECK-SDAG-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    str x19, [sp, #16] // 8-byte Spill
-; CHECK-SDAG-NEXT:    mov x29, sp
-; CHECK-SDAG-NEXT:    sub sp, sp, #80
-; CHECK-SDAG-NEXT:    rdsvl x8, #1
-; CHECK-SDAG-NEXT:    mov x9, sp
-; CHECK-SDAG-NEXT:    msub x9, x8, x8, x9
-; CHECK-SDAG-NEXT:    mov sp, x9
-; CHECK-SDAG-NEXT:    sub x10, x29, #16
-; CHECK-SDAG-NEXT:    sub x19, x29, #80
-; CHECK-SDAG-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x10
-; CHECK-SDAG-NEXT:    str zt0, [x19]
-; CHECK-SDAG-NEXT:    blr x0
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    ldr zt0, [x19]
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB1_2
-; CHECK-SDAG-NEXT:  // %bb.1:
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB1_2:
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    mov sp, x29
-; CHECK-SDAG-NEXT:    ldr x19, [sp, #16] // 8-byte Reload
-; CHECK-SDAG-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ret
-;
 ; CHECK-LABEL: za_zt0_shared_caller_no_state_callee:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
@@ -99,41 +68,41 @@ define void @za_zt0_shared_caller_no_state_callee(ptr %callee) "aarch64_inout_za
 
 ; Caller and callee have shared ZT0 state, no spill/fill of ZT0 required
 define void @zt0_shared_caller_zt0_shared_callee(ptr %callee) "aarch64_in_zt0" nounwind {
-; CHECK-COMMON-LABEL: zt0_shared_caller_zt0_shared_callee:
-; CHECK-COMMON:       // %bb.0:
-; CHECK-COMMON-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-COMMON-NEXT:    blr x0
-; CHECK-COMMON-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: zt0_shared_caller_zt0_shared_callee:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    blr x0
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   call void %callee() "aarch64_in_zt0";
   ret void;
 }
 
 ; Expect spill & fill of ZT0 around call
 define void @za_zt0_shared_caller_za_shared_callee(ptr %callee) "aarch64_inout_za" "aarch64_in_zt0" nounwind {
-; CHECK-COMMON-LABEL: za_zt0_shared_caller_za_shared_callee:
-; CHECK-COMMON:       // %bb.0:
-; CHECK-COMMON-NEXT:    sub sp, sp, #80
-; CHECK-COMMON-NEXT:    stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    mov x19, sp
-; CHECK-COMMON-NEXT:    str zt0, [x19]
-; CHECK-COMMON-NEXT:    blr x0
-; CHECK-COMMON-NEXT:    ldr zt0, [x19]
-; CHECK-COMMON-NEXT:    ldp x30, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    add sp, sp, #80
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: za_zt0_shared_caller_za_shared_callee:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #80
+; CHECK-NEXT:    stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    mov x19, sp
+; CHECK-NEXT:    str zt0, [x19]
+; CHECK-NEXT:    blr x0
+; CHECK-NEXT:    ldr zt0, [x19]
+; CHECK-NEXT:    ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #80
+; CHECK-NEXT:    ret
   call void %callee() "aarch64_inout_za";
   ret void;
 }
 
 ; Caller and callee have shared ZA & ZT0
 define void @za_zt0_shared_caller_za_zt0_shared_callee(ptr %callee) "aarch64_inout_za" "aarch64_in_zt0" nounwind {
-; CHECK-COMMON-LABEL: za_zt0_shared_caller_za_zt0_shared_callee:
-; CHECK-COMMON:       // %bb.0:
-; CHECK-COMMON-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-COMMON-NEXT:    blr x0
-; CHECK-COMMON-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: za_zt0_shared_caller_za_zt0_shared_callee:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    blr x0
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   call void %callee() "aarch64_inout_za" "aarch64_in_zt0";
   ret void;
 }
@@ -143,19 +112,19 @@ define void @za_zt0_shared_caller_za_zt0_shared_callee(ptr %callee) "aarch64_ino
 ; Expect spill & fill of ZT0 around call
 ; Expect smstop/smstart za around call
 define void @zt0_in_caller_zt0_new_callee(ptr %callee) "aarch64_in_zt0" nounwind {
-; CHECK-COMMON-LABEL: zt0_in_caller_zt0_new_callee:
-; CHECK-COMMON:       // %bb.0:
-; CHECK-COMMON-NEXT:    sub sp, sp, #80
-; CHECK-COMMON-NEXT:    stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    mov x19, sp
-; CHECK-COMMON-NEXT:    str zt0, [x19]
-; CHECK-COMMON-NEXT:    smstop za
-; CHECK-COMMON-NEXT:    blr x0
-; CHECK-COMMON-NEXT:    smstart za
-; CHECK-COMMON-NEXT:    ldr zt0, [x19]
-; CHECK-COMMON-NEXT:    ldp x30, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    add sp, sp, #80
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: zt0_in_caller_zt0_new_callee:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #80
+; CHECK-NEXT:    stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    mov x19, sp
+; CHECK-NEXT:    str zt0, [x19]
+; CHECK-NEXT:    smstop za
+; CHECK-NEXT:    blr x0
+; CHECK-NEXT:    smstart za
+; CHECK-NEXT:    ldr zt0, [x19]
+; CHECK-NEXT:    ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #80
+; CHECK-NEXT:    ret
   call void %callee() "aarch64_new_zt0";
   ret void;
 }
@@ -167,29 +136,6 @@ define void @zt0_in_caller_zt0_new_callee(ptr %callee) "aarch64_in_zt0" nounwind
 ; Expect spill & fill of ZT0 around call
 ; Before return, expect smstop ZA
 define void @zt0_new_caller_zt0_new_callee(ptr %callee) "aarch64_new_zt0" nounwind {
-; CHECK-SDAG-LABEL: zt0_new_caller_zt0_new_callee:
-; CHECK-SDAG:       // %bb.0: // %prelude
-; CHECK-SDAG-NEXT:    sub sp, sp, #80
-; CHECK-SDAG-NEXT:    stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    cbz x8, .LBB6_2
-; CHECK-SDAG-NEXT:  // %bb.1: // %save.za
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_save
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:  .LBB6_2:
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    zero { zt0 }
-; CHECK-SDAG-NEXT:    mov x19, sp
-; CHECK-SDAG-NEXT:    str zt0, [x19]
-; CHECK-SDAG-NEXT:    smstop za
-; CHECK-SDAG-NEXT:    blr x0
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    ldr zt0, [x19]
-; CHECK-SDAG-NEXT:    smstop za
-; CHECK-SDAG-NEXT:    ldp x30, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    add sp, sp, #80
-; CHECK-SDAG-NEXT:    ret
-;
 ; CHECK-LABEL: zt0_new_caller_zt0_new_callee:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #80
@@ -219,27 +165,6 @@ define void @zt0_new_caller_zt0_new_callee(ptr %callee) "aarch64_new_zt0" nounwi
 ; Expect spill & fill of ZT0 around __arm_sme_state call
 ; Before return, expect smstop ZA
 define i64 @zt0_new_caller_abi_routine_callee() "aarch64_new_zt0" nounwind {
-; CHECK-SDAG-LABEL: zt0_new_caller_abi_routine_callee:
-; CHECK-SDAG:       // %bb.0: // %prelude
-; CHECK-SDAG-NEXT:    sub sp, sp, #80
-; CHECK-SDAG-NEXT:    stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    cbz x8, .LBB7_2
-; CHECK-SDAG-NEXT:  // %bb.1: // %save.za
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_save
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:  .LBB7_2:
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    zero { zt0 }
-; CHECK-SDAG-NEXT:    mov x19, sp
-; CHECK-SDAG-NEXT:    str zt0, [x19]
-; CHECK-SDAG-NEXT:    bl __arm_sme_state
-; CHECK-SDAG-NEXT:    ldr zt0, [x19]
-; CHECK-SDAG-NEXT:    smstop za
-; CHECK-SDAG-NEXT:    ldp x30, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    add sp, sp, #80
-; CHECK-SDAG-NEXT:    ret
-;
 ; CHECK-LABEL: zt0_new_caller_abi_routine_callee:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #80
@@ -274,22 +199,6 @@ declare {i64, i64} @__arm_sme_state()
 ; Expect smstart ZA & clear ZT0
 ; Before return, expect smstop ZA
 define void @zt0_new_caller(ptr %callee) "aarch64_new_zt0" nounwind {
-; CHECK-SDAG-LABEL: zt0_new_caller:
-; CHECK-SDAG:       // %bb.0: // %prelude
-; CHECK-SDAG-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    cbz x8, .LBB8_2
-; CHECK-SDAG-NEXT:  // %bb.1: // %save.za
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_save
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:  .LBB8_2:
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    zero { zt0 }
-; CHECK-SDAG-NEXT:    blr x0
-; CHECK-SDAG-NEXT:    smstop za
-; CHECK-SDAG-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-SDAG-NEXT:    ret
-;
 ; CHECK-LABEL: zt0_new_caller:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -313,23 +222,6 @@ define void @zt0_new_caller(ptr %callee) "aarch64_new_zt0" nounwind {
 ; Expect smstart ZA, clear ZA & clear ZT0
 ; Before return, expect smstop ZA
 define void @new_za_zt0_caller(ptr %callee) "aarch64_new_za" "aarch64_new_zt0" nounwind {
-; CHECK-SDAG-LABEL: new_za_zt0_caller:
-; CHECK-SDAG:       // %bb.0: // %prelude
-; CHECK-SDAG-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    cbz x8, .LBB9_2
-; CHECK-SDAG-NEXT:  // %bb.1: // %save.za
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_save
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:  .LBB9_2:
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    zero {za}
-; CHECK-SDAG-NEXT:    zero { zt0 }
-; CHECK-SDAG-NEXT:    blr x0
-; CHECK-SDAG-NEXT:    smstop za
-; CHECK-SDAG-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-SDAG-NEXT:    ret
-;
 ; CHECK-LABEL: new_za_zt0_caller:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -352,64 +244,32 @@ define void @new_za_zt0_caller(ptr %callee) "aarch64_new_za" "aarch64_new_zt0" n
 
 ; Expect clear ZA on entry
 define void @new_za_shared_zt0_caller(ptr %callee) "aarch64_new_za" "aarch64_in_zt0" nounwind {
-; CHECK-COMMON-LABEL: new_za_shared_zt0_caller:
-; CHECK-COMMON:       // %bb.0:
-; CHECK-COMMON-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-COMMON-NEXT:    zero {za}
-; CHECK-COMMON-NEXT:    blr x0
-; CHECK-COMMON-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: new_za_shared_zt0_caller:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    zero {za}
+; CHECK-NEXT:    blr x0
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   call void %callee() "aarch64_inout_za" "aarch64_in_zt0";
   ret void;
 }
 
 ; Expect clear ZT0 on entry
 define void @shared_za_new_zt0(ptr %callee) "aarch64_inout_za" "aarch64_new_zt0" nounwind {
-; CHECK-COMMON-LABEL: shared_za_new_zt0:
-; CHECK-COMMON:       // %bb.0:
-; CHECK-COMMON-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-COMMON-NEXT:    zero { zt0 }
-; CHECK-COMMON-NEXT:    blr x0
-; CHECK-COMMON-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: shared_za_new_zt0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    zero { zt0 }
+; CHECK-NEXT:    blr x0
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   call void %callee() "aarch64_inout_za" "aarch64_in_zt0";
   ret void;
 }
 
 
 define void @zt0_multiple_private_za_calls(ptr %callee) "aarch64_in_zt0" nounwind {
-; CHECK-SDAG-LABEL: zt0_multiple_private_za_calls:
-; CHECK-SDAG:       // %bb.0:
-; CHECK-SDAG-NEXT:    sub sp, sp, #96
-; CHECK-SDAG-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    mov x20, sp
-; CHECK-SDAG-NEXT:    mov x19, x0
-; CHECK-SDAG-NEXT:    str x30, [sp, #64] // 8-byte Spill
-; CHECK-SDAG-NEXT:    str zt0, [x20]
-; CHECK-SDAG-NEXT:    smstop za
-; CHECK-SDAG-NEXT:    blr x0
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    ldr zt0, [x20]
-; CHECK-SDAG-NEXT:    str zt0, [x20]
-; CHECK-SDAG-NEXT:    smstop za
-; CHECK-SDAG-NEXT:    blr x19
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    ldr zt0, [x20]
-; CHECK-SDAG-NEXT:    str zt0, [x20]
-; CHECK-SDAG-NEXT:    smstop za
-; CHECK-SDAG-NEXT:    blr x19
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    ldr zt0, [x20]
-; CHECK-SDAG-NEXT:    str zt0, [x20]
-; CHECK-SDAG-NEXT:    smstop za
-; CHECK-SDAG-NEXT:    blr x19
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    ldr zt0, [x20]
-; CHECK-SDAG-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldr x30, [sp, #64] // 8-byte Reload
-; CHECK-SDAG-NEXT:    add sp, sp, #96
-; CHECK-SDAG-NEXT:    ret
-;
 ; CHECK-LABEL: zt0_multiple_private_za_calls:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #96
@@ -437,62 +297,27 @@ define void @zt0_multiple_private_za_calls(ptr %callee) "aarch64_in_zt0" nounwin
 }
 
 define void @disable_tailcallopt(ptr %callee) "aarch64_inout_zt0" nounwind {
-; CHECK-COMMON-LABEL: disable_tailcallopt:
-; CHECK-COMMON:       // %bb.0:
-; CHECK-COMMON-NEXT:    sub sp, sp, #80
-; CHECK-COMMON-NEXT:    stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    mov x19, sp
-; CHECK-COMMON-NEXT:    str zt0, [x19]
-; CHECK-COMMON-NEXT:    smstop za
-; CHECK-COMMON-NEXT:    blr x0
-; CHECK-COMMON-NEXT:    smstart za
-; CHECK-COMMON-NEXT:    ldr zt0, [x19]
-; CHECK-COMMON-NEXT:    ldp x30, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    add sp, sp, #80
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: disable_tailcallopt:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #80
+; CHECK-NEXT:    stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    mov x19, sp
+; CHECK-NEXT:    str zt0, [x19]
+; CHECK-NEXT:    smstop za
+; CHECK-NEXT:    blr x0
+; CHECK-NEXT:    smstart za
+; CHECK-NEXT:    ldr zt0, [x19]
+; CHECK-NEXT:    ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #80
+; CHECK-NEXT:    ret
   tail call void %callee()
   ret void
 }
 
-; Expected new lowering (not CHECK-SDAG)
 ; - Lazy save and spill of ZT0 before first call
 ; - Restore of ZA before second call
 ; - Reload of ZT0 after second call
 define void @za_zt0_private_za_to_shared_za(ptr %callee) "aarch64_inout_za" "aarch64_inout_zt0" nounwind {
-; CHECK-SDAG-LABEL: za_zt0_private_za_to_shared_za:
-; CHECK-SDAG:       // %bb.0:
-; CHECK-SDAG-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    mov x29, sp
-; CHECK-SDAG-NEXT:    sub sp, sp, #80
-; CHECK-SDAG-NEXT:    rdsvl x8, #1
-; CHECK-SDAG-NEXT:    mov x9, sp
-; CHECK-SDAG-NEXT:    mov x19, x0
-; CHECK-SDAG-NEXT:    msub x9, x8, x8, x9
-; CHECK-SDAG-NEXT:    mov sp, x9
-; CHECK-SDAG-NEXT:    sub x10, x29, #16
-; CHECK-SDAG-NEXT:    sub x20, x29, #80
-; CHECK-SDAG-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x10
-; CHECK-SDAG-NEXT:    str zt0, [x20]
-; CHECK-SDAG-NEXT:    blr x0
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    ldr zt0, [x20]
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #16
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB14_2
-; CHECK-SDAG-NEXT:  // %bb.1:
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB14_2:
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    str zt0, [x20]
-; CHECK-SDAG-NEXT:    blr x19
-; CHECK-SDAG-NEXT:    ldr zt0, [x20]
-; CHECK-SDAG-NEXT:    mov sp, x29
-; CHECK-SDAG-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ret
-;
 ; CHECK-LABEL: za_zt0_private_za_to_shared_za:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill

diff  --git a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
index 4dec5471e689c..2cedcfec77826 100644
--- a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
+++ b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
@@ -1,6 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-streaming-hazard-size=0 -aarch64-new-sme-abi=false | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-SDAG
-; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-streaming-hazard-size=0 | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-streaming-hazard-size=0 | FileCheck %s
 ; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-streaming-hazard-size=0 -pass-remarks-analysis=stack-frame-layout 2>&1 >/dev/null | FileCheck %s --check-prefixes=CHECK-FRAMELAYOUT
 
 ; CHECK-FRAMELAYOUT-LABEL: Function: csr_d8_allocnxv4i32i32f64
@@ -11,28 +10,28 @@
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-16 x vscale], Type: Variable, Align: 8, Size: 8
 
 define i32 @csr_d8_allocnxv4i32i32f64(double %d) "aarch64_pstate_sm_compatible" {
-; CHECK-COMMON-LABEL: csr_d8_allocnxv4i32i32f64:
-; CHECK-COMMON:       // %bb.0: // %entry
-; CHECK-COMMON-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-COMMON-NEXT:    str x29, [sp, #8] // 8-byte Spill
-; CHECK-COMMON-NEXT:    sub sp, sp, #16
-; CHECK-COMMON-NEXT:    addvl sp, sp, #-1
-; CHECK-COMMON-NEXT:    .cfi_escape 0x0f, 0x08, 0x8f, 0x20, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 32 + 8 * VG
-; CHECK-COMMON-NEXT:    .cfi_offset w29, -8
-; CHECK-COMMON-NEXT:    .cfi_offset b8, -16
-; CHECK-COMMON-NEXT:    mov z1.s, #0 // =0x0
-; CHECK-COMMON-NEXT:    add x8, sp, #16
-; CHECK-COMMON-NEXT:    mov w0, wzr
-; CHECK-COMMON-NEXT:    //APP
-; CHECK-COMMON-NEXT:    //NO_APP
-; CHECK-COMMON-NEXT:    str wzr, [sp, #12]
-; CHECK-COMMON-NEXT:    str d0, [sp]
-; CHECK-COMMON-NEXT:    str z1, [x8]
-; CHECK-COMMON-NEXT:    addvl sp, sp, #1
-; CHECK-COMMON-NEXT:    add sp, sp, #16
-; CHECK-COMMON-NEXT:    ldr x29, [sp, #8] // 8-byte Reload
-; CHECK-COMMON-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: csr_d8_allocnxv4i32i32f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    str x29, [sp, #8] // 8-byte Spill
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x08, 0x8f, 0x20, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 32 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -8
+; CHECK-NEXT:    .cfi_offset b8, -16
+; CHECK-NEXT:    mov z1.s, #0 // =0x0
+; CHECK-NEXT:    add x8, sp, #16
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    str wzr, [sp, #12]
+; CHECK-NEXT:    str d0, [sp]
+; CHECK-NEXT:    str z1, [x8]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ldr x29, [sp, #8] // 8-byte Reload
+; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
 ; CHECK-NE
 entry:
   %a = alloca <vscale x 4 x i32>
@@ -54,31 +53,31 @@ entry:
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-40-16 x vscale], Type: Variable, Align: 8, Size: 8
 
 define i32 @csr_d8_allocnxv4i32i32f64_fp(double %d) "aarch64_pstate_sm_compatible" "frame-pointer"="all" {
-; CHECK-COMMON-LABEL: csr_d8_allocnxv4i32i32f64_fp:
-; CHECK-COMMON:       // %bb.0: // %entry
-; CHECK-COMMON-NEXT:    str d8, [sp, #-32]! // 8-byte Folded Spill
-; CHECK-COMMON-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    add x29, sp, #16
-; CHECK-COMMON-NEXT:    sub sp, sp, #16
-; CHECK-COMMON-NEXT:    addvl sp, sp, #-1
-; CHECK-COMMON-NEXT:    .cfi_def_cfa w29, 16
-; CHECK-COMMON-NEXT:    .cfi_offset w30, -8
-; CHECK-COMMON-NEXT:    .cfi_offset w29, -16
-; CHECK-COMMON-NEXT:    .cfi_offset b8, -32
-; CHECK-COMMON-NEXT:    mov z1.s, #0 // =0x0
-; CHECK-COMMON-NEXT:    addvl x8, sp, #1
-; CHECK-COMMON-NEXT:    //APP
-; CHECK-COMMON-NEXT:    //NO_APP
-; CHECK-COMMON-NEXT:    str wzr, [x8, #28]
-; CHECK-COMMON-NEXT:    sub x8, x29, #16
-; CHECK-COMMON-NEXT:    mov w0, wzr
-; CHECK-COMMON-NEXT:    str d0, [sp, #8]
-; CHECK-COMMON-NEXT:    str z1, [x8, #-1, mul vl]
-; CHECK-COMMON-NEXT:    addvl sp, sp, #1
-; CHECK-COMMON-NEXT:    add sp, sp, #16
-; CHECK-COMMON-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ldr d8, [sp], #32 // 8-byte Folded Reload
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: csr_d8_allocnxv4i32i32f64_fp:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str d8, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #16
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset b8, -32
+; CHECK-NEXT:    mov z1.s, #0 // =0x0
+; CHECK-NEXT:    addvl x8, sp, #1
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    str wzr, [x8, #28]
+; CHECK-NEXT:    sub x8, x29, #16
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    str d0, [sp, #8]
+; CHECK-NEXT:    str z1, [x8, #-1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr d8, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
 entry:
   %a = alloca <vscale x 4 x i32>
   %b = alloca i32
@@ -104,30 +103,30 @@ entry:
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-128-16 x vscale], Type: Variable, Align: 128, Size: 4
 
 define i32 @csr_d8_allocnxv4i32i32f64_dynamicrealign(double %d) "aarch64_pstate_sm_compatible" {
-; CHECK-COMMON-LABEL: csr_d8_allocnxv4i32i32f64_dynamicrealign:
-; CHECK-COMMON:       // %bb.0: // %entry
-; CHECK-COMMON-NEXT:    str d8, [sp, #-32]! // 8-byte Folded Spill
-; CHECK-COMMON-NEXT:    sub x9, sp, #96
-; CHECK-COMMON-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    add x29, sp, #16
-; CHECK-COMMON-NEXT:    addvl x9, x9, #-1
-; CHECK-COMMON-NEXT:    and sp, x9, #0xffffffffffffff80
-; CHECK-COMMON-NEXT:    .cfi_def_cfa w29, 16
-; CHECK-COMMON-NEXT:    .cfi_offset w30, -8
-; CHECK-COMMON-NEXT:    .cfi_offset w29, -16
-; CHECK-COMMON-NEXT:    .cfi_offset b8, -32
-; CHECK-COMMON-NEXT:    mov z1.s, #0 // =0x0
-; CHECK-COMMON-NEXT:    sub x8, x29, #16
-; CHECK-COMMON-NEXT:    mov w0, wzr
-; CHECK-COMMON-NEXT:    //APP
-; CHECK-COMMON-NEXT:    //NO_APP
-; CHECK-COMMON-NEXT:    str wzr, [sp]
-; CHECK-COMMON-NEXT:    stur d0, [x29, #-8]
-; CHECK-COMMON-NEXT:    str z1, [x8, #-1, mul vl]
-; CHECK-COMMON-NEXT:    sub sp, x29, #16
-; CHECK-COMMON-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ldr d8, [sp], #32 // 8-byte Folded Reload
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: csr_d8_allocnxv4i32i32f64_dynamicrealign:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str d8, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT:    sub x9, sp, #96
+; CHECK-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #16
+; CHECK-NEXT:    addvl x9, x9, #-1
+; CHECK-NEXT:    and sp, x9, #0xffffffffffffff80
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset b8, -32
+; CHECK-NEXT:    mov z1.s, #0 // =0x0
+; CHECK-NEXT:    sub x8, x29, #16
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    str wzr, [sp]
+; CHECK-NEXT:    stur d0, [x29, #-8]
+; CHECK-NEXT:    str z1, [x8, #-1, mul vl]
+; CHECK-NEXT:    sub sp, x29, #16
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr d8, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
 entry:
   %a = alloca <vscale x 4 x i32>
   %b = alloca i32, align 128
@@ -153,44 +152,44 @@ entry:
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-16 x vscale], Type: VariableSized, Align: 1, Size: 0
 
 define i32 @csr_d8_allocnxv4i32i32f64_vla(double %d, i32 %i) "aarch64_pstate_sm_compatible" {
-; CHECK-COMMON-LABEL: csr_d8_allocnxv4i32i32f64_vla:
-; CHECK-COMMON:       // %bb.0: // %entry
-; CHECK-COMMON-NEXT:    str d8, [sp, #-32]! // 8-byte Folded Spill
-; CHECK-COMMON-NEXT:    stp x29, x30, [sp, #8] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    add x29, sp, #8
-; CHECK-COMMON-NEXT:    str x19, [sp, #24] // 8-byte Spill
-; CHECK-COMMON-NEXT:    sub sp, sp, #16
-; CHECK-COMMON-NEXT:    addvl sp, sp, #-1
-; CHECK-COMMON-NEXT:    mov x19, sp
-; CHECK-COMMON-NEXT:    .cfi_def_cfa w29, 24
-; CHECK-COMMON-NEXT:    .cfi_offset w19, -8
-; CHECK-COMMON-NEXT:    .cfi_offset w30, -16
-; CHECK-COMMON-NEXT:    .cfi_offset w29, -24
-; CHECK-COMMON-NEXT:    .cfi_offset b8, -32
-; CHECK-COMMON-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-COMMON-NEXT:    ubfiz x8, x0, #2, #32
-; CHECK-COMMON-NEXT:    mov x9, sp
-; CHECK-COMMON-NEXT:    add x8, x8, #15
-; CHECK-COMMON-NEXT:    and x8, x8, #0x7fffffff0
-; CHECK-COMMON-NEXT:    sub x9, x9, x8
-; CHECK-COMMON-NEXT:    mov sp, x9
-; CHECK-COMMON-NEXT:    mov x10, sp
-; CHECK-COMMON-NEXT:    sub x8, x10, x8
-; CHECK-COMMON-NEXT:    mov sp, x8
-; CHECK-COMMON-NEXT:    mov z1.s, #0 // =0x0
-; CHECK-COMMON-NEXT:    //APP
-; CHECK-COMMON-NEXT:    //NO_APP
-; CHECK-COMMON-NEXT:    str wzr, [x8]
-; CHECK-COMMON-NEXT:    sub x8, x29, #8
-; CHECK-COMMON-NEXT:    mov w0, wzr
-; CHECK-COMMON-NEXT:    str wzr, [x9]
-; CHECK-COMMON-NEXT:    str d0, [x19, #8]
-; CHECK-COMMON-NEXT:    str z1, [x8, #-1, mul vl]
-; CHECK-COMMON-NEXT:    sub sp, x29, #8
-; CHECK-COMMON-NEXT:    ldp x29, x30, [sp, #8] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ldr x19, [sp, #24] // 8-byte Reload
-; CHECK-COMMON-NEXT:    ldr d8, [sp], #32 // 8-byte Folded Reload
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: csr_d8_allocnxv4i32i32f64_vla:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str d8, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #8] // 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #8
+; CHECK-NEXT:    str x19, [sp, #24] // 8-byte Spill
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    mov x19, sp
+; CHECK-NEXT:    .cfi_def_cfa w29, 24
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    .cfi_offset w29, -24
+; CHECK-NEXT:    .cfi_offset b8, -32
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    ubfiz x8, x0, #2, #32
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    add x8, x8, #15
+; CHECK-NEXT:    and x8, x8, #0x7fffffff0
+; CHECK-NEXT:    sub x9, x9, x8
+; CHECK-NEXT:    mov sp, x9
+; CHECK-NEXT:    mov x10, sp
+; CHECK-NEXT:    sub x8, x10, x8
+; CHECK-NEXT:    mov sp, x8
+; CHECK-NEXT:    mov z1.s, #0 // =0x0
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    str wzr, [x8]
+; CHECK-NEXT:    sub x8, x29, #8
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    str wzr, [x9]
+; CHECK-NEXT:    str d0, [x19, #8]
+; CHECK-NEXT:    str z1, [x8, #-1, mul vl]
+; CHECK-NEXT:    sub sp, x29, #8
+; CHECK-NEXT:    ldp x29, x30, [sp, #8] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x19, [sp, #24] // 8-byte Reload
+; CHECK-NEXT:    ldr d8, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
 entry:
   %a = alloca <vscale x 4 x i32>
   %0 = zext i32 %i to i64
@@ -215,28 +214,28 @@ entry:
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-16 x vscale], Type: Variable, Align: 8, Size: 8
 
 define i32 @csr_d8_allocnxv4i32i32f64_stackargsi32f64(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8) "aarch64_pstate_sm_compatible" {
-; CHECK-COMMON-LABEL: csr_d8_allocnxv4i32i32f64_stackargsi32f64:
-; CHECK-COMMON:       // %bb.0: // %entry
-; CHECK-COMMON-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-COMMON-NEXT:    str x29, [sp, #8] // 8-byte Spill
-; CHECK-COMMON-NEXT:    sub sp, sp, #16
-; CHECK-COMMON-NEXT:    addvl sp, sp, #-1
-; CHECK-COMMON-NEXT:    .cfi_escape 0x0f, 0x08, 0x8f, 0x20, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 32 + 8 * VG
-; CHECK-COMMON-NEXT:    .cfi_offset w29, -8
-; CHECK-COMMON-NEXT:    .cfi_offset b8, -16
-; CHECK-COMMON-NEXT:    mov z1.s, #0 // =0x0
-; CHECK-COMMON-NEXT:    add x8, sp, #16
-; CHECK-COMMON-NEXT:    mov w0, wzr
-; CHECK-COMMON-NEXT:    //APP
-; CHECK-COMMON-NEXT:    //NO_APP
-; CHECK-COMMON-NEXT:    str wzr, [sp, #12]
-; CHECK-COMMON-NEXT:    str d0, [sp]
-; CHECK-COMMON-NEXT:    str z1, [x8]
-; CHECK-COMMON-NEXT:    addvl sp, sp, #1
-; CHECK-COMMON-NEXT:    add sp, sp, #16
-; CHECK-COMMON-NEXT:    ldr x29, [sp, #8] // 8-byte Reload
-; CHECK-COMMON-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: csr_d8_allocnxv4i32i32f64_stackargsi32f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    str x29, [sp, #8] // 8-byte Spill
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x08, 0x8f, 0x20, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 32 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -8
+; CHECK-NEXT:    .cfi_offset b8, -16
+; CHECK-NEXT:    mov z1.s, #0 // =0x0
+; CHECK-NEXT:    add x8, sp, #16
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    str wzr, [sp, #12]
+; CHECK-NEXT:    str d0, [sp]
+; CHECK-NEXT:    str z1, [x8]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ldr x29, [sp, #8] // 8-byte Reload
+; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
 entry:
   %a = alloca <vscale x 4 x i32>
   %b = alloca i32
@@ -257,29 +256,29 @@ entry:
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-32 x vscale], Type: Variable, Align: 8, Size: 8
 
 define i32 @svecc_z8_allocnxv4i32i32f64_fp(double %d, <vscale x 4 x i32> %v) "aarch64_pstate_sm_compatible" "frame-pointer"="all" {
-; CHECK-COMMON-LABEL: svecc_z8_allocnxv4i32i32f64_fp:
-; CHECK-COMMON:       // %bb.0: // %entry
-; CHECK-COMMON-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    mov x29, sp
-; CHECK-COMMON-NEXT:    addvl sp, sp, #-1
-; CHECK-COMMON-NEXT:    str z8, [sp] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    sub sp, sp, #16
-; CHECK-COMMON-NEXT:    addvl sp, sp, #-1
-; CHECK-COMMON-NEXT:    .cfi_def_cfa w29, 16
-; CHECK-COMMON-NEXT:    .cfi_offset w30, -8
-; CHECK-COMMON-NEXT:    .cfi_offset w29, -16
-; CHECK-COMMON-NEXT:    .cfi_escape 0x10, 0x48, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x40, 0x1c // $d8 @ cfa - 8 * VG - 16
-; CHECK-COMMON-NEXT:    mov w0, wzr
-; CHECK-COMMON-NEXT:    //APP
-; CHECK-COMMON-NEXT:    //NO_APP
-; CHECK-COMMON-NEXT:    str wzr, [sp, #12]
-; CHECK-COMMON-NEXT:    str z1, [x29, #-2, mul vl]
-; CHECK-COMMON-NEXT:    str d0, [sp], #16
-; CHECK-COMMON-NEXT:    addvl sp, sp, #1
-; CHECK-COMMON-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    addvl sp, sp, #1
-; CHECK-COMMON-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: svecc_z8_allocnxv4i32i32f64_fp:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    str z8, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_escape 0x10, 0x48, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x40, 0x1c // $d8 @ cfa - 8 * VG - 16
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    str wzr, [sp, #12]
+; CHECK-NEXT:    str z1, [x29, #-2, mul vl]
+; CHECK-NEXT:    str d0, [sp], #16
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
 entry:
   %a = alloca <vscale x 4 x i32>
   %b = alloca i32
@@ -301,29 +300,29 @@ entry:
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-32 x vscale], Type: Variable, Align: 8, Size: 8
 
 define i32 @svecc_z8_allocnxv4i32i32f64_stackargsi32_fp(double %d, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, <vscale x 4 x i32> %v) "aarch64_pstate_sm_compatible" "frame-pointer"="all"{
-; CHECK-COMMON-LABEL: svecc_z8_allocnxv4i32i32f64_stackargsi32_fp:
-; CHECK-COMMON:       // %bb.0: // %entry
-; CHECK-COMMON-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    mov x29, sp
-; CHECK-COMMON-NEXT:    addvl sp, sp, #-1
-; CHECK-COMMON-NEXT:    str z8, [sp] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    sub sp, sp, #16
-; CHECK-COMMON-NEXT:    addvl sp, sp, #-1
-; CHECK-COMMON-NEXT:    .cfi_def_cfa w29, 16
-; CHECK-COMMON-NEXT:    .cfi_offset w30, -8
-; CHECK-COMMON-NEXT:    .cfi_offset w29, -16
-; CHECK-COMMON-NEXT:    .cfi_escape 0x10, 0x48, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x40, 0x1c // $d8 @ cfa - 8 * VG - 16
-; CHECK-COMMON-NEXT:    mov w0, wzr
-; CHECK-COMMON-NEXT:    //APP
-; CHECK-COMMON-NEXT:    //NO_APP
-; CHECK-COMMON-NEXT:    str wzr, [sp, #12]
-; CHECK-COMMON-NEXT:    str z1, [x29, #-2, mul vl]
-; CHECK-COMMON-NEXT:    str d0, [sp], #16
-; CHECK-COMMON-NEXT:    addvl sp, sp, #1
-; CHECK-COMMON-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    addvl sp, sp, #1
-; CHECK-COMMON-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: svecc_z8_allocnxv4i32i32f64_stackargsi32_fp:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    str z8, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_escape 0x10, 0x48, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x40, 0x1c // $d8 @ cfa - 8 * VG - 16
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    str wzr, [sp, #12]
+; CHECK-NEXT:    str z1, [x29, #-2, mul vl]
+; CHECK-NEXT:    str d0, [sp], #16
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
 entry:
   %a = alloca <vscale x 4 x i32>
   %b = alloca i32
@@ -373,129 +372,129 @@ entry:
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-280 x vscale], Type: Spill, Align: 2, Size: vscale x 2
 
 define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8> %P3, i16 %P4) "aarch64_pstate_sm_compatible" {
-; CHECK-COMMON-LABEL: svecc_call:
-; CHECK-COMMON:       // %bb.0: // %entry
-; CHECK-COMMON-NEXT:    stp x29, x30, [sp, #-64]! // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    .cfi_def_cfa_offset 64
-; CHECK-COMMON-NEXT:    cntd x9
-; CHECK-COMMON-NEXT:    stp x28, x27, [sp, #32] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    str x9, [sp, #16] // 8-byte Spill
-; CHECK-COMMON-NEXT:    stp x26, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    mov x29, sp
-; CHECK-COMMON-NEXT:    .cfi_def_cfa w29, 64
-; CHECK-COMMON-NEXT:    .cfi_offset w19, -8
-; CHECK-COMMON-NEXT:    .cfi_offset w26, -16
-; CHECK-COMMON-NEXT:    .cfi_offset w27, -24
-; CHECK-COMMON-NEXT:    .cfi_offset w28, -32
-; CHECK-COMMON-NEXT:    .cfi_offset vg, -48
-; CHECK-COMMON-NEXT:    .cfi_offset w30, -56
-; CHECK-COMMON-NEXT:    .cfi_offset w29, -64
-; CHECK-COMMON-NEXT:    addvl sp, sp, #-18
-; CHECK-COMMON-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Spill
-; CHECK-COMMON-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Spill
-; CHECK-COMMON-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Spill
-; CHECK-COMMON-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Spill
-; CHECK-COMMON-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Spill
-; CHECK-COMMON-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Spill
-; CHECK-COMMON-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Spill
-; CHECK-COMMON-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Spill
-; CHECK-COMMON-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Spill
-; CHECK-COMMON-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Spill
-; CHECK-COMMON-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Spill
-; CHECK-COMMON-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Spill
-; CHECK-COMMON-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT:    .cfi_escape 0x10, 0x48, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x78, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d8 @ cfa - 8 * IncomingVG - 64
-; CHECK-COMMON-NEXT:    .cfi_escape 0x10, 0x49, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x70, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d9 @ cfa - 16 * IncomingVG - 64
-; CHECK-COMMON-NEXT:    .cfi_escape 0x10, 0x4a, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d10 @ cfa - 24 * IncomingVG - 64
-; CHECK-COMMON-NEXT:    .cfi_escape 0x10, 0x4b, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d11 @ cfa - 32 * IncomingVG - 64
-; CHECK-COMMON-NEXT:    .cfi_escape 0x10, 0x4c, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d12 @ cfa - 40 * IncomingVG - 64
-; CHECK-COMMON-NEXT:    .cfi_escape 0x10, 0x4d, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d13 @ cfa - 48 * IncomingVG - 64
-; CHECK-COMMON-NEXT:    .cfi_escape 0x10, 0x4e, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d14 @ cfa - 56 * IncomingVG - 64
-; CHECK-COMMON-NEXT:    .cfi_escape 0x10, 0x4f, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d15 @ cfa - 64 * IncomingVG - 64
-; CHECK-COMMON-NEXT:    mov x8, x0
-; CHECK-COMMON-NEXT:    bl __arm_sme_state
-; CHECK-COMMON-NEXT:    mov x19, x0
-; CHECK-COMMON-NEXT:    //APP
-; CHECK-COMMON-NEXT:    //NO_APP
-; CHECK-COMMON-NEXT:    tbz w19, #0, .LBB7_2
-; CHECK-COMMON-NEXT:  // %bb.1: // %entry
-; CHECK-COMMON-NEXT:    smstop sm
-; CHECK-COMMON-NEXT:  .LBB7_2: // %entry
-; CHECK-COMMON-NEXT:    mov x0, x8
-; CHECK-COMMON-NEXT:    mov w1, #45 // =0x2d
-; CHECK-COMMON-NEXT:    mov w2, #37 // =0x25
-; CHECK-COMMON-NEXT:    bl memset
-; CHECK-COMMON-NEXT:    tbz w19, #0, .LBB7_4
-; CHECK-COMMON-NEXT:  // %bb.3: // %entry
-; CHECK-COMMON-NEXT:    smstart sm
-; CHECK-COMMON-NEXT:  .LBB7_4: // %entry
-; CHECK-COMMON-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    mov w0, #22647 // =0x5877
-; CHECK-COMMON-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    movk w0, #59491, lsl #16
-; CHECK-COMMON-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Reload
-; CHECK-COMMON-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Reload
-; CHECK-COMMON-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Reload
-; CHECK-COMMON-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Reload
-; CHECK-COMMON-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Reload
-; CHECK-COMMON-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Reload
-; CHECK-COMMON-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Reload
-; CHECK-COMMON-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Reload
-; CHECK-COMMON-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Reload
-; CHECK-COMMON-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Reload
-; CHECK-COMMON-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Reload
-; CHECK-COMMON-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Reload
-; CHECK-COMMON-NEXT:    addvl sp, sp, #18
-; CHECK-COMMON-NEXT:    .cfi_restore z8
-; CHECK-COMMON-NEXT:    .cfi_restore z9
-; CHECK-COMMON-NEXT:    .cfi_restore z10
-; CHECK-COMMON-NEXT:    .cfi_restore z11
-; CHECK-COMMON-NEXT:    .cfi_restore z12
-; CHECK-COMMON-NEXT:    .cfi_restore z13
-; CHECK-COMMON-NEXT:    .cfi_restore z14
-; CHECK-COMMON-NEXT:    .cfi_restore z15
-; CHECK-COMMON-NEXT:    .cfi_def_cfa wsp, 64
-; CHECK-COMMON-NEXT:    ldp x26, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ldp x28, x27, [sp, #32] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    ldp x29, x30, [sp], #64 // 16-byte Folded Reload
-; CHECK-COMMON-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-COMMON-NEXT:    .cfi_restore w19
-; CHECK-COMMON-NEXT:    .cfi_restore w26
-; CHECK-COMMON-NEXT:    .cfi_restore w27
-; CHECK-COMMON-NEXT:    .cfi_restore w28
-; CHECK-COMMON-NEXT:    .cfi_restore vg
-; CHECK-COMMON-NEXT:    .cfi_restore w30
-; CHECK-COMMON-NEXT:    .cfi_restore w29
-; CHECK-COMMON-NEXT:    ret
+; CHECK-LABEL: svecc_call:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    stp x29, x30, [sp, #-64]! // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    cntd x9
+; CHECK-NEXT:    stp x28, x27, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    str x9, [sp, #16] // 8-byte Spill
+; CHECK-NEXT:    stp x26, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    .cfi_def_cfa w29, 64
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w26, -16
+; CHECK-NEXT:    .cfi_offset w27, -24
+; CHECK-NEXT:    .cfi_offset w28, -32
+; CHECK-NEXT:    .cfi_offset vg, -48
+; CHECK-NEXT:    .cfi_offset w30, -56
+; CHECK-NEXT:    .cfi_offset w29, -64
+; CHECK-NEXT:    addvl sp, sp, #-18
+; CHECK-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Spill
+; CHECK-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Spill
+; CHECK-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Spill
+; CHECK-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Spill
+; CHECK-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Spill
+; CHECK-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Spill
+; CHECK-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Spill
+; CHECK-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Spill
+; CHECK-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Spill
+; CHECK-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Spill
+; CHECK-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Spill
+; CHECK-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Spill
+; CHECK-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_escape 0x10, 0x48, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x78, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d8 @ cfa - 8 * IncomingVG - 64
+; CHECK-NEXT:    .cfi_escape 0x10, 0x49, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x70, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d9 @ cfa - 16 * IncomingVG - 64
+; CHECK-NEXT:    .cfi_escape 0x10, 0x4a, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d10 @ cfa - 24 * IncomingVG - 64
+; CHECK-NEXT:    .cfi_escape 0x10, 0x4b, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d11 @ cfa - 32 * IncomingVG - 64
+; CHECK-NEXT:    .cfi_escape 0x10, 0x4c, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d12 @ cfa - 40 * IncomingVG - 64
+; CHECK-NEXT:    .cfi_escape 0x10, 0x4d, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d13 @ cfa - 48 * IncomingVG - 64
+; CHECK-NEXT:    .cfi_escape 0x10, 0x4e, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d14 @ cfa - 56 * IncomingVG - 64
+; CHECK-NEXT:    .cfi_escape 0x10, 0x4f, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d15 @ cfa - 64 * IncomingVG - 64
+; CHECK-NEXT:    mov x8, x0
+; CHECK-NEXT:    bl __arm_sme_state
+; CHECK-NEXT:    mov x19, x0
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    tbz w19, #0, .LBB7_2
+; CHECK-NEXT:  // %bb.1: // %entry
+; CHECK-NEXT:    smstop sm
+; CHECK-NEXT:  .LBB7_2: // %entry
+; CHECK-NEXT:    mov x0, x8
+; CHECK-NEXT:    mov w1, #45 // =0x2d
+; CHECK-NEXT:    mov w2, #37 // =0x25
+; CHECK-NEXT:    bl memset
+; CHECK-NEXT:    tbz w19, #0, .LBB7_4
+; CHECK-NEXT:  // %bb.3: // %entry
+; CHECK-NEXT:    smstart sm
+; CHECK-NEXT:  .LBB7_4: // %entry
+; CHECK-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    mov w0, #22647 // =0x5877
+; CHECK-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    movk w0, #59491, lsl #16
+; CHECK-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Reload
+; CHECK-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Reload
+; CHECK-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Reload
+; CHECK-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Reload
+; CHECK-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Reload
+; CHECK-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Reload
+; CHECK-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Reload
+; CHECK-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Reload
+; CHECK-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Reload
+; CHECK-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Reload
+; CHECK-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Reload
+; CHECK-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Reload
+; CHECK-NEXT:    addvl sp, sp, #18
+; CHECK-NEXT:    .cfi_restore z8
+; CHECK-NEXT:    .cfi_restore z9
+; CHECK-NEXT:    .cfi_restore z10
+; CHECK-NEXT:    .cfi_restore z11
+; CHECK-NEXT:    .cfi_restore z12
+; CHECK-NEXT:    .cfi_restore z13
+; CHECK-NEXT:    .cfi_restore z14
+; CHECK-NEXT:    .cfi_restore z15
+; CHECK-NEXT:    .cfi_def_cfa wsp, 64
+; CHECK-NEXT:    ldp x26, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x28, x27, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x29, x30, [sp], #64 // 16-byte Folded Reload
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w19
+; CHECK-NEXT:    .cfi_restore w26
+; CHECK-NEXT:    .cfi_restore w27
+; CHECK-NEXT:    .cfi_restore w28
+; CHECK-NEXT:    .cfi_restore vg
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore w29
+; CHECK-NEXT:    ret
 entry:
   tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2
   %call = call ptr @memset(ptr noundef nonnull %P1, i32 noundef 45, i32 noundef 37)
@@ -524,77 +523,6 @@ declare ptr @memset(ptr, i32, i32)
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-128], Type: VariableSized, Align: 16, Size: 0
 
 define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "target-features"="+sme" {
-; CHECK-SDAG-LABEL: vastate:
-; CHECK-SDAG:       // %bb.0: // %entry
-; CHECK-SDAG-NEXT:    stp d15, d14, [sp, #-112]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 112
-; CHECK-SDAG-NEXT:    cntd x9
-; CHECK-SDAG-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    str x9, [sp, #80] // 8-byte Spill
-; CHECK-SDAG-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
-; CHECK-SDAG-NEXT:    add x29, sp, #64
-; CHECK-SDAG-NEXT:    .cfi_def_cfa w29, 48
-; CHECK-SDAG-NEXT:    .cfi_offset w19, -8
-; CHECK-SDAG-NEXT:    .cfi_offset w20, -16
-; CHECK-SDAG-NEXT:    .cfi_offset vg, -32
-; CHECK-SDAG-NEXT:    .cfi_offset w30, -40
-; CHECK-SDAG-NEXT:    .cfi_offset w29, -48
-; CHECK-SDAG-NEXT:    .cfi_offset b8, -56
-; CHECK-SDAG-NEXT:    .cfi_offset b9, -64
-; CHECK-SDAG-NEXT:    .cfi_offset b10, -72
-; CHECK-SDAG-NEXT:    .cfi_offset b11, -80
-; CHECK-SDAG-NEXT:    .cfi_offset b12, -88
-; CHECK-SDAG-NEXT:    .cfi_offset b13, -96
-; CHECK-SDAG-NEXT:    .cfi_offset b14, -104
-; CHECK-SDAG-NEXT:    .cfi_offset b15, -112
-; CHECK-SDAG-NEXT:    sub sp, sp, #16
-; CHECK-SDAG-NEXT:    rdsvl x8, #1
-; CHECK-SDAG-NEXT:    mov x9, sp
-; CHECK-SDAG-NEXT:    mov w20, w0
-; CHECK-SDAG-NEXT:    msub x9, x8, x8, x9
-; CHECK-SDAG-NEXT:    mov sp, x9
-; CHECK-SDAG-NEXT:    sub x10, x29, #80
-; CHECK-SDAG-NEXT:    stp x9, x8, [x29, #-80]
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, x10
-; CHECK-SDAG-NEXT:    smstop sm
-; CHECK-SDAG-NEXT:    bl other
-; CHECK-SDAG-NEXT:    smstart sm
-; CHECK-SDAG-NEXT:    smstart za
-; CHECK-SDAG-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-SDAG-NEXT:    sub x0, x29, #80
-; CHECK-SDAG-NEXT:    cbnz x8, .LBB8_2
-; CHECK-SDAG-NEXT:  // %bb.1: // %entry
-; CHECK-SDAG-NEXT:    bl __arm_tpidr2_restore
-; CHECK-SDAG-NEXT:  .LBB8_2: // %entry
-; CHECK-SDAG-NEXT:    mov w0, w20
-; CHECK-SDAG-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-SDAG-NEXT:    sub sp, x29, #64
-; CHECK-SDAG-NEXT:    .cfi_def_cfa wsp, 112
-; CHECK-SDAG-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    ldp d15, d14, [sp], #112 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-SDAG-NEXT:    .cfi_restore w19
-; CHECK-SDAG-NEXT:    .cfi_restore w20
-; CHECK-SDAG-NEXT:    .cfi_restore vg
-; CHECK-SDAG-NEXT:    .cfi_restore w30
-; CHECK-SDAG-NEXT:    .cfi_restore w29
-; CHECK-SDAG-NEXT:    .cfi_restore b8
-; CHECK-SDAG-NEXT:    .cfi_restore b9
-; CHECK-SDAG-NEXT:    .cfi_restore b10
-; CHECK-SDAG-NEXT:    .cfi_restore b11
-; CHECK-SDAG-NEXT:    .cfi_restore b12
-; CHECK-SDAG-NEXT:    .cfi_restore b13
-; CHECK-SDAG-NEXT:    .cfi_restore b14
-; CHECK-SDAG-NEXT:    .cfi_restore b15
-; CHECK-SDAG-NEXT:    ret
-;
 ; CHECK-LABEL: vastate:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    stp d15, d14, [sp, #-112]! // 16-byte Folded Spill

diff  --git a/llvm/test/Verifier/sme-attributes.ll b/llvm/test/Verifier/sme-attributes.ll
index 0ae2b9fd91f52..4bf5e813daf2f 100644
--- a/llvm/test/Verifier/sme-attributes.ll
+++ b/llvm/test/Verifier/sme-attributes.ll
@@ -68,6 +68,3 @@ declare void @zt0_inout_out() "aarch64_inout_zt0" "aarch64_out_zt0";
 
 declare void @zt0_inout_agnostic() "aarch64_inout_zt0" "aarch64_za_state_agnostic";
 ; CHECK: Attributes 'aarch64_new_zt0', 'aarch64_in_zt0', 'aarch64_out_zt0', 'aarch64_inout_zt0', 'aarch64_preserves_zt0' and 'aarch64_za_state_agnostic' are mutually exclusive
-
-declare void @zt0_undef_function() "aarch64_zt0_undef";
-; CHECK: Attribute 'aarch64_zt0_undef' can only be applied to a callsite.

diff  --git a/llvm/unittests/Target/AArch64/SMEAttributesTest.cpp b/llvm/unittests/Target/AArch64/SMEAttributesTest.cpp
index 595dcd2f4dcc5..f628755c4426f 100644
--- a/llvm/unittests/Target/AArch64/SMEAttributesTest.cpp
+++ b/llvm/unittests/Target/AArch64/SMEAttributesTest.cpp
@@ -72,14 +72,6 @@ TEST(SMEAttributes, Constructors) {
                       ->getFunction("foo"))
                   .isNewZT0());
 
-  auto CallModule = parseIR("declare void @callee()\n"
-                            "define void @foo() {"
-                            "call void @callee() \"aarch64_zt0_undef\"\n"
-                            "ret void\n}");
-  CallBase &Call =
-      cast<CallBase>((CallModule->getFunction("foo")->begin()->front()));
-  ASSERT_TRUE(SMECallAttrs(Call, nullptr).callsite().hasUndefZT0());
-
   // Invalid combinations.
   EXPECT_DEBUG_DEATH(SA(SA::SM_Enabled | SA::SM_Compatible),
                      "SM_Enabled and SM_Compatible are mutually exclusive");
@@ -225,18 +217,6 @@ TEST(SMEAttributes, Basics) {
   ASSERT_FALSE(ZT0_New.hasSharedZAInterface());
   ASSERT_TRUE(ZT0_New.hasPrivateZAInterface());
 
-  SA ZT0_Undef = SA(SA::ZT0_Undef | SA::encodeZT0State(SA::StateValue::New));
-  ASSERT_TRUE(ZT0_Undef.isNewZT0());
-  ASSERT_FALSE(ZT0_Undef.isInZT0());
-  ASSERT_FALSE(ZT0_Undef.isOutZT0());
-  ASSERT_FALSE(ZT0_Undef.isInOutZT0());
-  ASSERT_FALSE(ZT0_Undef.isPreservesZT0());
-  ASSERT_FALSE(ZT0_Undef.sharesZT0());
-  ASSERT_TRUE(ZT0_Undef.hasZT0State());
-  ASSERT_FALSE(ZT0_Undef.hasSharedZAInterface());
-  ASSERT_TRUE(ZT0_Undef.hasPrivateZAInterface());
-  ASSERT_TRUE(ZT0_Undef.hasUndefZT0());
-
   ASSERT_FALSE(SA(SA::Normal).isInZT0());
   ASSERT_FALSE(SA(SA::Normal).isOutZT0());
   ASSERT_FALSE(SA(SA::Normal).isInOutZT0());
@@ -305,7 +285,6 @@ TEST(SMEAttributes, Transitions) {
   SA ZT0_Shared = SA(SA::encodeZT0State(SA::StateValue::In));
   SA ZA_ZT0_Shared = SA(SA::encodeZAState(SA::StateValue::In) |
                         SA::encodeZT0State(SA::StateValue::In));
-  SA Undef_ZT0 = SA(SA::ZT0_Undef);
 
   // Shared ZA -> Private ZA Interface
   ASSERT_FALSE(CA(ZA_Shared, Private_ZA).requiresDisablingZABeforeCall());
@@ -316,15 +295,6 @@ TEST(SMEAttributes, Transitions) {
   ASSERT_TRUE(CA(ZT0_Shared, Private_ZA).requiresPreservingZT0());
   ASSERT_TRUE(CA(ZT0_Shared, Private_ZA).requiresEnablingZAAfterCall());
 
-  // Shared Undef ZT0 -> Private ZA Interface
-  // Note: "Undef ZT0" is a callsite attribute that means ZT0 is undefined at
-  // point the of the call.
-  ASSERT_TRUE(
-      CA(ZT0_Shared, Private_ZA, Undef_ZT0).requiresDisablingZABeforeCall());
-  ASSERT_FALSE(CA(ZT0_Shared, Private_ZA, Undef_ZT0).requiresPreservingZT0());
-  ASSERT_TRUE(
-      CA(ZT0_Shared, Private_ZA, Undef_ZT0).requiresEnablingZAAfterCall());
-
   // Shared ZA & ZT0 -> Private ZA Interface
   ASSERT_FALSE(CA(ZA_ZT0_Shared, Private_ZA).requiresDisablingZABeforeCall());
   ASSERT_TRUE(CA(ZA_ZT0_Shared, Private_ZA).requiresPreservingZT0());

diff  --git a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn
index a7e0eeb3e7d3c..0787cee98d464 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn
@@ -181,7 +181,6 @@ static_library("LLVMAArch64CodeGen") {
     "GISel/AArch64PreLegalizerCombiner.cpp",
     "GISel/AArch64RegisterBankInfo.cpp",
     "MachineSMEABIPass.cpp",
-    "SMEABIPass.cpp",
     "SMEPeepholeOpt.cpp",
     "SVEIntrinsicOpts.cpp",
   ]


        


More information about the cfe-commits mailing list