[llvm-branch-commits] [llvm] [AArch64][SME] Propagate desired ZA states in the MachineSMEABIPass (PR #149510)

Benjamin Maxwell via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri Sep 5 07:32:40 PDT 2025


https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/149510

From d07322bddea4f6286eef5cd29e8e06b0939f8b2e Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 15 Jul 2025 17:00:04 +0000
Subject: [PATCH] [AArch64][SME] Propagate desired ZA states in the
 MachineSMEABIPass

This patch adds a propagation step to the MachineSMEABIPass that
propagates desired ZA states forwards/backwards (from predecessors to
successors, or vice versa).
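
At a high level, the propagation is run in both directions, forwards
then backwards (a condensed sketch mirroring the runOnMachineFunction
change in the patch below):

  // Blocks with no preference can inherit a desired state from either
  // direction, so propagate both ways (at -O1 and above).
  if (OptLevel != CodeGenOptLevel::None) {
    for (bool Forwards : {true, false})
      propagateDesiredStates(Forwards);
  }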

The aim of this is to pick better ZA states for edge bundles: when
many (or all) blocks in a bundle have no preferred ZA state, the state
assigned to the bundle can be less than ideal.
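
Concretely, for each block whose desired state is not a legal
edge-bundle state, a worklist loop tallies the legal desired states of
its neighbours and picks the most common one. A condensed sketch of the
tally in propagateDesiredStates (GetDesiredState below stands in for
the patch's GetBlockState lookup):

  int StateCounts[ZAState::NUM_ZA_STATE] = {0};
  for (MachineBasicBlock *PredOrSucc :
       Forwards ? predecessors(MBB) : successors(MBB)) {
    // Outgoing state of a predecessor, incoming state of a successor.
    ZAState S = GetDesiredState(PredOrSucc);
    if (isLegalEdgeBundleZAState(S))
      StateCounts[S]++;
  }
  // The index of the largest count is the state to propagate.
  ZAState Propagated = ZAState(max_element(StateCounts) - StateCounts);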

An important case is nested loops, where only the inner loop has a
preferred ZA state. Here we'd like to propagate the ZA state up from the
inner loop to the outer loops (to avoid saves/restores in any loop).
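
As a rough illustration of that case (the loop bounds and the
shared_za_call callee are placeholders, as in the existing tests):

  // Only the inner loop has a ZA preference (ACTIVE, due to the
  // shared-ZA call); the outer loop's blocks start as "ANY".
  // Propagating ACTIVE outwards means neither loop needs a ZA
  // save/restore on its back edge.
  for (int I = 0; I < N; ++I)    // outer loop: no inherent preference
    for (int J = 0; J < M; ++J)  // inner loop: desires ZA ACTIVE
      shared_za_call();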

Change-Id: I39f9c7d7608e2fa070be2fb88351b4d1d0079041
---
 llvm/lib/Target/AArch64/MachineSMEABIPass.cpp | 101 +++++++++++++---
 llvm/test/CodeGen/AArch64/sme-agnostic-za.ll  |   9 +-
 .../CodeGen/AArch64/sme-za-control-flow.ll    |  85 +++++---------
 .../test/CodeGen/AArch64/sme-za-exceptions.ll |  36 +++---
 .../AArch64/sme-za-lazy-save-buffer.ll        | 110 ++++++------------
 5 files changed, 163 insertions(+), 178 deletions(-)

diff --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
index a96edf523ab1b..9aa43eea3d977 100644
--- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
+++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
@@ -213,6 +213,11 @@ struct MachineSMEABI : public MachineFunctionPass {
   /// E.g., ACTIVE -> LOCAL_SAVED will insert code required to save ZA.
   void insertStateChanges();
 
+  /// Propagates desired states forwards (from predecessors -> successors) if
+  /// \p Forwards is true; otherwise, propagates backwards (from successors ->
+  /// predecessors).
+  void propagateDesiredStates(bool Forwards = true);
+
   // Emission routines for private and shared ZA functions (using lazy saves).
   void emitNewZAPrologue(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI);
@@ -287,8 +292,10 @@ struct MachineSMEABI : public MachineFunctionPass {
   /// Contains the needed ZA state for each instruction in a block.
   /// Instructions that do not require a ZA state are not recorded.
   struct BlockInfo {
-    ZAState FixedEntryState{ZAState::ANY};
     SmallVector<InstInfo> Insts;
+    ZAState FixedEntryState{ZAState::ANY};
+    ZAState DesiredIncomingState{ZAState::ANY};
+    ZAState DesiredOutgoingState{ZAState::ANY};
     LiveRegs PhysLiveRegsAtEntry = LiveRegs::None;
     LiveRegs PhysLiveRegsAtExit = LiveRegs::None;
   };
@@ -381,28 +388,80 @@ void MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
 
     // Reverse vector (as we had to iterate backwards for liveness).
     std::reverse(Block.Insts.begin(), Block.Insts.end());
+
+    // Record the desired states on entry/exit of this block. These are the
+    // states that would not incur a state transition.
+    if (!Block.Insts.empty()) {
+      Block.DesiredIncomingState = Block.Insts.front().NeededState;
+      Block.DesiredOutgoingState = Block.Insts.back().NeededState;
+    }
+  }
+}
+
+void MachineSMEABI::propagateDesiredStates(bool Forwards) {
+  // If `Forwards` is true, this propagates desired states from predecessors
+  // to successors; otherwise, it propagates states from successors to
+  // predecessors.
+  auto GetBlockState = [](BlockInfo &Block, bool Incoming) -> ZAState & {
+    return Incoming ? Block.DesiredIncomingState : Block.DesiredOutgoingState;
+  };
+
+  SmallVector<MachineBasicBlock *> Worklist;
+  for (auto [BlockID, BlockInfo] : enumerate(State.Blocks)) {
+    if (!isLegalEdgeBundleZAState(GetBlockState(BlockInfo, Forwards)))
+      Worklist.push_back(MF->getBlockNumbered(BlockID));
+  }
+
+  while (!Worklist.empty()) {
+    MachineBasicBlock *MBB = Worklist.pop_back_val();
+    auto &BlockInfo = State.Blocks[MBB->getNumber()];
+
+    // Pick a legal edge bundle state that matches the majority of
+    // predecessors/successors.
+    int StateCounts[ZAState::NUM_ZA_STATE] = {0};
+    for (MachineBasicBlock *PredOrSucc :
+         Forwards ? predecessors(MBB) : successors(MBB)) {
+      auto &PredOrSuccBlockInfo = State.Blocks[PredOrSucc->getNumber()];
+      auto ZAState = GetBlockState(PredOrSuccBlockInfo, !Forwards);
+      if (isLegalEdgeBundleZAState(ZAState))
+        StateCounts[ZAState]++;
+    }
+
+    ZAState PropagatedState = ZAState(max_element(StateCounts) - StateCounts);
+    auto &CurrentState = GetBlockState(BlockInfo, Forwards);
+    if (PropagatedState != CurrentState) {
+      CurrentState = PropagatedState;
+      auto &OtherState = GetBlockState(BlockInfo, !Forwards);
+      // Also update the state in the opposite direction if it is still "ANY".
+      if (OtherState == ZAState::ANY)
+        OtherState = PropagatedState;
+      // Push any successors/predecessors that may need updating to the
+      // worklist.
+      for (MachineBasicBlock *SuccOrPred :
+           Forwards ? successors(MBB) : predecessors(MBB)) {
+        auto &SuccOrPredBlockInfo = State.Blocks[SuccOrPred->getNumber()];
+        if (!isLegalEdgeBundleZAState(
+                GetBlockState(SuccOrPredBlockInfo, Forwards)))
+          Worklist.push_back(SuccOrPred);
+      }
+    }
   }
 }
 
 void MachineSMEABI::assignBundleZAStates() {
   State.BundleStates.resize(Bundles->getNumBundles());
+
   for (unsigned I = 0, E = Bundles->getNumBundles(); I != E; ++I) {
     LLVM_DEBUG(dbgs() << "Assigning ZA state for edge bundle: " << I << '\n');
 
     // Attempt to assign a ZA state for this bundle that minimizes state
     // transitions. Edges within loops are given a higher weight as we assume
     // they will be executed more than once.
-    // TODO: We should propagate desired incoming/outgoing states through blocks
-    // that have the "ANY" state first to make better global decisions.
     int EdgeStateCounts[ZAState::NUM_ZA_STATE] = {0};
     for (unsigned BlockID : Bundles->getBlocks(I)) {
       LLVM_DEBUG(dbgs() << "- bb." << BlockID);
 
-      const BlockInfo &Block = State.Blocks[BlockID];
-      if (Block.Insts.empty()) {
-        LLVM_DEBUG(dbgs() << " (no state preference)\n");
-        continue;
-      }
+      BlockInfo &Block = State.Blocks[BlockID];
       bool IsLoop = MLI && MLI->getLoopFor(MF->getBlockNumbered(BlockID));
       bool InEdge = Bundles->getBundle(BlockID, /*Out=*/false) == I;
       bool OutEdge = Bundles->getBundle(BlockID, /*Out=*/true) == I;
@@ -411,26 +470,28 @@ void MachineSMEABI::assignBundleZAStates() {
         LLVM_DEBUG(dbgs() << " IsLoop");
 
       LLVM_DEBUG(dbgs() << " (EdgeWeight: " << EdgeWeight << ')');
-      ZAState DesiredIncomingState = Block.Insts.front().NeededState;
-      if (InEdge && isLegalEdgeBundleZAState(DesiredIncomingState)) {
-        EdgeStateCounts[DesiredIncomingState] += EdgeWeight;
+      bool LegalInEdge =
+          InEdge && isLegalEdgeBundleZAState(Block.DesiredIncomingState);
+      bool LegalOutEdge =
+          OutEdge && isLegalEdgeBundleZAState(Block.DesiredOutgoingState);
+      if (LegalInEdge) {
         LLVM_DEBUG(dbgs() << " DesiredIncomingState: "
-                          << getZAStateString(DesiredIncomingState));
+                          << getZAStateString(Block.DesiredIncomingState));
+        EdgeStateCounts[Block.DesiredIncomingState] += EdgeWeight;
       }
-      ZAState DesiredOutgoingState = Block.Insts.back().NeededState;
-      if (OutEdge && isLegalEdgeBundleZAState(DesiredOutgoingState)) {
-        EdgeStateCounts[DesiredOutgoingState] += EdgeWeight;
+      if (LegalOutEdge) {
         LLVM_DEBUG(dbgs() << " DesiredOutgoingState: "
-                          << getZAStateString(DesiredOutgoingState));
+                          << getZAStateString(Block.DesiredOutgoingState));
+        EdgeStateCounts[Block.DesiredOutgoingState] += EdgeWeight;
       }
+      if (!LegalInEdge && !LegalOutEdge)
+        LLVM_DEBUG(dbgs() << " (no state preference)");
       LLVM_DEBUG(dbgs() << '\n');
     }
 
     ZAState BundleState =
         ZAState(max_element(EdgeStateCounts) - EdgeStateCounts);
 
-    // Force ZA to be active in bundles that don't have a preferred state.
-    // TODO: Something better here (to avoid extra mode switches).
     if (BundleState == ZAState::ANY)
       BundleState = ZAState::ACTIVE;
 
@@ -839,6 +900,10 @@ bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) {
     MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
 
   collectNeededZAStates(SMEFnAttrs);
+  if (OptLevel != CodeGenOptLevel::None) {
+    for (bool Forwards : {true, false})
+      propagateDesiredStates(Forwards);
+  }
   assignBundleZAStates();
   insertStateChanges();
 
diff --git a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
index 4479cbe2075ff..e53a250258330 100644
--- a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
+++ b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
@@ -363,7 +363,6 @@ define i64  @test_many_callee_arguments(
   ret i64 %ret
 }
 
-; FIXME: The new lowering should avoid saves/restores in the probing loop.
 define void @agnostic_za_buffer_alloc_with_stack_probes() nounwind "aarch64_za_state_agnostic" "probe-stack"="inline-asm" "stack-probe-size"="65536"{
 ; CHECK-LABEL: agnostic_za_buffer_alloc_with_stack_probes:
 ; CHECK:       // %bb.0:
@@ -401,18 +400,14 @@ define void @agnostic_za_buffer_alloc_with_stack_probes() nounwind "aarch64_za_s
 ; CHECK-NEWLOWERING-NEXT:    bl __arm_sme_state_size
 ; CHECK-NEWLOWERING-NEXT:    mov x8, sp
 ; CHECK-NEWLOWERING-NEXT:    sub x19, x8, x0
+; CHECK-NEWLOWERING-NEXT:    mov x0, x19
+; CHECK-NEWLOWERING-NEXT:    bl __arm_sme_save
 ; CHECK-NEWLOWERING-NEXT:  .LBB7_1: // =>This Inner Loop Header: Depth=1
 ; CHECK-NEWLOWERING-NEXT:    sub sp, sp, #16, lsl #12 // =65536
 ; CHECK-NEWLOWERING-NEXT:    cmp sp, x19
-; CHECK-NEWLOWERING-NEXT:    mov x0, x19
-; CHECK-NEWLOWERING-NEXT:    mrs x8, NZCV
-; CHECK-NEWLOWERING-NEXT:    bl __arm_sme_save
-; CHECK-NEWLOWERING-NEXT:    msr NZCV, x8
 ; CHECK-NEWLOWERING-NEXT:    b.le .LBB7_3
 ; CHECK-NEWLOWERING-NEXT:  // %bb.2: // in Loop: Header=BB7_1 Depth=1
-; CHECK-NEWLOWERING-NEXT:    mov x0, x19
 ; CHECK-NEWLOWERING-NEXT:    str xzr, [sp]
-; CHECK-NEWLOWERING-NEXT:    bl __arm_sme_restore
 ; CHECK-NEWLOWERING-NEXT:    b .LBB7_1
 ; CHECK-NEWLOWERING-NEXT:  .LBB7_3:
 ; CHECK-NEWLOWERING-NEXT:    mov sp, x19
diff --git a/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll b/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll
index 1ede38bd731f9..1ffa01675aafc 100644
--- a/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll
+++ b/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll
@@ -223,65 +223,34 @@ exit:
   ret void
 }
 
-; FIXME: The codegen for this case could be improved (by tuning weights).
-; Here the ZA save has been hoisted out of the conditional, but would be better
-; to sink it.
 define void @cond_private_za_call(i1 %cond) "aarch64_inout_za" nounwind {
-; CHECK-LABEL: cond_private_za_call:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT:    mov x29, sp
-; CHECK-NEXT:    sub sp, sp, #16
-; CHECK-NEXT:    rdsvl x8, #1
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    msub x9, x8, x8, x9
-; CHECK-NEXT:    mov sp, x9
-; CHECK-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-NEXT:    tbz w0, #0, .LBB3_4
-; CHECK-NEXT:  // %bb.1: // %private_za_call
-; CHECK-NEXT:    sub x8, x29, #16
-; CHECK-NEXT:    msr TPIDR2_EL0, x8
-; CHECK-NEXT:    bl private_za_call
-; CHECK-NEXT:    smstart za
-; CHECK-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-NEXT:    sub x0, x29, #16
-; CHECK-NEXT:    cbnz x8, .LBB3_3
-; CHECK-NEXT:  // %bb.2: // %private_za_call
-; CHECK-NEXT:    bl __arm_tpidr2_restore
-; CHECK-NEXT:  .LBB3_3: // %private_za_call
-; CHECK-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-NEXT:  .LBB3_4: // %exit
-; CHECK-NEXT:    mov sp, x29
-; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEXT:    b shared_za_call
-;
-; CHECK-NEWLOWERING-LABEL: cond_private_za_call:
-; CHECK-NEWLOWERING:       // %bb.0:
-; CHECK-NEWLOWERING-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEWLOWERING-NEXT:    mov x29, sp
-; CHECK-NEWLOWERING-NEXT:    sub sp, sp, #16
-; CHECK-NEWLOWERING-NEXT:    rdsvl x8, #1
-; CHECK-NEWLOWERING-NEXT:    mov x9, sp
-; CHECK-NEWLOWERING-NEXT:    msub x9, x8, x8, x9
-; CHECK-NEWLOWERING-NEXT:    mov sp, x9
-; CHECK-NEWLOWERING-NEXT:    sub x10, x29, #16
-; CHECK-NEWLOWERING-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, x10
-; CHECK-NEWLOWERING-NEXT:    tbz w0, #0, .LBB3_2
-; CHECK-NEWLOWERING-NEXT:  // %bb.1: // %private_za_call
-; CHECK-NEWLOWERING-NEXT:    bl private_za_call
-; CHECK-NEWLOWERING-NEXT:  .LBB3_2: // %exit
-; CHECK-NEWLOWERING-NEXT:    smstart za
-; CHECK-NEWLOWERING-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-NEWLOWERING-NEXT:    sub x0, x29, #16
-; CHECK-NEWLOWERING-NEXT:    cbnz x8, .LBB3_4
-; CHECK-NEWLOWERING-NEXT:  // %bb.3: // %exit
-; CHECK-NEWLOWERING-NEXT:    bl __arm_tpidr2_restore
-; CHECK-NEWLOWERING-NEXT:  .LBB3_4: // %exit
-; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-NEWLOWERING-NEXT:    mov sp, x29
-; CHECK-NEWLOWERING-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEWLOWERING-NEXT:    b shared_za_call
+; CHECK-COMMON-LABEL: cond_private_za_call:
+; CHECK-COMMON:       // %bb.0:
+; CHECK-COMMON-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT:    mov x29, sp
+; CHECK-COMMON-NEXT:    sub sp, sp, #16
+; CHECK-COMMON-NEXT:    rdsvl x8, #1
+; CHECK-COMMON-NEXT:    mov x9, sp
+; CHECK-COMMON-NEXT:    msub x9, x8, x8, x9
+; CHECK-COMMON-NEXT:    mov sp, x9
+; CHECK-COMMON-NEXT:    stp x9, x8, [x29, #-16]
+; CHECK-COMMON-NEXT:    tbz w0, #0, .LBB3_4
+; CHECK-COMMON-NEXT:  // %bb.1: // %private_za_call
+; CHECK-COMMON-NEXT:    sub x8, x29, #16
+; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, x8
+; CHECK-COMMON-NEXT:    bl private_za_call
+; CHECK-COMMON-NEXT:    smstart za
+; CHECK-COMMON-NEXT:    mrs x8, TPIDR2_EL0
+; CHECK-COMMON-NEXT:    sub x0, x29, #16
+; CHECK-COMMON-NEXT:    cbnz x8, .LBB3_3
+; CHECK-COMMON-NEXT:  // %bb.2: // %private_za_call
+; CHECK-COMMON-NEXT:    bl __arm_tpidr2_restore
+; CHECK-COMMON-NEXT:  .LBB3_3: // %private_za_call
+; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, xzr
+; CHECK-COMMON-NEXT:  .LBB3_4: // %exit
+; CHECK-COMMON-NEXT:    mov sp, x29
+; CHECK-COMMON-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-COMMON-NEXT:    b shared_za_call
   br i1 %cond, label %private_za_call, label %exit
 
 private_za_call:
diff --git a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
index bb88142efa592..506974a14c3be 100644
--- a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
+++ b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
@@ -56,31 +56,23 @@ define void @za_with_raii(i1 %fail) "aarch64_inout_za" personality ptr @__gxx_pe
 ; CHECK-NEXT:    adrp x8, .L.str
 ; CHECK-NEXT:    add x8, x8, :lo12:.L.str
 ; CHECK-NEXT:    str x8, [x0]
-; CHECK-NEXT:  .Ltmp0:
+; CHECK-NEXT:  .Ltmp0: // EH_LABEL
 ; CHECK-NEXT:    adrp x1, :got:typeinfo_for_char_const_ptr
 ; CHECK-NEXT:    mov x2, xzr
 ; CHECK-NEXT:    ldr x1, [x1, :got_lo12:typeinfo_for_char_const_ptr]
 ; CHECK-NEXT:    bl __cxa_throw
-; CHECK-NEXT:  .Ltmp1:
-; CHECK-NEXT:    smstart za
-; CHECK-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-NEXT:    sub x0, x29, #16
-; CHECK-NEXT:    cbnz x8, .LBB0_4
-; CHECK-NEXT:  // %bb.3: // %throw_exception
-; CHECK-NEXT:    bl __arm_tpidr2_restore
-; CHECK-NEXT:  .LBB0_4: // %throw_exception
-; CHECK-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-NEXT:  // %bb.5: // %throw_fail
-; CHECK-NEXT:  .LBB0_6: // %unwind_dtors
-; CHECK-NEXT:  .Ltmp2:
+; CHECK-NEXT:  .Ltmp1: // EH_LABEL
+; CHECK-NEXT:  // %bb.3: // %throw_fail
+; CHECK-NEXT:  .LBB0_4: // %unwind_dtors
+; CHECK-NEXT:  .Ltmp2: // EH_LABEL
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    smstart za
 ; CHECK-NEXT:    mrs x8, TPIDR2_EL0
 ; CHECK-NEXT:    sub x0, x29, #16
-; CHECK-NEXT:    cbnz x8, .LBB0_8
-; CHECK-NEXT:  // %bb.7: // %unwind_dtors
+; CHECK-NEXT:    cbnz x8, .LBB0_6
+; CHECK-NEXT:  // %bb.5: // %unwind_dtors
 ; CHECK-NEXT:    bl __arm_tpidr2_restore
-; CHECK-NEXT:  .LBB0_8: // %unwind_dtors
+; CHECK-NEXT:  .LBB0_6: // %unwind_dtors
 ; CHECK-NEXT:    msr TPIDR2_EL0, xzr
 ; CHECK-NEXT:    bl shared_za_call
 ; CHECK-NEXT:    sub x8, x29, #16
@@ -142,11 +134,11 @@ define dso_local void @try_catch() "aarch64_inout_za" personality ptr @__gxx_per
 ; CHECK-NEXT:    msub x9, x8, x8, x9
 ; CHECK-NEXT:    mov sp, x9
 ; CHECK-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-NEXT:  .Ltmp3:
+; CHECK-NEXT:  .Ltmp3: // EH_LABEL
 ; CHECK-NEXT:    sub x8, x29, #16
 ; CHECK-NEXT:    msr TPIDR2_EL0, x8
 ; CHECK-NEXT:    bl may_throw
-; CHECK-NEXT:  .Ltmp4:
+; CHECK-NEXT:  .Ltmp4: // EH_LABEL
 ; CHECK-NEXT:  .LBB1_1: // %after_catch
 ; CHECK-NEXT:    smstart za
 ; CHECK-NEXT:    mrs x8, TPIDR2_EL0
@@ -160,7 +152,7 @@ define dso_local void @try_catch() "aarch64_inout_za" personality ptr @__gxx_per
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
 ; CHECK-NEXT:    b shared_za_call
 ; CHECK-NEXT:  .LBB1_4: // %catch
-; CHECK-NEXT:  .Ltmp5:
+; CHECK-NEXT:  .Ltmp5: // EH_LABEL
 ; CHECK-NEXT:    bl __cxa_begin_catch
 ; CHECK-NEXT:    smstart za
 ; CHECK-NEXT:    mrs x8, TPIDR2_EL0
@@ -235,16 +227,16 @@ define void @try_catch_shared_za_callee() "aarch64_new_za" personality ptr @__gx
 ; CHECK-NEXT:    zero {za}
 ; CHECK-NEXT:  .LBB2_2:
 ; CHECK-NEXT:    smstart za
-; CHECK-NEXT:  .Ltmp6:
+; CHECK-NEXT:  .Ltmp6: // EH_LABEL
 ; CHECK-NEXT:    bl shared_za_call
-; CHECK-NEXT:  .Ltmp7:
+; CHECK-NEXT:  .Ltmp7: // EH_LABEL
 ; CHECK-NEXT:  .LBB2_3: // %exit
 ; CHECK-NEXT:    smstop za
 ; CHECK-NEXT:    mov sp, x29
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB2_4: // %catch
-; CHECK-NEXT:  .Ltmp8:
+; CHECK-NEXT:  .Ltmp8: // EH_LABEL
 ; CHECK-NEXT:    bl __cxa_begin_catch
 ; CHECK-NEXT:    smstart za
 ; CHECK-NEXT:    mrs x8, TPIDR2_EL0
diff --git a/llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll b/llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll
index 066ee3b040469..afd56d198d0d3 100644
--- a/llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll
+++ b/llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll
@@ -12,77 +12,41 @@ entry:
 }
 
 define float @multi_bb_stpidr2_save_required(i32 %a, float %b, float %c) "aarch64_inout_za" {
-; CHECK-LABEL: multi_bb_stpidr2_save_required:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT:    mov x29, sp
-; CHECK-NEXT:    sub sp, sp, #16
-; CHECK-NEXT:    .cfi_def_cfa w29, 16
-; CHECK-NEXT:    .cfi_offset w30, -8
-; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    rdsvl x8, #1
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    msub x9, x8, x8, x9
-; CHECK-NEXT:    mov sp, x9
-; CHECK-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-NEXT:    cbz w0, .LBB1_2
-; CHECK-NEXT:  // %bb.1: // %use_b
-; CHECK-NEXT:    fmov s1, #4.00000000
-; CHECK-NEXT:    fadd s0, s0, s1
-; CHECK-NEXT:    b .LBB1_5
-; CHECK-NEXT:  .LBB1_2: // %use_c
-; CHECK-NEXT:    fmov s0, s1
-; CHECK-NEXT:    sub x8, x29, #16
-; CHECK-NEXT:    msr TPIDR2_EL0, x8
-; CHECK-NEXT:    bl cosf
-; CHECK-NEXT:    smstart za
-; CHECK-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-NEXT:    sub x0, x29, #16
-; CHECK-NEXT:    cbnz x8, .LBB1_4
-; CHECK-NEXT:  // %bb.3: // %use_c
-; CHECK-NEXT:    bl __arm_tpidr2_restore
-; CHECK-NEXT:  .LBB1_4: // %use_c
-; CHECK-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-NEXT:  .LBB1_5: // %exit
-; CHECK-NEXT:    mov sp, x29
-; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEXT:    ret
-;
-; CHECK-NEWLOWERING-LABEL: multi_bb_stpidr2_save_required:
-; CHECK-NEWLOWERING:       // %bb.0:
-; CHECK-NEWLOWERING-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEWLOWERING-NEXT:    mov x29, sp
-; CHECK-NEWLOWERING-NEXT:    sub sp, sp, #16
-; CHECK-NEWLOWERING-NEXT:    .cfi_def_cfa w29, 16
-; CHECK-NEWLOWERING-NEXT:    .cfi_offset w30, -8
-; CHECK-NEWLOWERING-NEXT:    .cfi_offset w29, -16
-; CHECK-NEWLOWERING-NEXT:    rdsvl x8, #1
-; CHECK-NEWLOWERING-NEXT:    mov x9, sp
-; CHECK-NEWLOWERING-NEXT:    msub x9, x8, x8, x9
-; CHECK-NEWLOWERING-NEXT:    mov sp, x9
-; CHECK-NEWLOWERING-NEXT:    sub x10, x29, #16
-; CHECK-NEWLOWERING-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, x10
-; CHECK-NEWLOWERING-NEXT:    cbz w0, .LBB1_2
-; CHECK-NEWLOWERING-NEXT:  // %bb.1: // %use_b
-; CHECK-NEWLOWERING-NEXT:    fmov s1, #4.00000000
-; CHECK-NEWLOWERING-NEXT:    fadd s0, s0, s1
-; CHECK-NEWLOWERING-NEXT:    b .LBB1_3
-; CHECK-NEWLOWERING-NEXT:  .LBB1_2: // %use_c
-; CHECK-NEWLOWERING-NEXT:    fmov s0, s1
-; CHECK-NEWLOWERING-NEXT:    bl cosf
-; CHECK-NEWLOWERING-NEXT:  .LBB1_3: // %exit
-; CHECK-NEWLOWERING-NEXT:    smstart za
-; CHECK-NEWLOWERING-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-NEWLOWERING-NEXT:    sub x0, x29, #16
-; CHECK-NEWLOWERING-NEXT:    cbnz x8, .LBB1_5
-; CHECK-NEWLOWERING-NEXT:  // %bb.4: // %exit
-; CHECK-NEWLOWERING-NEXT:    bl __arm_tpidr2_restore
-; CHECK-NEWLOWERING-NEXT:  .LBB1_5: // %exit
-; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-NEWLOWERING-NEXT:    mov sp, x29
-; CHECK-NEWLOWERING-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEWLOWERING-NEXT:    ret
+; CHECK-COMMON-LABEL: multi_bb_stpidr2_save_required:
+; CHECK-COMMON:       // %bb.0:
+; CHECK-COMMON-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT:    mov x29, sp
+; CHECK-COMMON-NEXT:    sub sp, sp, #16
+; CHECK-COMMON-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-COMMON-NEXT:    .cfi_offset w30, -8
+; CHECK-COMMON-NEXT:    .cfi_offset w29, -16
+; CHECK-COMMON-NEXT:    rdsvl x8, #1
+; CHECK-COMMON-NEXT:    mov x9, sp
+; CHECK-COMMON-NEXT:    msub x9, x8, x8, x9
+; CHECK-COMMON-NEXT:    mov sp, x9
+; CHECK-COMMON-NEXT:    stp x9, x8, [x29, #-16]
+; CHECK-COMMON-NEXT:    cbz w0, .LBB1_2
+; CHECK-COMMON-NEXT:  // %bb.1: // %use_b
+; CHECK-COMMON-NEXT:    fmov s1, #4.00000000
+; CHECK-COMMON-NEXT:    fadd s0, s0, s1
+; CHECK-COMMON-NEXT:    b .LBB1_5
+; CHECK-COMMON-NEXT:  .LBB1_2: // %use_c
+; CHECK-COMMON-NEXT:    fmov s0, s1
+; CHECK-COMMON-NEXT:    sub x8, x29, #16
+; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, x8
+; CHECK-COMMON-NEXT:    bl cosf
+; CHECK-COMMON-NEXT:    smstart za
+; CHECK-COMMON-NEXT:    mrs x8, TPIDR2_EL0
+; CHECK-COMMON-NEXT:    sub x0, x29, #16
+; CHECK-COMMON-NEXT:    cbnz x8, .LBB1_4
+; CHECK-COMMON-NEXT:  // %bb.3: // %use_c
+; CHECK-COMMON-NEXT:    bl __arm_tpidr2_restore
+; CHECK-COMMON-NEXT:  .LBB1_4: // %use_c
+; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, xzr
+; CHECK-COMMON-NEXT:  .LBB1_5: // %exit
+; CHECK-COMMON-NEXT:    mov sp, x29
+; CHECK-COMMON-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-COMMON-NEXT:    ret
   %cmp = icmp ne i32 %a, 0
   br i1 %cmp, label %use_b, label %use_c
 
@@ -155,7 +119,9 @@ define float @multi_bb_stpidr2_save_required_stackprobe(i32 %a, float %b, float
 ; CHECK-NEWLOWERING-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEWLOWERING-NEXT:    rdsvl x8, #1
 ; CHECK-NEWLOWERING-NEXT:    mov x9, sp
+; CHECK-NEWLOWERING-NEXT:    sub x10, x29, #16
 ; CHECK-NEWLOWERING-NEXT:    msub x9, x8, x8, x9
+; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, x10
 ; CHECK-NEWLOWERING-NEXT:  .LBB2_1: // =>This Inner Loop Header: Depth=1
 ; CHECK-NEWLOWERING-NEXT:    sub sp, sp, #16, lsl #12 // =65536
 ; CHECK-NEWLOWERING-NEXT:    cmp sp, x9
@@ -166,9 +132,7 @@ define float @multi_bb_stpidr2_save_required_stackprobe(i32 %a, float %b, float
 ; CHECK-NEWLOWERING-NEXT:  .LBB2_3:
 ; CHECK-NEWLOWERING-NEXT:    mov sp, x9
 ; CHECK-NEWLOWERING-NEXT:    ldr xzr, [sp]
-; CHECK-NEWLOWERING-NEXT:    sub x10, x29, #16
 ; CHECK-NEWLOWERING-NEXT:    stp x9, x8, [x29, #-16]
-; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, x10
 ; CHECK-NEWLOWERING-NEXT:    cbz w0, .LBB2_5
 ; CHECK-NEWLOWERING-NEXT:  // %bb.4: // %use_b
 ; CHECK-NEWLOWERING-NEXT:    fmov s1, #4.00000000


