[llvm-branch-commits] [llvm] [AArch64][SME] Propagate desired ZA states in the MachineSMEABIPass (PR #149510)
Benjamin Maxwell via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Sep 5 07:32:40 PDT 2025
https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/149510
>From d07322bddea4f6286eef5cd29e8e06b0939f8b2e Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 15 Jul 2025 17:00:04 +0000
Subject: [PATCH] [AArch64][SME] Propagate desired ZA states in the
MachineSMEABIPass
This patch adds a step to the MachineSMEABIPass that, when optimizations
are enabled, propagates desired ZA states forwards (from predecessors to
successors) and then backwards (from successors to predecessors).
The aim is to pick better ZA states for edge bundles: when many (or all)
blocks in a bundle have no preferred ZA state, the ZA state assigned to
that bundle can be less than ideal.
An important case is nested loops, where only the inner loop has a
preferred ZA state. Here we'd like to propagate the ZA state up from the
inner loop to the outer loops (to avoid saves/restores in any loop).
Change-Id: I39f9c7d7608e2fa070be2fb88351b4d1d0079041
---
llvm/lib/Target/AArch64/MachineSMEABIPass.cpp | 101 +++++++++++++---
llvm/test/CodeGen/AArch64/sme-agnostic-za.ll | 9 +-
.../CodeGen/AArch64/sme-za-control-flow.ll | 85 +++++---------
.../test/CodeGen/AArch64/sme-za-exceptions.ll | 36 +++---
.../AArch64/sme-za-lazy-save-buffer.ll | 110 ++++++------------
5 files changed, 163 insertions(+), 178 deletions(-)
diff --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
index a96edf523ab1b..9aa43eea3d977 100644
--- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
+++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
@@ -213,6 +213,11 @@ struct MachineSMEABI : public MachineFunctionPass {
/// E.g., ACTIVE -> LOCAL_SAVED will insert code required to save ZA.
void insertStateChanges();
+ /// Propagates desired states forwards (from predecessors -> successors) if
+ /// \p Forwards, otherwise, propagates backwards (from successors ->
+ /// predecessors).
+ void propagateDesiredStates(bool Forwards = true);
+
// Emission routines for private and shared ZA functions (using lazy saves).
void emitNewZAPrologue(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
@@ -287,8 +292,10 @@ struct MachineSMEABI : public MachineFunctionPass {
/// Contains the needed ZA state for each instruction in a block.
/// Instructions that do not require a ZA state are not recorded.
struct BlockInfo {
- ZAState FixedEntryState{ZAState::ANY};
SmallVector<InstInfo> Insts;
+ ZAState FixedEntryState{ZAState::ANY};
+ ZAState DesiredIncomingState{ZAState::ANY};
+ ZAState DesiredOutgoingState{ZAState::ANY};
LiveRegs PhysLiveRegsAtEntry = LiveRegs::None;
LiveRegs PhysLiveRegsAtExit = LiveRegs::None;
};
@@ -381,28 +388,80 @@ void MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
// Reverse vector (as we had to iterate backwards for liveness).
std::reverse(Block.Insts.begin(), Block.Insts.end());
+
+ // Record the desired states on entry/exit of this block. These are the
+ // states that would not incur a state transition.
+ if (!Block.Insts.empty()) {
+ Block.DesiredIncomingState = Block.Insts.front().NeededState;
+ Block.DesiredOutgoingState = Block.Insts.back().NeededState;
+ }
+ }
+}
+
+void MachineSMEABI::propagateDesiredStates(bool Forwards) {
+ // If `Forwards`, this propagates desired states from predecessors to
+ // successors, otherwise, this propagates states from successors to
+ // predecessors.
+ auto GetBlockState = [](BlockInfo &Block, bool Incoming) -> ZAState & {
+ return Incoming ? Block.DesiredIncomingState : Block.DesiredOutgoingState;
+ };
+
+ SmallVector<MachineBasicBlock *> Worklist;
+ for (auto [BlockID, BlockInfo] : enumerate(State.Blocks)) {
+ if (!isLegalEdgeBundleZAState(GetBlockState(BlockInfo, Forwards)))
+ Worklist.push_back(MF->getBlockNumbered(BlockID));
+ }
+
+ while (!Worklist.empty()) {
+ MachineBasicBlock *MBB = Worklist.pop_back_val();
+ auto &BlockInfo = State.Blocks[MBB->getNumber()];
+
+ // Pick a legal edge bundle state that matches the majority of
+ // predecessors/successors.
+ int StateCounts[ZAState::NUM_ZA_STATE] = {0};
+ for (MachineBasicBlock *PredOrSucc :
+ Forwards ? predecessors(MBB) : successors(MBB)) {
+ auto &PredOrSuccBlockInfo = State.Blocks[PredOrSucc->getNumber()];
+ auto ZAState = GetBlockState(PredOrSuccBlockInfo, !Forwards);
+ if (isLegalEdgeBundleZAState(ZAState))
+ StateCounts[ZAState]++;
+ }
+
+ ZAState PropagatedState = ZAState(max_element(StateCounts) - StateCounts);
+ auto &CurrentState = GetBlockState(BlockInfo, Forwards);
+ if (PropagatedState != CurrentState) {
+ CurrentState = PropagatedState;
+ auto &OtherState = GetBlockState(BlockInfo, !Forwards);
+ // Propagate to the incoming/outgoing state if that is also "ANY".
+ if (OtherState == ZAState::ANY)
+ OtherState = PropagatedState;
+ // Push any successors/predecessors that may need updating to the
+ // worklist.
+ for (MachineBasicBlock *SuccOrPred :
+ Forwards ? successors(MBB) : predecessors(MBB)) {
+ auto &SuccOrPredBlockInfo = State.Blocks[SuccOrPred->getNumber()];
+ if (!isLegalEdgeBundleZAState(
+ GetBlockState(SuccOrPredBlockInfo, Forwards)))
+ Worklist.push_back(SuccOrPred);
+ }
+ }
}
}
void MachineSMEABI::assignBundleZAStates() {
State.BundleStates.resize(Bundles->getNumBundles());
+
for (unsigned I = 0, E = Bundles->getNumBundles(); I != E; ++I) {
LLVM_DEBUG(dbgs() << "Assigning ZA state for edge bundle: " << I << '\n');
// Attempt to assign a ZA state for this bundle that minimizes state
// transitions. Edges within loops are given a higher weight as we assume
// they will be executed more than once.
- // TODO: We should propagate desired incoming/outgoing states through blocks
- // that have the "ANY" state first to make better global decisions.
int EdgeStateCounts[ZAState::NUM_ZA_STATE] = {0};
for (unsigned BlockID : Bundles->getBlocks(I)) {
LLVM_DEBUG(dbgs() << "- bb." << BlockID);
- const BlockInfo &Block = State.Blocks[BlockID];
- if (Block.Insts.empty()) {
- LLVM_DEBUG(dbgs() << " (no state preference)\n");
- continue;
- }
+ BlockInfo &Block = State.Blocks[BlockID];
bool IsLoop = MLI && MLI->getLoopFor(MF->getBlockNumbered(BlockID));
bool InEdge = Bundles->getBundle(BlockID, /*Out=*/false) == I;
bool OutEdge = Bundles->getBundle(BlockID, /*Out=*/true) == I;
@@ -411,26 +470,28 @@ void MachineSMEABI::assignBundleZAStates() {
LLVM_DEBUG(dbgs() << " IsLoop");
LLVM_DEBUG(dbgs() << " (EdgeWeight: " << EdgeWeight << ')');
- ZAState DesiredIncomingState = Block.Insts.front().NeededState;
- if (InEdge && isLegalEdgeBundleZAState(DesiredIncomingState)) {
- EdgeStateCounts[DesiredIncomingState] += EdgeWeight;
+ bool LegalInEdge =
+ InEdge && isLegalEdgeBundleZAState(Block.DesiredIncomingState);
+ bool LegalOutEgde =
+ OutEdge && isLegalEdgeBundleZAState(Block.DesiredOutgoingState);
+ if (LegalInEdge) {
LLVM_DEBUG(dbgs() << " DesiredIncomingState: "
- << getZAStateString(DesiredIncomingState));
+ << getZAStateString(Block.DesiredIncomingState));
+ EdgeStateCounts[Block.DesiredIncomingState] += EdgeWeight;
}
- ZAState DesiredOutgoingState = Block.Insts.back().NeededState;
- if (OutEdge && isLegalEdgeBundleZAState(DesiredOutgoingState)) {
- EdgeStateCounts[DesiredOutgoingState] += EdgeWeight;
+ if (LegalOutEgde) {
LLVM_DEBUG(dbgs() << " DesiredOutgoingState: "
- << getZAStateString(DesiredOutgoingState));
+ << getZAStateString(Block.DesiredOutgoingState));
+ EdgeStateCounts[Block.DesiredOutgoingState] += EdgeWeight;
}
+ if (!LegalInEdge && !LegalOutEgde)
+ LLVM_DEBUG(dbgs() << " (no state preference)");
LLVM_DEBUG(dbgs() << '\n');
}
ZAState BundleState =
ZAState(max_element(EdgeStateCounts) - EdgeStateCounts);
- // Force ZA to be active in bundles that don't have a preferred state.
- // TODO: Something better here (to avoid extra mode switches).
if (BundleState == ZAState::ANY)
BundleState = ZAState::ACTIVE;
@@ -839,6 +900,10 @@ bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) {
MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
collectNeededZAStates(SMEFnAttrs);
+ if (OptLevel != CodeGenOptLevel::None) {
+ for (bool Forwards : {true, false})
+ propagateDesiredStates(Forwards);
+ }
assignBundleZAStates();
insertStateChanges();
diff --git a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
index 4479cbe2075ff..e53a250258330 100644
--- a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
+++ b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
@@ -363,7 +363,6 @@ define i64 @test_many_callee_arguments(
ret i64 %ret
}
-; FIXME: The new lowering should avoid saves/restores in the probing loop.
define void @agnostic_za_buffer_alloc_with_stack_probes() nounwind "aarch64_za_state_agnostic" "probe-stack"="inline-asm" "stack-probe-size"="65536"{
; CHECK-LABEL: agnostic_za_buffer_alloc_with_stack_probes:
; CHECK: // %bb.0:
@@ -401,18 +400,14 @@ define void @agnostic_za_buffer_alloc_with_stack_probes() nounwind "aarch64_za_s
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state_size
; CHECK-NEWLOWERING-NEXT: mov x8, sp
; CHECK-NEWLOWERING-NEXT: sub x19, x8, x0
+; CHECK-NEWLOWERING-NEXT: mov x0, x19
+; CHECK-NEWLOWERING-NEXT: bl __arm_sme_save
; CHECK-NEWLOWERING-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEWLOWERING-NEXT: cmp sp, x19
-; CHECK-NEWLOWERING-NEXT: mov x0, x19
-; CHECK-NEWLOWERING-NEXT: mrs x8, NZCV
-; CHECK-NEWLOWERING-NEXT: bl __arm_sme_save
-; CHECK-NEWLOWERING-NEXT: msr NZCV, x8
; CHECK-NEWLOWERING-NEXT: b.le .LBB7_3
; CHECK-NEWLOWERING-NEXT: // %bb.2: // in Loop: Header=BB7_1 Depth=1
-; CHECK-NEWLOWERING-NEXT: mov x0, x19
; CHECK-NEWLOWERING-NEXT: str xzr, [sp]
-; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore
; CHECK-NEWLOWERING-NEXT: b .LBB7_1
; CHECK-NEWLOWERING-NEXT: .LBB7_3:
; CHECK-NEWLOWERING-NEXT: mov sp, x19
diff --git a/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll b/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll
index 1ede38bd731f9..1ffa01675aafc 100644
--- a/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll
+++ b/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll
@@ -223,65 +223,34 @@ exit:
ret void
}
-; FIXME: The codegen for this case could be improved (by tuning weights).
-; Here the ZA save has been hoisted out of the conditional, but would be better
-; to sink it.
define void @cond_private_za_call(i1 %cond) "aarch64_inout_za" nounwind {
-; CHECK-LABEL: cond_private_za_call:
-; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT: mov x29, sp
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: rdsvl x8, #1
-; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: msub x9, x8, x8, x9
-; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: stp x9, x8, [x29, #-16]
-; CHECK-NEXT: tbz w0, #0, .LBB3_4
-; CHECK-NEXT: // %bb.1: // %private_za_call
-; CHECK-NEXT: sub x8, x29, #16
-; CHECK-NEXT: msr TPIDR2_EL0, x8
-; CHECK-NEXT: bl private_za_call
-; CHECK-NEXT: smstart za
-; CHECK-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEXT: sub x0, x29, #16
-; CHECK-NEXT: cbnz x8, .LBB3_3
-; CHECK-NEXT: // %bb.2: // %private_za_call
-; CHECK-NEXT: bl __arm_tpidr2_restore
-; CHECK-NEXT: .LBB3_3: // %private_za_call
-; CHECK-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEXT: .LBB3_4: // %exit
-; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEXT: b shared_za_call
-;
-; CHECK-NEWLOWERING-LABEL: cond_private_za_call:
-; CHECK-NEWLOWERING: // %bb.0:
-; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEWLOWERING-NEXT: mov x29, sp
-; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
-; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
-; CHECK-NEWLOWERING-NEXT: mov x9, sp
-; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
-; CHECK-NEWLOWERING-NEXT: mov sp, x9
-; CHECK-NEWLOWERING-NEXT: sub x10, x29, #16
-; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
-; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x10
-; CHECK-NEWLOWERING-NEXT: tbz w0, #0, .LBB3_2
-; CHECK-NEWLOWERING-NEXT: // %bb.1: // %private_za_call
-; CHECK-NEWLOWERING-NEXT: bl private_za_call
-; CHECK-NEWLOWERING-NEXT: .LBB3_2: // %exit
-; CHECK-NEWLOWERING-NEXT: smstart za
-; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
-; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB3_4
-; CHECK-NEWLOWERING-NEXT: // %bb.3: // %exit
-; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
-; CHECK-NEWLOWERING-NEXT: .LBB3_4: // %exit
-; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEWLOWERING-NEXT: mov sp, x29
-; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEWLOWERING-NEXT: b shared_za_call
+; CHECK-COMMON-LABEL: cond_private_za_call:
+; CHECK-COMMON: // %bb.0:
+; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: mov x29, sp
+; CHECK-COMMON-NEXT: sub sp, sp, #16
+; CHECK-COMMON-NEXT: rdsvl x8, #1
+; CHECK-COMMON-NEXT: mov x9, sp
+; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
+; CHECK-COMMON-NEXT: mov sp, x9
+; CHECK-COMMON-NEXT: stp x9, x8, [x29, #-16]
+; CHECK-COMMON-NEXT: tbz w0, #0, .LBB3_4
+; CHECK-COMMON-NEXT: // %bb.1: // %private_za_call
+; CHECK-COMMON-NEXT: sub x8, x29, #16
+; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x8
+; CHECK-COMMON-NEXT: bl private_za_call
+; CHECK-COMMON-NEXT: smstart za
+; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
+; CHECK-COMMON-NEXT: sub x0, x29, #16
+; CHECK-COMMON-NEXT: cbnz x8, .LBB3_3
+; CHECK-COMMON-NEXT: // %bb.2: // %private_za_call
+; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
+; CHECK-COMMON-NEXT: .LBB3_3: // %private_za_call
+; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
+; CHECK-COMMON-NEXT: .LBB3_4: // %exit
+; CHECK-COMMON-NEXT: mov sp, x29
+; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: b shared_za_call
br i1 %cond, label %private_za_call, label %exit
private_za_call:
diff --git a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
index bb88142efa592..506974a14c3be 100644
--- a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
+++ b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
@@ -56,31 +56,23 @@ define void @za_with_raii(i1 %fail) "aarch64_inout_za" personality ptr @__gxx_pe
; CHECK-NEXT: adrp x8, .L.str
; CHECK-NEXT: add x8, x8, :lo12:.L.str
; CHECK-NEXT: str x8, [x0]
-; CHECK-NEXT: .Ltmp0:
+; CHECK-NEXT: .Ltmp0: // EH_LABEL
; CHECK-NEXT: adrp x1, :got:typeinfo_for_char_const_ptr
; CHECK-NEXT: mov x2, xzr
; CHECK-NEXT: ldr x1, [x1, :got_lo12:typeinfo_for_char_const_ptr]
; CHECK-NEXT: bl __cxa_throw
-; CHECK-NEXT: .Ltmp1:
-; CHECK-NEXT: smstart za
-; CHECK-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEXT: sub x0, x29, #16
-; CHECK-NEXT: cbnz x8, .LBB0_4
-; CHECK-NEXT: // %bb.3: // %throw_exception
-; CHECK-NEXT: bl __arm_tpidr2_restore
-; CHECK-NEXT: .LBB0_4: // %throw_exception
-; CHECK-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEXT: // %bb.5: // %throw_fail
-; CHECK-NEXT: .LBB0_6: // %unwind_dtors
-; CHECK-NEXT: .Ltmp2:
+; CHECK-NEXT: .Ltmp1: // EH_LABEL
+; CHECK-NEXT: // %bb.3: // %throw_fail
+; CHECK-NEXT: .LBB0_4: // %unwind_dtors
+; CHECK-NEXT: .Ltmp2: // EH_LABEL
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
-; CHECK-NEXT: cbnz x8, .LBB0_8
-; CHECK-NEXT: // %bb.7: // %unwind_dtors
+; CHECK-NEXT: cbnz x8, .LBB0_6
+; CHECK-NEXT: // %bb.5: // %unwind_dtors
; CHECK-NEXT: bl __arm_tpidr2_restore
-; CHECK-NEXT: .LBB0_8: // %unwind_dtors
+; CHECK-NEXT: .LBB0_6: // %unwind_dtors
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: bl shared_za_call
; CHECK-NEXT: sub x8, x29, #16
@@ -142,11 +134,11 @@ define dso_local void @try_catch() "aarch64_inout_za" personality ptr @__gxx_per
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: stp x9, x8, [x29, #-16]
-; CHECK-NEXT: .Ltmp3:
+; CHECK-NEXT: .Ltmp3: // EH_LABEL
; CHECK-NEXT: sub x8, x29, #16
; CHECK-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEXT: bl may_throw
-; CHECK-NEXT: .Ltmp4:
+; CHECK-NEXT: .Ltmp4: // EH_LABEL
; CHECK-NEXT: .LBB1_1: // %after_catch
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
@@ -160,7 +152,7 @@ define dso_local void @try_catch() "aarch64_inout_za" personality ptr @__gxx_per
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: b shared_za_call
; CHECK-NEXT: .LBB1_4: // %catch
-; CHECK-NEXT: .Ltmp5:
+; CHECK-NEXT: .Ltmp5: // EH_LABEL
; CHECK-NEXT: bl __cxa_begin_catch
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
@@ -235,16 +227,16 @@ define void @try_catch_shared_za_callee() "aarch64_new_za" personality ptr @__gx
; CHECK-NEXT: zero {za}
; CHECK-NEXT: .LBB2_2:
; CHECK-NEXT: smstart za
-; CHECK-NEXT: .Ltmp6:
+; CHECK-NEXT: .Ltmp6: // EH_LABEL
; CHECK-NEXT: bl shared_za_call
-; CHECK-NEXT: .Ltmp7:
+; CHECK-NEXT: .Ltmp7: // EH_LABEL
; CHECK-NEXT: .LBB2_3: // %exit
; CHECK-NEXT: smstop za
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB2_4: // %catch
-; CHECK-NEXT: .Ltmp8:
+; CHECK-NEXT: .Ltmp8: // EH_LABEL
; CHECK-NEXT: bl __cxa_begin_catch
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
diff --git a/llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll b/llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll
index 066ee3b040469..afd56d198d0d3 100644
--- a/llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll
+++ b/llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll
@@ -12,77 +12,41 @@ entry:
}
define float @multi_bb_stpidr2_save_required(i32 %a, float %b, float %c) "aarch64_inout_za" {
-; CHECK-LABEL: multi_bb_stpidr2_save_required:
-; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT: mov x29, sp
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa w29, 16
-; CHECK-NEXT: .cfi_offset w30, -8
-; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: rdsvl x8, #1
-; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: msub x9, x8, x8, x9
-; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: stp x9, x8, [x29, #-16]
-; CHECK-NEXT: cbz w0, .LBB1_2
-; CHECK-NEXT: // %bb.1: // %use_b
-; CHECK-NEXT: fmov s1, #4.00000000
-; CHECK-NEXT: fadd s0, s0, s1
-; CHECK-NEXT: b .LBB1_5
-; CHECK-NEXT: .LBB1_2: // %use_c
-; CHECK-NEXT: fmov s0, s1
-; CHECK-NEXT: sub x8, x29, #16
-; CHECK-NEXT: msr TPIDR2_EL0, x8
-; CHECK-NEXT: bl cosf
-; CHECK-NEXT: smstart za
-; CHECK-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEXT: sub x0, x29, #16
-; CHECK-NEXT: cbnz x8, .LBB1_4
-; CHECK-NEXT: // %bb.3: // %use_c
-; CHECK-NEXT: bl __arm_tpidr2_restore
-; CHECK-NEXT: .LBB1_4: // %use_c
-; CHECK-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEXT: .LBB1_5: // %exit
-; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEXT: ret
-;
-; CHECK-NEWLOWERING-LABEL: multi_bb_stpidr2_save_required:
-; CHECK-NEWLOWERING: // %bb.0:
-; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEWLOWERING-NEXT: mov x29, sp
-; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
-; CHECK-NEWLOWERING-NEXT: .cfi_def_cfa w29, 16
-; CHECK-NEWLOWERING-NEXT: .cfi_offset w30, -8
-; CHECK-NEWLOWERING-NEXT: .cfi_offset w29, -16
-; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
-; CHECK-NEWLOWERING-NEXT: mov x9, sp
-; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
-; CHECK-NEWLOWERING-NEXT: mov sp, x9
-; CHECK-NEWLOWERING-NEXT: sub x10, x29, #16
-; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
-; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x10
-; CHECK-NEWLOWERING-NEXT: cbz w0, .LBB1_2
-; CHECK-NEWLOWERING-NEXT: // %bb.1: // %use_b
-; CHECK-NEWLOWERING-NEXT: fmov s1, #4.00000000
-; CHECK-NEWLOWERING-NEXT: fadd s0, s0, s1
-; CHECK-NEWLOWERING-NEXT: b .LBB1_3
-; CHECK-NEWLOWERING-NEXT: .LBB1_2: // %use_c
-; CHECK-NEWLOWERING-NEXT: fmov s0, s1
-; CHECK-NEWLOWERING-NEXT: bl cosf
-; CHECK-NEWLOWERING-NEXT: .LBB1_3: // %exit
-; CHECK-NEWLOWERING-NEXT: smstart za
-; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
-; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB1_5
-; CHECK-NEWLOWERING-NEXT: // %bb.4: // %exit
-; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
-; CHECK-NEWLOWERING-NEXT: .LBB1_5: // %exit
-; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEWLOWERING-NEXT: mov sp, x29
-; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEWLOWERING-NEXT: ret
+; CHECK-COMMON-LABEL: multi_bb_stpidr2_save_required:
+; CHECK-COMMON: // %bb.0:
+; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: mov x29, sp
+; CHECK-COMMON-NEXT: sub sp, sp, #16
+; CHECK-COMMON-NEXT: .cfi_def_cfa w29, 16
+; CHECK-COMMON-NEXT: .cfi_offset w30, -8
+; CHECK-COMMON-NEXT: .cfi_offset w29, -16
+; CHECK-COMMON-NEXT: rdsvl x8, #1
+; CHECK-COMMON-NEXT: mov x9, sp
+; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
+; CHECK-COMMON-NEXT: mov sp, x9
+; CHECK-COMMON-NEXT: stp x9, x8, [x29, #-16]
+; CHECK-COMMON-NEXT: cbz w0, .LBB1_2
+; CHECK-COMMON-NEXT: // %bb.1: // %use_b
+; CHECK-COMMON-NEXT: fmov s1, #4.00000000
+; CHECK-COMMON-NEXT: fadd s0, s0, s1
+; CHECK-COMMON-NEXT: b .LBB1_5
+; CHECK-COMMON-NEXT: .LBB1_2: // %use_c
+; CHECK-COMMON-NEXT: fmov s0, s1
+; CHECK-COMMON-NEXT: sub x8, x29, #16
+; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x8
+; CHECK-COMMON-NEXT: bl cosf
+; CHECK-COMMON-NEXT: smstart za
+; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
+; CHECK-COMMON-NEXT: sub x0, x29, #16
+; CHECK-COMMON-NEXT: cbnz x8, .LBB1_4
+; CHECK-COMMON-NEXT: // %bb.3: // %use_c
+; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
+; CHECK-COMMON-NEXT: .LBB1_4: // %use_c
+; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
+; CHECK-COMMON-NEXT: .LBB1_5: // %exit
+; CHECK-COMMON-NEXT: mov sp, x29
+; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ret
%cmp = icmp ne i32 %a, 0
br i1 %cmp, label %use_b, label %use_c
@@ -155,7 +119,9 @@ define float @multi_bb_stpidr2_save_required_stackprobe(i32 %a, float %b, float
; CHECK-NEWLOWERING-NEXT: .cfi_offset w29, -16
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT: mov x9, sp
+; CHECK-NEWLOWERING-NEXT: sub x10, x29, #16
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
+; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x10
; CHECK-NEWLOWERING-NEXT: .LBB2_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEWLOWERING-NEXT: cmp sp, x9
@@ -166,9 +132,7 @@ define float @multi_bb_stpidr2_save_required_stackprobe(i32 %a, float %b, float
; CHECK-NEWLOWERING-NEXT: .LBB2_3:
; CHECK-NEWLOWERING-NEXT: mov sp, x9
; CHECK-NEWLOWERING-NEXT: ldr xzr, [sp]
-; CHECK-NEWLOWERING-NEXT: sub x10, x29, #16
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
-; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x10
; CHECK-NEWLOWERING-NEXT: cbz w0, .LBB2_5
; CHECK-NEWLOWERING-NEXT: // %bb.4: // %use_b
; CHECK-NEWLOWERING-NEXT: fmov s1, #4.00000000
More information about the llvm-branch-commits
mailing list