[llvm-branch-commits] [llvm] release/22.x: [AArch64][SME] Limit where SME ABI optimizations apply (#179273) (PR #179473)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Feb 3 06:25:45 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Benjamin Maxwell (MacDue)
<details>
<summary>Changes</summary>
This cherry-picks 79eb804. These optimizations were added shortly before the branch; however, we're concerned they're not quite ready for production use. This commit limits the optimizations to the simplest cases.
---
Patch is 32.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/179473.diff
6 Files Affected:
- (modified) llvm/lib/Target/AArch64/MachineSMEABIPass.cpp (+18-150)
- (modified) llvm/test/CodeGen/AArch64/sme-agnostic-za.ll (+27-55)
- (modified) llvm/test/CodeGen/AArch64/sme-new-za-function.ll (+4-17)
- (modified) llvm/test/CodeGen/AArch64/sme-za-control-flow.ll (+15-11)
- (modified) llvm/test/CodeGen/AArch64/sme-za-exceptions.ll (+50-19)
- (modified) llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll (+48-93)
``````````diff
diff --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
index 823c754a0ac05..9b96bed823817 100644
--- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
+++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
@@ -201,23 +201,6 @@ class EmitContext {
Register AgnosticZABufferPtr = AArch64::NoRegister;
};
-/// Checks if \p State is a legal edge bundle state. For a state to be a legal
-/// bundle state, it must be possible to transition from it to any other bundle
-/// state without losing any ZA state. This is the case for ACTIVE/LOCAL_SAVED,
-/// as you can transition between those states by saving/restoring ZA. The OFF
-/// state would not be legal, as transitioning to it drops the content of ZA.
-static bool isLegalEdgeBundleZAState(ZAState State) {
- switch (State) {
- case ZAState::ACTIVE: // ZA state within the accumulator/ZT0.
- case ZAState::ACTIVE_ZT0_SAVED: // ZT0 is saved (ZA is active).
- case ZAState::LOCAL_SAVED: // ZA state may be saved on the stack.
- case ZAState::LOCAL_COMMITTED: // ZA state is saved on the stack.
- return true;
- default:
- return false;
- }
-}
-
StringRef getZAStateString(ZAState State) {
#define MAKE_CASE(V) \
case V: \
@@ -325,11 +308,6 @@ struct MachineSMEABI : public MachineFunctionPass {
const EdgeBundles &Bundles,
ArrayRef<ZAState> BundleStates);
- /// Propagates desired states forwards (from predecessors -> successors) if
- /// \p Forwards, otherwise, propagates backwards (from successors ->
- /// predecessors).
- void propagateDesiredStates(FunctionInfo &FnInfo, bool Forwards = true);
-
void emitZT0SaveRestore(EmitContext &, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, bool IsSave);
@@ -526,110 +504,36 @@ FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
PhysLiveRegsAfterSMEPrologue};
}
-void MachineSMEABI::propagateDesiredStates(FunctionInfo &FnInfo,
- bool Forwards) {
- // If `Forwards`, this propagates desired states from predecessors to
- // successors, otherwise, this propagates states from successors to
- // predecessors.
- auto GetBlockState = [](BlockInfo &Block, bool Incoming) -> ZAState & {
- return Incoming ? Block.DesiredIncomingState : Block.DesiredOutgoingState;
- };
-
- SmallVector<MachineBasicBlock *> Worklist;
- for (auto [BlockID, BlockInfo] : enumerate(FnInfo.Blocks)) {
- if (!isLegalEdgeBundleZAState(GetBlockState(BlockInfo, Forwards)))
- Worklist.push_back(MF->getBlockNumbered(BlockID));
- }
-
- while (!Worklist.empty()) {
- MachineBasicBlock *MBB = Worklist.pop_back_val();
- BlockInfo &Block = FnInfo.Blocks[MBB->getNumber()];
-
- // Pick a legal edge bundle state that matches the majority of
- // predecessors/successors.
- int StateCounts[ZAState::NUM_ZA_STATE] = {0};
- for (MachineBasicBlock *PredOrSucc :
- Forwards ? predecessors(MBB) : successors(MBB)) {
- BlockInfo &PredOrSuccBlock = FnInfo.Blocks[PredOrSucc->getNumber()];
- ZAState ZAState = GetBlockState(PredOrSuccBlock, !Forwards);
- if (isLegalEdgeBundleZAState(ZAState))
- StateCounts[ZAState]++;
- }
-
- ZAState PropagatedState = ZAState(max_element(StateCounts) - StateCounts);
- ZAState &CurrentState = GetBlockState(Block, Forwards);
- if (PropagatedState != CurrentState) {
- CurrentState = PropagatedState;
- ZAState &OtherState = GetBlockState(Block, !Forwards);
- // Propagate to the incoming/outgoing state if that is also "ANY".
- if (OtherState == ZAState::ANY)
- OtherState = PropagatedState;
- // Push any successors/predecessors that may need updating to the
- // worklist.
- for (MachineBasicBlock *SuccOrPred :
- Forwards ? successors(MBB) : predecessors(MBB)) {
- BlockInfo &SuccOrPredBlock = FnInfo.Blocks[SuccOrPred->getNumber()];
- if (!isLegalEdgeBundleZAState(GetBlockState(SuccOrPredBlock, Forwards)))
- Worklist.push_back(SuccOrPred);
- }
- }
- }
-}
-
/// Assigns each edge bundle a ZA state based on the needed states of blocks
-/// that have incoming or outgoing edges in that bundle.
+/// that have incoming or outgoing blocks in that bundle.
SmallVector<ZAState>
MachineSMEABI::assignBundleZAStates(const EdgeBundles &Bundles,
const FunctionInfo &FnInfo) {
SmallVector<ZAState> BundleStates(Bundles.getNumBundles());
for (unsigned I = 0, E = Bundles.getNumBundles(); I != E; ++I) {
- LLVM_DEBUG(dbgs() << "Assigning ZA state for edge bundle: " << I << '\n');
-
- // Attempt to assign a ZA state for this bundle that minimizes state
- // transitions. Edges within loops are given a higher weight as we assume
- // they will be executed more than once.
- int EdgeStateCounts[ZAState::NUM_ZA_STATE] = {0};
+ std::optional<ZAState> BundleState;
for (unsigned BlockID : Bundles.getBlocks(I)) {
- LLVM_DEBUG(dbgs() << "- bb." << BlockID);
-
const BlockInfo &Block = FnInfo.Blocks[BlockID];
- bool InEdge = Bundles.getBundle(BlockID, /*Out=*/false) == I;
- bool OutEdge = Bundles.getBundle(BlockID, /*Out=*/true) == I;
-
- bool LegalInEdge =
- InEdge && isLegalEdgeBundleZAState(Block.DesiredIncomingState);
- bool LegalOutEgde =
- OutEdge && isLegalEdgeBundleZAState(Block.DesiredOutgoingState);
- if (LegalInEdge) {
- LLVM_DEBUG(dbgs() << " DesiredIncomingState: "
- << getZAStateString(Block.DesiredIncomingState));
- EdgeStateCounts[Block.DesiredIncomingState]++;
- }
- if (LegalOutEgde) {
- LLVM_DEBUG(dbgs() << " DesiredOutgoingState: "
- << getZAStateString(Block.DesiredOutgoingState));
- EdgeStateCounts[Block.DesiredOutgoingState]++;
- }
- if (!LegalInEdge && !LegalOutEgde)
- LLVM_DEBUG(dbgs() << " (no state preference)");
- LLVM_DEBUG(dbgs() << '\n');
+ // Check if the block is an incoming block in the bundle. Note: We skip
+ // Block.FixedEntryState != ANY to ignore EH pads (which are only
+ // reachable via exceptions).
+ if (Block.FixedEntryState != ZAState::ANY ||
+ Bundles.getBundle(BlockID, /*Out=*/false) != I)
+ continue;
+
+ // Pick a state that matches all incoming blocks. Fallback to "ACTIVE" if
+ // any blocks doesn't match. This will hoist the state from incoming
+ // blocks to outgoing blocks.
+ if (!BundleState)
+ BundleState = Block.DesiredIncomingState;
+ else if (BundleState != Block.DesiredIncomingState)
+ BundleState = ZAState::ACTIVE;
}
- ZAState BundleState =
- ZAState(max_element(EdgeStateCounts) - EdgeStateCounts);
-
- if (BundleState == ZAState::ANY)
+ if (!BundleState || BundleState == ZAState::ANY)
BundleState = ZAState::ACTIVE;
- LLVM_DEBUG({
- dbgs() << "Chosen ZA state: " << getZAStateString(BundleState) << '\n'
- << "Edge counts:";
- for (auto [State, Count] : enumerate(EdgeStateCounts))
- dbgs() << " " << getZAStateString(ZAState(State)) << ": " << Count;
- dbgs() << "\n\n";
- });
-
- BundleStates[I] = BundleState;
+ BundleStates[I] = *BundleState;
}
return BundleStates;
@@ -1268,42 +1172,6 @@ bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) {
FunctionInfo FnInfo = collectNeededZAStates(SMEFnAttrs);
- if (OptLevel != CodeGenOptLevel::None) {
- // Propagate desired states forward, then backwards. Most of the propagation
- // should be done in the forward step, and backwards propagation is then
- // used to fill in the gaps. Note: Doing both in one step can give poor
- // results. For example, consider this subgraph:
- //
- // ┌─────┐
- // ┌─┤ BB0 ◄───┐
- // │ └─┬───┘ │
- // │ ┌─▼───◄──┐│
- // │ │ BB1 │ ││
- // │ └─┬┬──┘ ││
- // │ │└─────┘│
- // │ ┌─▼───┐ │
- // │ │ BB2 ├───┘
- // │ └─┬───┘
- // │ ┌─▼───┐
- // └─► BB3 │
- // └─────┘
- //
- // If:
- // - "BB0" and "BB2" (outer loop) has no state preference
- // - "BB1" (inner loop) desires the ACTIVE state on entry/exit
- // - "BB3" desires the LOCAL_SAVED state on entry
- //
- // If we propagate forwards first, ACTIVE is propagated from BB1 to BB2,
- // then from BB2 to BB0. Which results in the inner and outer loops having
- // the "ACTIVE" state. This avoids any state changes in the loops.
- //
- // If we propagate backwards first, we _could_ propagate LOCAL_SAVED from
- // BB3 to BB0, which would result in a transition from ACTIVE -> LOCAL_SAVED
- // in the outer loop.
- for (bool Forwards : {true, false})
- propagateDesiredStates(FnInfo, Forwards);
- }
-
SmallVector<ZAState> BundleStates = assignBundleZAStates(Bundles, FnInfo);
EmitContext Context;
diff --git a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
index 344f1ef24b843..4a18b9f61d69f 100644
--- a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
+++ b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
@@ -352,61 +352,33 @@ define i64 @test_many_callee_arguments(
}
define void @agnostic_za_buffer_alloc_with_stack_probes() nounwind "aarch64_za_state_agnostic" "probe-stack"="inline-asm" "stack-probe-size"="65536"{
-; CHECK-SDAG-LABEL: agnostic_za_buffer_alloc_with_stack_probes:
-; CHECK-SDAG: // %bb.0:
-; CHECK-SDAG-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-SDAG-NEXT: str x19, [sp, #16] // 8-byte Spill
-; CHECK-SDAG-NEXT: mov x29, sp
-; CHECK-SDAG-NEXT: bl __arm_sme_state_size
-; CHECK-SDAG-NEXT: mov x8, sp
-; CHECK-SDAG-NEXT: sub x19, x8, x0
-; CHECK-SDAG-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1
-; CHECK-SDAG-NEXT: sub sp, sp, #16, lsl #12 // =65536
-; CHECK-SDAG-NEXT: cmp sp, x19
-; CHECK-SDAG-NEXT: b.le .LBB7_3
-; CHECK-SDAG-NEXT: // %bb.2: // in Loop: Header=BB7_1 Depth=1
-; CHECK-SDAG-NEXT: str xzr, [sp]
-; CHECK-SDAG-NEXT: b .LBB7_1
-; CHECK-SDAG-NEXT: .LBB7_3:
-; CHECK-SDAG-NEXT: mov sp, x19
-; CHECK-SDAG-NEXT: ldr xzr, [sp]
-; CHECK-SDAG-NEXT: mov x0, x19
-; CHECK-SDAG-NEXT: bl __arm_sme_save
-; CHECK-SDAG-NEXT: bl private_za
-; CHECK-SDAG-NEXT: mov x0, x19
-; CHECK-SDAG-NEXT: bl __arm_sme_restore
-; CHECK-SDAG-NEXT: mov sp, x29
-; CHECK-SDAG-NEXT: ldr x19, [sp, #16] // 8-byte Reload
-; CHECK-SDAG-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-SDAG-NEXT: ret
-;
-; CHECK-LABEL: agnostic_za_buffer_alloc_with_stack_probes:
-; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill
-; CHECK-NEXT: mov x29, sp
-; CHECK-NEXT: bl __arm_sme_state_size
-; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: sub x19, x8, x0
-; CHECK-NEXT: mov x0, x19
-; CHECK-NEXT: bl __arm_sme_save
-; CHECK-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
-; CHECK-NEXT: cmp sp, x19
-; CHECK-NEXT: b.le .LBB7_3
-; CHECK-NEXT: // %bb.2: // in Loop: Header=BB7_1 Depth=1
-; CHECK-NEXT: str xzr, [sp]
-; CHECK-NEXT: b .LBB7_1
-; CHECK-NEXT: .LBB7_3:
-; CHECK-NEXT: mov sp, x19
-; CHECK-NEXT: ldr xzr, [sp]
-; CHECK-NEXT: bl private_za
-; CHECK-NEXT: mov x0, x19
-; CHECK-NEXT: bl __arm_sme_restore
-; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload
-; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-COMMON-LABEL: agnostic_za_buffer_alloc_with_stack_probes:
+; CHECK-COMMON: // %bb.0:
+; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Spill
+; CHECK-COMMON-NEXT: mov x29, sp
+; CHECK-COMMON-NEXT: bl __arm_sme_state_size
+; CHECK-COMMON-NEXT: mov x8, sp
+; CHECK-COMMON-NEXT: sub x19, x8, x0
+; CHECK-COMMON-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1
+; CHECK-COMMON-NEXT: sub sp, sp, #16, lsl #12 // =65536
+; CHECK-COMMON-NEXT: cmp sp, x19
+; CHECK-COMMON-NEXT: b.le .LBB7_3
+; CHECK-COMMON-NEXT: // %bb.2: // in Loop: Header=BB7_1 Depth=1
+; CHECK-COMMON-NEXT: str xzr, [sp]
+; CHECK-COMMON-NEXT: b .LBB7_1
+; CHECK-COMMON-NEXT: .LBB7_3:
+; CHECK-COMMON-NEXT: mov sp, x19
+; CHECK-COMMON-NEXT: ldr xzr, [sp]
+; CHECK-COMMON-NEXT: mov x0, x19
+; CHECK-COMMON-NEXT: bl __arm_sme_save
+; CHECK-COMMON-NEXT: bl private_za
+; CHECK-COMMON-NEXT: mov x0, x19
+; CHECK-COMMON-NEXT: bl __arm_sme_restore
+; CHECK-COMMON-NEXT: mov sp, x29
+; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Reload
+; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ret
call void @private_za()
ret void
}
diff --git a/llvm/test/CodeGen/AArch64/sme-new-za-function.ll b/llvm/test/CodeGen/AArch64/sme-new-za-function.ll
index d2715b58439d8..6995cfae8e459 100644
--- a/llvm/test/CodeGen/AArch64/sme-new-za-function.ll
+++ b/llvm/test/CodeGen/AArch64/sme-new-za-function.ll
@@ -51,7 +51,6 @@ define void @private_za() "aarch64_new_za" {
}
; Note: This test must run at -O0 as otherwise the multiple exits are optimized out.
-; TODO: We should be able to omit the ZA save here (as this function does not use ZA).
define i32 @private_za_multiple_exit(i32 %a, i32 %b, i64 %cond) "aarch64_new_za" {
; CHECK-SDAG-LABEL: private_za_multiple_exit:
; CHECK-SDAG: // %bb.0: // %prelude
@@ -99,33 +98,21 @@ define i32 @private_za_multiple_exit(i32 %a, i32 %b, i64 %cond) "aarch64_new_za"
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEXT: cbnz x8, .LBB1_1
-; CHECK-NEXT: b .LBB1_2
-; CHECK-NEXT: .LBB1_1: // %entry
-; CHECK-NEXT: bl __arm_tpidr2_save
-; CHECK-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEXT: zero {za}
-; CHECK-NEXT: b .LBB1_2
-; CHECK-NEXT: .LBB1_2: // %entry
-; CHECK-NEXT: smstart za
; CHECK-NEXT: str w1, [sp, #8] // 4-byte Spill
; CHECK-NEXT: str w0, [sp, #12] // 4-byte Spill
; CHECK-NEXT: subs x8, x2, #1
-; CHECK-NEXT: b.ne .LBB1_4
-; CHECK-NEXT: b .LBB1_3
-; CHECK-NEXT: .LBB1_3: // %if.else
+; CHECK-NEXT: b.ne .LBB1_2
+; CHECK-NEXT: b .LBB1_1
+; CHECK-NEXT: .LBB1_1: // %if.else
; CHECK-NEXT: ldr w8, [sp, #12] // 4-byte Reload
; CHECK-NEXT: ldr w9, [sp, #8] // 4-byte Reload
; CHECK-NEXT: add w0, w8, w9
-; CHECK-NEXT: smstop za
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB1_4: // %if.end
+; CHECK-NEXT: .LBB1_2: // %if.end
; CHECK-NEXT: ldr w8, [sp, #12] // 4-byte Reload
; CHECK-NEXT: ldr w9, [sp, #8] // 4-byte Reload
; CHECK-NEXT: subs w0, w8, w9
-; CHECK-NEXT: smstop za
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll b/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll
index 50449172ce85b..aae1d3b756f4e 100644
--- a/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll
+++ b/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll
@@ -49,36 +49,40 @@ define void @private_za_loop(i32 %n) "aarch64_inout_za" nounwind {
; CHECK-LABEL: private_za_loop:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill
+; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: sub x10, x29, #16
; CHECK-NEXT: cmp w0, #1
; CHECK-NEXT: stp x9, x8, [x29, #-16]
-; CHECK-NEXT: msr TPIDR2_EL0, x10
-; CHECK-NEXT: b.lt .LBB0_3
+; CHECK-NEXT: b.lt .LBB0_5
; CHECK-NEXT: // %bb.1: // %loop.preheader
; CHECK-NEXT: mov w19, w0
+; CHECK-NEXT: sub x20, x29, #16
+; CHECK-NEXT: b .LBB0_3
; CHECK-NEXT: .LBB0_2: // %loop
+; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: msr TPIDR2_EL0, xzr
+; CHECK-NEXT: cbz w19, .LBB0_5
+; CHECK-NEXT: .LBB0_3: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEXT: bl private_za_call
-; CHECK-NEXT: subs w19, w19, #1
-; CHECK-NEXT: b.ne .LBB0_2
-; CHECK-NEXT: .LBB0_3: // %exit
+; CHECK-NEXT: sub w19, w19, #1
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
-; CHECK-NEXT: cbnz x8, .LBB0_5
-; CHECK-NEXT: // %bb.4: // %exit
+; CHECK-NEXT: cbnz x8, .LBB0_2
+; CHECK-NEXT: // %bb.4: // %loop
+; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: bl __arm_tpidr2_restore
+; CHECK-NEXT: b .LBB0_2
; CHECK-NEXT: .LBB0_5: // %exit
-; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload
+; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
index 5243b8d7203d8..19ea1e47f84ff 100644
--- a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
+++ b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
@@ -63,17 +63,25 @@ define void @za_with_raii(i1 %fail) "aarch64_inout_za" personality ptr @__gxx_pe
; CHECK-NEXT: ldr x1, [x1, :got_lo12:typeinfo_for_char_const_ptr]
; CHECK-NEXT: bl __cxa_throw
; CHECK-NEXT: .Ltmp1: // EH_LABEL
-; CHECK-NEXT: // %bb.3: // %throw_fail
-; CHECK-NEXT: .LBB0_4: // %unwind_dtors
+; CHECK-NEXT: smstart za
+; CHECK-NEXT: mrs x8, TPIDR2_EL0
+; CHECK-NEXT: sub x0, x29, #16
+; CHECK-NEXT: cbnz x8, .LBB0_4
+; CHECK-NEXT: // %bb.3: // %throw_exception
+; CHECK-NEXT: bl __arm_tpidr2_restore
+; CHECK-NEXT: .LBB0_4: // %throw_exception
+; CHECK-NEXT: msr TPIDR2_EL0, xzr
+; CHECK-NEXT: // %bb.5: // %throw_fail
+; CHECK-NEXT: .LBB0_6: // %unwind_dtors
; CHECK-NEXT: .Ltmp2: // EH_LABEL
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
-; CHECK-NEXT: cbnz x8, .LBB0_6
-; CHECK-NEXT: // %bb.5: // %unwind_dtors
+; CHECK-NEXT: cbnz x8, .LBB0_8
+; CHECK-NEXT: // %bb.7: // %unwind_dtors
; CHECK-NEXT: bl __arm_tpidr2_restore
-; CHECK-NEXT: .LBB0_6: // %unwind_dtors
+; CHECK-NEXT: .LBB0_8: // %unwind_dtors
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: bl shared_za_call
; CHECK-NEXT: sub x8, x29, #16
@@ -224,15 +232,15 @@ define void @try_catch() "aarch64_inout_za" personality ptr @__gxx_personality_v
; CHECK-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEXT: bl may_throw
; CHECK-NEXT: .Ltmp4: // EH_LABEL
-; CHECK-NEXT: .LBB1_1: // %after_catch
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
-; CHECK-NEXT: cbnz x8, .LBB1_3
-; CHECK-NEXT: // %bb.2: // %after_catch
+; CHECK-NEXT: cbnz x8, .LBB1_2
+; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: bl __arm_tpidr2_restore
-; CHECK-NEXT: .LBB1_3: // %after_catch
+; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
+; CHECK-NEXT: .LBB1_3: // %after_catch
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: b shared_za_call
@@ -251,7 +259,15 @@ define void @try_catch() "aarch64_inout_za" personality ptr @__gxx_personality_v
; CHECK-NEXT: sub x8, x29, #16
; CHECK-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEXT: bl __cxa_end_catch
-; CHECK-NEXT: b .LBB1_1
+; CHECK-NEXT: smstart za
+; CHECK-NEXT: mrs x8, TPIDR2_EL0
+; CHECK-NEXT: sub x0, x29, #16
+; CHECK-NEXT: cbnz x8, .LBB1_8
+; CHECK-NEXT: // %bb.7: // %catch
+; CHECK-NEXT: bl __arm_tpidr2_restore
+; CHECK-NEXT: .LBB1_8: // %catch
+; CHECK-NEXT: msr TPIDR2_EL0, xzr
+; CHECK-NEXT...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/179473
More information about the llvm-branch-commits
mailing list