[llvm] [AArch64][SME] Propagate desired ZA states in the MachineSMEABIPass (PR #149510)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 9 07:28:08 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: Benjamin Maxwell (MacDue)
This patch adds a step to the MachineSMEABIPass that propagates desired ZA states between blocks.
This aims to pick better ZA states for edge bundles: when many (or all) blocks in a bundle have no preferred ZA state, the state assigned to the bundle can be suboptimal.
An important case is nested loops where only the inner loop has a preferred ZA state. Here we would like to propagate the inner loop's state outwards to the enclosing loops, so that no loop contains a save/restore (see the sketch below).
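To make that case concrete, here is a hypothetical example in the spirit of the sme-za-control-flow.ll tests (not taken from the patch). Only the inner loop body has a preferred ZA state, because only it calls a private-ZA function (which wants ZA in the LOCAL_SAVED state); the outer loop blocks have no preference ("ANY"). Without propagation, the outer-loop bundles default to ACTIVE and a save/restore lands inside the outer loop; with propagation, the LOCAL_SAVED preference flows outwards and the lazy save is set up once, before the outer loop.

```llvm
; Hypothetical IR, for illustration only.
declare void @private_za_call()

define void @nested_loops(i32 %n, i32 %m) "aarch64_inout_za" nounwind {
entry:
  br label %outer
outer:                                ; no ZA preference of its own
  %i = phi i32 [ 0, %entry ], [ %i.next, %outer.latch ]
  br label %inner
inner:                                ; prefers ZA = LOCAL_SAVED
  %j = phi i32 [ 0, %outer ], [ %j.next, %inner ]
  call void @private_za_call()
  %j.next = add i32 %j, 1
  %inner.done = icmp eq i32 %j.next, %m
  br i1 %inner.done, label %outer.latch, label %inner
outer.latch:
  %i.next = add i32 %i, 1
  %outer.done = icmp eq i32 %i.next, %n
  br i1 %outer.done, label %exit, label %outer
exit:
  ret void
}
```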
---
Patch is 42.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/149510.diff
8 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64.h (+1-1)
- (modified) llvm/lib/Target/AArch64/AArch64TargetMachine.cpp (+3-3)
- (modified) llvm/lib/Target/AArch64/MachineSMEABIPass.cpp (+91-20)
- (modified) llvm/test/CodeGen/AArch64/sme-agnostic-za.ll (+2-7)
- (modified) llvm/test/CodeGen/AArch64/sme-za-control-flow.ll (+40-67)
- (modified) llvm/test/CodeGen/AArch64/sme-za-exceptions.ll (+14-22)
- (added) llvm/test/CodeGen/AArch64/sme-za-function-with-many-blocks.ll (+296)
- (modified) llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll (+37-73)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index 8d0ff41fc8c08..139684172f1bb 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -60,7 +60,7 @@ FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
FunctionPass *createAArch64CollectLOHPass();
FunctionPass *createSMEABIPass();
FunctionPass *createSMEPeepholeOptPass();
-FunctionPass *createMachineSMEABIPass();
+FunctionPass *createMachineSMEABIPass(CodeGenOptLevel);
ModulePass *createSVEIntrinsicOptsPass();
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &,
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 4650b2d0c8151..1dc2ec2d01b88 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -792,8 +792,8 @@ bool AArch64PassConfig::addGlobalInstructionSelect() {
}
void AArch64PassConfig::addMachineSSAOptimization() {
- if (EnableNewSMEABILowering && TM->getOptLevel() != CodeGenOptLevel::None)
- addPass(createMachineSMEABIPass());
+ if (TM->getOptLevel() != CodeGenOptLevel::None && EnableNewSMEABILowering)
+ addPass(createMachineSMEABIPass(TM->getOptLevel()));
if (TM->getOptLevel() != CodeGenOptLevel::None && EnableSMEPeepholeOpt)
addPass(createSMEPeepholeOptPass());
@@ -826,7 +826,7 @@ bool AArch64PassConfig::addILPOpts() {
void AArch64PassConfig::addPreRegAlloc() {
if (TM->getOptLevel() == CodeGenOptLevel::None && EnableNewSMEABILowering)
- addPass(createMachineSMEABIPass());
+ addPass(createMachineSMEABIPass(CodeGenOptLevel::None));
// Change dead register definitions to refer to the zero register.
if (TM->getOptLevel() != CodeGenOptLevel::None &&
diff --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
index c39a5cc2fcb16..25f23bc310681 100644
--- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
+++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
@@ -176,7 +176,8 @@ getZAStateBeforeInst(const TargetRegisterInfo &TRI, MachineInstr &MI,
struct MachineSMEABI : public MachineFunctionPass {
inline static char ID = 0;
- MachineSMEABI() : MachineFunctionPass(ID) {}
+ MachineSMEABI(CodeGenOptLevel OptLevel = CodeGenOptLevel::Default)
+ : MachineFunctionPass(ID), OptLevel(OptLevel) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -202,6 +203,11 @@ struct MachineSMEABI : public MachineFunctionPass {
/// E.g., ACTIVE -> LOCAL_SAVED will insert code required to save ZA.
void insertStateChanges();
+ /// Propagates desired states forwards (from predecessors -> successors) if
+ /// \p Forwards is true; otherwise, propagates backwards (from successors ->
+ /// predecessors).
+ void propagateDesiredStates(bool Forwards = true);
+
// Emission routines for private and shared ZA functions (using lazy saves).
void emitNewZAPrologue(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
@@ -276,12 +282,16 @@ struct MachineSMEABI : public MachineFunctionPass {
/// Contains the needed ZA state for each instruction in a block.
/// Instructions that do not require a ZA state are not recorded.
struct BlockInfo {
- ZAState FixedEntryState{ZAState::ANY};
SmallVector<InstInfo> Insts;
+ ZAState FixedEntryState{ZAState::ANY};
+ ZAState DesiredIncomingState{ZAState::ANY};
+ ZAState DesiredOutgoingState{ZAState::ANY};
LiveRegs PhysLiveRegsAtEntry = LiveRegs::None;
LiveRegs PhysLiveRegsAtExit = LiveRegs::None;
};
+ CodeGenOptLevel OptLevel = CodeGenOptLevel::Default;
+
// All pass state that must be cleared between functions.
struct PassState {
SmallVector<BlockInfo> Blocks;
@@ -299,6 +309,7 @@ struct MachineSMEABI : public MachineFunctionPass {
const AArch64FunctionInfo *AFI = nullptr;
const TargetInstrInfo *TII = nullptr;
MachineRegisterInfo *MRI = nullptr;
+ MachineLoopInfo *MLI = nullptr;
};
void MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
@@ -367,51 +378,105 @@ void MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
// Reverse vector (as we had to iterate backwards for liveness).
std::reverse(Block.Insts.begin(), Block.Insts.end());
+
+ // Record the desired states on entry/exit of this block. These are the
+ // states that would not incur a state transition.
+ if (!Block.Insts.empty()) {
+ Block.DesiredIncomingState = Block.Insts.front().NeededState;
+ Block.DesiredOutgoingState = Block.Insts.back().NeededState;
+ }
+ }
+}
+
+void MachineSMEABI::propagateDesiredStates(bool Forwards) {
+ // If `Forwards`, this propagates desired states from predecessors to
+ // successors, otherwise, this propagates states from successors to
+ // predecessors.
+ auto GetBlockState = [](BlockInfo &Block, bool Incoming) -> ZAState & {
+ return Incoming ? Block.DesiredIncomingState : Block.DesiredOutgoingState;
+ };
+
+ SmallVector<MachineBasicBlock *> Worklist;
+ for (auto [BlockID, BlockInfo] : enumerate(State.Blocks)) {
+ if (!isLegalEdgeBundleZAState(GetBlockState(BlockInfo, Forwards)))
+ Worklist.push_back(MF->getBlockNumbered(BlockID));
+ }
+
+ while (!Worklist.empty()) {
+ MachineBasicBlock *MBB = Worklist.pop_back_val();
+ auto &BlockInfo = State.Blocks[MBB->getNumber()];
+
+ // Pick a legal edge bundle state that matches the majority of
+ // predecessors/successors.
+ int StateCounts[ZAState::NUM_ZA_STATE] = {0};
+ for (MachineBasicBlock *PredOrSucc :
+ Forwards ? predecessors(MBB) : successors(MBB)) {
+ auto &PredOrSuccBlockInfo = State.Blocks[PredOrSucc->getNumber()];
+ auto ZAState = GetBlockState(PredOrSuccBlockInfo, !Forwards);
+ if (isLegalEdgeBundleZAState(ZAState))
+ StateCounts[ZAState]++;
+ }
+
+ ZAState PropagatedState = ZAState(max_element(StateCounts) - StateCounts);
+ auto &CurrentState = GetBlockState(BlockInfo, Forwards);
+ if (PropagatedState != CurrentState) {
+ CurrentState = PropagatedState;
+ auto &OtherState = GetBlockState(BlockInfo, !Forwards);
+ // Propagate to the incoming/outgoing state if that is also "ANY".
+ if (OtherState == ZAState::ANY)
+ OtherState = PropagatedState;
+ // Push any successors/predecessors that may need updating to the
+ // worklist.
+ for (MachineBasicBlock *SuccOrPred :
+ Forwards ? successors(MBB) : predecessors(MBB)) {
+ auto &SuccOrPredBlockInfo = State.Blocks[SuccOrPred->getNumber()];
+ if (!isLegalEdgeBundleZAState(
+ GetBlockState(SuccOrPredBlockInfo, Forwards)))
+ Worklist.push_back(SuccOrPred);
+ }
+ }
}
}
void MachineSMEABI::assignBundleZAStates() {
State.BundleStates.resize(Bundles->getNumBundles());
+
for (unsigned I = 0, E = Bundles->getNumBundles(); I != E; ++I) {
LLVM_DEBUG(dbgs() << "Assigning ZA state for edge bundle: " << I << '\n');
// Attempt to assign a ZA state for this bundle that minimizes state
// transitions. Edges within loops are given a higher weight as we assume
// they will be executed more than once.
- // TODO: We should propagate desired incoming/outgoing states through blocks
- // that have the "ANY" state first to make better global decisions.
int EdgeStateCounts[ZAState::NUM_ZA_STATE] = {0};
for (unsigned BlockID : Bundles->getBlocks(I)) {
LLVM_DEBUG(dbgs() << "- bb." << BlockID);
- const BlockInfo &Block = State.Blocks[BlockID];
- if (Block.Insts.empty()) {
- LLVM_DEBUG(dbgs() << " (no state preference)\n");
- continue;
- }
+ BlockInfo &Block = State.Blocks[BlockID];
bool InEdge = Bundles->getBundle(BlockID, /*Out=*/false) == I;
bool OutEdge = Bundles->getBundle(BlockID, /*Out=*/true) == I;
- ZAState DesiredIncomingState = Block.Insts.front().NeededState;
- if (InEdge && isLegalEdgeBundleZAState(DesiredIncomingState)) {
- EdgeStateCounts[DesiredIncomingState]++;
+ bool LegalInEdge =
+ InEdge && isLegalEdgeBundleZAState(Block.DesiredIncomingState);
+ bool LegalOutEdge =
+ OutEdge && isLegalEdgeBundleZAState(Block.DesiredOutgoingState);
+ if (LegalInEdge) {
LLVM_DEBUG(dbgs() << " DesiredIncomingState: "
- << getZAStateString(DesiredIncomingState));
+ << getZAStateString(Block.DesiredIncomingState));
+ EdgeStateCounts[Block.DesiredIncomingState]++;
}
- ZAState DesiredOutgoingState = Block.Insts.back().NeededState;
- if (OutEdge && isLegalEdgeBundleZAState(DesiredOutgoingState)) {
- EdgeStateCounts[DesiredOutgoingState]++;
+ if (LegalOutEdge) {
LLVM_DEBUG(dbgs() << " DesiredOutgoingState: "
- << getZAStateString(DesiredOutgoingState));
+ << getZAStateString(Block.DesiredOutgoingState));
+ EdgeStateCounts[Block.DesiredOutgoingState]++;
}
+ if (!LegalInEdge && !LegalOutEdge)
+ LLVM_DEBUG(dbgs() << " (no state preference)");
LLVM_DEBUG(dbgs() << '\n');
}
ZAState BundleState =
ZAState(max_element(EdgeStateCounts) - EdgeStateCounts);
- // Force ZA to be active in bundles that don't have a preferred state.
- // TODO: Something better here (to avoid extra mode switches).
if (BundleState == ZAState::ANY)
BundleState = ZAState::ACTIVE;
@@ -817,6 +882,10 @@ bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
collectNeededZAStates(SMEFnAttrs);
+ if (OptLevel != CodeGenOptLevel::None) {
+ for (bool Forwards : {true, false})
+ propagateDesiredStates(Forwards);
+ }
assignBundleZAStates();
insertStateChanges();
@@ -839,4 +908,6 @@ bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) {
return true;
}
-FunctionPass *llvm::createMachineSMEABIPass() { return new MachineSMEABI(); }
+FunctionPass *llvm::createMachineSMEABIPass(CodeGenOptLevel OptLevel) {
+ return new MachineSMEABI(OptLevel);
+}
diff --git a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
index a0a14f2ffae3f..077e9b5ced624 100644
--- a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
+++ b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
@@ -361,7 +361,6 @@ define i64 @test_many_callee_arguments(
ret i64 %ret
}
-; FIXME: The new lowering should avoid saves/restores in the probing loop.
define void @agnostic_za_buffer_alloc_with_stack_probes() nounwind "aarch64_za_state_agnostic" "probe-stack"="inline-asm" "stack-probe-size"="65536"{
; CHECK-LABEL: agnostic_za_buffer_alloc_with_stack_probes:
; CHECK: // %bb.0:
@@ -399,18 +398,14 @@ define void @agnostic_za_buffer_alloc_with_stack_probes() nounwind "aarch64_za_s
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state_size
; CHECK-NEWLOWERING-NEXT: mov x8, sp
; CHECK-NEWLOWERING-NEXT: sub x19, x8, x0
+; CHECK-NEWLOWERING-NEXT: mov x0, x19
+; CHECK-NEWLOWERING-NEXT: bl __arm_sme_save
; CHECK-NEWLOWERING-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEWLOWERING-NEXT: cmp sp, x19
-; CHECK-NEWLOWERING-NEXT: mov x0, x19
-; CHECK-NEWLOWERING-NEXT: mrs x8, NZCV
-; CHECK-NEWLOWERING-NEXT: bl __arm_sme_save
-; CHECK-NEWLOWERING-NEXT: msr NZCV, x8
; CHECK-NEWLOWERING-NEXT: b.le .LBB7_3
; CHECK-NEWLOWERING-NEXT: // %bb.2: // in Loop: Header=BB7_1 Depth=1
-; CHECK-NEWLOWERING-NEXT: mov x0, x19
; CHECK-NEWLOWERING-NEXT: str xzr, [sp]
-; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore
; CHECK-NEWLOWERING-NEXT: b .LBB7_1
; CHECK-NEWLOWERING-NEXT: .LBB7_3:
; CHECK-NEWLOWERING-NEXT: mov sp, x19
diff --git a/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll b/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll
index 18ea07e38fe89..c753e9c569d22 100644
--- a/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll
+++ b/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll
@@ -228,65 +228,34 @@ exit:
ret void
}
-; FIXME: The codegen for this case could be improved (by tuning weights).
-; Here the ZA save has been hoisted out of the conditional, but would be better
-; to sink it.
define void @cond_private_za_call(i1 %cond) "aarch64_inout_za" nounwind {
-; CHECK-LABEL: cond_private_za_call:
-; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT: mov x29, sp
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: rdsvl x8, #1
-; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: msub x9, x8, x8, x9
-; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: stp x9, x8, [x29, #-16]
-; CHECK-NEXT: tbz w0, #0, .LBB3_4
-; CHECK-NEXT: // %bb.1: // %private_za_call
-; CHECK-NEXT: sub x8, x29, #16
-; CHECK-NEXT: msr TPIDR2_EL0, x8
-; CHECK-NEXT: bl private_za_call
-; CHECK-NEXT: smstart za
-; CHECK-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEXT: sub x0, x29, #16
-; CHECK-NEXT: cbnz x8, .LBB3_3
-; CHECK-NEXT: // %bb.2: // %private_za_call
-; CHECK-NEXT: bl __arm_tpidr2_restore
-; CHECK-NEXT: .LBB3_3: // %private_za_call
-; CHECK-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEXT: .LBB3_4: // %exit
-; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEXT: b shared_za_call
-;
-; CHECK-NEWLOWERING-LABEL: cond_private_za_call:
-; CHECK-NEWLOWERING: // %bb.0:
-; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEWLOWERING-NEXT: mov x29, sp
-; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
-; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
-; CHECK-NEWLOWERING-NEXT: mov x9, sp
-; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
-; CHECK-NEWLOWERING-NEXT: mov sp, x9
-; CHECK-NEWLOWERING-NEXT: sub x10, x29, #16
-; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
-; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x10
-; CHECK-NEWLOWERING-NEXT: tbz w0, #0, .LBB3_2
-; CHECK-NEWLOWERING-NEXT: // %bb.1: // %private_za_call
-; CHECK-NEWLOWERING-NEXT: bl private_za_call
-; CHECK-NEWLOWERING-NEXT: .LBB3_2: // %exit
-; CHECK-NEWLOWERING-NEXT: smstart za
-; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
-; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB3_4
-; CHECK-NEWLOWERING-NEXT: // %bb.3: // %exit
-; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
-; CHECK-NEWLOWERING-NEXT: .LBB3_4: // %exit
-; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEWLOWERING-NEXT: mov sp, x29
-; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEWLOWERING-NEXT: b shared_za_call
+; CHECK-COMMON-LABEL: cond_private_za_call:
+; CHECK-COMMON: // %bb.0:
+; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: mov x29, sp
+; CHECK-COMMON-NEXT: sub sp, sp, #16
+; CHECK-COMMON-NEXT: rdsvl x8, #1
+; CHECK-COMMON-NEXT: mov x9, sp
+; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
+; CHECK-COMMON-NEXT: mov sp, x9
+; CHECK-COMMON-NEXT: stp x9, x8, [x29, #-16]
+; CHECK-COMMON-NEXT: tbz w0, #0, .LBB3_4
+; CHECK-COMMON-NEXT: // %bb.1: // %private_za_call
+; CHECK-COMMON-NEXT: sub x8, x29, #16
+; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x8
+; CHECK-COMMON-NEXT: bl private_za_call
+; CHECK-COMMON-NEXT: smstart za
+; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
+; CHECK-COMMON-NEXT: sub x0, x29, #16
+; CHECK-COMMON-NEXT: cbnz x8, .LBB3_3
+; CHECK-COMMON-NEXT: // %bb.2: // %private_za_call
+; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
+; CHECK-COMMON-NEXT: .LBB3_3: // %private_za_call
+; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
+; CHECK-COMMON-NEXT: .LBB3_4: // %exit
+; CHECK-COMMON-NEXT: mov sp, x29
+; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: b shared_za_call
br i1 %cond, label %private_za_call, label %exit
private_za_call:
@@ -910,7 +879,7 @@ define void @loop_with_external_entry(i1 %c1, i1 %c2) "aarch64_inout_za" nounwin
; CHECK-NEWLOWERING-LABEL: loop_with_external_entry:
; CHECK-NEWLOWERING: // %bb.0: // %entry
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEWLOWERING-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x29, sp
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
@@ -923,23 +892,27 @@ define void @loop_with_external_entry(i1 %c1, i1 %c2) "aarch64_inout_za" nounwin
; CHECK-NEWLOWERING-NEXT: // %bb.1: // %init
; CHECK-NEWLOWERING-NEXT: bl shared_za_call
; CHECK-NEWLOWERING-NEXT: .LBB11_2: // %loop.preheader
-; CHECK-NEWLOWERING-NEXT: sub x8, x29, #16
-; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8
+; CHECK-NEWLOWERING-NEXT: sub x20, x29, #16
+; CHECK-NEWLOWERING-NEXT: b .LBB11_4
; CHECK-NEWLOWERING-NEXT: .LBB11_3: // %loop
+; CHECK-NEWLOWERING-NEXT: // in Loop: Header=BB11_4 Depth=1
+; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
+; CHECK-NEWLOWERING-NEXT: tbz w19, #0, .LBB11_6
+; CHECK-NEWLOWERING-NEXT: .LBB11_4: // %loop
; CHECK-NEWLOWERING-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEWLOWERING-NEXT: bl private_za_call
-; CHECK-NEWLOWERING-NEXT: tbnz w19, #0, .LBB11_3
-; CHECK-NEWLOWERING-NEXT: // %bb.4: // %exit
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
-; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB11_6
-; CHECK-NEWLOWERING-NEXT: // %bb.5: // %exit
+; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB11_3
+; CHECK-NEWLOWERING-NEXT: // %bb.5: // %loop
+; CHECK-NEWLOWERING-NEXT: // in Loop: Header=BB11_4 Depth=1
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
+; CHECK-NEWLOWERING-NEXT: b .LBB11_3
; CHECK-NEWLOWERING-NEXT: .LBB11_6: // %exit
-; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: mov sp, x29
-; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEWLOWERING-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
index bb88142efa592..506974a14c3be 100644
--- a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
+++ b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
@@ -56,31 +56,23 @@ define void @za_with_raii(i1 %fail) "aarch64_inout_za" personality ptr @__gxx_pe
; CHECK-NEXT: adrp x8, .L.str
; CHECK-NEXT: add x8, x8, :lo12:.L.str
; CHECK-NEXT: str x8, [x0]
-; CHECK-NEXT: .Ltmp0:
+; CHECK-NEXT: .Ltmp0: // EH_LABEL
; CHECK-NEXT: adrp x1, :got:typeinfo_for_char_const_ptr
; CHECK-NEXT: mov x2, xzr
; CHECK-NEXT: ldr x1, [x1, :got_lo12:typeinfo_for_char_const_ptr]
; CHECK-NEXT: bl __cxa_throw
-; CHECK-NEXT: .Ltmp1:
-; CHECK-NEXT: smstart za
-; CHECK-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEXT: sub x0, x29, #16
-; CHECK-NEXT: cbnz x8, .LBB0_4
-; CHECK-NEXT: // %bb.3: // %throw_exception
-; CHECK-NEXT: bl __arm_tpidr2_restore
-; CHECK-NEXT: .LBB0_4: // %throw_exception
-; CHECK-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEXT: // %bb.5: // %throw_fail
-; CHECK-NEXT: .LBB0_6: // %unwind_dtors
-; CHECK-NEXT: .Ltmp2:
+; CHECK-NEXT: .Ltmp1: // EH_LABEL
+; CHECK-NEXT: // %bb.3: // %throw_fail
+; CHECK-NEXT: .LBB0_4: // %unwind_dtors
+; CHECK-NEXT: .Ltmp2: // EH_LABEL
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
-; CHECK-NEXT: cbnz x8, .LBB0_8
-; CHECK-NEXT: // %bb.7: // %unwind_dtors
+; CHECK-NEXT: cbnz x8, .LBB0_6
+; CHECK-NEXT: // %bb.5: // %unwind_dtors
; CHECK-NEXT: bl __arm_tpidr2_restore
-; CHECK-NEXT: .LBB0_8: // %unwind_dtors
+; CHECK-NEXT: .LBB0_6: // %unwind_dtors
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: bl shared_za_call
; CHECK-NEXT: sub x8, x29, #16
@@ -142,11 +134,11 @@ define dso_local void @try_catch() "aarch64_...
[truncated]
``````````
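For readers skimming the truncated diff, the following is a simplified standalone model of the propagation step. The types here are hypothetical stand-ins (the real pass operates on MachineBasicBlock, uses llvm::max_element, and has more ZA states), but the worklist and majority-vote logic mirror the patch:

```cpp
#include <algorithm>
#include <array>
#include <vector>

enum ZAState { ANY, ACTIVE, LOCAL_SAVED, NUM_ZA_STATE }; // simplified set
bool isLegalEdgeBundleZAState(ZAState S) { return S != ANY; }

struct Block {
  ZAState DesiredIncoming = ANY, DesiredOutgoing = ANY;
  std::vector<int> Preds, Succs; // CFG edges, by block index
};

// One forwards pass: fill in each block's desired incoming state from the
// majority of its predecessors' desired outgoing states, iterating to a
// fixed point (this mirrors the patch's worklist loop).
void propagateForwards(std::vector<Block> &Blocks) {
  std::vector<int> Worklist;
  for (int I = 0; I < (int)Blocks.size(); ++I)
    if (!isLegalEdgeBundleZAState(Blocks[I].DesiredIncoming))
      Worklist.push_back(I);
  while (!Worklist.empty()) {
    int B = Worklist.back();
    Worklist.pop_back();
    // Count the predecessors' outgoing preferences.
    std::array<int, NUM_ZA_STATE> Counts{};
    for (int P : Blocks[B].Preds) {
      ZAState S = Blocks[P].DesiredOutgoing;
      if (isLegalEdgeBundleZAState(S))
        Counts[S]++;
    }
    // Same idiom as the patch: the index of the max count is the state.
    ZAState New = ZAState(std::max_element(Counts.begin(), Counts.end()) -
                          Counts.begin());
    if (New != Blocks[B].DesiredIncoming) {
      Blocks[B].DesiredIncoming = New;
      // Let the state flow through blocks with no preference of their own.
      if (Blocks[B].DesiredOutgoing == ANY)
        Blocks[B].DesiredOutgoing = New;
      // Successors whose incoming state is still undecided may now change.
      for (int S : Blocks[B].Succs)
        if (!isLegalEdgeBundleZAState(Blocks[S].DesiredIncoming))
          Worklist.push_back(S);
    }
  }
}
```

The pass runs this once forwards and once backwards (`for (bool Forwards : {true, false})`), so a preference can flow both out of and into loops. Note that a block is only revisited while its state is still "ANY"; once it is assigned a legal state it never changes again, which guarantees termination.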
https://github.com/llvm/llvm-project/pull/149510