[llvm] [AArch64][SME] Use `mrs %reg, SVCR` rather than ABI routine with `+sme` (PR #156878)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 4 05:54:43 PDT 2025
https://github.com/MacDue created https://github.com/llvm/llvm-project/pull/156878
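Calling `__arm_sme_state` does more than we need in most cases and clobbers both x0 and x1. When the subtarget has SME (`+sme`), the entry value of PSTATE.SM is now read directly with `mrs %reg, SVCR`; without `+sme` the ABI routine is still called.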
This also removes (set|is)PStateSMRegUsed, since we can directly check whether the virtual register has any uses instead.
From 582d6efd23bcad37e97e55b720725447c7077e0a Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 4 Sep 2025 12:49:55 +0000
Subject: [PATCH] [AArch64][SME] Use `mrs %reg, SVCR` rather than ABI routine
with `+sme`
Calling `__arm_sme_state` does more than we need in most cases and
clobbers both x0 and x1.
This also removes (set|is)PStateSMRegUsed, since we can directly check
whether the virtual register has any uses instead.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 19 ++++++----
.../AArch64/AArch64MachineFunctionInfo.h | 6 ---
llvm/test/CodeGen/AArch64/sme-agnostic-za.ll | 21 +++++-----
.../AArch64/sme-callee-save-restore-pairs.ll | 6 +--
.../AArch64/sme-disable-gisel-fisel.ll | 3 +-
.../CodeGen/AArch64/sme-lazy-save-call.ll | 6 +--
.../test/CodeGen/AArch64/sme-peephole-opts.ll | 6 +--
...ing-body-streaming-compatible-interface.ll | 21 ++++------
.../sme-streaming-compatible-interface.ll | 38 ++++++-------------
.../sme-streaming-mode-changes-unwindinfo.ll | 3 +-
llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll | 17 +++------
.../CodeGen/AArch64/spill-reload-remarks.ll | 2 +-
.../streaming-compatible-memory-ops.ll | 9 ++---
13 files changed, 58 insertions(+), 99 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a5746684308c9..d7c248e444826 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3175,21 +3175,25 @@ MachineBasicBlock *
AArch64TargetLowering::EmitEntryPStateSM(MachineInstr &MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
- AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ const DebugLoc &DL = MI.getDebugLoc();
Register ResultReg = MI.getOperand(0).getReg();
- if (FuncInfo->isPStateSMRegUsed()) {
+ if (MF->getRegInfo().use_empty(ResultReg)) {
+ // Nothing to do. Pseudo erased below.
+ } else if (Subtarget->hasSME()) {
+ BuildMI(*BB, MI, DL, TII->get(AArch64::MRS))
+ .addReg(ResultReg, RegState::Define)
+ .addImm(AArch64SysReg::SVCR)
+ .addReg(AArch64::VG, RegState::Implicit);
+ } else {
RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE;
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
- BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::BL))
+ BuildMI(*BB, MI, DL, TII->get(AArch64::BL))
.addExternalSymbol(getLibcallName(LC))
.addReg(AArch64::X0, RegState::ImplicitDefine)
.addRegMask(TRI->getCallPreservedMask(*MF, getLibcallCallingConv(LC)));
- BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), ResultReg)
+ BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), ResultReg)
.addReg(AArch64::X0);
- } else {
- assert(MI.getMF()->getRegInfo().use_empty(ResultReg) &&
- "Expected no users of the entry pstate.sm!");
}
MI.eraseFromParent();
return BB;
@@ -9102,7 +9106,6 @@ SDValue AArch64TargetLowering::changeStreamingMode(SelectionDAG &DAG, SDLoc DL,
SmallVector<SDValue> Ops = {Chain, MSROp};
unsigned Opcode;
if (Condition != AArch64SME::Always) {
- FuncInfo->setPStateSMRegUsed(true);
Register PStateReg = FuncInfo->getPStateSMReg();
assert(PStateReg.isValid() && "PStateSM Register is invalid");
SDValue PStateSM =
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 1fde87e65a34b..1dd89c3e0abb8 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -228,9 +228,6 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
// on function entry to record the initial pstate of a function.
Register PStateSMReg = MCRegister::NoRegister;
- // true if PStateSMReg is used.
- bool PStateSMRegUsed = false;
-
// Has the PNReg used to build PTRUE instruction.
// The PTRUE is used for the LD/ST of ZReg pairs in save and restore.
unsigned PredicateRegForFillSpill = 0;
@@ -273,9 +270,6 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
Register getPStateSMReg() const { return PStateSMReg; };
void setPStateSMReg(Register Reg) { PStateSMReg = Reg; };
- unsigned isPStateSMRegUsed() const { return PStateSMRegUsed; };
- void setPStateSMRegUsed(bool Used = true) { PStateSMRegUsed = Used; };
-
bool isSVECC() const { return IsSVECC; };
void setIsSVECC(bool s) { IsSVECC = s; };
diff --git a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
index 25a7b87d37d9e..b31ae68e87ec8 100644
--- a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
+++ b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
@@ -143,40 +143,39 @@ define i64 @streaming_compatible_agnostic_caller_nonstreaming_private_za_callee(
; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: add x29, sp, #64
; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x20, SVCR
; CHECK-NEXT: bl __arm_sme_state_size
; CHECK-NEXT: sub sp, sp, x0
-; CHECK-NEXT: mov x20, sp
-; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: mov x19, sp
+; CHECK-NEXT: mov x0, x19
; CHECK-NEXT: bl __arm_sme_save
-; CHECK-NEXT: tbz w19, #0, .LBB5_2
+; CHECK-NEXT: tbz w20, #0, .LBB5_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB5_2:
; CHECK-NEXT: mov x0, x8
; CHECK-NEXT: bl private_za_decl
; CHECK-NEXT: mov x1, x0
-; CHECK-NEXT: tbz w19, #0, .LBB5_4
+; CHECK-NEXT: tbz w20, #0, .LBB5_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB5_4:
-; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: mov x0, x19
; CHECK-NEXT: bl __arm_sme_restore
-; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: mov x0, x19
; CHECK-NEXT: bl __arm_sme_save
-; CHECK-NEXT: tbz w19, #0, .LBB5_6
+; CHECK-NEXT: tbz w20, #0, .LBB5_6
; CHECK-NEXT: // %bb.5:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB5_6:
; CHECK-NEXT: mov x0, x1
; CHECK-NEXT: bl private_za_decl
; CHECK-NEXT: mov x1, x0
-; CHECK-NEXT: tbz w19, #0, .LBB5_8
+; CHECK-NEXT: tbz w20, #0, .LBB5_8
; CHECK-NEXT: // %bb.7:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB5_8:
-; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: mov x0, x19
; CHECK-NEXT: bl __arm_sme_restore
; CHECK-NEXT: mov x0, x1
; CHECK-NEXT: sub sp, x29, #64
diff --git a/llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll b/llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll
index 8d6432ced8e1d..cf42db7aa65bd 100644
--- a/llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll
+++ b/llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll
@@ -42,8 +42,7 @@ define void @fbyte(<vscale x 16 x i8> %v) #0{
; NOPAIR-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
; NOPAIR-NEXT: addvl sp, sp, #-1
; NOPAIR-NEXT: str z0, [sp] // 16-byte Folded Spill
-; NOPAIR-NEXT: bl __arm_sme_state
-; NOPAIR-NEXT: mov x19, x0
+; NOPAIR-NEXT: mrs x19, SVCR
; NOPAIR-NEXT: tbz w19, #0, .LBB0_2
; NOPAIR-NEXT: // %bb.1:
; NOPAIR-NEXT: smstop sm
@@ -123,8 +122,7 @@ define void @fbyte(<vscale x 16 x i8> %v) #0{
; PAIR-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
; PAIR-NEXT: addvl sp, sp, #-1
; PAIR-NEXT: str z0, [sp] // 16-byte Folded Spill
-; PAIR-NEXT: bl __arm_sme_state
-; PAIR-NEXT: mov x19, x0
+; PAIR-NEXT: mrs x19, SVCR
; PAIR-NEXT: tbz w19, #0, .LBB0_2
; PAIR-NEXT: // %bb.1:
; PAIR-NEXT: smstop sm
diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
index 937dd417b9ec2..05d636158b92b 100644
--- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
+++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
@@ -409,8 +409,7 @@ define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compati
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
-; CHECK-COMMON-NEXT: bl __arm_sme_state
-; CHECK-COMMON-NEXT: mov x19, x0
+; CHECK-COMMON-NEXT: mrs x19, SVCR
; CHECK-COMMON-NEXT: tbz w19, #0, .LBB12_2
; CHECK-COMMON-NEXT: // %bb.1:
; CHECK-COMMON-NEXT: smstop sm
diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
index 67199d9c0970c..a7d51968c5157 100644
--- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
+++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
@@ -155,10 +155,9 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za
; CHECK-NEXT: add x29, sp, #64
; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: mov x20, x0
+; CHECK-NEXT: mrs x20, SVCR
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: sub x10, x29, #80
@@ -205,8 +204,7 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT: mov sp, x9
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-80]
-; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state
-; CHECK-NEWLOWERING-NEXT: mov x20, x0
+; CHECK-NEWLOWERING-NEXT: mrs x20, SVCR
; CHECK-NEWLOWERING-NEXT: sub x8, x29, #80
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT: tbz w20, #0, .LBB3_2
diff --git a/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll b/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll
index ab7c661d27187..80827c2547780 100644
--- a/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll
+++ b/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll
@@ -63,8 +63,7 @@ define void @test2() nounwind "aarch64_pstate_sm_compatible" {
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: tbz w19, #0, .LBB2_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
@@ -95,8 +94,7 @@ define void @test3() nounwind "aarch64_pstate_sm_compatible" {
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: tbnz w19, #0, .LBB3_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstart sm
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll
index 39ea180e7ed81..1f0581a142c4a 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll
@@ -8,26 +8,24 @@ declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible";
define float @sm_body_sm_compatible_simple() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" nounwind {
; CHECK-LABEL: sm_body_sm_compatible_simple:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d15, d14, [sp, #-64]! // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: tbnz w0, #0, .LBB0_2
+; CHECK-NEXT: mrs x8, SVCR
+; CHECK-NEXT: tbnz w8, #0, .LBB0_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB0_2:
-; CHECK-NEXT: tbnz w0, #0, .LBB0_4
+; CHECK-NEXT: tbnz w8, #0, .LBB0_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: fmov s0, wzr
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload
; CHECK-NEXT: ret
ret float zeroinitializer
}
@@ -40,8 +38,7 @@ define void @sm_body_caller_sm_compatible_caller_normal_callee() "aarch64_pstate
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: tbnz w19, #0, .LBB1_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstart sm
@@ -69,17 +66,15 @@ define void @streaming_body_and_streaming_compatible_interface_multi_basic_block
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: tbnz w19, #0, .LBB2_2
; CHECK-NEXT: // %bb.1: // %entry
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB2_2: // %entry
-; CHECK-NEXT: cbz w8, .LBB2_6
+; CHECK-NEXT: cbz w0, .LBB2_6
; CHECK-NEXT: // %bb.3: // %if.else
; CHECK-NEXT: bl streaming_compatible_callee
; CHECK-NEXT: tbnz w19, #0, .LBB2_5
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
index ff4f36363edcf..9088986ee9b72 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
@@ -41,8 +41,7 @@ define void @streaming_compatible_caller_normal_callee() "aarch64_pstate_sm_comp
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: tbz w19, #0, .LBB1_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
@@ -77,8 +76,7 @@ define void @streaming_compatible_caller_streaming_callee() "aarch64_pstate_sm_c
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: tbnz w19, #0, .LBB2_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstart sm
@@ -134,10 +132,7 @@ define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) "
; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: add x8, sp, #16
-; CHECK-NEXT: mov x19, x0
-; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: tbz w19, #0, .LBB4_2
@@ -209,8 +204,7 @@ define <vscale x 2 x double> @streaming_compatible_with_scalable_vectors(<vscale
; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: tbz w19, #0, .LBB5_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
@@ -301,8 +295,7 @@ define <vscale x 2 x i1> @streaming_compatible_with_predicate_vectors(<vscale x
; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: str p0, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: tbz w19, #0, .LBB6_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
@@ -365,8 +358,7 @@ define i32 @conditional_smstart_unreachable_block() "aarch64_pstate_sm_compatibl
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: tbnz w19, #0, .LBB7_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstart sm
@@ -381,18 +373,16 @@ define void @conditional_smstart_no_successor_block(i1 %p) "aarch64_pstate_sm_co
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: tbz w8, #0, .LBB8_5
+; CHECK-NEXT: mrs x19, SVCR
+; CHECK-NEXT: tbz w0, #0, .LBB8_5
; CHECK-NEXT: // %bb.1: // %if.then
-; CHECK-NEXT: tbnz w0, #0, .LBB8_3
+; CHECK-NEXT: tbnz w19, #0, .LBB8_3
; CHECK-NEXT: // %bb.2: // %if.then
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB8_3: // %if.then
-; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: bl streaming_callee
; CHECK-NEXT: tbnz w19, #0, .LBB8_5
; CHECK-NEXT: // %bb.4: // %if.then
@@ -422,8 +412,7 @@ define void @disable_tailcallopt() "aarch64_pstate_sm_compatible" nounwind {
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: tbz w19, #0, .LBB9_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
@@ -469,19 +458,14 @@ define void @call_to_non_streaming_pass_args(ptr nocapture noundef readnone %ptr
; CHECK-NEXT: .cfi_offset b14, -88
; CHECK-NEXT: .cfi_offset b15, -96
; CHECK-NEXT: stp d2, d3, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: mov x8, x1
-; CHECK-NEXT: mov x9, x0
; CHECK-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: tbz w19, #0, .LBB10_2
; CHECK-NEXT: // %bb.1: // %entry
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB10_2: // %entry
; CHECK-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: mov x0, x9
; CHECK-NEXT: ldp d2, d3, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: mov x1, x8
; CHECK-NEXT: bl bar
; CHECK-NEXT: tbz w19, #0, .LBB10_4
; CHECK-NEXT: // %bb.3: // %entry
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changes-unwindinfo.ll b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changes-unwindinfo.ll
index 991776f11ae40..7be5e6fe29869 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changes-unwindinfo.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changes-unwindinfo.ll
@@ -283,8 +283,7 @@ define aarch64_sve_vector_pcs void @streaming_compatible_caller_conditional_mode
; CHECK: .cfi_escape 0x10, 0x4d, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d13 @ cfa - 48 * IncomingVG - 48
; CHECK: .cfi_escape 0x10, 0x4e, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d14 @ cfa - 56 * IncomingVG - 48
; CHECK: .cfi_escape 0x10, 0x4f, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d15 @ cfa - 64 * IncomingVG - 48
-; CHECK: bl __arm_sme_state
-; CHECK: mov x19, x0
+; CHECK: mrs x19, SVCR
; CHECK: tbnz w19, #0, .LBB5_2
; CHECK: smstart sm
; CHECK: .LBB5_2:
diff --git a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
index dec8eb0d8a936..c72077bd311b4 100644
--- a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
+++ b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
@@ -786,8 +786,7 @@ define void @streaming_compatible_to_streaming() #4 {
; CHECK-NEXT: .cfi_offset b13, -80
; CHECK-NEXT: .cfi_offset b14, -88
; CHECK-NEXT: .cfi_offset b15, -96
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: tbnz w19, #0, .LBB6_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstart sm
@@ -842,8 +841,7 @@ define void @streaming_compatible_to_streaming() #4 {
; FP-CHECK-NEXT: .cfi_offset b13, -80
; FP-CHECK-NEXT: .cfi_offset b14, -88
; FP-CHECK-NEXT: .cfi_offset b15, -96
-; FP-CHECK-NEXT: bl __arm_sme_state
-; FP-CHECK-NEXT: mov x19, x0
+; FP-CHECK-NEXT: mrs x19, SVCR
; FP-CHECK-NEXT: tbnz w19, #0, .LBB6_2
; FP-CHECK-NEXT: // %bb.1:
; FP-CHECK-NEXT: smstart sm
@@ -905,8 +903,7 @@ define void @streaming_compatible_to_non_streaming() #4 {
; CHECK-NEXT: .cfi_offset b13, -80
; CHECK-NEXT: .cfi_offset b14, -88
; CHECK-NEXT: .cfi_offset b15, -96
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: tbz w19, #0, .LBB7_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
@@ -961,8 +958,7 @@ define void @streaming_compatible_to_non_streaming() #4 {
; FP-CHECK-NEXT: .cfi_offset b13, -80
; FP-CHECK-NEXT: .cfi_offset b14, -88
; FP-CHECK-NEXT: .cfi_offset b15, -96
-; FP-CHECK-NEXT: bl __arm_sme_state
-; FP-CHECK-NEXT: mov x19, x0
+; FP-CHECK-NEXT: mrs x19, SVCR
; FP-CHECK-NEXT: tbz w19, #0, .LBB7_2
; FP-CHECK-NEXT: // %bb.1:
; FP-CHECK-NEXT: smstop sm
@@ -1033,14 +1029,11 @@ define void @streaming_compatible_no_sve(i32 noundef %x) #4 {
; NO-SVE-CHECK-NEXT: .cfi_offset b13, -80
; NO-SVE-CHECK-NEXT: .cfi_offset b14, -88
; NO-SVE-CHECK-NEXT: .cfi_offset b15, -96
-; NO-SVE-CHECK-NEXT: mov w8, w0
-; NO-SVE-CHECK-NEXT: bl __arm_sme_state
-; NO-SVE-CHECK-NEXT: mov x19, x0
+; NO-SVE-CHECK-NEXT: mrs x19, SVCR
; NO-SVE-CHECK-NEXT: tbnz w19, #0, .LBB8_2
; NO-SVE-CHECK-NEXT: // %bb.1:
; NO-SVE-CHECK-NEXT: smstart sm
; NO-SVE-CHECK-NEXT: .LBB8_2:
-; NO-SVE-CHECK-NEXT: mov w0, w8
; NO-SVE-CHECK-NEXT: bl streaming_callee_with_arg
; NO-SVE-CHECK-NEXT: tbnz w19, #0, .LBB8_4
; NO-SVE-CHECK-NEXT: // %bb.3:
diff --git a/llvm/test/CodeGen/AArch64/spill-reload-remarks.ll b/llvm/test/CodeGen/AArch64/spill-reload-remarks.ll
index 6c248048e682f..a23854759d688 100644
--- a/llvm/test/CodeGen/AArch64/spill-reload-remarks.ll
+++ b/llvm/test/CodeGen/AArch64/spill-reload-remarks.ll
@@ -2,7 +2,7 @@
; We should have both spill and reload for %arg.
-; CHECK: remark: <unknown>:0:0: 2 spills 2.000000e+00 total spills cost 3 reloads 3.000000e+00 total reloads cost 1 virtual registers copies 1.000000e+00 total copies cost generated in function
+; CHECK: remark: <unknown>:0:0: 2 spills 2.000000e+00 total spills cost 3 reloads 3.000000e+00 total reloads cost generated in function
define <vscale x 2 x i1> @streaming_compatible_with_predicate_vectors(<vscale x 2 x i1> %arg) "aarch64_pstate_sm_compatible" nounwind #0 {
%res = call <vscale x 2 x i1> @normal_callee_predicate_vec_arg(<vscale x 2 x i1> %arg)
%and = and <vscale x 2 x i1> %res, %arg
diff --git a/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll b/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll
index 7ad95429949a0..6021f9fab2cdd 100644
--- a/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll
+++ b/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll
@@ -169,15 +169,14 @@ define void @sc_memcpy(i64 noundef %n) "aarch64_pstate_sm_compatible" nounwind {
; CHECK-NO-SME-ROUTINES-LABEL: sc_memcpy:
; CHECK-NO-SME-ROUTINES: // %bb.0: // %entry
; CHECK-NO-SME-ROUTINES-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
-; CHECK-NO-SME-ROUTINES-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NO-SME-ROUTINES-NEXT: mov x2, x0
+; CHECK-NO-SME-ROUTINES-NEXT: adrp x0, :got:dst
+; CHECK-NO-SME-ROUTINES-NEXT: adrp x1, :got:src
+; CHECK-NO-SME-ROUTINES-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NO-SME-ROUTINES-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NO-SME-ROUTINES-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NO-SME-ROUTINES-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NO-SME-ROUTINES-NEXT: bl __arm_sme_state
-; CHECK-NO-SME-ROUTINES-NEXT: mov x19, x0
-; CHECK-NO-SME-ROUTINES-NEXT: adrp x0, :got:dst
-; CHECK-NO-SME-ROUTINES-NEXT: adrp x1, :got:src
+; CHECK-NO-SME-ROUTINES-NEXT: mrs x19, SVCR
; CHECK-NO-SME-ROUTINES-NEXT: ldr x0, [x0, :got_lo12:dst]
; CHECK-NO-SME-ROUTINES-NEXT: ldr x1, [x1, :got_lo12:src]
; CHECK-NO-SME-ROUTINES-NEXT: tbz w19, #0, .LBB3_2
More information about the llvm-commits mailing list