[llvm] [AArch64][SME] Conditionally do smstart/smstop (PR #77113)
Matthew Devereau via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 16 03:19:07 PST 2024
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/77113
>From fbcac4136ba869820c2fa477e919e54b2151fa6f Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.devereau at arm.com>
Date: Thu, 4 Jan 2024 16:37:12 +0000
Subject: [PATCH 1/6] [AArch64][SME] Conditionally do smstart/smstop
This patch adds conditional enabling/disabling of streaming mode
for functions which have both the aarch64_pstate_sm_compatible
and aarch64_pstate_sm_body attributes.
With this combination, the function itself determines on entry
whether switching streaming mode is required, instead
of relying on the caller.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 37 +++-
.../AArch64/AArch64MachineFunctionInfo.h | 5 +
...sme-streaming-body-streaming-compatible.ll | 177 ++++++++++++++++++
3 files changed, 209 insertions(+), 10 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 50658a855cfb370..2a133e8e63b9176 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4855,14 +4855,14 @@ static SDValue getSVEPredicateBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) {
SDValue AArch64TargetLowering::getPStateSM(SelectionDAG &DAG, SDValue Chain,
SMEAttrs Attrs, SDLoc DL,
EVT VT) const {
- if (Attrs.hasStreamingInterfaceOrBody())
+ if (Attrs.hasStreamingInterfaceOrBody() &&
+ !Attrs.hasStreamingCompatibleInterface())
return DAG.getConstant(1, DL, VT);
- if (Attrs.hasNonStreamingInterfaceAndBody())
+ if (Attrs.hasNonStreamingInterfaceAndBody() &&
+ !Attrs.hasStreamingCompatibleInterface())
return DAG.getConstant(0, DL, VT);
- assert(Attrs.hasStreamingCompatibleInterface() && "Unexpected interface");
-
SDValue Callee = DAG.getExternalSymbol("__arm_sme_state",
getPointerTy(DAG.getDataLayout()));
Type *Int64Ty = Type::getInt64Ty(*DAG.getContext());
@@ -6888,9 +6888,18 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// Insert the SMSTART if this is a locally streaming function and
// make sure it is Glued to the last CopyFromReg value.
if (IsLocallyStreaming) {
- Chain =
- changeStreamingMode(DAG, DL, /*Enable*/ true, DAG.getRoot(), Glue,
- DAG.getConstant(0, DL, MVT::i64), /*Entry*/ true);
+ SDValue PStateSM;
+ if (Attrs.hasStreamingCompatibleInterface()) {
+ PStateSM = getPStateSM(DAG, Chain, Attrs, DL, MVT::i64);
+ Register Reg = MF.getRegInfo().createVirtualRegister(
+ getRegClassFor(PStateSM.getValueType().getSimpleVT()));
+ FuncInfo->setPStateSMReg(Reg);
+ Chain = DAG.getCopyToReg(Chain, DL, Reg, PStateSM);
+ } else {
+ PStateSM = DAG.getConstant(0, DL, MVT::i64);
+ }
+ Chain = changeStreamingMode(DAG, DL, /*Enable*/ true, Chain, Glue, PStateSM,
+ /*Entry*/ true);
// Ensure that the SMSTART happens after the CopyWithChain such that its
// chain result is used.
@@ -8200,10 +8209,18 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Emit SMSTOP before returning from a locally streaming function
SMEAttrs FuncAttrs(MF.getFunction());
+
if (FuncAttrs.hasStreamingBody() && !FuncAttrs.hasStreamingInterface()) {
- Chain = changeStreamingMode(
- DAG, DL, /*Enable*/ false, Chain, /*Glue*/ SDValue(),
- DAG.getConstant(1, DL, MVT::i64), /*Entry*/ true);
+ SDValue PStateSM;
+ if (FuncAttrs.hasStreamingCompatibleInterface()) {
+ Register Reg = FuncInfo->getPStateSMReg();
+ assert(Reg.isValid() && "PStateSM Register is invalid");
+ PStateSM = DAG.getCopyFromReg(Chain, DL, Reg, MVT::i64);
+ } else {
+ PStateSM = DAG.getConstant(1, DL, MVT::i64);
+ }
+ Chain = changeStreamingMode(DAG, DL, /*Enable*/ false, Chain,
+ /*Glue*/ SDValue(), PStateSM, /*Entry*/ true);
Glue = Chain.getValue(1);
}
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index cd4a18bfbc23a89..096fde364a2dcc8 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -208,6 +208,8 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
int64_t StackProbeSize = 0;
+ Register PStateSMReg = MCRegister::NoRegister;
+
public:
AArch64FunctionInfo(const Function &F, const AArch64Subtarget *STI);
@@ -216,6 +218,9 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
const override;
+ Register getPStateSMReg() const { return PStateSMReg; };
+ void setPStateSMReg(Register Reg) { PStateSMReg = Reg; };
+
bool isSVECC() const { return IsSVECC; };
void setIsSVECC(bool s) { IsSVECC = s; };
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible.ll
new file mode 100644
index 000000000000000..67f4548e09f561f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible.ll
@@ -0,0 +1,177 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s
+
+declare void @normal_callee();
+declare void @streaming_callee() "aarch64_pstate_sm_enabled";
+declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible";
+
+define float @sm_body_sm_compatible_caller_simple() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
+; CHECK-LABEL: sm_body_sm_compatible_caller_simple:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: .cfi_offset b8, -24
+; CHECK-NEXT: .cfi_offset b9, -32
+; CHECK-NEXT: .cfi_offset b10, -40
+; CHECK-NEXT: .cfi_offset b11, -48
+; CHECK-NEXT: .cfi_offset b12, -56
+; CHECK-NEXT: .cfi_offset b13, -64
+; CHECK-NEXT: .cfi_offset b14, -72
+; CHECK-NEXT: .cfi_offset b15, -80
+; CHECK-NEXT: bl __arm_sme_state
+; CHECK-NEXT: and x8, x0, #0x1
+; CHECK-NEXT: tbnz w8, #0, .LBB0_2
+; CHECK-NEXT: // %bb.1:
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: .LBB0_2:
+; CHECK-NEXT: tbz w8, #0, .LBB0_4
+; CHECK-NEXT: // %bb.3:
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: .LBB0_4:
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: fmov s0, wzr
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ ret float zeroinitializer
+}
+
+define void @sm_body_caller_sm_compatible_caller_normal_callee() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
+; CHECK-LABEL: sm_body_caller_sm_compatible_caller_normal_callee:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 96
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: .cfi_offset b8, -40
+; CHECK-NEXT: .cfi_offset b9, -48
+; CHECK-NEXT: .cfi_offset b10, -56
+; CHECK-NEXT: .cfi_offset b11, -64
+; CHECK-NEXT: .cfi_offset b12, -72
+; CHECK-NEXT: .cfi_offset b13, -80
+; CHECK-NEXT: .cfi_offset b14, -88
+; CHECK-NEXT: .cfi_offset b15, -96
+; CHECK-NEXT: bl __arm_sme_state
+; CHECK-NEXT: and x19, x0, #0x1
+; CHECK-NEXT: tbnz w19, #0, .LBB1_2
+; CHECK-NEXT: // %bb.1:
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: .LBB1_2:
+; CHECK-NEXT: bl __arm_sme_state
+; CHECK-NEXT: and x20, x0, #0x1
+; CHECK-NEXT: tbz w20, #0, .LBB1_4
+; CHECK-NEXT: // %bb.3:
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: .LBB1_4:
+; CHECK-NEXT: bl normal_callee
+; CHECK-NEXT: tbz w20, #0, .LBB1_6
+; CHECK-NEXT: // %bb.5:
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: .LBB1_6:
+; CHECK-NEXT: tbz w19, #0, .LBB1_8
+; CHECK-NEXT: // %bb.7:
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: .LBB1_8:
+; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ call void @normal_callee()
+ ret void
+}
+
+define void @sm_body_caller_sm_compatible_caller_streaming_callee() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
+; CHECK-LABEL: sm_body_caller_sm_compatible_caller_streaming_callee:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: .cfi_offset b8, -24
+; CHECK-NEXT: .cfi_offset b9, -32
+; CHECK-NEXT: .cfi_offset b10, -40
+; CHECK-NEXT: .cfi_offset b11, -48
+; CHECK-NEXT: .cfi_offset b12, -56
+; CHECK-NEXT: .cfi_offset b13, -64
+; CHECK-NEXT: .cfi_offset b14, -72
+; CHECK-NEXT: .cfi_offset b15, -80
+; CHECK-NEXT: bl __arm_sme_state
+; CHECK-NEXT: and x19, x0, #0x1
+; CHECK-NEXT: tbnz w19, #0, .LBB2_2
+; CHECK-NEXT: // %bb.1:
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: .LBB2_2:
+; CHECK-NEXT: bl streaming_callee
+; CHECK-NEXT: tbz w19, #0, .LBB2_4
+; CHECK-NEXT: // %bb.3:
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: .LBB2_4:
+; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ call void @streaming_callee()
+ ret void
+}
+
+define void @sm_body_caller_sm_compatible_caller_streaming_compatible_callee() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
+; CHECK-LABEL: sm_body_caller_sm_compatible_caller_streaming_compatible_callee:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: .cfi_offset b8, -24
+; CHECK-NEXT: .cfi_offset b9, -32
+; CHECK-NEXT: .cfi_offset b10, -40
+; CHECK-NEXT: .cfi_offset b11, -48
+; CHECK-NEXT: .cfi_offset b12, -56
+; CHECK-NEXT: .cfi_offset b13, -64
+; CHECK-NEXT: .cfi_offset b14, -72
+; CHECK-NEXT: .cfi_offset b15, -80
+; CHECK-NEXT: bl __arm_sme_state
+; CHECK-NEXT: and x19, x0, #0x1
+; CHECK-NEXT: tbnz w19, #0, .LBB3_2
+; CHECK-NEXT: // %bb.1:
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: .LBB3_2:
+; CHECK-NEXT: bl streaming_compatible_callee
+; CHECK-NEXT: tbz w19, #0, .LBB3_4
+; CHECK-NEXT: // %bb.3:
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: .LBB3_4:
+; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ call void @streaming_compatible_callee()
+ ret void
+}
>From 3f08c6fc947296017d6955b748ed5ef85cca1fcb Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.devereau at arm.com>
Date: Fri, 5 Jan 2024 15:59:06 +0000
Subject: [PATCH 2/6] fix whitespace + test name
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 1 -
.../AArch64/sme-streaming-body-streaming-compatible.ll | 4 ++--
2 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2a133e8e63b9176..72ae574efb6191f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8209,7 +8209,6 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Emit SMSTOP before returning from a locally streaming function
SMEAttrs FuncAttrs(MF.getFunction());
-
if (FuncAttrs.hasStreamingBody() && !FuncAttrs.hasStreamingInterface()) {
SDValue PStateSM;
if (FuncAttrs.hasStreamingCompatibleInterface()) {
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible.ll
index 67f4548e09f561f..16375041e4298f0 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible.ll
@@ -5,8 +5,8 @@ declare void @normal_callee();
declare void @streaming_callee() "aarch64_pstate_sm_enabled";
declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible";
-define float @sm_body_sm_compatible_caller_simple() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
-; CHECK-LABEL: sm_body_sm_compatible_caller_simple:
+define float @sm_body_sm_compatible_simple() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
+; CHECK-LABEL: sm_body_sm_compatible_simple:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
>From 3c5d605d425f2e49d4d1dbb30596d8f6c77e89ab Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.devereau at arm.com>
Date: Tue, 9 Jan 2024 13:32:21 +0000
Subject: [PATCH 3/6] Add multi basic block test
Remove redundant __arm_sme_state calls
---
.../Target/AArch64/AArch64ISelLowering.cpp | 13 +-
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 3 +-
.../AArch64/Utils/AArch64SMEAttributes.cpp | 10 ++
...sme-streaming-body-streaming-compatible.ll | 117 +++++++-----------
4 files changed, 66 insertions(+), 77 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 72ae574efb6191f..4bfe968cc53671b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4852,15 +4852,14 @@ static SDValue getSVEPredicateBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::AND, DL, VT, Reinterpret, Mask);
}
-SDValue AArch64TargetLowering::getPStateSM(SelectionDAG &DAG, SDValue Chain,
- SMEAttrs Attrs, SDLoc DL,
- EVT VT) const {
- if (Attrs.hasStreamingInterfaceOrBody() &&
- !Attrs.hasStreamingCompatibleInterface())
+SDValue AArch64TargetLowering::getPStateSM(
+ SelectionDAG &DAG, SDValue Chain, SMEAttrs Attrs, SDLoc DL, EVT VT,
+ bool AllowStreamingCompatibleInterface) const {
+ if (Attrs.hasStreamingInterfaceOrBody() && !AllowStreamingCompatibleInterface)
return DAG.getConstant(1, DL, VT);
if (Attrs.hasNonStreamingInterfaceAndBody() &&
- !Attrs.hasStreamingCompatibleInterface())
+ !AllowStreamingCompatibleInterface)
return DAG.getConstant(0, DL, VT);
SDValue Callee = DAG.getExternalSymbol("__arm_sme_state",
@@ -6890,7 +6889,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
if (IsLocallyStreaming) {
SDValue PStateSM;
if (Attrs.hasStreamingCompatibleInterface()) {
- PStateSM = getPStateSM(DAG, Chain, Attrs, DL, MVT::i64);
+ PStateSM = getPStateSM(DAG, Chain, Attrs, DL, MVT::i64, true);
Register Reg = MF.getRegInfo().createVirtualRegister(
getRegClassFor(PStateSM.getValueType().getSimpleVT()));
FuncInfo->setPStateSMReg(Reg);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 6ddbcd41dcb7696..1228558e628ae23 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1292,7 +1292,8 @@ class AArch64TargetLowering : public TargetLowering {
// Returns the runtime value for PSTATE.SM. When the function is streaming-
// compatible, this generates a call to __arm_sme_state.
SDValue getPStateSM(SelectionDAG &DAG, SDValue Chain, SMEAttrs Attrs,
- SDLoc DL, EVT VT) const;
+ SDLoc DL, EVT VT,
+ bool AllowStreamingCompatibleInterface = false) const;
bool preferScalarizeSplat(SDNode *N) const override;
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
index 0082b4017986c6f..65006afe7ed681d 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
@@ -65,6 +65,16 @@ SMEAttrs::SMEAttrs(const AttributeList &Attrs) {
std::optional<bool>
SMEAttrs::requiresSMChange(const SMEAttrs &Callee,
bool BodyOverridesInterface) const {
+ // If the caller has a streaming body and streaming compatible interface,
+ // we will have already conditionally enabled streaming mode on function
+ // entry. We need to disable streaming mode when a callee does not have a
+ // streaming interface, body, or streaming compatible interface.
+ if (hasStreamingBody() && hasStreamingCompatibleInterface())
+ return (!Callee.hasStreamingInterfaceOrBody() &&
+ !Callee.hasStreamingCompatibleInterface())
+ ? std::optional<bool>(false)
+ : std::nullopt;
+
// If the transition is not through a call (e.g. when considering inlining)
// and Callee has a streaming body, then we can ignore the interface of
// Callee.
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible.ll
index 16375041e4298f0..d1cce37bdd6ca93 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible.ll
@@ -46,59 +46,6 @@ define float @sm_body_sm_compatible_simple() "aarch64_pstate_sm_compatible" "aar
define void @sm_body_caller_sm_compatible_caller_normal_callee() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
; CHECK-LABEL: sm_body_caller_sm_compatible_caller_normal_callee:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
-; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w20, -16
-; CHECK-NEXT: .cfi_offset w30, -32
-; CHECK-NEXT: .cfi_offset b8, -40
-; CHECK-NEXT: .cfi_offset b9, -48
-; CHECK-NEXT: .cfi_offset b10, -56
-; CHECK-NEXT: .cfi_offset b11, -64
-; CHECK-NEXT: .cfi_offset b12, -72
-; CHECK-NEXT: .cfi_offset b13, -80
-; CHECK-NEXT: .cfi_offset b14, -88
-; CHECK-NEXT: .cfi_offset b15, -96
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: and x19, x0, #0x1
-; CHECK-NEXT: tbnz w19, #0, .LBB1_2
-; CHECK-NEXT: // %bb.1:
-; CHECK-NEXT: smstart sm
-; CHECK-NEXT: .LBB1_2:
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: and x20, x0, #0x1
-; CHECK-NEXT: tbz w20, #0, .LBB1_4
-; CHECK-NEXT: // %bb.3:
-; CHECK-NEXT: smstop sm
-; CHECK-NEXT: .LBB1_4:
-; CHECK-NEXT: bl normal_callee
-; CHECK-NEXT: tbz w20, #0, .LBB1_6
-; CHECK-NEXT: // %bb.5:
-; CHECK-NEXT: smstart sm
-; CHECK-NEXT: .LBB1_6:
-; CHECK-NEXT: tbz w19, #0, .LBB1_8
-; CHECK-NEXT: // %bb.7:
-; CHECK-NEXT: smstop sm
-; CHECK-NEXT: .LBB1_8:
-; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
-; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
-; CHECK-NEXT: ret
- call void @normal_callee()
- ret void
-}
-
-define void @sm_body_caller_sm_compatible_caller_streaming_callee() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
-; CHECK-LABEL: sm_body_caller_sm_compatible_caller_streaming_callee:
-; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
@@ -117,28 +64,31 @@ define void @sm_body_caller_sm_compatible_caller_streaming_callee() "aarch64_pst
; CHECK-NEXT: .cfi_offset b15, -80
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
-; CHECK-NEXT: tbnz w19, #0, .LBB2_2
+; CHECK-NEXT: tbnz w19, #0, .LBB1_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: .LBB2_2:
-; CHECK-NEXT: bl streaming_callee
-; CHECK-NEXT: tbz w19, #0, .LBB2_4
+; CHECK-NEXT: .LBB1_2:
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: bl normal_callee
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: tbz w19, #0, .LBB1_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstop sm
-; CHECK-NEXT: .LBB2_4:
+; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT: ret
- call void @streaming_callee()
+ call void @normal_callee()
ret void
}
-define void @sm_body_caller_sm_compatible_caller_streaming_compatible_callee() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
-; CHECK-LABEL: sm_body_caller_sm_compatible_caller_streaming_compatible_callee:
-; CHECK: // %bb.0:
+; Function Attrs: nounwind uwtable vscale_range(1,16)
+define void @foo(i32 noundef %x) "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
+; CHECK-LABEL: foo:
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
@@ -155,23 +105,52 @@ define void @sm_body_caller_sm_compatible_caller_streaming_compatible_callee() "
; CHECK-NEXT: .cfi_offset b13, -64
; CHECK-NEXT: .cfi_offset b14, -72
; CHECK-NEXT: .cfi_offset b15, -80
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
-; CHECK-NEXT: tbnz w19, #0, .LBB3_2
-; CHECK-NEXT: // %bb.1:
+; CHECK-NEXT: tbnz w19, #0, .LBB2_2
+; CHECK-NEXT: // %bb.1: // %entry
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: .LBB3_2:
+; CHECK-NEXT: .LBB2_2: // %entry
+; CHECK-NEXT: cbz w8, .LBB2_6
+; CHECK-NEXT: // %bb.3: // %if.else
; CHECK-NEXT: bl streaming_compatible_callee
-; CHECK-NEXT: tbz w19, #0, .LBB3_4
-; CHECK-NEXT: // %bb.3:
+; CHECK-NEXT: tbz w19, #0, .LBB2_5
+; CHECK-NEXT: // %bb.4: // %if.else
; CHECK-NEXT: smstop sm
-; CHECK-NEXT: .LBB3_4:
+; CHECK-NEXT: .LBB2_5: // %if.else
; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT: ret
- call void @streaming_compatible_callee()
+; CHECK-NEXT: .LBB2_6: // %if.then
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: bl normal_callee
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: tbz w19, #0, .LBB2_8
+; CHECK-NEXT: // %bb.7: // %if.then
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: .LBB2_8: // %if.then
+; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %cmp = icmp eq i32 %x, 0
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ tail call void @normal_callee()
+ br label %return
+
+if.else: ; preds = %entry
+ tail call void @streaming_compatible_callee()
+ br label %return
+
+return: ; preds = %if.else, %if.then
ret void
}
>From b1fdadb3d44204a9a5ea72747bb90c391482936e Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.devereau at arm.com>
Date: Tue, 9 Jan 2024 13:59:38 +0000
Subject: [PATCH 4/6] Rename temporary test name
---
.../AArch64/sme-streaming-body-streaming-compatible.ll | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible.ll
index d1cce37bdd6ca93..1fa1c39da432026 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible.ll
@@ -86,8 +86,8 @@ define void @sm_body_caller_sm_compatible_caller_normal_callee() "aarch64_pstate
}
; Function Attrs: nounwind uwtable vscale_range(1,16)
-define void @foo(i32 noundef %x) "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
-; CHECK-LABEL: foo:
+define void @streaming_body_and_streaming_compatible_interface_multi_basic_block(i32 noundef %x) "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
+; CHECK-LABEL: streaming_body_and_streaming_compatible_interface_multi_basic_block:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
>From ab9d8208230430ab9d069f748c6b69cab6119298 Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.devereau at arm.com>
Date: Mon, 15 Jan 2024 15:11:21 +0000
Subject: [PATCH 5/6] Fix inverted cbz condition
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 15 ++++++++-------
.../sme-streaming-body-streaming-compatible.ll | 8 ++++----
2 files changed, 12 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4bfe968cc53671b..52603b3924d0c58 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8209,16 +8209,17 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Emit SMSTOP before returning from a locally streaming function
SMEAttrs FuncAttrs(MF.getFunction());
if (FuncAttrs.hasStreamingBody() && !FuncAttrs.hasStreamingInterface()) {
- SDValue PStateSM;
if (FuncAttrs.hasStreamingCompatibleInterface()) {
Register Reg = FuncInfo->getPStateSMReg();
assert(Reg.isValid() && "PStateSM Register is invalid");
- PStateSM = DAG.getCopyFromReg(Chain, DL, Reg, MVT::i64);
- } else {
- PStateSM = DAG.getConstant(1, DL, MVT::i64);
- }
- Chain = changeStreamingMode(DAG, DL, /*Enable*/ false, Chain,
- /*Glue*/ SDValue(), PStateSM, /*Entry*/ true);
+ SDValue PStateSM = DAG.getCopyFromReg(Chain, DL, Reg, MVT::i64);
+ Chain =
+ changeStreamingMode(DAG, DL, /*Enable*/ false, Chain,
+ /*Glue*/ SDValue(), PStateSM, /*Entry*/ false);
+ } else
+ Chain = changeStreamingMode(
+ DAG, DL, /*Enable*/ false, Chain,
+ /*Glue*/ SDValue(), DAG.getConstant(1, DL, MVT::i64), /*Entry*/ true);
Glue = Chain.getValue(1);
}
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible.ll
index 1fa1c39da432026..4b1c84d096c5cb5 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible.ll
@@ -29,7 +29,7 @@ define float @sm_body_sm_compatible_simple() "aarch64_pstate_sm_compatible" "aar
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB0_2:
-; CHECK-NEXT: tbz w8, #0, .LBB0_4
+; CHECK-NEXT: tbnz w8, #0, .LBB0_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB0_4:
@@ -71,7 +71,7 @@ define void @sm_body_caller_sm_compatible_caller_normal_callee() "aarch64_pstate
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl normal_callee
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: tbz w19, #0, .LBB1_4
+; CHECK-NEXT: tbnz w19, #0, .LBB1_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB1_4:
@@ -115,7 +115,7 @@ define void @streaming_body_and_streaming_compatible_interface_multi_basic_block
; CHECK-NEXT: cbz w8, .LBB2_6
; CHECK-NEXT: // %bb.3: // %if.else
; CHECK-NEXT: bl streaming_compatible_callee
-; CHECK-NEXT: tbz w19, #0, .LBB2_5
+; CHECK-NEXT: tbnz w19, #0, .LBB2_5
; CHECK-NEXT: // %bb.4: // %if.else
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB2_5: // %if.else
@@ -129,7 +129,7 @@ define void @streaming_body_and_streaming_compatible_interface_multi_basic_block
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl normal_callee
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: tbz w19, #0, .LBB2_8
+; CHECK-NEXT: tbnz w19, #0, .LBB2_8
; CHECK-NEXT: // %bb.7: // %if.then
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB2_8: // %if.then
>From 9b79d657688178281a222a3f54c4de6ae590bc88 Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.devereau at arm.com>
Date: Tue, 16 Jan 2024 11:15:59 +0000
Subject: [PATCH 6/6] Add nounwind to tests & rename getPStateSM
---
.../Target/AArch64/AArch64ISelLowering.cpp | 22 +++++-----
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 5 +--
...sme-streaming-body-streaming-compatible.ll | 40 ++-----------------
3 files changed, 16 insertions(+), 51 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 52603b3924d0c58..a6be926ac553804 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4852,16 +4852,9 @@ static SDValue getSVEPredicateBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::AND, DL, VT, Reinterpret, Mask);
}
-SDValue AArch64TargetLowering::getPStateSM(
- SelectionDAG &DAG, SDValue Chain, SMEAttrs Attrs, SDLoc DL, EVT VT,
- bool AllowStreamingCompatibleInterface) const {
- if (Attrs.hasStreamingInterfaceOrBody() && !AllowStreamingCompatibleInterface)
- return DAG.getConstant(1, DL, VT);
-
- if (Attrs.hasNonStreamingInterfaceAndBody() &&
- !AllowStreamingCompatibleInterface)
- return DAG.getConstant(0, DL, VT);
-
+SDValue AArch64TargetLowering::getRuntimePStateSM(SelectionDAG &DAG,
+ SDValue Chain, SDLoc DL,
+ EVT VT) const {
SDValue Callee = DAG.getExternalSymbol("__arm_sme_state",
getPointerTy(DAG.getDataLayout()));
Type *Int64Ty = Type::getInt64Ty(*DAG.getContext());
@@ -6889,7 +6882,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
if (IsLocallyStreaming) {
SDValue PStateSM;
if (Attrs.hasStreamingCompatibleInterface()) {
- PStateSM = getPStateSM(DAG, Chain, Attrs, DL, MVT::i64, true);
+ PStateSM = getRuntimePStateSM(DAG, Chain, DL, MVT::i64);
Register Reg = MF.getRegInfo().createVirtualRegister(
getRegClassFor(PStateSM.getValueType().getSimpleVT()));
FuncInfo->setPStateSMReg(Reg);
@@ -7656,7 +7649,12 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
std::optional<bool> RequiresSMChange =
CallerAttrs.requiresSMChange(CalleeAttrs);
if (RequiresSMChange) {
- PStateSM = getPStateSM(DAG, Chain, CallerAttrs, DL, MVT::i64);
+ if (CallerAttrs.hasStreamingInterfaceOrBody())
+ PStateSM = DAG.getConstant(1, DL, MVT::i64);
+ else if (CallerAttrs.hasNonStreamingInterface())
+ PStateSM = DAG.getConstant(0, DL, MVT::i64);
+ else
+ PStateSM = getRuntimePStateSM(DAG, Chain, DL, MVT::i64);
OptimizationRemarkEmitter ORE(&MF.getFunction());
ORE.emit([&]() {
auto R = CLI.CB ? OptimizationRemarkAnalysis("sme", "SMETransition",
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 1228558e628ae23..66a6ac16acc36ed 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1291,9 +1291,8 @@ class AArch64TargetLowering : public TargetLowering {
// Returns the runtime value for PSTATE.SM. When the function is streaming-
// compatible, this generates a call to __arm_sme_state.
- SDValue getPStateSM(SelectionDAG &DAG, SDValue Chain, SMEAttrs Attrs,
- SDLoc DL, EVT VT,
- bool AllowStreamingCompatibleInterface = false) const;
+ SDValue getRuntimePStateSM(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
+ EVT VT) const;
bool preferScalarizeSplat(SDNode *N) const override;
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible.ll
index 4b1c84d096c5cb5..d67573384ca959b 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible.ll
@@ -5,7 +5,7 @@ declare void @normal_callee();
declare void @streaming_callee() "aarch64_pstate_sm_enabled";
declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible";
-define float @sm_body_sm_compatible_simple() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
+define float @sm_body_sm_compatible_simple() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" nounwind {
; CHECK-LABEL: sm_body_sm_compatible_simple:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
@@ -13,16 +13,6 @@ define float @sm_body_sm_compatible_simple() "aarch64_pstate_sm_compatible" "aar
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 80
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: .cfi_offset b8, -24
-; CHECK-NEXT: .cfi_offset b9, -32
-; CHECK-NEXT: .cfi_offset b10, -40
-; CHECK-NEXT: .cfi_offset b11, -48
-; CHECK-NEXT: .cfi_offset b12, -56
-; CHECK-NEXT: .cfi_offset b13, -64
-; CHECK-NEXT: .cfi_offset b14, -72
-; CHECK-NEXT: .cfi_offset b15, -80
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x8, x0, #0x1
; CHECK-NEXT: tbnz w8, #0, .LBB0_2
@@ -43,7 +33,7 @@ define float @sm_body_sm_compatible_simple() "aarch64_pstate_sm_compatible" "aar
ret float zeroinitializer
}
-define void @sm_body_caller_sm_compatible_caller_normal_callee() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
+define void @sm_body_caller_sm_compatible_caller_normal_callee() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" nounwind {
; CHECK-LABEL: sm_body_caller_sm_compatible_caller_normal_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
@@ -51,17 +41,6 @@ define void @sm_body_caller_sm_compatible_caller_normal_callee() "aarch64_pstate
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 80
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: .cfi_offset b8, -24
-; CHECK-NEXT: .cfi_offset b9, -32
-; CHECK-NEXT: .cfi_offset b10, -40
-; CHECK-NEXT: .cfi_offset b11, -48
-; CHECK-NEXT: .cfi_offset b12, -56
-; CHECK-NEXT: .cfi_offset b13, -64
-; CHECK-NEXT: .cfi_offset b14, -72
-; CHECK-NEXT: .cfi_offset b15, -80
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbnz w19, #0, .LBB1_2
@@ -86,26 +65,15 @@ define void @sm_body_caller_sm_compatible_caller_normal_callee() "aarch64_pstate
}
; Function Attrs: nounwind uwtable vscale_range(1,16)
-define void @streaming_body_and_streaming_compatible_interface_multi_basic_block(i32 noundef %x) "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
+define void @streaming_body_and_streaming_compatible_interface_multi_basic_block(i32 noundef %x) "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" nounwind {
; CHECK-LABEL: streaming_body_and_streaming_compatible_interface_multi_basic_block:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 80
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: .cfi_offset b8, -24
-; CHECK-NEXT: .cfi_offset b9, -32
-; CHECK-NEXT: .cfi_offset b10, -40
-; CHECK-NEXT: .cfi_offset b11, -48
-; CHECK-NEXT: .cfi_offset b12, -56
-; CHECK-NEXT: .cfi_offset b13, -64
-; CHECK-NEXT: .cfi_offset b14, -72
-; CHECK-NEXT: .cfi_offset b15, -80
-; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbnz w19, #0, .LBB2_2
More information about the llvm-commits mailing list