[llvm-branch-commits] [llvm] Cherry pick f314e12 into release/19.x (PR #117695)
Sander de Smalen via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Nov 26 04:19:02 PST 2024
https://github.com/sdesmalen-arm updated https://github.com/llvm/llvm-project/pull/117695
From de526e5893e901c350fc9bd6d8013d7d1dbd42c6 Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara at apple.com>
Date: Tue, 13 Aug 2024 00:39:14 -0700
Subject: [PATCH 1/3] [AArch64][Darwin][SME] Don't try to save VG to the stack
for unwinding.
On Darwin we don't have any hardware with SVE support, only SME, so we
don't need to save VG for unwinders and can safely omit it. This also
fixes crashes introduced when this feature landed, since Darwin's
compact unwind code can't handle the presence of VG anyway.
rdar://131072344
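
For readers skimming the patch, the gating decision reduces to a small
predicate. Below is a minimal standalone sketch, not the LLVM code
itself; TargetInfo and FunctionInfo are hypothetical stand-ins for
AArch64Subtarget and AArch64FunctionInfo:

  #include <iostream>

  // Hypothetical stand-ins for the LLVM subtarget/function-info classes.
  struct TargetInfo { bool IsDarwin; bool HasSVE; };
  struct FunctionInfo { bool HasStreamingModeChanges; };

  // Sketch of the requiresSaveVG() rule this patch introduces: VG is
  // only saved when the function changes streaming mode, and on Darwin
  // additionally only when SVE is available, because SME-only Darwin
  // hardware uses compact unwind, which cannot encode VG.
  bool requiresSaveVG(const TargetInfo &ST, const FunctionInfo &FI) {
    if (!FI.HasStreamingModeChanges)
      return false;
    if (ST.IsDarwin)
      return ST.HasSVE;
    return true;
  }

  int main() {
    // SME-only Darwin with a streaming-mode change: no VG save.
    std::cout << requiresSaveVG({/*IsDarwin=*/true, /*HasSVE=*/false},
                                {/*HasStreamingModeChanges=*/true})
              << '\n'; // prints 0
  }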
---
.../Target/AArch64/AArch64FrameLowering.cpp | 24 ++-
.../Target/AArch64/AArch64ISelLowering.cpp | 19 ++-
.../CodeGen/AArch64/sme-darwin-no-sve-vg.ll | 161 ++++++++++++++++++
3 files changed, 189 insertions(+), 15 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 87e057a468afd6..83d9dd17259733 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1394,6 +1394,18 @@ bool requiresGetVGCall(MachineFunction &MF) {
!MF.getSubtarget<AArch64Subtarget>().hasSVE();
}
+static bool requiresSaveVG(MachineFunction &MF) {
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ // For Darwin platforms we don't save VG for non-SVE functions, even if SME
+ // is enabled with streaming mode changes.
+ if (!AFI->hasStreamingModeChanges())
+ return false;
+ auto &ST = MF.getSubtarget<AArch64Subtarget>();
+ if (ST.isTargetDarwin())
+ return ST.hasSVE();
+ return true;
+}
+
bool isVGInstruction(MachineBasicBlock::iterator MBBI) {
unsigned Opc = MBBI->getOpcode();
if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI ||
@@ -1430,8 +1442,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
// functions, we need to do this for both the streaming and non-streaming
// vector length. Move past these instructions if necessary.
MachineFunction &MF = *MBB.getParent();
- AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- if (AFI->hasStreamingModeChanges())
+ if (requiresSaveVG(MF))
while (isVGInstruction(MBBI))
++MBBI;
@@ -1937,7 +1948,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
!IsSVECalleeSave(MBBI)) {
// Move past instructions generated to calculate VG
- if (AFI->hasStreamingModeChanges())
+ if (requiresSaveVG(MF))
while (isVGInstruction(MBBI))
++MBBI;
@@ -3720,7 +3731,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// non-streaming VG value.
const Function &F = MF.getFunction();
SMEAttrs Attrs(F);
- if (AFI->hasStreamingModeChanges()) {
+ if (requiresSaveVG(MF)) {
if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
CSStackSize += 16;
else
@@ -3873,7 +3884,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
}
// Insert VG into the list of CSRs, immediately before LR if saved.
- if (AFI->hasStreamingModeChanges()) {
+ if (requiresSaveVG(MF)) {
std::vector<CalleeSavedInfo> VGSaves;
SMEAttrs Attrs(MF.getFunction());
@@ -4602,10 +4613,9 @@ MachineBasicBlock::iterator emitVGSaveRestore(MachineBasicBlock::iterator II,
void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
MachineFunction &MF, RegScavenger *RS = nullptr) const {
- AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
for (auto &BB : MF)
for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();) {
- if (AFI->hasStreamingModeChanges())
+ if (requiresSaveVG(MF))
II = emitVGSaveRestore(II, this);
if (StackTaggingMergeSetTag)
II = tryMergeAdjacentSTG(II, this, RS);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 62078822c89b18..ef2789e96213b5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8732,10 +8732,11 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SDValue InGlue;
if (RequiresSMChange) {
-
- Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL,
- DAG.getVTList(MVT::Other, MVT::Glue), Chain);
- InGlue = Chain.getValue(1);
+ if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
+ Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL,
+ DAG.getVTList(MVT::Other, MVT::Glue), Chain);
+ InGlue = Chain.getValue(1);
+ }
SDValue NewChain = changeStreamingMode(
DAG, DL, CalleeAttrs.hasStreamingInterface(), Chain, InGlue,
@@ -8914,11 +8915,13 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
Result = changeStreamingMode(
DAG, DL, !CalleeAttrs.hasStreamingInterface(), Result, InGlue,
getSMCondition(CallerAttrs, CalleeAttrs), PStateSM);
- InGlue = Result.getValue(1);
- Result =
- DAG.getNode(AArch64ISD::VG_RESTORE, DL,
- DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue});
+ if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
+ InGlue = Result.getValue(1);
+ Result =
+ DAG.getNode(AArch64ISD::VG_RESTORE, DL,
+ DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue});
+ }
}
if (CallerAttrs.requiresEnablingZAAfterCall(CalleeAttrs))
diff --git a/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll b/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll
new file mode 100644
index 00000000000000..36a300fea25e5a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll
@@ -0,0 +1,161 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -o - %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "arm64-apple-macosx14.0.0"
+
+; Check we don't crash on Darwin and that we don't try to save VG
+; when only SME (and not SVE) is enabled.
+
+; Function Attrs: mustprogress norecurse nounwind ssp uwtable(sync)
+define noundef i32 @main() local_unnamed_addr #0 {
+; CHECK-LABEL: main:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! ; 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: stp d13, d12, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset b8, -24
+; CHECK-NEXT: .cfi_offset b9, -32
+; CHECK-NEXT: .cfi_offset b10, -40
+; CHECK-NEXT: .cfi_offset b11, -48
+; CHECK-NEXT: .cfi_offset b12, -56
+; CHECK-NEXT: .cfi_offset b13, -64
+; CHECK-NEXT: .cfi_offset b14, -72
+; CHECK-NEXT: .cfi_offset b15, -80
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: bl __ZL9sme_crashv
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: mov w0, #0 ; =0x0
+; CHECK-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 ; 16-byte Folded Reload
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: .cfi_restore w30
+; CHECK-NEXT: .cfi_restore w29
+; CHECK-NEXT: .cfi_restore b8
+; CHECK-NEXT: .cfi_restore b9
+; CHECK-NEXT: .cfi_restore b10
+; CHECK-NEXT: .cfi_restore b11
+; CHECK-NEXT: .cfi_restore b12
+; CHECK-NEXT: .cfi_restore b13
+; CHECK-NEXT: .cfi_restore b14
+; CHECK-NEXT: .cfi_restore b15
+; CHECK-NEXT: ret
+entry:
+ tail call fastcc void @_ZL9sme_crashv() #4
+ ret i32 0
+}
+
+; Function Attrs: mustprogress norecurse nounwind ssp uwtable(sync)
+define internal fastcc void @_ZL9sme_crashv() unnamed_addr #1 {
+; CHECK-LABEL: _ZL9sme_crashv:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! ; 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 96
+; CHECK-NEXT: stp d13, d12, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x28, x27, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #80
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset w27, -24
+; CHECK-NEXT: .cfi_offset w28, -32
+; CHECK-NEXT: .cfi_offset b8, -40
+; CHECK-NEXT: .cfi_offset b9, -48
+; CHECK-NEXT: .cfi_offset b10, -56
+; CHECK-NEXT: .cfi_offset b11, -64
+; CHECK-NEXT: .cfi_offset b12, -72
+; CHECK-NEXT: .cfi_offset b13, -80
+; CHECK-NEXT: .cfi_offset b14, -88
+; CHECK-NEXT: .cfi_offset b15, -96
+; CHECK-NEXT: .cfi_remember_state
+; CHECK-NEXT: sub x9, sp, #160
+; CHECK-NEXT: and sp, x9, #0xffffffffffffff00
+; CHECK-NEXT: Lloh0:
+; CHECK-NEXT: adrp x8, ___stack_chk_guard at GOTPAGE
+; CHECK-NEXT: Lloh1:
+; CHECK-NEXT: ldr x8, [x8, ___stack_chk_guard at GOTPAGEOFF]
+; CHECK-NEXT: Lloh2:
+; CHECK-NEXT: ldr x8, [x8]
+; CHECK-NEXT: str x8, [sp, #152]
+; CHECK-NEXT: mov z0.b, #0 ; =0x0
+; CHECK-NEXT: stp q0, q0, [sp, #32]
+; CHECK-NEXT: stp q0, q0, [sp]
+; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: st1w { z0.s }, p0, [x8]
+; CHECK-EMPTY:
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ldr x8, [sp, #152]
+; CHECK-NEXT: Lloh3:
+; CHECK-NEXT: adrp x9, ___stack_chk_guard at GOTPAGE
+; CHECK-NEXT: Lloh4:
+; CHECK-NEXT: ldr x9, [x9, ___stack_chk_guard at GOTPAGEOFF]
+; CHECK-NEXT: Lloh5:
+; CHECK-NEXT: ldr x9, [x9]
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: b.ne LBB1_2
+; CHECK-NEXT: ; %bb.1: ; %entry
+; CHECK-NEXT: sub sp, x29, #80
+; CHECK-NEXT: .cfi_def_cfa wsp, 96
+; CHECK-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp x28, x27, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 ; 16-byte Folded Reload
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: .cfi_restore w30
+; CHECK-NEXT: .cfi_restore w29
+; CHECK-NEXT: .cfi_restore w27
+; CHECK-NEXT: .cfi_restore w28
+; CHECK-NEXT: .cfi_restore b8
+; CHECK-NEXT: .cfi_restore b9
+; CHECK-NEXT: .cfi_restore b10
+; CHECK-NEXT: .cfi_restore b11
+; CHECK-NEXT: .cfi_restore b12
+; CHECK-NEXT: .cfi_restore b13
+; CHECK-NEXT: .cfi_restore b14
+; CHECK-NEXT: .cfi_restore b15
+; CHECK-NEXT: ret
+; CHECK-NEXT: LBB1_2: ; %entry
+; CHECK-NEXT: .cfi_restore_state
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: bl ___stack_chk_fail
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh3, Lloh4, Lloh5
+; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh0, Lloh1, Lloh2
+entry:
+ %uu = alloca [16 x float], align 256
+ call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %uu) #5
+ call void @llvm.memset.p0.i64(ptr noundef nonnull align 256 dereferenceable(64) %uu, i8 0, i64 64, i1 false)
+ call void asm sideeffect "ptrue p0.s\0Ast1w { z0.s }, p0, [$0]\0A", "r"(ptr nonnull %uu) #5
+ call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %uu) #5
+ ret void
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2
+
+; Function Attrs: mustprogress nocallback nofree nounwind willreturn memory(argmem: write)
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2
+
+attributes #0 = { mustprogress norecurse nounwind ssp uwtable(sync) "stack-protector-buffer-size"="8" "target-cpu"="apple-a16" "target-features"="+sme,+sme-f64f64,+sme2" }
+attributes #1 = { mustprogress norecurse nounwind ssp uwtable(sync) "aarch64_pstate_sm_enabled" "stack-protector-buffer-size"="8" "target-cpu"="apple-a16" "target-features"="+sme,+sme-f64f64,+sme2" }
+attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #3 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: write) }
+attributes #4 = { "aarch64_pstate_sm_enabled" "no-builtin-calloc" "no-builtin-stpcpy" }
+attributes #5 = { nounwind }
From 2d00d4469d902ac8dff86d752591bd0d8f01a1f4 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Mon, 19 Aug 2024 10:17:10 +0100
Subject: [PATCH 2/3] [AArch64][SME] Return false from
produceCompactUnwindFrame if VG save required. (#104588)
The compact unwind format requires that all registers be stored in
pairs, so return false from produceCompactUnwindFrame if we require
saving VG.
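
The pairing constraint can be summarised in one boolean expression; a
condensed model of the updated predicate follows (hypothetical
signature and parameters, not the real LLVM one):

  #include <cassert>

  // Compact unwind encodes callee-saves as register pairs, so a
  // function that must also spill VG (a lone 8-byte slot) falls back
  // to DWARF unwind info instead.
  static bool produceCompactUnwindFrame(bool IsMachO, bool HasSwiftError,
                                        bool IsSwiftTailCC, bool SavesVG) {
    return IsMachO && !HasSwiftError && !IsSwiftTailCC && !SavesVG;
  }

  int main() {
    // A MachO function that saves VG no longer claims compact unwind.
    assert(!produceCompactUnwindFrame(true, false, false, true));
    assert(produceCompactUnwindFrame(true, false, false, false));
  }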
---
.../Target/AArch64/AArch64FrameLowering.cpp | 3 +-
.../test/CodeGen/AArch64/sme-darwin-sve-vg.ll | 55 +++++++++++++++++++
2 files changed, 57 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 83d9dd17259733..c09c1792e98982 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -2859,7 +2859,8 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) {
return Subtarget.isTargetMachO() &&
!(Subtarget.getTargetLowering()->supportSwiftError() &&
Attrs.hasAttrSomewhere(Attribute::SwiftError)) &&
- MF.getFunction().getCallingConv() != CallingConv::SwiftTail;
+ MF.getFunction().getCallingConv() != CallingConv::SwiftTail &&
+ !requiresSaveVG(MF);
}
static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
diff --git a/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll b/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll
new file mode 100644
index 00000000000000..c32e9cbc053939
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll
@@ -0,0 +1,55 @@
+; RUN: llc -mtriple=aarch64-darwin -mattr=+sve -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
+
+declare void @normal_callee();
+
+define void @locally_streaming_fn() #0 {
+; CHECK-LABEL: locally_streaming_fn:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! ; 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 96
+; CHECK-NEXT: rdsvl x9, #1
+; CHECK-NEXT: stp d13, d12, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: lsr x9, x9, #3
+; CHECK-NEXT: stp d11, d10, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x9, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
+; CHECK-NEXT: str x9, [sp, #80] ; 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset vg, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: .cfi_offset b8, -40
+; CHECK-NEXT: .cfi_offset b9, -48
+; CHECK-NEXT: .cfi_offset b10, -56
+; CHECK-NEXT: .cfi_offset b11, -64
+; CHECK-NEXT: .cfi_offset b12, -72
+; CHECK-NEXT: .cfi_offset b13, -80
+; CHECK-NEXT: .cfi_offset b14, -88
+; CHECK-NEXT: .cfi_offset b15, -96
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: .cfi_offset vg, -24
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: bl _normal_callee
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: .cfi_restore vg
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldp d9, d8, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #64] ; 8-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 ; 16-byte Folded Reload
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: .cfi_restore w30
+; CHECK-NEXT: .cfi_restore b8
+; CHECK-NEXT: .cfi_restore b9
+; CHECK-NEXT: .cfi_restore b10
+; CHECK-NEXT: .cfi_restore b11
+; CHECK-NEXT: .cfi_restore b12
+; CHECK-NEXT: .cfi_restore b13
+; CHECK-NEXT: .cfi_restore b14
+; CHECK-NEXT: .cfi_restore b15
+; CHECK-NEXT: ret
+ call void @normal_callee()
+ ret void
+}
+
+attributes #0 = { "aarch64_pstate_sm_body" uwtable(async) }
From c9b6dec06de9fbb6997ce0aa1aaba13ab4435113 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Tue, 15 Oct 2024 11:56:40 +0100
Subject: [PATCH 3/3] [AArch64][SME] Fix iterator to
fixupCalleeSaveRestoreStackOffset (#110855)
The iterator passed to `fixupCalleeSaveRestoreStackOffset` may be
incorrect when it tries to skip over the instructions that compute the
current value of 'vg' and there is an 'rdsvl' instruction straight
after the prologue. That's because the skipping code doesn't check that
the instruction is still a 'frame-setup' instruction.
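
To illustrate the shape of the fix, here is a hedged standalone sketch
(Instr, SPOffset, and fixupPrologue are illustrative stand-ins, not
LLVM API): the outer loop's frame-setup check alone bounds the walk,
and VG-materializing instructions are merely excluded from the offset
fix-up rather than skipped by a nested loop that could overrun the
prologue:

  #include <vector>

  // Hypothetical instruction model.
  struct Instr {
    bool FrameSetup; // carries the frame-setup flag
    bool IsVGInstr;  // e.g. RDSVL/CNTD used to materialize VG
    bool IsSVESave;
    int SPOffset;    // stand-in for the callee-save stack offset
  };

  void fixupPrologue(std::vector<Instr> &MBB, int LocalStackSize,
                     bool CombineSPBump, bool SavesVG) {
    for (auto It = MBB.begin();
         It != MBB.end() && It->FrameSetup && !It->IsSVESave; ++It)
      // Only fix up frame-setup load/stores; VG instructions are
      // excluded from the fix-up but no longer skipped unconditionally.
      if (CombineSPBump && !(SavesVG && It->IsVGInstr))
        It->SPOffset += LocalStackSize;
  }

  int main() {
    // An rdsvl in user code right after the prologue is not
    // frame-setup, so the walk now stops before touching it.
    std::vector<Instr> MBB = {{true, false, false, 0},
                              {true, true, false, 0},
                              {false, true, false, 0}};
    fixupPrologue(MBB, 16, /*CombineSPBump=*/true, /*SavesVG=*/true);
  }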
---
.../Target/AArch64/AArch64FrameLowering.cpp | 9 ++---
llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll | 38 +++++++++++++++++++
2 files changed, 41 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index c09c1792e98982..c183ffd384c22f 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1947,12 +1947,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// pointer bump above.
while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
!IsSVECalleeSave(MBBI)) {
- // Move past instructions generated to calculate VG
- if (requiresSaveVG(MF))
- while (isVGInstruction(MBBI))
- ++MBBI;
-
- if (CombineSPBump)
+ if (CombineSPBump &&
+ // Only fix-up frame-setup load/store instructions.
+ (!requiresSaveVG(MF) || !isVGInstruction(MBBI)))
fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
NeedsWinCFI, &HasWinCFI);
++MBBI;
diff --git a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
index fa8f92cb0a2c99..38666a05c20f8c 100644
--- a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
+++ b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
@@ -1102,6 +1102,44 @@ define void @streaming_compatible_no_sve(i32 noundef %x) #4 {
ret void
}
+; The algorithm that fixes up the offsets of the callee-save/restore
+; instructions must jump over the instructions that instantiate the current
+; 'VG' value. We must make sure that it doesn't consider any RDSVL in
+; user-code as if it is part of the frame-setup when doing so.
+define void @test_rdsvl_right_after_prologue(i64 %x0) nounwind {
+; NO-SVE-CHECK-LABEL: test_rdsvl_right_after_prologue:
+; NO-SVE-CHECK: // %bb.0:
+; NO-SVE-CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT: mov x9, x0
+; NO-SVE-CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT: bl __arm_get_current_vg
+; NO-SVE-CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill
+; NO-SVE-CHECK-NEXT: mov x0, x9
+; NO-SVE-CHECK-NEXT: rdsvl x8, #1
+; NO-SVE-CHECK-NEXT: add x29, sp, #64
+; NO-SVE-CHECK-NEXT: lsr x8, x8, #3
+; NO-SVE-CHECK-NEXT: mov x1, x0
+; NO-SVE-CHECK-NEXT: smstart sm
+; NO-SVE-CHECK-NEXT: mov x0, x8
+; NO-SVE-CHECK-NEXT: bl bar
+; NO-SVE-CHECK-NEXT: smstop sm
+; NO-SVE-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT: ret
+ %some_alloc = alloca i64, align 8
+ %rdsvl = tail call i64 @llvm.aarch64.sme.cntsd()
+ call void @bar(i64 %rdsvl, i64 %x0) "aarch64_pstate_sm_enabled"
+ ret void
+}
+
+declare void @bar(i64, i64)
+
; Ensure we still emit async unwind information with -fno-asynchronous-unwind-tables
; if the function contains a streaming-mode change.