[llvm-branch-commits] [llvm] release/20.x: [AArch64] Ensure the LR is preserved if we must call __arm_get_current_vg (#145760) (PR #147171)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat Jul 5 18:52:55 PDT 2025
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/147171
Backport af7166a
Requested by: @MacDue
From 254635a3d084f33bd5d26051eb46c99146703db0 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 26 Jun 2025 13:26:33 +0100
Subject: [PATCH] [AArch64] Ensure the LR is preserved if we must call
__arm_get_current_vg (#145760)
Fixes #145635
(cherry picked from commit af7166a3f126ce4e4d2a05eccc1358bd0427cf0f)
---
.../Target/AArch64/AArch64FrameLowering.cpp | 10 +++-
.../AArch64/sme-must-save-lr-for-vg.ll | 49 +++++++++++++++++++
2 files changed, 57 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index d3abd79b85a75..74b80438a28b2 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -3792,6 +3792,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
CSStackSize += SpillSize;
}
+ // Save number of saved regs, so we can easily update CSStackSize later to
+ // account for any additional 64-bit GPR saves. Note: After this point
+ // only 64-bit GPRs can be added to SavedRegs.
+ unsigned NumSavedRegs = SavedRegs.count();
+
// Increase the callee-saved stack size if the function has streaming mode
// changes, as we will need to spill the value of the VG register.
// For locally streaming functions, we spill both the streaming and
@@ -3811,8 +3816,9 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
if (AFI->hasStackHazardSlotIndex())
CSStackSize += getStackHazardSize(MF);
- // Save number of saved regs, so we can easily update CSStackSize later.
- unsigned NumSavedRegs = SavedRegs.count();
+ // If we must call __arm_get_current_vg in the prologue preserve the LR.
+ if (requiresSaveVG(MF) && !Subtarget.hasSVE())
+ SavedRegs.set(AArch64::LR);
// The frame record needs to be created by saving the appropriate registers
uint64_t EstimatedStackSize = MFI.estimateStackSize(MF);
diff --git a/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll b/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll
new file mode 100644
index 0000000000000..69f603458670c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -O0 < %s | FileCheck %s
+
+; Example of locally streaming function that (at -O0) must preserve the LR (X30)
+; before calling __arm_get_current_vg.
+define void @foo() "aarch64_pstate_sm_body" {
+; CHECK-LABEL: foo:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 96
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: rdsvl x9, #1
+; CHECK-NEXT: lsr x9, x9, #3
+; CHECK-NEXT: str x9, [sp, #72] // 8-byte Folded Spill
+; CHECK-NEXT: bl __arm_get_current_vg
+; CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset vg, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: .cfi_offset b8, -40
+; CHECK-NEXT: .cfi_offset b9, -48
+; CHECK-NEXT: .cfi_offset b10, -56
+; CHECK-NEXT: .cfi_offset b11, -64
+; CHECK-NEXT: .cfi_offset b12, -72
+; CHECK-NEXT: .cfi_offset b13, -80
+; CHECK-NEXT: .cfi_offset b14, -88
+; CHECK-NEXT: .cfi_offset b15, -96
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: .cfi_restore w30
+; CHECK-NEXT: .cfi_restore b8
+; CHECK-NEXT: .cfi_restore b9
+; CHECK-NEXT: .cfi_restore b10
+; CHECK-NEXT: .cfi_restore b11
+; CHECK-NEXT: .cfi_restore b12
+; CHECK-NEXT: .cfi_restore b13
+; CHECK-NEXT: .cfi_restore b14
+; CHECK-NEXT: .cfi_restore b15
+; CHECK-NEXT: ret
+ ret void
+}
More information about the llvm-branch-commits mailing list