[llvm-branch-commits] [llvm] release/20.x: [AArch64] Ensure the LR is preserved if we must call __arm_get_current_vg (#145760) (PR #147171)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Sat Jul 5 18:52:55 PDT 2025


https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/147171

Backport af7166a

Requested by: @MacDue

>From 254635a3d084f33bd5d26051eb46c99146703db0 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 26 Jun 2025 13:26:33 +0100
Subject: [PATCH] [AArch64] Ensure the LR is preserved if we must call
 __arm_get_current_vg (#145760)

Fixes #145635

(cherry picked from commit af7166a3f126ce4e4d2a05eccc1358bd0427cf0f)
---
 .../Target/AArch64/AArch64FrameLowering.cpp   | 10 +++-
 .../AArch64/sme-must-save-lr-for-vg.ll        | 49 +++++++++++++++++++
 2 files changed, 57 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index d3abd79b85a75..74b80438a28b2 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -3792,6 +3792,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
       CSStackSize += SpillSize;
   }
 
+  // Save number of saved regs, so we can easily update CSStackSize later to
+  // account for any additional 64-bit GPR saves. Note: After this point
+  // only 64-bit GPRs can be added to SavedRegs.
+  unsigned NumSavedRegs = SavedRegs.count();
+
   // Increase the callee-saved stack size if the function has streaming mode
   // changes, as we will need to spill the value of the VG register.
   // For locally streaming functions, we spill both the streaming and
@@ -3811,8 +3816,9 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
   if (AFI->hasStackHazardSlotIndex())
     CSStackSize += getStackHazardSize(MF);
 
-  // Save number of saved regs, so we can easily update CSStackSize later.
-  unsigned NumSavedRegs = SavedRegs.count();
+  // If we must call __arm_get_current_vg in the prologue preserve the LR.
+  if (requiresSaveVG(MF) && !Subtarget.hasSVE())
+    SavedRegs.set(AArch64::LR);
 
   // The frame record needs to be created by saving the appropriate registers
   uint64_t EstimatedStackSize = MFI.estimateStackSize(MF);
diff --git a/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll b/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll
new file mode 100644
index 0000000000000..69f603458670c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -O0 < %s | FileCheck %s
+
+; Example of locally streaming function that (at -O0) must preserve the LR (X30)
+; before calling __arm_get_current_vg.
+define void @foo() "aarch64_pstate_sm_body" {
+; CHECK-LABEL: foo:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT:    rdsvl x9, #1
+; CHECK-NEXT:    lsr x9, x9, #3
+; CHECK-NEXT:    str x9, [sp, #72] // 8-byte Folded Spill
+; CHECK-NEXT:    bl __arm_get_current_vg
+; CHECK-NEXT:    str x0, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset vg, -16
+; CHECK-NEXT:    .cfi_offset w30, -32
+; CHECK-NEXT:    .cfi_offset b8, -40
+; CHECK-NEXT:    .cfi_offset b9, -48
+; CHECK-NEXT:    .cfi_offset b10, -56
+; CHECK-NEXT:    .cfi_offset b11, -64
+; CHECK-NEXT:    .cfi_offset b12, -72
+; CHECK-NEXT:    .cfi_offset b13, -80
+; CHECK-NEXT:    .cfi_offset b14, -88
+; CHECK-NEXT:    .cfi_offset b15, -96
+; CHECK-NEXT:    smstart sm
+; CHECK-NEXT:    smstop sm
+; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore b8
+; CHECK-NEXT:    .cfi_restore b9
+; CHECK-NEXT:    .cfi_restore b10
+; CHECK-NEXT:    .cfi_restore b11
+; CHECK-NEXT:    .cfi_restore b12
+; CHECK-NEXT:    .cfi_restore b13
+; CHECK-NEXT:    .cfi_restore b14
+; CHECK-NEXT:    .cfi_restore b15
+; CHECK-NEXT:    ret
+  ret void
+}



More information about the llvm-branch-commits mailing list