[llvm-branch-commits] [llvm] Cherry pick f314e12 into release/19.x (PR #117695)
via llvm-branch-commits
llvm-branch-commits@lists.llvm.org
Tue Nov 26 02:24:01 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: Sander de Smalen (sdesmalen-arm)
Changes:
f314e12 uses `requiresSaveVG`, which was introduced in 334a366ba792 and is also missing from the release/19.x branch. I figured it made sense to cherry-pick that commit as well.
---
Full diff: https://github.com/llvm/llvm-project/pull/117695.diff
4 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.cpp (+19-12)
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+11-8)
- (added) llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll (+161)
- (modified) llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll (+38)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 87e057a468afd6..9d16c92b3fea5c 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1394,6 +1394,18 @@ bool requiresGetVGCall(MachineFunction &MF) {
!MF.getSubtarget<AArch64Subtarget>().hasSVE();
}
+static bool requiresSaveVG(MachineFunction &MF) {
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ // For Darwin platforms we don't save VG for non-SVE functions, even if SME
+ // is enabled with streaming mode changes.
+ if (!AFI->hasStreamingModeChanges())
+ return false;
+ auto &ST = MF.getSubtarget<AArch64Subtarget>();
+ if (ST.isTargetDarwin())
+ return ST.hasSVE();
+ return true;
+}
+
bool isVGInstruction(MachineBasicBlock::iterator MBBI) {
unsigned Opc = MBBI->getOpcode();
if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI ||
@@ -1430,8 +1442,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
// functions, we need to do this for both the streaming and non-streaming
// vector length. Move past these instructions if necessary.
MachineFunction &MF = *MBB.getParent();
- AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- if (AFI->hasStreamingModeChanges())
+ if (requiresSaveVG(MF))
while (isVGInstruction(MBBI))
++MBBI;
@@ -1936,12 +1947,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// pointer bump above.
while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
!IsSVECalleeSave(MBBI)) {
- // Move past instructions generated to calculate VG
- if (AFI->hasStreamingModeChanges())
- while (isVGInstruction(MBBI))
- ++MBBI;
-
- if (CombineSPBump)
+ if (CombineSPBump &&
+ // Only fix-up frame-setup load/store instructions.
+ (!requiresSaveVG(MF) || !isVGInstruction(MBBI)))
fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
NeedsWinCFI, &HasWinCFI);
++MBBI;
@@ -3720,7 +3728,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// non-streaming VG value.
const Function &F = MF.getFunction();
SMEAttrs Attrs(F);
- if (AFI->hasStreamingModeChanges()) {
+ if (requiresSaveVG(MF)) {
if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
CSStackSize += 16;
else
@@ -3873,7 +3881,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
}
// Insert VG into the list of CSRs, immediately before LR if saved.
- if (AFI->hasStreamingModeChanges()) {
+ if (requiresSaveVG(MF)) {
std::vector<CalleeSavedInfo> VGSaves;
SMEAttrs Attrs(MF.getFunction());
@@ -4602,10 +4610,9 @@ MachineBasicBlock::iterator emitVGSaveRestore(MachineBasicBlock::iterator II,
void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
MachineFunction &MF, RegScavenger *RS = nullptr) const {
- AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
for (auto &BB : MF)
for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();) {
- if (AFI->hasStreamingModeChanges())
+ if (requiresSaveVG(MF))
II = emitVGSaveRestore(II, this);
if (StackTaggingMergeSetTag)
II = tryMergeAdjacentSTG(II, this, RS);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 62078822c89b18..ef2789e96213b5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8732,10 +8732,11 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SDValue InGlue;
if (RequiresSMChange) {
-
- Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL,
- DAG.getVTList(MVT::Other, MVT::Glue), Chain);
- InGlue = Chain.getValue(1);
+ if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
+ Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL,
+ DAG.getVTList(MVT::Other, MVT::Glue), Chain);
+ InGlue = Chain.getValue(1);
+ }
SDValue NewChain = changeStreamingMode(
DAG, DL, CalleeAttrs.hasStreamingInterface(), Chain, InGlue,
@@ -8914,11 +8915,13 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
Result = changeStreamingMode(
DAG, DL, !CalleeAttrs.hasStreamingInterface(), Result, InGlue,
getSMCondition(CallerAttrs, CalleeAttrs), PStateSM);
- InGlue = Result.getValue(1);
- Result =
- DAG.getNode(AArch64ISD::VG_RESTORE, DL,
- DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue});
+ if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
+ InGlue = Result.getValue(1);
+ Result =
+ DAG.getNode(AArch64ISD::VG_RESTORE, DL,
+ DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue});
+ }
}
if (CallerAttrs.requiresEnablingZAAfterCall(CalleeAttrs))
diff --git a/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll b/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll
new file mode 100644
index 00000000000000..36a300fea25e5a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll
@@ -0,0 +1,161 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -o - %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "arm64-apple-macosx14.0.0"
+
+; Check we don't crash on Darwin and that we don't try to save VG
+; when only SME (and not SVE) is enabled.
+
+; Function Attrs: mustprogress norecurse nounwind ssp uwtable(sync)
+define noundef i32 @main() local_unnamed_addr #0 {
+; CHECK-LABEL: main:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! ; 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: stp d13, d12, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset b8, -24
+; CHECK-NEXT: .cfi_offset b9, -32
+; CHECK-NEXT: .cfi_offset b10, -40
+; CHECK-NEXT: .cfi_offset b11, -48
+; CHECK-NEXT: .cfi_offset b12, -56
+; CHECK-NEXT: .cfi_offset b13, -64
+; CHECK-NEXT: .cfi_offset b14, -72
+; CHECK-NEXT: .cfi_offset b15, -80
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: bl __ZL9sme_crashv
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: mov w0, #0 ; =0x0
+; CHECK-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 ; 16-byte Folded Reload
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: .cfi_restore w30
+; CHECK-NEXT: .cfi_restore w29
+; CHECK-NEXT: .cfi_restore b8
+; CHECK-NEXT: .cfi_restore b9
+; CHECK-NEXT: .cfi_restore b10
+; CHECK-NEXT: .cfi_restore b11
+; CHECK-NEXT: .cfi_restore b12
+; CHECK-NEXT: .cfi_restore b13
+; CHECK-NEXT: .cfi_restore b14
+; CHECK-NEXT: .cfi_restore b15
+; CHECK-NEXT: ret
+entry:
+ tail call fastcc void @_ZL9sme_crashv() #4
+ ret i32 0
+}
+
+; Function Attrs: mustprogress norecurse nounwind ssp uwtable(sync)
+define internal fastcc void @_ZL9sme_crashv() unnamed_addr #1 {
+; CHECK-LABEL: _ZL9sme_crashv:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! ; 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 96
+; CHECK-NEXT: stp d13, d12, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x28, x27, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #80
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset w27, -24
+; CHECK-NEXT: .cfi_offset w28, -32
+; CHECK-NEXT: .cfi_offset b8, -40
+; CHECK-NEXT: .cfi_offset b9, -48
+; CHECK-NEXT: .cfi_offset b10, -56
+; CHECK-NEXT: .cfi_offset b11, -64
+; CHECK-NEXT: .cfi_offset b12, -72
+; CHECK-NEXT: .cfi_offset b13, -80
+; CHECK-NEXT: .cfi_offset b14, -88
+; CHECK-NEXT: .cfi_offset b15, -96
+; CHECK-NEXT: .cfi_remember_state
+; CHECK-NEXT: sub x9, sp, #160
+; CHECK-NEXT: and sp, x9, #0xffffffffffffff00
+; CHECK-NEXT: Lloh0:
+; CHECK-NEXT: adrp x8, ___stack_chk_guard@GOTPAGE
+; CHECK-NEXT: Lloh1:
+; CHECK-NEXT: ldr x8, [x8, ___stack_chk_guard@GOTPAGEOFF]
+; CHECK-NEXT: Lloh2:
+; CHECK-NEXT: ldr x8, [x8]
+; CHECK-NEXT: str x8, [sp, #152]
+; CHECK-NEXT: mov z0.b, #0 ; =0x0
+; CHECK-NEXT: stp q0, q0, [sp, #32]
+; CHECK-NEXT: stp q0, q0, [sp]
+; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: st1w { z0.s }, p0, [x8]
+; CHECK-EMPTY:
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ldr x8, [sp, #152]
+; CHECK-NEXT: Lloh3:
+; CHECK-NEXT: adrp x9, ___stack_chk_guard@GOTPAGE
+; CHECK-NEXT: Lloh4:
+; CHECK-NEXT: ldr x9, [x9, ___stack_chk_guard@GOTPAGEOFF]
+; CHECK-NEXT: Lloh5:
+; CHECK-NEXT: ldr x9, [x9]
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: b.ne LBB1_2
+; CHECK-NEXT: ; %bb.1: ; %entry
+; CHECK-NEXT: sub sp, x29, #80
+; CHECK-NEXT: .cfi_def_cfa wsp, 96
+; CHECK-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp x28, x27, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 ; 16-byte Folded Reload
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: .cfi_restore w30
+; CHECK-NEXT: .cfi_restore w29
+; CHECK-NEXT: .cfi_restore w27
+; CHECK-NEXT: .cfi_restore w28
+; CHECK-NEXT: .cfi_restore b8
+; CHECK-NEXT: .cfi_restore b9
+; CHECK-NEXT: .cfi_restore b10
+; CHECK-NEXT: .cfi_restore b11
+; CHECK-NEXT: .cfi_restore b12
+; CHECK-NEXT: .cfi_restore b13
+; CHECK-NEXT: .cfi_restore b14
+; CHECK-NEXT: .cfi_restore b15
+; CHECK-NEXT: ret
+; CHECK-NEXT: LBB1_2: ; %entry
+; CHECK-NEXT: .cfi_restore_state
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: bl ___stack_chk_fail
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh3, Lloh4, Lloh5
+; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh0, Lloh1, Lloh2
+entry:
+ %uu = alloca [16 x float], align 256
+ call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %uu) #5
+ call void @llvm.memset.p0.i64(ptr noundef nonnull align 256 dereferenceable(64) %uu, i8 0, i64 64, i1 false)
+ call void asm sideeffect "ptrue p0.s\0Ast1w { z0.s }, p0, [$0]\0A", "r"(ptr nonnull %uu) #5
+ call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %uu) #5
+ ret void
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2
+
+; Function Attrs: mustprogress nocallback nofree nounwind willreturn memory(argmem: write)
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2
+
+attributes #0 = { mustprogress norecurse nounwind ssp uwtable(sync) "stack-protector-buffer-size"="8" "target-cpu"="apple-a16" "target-features"="+sme,+sme-f64f64,+sme2" }
+attributes #1 = { mustprogress norecurse nounwind ssp uwtable(sync) "aarch64_pstate_sm_enabled" "stack-protector-buffer-size"="8" "target-cpu"="apple-a16" "target-features"="+sme,+sme-f64f64,+sme2" }
+attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #3 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: write) }
+attributes #4 = { "aarch64_pstate_sm_enabled" "no-builtin-calloc" "no-builtin-stpcpy" }
+attributes #5 = { nounwind }
diff --git a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
index fa8f92cb0a2c99..38666a05c20f8c 100644
--- a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
+++ b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
@@ -1102,6 +1102,44 @@ define void @streaming_compatible_no_sve(i32 noundef %x) #4 {
ret void
}
+; The algorithm that fixes up the offsets of the callee-save/restore
+; instructions must jump over the instructions that instantiate the current
+; 'VG' value. We must make sure that it doesn't consider any RDSVL in
+; user-code as if it is part of the frame-setup when doing so.
+define void @test_rdsvl_right_after_prologue(i64 %x0) nounwind {
+; NO-SVE-CHECK-LABEL: test_rdsvl_right_after_prologue:
+; NO-SVE-CHECK: // %bb.0:
+; NO-SVE-CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT: mov x9, x0
+; NO-SVE-CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT: bl __arm_get_current_vg
+; NO-SVE-CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill
+; NO-SVE-CHECK-NEXT: mov x0, x9
+; NO-SVE-CHECK-NEXT: rdsvl x8, #1
+; NO-SVE-CHECK-NEXT: add x29, sp, #64
+; NO-SVE-CHECK-NEXT: lsr x8, x8, #3
+; NO-SVE-CHECK-NEXT: mov x1, x0
+; NO-SVE-CHECK-NEXT: smstart sm
+; NO-SVE-CHECK-NEXT: mov x0, x8
+; NO-SVE-CHECK-NEXT: bl bar
+; NO-SVE-CHECK-NEXT: smstop sm
+; NO-SVE-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT: ret
+ %some_alloc = alloca i64, align 8
+ %rdsvl = tail call i64 @llvm.aarch64.sme.cntsd()
+ call void @bar(i64 %rdsvl, i64 %x0) "aarch64_pstate_sm_enabled"
+ ret void
+}
+
+declare void @bar(i64, i64)
+
; Ensure we still emit async unwind information with -fno-asynchronous-unwind-tables
; if the function contains a streaming-mode change.
``````````
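
For readers skimming the patch, the behavioral core is the new `requiresSaveVG` predicate in AArch64FrameLowering.cpp, which replaces the bare `hasStreamingModeChanges()` checks. Below is a minimal, self-contained sketch of that decision logic; the `TargetConfig` struct and the free function are illustrative stand-ins for the LLVM types, not the actual API:

```cpp
#include <cassert>

// Illustrative stand-in for the queries the real code makes on
// MachineFunction / AArch64Subtarget.
struct TargetConfig {
  bool HasStreamingModeChanges; // function changes PSTATE.SM around calls
  bool IsDarwin;                // Darwin target (e.g. arm64-apple-macosx)
  bool HasSVE;                  // SVE available, not just SME
};

// Mirrors the condition the patch introduces: VG is only spilled around
// streaming-mode changes, and on Darwin only when SVE is present.
static bool requiresSaveVG(const TargetConfig &C) {
  if (!C.HasStreamingModeChanges)
    return false;
  if (C.IsDarwin)
    return C.HasSVE;
  return true;
}

int main() {
  assert(!requiresSaveVG({false, true, true}));  // no mode change: never save
  assert(!requiresSaveVG({true, true, false}));  // Darwin, SME-only: skip VG
  assert(requiresSaveVG({true, true, true}));    // Darwin with SVE: save VG
  assert(requiresSaveVG({true, false, false}));  // non-Darwin: always save
  return 0;
}
```

The second assertion corresponds to the configuration exercised by the new sme-darwin-no-sve-vg.ll test: an SME-only Darwin target makes streaming-mode changes but must not emit VG_SAVE/VG_RESTORE nodes or a VG spill slot.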
https://github.com/llvm/llvm-project/pull/117695