[llvm-branch-commits] [llvm] Cherry pick f314e12 into release/19.x (PR #117695)
via llvm-branch-commits
llvm-branch-commits@lists.llvm.org
Tue Nov 26 02:24:01 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: Sander de Smalen (sdesmalen-arm)
Changes:
f314e12 uses `requiresSaveVG`, which was introduced in 334a366ba792 and is also missing from the release/19.x branch. I figured it made sense to cherry-pick that commit as well.
---
Full diff: https://github.com/llvm/llvm-project/pull/117695.diff
4 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.cpp (+19-12)
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+11-8)
- (added) llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll (+161)
- (modified) llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll (+38)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 87e057a468afd6..9d16c92b3fea5c 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1394,6 +1394,18 @@ bool requiresGetVGCall(MachineFunction &MF) {
!MF.getSubtarget<AArch64Subtarget>().hasSVE();
}
+static bool requiresSaveVG(MachineFunction &MF) {
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ // For Darwin platforms we don't save VG for non-SVE functions, even if SME
+ // is enabled with streaming mode changes.
+ if (!AFI->hasStreamingModeChanges())
+ return false;
+ auto &ST = MF.getSubtarget<AArch64Subtarget>();
+ if (ST.isTargetDarwin())
+ return ST.hasSVE();
+ return true;
+}
+
bool isVGInstruction(MachineBasicBlock::iterator MBBI) {
unsigned Opc = MBBI->getOpcode();
if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI ||
@@ -1430,8 +1442,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
// functions, we need to do this for both the streaming and non-streaming
// vector length. Move past these instructions if necessary.
MachineFunction &MF = *MBB.getParent();
- AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- if (AFI->hasStreamingModeChanges())
+ if (requiresSaveVG(MF))
while (isVGInstruction(MBBI))
++MBBI;
@@ -1936,12 +1947,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// pointer bump above.
while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
!IsSVECalleeSave(MBBI)) {
- // Move past instructions generated to calculate VG
- if (AFI->hasStreamingModeChanges())
- while (isVGInstruction(MBBI))
- ++MBBI;
-
- if (CombineSPBump)
+ if (CombineSPBump &&
+ // Only fix-up frame-setup load/store instructions.
+ (!requiresSaveVG(MF) || !isVGInstruction(MBBI)))
fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
NeedsWinCFI, &HasWinCFI);
++MBBI;
@@ -3720,7 +3728,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// non-streaming VG value.
const Function &F = MF.getFunction();
SMEAttrs Attrs(F);
- if (AFI->hasStreamingModeChanges()) {
+ if (requiresSaveVG(MF)) {
if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
CSStackSize += 16;
else
@@ -3873,7 +3881,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
}
// Insert VG into the list of CSRs, immediately before LR if saved.
- if (AFI->hasStreamingModeChanges()) {
+ if (requiresSaveVG(MF)) {
std::vector<CalleeSavedInfo> VGSaves;
SMEAttrs Attrs(MF.getFunction());
@@ -4602,10 +4610,9 @@ MachineBasicBlock::iterator emitVGSaveRestore(MachineBasicBlock::iterator II,
void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
MachineFunction &MF, RegScavenger *RS = nullptr) const {
- AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
for (auto &BB : MF)
for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();) {
- if (AFI->hasStreamingModeChanges())
+ if (requiresSaveVG(MF))
II = emitVGSaveRestore(II, this);
if (StackTaggingMergeSetTag)
II = tryMergeAdjacentSTG(II, this, RS);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 62078822c89b18..ef2789e96213b5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8732,10 +8732,11 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SDValue InGlue;
if (RequiresSMChange) {
-
- Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL,
- DAG.getVTList(MVT::Other, MVT::Glue), Chain);
- InGlue = Chain.getValue(1);
+ if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
+ Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL,
+ DAG.getVTList(MVT::Other, MVT::Glue), Chain);
+ InGlue = Chain.getValue(1);
+ }
SDValue NewChain = changeStreamingMode(
DAG, DL, CalleeAttrs.hasStreamingInterface(), Chain, InGlue,
@@ -8914,11 +8915,13 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
Result = changeStreamingMode(
DAG, DL, !CalleeAttrs.hasStreamingInterface(), Result, InGlue,
getSMCondition(CallerAttrs, CalleeAttrs), PStateSM);
- InGlue = Result.getValue(1);
- Result =
- DAG.getNode(AArch64ISD::VG_RESTORE, DL,
- DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue});
+ if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
+ InGlue = Result.getValue(1);
+ Result =
+ DAG.getNode(AArch64ISD::VG_RESTORE, DL,
+ DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue});
+ }
}
if (CallerAttrs.requiresEnablingZAAfterCall(CalleeAttrs))
diff --git a/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll b/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll
new file mode 100644
index 00000000000000..36a300fea25e5a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll
@@ -0,0 +1,161 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -o - %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "arm64-apple-macosx14.0.0"
+
+; Check we don't crash on Darwin and that we don't try to save VG
+; when only SME (and not SVE) is enabled.
+
+; Function Attrs: mustprogress norecurse nounwind ssp uwtable(sync)
+define noundef i32 @main() local_unnamed_addr #0 {
+; CHECK-LABEL: main:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! ; 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: stp d13, d12, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset b8, -24
+; CHECK-NEXT: .cfi_offset b9, -32
+; CHECK-NEXT: .cfi_offset b10, -40
+; CHECK-NEXT: .cfi_offset b11, -48
+; CHECK-NEXT: .cfi_offset b12, -56
+; CHECK-NEXT: .cfi_offset b13, -64
+; CHECK-NEXT: .cfi_offset b14, -72
+; CHECK-NEXT: .cfi_offset b15, -80
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: bl __ZL9sme_crashv
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: mov w0, #0 ; =0x0
+; CHECK-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 ; 16-byte Folded Reload
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: .cfi_restore w30
+; CHECK-NEXT: .cfi_restore w29
+; CHECK-NEXT: .cfi_restore b8
+; CHECK-NEXT: .cfi_restore b9
+; CHECK-NEXT: .cfi_restore b10
+; CHECK-NEXT: .cfi_restore b11
+; CHECK-NEXT: .cfi_restore b12
+; CHECK-NEXT: .cfi_restore b13
+; CHECK-NEXT: .cfi_restore b14
+; CHECK-NEXT: .cfi_restore b15
+; CHECK-NEXT: ret
+entry:
+ tail call fastcc void @_ZL9sme_crashv() #4
+ ret i32 0
+}
+
+; Function Attrs: mustprogress norecurse nounwind ssp uwtable(sync)
+define internal fastcc void @_ZL9sme_crashv() unnamed_addr #1 {
+; CHECK-LABEL: _ZL9sme_crashv:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! ; 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 96
+; CHECK-NEXT: stp d13, d12, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x28, x27, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #80
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset w27, -24
+; CHECK-NEXT: .cfi_offset w28, -32
+; CHECK-NEXT: .cfi_offset b8, -40
+; CHECK-NEXT: .cfi_offset b9, -48
+; CHECK-NEXT: .cfi_offset b10, -56
+; CHECK-NEXT: .cfi_offset b11, -64
+; CHECK-NEXT: .cfi_offset b12, -72
+; CHECK-NEXT: .cfi_offset b13, -80
+; CHECK-NEXT: .cfi_offset b14, -88
+; CHECK-NEXT: .cfi_offset b15, -96
+; CHECK-NEXT: .cfi_remember_state
+; CHECK-NEXT: sub x9, sp, #160
+; CHECK-NEXT: and sp, x9, #0xffffffffffffff00
+; CHECK-NEXT: Lloh0:
+; CHECK-NEXT: adrp x8, ___stack_chk_guard@GOTPAGE
+; CHECK-NEXT: Lloh1:
+; CHECK-NEXT: ldr x8, [x8, ___stack_chk_guard@GOTPAGEOFF]
+; CHECK-NEXT: Lloh2:
+; CHECK-NEXT: ldr x8, [x8]
+; CHECK-NEXT: str x8, [sp, #152]
+; CHECK-NEXT: mov z0.b, #0 ; =0x0
+; CHECK-NEXT: stp q0, q0, [sp, #32]
+; CHECK-NEXT: stp q0, q0, [sp]
+; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: st1w { z0.s }, p0, [x8]
+; CHECK-EMPTY:
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ldr x8, [sp, #152]
+; CHECK-NEXT: Lloh3:
+; CHECK-NEXT: adrp x9, ___stack_chk_guard@GOTPAGE
+; CHECK-NEXT: Lloh4:
+; CHECK-NEXT: ldr x9, [x9, ___stack_chk_guard@GOTPAGEOFF]
+; CHECK-NEXT: Lloh5:
+; CHECK-NEXT: ldr x9, [x9]
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: b.ne LBB1_2
+; CHECK-NEXT: ; %bb.1: ; %entry
+; CHECK-NEXT: sub sp, x29, #80
+; CHECK-NEXT: .cfi_def_cfa wsp, 96
+; CHECK-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp x28, x27, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 ; 16-byte Folded Reload
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: .cfi_restore w30
+; CHECK-NEXT: .cfi_restore w29
+; CHECK-NEXT: .cfi_restore w27
+; CHECK-NEXT: .cfi_restore w28
+; CHECK-NEXT: .cfi_restore b8
+; CHECK-NEXT: .cfi_restore b9
+; CHECK-NEXT: .cfi_restore b10
+; CHECK-NEXT: .cfi_restore b11
+; CHECK-NEXT: .cfi_restore b12
+; CHECK-NEXT: .cfi_restore b13
+; CHECK-NEXT: .cfi_restore b14
+; CHECK-NEXT: .cfi_restore b15
+; CHECK-NEXT: ret
+; CHECK-NEXT: LBB1_2: ; %entry
+; CHECK-NEXT: .cfi_restore_state
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: bl ___stack_chk_fail
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh3, Lloh4, Lloh5
+; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh0, Lloh1, Lloh2
+entry:
+ %uu = alloca [16 x float], align 256
+ call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %uu) #5
+ call void @llvm.memset.p0.i64(ptr noundef nonnull align 256 dereferenceable(64) %uu, i8 0, i64 64, i1 false)
+ call void asm sideeffect "ptrue p0.s\0Ast1w { z0.s }, p0, [$0]\0A", "r"(ptr nonnull %uu) #5
+ call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %uu) #5
+ ret void
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2
+
+; Function Attrs: mustprogress nocallback nofree nounwind willreturn memory(argmem: write)
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2
+
+attributes #0 = { mustprogress norecurse nounwind ssp uwtable(sync) "stack-protector-buffer-size"="8" "target-cpu"="apple-a16" "target-features"="+sme,+sme-f64f64,+sme2" }
+attributes #1 = { mustprogress norecurse nounwind ssp uwtable(sync) "aarch64_pstate_sm_enabled" "stack-protector-buffer-size"="8" "target-cpu"="apple-a16" "target-features"="+sme,+sme-f64f64,+sme2" }
+attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #3 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: write) }
+attributes #4 = { "aarch64_pstate_sm_enabled" "no-builtin-calloc" "no-builtin-stpcpy" }
+attributes #5 = { nounwind }
diff --git a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
index fa8f92cb0a2c99..38666a05c20f8c 100644
--- a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
+++ b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
@@ -1102,6 +1102,44 @@ define void @streaming_compatible_no_sve(i32 noundef %x) #4 {
ret void
}
+; The algorithm that fixes up the offsets of the callee-save/restore
+; instructions must jump over the instructions that instantiate the current
+; 'VG' value. We must make sure that it doesn't consider any RDSVL in
+; user-code as if it is part of the frame-setup when doing so.
+define void @test_rdsvl_right_after_prologue(i64 %x0) nounwind {
+; NO-SVE-CHECK-LABEL: test_rdsvl_right_after_prologue:
+; NO-SVE-CHECK: // %bb.0:
+; NO-SVE-CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT: mov x9, x0
+; NO-SVE-CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT: bl __arm_get_current_vg
+; NO-SVE-CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill
+; NO-SVE-CHECK-NEXT: mov x0, x9
+; NO-SVE-CHECK-NEXT: rdsvl x8, #1
+; NO-SVE-CHECK-NEXT: add x29, sp, #64
+; NO-SVE-CHECK-NEXT: lsr x8, x8, #3
+; NO-SVE-CHECK-NEXT: mov x1, x0
+; NO-SVE-CHECK-NEXT: smstart sm
+; NO-SVE-CHECK-NEXT: mov x0, x8
+; NO-SVE-CHECK-NEXT: bl bar
+; NO-SVE-CHECK-NEXT: smstop sm
+; NO-SVE-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT: ret
+ %some_alloc = alloca i64, align 8
+ %rdsvl = tail call i64 @llvm.aarch64.sme.cntsd()
+ call void @bar(i64 %rdsvl, i64 %x0) "aarch64_pstate_sm_enabled"
+ ret void
+}
+
+declare void @bar(i64, i64)
+
; Ensure we still emit async unwind information with -fno-asynchronous-unwind-tables
; if the function contains a streaming-mode change.
``````````
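
For readers skimming the patch, the behavioral core is the new `requiresSaveVG` predicate in AArch64FrameLowering.cpp, which replaces the bare `hasStreamingModeChanges()` checks. Below is a minimal, self-contained sketch of that decision logic; the `TargetConfig` struct and the free function are illustrative stand-ins for the LLVM types, not the actual API:

```cpp
#include <cassert>

// Illustrative stand-in for the queries the real code makes on
// MachineFunction / AArch64Subtarget.
struct TargetConfig {
  bool HasStreamingModeChanges; // function changes PSTATE.SM around calls
  bool IsDarwin;                // Darwin target (e.g. arm64-apple-macosx)
  bool HasSVE;                  // SVE available, not just SME
};

// Mirrors the condition the patch introduces: VG is only spilled around
// streaming-mode changes, and on Darwin only when SVE is present.
static bool requiresSaveVG(const TargetConfig &C) {
  if (!C.HasStreamingModeChanges)
    return false;
  if (C.IsDarwin)
    return C.HasSVE;
  return true;
}

int main() {
  assert(!requiresSaveVG({false, true, true}));  // no mode change: never save
  assert(!requiresSaveVG({true, true, false}));  // Darwin, SME-only: skip VG
  assert(requiresSaveVG({true, true, true}));    // Darwin with SVE: save VG
  assert(requiresSaveVG({true, false, false}));  // non-Darwin: always save
  return 0;
}
```

The second assertion corresponds to the configuration exercised by the new sme-darwin-no-sve-vg.ll test: an SME-only Darwin target makes streaming-mode changes but must not emit VG_SAVE/VG_RESTORE nodes or a VG spill slot.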
https://github.com/llvm/llvm-project/pull/117695