[llvm] 5fae000 - [AArch64][SME] Disable tail-call optimization when streaming mode change or lazy-save may be required.
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 17 09:22:23 PDT 2022
Author: Sander de Smalen
Date: 2022-09-17T16:15:07Z
New Revision: 5fae000f36107a64f7f5b0ac5233803ab2bd82cd
URL: https://github.com/llvm/llvm-project/commit/5fae000f36107a64f7f5b0ac5233803ab2bd82cd
DIFF: https://github.com/llvm/llvm-project/commit/5fae000f36107a64f7f5b0ac5233803ab2bd82cd.diff
LOG: [AArch64][SME] Disable tail-call optimization when streaming mode change or lazy-save may be required.
When a streaming mode change is (or may be) required for a call, the caller
will need to restore the original mode after the call returns, which prevents
the use of tail-call optimization. The same holds for a call that requires the
lazy-save mechanism to be set up before the call, with the saved ZA state
(conditionally) restored after it.
More details about the SME attributes and design can be found
in D131562.
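To make the constraint concrete, here is a minimal standalone C++ sketch of
the two predicates the patch consults. The struct and the logic are
illustrative stand-ins based on the description above and on D131562, not
the actual SMEAttrs implementation:

    #include <cstdio>

    // Illustrative stand-in for the per-function SME attributes.
    struct FnAttrs {
      bool StreamingBody = false;       // "aarch64_pstate_sm_enabled"
      bool StreamingCompatible = false; // "aarch64_pstate_sm_compatible"
      bool SharedZA = false;            // "aarch64_pstate_za_shared"
    };

    // A streaming-mode change is (or may be) required when the callee is not
    // streaming-compatible and its mode may differ from the caller's.
    static bool mayRequireSMChange(const FnAttrs &Caller, const FnAttrs &Callee) {
      if (Callee.StreamingCompatible)
        return false; // callee runs in whatever mode the caller is in
      if (Caller.StreamingCompatible)
        return true;  // caller's mode is only known at run time
      return Caller.StreamingBody != Callee.StreamingBody;
    }

    // A lazy save must be set up when a shared-ZA caller calls a private-ZA
    // callee, so that ZA can be (conditionally) restored after the call.
    static bool requiresLazySave(const FnAttrs &Caller, const FnAttrs &Callee) {
      return Caller.SharedZA && !Callee.SharedZA;
    }

    int main() {
      FnAttrs Streaming{/*StreamingBody=*/true}, Normal{};
      // Either predicate firing means there is work left to do after the
      // call returns, so the call cannot be emitted as a tail call.
      bool TCO = !mayRequireSMChange(Streaming, Normal) &&
                 !requiresLazySave(Streaming, Normal);
      std::printf("streaming -> normal eligible for TCO: %s\n",
                  TCO ? "yes" : "no");
    }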
Reviewed By: aemerson
Differential Revision: https://reviews.llvm.org/D131579
Added:
llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
llvm/test/CodeGen/AArch64/sme-streaming-interface.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f55314dba63e2..f15697d3c5574 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6472,6 +6472,14 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
const Function &CallerF = MF.getFunction();
CallingConv::ID CallerCC = CallerF.getCallingConv();
+ // SME Streaming functions are not eligible for TCO as they may require
+ // the streaming mode or ZA to be restored after returning from the call.
+ SMEAttrs CallerAttrs(MF.getFunction());
+ auto CalleeAttrs = CLI.CB ? SMEAttrs(*CLI.CB) : SMEAttrs(SMEAttrs::Normal);
+ if (CallerAttrs.requiresSMChange(CalleeAttrs) ||
+ CallerAttrs.requiresLazySave(CalleeAttrs))
+ return false;
+
// Functions using the C or Fast calling convention that have an SVE signature
// preserve more registers and should assume the SVE_VectorCall CC.
// The check for matching callee-saved regs will determine whether it is
diff --git a/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll b/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
new file mode 100644
index 0000000000000..d276b177c2c05
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
+
+declare void @private_za_callee()
+
+; Ensure that we don't use tail call optimization when a lazy-save is required.
+;
+; FIXME: The code below is obviously not yet correct, because it should set up
+; a lazy-save buffer before doing the call, and (conditionally) restore it after
+; the call. But this functionality will follow in a future patch.
+define void @disable_tailcallopt() "aarch64_pstate_za_shared" nounwind {
+; CHECK-LABEL: disable_tailcallopt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: bl private_za_callee
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ tail call void @private_za_callee()
+ ret void
+}
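The FIXME above refers to the lazy-save mechanism described in D131562: the
caller arms a save of ZA before the call and must conditionally reload ZA
afterwards, depending on whether the callee actually committed the save. A
hedged C++ model of that protocol, with illustrative helper names standing in
for the real ABI routines, shows why such a call can never be a tail call:

    #include <cstdint>

    // Illustrative stand-ins; not the real SME ABI types or routines.
    struct TPIDR2Block { void *ZABuffer = nullptr; uint64_t NumSlices = 0; };

    static TPIDR2Block *CurrentTPIDR2 = nullptr;           // models TPIDR2_EL0
    static void setTPIDR2(TPIDR2Block *B) { CurrentTPIDR2 = B; }
    static TPIDR2Block *getTPIDR2() { return CurrentTPIDR2; }
    static void restoreZAFrom(TPIDR2Block &) { /* reload ZA from the buffer */ }
    static void private_za_callee() { /* may lazily save ZA, clearing TPIDR2 */ }

    void shared_za_caller() {
      TPIDR2Block Block;          // lazy-save buffer lives in the caller's frame
      setTPIDR2(&Block);          // arm the lazy save before the call
      private_za_callee();
      if (getTPIDR2() == nullptr) // the callee committed the save, so the
        restoreZAFrom(Block);     // caller must reload ZA here
      setTPIDR2(nullptr);         // disarm
      // The conditional restore *after* the call is exactly the work that a
      // tail call cannot accommodate.
    }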
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
index d795f98f18dea..ecf82f8c438be 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
@@ -399,4 +399,35 @@ exit:
ret void
}
+define void @disable_tailcallopt() "aarch64_pstate_sm_compatible" nounwind {
+; CHECK-LABEL: disable_tailcallopt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: bl __arm_sme_state
+; CHECK-NEXT: and x19, x0, #0x1
+; CHECK-NEXT: tbz x19, #0, .LBB9_2
+; CHECK-NEXT: // %bb.1:
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: .LBB9_2:
+; CHECK-NEXT: bl normal_callee
+; CHECK-NEXT: tbz x19, #0, .LBB9_4
+; CHECK-NEXT: // %bb.3:
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: .LBB9_4:
+; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+
+ tail call void @normal_callee()
+ ret void
+}
+
+
attributes #0 = { nounwind "target-features"="+sve" }
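The CHECK lines above encode a run-time decision: a streaming-compatible
function only switches modes around the call if it was actually in streaming
mode on entry. A hedged C++ rendering of the same control flow, with stubs
standing in for __arm_sme_state and the mode-switch instructions:

    // Illustrative stubs; not the real runtime call or instructions.
    static bool currentlyStreaming() { return false; } // bl __arm_sme_state
    static void smstop_sm() {}                          // smstop sm
    static void smstart_sm() {}                         // smstart sm
    static void normal_callee() {}

    void streaming_compatible_caller() {
      bool WasStreaming = currentlyStreaming(); // and x19, x0, #0x1
      if (WasStreaming)                         // tbz x19, #0, .LBB9_2
        smstop_sm();
      normal_callee();                          // bl normal_callee
      if (WasStreaming)                         // tbz x19, #0, .LBB9_4
        smstart_sm();
      // The conditional smstart after the call is what rules out emitting
      // "b normal_callee" as a tail call.
    }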
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll
index 5725caeb706f8..2c44645abb7cc 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll
@@ -337,4 +337,27 @@ entry:
declare double @llvm.cos.f64(double)
+; Ensure that tail call optimization is disabled when the streaming mode
+; doesn't match.
+define void @disable_tailcallopt() nounwind {
+; CHECK-LABEL: disable_tailcallopt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: bl streaming_callee
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ tail call void @streaming_callee()
+ ret void
+}
+
attributes #0 = { nounwind "target-features"="+sve" }