[llvm] 5fae000 - [AArch64][SME] Disable tail-call optimization when streaming mode change or lazy-save may be required.
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 17 09:22:23 PDT 2022
Author: Sander de Smalen
Date: 2022-09-17T16:15:07Z
New Revision: 5fae000f36107a64f7f5b0ac5233803ab2bd82cd
URL: https://github.com/llvm/llvm-project/commit/5fae000f36107a64f7f5b0ac5233803ab2bd82cd
DIFF: https://github.com/llvm/llvm-project/commit/5fae000f36107a64f7f5b0ac5233803ab2bd82cd.diff
LOG: [AArch64][SME] Disable tail-call optimization when streaming mode change or lazy-save may be required.
When a streaming mode change is (or may be) required for a call, the caller
will need to restore the original mode after the call returns, which prevents
the use of tail-call optimization. The same holds for a call that requires the
lazy-save mechanism to be set up before the call, with the saved ZA state
(conditionally) restored after it.
More details about the SME attributes and design can be found
in D131562.
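To make the constraint concrete, here is a minimal standalone C++ sketch of
the two predicates the patch consults. The struct and the logic are
illustrative stand-ins based on the description above and on D131562, not
the actual SMEAttrs implementation:

    #include <cstdio>

    // Illustrative stand-in for the per-function SME attributes.
    struct FnAttrs {
      bool StreamingBody = false;       // "aarch64_pstate_sm_enabled"
      bool StreamingCompatible = false; // "aarch64_pstate_sm_compatible"
      bool SharedZA = false;            // "aarch64_pstate_za_shared"
    };

    // A streaming-mode change is (or may be) required when the callee is not
    // streaming-compatible and its mode may differ from the caller's.
    static bool mayRequireSMChange(const FnAttrs &Caller, const FnAttrs &Callee) {
      if (Callee.StreamingCompatible)
        return false; // callee runs in whatever mode the caller is in
      if (Caller.StreamingCompatible)
        return true;  // caller's mode is only known at run time
      return Caller.StreamingBody != Callee.StreamingBody;
    }

    // A lazy save must be set up when a shared-ZA caller calls a private-ZA
    // callee, so that ZA can be (conditionally) restored after the call.
    static bool requiresLazySave(const FnAttrs &Caller, const FnAttrs &Callee) {
      return Caller.SharedZA && !Callee.SharedZA;
    }

    int main() {
      FnAttrs Streaming{/*StreamingBody=*/true}, Normal{};
      // Either predicate firing means there is work left to do after the
      // call returns, so the call cannot be emitted as a tail call.
      bool TCO = !mayRequireSMChange(Streaming, Normal) &&
                 !requiresLazySave(Streaming, Normal);
      std::printf("streaming -> normal eligible for TCO: %s\n",
                  TCO ? "yes" : "no");
    }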
Reviewed By: aemerson
Differential Revision: https://reviews.llvm.org/D131579
Added:
llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
llvm/test/CodeGen/AArch64/sme-streaming-interface.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f55314dba63e2..f15697d3c5574 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6472,6 +6472,14 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
const Function &CallerF = MF.getFunction();
CallingConv::ID CallerCC = CallerF.getCallingConv();
+ // SME Streaming functions are not eligible for TCO as they may require
+ // the streaming mode or ZA to be restored after returning from the call.
+ SMEAttrs CallerAttrs(MF.getFunction());
+ auto CalleeAttrs = CLI.CB ? SMEAttrs(*CLI.CB) : SMEAttrs(SMEAttrs::Normal);
+ if (CallerAttrs.requiresSMChange(CalleeAttrs) ||
+ CallerAttrs.requiresLazySave(CalleeAttrs))
+ return false;
+
// Functions using the C or Fast calling convention that have an SVE signature
// preserve more registers and should assume the SVE_VectorCall CC.
// The check for matching callee-saved regs will determine whether it is
diff --git a/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll b/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
new file mode 100644
index 0000000000000..d276b177c2c05
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
+
+declare void @private_za_callee()
+
+; Ensure that we don't use tail call optimization when a lazy-save is required.
+;
+; FIXME: The code below is obviously not yet correct, because it should set up
+; a lazy-save buffer before doing the call, and (conditionally) restore it after
+; the call. But this functionality will follow in a future patch.
+define void @disable_tailcallopt() "aarch64_pstate_za_shared" nounwind {
+; CHECK-LABEL: disable_tailcallopt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: bl private_za_callee
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ tail call void @private_za_callee()
+ ret void
+}
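The FIXME above refers to the lazy-save mechanism described in D131562: the
caller arms a save of ZA before the call and must conditionally reload ZA
afterwards, depending on whether the callee actually committed the save. A
hedged C++ model of that protocol, with illustrative helper names standing in
for the real ABI routines, shows why such a call can never be a tail call:

    #include <cstdint>

    // Illustrative stand-ins; not the real SME ABI types or routines.
    struct TPIDR2Block { void *ZABuffer = nullptr; uint64_t NumSlices = 0; };

    static TPIDR2Block *CurrentTPIDR2 = nullptr;           // models TPIDR2_EL0
    static void setTPIDR2(TPIDR2Block *B) { CurrentTPIDR2 = B; }
    static TPIDR2Block *getTPIDR2() { return CurrentTPIDR2; }
    static void restoreZAFrom(TPIDR2Block &) { /* reload ZA from the buffer */ }
    static void private_za_callee() { /* may lazily save ZA, clearing TPIDR2 */ }

    void shared_za_caller() {
      TPIDR2Block Block;          // lazy-save buffer lives in the caller's frame
      setTPIDR2(&Block);          // arm the lazy save before the call
      private_za_callee();
      if (getTPIDR2() == nullptr) // the callee committed the save, so the
        restoreZAFrom(Block);     // caller must reload ZA here
      setTPIDR2(nullptr);         // disarm
      // The conditional restore *after* the call is exactly the work that a
      // tail call cannot accommodate.
    }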
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
index d795f98f18dea..ecf82f8c438be 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
@@ -399,4 +399,35 @@ exit:
ret void
}
+define void @disable_tailcallopt() "aarch64_pstate_sm_compatible" nounwind {
+; CHECK-LABEL: disable_tailcallopt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: bl __arm_sme_state
+; CHECK-NEXT: and x19, x0, #0x1
+; CHECK-NEXT: tbz x19, #0, .LBB9_2
+; CHECK-NEXT: // %bb.1:
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: .LBB9_2:
+; CHECK-NEXT: bl normal_callee
+; CHECK-NEXT: tbz x19, #0, .LBB9_4
+; CHECK-NEXT: // %bb.3:
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: .LBB9_4:
+; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+
+ tail call void @normal_callee()
+ ret void
+}
+
+
attributes #0 = { nounwind "target-features"="+sve" }
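The CHECK lines above encode a run-time decision: a streaming-compatible
function only switches modes around the call if it was actually in streaming
mode on entry. A hedged C++ rendering of the same control flow, with stubs
standing in for __arm_sme_state and the mode-switch instructions:

    // Illustrative stubs; not the real runtime call or instructions.
    static bool currentlyStreaming() { return false; } // bl __arm_sme_state
    static void smstop_sm() {}                          // smstop sm
    static void smstart_sm() {}                         // smstart sm
    static void normal_callee() {}

    void streaming_compatible_caller() {
      bool WasStreaming = currentlyStreaming(); // and x19, x0, #0x1
      if (WasStreaming)                         // tbz x19, #0, .LBB9_2
        smstop_sm();
      normal_callee();                          // bl normal_callee
      if (WasStreaming)                         // tbz x19, #0, .LBB9_4
        smstart_sm();
      // The conditional smstart after the call is what rules out emitting
      // "b normal_callee" as a tail call.
    }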
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll
index 5725caeb706f8..2c44645abb7cc 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll
@@ -337,4 +337,27 @@ entry:
declare double @llvm.cos.f64(double)
+; Ensure that tail call optimization is disabled when the streaming mode
+; doesn't match.
+define void @disable_tailcallopt() nounwind {
+; CHECK-LABEL: disable_tailcallopt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: bl streaming_callee
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ tail call void @streaming_callee()
+ ret void
+}
+
attributes #0 = { nounwind "target-features"="+sve" }