[llvm] 50a2b31 - [ARM] Be more precise about conditions for indirect tail-calls (#102451)

Fri Aug 9 00:50:24 PDT 2024

Author: Oliver Stannard
Date: 2024-08-09T08:50:21+01:00
New Revision: 50a2b31800a554b35367a5f3bcebb640703a48e1

URL: https://github.com/llvm/llvm-project/commit/50a2b31800a554b35367a5f3bcebb640703a48e1
DIFF: https://github.com/llvm/llvm-project/commit/50a2b31800a554b35367a5f3bcebb640703a48e1.diff

LOG: [ARM] Be more precise about conditions for indirect tail-calls (#102451)

This code was trying to predict the conditions in which an indirect
tail call will have a free register to hold the target address, and
falling back to a non-tail call if all non-callee-saved registers are
used for arguments or return address authentication.

However, it was only taking the number of arguments into account, not
which registers they are allocated to, so floating-point arguments could
cause this to give the wrong result, causing either a later error due to
the lack of a free register, or a missed optimisation of not doing the
tail call.

The assignment of arguments to registers is available at this point in
the code, so we can calculate exactly which registers will be available
for the tail-call.

Added: 
    llvm/test/CodeGen/Thumb2/indirect-tail-call-free-registers.ll

Modified: 
    llvm/lib/Target/ARM/ARMISelLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 75d16a42d0205a..476b7b349294ab 100644

--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -3024,18 +3024,27 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
 
   assert(Subtarget->supportsTailCall());
 
-  // Indirect tail calls cannot be optimized for Thumb1 if the args
-  // to the call take up r0-r3. The reason is that there are no legal registers
-  // left to hold the pointer to the function to be called.
-  // Similarly, if the function uses return address sign and authentication,
-  // r12 is needed to hold the PAC and is not available to hold the callee
-  // address.
-  if (Outs.size() >= 4 &&
-      (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) {
-    if (Subtarget->isThumb1Only())
-      return false;
-    // Conservatively assume the function spills LR.
-    if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true))
+  // Indirect tail-calls require a register to hold the target address. That
+  // register must be:
+  // * Allocatable (i.e. r0-r7 if the target is Thumb1).
+  // * Not callee-saved, so must be one of r0-r3 or r12.
+  // * Not used to hold an argument to the tail-called function, which might be
+  //   in r0-r3.
+  // * Not used to hold the return address authentication code, which is in r12
+  //   if enabled.
+  // Sometimes, no register matches all of these conditions, so we can't do a
+  // tail-call.
+  if (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect) {
+    SmallSet<MCPhysReg, 5> AddressRegisters;
+    for (Register R : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
+      AddressRegisters.insert(R);
+    if (!(Subtarget->isThumb1Only() ||
+          MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true)))
+      AddressRegisters.insert(ARM::R12);
+    for (const CCValAssign &AL : ArgLocs)
+      if (AL.isRegLoc())
+        AddressRegisters.erase(AL.getLocReg());
+    if (AddressRegisters.empty())
       return false;
   }
 

diff  --git a/llvm/test/CodeGen/Thumb2/indirect-tail-call-free-registers.ll b/llvm/test/CodeGen/Thumb2/indirect-tail-call-free-registers.ll
new file mode 100644
index 00000000000000..c6ace3eb55b287
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/indirect-tail-call-free-registers.ll
@@ -0,0 +1,111 @@
+; RUN: llc %s -o - -mtriple=thumbv8m.main -mattr=+vfp4 | FileCheck %s
+
+;; No outgoing arguments, plenty of free registers to hold the target address.
+define void @test0(ptr %fptr) {
+; CHECK-LABEL: test0:
+; CHECK: bx {{r0|r1|r2|r3|r12}}
+entry:
+  tail call void %fptr()
+  ret void
+}
+
+;; Four integer outgoing arguments, which use up r0-r3.
+define void @test1(ptr %fptr) {
+; CHECK-LABEL: test1:
+; CHECK: bx r12
+entry:
+  tail call void %fptr(i32 0, i32 0, i32 0, i32 0)
+  ret void
+}
+
+;; Four integer outgoing arguments, which use up r0-r3, and sign-return-address
+;; uses r12, so we can never tail-call this.
+define void @test2(ptr %fptr) "sign-return-address"="all" {
+; CHECK-LABEL: test2:
+; CHECK: blx
+  entry:
+  tail call void %fptr(i32 0, i32 0, i32 0, i32 0)
+  ret void
+}
+
+;; An i32 and an i64 argument, which uses r0, r2 and r3 for arguments, leaving
+;; r1 free for the address.
+define void @test3(ptr %fptr) {
+; CHECK-LABEL: test3:
+; CHECK: bx {{r1|r12}}
+entry:
+  tail call void %fptr(i32 0, i64 0)
+  ret void
+}
+
+;; Four float arguments, using the soft-float calling convention, which uses
+;; r0-r3.
+define void @test4(ptr %fptr) {
+; CHECK-LABEL: test4:
+; CHECK: bx r12
+entry:
+  tail call arm_aapcscc void %fptr(float 0.0, float 0.0, float 0.0, float 0.0)
+  ret void
+}
+
+;; Four float arguments, using the soft-float calling convention, which uses
+;; r0-r3, and sign-return-address uses r12. Previously failed with "ran out of
+;; registers during register allocation"; now falls back to a non-tail call.
+define void @test5(ptr %fptr) "sign-return-address"="all" {
+; CHECK-LABEL: test5:
+; CHECK: blx
+entry:
+  tail call arm_aapcscc void %fptr(float 0.0, float 0.0, float 0.0, float 0.0)
+  ret void
+}
+
+;; Four float arguments, using the hard-float calling convention, which uses
+;; s0-s3, leaving all of the integer registers free for the address.
+define void @test6(ptr %fptr) {
+; CHECK-LABEL: test6:
+; CHECK: bx {{r0|r1|r2|r3|r12}}
+entry:
+  tail call arm_aapcs_vfpcc void %fptr(float 0.0, float 0.0, float 0.0, float 0.0)
+  ret void
+}
+
+;; Four float arguments, using the hard-float calling convention, which uses
+;; s0-s3, leaving r0-r3 free for the address, with r12 used for
+;; sign-return-address.
+define void @test7(ptr %fptr) "sign-return-address"="all" {
+; CHECK-LABEL: test7:
+; CHECK: bx {{r0|r1|r2|r3}}
+entry:
+  tail call arm_aapcs_vfpcc void %fptr(float 0.0, float 0.0, float 0.0, float 0.0)
+  ret void
+}
+
+;; Two double arguments, using the soft-float calling convention, which uses
+;; r0-r3.
+define void @test8(ptr %fptr) {
+; CHECK-LABEL: test8:
+; CHECK: bx r12
+entry:
+  tail call arm_aapcscc void %fptr(double 0.0, double 0.0)
+  ret void
+}
+
+;; Two double arguments, using the soft-float calling convention, which uses
+;; r0-r3, and sign-return-address uses r12, so we can't tail-call this.
+define void @test9(ptr %fptr) "sign-return-address"="all" {
+; CHECK-LABEL: test9:
+; CHECK: blx
+entry:
+  tail call arm_aapcscc void %fptr(double 0.0, double 0.0)
+  ret void
+}
+
+;; Four integer arguments (one on the stack), but due to alignment r1 is left
+;; empty, so can be used for the tail-call.
+define void @test10(ptr %fptr, i64 %b, i32 %c) "sign-return-address"="all" {
+; CHECK-LABEL: test10:
+; CHECK: bx r1
+entry:
+  tail call void %fptr(i32 0, i64 %b, i32 %c)
+  ret void
+}