[llvm] [PowerPC] Support tail call optimization on AIX (PR #70016)

Kai Luo via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 24 01:56:56 PDT 2023


https://github.com/bzEq updated https://github.com/llvm/llvm-project/pull/70016

>From f7165771da7ec3e401cefb3ca1b869b1f27ff7f5 Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Tue, 24 Oct 2023 08:37:25 +0000
Subject: [PATCH 1/4] Initial attempt

---
 llvm/lib/Target/PowerPC/PPCFrameLowering.cpp  |  6 +++
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   | 31 +++++++++++----
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td      |  2 +
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       |  3 ++
 llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll | 39 +++++++++++++++++++
 5 files changed, 74 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll

diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index d41861ddcc8c6eb..29d07e4ed3add2b 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -1938,6 +1938,9 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
     else if (JumpTarget.isSymbol())
       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
         addExternalSymbol(JumpTarget.getSymbolName());
+    else if (JumpTarget.isMCSymbol())
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB))
+          .addSym(JumpTarget.getMCSymbol());
     else
       llvm_unreachable("Expecting Global or External Symbol");
   } else if (RetOpcode == PPC::TCRETURNri) {
@@ -1957,6 +1960,9 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
     else if (JumpTarget.isSymbol())
       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
         addExternalSymbol(JumpTarget.getSymbolName());
+    else if (JumpTarget.isMCSymbol())
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8))
+          .addSym(JumpTarget.getMCSymbol());
     else
       llvm_unreachable("Expecting Global or External Symbol");
   } else if (RetOpcode == PPC::TCRETURNri8) {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 5e0c2d62f5a9cb5..72a9e86458711de 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5704,6 +5704,7 @@ SDValue PPCTargetLowering::FinishCall(
             Callee.getOpcode() == ISD::TargetExternalSymbol ||
             Callee.getOpcode() == ISD::TargetGlobalAddress ||
             isa<ConstantSDNode>(Callee) ||
+            (Subtarget.isAIXABI() && Callee.getOpcode() == ISD::MCSymbol) ||
             (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
            "Expecting a global address, external symbol, absolute value, "
            "register or an indirect tail call when PC Relative calls are "
@@ -7065,7 +7066,8 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
           CallConv == CallingConv::Fast) &&
          "Unexpected calling convention!");
 
-  if (getTargetMachine().Options.GuaranteedTailCallOpt)
+  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+      CallConv != CallingConv::Fast)
     report_fatal_error("Tail call support is unimplemented on AIX.");
 
   if (useSoftFloat())
@@ -7389,6 +7391,8 @@ SDValue PPCTargetLowering::LowerCall_AIX(
   // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
   const bool IsPPC64 = Subtarget.isPPC64();
+  bool IsSibCall =
+      CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt;
   const EVT PtrVT = getPointerTy(DAG.getDataLayout());
   const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
   CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
@@ -7404,13 +7408,19 @@ SDValue PPCTargetLowering::LowerCall_AIX(
   const unsigned NumBytes = std::max<unsigned>(
       LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
 
+  int SPDiff =
+      IsSibCall ? 0 : CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
+
   // Adjust the stack pointer for the new arguments...
   // These operations are automatically eliminated by the prolog/epilog pass.
   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
   SDValue CallSeqStart = Chain;
+  SDValue LROp, FPOp;
+  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
 
   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
   SmallVector<SDValue, 8> MemOpChains;
+  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
 
   // Set up a copy of the stack pointer for loading and storing any
   // arguments that may not fit in the registers available for argument
@@ -7587,11 +7597,15 @@ SDValue PPCTargetLowering::LowerCall_AIX(
     }
 
     if (VA.isMemLoc()) {
-      SDValue PtrOff =
-          DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
-      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
-      MemOpChains.push_back(
-          DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
+      if (!CFlags.IsTailCall) {
+        SDValue PtrOff =
+            DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
+        PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+        MemOpChains.push_back(
+            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
+      } else
+        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, VA.getLocMemOffset(),
+                                 TailCallArguments);
 
       continue;
     }
@@ -7674,7 +7688,10 @@ SDValue PPCTargetLowering::LowerCall_AIX(
     InGlue = Chain.getValue(1);
   }
 
-  const int SPDiff = 0;
+  if (CFlags.IsTailCall && !IsSibCall)
+    PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
+                    TailCallArguments);
+
   return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
 }
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 0322bb37b1fdf8f..a99a347e13824f1 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -474,6 +474,8 @@ def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
 def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
           (TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
 
+def : Pat<(PPCtc_return (i64 mcsym:$dst), imm:$imm),
+          (TCRETURNdi8 mcsym:$dst, imm:$imm)>;
 
 // 64-bit CR instructions
 let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index a97062e0c643fb2..002851a1dc91b92 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -3187,6 +3187,9 @@ def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm),
 def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
           (TCRETURNri CTRRC:$dst, imm:$imm)>;
 
+def : Pat<(PPCtc_return (i32 mcsym:$dst), imm:$imm),
+          (TCRETURNdi mcsym:$dst, imm:$imm)>;
+
 def : Pat<(int_ppc_readflm), (MFFS)>;
 def : Pat<(int_ppc_mffsl), (MFFSL)>;
 
diff --git a/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll b/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll
new file mode 100644
index 000000000000000..14b38b931dd0280
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -tailcallopt -verify-machineinstrs -mtriple=powerpc64-ibm-aix < %s | FileCheck --check-prefix=AIX-64
+; RUN: llc -tailcallopt -verify-machineinstrs -mtriple=powerpc-ibm-aix < %s | FileCheck --check-prefix=AIX-32
+
+define hidden fastcc i32 @ff(i32 %a) {
+; AIX-64-LABEL: ff:
+; AIX-64:       # %bb.0: # %entry
+; AIX-64-NEXT:    addi 1, 1, 112
+; AIX-64-NEXT:    blr
+;
+; AIX-32-LABEL: ff:
+; AIX-32:       # %bb.0: # %entry
+; AIX-32-NEXT:    addi 1, 1, 64
+; AIX-32-NEXT:    blr
+entry:
+  ret i32 %a
+}
+
+define fastcc i32 @f(i32 %a, i32 %b) {
+; AIX-64-LABEL: f:
+; AIX-64:       # %bb.0: # %entry
+; AIX-64-NEXT:    stdu 1, -112(1)
+; AIX-64-NEXT:    clrldi 3, 3, 32
+; AIX-64-NEXT:    addi 1, 1, 112
+; AIX-64-NEXT:    b .ff
+; AIX-64-NEXT:    #TC_RETURNd8 .ff 0
+;
+; AIX-32-LABEL: f:
+; AIX-32:       # %bb.0: # %entry
+; AIX-32-NEXT:    stwu 1, -64(1)
+; AIX-32-NEXT:    lwz 4, 72(1)
+; AIX-32-NEXT:    stw 4, 80(1)
+; AIX-32-NEXT:    addi 1, 1, 72
+; AIX-32-NEXT:    b .ff
+; AIX-32-NEXT:    #TC_RETURNd .ff 8
+entry:
+  %r = tail call fastcc i32 @ff(i32 %a)
+  ret i32 %r
+}

>From 385dc75abc20b91a5173e7d39c342af235a58197 Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Tue, 24 Oct 2023 08:48:33 +0000
Subject: [PATCH 2/4] Compare w/o TCO

---
 llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll | 67 ++++++++++++++-----
 1 file changed, 50 insertions(+), 17 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll b/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll
index 14b38b931dd0280..1274b0d329c0676 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll
@@ -1,38 +1,71 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -tailcallopt -verify-machineinstrs -mtriple=powerpc64-ibm-aix < %s | FileCheck --check-prefix=AIX-64
-; RUN: llc -tailcallopt -verify-machineinstrs -mtriple=powerpc-ibm-aix < %s | FileCheck --check-prefix=AIX-32
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix < %s | FileCheck --check-prefix=AIX-32
+; RUN: llc -tailcallopt -verify-machineinstrs -mtriple=powerpc-ibm-aix < %s | FileCheck --check-prefix=AIX-32-TCO
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix < %s | FileCheck --check-prefix=AIX-64
+; RUN: llc -tailcallopt -verify-machineinstrs -mtriple=powerpc64-ibm-aix < %s | FileCheck --check-prefix=AIX-64-TCO
 
 define hidden fastcc i32 @ff(i32 %a) {
+; AIX-32-LABEL: ff:
+; AIX-32:       # %bb.0: # %entry
+; AIX-32-NEXT:    blr
+;
+; AIX-32-TCO-LABEL: ff:
+; AIX-32-TCO:       # %bb.0: # %entry
+; AIX-32-TCO-NEXT:    addi 1, 1, 64
+; AIX-32-TCO-NEXT:    blr
+;
 ; AIX-64-LABEL: ff:
 ; AIX-64:       # %bb.0: # %entry
-; AIX-64-NEXT:    addi 1, 1, 112
 ; AIX-64-NEXT:    blr
 ;
-; AIX-32-LABEL: ff:
-; AIX-32:       # %bb.0: # %entry
-; AIX-32-NEXT:    addi 1, 1, 64
-; AIX-32-NEXT:    blr
+; AIX-64-TCO-LABEL: ff:
+; AIX-64-TCO:       # %bb.0: # %entry
+; AIX-64-TCO-NEXT:    addi 1, 1, 112
+; AIX-64-TCO-NEXT:    blr
 entry:
   ret i32 %a
 }
 
 define fastcc i32 @f(i32 %a, i32 %b) {
+; AIX-32-LABEL: f:
+; AIX-32:       # %bb.0: # %entry
+; AIX-32-NEXT:    mflr 0
+; AIX-32-NEXT:    stwu 1, -64(1)
+; AIX-32-NEXT:    stw 0, 72(1)
+; AIX-32-NEXT:    bl .ff
+; AIX-32-NEXT:    addi 1, 1, 64
+; AIX-32-NEXT:    lwz 0, 8(1)
+; AIX-32-NEXT:    mtlr 0
+; AIX-32-NEXT:    blr
+;
+; AIX-32-TCO-LABEL: f:
+; AIX-32-TCO:       # %bb.0: # %entry
+; AIX-32-TCO-NEXT:    stwu 1, -64(1)
+; AIX-32-TCO-NEXT:    lwz 4, 72(1)
+; AIX-32-TCO-NEXT:    stw 4, 80(1)
+; AIX-32-TCO-NEXT:    addi 1, 1, 72
+; AIX-32-TCO-NEXT:    b .ff
+; AIX-32-TCO-NEXT:    #TC_RETURNd .ff 8
+;
 ; AIX-64-LABEL: f:
 ; AIX-64:       # %bb.0: # %entry
+; AIX-64-NEXT:    mflr 0
 ; AIX-64-NEXT:    stdu 1, -112(1)
 ; AIX-64-NEXT:    clrldi 3, 3, 32
+; AIX-64-NEXT:    std 0, 128(1)
+; AIX-64-NEXT:    bl .ff
 ; AIX-64-NEXT:    addi 1, 1, 112
-; AIX-64-NEXT:    b .ff
-; AIX-64-NEXT:    #TC_RETURNd8 .ff 0
+; AIX-64-NEXT:    ld 0, 16(1)
+; AIX-64-NEXT:    mtlr 0
+; AIX-64-NEXT:    blr
 ;
-; AIX-32-LABEL: f:
-; AIX-32:       # %bb.0: # %entry
-; AIX-32-NEXT:    stwu 1, -64(1)
-; AIX-32-NEXT:    lwz 4, 72(1)
-; AIX-32-NEXT:    stw 4, 80(1)
-; AIX-32-NEXT:    addi 1, 1, 72
-; AIX-32-NEXT:    b .ff
-; AIX-32-NEXT:    #TC_RETURNd .ff 8
+; AIX-64-TCO-LABEL: f:
+; AIX-64-TCO:       # %bb.0: # %entry
+; AIX-64-TCO-NEXT:    stdu 1, -112(1)
+; AIX-64-TCO-NEXT:    clrldi 3, 3, 32
+; AIX-64-TCO-NEXT:    addi 1, 1, 112
+; AIX-64-TCO-NEXT:    b .ff
+; AIX-64-TCO-NEXT:    #TC_RETURNd8 .ff 0
 entry:
   %r = tail call fastcc i32 @ff(i32 %a)
   ret i32 %r

>From a770e8837889bc8fc72d5d7545aa7b1e5e2a2e8e Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Tue, 24 Oct 2023 08:53:04 +0000
Subject: [PATCH 3/4] Format

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 72a9e86458711de..07f4d10e9e3fead 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7604,8 +7604,8 @@ SDValue PPCTargetLowering::LowerCall_AIX(
         MemOpChains.push_back(
             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
       } else
-        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, VA.getLocMemOffset(),
-                                 TailCallArguments);
+        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff,
+                                 VA.getLocMemOffset(), TailCallArguments);
 
       continue;
     }

>From 3973220b8d11ab5a0c4d242a0edc5934c9f86eba Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Tue, 24 Oct 2023 08:56:43 +0000
Subject: [PATCH 4/4] Fix RUN lines

---
 llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll b/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll
index 1274b0d329c0676..f8e61c7cb0ec69f 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix < %s | FileCheck --check-prefix=AIX-32
-; RUN: llc -tailcallopt -verify-machineinstrs -mtriple=powerpc-ibm-aix < %s | FileCheck --check-prefix=AIX-32-TCO
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix < %s | FileCheck --check-prefix=AIX-64
-; RUN: llc -tailcallopt -verify-machineinstrs -mtriple=powerpc64-ibm-aix < %s | FileCheck --check-prefix=AIX-64-TCO
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix < %s | FileCheck --check-prefix=AIX-32 %s
+; RUN: llc -tailcallopt -verify-machineinstrs -mtriple=powerpc-ibm-aix < %s | FileCheck --check-prefix=AIX-32-TCO %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix < %s | FileCheck --check-prefix=AIX-64 %s
+; RUN: llc -tailcallopt -verify-machineinstrs -mtriple=powerpc64-ibm-aix < %s | FileCheck --check-prefix=AIX-64-TCO %s
 
 define hidden fastcc i32 @ff(i32 %a) {
 ; AIX-32-LABEL: ff:



More information about the llvm-commits mailing list