[llvm] [PowerPC] Initial support of tail call optimization on AIX (Digger/tail call) (PR #161690)

via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 2 08:40:40 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-powerpc

Author: zhijian lin (diggerlin)

<details>
<summary>Changes</summary>

AIX currently lacks support for tail call optimization (TCO). This PR adds TCO for the fastcc calling convention on AIX.

The patch is based on https://github.com/llvm/llvm-project/pull/70016; thanks to Kai Luo for that work.



---
Full diff: https://github.com/llvm/llvm-project/pull/161690.diff


6 Files Affected:

- (modified) llvm/lib/Target/PowerPC/PPCFrameLowering.cpp (+6) 
- (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+38-9) 
- (modified) llvm/lib/Target/PowerPC/PPCInstr64Bit.td (+2) 
- (modified) llvm/lib/Target/PowerPC/PPCInstrInfo.td (+4) 
- (added) llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll (+111) 
- (modified) llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll (+1-1) 


``````````diff
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 910bc9d281259..efa75fd5681b3 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -1935,6 +1935,9 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
     else if (JumpTarget.isSymbol())
       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
         addExternalSymbol(JumpTarget.getSymbolName());
+    else if (JumpTarget.isMCSymbol())
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB))
+          .addSym(JumpTarget.getMCSymbol());
     else
       llvm_unreachable("Expecting Global or External Symbol");
   } else if (RetOpcode == PPC::TCRETURNri) {
@@ -1954,6 +1957,9 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
     else if (JumpTarget.isSymbol())
       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
         addExternalSymbol(JumpTarget.getSymbolName());
+    else if (JumpTarget.isMCSymbol())
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8))
+          .addSym(JumpTarget.getMCSymbol());
     else
       llvm_unreachable("Expecting Global or External Symbol");
   } else if (RetOpcode == PPC::TCRETURNri8) {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index f69218056fc44..a35cdf4e2c282 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5190,7 +5190,13 @@ bool PPCTargetLowering::IsEligibleForTailCallOptimization(
     const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
     CallingConv::ID CallerCC, bool isVarArg,
     const SmallVectorImpl<ISD::InputArg> &Ins) const {
-  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
+  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
+
+  // Enable SCO on AIX.
+  if (!TailCallOpt && !Subtarget.isAIXABI())
+    return false;
+
+  if (DisableSCO)
     return false;
 
   // Variable argument functions are not supported.
@@ -5869,6 +5875,7 @@ SDValue PPCTargetLowering::FinishCall(
             Callee.getOpcode() == ISD::TargetExternalSymbol ||
             Callee.getOpcode() == ISD::TargetGlobalAddress ||
             isa<ConstantSDNode>(Callee) ||
+            (Subtarget.isAIXABI() && Callee.getOpcode() == ISD::MCSymbol) ||
             (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
            "Expecting a global address, external symbol, absolute value, "
            "register or an indirect tail call when PC Relative calls are "
@@ -7244,8 +7251,10 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
           CallConv == CallingConv::Fast) &&
          "Unexpected calling convention!");
 
-  if (getTargetMachine().Options.GuaranteedTailCallOpt)
-    report_fatal_error("Tail call support is unimplemented on AIX.");
+  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+      CallConv != CallingConv::Fast)
+    report_fatal_error("Tail call support for non-fastcc calling convention is "
+                       "unimplemented on AIX.");
 
   if (useSoftFloat())
     report_fatal_error("Soft float support is unimplemented on AIX.");
@@ -7254,6 +7263,9 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
 
   const bool IsPPC64 = Subtarget.isPPC64();
   const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
+  // Potential tail calls could cause overwriting of argument stack slots.
+  const bool IsImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+                             (CallConv == CallingConv::Fast));
 
   // Assign locations to all of the incoming arguments.
   SmallVector<CCValAssign, 16> ArgLocs;
@@ -7319,10 +7331,6 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
       // Objects are right-justified because AIX is big-endian.
       if (LocSize > ValSize)
         CurArgOffset += LocSize - ValSize;
-      // Potential tail calls could cause overwriting of argument stack slots.
-      const bool IsImmutable =
-          !(getTargetMachine().Options.GuaranteedTailCallOpt &&
-            (CallConv == CallingConv::Fast));
       int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
       SDValue ArgValue =
@@ -7616,6 +7624,8 @@ SDValue PPCTargetLowering::LowerCall_AIX(
   // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
   const bool IsPPC64 = Subtarget.isPPC64();
+  bool IsSibCall =
+      CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt;
   const EVT PtrVT = getPointerTy(DAG.getDataLayout());
   const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
   CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
@@ -7631,13 +7641,25 @@ SDValue PPCTargetLowering::LowerCall_AIX(
   const unsigned NumBytes = std::max<unsigned>(
       LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
 
+  int SPDiff =
+      IsSibCall ? 0 : CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
+
+  // To protect arguments on the stack from being clobbered in a tail call,
+  // force all the loads to happen before doing any other lowering.
+  if (CFlags.IsTailCall)
+    Chain = DAG.getStackArgumentTokenFactor(Chain);
+
   // Adjust the stack pointer for the new arguments...
   // These operations are automatically eliminated by the prolog/epilog pass.
-  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
+  if (!IsSibCall)
+    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
   SDValue CallSeqStart = Chain;
+  SDValue LROp, FPOp;
+  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
 
   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
   SmallVector<SDValue, 8> MemOpChains;
+  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
 
   // Set up a copy of the stack pointer for loading and storing any
   // arguments that may not fit in the registers available for argument
@@ -7814,6 +7836,7 @@ SDValue PPCTargetLowering::LowerCall_AIX(
     }
 
     if (VA.isMemLoc()) {
+      if (!CFlags.IsTailCall) {
       SDValue PtrOff =
           DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
@@ -7821,6 +7844,9 @@ SDValue PPCTargetLowering::LowerCall_AIX(
           DAG.getStore(Chain, dl, Arg, PtrOff,
                        MachinePointerInfo::getStack(MF, VA.getLocMemOffset()),
                        Subtarget.getFrameLowering()->getStackAlign()));
+      } else
+        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff,
+                                 VA.getLocMemOffset(), TailCallArguments);
 
       continue;
     }
@@ -7903,7 +7929,10 @@ SDValue PPCTargetLowering::LowerCall_AIX(
     InGlue = Chain.getValue(1);
   }
 
-  const int SPDiff = 0;
+  if (CFlags.IsTailCall && !IsSibCall)
+    PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
+                    TailCallArguments);
+
   return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
 }
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 269d30318bca8..aa913ae82484e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -474,6 +474,8 @@ def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
 def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
           (TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
 
+def : Pat<(PPCtc_return (i64 mcsym:$dst), imm:$imm),
+          (TCRETURNdi8 mcsym:$dst, imm:$imm)>;
 
 // 64-bit CR instructions
 let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 1c45050cdf9ca..f2ce9db9ab1c9 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -3155,6 +3155,10 @@ def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
           (TCRETURNri CTRRC:$dst, imm:$imm)>;
 
 def : Pat<(int_ppc_fence), (FENCE)>;
+
+def : Pat<(PPCtc_return (i32 mcsym:$dst), imm:$imm),
+          (TCRETURNdi mcsym:$dst, imm:$imm)>;
+
 def : Pat<(int_ppc_readflm), (MFFS)>;
 def : Pat<(int_ppc_mffsl), (MFFSL)>;
 
diff --git a/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll b/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll
new file mode 100644
index 0000000000000..a23fd2a8ae2c0
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix < %s | FileCheck --check-prefix=AIX-32 %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix < %s | FileCheck --check-prefix=AIX-64 %s
+
+define hidden fastcc i32 @k(i32 %a, i32 %b) {
+; AIX-32-LABEL: k:
+; AIX-32:       # %bb.0: # %entry
+; AIX-32-NEXT:    add 3, 3, 4
+; AIX-32-NEXT:    blr
+;
+; AIX-64-LABEL: k:
+; AIX-64:       # %bb.0: # %entry
+; AIX-64-NEXT:    add 3, 3, 4
+; AIX-64-NEXT:    blr
+entry:
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+define hidden fastcc i32 @ff(i32 %a) {
+; AIX-32-LABEL: ff:
+; AIX-32:       # %bb.0: # %entry
+; AIX-32-NEXT:    blr
+;
+; AIX-64-LABEL: ff:
+; AIX-64:       # %bb.0: # %entry
+; AIX-64-NEXT:    blr
+entry:
+  ret i32 %a
+}
+
+define fastcc i32 @f(i32 %a, i32 %b) {
+; AIX-32-LABEL: f:
+; AIX-32:       # %bb.0: # %entry
+; AIX-32-NEXT:    b .ff
+; AIX-32-NEXT:    #TC_RETURNd .ff 0
+;
+; AIX-64-LABEL: f:
+; AIX-64:       # %bb.0: # %entry
+; AIX-64-NEXT:    clrldi 3, 3, 32
+; AIX-64-NEXT:    b .ff
+; AIX-64-NEXT:    #TC_RETURNd8 .ff 0
+entry:
+  %r = tail call fastcc i32 @ff(i32 %a)
+  ret i32 %r
+}
+
+define fastcc i32 @kk(i32 %a) {
+; AIX-32-LABEL: kk:
+; AIX-32:       # %bb.0: # %entry
+; AIX-32-NEXT:    li 4, 1024
+; AIX-32-NEXT:    b .k
+; AIX-32-NEXT:    #TC_RETURNd .k 0
+;
+; AIX-64-LABEL: kk:
+; AIX-64:       # %bb.0: # %entry
+; AIX-64-NEXT:    clrldi 3, 3, 32
+; AIX-64-NEXT:    li 4, 1024
+; AIX-64-NEXT:    b .k
+; AIX-64-NEXT:    #TC_RETURNd8 .k 0
+entry:
+  %r = tail call fastcc i32 @k(i32 %a, i32 1024)
+  ret i32 %r
+}
+
+define fastcc i32 @g(i32 %a) {
+; AIX-32-LABEL: g:
+; AIX-32:       # %bb.0: # %entry
+; AIX-32-NEXT:    b .ff
+; AIX-32-NEXT:    #TC_RETURNd .ff 0
+;
+; AIX-64-LABEL: g:
+; AIX-64:       # %bb.0: # %entry
+; AIX-64-NEXT:    clrldi 3, 3, 32
+; AIX-64-NEXT:    b .ff
+; AIX-64-NEXT:    #TC_RETURNd8 .ff 0
+entry:
+  %r = tail call fastcc i32 @ff(i32 %a)
+  ret i32 %r
+}
+
+define fastcc i32 @gg(i32 %a) {
+; AIX-32-LABEL: gg:
+; AIX-32:       # %bb.0: # %entry
+; AIX-32-NEXT:    mflr 0
+; AIX-32-NEXT:    stwu 1, -64(1)
+; AIX-32-NEXT:    stw 0, 72(1)
+; AIX-32-NEXT:    bl .ff
+; AIX-32-NEXT:    addi 3, 3, 1
+; AIX-32-NEXT:    addi 1, 1, 64
+; AIX-32-NEXT:    lwz 0, 8(1)
+; AIX-32-NEXT:    mtlr 0
+; AIX-32-NEXT:    blr
+;
+; AIX-64-LABEL: gg:
+; AIX-64:       # %bb.0: # %entry
+; AIX-64-NEXT:    mflr 0
+; AIX-64-NEXT:    stdu 1, -112(1)
+; AIX-64-NEXT:    clrldi 3, 3, 32
+; AIX-64-NEXT:    std 0, 128(1)
+; AIX-64-NEXT:    bl .ff
+; AIX-64-NEXT:    addi 3, 3, 1
+; AIX-64-NEXT:    addi 1, 1, 112
+; AIX-64-NEXT:    ld 0, 16(1)
+; AIX-64-NEXT:    mtlr 0
+; AIX-64-NEXT:    blr
+entry:
+  %r = tail call fastcc i32 @ff(i32 %a)
+  %r.0 = add i32 %r, 1
+  ret i32 %r.0
+}
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll b/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll
index 1a3aaaec037ab..d0a7444e64458 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll
@@ -36,7 +36,7 @@ exit:
 ; CHECK-SCO-SR: stdu 1, -{{[0-9]+}}(1)
 ; CHECK-SCO-SR: bl __assert_fail
 
-; CHECK-AIX: LLVM ERROR: Tail call support is unimplemented on AIX.
+; CHECK-AIX: LLVM ERROR: Tail call support for non-fastcc calling convention is unimplemented on AIX.
 }
 
 define dso_local fastcc i8 @LVComputationKind(

``````````

</details>


https://github.com/llvm/llvm-project/pull/161690


More information about the llvm-commits mailing list