[llvm] [PowerPC] Initial support of tail call optimization on AIX tail call (PR #161690)

zhijian lin via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 31 10:32:38 PDT 2025


https://github.com/diggerlin updated https://github.com/llvm/llvm-project/pull/161690

>From d776d4e7d3941f99ae2b7c70c3829a3d3cf96140 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Thu, 2 Oct 2025 13:45:56 +0000
Subject: [PATCH 01/11] implement tail call

---
 llvm/lib/Target/PowerPC/PPCFrameLowering.cpp  |  6 +++
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   | 47 +++++++++++++++----
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td      |  2 +
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       |  4 ++
 .../PowerPC/ppc64-sibcall-shrinkwrap.ll       |  2 +-
 5 files changed, 51 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 910bc9d281259..efa75fd5681b3 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -1935,6 +1935,9 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
     else if (JumpTarget.isSymbol())
       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
         addExternalSymbol(JumpTarget.getSymbolName());
+    else if (JumpTarget.isMCSymbol())
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB))
+          .addSym(JumpTarget.getMCSymbol());
     else
       llvm_unreachable("Expecting Global or External Symbol");
   } else if (RetOpcode == PPC::TCRETURNri) {
@@ -1954,6 +1957,9 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
     else if (JumpTarget.isSymbol())
       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
         addExternalSymbol(JumpTarget.getSymbolName());
+    else if (JumpTarget.isMCSymbol())
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8))
+          .addSym(JumpTarget.getMCSymbol());
     else
       llvm_unreachable("Expecting Global or External Symbol");
   } else if (RetOpcode == PPC::TCRETURNri8) {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index f69218056fc44..a35cdf4e2c282 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5190,7 +5190,13 @@ bool PPCTargetLowering::IsEligibleForTailCallOptimization(
     const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
     CallingConv::ID CallerCC, bool isVarArg,
     const SmallVectorImpl<ISD::InputArg> &Ins) const {
-  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
+  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
+
+  // Enable SCO on AIX.
+  if (!TailCallOpt && !Subtarget.isAIXABI())
+    return false;
+
+  if (DisableSCO)
     return false;
 
   // Variable argument functions are not supported.
@@ -5869,6 +5875,7 @@ SDValue PPCTargetLowering::FinishCall(
             Callee.getOpcode() == ISD::TargetExternalSymbol ||
             Callee.getOpcode() == ISD::TargetGlobalAddress ||
             isa<ConstantSDNode>(Callee) ||
+            (Subtarget.isAIXABI() && Callee.getOpcode() == ISD::MCSymbol) ||
             (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
            "Expecting a global address, external symbol, absolute value, "
            "register or an indirect tail call when PC Relative calls are "
@@ -7244,8 +7251,10 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
           CallConv == CallingConv::Fast) &&
          "Unexpected calling convention!");
 
-  if (getTargetMachine().Options.GuaranteedTailCallOpt)
-    report_fatal_error("Tail call support is unimplemented on AIX.");
+  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+      CallConv != CallingConv::Fast)
+    report_fatal_error("Tail call support for non-fastcc calling convention is "
+                       "unimplemented on AIX.");
 
   if (useSoftFloat())
     report_fatal_error("Soft float support is unimplemented on AIX.");
@@ -7254,6 +7263,9 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
 
   const bool IsPPC64 = Subtarget.isPPC64();
   const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
+  // Potential tail calls could cause overwriting of argument stack slots.
+  const bool IsImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+                             (CallConv == CallingConv::Fast));
 
   // Assign locations to all of the incoming arguments.
   SmallVector<CCValAssign, 16> ArgLocs;
@@ -7319,10 +7331,6 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
       // Objects are right-justified because AIX is big-endian.
       if (LocSize > ValSize)
         CurArgOffset += LocSize - ValSize;
-      // Potential tail calls could cause overwriting of argument stack slots.
-      const bool IsImmutable =
-          !(getTargetMachine().Options.GuaranteedTailCallOpt &&
-            (CallConv == CallingConv::Fast));
       int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
       SDValue ArgValue =
@@ -7616,6 +7624,8 @@ SDValue PPCTargetLowering::LowerCall_AIX(
   // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
   const bool IsPPC64 = Subtarget.isPPC64();
+  bool IsSibCall =
+      CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt;
   const EVT PtrVT = getPointerTy(DAG.getDataLayout());
   const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
   CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
@@ -7631,13 +7641,25 @@ SDValue PPCTargetLowering::LowerCall_AIX(
   const unsigned NumBytes = std::max<unsigned>(
       LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
 
+  int SPDiff =
+      IsSibCall ? 0 : CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
+
+  // To protect arguments on the stack from being clobbered in a tail call,
+  // force all the loads to happen before doing any other lowering.
+  if (CFlags.IsTailCall)
+    Chain = DAG.getStackArgumentTokenFactor(Chain);
+
   // Adjust the stack pointer for the new arguments...
   // These operations are automatically eliminated by the prolog/epilog pass.
-  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
+  if (!IsSibCall)
+    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
   SDValue CallSeqStart = Chain;
+  SDValue LROp, FPOp;
+  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
 
   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
   SmallVector<SDValue, 8> MemOpChains;
+  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
 
   // Set up a copy of the stack pointer for loading and storing any
   // arguments that may not fit in the registers available for argument
@@ -7814,6 +7836,7 @@ SDValue PPCTargetLowering::LowerCall_AIX(
     }
 
     if (VA.isMemLoc()) {
+      if (!CFlags.IsTailCall) {
       SDValue PtrOff =
           DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
@@ -7821,6 +7844,9 @@ SDValue PPCTargetLowering::LowerCall_AIX(
           DAG.getStore(Chain, dl, Arg, PtrOff,
                        MachinePointerInfo::getStack(MF, VA.getLocMemOffset()),
                        Subtarget.getFrameLowering()->getStackAlign()));
+      } else
+        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff,
+                                 VA.getLocMemOffset(), TailCallArguments);
 
       continue;
     }
@@ -7903,7 +7929,10 @@ SDValue PPCTargetLowering::LowerCall_AIX(
     InGlue = Chain.getValue(1);
   }
 
-  const int SPDiff = 0;
+  if (CFlags.IsTailCall && !IsSibCall)
+    PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
+                    TailCallArguments);
+
   return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
 }
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 269d30318bca8..aa913ae82484e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -474,6 +474,8 @@ def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
 def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
           (TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
 
+def : Pat<(PPCtc_return (i64 mcsym:$dst), imm:$imm),
+          (TCRETURNdi8 mcsym:$dst, imm:$imm)>;
 
 // 64-bit CR instructions
 let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 1c45050cdf9ca..f2ce9db9ab1c9 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -3155,6 +3155,10 @@ def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
           (TCRETURNri CTRRC:$dst, imm:$imm)>;
 
 def : Pat<(int_ppc_fence), (FENCE)>;
+
+def : Pat<(PPCtc_return (i32 mcsym:$dst), imm:$imm),
+          (TCRETURNdi mcsym:$dst, imm:$imm)>;
+
 def : Pat<(int_ppc_readflm), (MFFS)>;
 def : Pat<(int_ppc_mffsl), (MFFSL)>;
 
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll b/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll
index 1a3aaaec037ab..d0a7444e64458 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll
@@ -36,7 +36,7 @@ exit:
 ; CHECK-SCO-SR: stdu 1, -{{[0-9]+}}(1)
 ; CHECK-SCO-SR: bl __assert_fail
 
-; CHECK-AIX: LLVM ERROR: Tail call support is unimplemented on AIX.
+; CHECK-AIX: LLVM ERROR: Tail call support for non-fastcc calling convention is unimplemented on AIX.
 }
 
 define dso_local fastcc i8 @LVComputationKind(

>From 601888756471e952e9b6ab28ccc036e1d060f9b5 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Thu, 2 Oct 2025 15:44:03 +0000
Subject: [PATCH 02/11] add test case

---
 llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll | 111 ++++++++++++++++++
 1 file changed, 111 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll

diff --git a/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll b/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll
new file mode 100644
index 0000000000000..a23fd2a8ae2c0
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix < %s | FileCheck --check-prefix=AIX-32 %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix < %s | FileCheck --check-prefix=AIX-64 %s
+
+define hidden fastcc i32 @k(i32 %a, i32 %b) {
+; AIX-32-LABEL: k:
+; AIX-32:       # %bb.0: # %entry
+; AIX-32-NEXT:    add 3, 3, 4
+; AIX-32-NEXT:    blr
+;
+; AIX-64-LABEL: k:
+; AIX-64:       # %bb.0: # %entry
+; AIX-64-NEXT:    add 3, 3, 4
+; AIX-64-NEXT:    blr
+entry:
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+define hidden fastcc i32 @ff(i32 %a) {
+; AIX-32-LABEL: ff:
+; AIX-32:       # %bb.0: # %entry
+; AIX-32-NEXT:    blr
+;
+; AIX-64-LABEL: ff:
+; AIX-64:       # %bb.0: # %entry
+; AIX-64-NEXT:    blr
+entry:
+  ret i32 %a
+}
+
+define fastcc i32 @f(i32 %a, i32 %b) {
+; AIX-32-LABEL: f:
+; AIX-32:       # %bb.0: # %entry
+; AIX-32-NEXT:    b .ff
+; AIX-32-NEXT:    #TC_RETURNd .ff 0
+;
+; AIX-64-LABEL: f:
+; AIX-64:       # %bb.0: # %entry
+; AIX-64-NEXT:    clrldi 3, 3, 32
+; AIX-64-NEXT:    b .ff
+; AIX-64-NEXT:    #TC_RETURNd8 .ff 0
+entry:
+  %r = tail call fastcc i32 @ff(i32 %a)
+  ret i32 %r
+}
+
+define fastcc i32 @kk(i32 %a) {
+; AIX-32-LABEL: kk:
+; AIX-32:       # %bb.0: # %entry
+; AIX-32-NEXT:    li 4, 1024
+; AIX-32-NEXT:    b .k
+; AIX-32-NEXT:    #TC_RETURNd .k 0
+;
+; AIX-64-LABEL: kk:
+; AIX-64:       # %bb.0: # %entry
+; AIX-64-NEXT:    clrldi 3, 3, 32
+; AIX-64-NEXT:    li 4, 1024
+; AIX-64-NEXT:    b .k
+; AIX-64-NEXT:    #TC_RETURNd8 .k 0
+entry:
+  %r = tail call fastcc i32 @k(i32 %a, i32 1024)
+  ret i32 %r
+}
+
+define fastcc i32 @g(i32 %a) {
+; AIX-32-LABEL: g:
+; AIX-32:       # %bb.0: # %entry
+; AIX-32-NEXT:    b .ff
+; AIX-32-NEXT:    #TC_RETURNd .ff 0
+;
+; AIX-64-LABEL: g:
+; AIX-64:       # %bb.0: # %entry
+; AIX-64-NEXT:    clrldi 3, 3, 32
+; AIX-64-NEXT:    b .ff
+; AIX-64-NEXT:    #TC_RETURNd8 .ff 0
+entry:
+  %r = tail call fastcc i32 @ff(i32 %a)
+  ret i32 %r
+}
+
+define fastcc i32 @gg(i32 %a) {
+; AIX-32-LABEL: gg:
+; AIX-32:       # %bb.0: # %entry
+; AIX-32-NEXT:    mflr 0
+; AIX-32-NEXT:    stwu 1, -64(1)
+; AIX-32-NEXT:    stw 0, 72(1)
+; AIX-32-NEXT:    bl .ff
+; AIX-32-NEXT:    addi 3, 3, 1
+; AIX-32-NEXT:    addi 1, 1, 64
+; AIX-32-NEXT:    lwz 0, 8(1)
+; AIX-32-NEXT:    mtlr 0
+; AIX-32-NEXT:    blr
+;
+; AIX-64-LABEL: gg:
+; AIX-64:       # %bb.0: # %entry
+; AIX-64-NEXT:    mflr 0
+; AIX-64-NEXT:    stdu 1, -112(1)
+; AIX-64-NEXT:    clrldi 3, 3, 32
+; AIX-64-NEXT:    std 0, 128(1)
+; AIX-64-NEXT:    bl .ff
+; AIX-64-NEXT:    addi 3, 3, 1
+; AIX-64-NEXT:    addi 1, 1, 112
+; AIX-64-NEXT:    ld 0, 16(1)
+; AIX-64-NEXT:    mtlr 0
+; AIX-64-NEXT:    blr
+entry:
+  %r = tail call fastcc i32 @ff(i32 %a)
+  %r.0 = add i32 %r, 1
+  ret i32 %r.0
+}

>From 86ff8bfeead5462658832c088b3424921b60aee7 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 22 Oct 2025 15:32:00 +0000
Subject: [PATCH 03/11] implement calling convention c

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 150 +++++++++++++++++---
 llvm/lib/Target/PowerPC/PPCISelLowering.h   |   6 +
 2 files changed, 134 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index a35cdf4e2c282..a75334fdca016 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5082,9 +5082,8 @@ static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
 
 // Returns true if TCO is possible between the callers and callees
 // calling conventions.
-static bool
-areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
-                                    CallingConv::ID CalleeCC) {
+static bool areCallingConvEligibleForTCO(CallingConv::ID CallerCC,
+                                         CallingConv::ID CalleeCC) {
   // Tail calls are possible with fastcc and ccc.
   auto isTailCallableCC  = [] (CallingConv::ID CC){
       return  CC == CallingConv::C || CC == CallingConv::Fast;
@@ -5113,7 +5112,7 @@ bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
   if (isVarArg) return false;
 
   // Check that the calling conventions are compatible for tco.
-  if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
+  if (!areCallingConvEligibleForTCO(CallerCC, CalleeCC))
     return false;
 
   // Caller contains any byval parameter is not supported.
@@ -5183,6 +5182,110 @@ bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
   return true;
 }
 
+static bool
+needStackSlotPassParameters_AIX(const PPCSubtarget &Subtarget,
+                                const SmallVectorImpl<ISD::OutputArg> &Outs) {
+  const bool IsPPC64 = Subtarget.isPPC64();
+  const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
+  const unsigned PhyGPRsNum = 8;
+  const unsigned PhyVRsNum = 12;
+  unsigned PhyGPRAllocated = 0;
+  unsigned PhyVRAllocated = 0;
+
+  for (unsigned i = 0; i != Outs.size(); ++i) {
+    MVT ArgVT = Outs[i].VT;
+    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+    if (ArgFlags.isByVal()) {
+      const unsigned ByValSize = ArgFlags.getByValSize();
+      const unsigned StackSize = alignTo(ByValSize, PtrAlign);
+      PhyGPRAllocated += StackSize / PtrAlign.value();
+      if (PhyGPRAllocated > PhyGPRsNum)
+        return true;
+      continue;
+    }
+
+    switch (ArgVT.SimpleTy) {
+    default:
+      report_fatal_error("Unhandled value type for argument.");
+    case MVT::i64:
+      // i64 arguments should have been split to i32 for PPC32.
+      assert(IsPPC64 && "PPC32 should have split i64 values.");
+      [[fallthrough]];
+    case MVT::i1:
+    case MVT::i32:
+      if (++PhyGPRAllocated > PhyGPRsNum)
+        return true;
+      break;
+    case MVT::f32:
+    case MVT::f64: {
+      const unsigned StoreSize = ArgVT.getStoreSize();
+      PhyGPRAllocated += StoreSize / PtrAlign.value();
+      if (PhyGPRAllocated > PhyGPRsNum)
+        return true;
+      break;
+    }
+    case MVT::v4f32:
+    case MVT::v4i32:
+    case MVT::v8i16:
+    case MVT::v16i8:
+    case MVT::v2i64:
+    case MVT::v2f64:
+    case MVT::v1i128:
+      if (++PhyVRAllocated > PhyVRsNum)
+        return true;
+    }
+  }
+
+  return false;
+}
+
+bool PPCTargetLowering::IsEligibleForTailCallOptimization_AIX(
+    const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
+    CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
+    const SmallVectorImpl<ISD::OutputArg> &Outs, const Function *CallerFunc,
+    bool isCalleeExternalSymbol) const {
+  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
+
+  if (DisableSCO && !TailCallOpt)
+    return false;
+
+  // Variadic argument functions are not supported.
+  if (isVarArg)
+    return false;
+
+  // Check that the calling conventions are compatible for tco.
+  if (!areCallingConvEligibleForTCO(CallerCC, CalleeCC))
+    return false;
+
+  if (!Subtarget.isUsingPCRelativeCalls() &&
+      !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
+    return false;
+
+  // TCO allows altering callee ABI, so we don't have to check further.
+  if (CalleeCC == CallingConv::Fast && TailCallOpt)
+    return true;
+
+  if (DisableSCO)
+    return false;
+
+  if (CallerCC != CalleeCC && needStackSlotPassParameters_AIX(Subtarget, Outs))
+    return false;
+
+  // If the callee uses the same argument list as the caller, then we can
+  // apply SCO in this case. If not, we need to check whether the callee needs
+  // stack slots for passing arguments.
+  // PC Relative tail calls may not have a CallBase.
+  // If there is no CallBase we cannot verify if we have the same argument
+  // list so assume that we don't have the same argument list.
+  if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
+      needStackSlotPassParameters_AIX(Subtarget, Outs))
+    return false;
+  else if (!CB && needStackSlotPassParameters_AIX(Subtarget, Outs))
+    return false;
+
+  return true;
+}
+
 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
 /// for tail call optimization. Targets which want to do tail call
 /// optimization should implement this function.
@@ -5943,9 +6046,14 @@ bool PPCTargetLowering::isEligibleForTCO(
     return IsEligibleForTailCallOptimization_64SVR4(
         CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
         isCalleeExternalSymbol);
-  else
-    return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
-                                             isVarArg, Ins);
+
+  else if (Subtarget.isAIXABI())
+    return IsEligibleForTailCallOptimization_AIX(CalleeGV, CalleeCC, CallerCC,
+                                                 CB, isVarArg, Outs, CallerFunc,
+                                                 isCalleeExternalSymbol);
+
+  return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
+                                           isVarArg, Ins);
 }
 
 SDValue
@@ -7251,11 +7359,6 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
           CallConv == CallingConv::Fast) &&
          "Unexpected calling convention!");
 
-  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
-      CallConv != CallingConv::Fast)
-    report_fatal_error("Tail call support for non-fastcc calling convention is "
-                       "unimplemented on AIX.");
-
   if (useSoftFloat())
     report_fatal_error("Soft float support is unimplemented on AIX.");
 
@@ -7641,8 +7744,11 @@ SDValue PPCTargetLowering::LowerCall_AIX(
   const unsigned NumBytes = std::max<unsigned>(
       LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
 
-  int SPDiff =
-      IsSibCall ? 0 : CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
+  unsigned AlignNumBytes =
+      EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
+  int SPDiff = IsSibCall ? 0
+                         : CalculateTailCallSPDiff(DAG, CFlags.IsTailCall,
+                                                   AlignNumBytes);
 
   // To protect arguments on the stack from being clobbered in a tail call,
   // force all the loads to happen before doing any other lowering.
@@ -7928,11 +8034,11 @@ SDValue PPCTargetLowering::LowerCall_AIX(
     Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
     InGlue = Chain.getValue(1);
   }
-
-  if (CFlags.IsTailCall && !IsSibCall)
-    PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
-                    TailCallArguments);
-
+  /*
+    if (CFlags.IsTailCall && !IsSibCall)
+      PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
+                      TailCallArguments);
+  */
   return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
 }
@@ -19271,9 +19377,9 @@ bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
 
   // Make sure the callee and caller calling conventions are eligible for tco.
   const Function *Caller = CI->getParent()->getParent();
-  if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
-                                           CI->getCallingConv()))
-      return false;
+  if (!areCallingConvEligibleForTCO(Caller->getCallingConv(),
+                                    CI->getCallingConv()))
+    return false;
 
   // If the function is local then we have a good chance at tail-calling it
   return getTargetMachine().shouldAssumeDSOLocal(Callee);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 669430550f4e6..4cb0042c2f32e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1250,6 +1250,12 @@ namespace llvm {
         CallingConv::ID CallerCC, bool isVarArg,
         const SmallVectorImpl<ISD::InputArg> &Ins) const;
 
+    bool IsEligibleForTailCallOptimization_AIX(
+        const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
+        CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
+        const SmallVectorImpl<ISD::OutputArg> &Outs, const Function *CallerFunc,
+        bool isCalleeExternalSymbol) const;
+
     bool IsEligibleForTailCallOptimization_64SVR4(
         const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
         CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,

>From 154a7cb92936773519293b61b1b3f1dcb6de80bd Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 22 Oct 2025 17:33:34 +0000
Subject: [PATCH 04/11] change tail call to call in test case

---
 llvm/test/CodeGen/PowerPC/aix-vector-byval.ll |   2 +-
 llvm/test/CodeGen/PowerPC/byval-lhs.ll        |   4 +-
 .../PowerPC/global-merge-aix-sections.ll      |   8 +-
 .../CodeGen/PowerPC/lower-scalar-mass-afn.ll  | 158 +++++++++---------
 .../CodeGen/PowerPC/lower-scalar-mass-fast.ll | 158 +++++++++---------
 .../PowerPC/lower-scalar-mass-nofast.ll       | 158 +++++++++---------
 llvm/test/CodeGen/PowerPC/merge-private.ll    |   4 +-
 .../PowerPC/mergeable-string-pool-large.ll    |   2 +-
 .../PowerPC/mergeable-string-pool-tls.ll      |  10 +-
 .../CodeGen/PowerPC/mergeable-string-pool.ll  |  24 +--
 llvm/test/CodeGen/PowerPC/milicode32.ll       |   2 +-
 llvm/test/CodeGen/PowerPC/milicode64.ll       |   2 +-
 ...ow-025-075-nointrinsic-scalar-mass-fast.ll |  24 +--
 llvm/test/CodeGen/PowerPC/ppc-rotate-clear.ll |  36 ++--
 .../PowerPC/ppc64-sibcall-shrinkwrap.ll       |   6 +-
 llvm/test/CodeGen/PowerPC/save-reg-params.ll  |  40 ++---
 llvm/test/CodeGen/PowerPC/undef-args.ll       |  12 +-
 17 files changed, 325 insertions(+), 325 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll b/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll
index 8fcac7b6a5921..aa5bfe96284a9 100644
--- a/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll
@@ -42,7 +42,7 @@ define i32 @caller() {
   ; 64BIT-NEXT:   BLR8 implicit $lr8, implicit $rm, implicit $x3
   entry:
   %vs = alloca %struct.B, align 16
-  %call = tail call i32 @vec_struct_test(i32 0, ptr nonnull byval(%struct.B) align 16 %vs)
+  %call = call i32 @vec_struct_test(i32 0, ptr nonnull byval(%struct.B) align 16 %vs)
   ret i32 %call
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/byval-lhs.ll b/llvm/test/CodeGen/PowerPC/byval-lhs.ll
index aef374a5edbb7..7bc7850a16954 100644
--- a/llvm/test/CodeGen/PowerPC/byval-lhs.ll
+++ b/llvm/test/CodeGen/PowerPC/byval-lhs.ll
@@ -38,7 +38,7 @@ define void @bar1(i64 %a) nounwind {
 ; AIX-NEXT:    blr
   %s = alloca %struct.type8, align 8
   store i64 %a, ptr %s, align 8
-  %call = tail call ptr @f0(ptr noundef nonnull byval(%struct.type8) align 8 %s)
+  %call = call ptr @f0(ptr noundef nonnull byval(%struct.type8) align 8 %s)
   ret void
 }
 
@@ -77,7 +77,7 @@ define void @bar2(i64 %a) nounwind {
   store i64 %a, ptr %index1, align 8
   %index2 = getelementptr inbounds i64, ptr %s, i32 1
   store i64 %a, ptr %index2, align 8
-  %call = tail call ptr @f1(ptr noundef nonnull byval(%struct.type16) align 8 %s)
+  %call = call ptr @f1(ptr noundef nonnull byval(%struct.type16) align 8 %s)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/global-merge-aix-sections.ll b/llvm/test/CodeGen/PowerPC/global-merge-aix-sections.ll
index c001633763a1f..45270b8f28252 100644
--- a/llvm/test/CodeGen/PowerPC/global-merge-aix-sections.ll
+++ b/llvm/test/CodeGen/PowerPC/global-merge-aix-sections.ll
@@ -17,10 +17,10 @@
 ; Function Attrs: nounwind
 define void @g() {
 entry:
-  tail call void @f(ptr noundef nonnull @y, ptr noundef nonnull @z)
-  tail call void @f(ptr noundef nonnull @l, ptr noundef nonnull @z)
-  tail call void @h(ptr noundef nonnull @u)
-  tail call void @s(ptr noundef nonnull @myStruct1)
+  call void @f(ptr noundef nonnull @y, ptr noundef nonnull @z)
+  call void @f(ptr noundef nonnull @l, ptr noundef nonnull @z)
+  call void @h(ptr noundef nonnull @u)
+  call void @s(ptr noundef nonnull @myStruct1)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/lower-scalar-mass-afn.ll b/llvm/test/CodeGen/PowerPC/lower-scalar-mass-afn.ll
index f3085ccc85c88..98f89db6feb91 100644
--- a/llvm/test/CodeGen/PowerPC/lower-scalar-mass-afn.ll
+++ b/llvm/test/CodeGen/PowerPC/lower-scalar-mass-afn.ll
@@ -86,7 +86,7 @@ define float @acosf_f32(float %a) {
 ; CHECK: __xl_acosf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @acosf(float %a)
+  %call = call afn float @acosf(float %a)
   ret float %call
 }
 
@@ -95,7 +95,7 @@ define float @acoshf_f32(float %a) {
 ; CHECK: __xl_acoshf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @acoshf(float %a)
+  %call = call afn float @acoshf(float %a)
   ret float %call
 }
 
@@ -104,7 +104,7 @@ define float @asinf_f32(float %a) {
 ; CHECK: __xl_asinf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @asinf(float %a)
+  %call = call afn float @asinf(float %a)
   ret float %call
 }
 
@@ -113,7 +113,7 @@ define float @asinhf_f32(float %a) {
 ; CHECK: __xl_asinhf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @asinhf(float %a)
+  %call = call afn float @asinhf(float %a)
   ret float %call
 }
 
@@ -122,7 +122,7 @@ define float @atan2f_f32(float %a, float %b) {
 ; CHECK: __xl_atan2f
 ; CHECK: blr
 entry:
-  %call = tail call afn float @atan2f(float %a, float %b)
+  %call = call afn float @atan2f(float %a, float %b)
   ret float %call
 }
 
@@ -131,7 +131,7 @@ define float @atanf_f32(float %a) {
 ; CHECK: __xl_atanf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @atanf(float %a)
+  %call = call afn float @atanf(float %a)
   ret float %call
 }
 
@@ -140,7 +140,7 @@ define float @atanhf_f32(float %a) {
 ; CHECK: __xl_atanhf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @atanhf(float %a)
+  %call = call afn float @atanhf(float %a)
   ret float %call
 }
 
@@ -149,7 +149,7 @@ define float @cbrtf_f32(float %a) {
 ; CHECK: __xl_cbrtf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @cbrtf(float %a)
+  %call = call afn float @cbrtf(float %a)
   ret float %call
 }
 
@@ -158,7 +158,7 @@ define float @copysignf_f32(float %a, float %b) {
 ; CHECK: copysignf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @copysignf(float %a, float %b)
+  %call = call afn float @copysignf(float %a, float %b)
   ret float %call
 }
 
@@ -167,7 +167,7 @@ define float @cosf_f32(float %a) {
 ; CHECK: __xl_cosf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @cosf(float %a)
+  %call = call afn float @cosf(float %a)
   ret float %call
 }
 
@@ -176,7 +176,7 @@ define float @coshf_f32(float %a) {
 ; CHECK: __xl_coshf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @coshf(float %a)
+  %call = call afn float @coshf(float %a)
   ret float %call
 }
 
@@ -185,7 +185,7 @@ define float @erfcf_f32(float %a) {
 ; CHECK: __xl_erfcf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @erfcf(float %a)
+  %call = call afn float @erfcf(float %a)
   ret float %call
 }
 
@@ -194,7 +194,7 @@ define float @erff_f32(float %a) {
 ; CHECK: __xl_erff
 ; CHECK: blr
 entry:
-  %call = tail call afn float @erff(float %a)
+  %call = call afn float @erff(float %a)
   ret float %call
 }
 
@@ -203,7 +203,7 @@ define float @expf_f32(float %a) {
 ; CHECK: __xl_expf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @expf(float %a)
+  %call = call afn float @expf(float %a)
   ret float %call
 }
 
@@ -212,7 +212,7 @@ define float @expm1f_f32(float %a) {
 ; CHECK: __xl_expm1f
 ; CHECK: blr
 entry:
-  %call = tail call afn float @expm1f(float %a)
+  %call = call afn float @expm1f(float %a)
   ret float %call
 }
 
@@ -221,7 +221,7 @@ define float @hypotf_f32(float %a, float %b) {
 ; CHECK: __xl_hypotf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @hypotf(float %a, float %b)
+  %call = call afn float @hypotf(float %a, float %b)
   ret float %call
 }
 
@@ -230,7 +230,7 @@ define float @lgammaf_f32(float %a) {
 ; CHECK: __xl_lgammaf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @lgammaf(float %a)
+  %call = call afn float @lgammaf(float %a)
   ret float %call
 }
 
@@ -239,7 +239,7 @@ define float @log10f_f32(float %a) {
 ; CHECK: __xl_log10f
 ; CHECK: blr
 entry:
-  %call = tail call afn float @log10f(float %a)
+  %call = call afn float @log10f(float %a)
   ret float %call
 }
 
@@ -248,7 +248,7 @@ define float @log1pf_f32(float %a) {
 ; CHECK: __xl_log1pf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @log1pf(float %a)
+  %call = call afn float @log1pf(float %a)
   ret float %call
 }
 
@@ -257,7 +257,7 @@ define float @logf_f32(float %a) {
 ; CHECK: __xl_logf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @logf(float %a)
+  %call = call afn float @logf(float %a)
   ret float %call
 }
 
@@ -266,7 +266,7 @@ define float @powf_f32(float %a, float %b) {
 ; CHECK: __xl_powf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @powf(float %a, float %b)
+  %call = call afn float @powf(float %a, float %b)
   ret float %call
 }
 
@@ -275,7 +275,7 @@ define float @rintf_f32(float %a) {
 ; CHECK-NOT: __xl_rintf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @rintf(float %a)
+  %call = call afn float @rintf(float %a)
   ret float %call
 }
 
@@ -284,7 +284,7 @@ define float @sinf_f32(float %a) {
 ; CHECK: __xl_sinf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @sinf(float %a)
+  %call = call afn float @sinf(float %a)
   ret float %call
 }
 
@@ -293,7 +293,7 @@ define float @sinhf_f32(float %a) {
 ; CHECK: __xl_sinhf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @sinhf(float %a)
+  %call = call afn float @sinhf(float %a)
   ret float %call
 }
 
@@ -302,7 +302,7 @@ define float @tanf_f32(float %a) {
 ; CHECK: __xl_tanf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @tanf(float %a)
+  %call = call afn float @tanf(float %a)
   ret float %call
 }
 
@@ -311,7 +311,7 @@ define float @tanhf_f32(float %a) {
 ; CHECK: __xl_tanhf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @tanhf(float %a)
+  %call = call afn float @tanhf(float %a)
   ret float %call
 }
 
@@ -320,7 +320,7 @@ define double @acos_f64(double %a) {
 ; CHECK: __xl_acos
 ; CHECK: blr
 entry:
-  %call = tail call afn double @acos(double %a)
+  %call = call afn double @acos(double %a)
   ret double %call
 }
 
@@ -329,7 +329,7 @@ define double @acosh_f64(double %a) {
 ; CHECK: __xl_acosh
 ; CHECK: blr
 entry:
-  %call = tail call afn double @acosh(double %a)
+  %call = call afn double @acosh(double %a)
   ret double %call
 }
 
@@ -338,7 +338,7 @@ define double @anint_f64(double %a) {
 ; CHECK-NOT: __xl_anint
 ; CHECK: blr
 entry:
-  %call = tail call afn double @anint(double %a)
+  %call = call afn double @anint(double %a)
   ret double %call
 }
 
@@ -347,7 +347,7 @@ define double @asin_f64(double %a) {
 ; CHECK: __xl_asin
 ; CHECK: blr
 entry:
-  %call = tail call afn double @asin(double %a)
+  %call = call afn double @asin(double %a)
   ret double %call
 }
 
@@ -356,7 +356,7 @@ define double @asinh_f64(double %a) {
 ; CHECK: __xl_asinh
 ; CHECK: blr
 entry:
-  %call = tail call afn double @asinh(double %a)
+  %call = call afn double @asinh(double %a)
   ret double %call
 }
 
@@ -365,7 +365,7 @@ define double @atan_f64(double %a) {
 ; CHECK: __xl_atan
 ; CHECK: blr
 entry:
-  %call = tail call afn double @atan(double %a)
+  %call = call afn double @atan(double %a)
   ret double %call
 }
 
@@ -374,7 +374,7 @@ define double @atan2_f64(double %a, double %b) {
 ; CHECK: __xl_atan2
 ; CHECK: blr
 entry:
-  %call = tail call afn double @atan2(double %a, double %b)
+  %call = call afn double @atan2(double %a, double %b)
   ret double %call
 }
 
@@ -383,7 +383,7 @@ define double @atanh_f64(double %a) {
 ; CHECK: __xl_atanh
 ; CHECK: blr
 entry:
-  %call = tail call afn double @atanh(double %a)
+  %call = call afn double @atanh(double %a)
   ret double %call
 }
 
@@ -392,7 +392,7 @@ define double @cbrt_f64(double %a) {
 ; CHECK: __xl_cbrt
 ; CHECK: blr
 entry:
-  %call = tail call afn double @cbrt(double %a)
+  %call = call afn double @cbrt(double %a)
   ret double %call
 }
 
@@ -401,7 +401,7 @@ define double @copysign_f64(double %a, double %b) {
 ; CHECK: copysign
 ; CHECK: blr
 entry:
-  %call = tail call afn double @copysign(double %a, double %b)
+  %call = call afn double @copysign(double %a, double %b)
   ret double %call
 }
 
@@ -410,7 +410,7 @@ define double @cos_f64(double %a) {
 ; CHECK: __xl_cos
 ; CHECK: blr
 entry:
-  %call = tail call afn double @cos(double %a)
+  %call = call afn double @cos(double %a)
   ret double %call
 }
 
@@ -419,7 +419,7 @@ define double @cosh_f64(double %a) {
 ; CHECK: __xl_cosh
 ; CHECK: blr
 entry:
-  %call = tail call afn double @cosh(double %a)
+  %call = call afn double @cosh(double %a)
   ret double %call
 }
 
@@ -428,7 +428,7 @@ define double @cosisin_f64(double %a) {
 ; CHECK-NOT: __xl_cosisin
 ; CHECK: blr
 entry:
-  %call = tail call afn double @cosisin(double %a)
+  %call = call afn double @cosisin(double %a)
   ret double %call
 }
 
@@ -437,7 +437,7 @@ define double @dnint_f64(double %a) {
 ; CHECK-NOT: __xl_dnint
 ; CHECK: blr
 entry:
-  %call = tail call afn double @dnint(double %a)
+  %call = call afn double @dnint(double %a)
   ret double %call
 }
 
@@ -446,7 +446,7 @@ define double @erf_f64(double %a) {
 ; CHECK: __xl_erf
 ; CHECK: blr
 entry:
-  %call = tail call afn double @erf(double %a)
+  %call = call afn double @erf(double %a)
   ret double %call
 }
 
@@ -455,7 +455,7 @@ define double @erfc_f64(double %a) {
 ; CHECK: __xl_erfc
 ; CHECK: blr
 entry:
-  %call = tail call afn double @erfc(double %a)
+  %call = call afn double @erfc(double %a)
   ret double %call
 }
 
@@ -464,7 +464,7 @@ define double @exp_f64(double %a) {
 ; CHECK: __xl_exp
 ; CHECK: blr
 entry:
-  %call = tail call afn double @exp(double %a)
+  %call = call afn double @exp(double %a)
   ret double %call
 }
 
@@ -473,7 +473,7 @@ define double @expm1_f64(double %a) {
 ; CHECK: __xl_expm1
 ; CHECK: blr
 entry:
-  %call = tail call afn double @expm1(double %a)
+  %call = call afn double @expm1(double %a)
   ret double %call
 }
 
@@ -482,7 +482,7 @@ define double @hypot_f64(double %a, double %b) {
 ; CHECK: __xl_hypot
 ; CHECK: blr
 entry:
-  %call = tail call afn double @hypot(double %a, double %b)
+  %call = call afn double @hypot(double %a, double %b)
   ret double %call
 }
 
@@ -491,7 +491,7 @@ define double @lgamma_f64(double %a) {
 ; CHECK: __xl_lgamma
 ; CHECK: blr
 entry:
-  %call = tail call afn double @lgamma(double %a)
+  %call = call afn double @lgamma(double %a)
   ret double %call
 }
 
@@ -500,7 +500,7 @@ define double @log_f64(double %a) {
 ; CHECK: __xl_log
 ; CHECK: blr
 entry:
-  %call = tail call afn double @log(double %a)
+  %call = call afn double @log(double %a)
   ret double %call
 }
 
@@ -509,7 +509,7 @@ define double @log10_f64(double %a) {
 ; CHECK: __xl_log10
 ; CHECK: blr
 entry:
-  %call = tail call afn double @log10(double %a)
+  %call = call afn double @log10(double %a)
   ret double %call
 }
 
@@ -518,7 +518,7 @@ define double @log1p_f64(double %a) {
 ; CHECK: __xl_log1p
 ; CHECK: blr
 entry:
-  %call = tail call afn double @log1p(double %a)
+  %call = call afn double @log1p(double %a)
   ret double %call
 }
 
@@ -527,7 +527,7 @@ define double @pow_f64(double %a, double %b) {
 ; CHECK: __xl_pow
 ; CHECK: blr
 entry:
-  %call = tail call afn double @pow(double %a, double %b)
+  %call = call afn double @pow(double %a, double %b)
   ret double %call
 }
 
@@ -536,7 +536,7 @@ define double @rsqrt_f64(double %a) {
 ; CHECK: __xl_rsqrt
 ; CHECK: blr
 entry:
-  %call = tail call afn double @rsqrt(double %a)
+  %call = call afn double @rsqrt(double %a)
   ret double %call
 }
 
@@ -545,7 +545,7 @@ define double @sin_f64(double %a) {
 ; CHECK: __xl_sin
 ; CHECK: blr
 entry:
-  %call = tail call afn double @sin(double %a)
+  %call = call afn double @sin(double %a)
   ret double %call
 }
 
@@ -554,7 +554,7 @@ define double @sincos_f64(double %a) {
 ; CHECK-NOT: __xl_sincos
 ; CHECK: blr
 entry:
-  %call = tail call afn double @sincos(double %a)
+  %call = call afn double @sincos(double %a)
   ret double %call
 }
 
@@ -563,7 +563,7 @@ define double @sinh_f64(double %a) {
 ; CHECK: __xl_sinh
 ; CHECK: blr
 entry:
-  %call = tail call afn double @sinh(double %a)
+  %call = call afn double @sinh(double %a)
   ret double %call
 }
 
@@ -572,7 +572,7 @@ define double @sqrt_f64(double %a) {
 ; CHECK: __xl_sqrt
 ; CHECK: blr
 entry:
-  %call = tail call afn double @sqrt(double %a)
+  %call = call afn double @sqrt(double %a)
   ret double %call
 }
 
@@ -581,7 +581,7 @@ define double @tan_f64(double %a) {
 ; CHECK: __xl_tan
 ; CHECK: blr
 entry:
-  %call = tail call afn double @tan(double %a)
+  %call = call afn double @tan(double %a)
   ret double %call
 }
 
@@ -590,7 +590,7 @@ define double @tanh_f64(double %a) {
 ; CHECK: __xl_tanh
 ; CHECK: blr
 entry:
-  %call = tail call afn double @tanh(double %a)
+  %call = call afn double @tanh(double %a)
   ret double %call
 }
 
@@ -599,7 +599,7 @@ define float @__acosf_finite_f32(float %a) {
 ; CHECK: __xl_acosf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @__acosf_finite(float %a)
+  %call = call afn float @__acosf_finite(float %a)
   ret float %call
 }
 
@@ -608,7 +608,7 @@ define float @__acoshf_finite_f32(float %a) {
 ; CHECK: __xl_acoshf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @__acoshf_finite(float %a)
+  %call = call afn float @__acoshf_finite(float %a)
   ret float %call
 }
 
@@ -617,7 +617,7 @@ define float @__asinf_finite_f32(float %a) {
 ; CHECK: __xl_asinf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @__asinf_finite(float %a)
+  %call = call afn float @__asinf_finite(float %a)
   ret float %call
 }
 
@@ -626,7 +626,7 @@ define float @__atan2f_finite_f32(float %a, float %b) {
 ; CHECK: __xl_atan2f
 ; CHECK: blr
 entry:
-  %call = tail call afn float @__atan2f_finite(float %a, float %b)
+  %call = call afn float @__atan2f_finite(float %a, float %b)
   ret float %call
 }
 
@@ -635,7 +635,7 @@ define float @__atanhf_finite_f32(float %a) {
 ; CHECK: __xl_atanhf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @__atanhf_finite(float %a)
+  %call = call afn float @__atanhf_finite(float %a)
   ret float %call
 }
 
@@ -644,7 +644,7 @@ define float @__coshf_finite_f32(float %a) {
 ; CHECK: __xl_coshf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @__coshf_finite(float %a)
+  %call = call afn float @__coshf_finite(float %a)
   ret float %call
 }
 define float @__expf_finite_f32(float %a) {
@@ -652,7 +652,7 @@ define float @__expf_finite_f32(float %a) {
 ; CHECK: __xl_expf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @__expf_finite(float %a)
+  %call = call afn float @__expf_finite(float %a)
   ret float %call
 }
 define float @__logf_finite_f32(float %a) {
@@ -660,7 +660,7 @@ define float @__logf_finite_f32(float %a) {
 ; CHECK: __xl_logf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @__logf_finite(float %a)
+  %call = call afn float @__logf_finite(float %a)
   ret float %call
 }
 define float @__log10f_finite_f32(float %a) {
@@ -668,7 +668,7 @@ define float @__log10f_finite_f32(float %a) {
 ; CHECK: __xl_log10f
 ; CHECK: blr
 entry:
-  %call = tail call afn float @__log10f_finite(float %a)
+  %call = call afn float @__log10f_finite(float %a)
   ret float %call
 }
 define float @__powf_finite_f32(float %a, float %b) {
@@ -676,7 +676,7 @@ define float @__powf_finite_f32(float %a, float %b) {
 ; CHECK: __xl_powf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @__powf_finite(float %a, float %b)
+  %call = call afn float @__powf_finite(float %a, float %b)
   ret float %call
 }
 define float @__sinhf_finite_f32(float %a) {
@@ -684,7 +684,7 @@ define float @__sinhf_finite_f32(float %a) {
 ; CHECK: __xl_sinhf
 ; CHECK: blr
 entry:
-  %call = tail call afn float @__sinhf_finite(float %a)
+  %call = call afn float @__sinhf_finite(float %a)
   ret float %call
 }
 
@@ -693,7 +693,7 @@ define double @__acos_finite_f64(double %a) {
 ; CHECK: __xl_acos
 ; CHECK: blr
 entry:
-  %call = tail call afn double @__acos_finite(double %a)
+  %call = call afn double @__acos_finite(double %a)
   ret double %call
 }
 
@@ -702,7 +702,7 @@ define double @__acosh_finite_f64(double %a) {
 ; CHECK: __xl_acosh
 ; CHECK: blr
 entry:
-  %call = tail call afn double @__acosh_finite(double %a)
+  %call = call afn double @__acosh_finite(double %a)
   ret double %call
 }
 
@@ -711,7 +711,7 @@ define double @__asin_finite_f64(double %a) {
 ; CHECK: __xl_asin
 ; CHECK: blr
 entry:
-  %call = tail call afn double @__asin_finite(double %a)
+  %call = call afn double @__asin_finite(double %a)
   ret double %call
 }
 
@@ -720,7 +720,7 @@ define double @__atan2_finite_f64(double %a, double %b) {
 ; CHECK: __xl_atan2
 ; CHECK: blr
 entry:
-  %call = tail call afn double @__atan2_finite(double %a, double %b)
+  %call = call afn double @__atan2_finite(double %a, double %b)
   ret double %call
 }
 
@@ -729,7 +729,7 @@ define double @__atanh_finite_f64(double %a) {
 ; CHECK: __xl_atanh
 ; CHECK: blr
 entry:
-  %call = tail call afn double @__atanh_finite(double %a)
+  %call = call afn double @__atanh_finite(double %a)
   ret double %call
 }
 
@@ -738,7 +738,7 @@ define double @__cosh_finite_f64(double %a) {
 ; CHECK: __xl_cosh
 ; CHECK: blr
 entry:
-  %call = tail call afn double @__cosh_finite(double %a)
+  %call = call afn double @__cosh_finite(double %a)
   ret double %call
 }
 
@@ -747,7 +747,7 @@ define double @__exp_finite_f64(double %a) {
 ; CHECK: __xl_exp
 ; CHECK: blr
 entry:
-  %call = tail call afn double @__exp_finite(double %a)
+  %call = call afn double @__exp_finite(double %a)
   ret double %call
 }
 
@@ -756,7 +756,7 @@ define double @__log_finite_f64(double %a) {
 ; CHECK: __xl_log
 ; CHECK: blr
 entry:
-  %call = tail call afn double @__log_finite(double %a)
+  %call = call afn double @__log_finite(double %a)
   ret double %call
 }
 
@@ -765,7 +765,7 @@ define double @__log10_finite_f64(double %a) {
 ; CHECK: __xl_log10
 ; CHECK: blr
 entry:
-  %call = tail call afn double @__log10_finite(double %a)
+  %call = call afn double @__log10_finite(double %a)
   ret double %call
 }
 
@@ -774,7 +774,7 @@ define double @__pow_finite_f64(double %a, double %b) {
 ; CHECK: __xl_pow
 ; CHECK: blr
 entry:
-  %call = tail call afn double @__pow_finite(double %a, double %b)
+  %call = call afn double @__pow_finite(double %a, double %b)
   ret double %call
 }
 
@@ -783,6 +783,6 @@ define double @__sinh_finite_f64(double %a) {
 ; CHECK: __xl_sinh
 ; CHECK: blr
 entry:
-  %call = tail call afn double @__sinh_finite(double %a)
+  %call = call afn double @__sinh_finite(double %a)
   ret double %call
 }
diff --git a/llvm/test/CodeGen/PowerPC/lower-scalar-mass-fast.ll b/llvm/test/CodeGen/PowerPC/lower-scalar-mass-fast.ll
index 8c22d43040d36..f9185dabd005d 100644
--- a/llvm/test/CodeGen/PowerPC/lower-scalar-mass-fast.ll
+++ b/llvm/test/CodeGen/PowerPC/lower-scalar-mass-fast.ll
@@ -86,7 +86,7 @@ define float @acosf_f32(float %a) {
 ; CHECK: __xl_acosf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @acosf(float %a)
+  %call = call nnan ninf afn nsz float @acosf(float %a)
   ret float %call
 }
 
@@ -95,7 +95,7 @@ define float @acoshf_f32(float %a) {
 ; CHECK: __xl_acoshf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @acoshf(float %a)
+  %call = call nnan ninf afn nsz float @acoshf(float %a)
   ret float %call
 }
 
@@ -104,7 +104,7 @@ define float @asinf_f32(float %a) {
 ; CHECK: __xl_asinf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @asinf(float %a)
+  %call = call nnan ninf afn nsz float @asinf(float %a)
   ret float %call
 }
 
@@ -113,7 +113,7 @@ define float @asinhf_f32(float %a) {
 ; CHECK: __xl_asinhf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @asinhf(float %a)
+  %call = call nnan ninf afn nsz float @asinhf(float %a)
   ret float %call
 }
 
@@ -122,7 +122,7 @@ define float @atan2f_f32(float %a, float %b) {
 ; CHECK: __xl_atan2f_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @atan2f(float %a, float %b)
+  %call = call nnan ninf afn nsz float @atan2f(float %a, float %b)
   ret float %call
 }
 
@@ -131,7 +131,7 @@ define float @atanf_f32(float %a) {
 ; CHECK: __xl_atanf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @atanf(float %a)
+  %call = call nnan ninf afn nsz float @atanf(float %a)
   ret float %call
 }
 
@@ -140,7 +140,7 @@ define float @atanhf_f32(float %a) {
 ; CHECK: __xl_atanhf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @atanhf(float %a)
+  %call = call nnan ninf afn nsz float @atanhf(float %a)
   ret float %call
 }
 
@@ -149,7 +149,7 @@ define float @cbrtf_f32(float %a) {
 ; CHECK: __xl_cbrtf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @cbrtf(float %a)
+  %call = call nnan ninf afn nsz float @cbrtf(float %a)
   ret float %call
 }
 
@@ -158,7 +158,7 @@ define float @copysignf_f32(float %a, float %b) {
 ; CHECK: copysignf
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @copysignf(float %a, float %b)
+  %call = call nnan ninf afn nsz float @copysignf(float %a, float %b)
   ret float %call
 }
 
@@ -167,7 +167,7 @@ define float @cosf_f32(float %a) {
 ; CHECK: __xl_cosf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @cosf(float %a)
+  %call = call nnan ninf afn nsz float @cosf(float %a)
   ret float %call
 }
 
@@ -176,7 +176,7 @@ define float @coshf_f32(float %a) {
 ; CHECK: __xl_coshf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @coshf(float %a)
+  %call = call nnan ninf afn nsz float @coshf(float %a)
   ret float %call
 }
 
@@ -185,7 +185,7 @@ define float @erfcf_f32(float %a) {
 ; CHECK: __xl_erfcf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @erfcf(float %a)
+  %call = call nnan ninf afn nsz float @erfcf(float %a)
   ret float %call
 }
 
@@ -194,7 +194,7 @@ define float @erff_f32(float %a) {
 ; CHECK: __xl_erff_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @erff(float %a)
+  %call = call nnan ninf afn nsz float @erff(float %a)
   ret float %call
 }
 
@@ -203,7 +203,7 @@ define float @expf_f32(float %a) {
 ; CHECK: __xl_expf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @expf(float %a)
+  %call = call nnan ninf afn nsz float @expf(float %a)
   ret float %call
 }
 
@@ -212,7 +212,7 @@ define float @expm1f_f32(float %a) {
 ; CHECK: __xl_expm1f_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @expm1f(float %a)
+  %call = call nnan ninf afn nsz float @expm1f(float %a)
   ret float %call
 }
 
@@ -221,7 +221,7 @@ define float @hypotf_f32(float %a, float %b) {
 ; CHECK: __xl_hypotf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @hypotf(float %a, float %b)
+  %call = call nnan ninf afn nsz float @hypotf(float %a, float %b)
   ret float %call
 }
 
@@ -230,7 +230,7 @@ define float @lgammaf_f32(float %a) {
 ; CHECK: __xl_lgammaf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @lgammaf(float %a)
+  %call = call nnan ninf afn nsz float @lgammaf(float %a)
   ret float %call
 }
 
@@ -239,7 +239,7 @@ define float @log10f_f32(float %a) {
 ; CHECK: __xl_log10f_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @log10f(float %a)
+  %call = call nnan ninf afn nsz float @log10f(float %a)
   ret float %call
 }
 
@@ -248,7 +248,7 @@ define float @log1pf_f32(float %a) {
 ; CHECK: __xl_log1pf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @log1pf(float %a)
+  %call = call nnan ninf afn nsz float @log1pf(float %a)
   ret float %call
 }
 
@@ -257,7 +257,7 @@ define float @logf_f32(float %a) {
 ; CHECK: __xl_logf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @logf(float %a)
+  %call = call nnan ninf afn nsz float @logf(float %a)
   ret float %call
 }
 
@@ -266,7 +266,7 @@ define float @powf_f32(float %a, float %b) {
 ; CHECK: __xl_powf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @powf(float %a, float %b)
+  %call = call nnan ninf afn nsz float @powf(float %a, float %b)
   ret float %call
 }
 
@@ -275,7 +275,7 @@ define float @rintf_f32(float %a) {
 ; CHECK-NOT: __xl_rintf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @rintf(float %a)
+  %call = call nnan ninf afn nsz float @rintf(float %a)
   ret float %call
 }
 
@@ -284,7 +284,7 @@ define float @sinf_f32(float %a) {
 ; CHECK: __xl_sinf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @sinf(float %a)
+  %call = call nnan ninf afn nsz float @sinf(float %a)
   ret float %call
 }
 
@@ -293,7 +293,7 @@ define float @sinhf_f32(float %a) {
 ; CHECK: __xl_sinhf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @sinhf(float %a)
+  %call = call nnan ninf afn nsz float @sinhf(float %a)
   ret float %call
 }
 
@@ -302,7 +302,7 @@ define float @tanf_f32(float %a) {
 ; CHECK: __xl_tanf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @tanf(float %a)
+  %call = call nnan ninf afn nsz float @tanf(float %a)
   ret float %call
 }
 
@@ -311,7 +311,7 @@ define float @tanhf_f32(float %a) {
 ; CHECK: __xl_tanhf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @tanhf(float %a)
+  %call = call nnan ninf afn nsz float @tanhf(float %a)
   ret float %call
 }
 
@@ -320,7 +320,7 @@ define double @acos_f64(double %a) {
 ; CHECK: __xl_acos_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @acos(double %a)
+  %call = call nnan ninf afn nsz double @acos(double %a)
   ret double %call
 }
 
@@ -329,7 +329,7 @@ define double @acosh_f64(double %a) {
 ; CHECK: __xl_acosh_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @acosh(double %a)
+  %call = call nnan ninf afn nsz double @acosh(double %a)
   ret double %call
 }
 
@@ -338,7 +338,7 @@ define double @anint_f64(double %a) {
 ; CHECK-NOT: __xl_anint_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @anint(double %a)
+  %call = call nnan ninf afn nsz double @anint(double %a)
   ret double %call
 }
 
@@ -347,7 +347,7 @@ define double @asin_f64(double %a) {
 ; CHECK: __xl_asin_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @asin(double %a)
+  %call = call nnan ninf afn nsz double @asin(double %a)
   ret double %call
 }
 
@@ -356,7 +356,7 @@ define double @asinh_f64(double %a) {
 ; CHECK: __xl_asinh_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @asinh(double %a)
+  %call = call nnan ninf afn nsz double @asinh(double %a)
   ret double %call
 }
 
@@ -365,7 +365,7 @@ define double @atan_f64(double %a) {
 ; CHECK: __xl_atan_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @atan(double %a)
+  %call = call nnan ninf afn nsz double @atan(double %a)
   ret double %call
 }
 
@@ -374,7 +374,7 @@ define double @atan2_f64(double %a) {
 ; CHECK: __xl_atan2_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @atan2(double %a)
+  %call = call nnan ninf afn nsz double @atan2(double %a)
   ret double %call
 }
 
@@ -383,7 +383,7 @@ define double @atanh_f64(double %a) {
 ; CHECK: __xl_atanh_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @atanh(double %a)
+  %call = call nnan ninf afn nsz double @atanh(double %a)
   ret double %call
 }
 
@@ -392,7 +392,7 @@ define double @cbrt_f64(double %a) {
 ; CHECK: __xl_cbrt_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @cbrt(double %a)
+  %call = call nnan ninf afn nsz double @cbrt(double %a)
   ret double %call
 }
 
@@ -401,7 +401,7 @@ define double @copysign_f64(double %a, double %b) {
 ; CHECK: copysign
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @copysign(double %a, double %b)
+  %call = call nnan ninf afn nsz double @copysign(double %a, double %b)
   ret double %call
 }
 
@@ -410,7 +410,7 @@ define double @cos_f64(double %a) {
 ; CHECK: __xl_cos_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @cos(double %a)
+  %call = call nnan ninf afn nsz double @cos(double %a)
   ret double %call
 }
 
@@ -419,7 +419,7 @@ define double @cosh_f64(double %a) {
 ; CHECK: __xl_cosh_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @cosh(double %a)
+  %call = call nnan ninf afn nsz double @cosh(double %a)
   ret double %call
 }
 
@@ -428,7 +428,7 @@ define double @cosisin_f64(double %a) {
 ; CHECK-NOT: __xl_cosisin_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @cosisin(double %a)
+  %call = call nnan ninf afn nsz double @cosisin(double %a)
   ret double %call
 }
 
@@ -437,7 +437,7 @@ define double @dnint_f64(double %a) {
 ; CHECK-NOT: __xl_dnint_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @dnint(double %a)
+  %call = call nnan ninf afn nsz double @dnint(double %a)
   ret double %call
 }
 
@@ -446,7 +446,7 @@ define double @erf_f64(double %a) {
 ; CHECK: __xl_erf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @erf(double %a)
+  %call = call nnan ninf afn nsz double @erf(double %a)
   ret double %call
 }
 
@@ -455,7 +455,7 @@ define double @erfc_f64(double %a) {
 ; CHECK: __xl_erfc_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @erfc(double %a)
+  %call = call nnan ninf afn nsz double @erfc(double %a)
   ret double %call
 }
 
@@ -464,7 +464,7 @@ define double @exp_f64(double %a) {
 ; CHECK: __xl_exp_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @exp(double %a)
+  %call = call nnan ninf afn nsz double @exp(double %a)
   ret double %call
 }
 
@@ -473,7 +473,7 @@ define double @expm1_f64(double %a) {
 ; CHECK: __xl_expm1_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @expm1(double %a)
+  %call = call nnan ninf afn nsz double @expm1(double %a)
   ret double %call
 }
 
@@ -482,7 +482,7 @@ define double @hypot_f64(double %a, double %b) {
 ; CHECK: __xl_hypot_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @hypot(double %a, double %b)
+  %call = call nnan ninf afn nsz double @hypot(double %a, double %b)
   ret double %call
 }
 
@@ -491,7 +491,7 @@ define double @lgamma_f64(double %a) {
 ; CHECK: __xl_lgamma_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @lgamma(double %a)
+  %call = call nnan ninf afn nsz double @lgamma(double %a)
   ret double %call
 }
 
@@ -500,7 +500,7 @@ define double @log_f64(double %a) {
 ; CHECK: __xl_log_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @log(double %a)
+  %call = call nnan ninf afn nsz double @log(double %a)
   ret double %call
 }
 
@@ -509,7 +509,7 @@ define double @log10_f64(double %a) {
 ; CHECK: __xl_log10_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @log10(double %a)
+  %call = call nnan ninf afn nsz double @log10(double %a)
   ret double %call
 }
 
@@ -518,7 +518,7 @@ define double @log1p_f64(double %a) {
 ; CHECK: __xl_log1p_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @log1p(double %a)
+  %call = call nnan ninf afn nsz double @log1p(double %a)
   ret double %call
 }
 
@@ -527,7 +527,7 @@ define double @pow_f64(double %a, double %b) {
 ; CHECK: __xl_pow_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @pow(double %a, double %b)
+  %call = call nnan ninf afn nsz double @pow(double %a, double %b)
   ret double %call
 }
 
@@ -536,7 +536,7 @@ define double @rsqrt_f64(double %a) {
 ; CHECK: __xl_rsqrt_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @rsqrt(double %a)
+  %call = call nnan ninf afn nsz double @rsqrt(double %a)
   ret double %call
 }
 
@@ -545,7 +545,7 @@ define double @sin_f64(double %a) {
 ; CHECK: __xl_sin_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @sin(double %a)
+  %call = call nnan ninf afn nsz double @sin(double %a)
   ret double %call
 }
 
@@ -554,7 +554,7 @@ define double @sincos_f64(double %a) {
 ; CHECK-NOT: __xl_sincos_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @sincos(double %a)
+  %call = call nnan ninf afn nsz double @sincos(double %a)
   ret double %call
 }
 
@@ -563,7 +563,7 @@ define double @sinh_f64(double %a) {
 ; CHECK: __xl_sinh_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @sinh(double %a)
+  %call = call nnan ninf afn nsz double @sinh(double %a)
   ret double %call
 }
 
@@ -572,7 +572,7 @@ define double @sqrt_f64(double %a) {
 ; CHECK: __xl_sqrt_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @sqrt(double %a)
+  %call = call nnan ninf afn nsz double @sqrt(double %a)
   ret double %call
 }
 
@@ -581,7 +581,7 @@ define double @tan_f64(double %a) {
 ; CHECK: __xl_tan_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @tan(double %a)
+  %call = call nnan ninf afn nsz double @tan(double %a)
   ret double %call
 }
 
@@ -590,7 +590,7 @@ define double @tanh_f64(double %a) {
 ; CHECK: __xl_tanh_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @tanh(double %a)
+  %call = call nnan ninf afn nsz double @tanh(double %a)
   ret double %call
 }
 
@@ -599,7 +599,7 @@ define float @__acosf_finite_f32(float %a) {
 ; CHECK: __xl_acosf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @__acosf_finite(float %a)
+  %call = call nnan ninf afn nsz float @__acosf_finite(float %a)
   ret float %call
 }
 
@@ -608,7 +608,7 @@ define float @__acoshf_finite_f32(float %a) {
 ; CHECK: __xl_acoshf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @__acoshf_finite(float %a)
+  %call = call nnan ninf afn nsz float @__acoshf_finite(float %a)
   ret float %call
 }
 
@@ -617,7 +617,7 @@ define float @__asinf_finite_f32(float %a) {
 ; CHECK: __xl_asinf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @__asinf_finite(float %a)
+  %call = call nnan ninf afn nsz float @__asinf_finite(float %a)
   ret float %call
 }
 
@@ -626,7 +626,7 @@ define float @__atan2f_finite_f32(float %a, float %b) {
 ; CHECK: __xl_atan2f_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @__atan2f_finite(float %a, float %b)
+  %call = call nnan ninf afn nsz float @__atan2f_finite(float %a, float %b)
   ret float %call
 }
 
@@ -635,7 +635,7 @@ define float @__atanhf_finite_f32(float %a) {
 ; CHECK: __xl_atanhf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @__atanhf_finite(float %a)
+  %call = call nnan ninf afn nsz float @__atanhf_finite(float %a)
   ret float %call
 }
 
@@ -644,7 +644,7 @@ define float @__coshf_finite_f32(float %a) {
 ; CHECK: __xl_coshf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @__coshf_finite(float %a)
+  %call = call nnan ninf afn nsz float @__coshf_finite(float %a)
   ret float %call
 }
 
@@ -653,7 +653,7 @@ define float @__expf_finite_f32(float %a) {
 ; CHECK: __xl_expf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @__expf_finite(float %a)
+  %call = call nnan ninf afn nsz float @__expf_finite(float %a)
   ret float %call
 }
 
@@ -662,7 +662,7 @@ define float @__logf_finite_f32(float %a) {
 ; CHECK: __xl_logf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @__logf_finite(float %a)
+  %call = call nnan ninf afn nsz float @__logf_finite(float %a)
   ret float %call
 }
 
@@ -671,7 +671,7 @@ define float @__log10f_finite_f32(float %a) {
 ; CHECK: __xl_log10f_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @__log10f_finite(float %a)
+  %call = call nnan ninf afn nsz float @__log10f_finite(float %a)
   ret float %call
 }
 
@@ -680,7 +680,7 @@ define float @__powf_finite_f32(float %a, float %b) {
 ; CHECK: __xl_powf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @__powf_finite(float %a, float %b)
+  %call = call nnan ninf afn nsz float @__powf_finite(float %a, float %b)
   ret float %call
 }
 
@@ -689,7 +689,7 @@ define float @__sinhf_finite_f32(float %a) {
 ; CHECK: __xl_sinhf_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz float @__sinhf_finite(float %a)
+  %call = call nnan ninf afn nsz float @__sinhf_finite(float %a)
   ret float %call
 }
 
@@ -698,7 +698,7 @@ define double @__acos_finite_f64(double %a) {
 ; CHECK: __xl_acos_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @__acos_finite(double %a)
+  %call = call nnan ninf afn nsz double @__acos_finite(double %a)
   ret double %call
 }
 
@@ -707,7 +707,7 @@ define double @__acosh_finite_f64(double %a) {
 ; CHECK: __xl_acosh_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @__acosh_finite(double %a)
+  %call = call nnan ninf afn nsz double @__acosh_finite(double %a)
   ret double %call
 }
 
@@ -716,7 +716,7 @@ define double @__asin_finite_f64(double %a) {
 ; CHECK: __xl_asin_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @__asin_finite(double %a)
+  %call = call nnan ninf afn nsz double @__asin_finite(double %a)
   ret double %call
 }
 
@@ -725,7 +725,7 @@ define double @__atan2_finite_f64(double %a, double %b) {
 ; CHECK: __xl_atan2_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @__atan2_finite(double %a, double %b)
+  %call = call nnan ninf afn nsz double @__atan2_finite(double %a, double %b)
   ret double %call
 }
 
@@ -734,7 +734,7 @@ define double @__atanh_finite_f64(double %a) {
 ; CHECK: __xl_atanh_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @__atanh_finite(double %a)
+  %call = call nnan ninf afn nsz double @__atanh_finite(double %a)
   ret double %call
 }
 
@@ -743,7 +743,7 @@ define double @__cosh_finite_f64(double %a) {
 ; CHECK: __xl_cosh_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @__cosh_finite(double %a)
+  %call = call nnan ninf afn nsz double @__cosh_finite(double %a)
   ret double %call
 }
 
@@ -752,7 +752,7 @@ define double @__exp_finite_f64(double %a) {
 ; CHECK: __xl_exp_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @__exp_finite(double %a)
+  %call = call nnan ninf afn nsz double @__exp_finite(double %a)
   ret double %call
 }
 
@@ -761,7 +761,7 @@ define double @__log_finite_f64(double %a) {
 ; CHECK: __xl_log_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @__log_finite(double %a)
+  %call = call nnan ninf afn nsz double @__log_finite(double %a)
   ret double %call
 }
 
@@ -770,7 +770,7 @@ define double @__log10_finite_f64(double %a) {
 ; CHECK: __xl_log10_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @__log10_finite(double %a)
+  %call = call nnan ninf afn nsz double @__log10_finite(double %a)
   ret double %call
 }
 
@@ -779,7 +779,7 @@ define double @__pow_finite_f64(double %a, double %b) {
 ; CHECK: __xl_pow_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @__pow_finite(double %a, double %b)
+  %call = call nnan ninf afn nsz double @__pow_finite(double %a, double %b)
   ret double %call
 }
 
@@ -788,6 +788,6 @@ define double @__sinh_finite_f64(double %a) {
 ; CHECK: __xl_sinh_finite
 ; CHECK: blr
 entry:
-  %call = tail call nnan ninf afn nsz double @__sinh_finite(double %a)
+  %call = call nnan ninf afn nsz double @__sinh_finite(double %a)
   ret double %call
 }
diff --git a/llvm/test/CodeGen/PowerPC/lower-scalar-mass-nofast.ll b/llvm/test/CodeGen/PowerPC/lower-scalar-mass-nofast.ll
index 6624b5752139c..585189008825d 100644
--- a/llvm/test/CodeGen/PowerPC/lower-scalar-mass-nofast.ll
+++ b/llvm/test/CodeGen/PowerPC/lower-scalar-mass-nofast.ll
@@ -87,7 +87,7 @@ define float @acosf_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_acosf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @acosf(float %a)
+  %call = call float @acosf(float %a)
   ret float %call
 }
 
@@ -97,7 +97,7 @@ define float @acoshf_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_acoshf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @acoshf(float %a)
+  %call = call float @acoshf(float %a)
   ret float %call
 }
 
@@ -107,7 +107,7 @@ define float @asinf_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_asinf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @asinf(float %a)
+  %call = call float @asinf(float %a)
   ret float %call
 }
 
@@ -117,7 +117,7 @@ define float @asinhf_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_asinhf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @asinhf(float %a)
+  %call = call float @asinhf(float %a)
   ret float %call
 }
 
@@ -127,7 +127,7 @@ define float @atan2f_f32_nofast(float %a, float %b) {
 ; CHECK-NOT: __xl_atan2f_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @atan2f(float %a, float %b)
+  %call = call float @atan2f(float %a, float %b)
   ret float %call
 }
 
@@ -137,7 +137,7 @@ define float @atanf_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_atanf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @atanf(float %a)
+  %call = call float @atanf(float %a)
   ret float %call
 }
 
@@ -147,7 +147,7 @@ define float @atanhf_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_atanhf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @atanhf(float %a)
+  %call = call float @atanhf(float %a)
   ret float %call
 }
 
@@ -157,7 +157,7 @@ define float @cbrtf_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_cbrtf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @cbrtf(float %a)
+  %call = call float @cbrtf(float %a)
   ret float %call
 }
 
@@ -167,7 +167,7 @@ define float @copysignf_f32_nofast(float %a, float %b) {
 ; CHECK-NOT: __xl_copysignf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @copysignf(float %a, float %b)
+  %call = call float @copysignf(float %a, float %b)
   ret float %call
 }
 
@@ -177,7 +177,7 @@ define float @cosf_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_cosf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @cosf(float %a)
+  %call = call float @cosf(float %a)
   ret float %call
 }
 
@@ -187,7 +187,7 @@ define float @coshf_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_coshf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @coshf(float %a)
+  %call = call float @coshf(float %a)
   ret float %call
 }
 
@@ -197,7 +197,7 @@ define float @erfcf_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_erfcf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @erfcf(float %a)
+  %call = call float @erfcf(float %a)
   ret float %call
 }
 
@@ -207,7 +207,7 @@ define float @erff_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_erff_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @erff(float %a)
+  %call = call float @erff(float %a)
   ret float %call
 }
 
@@ -217,7 +217,7 @@ define float @expf_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_expf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @expf(float %a)
+  %call = call float @expf(float %a)
   ret float %call
 }
 
@@ -227,7 +227,7 @@ define float @expm1f_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_expm1f_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @expm1f(float %a)
+  %call = call float @expm1f(float %a)
   ret float %call
 }
 
@@ -237,7 +237,7 @@ define float @hypotf_f32_nofast(float %a, float %b) {
 ; CHECK-NOT: __xl_hypotf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @hypotf(float %a, float %b)
+  %call = call float @hypotf(float %a, float %b)
   ret float %call
 }
 
@@ -247,7 +247,7 @@ define float @lgammaf_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_lgammaf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @lgammaf(float %a)
+  %call = call float @lgammaf(float %a)
   ret float %call
 }
 
@@ -257,7 +257,7 @@ define float @log10f_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_log10f_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @log10f(float %a)
+  %call = call float @log10f(float %a)
   ret float %call
 }
 
@@ -267,7 +267,7 @@ define float @log1pf_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_log1pf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @log1pf(float %a)
+  %call = call float @log1pf(float %a)
   ret float %call
 }
 
@@ -277,7 +277,7 @@ define float @logf_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_logf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @logf(float %a)
+  %call = call float @logf(float %a)
   ret float %call
 }
 
@@ -287,7 +287,7 @@ define float @powf_f32_nofast(float %a, float %b) {
 ; CHECK-NOT: __xl_powf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @powf(float %a, float %b)
+  %call = call float @powf(float %a, float %b)
   ret float %call
 }
 
@@ -297,7 +297,7 @@ define float @rintf_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_rintf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @rintf(float %a)
+  %call = call float @rintf(float %a)
   ret float %call
 }
 
@@ -307,7 +307,7 @@ define float @sinf_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_sinf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @sinf(float %a)
+  %call = call float @sinf(float %a)
   ret float %call
 }
 
@@ -317,7 +317,7 @@ define float @sinhf_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_sinhf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @sinhf(float %a)
+  %call = call float @sinhf(float %a)
   ret float %call
 }
 
@@ -327,7 +327,7 @@ define float @tanf_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_tanf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @tanf(float %a)
+  %call = call float @tanf(float %a)
   ret float %call
 }
 
@@ -337,7 +337,7 @@ define float @tanhf_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_tanhf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @tanhf(float %a)
+  %call = call float @tanhf(float %a)
   ret float %call
 }
 
@@ -347,7 +347,7 @@ define double @acos_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_acos_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @acos(double %a)
+  %call = call double @acos(double %a)
   ret double %call
 }
 
@@ -357,7 +357,7 @@ define double @acosh_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_acosh_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @acosh(double %a)
+  %call = call double @acosh(double %a)
   ret double %call
 }
 
@@ -367,7 +367,7 @@ define double @anint_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_anint_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @anint(double %a)
+  %call = call double @anint(double %a)
   ret double %call
 }
 
@@ -377,7 +377,7 @@ define double @asin_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_asin_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @asin(double %a)
+  %call = call double @asin(double %a)
   ret double %call
 }
 
@@ -387,7 +387,7 @@ define double @asinh_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_asinh_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @asinh(double %a)
+  %call = call double @asinh(double %a)
   ret double %call
 }
 
@@ -397,7 +397,7 @@ define double @atan_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_atan_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @atan(double %a)
+  %call = call double @atan(double %a)
   ret double %call
 }
 
@@ -407,7 +407,7 @@ define double @atan2_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_atan2_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @atan2(double %a)
+  %call = call double @atan2(double %a)
   ret double %call
 }
 
@@ -417,7 +417,7 @@ define double @atanh_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_atanh_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @atanh(double %a)
+  %call = call double @atanh(double %a)
   ret double %call
 }
 
@@ -427,7 +427,7 @@ define double @cbrt_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_cbrt_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @cbrt(double %a)
+  %call = call double @cbrt(double %a)
   ret double %call
 }
 
@@ -437,7 +437,7 @@ define double @copysign_f64_nofast(double %a, double %b) {
 ; CHECK-NOT: __xl_copysign_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @copysign(double %a, double %b)
+  %call = call double @copysign(double %a, double %b)
   ret double %call
 }
 
@@ -447,7 +447,7 @@ define double @cos_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_cos_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @cos(double %a)
+  %call = call double @cos(double %a)
   ret double %call
 }
 
@@ -457,7 +457,7 @@ define double @cosh_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_cosh_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @cosh(double %a)
+  %call = call double @cosh(double %a)
   ret double %call
 }
 
@@ -467,7 +467,7 @@ define double @cosisin_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_cosisin_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @cosisin(double %a)
+  %call = call double @cosisin(double %a)
   ret double %call
 }
 
@@ -477,7 +477,7 @@ define double @dnint_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_dnint_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @dnint(double %a)
+  %call = call double @dnint(double %a)
   ret double %call
 }
 
@@ -487,7 +487,7 @@ define double @erf_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_erf_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @erf(double %a)
+  %call = call double @erf(double %a)
   ret double %call
 }
 
@@ -497,7 +497,7 @@ define double @erfc_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_erfc_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @erfc(double %a)
+  %call = call double @erfc(double %a)
   ret double %call
 }
 
@@ -507,7 +507,7 @@ define double @exp_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_exp_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @exp(double %a)
+  %call = call double @exp(double %a)
   ret double %call
 }
 
@@ -517,7 +517,7 @@ define double @expm1_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_expm1_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @expm1(double %a)
+  %call = call double @expm1(double %a)
   ret double %call
 }
 
@@ -527,7 +527,7 @@ define double @hypot_f64_nofast(double %a, double %b) {
 ; CHECK-NOT: __xl_hypot_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @hypot(double %a, double %b)
+  %call = call double @hypot(double %a, double %b)
   ret double %call
 }
 
@@ -537,7 +537,7 @@ define double @lgamma_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_lgamma_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @lgamma(double %a)
+  %call = call double @lgamma(double %a)
   ret double %call
 }
 
@@ -547,7 +547,7 @@ define double @log_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_log_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @log(double %a)
+  %call = call double @log(double %a)
   ret double %call
 }
 
@@ -557,7 +557,7 @@ define double @log10_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_log10_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @log10(double %a)
+  %call = call double @log10(double %a)
   ret double %call
 }
 
@@ -567,7 +567,7 @@ define double @log1p_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_log1p_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @log1p(double %a)
+  %call = call double @log1p(double %a)
   ret double %call
 }
 
@@ -577,7 +577,7 @@ define double @pow_f64_nofast(double %a, double %b) {
 ; CHECK-NOT: __xl_pow_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @pow(double %a, double %b)
+  %call = call double @pow(double %a, double %b)
   ret double %call
 }
 
@@ -587,7 +587,7 @@ define double @rsqrt_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_rsqrt_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @rsqrt(double %a)
+  %call = call double @rsqrt(double %a)
   ret double %call
 }
 
@@ -597,7 +597,7 @@ define double @sin_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_sin_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @sin(double %a)
+  %call = call double @sin(double %a)
   ret double %call
 }
 
@@ -607,7 +607,7 @@ define double @sincos_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_sincos_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @sincos(double %a)
+  %call = call double @sincos(double %a)
   ret double %call
 }
 
@@ -617,7 +617,7 @@ define double @sinh_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_sinh_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @sinh(double %a)
+  %call = call double @sinh(double %a)
   ret double %call
 }
 
@@ -627,7 +627,7 @@ define double @sqrt_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_sqrt_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @sqrt(double %a)
+  %call = call double @sqrt(double %a)
   ret double %call
 }
 
@@ -637,7 +637,7 @@ define double @tan_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_tan_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @tan(double %a)
+  %call = call double @tan(double %a)
   ret double %call
 }
 
@@ -647,7 +647,7 @@ define double @tanh_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_tanh_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @tanh(double %a)
+  %call = call double @tanh(double %a)
   ret double %call
 }
 
@@ -657,7 +657,7 @@ define float @__acosf_finite_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_acosf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @__acosf_finite(float %a)
+  %call = call float @__acosf_finite(float %a)
   ret float %call
 }
 
@@ -667,7 +667,7 @@ define float @__acoshf_finite_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_acoshf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @__acoshf_finite(float %a)
+  %call = call float @__acoshf_finite(float %a)
   ret float %call
 }
 
@@ -677,7 +677,7 @@ define float @__asinf_finite_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_asinf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @__asinf_finite(float %a)
+  %call = call float @__asinf_finite(float %a)
   ret float %call
 }
 
@@ -687,7 +687,7 @@ define float @__atan2f_finite_f32_nofast(float %a, float %b) {
 ; CHECK-NOT: __xl_atan2f_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @__atan2f_finite(float %a, float %b)
+  %call = call float @__atan2f_finite(float %a, float %b)
   ret float %call
 }
 
@@ -697,7 +697,7 @@ define float @__atanhf_finite_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_atanhf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @__atanhf_finite(float %a)
+  %call = call float @__atanhf_finite(float %a)
   ret float %call
 }
 
@@ -707,7 +707,7 @@ define float @__coshf_finite_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_coshf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @__coshf_finite(float %a)
+  %call = call float @__coshf_finite(float %a)
   ret float %call
 }
 
@@ -717,7 +717,7 @@ define float @__expf_finite_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_expf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @__expf_finite(float %a)
+  %call = call float @__expf_finite(float %a)
   ret float %call
 }
 
@@ -727,7 +727,7 @@ define float @__logf_finite_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_logf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @__logf_finite(float %a)
+  %call = call float @__logf_finite(float %a)
   ret float %call
 }
 
@@ -737,7 +737,7 @@ define float @__log10f_finite_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_log10f_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @__log10f_finite(float %a)
+  %call = call float @__log10f_finite(float %a)
   ret float %call
 }
 
@@ -747,7 +747,7 @@ define float @__powf_finite_f32_nofast(float %a, float %b) {
 ; CHECK-NOT: __xl_powf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @__powf_finite(float %a, float %b)
+  %call = call float @__powf_finite(float %a, float %b)
   ret float %call
 }
 
@@ -757,7 +757,7 @@ define float @__sinhf_finite_f32_nofast(float %a) {
 ; CHECK-NOT: __xl_sinhf_finite
 ; CHECK: blr
 entry:
-  %call = tail call float @__sinhf_finite(float %a)
+  %call = call float @__sinhf_finite(float %a)
   ret float %call
 }
 
@@ -767,7 +767,7 @@ define double @__acos_finite_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_acos_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @__acos_finite(double %a)
+  %call = call double @__acos_finite(double %a)
   ret double %call
 }
 
@@ -777,7 +777,7 @@ define double @__acosh_finite_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_acosh_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @__acosh_finite(double %a)
+  %call = call double @__acosh_finite(double %a)
   ret double %call
 }
 
@@ -787,7 +787,7 @@ define double @__asin_finite_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_asin_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @__asin_finite(double %a)
+  %call = call double @__asin_finite(double %a)
   ret double %call
 }
 
@@ -797,7 +797,7 @@ define double @__atan2_finite_f64_nofast(double %a, double %b) {
 ; CHECK-NOT: __xl_atan2_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @__atan2_finite(double %a, double %b)
+  %call = call double @__atan2_finite(double %a, double %b)
   ret double %call
 }
 
@@ -807,7 +807,7 @@ define double @__atanh_finite_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_atanh_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @__atanh_finite(double %a)
+  %call = call double @__atanh_finite(double %a)
   ret double %call
 }
 
@@ -817,7 +817,7 @@ define double @__cosh_finite_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_cosh_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @__cosh_finite(double %a)
+  %call = call double @__cosh_finite(double %a)
   ret double %call
 }
 
@@ -827,7 +827,7 @@ define double @__exp_finite_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_exp_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @__exp_finite(double %a)
+  %call = call double @__exp_finite(double %a)
   ret double %call
 }
 
@@ -837,7 +837,7 @@ define double @__log_finite_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_log_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @__log_finite(double %a)
+  %call = call double @__log_finite(double %a)
   ret double %call
 }
 
@@ -847,7 +847,7 @@ define double @__log10_finite_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_log10_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @__log10_finite(double %a)
+  %call = call double @__log10_finite(double %a)
   ret double %call
 }
 
@@ -857,7 +857,7 @@ define double @__pow_finite_f64_nofast(double %a, double %b) {
 ; CHECK-NOT: __xl_pow_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @__pow_finite(double %a, double %b)
+  %call = call double @__pow_finite(double %a, double %b)
   ret double %call
 }
 
@@ -867,7 +867,7 @@ define double @__sinh_finite_f64_nofast(double %a) {
 ; CHECK-NOT: __xl_sinh_finite
 ; CHECK: blr
 entry:
-  %call = tail call double @__sinh_finite(double %a)
+  %call = call double @__sinh_finite(double %a)
   ret double %call
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/merge-private.ll b/llvm/test/CodeGen/PowerPC/merge-private.ll
index b50783d10928e..d6fe9aae33ec6 100644
--- a/llvm/test/CodeGen/PowerPC/merge-private.ll
+++ b/llvm/test/CodeGen/PowerPC/merge-private.ll
@@ -108,7 +108,7 @@ define dso_local void @print_func() {
 ; LINUX64BE-NEXT:    mtlr r0
 ; LINUX64BE-NEXT:    blr
 entry:
-  %call = tail call signext i32 @puts(ptr noundef nonnull dereferenceable(1) @.str)
-  %call1 = tail call signext i32 @puts(ptr noundef nonnull dereferenceable(1) @str)
+  %call = call signext i32 @puts(ptr noundef nonnull dereferenceable(1) @.str)
+  %call1 = call signext i32 @puts(ptr noundef nonnull dereferenceable(1) @str)
   ret void
 }
diff --git a/llvm/test/CodeGen/PowerPC/mergeable-string-pool-large.ll b/llvm/test/CodeGen/PowerPC/mergeable-string-pool-large.ll
index bc143e8c7ea4c..8e155554a71f6 100644
--- a/llvm/test/CodeGen/PowerPC/mergeable-string-pool-large.ll
+++ b/llvm/test/CodeGen/PowerPC/mergeable-string-pool-large.ll
@@ -292,7 +292,7 @@ define dso_local signext i32 @str1() local_unnamed_addr #0 {
 ; LINUX64LE-NEXT:    mtlr r0
 ; LINUX64LE-NEXT:    blr
 entry:
-  %call = tail call signext i32 @callee(ptr noundef nonnull @.str.1)
+  %call = call signext i32 @callee(ptr noundef nonnull @.str.1)
   ret i32 %call
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/mergeable-string-pool-tls.ll b/llvm/test/CodeGen/PowerPC/mergeable-string-pool-tls.ll
index aa0b441646fd3..e243e0be678af 100644
--- a/llvm/test/CodeGen/PowerPC/mergeable-string-pool-tls.ll
+++ b/llvm/test/CodeGen/PowerPC/mergeable-string-pool-tls.ll
@@ -118,9 +118,9 @@ define void @print_tls_func() {
 ; LINUX64BE-NEXT:    mtlr r0
 ; LINUX64BE-NEXT:    blr
 entry:
-  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @a)
-  %1 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @b)
-  %call = tail call signext i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str, ptr noundef nonnull %0, ptr noundef nonnull %1, ptr noundef nonnull @c)
+  %0 = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @a)
+  %1 = call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @b)
+  %call = call signext i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str, ptr noundef nonnull %0, ptr noundef nonnull %1, ptr noundef nonnull @c)
   ret void
 }
 
@@ -187,7 +187,7 @@ define void @test_func() {
 ; LINUX64BE-NEXT:    mtlr r0
 ; LINUX64BE-NEXT:    blr
 entry:
-  tail call void @callee(ptr noundef nonnull @d) #4
+  call void @callee(ptr noundef nonnull @d) #4
   ret void
 }
 
@@ -254,7 +254,7 @@ define void @test_func2() {
 ; LINUX64BE-NEXT:    mtlr r0
 ; LINUX64BE-NEXT:    blr
 entry:
-  tail call void @callee2(ptr noundef nonnull @e) #4
+  call void @callee2(ptr noundef nonnull @e) #4
   ret void
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/mergeable-string-pool.ll b/llvm/test/CodeGen/PowerPC/mergeable-string-pool.ll
index 6a21a100bb990..01b2f54b0a9ea 100644
--- a/llvm/test/CodeGen/PowerPC/mergeable-string-pool.ll
+++ b/llvm/test/CodeGen/PowerPC/mergeable-string-pool.ll
@@ -94,7 +94,7 @@ define dso_local signext i32 @str1() local_unnamed_addr #0 {
 ; LINUX64LE-NEXT:    mtlr r0
 ; LINUX64LE-NEXT:    blr
 entry:
-  %call = tail call signext i32 @callee(ptr noundef nonnull @.str.1)
+  %call = call signext i32 @callee(ptr noundef nonnull @.str.1)
   ret i32 %call
 }
 
@@ -159,7 +159,7 @@ define dso_local signext i32 @str2() local_unnamed_addr #0 {
 ; LINUX64LE-NEXT:    mtlr r0
 ; LINUX64LE-NEXT:    blr
 entry:
-  %call = tail call signext i32 @callee(ptr noundef nonnull @.str.2)
+  %call = call signext i32 @callee(ptr noundef nonnull @.str.2)
   ret i32 %call
 }
 
@@ -261,8 +261,8 @@ define dso_local signext i32 @str3() local_unnamed_addr #0 {
 ; LINUX64LE-NEXT:    mtlr r0
 ; LINUX64LE-NEXT:    blr
 entry:
-  %call = tail call signext i32 @callee(ptr noundef nonnull @.str.3)
-  %call1 = tail call signext i32 @callee(ptr noundef nonnull @.str.2)
+  %call = call signext i32 @callee(ptr noundef nonnull @.str.3)
+  %call1 = call signext i32 @callee(ptr noundef nonnull @.str.2)
   %add = add nsw i32 %call1, %call
   ret i32 %add
 }
@@ -326,7 +326,7 @@ define dso_local signext i32 @str4() local_unnamed_addr #0 {
 ; LINUX64LE-NEXT:    mtlr r0
 ; LINUX64LE-NEXT:    blr
 entry:
-  %call = tail call signext i32 @callee(ptr noundef nonnull @.str.4)
+  %call = call signext i32 @callee(ptr noundef nonnull @.str.4)
   ret i32 %call
 }
 
@@ -389,7 +389,7 @@ define dso_local signext i32 @str5() local_unnamed_addr #0 {
 ; LINUX64LE-NEXT:    mtlr r0
 ; LINUX64LE-NEXT:    blr
 entry:
-  %call = tail call signext i32 @callee(ptr noundef nonnull @.str.5)
+  %call = call signext i32 @callee(ptr noundef nonnull @.str.5)
   ret i32 %call
 }
 
@@ -749,8 +749,8 @@ define dso_local signext i32 @str7() local_unnamed_addr #0 {
 ; LINUX64LE-NEXT:    blr
 entry:
   %0 = load ptr, ptr @GLOBALSTRING, align 8
-  %call = tail call signext i32 @callee(ptr noundef %0)
-  %call1 = tail call signext i32 @callee(ptr noundef nonnull @.str.8)
+  %call = call signext i32 @callee(ptr noundef %0)
+  %call1 = call signext i32 @callee(ptr noundef nonnull @.str.8)
   %add = add nsw i32 %call1, %call
   ret i32 %add
 }
@@ -847,8 +847,8 @@ define dso_local signext i32 @mixed1() local_unnamed_addr #0 {
 ; LINUX64LE-NEXT:    mtlr r0
 ; LINUX64LE-NEXT:    blr
 entry:
-  %call = tail call signext i32 @calleeInt(ptr noundef nonnull @IntArray2)
-  %call1 = tail call signext i32 @callee(ptr noundef nonnull @.str.6)
+  %call = call signext i32 @calleeInt(ptr noundef nonnull @IntArray2)
+  %call1 = call signext i32 @callee(ptr noundef nonnull @.str.6)
   %add = add nsw i32 %call1, %call
   ret i32 %add
 }
@@ -1087,7 +1087,7 @@ define dso_local signext i32 @str9() local_unnamed_addr #0 {
 ; LINUX64LE-NEXT:    mtlr r0
 ; LINUX64LE-NEXT:    blr
 entry:
-  %call = tail call signext i32 @callee(ptr noundef nonnull @.str.9)
+  %call = call signext i32 @callee(ptr noundef nonnull @.str.9)
   ret i32 %call
 }
 
@@ -1150,7 +1150,7 @@ define dso_local signext i32 @str10() local_unnamed_addr #0 {
 ; LINUX64LE-NEXT:    mtlr r0
 ; LINUX64LE-NEXT:    blr
 entry:
-  %call = tail call signext i32 @callee(ptr noundef nonnull @.str.10)
+  %call = call signext i32 @callee(ptr noundef nonnull @.str.10)
   ret i32 %call
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/milicode32.ll b/llvm/test/CodeGen/PowerPC/milicode32.ll
index 78d036202fe4e..0df53f4059c62 100644
--- a/llvm/test/CodeGen/PowerPC/milicode32.ll
+++ b/llvm/test/CodeGen/PowerPC/milicode32.ll
@@ -29,7 +29,7 @@ define i32 @memcmp_test(ptr nocapture noundef readonly %ptr1, ptr nocapture noun
 ; CHECK-LINUX32-P9-NEXT:    mtlr r0
 ; CHECK-LINUX32-P9-NEXT:    blr
 entry:
-  %call = tail call i32 @memcmp(ptr noundef %ptr1, ptr noundef %ptr2, i32 noundef %num)
+  %call = call i32 @memcmp(ptr noundef %ptr1, ptr noundef %ptr2, i32 noundef %num)
     ret i32 %call
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/milicode64.ll b/llvm/test/CodeGen/PowerPC/milicode64.ll
index 8b87529d9a6d8..b54035f174c7c 100644
--- a/llvm/test/CodeGen/PowerPC/milicode64.ll
+++ b/llvm/test/CodeGen/PowerPC/milicode64.ll
@@ -46,7 +46,7 @@ define noundef i32 @_Z11memcmp_testPKvS0_m(ptr noundef readonly captures(none) %
 ; CHECK-AIX-64-P9-NEXT:    mtlr r0
 ; CHECK-AIX-64-P9-NEXT:    blr
 entry:
-  %call = tail call i32 @memcmp(ptr noundef %ptr1, ptr noundef %ptr2, i64 noundef %num)
+  %call = call i32 @memcmp(ptr noundef %ptr1, ptr noundef %ptr2, i64 noundef %num)
   ret i32 %call
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/pow-025-075-nointrinsic-scalar-mass-fast.ll b/llvm/test/CodeGen/PowerPC/pow-025-075-nointrinsic-scalar-mass-fast.ll
index 3e0cdb03d3868..5bcf05ac57290 100644
--- a/llvm/test/CodeGen/PowerPC/pow-025-075-nointrinsic-scalar-mass-fast.ll
+++ b/llvm/test/CodeGen/PowerPC/pow-025-075-nointrinsic-scalar-mass-fast.ll
@@ -40,7 +40,7 @@ define float @powf_f32_fast025(float %a) #1 {
 ; CHECK-AIX-NEXT:    mtlr 0
 ; CHECK-AIX-NEXT:    blr
 entry:
-  %call = tail call nnan ninf afn nsz float @powf(float %a, float 2.500000e-01)
+  %call = call nnan ninf afn nsz float @powf(float %a, float 2.500000e-01)
   ret float %call
 }
 
@@ -77,7 +77,7 @@ define double @pow_f64_fast025(double %a) #1 {
 ; CHECK-AIX-NEXT:    mtlr 0
 ; CHECK-AIX-NEXT:    blr
 entry:
-  %call = tail call nnan ninf afn nsz double @pow(double %a, double 2.500000e-01)
+  %call = call nnan ninf afn nsz double @pow(double %a, double 2.500000e-01)
   ret double %call
 }
 
@@ -114,7 +114,7 @@ define float @powf_f32_fast075(float %a) #1 {
 ; CHECK-AIX-NEXT:    mtlr 0
 ; CHECK-AIX-NEXT:    blr
 entry:
-  %call = tail call nnan ninf afn nsz float @powf(float %a, float 7.500000e-01)
+  %call = call nnan ninf afn nsz float @powf(float %a, float 7.500000e-01)
   ret float %call
 }
 
@@ -151,7 +151,7 @@ define double @pow_f64_fast075(double %a) #1 {
 ; CHECK-AIX-NEXT:    mtlr 0
 ; CHECK-AIX-NEXT:    blr
 entry:
-  %call = tail call nnan ninf afn nsz double @pow(double %a, double 7.500000e-01)
+  %call = call nnan ninf afn nsz double @pow(double %a, double 7.500000e-01)
   ret double %call
 }
 
@@ -188,7 +188,7 @@ define float @powf_f32_fast050(float %a) #1 {
 ; CHECK-AIX-NEXT:    mtlr 0
 ; CHECK-AIX-NEXT:    blr
 entry:
-  %call = tail call nnan ninf afn nsz float @powf(float %a, float 5.000000e-01)
+  %call = call nnan ninf afn nsz float @powf(float %a, float 5.000000e-01)
   ret float %call
 }
 
@@ -225,7 +225,7 @@ define double @pow_f64_fast050(double %a) #1 {
 ; CHECK-AIX-NEXT:    mtlr 0
 ; CHECK-AIX-NEXT:    blr
 entry:
-  %call = tail call nnan ninf afn nsz double @pow(double %a, double 5.000000e-01)
+  %call = call nnan ninf afn nsz double @pow(double %a, double 5.000000e-01)
   ret double %call
 }
 
@@ -264,7 +264,7 @@ define float @__powf_finite_f32_fast025(float %a) #1 {
 ; CHECK-AIX-NEXT:    mtlr 0
 ; CHECK-AIX-NEXT:    blr
 entry:
-  %call = tail call nnan ninf afn nsz float @__powf_finite(float %a, float 2.500000e-01)
+  %call = call nnan ninf afn nsz float @__powf_finite(float %a, float 2.500000e-01)
   ret float %call
 }
 
@@ -301,7 +301,7 @@ define double @__pow_finite_f64_fast025(double %a) #1 {
 ; CHECK-AIX-NEXT:    mtlr 0
 ; CHECK-AIX-NEXT:    blr
 entry:
-  %call = tail call nnan ninf afn nsz double @__pow_finite(double %a, double 2.500000e-01)
+  %call = call nnan ninf afn nsz double @__pow_finite(double %a, double 2.500000e-01)
   ret double %call
 }
 
@@ -338,7 +338,7 @@ define float @__powf_finite_f32_fast075(float %a) #1 {
 ; CHECK-AIX-NEXT:    mtlr 0
 ; CHECK-AIX-NEXT:    blr
 entry:
-  %call = tail call nnan ninf afn nsz float @__powf_finite(float %a, float 7.500000e-01)
+  %call = call nnan ninf afn nsz float @__powf_finite(float %a, float 7.500000e-01)
   ret float %call
 }
 
@@ -375,7 +375,7 @@ define double @__pow_finite_f64_fast075(double %a) #1 {
 ; CHECK-AIX-NEXT:    mtlr 0
 ; CHECK-AIX-NEXT:    blr
 entry:
-  %call = tail call nnan ninf afn nsz double @__pow_finite(double %a, double 7.500000e-01)
+  %call = call nnan ninf afn nsz double @__pow_finite(double %a, double 7.500000e-01)
   ret double %call
 }
 
@@ -412,7 +412,7 @@ define float @__powf_finite_f32_fast050(float %a) #1 {
 ; CHECK-AIX-NEXT:    mtlr 0
 ; CHECK-AIX-NEXT:    blr
 entry:
-  %call = tail call nnan ninf afn nsz float @__powf_finite(float %a, float 5.000000e-01)
+  %call = call nnan ninf afn nsz float @__powf_finite(float %a, float 5.000000e-01)
   ret float %call
 }
 
@@ -449,7 +449,7 @@ define double @__pow_finite_f64_fast050(double %a) #1 {
 ; CHECK-AIX-NEXT:    mtlr 0
 ; CHECK-AIX-NEXT:    blr
 entry:
-  %call = tail call nnan ninf afn nsz double @__pow_finite(double %a, double 5.000000e-01)
+  %call = call nnan ninf afn nsz double @__pow_finite(double %a, double 5.000000e-01)
   ret double %call
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/ppc-rotate-clear.ll b/llvm/test/CodeGen/PowerPC/ppc-rotate-clear.ll
index f9d2c259a4c19..7629211b099de 100644
--- a/llvm/test/CodeGen/PowerPC/ppc-rotate-clear.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc-rotate-clear.ll
@@ -38,10 +38,10 @@ define dso_local i64 @rotatemask32(i64 noundef %word) local_unnamed_addr #0 {
 ; LINUX64LE-NEXT:    rlwnm r3, r3, r4, 1, 31
 ; LINUX64LE-NEXT:    blr
 entry:
-  %0 = tail call i64 @llvm.ctlz.i64(i64 %word, i1 false)
+  %0 = call i64 @llvm.ctlz.i64(i64 %word, i1 false)
   %cast = trunc i64 %0 to i32
   %conv1 = trunc i64 %word to i32
-  %1 = tail call i32 @llvm.fshl.i32(i32 %conv1, i32 %conv1, i32 %cast)
+  %1 = call i32 @llvm.fshl.i32(i32 %conv1, i32 %conv1, i32 %cast)
   %2 = and i32 %1, 2147483647
   %and = zext i32 %2 to i64
   ret i64 %and
@@ -90,8 +90,8 @@ define dso_local i64 @rotatemask64(i64 noundef %word) local_unnamed_addr #0 {
 ; LINUX64LE-NEXT:    rldcl r3, r3, r4, 1
 ; LINUX64LE-NEXT:    blr
 entry:
-  %0 = tail call i64 @llvm.ctlz.i64(i64 %word, i1 false)
-  %1 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 %0)
+  %0 = call i64 @llvm.ctlz.i64(i64 %word, i1 false)
+  %1 = call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 %0)
   %and = and i64 %1, 9223372036854775807
   ret i64 %and
 }
@@ -138,8 +138,8 @@ define dso_local i64 @rotatemask64_2(i64 noundef %word) local_unnamed_addr #0 {
 ; LINUX64LE-NEXT:    rldcl r3, r3, r4, 1
 ; LINUX64LE-NEXT:    blr
 entry:
-  %0 = tail call i64 @llvm.ctlz.i64(i64 %word, i1 false)
-  %1 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 %0)
+  %0 = call i64 @llvm.ctlz.i64(i64 %word, i1 false)
+  %1 = call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 %0)
   %and = and i64 %1, 9223372036854775807
   ret i64 %and
 }
@@ -191,8 +191,8 @@ define dso_local i64 @rotatemask64_3(i64 noundef %word) local_unnamed_addr #0 {
 ; LINUX64LE-NEXT:    rldicl r3, r3, 8, 1
 ; LINUX64LE-NEXT:    blr
 entry:
-  %0 = tail call i64 @llvm.ctlz.i64(i64 %word, i1 false)
-  %1 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 %0)
+  %0 = call i64 @llvm.ctlz.i64(i64 %word, i1 false)
+  %1 = call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 %0)
   %and = and i64 %1, 9223372036854775552
   ret i64 %and
 }
@@ -229,7 +229,7 @@ define dso_local i64 @rotatemask64_nocount(i64 noundef %word, i64 noundef %clz)
 ; LINUX64LE-NEXT:    rldcl r3, r3, r4, 8
 ; LINUX64LE-NEXT:    blr
 entry:
-  %0 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 %clz)
+  %0 = call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 %clz)
   %and = and i64 %0, 72057594037927935
   ret i64 %and
 }
@@ -266,7 +266,7 @@ define dso_local i64 @builtincheck(i64 noundef %word, i64 noundef %shift) local_
 ; LINUX64LE-NEXT:    rldcl r3, r3, r4, 1
 ; LINUX64LE-NEXT:    blr
 entry:
-  %0 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 %shift)
+  %0 = call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 %shift)
   %1 = and i64 %0, 9223372036854775807
   ret i64 %1
 }
@@ -297,7 +297,7 @@ define dso_local i64 @immshift(i64 noundef %word) local_unnamed_addr #0 {
 ; LINUX64LE-NEXT:    rldicl r3, r3, 15, 12
 ; LINUX64LE-NEXT:    blr
 entry:
-  %0 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 15)
+  %0 = call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 15)
   %and = and i64 %0, 4503599627370495
   ret i64 %and
 }
@@ -382,11 +382,11 @@ define dso_local i64 @twomasks(i64 noundef %word) local_unnamed_addr #0 {
 ; LINUX64LE-NEXT:    mtlr r0
 ; LINUX64LE-NEXT:    blr
 entry:
-  %0 = tail call i64 @llvm.ctlz.i64(i64 %word, i1 false)
-  %1 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 %0)
+  %0 = call i64 @llvm.ctlz.i64(i64 %word, i1 false)
+  %1 = call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 %0)
   %and = and i64 %1, 9223372036854775807
   %and1 = and i64 %1, 281474976710655
-  %call = tail call i64 @callee(i64 noundef %and, i64 noundef %and1) #0
+  %call = call i64 @callee(i64 noundef %and, i64 noundef %and1) #0
   ret i64 %call
 }
 
@@ -476,12 +476,12 @@ define dso_local i64 @tworotates(i64 noundef %word) local_unnamed_addr #0 {
 ; LINUX64LE-NEXT:    mtlr r0
 ; LINUX64LE-NEXT:    blr
 entry:
-  %0 = tail call i64 @llvm.ctlz.i64(i64 %word, i1 false)
-  %1 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 %0)
-  %2 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 23)
+  %0 = call i64 @llvm.ctlz.i64(i64 %word, i1 false)
+  %1 = call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 %0)
+  %2 = call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 23)
   %and = and i64 %1, 9223372036854775807
   %and1 = and i64 %2, 9223372036854775807
-  %call = tail call i64 @callee(i64 noundef %and, i64 noundef %and1) #0
+  %call = call i64 @callee(i64 noundef %and, i64 noundef %and1) #0
   ret i64 %call
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll b/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll
index d0a7444e64458..2791451c53193 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll
@@ -3,7 +3,7 @@
 ; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -disable-ppc-sco=false --enable-shrink-wrap=false | FileCheck %s -check-prefix=CHECK-SCO-ONLY
 ; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -disable-ppc-sco=false --enable-shrink-wrap=true | FileCheck %s -check-prefix=CHECK-SCO-SR
 ; RUN: not --crash llc -relocation-model=pic -verify-machineinstrs < %s -mtriple=powerpc64-ibm-aix-xcoff -tailcallopt -disable-ppc-sco=false --enable-shrink-wrap=true 2>&1 | FileCheck %s -check-prefix=CHECK-AIX
-;; The above RUN command is expected to fail on AIX since tail calling is not implemented ATM
+;; The above RUN command is expected to fail on AIX since tail calling is not implemented ATM
 %"class.clang::NamedDecl" = type { i32 }
 declare void @__assert_fail();
 
@@ -14,12 +14,12 @@ entry:
   br i1 %tobool, label %cond.false, label %exit
 
 cond.false:
-  tail call void @__assert_fail()
+  call void @__assert_fail()
   unreachable
 
 exit:
   %bf.load = load i32, ptr %this, align 4
-  %call.i = tail call i8 @LVComputationKind(
+  %call.i = call i8 @LVComputationKind(
     ptr %this,
     i32 %bf.load)
   ret i8 %call.i
diff --git a/llvm/test/CodeGen/PowerPC/save-reg-params.ll b/llvm/test/CodeGen/PowerPC/save-reg-params.ll
index da4cd51c864ea..25b628675e1ea 100644
--- a/llvm/test/CodeGen/PowerPC/save-reg-params.ll
+++ b/llvm/test/CodeGen/PowerPC/save-reg-params.ll
@@ -52,7 +52,7 @@ entry:
   %add6 = add nsw i64 %add5, %h
   %add7 = add nsw i64 %add6, %i
   %add8 = add nsw i64 %add7, %j
-  tail call void @foo()
+  call void @foo()
   ret void
 }
 
@@ -105,7 +105,7 @@ entry:
   %add6 = add nsw i64 %add5, %h
   %add7 = add nsw i64 %add6, %i
   %add8 = add nsw i64 %add7, %j
-  tail call void @foo()
+  call void @foo()
   ret void
 }
 
@@ -159,7 +159,7 @@ entry:
   %add6 = add nsw i32 %add5, %h
   %add7 = add nsw i32 %add6, %i
   %add8 = add nsw i32 %add7, %j
-  tail call void @foo()
+  call void @foo()
   ret void
 }
 
@@ -212,7 +212,7 @@ entry:
   %add6 = add nsw i32 %add5, %h
   %add7 = add nsw i32 %add6, %i
   %add8 = add nsw i32 %add7, %j
-  tail call void @foo()
+  call void @foo()
   ret void
 }
 
@@ -270,7 +270,7 @@ entry:
   %add6 = fadd float %add5, %h
   %add7 = fadd float %add6, %i
   %add8 = fadd float %add7, %j
-  tail call void @foo()
+  call void @foo()
   ret void
 }
 
@@ -328,7 +328,7 @@ entry:
   %add6 = fadd float %add5, %h
   %add7 = fadd float %add6, %i
   %add8 = fadd float %add7, %j
-  tail call void @foo()
+  call void @foo()
   ret void
 }
 
@@ -386,7 +386,7 @@ entry:
   %add6 = fadd double %add5, %h
   %add7 = fadd double %add6, %i
   %add8 = fadd double %add7, %j
-  tail call void @foo()
+  call void @foo()
   ret void
 }
 
@@ -444,7 +444,7 @@ entry:
   %add6 = fadd double %add5, %h
   %add7 = fadd double %add6, %i
   %add8 = fadd double %add7, %j
-  tail call void @foo()
+  call void @foo()
   ret void
 }
 
@@ -574,16 +574,16 @@ define void @mixed_1(double %a, i64 %b, i64 %c, i32 signext %d, i64 %e, float %f
 ; 64BIT-NEXT:    blr
 entry:
   %add = fadd double %a, %j
-  tail call void @consume_f64(double %add)
+  call void @consume_f64(double %add)
   %add1 = fadd float %f, %g
-  tail call void @consume_f32(float %add1)
+  call void @consume_f32(float %add1)
   %add2 = add nsw i64 %c, %b
   %conv = sext i32 %d to i64
   %add3 = add nsw i64 %add2, %conv
   %add4 = add nsw i64 %add3, %e
-  tail call void @consume_i64(i64 %add4)
+  call void @consume_i64(i64 %add4)
   %add5 = add nsw i32 %i, %d
-  tail call void @consume_i32(i32 signext %add5)
+  call void @consume_i32(i32 signext %add5)
   ret void
 }
 
@@ -644,9 +644,9 @@ entry:
   %vecext = extractelement <4 x i32> %b, i64 0
   %conv = sext i32 %vecext to i64
   %add = add nsw i64 %conv, %c
-  tail call void @consume_i64(i64 %add)
+  call void @consume_i64(i64 %add)
   %vecext1 = extractelement <2 x double> %a, i64 0
-  tail call void @consume_f64(double %vecext1)
+  call void @consume_f64(double %vecext1)
   ret void
 }
 
@@ -741,10 +741,10 @@ entry:
   %vecext = extractelement <2 x double> %a, i64 0
   %add = fadd double %vecext, %f
   %add1 = fadd double %add, %c
-  tail call void @consume_f64(double %add1)
-  tail call void @consume_i64(i64 %b)
-  tail call void @consume_f32(float %d)
-  tail call void @consume_i32(i32 signext %e)
+  call void @consume_f64(double %add1)
+  call void @consume_i64(i64 %b)
+  call void @consume_f32(float %d)
+  call void @consume_i32(i32 signext %e)
   ret void
 }
 
@@ -849,10 +849,10 @@ define void @mixed_5(ptr byref(%struct.bar) align 16 %r, ptr byval(%struct.bar)
 entry:
   %d = getelementptr inbounds i8, ptr %f, i64 12
   %0 = load double, ptr %d, align 4
-  tail call void @consume_f64(double %0)
+  call void @consume_f64(double %0)
   %i = getelementptr inbounds i8, ptr %x, i64 4
   %1 = load i32, ptr %i, align 4
-  tail call void @consume_i32(i32 signext %1)
+  call void @consume_i32(i32 signext %1)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/undef-args.ll b/llvm/test/CodeGen/PowerPC/undef-args.ll
index cdaaec2add3ee..af4ad9a04327a 100644
--- a/llvm/test/CodeGen/PowerPC/undef-args.ll
+++ b/llvm/test/CodeGen/PowerPC/undef-args.ll
@@ -19,7 +19,7 @@
 
 define void @bar32(i32 zeroext %var1, i32 noundef zeroext %var2) local_unnamed_addr  {
 entry:
-  tail call void @foo32(i32 noundef zeroext %var2)
+  call void @foo32(i32 noundef zeroext %var2)
   ret void
 }
 
@@ -27,7 +27,7 @@ declare void @foo32(i32 noundef zeroext) local_unnamed_addr
 
 define void @test32() local_unnamed_addr {
 entry:
-  tail call void @bar32(i32 zeroext poison, i32 noundef zeroext 255)
+  call void @bar32(i32 zeroext poison, i32 noundef zeroext 255)
   ret void
 }
 
@@ -85,7 +85,7 @@ entry:
 
 define void @bar8(i8 zeroext %var1, i8 noundef zeroext %var2) local_unnamed_addr  {
 entry:
-  tail call void @foo8(i8 noundef zeroext %var2)
+  call void @foo8(i8 noundef zeroext %var2)
   ret void
 }
 
@@ -93,7 +93,7 @@ declare void @foo8(i8 noundef zeroext) local_unnamed_addr
 
 define void @test8() local_unnamed_addr {
 entry:
-  tail call void @bar8(i8 zeroext poison, i8 noundef zeroext 255)
+  call void @bar8(i8 zeroext poison, i8 noundef zeroext 255)
   ret void
 }
 
@@ -156,7 +156,7 @@ entry:
 
 define void @bar64(i64 zeroext %var1, i64 noundef zeroext %var2) local_unnamed_addr  {
 entry:
-  tail call void @foo64(i64 noundef zeroext %var2)
+  call void @foo64(i64 noundef zeroext %var2)
   ret void
 }
 
@@ -165,7 +165,7 @@ declare void @foo64(i64 noundef zeroext) local_unnamed_addr
 ; Function Attrs: noinline nounwind
 define void @test64() local_unnamed_addr {
 entry:
-  tail call void @bar64(i64 zeroext poison, i64 noundef zeroext 255)
+  call void @bar64(i64 zeroext poison, i64 noundef zeroext 255)
   ret void
 }
 

>From b73c8db8b9a89191dbebe1c2eef17fa5d7a713d3 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 22 Oct 2025 18:29:26 +0000
Subject: [PATCH 05/11] delete some unused code

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 28 +++++----------------
 1 file changed, 6 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index a75334fdca016..e8b7dd9710d0f 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7366,9 +7366,6 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
 
   const bool IsPPC64 = Subtarget.isPPC64();
   const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
-  // Potential tail calls could cause overwriting of argument stack slots.
-  const bool IsImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
-                             (CallConv == CallingConv::Fast));
 
   // Assign locations to all of the incoming arguments.
   SmallVector<CCValAssign, 16> ArgLocs;
@@ -7434,6 +7431,10 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
       // Objects are right-justified because AIX is big-endian.
       if (LocSize > ValSize)
         CurArgOffset += LocSize - ValSize;
+      // Potential tail calls could cause overwriting of argument stack slots.
+      const bool IsImmutable =
+          !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+            (CallConv == CallingConv::Fast));
       int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
       SDValue ArgValue =
@@ -7744,12 +7745,6 @@ SDValue PPCTargetLowering::LowerCall_AIX(
   const unsigned NumBytes = std::max<unsigned>(
       LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
 
-  unsigned AlignNumBytes =
-      EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
-  int SPDiff = IsSibCall ? 0
-                         : CalculateTailCallSPDiff(DAG, CFlags.IsTailCall,
-                                                   AlignNumBytes);
-
   // To protect arguments on the stack from being clobbered in a tail call,
   // force all the loads to happen before doing any other lowering.
   if (CFlags.IsTailCall)
@@ -7760,12 +7755,9 @@ SDValue PPCTargetLowering::LowerCall_AIX(
   if (!IsSibCall)
     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
   SDValue CallSeqStart = Chain;
-  SDValue LROp, FPOp;
-  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
 
   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
   SmallVector<SDValue, 8> MemOpChains;
-  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
 
   // Set up a copy of the stack pointer for loading and storing any
   // arguments that may not fit in the registers available for argument
@@ -7942,7 +7934,6 @@ SDValue PPCTargetLowering::LowerCall_AIX(
     }
 
     if (VA.isMemLoc()) {
-      if (!CFlags.IsTailCall) {
       SDValue PtrOff =
           DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
@@ -7950,10 +7941,6 @@ SDValue PPCTargetLowering::LowerCall_AIX(
           DAG.getStore(Chain, dl, Arg, PtrOff,
                        MachinePointerInfo::getStack(MF, VA.getLocMemOffset()),
                        Subtarget.getFrameLowering()->getStackAlign()));
-      } else
-        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff,
-                                 VA.getLocMemOffset(), TailCallArguments);
-
       continue;
     }
 
@@ -8034,11 +8021,8 @@ SDValue PPCTargetLowering::LowerCall_AIX(
     Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
     InGlue = Chain.getValue(1);
   }
-  /*
-    if (CFlags.IsTailCall && !IsSibCall)
-      PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
-                      TailCallArguments);
-  */
+
+  const int SPDiff = 0;
   return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
 }

>From 4aaf43bd915193b7edfcfd43730923bed46272aa Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Thu, 23 Oct 2025 19:43:40 +0000
Subject: [PATCH 06/11] fix a machine instruction verify error

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  6 ++++
 .../PowerPC/ppc64-sibcall-shrinkwrap.ll       | 32 +++++++++++++++----
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index e8b7dd9710d0f..c536935379592 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -8022,6 +8022,12 @@ SDValue PPCTargetLowering::LowerCall_AIX(
     InGlue = Chain.getValue(1);
   }
 
+  if (CFlags.IsTailCall && !IsSibCall) {
+    // Emit callseq_end just before tailcall node.
+    Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
+    InGlue = Chain.getValue(1);
+  }
+
   const int SPDiff = 0;
   return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll b/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll
index 2791451c53193..37060dd338019 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll
@@ -2,8 +2,13 @@
 ; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -disable-ppc-sco=false --enable-shrink-wrap=true | FileCheck %s -check-prefix=CHECK-SCO-SR
 ; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -disable-ppc-sco=false --enable-shrink-wrap=false | FileCheck %s -check-prefix=CHECK-SCO-ONLY
 ; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -disable-ppc-sco=false --enable-shrink-wrap=true | FileCheck %s -check-prefix=CHECK-SCO-SR
-; RUN: not --crash llc -relocation-model=pic -verify-machineinstrs < %s -mtriple=powerpc64-ibm-aix-xcoff -tailcallopt -disable-ppc-sco=false --enable-shrink-wrap=true 2>&1 | FileCheck %s -check-prefix=CHECK-AIX
-;; The above RUN command is expected to fail on AIX since calling is not implemented ATM
+
+
+; RUN: llc -relocation-model=pic -verify-machineinstrs < %s -mtriple=powerpc64-ibm-aix-xcoff -disable-ppc-sco=false --enable-shrink-wrap=false | FileCheck %s -check-prefixes=CHECK-SCO-ONLY-AIX,CHECK-SCO-ONLY-AIX64
+; RUN: llc -relocation-model=pic -verify-machineinstrs < %s -mtriple=powerpc-ibm-aix-xcoff -disable-ppc-sco=false --enable-shrink-wrap=false | FileCheck %s -check-prefixes=CHECK-SCO-ONLY-AIX,CHECK-SCO-ONLY-AIX32
+; RUN: llc -relocation-model=pic -verify-machineinstrs < %s -mtriple=powerpc64-ibm-aix-xcoff -disable-ppc-sco=false --enable-shrink-wrap=true | FileCheck %s -check-prefixes=CHECK-SCO-SR-AIX64,CHECK-SCO-SR-AIX
+; RUN: llc -relocation-model=pic -verify-machineinstrs < %s -mtriple=powerpc-ibm-aix-xcoff -disable-ppc-sco=false --enable-shrink-wrap=true | FileCheck %s -check-prefixes=CHECK-SCO-SR-AIX32,CHECK-SCO-SR-AIX
+
 %"class.clang::NamedDecl" = type { i32 }
 declare void @__assert_fail();
 
@@ -14,12 +19,12 @@ entry:
   br i1 %tobool, label %cond.false, label %exit
 
 cond.false:
-  call void @__assert_fail()
+  tail call void @__assert_fail()
   unreachable
 
 exit:
   %bf.load = load i32, ptr %this, align 4
-  %call.i = call i8 @LVComputationKind(
+  %call.i = tail call i8 @LVComputationKind(
     ptr %this,
     i32 %bf.load)
   ret i8 %call.i
@@ -29,14 +34,29 @@ exit:
 ; CHECK-SCO-ONLY: b LVComputationKind
 ; CHECK-SCO-ONLY: #TC_RETURNd8
 ; CHECK-SCO-ONLY: bl __assert_fail
-;
+
+; CHECK-SCO-ONLY-AIX-LABEL: _ZNK5clang9NamedDecl23getLinkageAndVisibilityEv:
+; CHECK-SCO-ONLY-AIX64: stdu 1, -{{[0-9]+}}(1)
+; CHECK-SCO-ONLY-AIX32: stwu 1, -{{[0-9]+}}(1)
+; CHECK-SCO-ONLY-AIX: b .LVComputationKind
+; CHECK-SCO-ONLY-AIX64: #TC_RETURNd8
+; CHECK-SCO-ONLY-AIX32: #TC_RETURNd
+; CHECK-SCO-ONLY-AIX: bl .__assert_fail
+
 ; CHECK-SCO-SR-LABEL: _ZNK5clang9NamedDecl23getLinkageAndVisibilityEv:
 ; CHECK-SCO-SR: b LVComputationKind
 ; CHECK-SCO-SR: #TC_RETURNd8
 ; CHECK-SCO-SR: stdu 1, -{{[0-9]+}}(1)
 ; CHECK-SCO-SR: bl __assert_fail
 
-; CHECK-AIX: LLVM ERROR: Tail call support for non-fastcc calling convention is unimplemented on AIX.
+; CHECK-SCO-SR-AIX-LABEL: _ZNK5clang9NamedDecl23getLinkageAndVisibilityEv:
+; CHECK-SCO-SR-AIX: b .LVComputationKind
+; CHECK-SCO-SR-AIX64: #TC_RETURNd8
+; CHECK-SCO-SR-AIX64: stdu 1, -{{[0-9]+}}(1)
+; CHECK-SCO-SR-AIX32: #TC_RETURNd
+; CHECK-SCO-SR-AIX32: stwu 1, -{{[0-9]+}}(1)
+; CHECK-SCO-SR-AIX: bl .__assert_fail
+
 }
 
 define dso_local fastcc i8 @LVComputationKind(

>From f8cbacb282a0986827f82d5c1074d4339d2bf599 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Mon, 27 Oct 2025 17:35:13 +0000
Subject: [PATCH 07/11] add share TOC base

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index c536935379592..3e4da79a87cae 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5265,6 +5265,11 @@ bool PPCTargetLowering::IsEligibleForTailCallOptimization_AIX(
   if (CalleeCC == CallingConv::Fast && TailCallOpt)
     return true;
 
+  // Check if we share the TOC base.
+  if (!Subtarget.isUsingPCRelativeCalls() &&
+      !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
+    return false;
+
   if (DisableSCO)
     return false;
 

>From bcbe81f9193a682ec7e2650a553d2926e1ddfe48 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Mon, 27 Oct 2025 19:48:16 +0000
Subject: [PATCH 08/11] calculate callee parameter size

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 78 ++++++++++-----------
 1 file changed, 38 insertions(+), 40 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 3e4da79a87cae..909ab0505fde4 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5182,28 +5182,21 @@ bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
   return true;
 }
 
-static bool
-needStackSlotPassParameters_AIX(const PPCSubtarget &Subtarget,
-                                const SmallVectorImpl<ISD::OutputArg> &Outs) {
+static void calculeStackSlotSizeForParameter_AIX(const PPCSubtarget &Subtarget,
+                                                 MVT ArgVT,
+                                                 ISD::ArgFlagsTy ArgFlags,
+                                                 unsigned &StackSize,
+                                                 Align &MaxStackArgAlign) {
   const bool IsPPC64 = Subtarget.isPPC64();
   const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
-  const unsigned PhyGPRsNum = 8;
-  const unsigned PhyVRsNum = 12;
-  unsigned PhyGPRAllocated = 0;
-  unsigned PhyVRAllocated = 0;
-
-  for (unsigned i = 0; i != Outs.size(); ++i) {
-    MVT ArgVT = Outs[i].VT;
-    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
-    if (ArgFlags.isByVal()) {
-      const unsigned ByValSize = ArgFlags.getByValSize();
-      const unsigned StackSize = alignTo(ByValSize, PtrAlign);
-      PhyGPRAllocated += StackSize / PtrAlign.value();
-      if (PhyGPRAllocated > PhyGPRsNum)
-        return true;
-      continue;
-    }
+  unsigned Size = 0;
+  Align Alignment = PtrAlign;
 
+  if (ArgFlags.isByVal()) {
+    Size = ArgFlags.getByValSize();
+    const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
+    Alignment = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
+  } else {
     switch (ArgVT.SimpleTy) {
     default:
       report_fatal_error("Unhandled value type for argument.");
@@ -5213,17 +5206,13 @@ needStackSlotPassParameters_AIX(const PPCSubtarget &Subtarget,
       [[fallthrough]];
     case MVT::i1:
     case MVT::i32:
-      if (++PhyGPRAllocated > PhyGPRsNum)
-        return true;
+      Size = PtrAlign.value();
       break;
     case MVT::f32:
-    case MVT::f64: {
-      const unsigned StoreSize = ArgVT.getStoreSize();
-      PhyGPRAllocated += StoreSize / PtrAlign.value();
-      if (PhyGPRAllocated > PhyGPRsNum)
-        return true;
+    case MVT::f64:
+      Size = ArgVT.getStoreSize();
+      Alignment = Align(4);
       break;
-    }
     case MVT::v4f32:
     case MVT::v4i32:
     case MVT::v8i16:
@@ -5231,12 +5220,12 @@ needStackSlotPassParameters_AIX(const PPCSubtarget &Subtarget,
     case MVT::v2i64:
     case MVT::v2f64:
     case MVT::v1i128:
-      if (++PhyVRAllocated > PhyVRsNum)
-        return true;
+      Size = 16;
+      Alignment = Align(16);
     }
   }
-
-  return false;
+  StackSize = alignTo(StackSize, Alignment) + Size;
+  MaxStackArgAlign = std::max(Alignment, MaxStackArgAlign);
 }
 
 bool PPCTargetLowering::IsEligibleForTailCallOptimization_AIX(
@@ -5273,19 +5262,28 @@ bool PPCTargetLowering::IsEligibleForTailCallOptimization_AIX(
   if (DisableSCO)
     return false;
 
-  if (CallerCC != CalleeCC && needStackSlotPassParameters_AIX(Subtarget, Outs))
+  unsigned CalleeArgSize = 0;
+  Align MaxAligment = Align(1);
+  for (auto OutArg : Outs)
+    calculeStackSlotSizeForParameter_AIX(Subtarget, OutArg.VT, OutArg.Flags,
+                                         CalleeArgSize, MaxAligment);
+  CalleeArgSize = alignTo(CalleeArgSize, MaxAligment);
+
+  // TODO: In the future, calculate the actual caller argument size
+  // instead of using the minimum parameter save area.
+  unsigned MinPSA = 8 * (Subtarget.isPPC64() ? 8 : 4);
+
+  if (CallerCC != CalleeCC && CalleeArgSize > MinPSA)
     return false;
 
   // If callee use the same argument list that caller is using, then we can
-  // apply SCO on this case. If it is not, then we need to check if callee needs
-  // stack for passing arguments.
-  // PC Relative tail calls may not have a CallBase.
-  // If there is no CallBase we cannot verify if we have the same argument
-  // list so assume that we don't have the same argument list.
-  if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
-      needStackSlotPassParameters_AIX(Subtarget, Outs))
+  // apply SCO on this case. If it is not, then we need to check if callee
+  // needs stack for passing arguments. PC Relative tail calls may not have
+  // a CallBase. If there is no CallBase we cannot verify if we have the
+  // same argument list so assume that we don't have the same argument list.
+  if (CB && !hasSameArgumentList(CallerFunc, *CB) && CalleeArgSize > MinPSA)
     return false;
-  else if (!CB && needStackSlotPassParameters_AIX(Subtarget, Outs))
+  else if (!CB && CalleeArgSize > MinPSA)
     return false;
 
   return true;

>From e7bb1fe2c47d79e2da8ee2ffdbbca222906d6194 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Thu, 30 Oct 2025 17:05:30 +0000
Subject: [PATCH 09/11] fix an error

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 909ab0505fde4..9622e3a74dd0c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5210,7 +5210,7 @@ static void calculeStackSlotSizeForParameter_AIX(const PPCSubtarget &Subtarget,
       break;
     case MVT::f32:
     case MVT::f64:
-      Size = ArgVT.getStoreSize();
+      Size = IsPPC64 ? 8 : ArgVT.getStoreSize();
       Alignment = Align(4);
       break;
     case MVT::v4f32:

>From 062437dc58e131e602c8853e63f2f9b326869c15 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Thu, 30 Oct 2025 19:33:57 +0000
Subject: [PATCH 10/11] add more test case to tailcall-opt.ll

---
 llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll | 670 ++++++++++++++++--
 1 file changed, 597 insertions(+), 73 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll b/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll
index a23fd2a8ae2c0..1a4b9a9cfded4 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll
@@ -1,111 +1,635 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix < %s | FileCheck --check-prefix=AIX-32 %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix < %s | FileCheck --check-prefix=AIX-64 %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=AIX32 %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=AIX64 %s
+
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix -tailcallopt -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=AIX32-OPT %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix -tailcallopt -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=AIX64-OPT %s
 
 define hidden fastcc i32 @k(i32 %a, i32 %b) {
-; AIX-32-LABEL: k:
-; AIX-32:       # %bb.0: # %entry
-; AIX-32-NEXT:    add 3, 3, 4
-; AIX-32-NEXT:    blr
-;
-; AIX-64-LABEL: k:
-; AIX-64:       # %bb.0: # %entry
-; AIX-64-NEXT:    add 3, 3, 4
-; AIX-64-NEXT:    blr
+; AIX32-LABEL: k:
+; AIX32:       # %bb.0: # %entry
+; AIX32-NEXT:    add r3, r3, r4
+; AIX32-NEXT:    blr
+;
+; AIX64-LABEL: k:
+; AIX64:       # %bb.0: # %entry
+; AIX64-NEXT:    add r3, r3, r4
+; AIX64-NEXT:    blr
+;
+; AIX32-OPT-LABEL: k:
+; AIX32-OPT:       # %bb.0: # %entry
+; AIX32-OPT-NEXT:    add r3, r3, r4
+; AIX32-OPT-NEXT:    addi r1, r1, 64
+; AIX32-OPT-NEXT:    blr
+;
+; AIX64-OPT-LABEL: k:
+; AIX64-OPT:       # %bb.0: # %entry
+; AIX64-OPT-NEXT:    add r3, r3, r4
+; AIX64-OPT-NEXT:    addi r1, r1, 112
+; AIX64-OPT-NEXT:    blr
 entry:
   %c = add i32 %a, %b
   ret i32 %c
 }
 
 define hidden fastcc i32 @ff(i32 %a) {
-; AIX-32-LABEL: ff:
-; AIX-32:       # %bb.0: # %entry
-; AIX-32-NEXT:    blr
+; AIX32-LABEL: ff:
+; AIX32:       # %bb.0: # %entry
+; AIX32-NEXT:    blr
+;
+; AIX64-LABEL: ff:
+; AIX64:       # %bb.0: # %entry
+; AIX64-NEXT:    blr
 ;
-; AIX-64-LABEL: ff:
-; AIX-64:       # %bb.0: # %entry
-; AIX-64-NEXT:    blr
+; AIX32-OPT-LABEL: ff:
+; AIX32-OPT:       # %bb.0: # %entry
+; AIX32-OPT-NEXT:    addi r1, r1, 64
+; AIX32-OPT-NEXT:    blr
+;
+; AIX64-OPT-LABEL: ff:
+; AIX64-OPT:       # %bb.0: # %entry
+; AIX64-OPT-NEXT:    addi r1, r1, 112
+; AIX64-OPT-NEXT:    blr
 entry:
   ret i32 %a
 }
 
 define fastcc i32 @f(i32 %a, i32 %b) {
-; AIX-32-LABEL: f:
-; AIX-32:       # %bb.0: # %entry
-; AIX-32-NEXT:    b .ff
-; AIX-32-NEXT:    #TC_RETURNd .ff 0
-;
-; AIX-64-LABEL: f:
-; AIX-64:       # %bb.0: # %entry
-; AIX-64-NEXT:    clrldi 3, 3, 32
-; AIX-64-NEXT:    b .ff
-; AIX-64-NEXT:    #TC_RETURNd8 .ff 0
+; AIX32-LABEL: f:
+; AIX32:       # %bb.0: # %entry
+; AIX32-NEXT:    b .ff
+; AIX32-NEXT:    #TC_RETURNd .ff 0
+;
+; AIX64-LABEL: f:
+; AIX64:       # %bb.0: # %entry
+; AIX64-NEXT:    clrldi r3, r3, 32
+; AIX64-NEXT:    b .ff
+; AIX64-NEXT:    #TC_RETURNd8 .ff 0
+;
+; AIX32-OPT-LABEL: f:
+; AIX32-OPT:       # %bb.0: # %entry
+; AIX32-OPT-NEXT:    stwu r1, -64(r1)
+; AIX32-OPT-NEXT:    addi r1, r1, 64
+; AIX32-OPT-NEXT:    b .ff
+; AIX32-OPT-NEXT:    #TC_RETURNd .ff 0
+;
+; AIX64-OPT-LABEL: f:
+; AIX64-OPT:       # %bb.0: # %entry
+; AIX64-OPT-NEXT:    stdu r1, -112(r1)
+; AIX64-OPT-NEXT:    clrldi r3, r3, 32
+; AIX64-OPT-NEXT:    addi r1, r1, 112
+; AIX64-OPT-NEXT:    b .ff
+; AIX64-OPT-NEXT:    #TC_RETURNd8 .ff 0
 entry:
   %r = tail call fastcc i32 @ff(i32 %a)
   ret i32 %r
 }
 
 define fastcc i32 @kk(i32 %a) {
-; AIX-32-LABEL: kk:
-; AIX-32:       # %bb.0: # %entry
-; AIX-32-NEXT:    li 4, 1024
-; AIX-32-NEXT:    b .k
-; AIX-32-NEXT:    #TC_RETURNd .k 0
-;
-; AIX-64-LABEL: kk:
-; AIX-64:       # %bb.0: # %entry
-; AIX-64-NEXT:    clrldi 3, 3, 32
-; AIX-64-NEXT:    li 4, 1024
-; AIX-64-NEXT:    b .k
-; AIX-64-NEXT:    #TC_RETURNd8 .k 0
+; AIX32-LABEL: kk:
+; AIX32:       # %bb.0: # %entry
+; AIX32-NEXT:    li r4, 1024
+; AIX32-NEXT:    b .k
+; AIX32-NEXT:    #TC_RETURNd .k 0
+;
+; AIX64-LABEL: kk:
+; AIX64:       # %bb.0: # %entry
+; AIX64-NEXT:    clrldi r3, r3, 32
+; AIX64-NEXT:    li r4, 1024
+; AIX64-NEXT:    b .k
+; AIX64-NEXT:    #TC_RETURNd8 .k 0
+;
+; AIX32-OPT-LABEL: kk:
+; AIX32-OPT:       # %bb.0: # %entry
+; AIX32-OPT-NEXT:    stwu r1, -64(r1)
+; AIX32-OPT-NEXT:    li r4, 1024
+; AIX32-OPT-NEXT:    addi r1, r1, 64
+; AIX32-OPT-NEXT:    b .k
+; AIX32-OPT-NEXT:    #TC_RETURNd .k 0
+;
+; AIX64-OPT-LABEL: kk:
+; AIX64-OPT:       # %bb.0: # %entry
+; AIX64-OPT-NEXT:    stdu r1, -112(r1)
+; AIX64-OPT-NEXT:    clrldi r3, r3, 32
+; AIX64-OPT-NEXT:    li r4, 1024
+; AIX64-OPT-NEXT:    addi r1, r1, 112
+; AIX64-OPT-NEXT:    b .k
+; AIX64-OPT-NEXT:    #TC_RETURNd8 .k 0
 entry:
   %r = tail call fastcc i32 @k(i32 %a, i32 1024)
   ret i32 %r
 }
 
 define fastcc i32 @g(i32 %a) {
-; AIX-32-LABEL: g:
-; AIX-32:       # %bb.0: # %entry
-; AIX-32-NEXT:    b .ff
-; AIX-32-NEXT:    #TC_RETURNd .ff 0
-;
-; AIX-64-LABEL: g:
-; AIX-64:       # %bb.0: # %entry
-; AIX-64-NEXT:    clrldi 3, 3, 32
-; AIX-64-NEXT:    b .ff
-; AIX-64-NEXT:    #TC_RETURNd8 .ff 0
+; AIX32-LABEL: g:
+; AIX32:       # %bb.0: # %entry
+; AIX32-NEXT:    b .ff
+; AIX32-NEXT:    #TC_RETURNd .ff 0
+;
+; AIX64-LABEL: g:
+; AIX64:       # %bb.0: # %entry
+; AIX64-NEXT:    clrldi r3, r3, 32
+; AIX64-NEXT:    b .ff
+; AIX64-NEXT:    #TC_RETURNd8 .ff 0
+;
+; AIX32-OPT-LABEL: g:
+; AIX32-OPT:       # %bb.0: # %entry
+; AIX32-OPT-NEXT:    stwu r1, -64(r1)
+; AIX32-OPT-NEXT:    addi r1, r1, 64
+; AIX32-OPT-NEXT:    b .ff
+; AIX32-OPT-NEXT:    #TC_RETURNd .ff 0
+;
+; AIX64-OPT-LABEL: g:
+; AIX64-OPT:       # %bb.0: # %entry
+; AIX64-OPT-NEXT:    stdu r1, -112(r1)
+; AIX64-OPT-NEXT:    clrldi r3, r3, 32
+; AIX64-OPT-NEXT:    addi r1, r1, 112
+; AIX64-OPT-NEXT:    b .ff
+; AIX64-OPT-NEXT:    #TC_RETURNd8 .ff 0
 entry:
   %r = tail call fastcc i32 @ff(i32 %a)
   ret i32 %r
 }
 
 define fastcc i32 @gg(i32 %a) {
-; AIX-32-LABEL: gg:
-; AIX-32:       # %bb.0: # %entry
-; AIX-32-NEXT:    mflr 0
-; AIX-32-NEXT:    stwu 1, -64(1)
-; AIX-32-NEXT:    stw 0, 72(1)
-; AIX-32-NEXT:    bl .ff
-; AIX-32-NEXT:    addi 3, 3, 1
-; AIX-32-NEXT:    addi 1, 1, 64
-; AIX-32-NEXT:    lwz 0, 8(1)
-; AIX-32-NEXT:    mtlr 0
-; AIX-32-NEXT:    blr
-;
-; AIX-64-LABEL: gg:
-; AIX-64:       # %bb.0: # %entry
-; AIX-64-NEXT:    mflr 0
-; AIX-64-NEXT:    stdu 1, -112(1)
-; AIX-64-NEXT:    clrldi 3, 3, 32
-; AIX-64-NEXT:    std 0, 128(1)
-; AIX-64-NEXT:    bl .ff
-; AIX-64-NEXT:    addi 3, 3, 1
-; AIX-64-NEXT:    addi 1, 1, 112
-; AIX-64-NEXT:    ld 0, 16(1)
-; AIX-64-NEXT:    mtlr 0
-; AIX-64-NEXT:    blr
+; AIX32-LABEL: gg:
+; AIX32:       # %bb.0: # %entry
+; AIX32-NEXT:    mflr r0
+; AIX32-NEXT:    stwu r1, -64(r1)
+; AIX32-NEXT:    stw r0, 72(r1)
+; AIX32-NEXT:    bl .ff
+; AIX32-NEXT:    addi r3, r3, 1
+; AIX32-NEXT:    addi r1, r1, 64
+; AIX32-NEXT:    lwz r0, 8(r1)
+; AIX32-NEXT:    mtlr r0
+; AIX32-NEXT:    blr
+;
+; AIX64-LABEL: gg:
+; AIX64:       # %bb.0: # %entry
+; AIX64-NEXT:    mflr r0
+; AIX64-NEXT:    stdu r1, -112(r1)
+; AIX64-NEXT:    clrldi r3, r3, 32
+; AIX64-NEXT:    std r0, 128(r1)
+; AIX64-NEXT:    bl .ff
+; AIX64-NEXT:    addi r3, r3, 1
+; AIX64-NEXT:    addi r1, r1, 112
+; AIX64-NEXT:    ld r0, 16(r1)
+; AIX64-NEXT:    mtlr r0
+; AIX64-NEXT:    blr
+;
+; AIX32-OPT-LABEL: gg:
+; AIX32-OPT:       # %bb.0: # %entry
+; AIX32-OPT-NEXT:    mflr r0
+; AIX32-OPT-NEXT:    stwu r1, -64(r1)
+; AIX32-OPT-NEXT:    stw r0, 72(r1)
+; AIX32-OPT-NEXT:    bl .ff
+; AIX32-OPT-NEXT:    addi r1, r1, -56
+; AIX32-OPT-NEXT:    addi r3, r3, 1
+; AIX32-OPT-NEXT:    addi r1, r1, 64
+; AIX32-OPT-NEXT:    lwz r0, 8(r1)
+; AIX32-OPT-NEXT:    mtlr r0
+; AIX32-OPT-NEXT:    addi r1, r1, 64
+; AIX32-OPT-NEXT:    blr
+;
+; AIX64-OPT-LABEL: gg:
+; AIX64-OPT:       # %bb.0: # %entry
+; AIX64-OPT-NEXT:    mflr r0
+; AIX64-OPT-NEXT:    stdu r1, -112(r1)
+; AIX64-OPT-NEXT:    clrldi r3, r3, 32
+; AIX64-OPT-NEXT:    std r0, 128(r1)
+; AIX64-OPT-NEXT:    bl .ff
+; AIX64-OPT-NEXT:    addi r1, r1, -112
+; AIX64-OPT-NEXT:    addi r3, r3, 1
+; AIX64-OPT-NEXT:    addi r1, r1, 112
+; AIX64-OPT-NEXT:    ld r0, 16(r1)
+; AIX64-OPT-NEXT:    mtlr r0
+; AIX64-OPT-NEXT:    addi r1, r1, 112
+; AIX64-OPT-NEXT:    blr
 entry:
   %r = tail call fastcc i32 @ff(i32 %a)
   %r.0 = add i32 %r, 1
   ret i32 %r.0
 }
+
+
+;; The following IRs are generated from source code with command:
+;; clang -O1 -target powerpc-unknown-aix -emit-llvm
+
+; __attribute__((noinline, visibility("hidden")))
+; double c1(int a1, double a2, float a3 ,vector unsigned int v1) {
+;     return a1 + a2  ;
+; }
+;
+; double b1(int x, double y, float z) {
+;     vector unsigned int v = { 0x0000FFFF, 0,0xFF,0 };
+;     return c1(x, y, z, v);
+; }
+;
+; __attribute__((noinline, visibility("hidden")))
+; double c2(int a1, double a2, float a3 ,vector unsigned int v1 , vector unsigned int v2 ) {
+;     return a1 + a2  ;
+; }
+;
+; double b2(int x, double y, float z) {
+;     vector unsigned int v = { 0x0000FFFF, 0,0xFF,0 };
+;     return c2(x, y, z, v, v);
+; }
+;
+; __attribute__((noinline, visibility("hidden")))
+; double c3(int a1, double a2, float a3 ,vector unsigned int v1 , vector unsigned int v2 , vector unsigned int v3 ) {
+;     return a1 + a2  ;
+; }
+;
+; double b3(int x, double y, float z) {
+;     vector unsigned int v = { 0x0000FFFF, 0,0xFF,0 };
+;     return c3(x, y, z, v, v, v);
+; }
+;
+; struct ST {
+;   int a1;
+;   double b1;
+; };
+;
+; __attribute__((noinline, visibility("hidden")))
+; double c4(struct ST s, double a3 ,vector unsigned int v1 ) {
+;    return s.a1 +s.b1;
+; }
+;
+; double b4(int x, double y, float z) {
+;   struct ST s = { 1, 1.0};
+;   vector unsigned int v = { 0x0000FFFF, 0,0xFF,0 };
+;   return  c4( s, y, v);
+; }
+
+%struct.ST = type <{ i32, double }>
+
+@__const.b4.s = private unnamed_addr constant %struct.ST <{ i32 1, double 1.000000e+00 }>, align 4
+
+; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none)
+define hidden noundef double @c1(i32 noundef %a1, double noundef %a2, float %a3, <4 x i32> %v1)  {
+; AIX32-LABEL: c1:
+; AIX32:       # %bb.0: # %entry
+; AIX32-NEXT:    stw r3, -4(r1)
+; AIX32-NEXT:    addi r3, r1, -4
+; AIX32-NEXT:    lfiwax f0, 0, r3
+; AIX32-NEXT:    xscvsxddp f0, f0
+; AIX32-NEXT:    xsadddp f1, f1, f0
+; AIX32-NEXT:    blr
+;
+; AIX64-LABEL: c1:
+; AIX64:       # %bb.0: # %entry
+; AIX64-NEXT:    stw r3, -4(r1)
+; AIX64-NEXT:    addi r3, r1, -4
+; AIX64-NEXT:    lfiwax f0, 0, r3
+; AIX64-NEXT:    xscvsxddp f0, f0
+; AIX64-NEXT:    xsadddp f1, f1, f0
+; AIX64-NEXT:    blr
+;
+; AIX32-OPT-LABEL: c1:
+; AIX32-OPT:       # %bb.0: # %entry
+; AIX32-OPT-NEXT:    stw r3, -4(r1)
+; AIX32-OPT-NEXT:    addi r3, r1, -4
+; AIX32-OPT-NEXT:    lfiwax f0, 0, r3
+; AIX32-OPT-NEXT:    xscvsxddp f0, f0
+; AIX32-OPT-NEXT:    xsadddp f1, f1, f0
+; AIX32-OPT-NEXT:    blr
+;
+; AIX64-OPT-LABEL: c1:
+; AIX64-OPT:       # %bb.0: # %entry
+; AIX64-OPT-NEXT:    stw r3, -4(r1)
+; AIX64-OPT-NEXT:    addi r3, r1, -4
+; AIX64-OPT-NEXT:    lfiwax f0, 0, r3
+; AIX64-OPT-NEXT:    xscvsxddp f0, f0
+; AIX64-OPT-NEXT:    xsadddp f1, f1, f0
+; AIX64-OPT-NEXT:    blr
+entry:
+  %conv = sitofp i32 %a1 to double
+  %add = fadd double %a2, %conv
+  ret double %add
+}
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define noundef double @b1(i32 noundef %x, double noundef %y, float noundef %z)  {
+; AIX32-LABEL: b1:
+; AIX32:       # %bb.0: # %entry
+; AIX32-NEXT:    b .c1
+; AIX32-NEXT:    #TC_RETURNd .c1 0
+;
+; AIX64-LABEL: b1:
+; AIX64:       # %bb.0: # %entry
+; AIX64-NEXT:    clrldi r3, r3, 32
+; AIX64-NEXT:    b .c1
+; AIX64-NEXT:    #TC_RETURNd8 .c1 0
+;
+; AIX32-OPT-LABEL: b1:
+; AIX32-OPT:       # %bb.0: # %entry
+; AIX32-OPT-NEXT:    stwu r1, -64(r1)
+; AIX32-OPT-NEXT:    addi r1, r1, 64
+; AIX32-OPT-NEXT:    b .c1
+; AIX32-OPT-NEXT:    #TC_RETURNd .c1 0
+;
+; AIX64-OPT-LABEL: b1:
+; AIX64-OPT:       # %bb.0: # %entry
+; AIX64-OPT-NEXT:    stdu r1, -112(r1)
+; AIX64-OPT-NEXT:    clrldi r3, r3, 32
+; AIX64-OPT-NEXT:    addi r1, r1, 112
+; AIX64-OPT-NEXT:    b .c1
+; AIX64-OPT-NEXT:    #TC_RETURNd8 .c1 0
+entry:
+  %call = tail call double @c1(i32 noundef %x, double noundef %y, float poison, <4 x i32> poison)
+  ret double %call
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none)
+define hidden noundef double @c2(i32 noundef %a1, double noundef %a2, float %a3, <4 x i32> %v1, <4 x i32> %v2)  {
+; AIX32-LABEL: c2:
+; AIX32:       # %bb.0: # %entry
+; AIX32-NEXT:    stw r3, -4(r1)
+; AIX32-NEXT:    addi r3, r1, -4
+; AIX32-NEXT:    lfiwax f0, 0, r3
+; AIX32-NEXT:    xscvsxddp f0, f0
+; AIX32-NEXT:    xsadddp f1, f1, f0
+; AIX32-NEXT:    blr
+;
+; AIX64-LABEL: c2:
+; AIX64:       # %bb.0: # %entry
+; AIX64-NEXT:    stw r3, -4(r1)
+; AIX64-NEXT:    addi r3, r1, -4
+; AIX64-NEXT:    lfiwax f0, 0, r3
+; AIX64-NEXT:    xscvsxddp f0, f0
+; AIX64-NEXT:    xsadddp f1, f1, f0
+; AIX64-NEXT:    blr
+;
+; AIX32-OPT-LABEL: c2:
+; AIX32-OPT:       # %bb.0: # %entry
+; AIX32-OPT-NEXT:    stw r3, -4(r1)
+; AIX32-OPT-NEXT:    addi r3, r1, -4
+; AIX32-OPT-NEXT:    lfiwax f0, 0, r3
+; AIX32-OPT-NEXT:    xscvsxddp f0, f0
+; AIX32-OPT-NEXT:    xsadddp f1, f1, f0
+; AIX32-OPT-NEXT:    blr
+;
+; AIX64-OPT-LABEL: c2:
+; AIX64-OPT:       # %bb.0: # %entry
+; AIX64-OPT-NEXT:    stw r3, -4(r1)
+; AIX64-OPT-NEXT:    addi r3, r1, -4
+; AIX64-OPT-NEXT:    lfiwax f0, 0, r3
+; AIX64-OPT-NEXT:    xscvsxddp f0, f0
+; AIX64-OPT-NEXT:    xsadddp f1, f1, f0
+; AIX64-OPT-NEXT:    blr
+entry:
+  %conv = sitofp i32 %a1 to double
+  %add = fadd double %a2, %conv
+  ret double %add
+}
+
+define noundef double @b2(i32 noundef %x, double noundef %y, float noundef %z)  {
+; AIX32-LABEL: b2:
+; AIX32:       # %bb.0: # %entry
+; AIX32-NEXT:    mflr r0
+; AIX32-NEXT:    stwu r1, -64(r1)
+; AIX32-NEXT:    stw r0, 72(r1)
+; AIX32-NEXT:    bl .c2
+; AIX32-NEXT:    addi r1, r1, 64
+; AIX32-NEXT:    lwz r0, 8(r1)
+; AIX32-NEXT:    mtlr r0
+; AIX32-NEXT:    blr
+;
+; AIX64-LABEL: b2:
+; AIX64:       # %bb.0: # %entry
+; AIX64-NEXT:    clrldi r3, r3, 32
+; AIX64-NEXT:    b .c2
+; AIX64-NEXT:    #TC_RETURNd8 .c2 0
+;
+; AIX32-OPT-LABEL: b2:
+; AIX32-OPT:       # %bb.0: # %entry
+; AIX32-OPT-NEXT:    mflr r0
+; AIX32-OPT-NEXT:    stwu r1, -64(r1)
+; AIX32-OPT-NEXT:    stw r0, 72(r1)
+; AIX32-OPT-NEXT:    bl .c2
+; AIX32-OPT-NEXT:    addi r1, r1, 64
+; AIX32-OPT-NEXT:    lwz r0, 8(r1)
+; AIX32-OPT-NEXT:    mtlr r0
+; AIX32-OPT-NEXT:    blr
+;
+; AIX64-OPT-LABEL: b2:
+; AIX64-OPT:       # %bb.0: # %entry
+; AIX64-OPT-NEXT:    stdu r1, -112(r1)
+; AIX64-OPT-NEXT:    clrldi r3, r3, 32
+; AIX64-OPT-NEXT:    addi r1, r1, 112
+; AIX64-OPT-NEXT:    b .c2
+; AIX64-OPT-NEXT:    #TC_RETURNd8 .c2 0
+entry:
+  %call = tail call double @c2(i32 noundef %x, double noundef %y, float poison, <4 x i32> poison, <4 x i32> poison)
+  ret double %call
+}
+
+define hidden noundef double @c3(i32 noundef %a1, double noundef %a2, float %a3, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3)  {
+; AIX32-LABEL: c3:
+; AIX32:       # %bb.0: # %entry
+; AIX32-NEXT:    stw r3, -4(r1)
+; AIX32-NEXT:    addi r3, r1, -4
+; AIX32-NEXT:    lfiwax f0, 0, r3
+; AIX32-NEXT:    xscvsxddp f0, f0
+; AIX32-NEXT:    xsadddp f1, f1, f0
+; AIX32-NEXT:    blr
+;
+; AIX64-LABEL: c3:
+; AIX64:       # %bb.0: # %entry
+; AIX64-NEXT:    stw r3, -4(r1)
+; AIX64-NEXT:    addi r3, r1, -4
+; AIX64-NEXT:    lfiwax f0, 0, r3
+; AIX64-NEXT:    xscvsxddp f0, f0
+; AIX64-NEXT:    xsadddp f1, f1, f0
+; AIX64-NEXT:    blr
+;
+; AIX32-OPT-LABEL: c3:
+; AIX32-OPT:       # %bb.0: # %entry
+; AIX32-OPT-NEXT:    stw r3, -4(r1)
+; AIX32-OPT-NEXT:    addi r3, r1, -4
+; AIX32-OPT-NEXT:    lfiwax f0, 0, r3
+; AIX32-OPT-NEXT:    xscvsxddp f0, f0
+; AIX32-OPT-NEXT:    xsadddp f1, f1, f0
+; AIX32-OPT-NEXT:    blr
+;
+; AIX64-OPT-LABEL: c3:
+; AIX64-OPT:       # %bb.0: # %entry
+; AIX64-OPT-NEXT:    stw r3, -4(r1)
+; AIX64-OPT-NEXT:    addi r3, r1, -4
+; AIX64-OPT-NEXT:    lfiwax f0, 0, r3
+; AIX64-OPT-NEXT:    xscvsxddp f0, f0
+; AIX64-OPT-NEXT:    xsadddp f1, f1, f0
+; AIX64-OPT-NEXT:    blr
+entry:
+  %conv = sitofp i32 %a1 to double
+  %add = fadd double %a2, %conv
+  ret double %add
+}
+
+define noundef double @b3(i32 noundef %x, double noundef %y, float noundef %z)   {
+; AIX32-LABEL: b3:
+; AIX32:       # %bb.0: # %entry
+; AIX32-NEXT:    mflr r0
+; AIX32-NEXT:    stwu r1, -64(r1)
+; AIX32-NEXT:    stw r0, 72(r1)
+; AIX32-NEXT:    bl .c3
+; AIX32-NEXT:    addi r1, r1, 64
+; AIX32-NEXT:    lwz r0, 8(r1)
+; AIX32-NEXT:    mtlr r0
+; AIX32-NEXT:    blr
+;
+; AIX64-LABEL: b3:
+; AIX64:       # %bb.0: # %entry
+; AIX64-NEXT:    mflr r0
+; AIX64-NEXT:    stdu r1, -112(r1)
+; AIX64-NEXT:    clrldi r3, r3, 32
+; AIX64-NEXT:    std r0, 128(r1)
+; AIX64-NEXT:    bl .c3
+; AIX64-NEXT:    addi r1, r1, 112
+; AIX64-NEXT:    ld r0, 16(r1)
+; AIX64-NEXT:    mtlr r0
+; AIX64-NEXT:    blr
+;
+; AIX32-OPT-LABEL: b3:
+; AIX32-OPT:       # %bb.0: # %entry
+; AIX32-OPT-NEXT:    mflr r0
+; AIX32-OPT-NEXT:    stwu r1, -64(r1)
+; AIX32-OPT-NEXT:    stw r0, 72(r1)
+; AIX32-OPT-NEXT:    bl .c3
+; AIX32-OPT-NEXT:    addi r1, r1, 64
+; AIX32-OPT-NEXT:    lwz r0, 8(r1)
+; AIX32-OPT-NEXT:    mtlr r0
+; AIX32-OPT-NEXT:    blr
+;
+; AIX64-OPT-LABEL: b3:
+; AIX64-OPT:       # %bb.0: # %entry
+; AIX64-OPT-NEXT:    mflr r0
+; AIX64-OPT-NEXT:    stdu r1, -112(r1)
+; AIX64-OPT-NEXT:    clrldi r3, r3, 32
+; AIX64-OPT-NEXT:    std r0, 128(r1)
+; AIX64-OPT-NEXT:    bl .c3
+; AIX64-OPT-NEXT:    addi r1, r1, 112
+; AIX64-OPT-NEXT:    ld r0, 16(r1)
+; AIX64-OPT-NEXT:    mtlr r0
+; AIX64-OPT-NEXT:    blr
+entry:
+  %call = tail call double @c3(i32 noundef %x, double noundef %y, float poison, <4 x i32> poison, <4 x i32> poison, <4 x i32> poison)
+  ret double %call
+}
+
+define hidden double @c4(ptr noundef readonly byval(%struct.ST) align 4 captures(none) %s, double %a3, <4 x i32> %v1)  {
+; AIX32-LABEL: c4:
+; AIX32:       # %bb.0: # %entry
+; AIX32-NEXT:    stw r3, 24(r1)
+; AIX32-NEXT:    stw r3, -4(r1)
+; AIX32-NEXT:    addi r3, r1, -4
+; AIX32-NEXT:    stw r5, 32(r1)
+; AIX32-NEXT:    stw r4, 28(r1)
+; AIX32-NEXT:    lfiwax f0, 0, r3
+; AIX32-NEXT:    lfd f1, 28(r1)
+; AIX32-NEXT:    xscvsxddp f0, f0
+; AIX32-NEXT:    xsadddp f1, f1, f0
+; AIX32-NEXT:    blr
+;
+; AIX64-LABEL: c4:
+; AIX64:       # %bb.0: # %entry
+; AIX64-NEXT:    std r3, 48(r1)
+; AIX64-NEXT:    rldicl r3, r3, 32, 32
+; AIX64-NEXT:    std r4, 56(r1)
+; AIX64-NEXT:    lfd f1, 52(r1)
+; AIX64-NEXT:    stw r3, -4(r1)
+; AIX64-NEXT:    addi r3, r1, -4
+; AIX64-NEXT:    lfiwax f0, 0, r3
+; AIX64-NEXT:    xscvsxddp f0, f0
+; AIX64-NEXT:    xsadddp f1, f1, f0
+; AIX64-NEXT:    blr
+;
+; AIX32-OPT-LABEL: c4:
+; AIX32-OPT:       # %bb.0: # %entry
+; AIX32-OPT-NEXT:    stw r3, 24(r1)
+; AIX32-OPT-NEXT:    stw r3, -4(r1)
+; AIX32-OPT-NEXT:    addi r3, r1, -4
+; AIX32-OPT-NEXT:    stw r5, 32(r1)
+; AIX32-OPT-NEXT:    stw r4, 28(r1)
+; AIX32-OPT-NEXT:    lfiwax f0, 0, r3
+; AIX32-OPT-NEXT:    lfd f1, 28(r1)
+; AIX32-OPT-NEXT:    xscvsxddp f0, f0
+; AIX32-OPT-NEXT:    xsadddp f1, f1, f0
+; AIX32-OPT-NEXT:    blr
+;
+; AIX64-OPT-LABEL: c4:
+; AIX64-OPT:       # %bb.0: # %entry
+; AIX64-OPT-NEXT:    std r3, 48(r1)
+; AIX64-OPT-NEXT:    rldicl r3, r3, 32, 32
+; AIX64-OPT-NEXT:    std r4, 56(r1)
+; AIX64-OPT-NEXT:    lfd f1, 52(r1)
+; AIX64-OPT-NEXT:    stw r3, -4(r1)
+; AIX64-OPT-NEXT:    addi r3, r1, -4
+; AIX64-OPT-NEXT:    lfiwax f0, 0, r3
+; AIX64-OPT-NEXT:    xscvsxddp f0, f0
+; AIX64-OPT-NEXT:    xsadddp f1, f1, f0
+; AIX64-OPT-NEXT:    blr
+entry:
+  %0 = load i32, ptr %s, align 4
+  %conv = sitofp i32 %0 to double
+  %b1 = getelementptr inbounds nuw i8, ptr %s, i32 4
+  %1 = load double, ptr %b1, align 4
+  %add = fadd double %1, %conv
+  ret double %add
+}
+
+define double @b4(i32 noundef %x, double noundef %y, float noundef %z)  {
+; AIX32-LABEL: b4:
+; AIX32:       # %bb.0: # %entry
+; AIX32-NEXT:    mflr r0
+; AIX32-NEXT:    stwu r1, -64(r1)
+; AIX32-NEXT:    lwz r3, L..C0(r2) # @__const.b4.s
+; AIX32-NEXT:    stw r0, 72(r1)
+; AIX32-NEXT:    lwz r5, 8(r3)
+; AIX32-NEXT:    lwz r4, 4(r3)
+; AIX32-NEXT:    lwz r3, 0(r3)
+; AIX32-NEXT:    bl .c4
+; AIX32-NEXT:    addi r1, r1, 64
+; AIX32-NEXT:    lwz r0, 8(r1)
+; AIX32-NEXT:    mtlr r0
+; AIX32-NEXT:    blr
+;
+; AIX64-LABEL: b4:
+; AIX64:       # %bb.0: # %entry
+; AIX64-NEXT:    ld r4, L..C0(r2) # @__const.b4.s
+; AIX64-NEXT:    ld r3, 0(r4)
+; AIX64-NEXT:    lwz r4, 8(r4)
+; AIX64-NEXT:    sldi r4, r4, 32
+; AIX64-NEXT:    b .c4
+; AIX64-NEXT:    #TC_RETURNd8 .c4 0
+;
+; AIX32-OPT-LABEL: b4:
+; AIX32-OPT:       # %bb.0: # %entry
+; AIX32-OPT-NEXT:    mflr r0
+; AIX32-OPT-NEXT:    stwu r1, -64(r1)
+; AIX32-OPT-NEXT:    lwz r3, L..C0(r2) # @__const.b4.s
+; AIX32-OPT-NEXT:    stw r0, 72(r1)
+; AIX32-OPT-NEXT:    lwz r5, 8(r3)
+; AIX32-OPT-NEXT:    lwz r4, 4(r3)
+; AIX32-OPT-NEXT:    lwz r3, 0(r3)
+; AIX32-OPT-NEXT:    bl .c4
+; AIX32-OPT-NEXT:    addi r1, r1, 64
+; AIX32-OPT-NEXT:    lwz r0, 8(r1)
+; AIX32-OPT-NEXT:    mtlr r0
+; AIX32-OPT-NEXT:    blr
+;
+; AIX64-OPT-LABEL: b4:
+; AIX64-OPT:       # %bb.0: # %entry
+; AIX64-OPT-NEXT:    stdu r1, -112(r1)
+; AIX64-OPT-NEXT:    ld r4, L..C0(r2) # @__const.b4.s
+; AIX64-OPT-NEXT:    ld r3, 0(r4)
+; AIX64-OPT-NEXT:    lwz r4, 8(r4)
+; AIX64-OPT-NEXT:    sldi r4, r4, 32
+; AIX64-OPT-NEXT:    addi r1, r1, 112
+; AIX64-OPT-NEXT:    b .c4
+; AIX64-OPT-NEXT:    #TC_RETURNd8 .c4 0
+entry:
+  %call = tail call double @c4(ptr noundef nonnull byval(%struct.ST) align 4 @__const.b4.s, double poison, <4 x i32> poison)
+  ret double %call
+}
+

>From 988d21237a9e5893e9ca6c757b9640b3d67d7110 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Fri, 31 Oct 2025 17:39:53 +0000
Subject: [PATCH 11/11] modified test case

---
 llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll | 226 +++++++-----------
 1 file changed, 84 insertions(+), 142 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll b/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll
index 1a4b9a9cfded4..d1b73b6609d42 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll
@@ -1,55 +1,40 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=AIX32 %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=AIX64 %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix -ppc-asm-full-reg-names < %s | FileCheck --check-prefixes=AIX32,COMM %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix -ppc-asm-full-reg-names < %s | FileCheck --check-prefixes=AIX64,COMM %s
 
-; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix -tailcallopt -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=AIX32-OPT %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix -tailcallopt -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=AIX64-OPT %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix -tailcallopt -ppc-asm-full-reg-names < %s | FileCheck --check-prefixes=AIX32-OPT,COMM-OPT %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix -tailcallopt -ppc-asm-full-reg-names < %s | FileCheck --check-prefixes=AIX64-OPT,COMM-OPT %s
 
 define hidden fastcc i32 @k(i32 %a, i32 %b) {
-; AIX32-LABEL: k:
-; AIX32:       # %bb.0: # %entry
-; AIX32-NEXT:    add r3, r3, r4
-; AIX32-NEXT:    blr
-;
-; AIX64-LABEL: k:
-; AIX64:       # %bb.0: # %entry
-; AIX64-NEXT:    add r3, r3, r4
-; AIX64-NEXT:    blr
-;
-; AIX32-OPT-LABEL: k:
-; AIX32-OPT:       # %bb.0: # %entry
-; AIX32-OPT-NEXT:    add r3, r3, r4
+; COMM-LABEL: k:
+; COMM:       # %bb.0: # %entry
+; COMM-NEXT:    add r3, r3, r4
+; COMM-NEXT:    blr
+
+; COMM-OPT-LABEL: k:
+; COMM-OPT:       # %bb.0: # %entry
+; COMM-OPT-NEXT:    add r3, r3, r4
 ; AIX32-OPT-NEXT:    addi r1, r1, 64
-; AIX32-OPT-NEXT:    blr
-;
-; AIX64-OPT-LABEL: k:
-; AIX64-OPT:       # %bb.0: # %entry
-; AIX64-OPT-NEXT:    add r3, r3, r4
 ; AIX64-OPT-NEXT:    addi r1, r1, 112
-; AIX64-OPT-NEXT:    blr
+; COMM-OPT-NEXT:    blr
+
 entry:
   %c = add i32 %a, %b
   ret i32 %c
 }
 
 define hidden fastcc i32 @ff(i32 %a) {
-; AIX32-LABEL: ff:
-; AIX32:       # %bb.0: # %entry
-; AIX32-NEXT:    blr
-;
-; AIX64-LABEL: ff:
-; AIX64:       # %bb.0: # %entry
-; AIX64-NEXT:    blr
+
+; COMM-LABEL: ff:
+; COMM:       # %bb.0: # %entry
+; COMM-NEXT:    blr
 ;
-; AIX32-OPT-LABEL: ff:
-; AIX32-OPT:       # %bb.0: # %entry
+; COMM-OPT-LABEL: ff:
+; COMM-OPT:       # %bb.0: # %entry
 ; AIX32-OPT-NEXT:    addi r1, r1, 64
-; AIX32-OPT-NEXT:    blr
-;
-; AIX64-OPT-LABEL: ff:
-; AIX64-OPT:       # %bb.0: # %entry
 ; AIX64-OPT-NEXT:    addi r1, r1, 112
-; AIX64-OPT-NEXT:    blr
+; COMM-OPT-NEXT:    blr
+
 entry:
   ret i32 %a
 }
@@ -215,6 +200,8 @@ entry:
 ;; The following IRs are generated from source code with command:
 ;; clang -O1 -target powerpc-unknown-aix -emit-llvm
 
+; // The argument size in stack is 4(int) +8(double) +4(float) + 16(vector)  = 32 bytes  in 32-bit mode.
+; // The argument size in stack is 8(int) +8(double) +8(float) + 16(vector)  = 40 bytes  in 64-bit mode. 
 ; __attribute__((noinline, visibility("hidden")))
 ; double c1(int a1, double a2, float a3 ,vector unsigned int v1) {
 ;     return a1 + a2  ;
@@ -225,6 +212,8 @@ entry:
 ;     return c1(x, y, z, v);
 ; }
 ;
+; // The argument size in stack is 4(int) +8(double) +4(float) + 16(vector) + 16(vector) = 48 bytes  in 32-bit mode.
+; // The argument size in stack is 8(int) +8(double) +8(float) + 16(vector) + 16(vector) = 56 bytes  in 64-bit mode.
 ; __attribute__((noinline, visibility("hidden")))
 ; double c2(int a1, double a2, float a3 ,vector unsigned int v1 , vector unsigned int v2 ) {
 ;     return a1 + a2  ;
@@ -235,6 +224,8 @@ entry:
 ;     return c2(x, y, z, v, v);
 ; }
 ;
+; // The argument size in stack is 4(int) +8(double) +4(float) + 16(vector) + 16(vector) + 16(vector) = 64 bytes  in 32-bit mode.
+; // The argument size in stack is 8(int) +8(double) +8(float) + 16(vector) + 16(vector) + 16(vector) = 72 bytes  in 64-bit mode.
 ; __attribute__((noinline, visibility("hidden")))
 ; double c3(int a1, double a2, float a3 ,vector unsigned int v1 , vector unsigned int v2 , vector unsigned int v3 ) {
 ;     return a1 + a2  ;
@@ -250,6 +241,9 @@ entry:
 ;   double b1;
 ; };
 ;
+
+; // The argument size in stack is 12(ST) +8(double) +4(float) + 16(vector) = 40 bytes  in 32-bit mode.
+; // The argument size in stack is 12(ST) +8(double) +8(float) + 16(vector) = 48 bytes  in 64-bit mode.
 ; __attribute__((noinline, visibility("hidden")))
 ; double c4(struct ST s, double a3 ,vector unsigned int v1 ) {
 ;    return s.a1 +s.b1;
@@ -267,41 +261,24 @@ entry:
 
 ; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none)
 define hidden noundef double @c1(i32 noundef %a1, double noundef %a2, float %a3, <4 x i32> %v1)  {
-; AIX32-LABEL: c1:
-; AIX32:       # %bb.0: # %entry
-; AIX32-NEXT:    stw r3, -4(r1)
-; AIX32-NEXT:    addi r3, r1, -4
-; AIX32-NEXT:    lfiwax f0, 0, r3
-; AIX32-NEXT:    xscvsxddp f0, f0
-; AIX32-NEXT:    xsadddp f1, f1, f0
-; AIX32-NEXT:    blr
-;
-; AIX64-LABEL: c1:
-; AIX64:       # %bb.0: # %entry
-; AIX64-NEXT:    stw r3, -4(r1)
-; AIX64-NEXT:    addi r3, r1, -4
-; AIX64-NEXT:    lfiwax f0, 0, r3
-; AIX64-NEXT:    xscvsxddp f0, f0
-; AIX64-NEXT:    xsadddp f1, f1, f0
-; AIX64-NEXT:    blr
-;
-; AIX32-OPT-LABEL: c1:
-; AIX32-OPT:       # %bb.0: # %entry
-; AIX32-OPT-NEXT:    stw r3, -4(r1)
-; AIX32-OPT-NEXT:    addi r3, r1, -4
-; AIX32-OPT-NEXT:    lfiwax f0, 0, r3
-; AIX32-OPT-NEXT:    xscvsxddp f0, f0
-; AIX32-OPT-NEXT:    xsadddp f1, f1, f0
-; AIX32-OPT-NEXT:    blr
-;
-; AIX64-OPT-LABEL: c1:
-; AIX64-OPT:       # %bb.0: # %entry
-; AIX64-OPT-NEXT:    stw r3, -4(r1)
-; AIX64-OPT-NEXT:    addi r3, r1, -4
-; AIX64-OPT-NEXT:    lfiwax f0, 0, r3
-; AIX64-OPT-NEXT:    xscvsxddp f0, f0
-; AIX64-OPT-NEXT:    xsadddp f1, f1, f0
-; AIX64-OPT-NEXT:    blr
+; COMM-LABEL: c1:
+; COMM:       # %bb.0: # %entry
+; COMM-NEXT:    stw r3, -4(r1)
+; COMM-NEXT:    addi r3, r1, -4
+; COMM-NEXT:    lfiwax f0, 0, r3
+; COMM-NEXT:    xscvsxddp f0, f0
+; COMM-NEXT:    xsadddp f1, f1, f0
+; COMM-NEXT:    blr
+
+; COMM-OPT-LABEL: c1:
+; COMM-OPT:       # %bb.0: # %entry
+; COMM-OPT-NEXT:    stw r3, -4(r1)
+; COMM-OPT-NEXT:    addi r3, r1, -4
+; COMM-OPT-NEXT:    lfiwax f0, 0, r3
+; COMM-OPT-NEXT:    xscvsxddp f0, f0
+; COMM-OPT-NEXT:    xsadddp f1, f1, f0
+; COMM-OPT-NEXT:    blr
+
 entry:
   %conv = sitofp i32 %a1 to double
   %add = fadd double %a2, %conv
@@ -342,41 +319,24 @@ entry:
 
 ; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none)
 define hidden noundef double @c2(i32 noundef %a1, double noundef %a2, float %a3, <4 x i32> %v1, <4 x i32> %v2)  {
-; AIX32-LABEL: c2:
-; AIX32:       # %bb.0: # %entry
-; AIX32-NEXT:    stw r3, -4(r1)
-; AIX32-NEXT:    addi r3, r1, -4
-; AIX32-NEXT:    lfiwax f0, 0, r3
-; AIX32-NEXT:    xscvsxddp f0, f0
-; AIX32-NEXT:    xsadddp f1, f1, f0
-; AIX32-NEXT:    blr
-;
-; AIX64-LABEL: c2:
-; AIX64:       # %bb.0: # %entry
-; AIX64-NEXT:    stw r3, -4(r1)
-; AIX64-NEXT:    addi r3, r1, -4
-; AIX64-NEXT:    lfiwax f0, 0, r3
-; AIX64-NEXT:    xscvsxddp f0, f0
-; AIX64-NEXT:    xsadddp f1, f1, f0
-; AIX64-NEXT:    blr
-;
-; AIX32-OPT-LABEL: c2:
-; AIX32-OPT:       # %bb.0: # %entry
-; AIX32-OPT-NEXT:    stw r3, -4(r1)
-; AIX32-OPT-NEXT:    addi r3, r1, -4
-; AIX32-OPT-NEXT:    lfiwax f0, 0, r3
-; AIX32-OPT-NEXT:    xscvsxddp f0, f0
-; AIX32-OPT-NEXT:    xsadddp f1, f1, f0
-; AIX32-OPT-NEXT:    blr
-;
-; AIX64-OPT-LABEL: c2:
-; AIX64-OPT:       # %bb.0: # %entry
-; AIX64-OPT-NEXT:    stw r3, -4(r1)
-; AIX64-OPT-NEXT:    addi r3, r1, -4
-; AIX64-OPT-NEXT:    lfiwax f0, 0, r3
-; AIX64-OPT-NEXT:    xscvsxddp f0, f0
-; AIX64-OPT-NEXT:    xsadddp f1, f1, f0
-; AIX64-OPT-NEXT:    blr
+; COMM-LABEL: c2:
+; COMM:       # %bb.0: # %entry
+; COMM-NEXT:    stw r3, -4(r1)
+; COMM-NEXT:    addi r3, r1, -4
+; COMM-NEXT:    lfiwax f0, 0, r3
+; COMM-NEXT:    xscvsxddp f0, f0
+; COMM-NEXT:    xsadddp f1, f1, f0
+; COMM-NEXT:    blr
+
+; COMM-OPT-LABEL: c2:
+; COMM-OPT:       # %bb.0: # %entry
+; COMM-OPT-NEXT:    stw r3, -4(r1)
+; COMM-OPT-NEXT:    addi r3, r1, -4
+; COMM-OPT-NEXT:    lfiwax f0, 0, r3
+; COMM-OPT-NEXT:    xscvsxddp f0, f0
+; COMM-OPT-NEXT:    xsadddp f1, f1, f0
+; COMM-OPT-NEXT:    blr
+
 entry:
   %conv = sitofp i32 %a1 to double
   %add = fadd double %a2, %conv
@@ -425,41 +385,23 @@ entry:
 }
 
 define hidden noundef double @c3(i32 noundef %a1, double noundef %a2, float %a3, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3)  {
-; AIX32-LABEL: c3:
-; AIX32:       # %bb.0: # %entry
-; AIX32-NEXT:    stw r3, -4(r1)
-; AIX32-NEXT:    addi r3, r1, -4
-; AIX32-NEXT:    lfiwax f0, 0, r3
-; AIX32-NEXT:    xscvsxddp f0, f0
-; AIX32-NEXT:    xsadddp f1, f1, f0
-; AIX32-NEXT:    blr
-;
-; AIX64-LABEL: c3:
-; AIX64:       # %bb.0: # %entry
-; AIX64-NEXT:    stw r3, -4(r1)
-; AIX64-NEXT:    addi r3, r1, -4
-; AIX64-NEXT:    lfiwax f0, 0, r3
-; AIX64-NEXT:    xscvsxddp f0, f0
-; AIX64-NEXT:    xsadddp f1, f1, f0
-; AIX64-NEXT:    blr
-;
-; AIX32-OPT-LABEL: c3:
-; AIX32-OPT:       # %bb.0: # %entry
-; AIX32-OPT-NEXT:    stw r3, -4(r1)
-; AIX32-OPT-NEXT:    addi r3, r1, -4
-; AIX32-OPT-NEXT:    lfiwax f0, 0, r3
-; AIX32-OPT-NEXT:    xscvsxddp f0, f0
-; AIX32-OPT-NEXT:    xsadddp f1, f1, f0
-; AIX32-OPT-NEXT:    blr
-;
-; AIX64-OPT-LABEL: c3:
-; AIX64-OPT:       # %bb.0: # %entry
-; AIX64-OPT-NEXT:    stw r3, -4(r1)
-; AIX64-OPT-NEXT:    addi r3, r1, -4
-; AIX64-OPT-NEXT:    lfiwax f0, 0, r3
-; AIX64-OPT-NEXT:    xscvsxddp f0, f0
-; AIX64-OPT-NEXT:    xsadddp f1, f1, f0
-; AIX64-OPT-NEXT:    blr
+; COMM-LABEL: c3:
+; COMM:       # %bb.0: # %entry
+; COMM-NEXT:    stw r3, -4(r1)
+; COMM-NEXT:    addi r3, r1, -4
+; COMM-NEXT:    lfiwax f0, 0, r3
+; COMM-NEXT:    xscvsxddp f0, f0
+; COMM-NEXT:    xsadddp f1, f1, f0
+; COMM-NEXT:    blr
+
+; COMM-OPT-LABEL: c3:
+; COMM-OPT:       # %bb.0: # %entry
+; COMM-OPT-NEXT:    stw r3, -4(r1)
+; COMM-OPT-NEXT:    addi r3, r1, -4
+; COMM-OPT-NEXT:    lfiwax f0, 0, r3
+; COMM-OPT-NEXT:    xscvsxddp f0, f0
+; COMM-OPT-NEXT:    xsadddp f1, f1, f0
+; COMM-OPT-NEXT:    blr
 entry:
   %conv = sitofp i32 %a1 to double
   %add = fadd double %a2, %conv



More information about the llvm-commits mailing list