[llvm] c78726f - [PowerPC] Refactor FinishCall. [NFC]

Sean Fertile via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 9 09:41:12 PST 2019


Author: Sean Fertile
Date: 2019-12-09T12:40:15-05:00
New Revision: c78726fae0edf28e0556a03b7b44df3c4ec3c94e

URL: https://github.com/llvm/llvm-project/commit/c78726fae0edf28e0556a03b7b44df3c4ec3c94e
DIFF: https://github.com/llvm/llvm-project/commit/c78726fae0edf28e0556a03b7b44df3c4ec3c94e.diff

LOG: [PowerPC] Refactor FinishCall. [NFC]

Refactor FinishCall to be more easily understandable as a precursor to
implementing indirect calls for AIX. The refactor tries to group similar
code together at the cost of some code duplication. The high level
overview of the refactor:

- Adds a number of helper functions for things like:
  * Determining if a call is indirect.
  * What the Opcode for a call is.
  * Transforming the callee for a direct function call.
  * Extracting the Chain operand from a CallSeqStart node.
  * Building the operands of the call.

- Adds helpers for building the indirect call DAG nodes
  (excluding the call instruction itself which is created in
  `FinishCall`).

- Removes PrepareCall, which has been subsumed by the
  helpers.

- Rename 'InFlag' to 'Glue'.

- FinishCall has been refactored to:
  1) Set TOC pointer usage on the DAG for the TOC based
     subtargets.
  2) Calculate if a call is indirect.
  3) Determine the Opcode to use for the call
     instruction.
  4) Transform the Callee for direct calls, or build
     the DAG nodes for indirect calls.
  5) Buildup the call operands.
  6) Emit the call instruction.
  7) If needed, emit the callSeqEnd Node and
     finish lowering by calling `LowerCallResult`

Differential Revision: https://reviews.llvm.org/D70126

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCSubtarget.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index a4f662dfdddb..057ecfb3bad1 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -4959,213 +4959,6 @@ static bool isFunctionGlobalAddress(SDValue Callee) {
   return false;
 }
 
-static unsigned
-PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
-            SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall,
-            bool isPatchPoint, bool hasNest,
-            SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
-            SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
-            ImmutableCallSite CS, const PPCSubtarget &Subtarget) {
-  bool isPPC64 = Subtarget.isPPC64();
-  bool isSVR4ABI = Subtarget.isSVR4ABI();
-  bool is64BitELFv1ABI = isPPC64 && isSVR4ABI && !Subtarget.isELFv2ABI();
-  bool isAIXABI = Subtarget.isAIXABI();
-
-  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
-  NodeTys.push_back(MVT::Other);   // Returns a chain
-  NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.
-
-  unsigned CallOpc = PPCISD::CALL;
-
-  bool needIndirectCall = true;
-  if (!isSVR4ABI || !isPPC64)
-    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
-      // If this is an absolute destination address, use the munged value.
-      Callee = SDValue(Dest, 0);
-      needIndirectCall = false;
-    }
-
-  // PC-relative references to external symbols should go through $stub, unless
-  // we're building with the leopard linker or later, which automatically
-  // synthesizes these stubs.
-  const TargetMachine &TM = DAG.getTarget();
-  const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
-  const GlobalValue *GV = nullptr;
-  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
-    GV = G->getGlobal();
-  bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
-  bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;
-
-  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
-  // every direct call is) turn it into a TargetGlobalAddress /
-  // TargetExternalSymbol node so that legalize doesn't hack it.
-  if (isFunctionGlobalAddress(Callee)) {
-    GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
-
-    // A call to a TLS address is actually an indirect call to a
-    // thread-specific pointer.
-    unsigned OpFlags = 0;
-    if (UsePlt)
-      OpFlags = PPCII::MO_PLT;
-
-    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
-                                        Callee.getValueType(), 0, OpFlags);
-    needIndirectCall = false;
-  }
-
-  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
-    unsigned char OpFlags = 0;
-
-    if (UsePlt)
-      OpFlags = PPCII::MO_PLT;
-
-    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
-                                         OpFlags);
-    needIndirectCall = false;
-  }
-
-  if (isPatchPoint) {
-    // We'll form an invalid direct call when lowering a patchpoint; the full
-    // sequence for an indirect call is complicated, and many of the
-    // instructions introduced might have side effects (and, thus, can't be
-    // removed later). The call itself will be removed as soon as the
-    // argument/return lowering is complete, so the fact that it has the wrong
-    // kind of operands should not really matter.
-    needIndirectCall = false;
-  }
-
-  if (needIndirectCall) {
-    // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
-    // to do the call, we can't use PPCISD::CALL.
-    SDValue MTCTROps[] = {Chain, Callee, InFlag};
-
-    if (is64BitELFv1ABI) {
-      // Function pointers in the 64-bit SVR4 ABI do not point to the function
-      // entry point, but to the function descriptor (the function entry point
-      // address is part of the function descriptor though).
-      // The function descriptor is a three doubleword structure with the
-      // following fields: function entry point, TOC base address and
-      // environment pointer.
-      // Thus for a call through a function pointer, the following actions need
-      // to be performed:
-      //   1. Save the TOC of the caller in the TOC save area of its stack
-      //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
-      //   2. Load the address of the function entry point from the function
-      //      descriptor.
-      //   3. Load the TOC of the callee from the function descriptor into r2.
-      //   4. Load the environment pointer from the function descriptor into
-      //      r11.
-      //   5. Branch to the function entry point address.
-      //   6. On return of the callee, the TOC of the caller needs to be
-      //      restored (this is done in FinishCall()).
-      //
-      // The loads are scheduled at the beginning of the call sequence, and the
-      // register copies are flagged together to ensure that no other
-      // operations can be scheduled in between. E.g. without flagging the
-      // copies together, a TOC access in the caller could be scheduled between
-      // the assignment of the callee TOC and the branch to the callee, which
-      // results in the TOC access going through the TOC of the callee instead
-      // of going through the TOC of the caller, which leads to incorrect code.
-
-      // Load the address of the function entry point from the function
-      // descriptor.
-      SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
-      if (LDChain.getValueType() == MVT::Glue)
-        LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
-
-      auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
-                          ? (MachineMemOperand::MODereferenceable |
-                             MachineMemOperand::MOInvariant)
-                          : MachineMemOperand::MONone;
-
-      MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr);
-      SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
-                                        /* Alignment = */ 8, MMOFlags);
-
-      // Load environment pointer into r11.
-      SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
-      SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
-      SDValue LoadEnvPtr =
-          DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
-                      /* Alignment = */ 8, MMOFlags);
-
-      SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
-      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
-      SDValue TOCPtr =
-          DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
-                      /* Alignment = */ 8, MMOFlags);
-
-      setUsesTOCBasePtr(DAG);
-      SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
-                                        InFlag);
-      Chain = TOCVal.getValue(0);
-      InFlag = TOCVal.getValue(1);
-
-      // If the function call has an explicit 'nest' parameter, it takes the
-      // place of the environment pointer.
-      if (!hasNest) {
-        SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
-                                          InFlag);
-
-        Chain = EnvVal.getValue(0);
-        InFlag = EnvVal.getValue(1);
-      }
-
-      MTCTROps[0] = Chain;
-      MTCTROps[1] = LoadFuncPtr;
-      MTCTROps[2] = InFlag;
-    }
-
-    Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
-                        makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
-    InFlag = Chain.getValue(1);
-
-    NodeTys.clear();
-    NodeTys.push_back(MVT::Other);
-    NodeTys.push_back(MVT::Glue);
-    Ops.push_back(Chain);
-    CallOpc = PPCISD::BCTRL;
-    Callee.setNode(nullptr);
-    // Add use of X11 (holding environment pointer)
-    if (is64BitELFv1ABI && !hasNest)
-      Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
-    // Add CTR register as callee so a bctr can be emitted later.
-    if (isTailCall)
-      Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
-  }
-
-  // If this is a direct call, pass the chain and the callee.
-  if (Callee.getNode()) {
-    Ops.push_back(Chain);
-    Ops.push_back(Callee);
-  }
-  // If this is a tail call add stack pointer delta.
-  if (isTailCall)
-    Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
-
-  // Add argument registers to the end of the list so that they are known live
-  // into the call.
-  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
-    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
-                                  RegsToPass[i].second.getValueType()));
-
-  // All calls, in the AIX ABI and 64-bit ELF ABIs, need the TOC register
-  // live into the call.
-  // We do need to reserve R2/X2 to appease the verifier for the PATCHPOINT.
-  if ((isSVR4ABI && isPPC64) || isAIXABI) {
-    setUsesTOCBasePtr(DAG);
-
-    // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
-    // no way to mark dependencies as implicit here.
-    // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
-    if (!isPatchPoint)
-      Ops.push_back(DAG.getRegister(isPPC64 ? PPC::X2
-                                            : PPC::R2, PtrVT));
-  }
-
-  return CallOpc;
-}
-
 SDValue PPCTargetLowering::LowerCallResult(
     SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
@@ -5230,108 +5023,97 @@ SDValue PPCTargetLowering::LowerCallResult(
   return Chain;
 }
 
-SDValue PPCTargetLowering::FinishCall(
-    CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
-    bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
-    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag,
-    SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
-    unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
-    SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const {
-  std::vector<EVT> NodeTys;
-  SmallVector<SDValue, 8> Ops;
-  unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
-                                 SPDiff, isTailCall, isPatchPoint, hasNest,
-                                 RegsToPass, Ops, NodeTys, CS, Subtarget);
-
-  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
-  if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
-    Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
-
-  // When performing tail call optimization the callee pops its arguments off
-  // the stack. Account for this here so these bytes can be pushed back on in
-  // PPCFrameLowering::eliminateCallFramePseudoInstr.
-  int BytesCalleePops =
-    (CallConv == CallingConv::Fast &&
-     getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
+static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
+                           const PPCSubtarget &Subtarget, bool isPatchPoint) {
+  // PatchPoint calls are not indirect.
+  if (isPatchPoint)
+    return false;
 
-  // Add a register mask operand representing the call-preserved registers.
-  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
-  const uint32_t *Mask =
-      TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
-  assert(Mask && "Missing call preserved mask for calling convention");
-  Ops.push_back(DAG.getRegisterMask(Mask));
+  if (isFunctionGlobalAddress(Callee) || dyn_cast<ExternalSymbolSDNode>(Callee))
+    return false;
 
-  if (InFlag.getNode())
-    Ops.push_back(InFlag);
+  // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
+  // becuase the immediate function pointer points to a descriptor instead of
+  // a function entry point. The ELFv2 ABI cannot use a BLA because the function
+  // pointer immediate points to the global entry point, while the BLA would
+  // need to jump to the local entry point (see rL211174).
+  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
+      isBLACompatibleAddress(Callee, DAG))
+    return false;
 
-  // Emit tail call.
-  if (isTailCall) {
-    assert(((Callee.getOpcode() == ISD::Register &&
-             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
-            Callee.getOpcode() == ISD::TargetExternalSymbol ||
-            Callee.getOpcode() == ISD::TargetGlobalAddress ||
-            isa<ConstantSDNode>(Callee)) &&
-    "Expecting an global address, external symbol, absolute value or register");
+  return true;
+}
 
-    DAG.getMachineFunction().getFrameInfo().setHasTailCall();
-    return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
-  }
+static unsigned getCallOpcode(bool isIndirectCall, bool isPatchPoint,
+                              bool isTailCall, const Function &Caller,
+                              const SDValue &Callee,
+                              const PPCSubtarget &Subtarget,
+                              const TargetMachine &TM) {
+  if (isTailCall)
+    return PPCISD::TC_RETURN;
+
+  // This is a call through a function pointer.
+  if (isIndirectCall) {
+    // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer accross
+    // indirect calls. The save of the caller's TOC pointer to the stack will be
+    // inserted into the DAG as part of call lowering. The restore of the TOC
+    // pointer is modeled by using a pseudo instruction for the call opcode that
+    // represents the 2 instruction sequence of an indirect branch and link,
+    // immediately followed by a load of the TOC pointer from the the stack save
+    // slot into gpr2.
+    if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
+      return PPCISD::BCTRL_LOAD_TOC;
+
+    // An indirect call that does not need a TOC restore.
+    return PPCISD::BCTRL;
+  }
+
+  // The ABIs that maintain a TOC pointer accross calls need to have a nop
+  // immediately following the call instruction if the caller and callee may
+  // have 
diff erent TOC bases. At link time if the linker determines the calls
+  // may not share a TOC base, the call is redirected to a trampoline inserted
+  // by the linker. The trampoline will (among other things) save the callers
+  // TOC pointer at an ABI designated offset in the linkage area and the linker
+  // will rewrite the nop to be a load of the TOC pointer from the linkage area
+  // into gpr2.
+  if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
+    return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
+                                                  : PPCISD::CALL_NOP;
+
+  return PPCISD::CALL;
+}
+static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
+                               const SDLoc &dl, const PPCSubtarget &Subtarget) {
+  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
+    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
+      return SDValue(Dest, 0);
+
+  // Returns true if the callee is local, and false otherwise.
+  auto isLocalCallee = [&]() {
+    const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+    const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
+
+    return DAG.getTarget().shouldAssumeDSOLocal(*Mod,
+                                                G ? G->getGlobal() : nullptr);
+  };
 
-  // Add a NOP immediately after the branch instruction when using the 64-bit
-  // SVR4 or the AIX ABI.
-  // At link time, if caller and callee are in a 
diff erent module and
-  // thus have a 
diff erent TOC, the call will be replaced with a call to a stub
-  // function which saves the current TOC, loads the TOC of the callee and
-  // branches to the callee. The NOP will be replaced with a load instruction
-  // which restores the TOC of the caller from the TOC save slot of the current
-  // stack frame. If caller and callee belong to the same module (and have the
-  // same TOC), the NOP will remain unchanged, or become some other NOP.
+  bool UsePlt = Subtarget.is32BitELFABI() && !isLocalCallee();
 
-  MachineFunction &MF = DAG.getMachineFunction();
-  EVT PtrVT = getPointerTy(DAG.getDataLayout());
-  if (!isTailCall && !isPatchPoint &&
-      ((Subtarget.isSVR4ABI() && Subtarget.isPPC64()) ||
-       Subtarget.isAIXABI())) {
-    if (CallOpc == PPCISD::BCTRL) {
-      if (Subtarget.isAIXABI())
-        report_fatal_error("Indirect call on AIX is not implemented.");
-
-      // This is a call through a function pointer.
-      // Restore the caller TOC from the save area into R2.
-      // See PrepareCall() for more information about calls through function
-      // pointers in the 64-bit SVR4 ABI.
-      // We are using a target-specific load with r2 hard coded, because the
-      // result of a target-independent load would never go directly into r2,
-      // since r2 is a reserved register (which prevents the register allocator
-      // from allocating it), resulting in an additional register being
-      // allocated and an unnecessary move instruction being generated.
-      CallOpc = PPCISD::BCTRL_LOAD_TOC;
-
-      SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
-      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
-      SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
-      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
-
-      // The address needs to go after the chain input but before the flag (or
-      // any other variadic arguments).
-      Ops.insert(std::next(Ops.begin()), AddTOC);
-    } else if (CallOpc == PPCISD::CALL &&
-      !callsShareTOCBase(&MF.getFunction(), Callee, DAG.getTarget())) {
-      // Otherwise insert NOP for non-local calls.
-      CallOpc = PPCISD::CALL_NOP;
-    }
-  }
+  if (isFunctionGlobalAddress(Callee)) {
+    const GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
+    if (!Subtarget.isAIXABI())
+      return DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+                                        Callee.getValueType(), 0,
+                                        UsePlt ? PPCII::MO_PLT : 0);
 
-  if (Subtarget.isAIXABI() && isFunctionGlobalAddress(Callee)) {
     // On AIX, direct function calls reference the symbol for the function's
-    // entry point, which is named by inserting a "." before the function's
+    // entry point, which is named by prepending a "." before the function's
     // C-linkage name.
-    GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
     auto &Context = DAG.getMachineFunction().getMMI().getContext();
 
     const GlobalObject *GO = cast<GlobalObject>(G->getGlobal());
-    MCSymbolXCOFF *S = cast<MCSymbolXCOFF>(Context.getOrCreateSymbol(
-        Twine(".") + Twine(GO->getName())));
+    MCSymbolXCOFF *S = cast<MCSymbolXCOFF>(
+        Context.getOrCreateSymbol(Twine(".") + Twine(GO->getName())));
 
     if (GO && GO->isDeclaration() && !S->hasContainingCsect()) {
       // On AIX, an undefined symbol needs to be associated with a
@@ -5345,22 +5127,265 @@ SDValue PPCTargetLowering::FinishCall(
       S->setContainingCsect(Sec);
     }
 
-    Callee = DAG.getMCSymbol(S, PtrVT);
-    // Replace the GlobalAddressSDNode Callee with the MCSymbolSDNode.
-    Ops[1] = Callee;
+    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+    return DAG.getMCSymbol(S, PtrVT);
+  }
+
+  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+    return DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
+                                       UsePlt ? PPCII::MO_PLT : 0);
+
+  // No transformation needed.
+  assert(Callee.getNode() && "What no callee?");
+  return Callee;
+}
+
+static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
+  assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
+         "Expected a CALLSEQ_STARTSDNode.");
+
+  // The last operand is the chain, except when the node has glue. If the node
+  // has glue, then the last operand is the glue, and the chain is the second
+  // last operand.
+  SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
+  if (LastValue.getValueType() != MVT::Glue)
+    return LastValue;
+
+  return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
+}
+
+// Creates the node that moves a functions address into the count register
+// to prepare for an indirect call instruction.
+static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
+                                SDValue &Glue, SDValue &Chain,
+                                const SDLoc &dl) {
+  SDValue MTCTROps[] = {Chain, Callee, Glue};
+  EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
+  Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),
+                      makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
+  // The glue is the second value produced.
+  Glue = Chain.getValue(1);
+}
+
+static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
+                                          SDValue &Glue, SDValue &Chain,
+                                          SDValue CallSeqStart,
+                                          ImmutableCallSite CS, const SDLoc &dl,
+                                          bool hasNest,
+                                          const PPCSubtarget &Subtarget) {
+  // Function pointers in the 64-bit SVR4 ABI do not point to the function
+  // entry point, but to the function descriptor (the function entry point
+  // address is part of the function descriptor though).
+  // The function descriptor is a three doubleword structure with the
+  // following fields: function entry point, TOC base address and
+  // environment pointer.
+  // Thus for a call through a function pointer, the following actions need
+  // to be performed:
+  //   1. Save the TOC of the caller in the TOC save area of its stack
+  //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
+  //   2. Load the address of the function entry point from the function
+  //      descriptor.
+  //   3. Load the TOC of the callee from the function descriptor into r2.
+  //   4. Load the environment pointer from the function descriptor into
+  //      r11.
+  //   5. Branch to the function entry point address.
+  //   6. On return of the callee, the TOC of the caller needs to be
+  //      restored (this is done in FinishCall()).
+  //
+  // The loads are scheduled at the beginning of the call sequence, and the
+  // register copies are flagged together to ensure that no other
+  // operations can be scheduled in between. E.g. without flagging the
+  // copies together, a TOC access in the caller could be scheduled between
+  // the assignment of the callee TOC and the branch to the callee, which leads
+  // to incorrect code.
+
+  // Start by loading the function address from the descriptor.
+  SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
+  auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
+                      ? (MachineMemOperand::MODereferenceable |
+                         MachineMemOperand::MOInvariant)
+                      : MachineMemOperand::MONone;
+
+  MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr);
+
+  // One load for the functions entry point address.
+  SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
+                                    /* Alignment = */ 8, MMOFlags);
+
+  // One for loading the TOC anchor for the module that contains the called
+  // function.
+  SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
+  SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
+  SDValue TOCPtr =
+      DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
+                  /* Alignment = */ 8, MMOFlags);
+
+  // One for loading the environment pointer.
+  SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
+  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
+  SDValue LoadEnvPtr =
+      DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
+                  /* Alignment = */ 8, MMOFlags);
+
+  // Then copy the newly loaded TOC anchor to the TOC pointer.
+  SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr, Glue);
+  Chain = TOCVal.getValue(0);
+  Glue = TOCVal.getValue(1);
+
+  // If the function call has an explicit 'nest' parameter, it takes the
+  // place of the environment pointer.
+  if (!hasNest) {
+    SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, Glue);
+    Chain = EnvVal.getValue(0);
+    Glue = EnvVal.getValue(1);
+  }
+
+  // The rest of the indirect call sequence is the same as the non-descriptor
+  // DAG.
+  prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
+}
+
+static void
+buildCallOperands(SmallVectorImpl<SDValue> &Ops, CallingConv::ID CallConv,
+                  const SDLoc &dl, bool isTailCall, bool isVarArg,
+                  bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
+                  SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
+                  SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
+                  const PPCSubtarget &Subtarget, bool isIndirect) {
+  const bool IsPPC64 = Subtarget.isPPC64();
+  // MVT for a general purpose register.
+  const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
+
+  // First operand is always the chain.
+  Ops.push_back(Chain);
+
+  // If it's a direct call pass the callee as the second operand.
+  if (!isIndirect)
+    Ops.push_back(Callee);
+  else {
+    assert(!isPatchPoint && "Patch point call are not indirect.");
+    if (Subtarget.isAIXABI())
+      report_fatal_error("Indirect call on AIX is not implemented.");
+
+    // For 64-bit ELF we have saved the TOC pointer to the linkage area on the
+    // stack (this would have  been done in `LowerCall_64SVR4`). The call
+    // instruction is a pseudo instruction that represents both the indirect
+    // branch and a load that restores the TOC pointer from the linkage area.
+    // The operand for the TOC restore is an add of the TOC save offset to the
+    // stack pointer. This must be the second operand: after the chain input but
+    // before any other variadic arguments.
+    if (Subtarget.is64BitELFABI()) {
+      SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
+      SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
+      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
+      Ops.push_back(AddTOC);
+    }
+
+    // Add the register used for the environment pointer.
+    if (Subtarget.usesFunctionDescriptors() && !hasNest)
+      Ops.push_back(DAG.getRegister(PPC::X11, MVT::i64));
+
+    // Add CTR register as callee so a bctr can be emitted later.
+    if (isTailCall)
+      Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
   }
 
-  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
-  InFlag = Chain.getValue(1);
+  // If this is a tail call add stack pointer delta.
+  if (isTailCall)
+    Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
+
+  // Add argument registers to the end of the list so that they are known live
+  // into the call.
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+                                  RegsToPass[i].second.getValueType()));
+
+  // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
+  // no way to mark dependencies as implicit here.
+  // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
+  if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) && !isPatchPoint)
+    Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::X2 : PPC::R2, RegVT));
+
+  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
+  if (isVarArg && Subtarget.is32BitELFABI())
+    Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
+
+  // Add a register mask operand representing the call-preserved registers.
+  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+  const uint32_t *Mask =
+      TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
+  assert(Mask && "Missing call preserved mask for calling convention");
+  Ops.push_back(DAG.getRegisterMask(Mask));
+
+  // If the glue is valid, it is the last operand.
+  if (Glue.getNode())
+    Ops.push_back(Glue);
+}
+
+SDValue PPCTargetLowering::FinishCall(
+    CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
+    bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
+    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
+    SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
+    unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
+    SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const {
+
+  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI())
+    setUsesTOCBasePtr(DAG);
+
+  const bool isIndirect = isIndirectCall(Callee, DAG, Subtarget, isPatchPoint);
+  unsigned CallOpc = getCallOpcode(isIndirect, isPatchPoint, isTailCall,
+                                   DAG.getMachineFunction().getFunction(),
+                                   Callee, Subtarget, DAG.getTarget());
+
+  if (!isIndirect)
+    Callee = transformCallee(Callee, DAG, dl, Subtarget);
+  else if (Subtarget.usesFunctionDescriptors())
+    prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CS,
+                                  dl, hasNest, Subtarget);
+  else
+    prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
+
+  // Build the operand list for the call instruction.
+  SmallVector<SDValue, 8> Ops;
+  buildCallOperands(Ops, CallConv, dl, isTailCall, isVarArg, isPatchPoint,
+                    hasNest, DAG, RegsToPass, Glue, Chain, Callee, SPDiff,
+                    Subtarget, isIndirect);
+
+  // Emit tail call.
+  if (isTailCall) {
+    assert(((Callee.getOpcode() == ISD::Register &&
+             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
+            Callee.getOpcode() == ISD::TargetExternalSymbol ||
+            Callee.getOpcode() == ISD::TargetGlobalAddress ||
+            isa<ConstantSDNode>(Callee)) &&
+           "Expecting a global address, external symbol, absolute value or "
+           "register");
+    assert(CallOpc == PPCISD::TC_RETURN &&
+           "Unexpected call opcode for a tail call.");
+    DAG.getMachineFunction().getFrameInfo().setHasTailCall();
+    return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
+  }
+
+  std::array<EVT, 2> ReturnTypes = {MVT::Other, MVT::Glue};
+  Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
+  Glue = Chain.getValue(1);
+
+  // When performing tail call optimization the callee pops its arguments off
+  // the stack. Account for this here so these bytes can be pushed back on in
+  // PPCFrameLowering::eliminateCallFramePseudoInstr.
+  int BytesCalleePops = (CallConv == CallingConv::Fast &&
+                         getTargetMachine().Options.GuaranteedTailCallOpt)
+                            ? NumBytes
+                            : 0;
 
   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                              DAG.getIntPtrConstant(BytesCalleePops, dl, true),
-                             InFlag, dl);
-  if (!Ins.empty())
-    InFlag = Chain.getValue(1);
+                             Glue, dl);
+  Glue = Chain.getValue(1);
 
-  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
-                         Ins, dl, DAG, InVals);
+  return LowerCallResult(Chain, Glue, CallConv, isVarArg, Ins, dl, DAG, InVals);
 }
 
 SDValue
@@ -6313,8 +6338,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
 
   // Check if this is an indirect call (MTCTR/BCTRL).
-  // See PrepareCall() for more information about calls through function
-  // pointers in the 64-bit SVR4 ABI.
+  // See prepareDescriptorIndirectCall and buildCallOperands for more
+  // information about calls through function pointers in the 64-bit SVR4 ABI.
   if (!isTailCall && !isPatchPoint &&
       !isFunctionGlobalAddress(Callee) &&
       !isa<ExternalSymbolSDNode>(Callee)) {

diff  --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index 7266d82a08b5..fdda92f0b34d 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -351,6 +351,13 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
   /// True if the GV will be accessed via an indirect symbol.
   bool isGVIndirectSymbol(const GlobalValue *GV) const;
 
+  /// True if the ABI is descriptor based.
+  bool usesFunctionDescriptors() const {
+    // Both 32-bit and 64-bit AIX are descriptor based. For ELF only the 64-bit
+    // v1 ABI uses descriptors.
+    return isAIXABI() || (is64BitELFABI() && !isELFv2ABI());
+  }
+
   bool isXRaySupported() const override { return IsPPC64 && IsLittleEndian; }
 };
 } // End llvm namespace


        


More information about the llvm-commits mailing list