[llvm] [Xtensa] Implement base CallConvention. (PR #83280)
Sergei Barannikov via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 23 17:10:47 PDT 2024
================
@@ -41,18 +52,507 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM,
setMinFunctionAlignment(Align(4));
+ setOperationAction(ISD::Constant, MVT::i32, Custom);
+ setOperationAction(ISD::Constant, MVT::i64, Expand);
+
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Expand);
+
+  // There are no extending load instructions for i1; promote such loads.
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
+ }
+
+ setOperationAction(ISD::ConstantPool, PtrVT, Custom);
+
// Compute derived properties from the register classes
computeRegisterProperties(STI.getRegisterInfo());
}
+//===----------------------------------------------------------------------===//
+// Calling conventions
+//===----------------------------------------------------------------------===//
+
+#include "XtensaGenCallingConv.inc"
+
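+// The Xtensa base calling convention: the first six 32-bit argument words
+// are passed in registers A2-A7; the rest are passed on the stack. An f64
+// argument occupies an even/odd register pair.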
+static bool CC_Xtensa_Custom(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ static const MCPhysReg IntRegs[] = {Xtensa::A2, Xtensa::A3, Xtensa::A4,
+ Xtensa::A5, Xtensa::A6, Xtensa::A7};
+
+ if (ArgFlags.isByVal()) {
+ Align ByValAlign = ArgFlags.getNonZeroByValAlign();
+ unsigned ByValSize = ArgFlags.getByValSize();
+ if (ByValSize < 4) {
+ ByValSize = 4;
+ }
+ if (ByValAlign < Align(4)) {
+ ByValAlign = Align(4);
+ }
+ unsigned Offset = State.AllocateStack(ByValSize, ByValAlign);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+    // Mark all remaining argument registers as allocated so they are not
+    // used for later arguments.
+ while (State.AllocateReg(IntRegs))
+ ;
+ return false;
+ }
+
+ // Promote i8 and i16
+ if (LocVT == MVT::i8 || LocVT == MVT::i16) {
+ LocVT = MVT::i32;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ }
+
+ unsigned Register;
+
+ Align OrigAlign = ArgFlags.getNonZeroOrigAlign();
+ bool needs64BitAlign = (ValVT == MVT::i32 && OrigAlign == Align(8));
+ bool needs128BitAlign = (ValVT == MVT::i32 && OrigAlign == Align(16));
+
+ if (ValVT == MVT::i32 || ValVT == MVT::f32) {
+ Register = State.AllocateReg(IntRegs);
+ // If this is the first part of an i64 arg,
+ // the allocated register must be either A2, A4 or A6.
+ if (needs64BitAlign && (Register == Xtensa::A3 || Register == Xtensa::A5 ||
+ Register == Xtensa::A7))
+ Register = State.AllocateReg(IntRegs);
+    // Arguments with 16-byte alignment must be passed in the first register
+    // or on the stack.
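+    // Drain the remaining registers; Register ends up 0 and the argument
+    // is assigned a stack slot below.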
+ if (needs128BitAlign && (Register != Xtensa::A2))
+ while ((Register = State.AllocateReg(IntRegs)))
+ ;
+ LocVT = MVT::i32;
+ } else if (ValVT == MVT::f64) {
+ // Allocate int register and shadow next int register.
+ Register = State.AllocateReg(IntRegs);
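+    // The pair must start at an even register (A2, A4 or A6); skip over an
+    // odd register.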
+ if (Register == Xtensa::A3 || Register == Xtensa::A5 ||
+ Register == Xtensa::A7)
+ Register = State.AllocateReg(IntRegs);
+ State.AllocateReg(IntRegs);
+ LocVT = MVT::i32;
+ } else {
+ report_fatal_error("Cannot handle this ValVT.");
+ }
+
+ if (!Register) {
+ unsigned Offset = State.AllocateStack(ValVT.getStoreSize(), OrigAlign);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ } else {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Register, LocVT, LocInfo));
+ }
+
+ return false;
+}
+
+CCAssignFn *XtensaTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
+ bool IsVarArg) const {
+ return CC_Xtensa_Custom;
+}
+
+SDValue XtensaTargetLowering::LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  // Used with varargs to accumulate store chains.
+ std::vector<SDValue> OutChains;
+
+ if (IsVarArg)
+ report_fatal_error("Var arg not supported by FormalArguments Lowering");
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
+
+ CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, IsVarArg));
+
+  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+    CCValAssign &VA = ArgLocs[I];
+ // Arguments stored on registers
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+ const TargetRegisterClass *RC;
+
+ if (RegVT == MVT::i32)
+ RC = &Xtensa::ARRegClass;
+ else
+ report_fatal_error("RegVT not supported by FormalArguments Lowering");
+
+      // Transform the arguments stored in physical
+      // registers into virtual ones.
+ unsigned Register = MF.addLiveIn(VA.getLocReg(), RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Register, RegVT);
+
+ // If this is an 8 or 16-bit value, it has been passed promoted
+ // to 32 bits. Insert an assert[sz]ext to capture this, then
+ // truncate to the right size.
+ if (VA.getLocInfo() != CCValAssign::Full) {
+ unsigned Opcode = 0;
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ Opcode = ISD::AssertSext;
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ Opcode = ISD::AssertZext;
+ if (Opcode)
+ ArgValue = DAG.getNode(Opcode, DL, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode((VA.getValVT() == MVT::f32) ? ISD::BITCAST
+ : ISD::TRUNCATE,
+ DL, VA.getValVT(), ArgValue);
+ }
+
+ InVals.push_back(ArgValue);
+
+ } else {
+ assert(VA.isMemLoc());
+
+ EVT ValVT = VA.getValVT();
+
+      // The stack pointer offset is relative to the caller's stack frame.
+ int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
+ true);
+
+ if (Ins[VA.getValNo()].Flags.isByVal()) {
+        // For byval arguments, hand back the frame index; the data has
+        // already been copied onto the stack by the caller.
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+ InVals.push_back(FIN);
+ } else {
+ // Create load nodes to retrieve arguments from the stack
+ SDValue FIN =
+ DAG.getFrameIndex(FI, getFrameIndexTy(DAG.getDataLayout()));
+ InVals.push_back(DAG.getLoad(
+ ValVT, DL, Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
+ }
+ }
+ }
+
+  // All stores are grouped in one node to allow the matching between
+  // the size of Ins and InVals. This only happens for vararg functions.
+ if (!OutChains.empty()) {
+ OutChains.push_back(Chain);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
+ }
+
+ return Chain;
+}
+
+SDValue
+XtensaTargetLowering::LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ SDLoc &DL = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &IsTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool IsVarArg = CLI.IsVarArg;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ const TargetFrameLowering *TFL = Subtarget.getFrameLowering();
+
+ // TODO: Support tail call optimization.
+ IsTailCall = false;
+
+ // Analyze the operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
+
+ CCAssignFn *CC = CCAssignFnForCall(CallConv, IsVarArg);
+
+ CCInfo.AnalyzeCallOperands(Outs, CC);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getStackSize();
+
+ unsigned StackAlignment = TFL->getStackAlignment();
+ unsigned NextStackOffset = alignTo(NumBytes, StackAlignment);
+
+ Chain = DAG.getCALLSEQ_START(Chain, NextStackOffset, 0, DL);
+
+ // Copy argument values to their designated locations.
+ std::deque<std::pair<unsigned, SDValue>> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+ SDValue StackPtr;
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ CCValAssign &VA = ArgLocs[I];
+ SDValue ArgValue = OutVals[I];
+ ISD::ArgFlagsTy Flags = Outs[I].Flags;
+
+ if (VA.isRegLoc())
+ // Queue up the argument copies and emit them at the end.
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
+ else if (Flags.isByVal()) {
+ assert(VA.isMemLoc());
+ assert(Flags.getByValSize() &&
+ "ByVal args of size 0 should have been ignored by front-end.");
+ assert(!IsTailCall &&
+ "Do not tail-call optimize if there is a byval argument.");
+
+ if (!StackPtr.getNode())
+ StackPtr = DAG.getCopyFromReg(Chain, DL, Xtensa::SP, PtrVT);
+ unsigned Offset = VA.getLocMemOffset();
+ SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
+ DAG.getIntPtrConstant(Offset, DL));
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), DL, MVT::i32);
+ SDValue Memcpy = DAG.getMemcpy(
+ Chain, DL, Address, ArgValue, SizeNode, Flags.getNonZeroByValAlign(),
+ /*isVolatile=*/false, /*AlwaysInline=*/false,
+ /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo());
+ MemOpChains.push_back(Memcpy);
+ } else {
+ assert(VA.isMemLoc() && "Argument not register or memory");
+
+      // Work out the address of the stack slot.
+ if (!StackPtr.getNode())
+ StackPtr = DAG.getCopyFromReg(Chain, DL, Xtensa::SP, PtrVT);
+ unsigned Offset = VA.getLocMemOffset();
+ SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
+ DAG.getIntPtrConstant(Offset, DL));
+
+ // Emit the store.
+ MemOpChains.push_back(
+ DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
+ }
+ }
+
+ // Join the stores, which are independent of one another.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
+
+ // Build a sequence of copy-to-reg nodes, chained and glued together.
+ SDValue Glue;
+ for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
+ unsigned Reg = RegsToPass[I].first;
+ Chain = DAG.getCopyToReg(Chain, DL, Reg, RegsToPass[I].second, Glue);
+ Glue = Chain.getValue(1);
+ }
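+
+  // Record the callee's symbol name and target flags; they are used below
+  // to detect callees that must be reached via a long call.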
+ std::string name;
+ unsigned char TF = 0;
+
+ // Accept direct calls by converting symbolic call addresses to the
+ // associated Target* opcodes.
+ if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ name = E->getSymbol();
+ TF = E->getTargetFlags();
+    if (isPositionIndependent())
+      report_fatal_error("PIC relocations are not supported");
+    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, TF);
+ } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ name = GV->getName().str();
+ }
+
+  if (!name.empty() && isLongCall(name.c_str())) {
+ // Create a constant pool entry for the callee address
+ XtensaCP::XtensaCPModifier Modifier = XtensaCP::no_modifier;
+
+ XtensaConstantPoolValue *CPV = XtensaConstantPoolSymbol::Create(
+ *DAG.getContext(), name.c_str(), 0 /* XtensaCLabelIndex */, false,
+ Modifier);
+
+ // Get the address of the callee into a register
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4), 0, TF);
+ SDValue CPWrap = getAddrPCRel(CPAddr, DAG);
+ Callee = CPWrap;
+ }
+
+ // The first call operand is the chain and the second is the target address.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
+ unsigned Reg = RegsToPass[I].first;
+ Ops.push_back(DAG.getRegister(Reg, RegsToPass[I].second.getValueType()));
+ }
+
+ // Glue the call to the argument copies, if any.
+ if (Glue.getNode())
+ Ops.push_back(Glue);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getNode(XtensaISD::CALL, DL, NodeTys, Ops);
+ Glue = Chain.getValue(1);
+
+ // Mark the end of the call, which is glued to the call itself.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getConstant(NumBytes, DL, PtrVT, true),
+ DAG.getConstant(0, DL, PtrVT, true), Glue, DL);
+ Glue = Chain.getValue(1);
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RetLocs;
+ CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
+ RetCCInfo.AnalyzeCallResult(Ins, RetCC_Xtensa);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
+ CCValAssign &VA = RetLocs[I];
+
+ // Copy the value out, gluing the copy to the end of the call sequence.
+ unsigned Reg = VA.getLocReg();
+ SDValue RetValue = DAG.getCopyFromReg(Chain, DL, Reg, VA.getLocVT(), Glue);
+ Chain = RetValue.getValue(1);
+ Glue = RetValue.getValue(2);
+
+ InVals.push_back(RetValue);
+ }
+ return Chain;
+}
+
+bool XtensaTargetLowering::CanLowerReturn(
+ CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_Xtensa);
+}
+
+SDValue
+XtensaTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SDLoc &DL, SelectionDAG &DAG) const {
+ if (IsVarArg)
+ report_fatal_error("VarArg not supported");
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Assign locations to each returned value.
+ SmallVector<CCValAssign, 16> RetLocs;
+ CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
+ RetCCInfo.AnalyzeReturn(Outs, RetCC_Xtensa);
+
+ SDValue Glue;
+ // Quick exit for void returns
+ if (RetLocs.empty())
+ return DAG.getNode(XtensaISD::RET, DL, MVT::Other, Chain);
+
+ // Copy the result values into the output registers.
+ SmallVector<SDValue, 4> RetOps;
+ RetOps.push_back(Chain);
+ for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
+ CCValAssign &VA = RetLocs[I];
+ SDValue RetValue = OutVals[I];
+
+ // Make the return register live on exit.
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ // Chain and glue the copies together.
+ unsigned Register = VA.getLocReg();
+ Chain = DAG.getCopyToReg(Chain, DL, Register, RetValue, Glue);
+ Glue = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(Register, VA.getLocVT()));
+ }
+
+ // Update chain and glue.
+ RetOps[0] = Chain;
+ if (Glue.getNode())
+ RetOps.push_back(Glue);
+
+ return DAG.getNode(XtensaISD::RET, DL, MVT::Other, RetOps);
+}
+
+SDValue XtensaTargetLowering::LowerImmediate(SDValue Op,
+ SelectionDAG &DAG) const {
+ const ConstantSDNode *CN = cast<ConstantSDNode>(Op);
+ SDLoc DL(CN);
+ APInt apval = CN->getAPIntValue();
+ int64_t value = apval.getSExtValue();
+ if (Op.getValueType() == MVT::i32) {
+    // Check if this constant may be lowered to the MOVI instruction.
+ if (value > -2048 && value <= 2047)
+ return Op;
+    // Check if the single ADD use may be lowered to the ADDMI instruction.
+ SDNode &OpNode = *Op.getNode();
+ if ((OpNode.hasOneUse() && OpNode.use_begin()->getOpcode() == ISD::ADD) &&
+ (value >= -32768) && (value <= 32512) && ((value & 0xff) == 0))
----------------
s-barannikov wrote:
Consider using isShiftedInt from MathExtras.h here and in other places.
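
For the ADDMI range check above, the suggestion could look like the
following sketch (isShiftedInt<8, 8> from llvm/Support/MathExtras.h tests
for a signed 8-bit value shifted left by 8 bits, i.e. a multiple of 256 in
[-32768, 32512], which matches the manual comparisons and the mask):

    if ((OpNode.hasOneUse() && OpNode.use_begin()->getOpcode() == ISD::ADD) &&
        isShiftedInt<8, 8>(value))

The MOVI check above is nearly isInt<12>(value), except that the current
code excludes -2048.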
https://github.com/llvm/llvm-project/pull/83280
More information about the llvm-commits
mailing list