[llvm] Xtensa] Implement windowed call ABI. (PR #130001)

Andrei Safronov via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 5 23:35:04 PST 2025


https://github.com/andreisfr created https://github.com/llvm/llvm-project/pull/130001

Implement base windowed call ABI. By defaullt use
rotation window by 8 registers.

>From 43bb5205c1872fcec4e301e315f8f3fb3022bcb3 Mon Sep 17 00:00:00 2001
From: Andrei Safronov <safronov at espressif.com>
Date: Thu, 6 Mar 2025 10:27:25 +0300
Subject: [PATCH] Xtensa] Implement windowed call ABI.

Implement base windowed call ABI. By defaullt use
rotation window by 8 registers.
---
 llvm/lib/Target/Xtensa/XtensaCallingConv.td   |  20 +++-
 .../lib/Target/Xtensa/XtensaFrameLowering.cpp | 112 +++++++++++++++++-
 llvm/lib/Target/Xtensa/XtensaFrameLowering.h  |   1 +
 llvm/lib/Target/Xtensa/XtensaISelLowering.cpp |  47 +++++++-
 llvm/lib/Target/Xtensa/XtensaISelLowering.h   |   5 +
 llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp    |   4 +
 llvm/lib/Target/Xtensa/XtensaInstrInfo.td     |  19 ++-
 .../Target/Xtensa/XtensaMachineFunctionInfo.h |   4 +
 llvm/lib/Target/Xtensa/XtensaOperators.td     |  11 ++
 llvm/lib/Target/Xtensa/XtensaRegisterInfo.cpp |  13 +-
 llvm/lib/Target/Xtensa/XtensaSubtarget.h      |  12 +-
 llvm/test/CodeGen/Xtensa/aligned_alloc.ll     |  32 +++++
 .../CodeGen/Xtensa/calling-conv-windowed.ll   | 103 ++++++++++++++++
 llvm/test/CodeGen/Xtensa/callw.ll             |  52 ++++++++
 14 files changed, 413 insertions(+), 22 deletions(-)
 create mode 100644 llvm/test/CodeGen/Xtensa/aligned_alloc.ll
 create mode 100644 llvm/test/CodeGen/Xtensa/calling-conv-windowed.ll
 create mode 100644 llvm/test/CodeGen/Xtensa/callw.ll

diff --git a/llvm/lib/Target/Xtensa/XtensaCallingConv.td b/llvm/lib/Target/Xtensa/XtensaCallingConv.td
index a348b4c890b22..092c50947cee8 100644
--- a/llvm/lib/Target/Xtensa/XtensaCallingConv.td
+++ b/llvm/lib/Target/Xtensa/XtensaCallingConv.td
@@ -9,16 +9,30 @@
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
-// Xtensa return value calling convention
+// Xtensa base calling convention
 //===----------------------------------------------------------------------===//
+// Xtensa return value
 def RetCC_Xtensa : CallingConv<[
   // First two return values go in a2, a3, a4, a5
   CCIfType<[i32], CCAssignToReg<[A2, A3, A4, A5]>>,
   CCIfType<[i64], CCAssignToRegWithShadow<[A2, A4], [A3, A5]>>
 ]>;
 
+// Callee-saved register lists
+def CSR_Xtensa : CalleeSavedRegs<(add A0, A12, A13, A14, A15)>;
+
 //===----------------------------------------------------------------------===//
-// Callee-saved register lists.
+// Xtensa windowed calling convention. Currently by default implemented
+// rotation window by 8 registers.
 //===----------------------------------------------------------------------===//
+// Xtensa return value for 8 registers window
+def RetCCW8_Xtensa : CallingConv<[
+  //First two return values go in a10, a11, a12, a13
+  CCIfType<[i32], CCAssignToReg<[A10, A11, A12, A13]>>,
+  CCIfType<[i64], CCAssignToRegWithShadow<[A10, A12], [A11, A13]>>
+]>;
 
-def CSR_Xtensa : CalleeSavedRegs<(add A0, A12, A13, A14, A15)>;
+// Callee-saved register lists for rotation window by 8 registers
+def CSRW8_Xtensa : CalleeSavedRegs<(add)> {
+  let OtherPreserved = (add A0, SP, A2, A3, A4, A5, A6, A7);
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp b/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp
index d09aac613f623..338a2bc958498 100644
--- a/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp
@@ -23,9 +23,15 @@
 
 using namespace llvm;
 
+/* minimum frame = reg save area (4 words) plus static chain (1 word)
+   and the total number of words must be a multiple of 128 bits.  */
+/* Width of a word, in units (bytes).  */
+#define UNITS_PER_WORD 4
+#define MIN_FRAME_SIZE (8 * UNITS_PER_WORD)
+
 XtensaFrameLowering::XtensaFrameLowering(const XtensaSubtarget &STI)
     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(4), 0,
-                          Align(4)),
+                          Align(4)), STI(STI),
       TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {}
 
 bool XtensaFrameLowering::hasFPImpl(const MachineFunction &MF) const {
@@ -43,6 +49,7 @@ void XtensaFrameLowering::emitPrologue(MachineFunction &MF,
   MCRegister SP = Xtensa::SP;
   MCRegister FP = TRI->getFrameRegister(MF);
   const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
+  XtensaMachineFunctionInfo *XtensaFI = MF.getInfo<XtensaMachineFunctionInfo>();
 
   // First, compute final stack size.
   uint64_t StackSize = MFI.getStackSize();
@@ -51,6 +58,83 @@ void XtensaFrameLowering::emitPrologue(MachineFunction &MF,
   // Round up StackSize to 16*N
   StackSize += (16 - StackSize) & 0xf;
 
+  if (STI.isWinABI()) {
+    StackSize += 32;
+    uint64_t MaxAlignment = MFI.getMaxAlign().value();
+    if(MaxAlignment > 32)
+      StackSize += MaxAlignment;
+
+    if (StackSize <= 32760) {
+      BuildMI(MBB, MBBI, DL, TII.get(Xtensa::ENTRY))
+          .addReg(SP)
+          .addImm(StackSize);
+    } else {
+      /* Use a8 as a temporary since a0-a7 may be live.  */
+      unsigned TmpReg = Xtensa::A8;
+
+      const XtensaInstrInfo &TII = *static_cast<const XtensaInstrInfo *>(
+          MBB.getParent()->getSubtarget().getInstrInfo());
+      BuildMI(MBB, MBBI, DL, TII.get(Xtensa::ENTRY))
+          .addReg(SP)
+          .addImm(MIN_FRAME_SIZE);
+      TII.loadImmediate(MBB, MBBI, &TmpReg, StackSize - MIN_FRAME_SIZE);
+      BuildMI(MBB, MBBI, DL, TII.get(Xtensa::SUB), TmpReg)
+          .addReg(SP)
+          .addReg(TmpReg);
+      BuildMI(MBB, MBBI, DL, TII.get(Xtensa::MOVSP), SP).addReg(TmpReg);
+    }
+
+    // Calculate how much is needed to have the correct alignment.
+    // Change offset to: alignment + difference.
+    // For example, in case of alignment of 128:
+    // diff_to_128_aligned_address = (128 - (SP & 127))
+    // new_offset = SP + diff_to_128_aligned_address
+    // This is safe to do because we increased the stack size by MaxAlignment.
+    unsigned Reg, RegMisAlign;
+    if (MaxAlignment > 32){
+      TII.loadImmediate(MBB, MBBI, &RegMisAlign, MaxAlignment - 1);
+      TII.loadImmediate(MBB, MBBI, &Reg, MaxAlignment);
+      BuildMI(MBB, MBBI, DL, TII.get(Xtensa::AND))
+          .addReg(RegMisAlign, RegState::Define)
+          .addReg(FP)
+          .addReg(RegMisAlign);
+      BuildMI(MBB, MBBI, DL, TII.get(Xtensa::SUB), RegMisAlign)
+          .addReg(Reg)
+          .addReg(RegMisAlign);
+      BuildMI(MBB, MBBI, DL, TII.get(Xtensa::ADD), SP)
+          .addReg(SP)
+          .addReg(RegMisAlign, RegState::Kill);
+    }
+
+    // Store FP register in A8, because FP may be used to pass function
+    // arguments
+    if (XtensaFI->isSaveFrameRegister()) {
+      BuildMI(MBB, MBBI, DL, TII.get(Xtensa::OR), Xtensa::A8)
+          .addReg(FP)
+          .addReg(FP);
+    }
+
+    // if framepointer enabled, set it to point to the stack pointer.
+    if (hasFP(MF)) {
+      // Insert instruction "move $fp, $sp" at this location.
+      BuildMI(MBB, MBBI, DL, TII.get(Xtensa::OR), FP)
+          .addReg(SP)
+          .addReg(SP)
+          .setMIFlag(MachineInstr::FrameSetup);
+
+      MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(
+          nullptr, MRI->getDwarfRegNum(FP, true), StackSize);
+      unsigned CFIIndex = MF.addFrameInst(Inst);
+      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+          .addCFIIndex(CFIIndex);
+    } else {
+      // emit ".cfi_def_cfa_offset StackSize"
+      unsigned CFIIndex = MF.addFrameInst(
+          MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize));
+      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+          .addCFIIndex(CFIIndex);
+    }
+  } else {
   // No need to allocate space on the stack.
   if (StackSize == 0 && !MFI.adjustsStack())
     return;
@@ -122,6 +206,7 @@ void XtensaFrameLowering::emitPrologue(MachineFunction &MF,
     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
         .addCFIIndex(CFIIndex);
   }
+  }
 
   if (StackSize != PrevStackSize) {
     MFI.setStackSize(StackSize);
@@ -179,10 +264,22 @@ void XtensaFrameLowering::emitEpilogue(MachineFunction &MF,
              "Unexpected callee-saved register restore instruction");
 #endif
     }
-
-    BuildMI(MBB, I, DL, TII.get(Xtensa::OR), SP).addReg(FP).addReg(FP);
+    if (STI.isWinABI()) {
+      // In most architectures, we need to explicitly restore the stack pointer
+      // before returning.
+      //
+      // For Xtensa Windowed Register option, it is not needed to explicitly
+      // restore the stack pointer. Reason being is that on function return,
+      // the window of the caller (including the old stack pointer) gets
+      // restored anyways.
+    } else {
+      BuildMI(MBB, I, DL, TII.get(Xtensa::OR), SP).addReg(FP).addReg(FP);
+    }
   }
 
+  if (STI.isWinABI())
+    return;
+
   // Get the number of bytes from FrameInfo
   uint64_t StackSize = MFI.getStackSize();
 
@@ -199,6 +296,9 @@ bool XtensaFrameLowering::spillCalleeSavedRegisters(
   MachineFunction *MF = MBB.getParent();
   MachineBasicBlock &EntryBlock = *(MF->begin());
 
+  if (STI.isWinABI())
+    return true;
+
   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
     // Add the callee-saved register as live-in. Do not add if the register is
     // A0 and return address is taken, because it will be implemented in
@@ -224,6 +324,8 @@ bool XtensaFrameLowering::spillCalleeSavedRegisters(
 bool XtensaFrameLowering::restoreCalleeSavedRegisters(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
     MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+  if (STI.isWinABI())
+    return true;
   return TargetFrameLowering::restoreCalleeSavedRegisters(MBB, MI, CSI, TRI);
 }
 
@@ -251,6 +353,10 @@ void XtensaFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                                RegScavenger *RS) const {
   unsigned FP = TRI->getFrameRegister(MF);
 
+  if (STI.isWinABI()) {
+    return;
+  }
+
   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
 
   // Mark $fp as used if function has dedicated frame pointer.
diff --git a/llvm/lib/Target/Xtensa/XtensaFrameLowering.h b/llvm/lib/Target/Xtensa/XtensaFrameLowering.h
index 3f946e1ea730f..f0095b8774154 100644
--- a/llvm/lib/Target/Xtensa/XtensaFrameLowering.h
+++ b/llvm/lib/Target/Xtensa/XtensaFrameLowering.h
@@ -18,6 +18,7 @@ class XtensaInstrInfo;
 class XtensaRegisterInfo;
 
 class XtensaFrameLowering : public TargetFrameLowering {
+  const XtensaSubtarget &STI;
   const XtensaInstrInfo &TII;
   const XtensaRegisterInfo *TRI;
 
diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
index 57f0cbbc36c24..75e8dad388d1e 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
@@ -42,6 +42,15 @@ static bool isLongCall(const char *str) {
   return true;
 }
 
+// The calling conventions in XtensaCallingConv.td are described in terms of the
+// callee's register window. This function translates registers to the
+// corresponding caller window %o register.
+static unsigned toCallerWindow(unsigned Reg) {
+  if (Reg >= Xtensa::A2 && Reg <= Xtensa::A7)
+    return Reg - Xtensa::A2 + Xtensa::A10;
+  return Reg;
+}
+
 XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM,
                                            const XtensaSubtarget &STI)
     : TargetLowering(TM), Subtarget(STI) {
@@ -339,7 +348,18 @@ SDValue XtensaTargetLowering::LowerFormalArguments(
 
       // Transform the arguments stored on
       // physical registers into virtual ones
-      Register Reg = MF.addLiveIn(VA.getLocReg(), &Xtensa::ARRegClass);
+      Register Reg = 0;
+      unsigned FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
+
+      // Argument passed in FrameReg in WinABI we save in A8 (in emitPrologue),
+      // so load argument from A8
+      if (Subtarget.isWinABI() && (VA.getLocReg() == FrameReg)) {
+        Reg = MF.addLiveIn(Xtensa::A8, &Xtensa::ARRegClass);
+        XtensaFI->setSaveFrameRegister();
+      } else {
+        Reg = MF.addLiveIn(VA.getLocReg(), &Xtensa::ARRegClass);
+      }
+
       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
 
       // If this is an 8 or 16-bit value, it has been passed promoted
@@ -538,6 +558,8 @@ XtensaTargetLowering::LowerCall(CallLoweringInfo &CLI,
   SDValue Glue;
   for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
     unsigned Reg = RegsToPass[I].first;
+    if (Subtarget.isWinABI())
+      Reg = toCallerWindow(Reg);
     Chain = DAG.getCopyToReg(Chain, DL, Reg, RegsToPass[I].second, Glue);
     Glue = Chain.getValue(1);
   }
@@ -587,6 +609,8 @@ XtensaTargetLowering::LowerCall(CallLoweringInfo &CLI,
   // known live into the call.
   for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
     unsigned Reg = RegsToPass[I].first;
+    if (Subtarget.isWinABI())
+      Reg = toCallerWindow(Reg);
     Ops.push_back(DAG.getRegister(Reg, RegsToPass[I].second.getValueType()));
   }
 
@@ -595,7 +619,8 @@ XtensaTargetLowering::LowerCall(CallLoweringInfo &CLI,
     Ops.push_back(Glue);
 
   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
-  Chain = DAG.getNode(XtensaISD::CALL, DL, NodeTys, Ops);
+  Chain = DAG.getNode(Subtarget.isWinABI() ? XtensaISD::CALLW8 : XtensaISD::CALL,
+                      DL, NodeTys, Ops);
   Glue = Chain.getValue(1);
 
   // Mark the end of the call, which is glued to the call itself.
@@ -606,7 +631,8 @@ XtensaTargetLowering::LowerCall(CallLoweringInfo &CLI,
   // Assign locations to each value returned by this call.
   SmallVector<CCValAssign, 16> RetLocs;
   CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
-  RetCCInfo.AnalyzeCallResult(Ins, RetCC_Xtensa);
+  RetCCInfo.AnalyzeCallResult(Ins, Subtarget.isWinABI() ? RetCCW8_Xtensa
+                                                        : RetCC_Xtensa);
 
   // Copy all of the result registers out of their specified physreg.
   for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
@@ -648,7 +674,8 @@ XtensaTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
   SDValue Glue;
   // Quick exit for void returns
   if (RetLocs.empty())
-    return DAG.getNode(XtensaISD::RET, DL, MVT::Other, Chain);
+    return DAG.getNode(Subtarget.isWinABI() ? XtensaISD::RETW
+                                            : XtensaISD::RET, DL, MVT::Other, Chain);
 
   // Copy the result values into the output registers.
   SmallVector<SDValue, 4> RetOps;
@@ -672,7 +699,8 @@ XtensaTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
   if (Glue.getNode())
     RetOps.push_back(Glue);
 
-  return DAG.getNode(XtensaISD::RET, DL, MVT::Other, RetOps);
+  return DAG.getNode(Subtarget.isWinABI() ? XtensaISD::RETW
+                                            : XtensaISD::RET, DL, MVT::Other, RetOps);
 }
 
 static unsigned getBranchOpcode(ISD::CondCode Cond) {
@@ -906,6 +934,9 @@ SDValue XtensaTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
   unsigned SPReg = Xtensa::SP;
   SDValue SP = DAG.getCopyFromReg(Chain, DL, SPReg, VT);
   SDValue NewSP = DAG.getNode(ISD::SUB, DL, VT, SP, SizeRoundUp); // Value
+  if (Subtarget.isWinABI()) {
+    NewSP = DAG.getNode(XtensaISD::MOVSP, DL, MVT::i32, NewSP);
+  }
   Chain = DAG.getCopyToReg(SP.getValue(1), DL, SPReg, NewSP); // Output chain
 
   SDValue NewVal = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i32);
@@ -1230,12 +1261,18 @@ const char *XtensaTargetLowering::getTargetNodeName(unsigned Opcode) const {
     return "XtensaISD::BR_JT";
   case XtensaISD::CALL:
     return "XtensaISD::CALL";
+  case XtensaISD::CALLW8:
+    return "XtensaISD::CALLW8";
   case XtensaISD::EXTUI:
     return "XtensaISD::EXTUI";
+  case XtensaISD::MOVSP:
+    return "XtensaISD::MOVSP";
   case XtensaISD::PCREL_WRAPPER:
     return "XtensaISD::PCREL_WRAPPER";
   case XtensaISD::RET:
     return "XtensaISD::RET";
+  case XtensaISD::RETW:
+    return "XtensaISD::RETW";
   case XtensaISD::SELECT_CC:
     return "XtensaISD::SELECT_CC";
   case XtensaISD::SRCL:
diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.h b/llvm/lib/Target/Xtensa/XtensaISelLowering.h
index a959299d8ca6a..c7d4f41b1f08e 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.h
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.h
@@ -29,16 +29,21 @@ enum {
   // is the target address.  The arguments start at operand 2.
   // There is an optional glue operand at the end.
   CALL,
+  // Call with rotation window by 8 registers
+  CALLW8,
 
   // Extract unsigned immediate. Operand 0 is value, operand 1
   // is bit position of the field [0..31], operand 2 is bit size
   // of the field [1..16]
   EXTUI,
 
+  MOVSP,
+
   // Wraps a TargetGlobalAddress that should be loaded using PC-relative
   // accesses.  Operand 0 is the address.
   PCREL_WRAPPER,
   RET,
+  RETW,
 
   // Select with condition operator - This selects between a true value and
   // a false value (ops #2 and #3) based on the boolean result of comparing
diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp b/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp
index da2883e1902ca..788dc992d1d4e 100644
--- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp
@@ -100,9 +100,13 @@ void XtensaInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
         .addReg(Reg1, RegState::Kill);
   }
 
+  if (STI.isWinABI()) {
+    BuildMI(MBB, I, DL, get(Xtensa::MOVSP), SP).addReg(Reg, RegState::Kill);
+  } else {
   BuildMI(MBB, I, DL, get(Xtensa::OR), SP)
       .addReg(Reg, RegState::Kill)
       .addReg(Reg, RegState::Kill);
+      }
 }
 
 void XtensaInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td
index e52dcbf1377c5..31c5a1d5db68e 100644
--- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td
+++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td
@@ -709,22 +709,35 @@ let isCall = 1, Defs = [A0] in {
   }
 }
 
+// Windowed call patterns. Currently rotation
+// window by 8 is implemented.
+def : Pat<(Xtensa_callw8 (i32 tglobaladdr:$dst)),
+          (CALL8 tglobaladdr:$dst)>;
+def : Pat<(Xtensa_callw8 (i32 texternalsym:$dst)),
+          (CALL8 texternalsym:$dst)>;
+def : Pat<(Xtensa_callw8 AR:$dst),
+          (CALLX8 AR:$dst)>;
+
 def MOVSP : RRR_Inst<0x00, 0x00, 0x00, (outs AR:$t), (ins AR:$s),
-                    "movsp\t$t, $s", []>, Requires<[HasWindowed]> {
+                    "movsp\t$t, $s",
+                    [(set AR:$t, (Xtensa_movsp AR:$s))]>,
+                    Requires<[HasWindowed]> {
   let r = 0x01;
 }
 
 let isReturn = 1, isTerminator = 1,
     isBarrier = 1, Uses = [A0] in {
   def RETW_N : RRRN_Inst<0x0D, (outs), (ins),
-                        "retw.n", []>, Requires<[HasWindowed, HasDensity]> {
+                        "retw.n", [(Xtensa_retw)]>,
+                        Requires<[HasWindowed, HasDensity]> {
     let r = 0x0F;
     let s = 0;
     let t = 1;
   }
 
   def RETW : CALLX_Inst<0x00, 0x00, 0x00, (outs), (ins),
-                       "retw", []>, Requires<[HasWindowed]> {
+                       "retw", [(Xtensa_retw)]>,
+                       Requires<[HasWindowed]> {
     let m = 0x2;
     let n = 0x1;
     let s = 0;
diff --git a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h
index c430562091ba7..bc051d9ca14fa 100644
--- a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h
+++ b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h
@@ -27,6 +27,7 @@ class XtensaMachineFunctionInfo : public MachineFunctionInfo {
   unsigned VarArgsFirstGPR;
   int VarArgsOnStackFrameIndex;
   int VarArgsInRegsFrameIndex;
+  bool SaveFrameRegister = false;
 
 public:
   explicit XtensaMachineFunctionInfo(const Function &F,
@@ -50,6 +51,9 @@ class XtensaMachineFunctionInfo : public MachineFunctionInfo {
   // Get and set the frame index of the first stack vararg.
   int getVarArgsInRegsFrameIndex() const { return VarArgsInRegsFrameIndex; }
   void setVarArgsInRegsFrameIndex(int FI) { VarArgsInRegsFrameIndex = FI; }
+
+  bool isSaveFrameRegister() const { return SaveFrameRegister; }
+  void setSaveFrameRegister() { SaveFrameRegister = true; }
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Target/Xtensa/XtensaOperators.td b/llvm/lib/Target/Xtensa/XtensaOperators.td
index 3dd73b44f336a..1fe5c3f64b607 100644
--- a/llvm/lib/Target/Xtensa/XtensaOperators.td
+++ b/llvm/lib/Target/Xtensa/XtensaOperators.td
@@ -31,15 +31,23 @@ def SDT_XtensaSRC                 : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCi
 def SDT_XtensaEXTUI               : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
                                                          SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
 
+def SDT_XtensaMOVSP               : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisVT<0, i32>]>;
+
 //===----------------------------------------------------------------------===//
 // Node definitions
 //===----------------------------------------------------------------------===//
 def Xtensa_call: SDNode<"XtensaISD::CALL", SDT_XtensaCall,
                        [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;
 
+def Xtensa_callw8: SDNode<"XtensaISD::CALLW8", SDT_XtensaCall,
+                        [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;
+
 def Xtensa_ret: SDNode<"XtensaISD::RET", SDTNone,
                        [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
 
+def Xtensa_retw: SDNode<"XtensaISD::RETW", SDTNone,
+                       [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
 def Xtensa_pcrel_wrapper: SDNode<"XtensaISD::PCREL_WRAPPER", SDT_XtensaWrapPtr, []>;
 
 def Xtensa_callseq_start: SDNode<"ISD::CALLSEQ_START", SDT_XtensaCallSeqStart,
@@ -59,3 +67,6 @@ def Xtensa_srcl: SDNode<"XtensaISD::SRCL", SDT_XtensaSRC>;
 def Xtensa_srcr: SDNode<"XtensaISD::SRCR", SDT_XtensaSRC>;
 
 def Xtensa_extui: SDNode<"XtensaISD::EXTUI", SDT_XtensaEXTUI>;
+
+def Xtensa_movsp: SDNode<"XtensaISD::MOVSP", SDT_XtensaMOVSP,
+                        [SDNPInGlue]>;
diff --git a/llvm/lib/Target/Xtensa/XtensaRegisterInfo.cpp b/llvm/lib/Target/Xtensa/XtensaRegisterInfo.cpp
index 4a8bafc540df0..cd8bfa0d4d164 100644
--- a/llvm/lib/Target/Xtensa/XtensaRegisterInfo.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaRegisterInfo.cpp
@@ -34,13 +34,19 @@ XtensaRegisterInfo::XtensaRegisterInfo(const XtensaSubtarget &STI)
 
 const uint16_t *
 XtensaRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
-  return CSR_Xtensa_SaveList;
+  if (Subtarget.isWinABI())
+    return CSRW8_Xtensa_SaveList;
+  else
+    return CSR_Xtensa_SaveList;
 }
 
 const uint32_t *
 XtensaRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                          CallingConv::ID) const {
-  return CSR_Xtensa_RegMask;
+  if (Subtarget.isWinABI())
+    return CSRW8_Xtensa_RegMask;
+  else
+    return CSR_Xtensa_RegMask;
 }
 
 BitVector XtensaRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
@@ -129,5 +135,6 @@ bool XtensaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
 Register XtensaRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
-  return TFI->hasFP(MF) ? Xtensa::A15 : Xtensa::SP;
+  return TFI->hasFP(MF) ? (Subtarget.isWinABI() ? Xtensa::A7 : Xtensa::A15)
+                        : Xtensa::SP;
 }
diff --git a/llvm/lib/Target/Xtensa/XtensaSubtarget.h b/llvm/lib/Target/Xtensa/XtensaSubtarget.h
index 962bed2c2e36f..ffd12a203d87f 100644
--- a/llvm/lib/Target/Xtensa/XtensaSubtarget.h
+++ b/llvm/lib/Target/Xtensa/XtensaSubtarget.h
@@ -30,17 +30,17 @@ class StringRef;
 
 class XtensaSubtarget : public XtensaGenSubtargetInfo {
 private:
+// Bool members corresponding to the SubtargetFeatures defined in tablegen
+#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
+  bool ATTRIBUTE = DEFAULT;
+#include "XtensaGenSubtargetInfo.inc"
+
   const Triple &TargetTriple;
   XtensaInstrInfo InstrInfo;
   XtensaTargetLowering TLInfo;
   SelectionDAGTargetInfo TSInfo;
   XtensaFrameLowering FrameLowering;
 
-// Bool members corresponding to the SubtargetFeatures defined in tablegen
-#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
-  bool ATTRIBUTE = DEFAULT;
-#include "XtensaGenSubtargetInfo.inc"
-
   XtensaSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
 
 public:
@@ -70,6 +70,8 @@ class XtensaSubtarget : public XtensaGenSubtargetInfo {
 
   bool hasBoolean() const { return HasBoolean; }
 
+  bool isWinABI() const { return hasWindowed(); }
+
   // Automatically generated by tblgen.
   void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
 };
diff --git a/llvm/test/CodeGen/Xtensa/aligned_alloc.ll b/llvm/test/CodeGen/Xtensa/aligned_alloc.ll
new file mode 100644
index 0000000000000..ebb24d9272ddc
--- /dev/null
+++ b/llvm/test/CodeGen/Xtensa/aligned_alloc.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=xtensa -O0 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=XTENSA
+
+define i8 @loadi8_128(i8 %a) {
+; XTENSA-LABEL: loadi8_128:
+; XTENSA:         .cfi_startproc
+; XTENSA-NEXT:  # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -128
+; XTENSA-NEXT:    or a1, a8, a8
+; XTENSA-NEXT:    .cfi_def_cfa_offset 128
+; XTENSA-NEXT:    s32i a0, a1, 124 # 4-byte Folded Spill
+; XTENSA-NEXT:    .cfi_offset a0, -4
+; XTENSA-NEXT:    addi a2, a1, 0
+; XTENSA-NEXT:    movi a3, 0
+; XTENSA-NEXT:    movi a4, 64
+; XTENSA-NEXT:    l32r a8, .LCPI0_0
+; XTENSA-NEXT:    callx0 a8
+; XTENSA-NEXT:    l8ui a2, a1, 0
+; XTENSA-NEXT:    l32i a0, a1, 124 # 4-byte Folded Reload
+; XTENSA-NEXT:    movi a8, 128
+; XTENSA-NEXT:    add a8, a1, a8
+; XTENSA-NEXT:    or a1, a8, a8
+; XTENSA-NEXT:    ret
+    %aligned = alloca i8, align 128
+    call void @llvm.memset.p0.i64(ptr noundef nonnull align 64 dereferenceable(64) %aligned, i8 0, i64 64, i1 false)
+    %1 = load i8, ptr %aligned, align 128
+    ret i8 %1
+}
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
diff --git a/llvm/test/CodeGen/Xtensa/calling-conv-windowed.ll b/llvm/test/CodeGen/Xtensa/calling-conv-windowed.ll
new file mode 100644
index 0000000000000..27855b1cdf481
--- /dev/null
+++ b/llvm/test/CodeGen/Xtensa/calling-conv-windowed.ll
@@ -0,0 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=xtensa -O1 -mattr=+windowed -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=XTENSA
+
+; Check placement of first 6 arguments in registers and 7th argument on stack
+define dso_local i32 @test1(i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef %3, i32 noundef %4, i32 noundef %5, ptr nocapture noundef readonly byval(i32) align 4 %6) {
+; XTENSA-LABEL: test1:
+; XTENSA:         .cfi_startproc
+; XTENSA-NEXT:  # %bb.0:
+; XTENSA-NEXT:    entry a1, 32
+; XTENSA-NEXT:    .cfi_def_cfa_offset 32
+; XTENSA-NEXT:    add a8, a7, a2
+; XTENSA-NEXT:    l32i a9, a1, 32
+; XTENSA-NEXT:    add a2, a8, a9
+; XTENSA-NEXT:    retw
+  %8 = load i32, ptr %6, align 4
+  %9 = add nsw i32 %5, %0
+  %10 = add nsw i32 %9, %8
+  ret i32 %10
+}
+
+; Check placement of second i64 argument in registers
+define dso_local i32 @test2(i32 noundef %0, i64 noundef %1, i32 noundef %2) {
+; XTENSA-LABEL: test2:
+; XTENSA:         .cfi_startproc
+; XTENSA-NEXT:  # %bb.0:
+; XTENSA-NEXT:    entry a1, 32
+; XTENSA-NEXT:    .cfi_def_cfa_offset 32
+; XTENSA-NEXT:    add a8, a6, a2
+; XTENSA-NEXT:    add a2, a8, a4
+; XTENSA-NEXT:    retw
+  %4 = trunc i64 %1 to i32
+  %5 = add nsw i32 %2, %0
+  %6 = add nsw i32 %5, %4
+  ret i32 %6
+}
+
+; Check placement of first argument typeof i8 in register
+define dso_local i32 @test3(i8 noundef signext %0, i64 noundef %1, i32 noundef %2) {
+; XTENSA-LABEL: test3:
+; XTENSA:         .cfi_startproc
+; XTENSA-NEXT:  # %bb.0:
+; XTENSA-NEXT:    entry a1, 32
+; XTENSA-NEXT:    .cfi_def_cfa_offset 32
+; XTENSA-NEXT:    add a8, a2, a6
+; XTENSA-NEXT:    add a2, a8, a4
+; XTENSA-NEXT:    retw
+  %4 = trunc i64 %1 to i32
+  %5 = sext i8 %0 to i32
+  %6 = add nsw i32 %5, %2
+  %7 = add nsw i32 %6, %4
+  ret i32 %7
+}
+
+; Check placement of 4th argument typeof i64 on stack
+define dso_local i32 @test4(i8 noundef signext %0, i64 noundef %1, i32 noundef %2, ptr nocapture noundef readonly byval(i64) align 8 %3) {
+; XTENSA-LABEL: test4:
+; XTENSA:         .cfi_startproc
+; XTENSA-NEXT:  # %bb.0:
+; XTENSA-NEXT:    entry a1, 32
+; XTENSA-NEXT:    .cfi_def_cfa_offset 32
+; XTENSA-NEXT:    add a8, a2, a6
+; XTENSA-NEXT:    add a8, a8, a4
+; XTENSA-NEXT:    l32i a9, a1, 32
+; XTENSA-NEXT:    add a2, a8, a9
+; XTENSA-NEXT:    retw
+  %5 = load i64, ptr %3, align 8
+  %6 = trunc i64 %1 to i32
+  %7 = trunc i64 %5 to i32
+  %8 = sext i8 %0 to i32
+  %9 = add nsw i32 %8, %2
+  %10 = add nsw i32 %9, %6
+  %11 = add nsw i32 %10, %7
+  ret i32 %11
+}
+
+; Check placement of 128 bit structure on registers
+define dso_local i32 @test5([4 x i32] %0, i32 noundef %1) {
+; XTENSA-LABEL: test5:
+; XTENSA:         .cfi_startproc
+; XTENSA-NEXT:  # %bb.0:
+; XTENSA-NEXT:    entry a1, 32
+; XTENSA-NEXT:    .cfi_def_cfa_offset 32
+; XTENSA-NEXT:    add a2, a2, a6
+; XTENSA-NEXT:    retw
+  %3 = extractvalue [4 x i32] %0, 0
+  %4 = add nsw i32 %3, %1
+  ret i32 %4
+}
+
+; Check placement of 128 bit structure on stack
+define dso_local i32 @test6(i32 noundef %0, [4 x i32] %1) {
+; XTENSA-LABEL: test6:
+; XTENSA:         .cfi_startproc
+; XTENSA-NEXT:  # %bb.0:
+; XTENSA-NEXT:    entry a1, 32
+; XTENSA-NEXT:    .cfi_def_cfa_offset 32
+; XTENSA-NEXT:    add a2, a3, a2
+; XTENSA-NEXT:    retw
+  %3 = extractvalue [4 x i32] %1, 0
+  %4 = add nsw i32 %3, %0
+  ret i32 %4
+}
diff --git a/llvm/test/CodeGen/Xtensa/callw.ll b/llvm/test/CodeGen/Xtensa/callw.ll
new file mode 100644
index 0000000000000..21549bcf22678
--- /dev/null
+++ b/llvm/test/CodeGen/Xtensa/callw.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=xtensa -mattr=+windowed < %s | FileCheck %s
+
+declare i32 @external_function(i32)
+
+define i32 @test_call_external(i32 %a) nounwind {
+; CHECK-LABEL: test_call_external:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    entry a1, 32
+; CHECK-NEXT:    l32r a8, .LCPI0_0
+; CHECK-NEXT:    or a10, a2, a2
+; CHECK-NEXT:    callx8 a8
+; CHECK-NEXT:    or a2, a10, a10
+; CHECK-NEXT:    retw
+  %1 = call i32 @external_function(i32 %a)
+  ret i32 %1
+}
+
+define i32 @defined_function(i32 %a) nounwind {
+; CHECK-LABEL: defined_function:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    entry a1, 32
+; CHECK-NEXT:    addi a2, a2, 1
+; CHECK-NEXT:    retw
+  %1 = add i32 %a, 1
+  ret i32 %1
+}
+
+define i32 @test_call_defined(i32 %a) nounwind {
+; CHECK-LABEL: test_call_defined:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    entry a1, 32
+; CHECK-NEXT:    l32r a8, .LCPI2_0
+; CHECK-NEXT:    or a10, a2, a2
+; CHECK-NEXT:    callx8 a8
+; CHECK-NEXT:    or a2, a10, a10
+; CHECK-NEXT:    retw
+  %1 = call i32 @defined_function(i32 %a) nounwind
+  ret i32 %1
+}
+
+define i32 @test_call_indirect(ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: test_call_indirect:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    entry a1, 32
+; CHECK-NEXT:    or a10, a3, a3
+; CHECK-NEXT:    callx8 a2
+; CHECK-NEXT:    or a2, a10, a10
+; CHECK-NEXT:    retw
+  %1 = call i32 %a(i32 %b)
+  ret i32 %1
+}



More information about the llvm-commits mailing list