[llvm] [Xtensa] Implement windowed call ABI. (PR #130001)
Andrei Safronov via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 5 23:35:04 PST 2025
https://github.com/andreisfr created https://github.com/llvm/llvm-project/pull/130001
Implement the base windowed call ABI. By default, use
a register window rotation of 8 registers.
>From 43bb5205c1872fcec4e301e315f8f3fb3022bcb3 Mon Sep 17 00:00:00 2001
From: Andrei Safronov <safronov at espressif.com>
Date: Thu, 6 Mar 2025 10:27:25 +0300
Subject: [PATCH] [Xtensa] Implement windowed call ABI.
Implement the base windowed call ABI. By default, use
a register window rotation of 8 registers.
---
llvm/lib/Target/Xtensa/XtensaCallingConv.td | 20 +++-
.../lib/Target/Xtensa/XtensaFrameLowering.cpp | 112 +++++++++++++++++-
llvm/lib/Target/Xtensa/XtensaFrameLowering.h | 1 +
llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 47 +++++++-
llvm/lib/Target/Xtensa/XtensaISelLowering.h | 5 +
llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp | 4 +
llvm/lib/Target/Xtensa/XtensaInstrInfo.td | 19 ++-
.../Target/Xtensa/XtensaMachineFunctionInfo.h | 4 +
llvm/lib/Target/Xtensa/XtensaOperators.td | 11 ++
llvm/lib/Target/Xtensa/XtensaRegisterInfo.cpp | 13 +-
llvm/lib/Target/Xtensa/XtensaSubtarget.h | 12 +-
llvm/test/CodeGen/Xtensa/aligned_alloc.ll | 32 +++++
.../CodeGen/Xtensa/calling-conv-windowed.ll | 103 ++++++++++++++++
llvm/test/CodeGen/Xtensa/callw.ll | 52 ++++++++
14 files changed, 413 insertions(+), 22 deletions(-)
create mode 100644 llvm/test/CodeGen/Xtensa/aligned_alloc.ll
create mode 100644 llvm/test/CodeGen/Xtensa/calling-conv-windowed.ll
create mode 100644 llvm/test/CodeGen/Xtensa/callw.ll
diff --git a/llvm/lib/Target/Xtensa/XtensaCallingConv.td b/llvm/lib/Target/Xtensa/XtensaCallingConv.td
index a348b4c890b22..092c50947cee8 100644
--- a/llvm/lib/Target/Xtensa/XtensaCallingConv.td
+++ b/llvm/lib/Target/Xtensa/XtensaCallingConv.td
@@ -9,16 +9,30 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// Xtensa return value calling convention
+// Xtensa base calling convention
//===----------------------------------------------------------------------===//
+// Xtensa return value
def RetCC_Xtensa : CallingConv<[
// First two return values go in a2, a3, a4, a5
CCIfType<[i32], CCAssignToReg<[A2, A3, A4, A5]>>,
CCIfType<[i64], CCAssignToRegWithShadow<[A2, A4], [A3, A5]>>
]>;
+// Callee-saved register lists
+def CSR_Xtensa : CalleeSavedRegs<(add A0, A12, A13, A14, A15)>;
+
//===----------------------------------------------------------------------===//
-// Callee-saved register lists.
+// Xtensa windowed calling convention. Currently, rotation of the register
+// window by 8 registers is implemented by default.
//===----------------------------------------------------------------------===//
+// Xtensa return value for 8 registers window
+def RetCCW8_Xtensa : CallingConv<[
+ // Return values go in a10, a11, a12, a13 (up to four i32 or two i64)
+ CCIfType<[i32], CCAssignToReg<[A10, A11, A12, A13]>>,
+ CCIfType<[i64], CCAssignToRegWithShadow<[A10, A12], [A11, A13]>>
+]>;
-def CSR_Xtensa : CalleeSavedRegs<(add A0, A12, A13, A14, A15)>;
+// Callee-saved register lists for rotation window by 8 registers
+def CSRW8_Xtensa : CalleeSavedRegs<(add)> {
+ let OtherPreserved = (add A0, SP, A2, A3, A4, A5, A6, A7);
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp b/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp
index d09aac613f623..338a2bc958498 100644
--- a/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp
@@ -23,9 +23,15 @@
using namespace llvm;
+/* minimum frame = reg save area (4 words) plus static chain (1 word)
+ and the total number of words must be a multiple of 128 bits. */
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD 4
+#define MIN_FRAME_SIZE (8 * UNITS_PER_WORD)
+
XtensaFrameLowering::XtensaFrameLowering(const XtensaSubtarget &STI)
: TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(4), 0,
- Align(4)),
+ Align(4)), STI(STI),
TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {}
bool XtensaFrameLowering::hasFPImpl(const MachineFunction &MF) const {
@@ -43,6 +49,7 @@ void XtensaFrameLowering::emitPrologue(MachineFunction &MF,
MCRegister SP = Xtensa::SP;
MCRegister FP = TRI->getFrameRegister(MF);
const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
+ XtensaMachineFunctionInfo *XtensaFI = MF.getInfo<XtensaMachineFunctionInfo>();
// First, compute final stack size.
uint64_t StackSize = MFI.getStackSize();
@@ -51,6 +58,83 @@ void XtensaFrameLowering::emitPrologue(MachineFunction &MF,
// Round up StackSize to 16*N
StackSize += (16 - StackSize) & 0xf;
+ if (STI.isWinABI()) {
+ StackSize += 32;
+ uint64_t MaxAlignment = MFI.getMaxAlign().value();
+ if(MaxAlignment > 32)
+ StackSize += MaxAlignment;
+
+ if (StackSize <= 32760) {
+ BuildMI(MBB, MBBI, DL, TII.get(Xtensa::ENTRY))
+ .addReg(SP)
+ .addImm(StackSize);
+ } else {
+ /* Use a8 as a temporary since a0-a7 may be live. */
+ unsigned TmpReg = Xtensa::A8;
+
+ const XtensaInstrInfo &TII = *static_cast<const XtensaInstrInfo *>(
+ MBB.getParent()->getSubtarget().getInstrInfo());
+ BuildMI(MBB, MBBI, DL, TII.get(Xtensa::ENTRY))
+ .addReg(SP)
+ .addImm(MIN_FRAME_SIZE);
+ TII.loadImmediate(MBB, MBBI, &TmpReg, StackSize - MIN_FRAME_SIZE);
+ BuildMI(MBB, MBBI, DL, TII.get(Xtensa::SUB), TmpReg)
+ .addReg(SP)
+ .addReg(TmpReg);
+ BuildMI(MBB, MBBI, DL, TII.get(Xtensa::MOVSP), SP).addReg(TmpReg);
+ }
+
+ // Calculate how much is needed to have the correct alignment.
+ // Change offset to: alignment + difference.
+ // For example, in case of alignment of 128:
+ // diff_to_128_aligned_address = (128 - (SP & 127))
+ // new_offset = SP + diff_to_128_aligned_address
+ // This is safe to do because we increased the stack size by MaxAlignment.
+ unsigned Reg, RegMisAlign;
+ if (MaxAlignment > 32){
+ TII.loadImmediate(MBB, MBBI, &RegMisAlign, MaxAlignment - 1);
+ TII.loadImmediate(MBB, MBBI, &Reg, MaxAlignment);
+ BuildMI(MBB, MBBI, DL, TII.get(Xtensa::AND))
+ .addReg(RegMisAlign, RegState::Define)
+ .addReg(FP)
+ .addReg(RegMisAlign);
+ BuildMI(MBB, MBBI, DL, TII.get(Xtensa::SUB), RegMisAlign)
+ .addReg(Reg)
+ .addReg(RegMisAlign);
+ BuildMI(MBB, MBBI, DL, TII.get(Xtensa::ADD), SP)
+ .addReg(SP)
+ .addReg(RegMisAlign, RegState::Kill);
+ }
+
+ // Store FP register in A8, because FP may be used to pass function
+ // arguments
+ if (XtensaFI->isSaveFrameRegister()) {
+ BuildMI(MBB, MBBI, DL, TII.get(Xtensa::OR), Xtensa::A8)
+ .addReg(FP)
+ .addReg(FP);
+ }
+
+ // if framepointer enabled, set it to point to the stack pointer.
+ if (hasFP(MF)) {
+ // Insert instruction "move $fp, $sp" at this location.
+ BuildMI(MBB, MBBI, DL, TII.get(Xtensa::OR), FP)
+ .addReg(SP)
+ .addReg(SP)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(
+ nullptr, MRI->getDwarfRegNum(FP, true), StackSize);
+ unsigned CFIIndex = MF.addFrameInst(Inst);
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ } else {
+ // emit ".cfi_def_cfa_offset StackSize"
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize));
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
+ } else {
// No need to allocate space on the stack.
if (StackSize == 0 && !MFI.adjustsStack())
return;
@@ -122,6 +206,7 @@ void XtensaFrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}
+ }
if (StackSize != PrevStackSize) {
MFI.setStackSize(StackSize);
@@ -179,10 +264,22 @@ void XtensaFrameLowering::emitEpilogue(MachineFunction &MF,
"Unexpected callee-saved register restore instruction");
#endif
}
-
- BuildMI(MBB, I, DL, TII.get(Xtensa::OR), SP).addReg(FP).addReg(FP);
+ if (STI.isWinABI()) {
+ // In most architectures, we need to explicitly restore the stack pointer
+ // before returning.
+ //
+ // With the Xtensa Windowed Register option, there is no need to
+ // explicitly restore the stack pointer, because on function return
+ // the window of the caller (including the old stack pointer) gets
+ // restored anyway.
+ } else {
+ BuildMI(MBB, I, DL, TII.get(Xtensa::OR), SP).addReg(FP).addReg(FP);
+ }
}
+ if (STI.isWinABI())
+ return;
+
// Get the number of bytes from FrameInfo
uint64_t StackSize = MFI.getStackSize();
@@ -199,6 +296,9 @@ bool XtensaFrameLowering::spillCalleeSavedRegisters(
MachineFunction *MF = MBB.getParent();
MachineBasicBlock &EntryBlock = *(MF->begin());
+ if (STI.isWinABI())
+ return true;
+
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
// Add the callee-saved register as live-in. Do not add if the register is
// A0 and return address is taken, because it will be implemented in
@@ -224,6 +324,8 @@ bool XtensaFrameLowering::spillCalleeSavedRegisters(
bool XtensaFrameLowering::restoreCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+ if (STI.isWinABI())
+ return true;
return TargetFrameLowering::restoreCalleeSavedRegisters(MBB, MI, CSI, TRI);
}
@@ -251,6 +353,10 @@ void XtensaFrameLowering::determineCalleeSaves(MachineFunction &MF,
RegScavenger *RS) const {
unsigned FP = TRI->getFrameRegister(MF);
+ if (STI.isWinABI()) {
+ return;
+ }
+
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
// Mark $fp as used if function has dedicated frame pointer.
diff --git a/llvm/lib/Target/Xtensa/XtensaFrameLowering.h b/llvm/lib/Target/Xtensa/XtensaFrameLowering.h
index 3f946e1ea730f..f0095b8774154 100644
--- a/llvm/lib/Target/Xtensa/XtensaFrameLowering.h
+++ b/llvm/lib/Target/Xtensa/XtensaFrameLowering.h
@@ -18,6 +18,7 @@ class XtensaInstrInfo;
class XtensaRegisterInfo;
class XtensaFrameLowering : public TargetFrameLowering {
+ const XtensaSubtarget &STI;
const XtensaInstrInfo &TII;
const XtensaRegisterInfo *TRI;
diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
index 57f0cbbc36c24..75e8dad388d1e 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
@@ -42,6 +42,15 @@ static bool isLongCall(const char *str) {
return true;
}
+// The calling conventions in XtensaCallingConv.td are described in terms of the
+// callee's register window. This function translates registers a2-a7 to the
+// corresponding caller window registers a10-a15.
+static unsigned toCallerWindow(unsigned Reg) {
+ if (Reg >= Xtensa::A2 && Reg <= Xtensa::A7)
+ return Reg - Xtensa::A2 + Xtensa::A10;
+ return Reg;
+}
+
XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM,
const XtensaSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
@@ -339,7 +348,18 @@ SDValue XtensaTargetLowering::LowerFormalArguments(
// Transform the arguments stored on
// physical registers into virtual ones
- Register Reg = MF.addLiveIn(VA.getLocReg(), &Xtensa::ARRegClass);
+ Register Reg = 0;
+ unsigned FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
+
+ // In WinABI, an argument passed in FrameReg is saved in A8 (in
+ // emitPrologue), so load the argument from A8.
+ if (Subtarget.isWinABI() && (VA.getLocReg() == FrameReg)) {
+ Reg = MF.addLiveIn(Xtensa::A8, &Xtensa::ARRegClass);
+ XtensaFI->setSaveFrameRegister();
+ } else {
+ Reg = MF.addLiveIn(VA.getLocReg(), &Xtensa::ARRegClass);
+ }
+
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
// If this is an 8 or 16-bit value, it has been passed promoted
@@ -538,6 +558,8 @@ XtensaTargetLowering::LowerCall(CallLoweringInfo &CLI,
SDValue Glue;
for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
unsigned Reg = RegsToPass[I].first;
+ if (Subtarget.isWinABI())
+ Reg = toCallerWindow(Reg);
Chain = DAG.getCopyToReg(Chain, DL, Reg, RegsToPass[I].second, Glue);
Glue = Chain.getValue(1);
}
@@ -587,6 +609,8 @@ XtensaTargetLowering::LowerCall(CallLoweringInfo &CLI,
// known live into the call.
for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
unsigned Reg = RegsToPass[I].first;
+ if (Subtarget.isWinABI())
+ Reg = toCallerWindow(Reg);
Ops.push_back(DAG.getRegister(Reg, RegsToPass[I].second.getValueType()));
}
@@ -595,7 +619,8 @@ XtensaTargetLowering::LowerCall(CallLoweringInfo &CLI,
Ops.push_back(Glue);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getNode(XtensaISD::CALL, DL, NodeTys, Ops);
+ Chain = DAG.getNode(Subtarget.isWinABI() ? XtensaISD::CALLW8 : XtensaISD::CALL,
+ DL, NodeTys, Ops);
Glue = Chain.getValue(1);
// Mark the end of the call, which is glued to the call itself.
@@ -606,7 +631,8 @@ XtensaTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RetLocs;
CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
- RetCCInfo.AnalyzeCallResult(Ins, RetCC_Xtensa);
+ RetCCInfo.AnalyzeCallResult(Ins, Subtarget.isWinABI() ? RetCCW8_Xtensa
+ : RetCC_Xtensa);
// Copy all of the result registers out of their specified physreg.
for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
@@ -648,7 +674,8 @@ XtensaTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SDValue Glue;
// Quick exit for void returns
if (RetLocs.empty())
- return DAG.getNode(XtensaISD::RET, DL, MVT::Other, Chain);
+ return DAG.getNode(Subtarget.isWinABI() ? XtensaISD::RETW
+ : XtensaISD::RET, DL, MVT::Other, Chain);
// Copy the result values into the output registers.
SmallVector<SDValue, 4> RetOps;
@@ -672,7 +699,8 @@ XtensaTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
if (Glue.getNode())
RetOps.push_back(Glue);
- return DAG.getNode(XtensaISD::RET, DL, MVT::Other, RetOps);
+ return DAG.getNode(Subtarget.isWinABI() ? XtensaISD::RETW
+ : XtensaISD::RET, DL, MVT::Other, RetOps);
}
static unsigned getBranchOpcode(ISD::CondCode Cond) {
@@ -906,6 +934,9 @@ SDValue XtensaTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
unsigned SPReg = Xtensa::SP;
SDValue SP = DAG.getCopyFromReg(Chain, DL, SPReg, VT);
SDValue NewSP = DAG.getNode(ISD::SUB, DL, VT, SP, SizeRoundUp); // Value
+ if (Subtarget.isWinABI()) {
+ NewSP = DAG.getNode(XtensaISD::MOVSP, DL, MVT::i32, NewSP);
+ }
Chain = DAG.getCopyToReg(SP.getValue(1), DL, SPReg, NewSP); // Output chain
SDValue NewVal = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i32);
@@ -1230,12 +1261,18 @@ const char *XtensaTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "XtensaISD::BR_JT";
case XtensaISD::CALL:
return "XtensaISD::CALL";
+ case XtensaISD::CALLW8:
+ return "XtensaISD::CALLW8";
case XtensaISD::EXTUI:
return "XtensaISD::EXTUI";
+ case XtensaISD::MOVSP:
+ return "XtensaISD::MOVSP";
case XtensaISD::PCREL_WRAPPER:
return "XtensaISD::PCREL_WRAPPER";
case XtensaISD::RET:
return "XtensaISD::RET";
+ case XtensaISD::RETW:
+ return "XtensaISD::RETW";
case XtensaISD::SELECT_CC:
return "XtensaISD::SELECT_CC";
case XtensaISD::SRCL:
diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.h b/llvm/lib/Target/Xtensa/XtensaISelLowering.h
index a959299d8ca6a..c7d4f41b1f08e 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.h
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.h
@@ -29,16 +29,21 @@ enum {
// is the target address. The arguments start at operand 2.
// There is an optional glue operand at the end.
CALL,
+ // Call with rotation window by 8 registers
+ CALLW8,
// Extract unsigned immediate. Operand 0 is value, operand 1
// is bit position of the field [0..31], operand 2 is bit size
// of the field [1..16]
EXTUI,
+ MOVSP,
+
// Wraps a TargetGlobalAddress that should be loaded using PC-relative
// accesses. Operand 0 is the address.
PCREL_WRAPPER,
RET,
+ RETW,
// Select with condition operator - This selects between a true value and
// a false value (ops #2 and #3) based on the boolean result of comparing
diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp b/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp
index da2883e1902ca..788dc992d1d4e 100644
--- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.cpp
@@ -100,9 +100,13 @@ void XtensaInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
.addReg(Reg1, RegState::Kill);
}
+ if (STI.isWinABI()) {
+ BuildMI(MBB, I, DL, get(Xtensa::MOVSP), SP).addReg(Reg, RegState::Kill);
+ } else {
BuildMI(MBB, I, DL, get(Xtensa::OR), SP)
.addReg(Reg, RegState::Kill)
.addReg(Reg, RegState::Kill);
+ }
}
void XtensaInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td
index e52dcbf1377c5..31c5a1d5db68e 100644
--- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td
+++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td
@@ -709,22 +709,35 @@ let isCall = 1, Defs = [A0] in {
}
}
+// Windowed call patterns. Currently rotation
+// window by 8 is implemented.
+def : Pat<(Xtensa_callw8 (i32 tglobaladdr:$dst)),
+ (CALL8 tglobaladdr:$dst)>;
+def : Pat<(Xtensa_callw8 (i32 texternalsym:$dst)),
+ (CALL8 texternalsym:$dst)>;
+def : Pat<(Xtensa_callw8 AR:$dst),
+ (CALLX8 AR:$dst)>;
+
def MOVSP : RRR_Inst<0x00, 0x00, 0x00, (outs AR:$t), (ins AR:$s),
- "movsp\t$t, $s", []>, Requires<[HasWindowed]> {
+ "movsp\t$t, $s",
+ [(set AR:$t, (Xtensa_movsp AR:$s))]>,
+ Requires<[HasWindowed]> {
let r = 0x01;
}
let isReturn = 1, isTerminator = 1,
isBarrier = 1, Uses = [A0] in {
def RETW_N : RRRN_Inst<0x0D, (outs), (ins),
- "retw.n", []>, Requires<[HasWindowed, HasDensity]> {
+ "retw.n", [(Xtensa_retw)]>,
+ Requires<[HasWindowed, HasDensity]> {
let r = 0x0F;
let s = 0;
let t = 1;
}
def RETW : CALLX_Inst<0x00, 0x00, 0x00, (outs), (ins),
- "retw", []>, Requires<[HasWindowed]> {
+ "retw", [(Xtensa_retw)]>,
+ Requires<[HasWindowed]> {
let m = 0x2;
let n = 0x1;
let s = 0;
diff --git a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h
index c430562091ba7..bc051d9ca14fa 100644
--- a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h
+++ b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h
@@ -27,6 +27,7 @@ class XtensaMachineFunctionInfo : public MachineFunctionInfo {
unsigned VarArgsFirstGPR;
int VarArgsOnStackFrameIndex;
int VarArgsInRegsFrameIndex;
+ bool SaveFrameRegister = false;
public:
explicit XtensaMachineFunctionInfo(const Function &F,
@@ -50,6 +51,9 @@ class XtensaMachineFunctionInfo : public MachineFunctionInfo {
// Get and set the frame index of the first stack vararg.
int getVarArgsInRegsFrameIndex() const { return VarArgsInRegsFrameIndex; }
void setVarArgsInRegsFrameIndex(int FI) { VarArgsInRegsFrameIndex = FI; }
+
+ bool isSaveFrameRegister() const { return SaveFrameRegister; }
+ void setSaveFrameRegister() { SaveFrameRegister = true; }
};
} // namespace llvm
diff --git a/llvm/lib/Target/Xtensa/XtensaOperators.td b/llvm/lib/Target/Xtensa/XtensaOperators.td
index 3dd73b44f336a..1fe5c3f64b607 100644
--- a/llvm/lib/Target/Xtensa/XtensaOperators.td
+++ b/llvm/lib/Target/Xtensa/XtensaOperators.td
@@ -31,15 +31,23 @@ def SDT_XtensaSRC : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCi
def SDT_XtensaEXTUI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+def SDT_XtensaMOVSP : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisVT<0, i32>]>;
+
//===----------------------------------------------------------------------===//
// Node definitions
//===----------------------------------------------------------------------===//
def Xtensa_call: SDNode<"XtensaISD::CALL", SDT_XtensaCall,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;
+def Xtensa_callw8: SDNode<"XtensaISD::CALLW8", SDT_XtensaCall,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;
+
def Xtensa_ret: SDNode<"XtensaISD::RET", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def Xtensa_retw: SDNode<"XtensaISD::RETW", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
def Xtensa_pcrel_wrapper: SDNode<"XtensaISD::PCREL_WRAPPER", SDT_XtensaWrapPtr, []>;
def Xtensa_callseq_start: SDNode<"ISD::CALLSEQ_START", SDT_XtensaCallSeqStart,
@@ -59,3 +67,6 @@ def Xtensa_srcl: SDNode<"XtensaISD::SRCL", SDT_XtensaSRC>;
def Xtensa_srcr: SDNode<"XtensaISD::SRCR", SDT_XtensaSRC>;
def Xtensa_extui: SDNode<"XtensaISD::EXTUI", SDT_XtensaEXTUI>;
+
+def Xtensa_movsp: SDNode<"XtensaISD::MOVSP", SDT_XtensaMOVSP,
+ [SDNPInGlue]>;
diff --git a/llvm/lib/Target/Xtensa/XtensaRegisterInfo.cpp b/llvm/lib/Target/Xtensa/XtensaRegisterInfo.cpp
index 4a8bafc540df0..cd8bfa0d4d164 100644
--- a/llvm/lib/Target/Xtensa/XtensaRegisterInfo.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaRegisterInfo.cpp
@@ -34,13 +34,19 @@ XtensaRegisterInfo::XtensaRegisterInfo(const XtensaSubtarget &STI)
const uint16_t *
XtensaRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- return CSR_Xtensa_SaveList;
+ if (Subtarget.isWinABI())
+ return CSRW8_Xtensa_SaveList;
+ else
+ return CSR_Xtensa_SaveList;
}
const uint32_t *
XtensaRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID) const {
- return CSR_Xtensa_RegMask;
+ if (Subtarget.isWinABI())
+ return CSRW8_Xtensa_RegMask;
+ else
+ return CSR_Xtensa_RegMask;
}
BitVector XtensaRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
@@ -129,5 +135,6 @@ bool XtensaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Register XtensaRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
- return TFI->hasFP(MF) ? Xtensa::A15 : Xtensa::SP;
+ return TFI->hasFP(MF) ? (Subtarget.isWinABI() ? Xtensa::A7 : Xtensa::A15)
+ : Xtensa::SP;
}
diff --git a/llvm/lib/Target/Xtensa/XtensaSubtarget.h b/llvm/lib/Target/Xtensa/XtensaSubtarget.h
index 962bed2c2e36f..ffd12a203d87f 100644
--- a/llvm/lib/Target/Xtensa/XtensaSubtarget.h
+++ b/llvm/lib/Target/Xtensa/XtensaSubtarget.h
@@ -30,17 +30,17 @@ class StringRef;
class XtensaSubtarget : public XtensaGenSubtargetInfo {
private:
+// Bool members corresponding to the SubtargetFeatures defined in tablegen
+#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
+ bool ATTRIBUTE = DEFAULT;
+#include "XtensaGenSubtargetInfo.inc"
+
const Triple &TargetTriple;
XtensaInstrInfo InstrInfo;
XtensaTargetLowering TLInfo;
SelectionDAGTargetInfo TSInfo;
XtensaFrameLowering FrameLowering;
-// Bool members corresponding to the SubtargetFeatures defined in tablegen
-#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
- bool ATTRIBUTE = DEFAULT;
-#include "XtensaGenSubtargetInfo.inc"
-
XtensaSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
public:
@@ -70,6 +70,8 @@ class XtensaSubtarget : public XtensaGenSubtargetInfo {
bool hasBoolean() const { return HasBoolean; }
+ bool isWinABI() const { return hasWindowed(); }
+
// Automatically generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
};
diff --git a/llvm/test/CodeGen/Xtensa/aligned_alloc.ll b/llvm/test/CodeGen/Xtensa/aligned_alloc.ll
new file mode 100644
index 0000000000000..ebb24d9272ddc
--- /dev/null
+++ b/llvm/test/CodeGen/Xtensa/aligned_alloc.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=xtensa -O0 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=XTENSA
+
+define i8 @loadi8_128(i8 %a) {
+; XTENSA-LABEL: loadi8_128:
+; XTENSA: .cfi_startproc
+; XTENSA-NEXT: # %bb.0:
+; XTENSA-NEXT: addi a8, a1, -128
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: .cfi_def_cfa_offset 128
+; XTENSA-NEXT: s32i a0, a1, 124 # 4-byte Folded Spill
+; XTENSA-NEXT: .cfi_offset a0, -4
+; XTENSA-NEXT: addi a2, a1, 0
+; XTENSA-NEXT: movi a3, 0
+; XTENSA-NEXT: movi a4, 64
+; XTENSA-NEXT: l32r a8, .LCPI0_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l8ui a2, a1, 0
+; XTENSA-NEXT: l32i a0, a1, 124 # 4-byte Folded Reload
+; XTENSA-NEXT: movi a8, 128
+; XTENSA-NEXT: add a8, a1, a8
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %aligned = alloca i8, align 128
+ call void @llvm.memset.p0.i64(ptr noundef nonnull align 64 dereferenceable(64) %aligned, i8 0, i64 64, i1 false)
+ %1 = load i8, ptr %aligned, align 128
+ ret i8 %1
+}
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
diff --git a/llvm/test/CodeGen/Xtensa/calling-conv-windowed.ll b/llvm/test/CodeGen/Xtensa/calling-conv-windowed.ll
new file mode 100644
index 0000000000000..27855b1cdf481
--- /dev/null
+++ b/llvm/test/CodeGen/Xtensa/calling-conv-windowed.ll
@@ -0,0 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=xtensa -O1 -mattr=+windowed -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=XTENSA
+
+; Check placement of first 6 arguments in registers and 7th argument on stack
+define dso_local i32 @test1(i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef %3, i32 noundef %4, i32 noundef %5, ptr nocapture noundef readonly byval(i32) align 4 %6) {
+; XTENSA-LABEL: test1:
+; XTENSA: .cfi_startproc
+; XTENSA-NEXT: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: .cfi_def_cfa_offset 32
+; XTENSA-NEXT: add a8, a7, a2
+; XTENSA-NEXT: l32i a9, a1, 32
+; XTENSA-NEXT: add a2, a8, a9
+; XTENSA-NEXT: retw
+ %8 = load i32, ptr %6, align 4
+ %9 = add nsw i32 %5, %0
+ %10 = add nsw i32 %9, %8
+ ret i32 %10
+}
+
+; Check placement of second i64 argument in registers
+define dso_local i32 @test2(i32 noundef %0, i64 noundef %1, i32 noundef %2) {
+; XTENSA-LABEL: test2:
+; XTENSA: .cfi_startproc
+; XTENSA-NEXT: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: .cfi_def_cfa_offset 32
+; XTENSA-NEXT: add a8, a6, a2
+; XTENSA-NEXT: add a2, a8, a4
+; XTENSA-NEXT: retw
+ %4 = trunc i64 %1 to i32
+ %5 = add nsw i32 %2, %0
+ %6 = add nsw i32 %5, %4
+ ret i32 %6
+}
+
+; Check placement of first argument typeof i8 in register
+define dso_local i32 @test3(i8 noundef signext %0, i64 noundef %1, i32 noundef %2) {
+; XTENSA-LABEL: test3:
+; XTENSA: .cfi_startproc
+; XTENSA-NEXT: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: .cfi_def_cfa_offset 32
+; XTENSA-NEXT: add a8, a2, a6
+; XTENSA-NEXT: add a2, a8, a4
+; XTENSA-NEXT: retw
+ %4 = trunc i64 %1 to i32
+ %5 = sext i8 %0 to i32
+ %6 = add nsw i32 %5, %2
+ %7 = add nsw i32 %6, %4
+ ret i32 %7
+}
+
+; Check placement of 4th argument typeof i64 on stack
+define dso_local i32 @test4(i8 noundef signext %0, i64 noundef %1, i32 noundef %2, ptr nocapture noundef readonly byval(i64) align 8 %3) {
+; XTENSA-LABEL: test4:
+; XTENSA: .cfi_startproc
+; XTENSA-NEXT: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: .cfi_def_cfa_offset 32
+; XTENSA-NEXT: add a8, a2, a6
+; XTENSA-NEXT: add a8, a8, a4
+; XTENSA-NEXT: l32i a9, a1, 32
+; XTENSA-NEXT: add a2, a8, a9
+; XTENSA-NEXT: retw
+ %5 = load i64, ptr %3, align 8
+ %6 = trunc i64 %1 to i32
+ %7 = trunc i64 %5 to i32
+ %8 = sext i8 %0 to i32
+ %9 = add nsw i32 %8, %2
+ %10 = add nsw i32 %9, %6
+ %11 = add nsw i32 %10, %7
+ ret i32 %11
+}
+
+; Check placement of 128 bit structure on registers
+define dso_local i32 @test5([4 x i32] %0, i32 noundef %1) {
+; XTENSA-LABEL: test5:
+; XTENSA: .cfi_startproc
+; XTENSA-NEXT: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: .cfi_def_cfa_offset 32
+; XTENSA-NEXT: add a2, a2, a6
+; XTENSA-NEXT: retw
+ %3 = extractvalue [4 x i32] %0, 0
+ %4 = add nsw i32 %3, %1
+ ret i32 %4
+}
+
+; Check placement of 128 bit structure on stack
+define dso_local i32 @test6(i32 noundef %0, [4 x i32] %1) {
+; XTENSA-LABEL: test6:
+; XTENSA: .cfi_startproc
+; XTENSA-NEXT: # %bb.0:
+; XTENSA-NEXT: entry a1, 32
+; XTENSA-NEXT: .cfi_def_cfa_offset 32
+; XTENSA-NEXT: add a2, a3, a2
+; XTENSA-NEXT: retw
+ %3 = extractvalue [4 x i32] %1, 0
+ %4 = add nsw i32 %3, %0
+ ret i32 %4
+}
diff --git a/llvm/test/CodeGen/Xtensa/callw.ll b/llvm/test/CodeGen/Xtensa/callw.ll
new file mode 100644
index 0000000000000..21549bcf22678
--- /dev/null
+++ b/llvm/test/CodeGen/Xtensa/callw.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=xtensa -mattr=+windowed < %s | FileCheck %s
+
+declare i32 @external_function(i32)
+
+define i32 @test_call_external(i32 %a) nounwind {
+; CHECK-LABEL: test_call_external:
+; CHECK: # %bb.0:
+; CHECK-NEXT: entry a1, 32
+; CHECK-NEXT: l32r a8, .LCPI0_0
+; CHECK-NEXT: or a10, a2, a2
+; CHECK-NEXT: callx8 a8
+; CHECK-NEXT: or a2, a10, a10
+; CHECK-NEXT: retw
+ %1 = call i32 @external_function(i32 %a)
+ ret i32 %1
+}
+
+define i32 @defined_function(i32 %a) nounwind {
+; CHECK-LABEL: defined_function:
+; CHECK: # %bb.0:
+; CHECK-NEXT: entry a1, 32
+; CHECK-NEXT: addi a2, a2, 1
+; CHECK-NEXT: retw
+ %1 = add i32 %a, 1
+ ret i32 %1
+}
+
+define i32 @test_call_defined(i32 %a) nounwind {
+; CHECK-LABEL: test_call_defined:
+; CHECK: # %bb.0:
+; CHECK-NEXT: entry a1, 32
+; CHECK-NEXT: l32r a8, .LCPI2_0
+; CHECK-NEXT: or a10, a2, a2
+; CHECK-NEXT: callx8 a8
+; CHECK-NEXT: or a2, a10, a10
+; CHECK-NEXT: retw
+ %1 = call i32 @defined_function(i32 %a) nounwind
+ ret i32 %1
+}
+
+define i32 @test_call_indirect(ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: test_call_indirect:
+; CHECK: # %bb.0:
+; CHECK-NEXT: entry a1, 32
+; CHECK-NEXT: or a10, a3, a3
+; CHECK-NEXT: callx8 a2
+; CHECK-NEXT: or a2, a10, a10
+; CHECK-NEXT: retw
+ %1 = call i32 %a(i32 %b)
+ ret i32 %1
+}
More information about the llvm-commits
mailing list