[llvm] [Xtensa] Implement vararg support. (PR #117126)

Andrei Safronov via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 9 15:54:49 PST 2024


https://github.com/andreisfr updated https://github.com/llvm/llvm-project/pull/117126

>From d3db27e843b610c0397a119a89b3867f6a0b2a30 Mon Sep 17 00:00:00 2001
From: Andrei Safronov <safronov at espressif.com>
Date: Thu, 28 Nov 2024 00:29:21 +0300
Subject: [PATCH 1/6] [Xtensa] Implement variable arguments support.

---
 llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 233 +++++++++++++++++-
 llvm/lib/Target/Xtensa/XtensaISelLowering.h   |   9 +
 .../Target/Xtensa/XtensaMachineFunctionInfo.h |  16 +-
 llvm/test/CodeGen/Xtensa/vararg.ll            |  87 +++++++
 4 files changed, 338 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/CodeGen/Xtensa/vararg.ll

diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
index 5450222a7b2e1d..d04c82e533d5ae 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
@@ -14,6 +14,7 @@
 #include "XtensaISelLowering.h"
 #include "XtensaConstantPoolValue.h"
 #include "XtensaInstrInfo.h"
+#include "XtensaMachineFunctionInfo.h"
 #include "XtensaSubtarget.h"
 #include "XtensaTargetMachine.h"
 #include "llvm/CodeGen/CallingConvLower.h"
@@ -133,6 +134,14 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
   setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
 
+  // VASTART and VACOPY need to deal with the Xtensa-specific varargs
+  // structure, but VAEND is a no-op.
+  setOperationAction(ISD::VASTART, MVT::Other, Custom);
+  // we use special va_list structure so we have to customize this
+  setOperationAction(ISD::VAARG, MVT::Other, Custom);
+  setOperationAction(ISD::VACOPY, MVT::Other, Custom);
+  setOperationAction(ISD::VAEND, MVT::Other, Expand);
+
   // Compute derived properties from the register classes
   computeRegisterProperties(STI.getRegisterInfo());
 }
@@ -211,6 +220,11 @@ void XtensaTargetLowering::LowerAsmOperandForConstraint(
   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
 }
 
+unsigned XtensaTargetLowering::getVaListSizeInBits(const DataLayout &DL) const {
+  // va_list is 2 * sizeof(int*) + sizeof(int) = 12 bytes; report it in bits.
+  return 3 * 4 * 8;
+}
+
 //===----------------------------------------------------------------------===//
 // Calling conventions
 //===----------------------------------------------------------------------===//
@@ -304,13 +318,14 @@ SDValue XtensaTargetLowering::LowerFormalArguments(
     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo &MFI = MF.getFrameInfo();
+  XtensaMachineFunctionInfo *XtensaFI = MF.getInfo<XtensaMachineFunctionInfo>();
+  EVT PtrVT = getPointerTy(MF.getDataLayout());
+
+  XtensaFI->setVarArgsFrameIndex(0);
 
   // Used with vargs to acumulate store chains.
   std::vector<SDValue> OutChains;
 
-  if (IsVarArg)
-    report_fatal_error("Var arg not supported by FormalArguments Lowering");
-
   // Assign locations to all of the incoming arguments.
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
@@ -378,6 +393,68 @@ SDValue XtensaTargetLowering::LowerFormalArguments(
     }
   }
 
+  if (IsVarArg) {
+    static const MCPhysReg XtensaArgRegs[6] = {
+        Xtensa::A2, Xtensa::A3, Xtensa::A4, Xtensa::A5, Xtensa::A6, Xtensa::A7};
+    ArrayRef<MCPhysReg> ArgRegs = ArrayRef(XtensaArgRegs);
+    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
+    const TargetRegisterClass *RC = &Xtensa::ARRegClass;
+    MachineFrameInfo &MFI = MF.getFrameInfo();
+    MachineRegisterInfo &RegInfo = MF.getRegInfo();
+    unsigned RegSize = 4;
+    MVT RegTy = MVT::getIntegerVT(RegSize * 8);
+
+    XtensaFI->setVarArgsFirstGPR(Idx + 2); // 2 - number of a2 register
+
+    XtensaFI->setVarArgsStackOffset(MFI.CreateFixedObject(
+        PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
+
+    // Offset of the first variable argument from stack pointer, and size of
+    // the vararg save area. For now, the varargs save area is either zero or
+    // large enough to hold a0-a7.
+    int VaArgOffset, VarArgsSaveSize;
+
+    // If all registers are allocated, then all varargs must be passed on the
+    // stack and we don't need to save any argregs.
+    if (ArgRegs.size() == Idx) {
+      VaArgOffset = CCInfo.getStackSize();
+      VarArgsSaveSize = 0;
+    } else {
+      VarArgsSaveSize = RegSize * (ArgRegs.size() - Idx);
+      VaArgOffset = -VarArgsSaveSize;
+    }
+
+    // Record the frame index of the first variable argument
+    // which is a value necessary to VASTART.
+    int FI = MFI.CreateFixedObject(RegSize, VaArgOffset, true);
+    XtensaFI->setVarArgsFrameIndex(FI);
+
+    // Copy the integer registers that may have been used for passing varargs
+    // to the vararg save area.
+    for (unsigned I = Idx; I < ArgRegs.size(); ++I, VaArgOffset += RegSize) {
+      const unsigned Reg = RegInfo.createVirtualRegister(RC);
+      unsigned FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
+
+      // Argument passed in FrameReg we save in A8 (in emitPrologue),
+      // so load argument from A8
+      if (ArgRegs[I] == FrameReg) {
+        RegInfo.addLiveIn(Xtensa::A8, Reg);
+      } else {
+        RegInfo.addLiveIn(ArgRegs[I], Reg);
+      }
+
+      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy);
+      FI = MFI.CreateFixedObject(RegSize, VaArgOffset, true);
+      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
+      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
+                                   MachinePointerInfo::getFixedStack(MF, FI));
+      cast<StoreSDNode>(Store.getNode())
+          ->getMemOperand()
+          ->setValue((Value *)nullptr);
+      OutChains.push_back(Store);
+    }
+  }
+
   // All stores are grouped in one node to allow the matching between
   // the size of Ins and InVals. This only happens when on varg functions
   if (!OutChains.empty()) {
@@ -579,9 +656,6 @@ XtensaTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   const SmallVectorImpl<SDValue> &OutVals,
                                   const SDLoc &DL, SelectionDAG &DAG) const {
-  if (IsVarArg)
-    report_fatal_error("VarArg not supported");
-
   MachineFunction &MF = DAG.getMachineFunction();
 
   // Assign locations to each returned value.
@@ -859,6 +933,147 @@ SDValue XtensaTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
   return DAG.getMergeValues(Ops, DL);
 }
 
+SDValue XtensaTargetLowering::LowerVASTART(SDValue Op,
+                                           SelectionDAG &DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  XtensaMachineFunctionInfo *XtensaFI = MF.getInfo<XtensaMachineFunctionInfo>();
+  SDValue Chain = Op.getOperand(0);
+  SDValue Addr = Op.getOperand(1);
+  EVT PtrVT = Addr.getValueType();
+  SDLoc DL(Op);
+
+  // Lowers va_start by filling in the three words of va_list_tag at Addr:
+  // int32 *va_stk - points to the arguments passed in memory
+  // int32 *va_reg - points to the argument registers saved in memory
+  // int32 va_ndx  - byte offset from va_stk or va_reg to the next variable
+  // argument
+
+  SDValue VAIndex;
+  SDValue StackOffsetFI =
+      DAG.getFrameIndex(XtensaFI->getVarArgsStackOffset(), PtrVT);
+  unsigned ArgWords = XtensaFI->getVarArgsFirstGPR() - 2;
+
+  // If the first variable argument is passed in registers (at most 6 words
+  // fit in registers), set va_ndx to its byte position within the register
+  // save area (va_reg pointer). Otherwise set va_ndx to 32, which addresses
+  // the first stack argument through the biased va_stk pointer stored below.
+  if (ArgWords < 6) {
+    VAIndex = DAG.getConstant(ArgWords * 4, DL, MVT::i32);
+  } else {
+    VAIndex = DAG.getConstant(32, DL, MVT::i32);
+  }
+
+  SDValue FrameIndex =
+      DAG.getFrameIndex(XtensaFI->getVarArgsFrameIndex(), PtrVT);
+  uint64_t FrameOffset = PtrVT.getStoreSize();
+  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+
+  // Store va_stk, biased by -32 so va_stk + 32 is the first stack argument
+  SDValue StackPtr = DAG.getNode(ISD::SUB, DL, PtrVT, StackOffsetFI,
+                                 DAG.getConstant(32, DL, PtrVT));
+  SDValue StoreStackPtr =
+      DAG.getStore(Chain, DL, StackPtr, Addr, MachinePointerInfo(SV));
+
+  uint64_t NextOffset = FrameOffset;
+  SDValue NextPtr = DAG.getNode(ISD::ADD, DL, PtrVT, Addr,
+                                DAG.getConstant(NextOffset, DL, PtrVT));
+
+  // Store pointer to the saved argument registers (va_reg)
+  SDValue StoreRegPtr = DAG.getStore(StoreStackPtr, DL, FrameIndex, NextPtr,
+                                     MachinePointerInfo(SV, NextOffset));
+  NextOffset += FrameOffset;
+  NextPtr = DAG.getNode(ISD::ADD, DL, PtrVT, Addr,
+                        DAG.getConstant(NextOffset, DL, PtrVT));
+
+  // Store third word: byte offset of the first variable argument (va_ndx)
+  return DAG.getStore(StoreRegPtr, DL, VAIndex, NextPtr,
+                      MachinePointerInfo(SV, NextOffset));
+}
+
+SDValue XtensaTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
+  constexpr unsigned VAListSize = 3 * 4; // bytes: va_stk, va_reg, va_ndx
+  return DAG.getMemcpy(
+      Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
+      DAG.getConstant(VAListSize, SDLoc(Op), MVT::i32), Align(4),
+      /*isVolatile=*/false, /*AlwaysInline=*/false,
+      /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
+}
+
+SDValue XtensaTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
+  // Lowers va_arg: reads va_stk, va_reg and va_ndx from the va_list, selects
+  // the register save area or the stack area, writes the advanced va_ndx
+  // back and loads the argument from the selected area.
+  SDNode *Node = Op.getNode();
+  EVT VT = Node->getValueType(0);
+  SDValue InChain = Node->getOperand(0);
+  SDValue VAListPtr = Node->getOperand(1);
+  EVT PtrVT = VAListPtr.getValueType();
+  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+  SDLoc DL(Node);
+  auto &TD = DAG.getDataLayout();
+  Align ArgAlignment = TD.getPrefTypeAlign(VT.getTypeForEVT(*DAG.getContext()));
+  unsigned ArgAlignInBytes = ArgAlignment.value();
+  unsigned ArgSizeInBytes =
+      TD.getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext()));
+  // Round the argument size up to a whole number of 4-byte words.
+  unsigned VASizeInBytes = (ArgSizeInBytes + 3) & ~0x3u;
+
+  // va_stk
+  SDValue VAStack =
+      DAG.getLoad(MVT::i32, DL, InChain, VAListPtr, MachinePointerInfo());
+  InChain = VAStack.getValue(1);
+
+  // va_reg
+  SDValue VARegPtr = DAG.getNode(ISD::ADD, DL, PtrVT, VAListPtr,
+                                 DAG.getConstant(4, DL, MVT::i32));
+  SDValue VAReg =
+      DAG.getLoad(MVT::i32, DL, InChain, VARegPtr, MachinePointerInfo());
+  InChain = VAReg.getValue(1);
+
+  // va_ndx
+  SDValue VarArgIndexPtr = DAG.getNode(ISD::ADD, DL, PtrVT, VARegPtr,
+                                       DAG.getConstant(4, DL, MVT::i32));
+  SDValue VAIndex =
+      DAG.getLoad(MVT::i32, DL, InChain, VarArgIndexPtr, MachinePointerInfo());
+  InChain = VAIndex.getValue(1);
+
+  // Align va_ndx first if the argument needs more than word alignment.
+  SDValue OrigIndex = VAIndex;
+  if (ArgAlignInBytes > 4) {
+    OrigIndex = DAG.getNode(ISD::ADD, DL, PtrVT, OrigIndex,
+                            DAG.getConstant(ArgAlignInBytes - 1, DL, MVT::i32));
+    OrigIndex = DAG.getNode(ISD::AND, DL, PtrVT, OrigIndex,
+                            DAG.getConstant(-ArgAlignInBytes, DL, MVT::i32));
+  }
+
+  // Index just past this argument.
+  VAIndex = DAG.getNode(ISD::ADD, DL, PtrVT, OrigIndex,
+                        DAG.getConstant(VASizeInBytes, DL, MVT::i32));
+
+  // The argument lives in the register save area only if it ends within the
+  // six saved words; an argument is never split between registers and stack.
+  SDValue RegAreaEnd = DAG.getConstant(6 * 4, DL, MVT::i32);
+  SDValue InReg = DAG.getSetCC(DL, MVT::i32, VAIndex, RegAreaEnd, ISD::SETLE);
+  SDValue WasInReg =
+      DAG.getSetCC(DL, MVT::i32, OrigIndex, RegAreaEnd, ISD::SETLE);
+
+  // The first argument taken from the stack restarts at index 32 (va_stk is
+  // biased by -32 in LowerVASTART); later ones just keep advancing.
+  SDValue StkIndex = DAG.getNode(
+      ISD::SELECT, DL, MVT::i32, WasInReg,
+      DAG.getConstant(32 + VASizeInBytes, DL, MVT::i32), VAIndex);
+  SDValue Array = DAG.getNode(ISD::SELECT, DL, MVT::i32, InReg, VAReg, VAStack);
+  VAIndex = DAG.getNode(ISD::SELECT, DL, MVT::i32, InReg, VAIndex, StkIndex);
+  InChain = DAG.getStore(InChain, DL, VAIndex, VarArgIndexPtr,
+                         MachinePointerInfo(SV));
+
+  // addr = array + va_ndx - size: va_ndx already points past the argument.
+  SDValue Addr = DAG.getNode(ISD::SUB, DL, PtrVT, VAIndex,
+                             DAG.getConstant(VASizeInBytes, DL, MVT::i32));
+  Addr = DAG.getNode(ISD::ADD, DL, PtrVT, Array, Addr);
+  return DAG.getLoad(VT, DL, InChain, Addr, MachinePointerInfo());
+}
+
 SDValue XtensaTargetLowering::LowerShiftLeftParts(SDValue Op,
                                                   SelectionDAG &DAG) const {
   SDLoc DL(Op);
@@ -1001,6 +1216,12 @@ SDValue XtensaTargetLowering::LowerOperation(SDValue Op,
     return LowerFRAMEADDR(Op, DAG);
   case ISD::DYNAMIC_STACKALLOC:
     return LowerDYNAMIC_STACKALLOC(Op, DAG);
+  case ISD::VASTART:
+    return LowerVASTART(Op, DAG);
+  case ISD::VAARG:
+    return LowerVAARG(Op, DAG);
+  case ISD::VACOPY:
+    return LowerVACOPY(Op, DAG);
   case ISD::SHL_PARTS:
     return LowerShiftLeftParts(Op, DAG);
   case ISD::SRA_PARTS:
diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.h b/llvm/lib/Target/Xtensa/XtensaISelLowering.h
index f1cd00c41437a4..973b51cd73e4b7 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.h
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.h
@@ -74,6 +74,9 @@ class XtensaTargetLowering : public TargetLowering {
 
   bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
 
+  /// Returns the size of the platform's va_list object.
+  unsigned getVaListSizeInBits(const DataLayout &DL) const override;
+
   const char *getTargetNodeName(unsigned Opcode) const override;
 
   std::pair<unsigned, const TargetRegisterClass *>
@@ -148,6 +151,12 @@ class XtensaTargetLowering : public TargetLowering {
 
   SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
 
+  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
+
   SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
 
   SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h
index c38c060b9387ff..36fbd018bb8c9f 100644
--- a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h
+++ b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h
@@ -24,10 +24,14 @@ namespace llvm {
 class XtensaMachineFunctionInfo : public MachineFunctionInfo {
   /// FrameIndex of the spill slot for the scratch register in BranchRelaxation.
   int BranchRelaxationScratchFrameIndex = -1;
+  unsigned VarArgsFirstGPR;
+  int VarArgsStackOffset;
+  unsigned VarArgsFrameIndex;
 
 public:
   explicit XtensaMachineFunctionInfo(const Function &F,
-                                     const TargetSubtargetInfo *STI) {}
+                                     const TargetSubtargetInfo *STI)
+      : VarArgsFirstGPR(0), VarArgsStackOffset(0), VarArgsFrameIndex(0) {}
 
   int getBranchRelaxationScratchFrameIndex() const {
     return BranchRelaxationScratchFrameIndex;
@@ -35,6 +39,16 @@ class XtensaMachineFunctionInfo : public MachineFunctionInfo {
   void setBranchRelaxationScratchFrameIndex(int Index) {
     BranchRelaxationScratchFrameIndex = Index;
   }
+
+  unsigned getVarArgsFirstGPR() const { return VarArgsFirstGPR; }
+  void setVarArgsFirstGPR(unsigned GPR) { VarArgsFirstGPR = GPR; }
+
+  int getVarArgsStackOffset() const { return VarArgsStackOffset; }
+  void setVarArgsStackOffset(int Offset) { VarArgsStackOffset = Offset; }
+
+  // Get and set the frame index of the first stack vararg.
+  unsigned getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+  void setVarArgsFrameIndex(unsigned FI) { VarArgsFrameIndex = FI; }
 };
 
 } // namespace llvm
diff --git a/llvm/test/CodeGen/Xtensa/vararg.ll b/llvm/test/CodeGen/Xtensa/vararg.ll
new file mode 100644
index 00000000000000..baf1bd34a31249
--- /dev/null
+++ b/llvm/test/CodeGen/Xtensa/vararg.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s --mtriple=xtensa | FileCheck %s
+
+define void @test(...) { ; no named args: all six arg registers a2-a7 are saved
+; CHECK-LABEL: test:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    addi a8, a1, -32
+; CHECK-NEXT:    or a1, a8, a8
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    s32i a7, a1, 20
+; CHECK-NEXT:    s32i a6, a1, 16
+; CHECK-NEXT:    s32i a5, a1, 12
+; CHECK-NEXT:    s32i a4, a1, 8
+; CHECK-NEXT:    s32i a3, a1, 4
+; CHECK-NEXT:    s32i a2, a1, 0
+; CHECK-NEXT:    addi a8, a1, 32
+; CHECK-NEXT:    or a1, a8, a8
+; CHECK-NEXT:    ret
+entry:
+  ret void
+}
+
+
+declare void @llvm.va_start(ptr) nounwind
+declare void @llvm.va_end(ptr) nounwind
+declare void @f(i32) nounwind
+define void @test_vararg(...) nounwind {
+; CHECK-LABEL: test_vararg:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi a8, a1, -48
+; CHECK-NEXT:    or a1, a8, a8
+; CHECK-NEXT:    s32i a0, a1, 12 # 4-byte Folded Spill
+; CHECK-NEXT:    s32i a12, a1, 8 # 4-byte Folded Spill
+; CHECK-NEXT:    s32i a13, a1, 4 # 4-byte Folded Spill
+; CHECK-NEXT:    s32i a7, a1, 36
+; CHECK-NEXT:    s32i a6, a1, 32
+; CHECK-NEXT:    s32i a5, a1, 28
+; CHECK-NEXT:    s32i a4, a1, 24
+; CHECK-NEXT:    s32i a3, a1, 20
+; CHECK-NEXT:    s32i a2, a1, 16
+; CHECK-NEXT:    movi a8, 0
+; CHECK-NEXT:    s32i a8, a1, 8
+; CHECK-NEXT:    addi a8, a1, 16
+; CHECK-NEXT:    s32i a8, a1, 4
+; CHECK-NEXT:    addi a8, a1, 48
+; CHECK-NEXT:    addi a8, a8, -32
+; CHECK-NEXT:    s32i a8, a1, 0
+; CHECK-NEXT:    movi a12, 24
+; CHECK-NEXT:    l32r a13, .LCPI1_0
+; CHECK-NEXT:    j .LBB1_2
+; CHECK-NEXT:  .LBB1_1: # %for.cond
+; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:    s32i a8, a1, 8
+; CHECK-NEXT:    add a8, a8, a9
+; CHECK-NEXT:    addi a8, a8, -3
+; CHECK-NEXT:    l32i a2, a8, 0
+; CHECK-NEXT:    callx0 a13
+; CHECK-NEXT:  .LBB1_2: # %for.cond
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    l32i a10, a1, 8
+; CHECK-NEXT:    addi a8, a10, 3
+; CHECK-NEXT:    blt a12, a8, .LBB1_4
+; CHECK-NEXT:  # %bb.3: # %for.cond
+; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:    l32i a9, a1, 4
+; CHECK-NEXT:    bge a12, a8, .LBB1_1
+; CHECK-NEXT:    j .LBB1_5
+; CHECK-NEXT:  .LBB1_4: # in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:    l32i a9, a1, 0
+; CHECK-NEXT:    bge a12, a8, .LBB1_1
+; CHECK-NEXT:  .LBB1_5: # in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:    addi a8, a10, 38
+; CHECK-NEXT:    j .LBB1_1
+entry:
+  %list = alloca ptr, align 4 ; NOTE(review): one pointer only, but va_start
+  call void @llvm.va_start(ptr %list) ; stores three words (see a1+0/4/8 above)
+  br label %for.cond
+
+for.cond:
+  %0 = va_arg ptr %list, i32
+  call void @f(i32 %0)
+  br label %for.cond
+
+  call void @llvm.va_end(ptr %list) ; unreachable: follows the unconditional br
+  ret void
+}

>From 467e1d09a17f22e04af47dc3b2b9ee099d28965c Mon Sep 17 00:00:00 2001
From: Andrei Safronov <safronov at espressif.com>
Date: Wed, 4 Dec 2024 22:23:28 +0300
Subject: [PATCH 2/6] [Xtensa] Minor fixes.

---
 llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 85 +++++++------------
 llvm/lib/Target/Xtensa/XtensaISelLowering.h   |  3 -
 .../Target/Xtensa/XtensaMachineFunctionInfo.h |  8 +-
 3 files changed, 35 insertions(+), 61 deletions(-)

diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
index d04c82e533d5ae..ee5409431b973a 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
@@ -134,10 +134,9 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
   setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
 
-  // VASTART and VACOPY need to deal with the Xtensa-specific varargs
+  // VASTART, VAARG and VACOPY need to deal with the Xtensa-specific varargs
   // structure, but VAEND is a no-op.
   setOperationAction(ISD::VASTART, MVT::Other, Custom);
-  // we use special va_list structure so we have to customize this
   setOperationAction(ISD::VAARG, MVT::Other, Custom);
   setOperationAction(ISD::VACOPY, MVT::Other, Custom);
   setOperationAction(ISD::VAEND, MVT::Other, Expand);
@@ -220,23 +219,18 @@ void XtensaTargetLowering::LowerAsmOperandForConstraint(
   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
 }
 
-unsigned XtensaTargetLowering::getVaListSizeInBits(const DataLayout &DL) const {
-  // 2 * sizeof(int*) + sizeof(int)
-  return 3 * 4;
-}
-
 //===----------------------------------------------------------------------===//
 // Calling conventions
 //===----------------------------------------------------------------------===//
 
 #include "XtensaGenCallingConv.inc"
 
+static const MCPhysReg IntRegs[] = {Xtensa::A2, Xtensa::A3, Xtensa::A4,
+                                    Xtensa::A5, Xtensa::A6, Xtensa::A7};
+
 static bool CC_Xtensa_Custom(unsigned ValNo, MVT ValVT, MVT LocVT,
                              CCValAssign::LocInfo LocInfo,
                              ISD::ArgFlagsTy ArgFlags, CCState &State) {
-  static const MCPhysReg IntRegs[] = {Xtensa::A2, Xtensa::A3, Xtensa::A4,
-                                      Xtensa::A5, Xtensa::A6, Xtensa::A7};
-
   if (ArgFlags.isByVal()) {
     Align ByValAlign = ArgFlags.getNonZeroByValAlign();
     unsigned ByValSize = ArgFlags.getByValSize();
@@ -319,9 +313,6 @@ SDValue XtensaTargetLowering::LowerFormalArguments(
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo &MFI = MF.getFrameInfo();
   XtensaMachineFunctionInfo *XtensaFI = MF.getInfo<XtensaMachineFunctionInfo>();
-  EVT PtrVT = getPointerTy(MF.getDataLayout());
-
-  XtensaFI->setVarArgsFrameIndex(0);
 
   // Used with vargs to acumulate store chains.
   std::vector<SDValue> OutChains;
@@ -338,16 +329,13 @@ SDValue XtensaTargetLowering::LowerFormalArguments(
     // Arguments stored on registers
     if (VA.isRegLoc()) {
       EVT RegVT = VA.getLocVT();
-      const TargetRegisterClass *RC;
 
-      if (RegVT == MVT::i32)
-        RC = &Xtensa::ARRegClass;
-      else
+      if (RegVT != MVT::i32)
         report_fatal_error("RegVT not supported by FormalArguments Lowering");
 
       // Transform the arguments stored on
       // physical registers into virtual ones
-      unsigned Register = MF.addLiveIn(VA.getLocReg(), RC);
+      unsigned Register = MF.addLiveIn(VA.getLocReg(), &Xtensa::ARRegClass);
       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Register, RegVT);
 
       // If this is an 8 or 16-bit value, it has been passed promoted
@@ -394,20 +382,18 @@ SDValue XtensaTargetLowering::LowerFormalArguments(
   }
 
   if (IsVarArg) {
-    static const MCPhysReg XtensaArgRegs[6] = {
-        Xtensa::A2, Xtensa::A3, Xtensa::A4, Xtensa::A5, Xtensa::A6, Xtensa::A7};
-    ArrayRef<MCPhysReg> ArgRegs = ArrayRef(XtensaArgRegs);
+    ArrayRef<MCPhysReg> ArgRegs = ArrayRef(IntRegs);
     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
     const TargetRegisterClass *RC = &Xtensa::ARRegClass;
     MachineFrameInfo &MFI = MF.getFrameInfo();
     MachineRegisterInfo &RegInfo = MF.getRegInfo();
     unsigned RegSize = 4;
-    MVT RegTy = MVT::getIntegerVT(RegSize * 8);
+    MVT RegTy = MVT::i32;
 
     XtensaFI->setVarArgsFirstGPR(Idx + 2); // 2 - number of a2 register
 
-    XtensaFI->setVarArgsStackOffset(MFI.CreateFixedObject(
-        PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
+    XtensaFI->setVarArgsOnStackFrameIndex(
+        MFI.CreateFixedObject(4, CCInfo.getStackSize(), true));
 
     // Offset of the first variable argument from stack pointer, and size of
     // the vararg save area. For now, the varargs save area is either zero or
@@ -422,36 +408,26 @@ SDValue XtensaTargetLowering::LowerFormalArguments(
     } else {
       VarArgsSaveSize = RegSize * (ArgRegs.size() - Idx);
       VaArgOffset = -VarArgsSaveSize;
-    }
 
-    // Record the frame index of the first variable argument
-    // which is a value necessary to VASTART.
-    int FI = MFI.CreateFixedObject(RegSize, VaArgOffset, true);
-    XtensaFI->setVarArgsFrameIndex(FI);
-
-    // Copy the integer registers that may have been used for passing varargs
-    // to the vararg save area.
-    for (unsigned I = Idx; I < ArgRegs.size(); ++I, VaArgOffset += RegSize) {
-      const unsigned Reg = RegInfo.createVirtualRegister(RC);
-      unsigned FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
-
-      // Argument passed in FrameReg we save in A8 (in emitPrologue),
-      // so load argument from A8
-      if (ArgRegs[I] == FrameReg) {
-        RegInfo.addLiveIn(Xtensa::A8, Reg);
-      } else {
+      // Record the frame index of the first variable argument
+      // which is a value necessary to VASTART.
+      int FI = MFI.CreateFixedObject(RegSize, VaArgOffset, true);
+      XtensaFI->setVarArgsInRegsFrameIndex(FI);
+
+      // Copy the integer registers that may have been used for passing varargs
+      // to the vararg save area.
+      for (unsigned I = Idx; I < ArgRegs.size(); ++I, VaArgOffset += RegSize) {
+        const Register Reg = RegInfo.createVirtualRegister(RC);
         RegInfo.addLiveIn(ArgRegs[I], Reg);
-      }
 
-      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy);
-      FI = MFI.CreateFixedObject(RegSize, VaArgOffset, true);
-      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
-      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
-                                   MachinePointerInfo::getFixedStack(MF, FI));
-      cast<StoreSDNode>(Store.getNode())
-          ->getMemOperand()
-          ->setValue((Value *)nullptr);
-      OutChains.push_back(Store);
+        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy);
+        FI = MFI.CreateFixedObject(RegSize, VaArgOffset, true);
+        SDValue PtrOff =
+            DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
+        SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
+                                     MachinePointerInfo::getFixedStack(MF, FI));
+        OutChains.push_back(Store);
+      }
     }
   }
 
@@ -950,7 +926,7 @@ SDValue XtensaTargetLowering::LowerVASTART(SDValue Op,
 
   SDValue VAIndex;
   SDValue StackOffsetFI =
-      DAG.getFrameIndex(XtensaFI->getVarArgsStackOffset(), PtrVT);
+      DAG.getFrameIndex(XtensaFI->getVarArgsOnStackFrameIndex(), PtrVT);
   unsigned ArgWords = XtensaFI->getVarArgsFirstGPR() - 2;
 
   // If first variable argument passed in registers (maximum words in registers
@@ -964,7 +940,7 @@ SDValue XtensaTargetLowering::LowerVASTART(SDValue Op,
   }
 
   SDValue FrameIndex =
-      DAG.getFrameIndex(XtensaFI->getVarArgsFrameIndex(), PtrVT);
+      DAG.getFrameIndex(XtensaFI->getVarArgsInRegsFrameIndex(), PtrVT);
   uint64_t FrameOffset = PtrVT.getStoreSize();
   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
 
@@ -991,7 +967,8 @@ SDValue XtensaTargetLowering::LowerVASTART(SDValue Op,
 }
 
 SDValue XtensaTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
-  unsigned VAListSize = getVaListSizeInBits(DAG.getDataLayout()) / 8;
+  // Size of the va_list_tag structure
+  constexpr unsigned VAListSize = 3 * 4;
   return DAG.getMemcpy(
       Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
       DAG.getConstant(VAListSize, SDLoc(Op), MVT::i32), Align(4),
diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.h b/llvm/lib/Target/Xtensa/XtensaISelLowering.h
index 973b51cd73e4b7..cebd7d2016c8ee 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.h
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.h
@@ -74,9 +74,6 @@ class XtensaTargetLowering : public TargetLowering {
 
   bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
 
-  /// Returns the size of the platform's va_list object.
-  unsigned getVaListSizeInBits(const DataLayout &DL) const override;
-
   const char *getTargetNodeName(unsigned Opcode) const override;
 
   std::pair<unsigned, const TargetRegisterClass *>
diff --git a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h
index 36fbd018bb8c9f..86b0dad4403a4b 100644
--- a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h
+++ b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h
@@ -43,12 +43,12 @@ class XtensaMachineFunctionInfo : public MachineFunctionInfo {
   unsigned getVarArgsFirstGPR() const { return VarArgsFirstGPR; }
   void setVarArgsFirstGPR(unsigned GPR) { VarArgsFirstGPR = GPR; }
 
-  int getVarArgsStackOffset() const { return VarArgsStackOffset; }
-  void setVarArgsStackOffset(int Offset) { VarArgsStackOffset = Offset; }
+  int getVarArgsOnStackFrameIndex() const { return VarArgsStackOffset; }
+  void setVarArgsOnStackFrameIndex(int Offset) { VarArgsStackOffset = Offset; }
 
   // Get and set the frame index of the first stack vararg.
-  unsigned getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
-  void setVarArgsFrameIndex(unsigned FI) { VarArgsFrameIndex = FI; }
+  unsigned getVarArgsInRegsFrameIndex() const { return VarArgsFrameIndex; }
+  void setVarArgsInRegsFrameIndex(unsigned FI) { VarArgsFrameIndex = FI; }
 };
 
 } // namespace llvm

>From dfb7ee4ee9a66009f5b4cdfcad8337d0174fcbdf Mon Sep 17 00:00:00 2001
From: Andrei Safronov <safronov at espressif.com>
Date: Thu, 5 Dec 2024 01:04:15 +0300
Subject: [PATCH 3/6] [Xtensa] Minor fixes.

Fix variable names in XtensaMachineFunctionInfo class. Fix
LowerFormalArguments function.
---
 llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 16 +++++++-------
 .../Target/Xtensa/XtensaMachineFunctionInfo.h | 21 ++++++++++++-------
 2 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
index ee5409431b973a..4f02acf9800384 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
@@ -335,8 +335,8 @@ SDValue XtensaTargetLowering::LowerFormalArguments(
 
       // Transform the arguments stored on
       // physical registers into virtual ones
-      unsigned Register = MF.addLiveIn(VA.getLocReg(), &Xtensa::ARRegClass);
-      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Register, RegVT);
+      Register Reg = MF.addLiveIn(VA.getLocReg(), &Xtensa::ARRegClass);
+      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
 
       // If this is an 8 or 16-bit value, it has been passed promoted
       // to 32 bits.  Insert an assert[sz]ext to capture this, then
@@ -382,8 +382,8 @@ SDValue XtensaTargetLowering::LowerFormalArguments(
   }
 
   if (IsVarArg) {
-    ArrayRef<MCPhysReg> ArgRegs = ArrayRef(IntRegs);
-    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
+    unsigned Idx = CCInfo.getFirstUnallocated(IntRegs);
+    unsigned ArgRegsNum = std::size(IntRegs);
     const TargetRegisterClass *RC = &Xtensa::ARRegClass;
     MachineFrameInfo &MFI = MF.getFrameInfo();
     MachineRegisterInfo &RegInfo = MF.getRegInfo();
@@ -402,11 +402,11 @@ SDValue XtensaTargetLowering::LowerFormalArguments(
 
     // If all registers are allocated, then all varargs must be passed on the
     // stack and we don't need to save any argregs.
-    if (ArgRegs.size() == Idx) {
+    if (ArgRegsNum == Idx) {
       VaArgOffset = CCInfo.getStackSize();
       VarArgsSaveSize = 0;
     } else {
-      VarArgsSaveSize = RegSize * (ArgRegs.size() - Idx);
+      VarArgsSaveSize = RegSize * (ArgRegsNum - Idx);
       VaArgOffset = -VarArgsSaveSize;
 
       // Record the frame index of the first variable argument
@@ -416,9 +416,9 @@ SDValue XtensaTargetLowering::LowerFormalArguments(
 
       // Copy the integer registers that may have been used for passing varargs
       // to the vararg save area.
-      for (unsigned I = Idx; I < ArgRegs.size(); ++I, VaArgOffset += RegSize) {
+      for (unsigned I = Idx; I < ArgRegsNum; ++I, VaArgOffset += RegSize) {
         const Register Reg = RegInfo.createVirtualRegister(RC);
-        RegInfo.addLiveIn(ArgRegs[I], Reg);
+        RegInfo.addLiveIn(IntRegs[I], Reg);
 
         SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy);
         FI = MFI.CreateFixedObject(RegSize, VaArgOffset, true);
diff --git a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h
index 86b0dad4403a4b..f7f6922bec0413 100644
--- a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h
+++ b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h
@@ -25,13 +25,14 @@ class XtensaMachineFunctionInfo : public MachineFunctionInfo {
   /// FrameIndex of the spill slot for the scratch register in BranchRelaxation.
   int BranchRelaxationScratchFrameIndex = -1;
   unsigned VarArgsFirstGPR;
-  int VarArgsStackOffset;
-  unsigned VarArgsFrameIndex;
+  unsigned VarArgsOnStackFrameIndex;
+  unsigned VarArgsInRegsFrameIndex;
 
 public:
   explicit XtensaMachineFunctionInfo(const Function &F,
                                      const TargetSubtargetInfo *STI)
-      : VarArgsFirstGPR(0), VarArgsStackOffset(0), VarArgsFrameIndex(0) {}
+      : VarArgsFirstGPR(0), VarArgsOnStackFrameIndex(0),
+        VarArgsInRegsFrameIndex(0) {}
 
   int getBranchRelaxationScratchFrameIndex() const {
     return BranchRelaxationScratchFrameIndex;
@@ -43,12 +44,18 @@ class XtensaMachineFunctionInfo : public MachineFunctionInfo {
   unsigned getVarArgsFirstGPR() const { return VarArgsFirstGPR; }
   void setVarArgsFirstGPR(unsigned GPR) { VarArgsFirstGPR = GPR; }
 
-  int getVarArgsOnStackFrameIndex() const { return VarArgsStackOffset; }
-  void setVarArgsOnStackFrameIndex(int Offset) { VarArgsStackOffset = Offset; }
+  unsigned getVarArgsOnStackFrameIndex() const {
+    return VarArgsOnStackFrameIndex;
+  }
+  void setVarArgsOnStackFrameIndex(unsigned FI) {
+    VarArgsOnStackFrameIndex = FI;
+  }
 
   // Get and set the frame index of the first stack vararg.
-  unsigned getVarArgsInRegsFrameIndex() const { return VarArgsFrameIndex; }
-  void setVarArgsInRegsFrameIndex(unsigned FI) { VarArgsFrameIndex = FI; }
+  unsigned getVarArgsInRegsFrameIndex() const {
+    return VarArgsInRegsFrameIndex;
+  }
+  void setVarArgsInRegsFrameIndex(unsigned FI) { VarArgsInRegsFrameIndex = FI; }
 };
 
 } // namespace llvm

>From 3998c2f8caf2ea4f1d4eb1cbed6eb79e6fcdcfb3 Mon Sep 17 00:00:00 2001
From: Andrei Safronov <safronov at espressif.com>
Date: Thu, 5 Dec 2024 01:19:50 +0300
Subject: [PATCH 4/6] [Xtensa] Fix XtensaMachineFunctionInfo.

---
 .../Target/Xtensa/XtensaMachineFunctionInfo.h  | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h
index f7f6922bec0413..c430562091ba75 100644
--- a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h
+++ b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h
@@ -25,8 +25,8 @@ class XtensaMachineFunctionInfo : public MachineFunctionInfo {
   /// FrameIndex of the spill slot for the scratch register in BranchRelaxation.
   int BranchRelaxationScratchFrameIndex = -1;
   unsigned VarArgsFirstGPR;
-  unsigned VarArgsOnStackFrameIndex;
-  unsigned VarArgsInRegsFrameIndex;
+  int VarArgsOnStackFrameIndex;
+  int VarArgsInRegsFrameIndex;
 
 public:
   explicit XtensaMachineFunctionInfo(const Function &F,
@@ -44,18 +44,12 @@ class XtensaMachineFunctionInfo : public MachineFunctionInfo {
   unsigned getVarArgsFirstGPR() const { return VarArgsFirstGPR; }
   void setVarArgsFirstGPR(unsigned GPR) { VarArgsFirstGPR = GPR; }
 
-  unsigned getVarArgsOnStackFrameIndex() const {
-    return VarArgsOnStackFrameIndex;
-  }
-  void setVarArgsOnStackFrameIndex(unsigned FI) {
-    VarArgsOnStackFrameIndex = FI;
-  }
+  int getVarArgsOnStackFrameIndex() const { return VarArgsOnStackFrameIndex; }
+  void setVarArgsOnStackFrameIndex(int FI) { VarArgsOnStackFrameIndex = FI; }
 
   // Get and set the frame index of the first stack vararg.
-  unsigned getVarArgsInRegsFrameIndex() const {
-    return VarArgsInRegsFrameIndex;
-  }
-  void setVarArgsInRegsFrameIndex(unsigned FI) { VarArgsInRegsFrameIndex = FI; }
+  int getVarArgsInRegsFrameIndex() const { return VarArgsInRegsFrameIndex; }
+  void setVarArgsInRegsFrameIndex(int FI) { VarArgsInRegsFrameIndex = FI; }
 };
 
 } // namespace llvm

>From 862f1de8b894577cd760cc5422040b0c9c0f1b1d Mon Sep 17 00:00:00 2001
From: Andrei Safronov <safronov at espressif.com>
Date: Fri, 6 Dec 2024 16:35:40 +0300
Subject: [PATCH 5/6] [Xtensa] Minor fixes in lowering VASTART/VAARG.

---
 llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
index 4f02acf9800384..6a2c7de3062932 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
@@ -947,19 +947,19 @@ SDValue XtensaTargetLowering::LowerVASTART(SDValue Op,
   // Store pointer to arguments given on stack (va_stk)
   SDValue StackPtr = DAG.getNode(ISD::SUB, DL, PtrVT, StackOffsetFI,
                                  DAG.getConstant(32, DL, PtrVT));
+
   SDValue StoreStackPtr =
       DAG.getStore(Chain, DL, StackPtr, Addr, MachinePointerInfo(SV));
 
   uint64_t NextOffset = FrameOffset;
-  SDValue NextPtr = DAG.getNode(ISD::ADD, DL, PtrVT, Addr,
-                                DAG.getConstant(NextOffset, DL, PtrVT));
+  SDValue NextPtr =
+      DAG.getObjectPtrOffset(DL, Addr, TypeSize::getFixed(NextOffset));
 
   // Store pointer to arguments given on registers (va_reg)
   SDValue StoreRegPtr = DAG.getStore(StoreStackPtr, DL, FrameIndex, NextPtr,
                                      MachinePointerInfo(SV, NextOffset));
   NextOffset += FrameOffset;
-  NextPtr = DAG.getNode(ISD::ADD, DL, PtrVT, Addr,
-                        DAG.getConstant(NextOffset, DL, PtrVT));
+  NextPtr = DAG.getObjectPtrOffset(DL, Addr, TypeSize::getFixed(NextOffset));
 
   // Store third word : position in bytes of the first VA argument (va_ndx)
   return DAG.getStore(StoreRegPtr, DL, VAIndex, NextPtr,
@@ -997,15 +997,15 @@ SDValue XtensaTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
   InChain = VAStack.getValue(1);
 
   // va_reg
-  SDValue VARegPtr = DAG.getNode(ISD::ADD, DL, PtrVT, VAListPtr,
-                                 DAG.getConstant(4, DL, MVT::i32));
+  SDValue VARegPtr =
+      DAG.getObjectPtrOffset(DL, VAListPtr, TypeSize::getFixed(4));
   SDValue VAReg =
       DAG.getLoad(MVT::i32, DL, InChain, VARegPtr, MachinePointerInfo());
   InChain = VAReg.getValue(1);
 
   // va_ndx
-  SDValue VarArgIndexPtr = DAG.getNode(ISD::ADD, DL, PtrVT, VARegPtr,
-                                       DAG.getConstant(4, DL, MVT::i32));
+  SDValue VarArgIndexPtr =
+      DAG.getObjectPtrOffset(DL, VARegPtr, TypeSize::getFixed(4));
   SDValue VAIndex =
       DAG.getLoad(MVT::i32, DL, InChain, VarArgIndexPtr, MachinePointerInfo());
   InChain = VAIndex.getValue(1);

>From ae7bff062a4eb45b6179e571ced800251f3a6671 Mon Sep 17 00:00:00 2001
From: Andrei Safronov <safronov at espressif.com>
Date: Tue, 10 Dec 2024 02:50:44 +0300
Subject: [PATCH 6/6] [Xtensa] Fix lowering VACOPY/VAARG.

---
 llvm/lib/Target/Xtensa/XtensaISelLowering.cpp |  25 +-
 llvm/test/CodeGen/Xtensa/vararg.ll            | 537 ++++++++++++++++--
 2 files changed, 507 insertions(+), 55 deletions(-)

diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
index 6a2c7de3062932..45bde4a180c1e0 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
@@ -969,27 +969,34 @@ SDValue XtensaTargetLowering::LowerVASTART(SDValue Op,
 SDValue XtensaTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
   // Size of the va_list_tag structure
   constexpr unsigned VAListSize = 3 * 4;
-  return DAG.getMemcpy(
-      Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
-      DAG.getConstant(VAListSize, SDLoc(Op), MVT::i32), Align(4),
-      /*isVolatile=*/false, /*AlwaysInline=*/false,
-      /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
+  SDValue Chain = Op.getOperand(0);
+  SDValue DstPtr = Op.getOperand(1);
+  SDValue SrcPtr = Op.getOperand(2);
+  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+  SDLoc DL(Op);
+
+  return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr,
+                       DAG.getConstant(VAListSize, SDLoc(Op), MVT::i32),
+                       Align(4), /*isVolatile*/ false, /*AlwaysInline*/ true,
+                       /*CI=*/nullptr, std::nullopt, MachinePointerInfo(DstSV),
+                       MachinePointerInfo(SrcSV));
 }
 
 SDValue XtensaTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
   SDNode *Node = Op.getNode();
   EVT VT = Node->getValueType(0);
+  Type *Ty = VT.getTypeForEVT(*DAG.getContext());
   EVT PtrVT = Op.getValueType();
   SDValue InChain = Node->getOperand(0);
   SDValue VAListPtr = Node->getOperand(1);
   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
   SDLoc DL(Node);
   auto &TD = DAG.getDataLayout();
-  Align ArgAlignment = TD.getPrefTypeAlign(VT.getTypeForEVT(*DAG.getContext()));
+  Align ArgAlignment = TD.getABITypeAlign(Ty);
   unsigned ArgAlignInBytes = ArgAlignment.value();
-  unsigned ArgSizeInBytes =
-      TD.getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext()));
-  unsigned VASizeInBytes = (ArgSizeInBytes + 3) & 0x3;
+  unsigned ArgSizeInBytes = TD.getTypeAllocSize(Ty);
+  unsigned VASizeInBytes = llvm::alignTo(ArgSizeInBytes, 4);
 
   // va_stk
   SDValue VAStack =
diff --git a/llvm/test/CodeGen/Xtensa/vararg.ll b/llvm/test/CodeGen/Xtensa/vararg.ll
index baf1bd34a31249..d85752e11fa6bb 100644
--- a/llvm/test/CodeGen/Xtensa/vararg.ll
+++ b/llvm/test/CodeGen/Xtensa/vararg.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s --mtriple=xtensa | FileCheck %s
 
-define void @test(...) {
-; CHECK-LABEL: test:
+define void @vararg(...) {
+; CHECK-LABEL: vararg:
 ; CHECK:         .cfi_startproc
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    addi a8, a1, -32
@@ -21,66 +21,511 @@ entry:
   ret void
 }
 
-
 declare void @llvm.va_start(ptr) nounwind
 declare void @llvm.va_end(ptr) nounwind
-declare void @f(i32) nounwind
-define void @test_vararg(...) nounwind {
-; CHECK-LABEL: test_vararg:
+declare void @f_i32(i32) nounwind
+declare void @f_i64(i64) nounwind
+
+define void @vararg_fixed_0(...) nounwind {
+; CHECK-LABEL: vararg_fixed_0:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addi a8, a1, -48
 ; CHECK-NEXT:    or a1, a8, a8
-; CHECK-NEXT:    s32i a0, a1, 12 # 4-byte Folded Spill
-; CHECK-NEXT:    s32i a12, a1, 8 # 4-byte Folded Spill
-; CHECK-NEXT:    s32i a13, a1, 4 # 4-byte Folded Spill
-; CHECK-NEXT:    s32i a7, a1, 36
-; CHECK-NEXT:    s32i a6, a1, 32
-; CHECK-NEXT:    s32i a5, a1, 28
-; CHECK-NEXT:    s32i a4, a1, 24
-; CHECK-NEXT:    s32i a3, a1, 20
-; CHECK-NEXT:    s32i a2, a1, 16
-; CHECK-NEXT:    movi a8, 0
-; CHECK-NEXT:    s32i a8, a1, 8
-; CHECK-NEXT:    addi a8, a1, 16
-; CHECK-NEXT:    s32i a8, a1, 4
+; CHECK-NEXT:    s32i a0, a1, 8 # 4-byte Folded Spill
+; CHECK-NEXT:    s32i a12, a1, 4 # 4-byte Folded Spill
+; CHECK-NEXT:    s32i a7, a1, 32
+; CHECK-NEXT:    s32i a6, a1, 28
+; CHECK-NEXT:    s32i a5, a1, 24
+; CHECK-NEXT:    s32i a4, a1, 20
+; CHECK-NEXT:    s32i a3, a1, 16
+; CHECK-NEXT:    s32i a2, a1, 12
+; CHECK-NEXT:    addi a10, a1, 12
+; CHECK-NEXT:    s32i a10, a1, 4
 ; CHECK-NEXT:    addi a8, a1, 48
 ; CHECK-NEXT:    addi a8, a8, -32
 ; CHECK-NEXT:    s32i a8, a1, 0
+; CHECK-NEXT:    movi a9, 4
 ; CHECK-NEXT:    movi a12, 24
-; CHECK-NEXT:    l32r a13, .LCPI1_0
-; CHECK-NEXT:    j .LBB1_2
-; CHECK-NEXT:  .LBB1_1: # %for.cond
-; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT:    s32i a8, a1, 8
-; CHECK-NEXT:    add a8, a8, a9
-; CHECK-NEXT:    addi a8, a8, -3
+; CHECK-NEXT:    blt a12, a9, .LBB1_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    or a8, a10, a10
+; CHECK-NEXT:  .LBB1_2: # %entry
+; CHECK-NEXT:    bge a12, a9, .LBB1_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    movi a9, 40
+; CHECK-NEXT:  .LBB1_4: # %entry
+; CHECK-NEXT:    s32i a9, a1, 8
+; CHECK-NEXT:    add a8, a9, a8
+; CHECK-NEXT:    addi a8, a8, -4
 ; CHECK-NEXT:    l32i a2, a8, 0
-; CHECK-NEXT:    callx0 a13
-; CHECK-NEXT:  .LBB1_2: # %for.cond
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    l32i a10, a1, 8
-; CHECK-NEXT:    addi a8, a10, 3
-; CHECK-NEXT:    blt a12, a8, .LBB1_4
-; CHECK-NEXT:  # %bb.3: # %for.cond
-; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:    l32r a8, .LCPI1_0
+; CHECK-NEXT:    callx0 a8
+; CHECK-NEXT:    l32i a7, a1, 8
+; CHECK-NEXT:    addi a10, a7, 4
 ; CHECK-NEXT:    l32i a9, a1, 4
-; CHECK-NEXT:    bge a12, a8, .LBB1_1
-; CHECK-NEXT:    j .LBB1_5
-; CHECK-NEXT:  .LBB1_4: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT:    l32i a9, a1, 0
-; CHECK-NEXT:    bge a12, a8, .LBB1_1
-; CHECK-NEXT:  .LBB1_5: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT:    addi a8, a10, 38
-; CHECK-NEXT:    j .LBB1_1
+; CHECK-NEXT:    l32i a8, a1, 0
+; CHECK-NEXT:    or a11, a8, a8
+; CHECK-NEXT:    blt a12, a10, .LBB1_6
+; CHECK-NEXT:  # %bb.5: # %entry
+; CHECK-NEXT:    or a11, a9, a9
+; CHECK-NEXT:  .LBB1_6: # %entry
+; CHECK-NEXT:    bge a12, a10, .LBB1_8
+; CHECK-NEXT:  # %bb.7:
+; CHECK-NEXT:    addi a10, a7, 40
+; CHECK-NEXT:  .LBB1_8: # %entry
+; CHECK-NEXT:    s32i a10, a1, 8
+; CHECK-NEXT:    add a10, a10, a11
+; CHECK-NEXT:    addi a7, a10, -4
+; CHECK-NEXT:    l32i a11, a1, 8
+; CHECK-NEXT:    addi a10, a11, 4
+; CHECK-NEXT:    blt a12, a10, .LBB1_10
+; CHECK-NEXT:  # %bb.9: # %entry
+; CHECK-NEXT:    or a8, a9, a9
+; CHECK-NEXT:  .LBB1_10: # %entry
+; CHECK-NEXT:    l32i a2, a7, 0
+; CHECK-NEXT:    bge a12, a10, .LBB1_12
+; CHECK-NEXT:  # %bb.11:
+; CHECK-NEXT:    addi a10, a11, 40
+; CHECK-NEXT:  .LBB1_12: # %entry
+; CHECK-NEXT:    s32i a10, a1, 8
+; CHECK-NEXT:    add a8, a10, a8
+; CHECK-NEXT:    addi a8, a8, -4
+; CHECK-NEXT:    l32i a3, a8, 0
+; CHECK-NEXT:    l32r a8, .LCPI1_1
+; CHECK-NEXT:    callx0 a8
+; CHECK-NEXT:    l32i a12, a1, 4 # 4-byte Folded Reload
+; CHECK-NEXT:    l32i a0, a1, 8 # 4-byte Folded Reload
+; CHECK-NEXT:    addi a8, a1, 48
+; CHECK-NEXT:    or a1, a8, a8
+; CHECK-NEXT:    ret
 entry:
   %list = alloca ptr, align 4
   call void @llvm.va_start(ptr %list)
-  br label %for.cond
 
-for.cond:
   %0 = va_arg ptr %list, i32
-  call void @f(i32 %0)
-  br label %for.cond
+  call void @f_i32(i32 %0)
+  %1 = va_arg ptr %list, i64
+  call void @f_i64(i64 %1)
+
+  call void @llvm.va_end(ptr %list)
+  ret void
+}
+
+define void @vararg_fixed_1(i32 %a1, ...) nounwind {
+; CHECK-LABEL: vararg_fixed_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi a8, a1, -32
+; CHECK-NEXT:    or a1, a8, a8
+; CHECK-NEXT:    s32i a0, a1, 8 # 4-byte Folded Spill
+; CHECK-NEXT:    s32i a12, a1, 4 # 4-byte Folded Spill
+; CHECK-NEXT:    s32i a7, a1, 28
+; CHECK-NEXT:    s32i a6, a1, 24
+; CHECK-NEXT:    s32i a5, a1, 20
+; CHECK-NEXT:    s32i a4, a1, 16
+; CHECK-NEXT:    s32i a3, a1, 12
+; CHECK-NEXT:    addi a10, a1, 12
+; CHECK-NEXT:    s32i a10, a1, 4
+; CHECK-NEXT:    addi a8, a1, 32
+; CHECK-NEXT:    addi a8, a8, -32
+; CHECK-NEXT:    s32i a8, a1, 0
+; CHECK-NEXT:    movi a9, 8
+; CHECK-NEXT:    movi a12, 24
+; CHECK-NEXT:    blt a12, a9, .LBB2_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    or a8, a10, a10
+; CHECK-NEXT:  .LBB2_2: # %entry
+; CHECK-NEXT:    bge a12, a9, .LBB2_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    movi a9, 44
+; CHECK-NEXT:  .LBB2_4: # %entry
+; CHECK-NEXT:    s32i a9, a1, 8
+; CHECK-NEXT:    add a8, a9, a8
+; CHECK-NEXT:    addi a8, a8, -4
+; CHECK-NEXT:    l32i a8, a8, 0
+; CHECK-NEXT:    add a2, a8, a2
+; CHECK-NEXT:    l32r a8, .LCPI2_0
+; CHECK-NEXT:    callx0 a8
+; CHECK-NEXT:    l32i a7, a1, 8
+; CHECK-NEXT:    addi a10, a7, 4
+; CHECK-NEXT:    l32i a9, a1, 4
+; CHECK-NEXT:    l32i a8, a1, 0
+; CHECK-NEXT:    or a11, a8, a8
+; CHECK-NEXT:    blt a12, a10, .LBB2_6
+; CHECK-NEXT:  # %bb.5: # %entry
+; CHECK-NEXT:    or a11, a9, a9
+; CHECK-NEXT:  .LBB2_6: # %entry
+; CHECK-NEXT:    bge a12, a10, .LBB2_8
+; CHECK-NEXT:  # %bb.7:
+; CHECK-NEXT:    addi a10, a7, 40
+; CHECK-NEXT:  .LBB2_8: # %entry
+; CHECK-NEXT:    s32i a10, a1, 8
+; CHECK-NEXT:    add a10, a10, a11
+; CHECK-NEXT:    addi a7, a10, -4
+; CHECK-NEXT:    l32i a11, a1, 8
+; CHECK-NEXT:    addi a10, a11, 4
+; CHECK-NEXT:    blt a12, a10, .LBB2_10
+; CHECK-NEXT:  # %bb.9: # %entry
+; CHECK-NEXT:    or a8, a9, a9
+; CHECK-NEXT:  .LBB2_10: # %entry
+; CHECK-NEXT:    l32i a2, a7, 0
+; CHECK-NEXT:    bge a12, a10, .LBB2_12
+; CHECK-NEXT:  # %bb.11:
+; CHECK-NEXT:    addi a10, a11, 40
+; CHECK-NEXT:  .LBB2_12: # %entry
+; CHECK-NEXT:    s32i a10, a1, 8
+; CHECK-NEXT:    add a8, a10, a8
+; CHECK-NEXT:    addi a8, a8, -4
+; CHECK-NEXT:    l32i a3, a8, 0
+; CHECK-NEXT:    l32r a8, .LCPI2_1
+; CHECK-NEXT:    callx0 a8
+; CHECK-NEXT:    l32i a12, a1, 4 # 4-byte Folded Reload
+; CHECK-NEXT:    l32i a0, a1, 8 # 4-byte Folded Reload
+; CHECK-NEXT:    addi a8, a1, 32
+; CHECK-NEXT:    or a1, a8, a8
+; CHECK-NEXT:    ret
+entry:
+  %list = alloca ptr, align 4
+  call void @llvm.va_start(ptr %list)
+
+  %va32 = va_arg ptr %list, i32
+  %sum = add nsw i32 %va32, %a1
+  call void @f_i32(i32 %sum)
+
+  %va64 = va_arg ptr %list, i64
+  call void @f_i64(i64 %va64)
+
+  call void @llvm.va_end(ptr %list)
+  ret void
+}
+
+define void @vararg_fixed_4(i32 %a1, i32 %a2, i32 %a3, i32 %a4, ...) nounwind {
+; CHECK-LABEL: vararg_fixed_4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi a8, a1, -32
+; CHECK-NEXT:    or a1, a8, a8
+; CHECK-NEXT:    s32i a0, a1, 8 # 4-byte Folded Spill
+; CHECK-NEXT:    s32i a12, a1, 4 # 4-byte Folded Spill
+; CHECK-NEXT:    s32i a7, a1, 16
+; CHECK-NEXT:    s32i a6, a1, 12
+; CHECK-NEXT:    addi a10, a1, 12
+; CHECK-NEXT:    s32i a10, a1, 4
+; CHECK-NEXT:    addi a8, a1, 32
+; CHECK-NEXT:    addi a8, a8, -32
+; CHECK-NEXT:    s32i a8, a1, 0
+; CHECK-NEXT:    movi a9, 20
+; CHECK-NEXT:    movi a12, 24
+; CHECK-NEXT:    blt a12, a9, .LBB3_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    or a8, a10, a10
+; CHECK-NEXT:  .LBB3_2: # %entry
+; CHECK-NEXT:    bge a12, a9, .LBB3_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    movi a9, 56
+; CHECK-NEXT:  .LBB3_4: # %entry
+; CHECK-NEXT:    s32i a9, a1, 8
+; CHECK-NEXT:    add a8, a9, a8
+; CHECK-NEXT:    addi a8, a8, -4
+; CHECK-NEXT:    l32i a8, a8, 0
+; CHECK-NEXT:    add a2, a8, a2
+; CHECK-NEXT:    l32r a8, .LCPI3_0
+; CHECK-NEXT:    callx0 a8
+; CHECK-NEXT:    l32i a7, a1, 8
+; CHECK-NEXT:    addi a10, a7, 4
+; CHECK-NEXT:    l32i a9, a1, 4
+; CHECK-NEXT:    l32i a8, a1, 0
+; CHECK-NEXT:    or a11, a8, a8
+; CHECK-NEXT:    blt a12, a10, .LBB3_6
+; CHECK-NEXT:  # %bb.5: # %entry
+; CHECK-NEXT:    or a11, a9, a9
+; CHECK-NEXT:  .LBB3_6: # %entry
+; CHECK-NEXT:    bge a12, a10, .LBB3_8
+; CHECK-NEXT:  # %bb.7:
+; CHECK-NEXT:    addi a10, a7, 40
+; CHECK-NEXT:  .LBB3_8: # %entry
+; CHECK-NEXT:    s32i a10, a1, 8
+; CHECK-NEXT:    add a10, a10, a11
+; CHECK-NEXT:    addi a7, a10, -4
+; CHECK-NEXT:    l32i a11, a1, 8
+; CHECK-NEXT:    addi a10, a11, 4
+; CHECK-NEXT:    blt a12, a10, .LBB3_10
+; CHECK-NEXT:  # %bb.9: # %entry
+; CHECK-NEXT:    or a8, a9, a9
+; CHECK-NEXT:  .LBB3_10: # %entry
+; CHECK-NEXT:    l32i a2, a7, 0
+; CHECK-NEXT:    bge a12, a10, .LBB3_12
+; CHECK-NEXT:  # %bb.11:
+; CHECK-NEXT:    addi a10, a11, 40
+; CHECK-NEXT:  .LBB3_12: # %entry
+; CHECK-NEXT:    s32i a10, a1, 8
+; CHECK-NEXT:    add a8, a10, a8
+; CHECK-NEXT:    addi a8, a8, -4
+; CHECK-NEXT:    l32i a3, a8, 0
+; CHECK-NEXT:    l32r a8, .LCPI3_1
+; CHECK-NEXT:    callx0 a8
+; CHECK-NEXT:    l32i a12, a1, 4 # 4-byte Folded Reload
+; CHECK-NEXT:    l32i a0, a1, 8 # 4-byte Folded Reload
+; CHECK-NEXT:    addi a8, a1, 32
+; CHECK-NEXT:    or a1, a8, a8
+; CHECK-NEXT:    ret
+entry:
+  %list = alloca ptr, align 4
+  call void @llvm.va_start(ptr %list)
+
+  %va32 = va_arg ptr %list, i32
+  %sum = add nsw i32 %va32, %a1
+  call void @f_i32(i32 %sum)
+
+  %va64 = va_arg ptr %list, i64
+  call void @f_i64(i64 %va64)
+
+  call void @llvm.va_end(ptr %list)
+  ret void
+}
+
+define void @vararg_fixed_5(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, ...) nounwind {
+; CHECK-LABEL: vararg_fixed_5:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi a8, a1, -16
+; CHECK-NEXT:    or a1, a8, a8
+; CHECK-NEXT:    s32i a0, a1, 8 # 4-byte Folded Spill
+; CHECK-NEXT:    s32i a12, a1, 4 # 4-byte Folded Spill
+; CHECK-NEXT:    s32i a7, a1, 12
+; CHECK-NEXT:    addi a9, a1, 12
+; CHECK-NEXT:    s32i a9, a1, 4
+; CHECK-NEXT:    addi a8, a1, 16
+; CHECK-NEXT:    addi a8, a8, -32
+; CHECK-NEXT:    s32i a8, a1, 0
+; CHECK-NEXT:    movi a12, 24
+; CHECK-NEXT:    blt a12, a12, .LBB4_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    or a8, a9, a9
+; CHECK-NEXT:  .LBB4_2: # %entry
+; CHECK-NEXT:    blt a12, a12, .LBB4_4
+; CHECK-NEXT:  # %bb.3: # %entry
+; CHECK-NEXT:    or a9, a12, a12
+; CHECK-NEXT:    j .LBB4_5
+; CHECK-NEXT:  .LBB4_4:
+; CHECK-NEXT:    movi a9, 60
+; CHECK-NEXT:  .LBB4_5: # %entry
+; CHECK-NEXT:    s32i a9, a1, 8
+; CHECK-NEXT:    add a8, a9, a8
+; CHECK-NEXT:    addi a8, a8, -4
+; CHECK-NEXT:    l32i a8, a8, 0
+; CHECK-NEXT:    add a2, a8, a2
+; CHECK-NEXT:    l32r a8, .LCPI4_0
+; CHECK-NEXT:    callx0 a8
+; CHECK-NEXT:    l32i a7, a1, 8
+; CHECK-NEXT:    addi a10, a7, 4
+; CHECK-NEXT:    l32i a9, a1, 4
+; CHECK-NEXT:    l32i a8, a1, 0
+; CHECK-NEXT:    or a11, a8, a8
+; CHECK-NEXT:    blt a12, a10, .LBB4_7
+; CHECK-NEXT:  # %bb.6: # %entry
+; CHECK-NEXT:    or a11, a9, a9
+; CHECK-NEXT:  .LBB4_7: # %entry
+; CHECK-NEXT:    bge a12, a10, .LBB4_9
+; CHECK-NEXT:  # %bb.8:
+; CHECK-NEXT:    addi a10, a7, 40
+; CHECK-NEXT:  .LBB4_9: # %entry
+; CHECK-NEXT:    s32i a10, a1, 8
+; CHECK-NEXT:    add a10, a10, a11
+; CHECK-NEXT:    addi a7, a10, -4
+; CHECK-NEXT:    l32i a11, a1, 8
+; CHECK-NEXT:    addi a10, a11, 4
+; CHECK-NEXT:    blt a12, a10, .LBB4_11
+; CHECK-NEXT:  # %bb.10: # %entry
+; CHECK-NEXT:    or a8, a9, a9
+; CHECK-NEXT:  .LBB4_11: # %entry
+; CHECK-NEXT:    l32i a2, a7, 0
+; CHECK-NEXT:    bge a12, a10, .LBB4_13
+; CHECK-NEXT:  # %bb.12:
+; CHECK-NEXT:    addi a10, a11, 40
+; CHECK-NEXT:  .LBB4_13: # %entry
+; CHECK-NEXT:    s32i a10, a1, 8
+; CHECK-NEXT:    add a8, a10, a8
+; CHECK-NEXT:    addi a8, a8, -4
+; CHECK-NEXT:    l32i a3, a8, 0
+; CHECK-NEXT:    l32r a8, .LCPI4_1
+; CHECK-NEXT:    callx0 a8
+; CHECK-NEXT:    l32i a12, a1, 4 # 4-byte Folded Reload
+; CHECK-NEXT:    l32i a0, a1, 8 # 4-byte Folded Reload
+; CHECK-NEXT:    addi a8, a1, 16
+; CHECK-NEXT:    or a1, a8, a8
+; CHECK-NEXT:    ret
+entry:
+  %list = alloca ptr, align 4
+  call void @llvm.va_start(ptr %list)
+
+  %va32 = va_arg ptr %list, i32
+  %sum = add nsw i32 %va32, %a1
+  call void @f_i32(i32 %sum)
+
+  %va64 = va_arg ptr %list, i64
+  call void @f_i64(i64 %va64)
+
+  call void @llvm.va_end(ptr %list)
+  ret void
+}
+
+define void @vararg_fixed_6(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, ...) nounwind {
+; CHECK-LABEL: vararg_fixed_6:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi a8, a1, -16
+; CHECK-NEXT:    or a1, a8, a8
+; CHECK-NEXT:    s32i a0, a1, 8 # 4-byte Folded Spill
+; CHECK-NEXT:    s32i a12, a1, 4 # 4-byte Folded Spill
+; CHECK-NEXT:    addi a10, a1, 0
+; CHECK-NEXT:    s32i a10, a1, 4
+; CHECK-NEXT:    addi a8, a1, 16
+; CHECK-NEXT:    addi a8, a8, -32
+; CHECK-NEXT:    s32i a8, a1, 0
+; CHECK-NEXT:    movi a9, 36
+; CHECK-NEXT:    movi a12, 24
+; CHECK-NEXT:    blt a12, a9, .LBB5_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    or a8, a10, a10
+; CHECK-NEXT:  .LBB5_2: # %entry
+; CHECK-NEXT:    bge a12, a9, .LBB5_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    movi a9, 72
+; CHECK-NEXT:  .LBB5_4: # %entry
+; CHECK-NEXT:    s32i a9, a1, 8
+; CHECK-NEXT:    add a8, a9, a8
+; CHECK-NEXT:    addi a8, a8, -4
+; CHECK-NEXT:    l32i a8, a8, 0
+; CHECK-NEXT:    add a2, a8, a2
+; CHECK-NEXT:    l32r a8, .LCPI5_0
+; CHECK-NEXT:    callx0 a8
+; CHECK-NEXT:    l32i a7, a1, 8
+; CHECK-NEXT:    addi a10, a7, 4
+; CHECK-NEXT:    l32i a9, a1, 4
+; CHECK-NEXT:    l32i a8, a1, 0
+; CHECK-NEXT:    or a11, a8, a8
+; CHECK-NEXT:    blt a12, a10, .LBB5_6
+; CHECK-NEXT:  # %bb.5: # %entry
+; CHECK-NEXT:    or a11, a9, a9
+; CHECK-NEXT:  .LBB5_6: # %entry
+; CHECK-NEXT:    bge a12, a10, .LBB5_8
+; CHECK-NEXT:  # %bb.7:
+; CHECK-NEXT:    addi a10, a7, 40
+; CHECK-NEXT:  .LBB5_8: # %entry
+; CHECK-NEXT:    s32i a10, a1, 8
+; CHECK-NEXT:    add a10, a10, a11
+; CHECK-NEXT:    addi a7, a10, -4
+; CHECK-NEXT:    l32i a11, a1, 8
+; CHECK-NEXT:    addi a10, a11, 4
+; CHECK-NEXT:    blt a12, a10, .LBB5_10
+; CHECK-NEXT:  # %bb.9: # %entry
+; CHECK-NEXT:    or a8, a9, a9
+; CHECK-NEXT:  .LBB5_10: # %entry
+; CHECK-NEXT:    l32i a2, a7, 0
+; CHECK-NEXT:    bge a12, a10, .LBB5_12
+; CHECK-NEXT:  # %bb.11:
+; CHECK-NEXT:    addi a10, a11, 40
+; CHECK-NEXT:  .LBB5_12: # %entry
+; CHECK-NEXT:    s32i a10, a1, 8
+; CHECK-NEXT:    add a8, a10, a8
+; CHECK-NEXT:    addi a8, a8, -4
+; CHECK-NEXT:    l32i a3, a8, 0
+; CHECK-NEXT:    l32r a8, .LCPI5_1
+; CHECK-NEXT:    callx0 a8
+; CHECK-NEXT:    l32i a12, a1, 4 # 4-byte Folded Reload
+; CHECK-NEXT:    l32i a0, a1, 8 # 4-byte Folded Reload
+; CHECK-NEXT:    addi a8, a1, 16
+; CHECK-NEXT:    or a1, a8, a8
+; CHECK-NEXT:    ret
+entry:
+  %list = alloca ptr, align 4
+  call void @llvm.va_start(ptr %list)
+
+  %va32 = va_arg ptr %list, i32
+  %sum = add nsw i32 %va32, %a1
+  call void @f_i32(i32 %sum)
+
+  %va64 = va_arg ptr %list, i64
+  call void @f_i64(i64 %va64)
+
+  call void @llvm.va_end(ptr %list)
+  ret void
+}
+
+define void @vararg_fixed_7(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, ...) nounwind {
+; CHECK-LABEL: vararg_fixed_7:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi a8, a1, -16
+; CHECK-NEXT:    or a1, a8, a8
+; CHECK-NEXT:    s32i a0, a1, 8 # 4-byte Folded Spill
+; CHECK-NEXT:    s32i a12, a1, 4 # 4-byte Folded Spill
+; CHECK-NEXT:    addi a10, a1, 0
+; CHECK-NEXT:    s32i a10, a1, 4
+; CHECK-NEXT:    addi a8, a1, 20
+; CHECK-NEXT:    addi a8, a8, -32
+; CHECK-NEXT:    s32i a8, a1, 0
+; CHECK-NEXT:    movi a9, 36
+; CHECK-NEXT:    movi a12, 24
+; CHECK-NEXT:    blt a12, a9, .LBB6_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    or a8, a10, a10
+; CHECK-NEXT:  .LBB6_2: # %entry
+; CHECK-NEXT:    bge a12, a9, .LBB6_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    movi a9, 72
+; CHECK-NEXT:  .LBB6_4: # %entry
+; CHECK-NEXT:    s32i a9, a1, 8
+; CHECK-NEXT:    add a8, a9, a8
+; CHECK-NEXT:    addi a8, a8, -4
+; CHECK-NEXT:    l32i a8, a8, 0
+; CHECK-NEXT:    add a2, a8, a2
+; CHECK-NEXT:    l32r a8, .LCPI6_0
+; CHECK-NEXT:    callx0 a8
+; CHECK-NEXT:    l32i a7, a1, 8
+; CHECK-NEXT:    addi a10, a7, 4
+; CHECK-NEXT:    l32i a9, a1, 4
+; CHECK-NEXT:    l32i a8, a1, 0
+; CHECK-NEXT:    or a11, a8, a8
+; CHECK-NEXT:    blt a12, a10, .LBB6_6
+; CHECK-NEXT:  # %bb.5: # %entry
+; CHECK-NEXT:    or a11, a9, a9
+; CHECK-NEXT:  .LBB6_6: # %entry
+; CHECK-NEXT:    bge a12, a10, .LBB6_8
+; CHECK-NEXT:  # %bb.7:
+; CHECK-NEXT:    addi a10, a7, 40
+; CHECK-NEXT:  .LBB6_8: # %entry
+; CHECK-NEXT:    s32i a10, a1, 8
+; CHECK-NEXT:    add a10, a10, a11
+; CHECK-NEXT:    addi a7, a10, -4
+; CHECK-NEXT:    l32i a11, a1, 8
+; CHECK-NEXT:    addi a10, a11, 4
+; CHECK-NEXT:    blt a12, a10, .LBB6_10
+; CHECK-NEXT:  # %bb.9: # %entry
+; CHECK-NEXT:    or a8, a9, a9
+; CHECK-NEXT:  .LBB6_10: # %entry
+; CHECK-NEXT:    l32i a2, a7, 0
+; CHECK-NEXT:    bge a12, a10, .LBB6_12
+; CHECK-NEXT:  # %bb.11:
+; CHECK-NEXT:    addi a10, a11, 40
+; CHECK-NEXT:  .LBB6_12: # %entry
+; CHECK-NEXT:    s32i a10, a1, 8
+; CHECK-NEXT:    add a8, a10, a8
+; CHECK-NEXT:    addi a8, a8, -4
+; CHECK-NEXT:    l32i a3, a8, 0
+; CHECK-NEXT:    l32r a8, .LCPI6_1
+; CHECK-NEXT:    callx0 a8
+; CHECK-NEXT:    l32i a12, a1, 4 # 4-byte Folded Reload
+; CHECK-NEXT:    l32i a0, a1, 8 # 4-byte Folded Reload
+; CHECK-NEXT:    addi a8, a1, 16
+; CHECK-NEXT:    or a1, a8, a8
+; CHECK-NEXT:    ret
+entry:
+  %list = alloca ptr, align 4
+  call void @llvm.va_start(ptr %list)
+
+  %va32 = va_arg ptr %list, i32
+  %sum = add nsw i32 %va32, %a1
+  call void @f_i32(i32 %sum)
+
+  %va64 = va_arg ptr %list, i64
+  call void @f_i64(i64 %va64)
 
   call void @llvm.va_end(ptr %list)
   ret void



More information about the llvm-commits mailing list