[llvm] 0c2b738 - [LoongArch] Support for varargs
Weining Lu via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 7 23:02:21 PDT 2022
Author: wanglei
Date: 2022-08-08T14:01:24+08:00
New Revision: 0c2b738f8f1d252d31f1075f040af664802b9bac
URL: https://github.com/llvm/llvm-project/commit/0c2b738f8f1d252d31f1075f040af664802b9bac
DIFF: https://github.com/llvm/llvm-project/commit/0c2b738f8f1d252d31f1075f040af664802b9bac.diff
LOG: [LoongArch] Support for varargs
This patch ensures that `$fp` always points to the bottom of the vararg
spill region.
It also includes support for expanding `ISD::DYNAMIC_STACKALLOC`.
Differential Revision: https://reviews.llvm.org/D130250
Added:
llvm/test/CodeGen/LoongArch/vararg.ll
Modified:
llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
llvm/lib/Target/LoongArch/LoongArchISelLowering.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
index 0d9ec9e2eaaa..59b462a6f31b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
@@ -87,6 +87,7 @@ void LoongArchFrameLowering::determineFrameLayout(MachineFunction &MF) const {
void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineFrameInfo &MFI = MF.getFrameInfo();
+ auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
const LoongArchRegisterInfo *RI = STI.getRegisterInfo();
const LoongArchInstrInfo *TII = STI.getInstrInfo();
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -138,11 +139,14 @@ void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
// Generate new FP.
if (hasFP(MF)) {
- adjustReg(MBB, MBBI, DL, FPReg, SPReg, StackSize, MachineInstr::FrameSetup);
-
- // Emit ".cfi_def_cfa $fp, 0"
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
- nullptr, RI->getDwarfRegNum(FPReg, true), 0));
+ adjustReg(MBB, MBBI, DL, FPReg, SPReg,
+ StackSize - LoongArchFI->getVarArgsSaveSize(),
+ MachineInstr::FrameSetup);
+
+ // Emit ".cfi_def_cfa $fp, LoongArchFI->getVarArgsSaveSize()"
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::cfiDefCfa(nullptr, RI->getDwarfRegNum(FPReg, true),
+ LoongArchFI->getVarArgsSaveSize()));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlag(MachineInstr::FrameSetup);
@@ -153,6 +157,7 @@ void LoongArchFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
const LoongArchRegisterInfo *RI = STI.getRegisterInfo();
MachineFrameInfo &MFI = MF.getFrameInfo();
+ auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
Register SPReg = LoongArch::R3;
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
@@ -170,7 +175,8 @@ void LoongArchFrameLowering::emitEpilogue(MachineFunction &MF,
// Restore the stack pointer.
if (RI->hasStackRealignment(MF) || MFI.hasVarSizedObjects()) {
assert(hasFP(MF) && "frame pointer should not have been eliminated");
- adjustReg(MBB, LastFrameDestroy, DL, SPReg, LoongArch::R22, -StackSize,
+ adjustReg(MBB, LastFrameDestroy, DL, SPReg, LoongArch::R22,
+ -StackSize + LoongArchFI->getVarArgsSaveSize(),
MachineInstr::FrameDestroy);
}
@@ -193,10 +199,49 @@ void LoongArchFrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(LoongArchABI::getBPReg());
}
+// Reports whether call-frame space is reserved in the prologue: true unless
+// the function has variable-sized stack objects. When this returns false,
+// eliminateCallFramePseudoInstr adjusts the stack pointer around each call.
+bool LoongArchFrameLowering::hasReservedCallFrame(
+ const MachineFunction &MF) const {
+ return !MF.getFrameInfo().hasVarSizedObjects();
+}
+
+// Erase an ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo, adjusting SPReg if needed.
+MachineBasicBlock::iterator
+LoongArchFrameLowering::eliminateCallFramePseudoInstr(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ Register SPReg = LoongArch::R3;
+ DebugLoc DL = MI->getDebugLoc();
+
+ if (!hasReservedCallFrame(MF)) {
+ // No call frame was reserved in the prologue, so ADJCALLSTACKDOWN and
+ // ADJCALLSTACKUP must become real stack pointer adjustments. This is
+ // the case when there is a variable length stack allocation (e.g.
+ // alloca), which makes it impossible to allocate space for outgoing
+ // arguments from within the function prologue.
+ int64_t Amount = MI->getOperand(0).getImm(); // immediate from operand 0
+
+ if (Amount != 0) {
+ // Round via alignSPAdjust so the stack remains aligned afterwards.
+ Amount = alignSPAdjust(Amount);
+
+ if (MI->getOpcode() == LoongArch::ADJCALLSTACKDOWN)
+ Amount = -Amount; // the DOWN pseudo uses the negated amount
+
+ adjustReg(MBB, MI, DL, SPReg, SPReg, Amount, MachineInstr::NoFlags);
+ }
+ }
+
+ return MBB.erase(MI); // the pseudo is always removed; erase() result returned
+}
+
StackOffset LoongArchFrameLowering::getFrameIndexReference(
const MachineFunction &MF, int FI, Register &FrameReg) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
+ auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
// Callee-saved registers should be referenced relative to the stack
// pointer (positive offset), otherwise use the frame pointer (negative
@@ -213,10 +258,12 @@ StackOffset LoongArchFrameLowering::getFrameIndexReference(
MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
}
- FrameReg = RI->getFrameRegister(MF);
if ((FI >= MinCSFI && FI <= MaxCSFI) || !hasFP(MF)) {
FrameReg = LoongArch::R3;
Offset += StackOffset::getFixed(MFI.getStackSize());
+ } else {
+ FrameReg = RI->getFrameRegister(MF);
+ Offset += StackOffset::getFixed(LoongArchFI->getVarArgsSaveSize());
}
return Offset;
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
index 72d8e006a0bb..e1e3e260f97a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
@@ -34,11 +34,10 @@ class LoongArchFrameLowering : public TargetFrameLowering {
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS) const override;
+ bool hasReservedCallFrame(const MachineFunction &MF) const override;
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const override {
- return MBB.erase(MI);
- }
+ MachineBasicBlock::iterator MI) const override;
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
Register &FrameReg) const override;
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 12761244f5fe..08833d7c5b22 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -55,6 +55,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::GlobalAddress, ISD::ConstantPool}, GRLenVT, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
+
if (Subtarget.is64Bit()) {
setOperationAction(ISD::SHL, MVT::i32, Custom);
setOperationAction(ISD::SRA, MVT::i32, Custom);
@@ -137,9 +141,27 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
return SDValue();
case ISD::UINT_TO_FP:
return lowerUINT_TO_FP(Op, DAG);
+ case ISD::VASTART:
+ return lowerVASTART(Op, DAG);
}
}
+SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
+
+ SDLoc DL(Op);
+ SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy(MF.getDataLayout()));
+
+ // Custom lowering for ISD::VASTART: store the address of the
+ // VarArgsFrameIndex slot (FI) to the pointer given by operand 1.
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
+ MachinePointerInfo(SV));
+}
+
SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
@@ -1174,6 +1196,10 @@ SDValue LoongArchTargetLowering::LowerFormalArguments(
}
EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ MVT GRLenVT = Subtarget.getGRLenVT();
+ unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
+ // Used with varargs to accumulate store chains.
+ std::vector<SDValue> OutChains;
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign> ArgLocs;
@@ -1211,8 +1237,66 @@ SDValue LoongArchTargetLowering::LowerFormalArguments(
}
if (IsVarArg) {
- // TODO: Support vararg.
- report_fatal_error("Not support vararg");
+ ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
+ unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
+ const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
+
+ // Offset of the first variable argument from stack pointer, and size of
+ // the vararg save area. For now, the varargs save area is either zero or
+ // large enough to hold a0-a7.
+ int VaArgOffset, VarArgsSaveSize;
+
+ // If all registers are allocated, then all varargs must be passed on the
+ // stack and we don't need to save any argregs.
+ if (ArgRegs.size() == Idx) {
+ VaArgOffset = CCInfo.getNextStackOffset();
+ VarArgsSaveSize = 0;
+ } else {
+ VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
+ VaArgOffset = -VarArgsSaveSize;
+ }
+
+ // Record the frame index of the first variable argument
+ // which is a value necessary to VASTART.
+ int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
+ LoongArchFI->setVarArgsFrameIndex(FI);
+
+ // If saving an odd number of registers then create an extra stack slot to
+ // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
+ // offsets to even-numbered registers remain 2*GRLen-aligned.
+ if (Idx % 2) {
+ MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
+ true);
+ VarArgsSaveSize += GRLenInBytes;
+ }
+
+ // Copy the integer registers that may have been used for passing varargs
+ // to the vararg save area.
+ for (unsigned I = Idx; I < ArgRegs.size();
+ ++I, VaArgOffset += GRLenInBytes) {
+ const Register Reg = RegInfo.createVirtualRegister(RC);
+ RegInfo.addLiveIn(ArgRegs[I], Reg);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
+ FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
+ SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
+ SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
+ MachinePointerInfo::getFixedStack(MF, FI));
+ cast<StoreSDNode>(Store.getNode())
+ ->getMemOperand()
+ ->setValue((Value *)nullptr);
+ OutChains.push_back(Store);
+ }
+ LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
+ }
+
+ // All stores are grouped in one node to allow the matching between
+ // the size of Ins and InVals. This only happens for vararg functions.
+ if (!OutChains.empty()) {
+ OutChains.push_back(Chain);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
}
return Chain;
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index efcc27f037f5..5589bf6a0f5c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -110,6 +110,7 @@ class LoongArchTargetLowering : public TargetLowering {
SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
diff --git a/llvm/test/CodeGen/LoongArch/vararg.ll b/llvm/test/CodeGen/LoongArch/vararg.ll
new file mode 100644
index 000000000000..5b196fe9bb86
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/vararg.ll
@@ -0,0 +1,356 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+d --target-abi=lp64d < %s \
+; RUN: | FileCheck --check-prefix=LA64-FPELIM %s
+; RUN: llc --mtriple=loongarch64 --mattr=+d --target-abi=lp64d < %s \
+; RUN: --frame-pointer=all < %s \
+; RUN: | FileCheck --check-prefix=LA64-WITHFP %s
+
+declare void @llvm.va_start(ptr)
+declare void @llvm.va_end(ptr)
+
+declare void @notdead(ptr)
+
+define i64 @va1(ptr %fmt, ...) {
+; LA64-FPELIM-LABEL: va1:
+; LA64-FPELIM: # %bb.0:
+; LA64-FPELIM-NEXT: addi.d $sp, $sp, -80
+; LA64-FPELIM-NEXT: .cfi_def_cfa_offset 80
+; LA64-FPELIM-NEXT: move $a0, $a1
+; LA64-FPELIM-NEXT: st.d $a7, $sp, 72
+; LA64-FPELIM-NEXT: st.d $a6, $sp, 64
+; LA64-FPELIM-NEXT: st.d $a5, $sp, 56
+; LA64-FPELIM-NEXT: st.d $a4, $sp, 48
+; LA64-FPELIM-NEXT: st.d $a3, $sp, 40
+; LA64-FPELIM-NEXT: st.d $a2, $sp, 32
+; LA64-FPELIM-NEXT: addi.d $a1, $sp, 32
+; LA64-FPELIM-NEXT: st.d $a1, $sp, 8
+; LA64-FPELIM-NEXT: st.d $a0, $sp, 24
+; LA64-FPELIM-NEXT: addi.d $sp, $sp, 80
+; LA64-FPELIM-NEXT: jirl $zero, $ra, 0
+;
+; LA64-WITHFP-LABEL: va1:
+; LA64-WITHFP: # %bb.0:
+; LA64-WITHFP-NEXT: addi.d $sp, $sp, -96
+; LA64-WITHFP-NEXT: .cfi_def_cfa_offset 96
+; LA64-WITHFP-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-WITHFP-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64-WITHFP-NEXT: .cfi_offset 1, -72
+; LA64-WITHFP-NEXT: .cfi_offset 22, -80
+; LA64-WITHFP-NEXT: addi.d $fp, $sp, 32
+; LA64-WITHFP-NEXT: .cfi_def_cfa 22, 64
+; LA64-WITHFP-NEXT: move $a0, $a1
+; LA64-WITHFP-NEXT: st.d $a7, $fp, 56
+; LA64-WITHFP-NEXT: st.d $a6, $fp, 48
+; LA64-WITHFP-NEXT: st.d $a5, $fp, 40
+; LA64-WITHFP-NEXT: st.d $a4, $fp, 32
+; LA64-WITHFP-NEXT: st.d $a3, $fp, 24
+; LA64-WITHFP-NEXT: st.d $a2, $fp, 16
+; LA64-WITHFP-NEXT: addi.d $a1, $fp, 16
+; LA64-WITHFP-NEXT: st.d $a1, $fp, -24
+; LA64-WITHFP-NEXT: st.d $a0, $fp, 8
+; LA64-WITHFP-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
+; LA64-WITHFP-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-WITHFP-NEXT: addi.d $sp, $sp, 96
+; LA64-WITHFP-NEXT: jirl $zero, $ra, 0
+ %va = alloca ptr, align 8
+ call void @llvm.va_start(ptr %va)
+ %argp.cur = load ptr, ptr %va, align 8
+ %argp.next = getelementptr inbounds i64, ptr %argp.cur, i32 1
+ store ptr %argp.next, ptr %va, align 8
+ %1 = load i64, ptr %argp.cur, align 8
+ call void @llvm.va_end(ptr %va)
+ ret i64 %1
+}
+
+define i64 @va1_va_arg(ptr %fmt, ...) nounwind {
+; LA64-FPELIM-LABEL: va1_va_arg:
+; LA64-FPELIM: # %bb.0:
+; LA64-FPELIM-NEXT: addi.d $sp, $sp, -80
+; LA64-FPELIM-NEXT: move $a0, $a1
+; LA64-FPELIM-NEXT: st.d $a7, $sp, 72
+; LA64-FPELIM-NEXT: st.d $a6, $sp, 64
+; LA64-FPELIM-NEXT: st.d $a5, $sp, 56
+; LA64-FPELIM-NEXT: st.d $a4, $sp, 48
+; LA64-FPELIM-NEXT: st.d $a3, $sp, 40
+; LA64-FPELIM-NEXT: st.d $a2, $sp, 32
+; LA64-FPELIM-NEXT: addi.d $a1, $sp, 32
+; LA64-FPELIM-NEXT: st.d $a1, $sp, 8
+; LA64-FPELIM-NEXT: st.d $a0, $sp, 24
+; LA64-FPELIM-NEXT: addi.d $sp, $sp, 80
+; LA64-FPELIM-NEXT: jirl $zero, $ra, 0
+;
+; LA64-WITHFP-LABEL: va1_va_arg:
+; LA64-WITHFP: # %bb.0:
+; LA64-WITHFP-NEXT: addi.d $sp, $sp, -96
+; LA64-WITHFP-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-WITHFP-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64-WITHFP-NEXT: addi.d $fp, $sp, 32
+; LA64-WITHFP-NEXT: move $a0, $a1
+; LA64-WITHFP-NEXT: st.d $a7, $fp, 56
+; LA64-WITHFP-NEXT: st.d $a6, $fp, 48
+; LA64-WITHFP-NEXT: st.d $a5, $fp, 40
+; LA64-WITHFP-NEXT: st.d $a4, $fp, 32
+; LA64-WITHFP-NEXT: st.d $a3, $fp, 24
+; LA64-WITHFP-NEXT: st.d $a2, $fp, 16
+; LA64-WITHFP-NEXT: addi.d $a1, $fp, 16
+; LA64-WITHFP-NEXT: st.d $a1, $fp, -24
+; LA64-WITHFP-NEXT: st.d $a0, $fp, 8
+; LA64-WITHFP-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
+; LA64-WITHFP-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-WITHFP-NEXT: addi.d $sp, $sp, 96
+; LA64-WITHFP-NEXT: jirl $zero, $ra, 0
+ %va = alloca ptr, align 8
+ call void @llvm.va_start(ptr %va)
+ %1 = va_arg ptr %va, i64
+ call void @llvm.va_end(ptr %va)
+ ret i64 %1
+}
+
+;; Ensure the adjustment when restoring the stack pointer using the frame
+;; pointer is correct
+
+define i64 @va1_va_arg_alloca(ptr %fmt, ...) nounwind {
+; LA64-FPELIM-LABEL: va1_va_arg_alloca:
+; LA64-FPELIM: # %bb.0:
+; LA64-FPELIM-NEXT: addi.d $sp, $sp, -96
+; LA64-FPELIM-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-FPELIM-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64-FPELIM-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill
+; LA64-FPELIM-NEXT: addi.d $fp, $sp, 32
+; LA64-FPELIM-NEXT: move $s0, $a1
+; LA64-FPELIM-NEXT: st.d $a7, $fp, 56
+; LA64-FPELIM-NEXT: st.d $a6, $fp, 48
+; LA64-FPELIM-NEXT: st.d $a5, $fp, 40
+; LA64-FPELIM-NEXT: st.d $a4, $fp, 32
+; LA64-FPELIM-NEXT: st.d $a3, $fp, 24
+; LA64-FPELIM-NEXT: st.d $a2, $fp, 16
+; LA64-FPELIM-NEXT: addi.d $a0, $fp, 16
+; LA64-FPELIM-NEXT: st.d $a0, $fp, -32
+; LA64-FPELIM-NEXT: addi.d $a0, $a1, 15
+; LA64-FPELIM-NEXT: addi.w $a1, $zero, -16
+; LA64-FPELIM-NEXT: and $a0, $a0, $a1
+; LA64-FPELIM-NEXT: st.d $s0, $fp, 8
+; LA64-FPELIM-NEXT: sub.d $a0, $sp, $a0
+; LA64-FPELIM-NEXT: move $sp, $a0
+; LA64-FPELIM-NEXT: bl notdead
+; LA64-FPELIM-NEXT: move $a0, $s0
+; LA64-FPELIM-NEXT: addi.d $sp, $fp, -32
+; LA64-FPELIM-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload
+; LA64-FPELIM-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
+; LA64-FPELIM-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-FPELIM-NEXT: addi.d $sp, $sp, 96
+; LA64-FPELIM-NEXT: jirl $zero, $ra, 0
+;
+; LA64-WITHFP-LABEL: va1_va_arg_alloca:
+; LA64-WITHFP: # %bb.0:
+; LA64-WITHFP-NEXT: addi.d $sp, $sp, -96
+; LA64-WITHFP-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-WITHFP-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64-WITHFP-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill
+; LA64-WITHFP-NEXT: addi.d $fp, $sp, 32
+; LA64-WITHFP-NEXT: move $s0, $a1
+; LA64-WITHFP-NEXT: st.d $a7, $fp, 56
+; LA64-WITHFP-NEXT: st.d $a6, $fp, 48
+; LA64-WITHFP-NEXT: st.d $a5, $fp, 40
+; LA64-WITHFP-NEXT: st.d $a4, $fp, 32
+; LA64-WITHFP-NEXT: st.d $a3, $fp, 24
+; LA64-WITHFP-NEXT: st.d $a2, $fp, 16
+; LA64-WITHFP-NEXT: addi.d $a0, $fp, 16
+; LA64-WITHFP-NEXT: st.d $a0, $fp, -32
+; LA64-WITHFP-NEXT: addi.d $a0, $a1, 15
+; LA64-WITHFP-NEXT: addi.w $a1, $zero, -16
+; LA64-WITHFP-NEXT: and $a0, $a0, $a1
+; LA64-WITHFP-NEXT: st.d $s0, $fp, 8
+; LA64-WITHFP-NEXT: sub.d $a0, $sp, $a0
+; LA64-WITHFP-NEXT: move $sp, $a0
+; LA64-WITHFP-NEXT: bl notdead
+; LA64-WITHFP-NEXT: move $a0, $s0
+; LA64-WITHFP-NEXT: addi.d $sp, $fp, -32
+; LA64-WITHFP-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload
+; LA64-WITHFP-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
+; LA64-WITHFP-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-WITHFP-NEXT: addi.d $sp, $sp, 96
+; LA64-WITHFP-NEXT: jirl $zero, $ra, 0
+ %va = alloca ptr, align 8
+ call void @llvm.va_start(ptr %va)
+ %1 = va_arg ptr %va, i64
+ %2 = alloca i8, i64 %1
+ call void @notdead(ptr %2)
+ call void @llvm.va_end(ptr %va)
+ ret i64 %1
+}
+
+define void @va1_caller() nounwind {
+; LA64-FPELIM-LABEL: va1_caller:
+; LA64-FPELIM: # %bb.0:
+; LA64-FPELIM-NEXT: addi.d $sp, $sp, -16
+; LA64-FPELIM-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-FPELIM-NEXT: lu52i.d $a1, $zero, 1023
+; LA64-FPELIM-NEXT: ori $a2, $zero, 2
+; LA64-FPELIM-NEXT: bl va1
+; LA64-FPELIM-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-FPELIM-NEXT: addi.d $sp, $sp, 16
+; LA64-FPELIM-NEXT: jirl $zero, $ra, 0
+;
+; LA64-WITHFP-LABEL: va1_caller:
+; LA64-WITHFP: # %bb.0:
+; LA64-WITHFP-NEXT: addi.d $sp, $sp, -16
+; LA64-WITHFP-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-WITHFP-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill
+; LA64-WITHFP-NEXT: addi.d $fp, $sp, 16
+; LA64-WITHFP-NEXT: lu52i.d $a1, $zero, 1023
+; LA64-WITHFP-NEXT: ori $a2, $zero, 2
+; LA64-WITHFP-NEXT: bl va1
+; LA64-WITHFP-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload
+; LA64-WITHFP-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-WITHFP-NEXT: addi.d $sp, $sp, 16
+; LA64-WITHFP-NEXT: jirl $zero, $ra, 0
+ %1 = call i64 (ptr, ...) @va1(ptr undef, double 1.0, i64 2)
+ ret void
+}
+
+;; Ensure a named 2*GRLen argument is passed in a1 and a2, while the
+;; vararg long double is passed in a4 and a5 (rather than a3 and a4)
+
+declare i64 @va_aligned_register(i64 %a, i128 %b, ...)
+
+define void @va_aligned_register_caller() nounwind {
+; LA64-FPELIM-LABEL: va_aligned_register_caller:
+; LA64-FPELIM: # %bb.0:
+; LA64-FPELIM-NEXT: addi.d $sp, $sp, -16
+; LA64-FPELIM-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-FPELIM-NEXT: lu12i.w $a0, 335544
+; LA64-FPELIM-NEXT: ori $a0, $a0, 1311
+; LA64-FPELIM-NEXT: lu32i.d $a0, 335544
+; LA64-FPELIM-NEXT: lu52i.d $a4, $a0, -328
+; LA64-FPELIM-NEXT: lu12i.w $a0, -503317
+; LA64-FPELIM-NEXT: ori $a0, $a0, 2129
+; LA64-FPELIM-NEXT: lu32i.d $a0, 37355
+; LA64-FPELIM-NEXT: lu52i.d $a5, $a0, 1024
+; LA64-FPELIM-NEXT: ori $a0, $zero, 2
+; LA64-FPELIM-NEXT: ori $a1, $zero, 1111
+; LA64-FPELIM-NEXT: move $a2, $zero
+; LA64-FPELIM-NEXT: bl va_aligned_register
+; LA64-FPELIM-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-FPELIM-NEXT: addi.d $sp, $sp, 16
+; LA64-FPELIM-NEXT: jirl $zero, $ra, 0
+;
+; LA64-WITHFP-LABEL: va_aligned_register_caller:
+; LA64-WITHFP: # %bb.0:
+; LA64-WITHFP-NEXT: addi.d $sp, $sp, -16
+; LA64-WITHFP-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-WITHFP-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill
+; LA64-WITHFP-NEXT: addi.d $fp, $sp, 16
+; LA64-WITHFP-NEXT: lu12i.w $a0, 335544
+; LA64-WITHFP-NEXT: ori $a0, $a0, 1311
+; LA64-WITHFP-NEXT: lu32i.d $a0, 335544
+; LA64-WITHFP-NEXT: lu52i.d $a4, $a0, -328
+; LA64-WITHFP-NEXT: lu12i.w $a0, -503317
+; LA64-WITHFP-NEXT: ori $a0, $a0, 2129
+; LA64-WITHFP-NEXT: lu32i.d $a0, 37355
+; LA64-WITHFP-NEXT: lu52i.d $a5, $a0, 1024
+; LA64-WITHFP-NEXT: ori $a0, $zero, 2
+; LA64-WITHFP-NEXT: ori $a1, $zero, 1111
+; LA64-WITHFP-NEXT: move $a2, $zero
+; LA64-WITHFP-NEXT: bl va_aligned_register
+; LA64-WITHFP-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload
+; LA64-WITHFP-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-WITHFP-NEXT: addi.d $sp, $sp, 16
+; LA64-WITHFP-NEXT: jirl $zero, $ra, 0
+ %1 = call i64 (i64, i128, ...) @va_aligned_register(i64 2, i128 1111,
+ fp128 0xLEB851EB851EB851F400091EB851EB851)
+ ret void
+}
+
+;; Check 2*GRLen values are aligned appropriately when passed on the stack
+;; in a vararg call
+
+declare i32 @va_aligned_stack_callee(i32, ...)
+
+define void @va_aligned_stack_caller() nounwind {
+; LA64-FPELIM-LABEL: va_aligned_stack_caller:
+; LA64-FPELIM: # %bb.0:
+; LA64-FPELIM-NEXT: addi.d $sp, $sp, -112
+; LA64-FPELIM-NEXT: st.d $ra, $sp, 104 # 8-byte Folded Spill
+; LA64-FPELIM-NEXT: ori $a0, $zero, 17
+; LA64-FPELIM-NEXT: st.d $a0, $sp, 48
+; LA64-FPELIM-NEXT: ori $a0, $zero, 16
+; LA64-FPELIM-NEXT: st.d $a0, $sp, 40
+; LA64-FPELIM-NEXT: ori $a0, $zero, 15
+; LA64-FPELIM-NEXT: st.d $a0, $sp, 32
+; LA64-FPELIM-NEXT: ori $a0, $zero, 14
+; LA64-FPELIM-NEXT: st.d $a0, $sp, 0
+; LA64-FPELIM-NEXT: lu12i.w $a0, -503317
+; LA64-FPELIM-NEXT: ori $a0, $a0, 2129
+; LA64-FPELIM-NEXT: lu32i.d $a0, 37355
+; LA64-FPELIM-NEXT: lu52i.d $a0, $a0, 1024
+; LA64-FPELIM-NEXT: st.d $a0, $sp, 24
+; LA64-FPELIM-NEXT: lu12i.w $a0, 335544
+; LA64-FPELIM-NEXT: ori $a0, $a0, 1311
+; LA64-FPELIM-NEXT: lu32i.d $a0, 335544
+; LA64-FPELIM-NEXT: lu52i.d $a0, $a0, -328
+; LA64-FPELIM-NEXT: st.d $a0, $sp, 16
+; LA64-FPELIM-NEXT: ori $a0, $zero, 1000
+; LA64-FPELIM-NEXT: st.d $a0, $sp, 64
+; LA64-FPELIM-NEXT: st.d $zero, $sp, 88
+; LA64-FPELIM-NEXT: st.d $zero, $sp, 80
+; LA64-FPELIM-NEXT: st.d $zero, $sp, 72
+; LA64-FPELIM-NEXT: ori $a1, $zero, 11
+; LA64-FPELIM-NEXT: addi.d $a2, $sp, 64
+; LA64-FPELIM-NEXT: ori $a3, $zero, 12
+; LA64-FPELIM-NEXT: ori $a4, $zero, 13
+; LA64-FPELIM-NEXT: ori $a0, $zero, 1
+; LA64-FPELIM-NEXT: move $a6, $zero
+; LA64-FPELIM-NEXT: move $a7, $a0
+; LA64-FPELIM-NEXT: bl va_aligned_stack_callee
+; LA64-FPELIM-NEXT: ld.d $ra, $sp, 104 # 8-byte Folded Reload
+; LA64-FPELIM-NEXT: addi.d $sp, $sp, 112
+; LA64-FPELIM-NEXT: jirl $zero, $ra, 0
+;
+; LA64-WITHFP-LABEL: va_aligned_stack_caller:
+; LA64-WITHFP: # %bb.0:
+; LA64-WITHFP-NEXT: addi.d $sp, $sp, -112
+; LA64-WITHFP-NEXT: st.d $ra, $sp, 104 # 8-byte Folded Spill
+; LA64-WITHFP-NEXT: st.d $fp, $sp, 96 # 8-byte Folded Spill
+; LA64-WITHFP-NEXT: addi.d $fp, $sp, 112
+; LA64-WITHFP-NEXT: ori $a0, $zero, 17
+; LA64-WITHFP-NEXT: st.d $a0, $sp, 48
+; LA64-WITHFP-NEXT: ori $a0, $zero, 16
+; LA64-WITHFP-NEXT: st.d $a0, $sp, 40
+; LA64-WITHFP-NEXT: ori $a0, $zero, 15
+; LA64-WITHFP-NEXT: st.d $a0, $sp, 32
+; LA64-WITHFP-NEXT: ori $a0, $zero, 14
+; LA64-WITHFP-NEXT: st.d $a0, $sp, 0
+; LA64-WITHFP-NEXT: lu12i.w $a0, -503317
+; LA64-WITHFP-NEXT: ori $a0, $a0, 2129
+; LA64-WITHFP-NEXT: lu32i.d $a0, 37355
+; LA64-WITHFP-NEXT: lu52i.d $a0, $a0, 1024
+; LA64-WITHFP-NEXT: st.d $a0, $sp, 24
+; LA64-WITHFP-NEXT: lu12i.w $a0, 335544
+; LA64-WITHFP-NEXT: ori $a0, $a0, 1311
+; LA64-WITHFP-NEXT: lu32i.d $a0, 335544
+; LA64-WITHFP-NEXT: lu52i.d $a0, $a0, -328
+; LA64-WITHFP-NEXT: st.d $a0, $sp, 16
+; LA64-WITHFP-NEXT: ori $a0, $zero, 1000
+; LA64-WITHFP-NEXT: st.d $a0, $fp, -48
+; LA64-WITHFP-NEXT: st.d $zero, $fp, -24
+; LA64-WITHFP-NEXT: st.d $zero, $fp, -32
+; LA64-WITHFP-NEXT: st.d $zero, $fp, -40
+; LA64-WITHFP-NEXT: ori $a1, $zero, 11
+; LA64-WITHFP-NEXT: addi.d $a2, $fp, -48
+; LA64-WITHFP-NEXT: ori $a3, $zero, 12
+; LA64-WITHFP-NEXT: ori $a4, $zero, 13
+; LA64-WITHFP-NEXT: ori $a0, $zero, 1
+; LA64-WITHFP-NEXT: move $a6, $zero
+; LA64-WITHFP-NEXT: move $a7, $a0
+; LA64-WITHFP-NEXT: bl va_aligned_stack_callee
+; LA64-WITHFP-NEXT: ld.d $fp, $sp, 96 # 8-byte Folded Reload
+; LA64-WITHFP-NEXT: ld.d $ra, $sp, 104 # 8-byte Folded Reload
+; LA64-WITHFP-NEXT: addi.d $sp, $sp, 112
+; LA64-WITHFP-NEXT: jirl $zero, $ra, 0
+ %1 = call i32 (i32, ...) @va_aligned_stack_callee(i32 1, i32 11,
+ i256 1000, i32 12, i32 13, i128 18446744073709551616, i32 14,
+ fp128 0xLEB851EB851EB851F400091EB851EB851, i64 15,
+ [2 x i64] [i64 16, i64 17])
+ ret void
+}
More information about the llvm-commits
mailing list