[llvm] [LoongArch] Add codegen support for LA32D calling convention (PR #141539)
via llvm-commits
llvm-commits at lists.llvm.org
Mon May 26 19:30:57 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-loongarch
Author: hev (heiher)
Changes
This patch adds codegen support for the LA32D calling convention, handling the cases where `f64` values must be passed or returned in GPRs on LA32 targets (with a soft-float ABI, or once the floating-point argument registers are exhausted). Similar to RISC-V, it introduces pseudo-instructions to construct an `f64` value from a pair of `i32`s and to split an `f64` into two `i32` values.
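As a rough illustration of the case the new pseudos cover (an assumed example, not taken from the added `calling-conv-ilp32d.ll` test): once the FP argument registers `$fa0`-`$fa7` are used up, a further `f64` argument takes the GPR/stack path added here, and the callee is expected to rebuild it from two `i32` halves via the new `BuildPairF64Pseudo`.

```llvm
; Hypothetical callee: %i no longer fits in an FP argument register, so with
; this patch it arrives as two i32 halves (GPR pair and/or stack) and is
; reassembled into an FPR64 through the new BUILD_PAIR_F64 node.
define double @callee_f64_in_gprs(double %a, double %b, double %c, double %d,
                                  double %e, double %f, double %g, double %h,
                                  double %i) {
  ret double %i
}
```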
---
Patch is 36.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141539.diff
8 Files Affected:
- (modified) llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td (+16)
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+261-35)
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.h (+4)
- (modified) llvm/lib/Target/LoongArch/LoongArchInstrInfo.td (+12)
- (added) llvm/test/CodeGen/LoongArch/calling-conv-ilp32d.ll (+193)
- (modified) llvm/test/CodeGen/LoongArch/inline-asm-constraint-f.ll (+5-9)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll (+4-10)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll (+16-24)
``````````diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
index c5d176596d8c6..616640152c8d3 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
@@ -319,3 +319,19 @@ def : Pat<(bitconvert FPR64:$src), (MOVFR2GR_D FPR64:$src)>;
let Predicates = [HasBasicD, IsLA64] in {
def : PatFpr<frint, FRINT_D, FPR64>;
} // Predicates = [HasBasicD, IsLA64]
+
+/// Pseudo-instructions needed for the soft-float ABI with LA32D
+
+let Predicates = [HasBasicD, IsLA32] in {
+// Moves two GPRs to an FPR.
+let usesCustomInserter = 1 in
+def BuildPairF64Pseudo
+ : Pseudo<(outs FPR64:$dst), (ins GPR:$src1, GPR:$src2),
+ [(set FPR64:$dst, (loongarch_build_pair_f64 GPR:$src1, GPR:$src2))]>;
+
+// Moves an FPR to two GPRs.
+let usesCustomInserter = 1 in
+def SplitPairF64Pseudo
+ : Pseudo<(outs GPR:$dst1, GPR:$dst2), (ins FPR64:$src),
+ [(set GPR:$dst1, GPR:$dst2, (loongarch_split_pair_f64 FPR64:$src))]>;
+} // Predicates = [HasBasicD, IsLA32]
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 9f5c94ddea44f..5e530db506e2a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -169,6 +169,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
+ if (Subtarget.hasBasicD())
+ setOperationAction(ISD::BITCAST, MVT::i64, Custom);
}
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
@@ -2578,13 +2580,20 @@ SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
+ EVT VT = Op.getValueType();
SDValue Op0 = Op.getOperand(0);
+ EVT Op0VT = Op0.getValueType();
- if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
+ if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
Subtarget.is64Bit() && Subtarget.hasBasicF()) {
SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
}
+ if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
+ return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
+ }
return Op;
}
@@ -3869,6 +3878,12 @@ void LoongArchTargetLowering::ReplaceNodeResults(
SDValue Dst =
DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
+ } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
+ SDValue NewReg = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
+ DAG.getVTList(MVT::i32, MVT::i32), Src);
+ SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
+ NewReg.getValue(0), NewReg.getValue(1));
+ Results.push_back(RetReg);
}
break;
}
@@ -5289,6 +5304,37 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue
+performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const LoongArchSubtarget &Subtarget) {
+ SDValue Op0 = N->getOperand(0);
+ SDLoc DL(N);
+
+ // If the input to SplitPairF64 is just BuildPairF64 then the operation is
+ // redundant. Instead, use BuildPairF64's operands directly.
+ if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
+ return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
+
+ if (Op0->isUndef()) {
+ SDValue Lo = DAG.getUNDEF(MVT::i32);
+ SDValue Hi = DAG.getUNDEF(MVT::i32);
+ return DCI.CombineTo(N, Lo, Hi);
+ }
+
+ // It's cheaper to materialise two 32-bit integers than to load a double
+ // from the constant pool and transfer it to integer registers through the
+ // stack.
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
+ APInt V = C->getValueAPF().bitcastToAPInt();
+ SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
+ SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
+ return DCI.CombineTo(N, Lo, Hi);
+ }
+
+ return SDValue();
+}
+
SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -5307,6 +5353,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
return performBITREV_WCombine(N, DAG, DCI, Subtarget);
case ISD::INTRINSIC_WO_CHAIN:
return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
+ case LoongArchISD::SPLIT_PAIR_F64:
+ return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
}
return SDValue();
}
@@ -5589,6 +5637,50 @@ static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
return BB;
}
+static MachineBasicBlock *
+emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
+ const LoongArchSubtarget &Subtarget) {
+ assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
+ "Unexpected instruction");
+
+ MachineFunction &MF = *BB->getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ Register LoReg = MI.getOperand(0).getReg();
+ Register HiReg = MI.getOperand(1).getReg();
+ Register SrcReg = MI.getOperand(2).getReg();
+
+ BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
+ BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
+ .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+static MachineBasicBlock *
+emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
+ const LoongArchSubtarget &Subtarget) {
+ assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
+ "Unexpected instruction");
+
+ MachineFunction &MF = *BB->getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
+ Register DstReg = MI.getOperand(0).getReg();
+ Register LoReg = MI.getOperand(1).getReg();
+ Register HiReg = MI.getOperand(2).getReg();
+
+ BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
+ .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
+ BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
+ .addReg(TmpReg, RegState::Kill)
+ .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
static bool isSelectPseudo(MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
@@ -5769,6 +5861,10 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
}
case LoongArch::Select_GPR_Using_CC_GPR:
return emitSelectPseudo(MI, BB, Subtarget);
+ case LoongArch::BuildPairF64Pseudo:
+ return emitBuildPairF64Pseudo(MI, BB, Subtarget);
+ case LoongArch::SplitPairF64Pseudo:
+ return emitSplitPairF64Pseudo(MI, BB, Subtarget);
case LoongArch::PseudoVBZ:
case LoongArch::PseudoVBZ_B:
case LoongArch::PseudoVBZ_H:
@@ -5850,6 +5946,8 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(MOVGR2FR_W_LA64)
NODE_NAME_CASE(MOVFR2GR_S_LA64)
NODE_NAME_CASE(FTINT)
+ NODE_NAME_CASE(BUILD_PAIR_F64)
+ NODE_NAME_CASE(SPLIT_PAIR_F64)
NODE_NAME_CASE(REVB_2H)
NODE_NAME_CASE(REVB_2W)
NODE_NAME_CASE(BITREV_4B)
@@ -6021,21 +6119,6 @@ static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
break;
}
- // FPR32 and FPR64 alias each other.
- if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
- UseGPRForFloat = true;
-
- if (UseGPRForFloat && ValVT == MVT::f32) {
- LocVT = GRLenVT;
- LocInfo = CCValAssign::BCvt;
- } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
- LocVT = MVT::i64;
- LocInfo = CCValAssign::BCvt;
- } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
- // TODO: Handle passing f64 on LA32 with D feature.
- report_fatal_error("Passing f64 with GPR on LA32 is undefined");
- }
-
// If this is a variadic argument, the LoongArch calling convention requires
// that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
// byte alignment. An aligned register should be used regardless of whether
@@ -6058,6 +6141,45 @@ static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
assert(PendingLocs.size() == PendingArgFlags.size() &&
"PendingLocs and PendingArgFlags out of sync");
+ // FPR32 and FPR64 alias each other.
+ if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
+ UseGPRForFloat = true;
+
+ if (UseGPRForFloat && ValVT == MVT::f32) {
+ LocVT = GRLenVT;
+ LocInfo = CCValAssign::BCvt;
+ } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
+ LocVT = MVT::i64;
+ LocInfo = CCValAssign::BCvt;
+ } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
+ // Handle passing f64 on LA32D with a soft float ABI or when floating point
+ // registers are exhausted.
+ assert(PendingLocs.empty() && "Can't lower f64 if it is split");
+ // Depending on available argument GPRS, f64 may be passed in a pair of
+ // GPRs, split between a GPR and the stack, or passed completely on the
+ // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
+ // cases.
+ MCRegister Reg = State.AllocateReg(ArgGPRs);
+ if (!Reg) {
+ int64_t StackOffset = State.AllocateStack(8, Align(8));
+ State.addLoc(
+ CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
+ return false;
+ }
+ LocVT = MVT::i32;
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ MCRegister HiReg = State.AllocateReg(ArgGPRs);
+ if (HiReg) {
+ State.addLoc(
+ CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
+ } else {
+ int64_t StackOffset = State.AllocateStack(4, Align(4));
+ State.addLoc(
+ CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
+ }
+ return false;
+ }
+
// Split arguments might be passed indirectly, so keep track of the pending
// values.
if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
@@ -6258,6 +6380,38 @@ static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
}
+static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain,
+ const CCValAssign &VA,
+ const CCValAssign &HiVA,
+ const SDLoc &DL) {
+ assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
+ "Unexpected VA");
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ assert(VA.isRegLoc() && "Expected register VA assignment");
+
+ Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
+ SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
+ SDValue Hi;
+ if (HiVA.isMemLoc()) {
+ // Second half of f64 is passed on the stack.
+ int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
+ /*IsImmutable=*/true);
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+ Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
+ MachinePointerInfo::getFixedStack(MF, FI));
+ } else {
+ // Second half of f64 is passed in another GPR.
+ Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
+ RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
+ Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
+ }
+ return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
+}
+
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
const CCValAssign &VA, const SDLoc &DL) {
EVT LocVT = VA.getLocVT();
@@ -6358,7 +6512,12 @@ SDValue LoongArchTargetLowering::LowerFormalArguments(
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue ArgValue;
- if (VA.isRegLoc())
+ // Passing f64 on LA32D with a soft float ABI must be handled as a special
+ // case.
+ if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
+ assert(VA.needsCustom());
+ ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
+ } else if (VA.isRegLoc())
ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this);
else
ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
@@ -6606,31 +6765,67 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVector<std::pair<Register, SDValue>> RegsToPass;
SmallVector<SDValue> MemOpChains;
SDValue StackPtr;
- for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
+ for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
+ ++i, ++OutIdx) {
CCValAssign &VA = ArgLocs[i];
- SDValue ArgValue = OutVals[i];
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ SDValue ArgValue = OutVals[OutIdx];
+ ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
+
+ // Handle passing f64 on LA32D with a soft float ABI as a special case.
+ if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
+ assert(VA.isRegLoc() && "Expected register VA assignment");
+ assert(VA.needsCustom());
+ SDValue SplitF64 =
+ DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
+ DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
+ SDValue Lo = SplitF64.getValue(0);
+ SDValue Hi = SplitF64.getValue(1);
+
+ Register RegLo = VA.getLocReg();
+ RegsToPass.push_back(std::make_pair(RegLo, Lo));
+
+ // Get the CCValAssign for the Hi part.
+ CCValAssign &HiVA = ArgLocs[++i];
+
+ if (HiVA.isMemLoc()) {
+ // Second half of f64 is passed on the stack.
+ if (!StackPtr.getNode())
+ StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
+ SDValue Address =
+ DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
+ DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
+ // Emit the store.
+ MemOpChains.push_back(DAG.getStore(
+ Chain, DL, Hi, Address,
+ MachinePointerInfo::getStack(MF, HiVA.getLocMemOffset())));
+ } else {
+ // Second half of f64 is passed in another GPR.
+ Register RegHigh = HiVA.getLocReg();
+ RegsToPass.push_back(std::make_pair(RegHigh, Hi));
+ }
+ continue;
+ }
// Promote the value if needed.
// For now, only handle fully promoted and indirect arguments.
if (VA.getLocInfo() == CCValAssign::Indirect) {
// Store the argument in a stack slot and pass its address.
Align StackAlign =
- std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
+ std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
getPrefTypeAlign(ArgValue.getValueType(), DAG));
TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
// If the original argument was split and passed by reference, we need to
// store the required parts of it here (and pass just one address).
- unsigned ArgIndex = Outs[i].OrigArgIndex;
- unsigned ArgPartOffset = Outs[i].PartOffset;
+ unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
+ unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
assert(ArgPartOffset == 0);
// Calculate the total size to store. We don't have access to what we're
// actually storing other than performing the loop and collecting the
// info.
SmallVector<std::pair<SDValue, SDValue>> Parts;
- while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
- SDValue PartValue = OutVals[i + 1];
- unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
+ while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
+ SDValue PartValue = OutVals[OutIdx + 1];
+ unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
EVT PartVT = PartValue.getValueType();
@@ -6638,6 +6833,7 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
Parts.push_back(std::make_pair(PartValue, Offset));
++i;
+ ++OutIdx;
}
SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
@@ -6773,7 +6969,8 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
// Copy all of the result registers out of their specified physreg.
- for (auto &VA : RVLocs) {
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ auto &VA = RVLocs[i];
// Copy the value out.
SDValue RetValue =
DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
@@ -6781,7 +6978,16 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
Chain = RetValue.getValue(1);
Glue = RetValue.getValue(2);
- RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
+ if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
+ assert(VA.needsCustom());
+ SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
+ MVT::i32, Glue);
+ Chain = RetValue2.getValue(1);
+ Glue = RetValue2.getValue(2);
+ RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
+ RetValue, RetValue2);
+ } else
+ RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
InVals.push_back(RetValue);
}
@@ -6827,17 +7033,37 @@ SDValue LoongArchTargetLowering::LowerReturn(
SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
- for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
+ for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
+ SDValue Val = OutVals[OutIdx];
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- // Handle a 'normal' return.
- SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);
- Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
+ if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
+ // Handle returning f64 on LA32D with a soft float ABI.
+ assert(VA.isRegLoc() && "Expected return via registers");
+ assert(VA.needsCustom());
+ SDValue SplitF64 = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
+ DAG.getVTList(MVT::i32, MVT::i32), Val);
+ SDValue Lo = SplitF64.getValue(0);
+ SDValue Hi = SplitF64.getValue(1);
+ Register RegLo = VA.getLocReg();
+ Register RegHi = RVLocs[++i].getLocReg();
+
+ Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
+ Glue = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
+ Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
+ Glue = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
+ } else {
+ // Handle a 'normal' return.
+ Val = convertValVTToLocVT(DAG, Val, VA, DL);
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
-...
[truncated]
``````````
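Outside the diff above, two assumed sketches may help illustrate what changes in practice. First, the `lowerBITCAST`/`ReplaceNodeResults` hunks: on LA32 with `+d`, `i64 <-> f64` bitcasts in both directions are now routed through the new `BUILD_PAIR_F64`/`SPLIT_PAIR_F64` nodes (the updated `double-convert.ll` and `load-store-atomic.ll` tests suggest this replaces a round trip through the stack), e.g. for IR like:

```llvm
; Assumed examples, not copied from the patch's tests.
define double @bitcast_i64_to_f64(i64 %x) {
  %r = bitcast i64 %x to double      ; expected: lowered via BUILD_PAIR_F64
  ret double %r
}

define i64 @bitcast_f64_to_i64(double %x) {
  %r = bitcast double %x to i64      ; expected: lowered via SPLIT_PAIR_F64
  ret i64 %r
}
```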
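Second, the `performSPLIT_PAIR_F64Combine` constant case: when a `SPLIT_PAIR_F64` node sees a `ConstantFP`, the two 32-bit halves of its bit pattern are materialised directly instead of going through the constant pool. An assumed example (hypothetical `@sink` declaration) where the ninth `f64` argument takes the GPR path:

```llvm
declare void @sink(double, double, double, double, double,
                   double, double, double, double)

define void @pass_const_f64() {
  ; For the last argument, 3.0 = 0x4008000000000000, so the combine should
  ; emit the halves 0x40080000 (hi) and 0x0 (lo) as plain i32 constants.
  call void @sink(double 0.0, double 0.0, double 0.0, double 0.0, double 0.0,
                  double 0.0, double 0.0, double 0.0, double 3.0)
  ret void
}
```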
https://github.com/llvm/llvm-project/pull/141539