[llvm] [LoongArch] Custom lower FP_TO_FP16 and FP16_TO_FP to correct ABI of libcall (PR #141702)
via llvm-commits
llvm-commits at lists.llvm.org
Tue May 27 19:10:25 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-loongarch
Author: None (Ami-zhang)
This change passes 'half' values in the lower 16 bits of an f32 value under the F/D ABIs. LoongArch currently lacks a hardware extension for the fp16 data type, and the ABI manual now documents that the half-precision floating-point type follows the FP calling conventions.
Previously, we kept the 'half' type in its 16-bit format between operations: regardless of whether the F extension was enabled, the value was passed in the lower 16 bits of a GPR in its 'half' format.
With this patch, depending on the ABI in use, the value is passed in 'half' format in either an FPR or a GPR. This keeps the bit location consistent with where the value would live once an fp16 hardware extension is enabled.
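For illustration, here is a minimal, self-contained C++ sketch (not part of the patch; the helper names are hypothetical) of the FPR layout this implies: the f16 payload occupies the low 16 bits of the 32-bit register image, and the upper 16 bits are filled with ones so the pattern reads back as an f32 NaN, matching the `0xFFFF0000` mask used in `splitValueIntoRegisterParts` in the diff below.

```cpp
#include <cstdint>
#include <cstdio>

// Hypothetical helper: box an f16 bit pattern into a 32-bit FPR image.
// The low 16 bits carry the half; the high 16 bits are all ones, so the
// 32-bit pattern is an f32 NaN ("NaN-boxing").
uint32_t boxHalfInFloatReg(uint16_t HalfBits) {
  return 0xFFFF0000u | HalfBits;
}

// Hypothetical helper: recovering the half is a plain truncation of the
// low 16 bits, mirroring joinRegisterPartsIntoValue in the patch.
uint16_t unboxHalfFromFloatReg(uint32_t FloatBits) {
  return static_cast<uint16_t>(FloatBits);
}

int main() {
  uint16_t Half = 0x3C00; // 1.0 in IEEE-754 binary16
  uint32_t Boxed = boxHalfInFloatReg(Half);
  std::printf("boxed: 0x%08X unboxed: 0x%04X\n", Boxed,
              static_cast<unsigned>(unboxHalfFromFloatReg(Boxed)));
  return 0;
}
```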
---
Patch is 123.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141702.diff
6 Files Affected:
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+135-3)
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.h (+24)
- (added) llvm/test/CodeGen/LoongArch/calling-conv-half.ll (+1628)
- (modified) llvm/test/CodeGen/LoongArch/fp16-promote.ll (+131-71)
- (added) llvm/test/CodeGen/LoongArch/issue97975.ll (+444)
- (added) llvm/test/CodeGen/LoongArch/issue97981.ll (+127)
``````````diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 9f5c94ddea44f..c7b2a1a8ffbf8 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -199,8 +199,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
- setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
- setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f32,
+ Subtarget.isSoftFPABI() ? LibCall : Custom);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f32,
+ Subtarget.isSoftFPABI() ? LibCall : Custom);
if (Subtarget.is64Bit())
setOperationAction(ISD::FRINT, MVT::f32, Legal);
@@ -239,7 +241,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
- setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f64,
+ Subtarget.isSoftFPABI() ? LibCall : Custom);
if (Subtarget.is64Bit())
setOperationAction(ISD::FRINT, MVT::f64, Legal);
@@ -490,6 +493,10 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
return lowerPREFETCH(Op, DAG);
case ISD::SELECT:
return lowerSELECT(Op, DAG);
+ case ISD::FP_TO_FP16:
+ return lowerFP_TO_FP16(Op, DAG);
+ case ISD::FP16_TO_FP:
+ return lowerFP16_TO_FP(Op, DAG);
}
return SDValue();
}
@@ -2242,6 +2249,40 @@ SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
return SDValue();
}
+SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Custom lower to ensure the libcall return is passed in an FPR on hard
+ // float ABIs.
+ SDLoc DL(Op);
+ MakeLibCallOptions CallOptions;
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Chain = SDValue();
+ RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
+ SDValue Res;
+ std::tie(Res, Chain) =
+ makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
+ if (Subtarget.is64Bit())
+ return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
+ return DAG.getBitcast(MVT::i32, Res);
+}
+
+SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Custom lower to ensure the libcall argument is passed in an FPR on hard
+ // float ABIs.
+ SDLoc DL(Op);
+ MakeLibCallOptions CallOptions;
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Chain = SDValue();
+ SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
+ DL, MVT::f32, Op0)
+ : DAG.getBitcast(MVT::f32, Op0);
+ SDValue Res;
+ std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
+ CallOptions, DL, Chain);
+ return Res;
+}
+
static bool isConstantOrUndef(const SDValue Op) {
if (Op->isUndef())
return true;
@@ -3841,6 +3882,8 @@ void LoongArchTargetLowering::ReplaceNodeResults(
EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
TargetLowering::TypeSoftenFloat) {
+ if (!isTypeLegal(Src.getValueType()))
+ return;
if (Src.getValueType() == MVT::f16)
Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
@@ -5289,6 +5332,33 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const LoongArchSubtarget &Subtarget) {
+ // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
+ // conversion is unnecessary and can be replaced with the
+ // MOVFR2GR_S_LA64 operand.
+ SDValue Op0 = N->getOperand(0);
+ if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
+ return Op0.getOperand(0);
+ return SDValue();
+}
+
+static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const LoongArchSubtarget &Subtarget) {
+ // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
+ // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
+ // operand.
+ SDValue Op0 = N->getOperand(0);
+ MVT VT = N->getSimpleValueType(0);
+ if (Op0->getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
+ assert(Op0.getOperand(0).getValueType() == VT && "Unexpected value type!");
+ return Op0.getOperand(0);
+ }
+ return SDValue();
+}
+
SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -5307,6 +5377,10 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
return performBITREV_WCombine(N, DAG, DCI, Subtarget);
case ISD::INTRINSIC_WO_CHAIN:
return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
+ case LoongArchISD::MOVGR2FR_W_LA64:
+ return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
+ case LoongArchISD::MOVFR2GR_S_LA64:
+ return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
}
return SDValue();
}
@@ -7633,3 +7707,61 @@ LoongArchTargetLowering::getPreferredVectorAction(MVT VT) const {
return TargetLoweringBase::getPreferredVectorAction(VT);
}
+
+bool LoongArchTargetLowering::splitValueIntoRegisterParts(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
+ unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
+ bool IsABIRegCopy = CC.has_value();
+ EVT ValueVT = Val.getValueType();
+
+ if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
+ // Cast the f16 to i16, extend to i32, pad with ones to make a float
+ // NaN, and cast to f32.
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
+ Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
+ Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
+ DAG.getConstant(0xFFFF0000, DL, MVT::i32));
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
+ Parts[0] = Val;
+ return true;
+ }
+
+ return false;
+}
+
+SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
+ SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
+ MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
+ bool IsABIRegCopy = CC.has_value();
+
+ if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
+ SDValue Val = Parts[0];
+
+ // Cast the f32 to i32, truncate to i16, and cast back to f16.
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+ return Val;
+ }
+
+ return SDValue();
+}
+
+MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
+ EVT VT) const {
+ // Use f32 to pass f16.
+ if (VT == MVT::f16 && Subtarget.hasBasicF())
+ return MVT::f32;
+
+ return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
+}
+
+unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
+ LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
+ // Use f32 to pass f16.
+ if (VT == MVT::f16 && Subtarget.hasBasicF())
+ return 1;
+
+ return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 6bf295984dfc5..8c00ec75db94b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -361,6 +361,8 @@ class LoongArchTargetLowering : public TargetLowering {
SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFP16_TO_FP(SDValue Op, SelectionDAG &DAG) const;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
@@ -385,6 +387,28 @@ class LoongArchTargetLowering : public TargetLowering {
const SmallVectorImpl<CCValAssign> &ArgLocs) const;
bool softPromoteHalfType() const override { return true; }
+
+ bool
+ splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
+ SDValue *Parts, unsigned NumParts, MVT PartVT,
+ std::optional<CallingConv::ID> CC) const override;
+
+ SDValue
+ joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,
+ const SDValue *Parts, unsigned NumParts,
+ MVT PartVT, EVT ValueVT,
+ std::optional<CallingConv::ID> CC) const override;
+
+ /// Return the register type for a given MVT, ensuring vectors are treated
+ /// as a series of gpr sized integers.
+ MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
+ EVT VT) const override;
+
+ /// Return the number of registers for a given MVT, ensuring vectors are
+ /// treated as a series of gpr sized integers.
+ unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
+ EVT VT) const override;
};
} // end namespace llvm
diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-half.ll b/llvm/test/CodeGen/LoongArch/calling-conv-half.ll
new file mode 100644
index 0000000000000..c88b67f13d1e7
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/calling-conv-half.ll
@@ -0,0 +1,1628 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --verify-machineinstrs < %s | FileCheck %s --check-prefix=LA32S
+; RUN: llc --mtriple=loongarch32 --mattr=+f -target-abi=ilp32s --verify-machineinstrs < %s | FileCheck %s --check-prefix=LA32F-ILP32S
+; RUN: llc --mtriple=loongarch32 --mattr=+f -target-abi=ilp32d --verify-machineinstrs < %s | FileCheck %s --check-prefix=LA32F-ILP32D
+; RUN: llc --mtriple=loongarch32 --mattr=+d -target-abi=ilp32s --verify-machineinstrs < %s | FileCheck %s --check-prefix=LA32D-ILP32S
+; RUN: llc --mtriple=loongarch32 --mattr=+d -target-abi=ilp32d --verify-machineinstrs < %s | FileCheck %s --check-prefix=LA32D-ILP32D
+; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s | FileCheck %s --check-prefix=LA64S
+; RUN: llc --mtriple=loongarch64 --mattr=+f -target-abi=lp64s --verify-machineinstrs < %s | FileCheck %s --check-prefix=LA64F-LP64S
+; RUN: llc --mtriple=loongarch64 --mattr=+f -target-abi=lp64d --verify-machineinstrs < %s | FileCheck %s --check-prefix=LA64F-LP64D
+; RUN: llc --mtriple=loongarch64 --mattr=+d -target-abi=lp64s --verify-machineinstrs < %s | FileCheck %s --check-prefix=LA64D-LP64S
+; RUN: llc --mtriple=loongarch64 --mattr=+d -target-abi=lp64d --verify-machineinstrs < %s | FileCheck %s --check-prefix=LA64D-LP64D
+
+define i32 @callee_half_in_fregs(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, half %i) nounwind {
+; LA32S-LABEL: callee_half_in_fregs:
+; LA32S: # %bb.0:
+; LA32S-NEXT: addi.w $sp, $sp, -16
+; LA32S-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32S-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill
+; LA32S-NEXT: ld.hu $a1, $sp, 16
+; LA32S-NEXT: move $fp, $a0
+; LA32S-NEXT: move $a0, $a1
+; LA32S-NEXT: bl __extendhfsf2
+; LA32S-NEXT: bl __fixsfsi
+; LA32S-NEXT: add.w $a0, $fp, $a0
+; LA32S-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload
+; LA32S-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32S-NEXT: addi.w $sp, $sp, 16
+; LA32S-NEXT: ret
+;
+; LA32F-ILP32S-LABEL: callee_half_in_fregs:
+; LA32F-ILP32S: # %bb.0:
+; LA32F-ILP32S-NEXT: addi.w $sp, $sp, -16
+; LA32F-ILP32S-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32F-ILP32S-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill
+; LA32F-ILP32S-NEXT: ld.hu $a1, $sp, 16
+; LA32F-ILP32S-NEXT: move $fp, $a0
+; LA32F-ILP32S-NEXT: move $a0, $a1
+; LA32F-ILP32S-NEXT: bl __extendhfsf2
+; LA32F-ILP32S-NEXT: movgr2fr.w $fa0, $a0
+; LA32F-ILP32S-NEXT: ftintrz.w.s $fa0, $fa0
+; LA32F-ILP32S-NEXT: movfr2gr.s $a0, $fa0
+; LA32F-ILP32S-NEXT: add.w $a0, $fp, $a0
+; LA32F-ILP32S-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload
+; LA32F-ILP32S-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32F-ILP32S-NEXT: addi.w $sp, $sp, 16
+; LA32F-ILP32S-NEXT: ret
+;
+; LA32F-ILP32D-LABEL: callee_half_in_fregs:
+; LA32F-ILP32D: # %bb.0:
+; LA32F-ILP32D-NEXT: addi.w $sp, $sp, -16
+; LA32F-ILP32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32F-ILP32D-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill
+; LA32F-ILP32D-NEXT: move $fp, $a0
+; LA32F-ILP32D-NEXT: bl __extendhfsf2
+; LA32F-ILP32D-NEXT: ftintrz.w.s $fa0, $fa0
+; LA32F-ILP32D-NEXT: movfr2gr.s $a0, $fa0
+; LA32F-ILP32D-NEXT: add.w $a0, $fp, $a0
+; LA32F-ILP32D-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload
+; LA32F-ILP32D-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32F-ILP32D-NEXT: addi.w $sp, $sp, 16
+; LA32F-ILP32D-NEXT: ret
+;
+; LA32D-ILP32S-LABEL: callee_half_in_fregs:
+; LA32D-ILP32S: # %bb.0:
+; LA32D-ILP32S-NEXT: addi.w $sp, $sp, -16
+; LA32D-ILP32S-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32D-ILP32S-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill
+; LA32D-ILP32S-NEXT: ld.hu $a1, $sp, 16
+; LA32D-ILP32S-NEXT: move $fp, $a0
+; LA32D-ILP32S-NEXT: move $a0, $a1
+; LA32D-ILP32S-NEXT: bl __extendhfsf2
+; LA32D-ILP32S-NEXT: movgr2fr.w $fa0, $a0
+; LA32D-ILP32S-NEXT: ftintrz.w.s $fa0, $fa0
+; LA32D-ILP32S-NEXT: movfr2gr.s $a0, $fa0
+; LA32D-ILP32S-NEXT: add.w $a0, $fp, $a0
+; LA32D-ILP32S-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload
+; LA32D-ILP32S-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32D-ILP32S-NEXT: addi.w $sp, $sp, 16
+; LA32D-ILP32S-NEXT: ret
+;
+; LA32D-ILP32D-LABEL: callee_half_in_fregs:
+; LA32D-ILP32D: # %bb.0:
+; LA32D-ILP32D-NEXT: addi.w $sp, $sp, -16
+; LA32D-ILP32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32D-ILP32D-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill
+; LA32D-ILP32D-NEXT: move $fp, $a0
+; LA32D-ILP32D-NEXT: bl __extendhfsf2
+; LA32D-ILP32D-NEXT: ftintrz.w.s $fa0, $fa0
+; LA32D-ILP32D-NEXT: movfr2gr.s $a0, $fa0
+; LA32D-ILP32D-NEXT: add.w $a0, $fp, $a0
+; LA32D-ILP32D-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload
+; LA32D-ILP32D-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32D-ILP32D-NEXT: addi.w $sp, $sp, 16
+; LA32D-ILP32D-NEXT: ret
+;
+; LA64S-LABEL: callee_half_in_fregs:
+; LA64S: # %bb.0:
+; LA64S-NEXT: addi.d $sp, $sp, -16
+; LA64S-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64S-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill
+; LA64S-NEXT: move $fp, $a0
+; LA64S-NEXT: pcaddu18i $ra, %call36(__extendhfsf2)
+; LA64S-NEXT: jirl $ra, $ra, 0
+; LA64S-NEXT: ftintrz.l.s $fa0, $fa0
+; LA64S-NEXT: movfr2gr.d $a0, $fa0
+; LA64S-NEXT: add.w $a0, $fp, $a0
+; LA64S-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload
+; LA64S-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64S-NEXT: addi.d $sp, $sp, 16
+; LA64S-NEXT: ret
+;
+; LA64F-LP64S-LABEL: callee_half_in_fregs:
+; LA64F-LP64S: # %bb.0:
+; LA64F-LP64S-NEXT: addi.d $sp, $sp, -16
+; LA64F-LP64S-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64F-LP64S-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill
+; LA64F-LP64S-NEXT: ld.hu $a1, $sp, 16
+; LA64F-LP64S-NEXT: move $fp, $a0
+; LA64F-LP64S-NEXT: move $a0, $a1
+; LA64F-LP64S-NEXT: pcaddu18i $ra, %call36(__extendhfsf2)
+; LA64F-LP64S-NEXT: jirl $ra, $ra, 0
+; LA64F-LP64S-NEXT: movgr2fr.w $fa0, $a0
+; LA64F-LP64S-NEXT: ftintrz.l.s $fa0, $fa0
+; LA64F-LP64S-NEXT: movfr2gr.d $a0, $fa0
+; LA64F-LP64S-NEXT: add.w $a0, $fp, $a0
+; LA64F-LP64S-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload
+; LA64F-LP64S-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64F-LP64S-NEXT: addi.d $sp, $sp, 16
+; LA64F-LP64S-NEXT: ret
+;
+; LA64F-LP64D-LABEL: callee_half_in_fregs:
+; LA64F-LP64D: # %bb.0:
+; LA64F-LP64D-NEXT: addi.d $sp, $sp, -16
+; LA64F-LP64D-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64F-LP64D-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill
+; LA64F-LP64D-NEXT: move $fp, $a0
+; LA64F-LP64D-NEXT: pcaddu18i $ra, %call36(__extendhfsf2)
+; LA64F-LP64D-NEXT: jirl $ra, $ra, 0
+; LA64F-LP64D-NEXT: ftintrz.l.s $fa0, $fa0
+; LA64F-LP64D-NEXT: movfr2gr.d $a0, $fa0
+; LA64F-LP64D-NEXT: add.w $a0, $fp, $a0
+; LA64F-LP64D-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload
+; LA64F-LP64D-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64F-LP64D-NEXT: addi.d $sp, $sp, 16
+; LA64F-LP64D-NEXT: ret
+;
+; LA64D-LP64S-LABEL: callee_half_in_fregs:
+; LA64D-LP64S: # %bb.0:
+; LA64D-LP64S-NEXT: addi.d $sp, $sp, -16
+; LA64D-LP64S-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64D-LP64S-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill
+; LA64D-LP64S-NEXT: ld.hu $a1, $sp, 16
+; LA64D-LP64S-NEXT: move $fp, $a0
+; LA64D-LP64S-NEXT: move $a0, $a1
+; LA64D-LP64S-NEXT: pcaddu18i $ra, %call36(__extendhfsf2)
+; LA64D-LP64S-NEXT: jirl $ra, $ra, 0
+; LA64D-LP64S-NEXT: movgr2fr.w $fa0, $a0
+; LA64D-LP64S-NEXT: ftintrz.l.s $fa0, $fa0
+; LA64D-LP64S-NEXT: movfr2gr.d $a0, $fa0
+; LA64D-LP64S-NEXT: add.w $a0, $fp, $a0
+; LA64D-LP64S-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload
+; LA64D-LP64S-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64D-LP64S-NEXT: addi.d $sp, $sp, 16
+; LA64D-LP64S-NEXT: ret
+;
+; LA64D-LP64D-LABEL: callee_half_in_fregs:
+; LA64D-LP64D: # %bb.0:
+; LA64D-LP64D-NEXT: addi.d $sp, $sp, -16
+; LA64D-LP64D-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64D-LP64D-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill
+; LA64D-LP64D-NEXT: move $fp, $a0
+; LA64D-LP64D-NEXT: pcaddu18i $ra, %call36(__extendhfsf2)
+; LA64D-LP64D-NEXT: jirl $ra, $ra, 0
+; LA64D-LP64D-NEXT: ftintrz.l.s $fa0, $fa0
+; LA64D-LP64D-NEXT: movfr2gr.d $a0, $fa0
+; LA64D-LP64D-NEXT: add.w $a0, $fp, $a0
+; LA64D-LP64D-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload
+; LA64D-LP64D-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64D-LP64D-NEXT: addi.d $sp, $sp, 16
+; LA64D-LP64D-NEXT: ret
+ %1 = fptosi half %i to i32
+ %2 = add i32 %a, %1
+ ret i32 %2
+}
+
+define i32 @caller_half_in_fregs() nounwind {
+; LA32S-LABEL: caller_half_in_fregs:
+; LA32S: # %bb.0:
+; LA32S-NEXT: addi.w $sp, $sp, -16
+; LA32S-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32S-NEXT: lu12i.w $t0, 4
+; LA32S-NEXT: ori $a0, $zero, 1
+; LA32S-NEXT: ori $a1, $zero, 2
+; LA32S-NEXT: ori $a2, $zero, 3
+; LA32S-NEXT: ori $a3, $zero, 4
+; LA32S-NEXT: ori $a4, $zero, 5
+; LA32S-NEXT: ori $a5, $zero, 6
+; LA32S-NEXT: ori $a6, $zero, 7
+; LA32S-NEXT: ori $a7, $zero, 8
+; LA32S-NEXT: st.w $t0, $sp, 0
+; LA32S-NEXT: bl callee_half_in_fregs
+; LA32S-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32S-NEXT: addi.w $sp, $sp, 16
+; LA32S-NEXT: ret
+;
+; LA32F-ILP32S-LABEL: caller_half_in_fregs:
+; LA32F-ILP32S: # %bb.0:
+; LA32F-ILP32S-NEXT: addi.w $sp, $sp, -16
+; LA32F-ILP32S-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spi...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/141702
More information about the llvm-commits mailing list