[llvm] [LoongArch] Custom legalizing ConstantFP to avoid float loads (PR #158050)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 11 05:04:35 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-loongarch
Author: ZhaoQi (zhaoqi5)
Changes:
Patch is 145.19 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/158050.diff
15 Files Affected:
- (modified) llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td (+21)
- (modified) llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td (+28)
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+53)
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.h (+4)
- (modified) llvm/test/CodeGen/LoongArch/calling-conv-half.ll (+298-255)
- (modified) llvm/test/CodeGen/LoongArch/calling-conv-ilp32d.ll (+51-40)
- (modified) llvm/test/CodeGen/LoongArch/double-imm.ll (+17-12)
- (modified) llvm/test/CodeGen/LoongArch/float-imm.ll (+10-12)
- (modified) llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll (+514-497)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll (+30-40)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll (+11-9)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll (+12-12)
- (modified) llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll (+10-13)
- (modified) llvm/test/CodeGen/LoongArch/target-abi-from-triple.ll (+4-6)
- (modified) llvm/test/CodeGen/LoongArch/vector-fp-imm.ll (+224-229)
``````````diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
index c45975431d833..690dd73014e57 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
@@ -17,6 +17,8 @@ def NotBoolXor : PatFrags<(ops node:$val),
// LoongArch specific DAG Nodes.
//===----------------------------------------------------------------------===//
+def SDT_LoongArchMOVGR2FR_W
+ : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i32>]>;
def SDT_LoongArchMOVGR2FR_W_LA64
: SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i64>]>;
def SDT_LoongArchMOVFR2GR_S_LA64
@@ -28,6 +30,8 @@ def SDT_LoongArchFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
// ISD::BRCOND is custom-lowered to LoongArchISD::BRCOND for floating-point
// comparisons to prevent recursive lowering.
def loongarch_brcond : SDNode<"LoongArchISD::BRCOND", SDTBrcond, [SDNPHasChain]>;
+def loongarch_movgr2fr_w
+ : SDNode<"LoongArchISD::MOVGR2FR_W", SDT_LoongArchMOVGR2FR_W>;
def loongarch_movgr2fr_w_la64
: SDNode<"LoongArchISD::MOVGR2FR_W_LA64", SDT_LoongArchMOVGR2FR_W_LA64>;
def loongarch_movfr2gr_s_la64
@@ -185,6 +189,14 @@ def : PatFpr<fneg, FNEG_S, FPR32>;
def : PatFpr<fabs, FABS_S, FPR32>;
def : PatFpr<fsqrt, FSQRT_S, FPR32>;
def : Pat<(fdiv fpimm1, (fsqrt FPR32:$fj)), (FRSQRT_S FPR32:$fj)>;
+let Predicates = [HasBasicF, IsLA64] in {
+def : Pat<(fdiv (loongarch_movgr2fr_w_la64 (i64 1065353216)), (fsqrt FPR32:$fj)),
+ (FRSQRT_S FPR32:$fj)>;
+} // Predicates = [HasBasicF, IsLA64]
+let Predicates = [HasBasicF, IsLA32] in {
+def : Pat<(fdiv (loongarch_movgr2fr_w (i32 1065353216)), (fsqrt FPR32:$fj)),
+ (FRSQRT_S FPR32:$fj)>;
+} // Predicates = [HasBasicF, IsLA32]
def : Pat<(fcanonicalize FPR32:$fj), (FMAX_S $fj, $fj)>;
def : Pat<(is_fpclass FPR32:$fj, (i32 timm:$mask)),
(SLTU R0, (ANDI (MOVFR2GR_S (FCLASS_S FPR32:$fj)),
@@ -295,6 +307,14 @@ def : Pat<(loongarch_ftint FPR32:$src), (FTINTRZ_W_S FPR32:$src)>;
// FP reciprocal operation
def : Pat<(fdiv fpimm1, FPR32:$src), (FRECIP_S $src)>;
+let Predicates = [HasBasicF, IsLA64] in {
+def : Pat<(fdiv (loongarch_movgr2fr_w_la64 (i64 1065353216)), FPR32:$src),
+ (FRECIP_S $src)>;
+} // Predicates = [HasBasicF, IsLA64]
+let Predicates = [HasBasicF, IsLA32] in {
+def : Pat<(fdiv (loongarch_movgr2fr_w (i32 1065353216)), FPR32:$src),
+ (FRECIP_S $src)>;
+} // Predicates = [HasBasicF, IsLA32]
let Predicates = [HasFrecipe] in {
// FP approximate reciprocal operation
@@ -350,6 +370,7 @@ def : PatFpr<frint, FRINT_S, FPR32>;
let Predicates = [HasBasicF, IsLA32] in {
// GPR -> FPR
def : Pat<(bitconvert (i32 GPR:$src)), (MOVGR2FR_W GPR:$src)>;
+def : Pat<(loongarch_movgr2fr_w (i32 GPR:$src)), (MOVGR2FR_W GPR:$src)>;
// FPR -> GPR
def : Pat<(i32 (bitconvert FPR32:$src)), (MOVFR2GR_S FPR32:$src)>;
// int -> f32
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
index 965ad8a0a35c6..daefbaa52d42a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
@@ -10,6 +10,21 @@
//
//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// LoongArch specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDT_LoongArchMOVGR2FR_D
+ : SDTypeProfile<1, 1, [SDTCisVT<0, f64>, SDTCisVT<1, i64>]>;
+def SDT_LoongArchMOVGR2FR_D_LO_HI
+ : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
+ SDTCisSameAs<1, 2>]>;
+
+def loongarch_movgr2fr_d
+ : SDNode<"LoongArchISD::MOVGR2FR_D", SDT_LoongArchMOVGR2FR_D>;
+def loongarch_movgr2fr_d_lo_hi
+ : SDNode<"LoongArchISD::MOVGR2FR_D_LO_HI", SDT_LoongArchMOVGR2FR_D_LO_HI>;
+
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
@@ -147,6 +162,11 @@ def : PatFpr<fneg, FNEG_D, FPR64>;
def : PatFpr<fabs, FABS_D, FPR64>;
def : PatFpr<fsqrt, FSQRT_D, FPR64>;
def : Pat<(fdiv fpimm1, (fsqrt FPR64:$fj)), (FRSQRT_D FPR64:$fj)>;
+let Predicates = [IsLA32] in {
+def : Pat<(fdiv (loongarch_movgr2fr_d_lo_hi (i32 0), (i32 1072693248)),
+ (fsqrt FPR64:$fj)),
+ (FRSQRT_D FPR64:$fj)>;
+} // Predicates = [IsLA32]
def : Pat<(fcopysign FPR64:$fj, FPR32:$fk),
(FCOPYSIGN_D FPR64:$fj, (FCVT_D_S FPR32:$fk))>;
def : Pat<(fcopysign FPR32:$fj, FPR64:$fk),
@@ -252,6 +272,10 @@ def : Pat<(f64 (fpextend FPR32:$src)), (FCVT_D_S FPR32:$src)>;
// FP reciprocal operation
def : Pat<(fdiv fpimm1, FPR64:$src), (FRECIP_D $src)>;
+let Predicates = [IsLA32] in {
+def : Pat<(fdiv (loongarch_movgr2fr_d_lo_hi (i32 0), (i32 1072693248)), FPR64:$src),
+ (FRECIP_D FPR64:$src)>;
+} // Predicates = [IsLA32]
let Predicates = [HasFrecipe] in {
// FP approximate reciprocal operation
@@ -307,9 +331,13 @@ def : Pat<(f64 (sint_to_fp (i64 (sexti32 (i64 GPR:$src))))),
def : Pat<(f64 (sint_to_fp GPR:$src)), (FFINT_D_L (MOVGR2FR_D GPR:$src))>;
def : Pat<(bitconvert GPR:$src), (MOVGR2FR_D GPR:$src)>;
+def : Pat<(loongarch_movgr2fr_d GPR:$src), (MOVGR2FR_D GPR:$src)>;
} // Predicates = [HasBasicD, IsLA64]
let Predicates = [HasBasicD, IsLA32] in {
def : Pat<(f64 (sint_to_fp (i32 GPR:$src))), (FFINT_D_W (MOVGR2FR_W GPR:$src))>;
+
+def : Pat<(f64 (loongarch_movgr2fr_d_lo_hi (i32 GPR:$lo), (i32 GPR:$hi))),
+ (MOVGR2FRH_W (MOVGR2FR_W_64 GPR:$lo), GPR:$hi)>;
} // Predicates = [HasBasicD, IsLA32]
// Convert FP to int
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 634914d3b3fd0..2f10dc9704445 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -190,6 +190,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
setOperationAction(ISD::FMA, MVT::f32, Legal);
@@ -237,6 +238,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
@@ -549,10 +551,58 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
return lowerVECREDUCE(Op, DAG);
+ case ISD::ConstantFP:
+ return lowerConstantFP(Op, DAG);
}
return SDValue();
}
+SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
+ const APFloat &FPVal = CFP->getValueAPF();
+ SDLoc DL(CFP);
+
+ assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
+ (VT == MVT::f64 && Subtarget.hasBasicD()));
+
+  // If the value is 0.0 or -0.0, leave it to the default handling.
+ if (FPVal.isZero())
+ return SDValue();
+
+  // If LSX is enabled, use the cheaper 'vldi' instruction when possible.
+ if (Subtarget.hasExtLSX() && isFPImmVLDILegal(FPVal, VT))
+ return SDValue();
+
+  // Otherwise, construct the bit pattern as an integer and move it to a float register.
+ APInt INTVal = FPVal.bitcastToAPInt();
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("Unexpected floating point type!");
+ break;
+ case MVT::f32: {
+ SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i32);
+ if (Subtarget.is64Bit())
+ NewVal = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, NewVal);
+ return DAG.getNode(Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
+ : LoongArchISD::MOVGR2FR_W,
+ DL, VT, NewVal);
+ }
+ case MVT::f64: {
+ if (Subtarget.is64Bit()) {
+ SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64);
+ return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
+ }
+ SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
+ SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
+ return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
+ }
+ }
+
+ return SDValue();
+}
+
// Lower vecreduce_add using vhaddw instructions.
// For Example:
// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
@@ -7041,7 +7091,10 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(SRL_W)
NODE_NAME_CASE(BSTRINS)
NODE_NAME_CASE(BSTRPICK)
+ NODE_NAME_CASE(MOVGR2FR_W)
NODE_NAME_CASE(MOVGR2FR_W_LA64)
+ NODE_NAME_CASE(MOVGR2FR_D)
+ NODE_NAME_CASE(MOVGR2FR_D_LO_HI)
NODE_NAME_CASE(MOVFR2GR_S_LA64)
NODE_NAME_CASE(FTINT)
NODE_NAME_CASE(BUILD_PAIR_F64)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 9d14934a9d363..c98b29d400dd6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -57,7 +57,10 @@ enum NodeType : unsigned {
MOD_WU,
// FPR<->GPR transfer operations
+ MOVGR2FR_W,
MOVGR2FR_W_LA64,
+ MOVGR2FR_D,
+ MOVGR2FR_D_LO_HI,
MOVFR2GR_S_LA64,
MOVFCSR2GR,
MOVGR2FCSR,
@@ -397,6 +400,7 @@ class LoongArchTargetLowering : public TargetLowering {
SDValue lowerBF16_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECREDUCE_ADD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-half.ll b/llvm/test/CodeGen/LoongArch/calling-conv-half.ll
index da8c3e93f6842..d111cf2fcfc07 100644
--- a/llvm/test/CodeGen/LoongArch/calling-conv-half.ll
+++ b/llvm/test/CodeGen/LoongArch/calling-conv-half.ll
@@ -226,8 +226,8 @@ define i32 @caller_half_in_fregs() nounwind {
; LA32F-ILP32D: # %bb.0:
; LA32F-ILP32D-NEXT: addi.w $sp, $sp, -16
; LA32F-ILP32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
-; LA32F-ILP32D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI1_0)
+; LA32F-ILP32D-NEXT: lu12i.w $a0, -12
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa0, $a0
; LA32F-ILP32D-NEXT: ori $a0, $zero, 1
; LA32F-ILP32D-NEXT: ori $a1, $zero, 2
; LA32F-ILP32D-NEXT: ori $a2, $zero, 3
@@ -264,8 +264,8 @@ define i32 @caller_half_in_fregs() nounwind {
; LA32D-ILP32D: # %bb.0:
; LA32D-ILP32D-NEXT: addi.w $sp, $sp, -16
; LA32D-ILP32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
-; LA32D-ILP32D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI1_0)
+; LA32D-ILP32D-NEXT: lu12i.w $a0, -12
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa0, $a0
; LA32D-ILP32D-NEXT: ori $a0, $zero, 1
; LA32D-ILP32D-NEXT: ori $a1, $zero, 2
; LA32D-ILP32D-NEXT: ori $a2, $zero, 3
@@ -283,8 +283,9 @@ define i32 @caller_half_in_fregs() nounwind {
; LA64S: # %bb.0:
; LA64S-NEXT: addi.d $sp, $sp, -16
; LA64S-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
-; LA64S-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI1_0)
+; LA64S-NEXT: lu12i.w $a0, -12
+; LA64S-NEXT: lu32i.d $a0, 0
+; LA64S-NEXT: movgr2fr.w $fa0, $a0
; LA64S-NEXT: ori $a0, $zero, 1
; LA64S-NEXT: ori $a1, $zero, 2
; LA64S-NEXT: ori $a2, $zero, 3
@@ -324,8 +325,9 @@ define i32 @caller_half_in_fregs() nounwind {
; LA64F-LP64D: # %bb.0:
; LA64F-LP64D-NEXT: addi.d $sp, $sp, -16
; LA64F-LP64D-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
-; LA64F-LP64D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI1_0)
+; LA64F-LP64D-NEXT: lu12i.w $a0, -12
+; LA64F-LP64D-NEXT: lu32i.d $a0, 0
+; LA64F-LP64D-NEXT: movgr2fr.w $fa0, $a0
; LA64F-LP64D-NEXT: ori $a0, $zero, 1
; LA64F-LP64D-NEXT: ori $a1, $zero, 2
; LA64F-LP64D-NEXT: ori $a2, $zero, 3
@@ -365,8 +367,9 @@ define i32 @caller_half_in_fregs() nounwind {
; LA64D-LP64D: # %bb.0:
; LA64D-LP64D-NEXT: addi.d $sp, $sp, -16
; LA64D-LP64D-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
-; LA64D-LP64D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI1_0)
+; LA64D-LP64D-NEXT: lu12i.w $a0, -12
+; LA64D-LP64D-NEXT: lu32i.d $a0, 0
+; LA64D-LP64D-NEXT: movgr2fr.w $fa0, $a0
; LA64D-LP64D-NEXT: ori $a0, $zero, 1
; LA64D-LP64D-NEXT: ori $a1, $zero, 2
; LA64D-LP64D-NEXT: ori $a2, $zero, 3
@@ -606,24 +609,24 @@ define i32 @caller_half_in_gregs() nounwind {
; LA32F-ILP32D: # %bb.0:
; LA32F-ILP32D-NEXT: addi.w $sp, $sp, -16
; LA32F-ILP32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; LA32F-ILP32D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI3_0)
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1)
-; LA32F-ILP32D-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI3_1)
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_2)
-; LA32F-ILP32D-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI3_2)
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_3)
-; LA32F-ILP32D-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI3_3)
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_4)
-; LA32F-ILP32D-NEXT: fld.s $fa4, $a0, %pc_lo12(.LCPI3_4)
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_5)
-; LA32F-ILP32D-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI3_5)
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_6)
-; LA32F-ILP32D-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI3_6)
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_7)
-; LA32F-ILP32D-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI3_7)
-; LA32F-ILP32D-NEXT: lu12i.w $a0, -12
-; LA32F-ILP32D-NEXT: ori $a0, $a0, 2176
+; LA32F-ILP32D-NEXT: lu12i.w $a1, -12
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa1, $a1
+; LA32F-ILP32D-NEXT: ori $a0, $a1, 2176
+; LA32F-ILP32D-NEXT: lu12i.w $a2, -13
+; LA32F-ILP32D-NEXT: ori $a2, $a2, 3072
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa0, $a2
+; LA32F-ILP32D-NEXT: ori $a2, $a1, 512
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa2, $a2
+; LA32F-ILP32D-NEXT: ori $a2, $a1, 1024
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa3, $a2
+; LA32F-ILP32D-NEXT: ori $a2, $a1, 1280
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa4, $a2
+; LA32F-ILP32D-NEXT: ori $a2, $a1, 1536
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa5, $a2
+; LA32F-ILP32D-NEXT: ori $a2, $a1, 1792
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa6, $a2
+; LA32F-ILP32D-NEXT: ori $a1, $a1, 2048
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa7, $a1
; LA32F-ILP32D-NEXT: ori $a1, $zero, 10
; LA32F-ILP32D-NEXT: bl callee_half_in_gregs
; LA32F-ILP32D-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
@@ -656,24 +659,24 @@ define i32 @caller_half_in_gregs() nounwind {
; LA32D-ILP32D: # %bb.0:
; LA32D-ILP32D-NEXT: addi.w $sp, $sp, -16
; LA32D-ILP32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; LA32D-ILP32D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI3_0)
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1)
-; LA32D-ILP32D-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI3_1)
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_2)
-; LA32D-ILP32D-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI3_2)
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_3)
-; LA32D-ILP32D-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI3_3)
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_4)
-; LA32D-ILP32D-NEXT: fld.s $fa4, $a0, %pc_lo12(.LCPI3_4)
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_5)
-; LA32D-ILP32D-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI3_5)
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_6)
-; LA32D-ILP32D-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI3_6)
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_7)
-; LA32D-ILP32D-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI3_7)
-; LA32D-ILP32D-NEXT: lu12i.w $a0, -12
-; LA32D-ILP32D-NEXT: ori $a0, $a0, 2176
+; LA32D-ILP32D-NEXT: lu12i.w $a1, -12
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa1, $a1
+; LA32D-ILP32D-NEXT: ori $a0, $a1, 2176
+; LA32D-ILP32D-NEXT: lu12i.w $a2, -13
+; LA32D-ILP32D-NEXT: ori $a2, $a2, 3072
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa0, $a2
+; LA32D-ILP32D-NEXT: ori $a2, $a1, 512
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa2, $a2
+; LA32D-ILP32D-NEXT: ori $a2, $a1, 1024
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa3, $a2
+; LA32D-ILP32D-NEXT: ori $a2, $a1, 1280
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa4, $a2
+; LA32D-ILP32D-NEXT: ori $a2, $a1, 1536
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa5, $a2
+; LA32D-ILP32D-NEXT: ori $a2, $a1, 1792
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa6, $a2
+; LA32D-ILP32D-NEXT: ori $a1, $a1, 2048
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa7, $a1
; LA32D-ILP32D-NEXT: ori $a1, $zero, 10
; LA32D-ILP32D-NEXT: bl callee_half_in_gregs
; LA32D-ILP32D-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
@@ -684,25 +687,33 @@ define i32 @caller_half_in_gregs() nounwind {
; LA64S: # %bb.0:
; LA64S-NEXT: addi.d $sp, $sp, -16
; LA64S-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; LA64S-NEXT: fld.s $ft0, $a0, %pc_lo12(.LCPI3_0)
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1)
-; LA64S-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI3_1)
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_2)
-; LA64S-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI3_2)
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_3)
-; LA64S-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI3_3)
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_4)
-; LA64S-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI3_4)
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_5)
-; LA64S-NEXT: fld.s $fa4, $a0, %pc_lo12(.LCPI3_5)
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_6)
-; LA64S-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI3_6)
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_7)
-; LA64S-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI3_7)
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_8)
-; LA64S-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI3_8)
-; LA64S-NEXT: movfr2gr.s $a0, $ft0
+; LA64S-NEXT: lu12i.w $a1, -12
+; LA64S-NEXT: ori $a0, $a1, 2176
+; LA64S-NEXT: ori $a2, $a1, 512
+; LA64S-NEXT: ori $a3, $a1, 1024
+; LA64S-NEXT: ori $a4, $a1, 1280
+; LA64S-NEXT: ori $a5, $a1, 1536
+; LA64S-NEXT: ori $a6, $a1, 1792
+; LA64S-NEXT: ori $a7, $a1, 2048
+; LA64S-NEXT: lu32i.d $a1, 0
+; LA64S-NEXT: movgr2fr.w $fa1, $a1
+; LA64S-NEXT: lu12i.w $a1, -13
+; LA64S-NEXT: ori $a1, $a1, 3072
+; LA64S-NEXT: lu32i.d $a1, 0
+; LA64S-NEXT: movgr2fr.w $fa0, $a1
+; LA64S-NEXT: lu32i.d $a2, 0
+; LA64S-NEXT: movgr2fr.w $fa2, $a2
+; LA64S-NEXT: lu32i.d $a3, 0
+; LA64S-NEXT: movgr2fr.w $fa3, $a3
+; LA64S-NEXT: lu32i.d $a4, 0
+; LA64S-NEXT: movgr2fr.w $fa4, $a4
+; LA64S-NEXT: lu32i.d $a5, 0
+; LA64S-NEXT: movgr2fr.w $fa5, $a5
+; LA64S-NEXT: lu32i.d $a0, 0
+; LA64S-NEXT: lu32i.d $a6, 0
+; LA64S-NEXT: movgr2fr.w $fa6, $a6
+; LA64S-NEXT: lu32i.d $a7, 0
+; LA64S-NEXT: movgr2fr.w $fa7, $a7
; LA64S-NEXT: ori $a1, $zero, 10
; LA64S-NEXT: pcaddu18i $ra, %call36(callee_half_in_gregs)
; LA64S-NEXT: jirl $ra, $ra, 0
@@ -714,35 +725,27 @@ define i32 @caller_half_in_gregs() nounwind {
; LA64F-LP64S: # %bb.0:
; LA64F-LP64S-NEXT: addi.d $sp, $sp, -32
; LA64F-LP64S-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
-; LA64F-LP64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; LA64F-LP64S-NEXT: fld.s $fa0, $a0, %pc_lo...
[truncated]
``````````
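For readers skimming the truncated diff: the patch stops loading floating-point immediates from the constant pool (`pcalau12i` + `fld.s`/`fld.d` against `.LCPI*` labels) and instead materializes the bit pattern in a GPR and transfers it with `movgr2fr.*`. The sketch below is illustrative only, assuming no LSX (so the `vldi` shortcut in `lowerConstantFP` does not apply) and a nonzero constant; the function names and exact register choices are invented and will not match the CHECK lines in the updated tests.

```llvm
; Illustrative sketch, not taken from the tests in this patch.

define float @f32_one() nounwind {
; Before: load from the constant pool:
;   pcalau12i  $a0, %pc_hi20(.LCPI0_0)
;   fld.s      $fa0, $a0, %pc_lo12(.LCPI0_0)
; After: 1.0f (bit pattern 0x3F800000 = 1065353216, the constant appearing in
; the new FRECIP_S/FRSQRT_S patterns) is built in a GPR and moved over:
;   lu12i.w    $a0, 260096
;   movgr2fr.w $fa0, $a0
  ret float 1.0
}

define double @f64_one_la32() nounwind {
; On LA32 with basic D, the f64 bit pattern is split into two 32-bit words
; (lo = 0, hi = 0x3FF00000 = 1072693248) and combined through the new
; MOVGR2FR_D_LO_HI node:
;   lu12i.w     $a0, 261888
;   movgr2fr.w  $fa0, $zero
;   movgr2frh.w $fa0, $a0
  ret double 1.0
}
```

On LA64 the whole pattern fits in one GPR (MOVGR2FR_W_LA64 for f32, MOVGR2FR_D for f64); only LA32 needs the lo/hi split, which selects to `(MOVGR2FRH_W (MOVGR2FR_W_64 $lo), $hi)` per the new pattern in LoongArchFloat64InstrInfo.td.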
https://github.com/llvm/llvm-project/pull/158050
More information about the llvm-commits mailing list