[llvm] [LoongArch] Custom legalizing ConstantFP to avoid float loads (PR #158050)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 12 05:35:31 PDT 2025
https://github.com/zhaoqi5 updated https://github.com/llvm/llvm-project/pull/158050
>From ea1337e3d361f5dc491cdc2979b8fbac75b1ad66 Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Thu, 11 Sep 2025 16:56:35 +0800
Subject: [PATCH 1/3] [LoongArch] Custom legalizing ConstantFP to avoid float
loads
TODO: the `frecip` and `frsqrt` patterns cannot match yet.
---
.../LoongArch/LoongArchFloat32InstrInfo.td | 5 +
.../LoongArch/LoongArchFloat64InstrInfo.td | 19 +
.../LoongArch/LoongArchISelLowering.cpp | 53 +
.../Target/LoongArch/LoongArchISelLowering.h | 4 +
.../CodeGen/LoongArch/calling-conv-half.ll | 553 ++++-----
.../CodeGen/LoongArch/calling-conv-ilp32d.ll | 91 +-
llvm/test/CodeGen/LoongArch/double-imm.ll | 29 +-
llvm/test/CodeGen/LoongArch/float-imm.ll | 22 +-
llvm/test/CodeGen/LoongArch/fp-reciprocal.ll | 17 +-
.../LoongArch/fsqrt-reciprocal-estimate.ll | 1018 +++++++++--------
llvm/test/CodeGen/LoongArch/fsqrt.ll | 21 +-
.../LoongArch/ir-instruction/atomicrmw-fp.ll | 70 +-
.../ir-instruction/double-convert.ll | 20 +-
.../LoongArch/ir-instruction/float-convert.ll | 24 +-
.../target-abi-from-triple-edge-cases.ll | 23 +-
.../LoongArch/target-abi-from-triple.ll | 10 +-
llvm/test/CodeGen/LoongArch/vector-fp-imm.ll | 453 ++++----
17 files changed, 1298 insertions(+), 1134 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
index c45975431d833..cd6bb40c880f1 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
@@ -17,6 +17,8 @@ def NotBoolXor : PatFrags<(ops node:$val),
// LoongArch specific DAG Nodes.
//===----------------------------------------------------------------------===//
+def SDT_LoongArchMOVGR2FR_W
+ : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i32>]>;
def SDT_LoongArchMOVGR2FR_W_LA64
: SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i64>]>;
def SDT_LoongArchMOVFR2GR_S_LA64
@@ -28,6 +30,8 @@ def SDT_LoongArchFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
// ISD::BRCOND is custom-lowered to LoongArchISD::BRCOND for floating-point
// comparisons to prevent recursive lowering.
def loongarch_brcond : SDNode<"LoongArchISD::BRCOND", SDTBrcond, [SDNPHasChain]>;
+def loongarch_movgr2fr_w
+ : SDNode<"LoongArchISD::MOVGR2FR_W", SDT_LoongArchMOVGR2FR_W>;
def loongarch_movgr2fr_w_la64
: SDNode<"LoongArchISD::MOVGR2FR_W_LA64", SDT_LoongArchMOVGR2FR_W_LA64>;
def loongarch_movfr2gr_s_la64
@@ -350,6 +354,7 @@ def : PatFpr<frint, FRINT_S, FPR32>;
let Predicates = [HasBasicF, IsLA32] in {
// GPR -> FPR
def : Pat<(bitconvert (i32 GPR:$src)), (MOVGR2FR_W GPR:$src)>;
+def : Pat<(loongarch_movgr2fr_w (i32 GPR:$src)), (MOVGR2FR_W GPR:$src)>;
// FPR -> GPR
def : Pat<(i32 (bitconvert FPR32:$src)), (MOVFR2GR_S FPR32:$src)>;
// int -> f32
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
index 965ad8a0a35c6..c5fb7aeb9ed85 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
@@ -10,6 +10,21 @@
//
//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// LoongArch specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDT_LoongArchMOVGR2FR_D
+ : SDTypeProfile<1, 1, [SDTCisVT<0, f64>, SDTCisVT<1, i64>]>;
+def SDT_LoongArchMOVGR2FR_D_LO_HI
+ : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
+ SDTCisSameAs<1, 2>]>;
+
+def loongarch_movgr2fr_d
+ : SDNode<"LoongArchISD::MOVGR2FR_D", SDT_LoongArchMOVGR2FR_D>;
+def loongarch_movgr2fr_d_lo_hi
+ : SDNode<"LoongArchISD::MOVGR2FR_D_LO_HI", SDT_LoongArchMOVGR2FR_D_LO_HI>;
+
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
@@ -307,9 +322,13 @@ def : Pat<(f64 (sint_to_fp (i64 (sexti32 (i64 GPR:$src))))),
def : Pat<(f64 (sint_to_fp GPR:$src)), (FFINT_D_L (MOVGR2FR_D GPR:$src))>;
def : Pat<(bitconvert GPR:$src), (MOVGR2FR_D GPR:$src)>;
+def : Pat<(loongarch_movgr2fr_d GPR:$src), (MOVGR2FR_D GPR:$src)>;
} // Predicates = [HasBasicD, IsLA64]
let Predicates = [HasBasicD, IsLA32] in {
def : Pat<(f64 (sint_to_fp (i32 GPR:$src))), (FFINT_D_W (MOVGR2FR_W GPR:$src))>;
+
+def : Pat<(f64 (loongarch_movgr2fr_d_lo_hi (i32 GPR:$lo), (i32 GPR:$hi))),
+ (MOVGR2FRH_W (MOVGR2FR_W_64 GPR:$lo), GPR:$hi)>;
} // Predicates = [HasBasicD, IsLA32]
// Convert FP to int
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 634914d3b3fd0..2f10dc9704445 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -190,6 +190,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
setOperationAction(ISD::FMA, MVT::f32, Legal);
@@ -237,6 +238,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
@@ -549,10 +551,58 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
return lowerVECREDUCE(Op, DAG);
+ case ISD::ConstantFP:
+ return lowerConstantFP(Op, DAG);
}
return SDValue();
}
+SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
+ const APFloat &FPVal = CFP->getValueAPF();
+ SDLoc DL(CFP);
+
+ assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
+ (VT == MVT::f64 && Subtarget.hasBasicD()));
+
+ // If value is 0.0 or -0.0, just ignore it.
+ if (FPVal.isZero())
+ return SDValue();
+
+  // If LSX is enabled, use the cheaper 'vldi' instruction if possible.
+ if (Subtarget.hasExtLSX() && isFPImmVLDILegal(FPVal, VT))
+ return SDValue();
+
+ // Construct as integer, and move to float register.
+ APInt INTVal = FPVal.bitcastToAPInt();
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("Unexpected floating point type!");
+ break;
+ case MVT::f32: {
+ SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i32);
+ if (Subtarget.is64Bit())
+ NewVal = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, NewVal);
+ return DAG.getNode(Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
+ : LoongArchISD::MOVGR2FR_W,
+ DL, VT, NewVal);
+ }
+ case MVT::f64: {
+ if (Subtarget.is64Bit()) {
+ SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64);
+ return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
+ }
+ SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
+ SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
+ return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
+ }
+ }
+
+ return SDValue();
+}
+
// Lower vecreduce_add using vhaddw instructions.
// For Example:
// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
@@ -7041,7 +7091,10 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(SRL_W)
NODE_NAME_CASE(BSTRINS)
NODE_NAME_CASE(BSTRPICK)
+ NODE_NAME_CASE(MOVGR2FR_W)
NODE_NAME_CASE(MOVGR2FR_W_LA64)
+ NODE_NAME_CASE(MOVGR2FR_D)
+ NODE_NAME_CASE(MOVGR2FR_D_LO_HI)
NODE_NAME_CASE(MOVFR2GR_S_LA64)
NODE_NAME_CASE(FTINT)
NODE_NAME_CASE(BUILD_PAIR_F64)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 9d14934a9d363..c98b29d400dd6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -57,7 +57,10 @@ enum NodeType : unsigned {
MOD_WU,
// FPR<->GPR transfer operations
+ MOVGR2FR_W,
MOVGR2FR_W_LA64,
+ MOVGR2FR_D,
+ MOVGR2FR_D_LO_HI,
MOVFR2GR_S_LA64,
MOVFCSR2GR,
MOVGR2FCSR,
@@ -397,6 +400,7 @@ class LoongArchTargetLowering : public TargetLowering {
SDValue lowerBF16_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECREDUCE_ADD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-half.ll b/llvm/test/CodeGen/LoongArch/calling-conv-half.ll
index da8c3e93f6842..d111cf2fcfc07 100644
--- a/llvm/test/CodeGen/LoongArch/calling-conv-half.ll
+++ b/llvm/test/CodeGen/LoongArch/calling-conv-half.ll
@@ -226,8 +226,8 @@ define i32 @caller_half_in_fregs() nounwind {
; LA32F-ILP32D: # %bb.0:
; LA32F-ILP32D-NEXT: addi.w $sp, $sp, -16
; LA32F-ILP32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
-; LA32F-ILP32D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI1_0)
+; LA32F-ILP32D-NEXT: lu12i.w $a0, -12
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa0, $a0
; LA32F-ILP32D-NEXT: ori $a0, $zero, 1
; LA32F-ILP32D-NEXT: ori $a1, $zero, 2
; LA32F-ILP32D-NEXT: ori $a2, $zero, 3
@@ -264,8 +264,8 @@ define i32 @caller_half_in_fregs() nounwind {
; LA32D-ILP32D: # %bb.0:
; LA32D-ILP32D-NEXT: addi.w $sp, $sp, -16
; LA32D-ILP32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
-; LA32D-ILP32D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI1_0)
+; LA32D-ILP32D-NEXT: lu12i.w $a0, -12
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa0, $a0
; LA32D-ILP32D-NEXT: ori $a0, $zero, 1
; LA32D-ILP32D-NEXT: ori $a1, $zero, 2
; LA32D-ILP32D-NEXT: ori $a2, $zero, 3
@@ -283,8 +283,9 @@ define i32 @caller_half_in_fregs() nounwind {
; LA64S: # %bb.0:
; LA64S-NEXT: addi.d $sp, $sp, -16
; LA64S-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
-; LA64S-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI1_0)
+; LA64S-NEXT: lu12i.w $a0, -12
+; LA64S-NEXT: lu32i.d $a0, 0
+; LA64S-NEXT: movgr2fr.w $fa0, $a0
; LA64S-NEXT: ori $a0, $zero, 1
; LA64S-NEXT: ori $a1, $zero, 2
; LA64S-NEXT: ori $a2, $zero, 3
@@ -324,8 +325,9 @@ define i32 @caller_half_in_fregs() nounwind {
; LA64F-LP64D: # %bb.0:
; LA64F-LP64D-NEXT: addi.d $sp, $sp, -16
; LA64F-LP64D-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
-; LA64F-LP64D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI1_0)
+; LA64F-LP64D-NEXT: lu12i.w $a0, -12
+; LA64F-LP64D-NEXT: lu32i.d $a0, 0
+; LA64F-LP64D-NEXT: movgr2fr.w $fa0, $a0
; LA64F-LP64D-NEXT: ori $a0, $zero, 1
; LA64F-LP64D-NEXT: ori $a1, $zero, 2
; LA64F-LP64D-NEXT: ori $a2, $zero, 3
@@ -365,8 +367,9 @@ define i32 @caller_half_in_fregs() nounwind {
; LA64D-LP64D: # %bb.0:
; LA64D-LP64D-NEXT: addi.d $sp, $sp, -16
; LA64D-LP64D-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
-; LA64D-LP64D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI1_0)
+; LA64D-LP64D-NEXT: lu12i.w $a0, -12
+; LA64D-LP64D-NEXT: lu32i.d $a0, 0
+; LA64D-LP64D-NEXT: movgr2fr.w $fa0, $a0
; LA64D-LP64D-NEXT: ori $a0, $zero, 1
; LA64D-LP64D-NEXT: ori $a1, $zero, 2
; LA64D-LP64D-NEXT: ori $a2, $zero, 3
@@ -606,24 +609,24 @@ define i32 @caller_half_in_gregs() nounwind {
; LA32F-ILP32D: # %bb.0:
; LA32F-ILP32D-NEXT: addi.w $sp, $sp, -16
; LA32F-ILP32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; LA32F-ILP32D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI3_0)
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1)
-; LA32F-ILP32D-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI3_1)
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_2)
-; LA32F-ILP32D-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI3_2)
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_3)
-; LA32F-ILP32D-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI3_3)
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_4)
-; LA32F-ILP32D-NEXT: fld.s $fa4, $a0, %pc_lo12(.LCPI3_4)
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_5)
-; LA32F-ILP32D-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI3_5)
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_6)
-; LA32F-ILP32D-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI3_6)
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_7)
-; LA32F-ILP32D-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI3_7)
-; LA32F-ILP32D-NEXT: lu12i.w $a0, -12
-; LA32F-ILP32D-NEXT: ori $a0, $a0, 2176
+; LA32F-ILP32D-NEXT: lu12i.w $a1, -12
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa1, $a1
+; LA32F-ILP32D-NEXT: ori $a0, $a1, 2176
+; LA32F-ILP32D-NEXT: lu12i.w $a2, -13
+; LA32F-ILP32D-NEXT: ori $a2, $a2, 3072
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa0, $a2
+; LA32F-ILP32D-NEXT: ori $a2, $a1, 512
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa2, $a2
+; LA32F-ILP32D-NEXT: ori $a2, $a1, 1024
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa3, $a2
+; LA32F-ILP32D-NEXT: ori $a2, $a1, 1280
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa4, $a2
+; LA32F-ILP32D-NEXT: ori $a2, $a1, 1536
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa5, $a2
+; LA32F-ILP32D-NEXT: ori $a2, $a1, 1792
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa6, $a2
+; LA32F-ILP32D-NEXT: ori $a1, $a1, 2048
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa7, $a1
; LA32F-ILP32D-NEXT: ori $a1, $zero, 10
; LA32F-ILP32D-NEXT: bl callee_half_in_gregs
; LA32F-ILP32D-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
@@ -656,24 +659,24 @@ define i32 @caller_half_in_gregs() nounwind {
; LA32D-ILP32D: # %bb.0:
; LA32D-ILP32D-NEXT: addi.w $sp, $sp, -16
; LA32D-ILP32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; LA32D-ILP32D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI3_0)
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1)
-; LA32D-ILP32D-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI3_1)
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_2)
-; LA32D-ILP32D-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI3_2)
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_3)
-; LA32D-ILP32D-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI3_3)
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_4)
-; LA32D-ILP32D-NEXT: fld.s $fa4, $a0, %pc_lo12(.LCPI3_4)
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_5)
-; LA32D-ILP32D-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI3_5)
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_6)
-; LA32D-ILP32D-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI3_6)
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_7)
-; LA32D-ILP32D-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI3_7)
-; LA32D-ILP32D-NEXT: lu12i.w $a0, -12
-; LA32D-ILP32D-NEXT: ori $a0, $a0, 2176
+; LA32D-ILP32D-NEXT: lu12i.w $a1, -12
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa1, $a1
+; LA32D-ILP32D-NEXT: ori $a0, $a1, 2176
+; LA32D-ILP32D-NEXT: lu12i.w $a2, -13
+; LA32D-ILP32D-NEXT: ori $a2, $a2, 3072
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa0, $a2
+; LA32D-ILP32D-NEXT: ori $a2, $a1, 512
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa2, $a2
+; LA32D-ILP32D-NEXT: ori $a2, $a1, 1024
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa3, $a2
+; LA32D-ILP32D-NEXT: ori $a2, $a1, 1280
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa4, $a2
+; LA32D-ILP32D-NEXT: ori $a2, $a1, 1536
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa5, $a2
+; LA32D-ILP32D-NEXT: ori $a2, $a1, 1792
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa6, $a2
+; LA32D-ILP32D-NEXT: ori $a1, $a1, 2048
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa7, $a1
; LA32D-ILP32D-NEXT: ori $a1, $zero, 10
; LA32D-ILP32D-NEXT: bl callee_half_in_gregs
; LA32D-ILP32D-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
@@ -684,25 +687,33 @@ define i32 @caller_half_in_gregs() nounwind {
; LA64S: # %bb.0:
; LA64S-NEXT: addi.d $sp, $sp, -16
; LA64S-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; LA64S-NEXT: fld.s $ft0, $a0, %pc_lo12(.LCPI3_0)
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1)
-; LA64S-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI3_1)
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_2)
-; LA64S-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI3_2)
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_3)
-; LA64S-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI3_3)
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_4)
-; LA64S-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI3_4)
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_5)
-; LA64S-NEXT: fld.s $fa4, $a0, %pc_lo12(.LCPI3_5)
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_6)
-; LA64S-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI3_6)
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_7)
-; LA64S-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI3_7)
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_8)
-; LA64S-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI3_8)
-; LA64S-NEXT: movfr2gr.s $a0, $ft0
+; LA64S-NEXT: lu12i.w $a1, -12
+; LA64S-NEXT: ori $a0, $a1, 2176
+; LA64S-NEXT: ori $a2, $a1, 512
+; LA64S-NEXT: ori $a3, $a1, 1024
+; LA64S-NEXT: ori $a4, $a1, 1280
+; LA64S-NEXT: ori $a5, $a1, 1536
+; LA64S-NEXT: ori $a6, $a1, 1792
+; LA64S-NEXT: ori $a7, $a1, 2048
+; LA64S-NEXT: lu32i.d $a1, 0
+; LA64S-NEXT: movgr2fr.w $fa1, $a1
+; LA64S-NEXT: lu12i.w $a1, -13
+; LA64S-NEXT: ori $a1, $a1, 3072
+; LA64S-NEXT: lu32i.d $a1, 0
+; LA64S-NEXT: movgr2fr.w $fa0, $a1
+; LA64S-NEXT: lu32i.d $a2, 0
+; LA64S-NEXT: movgr2fr.w $fa2, $a2
+; LA64S-NEXT: lu32i.d $a3, 0
+; LA64S-NEXT: movgr2fr.w $fa3, $a3
+; LA64S-NEXT: lu32i.d $a4, 0
+; LA64S-NEXT: movgr2fr.w $fa4, $a4
+; LA64S-NEXT: lu32i.d $a5, 0
+; LA64S-NEXT: movgr2fr.w $fa5, $a5
+; LA64S-NEXT: lu32i.d $a0, 0
+; LA64S-NEXT: lu32i.d $a6, 0
+; LA64S-NEXT: movgr2fr.w $fa6, $a6
+; LA64S-NEXT: lu32i.d $a7, 0
+; LA64S-NEXT: movgr2fr.w $fa7, $a7
; LA64S-NEXT: ori $a1, $zero, 10
; LA64S-NEXT: pcaddu18i $ra, %call36(callee_half_in_gregs)
; LA64S-NEXT: jirl $ra, $ra, 0
@@ -714,35 +725,27 @@ define i32 @caller_half_in_gregs() nounwind {
; LA64F-LP64S: # %bb.0:
; LA64F-LP64S-NEXT: addi.d $sp, $sp, -32
; LA64F-LP64S-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
-; LA64F-LP64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; LA64F-LP64S-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI3_0)
-; LA64F-LP64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1)
-; LA64F-LP64S-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI3_1)
-; LA64F-LP64S-NEXT: movfr2gr.s $a0, $fa0
-; LA64F-LP64S-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_2)
-; LA64F-LP64S-NEXT: fld.s $fa0, $a1, %pc_lo12(.LCPI3_2)
-; LA64F-LP64S-NEXT: movfr2gr.s $a1, $fa1
-; LA64F-LP64S-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_3)
-; LA64F-LP64S-NEXT: fld.s $fa1, $a2, %pc_lo12(.LCPI3_3)
-; LA64F-LP64S-NEXT: movfr2gr.s $a2, $fa0
-; LA64F-LP64S-NEXT: pcalau12i $a3, %pc_hi20(.LCPI3_4)
-; LA64F-LP64S-NEXT: fld.s $fa0, $a3, %pc_lo12(.LCPI3_4)
-; LA64F-LP64S-NEXT: movfr2gr.s $a3, $fa1
-; LA64F-LP64S-NEXT: pcalau12i $a4, %pc_hi20(.LCPI3_5)
-; LA64F-LP64S-NEXT: fld.s $fa1, $a4, %pc_lo12(.LCPI3_5)
-; LA64F-LP64S-NEXT: movfr2gr.s $a4, $fa0
-; LA64F-LP64S-NEXT: pcalau12i $a5, %pc_hi20(.LCPI3_6)
-; LA64F-LP64S-NEXT: fld.s $fa0, $a5, %pc_lo12(.LCPI3_6)
-; LA64F-LP64S-NEXT: movfr2gr.s $a5, $fa1
-; LA64F-LP64S-NEXT: ori $a6, $zero, 10
-; LA64F-LP64S-NEXT: st.d $a6, $sp, 8
-; LA64F-LP64S-NEXT: movfr2gr.s $a6, $fa0
-; LA64F-LP64S-NEXT: pcalau12i $a7, %pc_hi20(.LCPI3_7)
-; LA64F-LP64S-NEXT: fld.s $fa0, $a7, %pc_lo12(.LCPI3_7)
-; LA64F-LP64S-NEXT: lu12i.w $a7, -12
-; LA64F-LP64S-NEXT: ori $t0, $a7, 2176
+; LA64F-LP64S-NEXT: ori $a0, $zero, 10
+; LA64F-LP64S-NEXT: st.d $a0, $sp, 8
+; LA64F-LP64S-NEXT: lu12i.w $a1, -12
+; LA64F-LP64S-NEXT: ori $t0, $a1, 2176
; LA64F-LP64S-NEXT: lu32i.d $t0, 0
-; LA64F-LP64S-NEXT: movfr2gr.s $a7, $fa0
+; LA64F-LP64S-NEXT: ori $a2, $a1, 512
+; LA64F-LP64S-NEXT: ori $a3, $a1, 1024
+; LA64F-LP64S-NEXT: ori $a4, $a1, 1280
+; LA64F-LP64S-NEXT: ori $a5, $a1, 1536
+; LA64F-LP64S-NEXT: ori $a6, $a1, 1792
+; LA64F-LP64S-NEXT: ori $a7, $a1, 2048
+; LA64F-LP64S-NEXT: lu32i.d $a1, 0
+; LA64F-LP64S-NEXT: lu12i.w $a0, -13
+; LA64F-LP64S-NEXT: ori $a0, $a0, 3072
+; LA64F-LP64S-NEXT: lu32i.d $a0, 0
+; LA64F-LP64S-NEXT: lu32i.d $a2, 0
+; LA64F-LP64S-NEXT: lu32i.d $a3, 0
+; LA64F-LP64S-NEXT: lu32i.d $a4, 0
+; LA64F-LP64S-NEXT: lu32i.d $a5, 0
+; LA64F-LP64S-NEXT: lu32i.d $a6, 0
+; LA64F-LP64S-NEXT: lu32i.d $a7, 0
; LA64F-LP64S-NEXT: st.w $t0, $sp, 0
; LA64F-LP64S-NEXT: pcaddu18i $ra, %call36(callee_half_in_gregs)
; LA64F-LP64S-NEXT: jirl $ra, $ra, 0
@@ -754,25 +757,33 @@ define i32 @caller_half_in_gregs() nounwind {
; LA64F-LP64D: # %bb.0:
; LA64F-LP64D-NEXT: addi.d $sp, $sp, -16
; LA64F-LP64D-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; LA64F-LP64D-NEXT: fld.s $ft0, $a0, %pc_lo12(.LCPI3_0)
-; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1)
-; LA64F-LP64D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI3_1)
-; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_2)
-; LA64F-LP64D-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI3_2)
-; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_3)
-; LA64F-LP64D-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI3_3)
-; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_4)
-; LA64F-LP64D-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI3_4)
-; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_5)
-; LA64F-LP64D-NEXT: fld.s $fa4, $a0, %pc_lo12(.LCPI3_5)
-; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_6)
-; LA64F-LP64D-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI3_6)
-; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_7)
-; LA64F-LP64D-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI3_7)
-; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_8)
-; LA64F-LP64D-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI3_8)
-; LA64F-LP64D-NEXT: movfr2gr.s $a0, $ft0
+; LA64F-LP64D-NEXT: lu12i.w $a1, -12
+; LA64F-LP64D-NEXT: ori $a0, $a1, 2176
+; LA64F-LP64D-NEXT: ori $a2, $a1, 512
+; LA64F-LP64D-NEXT: ori $a3, $a1, 1024
+; LA64F-LP64D-NEXT: ori $a4, $a1, 1280
+; LA64F-LP64D-NEXT: ori $a5, $a1, 1536
+; LA64F-LP64D-NEXT: ori $a6, $a1, 1792
+; LA64F-LP64D-NEXT: ori $a7, $a1, 2048
+; LA64F-LP64D-NEXT: lu32i.d $a1, 0
+; LA64F-LP64D-NEXT: movgr2fr.w $fa1, $a1
+; LA64F-LP64D-NEXT: lu12i.w $a1, -13
+; LA64F-LP64D-NEXT: ori $a1, $a1, 3072
+; LA64F-LP64D-NEXT: lu32i.d $a1, 0
+; LA64F-LP64D-NEXT: movgr2fr.w $fa0, $a1
+; LA64F-LP64D-NEXT: lu32i.d $a2, 0
+; LA64F-LP64D-NEXT: movgr2fr.w $fa2, $a2
+; LA64F-LP64D-NEXT: lu32i.d $a3, 0
+; LA64F-LP64D-NEXT: movgr2fr.w $fa3, $a3
+; LA64F-LP64D-NEXT: lu32i.d $a4, 0
+; LA64F-LP64D-NEXT: movgr2fr.w $fa4, $a4
+; LA64F-LP64D-NEXT: lu32i.d $a5, 0
+; LA64F-LP64D-NEXT: movgr2fr.w $fa5, $a5
+; LA64F-LP64D-NEXT: lu32i.d $a0, 0
+; LA64F-LP64D-NEXT: lu32i.d $a6, 0
+; LA64F-LP64D-NEXT: movgr2fr.w $fa6, $a6
+; LA64F-LP64D-NEXT: lu32i.d $a7, 0
+; LA64F-LP64D-NEXT: movgr2fr.w $fa7, $a7
; LA64F-LP64D-NEXT: ori $a1, $zero, 10
; LA64F-LP64D-NEXT: pcaddu18i $ra, %call36(callee_half_in_gregs)
; LA64F-LP64D-NEXT: jirl $ra, $ra, 0
@@ -784,35 +795,27 @@ define i32 @caller_half_in_gregs() nounwind {
; LA64D-LP64S: # %bb.0:
; LA64D-LP64S-NEXT: addi.d $sp, $sp, -32
; LA64D-LP64S-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
-; LA64D-LP64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; LA64D-LP64S-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI3_0)
-; LA64D-LP64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1)
-; LA64D-LP64S-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI3_1)
-; LA64D-LP64S-NEXT: movfr2gr.s $a0, $fa0
-; LA64D-LP64S-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_2)
-; LA64D-LP64S-NEXT: fld.s $fa0, $a1, %pc_lo12(.LCPI3_2)
-; LA64D-LP64S-NEXT: movfr2gr.s $a1, $fa1
-; LA64D-LP64S-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_3)
-; LA64D-LP64S-NEXT: fld.s $fa1, $a2, %pc_lo12(.LCPI3_3)
-; LA64D-LP64S-NEXT: movfr2gr.s $a2, $fa0
-; LA64D-LP64S-NEXT: pcalau12i $a3, %pc_hi20(.LCPI3_4)
-; LA64D-LP64S-NEXT: fld.s $fa0, $a3, %pc_lo12(.LCPI3_4)
-; LA64D-LP64S-NEXT: movfr2gr.s $a3, $fa1
-; LA64D-LP64S-NEXT: pcalau12i $a4, %pc_hi20(.LCPI3_5)
-; LA64D-LP64S-NEXT: fld.s $fa1, $a4, %pc_lo12(.LCPI3_5)
-; LA64D-LP64S-NEXT: movfr2gr.s $a4, $fa0
-; LA64D-LP64S-NEXT: pcalau12i $a5, %pc_hi20(.LCPI3_6)
-; LA64D-LP64S-NEXT: fld.s $fa0, $a5, %pc_lo12(.LCPI3_6)
-; LA64D-LP64S-NEXT: movfr2gr.s $a5, $fa1
-; LA64D-LP64S-NEXT: ori $a6, $zero, 10
-; LA64D-LP64S-NEXT: st.d $a6, $sp, 8
-; LA64D-LP64S-NEXT: movfr2gr.s $a6, $fa0
-; LA64D-LP64S-NEXT: pcalau12i $a7, %pc_hi20(.LCPI3_7)
-; LA64D-LP64S-NEXT: fld.s $fa0, $a7, %pc_lo12(.LCPI3_7)
-; LA64D-LP64S-NEXT: lu12i.w $a7, -12
-; LA64D-LP64S-NEXT: ori $t0, $a7, 2176
+; LA64D-LP64S-NEXT: ori $a0, $zero, 10
+; LA64D-LP64S-NEXT: st.d $a0, $sp, 8
+; LA64D-LP64S-NEXT: lu12i.w $a1, -12
+; LA64D-LP64S-NEXT: ori $t0, $a1, 2176
; LA64D-LP64S-NEXT: lu32i.d $t0, 0
-; LA64D-LP64S-NEXT: movfr2gr.s $a7, $fa0
+; LA64D-LP64S-NEXT: ori $a2, $a1, 512
+; LA64D-LP64S-NEXT: ori $a3, $a1, 1024
+; LA64D-LP64S-NEXT: ori $a4, $a1, 1280
+; LA64D-LP64S-NEXT: ori $a5, $a1, 1536
+; LA64D-LP64S-NEXT: ori $a6, $a1, 1792
+; LA64D-LP64S-NEXT: ori $a7, $a1, 2048
+; LA64D-LP64S-NEXT: lu32i.d $a1, 0
+; LA64D-LP64S-NEXT: lu12i.w $a0, -13
+; LA64D-LP64S-NEXT: ori $a0, $a0, 3072
+; LA64D-LP64S-NEXT: lu32i.d $a0, 0
+; LA64D-LP64S-NEXT: lu32i.d $a2, 0
+; LA64D-LP64S-NEXT: lu32i.d $a3, 0
+; LA64D-LP64S-NEXT: lu32i.d $a4, 0
+; LA64D-LP64S-NEXT: lu32i.d $a5, 0
+; LA64D-LP64S-NEXT: lu32i.d $a6, 0
+; LA64D-LP64S-NEXT: lu32i.d $a7, 0
; LA64D-LP64S-NEXT: st.w $t0, $sp, 0
; LA64D-LP64S-NEXT: pcaddu18i $ra, %call36(callee_half_in_gregs)
; LA64D-LP64S-NEXT: jirl $ra, $ra, 0
@@ -824,25 +827,33 @@ define i32 @caller_half_in_gregs() nounwind {
; LA64D-LP64D: # %bb.0:
; LA64D-LP64D-NEXT: addi.d $sp, $sp, -16
; LA64D-LP64D-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; LA64D-LP64D-NEXT: fld.s $ft0, $a0, %pc_lo12(.LCPI3_0)
-; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1)
-; LA64D-LP64D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI3_1)
-; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_2)
-; LA64D-LP64D-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI3_2)
-; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_3)
-; LA64D-LP64D-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI3_3)
-; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_4)
-; LA64D-LP64D-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI3_4)
-; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_5)
-; LA64D-LP64D-NEXT: fld.s $fa4, $a0, %pc_lo12(.LCPI3_5)
-; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_6)
-; LA64D-LP64D-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI3_6)
-; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_7)
-; LA64D-LP64D-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI3_7)
-; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_8)
-; LA64D-LP64D-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI3_8)
-; LA64D-LP64D-NEXT: movfr2gr.s $a0, $ft0
+; LA64D-LP64D-NEXT: lu12i.w $a1, -12
+; LA64D-LP64D-NEXT: ori $a0, $a1, 2176
+; LA64D-LP64D-NEXT: ori $a2, $a1, 512
+; LA64D-LP64D-NEXT: ori $a3, $a1, 1024
+; LA64D-LP64D-NEXT: ori $a4, $a1, 1280
+; LA64D-LP64D-NEXT: ori $a5, $a1, 1536
+; LA64D-LP64D-NEXT: ori $a6, $a1, 1792
+; LA64D-LP64D-NEXT: ori $a7, $a1, 2048
+; LA64D-LP64D-NEXT: lu32i.d $a1, 0
+; LA64D-LP64D-NEXT: movgr2fr.w $fa1, $a1
+; LA64D-LP64D-NEXT: lu12i.w $a1, -13
+; LA64D-LP64D-NEXT: ori $a1, $a1, 3072
+; LA64D-LP64D-NEXT: lu32i.d $a1, 0
+; LA64D-LP64D-NEXT: movgr2fr.w $fa0, $a1
+; LA64D-LP64D-NEXT: lu32i.d $a2, 0
+; LA64D-LP64D-NEXT: movgr2fr.w $fa2, $a2
+; LA64D-LP64D-NEXT: lu32i.d $a3, 0
+; LA64D-LP64D-NEXT: movgr2fr.w $fa3, $a3
+; LA64D-LP64D-NEXT: lu32i.d $a4, 0
+; LA64D-LP64D-NEXT: movgr2fr.w $fa4, $a4
+; LA64D-LP64D-NEXT: lu32i.d $a5, 0
+; LA64D-LP64D-NEXT: movgr2fr.w $fa5, $a5
+; LA64D-LP64D-NEXT: lu32i.d $a0, 0
+; LA64D-LP64D-NEXT: lu32i.d $a6, 0
+; LA64D-LP64D-NEXT: movgr2fr.w $fa6, $a6
+; LA64D-LP64D-NEXT: lu32i.d $a7, 0
+; LA64D-LP64D-NEXT: movgr2fr.w $fa7, $a7
; LA64D-LP64D-NEXT: ori $a1, $zero, 10
; LA64D-LP64D-NEXT: pcaddu18i $ra, %call36(callee_half_in_gregs)
; LA64D-LP64D-NEXT: jirl $ra, $ra, 0
@@ -1110,22 +1121,22 @@ define i32 @caller_half_on_stack() nounwind {
; LA32F-ILP32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32F-ILP32D-NEXT: lu12i.w $a0, -12
; LA32F-ILP32D-NEXT: ori $t0, $a0, 3200
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; LA32F-ILP32D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI5_0)
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_1)
-; LA32F-ILP32D-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI5_1)
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_2)
-; LA32F-ILP32D-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI5_2)
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_3)
-; LA32F-ILP32D-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI5_3)
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_4)
-; LA32F-ILP32D-NEXT: fld.s $fa4, $a0, %pc_lo12(.LCPI5_4)
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_5)
-; LA32F-ILP32D-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI5_5)
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_6)
-; LA32F-ILP32D-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI5_6)
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_7)
-; LA32F-ILP32D-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI5_7)
+; LA32F-ILP32D-NEXT: ori $a1, $a0, 2304
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa0, $a1
+; LA32F-ILP32D-NEXT: ori $a1, $a0, 2432
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa1, $a1
+; LA32F-ILP32D-NEXT: ori $a1, $a0, 2560
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa2, $a1
+; LA32F-ILP32D-NEXT: ori $a1, $a0, 2688
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa3, $a1
+; LA32F-ILP32D-NEXT: ori $a1, $a0, 2816
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa4, $a1
+; LA32F-ILP32D-NEXT: ori $a1, $a0, 2944
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa5, $a1
+; LA32F-ILP32D-NEXT: ori $a1, $a0, 3072
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa6, $a1
+; LA32F-ILP32D-NEXT: ori $a0, $a0, 3136
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa7, $a0
; LA32F-ILP32D-NEXT: ori $a0, $zero, 1
; LA32F-ILP32D-NEXT: ori $a1, $zero, 2
; LA32F-ILP32D-NEXT: ori $a2, $zero, 3
@@ -1182,22 +1193,22 @@ define i32 @caller_half_on_stack() nounwind {
; LA32D-ILP32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32D-ILP32D-NEXT: lu12i.w $a0, -12
; LA32D-ILP32D-NEXT: ori $t0, $a0, 3200
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; LA32D-ILP32D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI5_0)
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_1)
-; LA32D-ILP32D-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI5_1)
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_2)
-; LA32D-ILP32D-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI5_2)
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_3)
-; LA32D-ILP32D-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI5_3)
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_4)
-; LA32D-ILP32D-NEXT: fld.s $fa4, $a0, %pc_lo12(.LCPI5_4)
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_5)
-; LA32D-ILP32D-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI5_5)
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_6)
-; LA32D-ILP32D-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI5_6)
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_7)
-; LA32D-ILP32D-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI5_7)
+; LA32D-ILP32D-NEXT: ori $a1, $a0, 2304
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa0, $a1
+; LA32D-ILP32D-NEXT: ori $a1, $a0, 2432
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa1, $a1
+; LA32D-ILP32D-NEXT: ori $a1, $a0, 2560
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa2, $a1
+; LA32D-ILP32D-NEXT: ori $a1, $a0, 2688
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa3, $a1
+; LA32D-ILP32D-NEXT: ori $a1, $a0, 2816
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa4, $a1
+; LA32D-ILP32D-NEXT: ori $a1, $a0, 2944
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa5, $a1
+; LA32D-ILP32D-NEXT: ori $a1, $a0, 3072
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa6, $a1
+; LA32D-ILP32D-NEXT: ori $a0, $a0, 3136
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa7, $a0
; LA32D-ILP32D-NEXT: ori $a0, $zero, 1
; LA32D-ILP32D-NEXT: ori $a1, $zero, 2
; LA32D-ILP32D-NEXT: ori $a2, $zero, 3
@@ -1219,22 +1230,30 @@ define i32 @caller_half_on_stack() nounwind {
; LA64S-NEXT: lu12i.w $a0, -12
; LA64S-NEXT: ori $t0, $a0, 3200
; LA64S-NEXT: lu32i.d $t0, 0
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; LA64S-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI5_0)
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_1)
-; LA64S-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI5_1)
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_2)
-; LA64S-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI5_2)
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_3)
-; LA64S-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI5_3)
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_4)
-; LA64S-NEXT: fld.s $fa4, $a0, %pc_lo12(.LCPI5_4)
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_5)
-; LA64S-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI5_5)
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_6)
-; LA64S-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI5_6)
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_7)
-; LA64S-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI5_7)
+; LA64S-NEXT: ori $a1, $a0, 2304
+; LA64S-NEXT: lu32i.d $a1, 0
+; LA64S-NEXT: movgr2fr.w $fa0, $a1
+; LA64S-NEXT: ori $a1, $a0, 2432
+; LA64S-NEXT: lu32i.d $a1, 0
+; LA64S-NEXT: movgr2fr.w $fa1, $a1
+; LA64S-NEXT: ori $a1, $a0, 2560
+; LA64S-NEXT: lu32i.d $a1, 0
+; LA64S-NEXT: movgr2fr.w $fa2, $a1
+; LA64S-NEXT: ori $a1, $a0, 2688
+; LA64S-NEXT: lu32i.d $a1, 0
+; LA64S-NEXT: movgr2fr.w $fa3, $a1
+; LA64S-NEXT: ori $a1, $a0, 2816
+; LA64S-NEXT: lu32i.d $a1, 0
+; LA64S-NEXT: movgr2fr.w $fa4, $a1
+; LA64S-NEXT: ori $a1, $a0, 2944
+; LA64S-NEXT: lu32i.d $a1, 0
+; LA64S-NEXT: movgr2fr.w $fa5, $a1
+; LA64S-NEXT: ori $a1, $a0, 3072
+; LA64S-NEXT: lu32i.d $a1, 0
+; LA64S-NEXT: movgr2fr.w $fa6, $a1
+; LA64S-NEXT: ori $a0, $a0, 3136
+; LA64S-NEXT: lu32i.d $a0, 0
+; LA64S-NEXT: movgr2fr.w $fa7, $a0
; LA64S-NEXT: ori $a0, $zero, 1
; LA64S-NEXT: ori $a1, $zero, 2
; LA64S-NEXT: ori $a2, $zero, 3
@@ -1303,22 +1322,30 @@ define i32 @caller_half_on_stack() nounwind {
; LA64F-LP64D-NEXT: lu12i.w $a0, -12
; LA64F-LP64D-NEXT: ori $t0, $a0, 3200
; LA64F-LP64D-NEXT: lu32i.d $t0, 0
-; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; LA64F-LP64D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI5_0)
-; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_1)
-; LA64F-LP64D-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI5_1)
-; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_2)
-; LA64F-LP64D-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI5_2)
-; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_3)
-; LA64F-LP64D-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI5_3)
-; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_4)
-; LA64F-LP64D-NEXT: fld.s $fa4, $a0, %pc_lo12(.LCPI5_4)
-; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_5)
-; LA64F-LP64D-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI5_5)
-; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_6)
-; LA64F-LP64D-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI5_6)
-; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_7)
-; LA64F-LP64D-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI5_7)
+; LA64F-LP64D-NEXT: ori $a1, $a0, 2304
+; LA64F-LP64D-NEXT: lu32i.d $a1, 0
+; LA64F-LP64D-NEXT: movgr2fr.w $fa0, $a1
+; LA64F-LP64D-NEXT: ori $a1, $a0, 2432
+; LA64F-LP64D-NEXT: lu32i.d $a1, 0
+; LA64F-LP64D-NEXT: movgr2fr.w $fa1, $a1
+; LA64F-LP64D-NEXT: ori $a1, $a0, 2560
+; LA64F-LP64D-NEXT: lu32i.d $a1, 0
+; LA64F-LP64D-NEXT: movgr2fr.w $fa2, $a1
+; LA64F-LP64D-NEXT: ori $a1, $a0, 2688
+; LA64F-LP64D-NEXT: lu32i.d $a1, 0
+; LA64F-LP64D-NEXT: movgr2fr.w $fa3, $a1
+; LA64F-LP64D-NEXT: ori $a1, $a0, 2816
+; LA64F-LP64D-NEXT: lu32i.d $a1, 0
+; LA64F-LP64D-NEXT: movgr2fr.w $fa4, $a1
+; LA64F-LP64D-NEXT: ori $a1, $a0, 2944
+; LA64F-LP64D-NEXT: lu32i.d $a1, 0
+; LA64F-LP64D-NEXT: movgr2fr.w $fa5, $a1
+; LA64F-LP64D-NEXT: ori $a1, $a0, 3072
+; LA64F-LP64D-NEXT: lu32i.d $a1, 0
+; LA64F-LP64D-NEXT: movgr2fr.w $fa6, $a1
+; LA64F-LP64D-NEXT: ori $a0, $a0, 3136
+; LA64F-LP64D-NEXT: lu32i.d $a0, 0
+; LA64F-LP64D-NEXT: movgr2fr.w $fa7, $a0
; LA64F-LP64D-NEXT: ori $a0, $zero, 1
; LA64F-LP64D-NEXT: ori $a1, $zero, 2
; LA64F-LP64D-NEXT: ori $a2, $zero, 3
@@ -1387,22 +1414,30 @@ define i32 @caller_half_on_stack() nounwind {
; LA64D-LP64D-NEXT: lu12i.w $a0, -12
; LA64D-LP64D-NEXT: ori $t0, $a0, 3200
; LA64D-LP64D-NEXT: lu32i.d $t0, 0
-; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; LA64D-LP64D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI5_0)
-; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_1)
-; LA64D-LP64D-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI5_1)
-; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_2)
-; LA64D-LP64D-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI5_2)
-; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_3)
-; LA64D-LP64D-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI5_3)
-; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_4)
-; LA64D-LP64D-NEXT: fld.s $fa4, $a0, %pc_lo12(.LCPI5_4)
-; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_5)
-; LA64D-LP64D-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI5_5)
-; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_6)
-; LA64D-LP64D-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI5_6)
-; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_7)
-; LA64D-LP64D-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI5_7)
+; LA64D-LP64D-NEXT: ori $a1, $a0, 2304
+; LA64D-LP64D-NEXT: lu32i.d $a1, 0
+; LA64D-LP64D-NEXT: movgr2fr.w $fa0, $a1
+; LA64D-LP64D-NEXT: ori $a1, $a0, 2432
+; LA64D-LP64D-NEXT: lu32i.d $a1, 0
+; LA64D-LP64D-NEXT: movgr2fr.w $fa1, $a1
+; LA64D-LP64D-NEXT: ori $a1, $a0, 2560
+; LA64D-LP64D-NEXT: lu32i.d $a1, 0
+; LA64D-LP64D-NEXT: movgr2fr.w $fa2, $a1
+; LA64D-LP64D-NEXT: ori $a1, $a0, 2688
+; LA64D-LP64D-NEXT: lu32i.d $a1, 0
+; LA64D-LP64D-NEXT: movgr2fr.w $fa3, $a1
+; LA64D-LP64D-NEXT: ori $a1, $a0, 2816
+; LA64D-LP64D-NEXT: lu32i.d $a1, 0
+; LA64D-LP64D-NEXT: movgr2fr.w $fa4, $a1
+; LA64D-LP64D-NEXT: ori $a1, $a0, 2944
+; LA64D-LP64D-NEXT: lu32i.d $a1, 0
+; LA64D-LP64D-NEXT: movgr2fr.w $fa5, $a1
+; LA64D-LP64D-NEXT: ori $a1, $a0, 3072
+; LA64D-LP64D-NEXT: lu32i.d $a1, 0
+; LA64D-LP64D-NEXT: movgr2fr.w $fa6, $a1
+; LA64D-LP64D-NEXT: ori $a0, $a0, 3136
+; LA64D-LP64D-NEXT: lu32i.d $a0, 0
+; LA64D-LP64D-NEXT: movgr2fr.w $fa7, $a0
; LA64D-LP64D-NEXT: ori $a0, $zero, 1
; LA64D-LP64D-NEXT: ori $a1, $zero, 2
; LA64D-LP64D-NEXT: ori $a2, $zero, 3
@@ -1436,8 +1471,9 @@ define half @callee_half_ret() nounwind {
;
; LA32F-ILP32D-LABEL: callee_half_ret:
; LA32F-ILP32D: # %bb.0:
-; LA32F-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
-; LA32F-ILP32D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI6_0)
+; LA32F-ILP32D-NEXT: lu12i.w $a0, -13
+; LA32F-ILP32D-NEXT: ori $a0, $a0, 3072
+; LA32F-ILP32D-NEXT: movgr2fr.w $fa0, $a0
; LA32F-ILP32D-NEXT: ret
;
; LA32D-ILP32S-LABEL: callee_half_ret:
@@ -1448,40 +1484,47 @@ define half @callee_half_ret() nounwind {
;
; LA32D-ILP32D-LABEL: callee_half_ret:
; LA32D-ILP32D: # %bb.0:
-; LA32D-ILP32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
-; LA32D-ILP32D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI6_0)
+; LA32D-ILP32D-NEXT: lu12i.w $a0, -13
+; LA32D-ILP32D-NEXT: ori $a0, $a0, 3072
+; LA32D-ILP32D-NEXT: movgr2fr.w $fa0, $a0
; LA32D-ILP32D-NEXT: ret
;
; LA64S-LABEL: callee_half_ret:
; LA64S: # %bb.0:
-; LA64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
-; LA64S-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI6_0)
+; LA64S-NEXT: lu12i.w $a0, -13
+; LA64S-NEXT: ori $a0, $a0, 3072
+; LA64S-NEXT: lu32i.d $a0, 0
+; LA64S-NEXT: movgr2fr.w $fa0, $a0
; LA64S-NEXT: ret
;
; LA64F-LP64S-LABEL: callee_half_ret:
; LA64F-LP64S: # %bb.0:
-; LA64F-LP64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
-; LA64F-LP64S-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI6_0)
-; LA64F-LP64S-NEXT: movfr2gr.s $a0, $fa0
+; LA64F-LP64S-NEXT: lu12i.w $a0, -13
+; LA64F-LP64S-NEXT: ori $a0, $a0, 3072
+; LA64F-LP64S-NEXT: lu32i.d $a0, 0
; LA64F-LP64S-NEXT: ret
;
; LA64F-LP64D-LABEL: callee_half_ret:
; LA64F-LP64D: # %bb.0:
-; LA64F-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
-; LA64F-LP64D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI6_0)
+; LA64F-LP64D-NEXT: lu12i.w $a0, -13
+; LA64F-LP64D-NEXT: ori $a0, $a0, 3072
+; LA64F-LP64D-NEXT: lu32i.d $a0, 0
+; LA64F-LP64D-NEXT: movgr2fr.w $fa0, $a0
; LA64F-LP64D-NEXT: ret
;
; LA64D-LP64S-LABEL: callee_half_ret:
; LA64D-LP64S: # %bb.0:
-; LA64D-LP64S-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
-; LA64D-LP64S-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI6_0)
-; LA64D-LP64S-NEXT: movfr2gr.s $a0, $fa0
+; LA64D-LP64S-NEXT: lu12i.w $a0, -13
+; LA64D-LP64S-NEXT: ori $a0, $a0, 3072
+; LA64D-LP64S-NEXT: lu32i.d $a0, 0
; LA64D-LP64S-NEXT: ret
;
; LA64D-LP64D-LABEL: callee_half_ret:
; LA64D-LP64D: # %bb.0:
-; LA64D-LP64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
-; LA64D-LP64D-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI6_0)
+; LA64D-LP64D-NEXT: lu12i.w $a0, -13
+; LA64D-LP64D-NEXT: ori $a0, $a0, 3072
+; LA64D-LP64D-NEXT: lu32i.d $a0, 0
+; LA64D-LP64D-NEXT: movgr2fr.w $fa0, $a0
; LA64D-LP64D-NEXT: ret
ret half 1.0
}
diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-ilp32d.ll b/llvm/test/CodeGen/LoongArch/calling-conv-ilp32d.ll
index 62c2cc999456c..0544d93f97300 100644
--- a/llvm/test/CodeGen/LoongArch/calling-conv-ilp32d.ll
+++ b/llvm/test/CodeGen/LoongArch/calling-conv-ilp32d.ll
@@ -65,24 +65,30 @@ define i32 @caller_double_in_gpr_exhausted_fprs() nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: addi.w $sp, $sp, -16
; CHECK-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; CHECK-NEXT: fld.d $fa1, $a0, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1)
-; CHECK-NEXT: fld.d $fa2, $a0, %pc_lo12(.LCPI3_1)
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_2)
-; CHECK-NEXT: fld.d $fa3, $a0, %pc_lo12(.LCPI3_2)
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_3)
-; CHECK-NEXT: fld.d $fa4, $a0, %pc_lo12(.LCPI3_3)
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_4)
-; CHECK-NEXT: fld.d $fa5, $a0, %pc_lo12(.LCPI3_4)
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_5)
-; CHECK-NEXT: fld.d $fa6, $a0, %pc_lo12(.LCPI3_5)
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_6)
-; CHECK-NEXT: fld.d $fa7, $a0, %pc_lo12(.LCPI3_6)
-; CHECK-NEXT: addi.w $a0, $zero, 1
-; CHECK-NEXT: movgr2fr.w $fa0, $a0
-; CHECK-NEXT: ffint.s.w $fa0, $fa0
-; CHECK-NEXT: fcvt.d.s $fa0, $fa0
+; CHECK-NEXT: movgr2fr.w $fa7, $zero
+; CHECK-NEXT: lu12i.w $a0, 261888
+; CHECK-NEXT: fmov.d $fa0, $fa7
+; CHECK-NEXT: movgr2frh.w $fa0, $a0
+; CHECK-NEXT: lu12i.w $a0, 262144
+; CHECK-NEXT: fmov.d $fa1, $fa7
+; CHECK-NEXT: movgr2frh.w $fa1, $a0
+; CHECK-NEXT: lu12i.w $a0, 262272
+; CHECK-NEXT: fmov.d $fa2, $fa7
+; CHECK-NEXT: movgr2frh.w $fa2, $a0
+; CHECK-NEXT: lu12i.w $a0, 262400
+; CHECK-NEXT: fmov.d $fa3, $fa7
+; CHECK-NEXT: movgr2frh.w $fa3, $a0
+; CHECK-NEXT: lu12i.w $a0, 262464
+; CHECK-NEXT: fmov.d $fa4, $fa7
+; CHECK-NEXT: movgr2frh.w $fa4, $a0
+; CHECK-NEXT: lu12i.w $a0, 262528
+; CHECK-NEXT: fmov.d $fa5, $fa7
+; CHECK-NEXT: movgr2frh.w $fa5, $a0
+; CHECK-NEXT: lu12i.w $a0, 262592
+; CHECK-NEXT: fmov.d $fa6, $fa7
+; CHECK-NEXT: movgr2frh.w $fa6, $a0
+; CHECK-NEXT: lu12i.w $a0, 262656
+; CHECK-NEXT: movgr2frh.w $fa7, $a0
; CHECK-NEXT: lu12i.w $a1, 262688
; CHECK-NEXT: move $a0, $zero
; CHECK-NEXT: bl callee_double_in_gpr_exhausted_fprs
@@ -125,24 +131,30 @@ define i32 @caller_double_on_stack_exhausted_fprs_gprs() nounwind {
; CHECK-NEXT: st.w $zero, $sp, 0
; CHECK-NEXT: lu12i.w $a0, 262848
; CHECK-NEXT: st.w $a0, $sp, 12
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; CHECK-NEXT: fld.d $fa1, $a0, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_1)
-; CHECK-NEXT: fld.d $fa2, $a0, %pc_lo12(.LCPI5_1)
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_2)
-; CHECK-NEXT: fld.d $fa3, $a0, %pc_lo12(.LCPI5_2)
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_3)
-; CHECK-NEXT: fld.d $fa4, $a0, %pc_lo12(.LCPI5_3)
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_4)
-; CHECK-NEXT: fld.d $fa5, $a0, %pc_lo12(.LCPI5_4)
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_5)
-; CHECK-NEXT: fld.d $fa6, $a0, %pc_lo12(.LCPI5_5)
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_6)
-; CHECK-NEXT: fld.d $fa7, $a0, %pc_lo12(.LCPI5_6)
-; CHECK-NEXT: addi.w $a0, $zero, 1
-; CHECK-NEXT: movgr2fr.w $fa0, $a0
-; CHECK-NEXT: ffint.s.w $fa0, $fa0
-; CHECK-NEXT: fcvt.d.s $fa0, $fa0
+; CHECK-NEXT: movgr2fr.w $fa7, $zero
+; CHECK-NEXT: lu12i.w $a0, 261888
+; CHECK-NEXT: fmov.d $fa0, $fa7
+; CHECK-NEXT: movgr2frh.w $fa0, $a0
+; CHECK-NEXT: lu12i.w $a0, 262144
+; CHECK-NEXT: fmov.d $fa1, $fa7
+; CHECK-NEXT: movgr2frh.w $fa1, $a0
+; CHECK-NEXT: lu12i.w $a0, 262272
+; CHECK-NEXT: fmov.d $fa2, $fa7
+; CHECK-NEXT: movgr2frh.w $fa2, $a0
+; CHECK-NEXT: lu12i.w $a0, 262400
+; CHECK-NEXT: fmov.d $fa3, $fa7
+; CHECK-NEXT: movgr2frh.w $fa3, $a0
+; CHECK-NEXT: lu12i.w $a0, 262464
+; CHECK-NEXT: fmov.d $fa4, $fa7
+; CHECK-NEXT: movgr2frh.w $fa4, $a0
+; CHECK-NEXT: lu12i.w $a0, 262528
+; CHECK-NEXT: fmov.d $fa5, $fa7
+; CHECK-NEXT: movgr2frh.w $fa5, $a0
+; CHECK-NEXT: lu12i.w $a0, 262592
+; CHECK-NEXT: fmov.d $fa6, $fa7
+; CHECK-NEXT: movgr2frh.w $fa6, $a0
+; CHECK-NEXT: lu12i.w $a0, 262656
+; CHECK-NEXT: movgr2frh.w $fa7, $a0
; CHECK-NEXT: lu12i.w $a1, 262688
; CHECK-NEXT: lu12i.w $a3, 262720
; CHECK-NEXT: lu12i.w $a5, 262752
@@ -168,10 +180,9 @@ define i32 @caller_double_on_stack_exhausted_fprs_gprs() nounwind {
define double @callee_double_ret() nounwind {
; CHECK-LABEL: callee_double_ret:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.w $a0, $zero, 1
-; CHECK-NEXT: movgr2fr.w $fa0, $a0
-; CHECK-NEXT: ffint.s.w $fa0, $fa0
-; CHECK-NEXT: fcvt.d.s $fa0, $fa0
+; CHECK-NEXT: movgr2fr.w $fa0, $zero
+; CHECK-NEXT: lu12i.w $a0, 261888
+; CHECK-NEXT: movgr2frh.w $fa0, $a0
; CHECK-NEXT: ret
ret double 1.0
}
diff --git a/llvm/test/CodeGen/LoongArch/double-imm.ll b/llvm/test/CodeGen/LoongArch/double-imm.ll
index fe403ec532d8e..3da4e159dbffc 100644
--- a/llvm/test/CodeGen/LoongArch/double-imm.ll
+++ b/llvm/test/CodeGen/LoongArch/double-imm.ll
@@ -35,14 +35,21 @@ define double @f64_negative_zero() nounwind {
define double @f64_constant_pi() nounwind {
; LA32-LABEL: f64_constant_pi:
; LA32: # %bb.0:
-; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; LA32-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI2_0)
+; LA32-NEXT: lu12i.w $a0, 262290
+; LA32-NEXT: ori $a0, $a0, 507
+; LA32-NEXT: lu12i.w $a1, 345154
+; LA32-NEXT: ori $a1, $a1, 3352
+; LA32-NEXT: movgr2fr.w $fa0, $a1
+; LA32-NEXT: movgr2frh.w $fa0, $a0
; LA32-NEXT: ret
;
; LA64-LABEL: f64_constant_pi:
; LA64: # %bb.0:
-; LA64-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; LA64-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI2_0)
+; LA64-NEXT: lu12i.w $a0, 345154
+; LA64-NEXT: ori $a0, $a0, 3352
+; LA64-NEXT: lu32i.d $a0, -450053
+; LA64-NEXT: lu52i.d $a0, $a0, 1024
+; LA64-NEXT: movgr2fr.d $fa0, $a0
; LA64-NEXT: ret
ret double 3.1415926535897931159979634685441851615905761718750
}
@@ -50,10 +57,9 @@ define double @f64_constant_pi() nounwind {
define double @f64_add_fimm1(double %a) nounwind {
; LA32-LABEL: f64_add_fimm1:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a0, $zero, 1
-; LA32-NEXT: movgr2fr.w $fa1, $a0
-; LA32-NEXT: ffint.s.w $fa1, $fa1
-; LA32-NEXT: fcvt.d.s $fa1, $fa1
+; LA32-NEXT: movgr2fr.w $fa1, $zero
+; LA32-NEXT: lu12i.w $a0, 261888
+; LA32-NEXT: movgr2frh.w $fa1, $a0
; LA32-NEXT: fadd.d $fa0, $fa0, $fa1
; LA32-NEXT: ret
;
@@ -69,10 +75,9 @@ define double @f64_add_fimm1(double %a) nounwind {
define double @f64_positive_fimm1() nounwind {
; LA32-LABEL: f64_positive_fimm1:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a0, $zero, 1
-; LA32-NEXT: movgr2fr.w $fa0, $a0
-; LA32-NEXT: ffint.s.w $fa0, $fa0
-; LA32-NEXT: fcvt.d.s $fa0, $fa0
+; LA32-NEXT: movgr2fr.w $fa0, $zero
+; LA32-NEXT: lu12i.w $a0, 261888
+; LA32-NEXT: movgr2frh.w $fa0, $a0
; LA32-NEXT: ret
;
; LA64-LABEL: f64_positive_fimm1:
diff --git a/llvm/test/CodeGen/LoongArch/float-imm.ll b/llvm/test/CodeGen/LoongArch/float-imm.ll
index 006a9e64b190d..3e52cc8d7d168 100644
--- a/llvm/test/CodeGen/LoongArch/float-imm.ll
+++ b/llvm/test/CodeGen/LoongArch/float-imm.ll
@@ -33,14 +33,16 @@ define float @f32_negative_zero() nounwind {
define float @f32_constant_pi() nounwind {
; LA32-LABEL: f32_constant_pi:
; LA32: # %bb.0:
-; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; LA32-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI2_0)
+; LA32-NEXT: lu12i.w $a0, 263312
+; LA32-NEXT: ori $a0, $a0, 4059
+; LA32-NEXT: movgr2fr.w $fa0, $a0
; LA32-NEXT: ret
;
; LA64-LABEL: f32_constant_pi:
; LA64: # %bb.0:
-; LA64-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; LA64-NEXT: fld.s $fa0, $a0, %pc_lo12(.LCPI2_0)
+; LA64-NEXT: lu12i.w $a0, 263312
+; LA64-NEXT: ori $a0, $a0, 4059
+; LA64-NEXT: movgr2fr.w $fa0, $a0
; LA64-NEXT: ret
ret float 3.14159274101257324218750
}
@@ -48,17 +50,15 @@ define float @f32_constant_pi() nounwind {
define float @f32_add_fimm1(float %a) nounwind {
; LA32-LABEL: f32_add_fimm1:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a0, $zero, 1
+; LA32-NEXT: lu12i.w $a0, 260096
; LA32-NEXT: movgr2fr.w $fa1, $a0
-; LA32-NEXT: ffint.s.w $fa1, $fa1
; LA32-NEXT: fadd.s $fa0, $fa0, $fa1
; LA32-NEXT: ret
;
; LA64-LABEL: f32_add_fimm1:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a0, $zero, 1
+; LA64-NEXT: lu12i.w $a0, 260096
; LA64-NEXT: movgr2fr.w $fa1, $a0
-; LA64-NEXT: ffint.s.w $fa1, $fa1
; LA64-NEXT: fadd.s $fa0, $fa0, $fa1
; LA64-NEXT: ret
%1 = fadd float %a, 1.0
@@ -68,16 +68,14 @@ define float @f32_add_fimm1(float %a) nounwind {
define float @f32_positive_fimm1() nounwind {
; LA32-LABEL: f32_positive_fimm1:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a0, $zero, 1
+; LA32-NEXT: lu12i.w $a0, 260096
; LA32-NEXT: movgr2fr.w $fa0, $a0
-; LA32-NEXT: ffint.s.w $fa0, $fa0
; LA32-NEXT: ret
;
; LA64-LABEL: f32_positive_fimm1:
; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a0, $zero, 1
+; LA64-NEXT: lu12i.w $a0, 260096
; LA64-NEXT: movgr2fr.w $fa0, $a0
-; LA64-NEXT: ffint.s.w $fa0, $fa0
; LA64-NEXT: ret
ret float 1.0
}
diff --git a/llvm/test/CodeGen/LoongArch/fp-reciprocal.ll b/llvm/test/CodeGen/LoongArch/fp-reciprocal.ll
index 11e246eafe4ff..29efb1ff909f0 100644
--- a/llvm/test/CodeGen/LoongArch/fp-reciprocal.ll
+++ b/llvm/test/CodeGen/LoongArch/fp-reciprocal.ll
@@ -8,17 +8,23 @@
define float @f32_reciprocal(float %a) nounwind {
; LA32F-LABEL: f32_reciprocal:
; LA32F: # %bb.0:
-; LA32F-NEXT: frecip.s $fa0, $fa0
+; LA32F-NEXT: lu12i.w $a0, 260096
+; LA32F-NEXT: movgr2fr.w $fa1, $a0
+; LA32F-NEXT: fdiv.s $fa0, $fa1, $fa0
; LA32F-NEXT: ret
;
; LA32D-LABEL: f32_reciprocal:
; LA32D: # %bb.0:
-; LA32D-NEXT: frecip.s $fa0, $fa0
+; LA32D-NEXT: lu12i.w $a0, 260096
+; LA32D-NEXT: movgr2fr.w $fa1, $a0
+; LA32D-NEXT: fdiv.s $fa0, $fa1, $fa0
; LA32D-NEXT: ret
;
; LA64F-LABEL: f32_reciprocal:
; LA64F: # %bb.0:
-; LA64F-NEXT: frecip.s $fa0, $fa0
+; LA64F-NEXT: lu12i.w $a0, 260096
+; LA64F-NEXT: movgr2fr.w $fa1, $a0
+; LA64F-NEXT: fdiv.s $fa0, $fa1, $fa0
; LA64F-NEXT: ret
;
; LA64D-LABEL: f32_reciprocal:
@@ -45,7 +51,10 @@ define double @f64_reciprocal(double %a) nounwind {
;
; LA32D-LABEL: f64_reciprocal:
; LA32D: # %bb.0:
-; LA32D-NEXT: frecip.d $fa0, $fa0
+; LA32D-NEXT: movgr2fr.w $fa1, $zero
+; LA32D-NEXT: lu12i.w $a0, 261888
+; LA32D-NEXT: movgr2frh.w $fa1, $a0
+; LA32D-NEXT: fdiv.d $fa0, $fa1, $fa0
; LA32D-NEXT: ret
;
; LA64F-LABEL: f64_reciprocal:
diff --git a/llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll b/llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll
index e5c848e0f1542..91d1efe9e3f98 100644
--- a/llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll
+++ b/llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll
@@ -11,21 +11,24 @@ declare double @llvm.sqrt.f64(double)
define float @frsqrt_f32(float %a) nounwind {
; LA32F-LABEL: frsqrt_f32:
; LA32F: # %bb.0:
-; LA32F-NEXT: frsqrt.s $fa0, $fa0
+; LA32F-NEXT: fsqrt.s $fa0, $fa0
+; LA32F-NEXT: lu12i.w $a0, 260096
+; LA32F-NEXT: movgr2fr.w $fa1, $a0
+; LA32F-NEXT: fdiv.s $fa0, $fa1, $fa0
; LA32F-NEXT: ret
;
; LA32F-FRECIPE-LABEL: frsqrt_f32:
; LA32F-FRECIPE: # %bb.0:
-; LA32F-FRECIPE-NEXT: frsqrte.s $fa1, $fa0
-; LA32F-FRECIPE-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0)
-; LA32F-FRECIPE-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI0_0)
-; LA32F-FRECIPE-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_1)
-; LA32F-FRECIPE-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI0_1)
-; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1
-; LA32F-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1
-; LA32F-FRECIPE-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
-; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3
-; LA32F-FRECIPE-NEXT: fmul.s $fa0, $fa1, $fa0
+; LA32F-FRECIPE-NEXT: frsqrte.s $fa1, $fa0
+; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1
+; LA32F-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1
+; LA32F-FRECIPE-NEXT: lu12i.w $a0, -261120
+; LA32F-FRECIPE-NEXT: movgr2fr.w $fa2, $a0
+; LA32F-FRECIPE-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
+; LA32F-FRECIPE-NEXT: lu12i.w $a0, -266240
+; LA32F-FRECIPE-NEXT: movgr2fr.w $fa2, $a0
+; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2
+; LA32F-FRECIPE-NEXT: fmul.s $fa0, $fa1, $fa0
; LA32F-FRECIPE-NEXT: ret
;
; LA64D-LABEL: frsqrt_f32:
@@ -53,30 +56,30 @@ define float @frsqrt_f32(float %a) nounwind {
define double @frsqrt_f64(double %a) nounwind {
; LA32F-LABEL: frsqrt_f64:
; LA32F: # %bb.0:
-; LA32F-NEXT: addi.w $sp, $sp, -16
-; LA32F-NEXT: st.w $ra, $sp, 12
-; LA32F-NEXT: bl sqrt
-; LA32F-NEXT: move $a2, $a0
-; LA32F-NEXT: move $a3, $a1
-; LA32F-NEXT: lu12i.w $a1, 261888
-; LA32F-NEXT: move $a0, $zero
-; LA32F-NEXT: bl __divdf3
-; LA32F-NEXT: ld.w $ra, $sp, 12
-; LA32F-NEXT: addi.w $sp, $sp, 16
+; LA32F-NEXT: addi.w $sp, $sp, -16
+; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32F-NEXT: bl sqrt
+; LA32F-NEXT: move $a2, $a0
+; LA32F-NEXT: move $a3, $a1
+; LA32F-NEXT: lu12i.w $a1, 261888
+; LA32F-NEXT: move $a0, $zero
+; LA32F-NEXT: bl __divdf3
+; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32F-NEXT: addi.w $sp, $sp, 16
; LA32F-NEXT: ret
;
; LA32F-FRECIPE-LABEL: frsqrt_f64:
; LA32F-FRECIPE: # %bb.0:
-; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, -16
-; LA32F-FRECIPE-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32F-FRECIPE-NEXT: bl sqrt
-; LA32F-FRECIPE-NEXT: move $a2, $a0
-; LA32F-FRECIPE-NEXT: move $a3, $a1
-; LA32F-FRECIPE-NEXT: lu12i.w $a1, 261888
-; LA32F-FRECIPE-NEXT: move $a0, $zero
-; LA32F-FRECIPE-NEXT: bl __divdf3
-; LA32F-FRECIPE-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, 16
+; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, -16
+; LA32F-FRECIPE-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32F-FRECIPE-NEXT: bl sqrt
+; LA32F-FRECIPE-NEXT: move $a2, $a0
+; LA32F-FRECIPE-NEXT: move $a3, $a1
+; LA32F-FRECIPE-NEXT: lu12i.w $a1, 261888
+; LA32F-FRECIPE-NEXT: move $a0, $zero
+; LA32F-FRECIPE-NEXT: bl __divdf3
+; LA32F-FRECIPE-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, 16
; LA32F-FRECIPE-NEXT: ret
;
; LA64D-LABEL: frsqrt_f64:
@@ -105,102 +108,104 @@ define double @frsqrt_f64(double %a) nounwind {
}
define double @sqrt_simplify_before_recip_3_uses_f64(double %x, ptr %p1, ptr %p2) nounwind {
-; LA32F-LABEL: sqrt_simplify_before_recip_3_uses_f64:
-; LA32F: # %bb.0:
-; LA32F-NEXT: addi.w $sp, $sp, -32
-; LA32F-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
-; LA32F-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
-; LA32F-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill
-; LA32F-NEXT: st.w $s1, $sp, 16 # 4-byte Folded Spill
-; LA32F-NEXT: st.w $s2, $sp, 12 # 4-byte Folded Spill
-; LA32F-NEXT: st.w $s3, $sp, 8 # 4-byte Folded Spill
-; LA32F-NEXT: st.w $s4, $sp, 4 # 4-byte Folded Spill
-; LA32F-NEXT: move $fp, $a3
-; LA32F-NEXT: move $s0, $a2
-; LA32F-NEXT: bl sqrt
-; LA32F-NEXT: move $s1, $a0
-; LA32F-NEXT: move $s2, $a1
-; LA32F-NEXT: lu12i.w $a1, 261888
-; LA32F-NEXT: move $a0, $zero
-; LA32F-NEXT: move $a2, $s1
-; LA32F-NEXT: move $a3, $s2
-; LA32F-NEXT: bl __divdf3
-; LA32F-NEXT: move $s3, $a0
-; LA32F-NEXT: move $s4, $a1
-; LA32F-NEXT: lu12i.w $a1, 263248
-; LA32F-NEXT: move $a0, $zero
-; LA32F-NEXT: move $a2, $s1
-; LA32F-NEXT: move $a3, $s2
-; LA32F-NEXT: bl __divdf3
-; LA32F-NEXT: st.w $s3, $s0, 0
-; LA32F-NEXT: st.w $s4, $s0, 4
-; LA32F-NEXT: st.w $a0, $fp, 0
-; LA32F-NEXT: st.w $a1, $fp, 4
-; LA32F-NEXT: move $a0, $s1
-; LA32F-NEXT: move $a1, $s2
-; LA32F-NEXT: ld.w $s4, $sp, 4 # 4-byte Folded Reload
-; LA32F-NEXT: ld.w $s3, $sp, 8 # 4-byte Folded Reload
-; LA32F-NEXT: ld.w $s2, $sp, 12 # 4-byte Folded Reload
-; LA32F-NEXT: ld.w $s1, $sp, 16 # 4-byte Folded Reload
-; LA32F-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload
-; LA32F-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
-; LA32F-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
-; LA32F-NEXT: addi.w $sp, $sp, 32
+; LA32F-LABEL: sqrt_simplify_before_recip_3_uses_f64:
+; LA32F: # %bb.0:
+; LA32F-NEXT: addi.w $sp, $sp, -32
+; LA32F-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32F-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
+; LA32F-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill
+; LA32F-NEXT: st.w $s1, $sp, 16 # 4-byte Folded Spill
+; LA32F-NEXT: st.w $s2, $sp, 12 # 4-byte Folded Spill
+; LA32F-NEXT: st.w $s3, $sp, 8 # 4-byte Folded Spill
+; LA32F-NEXT: st.w $s4, $sp, 4 # 4-byte Folded Spill
+; LA32F-NEXT: move $fp, $a3
+; LA32F-NEXT: move $s0, $a2
+; LA32F-NEXT: bl sqrt
+; LA32F-NEXT: move $s1, $a0
+; LA32F-NEXT: move $s2, $a1
+; LA32F-NEXT: lu12i.w $a1, 261888
+; LA32F-NEXT: move $a0, $zero
+; LA32F-NEXT: move $a2, $s1
+; LA32F-NEXT: move $a3, $s2
+; LA32F-NEXT: bl __divdf3
+; LA32F-NEXT: move $s3, $a0
+; LA32F-NEXT: move $s4, $a1
+; LA32F-NEXT: lu12i.w $a1, 263248
+; LA32F-NEXT: move $a0, $zero
+; LA32F-NEXT: move $a2, $s1
+; LA32F-NEXT: move $a3, $s2
+; LA32F-NEXT: bl __divdf3
+; LA32F-NEXT: st.w $s3, $s0, 0
+; LA32F-NEXT: st.w $s4, $s0, 4
+; LA32F-NEXT: st.w $a0, $fp, 0
+; LA32F-NEXT: st.w $a1, $fp, 4
+; LA32F-NEXT: move $a0, $s1
+; LA32F-NEXT: move $a1, $s2
+; LA32F-NEXT: ld.w $s4, $sp, 4 # 4-byte Folded Reload
+; LA32F-NEXT: ld.w $s3, $sp, 8 # 4-byte Folded Reload
+; LA32F-NEXT: ld.w $s2, $sp, 12 # 4-byte Folded Reload
+; LA32F-NEXT: ld.w $s1, $sp, 16 # 4-byte Folded Reload
+; LA32F-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload
+; LA32F-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
+; LA32F-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32F-NEXT: addi.w $sp, $sp, 32
; LA32F-NEXT: ret
;
-; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_f64:
-; LA32F-FRECIPE: # %bb.0:
-; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, -32
-; LA32F-FRECIPE-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
-; LA32F-FRECIPE-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
-; LA32F-FRECIPE-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill
-; LA32F-FRECIPE-NEXT: st.w $s1, $sp, 16 # 4-byte Folded Spill
-; LA32F-FRECIPE-NEXT: st.w $s2, $sp, 12 # 4-byte Folded Spill
-; LA32F-FRECIPE-NEXT: st.w $s3, $sp, 8 # 4-byte Folded Spill
-; LA32F-FRECIPE-NEXT: st.w $s4, $sp, 4 # 4-byte Folded Spill
-; LA32F-FRECIPE-NEXT: move $fp, $a3
-; LA32F-FRECIPE-NEXT: move $s0, $a2
-; LA32F-FRECIPE-NEXT: bl sqrt
-; LA32F-FRECIPE-NEXT: move $s1, $a0
-; LA32F-FRECIPE-NEXT: move $s2, $a1
-; LA32F-FRECIPE-NEXT: lu12i.w $a1, 261888
-; LA32F-FRECIPE-NEXT: move $a0, $zero
-; LA32F-FRECIPE-NEXT: move $a2, $s1
-; LA32F-FRECIPE-NEXT: move $a3, $s2
-; LA32F-FRECIPE-NEXT: bl __divdf3
-; LA32F-FRECIPE-NEXT: move $s3, $a0
-; LA32F-FRECIPE-NEXT: move $s4, $a1
-; LA32F-FRECIPE-NEXT: lu12i.w $a1, 263248
-; LA32F-FRECIPE-NEXT: move $a0, $zero
-; LA32F-FRECIPE-NEXT: move $a2, $s1
-; LA32F-FRECIPE-NEXT: move $a3, $s2
-; LA32F-FRECIPE-NEXT: bl __divdf3
-; LA32F-FRECIPE-NEXT: st.w $s3, $s0, 0
-; LA32F-FRECIPE-NEXT: st.w $s4, $s0, 4
-; LA32F-FRECIPE-NEXT: st.w $a0, $fp, 0
-; LA32F-FRECIPE-NEXT: st.w $a1, $fp, 4
-; LA32F-FRECIPE-NEXT: move $a0, $s1
-; LA32F-FRECIPE-NEXT: move $a1, $s2
-; LA32F-FRECIPE-NEXT: ld.w $s4, $sp, 4 # 4-byte Folded Reload
-; LA32F-FRECIPE-NEXT: ld.w $s3, $sp, 8 # 4-byte Folded Reload
-; LA32F-FRECIPE-NEXT: ld.w $s2, $sp, 12 # 4-byte Folded Reload
-; LA32F-FRECIPE-NEXT: ld.w $s1, $sp, 16 # 4-byte Folded Reload
-; LA32F-FRECIPE-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload
-; LA32F-FRECIPE-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
-; LA32F-FRECIPE-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
-; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, 32
+; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_f64:
+; LA32F-FRECIPE: # %bb.0:
+; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, -32
+; LA32F-FRECIPE-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32F-FRECIPE-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
+; LA32F-FRECIPE-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill
+; LA32F-FRECIPE-NEXT: st.w $s1, $sp, 16 # 4-byte Folded Spill
+; LA32F-FRECIPE-NEXT: st.w $s2, $sp, 12 # 4-byte Folded Spill
+; LA32F-FRECIPE-NEXT: st.w $s3, $sp, 8 # 4-byte Folded Spill
+; LA32F-FRECIPE-NEXT: st.w $s4, $sp, 4 # 4-byte Folded Spill
+; LA32F-FRECIPE-NEXT: move $fp, $a3
+; LA32F-FRECIPE-NEXT: move $s0, $a2
+; LA32F-FRECIPE-NEXT: bl sqrt
+; LA32F-FRECIPE-NEXT: move $s1, $a0
+; LA32F-FRECIPE-NEXT: move $s2, $a1
+; LA32F-FRECIPE-NEXT: lu12i.w $a1, 261888
+; LA32F-FRECIPE-NEXT: move $a0, $zero
+; LA32F-FRECIPE-NEXT: move $a2, $s1
+; LA32F-FRECIPE-NEXT: move $a3, $s2
+; LA32F-FRECIPE-NEXT: bl __divdf3
+; LA32F-FRECIPE-NEXT: move $s3, $a0
+; LA32F-FRECIPE-NEXT: move $s4, $a1
+; LA32F-FRECIPE-NEXT: lu12i.w $a1, 263248
+; LA32F-FRECIPE-NEXT: move $a0, $zero
+; LA32F-FRECIPE-NEXT: move $a2, $s1
+; LA32F-FRECIPE-NEXT: move $a3, $s2
+; LA32F-FRECIPE-NEXT: bl __divdf3
+; LA32F-FRECIPE-NEXT: st.w $s3, $s0, 0
+; LA32F-FRECIPE-NEXT: st.w $s4, $s0, 4
+; LA32F-FRECIPE-NEXT: st.w $a0, $fp, 0
+; LA32F-FRECIPE-NEXT: st.w $a1, $fp, 4
+; LA32F-FRECIPE-NEXT: move $a0, $s1
+; LA32F-FRECIPE-NEXT: move $a1, $s2
+; LA32F-FRECIPE-NEXT: ld.w $s4, $sp, 4 # 4-byte Folded Reload
+; LA32F-FRECIPE-NEXT: ld.w $s3, $sp, 8 # 4-byte Folded Reload
+; LA32F-FRECIPE-NEXT: ld.w $s2, $sp, 12 # 4-byte Folded Reload
+; LA32F-FRECIPE-NEXT: ld.w $s1, $sp, 16 # 4-byte Folded Reload
+; LA32F-FRECIPE-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload
+; LA32F-FRECIPE-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
+; LA32F-FRECIPE-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, 32
; LA32F-FRECIPE-NEXT: ret
;
; LA64D-LABEL: sqrt_simplify_before_recip_3_uses_f64:
; LA64D: # %bb.0:
-; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI2_0)
-; LA64D-NEXT: fld.d $fa2, $a2, %pc_lo12(.LCPI2_0)
-; LA64D-NEXT: fsqrt.d $fa1, $fa0
-; LA64D-NEXT: frsqrt.d $fa0, $fa0
-; LA64D-NEXT: fdiv.d $fa2, $fa2, $fa1
-; LA64D-NEXT: fst.d $fa0, $a0, 0
-; LA64D-NEXT: fst.d $fa2, $a1, 0
-; LA64D-NEXT: fmov.d $fa0, $fa1
+; LA64D-NEXT: fsqrt.d $fa1, $fa0
+; LA64D-NEXT: frsqrt.d $fa0, $fa0
+; LA64D-NEXT: ori $a2, $zero, 0
+; LA64D-NEXT: lu32i.d $a2, 327680
+; LA64D-NEXT: lu52i.d $a2, $a2, 1028
+; LA64D-NEXT: movgr2fr.d $fa2, $a2
+; LA64D-NEXT: fdiv.d $fa2, $fa2, $fa1
+; LA64D-NEXT: fst.d $fa0, $a0, 0
+; LA64D-NEXT: fst.d $fa2, $a1, 0
+; LA64D-NEXT: fmov.d $fa0, $fa1
; LA64D-NEXT: ret
;
; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_f64:
@@ -214,12 +219,14 @@ define double @sqrt_simplify_before_recip_3_uses_f64(double %x, ptr %p1, ptr %p2
; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4
; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2
; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa0, $fa1
-; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI2_0)
-; LA64D-FRECIPE-NEXT: fld.d $fa5, $a2, %pc_lo12(.LCPI2_0)
; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa2, $fa1, $fa3
; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4
; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2
-; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa1, $fa5
+; LA64D-FRECIPE-NEXT: ori $a2, $zero, 0
+; LA64D-FRECIPE-NEXT: lu32i.d $a2, 327680
+; LA64D-FRECIPE-NEXT: lu52i.d $a2, $a2, 1028
+; LA64D-FRECIPE-NEXT: movgr2fr.d $fa2, $a2
+; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa1, $fa2
; LA64D-FRECIPE-NEXT: fmul.d $fa0, $fa0, $fa1
; LA64D-FRECIPE-NEXT: fst.d $fa1, $a0, 0
; LA64D-FRECIPE-NEXT: fst.d $fa2, $a1, 0
@@ -235,103 +242,107 @@ define double @sqrt_simplify_before_recip_3_uses_f64(double %x, ptr %p1, ptr %p2
define double @sqrt_simplify_before_recip_3_uses_order_f64(double %x, ptr %p1, ptr %p2) nounwind {
-; LA32F-LABEL: sqrt_simplify_before_recip_3_uses_order_f64:
-; LA32F: # %bb.0:
-; LA32F-NEXT: addi.w $sp, $sp, -32
-; LA32F-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
-; LA32F-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
-; LA32F-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill
-; LA32F-NEXT: st.w $s1, $sp, 16 # 4-byte Folded Spill
-; LA32F-NEXT: st.w $s2, $sp, 12 # 4-byte Folded Spill
-; LA32F-NEXT: st.w $s3, $sp, 8 # 4-byte Folded Spill
-; LA32F-NEXT: st.w $s4, $sp, 4 # 4-byte Folded Spill
-; LA32F-NEXT: move $fp, $a3
-; LA32F-NEXT: move $s0, $a2
-; LA32F-NEXT: bl sqrt
-; LA32F-NEXT: move $s1, $a0
-; LA32F-NEXT: move $s2, $a1
-; LA32F-NEXT: lu12i.w $a1, 263248
-; LA32F-NEXT: move $a0, $zero
-; LA32F-NEXT: move $a2, $s1
-; LA32F-NEXT: move $a3, $s2
-; LA32F-NEXT: bl __divdf3
-; LA32F-NEXT: move $s3, $a0
-; LA32F-NEXT: move $s4, $a1
-; LA32F-NEXT: lu12i.w $a1, 263256
-; LA32F-NEXT: move $a0, $zero
-; LA32F-NEXT: move $a2, $s1
-; LA32F-NEXT: move $a3, $s2
-; LA32F-NEXT: bl __divdf3
-; LA32F-NEXT: st.w $s3, $s0, 0
-; LA32F-NEXT: st.w $s4, $s0, 4
-; LA32F-NEXT: st.w $a0, $fp, 0
-; LA32F-NEXT: st.w $a1, $fp, 4
-; LA32F-NEXT: move $a0, $s1
-; LA32F-NEXT: move $a1, $s2
-; LA32F-NEXT: ld.w $s4, $sp, 4 # 4-byte Folded Reload
-; LA32F-NEXT: ld.w $s3, $sp, 8 # 4-byte Folded Reload
-; LA32F-NEXT: ld.w $s2, $sp, 12 # 4-byte Folded Reload
-; LA32F-NEXT: ld.w $s1, $sp, 16 # 4-byte Folded Reload
-; LA32F-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload
-; LA32F-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
-; LA32F-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
-; LA32F-NEXT: addi.w $sp, $sp, 32
+; LA32F-LABEL: sqrt_simplify_before_recip_3_uses_order_f64:
+; LA32F: # %bb.0:
+; LA32F-NEXT: addi.w $sp, $sp, -32
+; LA32F-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32F-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
+; LA32F-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill
+; LA32F-NEXT: st.w $s1, $sp, 16 # 4-byte Folded Spill
+; LA32F-NEXT: st.w $s2, $sp, 12 # 4-byte Folded Spill
+; LA32F-NEXT: st.w $s3, $sp, 8 # 4-byte Folded Spill
+; LA32F-NEXT: st.w $s4, $sp, 4 # 4-byte Folded Spill
+; LA32F-NEXT: move $fp, $a3
+; LA32F-NEXT: move $s0, $a2
+; LA32F-NEXT: bl sqrt
+; LA32F-NEXT: move $s1, $a0
+; LA32F-NEXT: move $s2, $a1
+; LA32F-NEXT: lu12i.w $a1, 263248
+; LA32F-NEXT: move $a0, $zero
+; LA32F-NEXT: move $a2, $s1
+; LA32F-NEXT: move $a3, $s2
+; LA32F-NEXT: bl __divdf3
+; LA32F-NEXT: move $s3, $a0
+; LA32F-NEXT: move $s4, $a1
+; LA32F-NEXT: lu12i.w $a1, 263256
+; LA32F-NEXT: move $a0, $zero
+; LA32F-NEXT: move $a2, $s1
+; LA32F-NEXT: move $a3, $s2
+; LA32F-NEXT: bl __divdf3
+; LA32F-NEXT: st.w $s3, $s0, 0
+; LA32F-NEXT: st.w $s4, $s0, 4
+; LA32F-NEXT: st.w $a0, $fp, 0
+; LA32F-NEXT: st.w $a1, $fp, 4
+; LA32F-NEXT: move $a0, $s1
+; LA32F-NEXT: move $a1, $s2
+; LA32F-NEXT: ld.w $s4, $sp, 4 # 4-byte Folded Reload
+; LA32F-NEXT: ld.w $s3, $sp, 8 # 4-byte Folded Reload
+; LA32F-NEXT: ld.w $s2, $sp, 12 # 4-byte Folded Reload
+; LA32F-NEXT: ld.w $s1, $sp, 16 # 4-byte Folded Reload
+; LA32F-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload
+; LA32F-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
+; LA32F-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32F-NEXT: addi.w $sp, $sp, 32
; LA32F-NEXT: ret
;
-; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_order_f64:
-; LA32F-FRECIPE: # %bb.0:
-; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, -32
-; LA32F-FRECIPE-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
-; LA32F-FRECIPE-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
-; LA32F-FRECIPE-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill
-; LA32F-FRECIPE-NEXT: st.w $s1, $sp, 16 # 4-byte Folded Spill
-; LA32F-FRECIPE-NEXT: st.w $s2, $sp, 12 # 4-byte Folded Spill
-; LA32F-FRECIPE-NEXT: st.w $s3, $sp, 8 # 4-byte Folded Spill
-; LA32F-FRECIPE-NEXT: st.w $s4, $sp, 4 # 4-byte Folded Spill
-; LA32F-FRECIPE-NEXT: move $fp, $a3
-; LA32F-FRECIPE-NEXT: move $s0, $a2
-; LA32F-FRECIPE-NEXT: bl sqrt
-; LA32F-FRECIPE-NEXT: move $s1, $a0
-; LA32F-FRECIPE-NEXT: move $s2, $a1
-; LA32F-FRECIPE-NEXT: lu12i.w $a1, 263248
-; LA32F-FRECIPE-NEXT: move $a0, $zero
-; LA32F-FRECIPE-NEXT: move $a2, $s1
-; LA32F-FRECIPE-NEXT: move $a3, $s2
-; LA32F-FRECIPE-NEXT: bl __divdf3
-; LA32F-FRECIPE-NEXT: move $s3, $a0
-; LA32F-FRECIPE-NEXT: move $s4, $a1
-; LA32F-FRECIPE-NEXT: lu12i.w $a1, 263256
-; LA32F-FRECIPE-NEXT: move $a0, $zero
-; LA32F-FRECIPE-NEXT: move $a2, $s1
-; LA32F-FRECIPE-NEXT: move $a3, $s2
-; LA32F-FRECIPE-NEXT: bl __divdf3
-; LA32F-FRECIPE-NEXT: st.w $s3, $s0, 0
-; LA32F-FRECIPE-NEXT: st.w $s4, $s0, 4
-; LA32F-FRECIPE-NEXT: st.w $a0, $fp, 0
-; LA32F-FRECIPE-NEXT: st.w $a1, $fp, 4
-; LA32F-FRECIPE-NEXT: move $a0, $s1
-; LA32F-FRECIPE-NEXT: move $a1, $s2
-; LA32F-FRECIPE-NEXT: ld.w $s4, $sp, 4 # 4-byte Folded Reload
-; LA32F-FRECIPE-NEXT: ld.w $s3, $sp, 8 # 4-byte Folded Reload
-; LA32F-FRECIPE-NEXT: ld.w $s2, $sp, 12 # 4-byte Folded Reload
-; LA32F-FRECIPE-NEXT: ld.w $s1, $sp, 16 # 4-byte Folded Reload
-; LA32F-FRECIPE-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload
-; LA32F-FRECIPE-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
-; LA32F-FRECIPE-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
-; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, 32
+; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_order_f64:
+; LA32F-FRECIPE: # %bb.0:
+; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, -32
+; LA32F-FRECIPE-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32F-FRECIPE-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
+; LA32F-FRECIPE-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill
+; LA32F-FRECIPE-NEXT: st.w $s1, $sp, 16 # 4-byte Folded Spill
+; LA32F-FRECIPE-NEXT: st.w $s2, $sp, 12 # 4-byte Folded Spill
+; LA32F-FRECIPE-NEXT: st.w $s3, $sp, 8 # 4-byte Folded Spill
+; LA32F-FRECIPE-NEXT: st.w $s4, $sp, 4 # 4-byte Folded Spill
+; LA32F-FRECIPE-NEXT: move $fp, $a3
+; LA32F-FRECIPE-NEXT: move $s0, $a2
+; LA32F-FRECIPE-NEXT: bl sqrt
+; LA32F-FRECIPE-NEXT: move $s1, $a0
+; LA32F-FRECIPE-NEXT: move $s2, $a1
+; LA32F-FRECIPE-NEXT: lu12i.w $a1, 263248
+; LA32F-FRECIPE-NEXT: move $a0, $zero
+; LA32F-FRECIPE-NEXT: move $a2, $s1
+; LA32F-FRECIPE-NEXT: move $a3, $s2
+; LA32F-FRECIPE-NEXT: bl __divdf3
+; LA32F-FRECIPE-NEXT: move $s3, $a0
+; LA32F-FRECIPE-NEXT: move $s4, $a1
+; LA32F-FRECIPE-NEXT: lu12i.w $a1, 263256
+; LA32F-FRECIPE-NEXT: move $a0, $zero
+; LA32F-FRECIPE-NEXT: move $a2, $s1
+; LA32F-FRECIPE-NEXT: move $a3, $s2
+; LA32F-FRECIPE-NEXT: bl __divdf3
+; LA32F-FRECIPE-NEXT: st.w $s3, $s0, 0
+; LA32F-FRECIPE-NEXT: st.w $s4, $s0, 4
+; LA32F-FRECIPE-NEXT: st.w $a0, $fp, 0
+; LA32F-FRECIPE-NEXT: st.w $a1, $fp, 4
+; LA32F-FRECIPE-NEXT: move $a0, $s1
+; LA32F-FRECIPE-NEXT: move $a1, $s2
+; LA32F-FRECIPE-NEXT: ld.w $s4, $sp, 4 # 4-byte Folded Reload
+; LA32F-FRECIPE-NEXT: ld.w $s3, $sp, 8 # 4-byte Folded Reload
+; LA32F-FRECIPE-NEXT: ld.w $s2, $sp, 12 # 4-byte Folded Reload
+; LA32F-FRECIPE-NEXT: ld.w $s1, $sp, 16 # 4-byte Folded Reload
+; LA32F-FRECIPE-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload
+; LA32F-FRECIPE-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
+; LA32F-FRECIPE-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, 32
; LA32F-FRECIPE-NEXT: ret
;
; LA64D-LABEL: sqrt_simplify_before_recip_3_uses_order_f64:
; LA64D: # %bb.0:
-; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_0)
-; LA64D-NEXT: fld.d $fa1, $a2, %pc_lo12(.LCPI3_0)
-; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_1)
-; LA64D-NEXT: fld.d $fa2, $a2, %pc_lo12(.LCPI3_1)
-; LA64D-NEXT: fsqrt.d $fa0, $fa0
-; LA64D-NEXT: fdiv.d $fa1, $fa1, $fa0
-; LA64D-NEXT: fdiv.d $fa2, $fa2, $fa0
-; LA64D-NEXT: fst.d $fa1, $a0, 0
-; LA64D-NEXT: fst.d $fa2, $a1, 0
+; LA64D-NEXT: fsqrt.d $fa0, $fa0
+; LA64D-NEXT: ori $a2, $zero, 0
+; LA64D-NEXT: ori $a3, $zero, 0
+; LA64D-NEXT: lu32i.d $a3, 327680
+; LA64D-NEXT: lu52i.d $a3, $a3, 1028
+; LA64D-NEXT: movgr2fr.d $fa1, $a3
+; LA64D-NEXT: fdiv.d $fa1, $fa1, $fa0
+; LA64D-NEXT: lu32i.d $a2, 360448
+; LA64D-NEXT: lu52i.d $a2, $a2, 1028
+; LA64D-NEXT: movgr2fr.d $fa2, $a2
+; LA64D-NEXT: fdiv.d $fa2, $fa2, $fa0
+; LA64D-NEXT: fst.d $fa1, $a0, 0
+; LA64D-NEXT: fst.d $fa2, $a1, 0
; LA64D-NEXT: ret
;
; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_order_f64:
@@ -347,14 +358,18 @@ define double @sqrt_simplify_before_recip_3_uses_order_f64(double %x, ptr %p1, p
; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa0, $fa1
; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa2, $fa1, $fa3
; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4
-; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_0)
-; LA64D-FRECIPE-NEXT: fld.d $fa3, $a2, %pc_lo12(.LCPI3_0)
-; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_1)
-; LA64D-FRECIPE-NEXT: fld.d $fa4, $a2, %pc_lo12(.LCPI3_1)
; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2
; LA64D-FRECIPE-NEXT: fmul.d $fa0, $fa0, $fa1
-; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa1, $fa3
-; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4
+; LA64D-FRECIPE-NEXT: ori $a2, $zero, 0
+; LA64D-FRECIPE-NEXT: ori $a3, $zero, 0
+; LA64D-FRECIPE-NEXT: lu32i.d $a3, 327680
+; LA64D-FRECIPE-NEXT: lu52i.d $a3, $a3, 1028
+; LA64D-FRECIPE-NEXT: movgr2fr.d $fa2, $a3
+; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa1, $fa2
+; LA64D-FRECIPE-NEXT: lu32i.d $a2, 360448
+; LA64D-FRECIPE-NEXT: lu52i.d $a2, $a2, 1028
+; LA64D-FRECIPE-NEXT: movgr2fr.d $fa3, $a2
+; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa3
; LA64D-FRECIPE-NEXT: fst.d $fa2, $a0, 0
; LA64D-FRECIPE-NEXT: fst.d $fa1, $a1, 0
; LA64D-FRECIPE-NEXT: ret
@@ -368,138 +383,142 @@ define double @sqrt_simplify_before_recip_3_uses_order_f64(double %x, ptr %p1, p
}
define double @sqrt_simplify_before_recip_4_uses_f64(double %x, ptr %p1, ptr %p2, ptr %p3) nounwind {
-; LA32F-LABEL: sqrt_simplify_before_recip_4_uses_f64:
-; LA32F: # %bb.0:
-; LA32F-NEXT: addi.w $sp, $sp, -48
-; LA32F-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill
-; LA32F-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill
-; LA32F-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill
-; LA32F-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill
-; LA32F-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill
-; LA32F-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill
-; LA32F-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill
-; LA32F-NEXT: st.w $s5, $sp, 16 # 4-byte Folded Spill
-; LA32F-NEXT: st.w $s6, $sp, 12 # 4-byte Folded Spill
-; LA32F-NEXT: st.w $s7, $sp, 8 # 4-byte Folded Spill
-; LA32F-NEXT: move $fp, $a4
-; LA32F-NEXT: move $s0, $a3
-; LA32F-NEXT: move $s1, $a2
-; LA32F-NEXT: bl sqrt
-; LA32F-NEXT: move $s2, $a0
-; LA32F-NEXT: move $s3, $a1
-; LA32F-NEXT: lu12i.w $a1, 261888
-; LA32F-NEXT: move $a0, $zero
-; LA32F-NEXT: move $a2, $s2
-; LA32F-NEXT: move $a3, $s3
-; LA32F-NEXT: bl __divdf3
-; LA32F-NEXT: move $s4, $a0
-; LA32F-NEXT: move $s5, $a1
-; LA32F-NEXT: lu12i.w $a1, 263248
-; LA32F-NEXT: move $a0, $zero
-; LA32F-NEXT: move $a2, $s2
-; LA32F-NEXT: move $a3, $s3
-; LA32F-NEXT: bl __divdf3
-; LA32F-NEXT: move $s6, $a0
-; LA32F-NEXT: move $s7, $a1
-; LA32F-NEXT: lu12i.w $a1, 263256
-; LA32F-NEXT: move $a0, $zero
-; LA32F-NEXT: move $a2, $s2
-; LA32F-NEXT: move $a3, $s3
-; LA32F-NEXT: bl __divdf3
-; LA32F-NEXT: st.w $s4, $s1, 0
-; LA32F-NEXT: st.w $s5, $s1, 4
-; LA32F-NEXT: st.w $s6, $s0, 0
-; LA32F-NEXT: st.w $s7, $s0, 4
-; LA32F-NEXT: st.w $a0, $fp, 0
-; LA32F-NEXT: st.w $a1, $fp, 4
-; LA32F-NEXT: move $a0, $s2
-; LA32F-NEXT: move $a1, $s3
-; LA32F-NEXT: ld.w $s7, $sp, 8 # 4-byte Folded Reload
-; LA32F-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload
-; LA32F-NEXT: ld.w $s5, $sp, 16 # 4-byte Folded Reload
-; LA32F-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload
-; LA32F-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload
-; LA32F-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload
-; LA32F-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload
-; LA32F-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload
-; LA32F-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload
-; LA32F-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload
-; LA32F-NEXT: addi.w $sp, $sp, 48
+; LA32F-LABEL: sqrt_simplify_before_recip_4_uses_f64:
+; LA32F: # %bb.0:
+; LA32F-NEXT: addi.w $sp, $sp, -48
+; LA32F-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill
+; LA32F-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill
+; LA32F-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill
+; LA32F-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill
+; LA32F-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill
+; LA32F-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill
+; LA32F-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill
+; LA32F-NEXT: st.w $s5, $sp, 16 # 4-byte Folded Spill
+; LA32F-NEXT: st.w $s6, $sp, 12 # 4-byte Folded Spill
+; LA32F-NEXT: st.w $s7, $sp, 8 # 4-byte Folded Spill
+; LA32F-NEXT: move $fp, $a4
+; LA32F-NEXT: move $s0, $a3
+; LA32F-NEXT: move $s1, $a2
+; LA32F-NEXT: bl sqrt
+; LA32F-NEXT: move $s2, $a0
+; LA32F-NEXT: move $s3, $a1
+; LA32F-NEXT: lu12i.w $a1, 261888
+; LA32F-NEXT: move $a0, $zero
+; LA32F-NEXT: move $a2, $s2
+; LA32F-NEXT: move $a3, $s3
+; LA32F-NEXT: bl __divdf3
+; LA32F-NEXT: move $s4, $a0
+; LA32F-NEXT: move $s5, $a1
+; LA32F-NEXT: lu12i.w $a1, 263248
+; LA32F-NEXT: move $a0, $zero
+; LA32F-NEXT: move $a2, $s2
+; LA32F-NEXT: move $a3, $s3
+; LA32F-NEXT: bl __divdf3
+; LA32F-NEXT: move $s6, $a0
+; LA32F-NEXT: move $s7, $a1
+; LA32F-NEXT: lu12i.w $a1, 263256
+; LA32F-NEXT: move $a0, $zero
+; LA32F-NEXT: move $a2, $s2
+; LA32F-NEXT: move $a3, $s3
+; LA32F-NEXT: bl __divdf3
+; LA32F-NEXT: st.w $s4, $s1, 0
+; LA32F-NEXT: st.w $s5, $s1, 4
+; LA32F-NEXT: st.w $s6, $s0, 0
+; LA32F-NEXT: st.w $s7, $s0, 4
+; LA32F-NEXT: st.w $a0, $fp, 0
+; LA32F-NEXT: st.w $a1, $fp, 4
+; LA32F-NEXT: move $a0, $s2
+; LA32F-NEXT: move $a1, $s3
+; LA32F-NEXT: ld.w $s7, $sp, 8 # 4-byte Folded Reload
+; LA32F-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload
+; LA32F-NEXT: ld.w $s5, $sp, 16 # 4-byte Folded Reload
+; LA32F-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload
+; LA32F-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload
+; LA32F-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload
+; LA32F-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload
+; LA32F-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload
+; LA32F-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload
+; LA32F-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload
+; LA32F-NEXT: addi.w $sp, $sp, 48
; LA32F-NEXT: ret
;
-; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_4_uses_f64:
-; LA32F-FRECIPE: # %bb.0:
-; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, -48
-; LA32F-FRECIPE-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill
-; LA32F-FRECIPE-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill
-; LA32F-FRECIPE-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill
-; LA32F-FRECIPE-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill
-; LA32F-FRECIPE-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill
-; LA32F-FRECIPE-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill
-; LA32F-FRECIPE-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill
-; LA32F-FRECIPE-NEXT: st.w $s5, $sp, 16 # 4-byte Folded Spill
-; LA32F-FRECIPE-NEXT: st.w $s6, $sp, 12 # 4-byte Folded Spill
-; LA32F-FRECIPE-NEXT: st.w $s7, $sp, 8 # 4-byte Folded Spill
-; LA32F-FRECIPE-NEXT: move $fp, $a4
-; LA32F-FRECIPE-NEXT: move $s0, $a3
-; LA32F-FRECIPE-NEXT: move $s1, $a2
-; LA32F-FRECIPE-NEXT: bl sqrt
-; LA32F-FRECIPE-NEXT: move $s2, $a0
-; LA32F-FRECIPE-NEXT: move $s3, $a1
-; LA32F-FRECIPE-NEXT: lu12i.w $a1, 261888
-; LA32F-FRECIPE-NEXT: move $a0, $zero
-; LA32F-FRECIPE-NEXT: move $a2, $s2
-; LA32F-FRECIPE-NEXT: move $a3, $s3
-; LA32F-FRECIPE-NEXT: bl __divdf3
-; LA32F-FRECIPE-NEXT: move $s4, $a0
-; LA32F-FRECIPE-NEXT: move $s5, $a1
-; LA32F-FRECIPE-NEXT: lu12i.w $a1, 263248
-; LA32F-FRECIPE-NEXT: move $a0, $zero
-; LA32F-FRECIPE-NEXT: move $a2, $s2
-; LA32F-FRECIPE-NEXT: move $a3, $s3
-; LA32F-FRECIPE-NEXT: bl __divdf3
-; LA32F-FRECIPE-NEXT: move $s6, $a0
-; LA32F-FRECIPE-NEXT: move $s7, $a1
-; LA32F-FRECIPE-NEXT: lu12i.w $a1, 263256
-; LA32F-FRECIPE-NEXT: move $a0, $zero
-; LA32F-FRECIPE-NEXT: move $a2, $s2
-; LA32F-FRECIPE-NEXT: move $a3, $s3
-; LA32F-FRECIPE-NEXT: bl __divdf3
-; LA32F-FRECIPE-NEXT: st.w $s4, $s1, 0
-; LA32F-FRECIPE-NEXT: st.w $s5, $s1, 4
-; LA32F-FRECIPE-NEXT: st.w $s6, $s0, 0
-; LA32F-FRECIPE-NEXT: st.w $s7, $s0, 4
-; LA32F-FRECIPE-NEXT: st.w $a0, $fp, 0
-; LA32F-FRECIPE-NEXT: st.w $a1, $fp, 4
-; LA32F-FRECIPE-NEXT: move $a0, $s2
-; LA32F-FRECIPE-NEXT: move $a1, $s3
-; LA32F-FRECIPE-NEXT: ld.w $s7, $sp, 8 # 4-byte Folded Reload
-; LA32F-FRECIPE-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload
-; LA32F-FRECIPE-NEXT: ld.w $s5, $sp, 16 # 4-byte Folded Reload
-; LA32F-FRECIPE-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload
-; LA32F-FRECIPE-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload
-; LA32F-FRECIPE-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload
-; LA32F-FRECIPE-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload
-; LA32F-FRECIPE-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload
-; LA32F-FRECIPE-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload
-; LA32F-FRECIPE-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload
-; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, 48
+; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_4_uses_f64:
+; LA32F-FRECIPE: # %bb.0:
+; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, -48
+; LA32F-FRECIPE-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill
+; LA32F-FRECIPE-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill
+; LA32F-FRECIPE-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill
+; LA32F-FRECIPE-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill
+; LA32F-FRECIPE-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill
+; LA32F-FRECIPE-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill
+; LA32F-FRECIPE-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill
+; LA32F-FRECIPE-NEXT: st.w $s5, $sp, 16 # 4-byte Folded Spill
+; LA32F-FRECIPE-NEXT: st.w $s6, $sp, 12 # 4-byte Folded Spill
+; LA32F-FRECIPE-NEXT: st.w $s7, $sp, 8 # 4-byte Folded Spill
+; LA32F-FRECIPE-NEXT: move $fp, $a4
+; LA32F-FRECIPE-NEXT: move $s0, $a3
+; LA32F-FRECIPE-NEXT: move $s1, $a2
+; LA32F-FRECIPE-NEXT: bl sqrt
+; LA32F-FRECIPE-NEXT: move $s2, $a0
+; LA32F-FRECIPE-NEXT: move $s3, $a1
+; LA32F-FRECIPE-NEXT: lu12i.w $a1, 261888
+; LA32F-FRECIPE-NEXT: move $a0, $zero
+; LA32F-FRECIPE-NEXT: move $a2, $s2
+; LA32F-FRECIPE-NEXT: move $a3, $s3
+; LA32F-FRECIPE-NEXT: bl __divdf3
+; LA32F-FRECIPE-NEXT: move $s4, $a0
+; LA32F-FRECIPE-NEXT: move $s5, $a1
+; LA32F-FRECIPE-NEXT: lu12i.w $a1, 263248
+; LA32F-FRECIPE-NEXT: move $a0, $zero
+; LA32F-FRECIPE-NEXT: move $a2, $s2
+; LA32F-FRECIPE-NEXT: move $a3, $s3
+; LA32F-FRECIPE-NEXT: bl __divdf3
+; LA32F-FRECIPE-NEXT: move $s6, $a0
+; LA32F-FRECIPE-NEXT: move $s7, $a1
+; LA32F-FRECIPE-NEXT: lu12i.w $a1, 263256
+; LA32F-FRECIPE-NEXT: move $a0, $zero
+; LA32F-FRECIPE-NEXT: move $a2, $s2
+; LA32F-FRECIPE-NEXT: move $a3, $s3
+; LA32F-FRECIPE-NEXT: bl __divdf3
+; LA32F-FRECIPE-NEXT: st.w $s4, $s1, 0
+; LA32F-FRECIPE-NEXT: st.w $s5, $s1, 4
+; LA32F-FRECIPE-NEXT: st.w $s6, $s0, 0
+; LA32F-FRECIPE-NEXT: st.w $s7, $s0, 4
+; LA32F-FRECIPE-NEXT: st.w $a0, $fp, 0
+; LA32F-FRECIPE-NEXT: st.w $a1, $fp, 4
+; LA32F-FRECIPE-NEXT: move $a0, $s2
+; LA32F-FRECIPE-NEXT: move $a1, $s3
+; LA32F-FRECIPE-NEXT: ld.w $s7, $sp, 8 # 4-byte Folded Reload
+; LA32F-FRECIPE-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload
+; LA32F-FRECIPE-NEXT: ld.w $s5, $sp, 16 # 4-byte Folded Reload
+; LA32F-FRECIPE-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload
+; LA32F-FRECIPE-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload
+; LA32F-FRECIPE-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload
+; LA32F-FRECIPE-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload
+; LA32F-FRECIPE-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload
+; LA32F-FRECIPE-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload
+; LA32F-FRECIPE-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload
+; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, 48
; LA32F-FRECIPE-NEXT: ret
;
; LA64D-LABEL: sqrt_simplify_before_recip_4_uses_f64:
; LA64D: # %bb.0:
-; LA64D-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_0)
-; LA64D-NEXT: fld.d $fa2, $a3, %pc_lo12(.LCPI4_0)
-; LA64D-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_1)
-; LA64D-NEXT: fld.d $fa3, $a3, %pc_lo12(.LCPI4_1)
-; LA64D-NEXT: fsqrt.d $fa1, $fa0
-; LA64D-NEXT: frsqrt.d $fa0, $fa0
-; LA64D-NEXT: fdiv.d $fa2, $fa2, $fa1
-; LA64D-NEXT: fdiv.d $fa3, $fa3, $fa1
-; LA64D-NEXT: fst.d $fa0, $a0, 0
-; LA64D-NEXT: fst.d $fa2, $a1, 0
-; LA64D-NEXT: fst.d $fa3, $a2, 0
-; LA64D-NEXT: fmov.d $fa0, $fa1
+; LA64D-NEXT: fsqrt.d $fa1, $fa0
+; LA64D-NEXT: frsqrt.d $fa0, $fa0
+; LA64D-NEXT: ori $a3, $zero, 0
+; LA64D-NEXT: ori $a4, $zero, 0
+; LA64D-NEXT: lu32i.d $a4, 327680
+; LA64D-NEXT: lu52i.d $a4, $a4, 1028
+; LA64D-NEXT: movgr2fr.d $fa2, $a4
+; LA64D-NEXT: fdiv.d $fa2, $fa2, $fa1
+; LA64D-NEXT: lu32i.d $a3, 360448
+; LA64D-NEXT: lu52i.d $a3, $a3, 1028
+; LA64D-NEXT: movgr2fr.d $fa3, $a3
+; LA64D-NEXT: fdiv.d $fa3, $fa3, $fa1
+; LA64D-NEXT: fst.d $fa0, $a0, 0
+; LA64D-NEXT: fst.d $fa2, $a1, 0
+; LA64D-NEXT: fst.d $fa3, $a2, 0
+; LA64D-NEXT: fmov.d $fa0, $fa1
; LA64D-NEXT: ret
;
; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_4_uses_f64:
@@ -514,14 +533,18 @@ define double @sqrt_simplify_before_recip_4_uses_f64(double %x, ptr %p1, ptr %p2
; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2
; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa0, $fa1
; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa2, $fa1, $fa3
-; LA64D-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_0)
-; LA64D-FRECIPE-NEXT: fld.d $fa3, $a3, %pc_lo12(.LCPI4_0)
-; LA64D-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_1)
-; LA64D-FRECIPE-NEXT: fld.d $fa5, $a3, %pc_lo12(.LCPI4_1)
; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4
; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2
-; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa1, $fa3
-; LA64D-FRECIPE-NEXT: fmul.d $fa3, $fa1, $fa5
+; LA64D-FRECIPE-NEXT: ori $a3, $zero, 0
+; LA64D-FRECIPE-NEXT: ori $a4, $zero, 0
+; LA64D-FRECIPE-NEXT: lu32i.d $a4, 327680
+; LA64D-FRECIPE-NEXT: lu52i.d $a4, $a4, 1028
+; LA64D-FRECIPE-NEXT: movgr2fr.d $fa2, $a4
+; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa1, $fa2
+; LA64D-FRECIPE-NEXT: lu32i.d $a3, 360448
+; LA64D-FRECIPE-NEXT: lu52i.d $a3, $a3, 1028
+; LA64D-FRECIPE-NEXT: movgr2fr.d $fa3, $a3
+; LA64D-FRECIPE-NEXT: fmul.d $fa3, $fa1, $fa3
; LA64D-FRECIPE-NEXT: fmul.d $fa0, $fa0, $fa1
; LA64D-FRECIPE-NEXT: fst.d $fa1, $a0, 0
; LA64D-FRECIPE-NEXT: fst.d $fa2, $a1, 0
@@ -541,66 +564,66 @@ define double @sqrt_simplify_before_recip_4_uses_f64(double %x, ptr %p1, ptr %p2
define float @sqrt_simplify_before_recip_3_uses_f32(float %x, ptr %p1, ptr %p2) nounwind {
; LA32F-LABEL: sqrt_simplify_before_recip_3_uses_f32:
; LA32F: # %bb.0:
-; LA32F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_0)
-; LA32F-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI5_0)
-; LA32F-NEXT: fsqrt.s $fa1, $fa0
-; LA32F-NEXT: frsqrt.s $fa0, $fa0
-; LA32F-NEXT: fdiv.s $fa2, $fa2, $fa1
-; LA32F-NEXT: fst.s $fa0, $a0, 0
-; LA32F-NEXT: fst.s $fa2, $a1, 0
-; LA32F-NEXT: fmov.s $fa0, $fa1
+; LA32F-NEXT: fsqrt.s $fa0, $fa0
+; LA32F-NEXT: lu12i.w $a2, 260096
+; LA32F-NEXT: movgr2fr.w $fa1, $a2
+; LA32F-NEXT: fdiv.s $fa1, $fa1, $fa0
+; LA32F-NEXT: lu12i.w $a2, 270976
+; LA32F-NEXT: movgr2fr.w $fa2, $a2
+; LA32F-NEXT: fdiv.s $fa2, $fa2, $fa0
+; LA32F-NEXT: fst.s $fa1, $a0, 0
+; LA32F-NEXT: fst.s $fa2, $a1, 0
; LA32F-NEXT: ret
;
; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_f32:
; LA32F-FRECIPE: # %bb.0:
-; LA32F-FRECIPE-NEXT: frsqrte.s $fa1, $fa0
-; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1
-; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1
-; LA32F-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_0)
-; LA32F-FRECIPE-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI5_0)
-; LA32F-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_1)
-; LA32F-FRECIPE-NEXT: fld.s $fa4, $a2, %pc_lo12(.LCPI5_1)
-; LA32F-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_2)
-; LA32F-FRECIPE-NEXT: fld.s $fa5, $a2, %pc_lo12(.LCPI5_2)
-; LA32F-FRECIPE-NEXT: fmadd.s $fa2, $fa2, $fa1, $fa3
-; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa4
-; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2
-; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa5
-; LA32F-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1
-; LA32F-FRECIPE-NEXT: fst.s $fa1, $a0, 0
-; LA32F-FRECIPE-NEXT: fst.s $fa2, $a1, 0
+; LA32F-FRECIPE-NEXT: frsqrte.s $fa1, $fa0
+; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1
+; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1
+; LA32F-FRECIPE-NEXT: lu12i.w $a2, -261120
+; LA32F-FRECIPE-NEXT: movgr2fr.w $fa3, $a2
+; LA32F-FRECIPE-NEXT: fmadd.s $fa2, $fa2, $fa1, $fa3
+; LA32F-FRECIPE-NEXT: lu12i.w $a2, -266240
+; LA32F-FRECIPE-NEXT: movgr2fr.w $fa3, $a2
+; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3
+; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2
+; LA32F-FRECIPE-NEXT: lu12i.w $a2, 270976
+; LA32F-FRECIPE-NEXT: movgr2fr.w $fa2, $a2
+; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa2
+; LA32F-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1
+; LA32F-FRECIPE-NEXT: fst.s $fa1, $a0, 0
+; LA32F-FRECIPE-NEXT: fst.s $fa2, $a1, 0
; LA32F-FRECIPE-NEXT: ret
;
-; LA64D-LABEL: sqrt_simplify_before_recip_3_uses_f32:
-; LA64D: # %bb.0:
-; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_0)
-; LA64D-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI5_0)
-; LA64D-NEXT: fsqrt.s $fa1, $fa0
-; LA64D-NEXT: frsqrt.s $fa0, $fa0
-; LA64D-NEXT: fdiv.s $fa2, $fa2, $fa1
-; LA64D-NEXT: fst.s $fa0, $a0, 0
-; LA64D-NEXT: fst.s $fa2, $a1, 0
-; LA64D-NEXT: fmov.s $fa0, $fa1
+; LA64D-LABEL: sqrt_simplify_before_recip_3_uses_f32:
+; LA64D: # %bb.0:
+; LA64D-NEXT: fsqrt.s $fa1, $fa0
+; LA64D-NEXT: frsqrt.s $fa0, $fa0
+; LA64D-NEXT: lu12i.w $a2, 270976
+; LA64D-NEXT: movgr2fr.w $fa2, $a2
+; LA64D-NEXT: fdiv.s $fa2, $fa2, $fa1
+; LA64D-NEXT: fst.s $fa0, $a0, 0
+; LA64D-NEXT: fst.s $fa2, $a1, 0
+; LA64D-NEXT: fmov.s $fa0, $fa1
; LA64D-NEXT: ret
;
-; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_f32:
-; LA64D-FRECIPE: # %bb.0:
+; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_f32:
+; LA64D-FRECIPE: # %bb.0:
; LA64D-FRECIPE-NEXT: frsqrte.s $fa1, $fa0
; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1
; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1
; LA64D-FRECIPE-NEXT: vldi $vr3, -1144
; LA64D-FRECIPE-NEXT: fmadd.s $fa2, $fa2, $fa1, $fa3
-; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_0)
-; LA64D-FRECIPE-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI5_0)
-; LA64D-FRECIPE-NEXT: vldi $vr4, -1056
-; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa4
+; LA64D-FRECIPE-NEXT: vldi $vr3, -1056
+; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3
; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2
-; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa3
+; LA64D-FRECIPE-NEXT: lu12i.w $a2, 270976
+; LA64D-FRECIPE-NEXT: movgr2fr.w $fa2, $a2
+; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa2
; LA64D-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1
; LA64D-FRECIPE-NEXT: fst.s $fa1, $a0, 0
; LA64D-FRECIPE-NEXT: fst.s $fa2, $a1, 0
; LA64D-FRECIPE-NEXT: ret
-;
%sqrt = tail call fast float @llvm.sqrt.f32(float %x)
%rsqrt = fdiv fast float 1.0, %sqrt
%r = fdiv fast float 42.0, %sqrt
@@ -613,82 +636,82 @@ define float @sqrt_simplify_before_recip_3_uses_f32(float %x, ptr %p1, ptr %p2)
define float @sqrt_simplify_before_recip_4_uses_f32(float %x, ptr %p1, ptr %p2, ptr %p3) nounwind {
; LA32F-LABEL: sqrt_simplify_before_recip_4_uses_f32:
; LA32F: # %bb.0:
-; LA32F-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_0)
-; LA32F-NEXT: fld.s $fa2, $a3, %pc_lo12(.LCPI6_0)
-; LA32F-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_1)
-; LA32F-NEXT: fld.s $fa3, $a3, %pc_lo12(.LCPI6_1)
-; LA32F-NEXT: fsqrt.s $fa1, $fa0
-; LA32F-NEXT: frsqrt.s $fa0, $fa0
-; LA32F-NEXT: fdiv.s $fa2, $fa2, $fa1
-; LA32F-NEXT: fdiv.s $fa3, $fa3, $fa1
-; LA32F-NEXT: fst.s $fa0, $a0, 0
-; LA32F-NEXT: fst.s $fa2, $a1, 0
-; LA32F-NEXT: fst.s $fa3, $a2, 0
-; LA32F-NEXT: fmov.s $fa0, $fa1
+; LA32F-NEXT: fsqrt.s $fa0, $fa0
+; LA32F-NEXT: lu12i.w $a3, 260096
+; LA32F-NEXT: movgr2fr.w $fa1, $a3
+; LA32F-NEXT: fdiv.s $fa1, $fa1, $fa0
+; LA32F-NEXT: lu12i.w $a3, 270976
+; LA32F-NEXT: movgr2fr.w $fa2, $a3
+; LA32F-NEXT: fdiv.s $fa2, $fa2, $fa0
+; LA32F-NEXT: lu12i.w $a3, 271040
+; LA32F-NEXT: movgr2fr.w $fa3, $a3
+; LA32F-NEXT: fdiv.s $fa3, $fa3, $fa0
+; LA32F-NEXT: fst.s $fa1, $a0, 0
+; LA32F-NEXT: fst.s $fa2, $a1, 0
+; LA32F-NEXT: fst.s $fa3, $a2, 0
; LA32F-NEXT: ret
;
; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_4_uses_f32:
; LA32F-FRECIPE: # %bb.0:
-; LA32F-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_0)
-; LA32F-FRECIPE-NEXT: fld.s $fa1, $a3, %pc_lo12(.LCPI6_0)
-; LA32F-FRECIPE-NEXT: frsqrte.s $fa2, $fa0
-; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa2
-; LA32F-FRECIPE-NEXT: fmul.s $fa3, $fa0, $fa2
-; LA32F-FRECIPE-NEXT: fmadd.s $fa1, $fa3, $fa2, $fa1
-; LA32F-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_1)
-; LA32F-FRECIPE-NEXT: fld.s $fa3, $a3, %pc_lo12(.LCPI6_1)
-; LA32F-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_2)
-; LA32F-FRECIPE-NEXT: fld.s $fa4, $a3, %pc_lo12(.LCPI6_2)
-; LA32F-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_3)
-; LA32F-FRECIPE-NEXT: fld.s $fa5, $a3, %pc_lo12(.LCPI6_3)
-; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa2, $fa3
-; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa2, $fa1
-; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa4
-; LA32F-FRECIPE-NEXT: fmul.s $fa3, $fa1, $fa5
-; LA32F-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1
-; LA32F-FRECIPE-NEXT: fst.s $fa1, $a0, 0
-; LA32F-FRECIPE-NEXT: fst.s $fa2, $a1, 0
-; LA32F-FRECIPE-NEXT: fst.s $fa3, $a2, 0
+; LA32F-FRECIPE-NEXT: frsqrte.s $fa1, $fa0
+; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1
+; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1
+; LA32F-FRECIPE-NEXT: lu12i.w $a3, -261120
+; LA32F-FRECIPE-NEXT: movgr2fr.w $fa3, $a3
+; LA32F-FRECIPE-NEXT: fmadd.s $fa2, $fa2, $fa1, $fa3
+; LA32F-FRECIPE-NEXT: lu12i.w $a3, -266240
+; LA32F-FRECIPE-NEXT: movgr2fr.w $fa3, $a3
+; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3
+; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2
+; LA32F-FRECIPE-NEXT: lu12i.w $a3, 270976
+; LA32F-FRECIPE-NEXT: movgr2fr.w $fa2, $a3
+; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa2
+; LA32F-FRECIPE-NEXT: lu12i.w $a3, 271040
+; LA32F-FRECIPE-NEXT: movgr2fr.w $fa3, $a3
+; LA32F-FRECIPE-NEXT: fmul.s $fa3, $fa1, $fa3
+; LA32F-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1
+; LA32F-FRECIPE-NEXT: fst.s $fa1, $a0, 0
+; LA32F-FRECIPE-NEXT: fst.s $fa2, $a1, 0
+; LA32F-FRECIPE-NEXT: fst.s $fa3, $a2, 0
; LA32F-FRECIPE-NEXT: ret
;
-; LA64D-LABEL: sqrt_simplify_before_recip_4_uses_f32:
-; LA64D: # %bb.0:
-; LA64D-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_0)
-; LA64D-NEXT: fld.s $fa2, $a3, %pc_lo12(.LCPI6_0)
-; LA64D-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_1)
-; LA64D-NEXT: fld.s $fa3, $a3, %pc_lo12(.LCPI6_1)
-; LA64D-NEXT: fsqrt.s $fa1, $fa0
-; LA64D-NEXT: frsqrt.s $fa0, $fa0
-; LA64D-NEXT: fdiv.s $fa2, $fa2, $fa1
-; LA64D-NEXT: fdiv.s $fa3, $fa3, $fa1
-; LA64D-NEXT: fst.s $fa0, $a0, 0
-; LA64D-NEXT: fst.s $fa2, $a1, 0
-; LA64D-NEXT: fst.s $fa3, $a2, 0
-; LA64D-NEXT: fmov.s $fa0, $fa1
+; LA64D-LABEL: sqrt_simplify_before_recip_4_uses_f32:
+; LA64D: # %bb.0:
+; LA64D-NEXT: fsqrt.s $fa1, $fa0
+; LA64D-NEXT: frsqrt.s $fa0, $fa0
+; LA64D-NEXT: lu12i.w $a3, 270976
+; LA64D-NEXT: movgr2fr.w $fa2, $a3
+; LA64D-NEXT: fdiv.s $fa2, $fa2, $fa1
+; LA64D-NEXT: lu12i.w $a3, 271040
+; LA64D-NEXT: movgr2fr.w $fa3, $a3
+; LA64D-NEXT: fdiv.s $fa3, $fa3, $fa1
+; LA64D-NEXT: fst.s $fa0, $a0, 0
+; LA64D-NEXT: fst.s $fa2, $a1, 0
+; LA64D-NEXT: fst.s $fa3, $a2, 0
+; LA64D-NEXT: fmov.s $fa0, $fa1
; LA64D-NEXT: ret
;
-; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_4_uses_f32:
-; LA64D-FRECIPE: # %bb.0:
+; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_4_uses_f32:
+; LA64D-FRECIPE: # %bb.0:
; LA64D-FRECIPE-NEXT: frsqrte.s $fa1, $fa0
; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1
; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1
; LA64D-FRECIPE-NEXT: vldi $vr3, -1144
; LA64D-FRECIPE-NEXT: fmadd.s $fa2, $fa2, $fa1, $fa3
; LA64D-FRECIPE-NEXT: vldi $vr3, -1056
-; LA64D-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_0)
-; LA64D-FRECIPE-NEXT: fld.s $fa4, $a3, %pc_lo12(.LCPI6_0)
-; LA64D-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_1)
-; LA64D-FRECIPE-NEXT: fld.s $fa5, $a3, %pc_lo12(.LCPI6_1)
; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3
; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2
-; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa4
-; LA64D-FRECIPE-NEXT: fmul.s $fa3, $fa1, $fa5
+; LA64D-FRECIPE-NEXT: lu12i.w $a3, 270976
+; LA64D-FRECIPE-NEXT: movgr2fr.w $fa2, $a3
+; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa2
+; LA64D-FRECIPE-NEXT: lu12i.w $a3, 271040
+; LA64D-FRECIPE-NEXT: movgr2fr.w $fa3, $a3
+; LA64D-FRECIPE-NEXT: fmul.s $fa3, $fa1, $fa3
; LA64D-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1
; LA64D-FRECIPE-NEXT: fst.s $fa1, $a0, 0
; LA64D-FRECIPE-NEXT: fst.s $fa2, $a1, 0
; LA64D-FRECIPE-NEXT: fst.s $fa3, $a2, 0
; LA64D-FRECIPE-NEXT: ret
-;
%sqrt = tail call fast float @llvm.sqrt.f32(float %x)
%rsqrt = fdiv fast float 1.0, %sqrt
%r1 = fdiv fast float 42.0, %sqrt
@@ -703,55 +726,55 @@ define float @sqrt_simplify_before_recip_4_uses_f32(float %x, ptr %p1, ptr %p2,
define float @sqrt_simplify_before_recip_3_uses_order_f32(float %x, ptr %p1, ptr %p2) nounwind {
; LA32F-LABEL: sqrt_simplify_before_recip_3_uses_order_f32:
; LA32F: # %bb.0:
-; LA32F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_0)
-; LA32F-NEXT: fld.s $fa1, $a2, %pc_lo12(.LCPI7_0)
-; LA32F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_1)
-; LA32F-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI7_1)
-; LA32F-NEXT: fsqrt.s $fa0, $fa0
-; LA32F-NEXT: fdiv.s $fa1, $fa1, $fa0
-; LA32F-NEXT: fdiv.s $fa2, $fa2, $fa0
-; LA32F-NEXT: fst.s $fa1, $a0, 0
-; LA32F-NEXT: fst.s $fa2, $a1, 0
+; LA32F-NEXT: fsqrt.s $fa0, $fa0
+; LA32F-NEXT: lu12i.w $a2, 270976
+; LA32F-NEXT: movgr2fr.w $fa1, $a2
+; LA32F-NEXT: fdiv.s $fa1, $fa1, $fa0
+; LA32F-NEXT: lu12i.w $a2, 271040
+; LA32F-NEXT: movgr2fr.w $fa2, $a2
+; LA32F-NEXT: fdiv.s $fa2, $fa2, $fa0
+; LA32F-NEXT: fst.s $fa1, $a0, 0
+; LA32F-NEXT: fst.s $fa2, $a1, 0
; LA32F-NEXT: ret
;
; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_order_f32:
; LA32F-FRECIPE: # %bb.0:
-; LA32F-FRECIPE-NEXT: frsqrte.s $fa1, $fa0
-; LA32F-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_0)
-; LA32F-FRECIPE-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI7_0)
-; LA32F-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_1)
-; LA32F-FRECIPE-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI7_1)
-; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1
-; LA32F-FRECIPE-NEXT: fmul.s $fa4, $fa0, $fa1
-; LA32F-FRECIPE-NEXT: fmadd.s $fa2, $fa4, $fa1, $fa2
-; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3
-; LA32F-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_2)
-; LA32F-FRECIPE-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI7_2)
-; LA32F-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_3)
-; LA32F-FRECIPE-NEXT: fld.s $fa4, $a2, %pc_lo12(.LCPI7_3)
-; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2
-; LA32F-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1
-; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa3
-; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa4
-; LA32F-FRECIPE-NEXT: fst.s $fa2, $a0, 0
-; LA32F-FRECIPE-NEXT: fst.s $fa1, $a1, 0
+; LA32F-FRECIPE-NEXT: frsqrte.s $fa1, $fa0
+; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1
+; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1
+; LA32F-FRECIPE-NEXT: lu12i.w $a2, -261120
+; LA32F-FRECIPE-NEXT: movgr2fr.w $fa3, $a2
+; LA32F-FRECIPE-NEXT: fmadd.s $fa2, $fa2, $fa1, $fa3
+; LA32F-FRECIPE-NEXT: lu12i.w $a2, -266240
+; LA32F-FRECIPE-NEXT: movgr2fr.w $fa3, $a2
+; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3
+; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2
+; LA32F-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1
+; LA32F-FRECIPE-NEXT: lu12i.w $a2, 270976
+; LA32F-FRECIPE-NEXT: movgr2fr.w $fa2, $a2
+; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa2
+; LA32F-FRECIPE-NEXT: lu12i.w $a2, 271040
+; LA32F-FRECIPE-NEXT: movgr2fr.w $fa3, $a2
+; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3
+; LA32F-FRECIPE-NEXT: fst.s $fa2, $a0, 0
+; LA32F-FRECIPE-NEXT: fst.s $fa1, $a1, 0
; LA32F-FRECIPE-NEXT: ret
;
-; LA64D-LABEL: sqrt_simplify_before_recip_3_uses_order_f32:
-; LA64D: # %bb.0:
-; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_0)
-; LA64D-NEXT: fld.s $fa1, $a2, %pc_lo12(.LCPI7_0)
-; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_1)
-; LA64D-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI7_1)
-; LA64D-NEXT: fsqrt.s $fa0, $fa0
-; LA64D-NEXT: fdiv.s $fa1, $fa1, $fa0
-; LA64D-NEXT: fdiv.s $fa2, $fa2, $fa0
-; LA64D-NEXT: fst.s $fa1, $a0, 0
-; LA64D-NEXT: fst.s $fa2, $a1, 0
+; LA64D-LABEL: sqrt_simplify_before_recip_3_uses_order_f32:
+; LA64D: # %bb.0:
+; LA64D-NEXT: fsqrt.s $fa0, $fa0
+; LA64D-NEXT: lu12i.w $a2, 270976
+; LA64D-NEXT: movgr2fr.w $fa1, $a2
+; LA64D-NEXT: fdiv.s $fa1, $fa1, $fa0
+; LA64D-NEXT: lu12i.w $a2, 271040
+; LA64D-NEXT: movgr2fr.w $fa2, $a2
+; LA64D-NEXT: fdiv.s $fa2, $fa2, $fa0
+; LA64D-NEXT: fst.s $fa1, $a0, 0
+; LA64D-NEXT: fst.s $fa2, $a1, 0
; LA64D-NEXT: ret
;
-; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_order_f32:
-; LA64D-FRECIPE: # %bb.0:
+; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_order_f32:
+; LA64D-FRECIPE: # %bb.0:
; LA64D-FRECIPE-NEXT: frsqrte.s $fa1, $fa0
; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1
; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1
@@ -759,18 +782,17 @@ define float @sqrt_simplify_before_recip_3_uses_order_f32(float %x, ptr %p1, ptr
; LA64D-FRECIPE-NEXT: fmadd.s $fa2, $fa2, $fa1, $fa3
; LA64D-FRECIPE-NEXT: vldi $vr3, -1056
; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3
-; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_0)
-; LA64D-FRECIPE-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI7_0)
-; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_1)
-; LA64D-FRECIPE-NEXT: fld.s $fa4, $a2, %pc_lo12(.LCPI7_1)
; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2
; LA64D-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1
-; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa3
-; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa4
+; LA64D-FRECIPE-NEXT: lu12i.w $a2, 270976
+; LA64D-FRECIPE-NEXT: movgr2fr.w $fa2, $a2
+; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa2
+; LA64D-FRECIPE-NEXT: lu12i.w $a2, 271040
+; LA64D-FRECIPE-NEXT: movgr2fr.w $fa3, $a2
+; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3
; LA64D-FRECIPE-NEXT: fst.s $fa2, $a0, 0
; LA64D-FRECIPE-NEXT: fst.s $fa1, $a1, 0
; LA64D-FRECIPE-NEXT: ret
-;
%sqrt = tail call fast float @llvm.sqrt.f32(float %x)
%sqrt_fast = fdiv fast float %x, %sqrt
%r1 = fdiv fast float 42.0, %sqrt
diff --git a/llvm/test/CodeGen/LoongArch/fsqrt.ll b/llvm/test/CodeGen/LoongArch/fsqrt.ll
index e0cb4d39474b1..290b6f3de78d1 100644
--- a/llvm/test/CodeGen/LoongArch/fsqrt.ll
+++ b/llvm/test/CodeGen/LoongArch/fsqrt.ll
@@ -67,17 +67,26 @@ define double @fsqrt_f64(double %a) nounwind {
define float @frsqrt_f32(float %a) nounwind {
; LA32F-LABEL: frsqrt_f32:
; LA32F: # %bb.0:
-; LA32F-NEXT: frsqrt.s $fa0, $fa0
+; LA32F-NEXT: fsqrt.s $fa0, $fa0
+; LA32F-NEXT: lu12i.w $a0, 260096
+; LA32F-NEXT: movgr2fr.w $fa1, $a0
+; LA32F-NEXT: fdiv.s $fa0, $fa1, $fa0
; LA32F-NEXT: ret
;
; LA32D-LABEL: frsqrt_f32:
; LA32D: # %bb.0:
-; LA32D-NEXT: frsqrt.s $fa0, $fa0
+; LA32D-NEXT: fsqrt.s $fa0, $fa0
+; LA32D-NEXT: lu12i.w $a0, 260096
+; LA32D-NEXT: movgr2fr.w $fa1, $a0
+; LA32D-NEXT: fdiv.s $fa0, $fa1, $fa0
; LA32D-NEXT: ret
;
; LA64F-LABEL: frsqrt_f32:
; LA64F: # %bb.0:
-; LA64F-NEXT: frsqrt.s $fa0, $fa0
+; LA64F-NEXT: fsqrt.s $fa0, $fa0
+; LA64F-NEXT: lu12i.w $a0, 260096
+; LA64F-NEXT: movgr2fr.w $fa1, $a0
+; LA64F-NEXT: fdiv.s $fa0, $fa1, $fa0
; LA64F-NEXT: ret
;
; LA64D-LABEL: frsqrt_f32:
@@ -106,7 +115,11 @@ define double @frsqrt_f64(double %a) nounwind {
;
; LA32D-LABEL: frsqrt_f64:
; LA32D: # %bb.0:
-; LA32D-NEXT: frsqrt.d $fa0, $fa0
+; LA32D-NEXT: fsqrt.d $fa0, $fa0
+; LA32D-NEXT: movgr2fr.w $fa1, $zero
+; LA32D-NEXT: lu12i.w $a0, 261888
+; LA32D-NEXT: movgr2frh.w $fa1, $a0
+; LA32D-NEXT: fdiv.d $fa0, $fa1, $fa0
; LA32D-NEXT: ret
;
; LA64F-LABEL: frsqrt_f64:
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
index 4990e7002562d..a6e3f790943aa 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
@@ -6,9 +6,8 @@ define float @float_fadd_acquire(ptr %p) nounwind {
; LA64F-LABEL: float_fadd_acquire:
; LA64F: # %bb.0:
; LA64F-NEXT: fld.s $fa0, $a0, 0
-; LA64F-NEXT: addi.w $a1, $zero, 1
+; LA64F-NEXT: lu12i.w $a1, 260096
; LA64F-NEXT: movgr2fr.w $fa1, $a1
-; LA64F-NEXT: ffint.s.w $fa1, $fa1
; LA64F-NEXT: .p2align 4, , 16
; LA64F-NEXT: .LBB0_1: # %atomicrmw.start
; LA64F-NEXT: # =>This Loop Header: Depth=1
@@ -76,8 +75,9 @@ define float @float_fsub_acquire(ptr %p) nounwind {
; LA64F-LABEL: float_fsub_acquire:
; LA64F: # %bb.0:
; LA64F-NEXT: fld.s $fa0, $a0, 0
-; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0)
-; LA64F-NEXT: fld.s $fa1, $a1, %pc_lo12(.LCPI1_0)
+; LA64F-NEXT: lu12i.w $a1, -264192
+; LA64F-NEXT: lu32i.d $a1, 0
+; LA64F-NEXT: movgr2fr.w $fa1, $a1
; LA64F-NEXT: .p2align 4, , 16
; LA64F-NEXT: .LBB1_1: # %atomicrmw.start
; LA64F-NEXT: # =>This Loop Header: Depth=1
@@ -145,9 +145,8 @@ define float @float_fmin_acquire(ptr %p) nounwind {
; LA64F-LABEL: float_fmin_acquire:
; LA64F: # %bb.0:
; LA64F-NEXT: fld.s $fa0, $a0, 0
-; LA64F-NEXT: addi.w $a1, $zero, 1
+; LA64F-NEXT: lu12i.w $a1, 260096
; LA64F-NEXT: movgr2fr.w $fa1, $a1
-; LA64F-NEXT: ffint.s.w $fa1, $fa1
; LA64F-NEXT: .p2align 4, , 16
; LA64F-NEXT: .LBB2_1: # %atomicrmw.start
; LA64F-NEXT: # =>This Loop Header: Depth=1
@@ -215,9 +214,8 @@ define float @float_fmax_acquire(ptr %p) nounwind {
; LA64F-LABEL: float_fmax_acquire:
; LA64F: # %bb.0:
; LA64F-NEXT: fld.s $fa0, $a0, 0
-; LA64F-NEXT: addi.w $a1, $zero, 1
+; LA64F-NEXT: lu12i.w $a1, 260096
; LA64F-NEXT: movgr2fr.w $fa1, $a1
-; LA64F-NEXT: ffint.s.w $fa1, $fa1
; LA64F-NEXT: .p2align 4, , 16
; LA64F-NEXT: .LBB3_1: # %atomicrmw.start
; LA64F-NEXT: # =>This Loop Header: Depth=1
@@ -573,9 +571,8 @@ define float @float_fadd_release(ptr %p) nounwind {
; LA64F-LABEL: float_fadd_release:
; LA64F: # %bb.0:
; LA64F-NEXT: fld.s $fa0, $a0, 0
-; LA64F-NEXT: addi.w $a1, $zero, 1
+; LA64F-NEXT: lu12i.w $a1, 260096
; LA64F-NEXT: movgr2fr.w $fa1, $a1
-; LA64F-NEXT: ffint.s.w $fa1, $fa1
; LA64F-NEXT: .p2align 4, , 16
; LA64F-NEXT: .LBB8_1: # %atomicrmw.start
; LA64F-NEXT: # =>This Loop Header: Depth=1
@@ -643,8 +640,9 @@ define float @float_fsub_release(ptr %p) nounwind {
; LA64F-LABEL: float_fsub_release:
; LA64F: # %bb.0:
; LA64F-NEXT: fld.s $fa0, $a0, 0
-; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI9_0)
-; LA64F-NEXT: fld.s $fa1, $a1, %pc_lo12(.LCPI9_0)
+; LA64F-NEXT: lu12i.w $a1, -264192
+; LA64F-NEXT: lu32i.d $a1, 0
+; LA64F-NEXT: movgr2fr.w $fa1, $a1
; LA64F-NEXT: .p2align 4, , 16
; LA64F-NEXT: .LBB9_1: # %atomicrmw.start
; LA64F-NEXT: # =>This Loop Header: Depth=1
@@ -712,9 +710,8 @@ define float @float_fmin_release(ptr %p) nounwind {
; LA64F-LABEL: float_fmin_release:
; LA64F: # %bb.0:
; LA64F-NEXT: fld.s $fa0, $a0, 0
-; LA64F-NEXT: addi.w $a1, $zero, 1
+; LA64F-NEXT: lu12i.w $a1, 260096
; LA64F-NEXT: movgr2fr.w $fa1, $a1
-; LA64F-NEXT: ffint.s.w $fa1, $fa1
; LA64F-NEXT: .p2align 4, , 16
; LA64F-NEXT: .LBB10_1: # %atomicrmw.start
; LA64F-NEXT: # =>This Loop Header: Depth=1
@@ -782,9 +779,8 @@ define float @float_fmax_release(ptr %p) nounwind {
; LA64F-LABEL: float_fmax_release:
; LA64F: # %bb.0:
; LA64F-NEXT: fld.s $fa0, $a0, 0
-; LA64F-NEXT: addi.w $a1, $zero, 1
+; LA64F-NEXT: lu12i.w $a1, 260096
; LA64F-NEXT: movgr2fr.w $fa1, $a1
-; LA64F-NEXT: ffint.s.w $fa1, $fa1
; LA64F-NEXT: .p2align 4, , 16
; LA64F-NEXT: .LBB11_1: # %atomicrmw.start
; LA64F-NEXT: # =>This Loop Header: Depth=1
@@ -1140,9 +1136,8 @@ define float @float_fadd_acq_rel(ptr %p) nounwind {
; LA64F-LABEL: float_fadd_acq_rel:
; LA64F: # %bb.0:
; LA64F-NEXT: fld.s $fa0, $a0, 0
-; LA64F-NEXT: addi.w $a1, $zero, 1
+; LA64F-NEXT: lu12i.w $a1, 260096
; LA64F-NEXT: movgr2fr.w $fa1, $a1
-; LA64F-NEXT: ffint.s.w $fa1, $fa1
; LA64F-NEXT: .p2align 4, , 16
; LA64F-NEXT: .LBB16_1: # %atomicrmw.start
; LA64F-NEXT: # =>This Loop Header: Depth=1
@@ -1210,8 +1205,9 @@ define float @float_fsub_acq_rel(ptr %p) nounwind {
; LA64F-LABEL: float_fsub_acq_rel:
; LA64F: # %bb.0:
; LA64F-NEXT: fld.s $fa0, $a0, 0
-; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0)
-; LA64F-NEXT: fld.s $fa1, $a1, %pc_lo12(.LCPI17_0)
+; LA64F-NEXT: lu12i.w $a1, -264192
+; LA64F-NEXT: lu32i.d $a1, 0
+; LA64F-NEXT: movgr2fr.w $fa1, $a1
; LA64F-NEXT: .p2align 4, , 16
; LA64F-NEXT: .LBB17_1: # %atomicrmw.start
; LA64F-NEXT: # =>This Loop Header: Depth=1
@@ -1279,9 +1275,8 @@ define float @float_fmin_acq_rel(ptr %p) nounwind {
; LA64F-LABEL: float_fmin_acq_rel:
; LA64F: # %bb.0:
; LA64F-NEXT: fld.s $fa0, $a0, 0
-; LA64F-NEXT: addi.w $a1, $zero, 1
+; LA64F-NEXT: lu12i.w $a1, 260096
; LA64F-NEXT: movgr2fr.w $fa1, $a1
-; LA64F-NEXT: ffint.s.w $fa1, $fa1
; LA64F-NEXT: .p2align 4, , 16
; LA64F-NEXT: .LBB18_1: # %atomicrmw.start
; LA64F-NEXT: # =>This Loop Header: Depth=1
@@ -1349,9 +1344,8 @@ define float @float_fmax_acq_rel(ptr %p) nounwind {
; LA64F-LABEL: float_fmax_acq_rel:
; LA64F: # %bb.0:
; LA64F-NEXT: fld.s $fa0, $a0, 0
-; LA64F-NEXT: addi.w $a1, $zero, 1
+; LA64F-NEXT: lu12i.w $a1, 260096
; LA64F-NEXT: movgr2fr.w $fa1, $a1
-; LA64F-NEXT: ffint.s.w $fa1, $fa1
; LA64F-NEXT: .p2align 4, , 16
; LA64F-NEXT: .LBB19_1: # %atomicrmw.start
; LA64F-NEXT: # =>This Loop Header: Depth=1
@@ -1707,9 +1701,8 @@ define float @float_fadd_seq_cst(ptr %p) nounwind {
; LA64F-LABEL: float_fadd_seq_cst:
; LA64F: # %bb.0:
; LA64F-NEXT: fld.s $fa0, $a0, 0
-; LA64F-NEXT: addi.w $a1, $zero, 1
+; LA64F-NEXT: lu12i.w $a1, 260096
; LA64F-NEXT: movgr2fr.w $fa1, $a1
-; LA64F-NEXT: ffint.s.w $fa1, $fa1
; LA64F-NEXT: .p2align 4, , 16
; LA64F-NEXT: .LBB24_1: # %atomicrmw.start
; LA64F-NEXT: # =>This Loop Header: Depth=1
@@ -1777,8 +1770,9 @@ define float @float_fsub_seq_cst(ptr %p) nounwind {
; LA64F-LABEL: float_fsub_seq_cst:
; LA64F: # %bb.0:
; LA64F-NEXT: fld.s $fa0, $a0, 0
-; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI25_0)
-; LA64F-NEXT: fld.s $fa1, $a1, %pc_lo12(.LCPI25_0)
+; LA64F-NEXT: lu12i.w $a1, -264192
+; LA64F-NEXT: lu32i.d $a1, 0
+; LA64F-NEXT: movgr2fr.w $fa1, $a1
; LA64F-NEXT: .p2align 4, , 16
; LA64F-NEXT: .LBB25_1: # %atomicrmw.start
; LA64F-NEXT: # =>This Loop Header: Depth=1
@@ -1846,9 +1840,8 @@ define float @float_fmin_seq_cst(ptr %p) nounwind {
; LA64F-LABEL: float_fmin_seq_cst:
; LA64F: # %bb.0:
; LA64F-NEXT: fld.s $fa0, $a0, 0
-; LA64F-NEXT: addi.w $a1, $zero, 1
+; LA64F-NEXT: lu12i.w $a1, 260096
; LA64F-NEXT: movgr2fr.w $fa1, $a1
-; LA64F-NEXT: ffint.s.w $fa1, $fa1
; LA64F-NEXT: .p2align 4, , 16
; LA64F-NEXT: .LBB26_1: # %atomicrmw.start
; LA64F-NEXT: # =>This Loop Header: Depth=1
@@ -1916,9 +1909,8 @@ define float @float_fmax_seq_cst(ptr %p) nounwind {
; LA64F-LABEL: float_fmax_seq_cst:
; LA64F: # %bb.0:
; LA64F-NEXT: fld.s $fa0, $a0, 0
-; LA64F-NEXT: addi.w $a1, $zero, 1
+; LA64F-NEXT: lu12i.w $a1, 260096
; LA64F-NEXT: movgr2fr.w $fa1, $a1
-; LA64F-NEXT: ffint.s.w $fa1, $fa1
; LA64F-NEXT: .p2align 4, , 16
; LA64F-NEXT: .LBB27_1: # %atomicrmw.start
; LA64F-NEXT: # =>This Loop Header: Depth=1
@@ -2274,9 +2266,8 @@ define float @float_fadd_monotonic(ptr %p) nounwind {
; LA64F-LABEL: float_fadd_monotonic:
; LA64F: # %bb.0:
; LA64F-NEXT: fld.s $fa0, $a0, 0
-; LA64F-NEXT: addi.w $a1, $zero, 1
+; LA64F-NEXT: lu12i.w $a1, 260096
; LA64F-NEXT: movgr2fr.w $fa1, $a1
-; LA64F-NEXT: ffint.s.w $fa1, $fa1
; LA64F-NEXT: .p2align 4, , 16
; LA64F-NEXT: .LBB32_1: # %atomicrmw.start
; LA64F-NEXT: # =>This Loop Header: Depth=1
@@ -2344,8 +2335,9 @@ define float @float_fsub_monotonic(ptr %p) nounwind {
; LA64F-LABEL: float_fsub_monotonic:
; LA64F: # %bb.0:
; LA64F-NEXT: fld.s $fa0, $a0, 0
-; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI33_0)
-; LA64F-NEXT: fld.s $fa1, $a1, %pc_lo12(.LCPI33_0)
+; LA64F-NEXT: lu12i.w $a1, -264192
+; LA64F-NEXT: lu32i.d $a1, 0
+; LA64F-NEXT: movgr2fr.w $fa1, $a1
; LA64F-NEXT: .p2align 4, , 16
; LA64F-NEXT: .LBB33_1: # %atomicrmw.start
; LA64F-NEXT: # =>This Loop Header: Depth=1
@@ -2413,9 +2405,8 @@ define float @float_fmin_monotonic(ptr %p) nounwind {
; LA64F-LABEL: float_fmin_monotonic:
; LA64F: # %bb.0:
; LA64F-NEXT: fld.s $fa0, $a0, 0
-; LA64F-NEXT: addi.w $a1, $zero, 1
+; LA64F-NEXT: lu12i.w $a1, 260096
; LA64F-NEXT: movgr2fr.w $fa1, $a1
-; LA64F-NEXT: ffint.s.w $fa1, $fa1
; LA64F-NEXT: .p2align 4, , 16
; LA64F-NEXT: .LBB34_1: # %atomicrmw.start
; LA64F-NEXT: # =>This Loop Header: Depth=1
@@ -2483,9 +2474,8 @@ define float @float_fmax_monotonic(ptr %p) nounwind {
; LA64F-LABEL: float_fmax_monotonic:
; LA64F: # %bb.0:
; LA64F-NEXT: fld.s $fa0, $a0, 0
-; LA64F-NEXT: addi.w $a1, $zero, 1
+; LA64F-NEXT: lu12i.w $a1, 260096
; LA64F-NEXT: movgr2fr.w $fa1, $a1
-; LA64F-NEXT: ffint.s.w $fa1, $fa1
; LA64F-NEXT: .p2align 4, , 16
; LA64F-NEXT: .LBB35_1: # %atomicrmw.start
; LA64F-NEXT: # =>This Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll
index 0b82ea220d7fb..ef211139afdf5 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll
@@ -116,8 +116,9 @@ define i32 @convert_double_to_i32(double %a) nounwind {
define i32 @convert_double_to_u32(double %a) nounwind {
; LA32-LABEL: convert_double_to_u32:
; LA32: # %bb.0:
-; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0)
-; LA32-NEXT: fld.d $fa1, $a0, %pc_lo12(.LCPI7_0)
+; LA32-NEXT: movgr2fr.w $fa1, $zero
+; LA32-NEXT: lu12i.w $a0, 269824
+; LA32-NEXT: movgr2frh.w $fa1, $a0
; LA32-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1
; LA32-NEXT: movcf2gr $a0, $fcc0
; LA32-NEXT: bne $a0, $zero, .LBB7_2
@@ -173,8 +174,8 @@ define i64 @convert_double_to_u64(double %a) nounwind {
;
; LA64-LABEL: convert_double_to_u64:
; LA64: # %bb.0:
-; LA64-NEXT: pcalau12i $a0, %pc_hi20(.LCPI9_0)
-; LA64-NEXT: fld.d $fa1, $a0, %pc_lo12(.LCPI9_0)
+; LA64-NEXT: lu52i.d $a0, $zero, 1086
+; LA64-NEXT: movgr2fr.d $fa1, $a0
; LA64-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1
; LA64-NEXT: fsub.d $fa1, $fa0, $fa1
; LA64-NEXT: ftintrz.l.d $fa1, $fa1
@@ -232,8 +233,8 @@ define double @convert_u32_to_double(i32 %a) nounwind {
; LA32-NEXT: st.w $a1, $sp, 12
; LA32-NEXT: st.w $a0, $sp, 8
; LA32-NEXT: fld.d $fa0, $sp, 8
-; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI12_0)
-; LA32-NEXT: fld.d $fa1, $a0, %pc_lo12(.LCPI12_0)
+; LA32-NEXT: movgr2fr.w $fa1, $zero
+; LA32-NEXT: movgr2frh.w $fa1, $a1
; LA32-NEXT: fsub.d $fa0, $fa0, $fa1
; LA32-NEXT: addi.w $sp, $sp, 16
; LA32-NEXT: ret
@@ -261,12 +262,13 @@ define double @convert_u64_to_double(i64 %a) nounwind {
; LA64-LABEL: convert_u64_to_double:
; LA64: # %bb.0:
; LA64-NEXT: srli.d $a1, $a0, 32
-; LA64-NEXT: pcalau12i $a2, %pc_hi20(.LCPI13_0)
-; LA64-NEXT: fld.d $fa0, $a2, %pc_lo12(.LCPI13_0)
; LA64-NEXT: lu52i.d $a2, $zero, 1107
; LA64-NEXT: or $a1, $a1, $a2
+; LA64-NEXT: movgr2fr.d $fa0, $a1
+; LA64-NEXT: lu12i.w $a1, 256
+; LA64-NEXT: lu52i.d $a1, $a1, 1107
; LA64-NEXT: movgr2fr.d $fa1, $a1
-; LA64-NEXT: fsub.d $fa0, $fa1, $fa0
+; LA64-NEXT: fsub.d $fa0, $fa0, $fa1
; LA64-NEXT: lu12i.w $a1, 275200
; LA64-NEXT: bstrins.d $a0, $a1, 63, 32
; LA64-NEXT: movgr2fr.d $fa1, $a0
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll
index 413702b006b1b..8328bb02cf8b5 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll
@@ -181,8 +181,8 @@ define zeroext i16 @convert_float_to_u16(float %a) nounwind {
define i32 @convert_float_to_u32(float %a) nounwind {
; LA32F-LABEL: convert_float_to_u32:
; LA32F: # %bb.0:
-; LA32F-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
-; LA32F-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI6_0)
+; LA32F-NEXT: lu12i.w $a0, 323584
+; LA32F-NEXT: movgr2fr.w $fa1, $a0
; LA32F-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1
; LA32F-NEXT: movcf2gr $a0, $fcc0
; LA32F-NEXT: bne $a0, $zero, .LBB6_2
@@ -200,8 +200,8 @@ define i32 @convert_float_to_u32(float %a) nounwind {
;
; LA32D-LABEL: convert_float_to_u32:
; LA32D: # %bb.0:
-; LA32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
-; LA32D-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI6_0)
+; LA32D-NEXT: lu12i.w $a0, 323584
+; LA32D-NEXT: movgr2fr.w $fa1, $a0
; LA32D-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1
; LA32D-NEXT: movcf2gr $a0, $fcc0
; LA32D-NEXT: bne $a0, $zero, .LBB6_2
@@ -219,8 +219,8 @@ define i32 @convert_float_to_u32(float %a) nounwind {
;
; LA64F-LABEL: convert_float_to_u32:
; LA64F: # %bb.0:
-; LA64F-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
-; LA64F-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI6_0)
+; LA64F-NEXT: lu12i.w $a0, 323584
+; LA64F-NEXT: movgr2fr.w $fa1, $a0
; LA64F-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1
; LA64F-NEXT: fsub.s $fa1, $fa0, $fa1
; LA64F-NEXT: ftintrz.w.s $fa1, $fa1
@@ -265,8 +265,8 @@ define i64 @convert_float_to_u64(float %a) nounwind {
;
; LA64F-LABEL: convert_float_to_u64:
; LA64F: # %bb.0:
-; LA64F-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0)
-; LA64F-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI7_0)
+; LA64F-NEXT: lu12i.w $a0, 389120
+; LA64F-NEXT: movgr2fr.w $fa1, $a0
; LA64F-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1
; LA64F-NEXT: fsub.s $fa1, $fa0, $fa1
; LA64F-NEXT: ftintrz.w.s $fa1, $fa1
@@ -283,8 +283,8 @@ define i64 @convert_float_to_u64(float %a) nounwind {
;
; LA64D-LABEL: convert_float_to_u64:
; LA64D: # %bb.0:
-; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0)
-; LA64D-NEXT: fld.s $fa1, $a0, %pc_lo12(.LCPI7_0)
+; LA64D-NEXT: lu12i.w $a0, 389120
+; LA64D-NEXT: movgr2fr.w $fa1, $a0
; LA64D-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1
; LA64D-NEXT: fsub.s $fa1, $fa0, $fa1
; LA64D-NEXT: ftintrz.l.s $fa1, $fa1
@@ -503,8 +503,8 @@ define float @convert_u32_to_float(i32 %a) nounwind {
; LA32D-NEXT: st.w $a1, $sp, 12
; LA32D-NEXT: st.w $a0, $sp, 8
; LA32D-NEXT: fld.d $fa0, $sp, 8
-; LA32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI14_0)
-; LA32D-NEXT: fld.d $fa1, $a0, %pc_lo12(.LCPI14_0)
+; LA32D-NEXT: movgr2fr.w $fa1, $zero
+; LA32D-NEXT: movgr2frh.w $fa1, $a1
; LA32D-NEXT: fsub.d $fa0, $fa0, $fa1
; LA32D-NEXT: fcvt.s.d $fa0, $fa0
; LA32D-NEXT: addi.w $sp, $sp, 16
diff --git a/llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll b/llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll
index 6e9d26ab362d6..d92de139ce672 100644
--- a/llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll
+++ b/llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll
@@ -52,10 +52,9 @@
define float @f(float %a) {
; ILP32D-LABEL: f:
; ILP32D: # %bb.0:
-; ILP32D-NEXT: addi.w $a0, $zero, 1
-; ILP32D-NEXT: movgr2fr.w $fa1, $a0
-; ILP32D-NEXT: ffint.s.w $fa1, $fa1
-; ILP32D-NEXT: fadd.s $fa0, $fa0, $fa1
+; ILP32D-NEXT: lu12i.w $a0, 260096
+; ILP32D-NEXT: movgr2fr.w $fa1, $a0
+; ILP32D-NEXT: fadd.s $fa0, $fa0, $fa1
; ILP32D-NEXT: ret
;
; LP64D-LABEL: f:
@@ -71,11 +70,10 @@ define float @f(float %a) {
; LP64S-LP64D-NOD-LABEL: f:
; LP64S-LP64D-NOD: # %bb.0:
; LP64S-LP64D-NOD-NEXT: movgr2fr.w $fa0, $a0
-; LP64S-LP64D-NOD-NEXT: addi.w $a0, $zero, 1
-; LP64S-LP64D-NOD-NEXT: movgr2fr.w $fa1, $a0
-; LP64S-LP64D-NOD-NEXT: ffint.s.w $fa1, $fa1
-; LP64S-LP64D-NOD-NEXT: fadd.s $fa0, $fa0, $fa1
-; LP64S-LP64D-NOD-NEXT: movfr2gr.s $a0, $fa0
+; LP64S-LP64D-NOD-NEXT: lu12i.w $a0, 260096
+; LP64S-LP64D-NOD-NEXT: movgr2fr.w $fa1, $a0
+; LP64S-LP64D-NOD-NEXT: fadd.s $fa0, $fa0, $fa1
+; LP64S-LP64D-NOD-NEXT: movfr2gr.s $a0, $fa0
; LP64S-LP64D-NOD-NEXT: ret
;
; LP64D-LP64F-NOF-LABEL: f:
@@ -92,10 +90,9 @@ define float @f(float %a) {
define double @g(double %a) {
; ILP32D-LABEL: g:
; ILP32D: # %bb.0:
-; ILP32D-NEXT: addi.w $a0, $zero, 1
-; ILP32D-NEXT: movgr2fr.w $fa1, $a0
-; ILP32D-NEXT: ffint.s.w $fa1, $fa1
-; ILP32D-NEXT: fcvt.d.s $fa1, $fa1
+; ILP32D-NEXT: movgr2fr.w $fa1, $zero
+; ILP32D-NEXT: lu12i.w $a0, 261888
+; ILP32D-NEXT: movgr2frh.w $fa1, $a0
; ILP32D-NEXT: fadd.d $fa0, $fa0, $fa1
; ILP32D-NEXT: ret
;
diff --git a/llvm/test/CodeGen/LoongArch/target-abi-from-triple.ll b/llvm/test/CodeGen/LoongArch/target-abi-from-triple.ll
index c8a33725267a2..b093f6f6010bc 100644
--- a/llvm/test/CodeGen/LoongArch/target-abi-from-triple.ll
+++ b/llvm/test/CodeGen/LoongArch/target-abi-from-triple.ll
@@ -10,9 +10,8 @@
define float @f(float %a) {
; ILP32D-LABEL: f:
; ILP32D: # %bb.0:
-; ILP32D-NEXT: addi.w $a0, $zero, 1
+; ILP32D-NEXT: lu12i.w $a0, 260096
; ILP32D-NEXT: movgr2fr.w $fa1, $a0
-; ILP32D-NEXT: ffint.s.w $fa1, $fa1
; ILP32D-NEXT: fadd.s $fa0, $fa0, $fa1
; ILP32D-NEXT: ret
;
@@ -28,10 +27,9 @@ define float @f(float %a) {
define double @g(double %a) {
; ILP32D-LABEL: g:
; ILP32D: # %bb.0:
-; ILP32D-NEXT: addi.w $a0, $zero, 1
-; ILP32D-NEXT: movgr2fr.w $fa1, $a0
-; ILP32D-NEXT: ffint.s.w $fa1, $fa1
-; ILP32D-NEXT: fcvt.d.s $fa1, $fa1
+; ILP32D-NEXT: movgr2fr.w $fa1, $zero
+; ILP32D-NEXT: lu12i.w $a0, 261888
+; ILP32D-NEXT: movgr2frh.w $fa1, $a0
; ILP32D-NEXT: fadd.d $fa0, $fa0, $fa1
; ILP32D-NEXT: ret
;
diff --git a/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll b/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
index 285527fca11cf..a59d4ffcc4e33 100644
--- a/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
+++ b/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
@@ -81,47 +81,44 @@ define void @test_zero(ptr %P, ptr %S) nounwind {
define void @test_f2(ptr %P, ptr %S) nounwind {
; LA32F-LABEL: test_f2:
; LA32F: # %bb.0:
-; LA32F-NEXT: fld.s $fa0, $a0, 4
-; LA32F-NEXT: fld.s $fa1, $a0, 0
-; LA32F-NEXT: addi.w $a0, $zero, 1
-; LA32F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI1_0)
-; LA32F-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI1_0)
-; LA32F-NEXT: movgr2fr.w $fa3, $a0
-; LA32F-NEXT: ffint.s.w $fa3, $fa3
-; LA32F-NEXT: fadd.s $fa1, $fa1, $fa3
+; LA32F-NEXT: fld.s $fa0, $a0, 0
+; LA32F-NEXT: fld.s $fa1, $a0, 4
+; LA32F-NEXT: lu12i.w $a0, 260096
+; LA32F-NEXT: movgr2fr.w $fa2, $a0
; LA32F-NEXT: fadd.s $fa0, $fa0, $fa2
-; LA32F-NEXT: fst.s $fa0, $a1, 4
-; LA32F-NEXT: fst.s $fa1, $a1, 0
+; LA32F-NEXT: lu12i.w $a0, 262144
+; LA32F-NEXT: movgr2fr.w $fa2, $a0
+; LA32F-NEXT: fadd.s $fa1, $fa1, $fa2
+; LA32F-NEXT: fst.s $fa1, $a1, 4
+; LA32F-NEXT: fst.s $fa0, $a1, 0
; LA32F-NEXT: ret
;
; LA32D-LABEL: test_f2:
; LA32D: # %bb.0:
-; LA32D-NEXT: fld.s $fa0, $a0, 4
-; LA32D-NEXT: fld.s $fa1, $a0, 0
-; LA32D-NEXT: addi.w $a0, $zero, 1
-; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI1_0)
-; LA32D-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI1_0)
-; LA32D-NEXT: movgr2fr.w $fa3, $a0
-; LA32D-NEXT: ffint.s.w $fa3, $fa3
-; LA32D-NEXT: fadd.s $fa1, $fa1, $fa3
+; LA32D-NEXT: fld.s $fa0, $a0, 0
+; LA32D-NEXT: fld.s $fa1, $a0, 4
+; LA32D-NEXT: lu12i.w $a0, 260096
+; LA32D-NEXT: movgr2fr.w $fa2, $a0
; LA32D-NEXT: fadd.s $fa0, $fa0, $fa2
-; LA32D-NEXT: fst.s $fa0, $a1, 4
-; LA32D-NEXT: fst.s $fa1, $a1, 0
+; LA32D-NEXT: lu12i.w $a0, 262144
+; LA32D-NEXT: movgr2fr.w $fa2, $a0
+; LA32D-NEXT: fadd.s $fa1, $fa1, $fa2
+; LA32D-NEXT: fst.s $fa1, $a1, 4
+; LA32D-NEXT: fst.s $fa0, $a1, 0
; LA32D-NEXT: ret
;
; LA64F-LABEL: test_f2:
; LA64F: # %bb.0:
-; LA64F-NEXT: fld.s $fa0, $a0, 4
-; LA64F-NEXT: fld.s $fa1, $a0, 0
-; LA64F-NEXT: addi.w $a0, $zero, 1
-; LA64F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI1_0)
-; LA64F-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI1_0)
-; LA64F-NEXT: movgr2fr.w $fa3, $a0
-; LA64F-NEXT: ffint.s.w $fa3, $fa3
-; LA64F-NEXT: fadd.s $fa1, $fa1, $fa3
+; LA64F-NEXT: fld.s $fa0, $a0, 0
+; LA64F-NEXT: fld.s $fa1, $a0, 4
+; LA64F-NEXT: lu12i.w $a0, 260096
+; LA64F-NEXT: movgr2fr.w $fa2, $a0
; LA64F-NEXT: fadd.s $fa0, $fa0, $fa2
-; LA64F-NEXT: fst.s $fa0, $a1, 4
-; LA64F-NEXT: fst.s $fa1, $a1, 0
+; LA64F-NEXT: lu12i.w $a0, 262144
+; LA64F-NEXT: movgr2fr.w $fa2, $a0
+; LA64F-NEXT: fadd.s $fa1, $fa1, $fa2
+; LA64F-NEXT: fst.s $fa1, $a1, 4
+; LA64F-NEXT: fst.s $fa0, $a1, 0
; LA64F-NEXT: ret
;
; LA64D-LABEL: test_f2:
@@ -145,75 +142,72 @@ define void @test_f4(ptr %P, ptr %S) nounwind {
; LA32F: # %bb.0:
; LA32F-NEXT: fld.s $fa0, $a0, 12
; LA32F-NEXT: fld.s $fa1, $a0, 8
-; LA32F-NEXT: fld.s $fa2, $a0, 4
-; LA32F-NEXT: fld.s $fa3, $a0, 0
-; LA32F-NEXT: addi.w $a0, $zero, 1
+; LA32F-NEXT: fld.s $fa2, $a0, 0
+; LA32F-NEXT: fld.s $fa3, $a0, 4
+; LA32F-NEXT: lu12i.w $a0, 260096
+; LA32F-NEXT: movgr2fr.w $fa4, $a0
+; LA32F-NEXT: fadd.s $fa2, $fa2, $fa4
+; LA32F-NEXT: lu12i.w $a0, 262144
; LA32F-NEXT: movgr2fr.w $fa4, $a0
-; LA32F-NEXT: ffint.s.w $fa4, $fa4
-; LA32F-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; LA32F-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI2_0)
-; LA32F-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_1)
-; LA32F-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI2_1)
-; LA32F-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_2)
-; LA32F-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI2_2)
; LA32F-NEXT: fadd.s $fa3, $fa3, $fa4
-; LA32F-NEXT: fadd.s $fa2, $fa2, $fa5
-; LA32F-NEXT: fadd.s $fa1, $fa1, $fa6
-; LA32F-NEXT: fadd.s $fa0, $fa0, $fa7
+; LA32F-NEXT: lu12i.w $a0, 263168
+; LA32F-NEXT: movgr2fr.w $fa4, $a0
+; LA32F-NEXT: fadd.s $fa1, $fa1, $fa4
+; LA32F-NEXT: lu12i.w $a0, 264192
+; LA32F-NEXT: movgr2fr.w $fa4, $a0
+; LA32F-NEXT: fadd.s $fa0, $fa0, $fa4
; LA32F-NEXT: fst.s $fa0, $a1, 12
; LA32F-NEXT: fst.s $fa1, $a1, 8
-; LA32F-NEXT: fst.s $fa2, $a1, 4
-; LA32F-NEXT: fst.s $fa3, $a1, 0
+; LA32F-NEXT: fst.s $fa3, $a1, 4
+; LA32F-NEXT: fst.s $fa2, $a1, 0
; LA32F-NEXT: ret
;
; LA32D-LABEL: test_f4:
; LA32D: # %bb.0:
; LA32D-NEXT: fld.s $fa0, $a0, 12
; LA32D-NEXT: fld.s $fa1, $a0, 8
-; LA32D-NEXT: fld.s $fa2, $a0, 4
-; LA32D-NEXT: fld.s $fa3, $a0, 0
-; LA32D-NEXT: addi.w $a0, $zero, 1
+; LA32D-NEXT: fld.s $fa2, $a0, 0
+; LA32D-NEXT: fld.s $fa3, $a0, 4
+; LA32D-NEXT: lu12i.w $a0, 260096
+; LA32D-NEXT: movgr2fr.w $fa4, $a0
+; LA32D-NEXT: fadd.s $fa2, $fa2, $fa4
+; LA32D-NEXT: lu12i.w $a0, 262144
; LA32D-NEXT: movgr2fr.w $fa4, $a0
-; LA32D-NEXT: ffint.s.w $fa4, $fa4
-; LA32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; LA32D-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI2_0)
-; LA32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_1)
-; LA32D-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI2_1)
-; LA32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_2)
-; LA32D-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI2_2)
; LA32D-NEXT: fadd.s $fa3, $fa3, $fa4
-; LA32D-NEXT: fadd.s $fa2, $fa2, $fa5
-; LA32D-NEXT: fadd.s $fa1, $fa1, $fa6
-; LA32D-NEXT: fadd.s $fa0, $fa0, $fa7
+; LA32D-NEXT: lu12i.w $a0, 263168
+; LA32D-NEXT: movgr2fr.w $fa4, $a0
+; LA32D-NEXT: fadd.s $fa1, $fa1, $fa4
+; LA32D-NEXT: lu12i.w $a0, 264192
+; LA32D-NEXT: movgr2fr.w $fa4, $a0
+; LA32D-NEXT: fadd.s $fa0, $fa0, $fa4
; LA32D-NEXT: fst.s $fa0, $a1, 12
; LA32D-NEXT: fst.s $fa1, $a1, 8
-; LA32D-NEXT: fst.s $fa2, $a1, 4
-; LA32D-NEXT: fst.s $fa3, $a1, 0
+; LA32D-NEXT: fst.s $fa3, $a1, 4
+; LA32D-NEXT: fst.s $fa2, $a1, 0
; LA32D-NEXT: ret
;
; LA64F-LABEL: test_f4:
; LA64F: # %bb.0:
; LA64F-NEXT: fld.s $fa0, $a0, 12
; LA64F-NEXT: fld.s $fa1, $a0, 8
-; LA64F-NEXT: fld.s $fa2, $a0, 4
-; LA64F-NEXT: fld.s $fa3, $a0, 0
-; LA64F-NEXT: addi.w $a0, $zero, 1
+; LA64F-NEXT: fld.s $fa2, $a0, 0
+; LA64F-NEXT: fld.s $fa3, $a0, 4
+; LA64F-NEXT: lu12i.w $a0, 260096
+; LA64F-NEXT: movgr2fr.w $fa4, $a0
+; LA64F-NEXT: fadd.s $fa2, $fa2, $fa4
+; LA64F-NEXT: lu12i.w $a0, 262144
; LA64F-NEXT: movgr2fr.w $fa4, $a0
-; LA64F-NEXT: ffint.s.w $fa4, $fa4
-; LA64F-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; LA64F-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI2_0)
-; LA64F-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_1)
-; LA64F-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI2_1)
-; LA64F-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_2)
-; LA64F-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI2_2)
; LA64F-NEXT: fadd.s $fa3, $fa3, $fa4
-; LA64F-NEXT: fadd.s $fa2, $fa2, $fa5
-; LA64F-NEXT: fadd.s $fa1, $fa1, $fa6
-; LA64F-NEXT: fadd.s $fa0, $fa0, $fa7
+; LA64F-NEXT: lu12i.w $a0, 263168
+; LA64F-NEXT: movgr2fr.w $fa4, $a0
+; LA64F-NEXT: fadd.s $fa1, $fa1, $fa4
+; LA64F-NEXT: lu12i.w $a0, 264192
+; LA64F-NEXT: movgr2fr.w $fa4, $a0
+; LA64F-NEXT: fadd.s $fa0, $fa0, $fa4
; LA64F-NEXT: fst.s $fa0, $a1, 12
; LA64F-NEXT: fst.s $fa1, $a1, 8
-; LA64F-NEXT: fst.s $fa2, $a1, 4
-; LA64F-NEXT: fst.s $fa3, $a1, 0
+; LA64F-NEXT: fst.s $fa3, $a1, 4
+; LA64F-NEXT: fst.s $fa2, $a1, 0
; LA64F-NEXT: ret
;
; LA64D-LABEL: test_f4:
@@ -233,113 +227,110 @@ define void @test_f4(ptr %P, ptr %S) nounwind {
define void @test_f8(ptr %P, ptr %S) nounwind {
; LA32F-LABEL: test_f8:
; LA32F: # %bb.0:
-; LA32F-NEXT: addi.w $a2, $zero, 1
-; LA32F-NEXT: movgr2fr.w $fa0, $a2
-; LA32F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_0)
-; LA32F-NEXT: fld.s $fa1, $a2, %pc_lo12(.LCPI3_0)
-; LA32F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_1)
-; LA32F-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI3_1)
-; LA32F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_2)
-; LA32F-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI3_2)
-; LA32F-NEXT: fld.s $fa4, $a0, 28
-; LA32F-NEXT: fld.s $fa5, $a0, 24
-; LA32F-NEXT: fld.s $fa6, $a0, 12
-; LA32F-NEXT: fld.s $fa7, $a0, 8
-; LA32F-NEXT: fld.s $ft0, $a0, 0
-; LA32F-NEXT: fld.s $ft1, $a0, 16
-; LA32F-NEXT: fld.s $ft2, $a0, 4
-; LA32F-NEXT: ffint.s.w $fa0, $fa0
-; LA32F-NEXT: fadd.s $ft0, $ft0, $fa0
-; LA32F-NEXT: fadd.s $fa0, $ft1, $fa0
-; LA32F-NEXT: fld.s $ft1, $a0, 20
-; LA32F-NEXT: fadd.s $ft2, $ft2, $fa1
-; LA32F-NEXT: fadd.s $fa7, $fa7, $fa2
-; LA32F-NEXT: fadd.s $fa6, $fa6, $fa3
-; LA32F-NEXT: fadd.s $fa1, $ft1, $fa1
-; LA32F-NEXT: fadd.s $fa2, $fa5, $fa2
-; LA32F-NEXT: fadd.s $fa3, $fa4, $fa3
-; LA32F-NEXT: fst.s $fa3, $a1, 28
-; LA32F-NEXT: fst.s $fa2, $a1, 24
-; LA32F-NEXT: fst.s $fa1, $a1, 20
-; LA32F-NEXT: fst.s $fa6, $a1, 12
-; LA32F-NEXT: fst.s $fa7, $a1, 8
-; LA32F-NEXT: fst.s $ft2, $a1, 4
-; LA32F-NEXT: fst.s $fa0, $a1, 16
-; LA32F-NEXT: fst.s $ft0, $a1, 0
+; LA32F-NEXT: fld.s $fa0, $a0, 28
+; LA32F-NEXT: fld.s $fa1, $a0, 24
+; LA32F-NEXT: fld.s $fa2, $a0, 20
+; LA32F-NEXT: fld.s $fa3, $a0, 16
+; LA32F-NEXT: fld.s $fa4, $a0, 12
+; LA32F-NEXT: fld.s $fa5, $a0, 8
+; LA32F-NEXT: fld.s $fa6, $a0, 0
+; LA32F-NEXT: fld.s $fa7, $a0, 4
+; LA32F-NEXT: lu12i.w $a0, 260096
+; LA32F-NEXT: movgr2fr.w $ft0, $a0
+; LA32F-NEXT: fadd.s $fa6, $fa6, $ft0
+; LA32F-NEXT: lu12i.w $a0, 262144
+; LA32F-NEXT: movgr2fr.w $ft1, $a0
+; LA32F-NEXT: fadd.s $fa7, $fa7, $ft1
+; LA32F-NEXT: lu12i.w $a0, 263168
+; LA32F-NEXT: movgr2fr.w $ft2, $a0
+; LA32F-NEXT: fadd.s $fa5, $fa5, $ft2
+; LA32F-NEXT: lu12i.w $a0, 264192
+; LA32F-NEXT: movgr2fr.w $ft3, $a0
+; LA32F-NEXT: fadd.s $fa4, $fa4, $ft3
+; LA32F-NEXT: fadd.s $fa3, $fa3, $ft0
+; LA32F-NEXT: fadd.s $fa2, $fa2, $ft1
+; LA32F-NEXT: fadd.s $fa1, $fa1, $ft2
+; LA32F-NEXT: fadd.s $fa0, $fa0, $ft3
+; LA32F-NEXT: fst.s $fa0, $a1, 28
+; LA32F-NEXT: fst.s $fa1, $a1, 24
+; LA32F-NEXT: fst.s $fa2, $a1, 20
+; LA32F-NEXT: fst.s $fa3, $a1, 16
+; LA32F-NEXT: fst.s $fa4, $a1, 12
+; LA32F-NEXT: fst.s $fa5, $a1, 8
+; LA32F-NEXT: fst.s $fa7, $a1, 4
+; LA32F-NEXT: fst.s $fa6, $a1, 0
; LA32F-NEXT: ret
;
; LA32D-LABEL: test_f8:
; LA32D: # %bb.0:
-; LA32D-NEXT: addi.w $a2, $zero, 1
-; LA32D-NEXT: movgr2fr.w $fa0, $a2
-; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_0)
-; LA32D-NEXT: fld.s $fa1, $a2, %pc_lo12(.LCPI3_0)
-; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_1)
-; LA32D-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI3_1)
-; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_2)
-; LA32D-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI3_2)
-; LA32D-NEXT: fld.s $fa4, $a0, 28
-; LA32D-NEXT: fld.s $fa5, $a0, 24
-; LA32D-NEXT: fld.s $fa6, $a0, 12
-; LA32D-NEXT: fld.s $fa7, $a0, 8
-; LA32D-NEXT: fld.s $ft0, $a0, 0
-; LA32D-NEXT: fld.s $ft1, $a0, 16
-; LA32D-NEXT: fld.s $ft2, $a0, 4
-; LA32D-NEXT: ffint.s.w $fa0, $fa0
-; LA32D-NEXT: fadd.s $ft0, $ft0, $fa0
-; LA32D-NEXT: fadd.s $fa0, $ft1, $fa0
-; LA32D-NEXT: fld.s $ft1, $a0, 20
-; LA32D-NEXT: fadd.s $ft2, $ft2, $fa1
-; LA32D-NEXT: fadd.s $fa7, $fa7, $fa2
-; LA32D-NEXT: fadd.s $fa6, $fa6, $fa3
-; LA32D-NEXT: fadd.s $fa1, $ft1, $fa1
-; LA32D-NEXT: fadd.s $fa2, $fa5, $fa2
-; LA32D-NEXT: fadd.s $fa3, $fa4, $fa3
-; LA32D-NEXT: fst.s $fa3, $a1, 28
-; LA32D-NEXT: fst.s $fa2, $a1, 24
-; LA32D-NEXT: fst.s $fa1, $a1, 20
-; LA32D-NEXT: fst.s $fa6, $a1, 12
-; LA32D-NEXT: fst.s $fa7, $a1, 8
-; LA32D-NEXT: fst.s $ft2, $a1, 4
-; LA32D-NEXT: fst.s $fa0, $a1, 16
-; LA32D-NEXT: fst.s $ft0, $a1, 0
+; LA32D-NEXT: fld.s $fa0, $a0, 28
+; LA32D-NEXT: fld.s $fa1, $a0, 24
+; LA32D-NEXT: fld.s $fa2, $a0, 20
+; LA32D-NEXT: fld.s $fa3, $a0, 16
+; LA32D-NEXT: fld.s $fa4, $a0, 12
+; LA32D-NEXT: fld.s $fa5, $a0, 8
+; LA32D-NEXT: fld.s $fa6, $a0, 0
+; LA32D-NEXT: fld.s $fa7, $a0, 4
+; LA32D-NEXT: lu12i.w $a0, 260096
+; LA32D-NEXT: movgr2fr.w $ft0, $a0
+; LA32D-NEXT: fadd.s $fa6, $fa6, $ft0
+; LA32D-NEXT: lu12i.w $a0, 262144
+; LA32D-NEXT: movgr2fr.w $ft1, $a0
+; LA32D-NEXT: fadd.s $fa7, $fa7, $ft1
+; LA32D-NEXT: lu12i.w $a0, 263168
+; LA32D-NEXT: movgr2fr.w $ft2, $a0
+; LA32D-NEXT: fadd.s $fa5, $fa5, $ft2
+; LA32D-NEXT: lu12i.w $a0, 264192
+; LA32D-NEXT: movgr2fr.w $ft3, $a0
+; LA32D-NEXT: fadd.s $fa4, $fa4, $ft3
+; LA32D-NEXT: fadd.s $fa3, $fa3, $ft0
+; LA32D-NEXT: fadd.s $fa2, $fa2, $ft1
+; LA32D-NEXT: fadd.s $fa1, $fa1, $ft2
+; LA32D-NEXT: fadd.s $fa0, $fa0, $ft3
+; LA32D-NEXT: fst.s $fa0, $a1, 28
+; LA32D-NEXT: fst.s $fa1, $a1, 24
+; LA32D-NEXT: fst.s $fa2, $a1, 20
+; LA32D-NEXT: fst.s $fa3, $a1, 16
+; LA32D-NEXT: fst.s $fa4, $a1, 12
+; LA32D-NEXT: fst.s $fa5, $a1, 8
+; LA32D-NEXT: fst.s $fa7, $a1, 4
+; LA32D-NEXT: fst.s $fa6, $a1, 0
; LA32D-NEXT: ret
;
; LA64F-LABEL: test_f8:
; LA64F: # %bb.0:
-; LA64F-NEXT: addi.w $a2, $zero, 1
-; LA64F-NEXT: movgr2fr.w $fa0, $a2
-; LA64F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_0)
-; LA64F-NEXT: fld.s $fa1, $a2, %pc_lo12(.LCPI3_0)
-; LA64F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_1)
-; LA64F-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI3_1)
-; LA64F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_2)
-; LA64F-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI3_2)
-; LA64F-NEXT: fld.s $fa4, $a0, 28
-; LA64F-NEXT: fld.s $fa5, $a0, 24
-; LA64F-NEXT: fld.s $fa6, $a0, 12
-; LA64F-NEXT: fld.s $fa7, $a0, 8
-; LA64F-NEXT: fld.s $ft0, $a0, 0
-; LA64F-NEXT: fld.s $ft1, $a0, 16
-; LA64F-NEXT: fld.s $ft2, $a0, 4
-; LA64F-NEXT: ffint.s.w $fa0, $fa0
-; LA64F-NEXT: fadd.s $ft0, $ft0, $fa0
-; LA64F-NEXT: fadd.s $fa0, $ft1, $fa0
-; LA64F-NEXT: fld.s $ft1, $a0, 20
-; LA64F-NEXT: fadd.s $ft2, $ft2, $fa1
-; LA64F-NEXT: fadd.s $fa7, $fa7, $fa2
-; LA64F-NEXT: fadd.s $fa6, $fa6, $fa3
-; LA64F-NEXT: fadd.s $fa1, $ft1, $fa1
-; LA64F-NEXT: fadd.s $fa2, $fa5, $fa2
-; LA64F-NEXT: fadd.s $fa3, $fa4, $fa3
-; LA64F-NEXT: fst.s $fa3, $a1, 28
-; LA64F-NEXT: fst.s $fa2, $a1, 24
-; LA64F-NEXT: fst.s $fa1, $a1, 20
-; LA64F-NEXT: fst.s $fa6, $a1, 12
-; LA64F-NEXT: fst.s $fa7, $a1, 8
-; LA64F-NEXT: fst.s $ft2, $a1, 4
-; LA64F-NEXT: fst.s $fa0, $a1, 16
-; LA64F-NEXT: fst.s $ft0, $a1, 0
+; LA64F-NEXT: fld.s $fa0, $a0, 28
+; LA64F-NEXT: fld.s $fa1, $a0, 24
+; LA64F-NEXT: fld.s $fa2, $a0, 20
+; LA64F-NEXT: fld.s $fa3, $a0, 16
+; LA64F-NEXT: fld.s $fa4, $a0, 12
+; LA64F-NEXT: fld.s $fa5, $a0, 8
+; LA64F-NEXT: fld.s $fa6, $a0, 0
+; LA64F-NEXT: fld.s $fa7, $a0, 4
+; LA64F-NEXT: lu12i.w $a0, 260096
+; LA64F-NEXT: movgr2fr.w $ft0, $a0
+; LA64F-NEXT: fadd.s $fa6, $fa6, $ft0
+; LA64F-NEXT: lu12i.w $a0, 262144
+; LA64F-NEXT: movgr2fr.w $ft1, $a0
+; LA64F-NEXT: fadd.s $fa7, $fa7, $ft1
+; LA64F-NEXT: lu12i.w $a0, 263168
+; LA64F-NEXT: movgr2fr.w $ft2, $a0
+; LA64F-NEXT: fadd.s $fa5, $fa5, $ft2
+; LA64F-NEXT: lu12i.w $a0, 264192
+; LA64F-NEXT: movgr2fr.w $ft3, $a0
+; LA64F-NEXT: fadd.s $fa4, $fa4, $ft3
+; LA64F-NEXT: fadd.s $fa3, $fa3, $ft0
+; LA64F-NEXT: fadd.s $fa2, $fa2, $ft1
+; LA64F-NEXT: fadd.s $fa1, $fa1, $ft2
+; LA64F-NEXT: fadd.s $fa0, $fa0, $ft3
+; LA64F-NEXT: fst.s $fa0, $a1, 28
+; LA64F-NEXT: fst.s $fa1, $a1, 24
+; LA64F-NEXT: fst.s $fa2, $a1, 20
+; LA64F-NEXT: fst.s $fa3, $a1, 16
+; LA64F-NEXT: fst.s $fa4, $a1, 12
+; LA64F-NEXT: fst.s $fa5, $a1, 8
+; LA64F-NEXT: fst.s $fa7, $a1, 4
+; LA64F-NEXT: fst.s $fa6, $a1, 0
; LA64F-NEXT: ret
;
; LA64D-LABEL: test_f8:
@@ -403,14 +394,14 @@ define void @test_d2(ptr %P, ptr %S) nounwind {
; LA32D: # %bb.0:
; LA32D-NEXT: fld.d $fa0, $a0, 8
; LA32D-NEXT: fld.d $fa1, $a0, 0
-; LA32D-NEXT: addi.w $a0, $zero, 1
-; LA32D-NEXT: movgr2fr.w $fa2, $a0
-; LA32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
-; LA32D-NEXT: fld.d $fa3, $a0, %pc_lo12(.LCPI4_0)
-; LA32D-NEXT: ffint.s.w $fa2, $fa2
-; LA32D-NEXT: fcvt.d.s $fa2, $fa2
-; LA32D-NEXT: fadd.d $fa1, $fa1, $fa2
-; LA32D-NEXT: fadd.d $fa0, $fa0, $fa3
+; LA32D-NEXT: movgr2fr.w $fa2, $zero
+; LA32D-NEXT: lu12i.w $a0, 261888
+; LA32D-NEXT: fmov.d $fa3, $fa2
+; LA32D-NEXT: movgr2frh.w $fa3, $a0
+; LA32D-NEXT: fadd.d $fa1, $fa1, $fa3
+; LA32D-NEXT: lu12i.w $a0, 262144
+; LA32D-NEXT: movgr2frh.w $fa2, $a0
+; LA32D-NEXT: fadd.d $fa0, $fa0, $fa2
; LA32D-NEXT: fst.d $fa0, $a1, 8
; LA32D-NEXT: fst.d $fa1, $a1, 0
; LA32D-NEXT: ret
@@ -532,20 +523,22 @@ define void @test_d4(ptr %P, ptr %S) nounwind {
; LA32D-NEXT: fld.d $fa1, $a0, 16
; LA32D-NEXT: fld.d $fa2, $a0, 8
; LA32D-NEXT: fld.d $fa3, $a0, 0
-; LA32D-NEXT: addi.w $a0, $zero, 1
-; LA32D-NEXT: movgr2fr.w $fa4, $a0
-; LA32D-NEXT: ffint.s.w $fa4, $fa4
-; LA32D-NEXT: fcvt.d.s $fa4, $fa4
-; LA32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; LA32D-NEXT: fld.d $fa5, $a0, %pc_lo12(.LCPI5_0)
-; LA32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_1)
-; LA32D-NEXT: fld.d $fa6, $a0, %pc_lo12(.LCPI5_1)
-; LA32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_2)
-; LA32D-NEXT: fld.d $fa7, $a0, %pc_lo12(.LCPI5_2)
-; LA32D-NEXT: fadd.d $fa3, $fa3, $fa4
+; LA32D-NEXT: movgr2fr.w $fa4, $zero
+; LA32D-NEXT: lu12i.w $a0, 261888
+; LA32D-NEXT: fmov.d $fa5, $fa4
+; LA32D-NEXT: movgr2frh.w $fa5, $a0
+; LA32D-NEXT: fadd.d $fa3, $fa3, $fa5
+; LA32D-NEXT: lu12i.w $a0, 262144
+; LA32D-NEXT: fmov.d $fa5, $fa4
+; LA32D-NEXT: movgr2frh.w $fa5, $a0
; LA32D-NEXT: fadd.d $fa2, $fa2, $fa5
-; LA32D-NEXT: fadd.d $fa1, $fa1, $fa6
-; LA32D-NEXT: fadd.d $fa0, $fa0, $fa7
+; LA32D-NEXT: lu12i.w $a0, 262272
+; LA32D-NEXT: fmov.d $fa5, $fa4
+; LA32D-NEXT: movgr2frh.w $fa5, $a0
+; LA32D-NEXT: fadd.d $fa1, $fa1, $fa5
+; LA32D-NEXT: lu12i.w $a0, 262400
+; LA32D-NEXT: movgr2frh.w $fa4, $a0
+; LA32D-NEXT: fadd.d $fa0, $fa0, $fa4
; LA32D-NEXT: fst.d $fa0, $a1, 24
; LA32D-NEXT: fst.d $fa1, $a1, 16
; LA32D-NEXT: fst.d $fa2, $a1, 8
@@ -748,40 +741,42 @@ define void @test_d8(ptr %P, ptr %S) nounwind {
;
; LA32D-LABEL: test_d8:
; LA32D: # %bb.0:
-; LA32D-NEXT: addi.w $a2, $zero, 1
-; LA32D-NEXT: movgr2fr.w $fa0, $a2
-; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI6_0)
-; LA32D-NEXT: fld.d $fa1, $a2, %pc_lo12(.LCPI6_0)
-; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI6_1)
-; LA32D-NEXT: fld.d $fa2, $a2, %pc_lo12(.LCPI6_1)
-; LA32D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI6_2)
-; LA32D-NEXT: fld.d $fa3, $a2, %pc_lo12(.LCPI6_2)
-; LA32D-NEXT: fld.d $fa4, $a0, 56
-; LA32D-NEXT: fld.d $fa5, $a0, 48
-; LA32D-NEXT: fld.d $fa6, $a0, 24
-; LA32D-NEXT: fld.d $fa7, $a0, 16
-; LA32D-NEXT: fld.d $ft0, $a0, 8
-; LA32D-NEXT: fld.d $ft1, $a0, 0
-; LA32D-NEXT: fld.d $ft2, $a0, 32
-; LA32D-NEXT: ffint.s.w $fa0, $fa0
-; LA32D-NEXT: fcvt.d.s $fa0, $fa0
-; LA32D-NEXT: fadd.d $ft1, $ft1, $fa0
-; LA32D-NEXT: fadd.d $fa0, $ft2, $fa0
-; LA32D-NEXT: fld.d $ft2, $a0, 40
-; LA32D-NEXT: fadd.d $ft0, $ft0, $fa1
-; LA32D-NEXT: fadd.d $fa7, $fa7, $fa2
-; LA32D-NEXT: fadd.d $fa6, $fa6, $fa3
-; LA32D-NEXT: fadd.d $fa1, $ft2, $fa1
-; LA32D-NEXT: fadd.d $fa2, $fa5, $fa2
-; LA32D-NEXT: fadd.d $fa3, $fa4, $fa3
-; LA32D-NEXT: fst.d $fa3, $a1, 56
-; LA32D-NEXT: fst.d $fa2, $a1, 48
-; LA32D-NEXT: fst.d $fa1, $a1, 40
-; LA32D-NEXT: fst.d $fa6, $a1, 24
-; LA32D-NEXT: fst.d $fa7, $a1, 16
-; LA32D-NEXT: fst.d $ft0, $a1, 8
-; LA32D-NEXT: fst.d $fa0, $a1, 32
-; LA32D-NEXT: fst.d $ft1, $a1, 0
+; LA32D-NEXT: fld.d $fa0, $a0, 56
+; LA32D-NEXT: fld.d $fa1, $a0, 48
+; LA32D-NEXT: fld.d $fa2, $a0, 40
+; LA32D-NEXT: fld.d $fa3, $a0, 32
+; LA32D-NEXT: fld.d $fa4, $a0, 24
+; LA32D-NEXT: fld.d $fa5, $a0, 16
+; LA32D-NEXT: fld.d $fa6, $a0, 8
+; LA32D-NEXT: fld.d $fa7, $a0, 0
+; LA32D-NEXT: movgr2fr.w $ft0, $zero
+; LA32D-NEXT: lu12i.w $a0, 261888
+; LA32D-NEXT: fmov.d $ft1, $ft0
+; LA32D-NEXT: movgr2frh.w $ft1, $a0
+; LA32D-NEXT: fadd.d $fa7, $fa7, $ft1
+; LA32D-NEXT: lu12i.w $a0, 262144
+; LA32D-NEXT: fmov.d $ft2, $ft0
+; LA32D-NEXT: movgr2frh.w $ft2, $a0
+; LA32D-NEXT: fadd.d $fa6, $fa6, $ft2
+; LA32D-NEXT: lu12i.w $a0, 262272
+; LA32D-NEXT: fmov.d $ft3, $ft0
+; LA32D-NEXT: movgr2frh.w $ft3, $a0
+; LA32D-NEXT: fadd.d $fa5, $fa5, $ft3
+; LA32D-NEXT: lu12i.w $a0, 262400
+; LA32D-NEXT: movgr2frh.w $ft0, $a0
+; LA32D-NEXT: fadd.d $fa4, $fa4, $ft0
+; LA32D-NEXT: fadd.d $fa3, $fa3, $ft1
+; LA32D-NEXT: fadd.d $fa2, $fa2, $ft2
+; LA32D-NEXT: fadd.d $fa1, $fa1, $ft3
+; LA32D-NEXT: fadd.d $fa0, $fa0, $ft0
+; LA32D-NEXT: fst.d $fa0, $a1, 56
+; LA32D-NEXT: fst.d $fa1, $a1, 48
+; LA32D-NEXT: fst.d $fa2, $a1, 40
+; LA32D-NEXT: fst.d $fa3, $a1, 32
+; LA32D-NEXT: fst.d $fa4, $a1, 24
+; LA32D-NEXT: fst.d $fa5, $a1, 16
+; LA32D-NEXT: fst.d $fa6, $a1, 8
+; LA32D-NEXT: fst.d $fa7, $a1, 0
; LA32D-NEXT: ret
;
; LA64F-LABEL: test_d8:
>From 2432bb0b52e393ced5ad60726137cbe54d04132a Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Thu, 11 Sep 2025 19:50:23 +0800
Subject: [PATCH 2/3] deal with frecip and frsqrt
---
.../LoongArch/LoongArchFloat32InstrInfo.td | 16 ++++++++++
.../LoongArch/LoongArchFloat64InstrInfo.td | 9 ++++++
llvm/test/CodeGen/LoongArch/fp-reciprocal.ll | 17 +++--------
.../LoongArch/fsqrt-reciprocal-estimate.ll | 29 ++++++++-----------
llvm/test/CodeGen/LoongArch/fsqrt.ll | 21 +++-----------
5 files changed, 45 insertions(+), 47 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
index cd6bb40c880f1..690dd73014e57 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
@@ -189,6 +189,14 @@ def : PatFpr<fneg, FNEG_S, FPR32>;
def : PatFpr<fabs, FABS_S, FPR32>;
def : PatFpr<fsqrt, FSQRT_S, FPR32>;
def : Pat<(fdiv fpimm1, (fsqrt FPR32:$fj)), (FRSQRT_S FPR32:$fj)>;
+let Predicates = [HasBasicF, IsLA64] in {
+def : Pat<(fdiv (loongarch_movgr2fr_w_la64 (i64 1065353216)), (fsqrt FPR32:$fj)),
+ (FRSQRT_S FPR32:$fj)>;
+} // Predicates = [HasBasicF, IsLA64]
+let Predicates = [HasBasicF, IsLA32] in {
+def : Pat<(fdiv (loongarch_movgr2fr_w (i32 1065353216)), (fsqrt FPR32:$fj)),
+ (FRSQRT_S FPR32:$fj)>;
+} // Predicates = [HasBasicF, IsLA32]
def : Pat<(fcanonicalize FPR32:$fj), (FMAX_S $fj, $fj)>;
def : Pat<(is_fpclass FPR32:$fj, (i32 timm:$mask)),
(SLTU R0, (ANDI (MOVFR2GR_S (FCLASS_S FPR32:$fj)),
@@ -299,6 +307,14 @@ def : Pat<(loongarch_ftint FPR32:$src), (FTINTRZ_W_S FPR32:$src)>;
// FP reciprocal operation
def : Pat<(fdiv fpimm1, FPR32:$src), (FRECIP_S $src)>;
+let Predicates = [HasBasicF, IsLA64] in {
+def : Pat<(fdiv (loongarch_movgr2fr_w_la64 (i64 1065353216)), FPR32:$src),
+ (FRECIP_S $src)>;
+} // Predicates = [HasBasicF, IsLA64]
+let Predicates = [HasBasicF, IsLA32] in {
+def : Pat<(fdiv (loongarch_movgr2fr_w (i32 1065353216)), FPR32:$src),
+ (FRECIP_S $src)>;
+} // Predicates = [HasBasicF, IsLA32]
let Predicates = [HasFrecipe] in {
// FP approximate reciprocal operation
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
index c5fb7aeb9ed85..daefbaa52d42a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
@@ -162,6 +162,11 @@ def : PatFpr<fneg, FNEG_D, FPR64>;
def : PatFpr<fabs, FABS_D, FPR64>;
def : PatFpr<fsqrt, FSQRT_D, FPR64>;
def : Pat<(fdiv fpimm1, (fsqrt FPR64:$fj)), (FRSQRT_D FPR64:$fj)>;
+let Predicates = [IsLA32] in {
+def : Pat<(fdiv (loongarch_movgr2fr_d_lo_hi (i32 0), (i32 1072693248)),
+ (fsqrt FPR64:$fj)),
+ (FRSQRT_D FPR64:$fj)>;
+} // Predicates = [IsLA32]
def : Pat<(fcopysign FPR64:$fj, FPR32:$fk),
(FCOPYSIGN_D FPR64:$fj, (FCVT_D_S FPR32:$fk))>;
def : Pat<(fcopysign FPR32:$fj, FPR64:$fk),
@@ -267,6 +272,10 @@ def : Pat<(f64 (fpextend FPR32:$src)), (FCVT_D_S FPR32:$src)>;
// FP reciprocal operation
def : Pat<(fdiv fpimm1, FPR64:$src), (FRECIP_D $src)>;
+let Predicates = [IsLA32] in {
+def : Pat<(fdiv (loongarch_movgr2fr_d_lo_hi (i32 0), (i32 1072693248)), FPR64:$src),
+ (FRECIP_D FPR64:$src)>;
+} // Predicates = [IsLA32]
let Predicates = [HasFrecipe] in {
// FP approximate reciprocal operation
diff --git a/llvm/test/CodeGen/LoongArch/fp-reciprocal.ll b/llvm/test/CodeGen/LoongArch/fp-reciprocal.ll
index 29efb1ff909f0..11e246eafe4ff 100644
--- a/llvm/test/CodeGen/LoongArch/fp-reciprocal.ll
+++ b/llvm/test/CodeGen/LoongArch/fp-reciprocal.ll
@@ -8,23 +8,17 @@
define float @f32_reciprocal(float %a) nounwind {
; LA32F-LABEL: f32_reciprocal:
; LA32F: # %bb.0:
-; LA32F-NEXT: lu12i.w $a0, 260096
-; LA32F-NEXT: movgr2fr.w $fa1, $a0
-; LA32F-NEXT: fdiv.s $fa0, $fa1, $fa0
+; LA32F-NEXT: frecip.s $fa0, $fa0
; LA32F-NEXT: ret
;
; LA32D-LABEL: f32_reciprocal:
; LA32D: # %bb.0:
-; LA32D-NEXT: lu12i.w $a0, 260096
-; LA32D-NEXT: movgr2fr.w $fa1, $a0
-; LA32D-NEXT: fdiv.s $fa0, $fa1, $fa0
+; LA32D-NEXT: frecip.s $fa0, $fa0
; LA32D-NEXT: ret
;
; LA64F-LABEL: f32_reciprocal:
; LA64F: # %bb.0:
-; LA64F-NEXT: lu12i.w $a0, 260096
-; LA64F-NEXT: movgr2fr.w $fa1, $a0
-; LA64F-NEXT: fdiv.s $fa0, $fa1, $fa0
+; LA64F-NEXT: frecip.s $fa0, $fa0
; LA64F-NEXT: ret
;
; LA64D-LABEL: f32_reciprocal:
@@ -51,10 +45,7 @@ define double @f64_reciprocal(double %a) nounwind {
;
; LA32D-LABEL: f64_reciprocal:
; LA32D: # %bb.0:
-; LA32D-NEXT: movgr2fr.w $fa1, $zero
-; LA32D-NEXT: lu12i.w $a0, 261888
-; LA32D-NEXT: movgr2frh.w $fa1, $a0
-; LA32D-NEXT: fdiv.d $fa0, $fa1, $fa0
+; LA32D-NEXT: frecip.d $fa0, $fa0
; LA32D-NEXT: ret
;
; LA64F-LABEL: f64_reciprocal:
diff --git a/llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll b/llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll
index 91d1efe9e3f98..29cfea7dc09cd 100644
--- a/llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll
+++ b/llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll
@@ -11,10 +11,7 @@ declare double @llvm.sqrt.f64(double)
define float @frsqrt_f32(float %a) nounwind {
; LA32F-LABEL: frsqrt_f32:
; LA32F: # %bb.0:
-; LA32F-NEXT: fsqrt.s $fa0, $fa0
-; LA32F-NEXT: lu12i.w $a0, 260096
-; LA32F-NEXT: movgr2fr.w $fa1, $a0
-; LA32F-NEXT: fdiv.s $fa0, $fa1, $fa0
+; LA32F-NEXT: frsqrt.s $fa0, $fa0
; LA32F-NEXT: ret
;
; LA32F-FRECIPE-LABEL: frsqrt_f32:
@@ -564,15 +561,14 @@ define double @sqrt_simplify_before_recip_4_uses_f64(double %x, ptr %p1, ptr %p2
define float @sqrt_simplify_before_recip_3_uses_f32(float %x, ptr %p1, ptr %p2) nounwind {
; LA32F-LABEL: sqrt_simplify_before_recip_3_uses_f32:
; LA32F: # %bb.0:
-; LA32F-NEXT: fsqrt.s $fa0, $fa0
-; LA32F-NEXT: lu12i.w $a2, 260096
-; LA32F-NEXT: movgr2fr.w $fa1, $a2
-; LA32F-NEXT: fdiv.s $fa1, $fa1, $fa0
+; LA32F-NEXT: fsqrt.s $fa1, $fa0
+; LA32F-NEXT: frsqrt.s $fa0, $fa0
; LA32F-NEXT: lu12i.w $a2, 270976
; LA32F-NEXT: movgr2fr.w $fa2, $a2
-; LA32F-NEXT: fdiv.s $fa2, $fa2, $fa0
-; LA32F-NEXT: fst.s $fa1, $a0, 0
+; LA32F-NEXT: fdiv.s $fa2, $fa2, $fa1
+; LA32F-NEXT: fst.s $fa0, $a0, 0
; LA32F-NEXT: fst.s $fa2, $a1, 0
+; LA32F-NEXT: fmov.s $fa0, $fa1
; LA32F-NEXT: ret
;
; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_f32:
@@ -636,19 +632,18 @@ define float @sqrt_simplify_before_recip_3_uses_f32(float %x, ptr %p1, ptr %p2)
define float @sqrt_simplify_before_recip_4_uses_f32(float %x, ptr %p1, ptr %p2, ptr %p3) nounwind {
; LA32F-LABEL: sqrt_simplify_before_recip_4_uses_f32:
; LA32F: # %bb.0:
-; LA32F-NEXT: fsqrt.s $fa0, $fa0
-; LA32F-NEXT: lu12i.w $a3, 260096
-; LA32F-NEXT: movgr2fr.w $fa1, $a3
-; LA32F-NEXT: fdiv.s $fa1, $fa1, $fa0
+; LA32F-NEXT: fsqrt.s $fa1, $fa0
+; LA32F-NEXT: frsqrt.s $fa0, $fa0
; LA32F-NEXT: lu12i.w $a3, 270976
; LA32F-NEXT: movgr2fr.w $fa2, $a3
-; LA32F-NEXT: fdiv.s $fa2, $fa2, $fa0
+; LA32F-NEXT: fdiv.s $fa2, $fa2, $fa1
; LA32F-NEXT: lu12i.w $a3, 271040
; LA32F-NEXT: movgr2fr.w $fa3, $a3
-; LA32F-NEXT: fdiv.s $fa3, $fa3, $fa0
-; LA32F-NEXT: fst.s $fa1, $a0, 0
+; LA32F-NEXT: fdiv.s $fa3, $fa3, $fa1
+; LA32F-NEXT: fst.s $fa0, $a0, 0
; LA32F-NEXT: fst.s $fa2, $a1, 0
; LA32F-NEXT: fst.s $fa3, $a2, 0
+; LA32F-NEXT: fmov.s $fa0, $fa1
; LA32F-NEXT: ret
;
; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_4_uses_f32:
diff --git a/llvm/test/CodeGen/LoongArch/fsqrt.ll b/llvm/test/CodeGen/LoongArch/fsqrt.ll
index 290b6f3de78d1..e0cb4d39474b1 100644
--- a/llvm/test/CodeGen/LoongArch/fsqrt.ll
+++ b/llvm/test/CodeGen/LoongArch/fsqrt.ll
@@ -67,26 +67,17 @@ define double @fsqrt_f64(double %a) nounwind {
define float @frsqrt_f32(float %a) nounwind {
; LA32F-LABEL: frsqrt_f32:
; LA32F: # %bb.0:
-; LA32F-NEXT: fsqrt.s $fa0, $fa0
-; LA32F-NEXT: lu12i.w $a0, 260096
-; LA32F-NEXT: movgr2fr.w $fa1, $a0
-; LA32F-NEXT: fdiv.s $fa0, $fa1, $fa0
+; LA32F-NEXT: frsqrt.s $fa0, $fa0
; LA32F-NEXT: ret
;
; LA32D-LABEL: frsqrt_f32:
; LA32D: # %bb.0:
-; LA32D-NEXT: fsqrt.s $fa0, $fa0
-; LA32D-NEXT: lu12i.w $a0, 260096
-; LA32D-NEXT: movgr2fr.w $fa1, $a0
-; LA32D-NEXT: fdiv.s $fa0, $fa1, $fa0
+; LA32D-NEXT: frsqrt.s $fa0, $fa0
; LA32D-NEXT: ret
;
; LA64F-LABEL: frsqrt_f32:
; LA64F: # %bb.0:
-; LA64F-NEXT: fsqrt.s $fa0, $fa0
-; LA64F-NEXT: lu12i.w $a0, 260096
-; LA64F-NEXT: movgr2fr.w $fa1, $a0
-; LA64F-NEXT: fdiv.s $fa0, $fa1, $fa0
+; LA64F-NEXT: frsqrt.s $fa0, $fa0
; LA64F-NEXT: ret
;
; LA64D-LABEL: frsqrt_f32:
@@ -115,11 +106,7 @@ define double @frsqrt_f64(double %a) nounwind {
;
; LA32D-LABEL: frsqrt_f64:
; LA32D: # %bb.0:
-; LA32D-NEXT: fsqrt.d $fa0, $fa0
-; LA32D-NEXT: movgr2fr.w $fa1, $zero
-; LA32D-NEXT: lu12i.w $a0, 261888
-; LA32D-NEXT: movgr2frh.w $fa1, $a0
-; LA32D-NEXT: fdiv.d $fa0, $fa1, $fa0
+; LA32D-NEXT: frsqrt.d $fa0, $fa0
; LA32D-NEXT: ret
;
; LA64F-LABEL: frsqrt_f64:
>From df481191ce47bf7f6648d12b7947cd4c0e8e1794 Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Fri, 12 Sep 2025 20:12:08 +0800
Subject: [PATCH 3/3] add option to set max allowed instructins
---
.../LoongArch/LoongArchISelLowering.cpp | 34 +-
.../CodeGen/LoongArch/calling-conv-ilp32d.ll | 40 +--
llvm/test/CodeGen/LoongArch/double-imm.ll | 306 ++++++++++++++++--
.../LoongArch/fsqrt-reciprocal-estimate.ll | 70 ++--
llvm/test/CodeGen/LoongArch/vector-fp-imm.ll | 58 ++--
5 files changed, 389 insertions(+), 119 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 2f10dc9704445..90e7082b167ce 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -18,6 +18,7 @@
#include "LoongArchSubtarget.h"
#include "MCTargetDesc/LoongArchBaseInfo.h"
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "MCTargetDesc/LoongArchMatInt.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
@@ -41,6 +42,29 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
+enum MaterializeFPImm {
+ NoMaterializeFPImm,
+ MaterializeFPImm1Ins,
+ MaterializeFPImm2Ins,
+ MaterializeFPImm3Ins,
+ MaterializeFPImm4Ins
+};
+
+static cl::opt<MaterializeFPImm> MaterializeFPImmInsNum(
+ "loongarch-materialize-float-imm", cl::Hidden,
+ cl::desc("Maximum number of instructions used when materializing "
+ "floating-point immediates (default = 2)"),
+ cl::init(MaterializeFPImm2Ins),
+ cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
+ clEnumValN(MaterializeFPImm1Ins, "1",
+ "Materialize FP immediate within 1 instruction"),
+ clEnumValN(MaterializeFPImm2Ins, "2",
+ "Materialize FP immediate within 2 instructions"),
+ clEnumValN(MaterializeFPImm3Ins, "3",
+ "Materialize FP immediate within 3 instructions"),
+ clEnumValN(MaterializeFPImm4Ins, "4",
+ "Materialize FP immediate within 4 instructions")));
+
static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
cl::desc("Trap on integer division by zero."),
cl::init(false));
@@ -572,7 +596,7 @@ SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
return SDValue();
// If lsx enabled, use cheaper 'vldi' instruction if possible.
- if (Subtarget.hasExtLSX() && isFPImmVLDILegal(FPVal, VT))
+ if (isFPImmVLDILegal(FPVal, VT))
return SDValue();
// Construct as integer, and move to float register.
@@ -590,10 +614,18 @@ SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
DL, VT, NewVal);
}
case MVT::f64: {
+ // If more than MaterializeFPImmInsNum instructions will be used to
+ // generate the INTVal, fallback to use floating point load from the
+ // constant pool.
+ auto Seq = LoongArchMatInt::generateInstSeq(INTVal.getSExtValue());
+ if (Seq.size() > MaterializeFPImmInsNum && !FPVal.isExactlyValue(+1.0))
+ return SDValue();
+
if (Subtarget.is64Bit()) {
SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64);
return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
}
+
SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-ilp32d.ll b/llvm/test/CodeGen/LoongArch/calling-conv-ilp32d.ll
index 0544d93f97300..95f9aa514b340 100644
--- a/llvm/test/CodeGen/LoongArch/calling-conv-ilp32d.ll
+++ b/llvm/test/CodeGen/LoongArch/calling-conv-ilp32d.ll
@@ -72,21 +72,17 @@ define i32 @caller_double_in_gpr_exhausted_fprs() nounwind {
; CHECK-NEXT: lu12i.w $a0, 262144
; CHECK-NEXT: fmov.d $fa1, $fa7
; CHECK-NEXT: movgr2frh.w $fa1, $a0
-; CHECK-NEXT: lu12i.w $a0, 262272
-; CHECK-NEXT: fmov.d $fa2, $fa7
-; CHECK-NEXT: movgr2frh.w $fa2, $a0
; CHECK-NEXT: lu12i.w $a0, 262400
; CHECK-NEXT: fmov.d $fa3, $fa7
; CHECK-NEXT: movgr2frh.w $fa3, $a0
-; CHECK-NEXT: lu12i.w $a0, 262464
-; CHECK-NEXT: fmov.d $fa4, $fa7
-; CHECK-NEXT: movgr2frh.w $fa4, $a0
-; CHECK-NEXT: lu12i.w $a0, 262528
-; CHECK-NEXT: fmov.d $fa5, $fa7
-; CHECK-NEXT: movgr2frh.w $fa5, $a0
-; CHECK-NEXT: lu12i.w $a0, 262592
-; CHECK-NEXT: fmov.d $fa6, $fa7
-; CHECK-NEXT: movgr2frh.w $fa6, $a0
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
+; CHECK-NEXT: fld.d $fa2, $a0, %pc_lo12(.LCPI3_0)
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1)
+; CHECK-NEXT: fld.d $fa4, $a0, %pc_lo12(.LCPI3_1)
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_2)
+; CHECK-NEXT: fld.d $fa5, $a0, %pc_lo12(.LCPI3_2)
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_3)
+; CHECK-NEXT: fld.d $fa6, $a0, %pc_lo12(.LCPI3_3)
; CHECK-NEXT: lu12i.w $a0, 262656
; CHECK-NEXT: movgr2frh.w $fa7, $a0
; CHECK-NEXT: lu12i.w $a1, 262688
@@ -138,23 +134,19 @@ define i32 @caller_double_on_stack_exhausted_fprs_gprs() nounwind {
; CHECK-NEXT: lu12i.w $a0, 262144
; CHECK-NEXT: fmov.d $fa1, $fa7
; CHECK-NEXT: movgr2frh.w $fa1, $a0
-; CHECK-NEXT: lu12i.w $a0, 262272
-; CHECK-NEXT: fmov.d $fa2, $fa7
-; CHECK-NEXT: movgr2frh.w $fa2, $a0
; CHECK-NEXT: lu12i.w $a0, 262400
; CHECK-NEXT: fmov.d $fa3, $fa7
; CHECK-NEXT: movgr2frh.w $fa3, $a0
-; CHECK-NEXT: lu12i.w $a0, 262464
-; CHECK-NEXT: fmov.d $fa4, $fa7
-; CHECK-NEXT: movgr2frh.w $fa4, $a0
-; CHECK-NEXT: lu12i.w $a0, 262528
-; CHECK-NEXT: fmov.d $fa5, $fa7
-; CHECK-NEXT: movgr2frh.w $fa5, $a0
-; CHECK-NEXT: lu12i.w $a0, 262592
-; CHECK-NEXT: fmov.d $fa6, $fa7
-; CHECK-NEXT: movgr2frh.w $fa6, $a0
; CHECK-NEXT: lu12i.w $a0, 262656
; CHECK-NEXT: movgr2frh.w $fa7, $a0
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
+; CHECK-NEXT: fld.d $fa2, $a0, %pc_lo12(.LCPI5_0)
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_1)
+; CHECK-NEXT: fld.d $fa4, $a0, %pc_lo12(.LCPI5_1)
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_2)
+; CHECK-NEXT: fld.d $fa5, $a0, %pc_lo12(.LCPI5_2)
+; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_3)
+; CHECK-NEXT: fld.d $fa6, $a0, %pc_lo12(.LCPI5_3)
; CHECK-NEXT: lu12i.w $a1, 262688
; CHECK-NEXT: lu12i.w $a3, 262720
; CHECK-NEXT: lu12i.w $a5, 262752
diff --git a/llvm/test/CodeGen/LoongArch/double-imm.ll b/llvm/test/CodeGen/LoongArch/double-imm.ll
index 3da4e159dbffc..58ecce1452290 100644
--- a/llvm/test/CodeGen/LoongArch/double-imm.ll
+++ b/llvm/test/CodeGen/LoongArch/double-imm.ll
@@ -1,6 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64
+; RUN: llc --mtriple=loongarch32 --mattr=+d -loongarch-materialize-float-imm=0 < %s | FileCheck %s --check-prefixes=LA32,LA32-0
+; RUN: llc --mtriple=loongarch32 --mattr=+d -loongarch-materialize-float-imm=1 < %s | FileCheck %s --check-prefixes=LA32,LA32-1
+; RUN: llc --mtriple=loongarch32 --mattr=+d -loongarch-materialize-float-imm=2 < %s | FileCheck %s --check-prefixes=LA32,LA32-2
+; RUN: llc --mtriple=loongarch32 --mattr=+d -loongarch-materialize-float-imm=3 < %s | FileCheck %s --check-prefixes=LA32,LA32-3
+; RUN: llc --mtriple=loongarch32 --mattr=+d -loongarch-materialize-float-imm=4 < %s | FileCheck %s --check-prefixes=LA32,LA32-4
+; RUN: llc --mtriple=loongarch64 --mattr=+d -loongarch-materialize-float-imm=0 < %s | FileCheck %s --check-prefixes=LA64,LA64-0
+; RUN: llc --mtriple=loongarch64 --mattr=+d -loongarch-materialize-float-imm=1 < %s | FileCheck %s --check-prefixes=LA64,LA64-1
+; RUN: llc --mtriple=loongarch64 --mattr=+d -loongarch-materialize-float-imm=2 < %s | FileCheck %s --check-prefixes=LA64,LA64-2
+; RUN: llc --mtriple=loongarch64 --mattr=+d -loongarch-materialize-float-imm=3 < %s | FileCheck %s --check-prefixes=LA64,LA64-3
+; RUN: llc --mtriple=loongarch64 --mattr=+d -loongarch-materialize-float-imm=4 < %s | FileCheck %s --check-prefixes=LA64,LA64-4
define double @f64_positive_zero() nounwind {
; LA32-LABEL: f64_positive_zero:
@@ -32,25 +40,285 @@ define double @f64_negative_zero() nounwind {
ret double -0.0
}
+define double @f64_constant_ins1() nounwind {
+; LA32-0-LABEL: f64_constant_ins1:
+; LA32-0: # %bb.0:
+; LA32-0-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
+; LA32-0-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI2_0)
+; LA32-0-NEXT: ret
+;
+; LA32-1-LABEL: f64_constant_ins1:
+; LA32-1: # %bb.0:
+; LA32-1-NEXT: movgr2fr.w $fa0, $zero
+; LA32-1-NEXT: lu12i.w $a0, 263424
+; LA32-1-NEXT: movgr2frh.w $fa0, $a0
+; LA32-1-NEXT: ret
+;
+; LA32-2-LABEL: f64_constant_ins1:
+; LA32-2: # %bb.0:
+; LA32-2-NEXT: movgr2fr.w $fa0, $zero
+; LA32-2-NEXT: lu12i.w $a0, 263424
+; LA32-2-NEXT: movgr2frh.w $fa0, $a0
+; LA32-2-NEXT: ret
+;
+; LA32-3-LABEL: f64_constant_ins1:
+; LA32-3: # %bb.0:
+; LA32-3-NEXT: movgr2fr.w $fa0, $zero
+; LA32-3-NEXT: lu12i.w $a0, 263424
+; LA32-3-NEXT: movgr2frh.w $fa0, $a0
+; LA32-3-NEXT: ret
+;
+; LA32-4-LABEL: f64_constant_ins1:
+; LA32-4: # %bb.0:
+; LA32-4-NEXT: movgr2fr.w $fa0, $zero
+; LA32-4-NEXT: lu12i.w $a0, 263424
+; LA32-4-NEXT: movgr2frh.w $fa0, $a0
+; LA32-4-NEXT: ret
+;
+; LA64-0-LABEL: f64_constant_ins1:
+; LA64-0: # %bb.0:
+; LA64-0-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
+; LA64-0-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI2_0)
+; LA64-0-NEXT: ret
+;
+; LA64-1-LABEL: f64_constant_ins1:
+; LA64-1: # %bb.0:
+; LA64-1-NEXT: lu52i.d $a0, $zero, 1029
+; LA64-1-NEXT: movgr2fr.d $fa0, $a0
+; LA64-1-NEXT: ret
+;
+; LA64-2-LABEL: f64_constant_ins1:
+; LA64-2: # %bb.0:
+; LA64-2-NEXT: lu52i.d $a0, $zero, 1029
+; LA64-2-NEXT: movgr2fr.d $fa0, $a0
+; LA64-2-NEXT: ret
+;
+; LA64-3-LABEL: f64_constant_ins1:
+; LA64-3: # %bb.0:
+; LA64-3-NEXT: lu52i.d $a0, $zero, 1029
+; LA64-3-NEXT: movgr2fr.d $fa0, $a0
+; LA64-3-NEXT: ret
+;
+; LA64-4-LABEL: f64_constant_ins1:
+; LA64-4: # %bb.0:
+; LA64-4-NEXT: lu52i.d $a0, $zero, 1029
+; LA64-4-NEXT: movgr2fr.d $fa0, $a0
+; LA64-4-NEXT: ret
+ ret double 64.0
+}
+
+define double @f64_constant_ins2() nounwind {
+; LA32-0-LABEL: f64_constant_ins2:
+; LA32-0: # %bb.0:
+; LA32-0-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
+; LA32-0-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI3_0)
+; LA32-0-NEXT: ret
+;
+; LA32-1-LABEL: f64_constant_ins2:
+; LA32-1: # %bb.0:
+; LA32-1-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
+; LA32-1-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI3_0)
+; LA32-1-NEXT: ret
+;
+; LA32-2-LABEL: f64_constant_ins2:
+; LA32-2: # %bb.0:
+; LA32-2-NEXT: ori $a0, $zero, 3
+; LA32-2-NEXT: movgr2fr.w $fa0, $a0
+; LA32-2-NEXT: lu12i.w $a0, 262144
+; LA32-2-NEXT: movgr2frh.w $fa0, $a0
+; LA32-2-NEXT: ret
+;
+; LA32-3-LABEL: f64_constant_ins2:
+; LA32-3: # %bb.0:
+; LA32-3-NEXT: ori $a0, $zero, 3
+; LA32-3-NEXT: movgr2fr.w $fa0, $a0
+; LA32-3-NEXT: lu12i.w $a0, 262144
+; LA32-3-NEXT: movgr2frh.w $fa0, $a0
+; LA32-3-NEXT: ret
+;
+; LA32-4-LABEL: f64_constant_ins2:
+; LA32-4: # %bb.0:
+; LA32-4-NEXT: ori $a0, $zero, 3
+; LA32-4-NEXT: movgr2fr.w $fa0, $a0
+; LA32-4-NEXT: lu12i.w $a0, 262144
+; LA32-4-NEXT: movgr2frh.w $fa0, $a0
+; LA32-4-NEXT: ret
+;
+; LA64-0-LABEL: f64_constant_ins2:
+; LA64-0: # %bb.0:
+; LA64-0-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
+; LA64-0-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI3_0)
+; LA64-0-NEXT: ret
+;
+; LA64-1-LABEL: f64_constant_ins2:
+; LA64-1: # %bb.0:
+; LA64-1-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
+; LA64-1-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI3_0)
+; LA64-1-NEXT: ret
+;
+; LA64-2-LABEL: f64_constant_ins2:
+; LA64-2: # %bb.0:
+; LA64-2-NEXT: ori $a0, $zero, 3
+; LA64-2-NEXT: lu52i.d $a0, $a0, 1024
+; LA64-2-NEXT: movgr2fr.d $fa0, $a0
+; LA64-2-NEXT: ret
+;
+; LA64-3-LABEL: f64_constant_ins2:
+; LA64-3: # %bb.0:
+; LA64-3-NEXT: ori $a0, $zero, 3
+; LA64-3-NEXT: lu52i.d $a0, $a0, 1024
+; LA64-3-NEXT: movgr2fr.d $fa0, $a0
+; LA64-3-NEXT: ret
+;
+; LA64-4-LABEL: f64_constant_ins2:
+; LA64-4: # %bb.0:
+; LA64-4-NEXT: ori $a0, $zero, 3
+; LA64-4-NEXT: lu52i.d $a0, $a0, 1024
+; LA64-4-NEXT: movgr2fr.d $fa0, $a0
+; LA64-4-NEXT: ret
+ ret double 2.00000000000000137
+}
+
+define double @f64_constant_ins3() nounwind {
+; LA32-0-LABEL: f64_constant_ins3:
+; LA32-0: # %bb.0:
+; LA32-0-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
+; LA32-0-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI4_0)
+; LA32-0-NEXT: ret
+;
+; LA32-1-LABEL: f64_constant_ins3:
+; LA32-1: # %bb.0:
+; LA32-1-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
+; LA32-1-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI4_0)
+; LA32-1-NEXT: ret
+;
+; LA32-2-LABEL: f64_constant_ins3:
+; LA32-2: # %bb.0:
+; LA32-2-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
+; LA32-2-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI4_0)
+; LA32-2-NEXT: ret
+;
+; LA32-3-LABEL: f64_constant_ins3:
+; LA32-3: # %bb.0:
+; LA32-3-NEXT: lu12i.w $a0, 268115
+; LA32-3-NEXT: ori $a0, $a0, 344
+; LA32-3-NEXT: lu12i.w $a1, -131072
+; LA32-3-NEXT: movgr2fr.w $fa0, $a1
+; LA32-3-NEXT: movgr2frh.w $fa0, $a0
+; LA32-3-NEXT: ret
+;
+; LA32-4-LABEL: f64_constant_ins3:
+; LA32-4: # %bb.0:
+; LA32-4-NEXT: lu12i.w $a0, 268115
+; LA32-4-NEXT: ori $a0, $a0, 344
+; LA32-4-NEXT: lu12i.w $a1, -131072
+; LA32-4-NEXT: movgr2fr.w $fa0, $a1
+; LA32-4-NEXT: movgr2frh.w $fa0, $a0
+; LA32-4-NEXT: ret
+;
+; LA64-0-LABEL: f64_constant_ins3:
+; LA64-0: # %bb.0:
+; LA64-0-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
+; LA64-0-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI4_0)
+; LA64-0-NEXT: ret
+;
+; LA64-1-LABEL: f64_constant_ins3:
+; LA64-1: # %bb.0:
+; LA64-1-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
+; LA64-1-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI4_0)
+; LA64-1-NEXT: ret
+;
+; LA64-2-LABEL: f64_constant_ins3:
+; LA64-2: # %bb.0:
+; LA64-2-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
+; LA64-2-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI4_0)
+; LA64-2-NEXT: ret
+;
+; LA64-3-LABEL: f64_constant_ins3:
+; LA64-3: # %bb.0:
+; LA64-3-NEXT: lu12i.w $a0, -131072
+; LA64-3-NEXT: lu32i.d $a0, 340312
+; LA64-3-NEXT: lu52i.d $a0, $a0, 1047
+; LA64-3-NEXT: movgr2fr.d $fa0, $a0
+; LA64-3-NEXT: ret
+;
+; LA64-4-LABEL: f64_constant_ins3:
+; LA64-4: # %bb.0:
+; LA64-4-NEXT: lu12i.w $a0, -131072
+; LA64-4-NEXT: lu32i.d $a0, 340312
+; LA64-4-NEXT: lu52i.d $a0, $a0, 1047
+; LA64-4-NEXT: movgr2fr.d $fa0, $a0
+; LA64-4-NEXT: ret
+ ret double 22222222.0
+}
+
define double @f64_constant_pi() nounwind {
-; LA32-LABEL: f64_constant_pi:
-; LA32: # %bb.0:
-; LA32-NEXT: lu12i.w $a0, 262290
-; LA32-NEXT: ori $a0, $a0, 507
-; LA32-NEXT: lu12i.w $a1, 345154
-; LA32-NEXT: ori $a1, $a1, 3352
-; LA32-NEXT: movgr2fr.w $fa0, $a1
-; LA32-NEXT: movgr2frh.w $fa0, $a0
-; LA32-NEXT: ret
+; LA32-0-LABEL: f64_constant_pi:
+; LA32-0: # %bb.0:
+; LA32-0-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
+; LA32-0-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI5_0)
+; LA32-0-NEXT: ret
;
-; LA64-LABEL: f64_constant_pi:
-; LA64: # %bb.0:
-; LA64-NEXT: lu12i.w $a0, 345154
-; LA64-NEXT: ori $a0, $a0, 3352
-; LA64-NEXT: lu32i.d $a0, -450053
-; LA64-NEXT: lu52i.d $a0, $a0, 1024
-; LA64-NEXT: movgr2fr.d $fa0, $a0
-; LA64-NEXT: ret
+; LA32-1-LABEL: f64_constant_pi:
+; LA32-1: # %bb.0:
+; LA32-1-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
+; LA32-1-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI5_0)
+; LA32-1-NEXT: ret
+;
+; LA32-2-LABEL: f64_constant_pi:
+; LA32-2: # %bb.0:
+; LA32-2-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
+; LA32-2-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI5_0)
+; LA32-2-NEXT: ret
+;
+; LA32-3-LABEL: f64_constant_pi:
+; LA32-3: # %bb.0:
+; LA32-3-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
+; LA32-3-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI5_0)
+; LA32-3-NEXT: ret
+;
+; LA32-4-LABEL: f64_constant_pi:
+; LA32-4: # %bb.0:
+; LA32-4-NEXT: lu12i.w $a0, 262290
+; LA32-4-NEXT: ori $a0, $a0, 507
+; LA32-4-NEXT: lu12i.w $a1, 345154
+; LA32-4-NEXT: ori $a1, $a1, 3352
+; LA32-4-NEXT: movgr2fr.w $fa0, $a1
+; LA32-4-NEXT: movgr2frh.w $fa0, $a0
+; LA32-4-NEXT: ret
+;
+; LA64-0-LABEL: f64_constant_pi:
+; LA64-0: # %bb.0:
+; LA64-0-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
+; LA64-0-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI5_0)
+; LA64-0-NEXT: ret
+;
+; LA64-1-LABEL: f64_constant_pi:
+; LA64-1: # %bb.0:
+; LA64-1-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
+; LA64-1-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI5_0)
+; LA64-1-NEXT: ret
+;
+; LA64-2-LABEL: f64_constant_pi:
+; LA64-2: # %bb.0:
+; LA64-2-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
+; LA64-2-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI5_0)
+; LA64-2-NEXT: ret
+;
+; LA64-3-LABEL: f64_constant_pi:
+; LA64-3: # %bb.0:
+; LA64-3-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
+; LA64-3-NEXT: fld.d $fa0, $a0, %pc_lo12(.LCPI5_0)
+; LA64-3-NEXT: ret
+;
+; LA64-4-LABEL: f64_constant_pi:
+; LA64-4: # %bb.0:
+; LA64-4-NEXT: lu12i.w $a0, 345154
+; LA64-4-NEXT: ori $a0, $a0, 3352
+; LA64-4-NEXT: lu32i.d $a0, -450053
+; LA64-4-NEXT: lu52i.d $a0, $a0, 1024
+; LA64-4-NEXT: movgr2fr.d $fa0, $a0
+; LA64-4-NEXT: ret
ret double 3.1415926535897931159979634685441851615905761718750
}
diff --git a/llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll b/llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll
index 29cfea7dc09cd..d875bb98e4593 100644
--- a/llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll
+++ b/llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll
@@ -193,12 +193,10 @@ define double @sqrt_simplify_before_recip_3_uses_f64(double %x, ptr %p1, ptr %p2
;
; LA64D-LABEL: sqrt_simplify_before_recip_3_uses_f64:
; LA64D: # %bb.0:
+; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI2_0)
+; LA64D-NEXT: fld.d $fa2, $a2, %pc_lo12(.LCPI2_0)
; LA64D-NEXT: fsqrt.d $fa1, $fa0
; LA64D-NEXT: frsqrt.d $fa0, $fa0
-; LA64D-NEXT: ori $a2, $zero, 0
-; LA64D-NEXT: lu32i.d $a2, 327680
-; LA64D-NEXT: lu52i.d $a2, $a2, 1028
-; LA64D-NEXT: movgr2fr.d $fa2, $a2
; LA64D-NEXT: fdiv.d $fa2, $fa2, $fa1
; LA64D-NEXT: fst.d $fa0, $a0, 0
; LA64D-NEXT: fst.d $fa2, $a1, 0
@@ -216,14 +214,12 @@ define double @sqrt_simplify_before_recip_3_uses_f64(double %x, ptr %p1, ptr %p2
; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4
; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2
; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa0, $fa1
+; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI2_0)
+; LA64D-FRECIPE-NEXT: fld.d $fa5, $a2, %pc_lo12(.LCPI2_0)
; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa2, $fa1, $fa3
; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4
; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2
-; LA64D-FRECIPE-NEXT: ori $a2, $zero, 0
-; LA64D-FRECIPE-NEXT: lu32i.d $a2, 327680
-; LA64D-FRECIPE-NEXT: lu52i.d $a2, $a2, 1028
-; LA64D-FRECIPE-NEXT: movgr2fr.d $fa2, $a2
-; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa1, $fa2
+; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa1, $fa5
; LA64D-FRECIPE-NEXT: fmul.d $fa0, $fa0, $fa1
; LA64D-FRECIPE-NEXT: fst.d $fa1, $a0, 0
; LA64D-FRECIPE-NEXT: fst.d $fa2, $a1, 0
@@ -327,16 +323,12 @@ define double @sqrt_simplify_before_recip_3_uses_order_f64(double %x, ptr %p1, p
;
; LA64D-LABEL: sqrt_simplify_before_recip_3_uses_order_f64:
; LA64D: # %bb.0:
+; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_0)
+; LA64D-NEXT: fld.d $fa1, $a2, %pc_lo12(.LCPI3_0)
+; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_1)
+; LA64D-NEXT: fld.d $fa2, $a2, %pc_lo12(.LCPI3_1)
; LA64D-NEXT: fsqrt.d $fa0, $fa0
-; LA64D-NEXT: ori $a2, $zero, 0
-; LA64D-NEXT: ori $a3, $zero, 0
-; LA64D-NEXT: lu32i.d $a3, 327680
-; LA64D-NEXT: lu52i.d $a3, $a3, 1028
-; LA64D-NEXT: movgr2fr.d $fa1, $a3
; LA64D-NEXT: fdiv.d $fa1, $fa1, $fa0
-; LA64D-NEXT: lu32i.d $a2, 360448
-; LA64D-NEXT: lu52i.d $a2, $a2, 1028
-; LA64D-NEXT: movgr2fr.d $fa2, $a2
; LA64D-NEXT: fdiv.d $fa2, $fa2, $fa0
; LA64D-NEXT: fst.d $fa1, $a0, 0
; LA64D-NEXT: fst.d $fa2, $a1, 0
@@ -355,18 +347,14 @@ define double @sqrt_simplify_before_recip_3_uses_order_f64(double %x, ptr %p1, p
; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa0, $fa1
; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa2, $fa1, $fa3
; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4
+; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_0)
+; LA64D-FRECIPE-NEXT: fld.d $fa3, $a2, %pc_lo12(.LCPI3_0)
+; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_1)
+; LA64D-FRECIPE-NEXT: fld.d $fa4, $a2, %pc_lo12(.LCPI3_1)
; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2
; LA64D-FRECIPE-NEXT: fmul.d $fa0, $fa0, $fa1
-; LA64D-FRECIPE-NEXT: ori $a2, $zero, 0
-; LA64D-FRECIPE-NEXT: ori $a3, $zero, 0
-; LA64D-FRECIPE-NEXT: lu32i.d $a3, 327680
-; LA64D-FRECIPE-NEXT: lu52i.d $a3, $a3, 1028
-; LA64D-FRECIPE-NEXT: movgr2fr.d $fa2, $a3
-; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa1, $fa2
-; LA64D-FRECIPE-NEXT: lu32i.d $a2, 360448
-; LA64D-FRECIPE-NEXT: lu52i.d $a2, $a2, 1028
-; LA64D-FRECIPE-NEXT: movgr2fr.d $fa3, $a2
-; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa3
+; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa1, $fa3
+; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4
; LA64D-FRECIPE-NEXT: fst.d $fa2, $a0, 0
; LA64D-FRECIPE-NEXT: fst.d $fa1, $a1, 0
; LA64D-FRECIPE-NEXT: ret
@@ -500,17 +488,13 @@ define double @sqrt_simplify_before_recip_4_uses_f64(double %x, ptr %p1, ptr %p2
;
; LA64D-LABEL: sqrt_simplify_before_recip_4_uses_f64:
; LA64D: # %bb.0:
+; LA64D-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_0)
+; LA64D-NEXT: fld.d $fa2, $a3, %pc_lo12(.LCPI4_0)
+; LA64D-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_1)
+; LA64D-NEXT: fld.d $fa3, $a3, %pc_lo12(.LCPI4_1)
; LA64D-NEXT: fsqrt.d $fa1, $fa0
; LA64D-NEXT: frsqrt.d $fa0, $fa0
-; LA64D-NEXT: ori $a3, $zero, 0
-; LA64D-NEXT: ori $a4, $zero, 0
-; LA64D-NEXT: lu32i.d $a4, 327680
-; LA64D-NEXT: lu52i.d $a4, $a4, 1028
-; LA64D-NEXT: movgr2fr.d $fa2, $a4
; LA64D-NEXT: fdiv.d $fa2, $fa2, $fa1
-; LA64D-NEXT: lu32i.d $a3, 360448
-; LA64D-NEXT: lu52i.d $a3, $a3, 1028
-; LA64D-NEXT: movgr2fr.d $fa3, $a3
; LA64D-NEXT: fdiv.d $fa3, $fa3, $fa1
; LA64D-NEXT: fst.d $fa0, $a0, 0
; LA64D-NEXT: fst.d $fa2, $a1, 0
@@ -530,18 +514,14 @@ define double @sqrt_simplify_before_recip_4_uses_f64(double %x, ptr %p1, ptr %p2
; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2
; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa0, $fa1
; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa2, $fa1, $fa3
+; LA64D-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_0)
+; LA64D-FRECIPE-NEXT: fld.d $fa3, $a3, %pc_lo12(.LCPI4_0)
+; LA64D-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_1)
+; LA64D-FRECIPE-NEXT: fld.d $fa5, $a3, %pc_lo12(.LCPI4_1)
; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4
; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2
-; LA64D-FRECIPE-NEXT: ori $a3, $zero, 0
-; LA64D-FRECIPE-NEXT: ori $a4, $zero, 0
-; LA64D-FRECIPE-NEXT: lu32i.d $a4, 327680
-; LA64D-FRECIPE-NEXT: lu52i.d $a4, $a4, 1028
-; LA64D-FRECIPE-NEXT: movgr2fr.d $fa2, $a4
-; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa1, $fa2
-; LA64D-FRECIPE-NEXT: lu32i.d $a3, 360448
-; LA64D-FRECIPE-NEXT: lu52i.d $a3, $a3, 1028
-; LA64D-FRECIPE-NEXT: movgr2fr.d $fa3, $a3
-; LA64D-FRECIPE-NEXT: fmul.d $fa3, $fa1, $fa3
+; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa1, $fa3
+; LA64D-FRECIPE-NEXT: fmul.d $fa3, $fa1, $fa5
; LA64D-FRECIPE-NEXT: fmul.d $fa0, $fa0, $fa1
; LA64D-FRECIPE-NEXT: fst.d $fa1, $a0, 0
; LA64D-FRECIPE-NEXT: fst.d $fa2, $a1, 0
diff --git a/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll b/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
index a59d4ffcc4e33..16c9e754fb94d 100644
--- a/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
+++ b/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
@@ -519,8 +519,8 @@ define void @test_d4(ptr %P, ptr %S) nounwind {
;
; LA32D-LABEL: test_d4:
; LA32D: # %bb.0:
-; LA32D-NEXT: fld.d $fa0, $a0, 24
-; LA32D-NEXT: fld.d $fa1, $a0, 16
+; LA32D-NEXT: fld.d $fa0, $a0, 16
+; LA32D-NEXT: fld.d $fa1, $a0, 24
; LA32D-NEXT: fld.d $fa2, $a0, 8
; LA32D-NEXT: fld.d $fa3, $a0, 0
; LA32D-NEXT: movgr2fr.w $fa4, $zero
@@ -532,15 +532,14 @@ define void @test_d4(ptr %P, ptr %S) nounwind {
; LA32D-NEXT: fmov.d $fa5, $fa4
; LA32D-NEXT: movgr2frh.w $fa5, $a0
; LA32D-NEXT: fadd.d $fa2, $fa2, $fa5
-; LA32D-NEXT: lu12i.w $a0, 262272
-; LA32D-NEXT: fmov.d $fa5, $fa4
-; LA32D-NEXT: movgr2frh.w $fa5, $a0
-; LA32D-NEXT: fadd.d $fa1, $fa1, $fa5
+; LA32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
+; LA32D-NEXT: fld.d $fa5, $a0, %pc_lo12(.LCPI5_0)
; LA32D-NEXT: lu12i.w $a0, 262400
; LA32D-NEXT: movgr2frh.w $fa4, $a0
-; LA32D-NEXT: fadd.d $fa0, $fa0, $fa4
-; LA32D-NEXT: fst.d $fa0, $a1, 24
-; LA32D-NEXT: fst.d $fa1, $a1, 16
+; LA32D-NEXT: fadd.d $fa1, $fa1, $fa4
+; LA32D-NEXT: fadd.d $fa0, $fa0, $fa5
+; LA32D-NEXT: fst.d $fa0, $a1, 16
+; LA32D-NEXT: fst.d $fa1, $a1, 24
; LA32D-NEXT: fst.d $fa2, $a1, 8
; LA32D-NEXT: fst.d $fa3, $a1, 0
; LA32D-NEXT: ret
@@ -741,12 +740,12 @@ define void @test_d8(ptr %P, ptr %S) nounwind {
;
; LA32D-LABEL: test_d8:
; LA32D: # %bb.0:
-; LA32D-NEXT: fld.d $fa0, $a0, 56
-; LA32D-NEXT: fld.d $fa1, $a0, 48
-; LA32D-NEXT: fld.d $fa2, $a0, 40
-; LA32D-NEXT: fld.d $fa3, $a0, 32
-; LA32D-NEXT: fld.d $fa4, $a0, 24
-; LA32D-NEXT: fld.d $fa5, $a0, 16
+; LA32D-NEXT: fld.d $fa0, $a0, 48
+; LA32D-NEXT: fld.d $fa1, $a0, 16
+; LA32D-NEXT: fld.d $fa2, $a0, 56
+; LA32D-NEXT: fld.d $fa3, $a0, 40
+; LA32D-NEXT: fld.d $fa4, $a0, 32
+; LA32D-NEXT: fld.d $fa5, $a0, 24
; LA32D-NEXT: fld.d $fa6, $a0, 8
; LA32D-NEXT: fld.d $fa7, $a0, 0
; LA32D-NEXT: movgr2fr.w $ft0, $zero
@@ -758,23 +757,22 @@ define void @test_d8(ptr %P, ptr %S) nounwind {
; LA32D-NEXT: fmov.d $ft2, $ft0
; LA32D-NEXT: movgr2frh.w $ft2, $a0
; LA32D-NEXT: fadd.d $fa6, $fa6, $ft2
-; LA32D-NEXT: lu12i.w $a0, 262272
-; LA32D-NEXT: fmov.d $ft3, $ft0
-; LA32D-NEXT: movgr2frh.w $ft3, $a0
-; LA32D-NEXT: fadd.d $fa5, $fa5, $ft3
; LA32D-NEXT: lu12i.w $a0, 262400
; LA32D-NEXT: movgr2frh.w $ft0, $a0
-; LA32D-NEXT: fadd.d $fa4, $fa4, $ft0
-; LA32D-NEXT: fadd.d $fa3, $fa3, $ft1
-; LA32D-NEXT: fadd.d $fa2, $fa2, $ft2
-; LA32D-NEXT: fadd.d $fa1, $fa1, $ft3
-; LA32D-NEXT: fadd.d $fa0, $fa0, $ft0
-; LA32D-NEXT: fst.d $fa0, $a1, 56
-; LA32D-NEXT: fst.d $fa1, $a1, 48
-; LA32D-NEXT: fst.d $fa2, $a1, 40
-; LA32D-NEXT: fst.d $fa3, $a1, 32
-; LA32D-NEXT: fst.d $fa4, $a1, 24
-; LA32D-NEXT: fst.d $fa5, $a1, 16
+; LA32D-NEXT: fadd.d $fa4, $fa4, $ft1
+; LA32D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
+; LA32D-NEXT: fld.d $ft1, $a0, %pc_lo12(.LCPI6_0)
+; LA32D-NEXT: fadd.d $fa5, $fa5, $ft0
+; LA32D-NEXT: fadd.d $fa3, $fa3, $ft2
+; LA32D-NEXT: fadd.d $fa2, $fa2, $ft0
+; LA32D-NEXT: fadd.d $fa1, $fa1, $ft1
+; LA32D-NEXT: fadd.d $fa0, $fa0, $ft1
+; LA32D-NEXT: fst.d $fa0, $a1, 48
+; LA32D-NEXT: fst.d $fa1, $a1, 16
+; LA32D-NEXT: fst.d $fa2, $a1, 56
+; LA32D-NEXT: fst.d $fa3, $a1, 40
+; LA32D-NEXT: fst.d $fa4, $a1, 32
+; LA32D-NEXT: fst.d $fa5, $a1, 24
; LA32D-NEXT: fst.d $fa6, $a1, 8
; LA32D-NEXT: fst.d $fa7, $a1, 0
; LA32D-NEXT: ret
More information about the llvm-commits
mailing list