[llvm-branch-commits] [llvm] [LoongArch] Convert ld to fld when result is only used by sitofp (PR #165523)
Zhaoxin Yang via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Oct 29 00:53:52 PDT 2025
https://github.com/ylzsx created https://github.com/llvm/llvm-project/pull/165523
If the result of an integer load is only used by an integer-to-float conversion, use an FP load instead. This eliminates an integer-to-float move (movgr2fr) instruction.
From 25fc7d1d06a7b6b6a9d19ed82586094f58f8c527 Mon Sep 17 00:00:00 2001
From: yangzhaoxin <yangzhaoxin at loongson.cn>
Date: Wed, 29 Oct 2025 15:19:05 +0800
Subject: [PATCH] [LoongArch] Convert ld to fld when result is only used by
sitofp
If the result of an integer load is only used by an integer-to-float
conversion, use an FP load instead. This eliminates an
integer-to-float move (movgr2fr) instruction.
---
.../LoongArch/LoongArchFloat32InstrInfo.td | 5 +++
.../LoongArch/LoongArchFloat64InstrInfo.td | 3 ++
.../LoongArch/LoongArchISelLowering.cpp | 45 +++++++++++++++++++
.../Target/LoongArch/LoongArchISelLowering.h | 1 +
.../CodeGen/LoongArch/load-itofp-combine.ll | 31 ++++---------
5 files changed, 62 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
index 690dd73014e57..cb6b7c7342ec6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
@@ -26,6 +26,7 @@ def SDT_LoongArchMOVFR2GR_S_LA64
def SDT_LoongArchFTINT : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
def SDT_LoongArchFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
def SDT_LoongArchFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
+def SDT_LoongArchITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>;
// ISD::BRCOND is custom-lowered to LoongArchISD::BRCOND for floating-point
// comparisons to prevent recursive lowering.
@@ -39,6 +40,7 @@ def loongarch_movfr2gr_s_la64
def loongarch_ftint : SDNode<"LoongArchISD::FTINT", SDT_LoongArchFTINT>;
def loongarch_frecipe : SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchFRECIPE>;
def loongarch_frsqrte : SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchFRSQRTE>;
+def loongarch_sitof : SDNode<"LoongArchISD::SITOF", SDT_LoongArchITOF>;
//===----------------------------------------------------------------------===//
// Instructions
@@ -346,6 +348,9 @@ def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, (fneg FPR32:$fa))),
// fnmsub.s: -fj * fk + fa (the nsz flag on the FMA)
def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa),
(FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>;
+
+// ffint.s.w
+def : Pat<(loongarch_sitof FPR32:$fj), (FFINT_S_W FPR32:$fj)>;
} // Predicates = [HasBasicF]
let Predicates = [HasBasicF, IsLA64] in {
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
index daefbaa52d42a..e973c80af807c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
@@ -307,6 +307,9 @@ def : Pat<(fneg (fma FPR64:$fj, FPR64:$fk, (fneg FPR64:$fa))),
// fnmsub.d: -fj * fk + fa (the nsz flag on the FMA)
def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa),
(FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>;
+
+// ffint.d.l
+def : Pat<(loongarch_sitof FPR64:$fj), (FFINT_D_L FPR64:$fj)>;
} // Predicates = [HasBasicD]
/// Floating point constants
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 80c96c6dc8eb6..3695c5a42790f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -451,6 +451,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
}
// Set DAG combine for LA32 and LA64.
+ if (Subtarget.hasBasicF()) {
+ setTargetDAGCombine(ISD::SINT_TO_FP);
+ }
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
@@ -6725,6 +6728,45 @@ performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const LoongArchSubtarget &Subtarget) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ if (VT != MVT::f32 && VT != MVT::f64)
+ return SDValue();
+ if (VT == MVT::f32 && !Subtarget.hasBasicF())
+ return SDValue();
+ if (VT == MVT::f64 && !Subtarget.hasBasicD())
+ return SDValue();
+
+ // Only optimize when the source and destination types have the same width.
+ if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
+ return SDValue();
+
+ SDValue Src = N->getOperand(0);
+ // If the result of an integer load is only used by an integer-to-float
+ // conversion, use a fp load instead. This eliminates an integer-to-float-move
+ // (movgr2fr) instruction.
+ if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse() &&
+ // Do not change the width of a volatile load. This condition check is
+ // inspired by AArch64.
+ !cast<LoadSDNode>(Src)->isVolatile()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(Src);
+ SDValue Load = DAG.getLoad(VT, DL, LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(), LN0->getAlign(),
+ LN0->getMemOperand()->getFlags());
+
+ // Make sure successors of the original load stay after it by updating them
+ // to use the new Chain.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
+ return DAG.getNode(LoongArchISD::SITOF, SDLoc(N), VT, Load);
+ }
+
+ return SDValue();
+}
+
SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -6760,6 +6802,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
case ISD::EXTRACT_VECTOR_ELT:
return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
+ case ISD::SINT_TO_FP:
+ return performSINT_TO_FPCombine(N, DAG, DCI, Subtarget);
}
return SDValue();
}
@@ -7491,6 +7535,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(MOVGR2FR_D_LO_HI)
NODE_NAME_CASE(MOVFR2GR_S_LA64)
NODE_NAME_CASE(FTINT)
+ NODE_NAME_CASE(SITOF)
NODE_NAME_CASE(BUILD_PAIR_F64)
NODE_NAME_CASE(SPLIT_PAIR_F64)
NODE_NAME_CASE(REVB_2H)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 8a4d7748467c7..e61a77a4b9d9b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -66,6 +66,7 @@ enum NodeType : unsigned {
MOVGR2FCSR,
FTINT,
+ SITOF,
// Build and split F64 pair
BUILD_PAIR_F64,
diff --git a/llvm/test/CodeGen/LoongArch/load-itofp-combine.ll b/llvm/test/CodeGen/LoongArch/load-itofp-combine.ll
index f9b14be99b1ef..195008679dc95 100644
--- a/llvm/test/CodeGen/LoongArch/load-itofp-combine.ll
+++ b/llvm/test/CodeGen/LoongArch/load-itofp-combine.ll
@@ -7,33 +7,25 @@
define float @load_sitofp_f32(ptr %src) nounwind {
; LA32F-LABEL: load_sitofp_f32:
; LA32F: # %bb.0:
-; LA32F-NEXT: ld.w $a0, $a0, 0
-; LA32F-NEXT: movgr2fr.w $fa0, $a0
+; LA32F-NEXT: fld.s $fa0, $a0, 0
; LA32F-NEXT: ffint.s.w $fa0, $fa0
; LA32F-NEXT: ret
;
; LA32D-LABEL: load_sitofp_f32:
; LA32D: # %bb.0:
-; LA32D-NEXT: ld.w $a0, $a0, 0
-; LA32D-NEXT: movgr2fr.w $fa0, $a0
+; LA32D-NEXT: fld.s $fa0, $a0, 0
; LA32D-NEXT: ffint.s.w $fa0, $fa0
; LA32D-NEXT: ret
;
; LA64F-LABEL: load_sitofp_f32:
; LA64F: # %bb.0:
-; LA64F-NEXT: addi.d $sp, $sp, -16
-; LA64F-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64F-NEXT: ld.w $a0, $a0, 0
-; LA64F-NEXT: pcaddu18i $ra, %call36(__floatdisf)
-; LA64F-NEXT: jirl $ra, $ra, 0
-; LA64F-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64F-NEXT: addi.d $sp, $sp, 16
+; LA64F-NEXT: fld.s $fa0, $a0, 0
+; LA64F-NEXT: ffint.s.w $fa0, $fa0
; LA64F-NEXT: ret
;
; LA64D-LABEL: load_sitofp_f32:
; LA64D: # %bb.0:
-; LA64D-NEXT: ld.w $a0, $a0, 0
-; LA64D-NEXT: movgr2fr.w $fa0, $a0
+; LA64D-NEXT: fld.s $fa0, $a0, 0
; LA64D-NEXT: ffint.s.w $fa0, $fa0
; LA64D-NEXT: ret
%1 = load i32, ptr %src
@@ -56,14 +48,8 @@ define double @load_sitofp_f64(ptr %src) nounwind {
;
; LA32D-LABEL: load_sitofp_f64:
; LA32D: # %bb.0:
-; LA32D-NEXT: addi.w $sp, $sp, -16
-; LA32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32D-NEXT: ld.w $a2, $a0, 0
-; LA32D-NEXT: ld.w $a1, $a0, 4
-; LA32D-NEXT: move $a0, $a2
-; LA32D-NEXT: bl __floatdidf
-; LA32D-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32D-NEXT: addi.w $sp, $sp, 16
+; LA32D-NEXT: fld.d $fa0, $a0, 0
+; LA32D-NEXT: ffint.d.l $fa0, $fa0
; LA32D-NEXT: ret
;
; LA64F-LABEL: load_sitofp_f64:
@@ -79,8 +65,7 @@ define double @load_sitofp_f64(ptr %src) nounwind {
;
; LA64D-LABEL: load_sitofp_f64:
; LA64D: # %bb.0:
-; LA64D-NEXT: ld.d $a0, $a0, 0
-; LA64D-NEXT: movgr2fr.d $fa0, $a0
+; LA64D-NEXT: fld.d $fa0, $a0, 0
; LA64D-NEXT: ffint.d.l $fa0, $fa0
; LA64D-NEXT: ret
%1 = load i64, ptr %src
More information about the llvm-branch-commits
mailing list