[llvm-branch-commits] [llvm] release/22.x: [LoongArch] Fix incorrect reciprocal sqrt estimate semantics (#187621) (PR #188672)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Mar 25 21:06:49 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-loongarch
Author: llvmbot
<details>
<summary>Changes</summary>
Backport 6ae5803ffdebf1486d9a1987b818a231444cfd8d
Requested by: @wangleiat
---
Full diff: https://github.com/llvm/llvm-project/pull/188672.diff
4 Files Affected:
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+67-31)
- (modified) llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll (-12)
- (modified) llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll (-4)
- (modified) llvm/test/CodeGen/LoongArch/lsx/fsqrt-reciprocal-estimate.ll (-2)
``````````diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 755e2afa2bf04..9c9e9772f0434 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -9261,57 +9261,93 @@ static int getEstimateRefinementSteps(EVT VT,
return RefinementSteps;
}
+static bool
+isSupportedReciprocalEstimateType(EVT VT, const LoongArchSubtarget &Subtarget) {
+ assert(Subtarget.hasFrecipe() &&
+ "Reciprocal estimate queried on unsupported target");
+
+ if (!VT.isSimple())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f32:
+ // f32 is the base type for reciprocal estimate instructions.
+ return true;
+
+ case MVT::f64:
+ return Subtarget.hasBasicD();
+
+ case MVT::v4f32:
+ case MVT::v2f64:
+ return Subtarget.hasExtLSX();
+
+ case MVT::v8f32:
+ case MVT::v4f64:
+ return Subtarget.hasExtLASX();
+
+ default:
+ return false;
+ }
+}
+
SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
SelectionDAG &DAG, int Enabled,
int &RefinementSteps,
bool &UseOneConstNR,
bool Reciprocal) const {
- if (Subtarget.hasFrecipe()) {
- SDLoc DL(Operand);
- EVT VT = Operand.getValueType();
+ assert(Enabled != ReciprocalEstimate::Disabled &&
+ "Enabled should never be Disabled here");
+
+ if (!Subtarget.hasFrecipe())
+ return SDValue();
- if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
- (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
- (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
- (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
- (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
+ SDLoc DL(Operand);
+ EVT VT = Operand.getValueType();
- if (RefinementSteps == ReciprocalEstimate::Unspecified)
- RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
+ // Check supported types.
+ if (!isSupportedReciprocalEstimateType(VT, Subtarget))
+ return SDValue();
- SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
- if (Reciprocal)
- Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
+ // Handle refinement steps.
+ if (RefinementSteps == ReciprocalEstimate::Unspecified)
+ RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
- return Estimate;
- }
- }
+ // LoongArch only has FRSQRTE which is 1.0 / sqrt(x).
+ UseOneConstNR = false;
+ SDValue Rsqrt = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
- return SDValue();
+ // If the caller wants 1.0 / sqrt(x), or if further refinement steps
+ // are needed (which rely on the reciprocal form), return the raw reciprocal
+ // estimate.
+ if (Reciprocal || RefinementSteps > 0)
+ return Rsqrt;
+
+ // Otherwise, return sqrt(x) by multiplying with the operand.
+ return DAG.getNode(ISD::FMUL, DL, VT, Operand, Rsqrt);
}
SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
SelectionDAG &DAG,
int Enabled,
int &RefinementSteps) const {
- if (Subtarget.hasFrecipe()) {
- SDLoc DL(Operand);
- EVT VT = Operand.getValueType();
+ assert(Enabled != ReciprocalEstimate::Disabled &&
+ "Enabled should never be Disabled here");
- if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
- (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
- (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
- (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
- (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
+ if (!Subtarget.hasFrecipe())
+ return SDValue();
- if (RefinementSteps == ReciprocalEstimate::Unspecified)
- RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
+ SDLoc DL(Operand);
+ EVT VT = Operand.getValueType();
- return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
- }
- }
+ // Check supported types.
+ if (!isSupportedReciprocalEstimateType(VT, Subtarget))
+ return SDValue();
- return SDValue();
+ if (RefinementSteps == ReciprocalEstimate::Unspecified)
+ RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
+
+ // FRECIPE computes 1.0 / x.
+ return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll b/llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll
index d875bb98e4593..df0387b87fe13 100644
--- a/llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll
+++ b/llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll
@@ -17,7 +17,6 @@ define float @frsqrt_f32(float %a) nounwind {
; LA32F-FRECIPE-LABEL: frsqrt_f32:
; LA32F-FRECIPE: # %bb.0:
; LA32F-FRECIPE-NEXT: frsqrte.s $fa1, $fa0
-; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1
; LA32F-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1
; LA32F-FRECIPE-NEXT: lu12i.w $a0, -261120
; LA32F-FRECIPE-NEXT: movgr2fr.w $fa2, $a0
@@ -36,7 +35,6 @@ define float @frsqrt_f32(float %a) nounwind {
; LA64D-FRECIPE-LABEL: frsqrt_f32:
; LA64D-FRECIPE: # %bb.0:
; LA64D-FRECIPE-NEXT: frsqrte.s $fa1, $fa0
-; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1
; LA64D-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1
; LA64D-FRECIPE-NEXT: vldi $vr2, -1144
; LA64D-FRECIPE-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2
@@ -87,7 +85,6 @@ define double @frsqrt_f64(double %a) nounwind {
; LA64D-FRECIPE-LABEL: frsqrt_f64:
; LA64D-FRECIPE: # %bb.0:
; LA64D-FRECIPE-NEXT: frsqrte.d $fa1, $fa0
-; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa0, $fa1
; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa0, $fa1
; LA64D-FRECIPE-NEXT: vldi $vr3, -888
; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa2, $fa1, $fa3
@@ -206,7 +203,6 @@ define double @sqrt_simplify_before_recip_3_uses_f64(double %x, ptr %p1, ptr %p2
; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_f64:
; LA64D-FRECIPE: # %bb.0:
; LA64D-FRECIPE-NEXT: frsqrte.d $fa1, $fa0
-; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa0, $fa1
; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa0, $fa1
; LA64D-FRECIPE-NEXT: vldi $vr3, -888
; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa2, $fa1, $fa3
@@ -337,7 +333,6 @@ define double @sqrt_simplify_before_recip_3_uses_order_f64(double %x, ptr %p1, p
; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_order_f64:
; LA64D-FRECIPE: # %bb.0:
; LA64D-FRECIPE-NEXT: frsqrte.d $fa1, $fa0
-; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa0, $fa1
; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa0, $fa1
; LA64D-FRECIPE-NEXT: vldi $vr3, -888
; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa2, $fa1, $fa3
@@ -505,7 +500,6 @@ define double @sqrt_simplify_before_recip_4_uses_f64(double %x, ptr %p1, ptr %p2
; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_4_uses_f64:
; LA64D-FRECIPE: # %bb.0:
; LA64D-FRECIPE-NEXT: frsqrte.d $fa1, $fa0
-; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa0, $fa1
; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa0, $fa1
; LA64D-FRECIPE-NEXT: vldi $vr3, -888
; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa2, $fa1, $fa3
@@ -554,7 +548,6 @@ define float @sqrt_simplify_before_recip_3_uses_f32(float %x, ptr %p1, ptr %p2)
; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_f32:
; LA32F-FRECIPE: # %bb.0:
; LA32F-FRECIPE-NEXT: frsqrte.s $fa1, $fa0
-; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1
; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1
; LA32F-FRECIPE-NEXT: lu12i.w $a2, -261120
; LA32F-FRECIPE-NEXT: movgr2fr.w $fa3, $a2
@@ -586,7 +579,6 @@ define float @sqrt_simplify_before_recip_3_uses_f32(float %x, ptr %p1, ptr %p2)
; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_f32:
; LA64D-FRECIPE: # %bb.0:
; LA64D-FRECIPE-NEXT: frsqrte.s $fa1, $fa0
-; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1
; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1
; LA64D-FRECIPE-NEXT: vldi $vr3, -1144
; LA64D-FRECIPE-NEXT: fmadd.s $fa2, $fa2, $fa1, $fa3
@@ -629,7 +621,6 @@ define float @sqrt_simplify_before_recip_4_uses_f32(float %x, ptr %p1, ptr %p2,
; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_4_uses_f32:
; LA32F-FRECIPE: # %bb.0:
; LA32F-FRECIPE-NEXT: frsqrte.s $fa1, $fa0
-; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1
; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1
; LA32F-FRECIPE-NEXT: lu12i.w $a3, -261120
; LA32F-FRECIPE-NEXT: movgr2fr.w $fa3, $a3
@@ -669,7 +660,6 @@ define float @sqrt_simplify_before_recip_4_uses_f32(float %x, ptr %p1, ptr %p2,
; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_4_uses_f32:
; LA64D-FRECIPE: # %bb.0:
; LA64D-FRECIPE-NEXT: frsqrte.s $fa1, $fa0
-; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1
; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1
; LA64D-FRECIPE-NEXT: vldi $vr3, -1144
; LA64D-FRECIPE-NEXT: fmadd.s $fa2, $fa2, $fa1, $fa3
@@ -715,7 +705,6 @@ define float @sqrt_simplify_before_recip_3_uses_order_f32(float %x, ptr %p1, ptr
; LA32F-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_order_f32:
; LA32F-FRECIPE: # %bb.0:
; LA32F-FRECIPE-NEXT: frsqrte.s $fa1, $fa0
-; LA32F-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1
; LA32F-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1
; LA32F-FRECIPE-NEXT: lu12i.w $a2, -261120
; LA32F-FRECIPE-NEXT: movgr2fr.w $fa3, $a2
@@ -751,7 +740,6 @@ define float @sqrt_simplify_before_recip_3_uses_order_f32(float %x, ptr %p1, ptr
; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_order_f32:
; LA64D-FRECIPE: # %bb.0:
; LA64D-FRECIPE-NEXT: frsqrte.s $fa1, $fa0
-; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1
; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1
; LA64D-FRECIPE-NEXT: vldi $vr3, -1144
; LA64D-FRECIPE-NEXT: fmadd.s $fa2, $fa2, $fa1, $fa3
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll b/llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll
index e696129acb862..835ccde211a20 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll
@@ -61,7 +61,6 @@ define void @one_div_sqrt_v8f32(ptr %res, ptr %a0) nounwind {
; LA32-NEXT: st.w $a1, $sp, 32
; LA32-NEXT: xvld $xr0, $sp, 32
; LA32-NEXT: xvfrsqrte.s $xr1, $xr0
-; LA32-NEXT: xvfmul.s $xr1, $xr0, $xr1
; LA32-NEXT: xvfmul.s $xr0, $xr0, $xr1
; LA32-NEXT: xvldi $xr2, -1400
; LA32-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2
@@ -96,7 +95,6 @@ define void @one_div_sqrt_v8f32(ptr %res, ptr %a0) nounwind {
; LA64: # %bb.0: # %entry
; LA64-NEXT: xvld $xr0, $a1, 0
; LA64-NEXT: xvfrsqrte.s $xr1, $xr0
-; LA64-NEXT: xvfmul.s $xr1, $xr0, $xr1
; LA64-NEXT: xvfmul.s $xr0, $xr0, $xr1
; LA64-NEXT: xvldi $xr2, -1400
; LA64-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2
@@ -171,7 +169,6 @@ define void @one_div_sqrt_v4f64(ptr %res, ptr %a0) nounwind {
; LA32-NEXT: st.w $a1, $sp, 32
; LA32-NEXT: xvld $xr0, $sp, 32
; LA32-NEXT: xvfrsqrte.d $xr1, $xr0
-; LA32-NEXT: xvfmul.d $xr1, $xr0, $xr1
; LA32-NEXT: xvfmul.d $xr2, $xr0, $xr1
; LA32-NEXT: xvldi $xr3, -888
; LA32-NEXT: xvfmadd.d $xr2, $xr2, $xr1, $xr3
@@ -210,7 +207,6 @@ define void @one_div_sqrt_v4f64(ptr %res, ptr %a0) nounwind {
; LA64: # %bb.0: # %entry
; LA64-NEXT: xvld $xr0, $a1, 0
; LA64-NEXT: xvfrsqrte.d $xr1, $xr0
-; LA64-NEXT: xvfmul.d $xr1, $xr0, $xr1
; LA64-NEXT: xvfmul.d $xr2, $xr0, $xr1
; LA64-NEXT: xvldi $xr3, -888
; LA64-NEXT: xvfmadd.d $xr2, $xr2, $xr1, $xr3
diff --git a/llvm/test/CodeGen/LoongArch/lsx/fsqrt-reciprocal-estimate.ll b/llvm/test/CodeGen/LoongArch/lsx/fsqrt-reciprocal-estimate.ll
index 4951696e05a94..ca3d2bc8b671f 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/fsqrt-reciprocal-estimate.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/fsqrt-reciprocal-estimate.ll
@@ -17,7 +17,6 @@ define void @one_div_sqrt_v4f32(ptr %res, ptr %a0) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vfrsqrte.s $vr1, $vr0
-; CHECK-NEXT: vfmul.s $vr1, $vr0, $vr1
; CHECK-NEXT: vfmul.s $vr0, $vr0, $vr1
; CHECK-NEXT: vldi $vr2, -1400
; CHECK-NEXT: vfmadd.s $vr0, $vr0, $vr1, $vr2
@@ -48,7 +47,6 @@ define void @one_div_sqrt_v2f64(ptr %res, ptr %a0) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vfrsqrte.d $vr1, $vr0
-; CHECK-NEXT: vfmul.d $vr1, $vr0, $vr1
; CHECK-NEXT: vfmul.d $vr2, $vr0, $vr1
; CHECK-NEXT: vldi $vr3, -888
; CHECK-NEXT: vfmadd.d $vr2, $vr2, $vr1, $vr3
``````````
</details>
https://github.com/llvm/llvm-project/pull/188672
More information about the llvm-branch-commits mailing list