[llvm] [GlobalIsel] Canonicalize G_FCMP (PR #108891)

Mon Sep 16 14:59:59 PDT 2024

llvmbot wrote:




@llvm/pr-subscribers-backend-aarch64

Author: Thorsten Schütt (tschuett)

<details>
<summary>Changes</summary>

As a side-effect, we start constant folding fcmps.

---

Patch is 64.79 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/108891.diff


11 Files Affected:

- (modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+3) 
- (modified) llvm/include/llvm/CodeGen/GlobalISel/Utils.h (+39) 
- (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+9-2) 
- (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp (+59) 
- (modified) llvm/lib/CodeGen/GlobalISel/Utils.cpp (+40) 
- (added) llvm/test/CodeGen/AArch64/GlobalISel/combine-cannonicalize-fcmp.mir (+119) 
- (modified) llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll (+22-22) 
- (modified) llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll (+66-66) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.log.ll (+27-41) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.log10.ll (+27-41) 
- (modified) llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll (+1-1) 


``````````diff

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 37c9422d192754..3261b26e74cd71 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -911,6 +911,7 @@ class CombinerHelper {
                               const MachineInstr &BVMI, BuildFnTy &MatchInfo);
 
   bool matchCanonicalizeICmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
+  bool matchCanonicalizeFCmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
 
 private:
   /// Checks for legality of an indexed variant of \p LdSt.
@@ -1029,6 +1030,8 @@ class CombinerHelper {
 
   bool constantFoldICmp(const GICmp &ICmp, const GIConstant &LHSCst,
                         const GIConstant &RHSCst, BuildFnTy &MatchInfo);
+  bool constantFoldFCmp(const GFCmp &FCmp, const GFConstant &LHSCst,
+                        const GFConstant &RHSCst, BuildFnTy &MatchInfo);
 };
 } // namespace llvm
 
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 76e0954357a5d7..95a8234d3c6080 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -632,5 +632,44 @@ class GIConstant {
                                                const MachineRegisterInfo &MRI);
 };
 
+/// A floating-point-like constant.
+///
+/// It abstracts over scalar, fixed-length vectors, and scalable vectors.
+/// In the common case, it provides a common API and feels like an APFloat,
+/// while still providing low-level access.
+/// It can be used for constant-folding.
+///
+/// bool isZero()
+/// abstracts over the kind.
+///
+/// switch(const.getKind())
+/// {
+/// }
+/// provides low-level access.
+class GFConstant {
+public:
+  enum class GFConstantKind { Scalar, FixedVector, ScalableVector };
+
+private:
+  GFConstantKind Kind;
+  SmallVector<APFloat> Values;
+
+public:
+  GFConstant(ArrayRef<APFloat> Values)
+      : Kind(GFConstantKind::FixedVector), Values(Values) {};
+  GFConstant(const APFloat &Value, GFConstantKind Kind) : Kind(Kind) {
+    Values.push_back(Value);
+  }
+
+  /// Returns the kind of this constant, e.g., Scalar.
+  GFConstantKind getKind() const { return Kind; }
+
+  /// Returns the value, if this constant is a scalar.
+  APFloat getScalarValue() const;
+
+  static std::optional<GFConstant> getConstant(Register Const,
+                                               const MachineRegisterInfo &MRI);
+};
+
 } // End namespace llvm.
 #endif
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index c66212d2ab12c8..e75cf0b7d4afc1 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1921,8 +1921,15 @@ def canonicalize_icmp : GICombineRule<
          [{ return Helper.matchCanonicalizeICmp(*${cmp}, ${matchinfo}); }]),
   (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
 
-def icmp_combines: GICombineGroup<[
+def canonicalize_fcmp : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (G_FCMP $root, $pred, $lhs, $rhs):$cmp,
+         [{ return Helper.matchCanonicalizeFCmp(*${cmp}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
+
+def cmp_combines: GICombineGroup<[
   canonicalize_icmp,
+  canonicalize_fcmp,
   icmp_to_true_false_known_bits,
   icmp_to_lhs_known_bits,
   double_icmp_zero_and_combine,
@@ -1995,7 +2002,7 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
     combine_extracted_vector_load,
     undef_combines, identity_combines, phi_combines,
     simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands, shifts_too_big,
-    reassocs, ptr_add_immed_chain, icmp_combines,
+    reassocs, ptr_add_immed_chain, cmp_combines,
     shl_ashr_to_sext_inreg, sext_inreg_of_load,
     width_reduction_combines, select_combines,
     known_bits_simplifications,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
index 025cd2dc9f87f1..7a4cfd4b1a7bb5 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
@@ -60,6 +60,37 @@ bool CombinerHelper::constantFoldICmp(const GICmp &ICmp,
   return true;
 }
 
+bool CombinerHelper::constantFoldFCmp(const GFCmp &FCmp,
+                                      const GFConstant &LHSCst,
+                                      const GFConstant &RHSCst,
+                                      BuildFnTy &MatchInfo) {
+  if (LHSCst.getKind() != GFConstant::GFConstantKind::Scalar)
+    return false;
+
+  Register Dst = FCmp.getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+
+  if (!isConstantLegalOrBeforeLegalizer(DstTy))
+    return false;
+
+  CmpInst::Predicate Pred = FCmp.getCond();
+  APFloat LHS = LHSCst.getScalarValue();
+  APFloat RHS = RHSCst.getScalarValue();
+
+  bool Result = FCmpInst::compare(LHS, RHS, Pred);
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    if (Result)
+      B.buildConstant(Dst, getICmpTrueVal(getTargetLowering(),
+                                          /*IsVector=*/DstTy.isVector(),
+                                          /*IsFP=*/true));
+    else
+      B.buildConstant(Dst, 0);
+  };
+
+  return true;
+}
+
 bool CombinerHelper::matchCanonicalizeICmp(const MachineInstr &MI,
                                            BuildFnTy &MatchInfo) {
   const GICmp *Cmp = cast<GICmp>(&MI);
@@ -84,3 +115,31 @@ bool CombinerHelper::matchCanonicalizeICmp(const MachineInstr &MI,
 
   return false;
 }
+
+bool CombinerHelper::matchCanonicalizeFCmp(const MachineInstr &MI,
+                                           BuildFnTy &MatchInfo) {
+  const GFCmp *Cmp = cast<GFCmp>(&MI);
+
+  Register Dst = Cmp->getReg(0);
+  Register LHS = Cmp->getLHSReg();
+  Register RHS = Cmp->getRHSReg();
+
+  CmpInst::Predicate Pred = Cmp->getCond();
+  assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!");
+
+  if (auto CLHS = GFConstant::getConstant(LHS, MRI)) {
+    if (auto CRHS = GFConstant::getConstant(RHS, MRI))
+      return constantFoldFCmp(*Cmp, *CLHS, *CRHS, MatchInfo);
+
+    // If we have a constant, make sure it is on the RHS.
+    std::swap(LHS, RHS);
+    Pred = CmpInst::getSwappedPredicate(Pred);
+
+    MatchInfo = [=](MachineIRBuilder &B) {
+      B.buildFCmp(Pred, Dst, LHS, RHS, Cmp->getFlags());
+    };
+    return true;
+  }
+
+  return false;
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 15d3aa427d568d..9574464207d99f 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -2008,3 +2008,43 @@ llvm::GIConstant::getConstant(Register Const, const MachineRegisterInfo &MRI) {
 
   return GIConstant(MayBeConstant->Value, GIConstantKind::Scalar);
 }
+
+APFloat llvm::GFConstant::getScalarValue() const {
+  assert(Kind == GFConstantKind::Scalar && "Expected scalar constant");
+
+  return Values[0];
+}
+
+std::optional<GFConstant>
+llvm::GFConstant::getConstant(Register Const, const MachineRegisterInfo &MRI) {
+  MachineInstr *Constant = getDefIgnoringCopies(Const, MRI);
+
+  if (GSplatVector *Splat = dyn_cast<GSplatVector>(Constant)) {
+    std::optional<FPValueAndVReg> MayBeConstant =
+        getFConstantVRegValWithLookThrough(Splat->getScalarReg(), MRI);
+    if (!MayBeConstant)
+      return std::nullopt;
+    return GFConstant(MayBeConstant->Value, GFConstantKind::ScalableVector);
+  }
+
+  if (GBuildVector *Build = dyn_cast<GBuildVector>(Constant)) {
+    SmallVector<APFloat> Values;
+    unsigned NumSources = Build->getNumSources();
+    for (unsigned I = 0; I < NumSources; ++I) {
+      Register SrcReg = Build->getSourceReg(I);
+      std::optional<FPValueAndVReg> MayBeConstant =
+          getFConstantVRegValWithLookThrough(SrcReg, MRI);
+      if (!MayBeConstant)
+        return std::nullopt;
+      Values.push_back(MayBeConstant->Value);
+    }
+    return GFConstant(Values);
+  }
+
+  std::optional<FPValueAndVReg> MayBeConstant =
+      getFConstantVRegValWithLookThrough(Const, MRI);
+  if (!MayBeConstant)
+    return std::nullopt;
+
+  return GFConstant(MayBeConstant->Value, GFConstantKind::Scalar);
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cannonicalize-fcmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cannonicalize-fcmp.mir
new file mode 100644
index 00000000000000..94204611095db0
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cannonicalize-fcmp.mir
@@ -0,0 +1,119 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs  %s | FileCheck %s --check-prefixes=CHECK
+
+---
+name:            test_fcmp_canon
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_fcmp_canon
+    ; CHECK: %lhs:_(s64) = G_FCONSTANT double 1.000000e+00
+    ; CHECK-NEXT: %rhs:_(s64) = COPY $x0
+    ; CHECK-NEXT: %res:_(s32) = afn G_FCMP floatpred(ole), %rhs(s64), %lhs
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs:_(s64) = G_FCONSTANT double 1.0
+    %rhs:_(s64) = COPY $x0
+    %res:_(s32) = afn G_FCMP floatpred(oge), %lhs(s64), %rhs
+    $w0 = COPY %res(s32)
+...
+---
+name:            test_fcmp_no_canon
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_fcmp_no_canon
+    ; CHECK: %lhs:_(s64) = COPY $x0
+    ; CHECK-NEXT: %rhs:_(s64) = G_FCONSTANT double 1.000000e+00
+    ; CHECK-NEXT: %res:_(s32) = afn G_FCMP floatpred(oge), %lhs(s64), %rhs
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs:_(s64) = COPY $x0
+    %rhs:_(s64) = G_FCONSTANT double 1.0
+    %res:_(s32) = afn G_FCMP floatpred(oge), %lhs(s64), %rhs
+    $w0 = COPY %res(s32)
+...
+---
+name:            test_fcmp_no_canon_bv
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_fcmp_no_canon_bv
+    ; CHECK: %opaque1:_(s64) = COPY $x0
+    ; CHECK-NEXT: %opaque2:_(s64) = COPY $x0
+    ; CHECK-NEXT: %const1:_(s64) = G_FCONSTANT double 1.000000e+00
+    ; CHECK-NEXT: %const2:_(s64) = G_FCONSTANT double 2.000000e+00
+    ; CHECK-NEXT: %lhs:_(<2 x s64>) = G_BUILD_VECTOR %const1(s64), %opaque2(s64)
+    ; CHECK-NEXT: %rhs:_(<2 x s64>) = G_BUILD_VECTOR %opaque1(s64), %const2(s64)
+    ; CHECK-NEXT: %res:_(<2 x s32>) = afn G_FCMP floatpred(oge), %lhs(<2 x s64>), %rhs
+    ; CHECK-NEXT: $x0 = COPY %res(<2 x s32>)
+    %opaque1:_(s64) = COPY $x0
+    %opaque2:_(s64) = COPY $x0
+    %const1:_(s64) = G_FCONSTANT double 1.0
+    %const2:_(s64) = G_FCONSTANT double 2.0
+    %lhs:_(<2 x s64>) = G_BUILD_VECTOR %const1(s64), %opaque2(s64)
+    %rhs:_(<2 x s64>) = G_BUILD_VECTOR %opaque1(s64), %const2(s64)
+    %res:_(<2 x s32>) = afn G_FCMP floatpred(oge), %lhs(<2 x s64>), %rhs
+    $x0 = COPY %res(<2 x s32>)
+...
+---
+name:            test_fcmp_canon_bv
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_fcmp_canon_bv
+    ; CHECK: %opaque1:_(s64) = COPY $x0
+    ; CHECK-NEXT: %opaque2:_(s64) = COPY $x0
+    ; CHECK-NEXT: %const1:_(s64) = G_FCONSTANT double 1.000000e+00
+    ; CHECK-NEXT: %const2:_(s64) = G_FCONSTANT double 2.000000e+00
+    ; CHECK-NEXT: %lhs:_(<2 x s64>) = G_BUILD_VECTOR %const1(s64), %const2(s64)
+    ; CHECK-NEXT: %rhs:_(<2 x s64>) = G_BUILD_VECTOR %opaque1(s64), %opaque2(s64)
+    ; CHECK-NEXT: %res:_(<2 x s32>) = afn G_FCMP floatpred(ole), %rhs(<2 x s64>), %lhs
+    ; CHECK-NEXT: $x0 = COPY %res(<2 x s32>)
+    %opaque1:_(s64) = COPY $x0
+    %opaque2:_(s64) = COPY $x0
+    %const1:_(s64) = G_FCONSTANT double 1.0
+    %const2:_(s64) = G_FCONSTANT double 2.0
+    %lhs:_(<2 x s64>) = G_BUILD_VECTOR %const1(s64), %const2(s64)
+    %rhs:_(<2 x s64>) = G_BUILD_VECTOR %opaque1(s64), %opaque2(s64)
+    %res:_(<2 x s32>) = afn G_FCMP floatpred(oge), %lhs(<2 x s64>), %rhs
+    $x0 = COPY %res(<2 x s32>)
+...
+---
+name:            test_fcmp_canon_splat
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_fcmp_canon_splat
+    ; CHECK: %const:_(s64) = G_FCONSTANT double 1.000000e+00
+    ; CHECK-NEXT: %lhs:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR %const(s64)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: %rhs:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[COPY]](s64)
+    ; CHECK-NEXT: %res:_(<vscale x 2 x s32>) = afn G_FCMP floatpred(ole), %rhs(<vscale x 2 x s64>), %lhs
+    ; CHECK-NEXT: %z:_(<vscale x 2 x s64>) = G_ZEXT %res(<vscale x 2 x s32>)
+    ; CHECK-NEXT: $z0 = COPY %z(<vscale x 2 x s64>)
+    %const:_(s64) = G_FCONSTANT double 1.0
+    %lhs:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR %const:_(s64)
+    %1:_(s64) = COPY $x1
+    %rhs:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR %1:_(s64)
+    %res:_(<vscale x 2 x s32>) = afn G_FCMP floatpred(oge), %lhs(<vscale x 2 x s64>), %rhs
+    %z:_(<vscale x 2 x s64>) = G_ZEXT  %res
+    $z0 = COPY %z(<vscale x 2 x s64>)
+...
+---
+name:            test_fcmp_const
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_fcmp_const
+    ; CHECK: %res:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs:_(s64) = G_FCONSTANT double 1.0
+    %rhs:_(s64) = G_FCONSTANT double 2.0
+    %res:_(s32) = afn G_FCMP floatpred(oge), %lhs(s64), %rhs
+    $w0 = COPY %res(s32)
+...
+---
+name:            test_fcmp_const_other
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_fcmp_const_other
+    ; CHECK: %res:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs:_(s64) = G_FCONSTANT double 2.0
+    %rhs:_(s64) = G_FCONSTANT double 1.0
+    %res:_(s32) = afn G_FCMP floatpred(oge), %lhs(s64), %rhs
+    $w0 = COPY %res(s32)
+...
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll b/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll
index 5ae989603b31ab..2140f50611d711 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll
@@ -342,7 +342,7 @@ define float @v_fdiv_recip_sqrt_f32(float %x) {
 ; CODEGEN-IEEE-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CODEGEN-IEEE-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; CODEGEN-IEEE-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; CODEGEN-IEEE-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; CODEGEN-IEEE-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; CODEGEN-IEEE-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CODEGEN-IEEE-GISEL-NEXT:    v_sqrt_f32_e32 v1, v0
 ; CODEGEN-IEEE-GISEL-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -410,7 +410,7 @@ define float @v_fdiv_recip_sqrt_f32(float %x) {
 ; IR-IEEE-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; IR-IEEE-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; IR-IEEE-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; IR-IEEE-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; IR-IEEE-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; IR-IEEE-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; IR-IEEE-GISEL-NEXT:    v_sqrt_f32_e32 v1, v0
 ; IR-IEEE-GISEL-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -479,7 +479,7 @@ define float @v_fdiv_recip_sqrt_f32(float %x) {
 ; CODEGEN-DAZ-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CODEGEN-DAZ-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; CODEGEN-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; CODEGEN-DAZ-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; CODEGEN-DAZ-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; CODEGEN-DAZ-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CODEGEN-DAZ-GISEL-NEXT:    v_rsq_f32_e32 v1, v0
 ; CODEGEN-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -549,7 +549,7 @@ define float @v_fdiv_recip_sqrt_f32(float %x) {
 ; IR-DAZ-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; IR-DAZ-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; IR-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; IR-DAZ-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; IR-DAZ-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; IR-DAZ-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; IR-DAZ-GISEL-NEXT:    v_rsq_f32_e32 v1, v0
 ; IR-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -607,7 +607,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp(float %x) {
 ; CODEGEN-IEEE-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CODEGEN-IEEE-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; CODEGEN-IEEE-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; CODEGEN-IEEE-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; CODEGEN-IEEE-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; CODEGEN-IEEE-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CODEGEN-IEEE-GISEL-NEXT:    v_sqrt_f32_e32 v1, v0
 ; CODEGEN-IEEE-GISEL-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -647,7 +647,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp(float %x) {
 ; IR-IEEE-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; IR-IEEE-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; IR-IEEE-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; IR-IEEE-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; IR-IEEE-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; IR-IEEE-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; IR-IEEE-GISEL-NEXT:    v_sqrt_f32_e32 v1, v0
 ; IR-IEEE-GISEL-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -687,7 +687,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp(float %x) {
 ; CODEGEN-DAZ-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CODEGEN-DAZ-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; CODEGEN-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; CODEGEN-DAZ-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; CODEGEN-DAZ-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; CODEGEN-DAZ-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CODEGEN-DAZ-GISEL-NEXT:    v_rsq_f32_e32 v1, v0
 ; CODEGEN-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -728,7 +728,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp(float %x) {
 ; IR-DAZ-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; IR-DAZ-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; IR-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; IR-DAZ-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; IR-DAZ-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; IR-DAZ-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; IR-DAZ-GISEL-NEXT:    v_rsq_f32_e32 v1, v0
 ; IR-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -831,7 +831,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp_fdiv_only(float %x) {
 ; CODEGEN-IEEE-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CODEGEN-IEEE-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; CODEGEN-IEEE-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; CODEGEN-IEEE-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; CODEGEN-IEEE-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; CODEGEN-IEEE-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CODEGEN-IEEE-GISEL-NEXT:    v_sqrt_f32_e32 v1, v0
 ; CODEGEN-IEEE-GISEL-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -871,7 +871,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp_fdiv_only(float %x) {
 ; IR-IEEE-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; IR-IEEE-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; IR-IEEE-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; IR-IEEE-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; IR-IEEE-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; IR-IEEE-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; IR-IEEE-GISEL-NEXT:    v_sqrt_f32_e32 v1, v0
 ; IR-IEEE-GISEL-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -911,7 +911,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp_fdiv_only(float %x) {
 ; CODEGEN-DAZ-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CODEGEN-DAZ-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; CODEGEN-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; CODEGEN-DAZ-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; CODEGEN-DAZ-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; CODEGEN-DAZ-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CODEGEN-DAZ-GISEL-NEXT:    v_rsq_f32_e32 v1, v0
 ; CODEGEN-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -952,7 +952,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp_fdiv_only(float %x) {
 ; IR-DAZ-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; IR-DAZ-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; IR-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; IR-DAZ-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; IR-DAZ-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; IR-DAZ-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; IR-DAZ-GISEL-NEXT:    v_rsq_f32_e32 v1, v0
 ; IR-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/108891