[llvm] acfa294 - [GlobalIsel] Canonicalize G_FCMP (#108891)

via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 17 00:42:08 PDT 2024


Author: Thorsten Schütt
Date: 2024-09-17T09:42:04+02:00
New Revision: acfa294b5e2b10dfe9b9fc075259d849a3897493

URL: https://github.com/llvm/llvm-project/commit/acfa294b5e2b10dfe9b9fc075259d849a3897493
DIFF: https://github.com/llvm/llvm-project/commit/acfa294b5e2b10dfe9b9fc075259d849a3897493.diff

LOG: [GlobalIsel] Canonicalize G_FCMP (#108891)

As a side-effect, we start constant folding fcmps.

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/combine-cannonicalize-fcmp.mir

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
    llvm/include/llvm/CodeGen/GlobalISel/Utils.h
    llvm/include/llvm/Target/GlobalISel/Combine.td
    llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
    llvm/lib/CodeGen/GlobalISel/Utils.cpp
    llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll
    llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll
    llvm/test/CodeGen/AMDGPU/llvm.log.ll
    llvm/test/CodeGen/AMDGPU/llvm.log10.ll
    llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 37c9422d192754..3261b26e74cd71 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -911,6 +911,7 @@ class CombinerHelper {
                               const MachineInstr &BVMI, BuildFnTy &MatchInfo);
 
   bool matchCanonicalizeICmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
+  bool matchCanonicalizeFCmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
 
 private:
   /// Checks for legality of an indexed variant of \p LdSt.
@@ -1029,6 +1030,8 @@ class CombinerHelper {
 
   bool constantFoldICmp(const GICmp &ICmp, const GIConstant &LHSCst,
                         const GIConstant &RHSCst, BuildFnTy &MatchInfo);
+  bool constantFoldFCmp(const GFCmp &FCmp, const GFConstant &LHSCst,
+                        const GFConstant &RHSCst, BuildFnTy &MatchInfo);
 };
 } // namespace llvm
 

diff  --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 76e0954357a5d7..95a8234d3c6080 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -632,5 +632,44 @@ class GIConstant {
                                                const MachineRegisterInfo &MRI);
 };
 
+/// A floating-point-like constant.
+///
+/// It abstracts over scalar, fixed-length vectors, and scalable vectors.
+/// In the common case, it provides a common API and feels like an APFloat,
+/// while still providing low-level access.
+/// It can be used for constant-folding.
+///
+/// bool isZero()
+/// abstracts over the kind.
+///
+/// switch(const.getKind())
+/// {
+/// }
+/// provides low-level access.
+class GFConstant {
+public:
+  enum class GFConstantKind { Scalar, FixedVector, ScalableVector };
+
+private:
+  GFConstantKind Kind;
+  SmallVector<APFloat> Values;
+
+public:
+  GFConstant(ArrayRef<APFloat> Values)
+      : Kind(GFConstantKind::FixedVector), Values(Values) {};
+  GFConstant(const APFloat &Value, GFConstantKind Kind) : Kind(Kind) {
+    Values.push_back(Value);
+  }
+
+  /// Returns the kind of this constant, e.g., Scalar.
+  GFConstantKind getKind() const { return Kind; }
+
+  /// Returns the value, if this constant is a scalar.
+  APFloat getScalarValue() const;
+
+  static std::optional<GFConstant> getConstant(Register Const,
+                                               const MachineRegisterInfo &MRI);
+};
+
 } // End namespace llvm.
 #endif

diff  --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index c66212d2ab12c8..e75cf0b7d4afc1 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1921,8 +1921,15 @@ def canonicalize_icmp : GICombineRule<
          [{ return Helper.matchCanonicalizeICmp(*${cmp}, ${matchinfo}); }]),
   (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
 
-def icmp_combines: GICombineGroup<[
+def canonicalize_fcmp : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (G_FCMP $root, $pred, $lhs, $rhs):$cmp,
+         [{ return Helper.matchCanonicalizeFCmp(*${cmp}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
+
+def cmp_combines: GICombineGroup<[
   canonicalize_icmp,
+  canonicalize_fcmp,
   icmp_to_true_false_known_bits,
   icmp_to_lhs_known_bits,
   double_icmp_zero_and_combine,
@@ -1995,7 +2002,7 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
     combine_extracted_vector_load,
     undef_combines, identity_combines, phi_combines,
     simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands, shifts_too_big,
-    reassocs, ptr_add_immed_chain, icmp_combines,
+    reassocs, ptr_add_immed_chain, cmp_combines,
     shl_ashr_to_sext_inreg, sext_inreg_of_load,
     width_reduction_combines, select_combines,
     known_bits_simplifications,

diff  --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
index 025cd2dc9f87f1..7a4cfd4b1a7bb5 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
@@ -60,6 +60,37 @@ bool CombinerHelper::constantFoldICmp(const GICmp &ICmp,
   return true;
 }
 
+bool CombinerHelper::constantFoldFCmp(const GFCmp &FCmp,
+                                      const GFConstant &LHSCst,
+                                      const GFConstant &RHSCst,
+                                      BuildFnTy &MatchInfo) {
+  if (LHSCst.getKind() != GFConstant::GFConstantKind::Scalar)
+    return false;
+
+  Register Dst = FCmp.getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+
+  if (!isConstantLegalOrBeforeLegalizer(DstTy))
+    return false;
+
+  CmpInst::Predicate Pred = FCmp.getCond();
+  APFloat LHS = LHSCst.getScalarValue();
+  APFloat RHS = RHSCst.getScalarValue();
+
+  bool Result = FCmpInst::compare(LHS, RHS, Pred);
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    if (Result)
+      B.buildConstant(Dst, getICmpTrueVal(getTargetLowering(),
+                                          /*IsVector=*/DstTy.isVector(),
+                                          /*IsFP=*/true));
+    else
+      B.buildConstant(Dst, 0);
+  };
+
+  return true;
+}
+
 bool CombinerHelper::matchCanonicalizeICmp(const MachineInstr &MI,
                                            BuildFnTy &MatchInfo) {
   const GICmp *Cmp = cast<GICmp>(&MI);
@@ -84,3 +115,31 @@ bool CombinerHelper::matchCanonicalizeICmp(const MachineInstr &MI,
 
   return false;
 }
+
+bool CombinerHelper::matchCanonicalizeFCmp(const MachineInstr &MI,
+                                           BuildFnTy &MatchInfo) {
+  const GFCmp *Cmp = cast<GFCmp>(&MI);
+
+  Register Dst = Cmp->getReg(0);
+  Register LHS = Cmp->getLHSReg();
+  Register RHS = Cmp->getRHSReg();
+
+  CmpInst::Predicate Pred = Cmp->getCond();
+  assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!");
+
+  if (auto CLHS = GFConstant::getConstant(LHS, MRI)) {
+    if (auto CRHS = GFConstant::getConstant(RHS, MRI))
+      return constantFoldFCmp(*Cmp, *CLHS, *CRHS, MatchInfo);
+
+    // If we have a constant, make sure it is on the RHS.
+    std::swap(LHS, RHS);
+    Pred = CmpInst::getSwappedPredicate(Pred);
+
+    MatchInfo = [=](MachineIRBuilder &B) {
+      B.buildFCmp(Pred, Dst, LHS, RHS, Cmp->getFlags());
+    };
+    return true;
+  }
+
+  return false;
+}

diff  --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 15d3aa427d568d..9574464207d99f 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -2008,3 +2008,43 @@ llvm::GIConstant::getConstant(Register Const, const MachineRegisterInfo &MRI) {
 
   return GIConstant(MayBeConstant->Value, GIConstantKind::Scalar);
 }
+
+APFloat llvm::GFConstant::getScalarValue() const {
+  assert(Kind == GFConstantKind::Scalar && "Expected scalar constant");
+
+  return Values[0];
+}
+
+std::optional<GFConstant>
+llvm::GFConstant::getConstant(Register Const, const MachineRegisterInfo &MRI) {
+  MachineInstr *Constant = getDefIgnoringCopies(Const, MRI);
+
+  if (GSplatVector *Splat = dyn_cast<GSplatVector>(Constant)) {
+    std::optional<FPValueAndVReg> MayBeConstant =
+        getFConstantVRegValWithLookThrough(Splat->getScalarReg(), MRI);
+    if (!MayBeConstant)
+      return std::nullopt;
+    return GFConstant(MayBeConstant->Value, GFConstantKind::ScalableVector);
+  }
+
+  if (GBuildVector *Build = dyn_cast<GBuildVector>(Constant)) {
+    SmallVector<APFloat> Values;
+    unsigned NumSources = Build->getNumSources();
+    for (unsigned I = 0; I < NumSources; ++I) {
+      Register SrcReg = Build->getSourceReg(I);
+      std::optional<FPValueAndVReg> MayBeConstant =
+          getFConstantVRegValWithLookThrough(SrcReg, MRI);
+      if (!MayBeConstant)
+        return std::nullopt;
+      Values.push_back(MayBeConstant->Value);
+    }
+    return GFConstant(Values);
+  }
+
+  std::optional<FPValueAndVReg> MayBeConstant =
+      getFConstantVRegValWithLookThrough(Const, MRI);
+  if (!MayBeConstant)
+    return std::nullopt;
+
+  return GFConstant(MayBeConstant->Value, GFConstantKind::Scalar);
+}

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cannonicalize-fcmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cannonicalize-fcmp.mir
new file mode 100644
index 00000000000000..94204611095db0
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cannonicalize-fcmp.mir
@@ -0,0 +1,119 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs  %s | FileCheck %s --check-prefixes=CHECK
+
+---
+name:            test_fcmp_canon
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_fcmp_canon
+    ; CHECK: %lhs:_(s64) = G_FCONSTANT double 1.000000e+00
+    ; CHECK-NEXT: %rhs:_(s64) = COPY $x0
+    ; CHECK-NEXT: %res:_(s32) = afn G_FCMP floatpred(ole), %rhs(s64), %lhs
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs:_(s64) = G_FCONSTANT double 1.0
+    %rhs:_(s64) = COPY $x0
+    %res:_(s32) = afn G_FCMP floatpred(oge), %lhs(s64), %rhs
+    $w0 = COPY %res(s32)
+...
+---
+name:            test_fcmp_no_canon
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_fcmp_no_canon
+    ; CHECK: %lhs:_(s64) = COPY $x0
+    ; CHECK-NEXT: %rhs:_(s64) = G_FCONSTANT double 1.000000e+00
+    ; CHECK-NEXT: %res:_(s32) = afn G_FCMP floatpred(oge), %lhs(s64), %rhs
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs:_(s64) = COPY $x0
+    %rhs:_(s64) = G_FCONSTANT double 1.0
+    %res:_(s32) = afn G_FCMP floatpred(oge), %lhs(s64), %rhs
+    $w0 = COPY %res(s32)
+...
+---
+name:            test_fcmp_no_canon_bv
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_fcmp_no_canon_bv
+    ; CHECK: %opaque1:_(s64) = COPY $x0
+    ; CHECK-NEXT: %opaque2:_(s64) = COPY $x0
+    ; CHECK-NEXT: %const1:_(s64) = G_FCONSTANT double 1.000000e+00
+    ; CHECK-NEXT: %const2:_(s64) = G_FCONSTANT double 2.000000e+00
+    ; CHECK-NEXT: %lhs:_(<2 x s64>) = G_BUILD_VECTOR %const1(s64), %opaque2(s64)
+    ; CHECK-NEXT: %rhs:_(<2 x s64>) = G_BUILD_VECTOR %opaque1(s64), %const2(s64)
+    ; CHECK-NEXT: %res:_(<2 x s32>) = afn G_FCMP floatpred(oge), %lhs(<2 x s64>), %rhs
+    ; CHECK-NEXT: $x0 = COPY %res(<2 x s32>)
+    %opaque1:_(s64) = COPY $x0
+    %opaque2:_(s64) = COPY $x0
+    %const1:_(s64) = G_FCONSTANT double 1.0
+    %const2:_(s64) = G_FCONSTANT double 2.0
+    %lhs:_(<2 x s64>) = G_BUILD_VECTOR %const1(s64), %opaque2(s64)
+    %rhs:_(<2 x s64>) = G_BUILD_VECTOR %opaque1(s64), %const2(s64)
+    %res:_(<2 x s32>) = afn G_FCMP floatpred(oge), %lhs(<2 x s64>), %rhs
+    $x0 = COPY %res(<2 x s32>)
+...
+---
+name:            test_fcmp_canon_bv
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_fcmp_canon_bv
+    ; CHECK: %opaque1:_(s64) = COPY $x0
+    ; CHECK-NEXT: %opaque2:_(s64) = COPY $x0
+    ; CHECK-NEXT: %const1:_(s64) = G_FCONSTANT double 1.000000e+00
+    ; CHECK-NEXT: %const2:_(s64) = G_FCONSTANT double 2.000000e+00
+    ; CHECK-NEXT: %lhs:_(<2 x s64>) = G_BUILD_VECTOR %const1(s64), %const2(s64)
+    ; CHECK-NEXT: %rhs:_(<2 x s64>) = G_BUILD_VECTOR %opaque1(s64), %opaque2(s64)
+    ; CHECK-NEXT: %res:_(<2 x s32>) = afn G_FCMP floatpred(ole), %rhs(<2 x s64>), %lhs
+    ; CHECK-NEXT: $x0 = COPY %res(<2 x s32>)
+    %opaque1:_(s64) = COPY $x0
+    %opaque2:_(s64) = COPY $x0
+    %const1:_(s64) = G_FCONSTANT double 1.0
+    %const2:_(s64) = G_FCONSTANT double 2.0
+    %lhs:_(<2 x s64>) = G_BUILD_VECTOR %const1(s64), %const2(s64)
+    %rhs:_(<2 x s64>) = G_BUILD_VECTOR %opaque1(s64), %opaque2(s64)
+    %res:_(<2 x s32>) = afn G_FCMP floatpred(oge), %lhs(<2 x s64>), %rhs
+    $x0 = COPY %res(<2 x s32>)
+...
+---
+name:            test_fcmp_canon_splat
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_fcmp_canon_splat
+    ; CHECK: %const:_(s64) = G_FCONSTANT double 1.000000e+00
+    ; CHECK-NEXT: %lhs:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR %const(s64)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: %rhs:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[COPY]](s64)
+    ; CHECK-NEXT: %res:_(<vscale x 2 x s32>) = afn G_FCMP floatpred(ole), %rhs(<vscale x 2 x s64>), %lhs
+    ; CHECK-NEXT: %z:_(<vscale x 2 x s64>) = G_ZEXT %res(<vscale x 2 x s32>)
+    ; CHECK-NEXT: $z0 = COPY %z(<vscale x 2 x s64>)
+    %const:_(s64) = G_FCONSTANT double 1.0
+    %lhs:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR %const:_(s64)
+    %1:_(s64) = COPY $x1
+    %rhs:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR %1:_(s64)
+    %res:_(<vscale x 2 x s32>) = afn G_FCMP floatpred(oge), %lhs(<vscale x 2 x s64>), %rhs
+    %z:_(<vscale x 2 x s64>) = G_ZEXT  %res
+    $z0 = COPY %z(<vscale x 2 x s64>)
+...
+---
+name:            test_fcmp_const
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_fcmp_const
+    ; CHECK: %res:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs:_(s64) = G_FCONSTANT double 1.0
+    %rhs:_(s64) = G_FCONSTANT double 2.0
+    %res:_(s32) = afn G_FCMP floatpred(oge), %lhs(s64), %rhs
+    $w0 = COPY %res(s32)
+...
+---
+name:            test_fcmp_const_other
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_fcmp_const_other
+    ; CHECK: %res:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs:_(s64) = G_FCONSTANT double 2.0
+    %rhs:_(s64) = G_FCONSTANT double 1.0
+    %res:_(s32) = afn G_FCMP floatpred(oge), %lhs(s64), %rhs
+    $w0 = COPY %res(s32)
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll b/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll
index 5ae989603b31ab..2140f50611d711 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll
@@ -342,7 +342,7 @@ define float @v_fdiv_recip_sqrt_f32(float %x) {
 ; CODEGEN-IEEE-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CODEGEN-IEEE-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; CODEGEN-IEEE-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; CODEGEN-IEEE-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; CODEGEN-IEEE-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; CODEGEN-IEEE-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CODEGEN-IEEE-GISEL-NEXT:    v_sqrt_f32_e32 v1, v0
 ; CODEGEN-IEEE-GISEL-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -410,7 +410,7 @@ define float @v_fdiv_recip_sqrt_f32(float %x) {
 ; IR-IEEE-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; IR-IEEE-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; IR-IEEE-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; IR-IEEE-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; IR-IEEE-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; IR-IEEE-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; IR-IEEE-GISEL-NEXT:    v_sqrt_f32_e32 v1, v0
 ; IR-IEEE-GISEL-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -479,7 +479,7 @@ define float @v_fdiv_recip_sqrt_f32(float %x) {
 ; CODEGEN-DAZ-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CODEGEN-DAZ-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; CODEGEN-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; CODEGEN-DAZ-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; CODEGEN-DAZ-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; CODEGEN-DAZ-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CODEGEN-DAZ-GISEL-NEXT:    v_rsq_f32_e32 v1, v0
 ; CODEGEN-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -549,7 +549,7 @@ define float @v_fdiv_recip_sqrt_f32(float %x) {
 ; IR-DAZ-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; IR-DAZ-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; IR-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; IR-DAZ-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; IR-DAZ-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; IR-DAZ-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; IR-DAZ-GISEL-NEXT:    v_rsq_f32_e32 v1, v0
 ; IR-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -607,7 +607,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp(float %x) {
 ; CODEGEN-IEEE-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CODEGEN-IEEE-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; CODEGEN-IEEE-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; CODEGEN-IEEE-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; CODEGEN-IEEE-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; CODEGEN-IEEE-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CODEGEN-IEEE-GISEL-NEXT:    v_sqrt_f32_e32 v1, v0
 ; CODEGEN-IEEE-GISEL-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -647,7 +647,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp(float %x) {
 ; IR-IEEE-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; IR-IEEE-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; IR-IEEE-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; IR-IEEE-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; IR-IEEE-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; IR-IEEE-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; IR-IEEE-GISEL-NEXT:    v_sqrt_f32_e32 v1, v0
 ; IR-IEEE-GISEL-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -687,7 +687,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp(float %x) {
 ; CODEGEN-DAZ-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CODEGEN-DAZ-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; CODEGEN-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; CODEGEN-DAZ-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; CODEGEN-DAZ-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; CODEGEN-DAZ-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CODEGEN-DAZ-GISEL-NEXT:    v_rsq_f32_e32 v1, v0
 ; CODEGEN-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -728,7 +728,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp(float %x) {
 ; IR-DAZ-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; IR-DAZ-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; IR-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; IR-DAZ-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; IR-DAZ-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; IR-DAZ-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; IR-DAZ-GISEL-NEXT:    v_rsq_f32_e32 v1, v0
 ; IR-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -831,7 +831,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp_fdiv_only(float %x) {
 ; CODEGEN-IEEE-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CODEGEN-IEEE-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; CODEGEN-IEEE-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; CODEGEN-IEEE-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; CODEGEN-IEEE-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; CODEGEN-IEEE-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CODEGEN-IEEE-GISEL-NEXT:    v_sqrt_f32_e32 v1, v0
 ; CODEGEN-IEEE-GISEL-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -871,7 +871,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp_fdiv_only(float %x) {
 ; IR-IEEE-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; IR-IEEE-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; IR-IEEE-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; IR-IEEE-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; IR-IEEE-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; IR-IEEE-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; IR-IEEE-GISEL-NEXT:    v_sqrt_f32_e32 v1, v0
 ; IR-IEEE-GISEL-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -911,7 +911,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp_fdiv_only(float %x) {
 ; CODEGEN-DAZ-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CODEGEN-DAZ-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; CODEGEN-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; CODEGEN-DAZ-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; CODEGEN-DAZ-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; CODEGEN-DAZ-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CODEGEN-DAZ-GISEL-NEXT:    v_rsq_f32_e32 v1, v0
 ; CODEGEN-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -952,7 +952,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp_fdiv_only(float %x) {
 ; IR-DAZ-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; IR-DAZ-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; IR-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; IR-DAZ-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; IR-DAZ-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; IR-DAZ-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; IR-DAZ-GISEL-NEXT:    v_rsq_f32_e32 v1, v0
 ; IR-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -1016,7 +1016,7 @@ define float @v_fdiv_recip_sqrt_f32_afn_fdiv_only(float %x) {
 ; CODEGEN-IEEE-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CODEGEN-IEEE-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; CODEGEN-IEEE-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; CODEGEN-IEEE-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; CODEGEN-IEEE-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; CODEGEN-IEEE-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CODEGEN-IEEE-GISEL-NEXT:    v_sqrt_f32_e32 v1, v0
 ; CODEGEN-IEEE-GISEL-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -1064,7 +1064,7 @@ define float @v_fdiv_recip_sqrt_f32_afn_fdiv_only(float %x) {
 ; IR-IEEE-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; IR-IEEE-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; IR-IEEE-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; IR-IEEE-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; IR-IEEE-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; IR-IEEE-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; IR-IEEE-GISEL-NEXT:    v_sqrt_f32_e32 v1, v0
 ; IR-IEEE-GISEL-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -1111,7 +1111,7 @@ define float @v_fdiv_recip_sqrt_f32_afn_fdiv_only(float %x) {
 ; CODEGEN-DAZ-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CODEGEN-DAZ-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; CODEGEN-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; CODEGEN-DAZ-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; CODEGEN-DAZ-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; CODEGEN-DAZ-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CODEGEN-DAZ-GISEL-NEXT:    v_rsq_f32_e32 v1, v0
 ; CODEGEN-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -1157,7 +1157,7 @@ define float @v_fdiv_recip_sqrt_f32_afn_fdiv_only(float %x) {
 ; IR-DAZ-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; IR-DAZ-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; IR-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; IR-DAZ-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; IR-DAZ-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; IR-DAZ-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; IR-DAZ-GISEL-NEXT:    v_rsq_f32_e32 v1, v0
 ; IR-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -1191,7 +1191,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp_afn_fdiv_only(float %x) {
 ; CODEGEN-IEEE-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CODEGEN-IEEE-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; CODEGEN-IEEE-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; CODEGEN-IEEE-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; CODEGEN-IEEE-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; CODEGEN-IEEE-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CODEGEN-IEEE-GISEL-NEXT:    v_sqrt_f32_e32 v1, v0
 ; CODEGEN-IEEE-GISEL-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -1221,7 +1221,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp_afn_fdiv_only(float %x) {
 ; IR-IEEE-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; IR-IEEE-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; IR-IEEE-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; IR-IEEE-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; IR-IEEE-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; IR-IEEE-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; IR-IEEE-GISEL-NEXT:    v_sqrt_f32_e32 v1, v0
 ; IR-IEEE-GISEL-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -1251,7 +1251,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp_afn_fdiv_only(float %x) {
 ; CODEGEN-DAZ-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CODEGEN-DAZ-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; CODEGEN-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; CODEGEN-DAZ-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; CODEGEN-DAZ-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; CODEGEN-DAZ-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CODEGEN-DAZ-GISEL-NEXT:    v_rsq_f32_e32 v1, v0
 ; CODEGEN-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -1280,7 +1280,7 @@ define float @v_fdiv_recip_sqrt_f32_arcp_afn_fdiv_only(float %x) {
 ; IR-DAZ-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; IR-DAZ-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; IR-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; IR-DAZ-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; IR-DAZ-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; IR-DAZ-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; IR-DAZ-GISEL-NEXT:    v_rsq_f32_e32 v1, v0
 ; IR-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -1676,7 +1676,7 @@ define float @v_recip_sqrt_f32_ulp25_contract(float %x) {
 ; IR-IEEE-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; IR-IEEE-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; IR-IEEE-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; IR-IEEE-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; IR-IEEE-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; IR-IEEE-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; IR-IEEE-GISEL-NEXT:    v_sqrt_f32_e32 v1, v0
 ; IR-IEEE-GISEL-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -1751,7 +1751,7 @@ define float @v_recip_sqrt_f32_ulp25_contract(float %x) {
 ; IR-DAZ-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; IR-DAZ-GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; IR-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; IR-DAZ-GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; IR-DAZ-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; IR-DAZ-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; IR-DAZ-GISEL-NEXT:    v_rsq_f32_e32 v1, v0
 ; IR-DAZ-GISEL-NEXT:    v_mul_f32_e32 v2, v0, v1

diff  --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll
index 8fd201038ad160..c6c145e090829c 100644
--- a/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll
@@ -34,7 +34,7 @@ define float @v_sqrt_f32(float %x) {
 ; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
 ; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -79,7 +79,7 @@ define float @v_sqrt_f32(float %x) {
 ; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -129,7 +129,7 @@ define float @v_sqrt_f32_fneg(float %x) {
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x4f800000
 ; GISEL-IEEE-NEXT:    v_mul_f32_e64 v2, -v0, v2
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e64 vcc, v1, -v0
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, -v0, v1
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v0, -v0, v2, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
 ; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -175,7 +175,7 @@ define float @v_sqrt_f32_fneg(float %x) {
 ; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x4f800000
 ; GISEL-DAZ-NEXT:    v_mul_f32_e64 v2, -v0, v2
-; GISEL-DAZ-NEXT:    v_cmp_gt_f32_e64 vcc, v1, -v0
+; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e64 vcc, -v0, v1
 ; GISEL-DAZ-NEXT:    v_cndmask_b32_e64 v0, -v0, v2, vcc
 ; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -227,7 +227,7 @@ define float @v_sqrt_f32_fabs(float %x) {
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x4f800000
 ; GISEL-IEEE-NEXT:    v_mul_f32_e64 v2, |v0|, v2
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e64 vcc, v1, |v0|
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v1
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v0, |v0|, v2, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
 ; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -274,7 +274,7 @@ define float @v_sqrt_f32_fabs(float %x) {
 ; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x4f800000
 ; GISEL-DAZ-NEXT:    v_mul_f32_e64 v2, |v0|, v2
-; GISEL-DAZ-NEXT:    v_cmp_gt_f32_e64 vcc, v1, |v0|
+; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v1
 ; GISEL-DAZ-NEXT:    v_cndmask_b32_e64 v0, |v0|, v2, vcc
 ; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -326,7 +326,7 @@ define float @v_sqrt_f32_fneg_fabs(float %x) {
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x4f800000
 ; GISEL-IEEE-NEXT:    v_mul_f32_e64 v2, -|v0|, v2
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e64 vcc, v1, -|v0|
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, -|v0|, v1
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v0, -|v0|, v2, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
 ; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -373,7 +373,7 @@ define float @v_sqrt_f32_fneg_fabs(float %x) {
 ; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x4f800000
 ; GISEL-DAZ-NEXT:    v_mul_f32_e64 v2, -|v0|, v2
-; GISEL-DAZ-NEXT:    v_cmp_gt_f32_e64 vcc, v1, -|v0|
+; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e64 vcc, -|v0|, v1
 ; GISEL-DAZ-NEXT:    v_cndmask_b32_e64 v0, -|v0|, v2, vcc
 ; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -424,7 +424,7 @@ define float @v_sqrt_f32_ninf(float %x) {
 ; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
 ; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -469,7 +469,7 @@ define float @v_sqrt_f32_ninf(float %x) {
 ; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -518,7 +518,7 @@ define float @v_sqrt_f32_no_infs_attribute(float %x) #5 {
 ; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
 ; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -563,7 +563,7 @@ define float @v_sqrt_f32_no_infs_attribute(float %x) #5 {
 ; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -612,7 +612,7 @@ define float @v_sqrt_f32_nnan(float %x) {
 ; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
 ; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -657,7 +657,7 @@ define float @v_sqrt_f32_nnan(float %x) {
 ; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -705,12 +705,12 @@ define amdgpu_ps i32 @s_sqrt_f32(float inreg %x) {
 ;
 ; GISEL-IEEE-LABEL: s_sqrt_f32:
 ; GISEL-IEEE:       ; %bb.0:
-; GISEL-IEEE-NEXT:    v_mov_b32_e32 v0, 0xf800000
+; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x4f800000
-; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, s0
+; GISEL-IEEE-NEXT:    v_mov_b32_e32 v0, s0
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, s0, v2
-; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
-; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v1
+; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
 ; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[0:1], -1, v1
 ; GISEL-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
@@ -754,12 +754,12 @@ define amdgpu_ps i32 @s_sqrt_f32(float inreg %x) {
 ;
 ; GISEL-DAZ-LABEL: s_sqrt_f32:
 ; GISEL-DAZ:       ; %bb.0:
-; GISEL-DAZ-NEXT:    v_mov_b32_e32 v0, 0xf800000
+; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x4f800000
-; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, s0
+; GISEL-DAZ-NEXT:    v_mov_b32_e32 v0, s0
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, s0, v2
-; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
-; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
+; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v1
+; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
@@ -809,12 +809,12 @@ define amdgpu_ps i32 @s_sqrt_f32_ninf(float inreg %x) {
 ;
 ; GISEL-IEEE-LABEL: s_sqrt_f32_ninf:
 ; GISEL-IEEE:       ; %bb.0:
-; GISEL-IEEE-NEXT:    v_mov_b32_e32 v0, 0xf800000
+; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x4f800000
-; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, s0
+; GISEL-IEEE-NEXT:    v_mov_b32_e32 v0, s0
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, s0, v2
-; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
-; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v1
+; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
 ; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[0:1], -1, v1
 ; GISEL-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
@@ -858,12 +858,12 @@ define amdgpu_ps i32 @s_sqrt_f32_ninf(float inreg %x) {
 ;
 ; GISEL-DAZ-LABEL: s_sqrt_f32_ninf:
 ; GISEL-DAZ:       ; %bb.0:
-; GISEL-DAZ-NEXT:    v_mov_b32_e32 v0, 0xf800000
+; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x4f800000
-; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, s0
+; GISEL-DAZ-NEXT:    v_mov_b32_e32 v0, s0
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, s0, v2
-; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
-; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
+; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v1
+; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
@@ -938,7 +938,7 @@ define float @v_sqrt_f32_nsz(float %x) {
 ; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
 ; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -983,7 +983,7 @@ define float @v_sqrt_f32_nsz(float %x) {
 ; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -1032,7 +1032,7 @@ define float @v_sqrt_f32_nnan_ninf(float %x) {
 ; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
 ; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -1077,7 +1077,7 @@ define float @v_sqrt_f32_nnan_ninf(float %x) {
 ; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -1126,7 +1126,7 @@ define float @v_sqrt_f32_nnan_ninf_nsz(float %x) {
 ; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
 ; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -1171,7 +1171,7 @@ define float @v_sqrt_f32_nnan_ninf_nsz(float %x) {
 ; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -1349,7 +1349,7 @@ define <2 x float> @v_sqrt_v2f32(<2 x float> %x) {
 ; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0xf800000
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v0
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, v2, v0
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v3, v0
 ; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], -1, v3
@@ -1363,7 +1363,7 @@ define <2 x float> @v_sqrt_v2f32(<2 x float> %x) {
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v4, 0x37800000, v3
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v1
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, v2, v1
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v1, v2
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v2, v1
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v4, 0x260
@@ -1425,7 +1425,7 @@ define <2 x float> @v_sqrt_v2f32(<2 x float> %x) {
 ; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0xf800000
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v0
-; GISEL-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, v2, v0
+; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
 ; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v3, v0
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v4, v0, v3
@@ -1438,7 +1438,7 @@ define <2 x float> @v_sqrt_v2f32(<2 x float> %x) {
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v4, 0x37800000, v3
 ; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v1
-; GISEL-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, v2, v1
+; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v1, v2
 ; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v2, v1
 ; GISEL-DAZ-NEXT:    v_mov_b32_e32 v4, 0x260
@@ -1521,7 +1521,7 @@ define <3 x float> @v_sqrt_v3f32(<3 x float> %x) {
 ; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v3, 0xf800000
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v0
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, v3, v0
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v3
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v4, v0
 ; GISEL-IEEE-NEXT:    v_add_i32_e64 v5, s[4:5], -1, v4
@@ -1535,7 +1535,7 @@ define <3 x float> @v_sqrt_v3f32(<3 x float> %x) {
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v5, 0x37800000, v4
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v6, 0x4f800000, v1
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, v3, v1
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v1, v3
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v6, v1
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v5, 0x260
@@ -1552,7 +1552,7 @@ define <3 x float> @v_sqrt_v3f32(<3 x float> %x) {
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v6, 0x37800000, v4
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v6, 0x4f800000, v2
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, v3, v2
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v2, v3
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v3, v2
 ; GISEL-IEEE-NEXT:    v_cmp_class_f32_e64 s[4:5], v1, v5
@@ -1628,7 +1628,7 @@ define <3 x float> @v_sqrt_v3f32(<3 x float> %x) {
 ; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-DAZ-NEXT:    v_mov_b32_e32 v3, 0xf800000
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v0
-; GISEL-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, v3, v0
+; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v3
 ; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v4, v0
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v5, v0, v4
@@ -1641,7 +1641,7 @@ define <3 x float> @v_sqrt_v3f32(<3 x float> %x) {
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v5, 0x37800000, v4
 ; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v1
-; GISEL-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, v3, v1
+; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v1, v3
 ; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v5, v1
 ; GISEL-DAZ-NEXT:    v_mov_b32_e32 v6, 0x260
@@ -1655,7 +1655,7 @@ define <3 x float> @v_sqrt_v3f32(<3 x float> %x) {
 ; GISEL-DAZ-NEXT:    v_fma_f32 v7, -v4, v4, v1
 ; GISEL-DAZ-NEXT:    v_fma_f32 v4, v7, v5, v4
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v7, 0x4f800000, v2
-; GISEL-DAZ-NEXT:    v_cmp_gt_f32_e64 s[4:5], v3, v2
+; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e64 s[4:5], v2, v3
 ; GISEL-DAZ-NEXT:    v_cndmask_b32_e64 v2, v2, v7, s[4:5]
 ; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v3, v2
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v5, 0x37800000, v4
@@ -1708,7 +1708,7 @@ define float @v_sqrt_f32_ulp05(float %x) {
 ; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
 ; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -1753,7 +1753,7 @@ define float @v_sqrt_f32_ulp05(float %x) {
 ; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -1803,7 +1803,7 @@ define float @v_sqrt_f32_ulp1(float %x) {
 ; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
 ; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -2023,7 +2023,7 @@ define <2 x float> @v_sqrt_v2f32_ulp1(<2 x float> %x) {
 ; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0xf800000
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v0
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, v2, v0
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v3, v0
 ; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], -1, v3
@@ -2037,7 +2037,7 @@ define <2 x float> @v_sqrt_v2f32_ulp1(<2 x float> %x) {
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v4, 0x37800000, v3
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v1
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, v2, v1
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v1, v2
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v2, v1
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v4, 0x260
@@ -2166,7 +2166,7 @@ define <2 x float> @v_sqrt_v2f32_ulp1_fabs(<2 x float> %x) {
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0xf800000
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v3, 0x4f800000
 ; GISEL-IEEE-NEXT:    v_mul_f32_e64 v4, |v0|, v3
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e64 vcc, v2, |v0|
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v2
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v0, |v0|, v4, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v4, v0
 ; GISEL-IEEE-NEXT:    v_mul_f32_e64 v3, |v1|, v3
@@ -2180,7 +2180,7 @@ define <2 x float> @v_sqrt_v2f32_ulp1_fabs(<2 x float> %x) {
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v4, v4, v7, s[4:5]
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v5, 0x37800000, v4
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e64 vcc, v2, |v1|
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, v2
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, |v1|, v3, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v2, v1
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v5, 0x260
@@ -3175,7 +3175,7 @@ define float @v_sqrt_f32_ninf_known_never_zero(float nofpclass(zero) %x) {
 ; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
 ; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -3220,7 +3220,7 @@ define float @v_sqrt_f32_ninf_known_never_zero(float nofpclass(zero) %x) {
 ; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -3269,7 +3269,7 @@ define float @v_sqrt_f32_known_never_zero(float nofpclass(zero) %x) {
 ; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
 ; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -3314,7 +3314,7 @@ define float @v_sqrt_f32_known_never_zero(float nofpclass(zero) %x) {
 ; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -3363,7 +3363,7 @@ define float @v_sqrt_f32_known_never_zero_never_inf(float nofpclass(zero inf) %x
 ; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
 ; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -3408,7 +3408,7 @@ define float @v_sqrt_f32_known_never_zero_never_inf(float nofpclass(zero inf) %x
 ; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -3457,7 +3457,7 @@ define float @v_sqrt_f32_known_never_zero_never_ninf(float nofpclass(zero ninf)
 ; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
 ; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -3502,7 +3502,7 @@ define float @v_sqrt_f32_known_never_zero_never_ninf(float nofpclass(zero ninf)
 ; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -3551,7 +3551,7 @@ define float @v_sqrt_f32_known_never_zero_never_pinf(float nofpclass(zero pinf)
 ; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
 ; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
@@ -3596,7 +3596,7 @@ define float @v_sqrt_f32_known_never_zero_never_pinf(float nofpclass(zero pinf)
 ; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -3652,7 +3652,7 @@ define float @v_sqrt_f32_frexp_src(float %x) {
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GISEL-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; GISEL-NEXT:    v_rsq_f32_e32 v1, v0
 ; GISEL-NEXT:    v_mul_f32_e32 v2, v0, v1
@@ -3837,7 +3837,7 @@ define float @v_elim_redun_check_ult_sqrt(float %in) {
 ; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v0, v2, vcc
 ; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v2, v1
 ; GISEL-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], -1, v2
@@ -3886,7 +3886,7 @@ define float @v_elim_redun_check_ult_sqrt(float %in) {
 ; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
-; GISEL-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, v1, v0
+; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
 ; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v0, v2, vcc
 ; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v2, v1
 ; GISEL-DAZ-NEXT:    v_mul_f32_e32 v3, v1, v2

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
index c2f6fbfe4667c0..7f4cf19e9b85b4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
@@ -5736,20 +5736,16 @@ define float @v_log_f32_0() {
 ; SI-GISEL:       ; %bb.0:
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-GISEL-NEXT:    v_log_f32_e32 v0, 0
-; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x3f317217
-; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x3377d1cf
-; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT:    v_mul_f32_e32 v4, 0x3f317217, v0
-; SI-GISEL-NEXT:    v_fma_f32 v2, v0, v2, -v4
-; SI-GISEL-NEXT:    v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x3f317217
+; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x3377d1cf
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
-; SI-GISEL-NEXT:    v_add_f32_e32 v2, v4, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v4, 0x3f317217, v0
+; SI-GISEL-NEXT:    v_fma_f32 v1, v0, v1, -v4
+; SI-GISEL-NEXT:    v_fma_f32 v1, v0, v2, v1
+; SI-GISEL-NEXT:    v_add_f32_e32 v1, v4, v1
 ; SI-GISEL-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v3
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x41b17218
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc
-; SI-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-GISEL-NEXT:    v_subrev_f32_e32 v0, 0x41b17218, v0
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-SDAG-LABEL: v_log_f32_0:
@@ -5775,23 +5771,19 @@ define float @v_log_f32_0() {
 ; VI-GISEL:       ; %bb.0:
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-GISEL-NEXT:    v_log_f32_e32 v0, 0
-; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT:    v_and_b32_e32 v2, 0xfffff000, v0
-; VI-GISEL-NEXT:    v_sub_f32_e32 v3, v0, v2
+; VI-GISEL-NEXT:    v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT:    v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 0x3805fdf4, v1
 ; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 0x3805fdf4, v2
-; VI-GISEL-NEXT:    v_mul_f32_e32 v5, 0x3805fdf4, v3
-; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 0x3f317000, v3
-; VI-GISEL-NEXT:    v_add_f32_e32 v4, v4, v5
-; VI-GISEL-NEXT:    v_add_f32_e32 v3, v3, v4
 ; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 0x3f317000, v2
+; VI-GISEL-NEXT:    v_add_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT:    v_mul_f32_e32 v1, 0x3f317000, v1
 ; VI-GISEL-NEXT:    v_add_f32_e32 v2, v2, v3
-; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v3
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x41b17218
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc
-; VI-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT:    v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v2
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; VI-GISEL-NEXT:    v_subrev_f32_e32 v0, 0x41b17218, v0
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_log_f32_0:
@@ -5814,20 +5806,16 @@ define float @v_log_f32_0() {
 ; GFX900-GISEL:       ; %bb.0:
 ; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-GISEL-NEXT:    v_log_f32_e32 v0, 0
-; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x3f317217
-; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x3377d1cf
-; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT:    v_mul_f32_e32 v4, 0x3f317217, v0
-; GFX900-GISEL-NEXT:    v_fma_f32 v2, v0, v2, -v4
-; GFX900-GISEL-NEXT:    v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x3f317217
+; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x3377d1cf
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
-; GFX900-GISEL-NEXT:    v_add_f32_e32 v2, v4, v2
+; GFX900-GISEL-NEXT:    v_mul_f32_e32 v4, 0x3f317217, v0
+; GFX900-GISEL-NEXT:    v_fma_f32 v1, v0, v1, -v4
+; GFX900-GISEL-NEXT:    v_fma_f32 v1, v0, v2, v1
+; GFX900-GISEL-NEXT:    v_add_f32_e32 v1, v4, v1
 ; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v3
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x41b17218
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc
-; GFX900-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-GISEL-NEXT:    v_subrev_f32_e32 v0, 0x41b17218, v0
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-SDAG-LABEL: v_log_f32_0:
@@ -5851,7 +5839,6 @@ define float @v_log_f32_0() {
 ; GFX1100-GISEL:       ; %bb.0:
 ; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-GISEL-NEXT:    v_log_f32_e32 v0, 0
-; GFX1100-GISEL-NEXT:    v_cmp_lt_f32_e64 s0, 0, 0x800000
 ; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-GISEL-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
 ; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
@@ -5861,9 +5848,8 @@ define float @v_log_f32_0() {
 ; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-GISEL-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 0x41b17218, s0
 ; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT:    v_subrev_f32_e32 v0, 0x41b17218, v0
 ; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log_f32_0:

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
index 0a1f7ab6fc0ae3..1c64e6b76c9577 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
@@ -5736,20 +5736,16 @@ define float @v_log10_f32_0() {
 ; SI-GISEL:       ; %bb.0:
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SI-GISEL-NEXT:    v_log_f32_e32 v0, 0
-; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x3e9a209a
-; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x3284fbcf
-; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x800000
-; SI-GISEL-NEXT:    v_mul_f32_e32 v4, 0x3e9a209a, v0
-; SI-GISEL-NEXT:    v_fma_f32 v2, v0, v2, -v4
-; SI-GISEL-NEXT:    v_fma_f32 v2, v0, v3, v2
+; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x3e9a209a
+; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x3284fbcf
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
-; SI-GISEL-NEXT:    v_add_f32_e32 v2, v4, v2
+; SI-GISEL-NEXT:    v_mul_f32_e32 v4, 0x3e9a209a, v0
+; SI-GISEL-NEXT:    v_fma_f32 v1, v0, v1, -v4
+; SI-GISEL-NEXT:    v_fma_f32 v1, v0, v2, v1
+; SI-GISEL-NEXT:    v_add_f32_e32 v1, v4, v1
 ; SI-GISEL-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v3
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x411a209b
-; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc
-; SI-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-GISEL-NEXT:    v_subrev_f32_e32 v0, 0x411a209b, v0
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-SDAG-LABEL: v_log10_f32_0:
@@ -5775,23 +5771,19 @@ define float @v_log10_f32_0() {
 ; VI-GISEL:       ; %bb.0:
 ; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-GISEL-NEXT:    v_log_f32_e32 v0, 0
-; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x800000
-; VI-GISEL-NEXT:    v_and_b32_e32 v2, 0xfffff000, v0
-; VI-GISEL-NEXT:    v_sub_f32_e32 v3, v0, v2
+; VI-GISEL-NEXT:    v_and_b32_e32 v1, 0xfffff000, v0
+; VI-GISEL-NEXT:    v_sub_f32_e32 v2, v0, v1
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 0x369a84fb, v1
 ; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 0x369a84fb, v2
-; VI-GISEL-NEXT:    v_mul_f32_e32 v5, 0x369a84fb, v3
-; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 0x3e9a2000, v3
-; VI-GISEL-NEXT:    v_add_f32_e32 v4, v4, v5
-; VI-GISEL-NEXT:    v_add_f32_e32 v3, v3, v4
 ; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 0x3e9a2000, v2
+; VI-GISEL-NEXT:    v_add_f32_e32 v3, v3, v4
+; VI-GISEL-NEXT:    v_mul_f32_e32 v1, 0x3e9a2000, v1
 ; VI-GISEL-NEXT:    v_add_f32_e32 v2, v2, v3
-; VI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v3
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x411a209b
-; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc
-; VI-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; VI-GISEL-NEXT:    v_add_f32_e32 v1, v1, v2
+; VI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f800000
+; VI-GISEL-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v2
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; VI-GISEL-NEXT:    v_subrev_f32_e32 v0, 0x411a209b, v0
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_log10_f32_0:
@@ -5814,20 +5806,16 @@ define float @v_log10_f32_0() {
 ; GFX900-GISEL:       ; %bb.0:
 ; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX900-GISEL-NEXT:    v_log_f32_e32 v0, 0
-; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x3e9a209a
-; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x3284fbcf
-; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x800000
-; GFX900-GISEL-NEXT:    v_mul_f32_e32 v4, 0x3e9a209a, v0
-; GFX900-GISEL-NEXT:    v_fma_f32 v2, v0, v2, -v4
-; GFX900-GISEL-NEXT:    v_fma_f32 v2, v0, v3, v2
+; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x3e9a209a
+; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x3284fbcf
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7f800000
-; GFX900-GISEL-NEXT:    v_add_f32_e32 v2, v4, v2
+; GFX900-GISEL-NEXT:    v_mul_f32_e32 v4, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT:    v_fma_f32 v1, v0, v1, -v4
+; GFX900-GISEL-NEXT:    v_fma_f32 v1, v0, v2, v1
+; GFX900-GISEL-NEXT:    v_add_f32_e32 v1, v4, v1
 ; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, v3
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX900-GISEL-NEXT:    v_mov_b32_e32 v2, 0x411a209b
-; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc
-; GFX900-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX900-GISEL-NEXT:    v_subrev_f32_e32 v0, 0x411a209b, v0
 ; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-SDAG-LABEL: v_log10_f32_0:
@@ -5851,7 +5839,6 @@ define float @v_log10_f32_0() {
 ; GFX1100-GISEL:       ; %bb.0:
 ; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-GISEL-NEXT:    v_log_f32_e32 v0, 0
-; GFX1100-GISEL-NEXT:    v_cmp_lt_f32_e64 s0, 0, 0x800000
 ; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-GISEL-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v0
 ; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
@@ -5861,9 +5848,8 @@ define float @v_log10_f32_0() {
 ; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-GISEL-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
-; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 0x411a209b, s0
 ; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-GISEL-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1100-GISEL-NEXT:    v_subrev_f32_e32 v0, 0x411a209b, v0
 ; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_log10_f32_0:

diff  --git a/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll b/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll
index a642543c3780db..6b097bd71c9f14 100644
--- a/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll
+++ b/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll
@@ -202,7 +202,7 @@ define amdgpu_cs float @v_s_sqrt_f32(float inreg %src) {
 ;
 ; GFX12-GISEL-LABEL: v_s_sqrt_f32:
 ; GFX12-GISEL:       ; %bb.0:
-; GFX12-GISEL-NEXT:    s_cmp_gt_f32 0xf800000, s0
+; GFX12-GISEL-NEXT:    s_cmp_lt_f32 s0, 0xf800000
 ; GFX12-GISEL-NEXT:    s_mul_f32 s2, s0, 0x4f800000
 ; GFX12-GISEL-NEXT:    s_cselect_b32 s1, 1, 0
 ; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(SALU_CYCLE_1)


        


More information about the llvm-commits mailing list