[llvm] [GlobalIsel] Canonicalize G_ICMP (PR #108755)

Sun Sep 15 06:37:53 PDT 2024

https://github.com/tschuett created https://github.com/llvm/llvm-project/pull/108755

As a side-effect, we start constant folding icmps.

>From 3730790a9f5d96a31346f25db1e9e1f18982c0eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Sun, 15 Sep 2024 15:36:11 +0200
Subject: [PATCH] [GlobalIsel] Canonicalize G_ICMP

As a side-effect, we start constant folding icmps.
---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h  |  6 ++
 .../CodeGen/GlobalISel/GenericMachineInstrs.h | 10 ++
 llvm/include/llvm/CodeGen/GlobalISel/Utils.h  | 22 +++++
 .../include/llvm/Target/GlobalISel/Combine.td | 25 +++--
 llvm/lib/CodeGen/GlobalISel/CMakeLists.txt    |  1 +
 .../GlobalISel/CombinerHelperCompares.cpp     | 86 +++++++++++++++++
 llvm/lib/CodeGen/GlobalISel/Utils.cpp         | 40 ++++++++
 .../AArch64/GlobalISel/combine-visit-icmp.mir | 95 +++++++++++++++++++
 llvm/test/CodeGen/AArch64/arm64-ccmp.ll       | 60 ++++--------
 .../CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll     | 18 ++--
 llvm/test/CodeGen/AMDGPU/rsq.f64.ll           | 46 ++++-----
 11 files changed, 328 insertions(+), 81 deletions(-)
 create mode 100644 llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 828532dcffb7d3..37c9422d192754 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -20,6 +20,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/Register.h"
 #include "llvm/CodeGenTypes/LowLevelType.h"
 #include "llvm/IR/InstrTypes.h"
@@ -909,6 +910,8 @@ class CombinerHelper {
   bool matchCastOfBuildVector(const MachineInstr &CastMI,
                               const MachineInstr &BVMI, BuildFnTy &MatchInfo);
 
+  bool matchCanonicalizeICmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
+
 private:
   /// Checks for legality of an indexed variant of \p LdSt.
   bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
@@ -1023,6 +1026,9 @@ class CombinerHelper {
   bool tryFoldLogicOfFCmps(GLogicalBinOp *Logic, BuildFnTy &MatchInfo);
 
   bool isCastFree(unsigned Opcode, LLT ToTy, LLT FromTy) const;
+
+  bool constantFoldICmp(const GICmp &ICmp, const GIConstant &LHSCst,
+                        const GIConstant &RHSCst, BuildFnTy &MatchInfo);
 };
 } // namespace llvm
 
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index ef1171d9f1f64d..2c459ccdd8a731 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -950,6 +950,16 @@ class GExtOrTruncOp : public GCastOp {
   };
 };
 
+/// Represents a splat vector.
+class GSplatVector : public GenericMachineInstr {
+public:
+  Register getValueReg() const { return getOperand(1).getReg(); }
+
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_SPLAT_VECTOR;
+  };
+};
+
 } // namespace llvm
 
 #endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index cf5fd6d6f288bd..4aeacfdf4ecdf0 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -593,5 +593,27 @@ bool isGuaranteedNotToBeUndef(Register Reg, const MachineRegisterInfo &MRI,
 /// estimate of the type.
 Type *getTypeForLLT(LLT Ty, LLVMContext &C);
 
+enum class GIConstantKind { Scalar, FixedVector, ScalableVector };
+
+/// An integer-like constant.
+class GIConstant {
+  GIConstantKind Kind;
+  SmallVector<APInt> Values;
+  APInt Value;
+
+public:
+  GIConstant(ArrayRef<APInt> Values)
+      : Kind(GIConstantKind::FixedVector), Values(Values) {};
+  GIConstant(const APInt &Value, GIConstantKind Kind)
+      : Kind(Kind), Value(Value) {};
+
+  GIConstantKind getKind() const { return Kind; }
+
+  APInt getScalarValue() const;
+
+  static std::optional<GIConstant> getConstant(Register Const,
+                                               const MachineRegisterInfo &MRI);
+};
+
 } // End namespace llvm.
 #endif
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index a595a51d7b01ff..c66212d2ab12c8 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1007,9 +1007,6 @@ def double_icmp_zero_or_combine: GICombineRule<
          (G_ICMP $root, $p, $ordst, 0))
 >;
 
-def double_icmp_zero_and_or_combine : GICombineGroup<[double_icmp_zero_and_combine,
-                                                      double_icmp_zero_or_combine]>;
-
 def and_or_disjoint_mask : GICombineRule<
   (defs root:$root, build_fn_matchinfo:$info),
   (match (wip_match_opcode G_AND):$root,
@@ -1918,6 +1915,20 @@ def cast_combines: GICombineGroup<[
   integer_of_truncate
 ]>;
 
+def canonicalize_icmp : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (G_ICMP $root, $pred, $lhs, $rhs):$cmp,
+         [{ return Helper.matchCanonicalizeICmp(*${cmp}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
+
+def icmp_combines: GICombineGroup<[
+  canonicalize_icmp,
+  icmp_to_true_false_known_bits,
+  icmp_to_lhs_known_bits,
+  double_icmp_zero_and_combine,
+  double_icmp_zero_or_combine,
+  redundant_binop_in_equality
+]>;
 
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
@@ -1951,7 +1962,7 @@ def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p,
 
 def known_bits_simplifications : GICombineGroup<[
   redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
-  zext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits,
+  zext_trunc_fold,
   sext_inreg_to_zext_inreg]>;
 
 def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
@@ -1984,7 +1995,7 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
     combine_extracted_vector_load,
     undef_combines, identity_combines, phi_combines,
     simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands, shifts_too_big,
-    reassocs, ptr_add_immed_chain,
+    reassocs, ptr_add_immed_chain, icmp_combines,
     shl_ashr_to_sext_inreg, sext_inreg_of_load,
     width_reduction_combines, select_combines,
     known_bits_simplifications,
@@ -1998,9 +2009,9 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
     constant_fold_cast_op, fabs_fneg_fold,
     intdiv_combines, mulh_combines, redundant_neg_operands,
     and_or_disjoint_mask, fma_combines, fold_binop_into_select,
-    sub_add_reg, select_to_minmax, redundant_binop_in_equality,
+    sub_add_reg, select_to_minmax, 
     fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
-    combine_concat_vector, double_icmp_zero_and_or_combine, match_addos,
+    combine_concat_vector, match_addos,
     sext_trunc, zext_trunc, prefer_sign_combines, combine_shuffle_concat]>;
 
 // A combine group used to for prelegalizer combiners at -O0. The combines in
diff --git a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
index a15b76440364b1..af1717dbf76f39 100644
--- a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
+++ b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
@@ -7,6 +7,7 @@ add_llvm_component_library(LLVMGlobalISel
   Combiner.cpp
   CombinerHelper.cpp
   CombinerHelperCasts.cpp
+  CombinerHelperCompares.cpp
   CombinerHelperVectorOps.cpp
   GIMatchTableExecutor.cpp
   GISelChangeObserver.cpp
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
new file mode 100644
index 00000000000000..39ac4ee205fb7a
--- /dev/null
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
@@ -0,0 +1,86 @@
+//===- CombinerHelperCompares.cpp------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements CombinerHelper for G_ICMP.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstdlib>
+
+#define DEBUG_TYPE "gi-combiner"
+
+using namespace llvm;
+
+bool CombinerHelper::constantFoldICmp(const GICmp &ICmp,
+                                      const GIConstant &LHSCst,
+                                      const GIConstant &RHSCst,
+                                      BuildFnTy &MatchInfo) {
+  if (LHSCst.getKind() != GIConstantKind::Scalar)
+    return false;
+
+  Register Dst = ICmp.getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+
+  if (!isConstantLegalOrBeforeLegalizer(DstTy))
+    return false;
+
+  CmpInst::Predicate Pred = ICmp.getCond();
+  APInt LHS = LHSCst.getScalarValue();
+  APInt RHS = RHSCst.getScalarValue();
+
+  bool Result = ICmpInst::compare(LHS, RHS, Pred);
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    if (Result)
+      B.buildConstant(Dst, getICmpTrueVal(getTargetLowering(),
+                                          /*IsVector=*/DstTy.isVector(),
+                                          /*IsFP=*/false));
+    else
+      B.buildConstant(Dst, 0);
+  };
+
+  return true;
+}
+
+bool CombinerHelper::matchCanonicalizeICmp(const MachineInstr &MI,
+                                           BuildFnTy &MatchInfo) {
+  const GICmp *Cmp = cast<GICmp>(&MI);
+
+  Register Dst = Cmp->getReg(0);
+  Register LHS = Cmp->getLHSReg();
+  Register RHS = Cmp->getRHSReg();
+
+  CmpInst::Predicate Pred = Cmp->getCond();
+  assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
+  if (auto CLHS = GIConstant::getConstant(LHS, MRI)) {
+    if (auto CRHS = GIConstant::getConstant(RHS, MRI))
+      return constantFoldICmp(*Cmp, *CLHS, *CRHS, MatchInfo);
+
+    // If we have a constant, make sure it is on the RHS.
+    std::swap(LHS, RHS);
+    Pred = CmpInst::getSwappedPredicate(Pred);
+
+    MatchInfo = [=](MachineIRBuilder &B) { B.buildICmp(Pred, Dst, LHS, RHS); };
+    return true;
+  }
+
+  return false;
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 1713a582d5cfe5..396f0f07ae9050 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1968,3 +1968,43 @@ Type *llvm::getTypeForLLT(LLT Ty, LLVMContext &C) {
                            Ty.getElementCount());
   return IntegerType::get(C, Ty.getSizeInBits());
 }
+
+APInt llvm::GIConstant::getScalarValue() const {
+  assert(Kind == GIConstantKind::Scalar && "Expected scalar constant");
+
+  return Value;
+}
+
+std::optional<GIConstant>
+llvm::GIConstant::getConstant(Register Const, const MachineRegisterInfo &MRI) {
+  MachineInstr *Constant = getDefIgnoringCopies(Const, MRI);
+
+  if (GSplatVector *Splat = dyn_cast<GSplatVector>(Constant)) {
+    std::optional<ValueAndVReg> MayBeConstant =
+        getIConstantVRegValWithLookThrough(Splat->getValueReg(), MRI);
+    if (!MayBeConstant)
+      return std::nullopt;
+    return GIConstant(MayBeConstant->Value, GIConstantKind::ScalableVector);
+  }
+
+  if (GBuildVector *Build = dyn_cast<GBuildVector>(Constant)) {
+    SmallVector<APInt> Values;
+    unsigned NumSources = Build->getNumSources();
+    for (unsigned I = 0; I < NumSources; ++I) {
+      Register SrcReg = Build->getSourceReg(I);
+      std::optional<ValueAndVReg> MayBeConstant =
+          getIConstantVRegValWithLookThrough(SrcReg, MRI);
+      if (!MayBeConstant)
+        return std::nullopt;
+      Values.push_back(MayBeConstant->Value);
+    }
+    return GIConstant(Values);
+  }
+
+  std::optional<ValueAndVReg> MayBeConstant =
+      getIConstantVRegValWithLookThrough(Const, MRI);
+  if (!MayBeConstant)
+    return std::nullopt;
+
+  return GIConstant(MayBeConstant->Value, GIConstantKind::Scalar);
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir
new file mode 100644
index 00000000000000..bf04ac02d086ab
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir
@@ -0,0 +1,95 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs  %s | FileCheck %s --check-prefixes=CHECK
+
+---
+name:            test_icmp_canon
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_icmp_canon
+    ; CHECK: %lhs:_(s64) = G_CONSTANT i64 11
+    ; CHECK-NEXT: %rhs:_(s64) = COPY $x0
+    ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(sgt), %rhs(s64), %lhs
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs:_(s64) = G_CONSTANT i64 11
+    %rhs:_(s64) = COPY $x0
+    %res:_(s32) = G_ICMP intpred(slt), %lhs(s64), %rhs
+    $w0 = COPY %res(s32)
+...
+---
+name:            test_icmp_no_canon
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_icmp_no_canon
+    ; CHECK: %lhs:_(s64) = COPY $x0
+    ; CHECK-NEXT: %rhs:_(s64) = G_CONSTANT i64 11
+    ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(slt), %lhs(s64), %rhs
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %lhs:_(s64) = COPY $x0
+    %rhs:_(s64) = G_CONSTANT i64 11
+    %res:_(s32) = G_ICMP intpred(slt), %lhs(s64), %rhs
+    $w0 = COPY %res(s32)
+...
+---
+name:            test_icmp_canon_bv
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_icmp_canon_bv
+    ; CHECK: %opaque1:_(s64) = COPY $x0
+    ; CHECK-NEXT: %opaque2:_(s64) = COPY $x0
+    ; CHECK-NEXT: %const1:_(s64) = G_CONSTANT i64 11
+    ; CHECK-NEXT: %const2:_(s64) = G_CONSTANT i64 12
+    ; CHECK-NEXT: %lhs:_(<2 x s64>) = G_BUILD_VECTOR %const1(s64), %const2(s64)
+    ; CHECK-NEXT: %rhs:_(<2 x s64>) = G_BUILD_VECTOR %opaque1(s64), %opaque2(s64)
+    ; CHECK-NEXT: %res:_(<2 x s32>) = G_ICMP intpred(sgt), %rhs(<2 x s64>), %lhs
+    ; CHECK-NEXT: $x0 = COPY %res(<2 x s32>)
+    %opaque1:_(s64) = COPY $x0
+    %opaque2:_(s64) = COPY $x0
+    %const1:_(s64) = G_CONSTANT i64 11
+    %const2:_(s64) = G_CONSTANT i64 12
+    %lhs:_(<2 x s64>) = G_BUILD_VECTOR %const1(s64), %const2(s64)
+    %rhs:_(<2 x s64>) = G_BUILD_VECTOR %opaque1(s64), %opaque2(s64)
+    %res:_(<2 x s32>) = G_ICMP intpred(slt), %lhs(<2 x s64>), %rhs
+    $x0 = COPY %res(<2 x s32>)
+...
+---
+name:            test_icmp_no_canon_bv_neither_const
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_icmp_no_canon_bv
+    ; CHECK: %opaque1:_(s64) = COPY $x0
+    ; CHECK-NEXT: %opaque2:_(s64) = COPY $x0
+    ; CHECK-NEXT: %const1:_(s64) = G_CONSTANT i64 11
+    ; CHECK-NEXT: %const2:_(s64) = G_CONSTANT i64 12
+    ; CHECK-NEXT: %lhs:_(<2 x s64>) = G_BUILD_VECTOR %const1(s64), %opaque2(s64)
+    ; CHECK-NEXT: %rhs:_(<2 x s64>) = G_BUILD_VECTOR %opaque1(s64), %const2(s64)
+    ; CHECK-NEXT: %res:_(<2 x s32>) = G_ICMP intpred(slt), %lhs(<2 x s64>), %rhs
+    ; CHECK-NEXT: $x0 = COPY %res(<2 x s32>)
+    %opaque1:_(s64) = COPY $x0
+    %opaque2:_(s64) = COPY $x0
+    %const1:_(s64) = G_CONSTANT i64 11
+    %const2:_(s64) = G_CONSTANT i64 12
+    %lhs:_(<2 x s64>) = G_BUILD_VECTOR %const1(s64), %opaque2(s64)
+    %rhs:_(<2 x s64>) = G_BUILD_VECTOR %opaque1(s64), %const2(s64)
+    %res:_(<2 x s32>) = G_ICMP intpred(slt), %lhs(<2 x s64>), %rhs
+    $x0 = COPY %res(<2 x s32>)
+...
+---
+name:            test_icmp_canon_splat
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_icmp_canon_splat
+    ; CHECK: %const:_(s64) = G_CONSTANT i64 11
+    ; CHECK-NEXT: %lhs:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR %const(s64)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: %rhs:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[COPY]](s64)
+    ; CHECK-NEXT: %res:_(<vscale x 2 x s32>) = G_ICMP intpred(sgt), %rhs(<vscale x 2 x s64>), %lhs
+    ; CHECK-NEXT: %z:_(<vscale x 2 x s64>) = G_ZEXT %res(<vscale x 2 x s32>)
+    ; CHECK-NEXT: $z0 = COPY %z(<vscale x 2 x s64>)
+    %const:_(s64) = G_CONSTANT i64 11
+    %lhs:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR %const:_(s64)
+    %1:_(s64) = COPY $x1
+    %rhs:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR %1:_(s64)
+    %res:_(<vscale x 2 x s32>) = G_ICMP intpred(slt), %lhs(<vscale x 2 x s64>), %rhs
+    %z:_(<vscale x 2 x s64>) = G_ZEXT  %res
+    $z0 = COPY %z(<vscale x 2 x s64>)
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
index 50afc79a5a5768..06e957fdcc6a2a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -457,20 +457,12 @@ sw.bb.i.i:
 }
 
 define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
-; SDISEL-LABEL: select_and:
-; SDISEL:       ; %bb.0:
-; SDISEL-NEXT:    cmp w1, #5
-; SDISEL-NEXT:    ccmp w0, w1, #0, ne
-; SDISEL-NEXT:    csel x0, x2, x3, lt
-; SDISEL-NEXT:    ret
-;
-; GISEL-LABEL: select_and:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov w8, #5 ; =0x5
-; GISEL-NEXT:    cmp w8, w1
-; GISEL-NEXT:    ccmp w0, w1, #0, ne
-; GISEL-NEXT:    csel x0, x2, x3, lt
-; GISEL-NEXT:    ret
+; CHECK-LABEL: select_and:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    cmp w1, #5
+; CHECK-NEXT:    ccmp w0, w1, #0, ne
+; CHECK-NEXT:    csel x0, x2, x3, lt
+; CHECK-NEXT:    ret
   %1 = icmp slt i32 %w0, %w1
   %2 = icmp ne i32 5, %w1
   %3 = and i1 %1, %2
@@ -479,20 +471,12 @@ define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
 }
 
 define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
-; SDISEL-LABEL: select_or:
-; SDISEL:       ; %bb.0:
-; SDISEL-NEXT:    cmp w1, #5
-; SDISEL-NEXT:    ccmp w0, w1, #8, eq
-; SDISEL-NEXT:    csel x0, x2, x3, lt
-; SDISEL-NEXT:    ret
-;
-; GISEL-LABEL: select_or:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov w8, #5 ; =0x5
-; GISEL-NEXT:    cmp w8, w1
-; GISEL-NEXT:    ccmp w0, w1, #8, eq
-; GISEL-NEXT:    csel x0, x2, x3, lt
-; GISEL-NEXT:    ret
+; CHECK-LABEL: select_or:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    cmp w1, #5
+; CHECK-NEXT:    ccmp w0, w1, #8, eq
+; CHECK-NEXT:    csel x0, x2, x3, lt
+; CHECK-NEXT:    ret
   %1 = icmp slt i32 %w0, %w1
   %2 = icmp ne i32 5, %w1
   %3 = or i1 %1, %2
@@ -501,20 +485,12 @@ define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
 }
 
 define float @select_or_float(i32 %w0, i32 %w1, float %x2, float %x3) {
-; SDISEL-LABEL: select_or_float:
-; SDISEL:       ; %bb.0:
-; SDISEL-NEXT:    cmp w1, #5
-; SDISEL-NEXT:    ccmp w0, w1, #8, eq
-; SDISEL-NEXT:    fcsel s0, s0, s1, lt
-; SDISEL-NEXT:    ret
-;
-; GISEL-LABEL: select_or_float:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov w8, #5 ; =0x5
-; GISEL-NEXT:    cmp w8, w1
-; GISEL-NEXT:    ccmp w0, w1, #8, eq
-; GISEL-NEXT:    fcsel s0, s0, s1, lt
-; GISEL-NEXT:    ret
+; CHECK-LABEL: select_or_float:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    cmp w1, #5
+; CHECK-NEXT:    ccmp w0, w1, #8, eq
+; CHECK-NEXT:    fcsel s0, s0, s1, lt
+; CHECK-NEXT:    ret
   %1 = icmp slt i32 %w0, %w1
   %2 = icmp ne i32 5, %w1
   %3 = or i1 %1, %2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll
index b1cdf553b72423..0b66185d25f3e2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll
@@ -253,7 +253,7 @@ define double @v_rcp_f64(double %x) {
 ; GFX6-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; GFX6-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; GFX6-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
-; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
 ; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
 ; GFX6-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
@@ -346,7 +346,7 @@ define double @v_rcp_f64_arcp(double %x) {
 ; GFX6-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; GFX6-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; GFX6-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
-; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
 ; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
 ; GFX6-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
@@ -483,7 +483,7 @@ define double @v_rcp_f64_ulp25(double %x) {
 ; GFX6-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; GFX6-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; GFX6-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
-; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
 ; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
 ; GFX6-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
@@ -1115,7 +1115,7 @@ define <2 x double> @v_rcp_v2f64(<2 x double> %x) {
 ; GFX6-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0
 ; GFX6-NEXT:    v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15]
 ; GFX6-NEXT:    v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7]
-; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v20, v9
+; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v20
 ; GFX6-NEXT:    v_mul_f64 v[12:13], v[8:9], v[6:7]
 ; GFX6-NEXT:    v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
 ; GFX6-NEXT:    v_fma_f64 v[18:19], -v[4:5], v[12:13], v[8:9]
@@ -1126,7 +1126,7 @@ define <2 x double> @v_rcp_v2f64(<2 x double> %x) {
 ; GFX6-NEXT:    v_mul_f64 v[8:9], v[16:17], v[4:5]
 ; GFX6-NEXT:    v_div_fmas_f64 v[6:7], v[18:19], v[6:7], v[12:13]
 ; GFX6-NEXT:    v_fma_f64 v[12:13], -v[10:11], v[8:9], v[16:17]
-; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v20, v17
+; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v17, v20
 ; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v11
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0
@@ -1275,7 +1275,7 @@ define <2 x double> @v_rcp_v2f64_arcp(<2 x double> %x) {
 ; GFX6-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0
 ; GFX6-NEXT:    v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15]
 ; GFX6-NEXT:    v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7]
-; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v20, v9
+; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v20
 ; GFX6-NEXT:    v_mul_f64 v[12:13], v[8:9], v[6:7]
 ; GFX6-NEXT:    v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
 ; GFX6-NEXT:    v_fma_f64 v[18:19], -v[4:5], v[12:13], v[8:9]
@@ -1286,7 +1286,7 @@ define <2 x double> @v_rcp_v2f64_arcp(<2 x double> %x) {
 ; GFX6-NEXT:    v_mul_f64 v[8:9], v[16:17], v[4:5]
 ; GFX6-NEXT:    v_div_fmas_f64 v[6:7], v[18:19], v[6:7], v[12:13]
 ; GFX6-NEXT:    v_fma_f64 v[12:13], -v[10:11], v[8:9], v[16:17]
-; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v20, v17
+; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v17, v20
 ; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v11
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0
@@ -1502,7 +1502,7 @@ define <2 x double> @v_rcp_v2f64_ulp25(<2 x double> %x) {
 ; GFX6-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0
 ; GFX6-NEXT:    v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15]
 ; GFX6-NEXT:    v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7]
-; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v20, v9
+; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v20
 ; GFX6-NEXT:    v_mul_f64 v[12:13], v[8:9], v[6:7]
 ; GFX6-NEXT:    v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
 ; GFX6-NEXT:    v_fma_f64 v[18:19], -v[4:5], v[12:13], v[8:9]
@@ -1513,7 +1513,7 @@ define <2 x double> @v_rcp_v2f64_ulp25(<2 x double> %x) {
 ; GFX6-NEXT:    v_mul_f64 v[8:9], v[16:17], v[4:5]
 ; GFX6-NEXT:    v_div_fmas_f64 v[6:7], v[18:19], v[6:7], v[12:13]
 ; GFX6-NEXT:    v_fma_f64 v[12:13], -v[10:11], v[8:9], v[16:17]
-; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v20, v17
+; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v17, v20
 ; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v11
 ; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; GFX6-NEXT:    v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
index bd6e1f54e636d8..8f4a4b5afcdc1e 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
@@ -87,7 +87,7 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
 ; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[0:1]
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -267,7 +267,7 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
 ; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[0:1]
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -448,7 +448,7 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_f64(double inreg %x) {
 ; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[0:1], -1.0, v[0:1], -1.0
 ; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[0:1]
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -628,7 +628,7 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_neg_f64(double inreg %x) {
 ; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[0:1], -1.0, v[0:1], -1.0
 ; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[0:1]
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -809,7 +809,7 @@ define double @v_rsq_f64(double %x) {
 ; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -978,7 +978,7 @@ define double @v_rsq_f64_fabs(double %x) {
 ; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -1148,7 +1148,7 @@ define double @v_rsq_f64_missing_contract0(double %x) {
 ; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -1317,7 +1317,7 @@ define double @v_rsq_f64_missing_contract1(double %x) {
 ; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -1486,7 +1486,7 @@ define double @v_neg_rsq_f64(double %x) {
 ; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], -1.0, v[0:1], -1.0
 ; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -1716,7 +1716,7 @@ define <2 x double> @v_rsq_v2f64(<2 x double> %x) {
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[6:7], v[4:5], v[6:7]
 ; SI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[8:9]
 ; SI-GISEL-NEXT:    v_mul_f64 v[14:15], v[12:13], v[4:5]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v20, v13
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v13, v20
 ; SI-GISEL-NEXT:    v_fma_f64 v[16:17], -v[10:11], v[14:15], v[12:13]
 ; SI-GISEL-NEXT:    v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7]
@@ -1728,7 +1728,7 @@ define <2 x double> @v_rsq_v2f64(<2 x double> %x) {
 ; SI-GISEL-NEXT:    v_mul_f64 v[10:11], v[18:19], v[6:7]
 ; SI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
 ; SI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v20, v19
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v19, v20
 ; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v9
 ; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
@@ -2019,7 +2019,7 @@ define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) {
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[6:7], v[4:5], v[6:7]
 ; SI-GISEL-NEXT:    v_rcp_f64_e32 v[6:7], v[8:9]
 ; SI-GISEL-NEXT:    v_mul_f64 v[14:15], v[12:13], v[4:5]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v20, v13
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v13, v20
 ; SI-GISEL-NEXT:    v_fma_f64 v[16:17], -v[10:11], v[14:15], v[12:13]
 ; SI-GISEL-NEXT:    v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7]
@@ -2031,7 +2031,7 @@ define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) {
 ; SI-GISEL-NEXT:    v_mul_f64 v[10:11], v[18:19], v[6:7]
 ; SI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
 ; SI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v20, v19
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v19, v20
 ; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v9
 ; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
@@ -2293,7 +2293,7 @@ define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) {
 ; SI-GISEL-NEXT:    v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0xbff00000
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v13
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v13, v10
 ; SI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[6:7], 1.0
 ; SI-GISEL-NEXT:    v_div_scale_f64 v[18:19], s[4:5], s[4:5], v[2:3], s[4:5]
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7]
@@ -2568,7 +2568,7 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
 ; SI-GISEL-NEXT:    v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v10, 0xbff00000
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v13
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v13, v10
 ; SI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[6:7], 1.0
 ; SI-GISEL-NEXT:    v_div_scale_f64 v[18:19], s[4:5], 1.0, v[2:3], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7]
@@ -2578,7 +2578,7 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
 ; SI-GISEL-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19]
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v8, 0x3ff00000
 ; SI-GISEL-NEXT:    v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v8, v19
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v19, v8
 ; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v9
 ; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
@@ -2808,7 +2808,7 @@ define double @v_rsq_f64_fneg_fabs(double %x) {
 ; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -2979,7 +2979,7 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
 ; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -4167,7 +4167,7 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
 ; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -4846,7 +4846,7 @@ define double @v_rsq_amdgcn_sqrt_f64(double %x) {
 ; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -4927,7 +4927,7 @@ define double @v_neg_rsq_amdgcn_sqrt_f64(double %x) {
 ; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], -1.0, v[0:1], -1.0
 ; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -5008,7 +5008,7 @@ define amdgpu_ps <2 x i32> @s_rsq_amdgcn_sqrt_f64(double inreg %x) {
 ; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
 ; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[0:1]
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -5649,7 +5649,7 @@ define double @v_div_const_contract_sqrt_f64(double %x) {
 ; SI-GISEL-NEXT:    v_div_scale_f64 v[8:9], s[4:5], s[6:7], v[0:1], s[6:7]
 ; SI-GISEL-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; SI-GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v10
 ; SI-GISEL-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]