[llvm] [GlobalIsel] Canonicalize G_ICMP (PR #108755)
Thorsten Schütt via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 15 06:37:53 PDT 2024
https://github.com/tschuett created https://github.com/llvm/llvm-project/pull/108755
As a side-effect, we start constant folding icmps.
From 3730790a9f5d96a31346f25db1e9e1f18982c0eb Mon Sep 17 00:00:00 2001
From: Thorsten Schütt <schuett at gmail.com>
Date: Sun, 15 Sep 2024 15:36:11 +0200
Subject: [PATCH] [GlobalIsel] Canonicalize G_ICMP
As a side-effect, we start constant folding icmps.
---
.../llvm/CodeGen/GlobalISel/CombinerHelper.h | 6 ++
.../CodeGen/GlobalISel/GenericMachineInstrs.h | 10 ++
llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 22 +++++
.../include/llvm/Target/GlobalISel/Combine.td | 25 +++--
llvm/lib/CodeGen/GlobalISel/CMakeLists.txt | 1 +
.../GlobalISel/CombinerHelperCompares.cpp | 86 +++++++++++++++++
llvm/lib/CodeGen/GlobalISel/Utils.cpp | 40 ++++++++
.../AArch64/GlobalISel/combine-visit-icmp.mir | 95 +++++++++++++++++++
llvm/test/CodeGen/AArch64/arm64-ccmp.ll | 60 ++++--------
.../CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll | 18 ++--
llvm/test/CodeGen/AMDGPU/rsq.f64.ll | 46 ++++-----
11 files changed, 328 insertions(+), 81 deletions(-)
create mode 100644 llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 828532dcffb7d3..37c9422d192754 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/IR/InstrTypes.h"
@@ -909,6 +910,8 @@ class CombinerHelper {
bool matchCastOfBuildVector(const MachineInstr &CastMI,
const MachineInstr &BVMI, BuildFnTy &MatchInfo);
+ /// Match a G_ICMP whose left-hand operand is a constant. If the right-hand
+ /// operand is a constant too, try to fold the compare; otherwise rebuild it
+ /// with the operands exchanged and the predicate swapped.
+ bool matchCanonicalizeICmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
+
private:
/// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
@@ -1023,6 +1026,9 @@ class CombinerHelper {
bool tryFoldLogicOfFCmps(GLogicalBinOp *Logic, BuildFnTy &MatchInfo);
bool isCastFree(unsigned Opcode, LLT ToTy, LLT FromTy) const;
+
+ /// Helper for matchCanonicalizeICmp: try to fold \p ICmp given that both
+ /// operands are the constants \p LHSCst and \p RHSCst. On success
+ /// \p MatchInfo builds the constant result.
+ bool constantFoldICmp(const GICmp &ICmp, const GIConstant &LHSCst,
+ const GIConstant &RHSCst, BuildFnTy &MatchInfo);
};
} // namespace llvm
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index ef1171d9f1f64d..2c459ccdd8a731 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -950,6 +950,16 @@ class GExtOrTruncOp : public GCastOp {
};
};
+/// Represents a splat vector.
+/// Typed wrapper for instructions whose opcode is G_SPLAT_VECTOR.
+class GSplatVector : public GenericMachineInstr {
+public:
+ /// Returns the register held by operand 1.
+ /// NOTE(review): presumably the scalar value being splatted; confirm
+ /// against the G_SPLAT_VECTOR operand layout.
+ Register getValueReg() const { return getOperand(1).getReg(); }
+
+ /// Casting hook: true iff \p MI has the G_SPLAT_VECTOR opcode.
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_SPLAT_VECTOR;
+ };
+};
+
} // namespace llvm
#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index cf5fd6d6f288bd..4aeacfdf4ecdf0 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -593,5 +593,27 @@ bool isGuaranteedNotToBeUndef(Register Reg, const MachineRegisterInfo &MRI,
/// estimate of the type.
Type *getTypeForLLT(LLT Ty, LLVMContext &C);
+/// Discriminates the shapes of constant that GIConstant can describe.
+enum class GIConstantKind { Scalar, FixedVector, ScalableVector };
+
+/// An integer-like constant.
+///
+/// Holds either a list of per-element values (kind FixedVector) or a single
+/// value together with an explicitly supplied kind.
+class GIConstant {
+ GIConstantKind Kind;
+ // Populated only by the ArrayRef constructor.
+ SmallVector<APInt> Values;
+ // Populated only by the (APInt, kind) constructor.
+ APInt Value;
+
+public:
+ /// Builds a FixedVector constant from the element values \p Values.
+ GIConstant(ArrayRef<APInt> Values)
+ : Kind(GIConstantKind::FixedVector), Values(Values) {};
+ /// Builds a constant of kind \p Kind that carries the single value
+ /// \p Value.
+ GIConstant(const APInt &Value, GIConstantKind Kind)
+ : Kind(Kind), Value(Value) {};
+
+ /// Returns the kind this constant was constructed with.
+ GIConstantKind getKind() const { return Kind; }
+
+ /// Returns the single stored value. The definition asserts that the kind is
+ /// Scalar, so check getKind() first.
+ APInt getScalarValue() const;
+
+ /// Tries to describe the value of register \p Const as a constant; returns
+ /// std::nullopt when it cannot.
+ static std::optional<GIConstant> getConstant(Register Const,
+ const MachineRegisterInfo &MRI);
+};
+
} // End namespace llvm.
#endif
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index a595a51d7b01ff..c66212d2ab12c8 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1007,9 +1007,6 @@ def double_icmp_zero_or_combine: GICombineRule<
(G_ICMP $root, $p, $ordst, 0))
>;
-def double_icmp_zero_and_or_combine : GICombineGroup<[double_icmp_zero_and_combine,
- double_icmp_zero_or_combine]>;
-
def and_or_disjoint_mask : GICombineRule<
(defs root:$root, build_fn_matchinfo:$info),
(match (wip_match_opcode G_AND):$root,
@@ -1918,6 +1915,20 @@ def cast_combines: GICombineGroup<[
integer_of_truncate
]>;
+// Canonicalize G_ICMP. The matching logic lives in
+// CombinerHelper::matchCanonicalizeICmp, which fills in the build function
+// that applyBuildFn then runs.
+def canonicalize_icmp : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_ICMP $root, $pred, $lhs, $rhs):$cmp,
+ [{ return Helper.matchCanonicalizeICmp(*${cmp}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
+
+// Compare-related combines gathered into a single group, with
+// canonicalize_icmp listed first.
+def icmp_combines: GICombineGroup<[
+ canonicalize_icmp,
+ icmp_to_true_false_known_bits,
+ icmp_to_lhs_known_bits,
+ double_icmp_zero_and_combine,
+ double_icmp_zero_or_combine,
+ redundant_binop_in_equality
+]>;
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
@@ -1951,7 +1962,7 @@ def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p,
def known_bits_simplifications : GICombineGroup<[
redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
- zext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits,
+ zext_trunc_fold,
sext_inreg_to_zext_inreg]>;
def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
@@ -1984,7 +1995,7 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
combine_extracted_vector_load,
undef_combines, identity_combines, phi_combines,
simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands, shifts_too_big,
- reassocs, ptr_add_immed_chain,
+ reassocs, ptr_add_immed_chain, icmp_combines,
shl_ashr_to_sext_inreg, sext_inreg_of_load,
width_reduction_combines, select_combines,
known_bits_simplifications,
@@ -1998,9 +2009,9 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
constant_fold_cast_op, fabs_fneg_fold,
intdiv_combines, mulh_combines, redundant_neg_operands,
and_or_disjoint_mask, fma_combines, fold_binop_into_select,
- sub_add_reg, select_to_minmax, redundant_binop_in_equality,
+ sub_add_reg, select_to_minmax,
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
- combine_concat_vector, double_icmp_zero_and_or_combine, match_addos,
+ combine_concat_vector, match_addos,
sext_trunc, zext_trunc, prefer_sign_combines, combine_shuffle_concat]>;
// A combine group used to for prelegalizer combiners at -O0. The combines in
diff --git a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
index a15b76440364b1..af1717dbf76f39 100644
--- a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
+++ b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
@@ -7,6 +7,7 @@ add_llvm_component_library(LLVMGlobalISel
Combiner.cpp
CombinerHelper.cpp
CombinerHelperCasts.cpp
+ CombinerHelperCompares.cpp
CombinerHelperVectorOps.cpp
GIMatchTableExecutor.cpp
GISelChangeObserver.cpp
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
new file mode 100644
index 00000000000000..39ac4ee205fb7a
--- /dev/null
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
@@ -0,0 +1,86 @@
+//===- CombinerHelperCompares.cpp------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements CombinerHelper for G_ICMP.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstdlib>
+
+#define DEBUG_TYPE "gi-combiner"
+
+using namespace llvm;
+
+/// Try to fold a G_ICMP whose two operands are both constants.
+///
+/// Only scalar constants are folded. On success \p MatchInfo is set to a
+/// callback that builds a constant into the compare's result register: the
+/// value returned by getICmpTrueVal() when the predicate holds for the two
+/// operand values, 0 otherwise.
+///
+/// \param ICmp      the compare being folded.
+/// \param LHSCst    constant describing the left-hand operand.
+/// \param RHSCst    constant describing the right-hand operand.
+/// \param MatchInfo receives the build callback on success.
+/// \returns true iff \p MatchInfo was populated.
+bool CombinerHelper::constantFoldICmp(const GICmp &ICmp,
+                                      const GIConstant &LHSCst,
+                                      const GIConstant &RHSCst,
+                                      BuildFnTy &MatchInfo) {
+  // getScalarValue() asserts on non-scalar constants, so verify both sides
+  // explicitly instead of relying on the operands having matching kinds.
+  if (LHSCst.getKind() != GIConstantKind::Scalar ||
+      RHSCst.getKind() != GIConstantKind::Scalar)
+    return false;
+
+  Register Dst = ICmp.getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+
+  // The fold materializes a constant of the result type; bail out if that
+  // check rejects it.
+  if (!isConstantLegalOrBeforeLegalizer(DstTy))
+    return false;
+
+  CmpInst::Predicate Pred = ICmp.getCond();
+  APInt LHS = LHSCst.getScalarValue();
+  APInt RHS = RHSCst.getScalarValue();
+
+  bool Result = ICmpInst::compare(LHS, RHS, Pred);
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    if (Result)
+      B.buildConstant(Dst, getICmpTrueVal(getTargetLowering(),
+                                          /*IsVector=*/DstTy.isVector(),
+                                          /*IsFP=*/false));
+    else
+      B.buildConstant(Dst, 0);
+  };
+
+  return true;
+}
+
+/// Match a G_ICMP that has a constant on its left-hand side.
+///
+/// When the right-hand side is a constant as well, the decision is delegated
+/// to constantFoldICmp(). When only the left-hand side is constant,
+/// \p MatchInfo is set to rebuild the compare with the operands exchanged and
+/// the predicate swapped, which places the constant on the right.
+///
+/// \returns false when the left-hand side is not a constant, or when the
+/// delegated fold declines.
+bool CombinerHelper::matchCanonicalizeICmp(const MachineInstr &MI,
+                                           BuildFnTy &MatchInfo) {
+  const GICmp &ICmp = *cast<GICmp>(&MI);
+
+  const Register Result = ICmp.getReg(0);
+  const Register Op0 = ICmp.getLHSReg();
+  const Register Op1 = ICmp.getRHSReg();
+  const CmpInst::Predicate OrigPred = ICmp.getCond();
+  assert(CmpInst::isIntPredicate(OrigPred) && "Not an integer compare!");
+
+  // Nothing to do unless the left-hand operand is a constant.
+  const auto LHSCst = GIConstant::getConstant(Op0, MRI);
+  if (!LHSCst)
+    return false;
+
+  // Both operands constant: folding is the only option considered.
+  if (const auto RHSCst = GIConstant::getConstant(Op1, MRI))
+    return constantFoldICmp(ICmp, *LHSCst, *RHSCst, MatchInfo);
+
+  // If we have a constant, make sure it is on the RHS.
+  const CmpInst::Predicate SwappedPred =
+      CmpInst::getSwappedPredicate(OrigPred);
+  MatchInfo = [=](MachineIRBuilder &B) {
+    B.buildICmp(SwappedPred, Result, Op1, Op0);
+  };
+  return true;
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 1713a582d5cfe5..396f0f07ae9050 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1968,3 +1968,43 @@ Type *llvm::getTypeForLLT(LLT Ty, LLVMContext &C) {
Ty.getElementCount());
return IntegerType::get(C, Ty.getSizeInBits());
}
+
+/// Returns the single value stored in this constant. Asserts that the
+/// constant's kind is Scalar.
+APInt llvm::GIConstant::getScalarValue() const {
+ assert(Kind == GIConstantKind::Scalar && "Expected scalar constant");
+
+ return Value;
+}
+
+/// Try to describe the value defined for \p Const as an integer constant.
+///
+/// The defining instruction is looked up with getDefIgnoringCopies().
+/// Recognized forms:
+///  - G_SPLAT_VECTOR whose value operand is a constant: the splatted value,
+///    tagged ScalableVector.
+///  - G_BUILD_VECTOR whose sources are all constants: one value per source,
+///    tagged FixedVector.
+///  - otherwise, whatever getIConstantVRegValWithLookThrough() resolves for
+///    \p Const itself, tagged Scalar.
+///
+/// \returns std::nullopt as soon as a required value is not a known constant.
+std::optional<GIConstant>
+llvm::GIConstant::getConstant(Register Const, const MachineRegisterInfo &MRI) {
+  // The lookup hands back a plain pointer and dyn_cast asserts on null, so
+  // use the null-tolerant casts and let a missing definition fall through to
+  // the scalar lookup below.
+  MachineInstr *Constant = getDefIgnoringCopies(Const, MRI);
+
+  if (auto *Splat = dyn_cast_if_present<GSplatVector>(Constant)) {
+    std::optional<ValueAndVReg> MayBeConstant =
+        getIConstantVRegValWithLookThrough(Splat->getValueReg(), MRI);
+    if (!MayBeConstant)
+      return std::nullopt;
+    return GIConstant(MayBeConstant->Value, GIConstantKind::ScalableVector);
+  }
+
+  if (auto *Build = dyn_cast_if_present<GBuildVector>(Constant)) {
+    SmallVector<APInt> Values;
+    unsigned NumSources = Build->getNumSources();
+    Values.reserve(NumSources);
+    for (unsigned I = 0; I < NumSources; ++I) {
+      Register SrcReg = Build->getSourceReg(I);
+      std::optional<ValueAndVReg> MayBeConstant =
+          getIConstantVRegValWithLookThrough(SrcReg, MRI);
+      // A single non-constant source disqualifies the whole vector.
+      if (!MayBeConstant)
+        return std::nullopt;
+      Values.push_back(MayBeConstant->Value);
+    }
+    return GIConstant(Values);
+  }
+
+  std::optional<ValueAndVReg> MayBeConstant =
+      getIConstantVRegValWithLookThrough(Const, MRI);
+  if (!MayBeConstant)
+    return std::nullopt;
+
+  return GIConstant(MayBeConstant->Value, GIConstantKind::Scalar);
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir
new file mode 100644
index 00000000000000..bf04ac02d086ab
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir
@@ -0,0 +1,95 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s --check-prefixes=CHECK
+
+---
+# Constant on the LHS: the operands are exchanged and slt becomes sgt.
+name: test_icmp_canon
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_icmp_canon
+ ; CHECK: %lhs:_(s64) = G_CONSTANT i64 11
+ ; CHECK-NEXT: %rhs:_(s64) = COPY $x0
+ ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(sgt), %rhs(s64), %lhs
+ ; CHECK-NEXT: $w0 = COPY %res(s32)
+ %lhs:_(s64) = G_CONSTANT i64 11
+ %rhs:_(s64) = COPY $x0
+ %res:_(s32) = G_ICMP intpred(slt), %lhs(s64), %rhs
+ $w0 = COPY %res(s32)
+...
+---
+# Constant already on the RHS: the compare is expected to stay unchanged.
+name: test_icmp_no_canon
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_icmp_no_canon
+ ; CHECK: %lhs:_(s64) = COPY $x0
+ ; CHECK-NEXT: %rhs:_(s64) = G_CONSTANT i64 11
+ ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(slt), %lhs(s64), %rhs
+ ; CHECK-NEXT: $w0 = COPY %res(s32)
+ %lhs:_(s64) = COPY $x0
+ %rhs:_(s64) = G_CONSTANT i64 11
+ %res:_(s32) = G_ICMP intpred(slt), %lhs(s64), %rhs
+ $w0 = COPY %res(s32)
+...
+---
+# G_BUILD_VECTOR of constants on the LHS: the operands are exchanged and slt
+# becomes sgt.
+name: test_icmp_canon_bv
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_icmp_canon_bv
+ ; CHECK: %opaque1:_(s64) = COPY $x0
+ ; CHECK-NEXT: %opaque2:_(s64) = COPY $x0
+ ; CHECK-NEXT: %const1:_(s64) = G_CONSTANT i64 11
+ ; CHECK-NEXT: %const2:_(s64) = G_CONSTANT i64 12
+ ; CHECK-NEXT: %lhs:_(<2 x s64>) = G_BUILD_VECTOR %const1(s64), %const2(s64)
+ ; CHECK-NEXT: %rhs:_(<2 x s64>) = G_BUILD_VECTOR %opaque1(s64), %opaque2(s64)
+ ; CHECK-NEXT: %res:_(<2 x s32>) = G_ICMP intpred(sgt), %rhs(<2 x s64>), %lhs
+ ; CHECK-NEXT: $x0 = COPY %res(<2 x s32>)
+ %opaque1:_(s64) = COPY $x0
+ %opaque2:_(s64) = COPY $x0
+ %const1:_(s64) = G_CONSTANT i64 11
+ %const2:_(s64) = G_CONSTANT i64 12
+ %lhs:_(<2 x s64>) = G_BUILD_VECTOR %const1(s64), %const2(s64)
+ %rhs:_(<2 x s64>) = G_BUILD_VECTOR %opaque1(s64), %opaque2(s64)
+ %res:_(<2 x s32>) = G_ICMP intpred(slt), %lhs(<2 x s64>), %rhs
+ $x0 = COPY %res(<2 x s32>)
+...
+---
+# Each build vector mixes a constant source with a non-constant one, so
+# neither operand is a constant and the compare is expected to stay unchanged.
+name: test_icmp_no_canon_bv_neither_const
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_icmp_no_canon_bv_neither_const
+ ; CHECK: %opaque1:_(s64) = COPY $x0
+ ; CHECK-NEXT: %opaque2:_(s64) = COPY $x0
+ ; CHECK-NEXT: %const1:_(s64) = G_CONSTANT i64 11
+ ; CHECK-NEXT: %const2:_(s64) = G_CONSTANT i64 12
+ ; CHECK-NEXT: %lhs:_(<2 x s64>) = G_BUILD_VECTOR %const1(s64), %opaque2(s64)
+ ; CHECK-NEXT: %rhs:_(<2 x s64>) = G_BUILD_VECTOR %opaque1(s64), %const2(s64)
+ ; CHECK-NEXT: %res:_(<2 x s32>) = G_ICMP intpred(slt), %lhs(<2 x s64>), %rhs
+ ; CHECK-NEXT: $x0 = COPY %res(<2 x s32>)
+ %opaque1:_(s64) = COPY $x0
+ %opaque2:_(s64) = COPY $x0
+ %const1:_(s64) = G_CONSTANT i64 11
+ %const2:_(s64) = G_CONSTANT i64 12
+ %lhs:_(<2 x s64>) = G_BUILD_VECTOR %const1(s64), %opaque2(s64)
+ %rhs:_(<2 x s64>) = G_BUILD_VECTOR %opaque1(s64), %const2(s64)
+ %res:_(<2 x s32>) = G_ICMP intpred(slt), %lhs(<2 x s64>), %rhs
+ $x0 = COPY %res(<2 x s32>)
+...
+---
+# G_SPLAT_VECTOR of a constant on the LHS: the operands are exchanged and slt
+# becomes sgt.
+name: test_icmp_canon_splat
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_icmp_canon_splat
+ ; CHECK: %const:_(s64) = G_CONSTANT i64 11
+ ; CHECK-NEXT: %lhs:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR %const(s64)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: %rhs:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[COPY]](s64)
+ ; CHECK-NEXT: %res:_(<vscale x 2 x s32>) = G_ICMP intpred(sgt), %rhs(<vscale x 2 x s64>), %lhs
+ ; CHECK-NEXT: %z:_(<vscale x 2 x s64>) = G_ZEXT %res(<vscale x 2 x s32>)
+ ; CHECK-NEXT: $z0 = COPY %z(<vscale x 2 x s64>)
+ %const:_(s64) = G_CONSTANT i64 11
+ %lhs:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR %const:_(s64)
+ %1:_(s64) = COPY $x1
+ %rhs:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR %1:_(s64)
+ %res:_(<vscale x 2 x s32>) = G_ICMP intpred(slt), %lhs(<vscale x 2 x s64>), %rhs
+ %z:_(<vscale x 2 x s64>) = G_ZEXT %res
+ $z0 = COPY %z(<vscale x 2 x s64>)
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
index 50afc79a5a5768..06e957fdcc6a2a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -457,20 +457,12 @@ sw.bb.i.i:
}
define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
-; SDISEL-LABEL: select_and:
-; SDISEL: ; %bb.0:
-; SDISEL-NEXT: cmp w1, #5
-; SDISEL-NEXT: ccmp w0, w1, #0, ne
-; SDISEL-NEXT: csel x0, x2, x3, lt
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: select_and:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov w8, #5 ; =0x5
-; GISEL-NEXT: cmp w8, w1
-; GISEL-NEXT: ccmp w0, w1, #0, ne
-; GISEL-NEXT: csel x0, x2, x3, lt
-; GISEL-NEXT: ret
+; CHECK-LABEL: select_and:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: cmp w1, #5
+; CHECK-NEXT: ccmp w0, w1, #0, ne
+; CHECK-NEXT: csel x0, x2, x3, lt
+; CHECK-NEXT: ret
%1 = icmp slt i32 %w0, %w1
%2 = icmp ne i32 5, %w1
%3 = and i1 %1, %2
@@ -479,20 +471,12 @@ define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
}
define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
-; SDISEL-LABEL: select_or:
-; SDISEL: ; %bb.0:
-; SDISEL-NEXT: cmp w1, #5
-; SDISEL-NEXT: ccmp w0, w1, #8, eq
-; SDISEL-NEXT: csel x0, x2, x3, lt
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: select_or:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov w8, #5 ; =0x5
-; GISEL-NEXT: cmp w8, w1
-; GISEL-NEXT: ccmp w0, w1, #8, eq
-; GISEL-NEXT: csel x0, x2, x3, lt
-; GISEL-NEXT: ret
+; CHECK-LABEL: select_or:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: cmp w1, #5
+; CHECK-NEXT: ccmp w0, w1, #8, eq
+; CHECK-NEXT: csel x0, x2, x3, lt
+; CHECK-NEXT: ret
%1 = icmp slt i32 %w0, %w1
%2 = icmp ne i32 5, %w1
%3 = or i1 %1, %2
@@ -501,20 +485,12 @@ define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
}
define float @select_or_float(i32 %w0, i32 %w1, float %x2, float %x3) {
-; SDISEL-LABEL: select_or_float:
-; SDISEL: ; %bb.0:
-; SDISEL-NEXT: cmp w1, #5
-; SDISEL-NEXT: ccmp w0, w1, #8, eq
-; SDISEL-NEXT: fcsel s0, s0, s1, lt
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: select_or_float:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov w8, #5 ; =0x5
-; GISEL-NEXT: cmp w8, w1
-; GISEL-NEXT: ccmp w0, w1, #8, eq
-; GISEL-NEXT: fcsel s0, s0, s1, lt
-; GISEL-NEXT: ret
+; CHECK-LABEL: select_or_float:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: cmp w1, #5
+; CHECK-NEXT: ccmp w0, w1, #8, eq
+; CHECK-NEXT: fcsel s0, s0, s1, lt
+; CHECK-NEXT: ret
%1 = icmp slt i32 %w0, %w1
%2 = icmp ne i32 5, %w1
%3 = or i1 %1, %2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll
index b1cdf553b72423..0b66185d25f3e2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll
@@ -253,7 +253,7 @@ define double @v_rcp_f64(double %x) {
; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
; GFX6-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; GFX6-NEXT: v_mov_b32_e32 v10, 0x3ff00000
-; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
; GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
@@ -346,7 +346,7 @@ define double @v_rcp_f64_arcp(double %x) {
; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
; GFX6-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; GFX6-NEXT: v_mov_b32_e32 v10, 0x3ff00000
-; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
; GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
@@ -483,7 +483,7 @@ define double @v_rcp_f64_ulp25(double %x) {
; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
; GFX6-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; GFX6-NEXT: v_mov_b32_e32 v10, 0x3ff00000
-; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
; GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
@@ -1115,7 +1115,7 @@ define <2 x double> @v_rcp_v2f64(<2 x double> %x) {
; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0
; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15]
; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7]
-; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v9
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v20
; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7]
; GFX6-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
; GFX6-NEXT: v_fma_f64 v[18:19], -v[4:5], v[12:13], v[8:9]
@@ -1126,7 +1126,7 @@ define <2 x double> @v_rcp_v2f64(<2 x double> %x) {
; GFX6-NEXT: v_mul_f64 v[8:9], v[16:17], v[4:5]
; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[18:19], v[6:7], v[12:13]
; GFX6-NEXT: v_fma_f64 v[12:13], -v[10:11], v[8:9], v[16:17]
-; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v17
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v17, v20
; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0
@@ -1275,7 +1275,7 @@ define <2 x double> @v_rcp_v2f64_arcp(<2 x double> %x) {
; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0
; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15]
; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7]
-; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v9
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v20
; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7]
; GFX6-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
; GFX6-NEXT: v_fma_f64 v[18:19], -v[4:5], v[12:13], v[8:9]
@@ -1286,7 +1286,7 @@ define <2 x double> @v_rcp_v2f64_arcp(<2 x double> %x) {
; GFX6-NEXT: v_mul_f64 v[8:9], v[16:17], v[4:5]
; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[18:19], v[6:7], v[12:13]
; GFX6-NEXT: v_fma_f64 v[12:13], -v[10:11], v[8:9], v[16:17]
-; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v17
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v17, v20
; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0
@@ -1502,7 +1502,7 @@ define <2 x double> @v_rcp_v2f64_ulp25(<2 x double> %x) {
; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0
; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15]
; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7]
-; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v9
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v20
; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7]
; GFX6-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
; GFX6-NEXT: v_fma_f64 v[18:19], -v[4:5], v[12:13], v[8:9]
@@ -1513,7 +1513,7 @@ define <2 x double> @v_rcp_v2f64_ulp25(<2 x double> %x) {
; GFX6-NEXT: v_mul_f64 v[8:9], v[16:17], v[4:5]
; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[18:19], v[6:7], v[12:13]
; GFX6-NEXT: v_fma_f64 v[12:13], -v[10:11], v[8:9], v[16:17]
-; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v17
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v17, v20
; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
index bd6e1f54e636d8..8f4a4b5afcdc1e 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
@@ -87,7 +87,7 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -267,7 +267,7 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -448,7 +448,7 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_f64(double inreg %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], -1.0, v[0:1], -1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -628,7 +628,7 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_neg_f64(double inreg %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], -1.0, v[0:1], -1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -809,7 +809,7 @@ define double @v_rsq_f64(double %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -978,7 +978,7 @@ define double @v_rsq_f64_fabs(double %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -1148,7 +1148,7 @@ define double @v_rsq_f64_missing_contract0(double %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -1317,7 +1317,7 @@ define double @v_rsq_f64_missing_contract1(double %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -1486,7 +1486,7 @@ define double @v_neg_rsq_f64(double %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], -1.0, v[0:1], -1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -1716,7 +1716,7 @@ define <2 x double> @v_rsq_v2f64(<2 x double> %x) {
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[6:7]
; SI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[8:9]
; SI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[4:5]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v13
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v20
; SI-GISEL-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], v[12:13]
; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7]
@@ -1728,7 +1728,7 @@ define <2 x double> @v_rsq_v2f64(<2 x double> %x) {
; SI-GISEL-NEXT: v_mul_f64 v[10:11], v[18:19], v[6:7]
; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v19
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v19, v20
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
@@ -2019,7 +2019,7 @@ define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) {
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[6:7]
; SI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[8:9]
; SI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[4:5]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v13
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v20
; SI-GISEL-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], v[12:13]
; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7]
@@ -2031,7 +2031,7 @@ define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) {
; SI-GISEL-NEXT: v_mul_f64 v[10:11], v[18:19], v[6:7]
; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v19
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v19, v20
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
@@ -2293,7 +2293,7 @@ define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) {
; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0
; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v13
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v10
; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[6:7], 1.0
; SI-GISEL-NEXT: v_div_scale_f64 v[18:19], s[4:5], s[4:5], v[2:3], s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7]
@@ -2568,7 +2568,7 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0
; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v13
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v10
; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[6:7], 1.0
; SI-GISEL-NEXT: v_div_scale_f64 v[18:19], s[4:5], 1.0, v[2:3], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7]
@@ -2578,7 +2578,7 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19]
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0x3ff00000
; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v8, v19
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v19, v8
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
@@ -2808,7 +2808,7 @@ define double @v_rsq_f64_fneg_fabs(double %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -2979,7 +2979,7 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -4167,7 +4167,7 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -4846,7 +4846,7 @@ define double @v_rsq_amdgcn_sqrt_f64(double %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -4927,7 +4927,7 @@ define double @v_neg_rsq_amdgcn_sqrt_f64(double %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], -1.0, v[0:1], -1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -5008,7 +5008,7 @@ define amdgpu_ps <2 x i32> @s_rsq_amdgcn_sqrt_f64(double inreg %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -5649,7 +5649,7 @@ define double @v_div_const_contract_sqrt_f64(double %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], s[6:7], v[0:1], s[6:7]
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
More information about the llvm-commits
mailing list