[llvm] [GlobalIsel] Visit ICmp (PR #105991)
Thorsten Schütt via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 25 23:03:06 PDT 2024
https://github.com/tschuett updated https://github.com/llvm/llvm-project/pull/105991
From 23cd7dd1bd55b2b236edd0211a99dd6b8dd6e961 Mon Sep 17 00:00:00 2001
From: Thorsten Schütt <schuett at gmail.com>
Date: Sun, 11 Aug 2024 21:47:55 +0200
Subject: [PATCH 1/5] [GlobalIsel] Visit ICmp
inspired by simplifyICmpInst and simplifyICmpWithZero
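
Examples of the folds this enables (illustrative, not exhaustive):

  icmp ult X, 0 -> false
  icmp uge X, 0 -> true
  icmp eq X, X -> true
  icmp sgt 5, X -> icmp slt X, 5 (constants are canonicalized to the RHS)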
---
.../llvm/CodeGen/GlobalISel/CombinerHelper.h | 10 +
.../CodeGen/GlobalISel/GenericMachineInstrs.h | 24 ++
llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 26 ++
.../include/llvm/Target/GlobalISel/Combine.td | 51 ++-
llvm/lib/CodeGen/GlobalISel/CMakeLists.txt | 1 +
.../GlobalISel/CombinerHelperCompares.cpp | 305 +++++++++++++++++
llvm/lib/CodeGen/GlobalISel/Utils.cpp | 323 ++++++++++++++++++
.../AArch64/GlobalISel/arm64-atomic.ll | 96 +++---
.../AArch64/GlobalISel/arm64-pcsections.ll | 56 +--
.../AArch64/GlobalISel/combine-visit-icmp.mir | 167 +++++++++
llvm/test/CodeGen/AArch64/arm64-ccmp.ll | 60 +---
llvm/test/CodeGen/AArch64/icmp2.ll | 295 ++++++++++++++++
.../CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll | 18 +-
llvm/test/CodeGen/AMDGPU/itofp.i128.ll | 154 ++++-----
llvm/test/CodeGen/AMDGPU/rsq.f64.ll | 46 +--
15 files changed, 1401 insertions(+), 231 deletions(-)
create mode 100644 llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir
create mode 100644 llvm/test/CodeGen/AArch64/icmp2.ll
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9b62d6067be39c..da9c7fdbd2a093 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/IR/InstrTypes.h"
@@ -299,6 +300,12 @@ class CombinerHelper {
/// $whatever = COPY $addr
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0);
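+ /// Fold or canonicalize G_ICMP, e.g. constant fold it or move a constant
+ /// to the RHS.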
+ bool visitICmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
+ bool matchSextOfICmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
+ bool matchZextOfICmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
+ /// Try hard to fold an icmp with a zero RHS, as this is a common case.
+ bool matchCmpOfZero(const MachineInstr &MI, BuildFnTy &MatchInfo);
+
bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo);
void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo);
@@ -1017,6 +1024,9 @@ class CombinerHelper {
bool tryFoldLogicOfFCmps(GLogicalBinOp *Logic, BuildFnTy &MatchInfo);
bool isCastFree(unsigned Opcode, LLT ToTy, LLT FromTy) const;
+
+ bool constantFoldICmp(const GICmp &ICmp, const GIConstant &LHS,
+ const GIConstant &RHS, BuildFnTy &MatchInfo);
};
} // namespace llvm
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index ef1171d9f1f64d..427b5a86b6e0c4 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -950,6 +950,30 @@ class GExtOrTruncOp : public GCastOp {
};
};
+/// Represents a splat vector.
+class GSplatVector : public GenericMachineInstr {
+public:
+ Register getValueReg() const { return getOperand(1).getReg(); }
+
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_SPLAT_VECTOR;
+ }
+};
+
+/// Represents an integer-like extending operation.
+class GZextOrSextOp : public GCastOp {
+public:
+ static bool classof(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT:
+ return true;
+ default:
+ return false;
+ }
+ }
+};
+
} // namespace llvm
#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index cf5fd6d6f288bd..a8bf2e722881ac 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -593,5 +593,31 @@ bool isGuaranteedNotToBeUndef(Register Reg, const MachineRegisterInfo &MRI,
/// estimate of the type.
Type *getTypeForLLT(LLT Ty, LLVMContext &C);
+enum class GIConstantKind { Scalar, FixedVector, ScalableVector };
+
+/// An integer-like constant.
+class GIConstant {
+ GIConstantKind Kind;
+ SmallVector<APInt> Values;
+ APInt Value;
+
+public:
+ GIConstant(ArrayRef<APInt> Values)
+ : Kind(GIConstantKind::FixedVector), Values(Values) {}
+ GIConstant(const APInt &Value, GIConstantKind Kind)
+ : Kind(Kind), Value(Value) {}
+
+ GIConstantKind getKind() const { return Kind; }
+
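+ /// Returns the value of a scalar constant; asserts unless the constant is
+ /// a scalar.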
+ APInt getScalarValue() const;
+
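+ /// Returns a GIConstant for \p Const if it is a scalar constant, a build
+ /// vector of constants, or a constant splat; returns std::nullopt otherwise.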
+ static std::optional<GIConstant> getConstant(Register Const,
+ const MachineRegisterInfo &MRI);
+};
+
+/// Return true if the given value is known to be non-zero when defined.
+bool isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
+ GISelKnownBits *KB, unsigned Depth = 0);
+
} // End namespace llvm.
#endif
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 525cc815e73cef..175a8ed57b2669 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1007,9 +1007,6 @@ def double_icmp_zero_or_combine: GICombineRule<
(G_ICMP $root, $p, $ordst, 0))
>;
-def double_icmp_zero_and_or_combine : GICombineGroup<[double_icmp_zero_and_combine,
- double_icmp_zero_or_combine]>;
-
def and_or_disjoint_mask : GICombineRule<
(defs root:$root, build_fn_matchinfo:$info),
(match (wip_match_opcode G_AND):$root,
@@ -1884,6 +1881,46 @@ def cast_combines: GICombineGroup<[
buildvector_of_truncate
]>;
+def visit_icmp : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_ICMP $root, $pred, $lhs, $rhs):$cmp,
+ [{ return Helper.visitICmp(*${cmp}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
+
+def sext_icmp : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_SEXT $rhs, $inputR),
+ (G_SEXT $lhs, $inputL),
+ (G_ICMP $root, $pred, $lhs, $rhs):$cmp,
+ [{ return Helper.matchSextOfICmp(*${cmp}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
+
+def zext_icmp : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_ZEXT $rhs, $inputR),
+ (G_ZEXT $lhs, $inputL),
+ (G_ICMP $root, $pred, $lhs, $rhs):$cmp,
+ [{ return Helper.matchZextOfICmp(*${cmp}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
+
+def icmp_of_zero : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_CONSTANT $zero, 0),
+ (G_ICMP $root, $pred, $lhs, $zero):$cmp,
+ [{ return Helper.matchCmpOfZero(*${cmp}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
+
+def icmp_combines: GICombineGroup<[
+ visit_icmp,
+ sext_icmp,
+ zext_icmp,
+ icmp_of_zero,
+ icmp_to_true_false_known_bits,
+ icmp_to_lhs_known_bits,
+ double_icmp_zero_and_combine,
+ double_icmp_zero_or_combine,
+ redundant_binop_in_equality
+]>;
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
@@ -1917,7 +1954,7 @@ def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p,
def known_bits_simplifications : GICombineGroup<[
redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
- zext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits,
+ zext_trunc_fold,
sext_inreg_to_zext_inreg]>;
def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
@@ -1944,7 +1981,7 @@ def constant_fold_binops : GICombineGroup<[constant_fold_binop,
def prefer_sign_combines : GICombineGroup<[nneg_zext]>;
-def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
+def all_combines : GICombineGroup<[icmp_combines, integer_reassoc_combines, trivial_combines,
vector_ops_combines, freeze_combines, cast_combines,
insert_vec_elt_combines, extract_vec_elt_combines, combines_for_extload,
combine_extracted_vector_load,
@@ -1964,9 +2001,9 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
constant_fold_cast_op, fabs_fneg_fold,
intdiv_combines, mulh_combines, redundant_neg_operands,
and_or_disjoint_mask, fma_combines, fold_binop_into_select,
- sub_add_reg, select_to_minmax, redundant_binop_in_equality,
+ sub_add_reg, select_to_minmax,
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
- combine_concat_vector, double_icmp_zero_and_or_combine, match_addos,
+ combine_concat_vector, match_addos,
sext_trunc, zext_trunc, prefer_sign_combines, combine_shuffle_concat]>;
// A combine group used to for prelegalizer combiners at -O0. The combines in
diff --git a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
index a15b76440364b1..af1717dbf76f39 100644
--- a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
+++ b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
@@ -7,6 +7,7 @@ add_llvm_component_library(LLVMGlobalISel
Combiner.cpp
CombinerHelper.cpp
CombinerHelperCasts.cpp
+ CombinerHelperCompares.cpp
CombinerHelperVectorOps.cpp
GIMatchTableExecutor.cpp
GISelChangeObserver.cpp
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
new file mode 100644
index 00000000000000..415768fb07e59f
--- /dev/null
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
@@ -0,0 +1,305 @@
+//===- CombinerHelperCompares.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements CombinerHelper routines for G_ICMP.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstdlib>
+
+#define DEBUG_TYPE "gi-combiner"
+
+using namespace llvm;
+
+bool CombinerHelper::constantFoldICmp(const GICmp &ICmp,
+ const GIConstant &LHSCst,
+ const GIConstant &RHSCst,
+ BuildFnTy &MatchInfo) {
+ if (LHSCst.getKind() != GIConstantKind::Scalar)
+ return false;
+
+ Register Dst = ICmp.getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+
+ if (!isConstantLegalOrBeforeLegalizer(DstTy))
+ return false;
+
+ CmpInst::Predicate Pred = ICmp.getCond();
+ APInt LHS = LHSCst.getScalarValue();
+ APInt RHS = RHSCst.getScalarValue();
+
+ bool Result;
+
+ switch (Pred) {
+ case CmpInst::Predicate::ICMP_EQ:
+ Result = LHS.eq(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_NE:
+ Result = LHS.ne(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_UGT:
+ Result = LHS.ugt(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_UGE:
+ Result = LHS.uge(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_ULT:
+ Result = LHS.ult(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_ULE:
+ Result = LHS.ule(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_SGT:
+ Result = LHS.sgt(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_SGE:
+ Result = LHS.sge(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_SLT:
+ Result = LHS.slt(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_SLE:
+ Result = LHS.sle(RHS);
+ break;
+ default:
+ llvm_unreachable("Unexpected predicate");
+ }
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ if (Result)
+ B.buildConstant(Dst, getICmpTrueVal(getTargetLowering(),
+ /*IsVector=*/DstTy.isVector(),
+ /*IsFP=*/false));
+ else
+ B.buildConstant(Dst, 0);
+ };
+
+ return true;
+}
+
+bool CombinerHelper::visitICmp(const MachineInstr &MI, BuildFnTy &MatchInfo) {
+ const GICmp *Cmp = cast<GICmp>(&MI);
+
+ Register Dst = Cmp->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+ Register LHS = Cmp->getLHSReg();
+ Register RHS = Cmp->getRHSReg();
+
+ CmpInst::Predicate Pred = Cmp->getCond();
+ assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
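+
+ // If both operands are constants, fold the compare outright, e.g.
+ // icmp ult 1, 2 -> true.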
+ if (auto CLHS = GIConstant::getConstant(LHS, MRI)) {
+ if (auto CRHS = GIConstant::getConstant(RHS, MRI))
+ return constantFoldICmp(*Cmp, *CLHS, *CRHS, MatchInfo);
+
+ // If we have a constant, make sure it is on the RHS.
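+ // e.g. icmp sgt 5, %x -> icmp slt %x, 5.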
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildICmp(Pred, Dst, LHS, RHS); };
+ return true;
+ }
+
+ [[maybe_unused]] MachineInstr *MILHS = MRI.getVRegDef(LHS);
+ MachineInstr *MIRHS = MRI.getVRegDef(RHS);
+
+ // For EQ and NE, we can always pick a value for the undef to make the
+ // predicate pass or fail, so we can return undef.
+ // Matches behavior in llvm::ConstantFoldCompareInstruction.
+ if (isa<GImplicitDef>(MIRHS) && ICmpInst::isEquality(Pred) &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) {
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildUndef(Dst); };
+ return true;
+ }
+
+ // icmp X, X -> true/false
+ // icmp X, undef -> true/false because undef could be X.
+ if ((LHS == RHS || isa<GImplicitDef>(MIRHS)) &&
+ isConstantLegalOrBeforeLegalizer(DstTy)) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ if (CmpInst::isTrueWhenEqual(Pred))
+ B.buildConstant(Dst, getICmpTrueVal(getTargetLowering(),
+ /*IsVector=*/DstTy.isVector(),
+ /*IsFP=*/false));
+ else
+ B.buildConstant(Dst, 0);
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchSextOfICmp(const MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ const GICmp *Cmp = cast<GICmp>(&MI);
+
+ Register Dst = Cmp->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+ Register LHS = Cmp->getLHSReg();
+ Register RHS = Cmp->getRHSReg();
+ CmpInst::Predicate Pred = Cmp->getCond();
+
+ GSext *SL = cast<GSext>(MRI.getVRegDef(LHS));
+ GSext *SR = cast<GSext>(MRI.getVRegDef(RHS));
+
+ LLT SLTy = MRI.getType(SL->getSrcReg());
+ LLT SRTy = MRI.getType(SR->getSrcReg());
+
+ // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the
+ // same type.
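+ // e.g. icmp slt (sext i32 %x to i64), (sext i32 %y to i64)
+ //        -> icmp slt %x, %y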
+ if (SLTy != SRTy)
+ return false;
+
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ICMP, {DstTy, SLTy}}))
+ return false;
+
+ // Compare X and Y. Note that the predicate does not change.
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildICmp(Pred, Dst, SL->getSrcReg(), SR->getSrcReg());
+ };
+ return true;
+}
+
+bool CombinerHelper::matchZextOfICmp(const MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ const GICmp *Cmp = cast<GICmp>(&MI);
+
+ Register Dst = Cmp->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+ Register LHS = Cmp->getLHSReg();
+ Register RHS = Cmp->getRHSReg();
+ CmpInst::Predicate Pred = Cmp->getCond();
+
+ // G_ICMP also accepts pointer operands, e.g.:
+ //
+ //   %x:_(p0) = COPY $x0
+ //   %zero:_(p0) = G_CONSTANT i64 0
+ //   %cmp1:_(s1) = G_ICMP intpred(eq), %x:_(p0), %zero:_
+ //
+ // Bail out on pointer and non-scalar operands.
+
+ if (MRI.getType(LHS).isPointer() || MRI.getType(RHS).isPointer())
+ return false;
+
+ if (!MRI.getType(LHS).isScalar() || !MRI.getType(RHS).isScalar())
+ return false;
+
+ GZext *ZL = cast<GZext>(MRI.getVRegDef(LHS));
+ GZext *ZR = cast<GZext>(MRI.getVRegDef(RHS));
+
+ LLT ZLTy = MRI.getType(ZL->getSrcReg());
+ LLT ZRTy = MRI.getType(ZR->getSrcReg());
+
+ // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have
+ // the same type.
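+ // e.g. icmp sgt (zext i32 %x to i64), (zext i32 %y to i64)
+ //        -> icmp ugt %x, %y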
+ if (ZLTy != ZRTy)
+ return false;
+
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ICMP, {DstTy, ZLTy}}))
+ return false;
+
+ // Compare X and Y. Note that signed predicates become unsigned.
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildICmp(ICmpInst::getUnsignedPredicate(Pred), Dst, ZL->getSrcReg(),
+ ZR->getSrcReg());
+ };
+ return true;
+}
+
+bool CombinerHelper::matchCmpOfZero(const MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ const GICmp *Cmp = cast<GICmp>(&MI);
+
+ Register Dst = Cmp->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+ Register LHS = Cmp->getLHSReg();
+ CmpInst::Predicate Pred = Cmp->getCond();
+
+ if (!isConstantLegalOrBeforeLegalizer(DstTy))
+ return false;
+
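+ // With a zero RHS, many predicates are decidable from known bits alone:
+ // icmp ult %x, 0 is always false, icmp uge %x, 0 is always true, and e.g.
+ // icmp eq %x, 0 is false whenever %x is known to be non-zero.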
+ std::optional<bool> Result;
+
+ switch (Pred) {
+ default:
+ llvm_unreachable("Unkonwn ICmp predicate!");
+ case ICmpInst::ICMP_ULT:
+ Result = false;
+ break;
+ case ICmpInst::ICMP_UGE:
+ Result = true;
+ break;
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_ULE:
+ if (isKnownNonZero(LHS, MRI, KB))
+ Result = false;
+ break;
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_UGT:
+ if (isKnownNonZero(LHS, MRI, KB))
+ Result = true;
+ break;
+ case ICmpInst::ICMP_SLT: {
+ KnownBits LHSKnown = KB->getKnownBits(LHS);
+ if (LHSKnown.isNegative())
+ Result = true;
+ if (LHSKnown.isNonNegative())
+ Result = false;
+ break;
+ }
+ case ICmpInst::ICMP_SLE: {
+ KnownBits LHSKnown = KB->getKnownBits(LHS);
+ if (LHSKnown.isNegative())
+ Result = true;
+ if (LHSKnown.isNonNegative() && isKnownNonZero(LHS, MRI, KB))
+ Result = false;
+ break;
+ }
+ case ICmpInst::ICMP_SGE: {
+ KnownBits LHSKnown = KB->getKnownBits(LHS);
+ if (LHSKnown.isNegative())
+ Result = false;
+ if (LHSKnown.isNonNegative())
+ Result = true;
+ break;
+ }
+ case ICmpInst::ICMP_SGT: {
+ KnownBits LHSKnown = KB->getKnownBits(LHS);
+ if (LHSKnown.isNegative())
+ Result = false;
+ if (LHSKnown.isNonNegative() && isKnownNonZero(LHS, MRI, KB))
+ Result = true;
+ break;
+ }
+ }
+
+ if (!Result)
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ if (*Result)
+ B.buildConstant(Dst, getICmpTrueVal(getTargetLowering(),
+ /*IsVector=*/DstTy.isVector(),
+ /*IsFP=*/false));
+ else
+ B.buildConstant(Dst, 0);
+ };
+
+ return true;
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index cfdd9905c16fa6..e8b9d995a22768 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1984,3 +1984,326 @@ Type *llvm::getTypeForLLT(LLT Ty, LLVMContext &C) {
Ty.getElementCount());
return IntegerType::get(C, Ty.getSizeInBits());
}
+
+APInt llvm::GIConstant::getScalarValue() const {
+ assert(Kind == GIConstantKind::Scalar && "Expected scalar constant");
+
+ return Value;
+}
+
+std::optional<GIConstant>
+llvm::GIConstant::getConstant(Register Const, const MachineRegisterInfo &MRI) {
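+ // Look through copies to the def and recognize, in turn, constant splat
+ // vectors, build vectors of constants, and plain scalar constants.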
+ MachineInstr *Constant = getDefIgnoringCopies(Const, MRI);
+
+ if (GSplatVector *Splat = dyn_cast<GSplatVector>(Constant)) {
+ std::optional<ValueAndVReg> MayBeConstant =
+ getIConstantVRegValWithLookThrough(Splat->getValueReg(), MRI);
+ if (!MayBeConstant)
+ return std::nullopt;
+ return GIConstant(MayBeConstant->Value, GIConstantKind::ScalableVector);
+ }
+
+ if (GBuildVector *Build = dyn_cast<GBuildVector>(Constant)) {
+ SmallVector<APInt> Values;
+ unsigned NumSources = Build->getNumSources();
+ for (unsigned I = 0; I < NumSources; ++I) {
+ Register SrcReg = Build->getSourceReg(I);
+ std::optional<ValueAndVReg> MayBeConstant =
+ getIConstantVRegValWithLookThrough(SrcReg, MRI);
+ if (!MayBeConstant)
+ return std::nullopt;
+ Values.push_back(MayBeConstant->Value);
+ }
+ return GIConstant(Values);
+ }
+
+ std::optional<ValueAndVReg> MayBeConstant =
+ getIConstantVRegValWithLookThrough(Const, MRI);
+ if (!MayBeConstant)
+ return std::nullopt;
+
+ return GIConstant(MayBeConstant->Value, GIConstantKind::Scalar);
+}
+
+static bool isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
+ GISelKnownBits *KB, unsigned Depth);
+
+bool llvm::isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
+ GISelKnownBits *KB, unsigned Depth) {
+ if (!Reg.isVirtual())
+ return false;
+
+ LLT Ty = MRI.getType(Reg);
+ if (!Ty.isValid())
+ return false;
+
+ if (Ty.isPointer())
+ return false;
+
+ assert(Ty.isScalar() && "Expected a scalar value");
+ return ::isKnownNonZero(Reg, MRI, KB, Depth);
+}
+
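+/// Match Op0 == zext/sext(icmp eq Op1, 0), or the same with the operands
+/// swapped. Patterns such as (X + (X == 0)) are always non-zero.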
+static bool matchOpWithOpEqZero(Register Op0, Register Op1,
+ const MachineRegisterInfo &MRI) {
+ MachineInstr *MI = MRI.getVRegDef(Op0);
+
+ bool Result = false;
+
+ if (GZextOrSextOp *ZS = dyn_cast<GZextOrSextOp>(MI)) {
+ MachineInstr *SrcMI = MRI.getVRegDef(ZS->getSrcReg());
+ if (GICmp *Cmp = dyn_cast<GICmp>(SrcMI)) {
+ std::optional<ValueAndVReg> MayBeConstant =
+ getIConstantVRegValWithLookThrough(Cmp->getRHSReg(), MRI);
+ if (MayBeConstant)
+ Result |= (MayBeConstant->Value == 0) && (Cmp->getLHSReg() == Op1) &&
+ (Cmp->getCond() == ICmpInst::ICMP_EQ);
+ }
+ }
+
+ MI = MRI.getVRegDef(Op1);
+ if (GZextOrSextOp *ZS = dyn_cast<GZextOrSextOp>(MI)) {
+ MachineInstr *SrcMI = MRI.getVRegDef(ZS->getSrcReg());
+ if (GICmp *Cmp = dyn_cast<GICmp>(SrcMI)) {
+ std::optional<ValueAndVReg> MayBeConstant =
+ getIConstantVRegValWithLookThrough(Cmp->getRHSReg(), MRI);
+ if (MayBeConstant)
+ Result |= (MayBeConstant->Value == 0) && (Cmp->getLHSReg() == Op0) &&
+ (Cmp->getCond() == ICmpInst::ICMP_EQ);
+ }
+ }
+
+ return Result;
+}
+
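+/// Return true if the G_ADD is known to be non-zero, using its nuw/nsw flags
+/// and the known bits of its operands.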
+static bool isNonZeroAdd(const GBinOp &Add, const MachineRegisterInfo &MRI,
+ GISelKnownBits *KB, unsigned Depth,
+ unsigned BitWidth) {
+ bool NSW = Add.getFlag(MachineInstr::MIFlag::NoSWrap);
+ bool NUW = Add.getFlag(MachineInstr::MIFlag::NoUWrap);
+ Register LHS = Add.getLHSReg();
+ Register RHS = Add.getRHSReg();
+
+ // (X + (X == 0)) is non-zero
+ if (matchOpWithOpEqZero(LHS, RHS, MRI))
+ return true;
+
+ if (NUW)
+ return ::isKnownNonZero(RHS, MRI, KB, Depth) ||
+ ::isKnownNonZero(LHS, MRI, KB, Depth);
+
+ KnownBits LHSKnown = KB->getKnownBits(LHS);
+ KnownBits RHSKnown = KB->getKnownBits(RHS);
+
+ // If LHS and RHS are both non-negative (as signed values) then their sum is
+ // not zero unless both LHS and RHS are zero.
+ if (LHSKnown.isNonNegative() && RHSKnown.isNonNegative())
+ if (::isKnownNonZero(LHS, MRI, KB, Depth) ||
+ ::isKnownNonZero(RHS, MRI, KB, Depth))
+ return true;
+
+ // If LHS and RHS are both negative (as signed values) then their sum is not
+ // zero unless both LHS and RHS equal INT_MIN.
+ if (LHSKnown.isNegative() && RHSKnown.isNegative()) {
+ APInt Mask = APInt::getSignedMaxValue(BitWidth);
+ // The sign bit of LHS is set. If some other bit is set then LHS is not
+ // equal to INT_MIN.
+ if (LHSKnown.One.intersects(Mask))
+ return true;
+ // The sign bit of RHS is set. If some other bit is set then RHS is not
+ // equal to INT_MIN.
+ if (RHSKnown.One.intersects(Mask))
+ return true;
+ }
+
+ // The sum of a non-negative number and a power of two is not zero.
+ if (LHSKnown.isNonNegative() && ::isKnownToBeAPowerOfTwo(RHS, MRI, KB))
+ return true;
+ if (RHSKnown.isNonNegative() && ::isKnownToBeAPowerOfTwo(LHS, MRI, KB))
+ return true;
+
+ return KnownBits::add(LHSKnown, RHSKnown, NSW, NUW).isNonZero();
+}
+
+static bool isKnownNonZeroBinOp(const GBinOp &BinOp,
+ const MachineRegisterInfo &MRI,
+ GISelKnownBits *KB, unsigned Depth) {
+ unsigned BitWidth = MRI.getType(BinOp.getReg(0)).getScalarSizeInBits();
+ switch (BinOp.getOpcode()) {
+ case TargetOpcode::G_XOR:
+ // (X ^ (X == 0)) is non-zero
+ if (matchOpWithOpEqZero(BinOp.getLHSReg(), BinOp.getRHSReg(), MRI))
+ return true;
+ break;
+ case TargetOpcode::G_OR: {
+ // (X | (X == 0)) is non-zero
+ if (matchOpWithOpEqZero(BinOp.getLHSReg(), BinOp.getRHSReg(), MRI))
+ return true;
+ // X | Y != 0 if X != 0 or Y != 0.
+ return ::isKnownNonZero(BinOp.getRHSReg(), MRI, KB, Depth) ||
+ ::isKnownNonZero(BinOp.getLHSReg(), MRI, KB, Depth);
+ }
+ case TargetOpcode::G_ADD: {
+ // X + Y: if the add has the nuw flag, the result is non-zero when either
+ // operand is non-zero; otherwise reason about the operands' known bits.
+ return isNonZeroAdd(BinOp, MRI, KB, Depth, BitWidth);
+ }
+ default:
+ return false;
+ }
+
+ return false;
+}
+
+static bool isKnownNonZeroCastOp(const GCastOp &CastOp,
+ const MachineRegisterInfo &MRI,
+ GISelKnownBits *KB, unsigned Depth) {
+ switch (CastOp.getOpcode()) {
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT:
+ // ext X != 0 if X != 0.
+ return isKnownNonZero(CastOp.getSrcReg(), MRI, KB);
+ case TargetOpcode::G_TRUNC:
+ // nuw/nsw trunc preserves zero/non-zero status of input.
+ if (CastOp.getFlag(MachineInstr::MIFlag::NoSWrap) ||
+ CastOp.getFlag(MachineInstr::MIFlag::NoUWrap))
+ return ::isKnownNonZero(CastOp.getSrcReg(), MRI, KB, Depth);
+ break;
+ default:
+ return false;
+ }
+
+ return false;
+}
+
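+/// Return true if the shift is known to be non-zero, given the known bits of
+/// the shifted value and an upper bound on the shift amount.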
+static bool isNonZeroShift(const MachineInstr *MI,
+ const MachineRegisterInfo &MRI, GISelKnownBits *KB,
+ unsigned Depth, const KnownBits &KnownVal) {
+ auto ShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
+ switch (MI->getOpcode()) {
+ case TargetOpcode::G_SHL:
+ return Lhs.shl(Rhs);
+ case TargetOpcode::G_LSHR:
+ return Lhs.lshr(Rhs);
+ case TargetOpcode::G_ASHR:
+ return Lhs.ashr(Rhs);
+ default:
+ llvm_unreachable("Unknown Shift Opcode");
+ }
+ };
+
+ auto InvShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
+ switch (MI->getOpcode()) {
+ case TargetOpcode::G_SHL:
+ return Lhs.lshr(Rhs);
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_ASHR:
+ return Lhs.shl(Rhs);
+ default:
+ llvm_unreachable("Unknown Shift Opcode");
+ }
+ };
+
+ if (KnownVal.isUnknown())
+ return false;
+
+ KnownBits KnownCnt = KB->getKnownBits(MI->getOperand(2).getReg());
+ APInt MaxShift = KnownCnt.getMaxValue();
+ unsigned NumBits = KnownVal.getBitWidth();
+ if (MaxShift.uge(NumBits))
+ return false;
+
+ if (!ShiftOp(KnownVal.One, MaxShift).isZero())
+ return true;
+
+ // If all of the bits shifted out are known to be zero, and Val is known
+ // non-zero then at least one non-zero bit must remain.
+ if (InvShiftOp(KnownVal.Zero, NumBits - MaxShift)
+ .eq(InvShiftOp(APInt::getAllOnes(NumBits), NumBits - MaxShift)) &&
+ ::isKnownNonZero(MI->getOperand(1).getReg(), MRI, KB, Depth))
+ return true;
+
+ return false;
+}
+
+static bool isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
+ GISelKnownBits *KB, unsigned Depth) {
+ if (!Reg.isVirtual())
+ return false;
+
+ std::optional<ValueAndVReg> MayBeConstant =
+ getIConstantVRegValWithLookThrough(Reg, MRI);
+
+ if (MayBeConstant)
+ return MayBeConstant->Value != 0;
+
+ // Some of the tests below are recursive, so bail out if we hit the limit.
+ if (Depth++ >= MaxAnalysisRecursionDepth)
+ return false;
+
+ MachineInstr *MI = getDefIgnoringCopies(Reg, MRI);
+
+ if (GBinOp *BinOp = dyn_cast<GBinOp>(MI))
+ return isKnownNonZeroBinOp(*BinOp, MRI, KB, Depth);
+
+ if (GCastOp *CastOp = dyn_cast<GCastOp>(MI))
+ return isKnownNonZeroCastOp(*CastOp, MRI, KB, Depth);
+
+ switch (MI->getOpcode()) {
+ case TargetOpcode::G_SHL: {
+ // shl nsw/nuw can't remove any non-zero bits.
+ if (MI->getFlag(MachineInstr::MIFlag::NoUWrap) ||
+ MI->getFlag(MachineInstr::MIFlag::NoSWrap))
+ return ::isKnownNonZero(MI->getOperand(1).getReg(), MRI, KB, Depth);
+
+ // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
+ // if the lowest bit is shifted off the end.
+ KnownBits Known = KB->getKnownBits(MI->getOperand(1).getReg());
+ if (Known.One[0])
+ return true;
+
+ return isNonZeroShift(MI, MRI, KB, Depth, Known);
+ }
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_ASHR: {
+ // shr exact can only shift out zero bits.
+ if (MI->getFlag(MachineInstr::MIFlag::IsExact))
+ return ::isKnownNonZero(MI->getOperand(1).getReg(), MRI, KB, Depth);
+
+ // shr X, Y != 0 if X is negative. Note that the value of the shift is not
+ // defined if the sign bit is shifted off the end.
+ KnownBits Known = KB->getKnownBits(MI->getOperand(1).getReg());
+ if (Known.isNegative())
+ return true;
+
+ return isNonZeroShift(MI, MRI, KB, Depth, Known);
+ }
+ case TargetOpcode::G_FREEZE:
+ return ::isKnownNonZero(MI->getOperand(1).getReg(), MRI, KB, Depth) &&
+ ::isGuaranteedNotToBePoison(MI->getOperand(1).getReg(), MRI, Depth);
+ case TargetOpcode::G_SMIN: {
+ // If either arg is negative the result is non-zero. Otherwise
+ // the result is non-zero if both ops are non-zero.
+ KnownBits Op1Known = KB->getKnownBits(MI->getOperand(2).getReg());
+ if (Op1Known.isNegative())
+ return true;
+ KnownBits Op0Known = KB->getKnownBits(MI->getOperand(1).getReg());
+ if (Op0Known.isNegative())
+ return true;
+
+ if (Op1Known.isNonZero() && Op0Known.isNonZero())
+ return true;
+ }
+ [[fallthrough]];
+ case TargetOpcode::G_UMIN:
+ return ::isKnownNonZero(MI->getOperand(1).getReg(), MRI, KB, Depth) &&
+ ::isKnownNonZero(MI->getOperand(2).getReg(), MRI, KB, Depth);
+ default:
+ return false;
+ }
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
index de3f323891a36a..816f7c3debcd34 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -2655,13 +2655,15 @@ define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) {
define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_umin_i8:
; CHECK-NOLSE-O1: ; %bb.0:
-; CHECK-NOLSE-O1-NEXT: and w9, w1, #0xff
+; CHECK-NOLSE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1
+; CHECK-NOLSE-O1-NEXT: and x9, x1, #0xff
; CHECK-NOLSE-O1-NEXT: LBB35_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NOLSE-O1-NEXT: ldaxrb w8, [x0]
-; CHECK-NOLSE-O1-NEXT: and w8, w8, #0xff
-; CHECK-NOLSE-O1-NEXT: cmp w8, w9
-; CHECK-NOLSE-O1-NEXT: csel w10, w8, w9, lo
+; CHECK-NOLSE-O1-NEXT: ldaxrb w10, [x0]
+; CHECK-NOLSE-O1-NEXT: and w8, w10, #0xff
+; CHECK-NOLSE-O1-NEXT: and x10, x10, #0xff
+; CHECK-NOLSE-O1-NEXT: cmp w8, w1, uxtb
+; CHECK-NOLSE-O1-NEXT: csel x10, x10, x9, ls
; CHECK-NOLSE-O1-NEXT: stlxrb w11, w10, [x0]
; CHECK-NOLSE-O1-NEXT: cbnz w11, LBB35_1
; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -2670,13 +2672,15 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) {
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_umin_i8:
; CHECK-OUTLINE-O1: ; %bb.0:
-; CHECK-OUTLINE-O1-NEXT: and w9, w1, #0xff
+; CHECK-OUTLINE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1
+; CHECK-OUTLINE-O1-NEXT: and x9, x1, #0xff
; CHECK-OUTLINE-O1-NEXT: LBB35_1: ; %atomicrmw.start
; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-OUTLINE-O1-NEXT: ldaxrb w8, [x0]
-; CHECK-OUTLINE-O1-NEXT: and w8, w8, #0xff
-; CHECK-OUTLINE-O1-NEXT: cmp w8, w9
-; CHECK-OUTLINE-O1-NEXT: csel w10, w8, w9, lo
+; CHECK-OUTLINE-O1-NEXT: ldaxrb w10, [x0]
+; CHECK-OUTLINE-O1-NEXT: and w8, w10, #0xff
+; CHECK-OUTLINE-O1-NEXT: and x10, x10, #0xff
+; CHECK-OUTLINE-O1-NEXT: cmp w8, w1, uxtb
+; CHECK-OUTLINE-O1-NEXT: csel x10, x10, x9, ls
; CHECK-OUTLINE-O1-NEXT: stlxrb w11, w10, [x0]
; CHECK-OUTLINE-O1-NEXT: cbnz w11, LBB35_1
; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -2777,13 +2781,15 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) {
define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_umax_i8:
; CHECK-NOLSE-O1: ; %bb.0:
-; CHECK-NOLSE-O1-NEXT: and w9, w1, #0xff
+; CHECK-NOLSE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1
+; CHECK-NOLSE-O1-NEXT: and x9, x1, #0xff
; CHECK-NOLSE-O1-NEXT: LBB36_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NOLSE-O1-NEXT: ldxrb w8, [x0]
-; CHECK-NOLSE-O1-NEXT: and w8, w8, #0xff
-; CHECK-NOLSE-O1-NEXT: cmp w8, w9
-; CHECK-NOLSE-O1-NEXT: csel w10, w8, w9, hi
+; CHECK-NOLSE-O1-NEXT: ldxrb w10, [x0]
+; CHECK-NOLSE-O1-NEXT: and w8, w10, #0xff
+; CHECK-NOLSE-O1-NEXT: and x10, x10, #0xff
+; CHECK-NOLSE-O1-NEXT: cmp w8, w1, uxtb
+; CHECK-NOLSE-O1-NEXT: csel x10, x10, x9, hi
; CHECK-NOLSE-O1-NEXT: stxrb w11, w10, [x0]
; CHECK-NOLSE-O1-NEXT: cbnz w11, LBB36_1
; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -2792,13 +2798,15 @@ define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) {
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_umax_i8:
; CHECK-OUTLINE-O1: ; %bb.0:
-; CHECK-OUTLINE-O1-NEXT: and w9, w1, #0xff
+; CHECK-OUTLINE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1
+; CHECK-OUTLINE-O1-NEXT: and x9, x1, #0xff
; CHECK-OUTLINE-O1-NEXT: LBB36_1: ; %atomicrmw.start
; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-OUTLINE-O1-NEXT: ldxrb w8, [x0]
-; CHECK-OUTLINE-O1-NEXT: and w8, w8, #0xff
-; CHECK-OUTLINE-O1-NEXT: cmp w8, w9
-; CHECK-OUTLINE-O1-NEXT: csel w10, w8, w9, hi
+; CHECK-OUTLINE-O1-NEXT: ldxrb w10, [x0]
+; CHECK-OUTLINE-O1-NEXT: and w8, w10, #0xff
+; CHECK-OUTLINE-O1-NEXT: and x10, x10, #0xff
+; CHECK-OUTLINE-O1-NEXT: cmp w8, w1, uxtb
+; CHECK-OUTLINE-O1-NEXT: csel x10, x10, x9, hi
; CHECK-OUTLINE-O1-NEXT: stxrb w11, w10, [x0]
; CHECK-OUTLINE-O1-NEXT: cbnz w11, LBB36_1
; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -3710,13 +3718,15 @@ define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) {
define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_umin_i16:
; CHECK-NOLSE-O1: ; %bb.0:
-; CHECK-NOLSE-O1-NEXT: and w9, w1, #0xffff
+; CHECK-NOLSE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1
+; CHECK-NOLSE-O1-NEXT: and x9, x1, #0xffff
; CHECK-NOLSE-O1-NEXT: LBB45_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NOLSE-O1-NEXT: ldaxrh w8, [x0]
-; CHECK-NOLSE-O1-NEXT: and w8, w8, #0xffff
-; CHECK-NOLSE-O1-NEXT: cmp w8, w9
-; CHECK-NOLSE-O1-NEXT: csel w10, w8, w9, lo
+; CHECK-NOLSE-O1-NEXT: ldaxrh w10, [x0]
+; CHECK-NOLSE-O1-NEXT: and w8, w10, #0xffff
+; CHECK-NOLSE-O1-NEXT: and x10, x10, #0xffff
+; CHECK-NOLSE-O1-NEXT: cmp w8, w1, uxth
+; CHECK-NOLSE-O1-NEXT: csel x10, x10, x9, ls
; CHECK-NOLSE-O1-NEXT: stlxrh w11, w10, [x0]
; CHECK-NOLSE-O1-NEXT: cbnz w11, LBB45_1
; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -3725,13 +3735,15 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) {
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_umin_i16:
; CHECK-OUTLINE-O1: ; %bb.0:
-; CHECK-OUTLINE-O1-NEXT: and w9, w1, #0xffff
+; CHECK-OUTLINE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1
+; CHECK-OUTLINE-O1-NEXT: and x9, x1, #0xffff
; CHECK-OUTLINE-O1-NEXT: LBB45_1: ; %atomicrmw.start
; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-OUTLINE-O1-NEXT: ldaxrh w8, [x0]
-; CHECK-OUTLINE-O1-NEXT: and w8, w8, #0xffff
-; CHECK-OUTLINE-O1-NEXT: cmp w8, w9
-; CHECK-OUTLINE-O1-NEXT: csel w10, w8, w9, lo
+; CHECK-OUTLINE-O1-NEXT: ldaxrh w10, [x0]
+; CHECK-OUTLINE-O1-NEXT: and w8, w10, #0xffff
+; CHECK-OUTLINE-O1-NEXT: and x10, x10, #0xffff
+; CHECK-OUTLINE-O1-NEXT: cmp w8, w1, uxth
+; CHECK-OUTLINE-O1-NEXT: csel x10, x10, x9, ls
; CHECK-OUTLINE-O1-NEXT: stlxrh w11, w10, [x0]
; CHECK-OUTLINE-O1-NEXT: cbnz w11, LBB45_1
; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -3832,13 +3844,15 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) {
define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_umax_i16:
; CHECK-NOLSE-O1: ; %bb.0:
-; CHECK-NOLSE-O1-NEXT: and w9, w1, #0xffff
+; CHECK-NOLSE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1
+; CHECK-NOLSE-O1-NEXT: and x9, x1, #0xffff
; CHECK-NOLSE-O1-NEXT: LBB46_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NOLSE-O1-NEXT: ldxrh w8, [x0]
-; CHECK-NOLSE-O1-NEXT: and w8, w8, #0xffff
-; CHECK-NOLSE-O1-NEXT: cmp w8, w9
-; CHECK-NOLSE-O1-NEXT: csel w10, w8, w9, hi
+; CHECK-NOLSE-O1-NEXT: ldxrh w10, [x0]
+; CHECK-NOLSE-O1-NEXT: and w8, w10, #0xffff
+; CHECK-NOLSE-O1-NEXT: and x10, x10, #0xffff
+; CHECK-NOLSE-O1-NEXT: cmp w8, w1, uxth
+; CHECK-NOLSE-O1-NEXT: csel x10, x10, x9, hi
; CHECK-NOLSE-O1-NEXT: stxrh w11, w10, [x0]
; CHECK-NOLSE-O1-NEXT: cbnz w11, LBB46_1
; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -3847,13 +3861,15 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) {
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_umax_i16:
; CHECK-OUTLINE-O1: ; %bb.0:
-; CHECK-OUTLINE-O1-NEXT: and w9, w1, #0xffff
+; CHECK-OUTLINE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1
+; CHECK-OUTLINE-O1-NEXT: and x9, x1, #0xffff
; CHECK-OUTLINE-O1-NEXT: LBB46_1: ; %atomicrmw.start
; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-OUTLINE-O1-NEXT: ldxrh w8, [x0]
-; CHECK-OUTLINE-O1-NEXT: and w8, w8, #0xffff
-; CHECK-OUTLINE-O1-NEXT: cmp w8, w9
-; CHECK-OUTLINE-O1-NEXT: csel w10, w8, w9, hi
+; CHECK-OUTLINE-O1-NEXT: ldxrh w10, [x0]
+; CHECK-OUTLINE-O1-NEXT: and w8, w10, #0xffff
+; CHECK-OUTLINE-O1-NEXT: and x10, x10, #0xffff
+; CHECK-OUTLINE-O1-NEXT: cmp w8, w1, uxth
+; CHECK-OUTLINE-O1-NEXT: csel x10, x10, x9, hi
; CHECK-OUTLINE-O1-NEXT: stxrh w11, w10, [x0]
; CHECK-OUTLINE-O1-NEXT: cbnz w11, LBB46_1
; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
index c6819ff39ed33e..0e4750d3815922 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
@@ -919,16 +919,18 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w9 = ANDWri killed renamable $w1, 7
+ ; CHECK-NEXT: renamable $w1 = KILL $w1, implicit-def $x1
+ ; CHECK-NEXT: renamable $x9 = ANDXri renamable $x1, 4103, pcsections !0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
- ; CHECK-NEXT: liveins: $w9, $x0
+ ; CHECK-NEXT: liveins: $x0, $x1, $x9
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
- ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 7, implicit killed $x8
- ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
- ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def $x10, pcsections !0
+ ; CHECK-NEXT: renamable $w10 = LDAXRB renamable $x0, implicit-def $x10, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w10, 7
+ ; CHECK-NEXT: renamable $x10 = ANDXri killed renamable $x10, 4103, pcsections !0
+ ; CHECK-NEXT: dead $wzr = SUBSWrx renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
+ ; CHECK-NEXT: renamable $x10 = CSELXr killed renamable $x10, renamable $x9, 9, implicit killed $nzcv, pcsections !0
; CHECK-NEXT: early-clobber renamable $w11 = STLXRB renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -947,16 +949,18 @@ define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w9 = ANDWri killed renamable $w1, 7
+ ; CHECK-NEXT: renamable $w1 = KILL $w1, implicit-def $x1
+ ; CHECK-NEXT: renamable $x9 = ANDXri renamable $x1, 4103, pcsections !0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
- ; CHECK-NEXT: liveins: $w9, $x0
+ ; CHECK-NEXT: liveins: $x0, $x1, $x9
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
- ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 7, implicit killed $x8
- ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
- ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def $x10, pcsections !0
+ ; CHECK-NEXT: renamable $w10 = LDXRB renamable $x0, implicit-def $x10, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w10, 7
+ ; CHECK-NEXT: renamable $x10 = ANDXri killed renamable $x10, 4103, pcsections !0
+ ; CHECK-NEXT: dead $wzr = SUBSWrx renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
+ ; CHECK-NEXT: renamable $x10 = CSELXr killed renamable $x10, renamable $x9, 8, implicit killed $nzcv, pcsections !0
; CHECK-NEXT: early-clobber renamable $w11 = STXRB renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -1172,16 +1176,18 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w9 = ANDWri killed renamable $w1, 15
+ ; CHECK-NEXT: renamable $w1 = KILL $w1, implicit-def $x1
+ ; CHECK-NEXT: renamable $x9 = ANDXri renamable $x1, 4111, pcsections !0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
- ; CHECK-NEXT: liveins: $w9, $x0
+ ; CHECK-NEXT: liveins: $x0, $x1, $x9
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
- ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 15, implicit killed $x8
- ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
- ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def $x10, pcsections !0
+ ; CHECK-NEXT: renamable $w10 = LDAXRH renamable $x0, implicit-def $x10, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w10, 15
+ ; CHECK-NEXT: renamable $x10 = ANDXri killed renamable $x10, 4111, pcsections !0
+ ; CHECK-NEXT: dead $wzr = SUBSWrx renamable $w8, renamable $w1, 8, implicit-def $nzcv, pcsections !0
+ ; CHECK-NEXT: renamable $x10 = CSELXr killed renamable $x10, renamable $x9, 9, implicit killed $nzcv, pcsections !0
; CHECK-NEXT: early-clobber renamable $w11 = STLXRH renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s16) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -1200,16 +1206,18 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w9 = ANDWri killed renamable $w1, 15
+ ; CHECK-NEXT: renamable $w1 = KILL $w1, implicit-def $x1
+ ; CHECK-NEXT: renamable $x9 = ANDXri renamable $x1, 4111, pcsections !0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
- ; CHECK-NEXT: liveins: $w9, $x0
+ ; CHECK-NEXT: liveins: $x0, $x1, $x9
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
- ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 15, implicit killed $x8
- ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
- ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def $x10, pcsections !0
+ ; CHECK-NEXT: renamable $w10 = LDXRH renamable $x0, implicit-def $x10, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w10, 15
+ ; CHECK-NEXT: renamable $x10 = ANDXri killed renamable $x10, 4111, pcsections !0
+ ; CHECK-NEXT: dead $wzr = SUBSWrx renamable $w8, renamable $w1, 8, implicit-def $nzcv, pcsections !0
+ ; CHECK-NEXT: renamable $x10 = CSELXr killed renamable $x10, renamable $x9, 8, implicit killed $nzcv, pcsections !0
; CHECK-NEXT: early-clobber renamable $w11 = STXRH renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s16) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir
new file mode 100644
index 00000000000000..e0eaa6d63b7fc1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir
@@ -0,0 +1,167 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s --check-prefixes=CHECK
+
+---
+name: test_icmp_of_eq_and_right_undef
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_icmp_of_eq_and_right_undef
+ ; CHECK: %res:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $w0 = COPY %res(s32)
+ %lhs:_(s64) = COPY $x0
+ %rhs:_(s64) = G_IMPLICIT_DEF
+ %res:_(s32) = G_ICMP intpred(ne), %lhs(s64), %rhs
+ $w0 = COPY %res(s32)
+...
+---
+name: test_icmp_of_not_eq_and_right_undef
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_icmp_of_not_eq_and_right_undef
+ ; CHECK: %res:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: $w0 = COPY %res(s32)
+ %lhs:_(s64) = COPY $x0
+ %rhs:_(s64) = G_IMPLICIT_DEF
+ %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %rhs
+ $w0 = COPY %res(s32)
+...
+---
+name: test_icmp_of_is_eq_and_right_undef
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_icmp_of_is_eq_and_right_undef
+ ; CHECK: %res:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $w0 = COPY %res(s32)
+ %lhs:_(s64) = COPY $x0
+ %rhs:_(s64) = G_IMPLICIT_DEF
+ %res:_(s32) = G_ICMP intpred(ne), %lhs(s64), %rhs
+ $w0 = COPY %res(s32)
+...
+---
+name: test_icmp_of_eq_not_eq
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_icmp_of_eq_not_eq
+ ; CHECK: %res:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: $w0 = COPY %res(s32)
+ %lhs:_(s64) = COPY $x0
+ %rhs:_(s64) = COPY $x0
+ %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %lhs
+ $w0 = COPY %res(s32)
+...
+---
+name: test_icmp_of_eq_is_eq
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_icmp_of_eq_is_eq
+ ; CHECK: %res:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: $w0 = COPY %res(s32)
+ %lhs:_(s64) = COPY $x0
+ %rhs:_(s64) = COPY $x0
+ %res:_(s32) = G_ICMP intpred(eq), %lhs(s64), %lhs
+ $w0 = COPY %res(s32)
+...
+---
+name: test_icmp_of_zext_and_zext
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_icmp_of_zext_and_zext
+ ; CHECK: %lhs1:_(s32) = COPY $w0
+ ; CHECK-NEXT: %rhs1:_(s32) = COPY $w0
+ ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(ugt), %lhs1(s32), %rhs1
+ ; CHECK-NEXT: $w0 = COPY %res(s32)
+ %lhs1:_(s32) = COPY $w0
+ %rhs1:_(s32) = COPY $w0
+ %lhs:_(s64) = G_ZEXT %lhs1
+ %rhs:_(s64) = G_ZEXT %rhs1
+ %res:_(s32) = G_ICMP intpred(sgt), %lhs(s64), %rhs
+ $w0 = COPY %res(s32)
+...
+---
+name: test_icmp_of_sext_and_sext
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_icmp_of_sext_and_sext
+ ; CHECK: %lhs1:_(s32) = COPY $w0
+ ; CHECK-NEXT: %rhs1:_(s32) = COPY $w0
+ ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(sgt), %lhs1(s32), %rhs1
+ ; CHECK-NEXT: $w0 = COPY %res(s32)
+ %lhs1:_(s32) = COPY $w0
+ %rhs1:_(s32) = COPY $w0
+ %lhs:_(s64) = G_SEXT %lhs1
+ %rhs:_(s64) = G_SEXT %rhs1
+ %res:_(s32) = G_ICMP intpred(sgt), %lhs(s64), %rhs
+ $w0 = COPY %res(s32)
+...
+---
+name: test_ugt_icmp_of_sext_and_sext
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_ugt_icmp_of_sext_and_sext
+ ; CHECK: %lhs1:_(s32) = COPY $w0
+ ; CHECK-NEXT: %rhs1:_(s32) = COPY $w0
+ ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(ugt), %lhs1(s32), %rhs1
+ ; CHECK-NEXT: $w0 = COPY %res(s32)
+ %lhs1:_(s32) = COPY $w0
+ %rhs1:_(s32) = COPY $w0
+ %lhs:_(s64) = G_SEXT %lhs1
+ %rhs:_(s64) = G_SEXT %rhs1
+ %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %rhs
+ $w0 = COPY %res(s32)
+...
+---
+name: test_uge_icmp_of_zero
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_uge_icmp_of_zero
+ ; CHECK: %res:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: $w0 = COPY %res(s32)
+ %lhs:_(s64) = COPY $x0
+ %zero:_(s64) = G_CONSTANT i64 0
+ %res:_(s32) = G_ICMP intpred(uge), %lhs(s64), %zero
+ $w0 = COPY %res(s32)
+...
+---
+name: test_slt_icmp_of_zero
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_slt_icmp_of_zero
+ ; CHECK: %lhs:_(s64) = COPY $x0
+ ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(slt), %lhs(s64), %zero
+ ; CHECK-NEXT: $w0 = COPY %res(s32)
+ %lhs:_(s64) = COPY $x0
+ %zero:_(s64) = G_CONSTANT i64 0
+ %res:_(s32) = G_ICMP intpred(slt), %lhs(s64), %zero
+ $w0 = COPY %res(s32)
+...
+---
+name: test_ugt_icmp_of_zero_known_non_zero
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_ugt_icmp_of_zero_known_non_zero
+ ; CHECK: %res:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: $w0 = COPY %res(s32)
+ %amount:_(s64) = COPY $x0
+ %zero:_(s64) = G_CONSTANT i64 0
+ %lhs:_(s64) = G_SHL %zero, %amount
+ %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %zero
+ $w0 = COPY %res(s32)
+...
+---
+name: test_ugt_icmp_of_zero_xor
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_ugt_icmp_of_zero_xor
+ ; CHECK: %x:_(s64) = COPY $x0
+ ; CHECK-NEXT: %y:_(s64) = COPY $x0
+ ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: %lhs:_(s64) = G_XOR %x, %y
+ ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %zero
+ ; CHECK-NEXT: $w0 = COPY %res(s32)
+ %x:_(s64) = COPY $x0
+ %y:_(s64) = COPY $x0
+ %zero:_(s64) = G_CONSTANT i64 0
+ %lhs:_(s64) = G_XOR %x, %y
+ %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %zero
+ $w0 = COPY %res(s32)
diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
index 50afc79a5a5768..06e957fdcc6a2a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -457,20 +457,12 @@ sw.bb.i.i:
}
define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
-; SDISEL-LABEL: select_and:
-; SDISEL: ; %bb.0:
-; SDISEL-NEXT: cmp w1, #5
-; SDISEL-NEXT: ccmp w0, w1, #0, ne
-; SDISEL-NEXT: csel x0, x2, x3, lt
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: select_and:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov w8, #5 ; =0x5
-; GISEL-NEXT: cmp w8, w1
-; GISEL-NEXT: ccmp w0, w1, #0, ne
-; GISEL-NEXT: csel x0, x2, x3, lt
-; GISEL-NEXT: ret
+; CHECK-LABEL: select_and:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: cmp w1, #5
+; CHECK-NEXT: ccmp w0, w1, #0, ne
+; CHECK-NEXT: csel x0, x2, x3, lt
+; CHECK-NEXT: ret
%1 = icmp slt i32 %w0, %w1
%2 = icmp ne i32 5, %w1
%3 = and i1 %1, %2
@@ -479,20 +471,12 @@ define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
}
define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
-; SDISEL-LABEL: select_or:
-; SDISEL: ; %bb.0:
-; SDISEL-NEXT: cmp w1, #5
-; SDISEL-NEXT: ccmp w0, w1, #8, eq
-; SDISEL-NEXT: csel x0, x2, x3, lt
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: select_or:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov w8, #5 ; =0x5
-; GISEL-NEXT: cmp w8, w1
-; GISEL-NEXT: ccmp w0, w1, #8, eq
-; GISEL-NEXT: csel x0, x2, x3, lt
-; GISEL-NEXT: ret
+; CHECK-LABEL: select_or:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: cmp w1, #5
+; CHECK-NEXT: ccmp w0, w1, #8, eq
+; CHECK-NEXT: csel x0, x2, x3, lt
+; CHECK-NEXT: ret
%1 = icmp slt i32 %w0, %w1
%2 = icmp ne i32 5, %w1
%3 = or i1 %1, %2
@@ -501,20 +485,12 @@ define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
}
define float @select_or_float(i32 %w0, i32 %w1, float %x2, float %x3) {
-; SDISEL-LABEL: select_or_float:
-; SDISEL: ; %bb.0:
-; SDISEL-NEXT: cmp w1, #5
-; SDISEL-NEXT: ccmp w0, w1, #8, eq
-; SDISEL-NEXT: fcsel s0, s0, s1, lt
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: select_or_float:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov w8, #5 ; =0x5
-; GISEL-NEXT: cmp w8, w1
-; GISEL-NEXT: ccmp w0, w1, #8, eq
-; GISEL-NEXT: fcsel s0, s0, s1, lt
-; GISEL-NEXT: ret
+; CHECK-LABEL: select_or_float:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: cmp w1, #5
+; CHECK-NEXT: ccmp w0, w1, #8, eq
+; CHECK-NEXT: fcsel s0, s0, s1, lt
+; CHECK-NEXT: ret
%1 = icmp slt i32 %w0, %w1
%2 = icmp ne i32 5, %w1
%3 = or i1 %1, %2
diff --git a/llvm/test/CodeGen/AArch64/icmp2.ll b/llvm/test/CodeGen/AArch64/icmp2.ll
new file mode 100644
index 00000000000000..1ab8d0e3331b35
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/icmp2.ll
@@ -0,0 +1,295 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+define i1 @i64_i64_canon(i64 %a, i64 %b) {
+; CHECK-SD-LABEL: i64_i64_canon:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp x0, #0
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: i64_i64_canon:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp x0, #0
+; CHECK-GI-NEXT: cset w0, ls
+; CHECK-GI-NEXT: ret
+entry:
+ %c = icmp uge i64 0, %a
+ ret i1 %c
+}
+
+define <2 x i1> @i64_i64_canon_2x64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: i64_i64_canon_2x64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: adrp x8, .LCPI1_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
+; CHECK-NEXT: cmhs v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %c = icmp uge <2 x i64> <i64 42, i64 11>, %a
+ ret <2 x i1> %c
+}
+
+define i1 @i64_i64_undef_eq(i64 %a, i64 %b) {
+; CHECK-LABEL: i64_i64_undef_eq:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
+entry:
+ %c = icmp eq i64 %a, undef
+ ret i1 %c
+}
+
+define i1 @i64_i64_slt_eq(i64 %a, i64 %b) {
+; CHECK-LABEL: i64_i64_slt_eq:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
+entry:
+ %c = icmp slt i64 %a, %a
+ ret i1 %c
+}
+
+define i1 @i64_i64_not_eq_undef(i64 %a, i64 %b) {
+; CHECK-LABEL: i64_i64_not_eq_undef:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
+entry:
+ %c = icmp slt i64 %a, undef
+ ret i1 %c
+}
+
+define i1 @i64_i64_sext(i32 %a, i32 %b) {
+; CHECK-SD-LABEL: i64_i64_sext:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT: sxtw x8, w0
+; CHECK-SD-NEXT: cmp x8, w1, sxtw
+; CHECK-SD-NEXT: cset w0, lt
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: i64_i64_sext:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w0, lt
+; CHECK-GI-NEXT: ret
+entry:
+ %sextedlhs = sext i32 %a to i64
+ %sextedrhs = sext i32 %b to i64
+ %c = icmp slt i64 %sextedlhs, %sextedrhs
+ ret i1 %c
+}
+
+define i1 @i64_i64_zext(i32 %a, i32 %b) {
+; CHECK-SD-LABEL: i64_i64_zext:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov w8, w0
+; CHECK-SD-NEXT: cmp x8, w1, uxtw
+; CHECK-SD-NEXT: cset w0, lt
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: i64_i64_zext:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w0, lo
+; CHECK-GI-NEXT: ret
+entry:
+ %zextedlhs = zext i32 %a to i64
+ %zextedrhs = zext i32 %b to i64
+ %c = icmp slt i64 %zextedlhs, %zextedrhs
+ ret i1 %c
+}
+
+define i1 @i64_i64_ule_or(i64 %a, i64 %b, i64 %c) {
+; CHECK-SD-LABEL: i64_i64_ule_or:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: orr x8, x2, #0x1
+; CHECK-SD-NEXT: cmp x8, #0
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: i64_i64_ule_or:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov w0, wzr
+; CHECK-GI-NEXT: ret
+entry:
+ %or = or i64 1, %c
+ %cmp = icmp ule i64 %or, 0
+ ret i1 %cmp
+}
+
+define i1 @i64_i64_ugt_or(i64 %a, i64 %b, i64 %c) {
+; CHECK-SD-LABEL: i64_i64_ugt_or:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: orr x8, x2, #0x1
+; CHECK-SD-NEXT: cmp x8, #0
+; CHECK-SD-NEXT: cset w0, ne
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: i64_i64_ugt_or:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov w0, #1 // =0x1
+; CHECK-GI-NEXT: ret
+entry:
+ %or = or i64 1, %c
+ %cmp = icmp ugt i64 %or, 0
+ ret i1 %cmp
+}
+
+define i1 @i64_i64_eq_or(i64 %a, i64 %b, i64 %c) {
+; CHECK-SD-LABEL: i64_i64_eq_or:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: orr x8, x2, #0x1
+; CHECK-SD-NEXT: cmp x8, #0
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: i64_i64_eq_or:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov w0, wzr
+; CHECK-GI-NEXT: ret
+entry:
+ %or = or i64 1, %c
+ %cmp = icmp eq i64 %or, 0
+ ret i1 %cmp
+}
+
+define i1 @i64_i64_eq_freeze_or(i64 %a, i64 %b, i64 %c) {
+; CHECK-SD-LABEL: i64_i64_eq_freeze_or:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: orr x8, x2, #0x1
+; CHECK-SD-NEXT: cmp x8, #0
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: i64_i64_eq_freeze_or:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov w0, wzr
+; CHECK-GI-NEXT: ret
+entry:
+ %or = or i64 1, %c
+ %free = freeze i64 %or
+ %cmp = icmp eq i64 %free, 0
+ ret i1 %cmp
+}
+
+define i1 @i64_i64_eq_freeze_add(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: i64_i64_eq_freeze_add:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmn x2, #1
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+entry:
+ %add = add nuw i64 1, %c
+ %free = freeze i64 %add
+ %cmp = icmp eq i64 %free, 0
+ ret i1 %cmp
+}
+
+define i1 @i64_i64_eq_lshr(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: i64_i64_eq_lshr:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
+entry:
+ %lshr = lshr exact i64 1, %c
+ %cmp = icmp eq i64 %lshr, 0
+ ret i1 %cmp
+}
+
+define i1 @i64_i64_eq_zext(i64 %a, i64 %b, i32 %c) {
+; CHECK-SD-LABEL: i64_i64_eq_zext:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: orr w8, w2, #0x1
+; CHECK-SD-NEXT: cmp w8, #0
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: i64_i64_eq_zext:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov w0, wzr
+; CHECK-GI-NEXT: ret
+entry:
+ %or = or i32 1, %c
+ %ze = zext i32 %or to i64
+ %cmp = icmp eq i64 %ze, 0
+ ret i1 %cmp
+}
+
+define i1 @i64_i64_canon_ule(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: i64_i64_canon_ule:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: ret
+entry:
+ %cmp = icmp ule i64 0, %a
+ ret i1 %cmp
+}
+
+define i1 @i64_i64_canon_ugt(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: i64_i64_canon_ugt:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
+entry:
+ %cmp = icmp ugt i64 0, %a
+ ret i1 %cmp
+}
+
+define i1 @i64_i64_trunc_eq(i64 %a, i64 %b, i64 %c) {
+; CHECK-SD-LABEL: i64_i64_trunc_eq:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: orr w8, w2, #0x1
+; CHECK-SD-NEXT: cmp w8, #0
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: i64_i64_trunc_eq:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov w0, wzr
+; CHECK-GI-NEXT: ret
+entry:
+ %or = or i64 1, %c
+ %tr = trunc nsw i64 %or to i32
+ %cmp = icmp eq i32 %tr, 0
+ ret i1 %cmp
+}
+
+define i1 @i64_i64_umin_eq(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: i64_i64_umin_eq:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: orr x8, x2, #0x1
+; CHECK-NEXT: orr x9, x2, #0x2
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: csel x8, x8, x9, lo
+; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+entry:
+ %or1 = or i64 1, %c
+ %or2 = or i64 2, %c
+ %umin = call i64 @llvm.umin.i64(i64 %or1, i64 %or2)
+ %cmp = icmp eq i64 %umin, 0
+ ret i1 %cmp
+}
+
+define i1 @i64_i64_smin_eq(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: i64_i64_smin_eq:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: orr x8, x2, #0x1
+; CHECK-NEXT: orr x9, x2, #0x2
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: csel x8, x8, x9, lt
+; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+entry:
+ %or1 = or i64 1, %c
+ %or2 = or i64 2, %c
+ %smin = call i64 @llvm.smin.i64(i64 %or1, i64 %or2)
+ %cmp = icmp eq i64 %smin, 0
+ ret i1 %cmp
+}
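For context, and not part of the patch itself: the zero-RHS tests above fold because the combine first proves the LHS non-zero (`or i64 1, %c` and `add nuw i64 1, %c` can never be zero, and freeze, zext, and nsw trunc preserve that fact), after which the predicate against zero collapses to a constant. A minimal C++ sketch of that final step, with invented names; the real logic lives in matchCmpOfZero:

#include <optional>

enum class Pred { EQ, NE, UGT, UGE, ULT, ULE };

// Fold `icmp pred X, 0` given that X is known non-zero. ULT/UGE fold for
// *any* X (nothing is unsigned-less-than zero), which is what the
// canon_ule/canon_ugt tests exercise.
std::optional<bool> foldCmpOfKnownNonZeroWithZero(Pred P) {
  switch (P) {
  case Pred::EQ:  return false; // X == 0 is false
  case Pred::NE:  return true;  // X != 0 is true
  case Pred::UGT: return true;  // X >u 0 holds for non-zero X
  case Pred::ULE: return false; // X <=u 0 would force X == 0
  case Pred::ULT: return false; // no value is <u 0
  case Pred::UGE: return true;  // every value is >=u 0
  }
  return std::nullopt; // signed predicates are omitted: non-zeroness alone
                       // does not decide them
}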
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll
index b1cdf553b72423..0b66185d25f3e2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll
@@ -253,7 +253,7 @@ define double @v_rcp_f64(double %x) {
; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
; GFX6-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; GFX6-NEXT: v_mov_b32_e32 v10, 0x3ff00000
-; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
; GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
@@ -346,7 +346,7 @@ define double @v_rcp_f64_arcp(double %x) {
; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
; GFX6-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; GFX6-NEXT: v_mov_b32_e32 v10, 0x3ff00000
-; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
; GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
@@ -483,7 +483,7 @@ define double @v_rcp_f64_ulp25(double %x) {
; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
; GFX6-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; GFX6-NEXT: v_mov_b32_e32 v10, 0x3ff00000
-; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
; GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
@@ -1115,7 +1115,7 @@ define <2 x double> @v_rcp_v2f64(<2 x double> %x) {
; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0
; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15]
; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7]
-; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v9
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v20
; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7]
; GFX6-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
; GFX6-NEXT: v_fma_f64 v[18:19], -v[4:5], v[12:13], v[8:9]
@@ -1126,7 +1126,7 @@ define <2 x double> @v_rcp_v2f64(<2 x double> %x) {
; GFX6-NEXT: v_mul_f64 v[8:9], v[16:17], v[4:5]
; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[18:19], v[6:7], v[12:13]
; GFX6-NEXT: v_fma_f64 v[12:13], -v[10:11], v[8:9], v[16:17]
-; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v17
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v17, v20
; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0
@@ -1275,7 +1275,7 @@ define <2 x double> @v_rcp_v2f64_arcp(<2 x double> %x) {
; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0
; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15]
; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7]
-; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v9
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v20
; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7]
; GFX6-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
; GFX6-NEXT: v_fma_f64 v[18:19], -v[4:5], v[12:13], v[8:9]
@@ -1286,7 +1286,7 @@ define <2 x double> @v_rcp_v2f64_arcp(<2 x double> %x) {
; GFX6-NEXT: v_mul_f64 v[8:9], v[16:17], v[4:5]
; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[18:19], v[6:7], v[12:13]
; GFX6-NEXT: v_fma_f64 v[12:13], -v[10:11], v[8:9], v[16:17]
-; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v17
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v17, v20
; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0
@@ -1502,7 +1502,7 @@ define <2 x double> @v_rcp_v2f64_ulp25(<2 x double> %x) {
; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0
; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15]
; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7]
-; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v9
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v20
; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7]
; GFX6-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
; GFX6-NEXT: v_fma_f64 v[18:19], -v[4:5], v[12:13], v[8:9]
@@ -1513,7 +1513,7 @@ define <2 x double> @v_rcp_v2f64_ulp25(<2 x double> %x) {
; GFX6-NEXT: v_mul_f64 v[8:9], v[16:17], v[4:5]
; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[18:19], v[6:7], v[12:13]
; GFX6-NEXT: v_fma_f64 v[12:13], -v[10:11], v[8:9], v[16:17]
-; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v17
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v17, v20
; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0
diff --git a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
index c5198cdb421a50..98f09db4925ec8 100644
--- a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
@@ -138,7 +138,7 @@ define float @sitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: v_mov_b32_e32 v4, s4
-; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GISEL-NEXT: s_cbranch_execz .LBB0_14
; GISEL-NEXT: ; %bb.1: ; %itofp-if-end
; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v3
@@ -165,8 +165,8 @@ define float @sitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: v_sub_u32_e32 v7, 0x7f, v5
; GISEL-NEXT: v_cmp_ge_i32_e32 vcc, 24, v8
; GISEL-NEXT: ; implicit-def: $vgpr4
-; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
+; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
; GISEL-NEXT: ; %bb.2: ; %itofp-if-else
; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff98, v5
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
@@ -177,16 +177,16 @@ define float @sitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: ; implicit-def: $vgpr5
; GISEL-NEXT: ; implicit-def: $vgpr2
; GISEL-NEXT: ; %bb.3: ; %Flow3
-; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[4:5]
+; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB0_13
; GISEL-NEXT: ; %bb.4: ; %NodeBlock
; GISEL-NEXT: v_cmp_le_i32_e32 vcc, 26, v8
-; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT: s_xor_b64 s[10:11], exec, s[4:5]
+; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
+; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
; GISEL-NEXT: s_cbranch_execz .LBB0_8
; GISEL-NEXT: ; %bb.5: ; %LeafBlock
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 26, v8
-; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc
; GISEL-NEXT: s_cbranch_execz .LBB0_7
; GISEL-NEXT: ; %bb.6: ; %itofp-sw-default
; GISEL-NEXT: v_sub_u32_e32 v4, 0x66, v5
@@ -212,13 +212,10 @@ define float @sitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: v_or_b32_e32 v16, v10, v12
; GISEL-NEXT: v_lshrrev_b64 v[11:12], v14, -1
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5
-; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5
; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v11, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v11, v12, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v11, v15, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v11, v12, v16, vcc
; GISEL-NEXT: v_and_b32_e32 v2, v9, v2
; GISEL-NEXT: v_and_b32_e32 v3, v10, v3
; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2
@@ -231,13 +228,13 @@ define float @sitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: v_mov_b32_e32 v2, v5
; GISEL-NEXT: v_mov_b32_e32 v3, v6
; GISEL-NEXT: .LBB0_7: ; %Flow1
-; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
+; GISEL-NEXT: s_or_b64 exec, exec, s[10:11]
; GISEL-NEXT: .LBB0_8: ; %Flow2
-; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
+; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[8:9]
; GISEL-NEXT: ; %bb.9: ; %itofp-sw-bb
; GISEL-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GISEL-NEXT: ; %bb.10: ; %itofp-sw-epilog
-; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
+; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
; GISEL-NEXT: v_bfe_u32 v2, v0, 2, 1
; GISEL-NEXT: v_or_b32_e32 v0, v0, v2
; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
@@ -246,20 +243,20 @@ define float @sitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: v_mov_b32_e32 v3, 0
; GISEL-NEXT: v_lshrrev_b64 v[4:5], 2, v[0:1]
; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
-; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GISEL-NEXT: ; %bb.11: ; %itofp-if-then20
; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1]
; GISEL-NEXT: v_mov_b32_e32 v7, v8
; GISEL-NEXT: ; %bb.12: ; %Flow
-; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
-; GISEL-NEXT: .LBB0_13: ; %Flow4
; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
+; GISEL-NEXT: .LBB0_13: ; %Flow4
+; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
; GISEL-NEXT: v_and_b32_e32 v0, 0x80000000, v6
; GISEL-NEXT: v_lshl_add_u32 v1, v7, 23, 1.0
; GISEL-NEXT: v_and_b32_e32 v2, 0x7fffff, v4
; GISEL-NEXT: v_or3_b32 v4, v2, v0, v1
; GISEL-NEXT: .LBB0_14: ; %Flow5
-; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
; GISEL-NEXT: v_mov_b32_e32 v0, v4
; GISEL-NEXT: s_setpc_b64 s[30:31]
%cvt = sitofp i128 %x to float
@@ -392,7 +389,7 @@ define float @uitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: v_mov_b32_e32 v4, s4
-; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GISEL-NEXT: s_cbranch_execz .LBB1_14
; GISEL-NEXT: ; %bb.1: ; %itofp-if-end
; GISEL-NEXT: v_ffbh_u32_e32 v5, v0
@@ -410,8 +407,8 @@ define float @uitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: v_sub_u32_e32 v6, 0x7f, v5
; GISEL-NEXT: v_cmp_ge_i32_e32 vcc, 24, v7
; GISEL-NEXT: ; implicit-def: $vgpr4
-; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
+; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
; GISEL-NEXT: ; %bb.2: ; %itofp-if-else
; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff98, v5
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
@@ -422,16 +419,16 @@ define float @uitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: ; implicit-def: $vgpr5
; GISEL-NEXT: ; implicit-def: $vgpr2
; GISEL-NEXT: ; %bb.3: ; %Flow3
-; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[4:5]
+; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB1_13
; GISEL-NEXT: ; %bb.4: ; %NodeBlock
; GISEL-NEXT: v_cmp_le_i32_e32 vcc, 26, v7
-; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT: s_xor_b64 s[10:11], exec, s[4:5]
+; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
+; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
; GISEL-NEXT: s_cbranch_execz .LBB1_8
; GISEL-NEXT: ; %bb.5: ; %LeafBlock
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 26, v7
-; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc
; GISEL-NEXT: s_cbranch_execz .LBB1_7
; GISEL-NEXT: ; %bb.6: ; %itofp-sw-default
; GISEL-NEXT: v_sub_u32_e32 v4, 0x66, v5
@@ -457,13 +454,10 @@ define float @uitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: v_or_b32_e32 v15, v9, v11
; GISEL-NEXT: v_lshrrev_b64 v[10:11], v13, -1
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5
-; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v14, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5
; GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v8, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v10, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v10, v11, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v10, v14, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v15, vcc
; GISEL-NEXT: v_and_b32_e32 v2, v8, v2
; GISEL-NEXT: v_and_b32_e32 v3, v9, v3
; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2
@@ -476,13 +470,13 @@ define float @uitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: v_mov_b32_e32 v2, v5
; GISEL-NEXT: v_mov_b32_e32 v3, v6
; GISEL-NEXT: .LBB1_7: ; %Flow1
-; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
+; GISEL-NEXT: s_or_b64 exec, exec, s[10:11]
; GISEL-NEXT: .LBB1_8: ; %Flow2
-; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
+; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[8:9]
; GISEL-NEXT: ; %bb.9: ; %itofp-sw-bb
; GISEL-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GISEL-NEXT: ; %bb.10: ; %itofp-sw-epilog
-; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
+; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
; GISEL-NEXT: v_bfe_u32 v2, v0, 2, 1
; GISEL-NEXT: v_or_b32_e32 v0, v0, v2
; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
@@ -491,19 +485,19 @@ define float @uitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: v_mov_b32_e32 v3, 0
; GISEL-NEXT: v_lshrrev_b64 v[4:5], 2, v[0:1]
; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
-; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GISEL-NEXT: ; %bb.11: ; %itofp-if-then20
; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1]
; GISEL-NEXT: v_mov_b32_e32 v6, v7
; GISEL-NEXT: ; %bb.12: ; %Flow
-; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
-; GISEL-NEXT: .LBB1_13: ; %Flow4
; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
+; GISEL-NEXT: .LBB1_13: ; %Flow4
+; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
; GISEL-NEXT: v_lshl_add_u32 v0, v6, 23, 1.0
; GISEL-NEXT: v_mov_b32_e32 v1, 0x7fffff
; GISEL-NEXT: v_and_or_b32 v4, v4, v1, v0
; GISEL-NEXT: .LBB1_14: ; %Flow5
-; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
; GISEL-NEXT: v_mov_b32_e32 v0, v4
; GISEL-NEXT: s_setpc_b64 s[30:31]
%cvt = uitofp i128 %x to float
@@ -744,13 +738,10 @@ define double @sitofp_i128_to_f64(i128 %x) {
; GISEL-NEXT: v_or_b32_e32 v17, v10, v12
; GISEL-NEXT: v_lshrrev_b64 v[11:12], v15, -1
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14
-; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v16, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v17, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14
; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v12, v12, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v16, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v17, vcc
; GISEL-NEXT: v_and_b32_e32 v2, v9, v2
; GISEL-NEXT: v_and_b32_e32 v3, v10, v3
; GISEL-NEXT: v_and_or_b32 v0, v11, v0, v2
@@ -1021,13 +1012,10 @@ define double @uitofp_i128_to_f64(i128 %x) {
; GISEL-NEXT: v_or_b32_e32 v17, v5, v13
; GISEL-NEXT: v_lshrrev_b64 v[12:13], v15, -1
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v8
-; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v8, v12, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v12, v13, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e32 v8, v12, v16, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v12, v13, v17, vcc
; GISEL-NEXT: v_and_b32_e32 v2, v4, v2
; GISEL-NEXT: v_and_b32_e32 v3, v5, v3
; GISEL-NEXT: v_and_or_b32 v0, v8, v0, v2
@@ -1229,7 +1217,7 @@ define half @sitofp_i128_to_f16(i128 %x) {
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; GISEL-NEXT: v_mov_b32_e32 v4, s4
-; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GISEL-NEXT: s_cbranch_execz .LBB4_14
; GISEL-NEXT: ; %bb.1: ; %itofp-if-end
; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v3
@@ -1256,8 +1244,8 @@ define half @sitofp_i128_to_f16(i128 %x) {
; GISEL-NEXT: v_sub_u32_e32 v7, 0x7f, v5
; GISEL-NEXT: v_cmp_ge_i32_e32 vcc, 24, v8
; GISEL-NEXT: ; implicit-def: $vgpr4
-; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
+; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
; GISEL-NEXT: ; %bb.2: ; %itofp-if-else
; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff98, v5
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
@@ -1268,16 +1256,16 @@ define half @sitofp_i128_to_f16(i128 %x) {
; GISEL-NEXT: ; implicit-def: $vgpr5
; GISEL-NEXT: ; implicit-def: $vgpr2
; GISEL-NEXT: ; %bb.3: ; %Flow3
-; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[4:5]
+; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB4_13
; GISEL-NEXT: ; %bb.4: ; %NodeBlock
; GISEL-NEXT: v_cmp_le_i32_e32 vcc, 26, v8
-; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT: s_xor_b64 s[10:11], exec, s[4:5]
+; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
+; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
; GISEL-NEXT: s_cbranch_execz .LBB4_8
; GISEL-NEXT: ; %bb.5: ; %LeafBlock
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 26, v8
-; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc
; GISEL-NEXT: s_cbranch_execz .LBB4_7
; GISEL-NEXT: ; %bb.6: ; %itofp-sw-default
; GISEL-NEXT: v_sub_u32_e32 v4, 0x66, v5
@@ -1303,13 +1291,10 @@ define half @sitofp_i128_to_f16(i128 %x) {
; GISEL-NEXT: v_or_b32_e32 v16, v10, v12
; GISEL-NEXT: v_lshrrev_b64 v[11:12], v14, -1
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5
-; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5
; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v11, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v11, v12, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v11, v15, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v11, v12, v16, vcc
; GISEL-NEXT: v_and_b32_e32 v2, v9, v2
; GISEL-NEXT: v_and_b32_e32 v3, v10, v3
; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2
@@ -1322,13 +1307,13 @@ define half @sitofp_i128_to_f16(i128 %x) {
; GISEL-NEXT: v_mov_b32_e32 v2, v5
; GISEL-NEXT: v_mov_b32_e32 v3, v6
; GISEL-NEXT: .LBB4_7: ; %Flow1
-; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
+; GISEL-NEXT: s_or_b64 exec, exec, s[10:11]
; GISEL-NEXT: .LBB4_8: ; %Flow2
-; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
+; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[8:9]
; GISEL-NEXT: ; %bb.9: ; %itofp-sw-bb
; GISEL-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GISEL-NEXT: ; %bb.10: ; %itofp-sw-epilog
-; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
+; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
; GISEL-NEXT: v_bfe_u32 v2, v0, 2, 1
; GISEL-NEXT: v_or_b32_e32 v0, v0, v2
; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
@@ -1337,21 +1322,21 @@ define half @sitofp_i128_to_f16(i128 %x) {
; GISEL-NEXT: v_mov_b32_e32 v3, 0
; GISEL-NEXT: v_lshrrev_b64 v[4:5], 2, v[0:1]
; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
-; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GISEL-NEXT: ; %bb.11: ; %itofp-if-then20
; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1]
; GISEL-NEXT: v_mov_b32_e32 v7, v8
; GISEL-NEXT: ; %bb.12: ; %Flow
-; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
-; GISEL-NEXT: .LBB4_13: ; %Flow4
; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
+; GISEL-NEXT: .LBB4_13: ; %Flow4
+; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
; GISEL-NEXT: v_and_b32_e32 v0, 0x80000000, v6
; GISEL-NEXT: v_lshl_add_u32 v1, v7, 23, 1.0
; GISEL-NEXT: v_and_b32_e32 v2, 0x7fffff, v4
; GISEL-NEXT: v_or3_b32 v0, v2, v0, v1
; GISEL-NEXT: v_cvt_f16_f32_e32 v4, v0
; GISEL-NEXT: .LBB4_14: ; %Flow5
-; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
; GISEL-NEXT: v_mov_b32_e32 v0, v4
; GISEL-NEXT: s_setpc_b64 s[30:31]
%cvt = sitofp i128 %x to half
@@ -1485,7 +1470,7 @@ define half @uitofp_i128_to_f16(i128 %x) {
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; GISEL-NEXT: v_mov_b32_e32 v4, s4
-; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GISEL-NEXT: s_cbranch_execz .LBB5_14
; GISEL-NEXT: ; %bb.1: ; %itofp-if-end
; GISEL-NEXT: v_ffbh_u32_e32 v5, v0
@@ -1503,8 +1488,8 @@ define half @uitofp_i128_to_f16(i128 %x) {
; GISEL-NEXT: v_sub_u32_e32 v6, 0x7f, v5
; GISEL-NEXT: v_cmp_ge_i32_e32 vcc, 24, v7
; GISEL-NEXT: ; implicit-def: $vgpr4
-; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
+; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; GISEL-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
; GISEL-NEXT: ; %bb.2: ; %itofp-if-else
; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff98, v5
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
@@ -1515,16 +1500,16 @@ define half @uitofp_i128_to_f16(i128 %x) {
; GISEL-NEXT: ; implicit-def: $vgpr5
; GISEL-NEXT: ; implicit-def: $vgpr2
; GISEL-NEXT: ; %bb.3: ; %Flow3
-; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[4:5]
+; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB5_13
; GISEL-NEXT: ; %bb.4: ; %NodeBlock
; GISEL-NEXT: v_cmp_le_i32_e32 vcc, 26, v7
-; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT: s_xor_b64 s[10:11], exec, s[4:5]
+; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
+; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
; GISEL-NEXT: s_cbranch_execz .LBB5_8
; GISEL-NEXT: ; %bb.5: ; %LeafBlock
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 26, v7
-; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc
; GISEL-NEXT: s_cbranch_execz .LBB5_7
; GISEL-NEXT: ; %bb.6: ; %itofp-sw-default
; GISEL-NEXT: v_sub_u32_e32 v4, 0x66, v5
@@ -1550,13 +1535,10 @@ define half @uitofp_i128_to_f16(i128 %x) {
; GISEL-NEXT: v_or_b32_e32 v15, v9, v11
; GISEL-NEXT: v_lshrrev_b64 v[10:11], v13, -1
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5
-; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v14, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5
; GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v8, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v10, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v10, v11, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v10, v14, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v15, vcc
; GISEL-NEXT: v_and_b32_e32 v2, v8, v2
; GISEL-NEXT: v_and_b32_e32 v3, v9, v3
; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2
@@ -1569,13 +1551,13 @@ define half @uitofp_i128_to_f16(i128 %x) {
; GISEL-NEXT: v_mov_b32_e32 v2, v5
; GISEL-NEXT: v_mov_b32_e32 v3, v6
; GISEL-NEXT: .LBB5_7: ; %Flow1
-; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
+; GISEL-NEXT: s_or_b64 exec, exec, s[10:11]
; GISEL-NEXT: .LBB5_8: ; %Flow2
-; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
+; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[8:9]
; GISEL-NEXT: ; %bb.9: ; %itofp-sw-bb
; GISEL-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GISEL-NEXT: ; %bb.10: ; %itofp-sw-epilog
-; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
+; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
; GISEL-NEXT: v_bfe_u32 v2, v0, 2, 1
; GISEL-NEXT: v_or_b32_e32 v0, v0, v2
; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
@@ -1584,20 +1566,20 @@ define half @uitofp_i128_to_f16(i128 %x) {
; GISEL-NEXT: v_mov_b32_e32 v3, 0
; GISEL-NEXT: v_lshrrev_b64 v[4:5], 2, v[0:1]
; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
-; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GISEL-NEXT: ; %bb.11: ; %itofp-if-then20
; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1]
; GISEL-NEXT: v_mov_b32_e32 v6, v7
; GISEL-NEXT: ; %bb.12: ; %Flow
-; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
-; GISEL-NEXT: .LBB5_13: ; %Flow4
; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
+; GISEL-NEXT: .LBB5_13: ; %Flow4
+; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
; GISEL-NEXT: v_lshl_add_u32 v0, v6, 23, 1.0
; GISEL-NEXT: v_mov_b32_e32 v1, 0x7fffff
; GISEL-NEXT: v_and_or_b32 v0, v4, v1, v0
; GISEL-NEXT: v_cvt_f16_f32_e32 v4, v0
; GISEL-NEXT: .LBB5_14: ; %Flow5
-; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
+; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
; GISEL-NEXT: v_mov_b32_e32 v0, v4
; GISEL-NEXT: s_setpc_b64 s[30:31]
%cvt = uitofp i128 %x to half
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
index bd6e1f54e636d8..8f4a4b5afcdc1e 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
@@ -87,7 +87,7 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -267,7 +267,7 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -448,7 +448,7 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_f64(double inreg %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], -1.0, v[0:1], -1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -628,7 +628,7 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_neg_f64(double inreg %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], -1.0, v[0:1], -1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -809,7 +809,7 @@ define double @v_rsq_f64(double %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -978,7 +978,7 @@ define double @v_rsq_f64_fabs(double %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -1148,7 +1148,7 @@ define double @v_rsq_f64_missing_contract0(double %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -1317,7 +1317,7 @@ define double @v_rsq_f64_missing_contract1(double %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -1486,7 +1486,7 @@ define double @v_neg_rsq_f64(double %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], -1.0, v[0:1], -1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -1716,7 +1716,7 @@ define <2 x double> @v_rsq_v2f64(<2 x double> %x) {
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[6:7]
; SI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[8:9]
; SI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[4:5]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v13
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v20
; SI-GISEL-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], v[12:13]
; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7]
@@ -1728,7 +1728,7 @@ define <2 x double> @v_rsq_v2f64(<2 x double> %x) {
; SI-GISEL-NEXT: v_mul_f64 v[10:11], v[18:19], v[6:7]
; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v19
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v19, v20
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
@@ -2019,7 +2019,7 @@ define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) {
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[6:7]
; SI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[8:9]
; SI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[4:5]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v13
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v20
; SI-GISEL-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], v[12:13]
; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7]
@@ -2031,7 +2031,7 @@ define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) {
; SI-GISEL-NEXT: v_mul_f64 v[10:11], v[18:19], v[6:7]
; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v19
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v19, v20
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
@@ -2293,7 +2293,7 @@ define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) {
; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0
; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v13
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v10
; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[6:7], 1.0
; SI-GISEL-NEXT: v_div_scale_f64 v[18:19], s[4:5], s[4:5], v[2:3], s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7]
@@ -2568,7 +2568,7 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0
; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v13
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v10
; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[6:7], 1.0
; SI-GISEL-NEXT: v_div_scale_f64 v[18:19], s[4:5], 1.0, v[2:3], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7]
@@ -2578,7 +2578,7 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) {
; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19]
; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0x3ff00000
; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15]
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v8, v19
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v19, v8
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0
@@ -2808,7 +2808,7 @@ define double @v_rsq_f64_fneg_fabs(double %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -2979,7 +2979,7 @@ define double @v_rsq_f64__afn_sqrt(double %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -4167,7 +4167,7 @@ define double @v_rsq_f64__nnan_ninf(double %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -4846,7 +4846,7 @@ define double @v_rsq_amdgcn_sqrt_f64(double %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -4927,7 +4927,7 @@ define double @v_neg_rsq_amdgcn_sqrt_f64(double %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], -1.0, v[0:1], -1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -5008,7 +5008,7 @@ define amdgpu_ps <2 x i32> @s_rsq_amdgcn_sqrt_f64(double inreg %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
@@ -5649,7 +5649,7 @@ define double @v_div_const_contract_sqrt_f64(double %x) {
; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], s[6:7], v[0:1], s[6:7]
; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3]
; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3
-; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9
+; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10
; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
From f311097dfd50e42f18796d1124f501268c2b720d Mon Sep 17 00:00:00 2001
From: Thorsten Schütt <schuett at gmail.com>
Date: Sun, 25 Aug 2024 19:46:44 +0200
Subject: [PATCH 2/5] remove debug leftover
---
.../CodeGen/GlobalISel/CombinerHelperCompares.cpp | 13 -------------
llvm/lib/CodeGen/GlobalISel/Utils.cpp | 3 ---
2 files changed, 16 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
index 415768fb07e59f..9fa7c347d917e7 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
@@ -187,19 +187,6 @@ bool CombinerHelper::matchZextOfICmp(const MachineInstr &MI,
Register RHS = Cmp->getRHSReg();
CmpInst::Predicate Pred = Cmp->getCond();
- /*
- %x:_(p0) = COPY $x0
- %y:_(p0) = COPY $x1
- %zero:_(p0) = G_CONSTANT i64 0
- %cmp1:_(s1) = G_ICMP intpred(eq), %x:_(p0), %zero:_
- */
-
- if (MRI.getType(LHS).isPointer() || MRI.getType(RHS).isPointer())
- return false;
-
- if (!MRI.getType(LHS).isScalar() || !MRI.getType(RHS).isScalar())
- return false;
-
GZext *ZL = cast<GZext>(MRI.getVRegDef(LHS));
GZext *ZR = cast<GZext>(MRI.getVRegDef(RHS));
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index e8b9d995a22768..1110404bd43e29 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -2040,9 +2040,6 @@ bool llvm::isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
if (Ty.isPointer())
return false;
- if (!Ty.isScalar())
- errs() << "type: " << Ty << '\n';
-
assert(Ty.isScalar() && "Expected a scalar value");
return ::isKnownNonZero(Reg, MRI, KB, Depth);
}
From 64bd908974f5745f213789813c6b4ea3f802f4c6 Mon Sep 17 00:00:00 2001
From: Thorsten Schütt <schuett at gmail.com>
Date: Sun, 25 Aug 2024 22:29:45 +0200
Subject: [PATCH 3/5] style fix
---
llvm/lib/CodeGen/GlobalISel/Utils.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 1110404bd43e29..8a936295323e8b 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -2164,7 +2164,7 @@ static bool isKnownNonZeroCastOp(const GCastOp &CastOp,
case TargetOpcode::G_SEXT:
case TargetOpcode::G_ZEXT:
// ext X != 0 if X != 0.
- return isKnownNonZero(CastOp.getSrcReg(), MRI, KB);
+ return ::isKnownNonZero(CastOp.getSrcReg(), MRI, KB, Depth);
case Instruction::Trunc:
// nuw/nsw trunc preserves zero/non-zero status of input.
if (CastOp.getFlag(MachineInstr::MIFlag::NoSWrap) ||
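A note on this fix, beyond style: the unqualified call resolved to the public llvm::isKnownNonZero entry point with its default depth (presumably zero), while `::isKnownNonZero` is the file-local worker that threads the current Depth through and therefore respects the recursion cap. A toy sketch of the pattern, all names assumed:

constexpr unsigned MaxDepth = 6; // assumption; ValueTracking uses the same limit

struct Node {
  bool NonZeroLeaf;   // e.g. a constant known to be non-zero
  const Node *Source; // e.g. the source of a sext/zext, else nullptr
};

// Worker: each recursive step spends budget, so long chains of extensions
// terminate with a conservative answer instead of recursing unboundedly.
static bool isNonZeroImpl(const Node &N, unsigned Depth) {
  if (N.NonZeroLeaf)
    return true;
  if (Depth >= MaxDepth || !N.Source)
    return false; // budget exhausted or nothing to look through
  return isNonZeroImpl(*N.Source, Depth + 1); // ext X != 0 if X != 0
}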
From f6785ce4dd2a55d3f10044c9849f4130e92d961f Mon Sep 17 00:00:00 2001
From: Thorsten Schütt <schuett at gmail.com>
Date: Mon, 26 Aug 2024 07:05:36 +0200
Subject: [PATCH 4/5] address review comments
---
.../include/llvm/Target/GlobalISel/Combine.td | 4 +-
.../GlobalISel/CombinerHelperCompares.cpp | 3 +-
llvm/lib/CodeGen/GlobalISel/Utils.cpp | 46 +++++++------------
3 files changed, 20 insertions(+), 33 deletions(-)
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 175a8ed57b2669..ef4ca8a2552f4c 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1981,8 +1981,8 @@ def constant_fold_binops : GICombineGroup<[constant_fold_binop,
def prefer_sign_combines : GICombineGroup<[nneg_zext]>;
-def all_combines : GICombineGroup<[icmp_combines, integer_reassoc_combines, trivial_combines,
- vector_ops_combines, freeze_combines, cast_combines,
+def all_combines : GICombineGroup<[icmp_combines, integer_reassoc_combines,
+ trivial_combines, vector_ops_combines, freeze_combines, cast_combines,
insert_vec_elt_combines, extract_vec_elt_combines, combines_for_extload,
combine_extracted_vector_load,
undef_combines, identity_combines, phi_combines,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
index 9fa7c347d917e7..57f4804fa10f6b 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements CombinerHelper for G_ICMP
+// This file implements CombinerHelper for G_ICMP.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
@@ -116,7 +116,6 @@ bool CombinerHelper::visitICmp(const MachineInstr &MI, BuildFnTy &MatchInfo) {
return true;
}
- [[maybe_unused]] MachineInstr *MILHS = MRI.getVRegDef(LHS);
MachineInstr *MIRHS = MRI.getVRegDef(RHS);
// For EQ and NE, we can always pick a value for the undef to make the
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 8a936295323e8b..fa18c87a2eef56 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -2046,34 +2046,22 @@ bool llvm::isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
static bool matchOpWithOpEqZero(Register Op0, Register Op1,
const MachineRegisterInfo &MRI) {
- MachineInstr *MI = MRI.getVRegDef(Op0);
-
- bool Result = false;
-
- if (GZextOrSextOp *ZS = dyn_cast<GZextOrSextOp>(MI)) {
- MachineInstr *SrcMI = MRI.getVRegDef(ZS->getSrcReg());
- if (GICmp *Cmp = dyn_cast<GICmp>(SrcMI)) {
- std::optional<ValueAndVReg> MayBeConstant =
- getIConstantVRegValWithLookThrough(Cmp->getRHSReg(), MRI);
- if (MayBeConstant)
- Result |= (MayBeConstant->Value == 0) && (Cmp->getLHSReg() == Op1) &&
- (Cmp->getCond() == ICmpInst::ICMP_EQ);
- }
- }
-
- MI = MRI.getVRegDef(Op1);
- if (GZextOrSextOp *ZS = dyn_cast<GZextOrSextOp>(MI)) {
- MachineInstr *SrcMI = MRI.getVRegDef(ZS->getSrcReg());
- if (GICmp *Cmp = dyn_cast<GICmp>(SrcMI)) {
- std::optional<ValueAndVReg> MayBeConstant =
- getIConstantVRegValWithLookThrough(Cmp->getRHSReg(), MRI);
- if (MayBeConstant)
- Result |= (MayBeConstant->Value == 0) && (Cmp->getLHSReg() == Op0) &&
- (Cmp->getCond() == ICmpInst::ICMP_EQ);
+ auto MatchIt = [&MRI](const Register Reg0, const Register Reg1) {
+ MachineInstr *MI = MRI.getVRegDef(Reg0);
+ if (GZextOrSextOp *ZS = dyn_cast<GZextOrSextOp>(MI)) {
+ MachineInstr *SrcMI = MRI.getVRegDef(ZS->getSrcReg());
+ if (GICmp *Cmp = dyn_cast<GICmp>(SrcMI)) {
+ std::optional<ValueAndVReg> MayBeConstant =
+ getIConstantVRegValWithLookThrough(Cmp->getRHSReg(), MRI);
+ return MayBeConstant && (MayBeConstant->Value == 0) &&
+ (Cmp->getLHSReg() == Reg1) &&
+ (Cmp->getCond() == ICmpInst::ICMP_EQ);
+ }
}
- }
+ return false;
+ };
- return Result;
+ return MatchIt(Op0, Op1) || MatchIt(Op1, Op0);
}
static bool isNonZeroAdd(const GBinOp &Add, const MachineRegisterInfo &MRI,
@@ -2084,7 +2072,7 @@ static bool isNonZeroAdd(const GBinOp &Add, const MachineRegisterInfo &MRI,
Register LHS = Add.getLHSReg();
Register RHS = Add.getRHSReg();
- // (X + (X != 0)) is non zero
+ // (X + (X != 0)) is non zero.
if (matchOpWithOpEqZero(LHS, RHS, MRI))
return true;
@@ -2131,12 +2119,12 @@ static bool isKnownNonZeroBinOp(const GBinOp &BinOp,
unsigned BitWidth = MRI.getType(BinOp.getReg(0)).getScalarSizeInBits();
switch (BinOp.getOpcode()) {
case TargetOpcode::G_XOR:
- // (X ^ (X != 0)) is non zero
+ // (X ^ (X != 0)) is non zero.
if (matchOpWithOpEqZero(BinOp.getLHSReg(), BinOp.getRHSReg(), MRI))
return true;
break;
case TargetOpcode::G_OR: {
- // (X | (X != 0)) is non zero
+ // (X | (X != 0)) is non zero.
if (matchOpWithOpEqZero(BinOp.getLHSReg(), BinOp.getRHSReg(), MRI))
return true;
// X | Y != 0 if X != 0 or Y != 0.
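Not part of the patch, but for the record: the deduplicated matcher looks for zext/sext(icmp eq X, 0) on either operand, and the non-zero facts the add/xor/or callers rely on are easy to check exhaustively at a small bit width. A standalone toy, not LLVM code:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned V = 0; V < 256; ++V) {
    uint8_t X = static_cast<uint8_t>(V);
    uint8_t B = (X == 0) ? 1 : 0; // models zext(icmp eq X, 0)
    // If X == 0 the bit contributes 1; otherwise X itself is non-zero.
    assert(static_cast<uint8_t>(X + B) != 0);
    assert(static_cast<uint8_t>(X ^ B) != 0);
    assert(static_cast<uint8_t>(X | B) != 0);
  }
  return 0;
}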
From 08b3a999e987b31cbf3726206c8a534b621bc59e Mon Sep 17 00:00:00 2001
From: Thorsten Schütt <schuett at gmail.com>
Date: Mon, 26 Aug 2024 08:02:34 +0200
Subject: [PATCH 5/5] remove switch
---
.../GlobalISel/CombinerHelperCompares.cpp | 38 +------------------
1 file changed, 2 insertions(+), 36 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
index 57f4804fa10f6b..88e1eb03043350 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include <cstdlib>
@@ -45,42 +46,7 @@ bool CombinerHelper::constantFoldICmp(const GICmp &ICmp,
APInt LHS = LHSCst.getScalarValue();
APInt RHS = RHSCst.getScalarValue();
- bool Result;
-
- switch (Pred) {
- case CmpInst::Predicate::ICMP_EQ:
- Result = LHS.eq(RHS);
- break;
- case CmpInst::Predicate::ICMP_NE:
- Result = LHS.ne(RHS);
- break;
- case CmpInst::Predicate::ICMP_UGT:
- Result = LHS.ugt(RHS);
- break;
- case CmpInst::Predicate::ICMP_UGE:
- Result = LHS.uge(RHS);
- break;
- case CmpInst::Predicate::ICMP_ULT:
- Result = LHS.ult(RHS);
- break;
- case CmpInst::Predicate::ICMP_ULE:
- Result = LHS.ule(RHS);
- break;
- case CmpInst::Predicate::ICMP_SGT:
- Result = LHS.sgt(RHS);
- break;
- case CmpInst::Predicate::ICMP_SGE:
- Result = LHS.sge(RHS);
- break;
- case CmpInst::Predicate::ICMP_SLT:
- Result = LHS.slt(RHS);
- break;
- case CmpInst::Predicate::ICMP_SLE:
- Result = LHS.sle(RHS);
- break;
- default:
- llvm_unreachable("Unexpected predicate");
- }
+ bool Result = ICmpInst::compare(LHS, RHS, Pred);
MatchInfo = [=](MachineIRBuilder &B) {
if (Result)
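ICmpInst::compare is the existing IR helper the hand-written switch collapses into; it evaluates any integer predicate on two equal-width APInts, which is why the hunk above adds the llvm/IR/Instructions.h include. A small standalone illustration, not part of the patch:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/Instructions.h"
#include <cassert>

int main() {
  llvm::APInt A = llvm::APInt::getAllOnes(32); // -1 signed, UINT32_MAX unsigned
  llvm::APInt B(32, 1);
  assert(llvm::ICmpInst::compare(A, B, llvm::CmpInst::ICMP_SLT));  // -1 <s 1
  assert(!llvm::ICmpInst::compare(A, B, llvm::CmpInst::ICMP_ULT)); // not <u 1
  return 0;
}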