[llvm] 364f781 - [GlobalIsel] Combine logic of icmps (#77855)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 6 06:58:06 PST 2024
Author: Thorsten Schütt
Date: 2024-02-06T15:58:02+01:00
New Revision: 364f781344e11d6a781ebdd0a4d9689bc9c51cfb
URL: https://github.com/llvm/llvm-project/commit/364f781344e11d6a781ebdd0a4d9689bc9c51cfb
DIFF: https://github.com/llvm/llvm-project/commit/364f781344e11d6a781ebdd0a4d9689bc9c51cfb.diff
LOG: [GlobalIsel] Combine logic of icmps (#77855)
Inspired by InstCombinerImpl::foldAndOrOfICmpsUsingRanges with some
adaptations to MIR.
Added:
llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/test/CodeGen/AArch64/arm64-ccmp.ll
llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 90428a622b417..10eeafdd09a8e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -814,6 +814,12 @@ class CombinerHelper {
/// Combine selects.
bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo);
+ /// Combine ands,
+ bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo);
+
+ /// Combine ors,
+ bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo);
+
private:
/// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
@@ -919,6 +925,12 @@ class CombinerHelper {
bool AllowUndefs);
std::optional<APInt> getConstantOrConstantSplatVector(Register Src);
+
+ /// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
+ /// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
+ /// into a single comparison using range-based reasoning.
+ bool tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic,
+ BuildFnTy &MatchInfo);
};
} // namespace llvm
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index eabbe688a1c76..f5a6528d10a97 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -592,6 +592,134 @@ class GPhi : public GenericMachineInstr {
}
};
+/// Represents a binary operation, i.e, x = y op z.
+class GBinOp : public GenericMachineInstr {
+public:
+ Register getLHSReg() const { return getReg(1); }
+ Register getRHSReg() const { return getReg(2); }
+
+ static bool classof(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ // Integer.
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_UREM:
+ case TargetOpcode::G_SMIN:
+ case TargetOpcode::G_SMAX:
+ case TargetOpcode::G_UMIN:
+ case TargetOpcode::G_UMAX:
+ // Floating point.
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMINNUM_IEEE:
+ case TargetOpcode::G_FMAXNUM_IEEE:
+ case TargetOpcode::G_FMINIMUM:
+ case TargetOpcode::G_FMAXIMUM:
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FPOW:
+ // Logical.
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR:
+ return true;
+ default:
+ return false;
+ }
+ };
+};
+
+/// Represents an integer binary operation.
+class GIntBinOp : public GBinOp {
+public:
+ static bool classof(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_UREM:
+ case TargetOpcode::G_SMIN:
+ case TargetOpcode::G_SMAX:
+ case TargetOpcode::G_UMIN:
+ case TargetOpcode::G_UMAX:
+ return true;
+ default:
+ return false;
+ }
+ };
+};
+
+/// Represents a floating point binary operation.
+class GFBinOp : public GBinOp {
+public:
+ static bool classof(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMINNUM_IEEE:
+ case TargetOpcode::G_FMAXNUM_IEEE:
+ case TargetOpcode::G_FMINIMUM:
+ case TargetOpcode::G_FMAXIMUM:
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FPOW:
+ return true;
+ default:
+ return false;
+ }
+ };
+};
+
+/// Represents a logical binary operation.
+class GLogicalBinOp : public GBinOp {
+public:
+ static bool classof(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR:
+ return true;
+ default:
+ return false;
+ }
+ };
+};
+
+/// Represents an integer addition.
+class GAdd : public GIntBinOp {
+public:
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_ADD;
+ };
+};
+
+/// Represents a logical and.
+class GAnd : public GLogicalBinOp {
+public:
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_AND;
+ };
+};
+
+/// Represents a logical or.
+class GOr : public GLogicalBinOp {
+public:
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_OR;
+ };
+};
+
} // namespace llvm
#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 6bda80681432a..9b0e1b0d7c4f9 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1241,6 +1241,18 @@ def match_selects : GICombineRule<
[{ return Helper.matchSelect(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+def match_ands : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_AND):$root,
+ [{ return Helper.matchAnd(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
+def match_ors : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_OR):$root,
+ [{ return Helper.matchOr(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
undef_to_negative_one,
@@ -1314,7 +1326,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
intdiv_combines, mulh_combines, redundant_neg_operands,
and_or_disjoint_mask, fma_combines, fold_binop_into_select,
sub_add_reg, select_to_minmax, redundant_binop_in_equality,
- fsub_to_fneg, commute_constant_to_rhs]>;
+ fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors]>;
// A combine group used to for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 772229215e798..1b199cfd41d23 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -28,10 +28,12 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/DivisionByConstantInfo.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cmath>
@@ -6651,3 +6653,181 @@ bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
return false;
}
+
+/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
+/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
+/// into a single comparison using range-based reasoning.
+/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges.
+bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic,
+ BuildFnTy &MatchInfo) {
+ assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
+ bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
+ Register DstReg = Logic->getReg(0);
+ Register LHS = Logic->getLHSReg();
+ Register RHS = Logic->getRHSReg();
+ unsigned Flags = Logic->getFlags();
+
+ // We need an G_ICMP on the LHS register.
+ GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI);
+ if (!Cmp1)
+ return false;
+
+ // We need an G_ICMP on the RHS register.
+ GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI);
+ if (!Cmp2)
+ return false;
+
+ // We want to fold the icmps.
+ if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
+ !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
+ return false;
+
+ APInt C1;
+ APInt C2;
+ std::optional<ValueAndVReg> MaybeC1 =
+ getIConstantVRegValWithLookThrough(Cmp1->getRHSReg(), MRI);
+ if (!MaybeC1)
+ return false;
+ C1 = MaybeC1->Value;
+
+ std::optional<ValueAndVReg> MaybeC2 =
+ getIConstantVRegValWithLookThrough(Cmp2->getRHSReg(), MRI);
+ if (!MaybeC2)
+ return false;
+ C2 = MaybeC2->Value;
+
+ Register R1 = Cmp1->getLHSReg();
+ Register R2 = Cmp2->getLHSReg();
+ CmpInst::Predicate Pred1 = Cmp1->getCond();
+ CmpInst::Predicate Pred2 = Cmp2->getCond();
+ LLT CmpTy = MRI.getType(Cmp1->getReg(0));
+ LLT CmpOperandTy = MRI.getType(R1);
+
+ // We build ands, adds, and constants of type CmpOperandTy.
+ // They must be legal to build.
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) ||
+ !isConstantLegalOrBeforeLegalizer(CmpOperandTy))
+ return false;
+
+ // Look through add of a constant offset on R1, R2, or both operands. This
+ // allows us to interpret the R + C' < C'' range idiom into a proper range.
+ std::optional<APInt> Offset1;
+ std::optional<APInt> Offset2;
+ if (R1 != R2) {
+ if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) {
+ std::optional<ValueAndVReg> MaybeOffset1 =
+ getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
+ if (MaybeOffset1) {
+ R1 = Add->getLHSReg();
+ Offset1 = MaybeOffset1->Value;
+ }
+ }
+ if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) {
+ std::optional<ValueAndVReg> MaybeOffset2 =
+ getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
+ if (MaybeOffset2) {
+ R2 = Add->getLHSReg();
+ Offset2 = MaybeOffset2->Value;
+ }
+ }
+ }
+
+ if (R1 != R2)
+ return false;
+
+ // We calculate the icmp ranges including maybe offsets.
+ ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
+ IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1);
+ if (Offset1)
+ CR1 = CR1.subtract(*Offset1);
+
+ ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
+ IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2);
+ if (Offset2)
+ CR2 = CR2.subtract(*Offset2);
+
+ bool CreateMask = false;
+ APInt LowerDiff;
+ std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
+ if (!CR) {
+ // We need non-wrapping ranges.
+ if (CR1.isWrappedSet() || CR2.isWrappedSet())
+ return false;
+
+ // Check whether we have equal-size ranges that only differ by one bit.
+ // In that case we can apply a mask to map one range onto the other.
+ LowerDiff = CR1.getLower() ^ CR2.getLower();
+ APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
+ APInt CR1Size = CR1.getUpper() - CR1.getLower();
+ if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
+ CR1Size != CR2.getUpper() - CR2.getLower())
+ return false;
+
+ CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
+ CreateMask = true;
+ }
+
+ if (IsAnd)
+ CR = CR->inverse();
+
+ CmpInst::Predicate NewPred;
+ APInt NewC, Offset;
+ CR->getEquivalentICmp(NewPred, NewC, Offset);
+
+ // We take the result type of one of the original icmps, CmpTy, for
+ // the to be build icmp. The operand type, CmpOperandTy, is used for
+ // the other instructions and constants to be build. The types of
+ // the parameters and output are the same for add and and. CmpTy
+ // and the type of DstReg might differ. That is why we zext or trunc
+ // the icmp into the destination register.
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ if (CreateMask && Offset != 0) {
+ auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
+ auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
+ auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
+ auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags);
+ auto NewCon = B.buildConstant(CmpOperandTy, NewC);
+ auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
+ B.buildZExtOrTrunc(DstReg, ICmp);
+ } else if (CreateMask && Offset == 0) {
+ auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
+ auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
+ auto NewCon = B.buildConstant(CmpOperandTy, NewC);
+ auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon);
+ B.buildZExtOrTrunc(DstReg, ICmp);
+ } else if (!CreateMask && Offset != 0) {
+ auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
+ auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags);
+ auto NewCon = B.buildConstant(CmpOperandTy, NewC);
+ auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
+ B.buildZExtOrTrunc(DstReg, ICmp);
+ } else if (!CreateMask && Offset == 0) {
+ auto NewCon = B.buildConstant(CmpOperandTy, NewC);
+ auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon);
+ B.buildZExtOrTrunc(DstReg, ICmp);
+ } else {
+ llvm_unreachable("unexpected configuration of CreateMask and Offset");
+ }
+ };
+ return true;
+}
+
+bool CombinerHelper::matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) {
+ GAnd *And = cast<GAnd>(&MI);
+
+ if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
+ return true;
+
+ return false;
+}
+
+bool CombinerHelper::matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) {
+ GOr *Or = cast<GOr>(&MI);
+
+ if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
+ return true;
+
+ return false;
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir
new file mode 100644
index 0000000000000..f667a83bf21a8
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir
@@ -0,0 +1,262 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+---
+# icmp (x, 1) && icmp (x, 2) -> x
+name: test_icmp_and_icmp
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: test_icmp_and_icmp
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: %one:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s64), %one
+ ; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[ICMP]](s1)
+ ; CHECK-NEXT: $x0 = COPY %zext(s64)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %one:_(s64) = G_CONSTANT i64 1
+ %two:_(s64) = G_CONSTANT i64 2
+ %cmp1:_(s1) = G_ICMP intpred(eq), %0(s64), %one
+ %cmp2:_(s1) = G_ICMP intpred(ne), %0(s64), %two
+ %and:_(s1) = G_AND %cmp1, %cmp2
+ %zext:_(s64) = G_ZEXT %and(s1)
+ $x0 = COPY %zext
+...
+---
+# multi use icmp (x, 1) && icmp (x, 2) -> x
+name: multi_use_test_icmp_and_icmp
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: multi_use_test_icmp_and_icmp
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: %one:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: %two:_(s64) = G_CONSTANT i64 2
+ ; CHECK-NEXT: %cmp1:_(s1) = G_ICMP intpred(eq), [[COPY]](s64), %one
+ ; CHECK-NEXT: %cmp2:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), %two
+ ; CHECK-NEXT: %and:_(s1) = G_AND %cmp1, %cmp2
+ ; CHECK-NEXT: %zext:_(s64) = G_ZEXT %and(s1)
+ ; CHECK-NEXT: %cmp1zext:_(s64) = G_ZEXT %cmp1(s1)
+ ; CHECK-NEXT: $x0 = COPY %zext(s64)
+ ; CHECK-NEXT: $x0 = COPY %cmp1zext(s64)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %one:_(s64) = G_CONSTANT i64 1
+ %two:_(s64) = G_CONSTANT i64 2
+ %cmp1:_(s1) = G_ICMP intpred(eq), %0(s64), %one
+ %cmp2:_(s1) = G_ICMP intpred(ne), %0(s64), %two
+ %and:_(s1) = G_AND %cmp1, %cmp2
+ %zext:_(s64) = G_ZEXT %and(s1)
+ %cmp1zext:_(s64) = G_ZEXT %cmp1(s1)
+ $x0 = COPY %zext
+ $x0 = COPY %cmp1zext
+...
+---
+# icmp (x, 1) && icmp (x, add(x, 2)) -> x
+name: test_icmp_and_icmp_with_add
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: test_icmp_and_icmp_with_add
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: %one:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: %cmp1:_(s1) = G_ICMP intpred(eq), [[COPY]](s64), %one
+ ; CHECK-NEXT: %zext:_(s64) = G_ZEXT %cmp1(s1)
+ ; CHECK-NEXT: $x0 = COPY %zext(s64)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %one:_(s64) = G_CONSTANT i64 1
+ %two:_(s64) = G_CONSTANT i64 2
+ %add:_(s64) = G_ADD %0(s64), %two
+ %cmp1:_(s1) = G_ICMP intpred(eq), %0(s64), %one
+ %cmp2:_(s1) = G_ICMP intpred(ne), %0(s64), %add
+ %and:_(s1) = G_AND %cmp1, %cmp2
+ %zext:_(s64) = G_ZEXT %and(s1)
+ $x0 = COPY %zext
+...
+---
+# icmp (x, 1) && icmp (x, add(x, 2000)) -> x
+name: test_icmp_or_icmp_with_add_2000
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: test_icmp_or_icmp_with_add_2000
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: %one:_(s64) = G_CONSTANT i64 -100
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s64), %one
+ ; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[ICMP]](s1)
+ ; CHECK-NEXT: $x0 = COPY %zext(s64)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %one:_(s64) = G_CONSTANT i64 -100
+ %two:_(s64) = G_CONSTANT i64 2000
+ %cmp1:_(s1) = G_ICMP intpred(eq), %0(s64), %one
+ %cmp2:_(s1) = G_ICMP intpred(ne), %0(s64), %two
+ %or:_(s1) = G_AND %cmp1, %cmp2
+ %zext:_(s64) = G_ZEXT %or(s1)
+ $x0 = COPY %zext
+...
+---
+# icmp (x, -100) || icmp (x, 2000) -> x
+name: test_icmp_or_icmp
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: test_icmp_or_icmp
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: %two:_(s64) = G_CONSTANT i64 2000
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), %two
+ ; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[ICMP]](s1)
+ ; CHECK-NEXT: $x0 = COPY %zext(s64)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %one:_(s64) = G_CONSTANT i64 -100
+ %two:_(s64) = G_CONSTANT i64 2000
+ %cmp1:_(s1) = G_ICMP intpred(eq), %0(s64), %one
+ %cmp2:_(s1) = G_ICMP intpred(ne), %0(s64), %two
+ %or:_(s1) = G_OR %cmp1, %cmp2
+ %zext:_(s64) = G_ZEXT %or(s1)
+ $x0 = COPY %zext
+...
+---
+# offset icmp (x, -100) || icmp (x, 2000) -> x
+name: test_icmp_or_icmp_offset
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: test_icmp_or_icmp_offset
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -2001
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[COPY]], [[C]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -2101
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD]](s64), [[C1]]
+ ; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[ICMP]](s1)
+ ; CHECK-NEXT: $x0 = COPY %zext(s64)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %one:_(s64) = G_CONSTANT i64 -100
+ %two:_(s64) = G_CONSTANT i64 2000
+ %cmp1:_(s1) = G_ICMP intpred(slt), %0(s64), %one
+ %cmp2:_(s1) = G_ICMP intpred(sgt), %0(s64), %two
+ %or:_(s1) = G_OR %cmp1, %cmp2
+ %zext:_(s64) = G_ZEXT %or(s1)
+ $x0 = COPY %zext
+...
+---
+# icmp (x, add(x, 9) || icmp (x, add(x, 2)) -> x
+name: test_icmp_or_icmp_with_add_and_add
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: test_icmp_or_icmp_with_add_and_add
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: %two:_(s64) = G_CONSTANT i64 2
+ ; CHECK-NEXT: %add2:_(s64) = G_ADD [[COPY]], %two
+ ; CHECK-NEXT: %cmp2:_(s1) = G_ICMP intpred(ne), [[COPY1]](s64), %add2
+ ; CHECK-NEXT: %zext:_(s64) = G_ZEXT %cmp2(s1)
+ ; CHECK-NEXT: $x0 = COPY %zext(s64)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %nine:_(s64) = G_CONSTANT i64 9
+ %two:_(s64) = G_CONSTANT i64 2
+ %add1:_(s64) = G_ADD %0(s64), %nine
+ %add2:_(s64) = G_ADD %0(s64), %two
+ %cmp1:_(s1) = G_ICMP intpred(eq), %0(s64), %add1
+ %cmp2:_(s1) = G_ICMP intpred(ne), %1(s64), %add2
+ %and:_(s1) = G_OR %cmp1, %cmp2
+ %zext:_(s64) = G_ZEXT %and(s1)
+ $x0 = COPY %zext
+...
+---
+# icmp (x, 9) && icmp (x, 2)) -> x
+# buildConstant 0
+# buildICmp ult, R1, NewC
+# buildZExtOrTrunc -> COPY
+# erase G_AND
+# x > 9 && x < 2 => false
+name: test_icmp_and_icmp_9_2
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: test_icmp_and_icmp_9_2
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: $x0 = COPY [[C]](s64)
+ %0:_(s64) = COPY $x0
+ %nine:_(s64) = G_CONSTANT i64 9
+ %two:_(s64) = G_CONSTANT i64 2
+ %cmp1:_(s1) = G_ICMP intpred(sgt), %0(s64), %nine
+ %cmp2:_(s1) = G_ICMP intpred(slt), %0(s64), %two
+ %and:_(s1) = G_AND %cmp1, %cmp2
+ %zext:_(s64) = G_ZEXT %and(s1)
+ $x0 = COPY %zext
+...
+---
+# icmp (x, v1) && icmp (x, v2)) -> x
+name: test_icmp_and_icmp_with_vectors
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: test_icmp_and_icmp_with_vectors
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY $x5
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY $x6
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s64) = COPY $x7
+ ; CHECK-NEXT: %v1:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64)
+ ; CHECK-NEXT: %v2:_(<2 x s64>) = G_BUILD_VECTOR [[COPY2]](s64), [[COPY3]](s64)
+ ; CHECK-NEXT: %v3:_(<2 x s64>) = G_BUILD_VECTOR [[COPY4]](s64), [[COPY5]](s64)
+ ; CHECK-NEXT: %v4:_(<2 x s64>) = G_BUILD_VECTOR [[COPY6]](s64), [[COPY7]](s64)
+ ; CHECK-NEXT: %cmp1:_(<2 x s1>) = G_ICMP intpred(ne), %v1(<2 x s64>), %v2
+ ; CHECK-NEXT: %cmp2:_(<2 x s1>) = G_ICMP intpred(eq), %v3(<2 x s64>), %v4
+ ; CHECK-NEXT: %and:_(<2 x s1>) = G_AND %cmp1, %cmp2
+ ; CHECK-NEXT: %zext:_(<2 x s64>) = G_ZEXT %and(<2 x s1>)
+ ; CHECK-NEXT: $q0 = COPY %zext(<2 x s64>)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %3:_(s64) = COPY $x3
+ %4:_(s64) = COPY $x4
+ %5:_(s64) = COPY $x5
+ %6:_(s64) = COPY $x6
+ %7:_(s64) = COPY $x7
+ %nine:_(s64) = G_CONSTANT i64 9
+ %two:_(s64) = G_CONSTANT i64 2
+ %v1:_(<2 x s64>) = G_BUILD_VECTOR %0(s64), %1(s64)
+ %v2:_(<2 x s64>) = G_BUILD_VECTOR %2(s64), %3(s64)
+ %v3:_(<2 x s64>) = G_BUILD_VECTOR %4(s64), %5(s64)
+ %v4:_(<2 x s64>) = G_BUILD_VECTOR %6(s64), %7(s64)
+ %cmp1:_(<2 x s1>) = G_ICMP intpred(ne), %v1(<2 x s64>), %v2
+ %cmp2:_(<2 x s1>) = G_ICMP intpred(eq), %v3(<2 x s64>), %v4
+ %and:_(<2 x s1>) = G_AND %cmp1, %cmp2
+ %zext:_(<2 x s64>) = G_ZEXT %and(<2 x s1>)
+ $q0 = COPY %zext
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
index 446526986b883..5d3b2d3649e1b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -635,19 +635,7 @@ define i64 @select_noccmp1(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
;
; GISEL-LABEL: select_noccmp1:
; GISEL: ; %bb.0:
-; GISEL-NEXT: cmp x0, #0
-; GISEL-NEXT: cset w8, lt
-; GISEL-NEXT: cmp x0, #13
-; GISEL-NEXT: cset w9, gt
-; GISEL-NEXT: cmp x2, #2
-; GISEL-NEXT: cset w10, lt
-; GISEL-NEXT: cmp x2, #4
-; GISEL-NEXT: cset w11, gt
-; GISEL-NEXT: and w8, w8, w9
-; GISEL-NEXT: and w9, w10, w11
-; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: tst w8, #0x1
-; GISEL-NEXT: csel x0, xzr, x3, ne
+; GISEL-NEXT: mov x0, x3
; GISEL-NEXT: ret
%c0 = icmp slt i64 %v1, 0
%c1 = icmp sgt i64 %v1, 13
@@ -677,11 +665,8 @@ define i64 @select_noccmp2(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
;
; GISEL-LABEL: select_noccmp2:
; GISEL: ; %bb.0:
-; GISEL-NEXT: cmp x0, #0
-; GISEL-NEXT: cset w8, lt
-; GISEL-NEXT: cmp x0, #13
-; GISEL-NEXT: cset w9, gt
-; GISEL-NEXT: orr w8, w8, w9
+; GISEL-NEXT: cmp x0, #14
+; GISEL-NEXT: cset w8, hs
; GISEL-NEXT: tst w8, #0x1
; GISEL-NEXT: csel x0, xzr, x3, ne
; GISEL-NEXT: sbfx w8, w8, #0, #1
@@ -719,25 +704,14 @@ define i32 @select_noccmp3(i32 %v0, i32 %v1, i32 %v2) {
;
; GISEL-LABEL: select_noccmp3:
; GISEL: ; %bb.0:
-; GISEL-NEXT: cmp w0, #0
-; GISEL-NEXT: cset w8, lt
-; GISEL-NEXT: cmp w0, #13
-; GISEL-NEXT: cset w9, gt
-; GISEL-NEXT: cmp w0, #22
-; GISEL-NEXT: cset w10, lt
-; GISEL-NEXT: cmp w0, #44
-; GISEL-NEXT: cset w11, gt
-; GISEL-NEXT: cmp w0, #99
-; GISEL-NEXT: cset w12, eq
+; GISEL-NEXT: mov w8, #99 ; =0x63
+; GISEL-NEXT: sub w9, w0, #45
+; GISEL-NEXT: mov w10, #-23 ; =0xffffffe9
; GISEL-NEXT: cmp w0, #77
-; GISEL-NEXT: cset w13, eq
-; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: orr w9, w10, w11
-; GISEL-NEXT: and w8, w8, w9
-; GISEL-NEXT: orr w9, w12, w13
-; GISEL-NEXT: and w8, w8, w9
-; GISEL-NEXT: tst w8, #0x1
-; GISEL-NEXT: csel w0, w1, w2, ne
+; GISEL-NEXT: ccmp w0, w8, #4, ne
+; GISEL-NEXT: ccmp w9, w10, #2, eq
+; GISEL-NEXT: ccmp w0, #14, #0, lo
+; GISEL-NEXT: csel w0, w1, w2, hs
; GISEL-NEXT: ret
%c0 = icmp slt i32 %v0, 0
%c1 = icmp sgt i32 %v0, 13
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
index b4a8193d2377a..ec3c08ec79523 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
@@ -183,12 +183,10 @@ define i1 @snan_f16(half %x) nounwind {
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
-; GFX7GLISEL-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1
-; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7e00
-; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v0, v1
-; GFX7GLISEL-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xffff83ff, v0
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x1ff
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: snan_f16:
@@ -894,11 +892,9 @@ define i1 @not_isnan_f16(half %x) {
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c01
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v1
-; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: not_isnan_f16:
@@ -1542,10 +1538,8 @@ define i1 @not_issubnormal_or_zero_f16(half %x) {
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX7GLISEL-NEXT: v_cmp_gt_u32_e64 s[4:5], v1, v2
-; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0
+; GFX7GLISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
@@ -2103,11 +2097,9 @@ define i1 @ispositive_f16(half %x) {
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c01
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v1
-; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: ispositive_f16:
@@ -2294,13 +2286,12 @@ define i1 @not_isnegative_f16(half %x) {
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c01
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v2
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v2
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v1
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
+; GFX7GLISEL-NEXT: v_cmp_gt_u32_e64 s[4:5], v0, v1
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT: v_cmp_gt_u32_e32 vcc, v0, v2
-; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2355,11 +2346,10 @@ define i1 @iszero_or_nan_f16(half %x) {
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX7GLISEL-NEXT: v_cmp_gt_u32_e64 s[4:5], v0, v1
-; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xffff83ff, v0
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0xffff8400
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: iszero_or_nan_f16:
@@ -2414,11 +2404,10 @@ define i1 @iszero_or_nan_f_daz(half %x) #0 {
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX7GLISEL-NEXT: v_cmp_gt_u32_e64 s[4:5], v0, v1
-; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xffff83ff, v0
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0xffff8400
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: iszero_or_nan_f_daz:
@@ -2473,11 +2462,10 @@ define i1 @iszero_or_nan_f_maybe_daz(half %x) #1 {
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX7GLISEL-NEXT: v_cmp_gt_u32_e64 s[4:5], v0, v1
-; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xffff83ff, v0
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0xffff8400
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: iszero_or_nan_f_maybe_daz:
@@ -2733,11 +2721,10 @@ define i1 @iszero_or_qnan_f16(half %x) {
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7e00
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX7GLISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v1
-; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xffff8200, v0
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0xffff8201
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: iszero_or_qnan_f16:
@@ -2795,13 +2782,11 @@ define i1 @iszero_or_snan_f16(half %x) {
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
-; GFX7GLISEL-NEXT: v_cmp_gt_u32_e64 s[4:5], v0, v1
-; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7e00
-; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[6:7], v0, v1
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX7GLISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
-; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
+; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xffff83ff, v0
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x1ff
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
+; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -2875,15 +2860,14 @@ define i1 @not_iszero_or_qnan_f16(half %x) {
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v2
-; GFX7GLISEL-NEXT: s_or_b64 s[6:7], vcc, s[4:5]
-; GFX7GLISEL-NEXT: v_cmp_gt_u32_e32 vcc, v1, v2
-; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7e00
-; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v1, v2
-; GFX7GLISEL-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
+; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
+; GFX7GLISEL-NEXT: v_add_i32_e32 v1, vcc, 0xffff83ff, v1
+; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x1ff
+; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
+; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800
-; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
@@ -3020,10 +3004,8 @@ define i1 @isinf_or_nan_f16(half %x) {
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
-; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
-; GFX7GLISEL-NEXT: v_cmp_gt_u32_e64 s[4:5], v0, v1
-; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX7GLISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: isinf_or_nan_f16:
@@ -3132,10 +3114,8 @@ define i1 @isfinite_or_nan_f(half %x) {
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
-; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
-; GFX7GLISEL-NEXT: v_cmp_gt_u32_e64 s[4:5], v0, v1
-; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX7GLISEL-NEXT: v_cmp_ne_u32_e32 vcc, v0, v1
+; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: isfinite_or_nan_f:
More information about the llvm-commits
mailing list