[llvm] [X86] Fold concat(PCMP*(), PCMP*()) -> CMPPS(concat, concat) on AVX1 targets (PR #95915)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 18 05:54:54 PDT 2024
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/95915
This is a more restricted solution to #82242 (vs the more general #82290 + #84360) whereby, if we're concatenating PCMPEQ/GT nodes to 256 bits on an AVX1 target, we determine whether the integer values are in bounds to allow them to be converted to FP for a (legal) float comparison.
By performing this inside combineConcatVectorOps and working on PCMPEQ/GT nodes rather than ICMP, we delay the fold until after more lowering has occurred. This avoids many of the issues where we were getting 'stuck' with CMPPS or unnecessary 256-bit nodes, and makes it easier to determine whether either of the new concats() will be free.
Additionally, this patch requires BOTH comparison operands to be in range; while not technically necessary, this helps avoid the remaining regressions. It doesn't require that one of the operands is constant, as that constraint didn't seem necessary.
I've reused some of the code from #82290, and we may be able to add further functionality (more CondCode patterns, v4i64/v4f64 handling, 'bitcastable' integers, etc.) in future patches.
Fixes #82242
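
As a side note, here's a minimal standalone sketch (not part of the patch, just illustrating the numeric precondition) of why requiring both operands to lie strictly inside +/-2^24 (the f32 significand precision used for MaxConvertableCvt) makes the SINT_TO_FP casts exact, so the integer EQ/GT compares agree with the float OEQ/OGT compares:

  #include <cassert>
  #include <cstdint>

  int main() {
    // 2^24 == one bit set at the f32 significand precision (24 bits),
    // mirroring MaxConvertableCvt in the patch.
    const int32_t Bound = 1 << 24;
    const int32_t Vals[] = {-Bound + 1, -3, 0, 3, Bound - 1};
    for (int32_t X : Vals)
      for (int32_t Y : Vals) {
        // The casts are exact in this range, so the predicates agree.
        assert((X == Y) == (static_cast<float>(X) == static_cast<float>(Y)));
        assert((X > Y) == (static_cast<float>(X) > static_cast<float>(Y)));
      }
    return 0;
  }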
>From 411feda5e64583a34bde1152b8c0d11fe3a7fa9e Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Tue, 18 Jun 2024 13:45:29 +0100
Subject: [PATCH] [X86] Fold concat(PCMP*(),PCMP*()) -> CMPPS(concat,concat) on
AVX1 targets
This is a more restricted solution to #82242 (vs the more general #82290 + #84360) whereby, if we're concatenating PCMPEQ/GT nodes to 256 bits on an AVX1 target, we determine whether the integer values are in bounds to allow them to be converted to FP for a (legal) float comparison.
By performing this inside combineConcatVectorOps and working on PCMPEQ/GT nodes rather than ICMP, we delay the fold until after more lowering has occurred. This avoids many of the issues where we were getting 'stuck' with CMPPS or unnecessary 256-bit nodes, and makes it easier to determine whether either of the new concats() will be free.
Additionally, this patch requires both comparison operands to be in range; while not technically necessary, this helps avoid the remaining regressions. It doesn't require that one of the operands is constant, as that constraint didn't seem necessary.
I've reused some of the code from #82290, and we may be able to add further functionality (more CondCode patterns, v4i64/v4f64 handling, 'bitcastable' integers, etc.) in future patches.
Fixes #82242
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 81 +-
.../X86/bitcast-int-to-vector-bool-sext.ll | 27 +-
.../X86/bitcast-int-to-vector-bool-zext.ll | 27 +-
llvm/test/CodeGen/X86/cmpf-avx.ll | 15 +-
.../CodeGen/X86/vector-popcnt-256-ult-ugt.ll | 3111 ++++++++---------
llvm/test/CodeGen/X86/vector-sext.ll | 9 +-
llvm/test/CodeGen/X86/vector-unsigned-cmp.ll | 17 +-
7 files changed, 1653 insertions(+), 1634 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f27c935812f51..0ad30490772c8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -55716,6 +55716,38 @@ static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Helper to determine if we can convert an integer comparison to a float
+// comparison by casting the operands.
+static std::optional<unsigned> CastIntSETCCtoFP(MVT VT, ISD::CondCode CC,
+ const KnownBits &LHS,
+ const KnownBits &RHS) {
+ MVT SVT = VT.getScalarType();
+ assert(SVT == MVT::f32 && "Only tested for float so far");
+ const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(SVT);
+ assert((CC == ISD::SETEQ || CC == ISD::SETGT) &&
+ "Only PCMPEQ/PCMPGT currently supported");
+
+ // TODO: Handle bitcastable integers.
+
+ // For cvt + signed compare we need:
+ // abs(lhs) < MaxConvertableCvt and abs(rhs) < MaxConvertableCvt
+ auto OpInAbsRange = [](const KnownBits &Known, const APInt &Bound) {
+ if (Known.isUnknown() ||
+ !KnownBits::slt(Known, KnownBits::makeConstant(Bound)) ||
+ !KnownBits::sgt(Known, KnownBits::makeConstant(-Bound)))
+ return false;
+ return true;
+ };
+ APInt MaxConvertableCvt = APInt::getOneBitSet(
+ SVT.getScalarSizeInBits(), APFloat::semanticsPrecision(Sem));
+
+ if (OpInAbsRange(RHS, MaxConvertableCvt) &&
+ OpInAbsRange(LHS, MaxConvertableCvt))
+ return ISD::SINT_TO_FP;
+
+ return std::nullopt;
+}
+
/// Helper that combines an array of subvector ops as if they were the operands
/// of a ISD::CONCAT_VECTORS node, but may have come from another source (e.g.
/// ISD::INSERT_SUBVECTOR). The ops are assumed to be of the same type.
@@ -56106,11 +56138,52 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
break;
case X86ISD::PCMPEQ:
case X86ISD::PCMPGT:
- if (!IsSplat && VT.is256BitVector() && Subtarget.hasInt256() &&
+ if (!IsSplat && VT.is256BitVector() &&
+ (Subtarget.hasInt256() || VT == MVT::v8i32) &&
(IsConcatFree(VT, Ops, 0) || IsConcatFree(VT, Ops, 1))) {
- return DAG.getNode(Op0.getOpcode(), DL, VT,
- ConcatSubOperand(VT, Ops, 0),
- ConcatSubOperand(VT, Ops, 1));
+ if (Subtarget.hasInt256())
+ return DAG.getNode(Op0.getOpcode(), DL, VT,
+ ConcatSubOperand(VT, Ops, 0),
+ ConcatSubOperand(VT, Ops, 1));
+
+ // Without AVX2, see if we can cast the values to v8f32 and use fcmp.
+ // TODO: Handle v4f64 as well?
+ KnownBits KnownLHS(EltSizeInBits), KnownRHS(EltSizeInBits);
+ KnownLHS.One.setAllBits();
+ KnownRHS.One.setAllBits();
+ KnownLHS.Zero.setAllBits();
+ KnownRHS.Zero.setAllBits();
+ for (unsigned I = 0; I != NumOps; ++I) {
+ KnownBits LHS = DAG.computeKnownBits(Ops[I].getOperand(0));
+ KnownBits RHS = DAG.computeKnownBits(Ops[I].getOperand(1));
+ KnownLHS = KnownLHS.intersectWith(LHS);
+ KnownRHS = KnownRHS.intersectWith(RHS);
+ if (KnownLHS.isUnknown() && KnownRHS.isUnknown())
+ break;
+ }
+
+ ISD::CondCode ICC =
+ Op0.getOpcode() == X86ISD::PCMPEQ ? ISD::SETEQ : ISD::SETGT;
+ ISD::CondCode FCC =
+ Op0.getOpcode() == X86ISD::PCMPEQ ? ISD::SETOEQ : ISD::SETOGT;
+
+ MVT FpSVT = MVT::getFloatingPointVT(EltSizeInBits);
+ MVT FpVT = VT.changeVectorElementType(FpSVT);
+
+ if (std::optional<unsigned> CastOpc =
+ CastIntSETCCtoFP(FpVT, ICC, KnownLHS, KnownRHS)) {
+ SDValue LHS = ConcatSubOperand(VT, Ops, 0);
+ SDValue RHS = ConcatSubOperand(VT, Ops, 1);
+ LHS = DAG.getNode(*CastOpc, DL, FpVT, LHS);
+ RHS = DAG.getNode(*CastOpc, DL, FpVT, RHS);
+
+ bool IsAlwaysSignaling;
+ unsigned FSETCC =
+ translateX86FSETCC(FCC, LHS, RHS, IsAlwaysSignaling);
+ return DAG.getBitcast(
+ VT, DAG.getNode(X86ISD::CMPP, DL, FpVT, LHS, RHS,
+ DAG.getTargetConstant(FSETCC, DL, MVT::i8)));
+ }
}
break;
case ISD::CTPOP:
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
index 6255621d870e1..eef2b3db5d694 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
@@ -256,12 +256,9 @@ define <8 x i32> @ext_i8_8i32(i8 %a0) {
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
-; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i32:
@@ -487,18 +484,12 @@ define <16 x i32> @ext_i16_16i32(i16 %a0) {
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
-; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
-; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm2
-; AVX1-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
-; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
-; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
-; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1
+; AVX1-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i32:
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
index d2794df731b65..5c810797bd2b7 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
@@ -320,12 +320,9 @@ define <8 x i32> @ext_i8_8i32(i8 %a0) {
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
-; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -613,20 +610,14 @@ define <16 x i32> @ext_i16_16i32(i16 %a0) {
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
-; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
-; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm2
-; AVX1-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
-; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: vmovaps {{.*#+}} ymm3 = [256,512,1024,2048,4096,8192,16384,32768]
-; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1
-; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm3
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
+; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1
+; AVX1-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/cmpf-avx.ll b/llvm/test/CodeGen/X86/cmpf-avx.ll
index 39dce7b989509..e58295fff9855 100644
--- a/llvm/test/CodeGen/X86/cmpf-avx.ll
+++ b/llvm/test/CodeGen/X86/cmpf-avx.ll
@@ -2,25 +2,20 @@
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64
+; PR82242
define <8 x i32> @cmp_eq_bitcast(<8 x i32> %x) {
; X86-LABEL: cmp_eq_bitcast:
; X86: # %bb.0:
; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X86-NEXT: vbroadcastss {{.*#+}} xmm2 = [3,3,3,3]
-; X86-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
-; X86-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
-; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
+; X86-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: cmp_eq_bitcast:
; X64: # %bb.0:
; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X64-NEXT: vbroadcastss {{.*#+}} xmm2 = [3,3,3,3]
-; X64-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
-; X64-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
-; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
+; X64-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-NEXT: retq
%and = and <8 x i32> %x, <i32 7, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
%cmp = icmp eq <8 x i32> %and, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
diff --git a/llvm/test/CodeGen/X86/vector-popcnt-256-ult-ugt.ll b/llvm/test/CodeGen/X86/vector-popcnt-256-ult-ugt.ll
index 05854ff728a07..ab1922e3ad9a2 100644
--- a/llvm/test/CodeGen/X86/vector-popcnt-256-ult-ugt.ll
+++ b/llvm/test/CodeGen/X86/vector-popcnt-256-ult-ugt.ll
@@ -3432,36 +3432,36 @@ define <8 x i32> @ult_2_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_2_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_2_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [2,2,2,2]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_2_v8i32:
@@ -3535,36 +3535,35 @@ define <8 x i32> @ugt_2_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_3_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_3_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [3,3,3,3]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_3_v8i32:
@@ -3638,36 +3637,36 @@ define <8 x i32> @ult_3_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_3_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_3_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [3,3,3,3]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_3_v8i32:
@@ -3741,36 +3740,35 @@ define <8 x i32> @ugt_3_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_4_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_4_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [4,4,4,4]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_4_v8i32:
@@ -3844,36 +3842,36 @@ define <8 x i32> @ult_4_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_4_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_4_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [4,4,4,4]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [4.0E+0,4.0E+0,4.0E+0,4.0E+0,4.0E+0,4.0E+0,4.0E+0,4.0E+0]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_4_v8i32:
@@ -3947,36 +3945,35 @@ define <8 x i32> @ugt_4_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_5_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_5_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [5,5,5,5]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_5_v8i32:
@@ -4050,36 +4047,36 @@ define <8 x i32> @ult_5_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_5_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_5_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [5,5,5,5]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [5.0E+0,5.0E+0,5.0E+0,5.0E+0,5.0E+0,5.0E+0,5.0E+0,5.0E+0]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_5_v8i32:
@@ -4153,36 +4150,35 @@ define <8 x i32> @ugt_5_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_6_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_6_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [6,6,6,6]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_6_v8i32:
@@ -4256,36 +4252,36 @@ define <8 x i32> @ult_6_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_6_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_6_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [6,6,6,6]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [6.0E+0,6.0E+0,6.0E+0,6.0E+0,6.0E+0,6.0E+0,6.0E+0,6.0E+0]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_6_v8i32:
@@ -4359,36 +4355,35 @@ define <8 x i32> @ugt_6_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_7_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_7_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [7,7,7,7]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_7_v8i32:
@@ -4462,36 +4457,36 @@ define <8 x i32> @ult_7_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_7_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_7_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [7,7,7,7]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [7.0E+0,7.0E+0,7.0E+0,7.0E+0,7.0E+0,7.0E+0,7.0E+0,7.0E+0]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_7_v8i32:
@@ -4565,36 +4560,35 @@ define <8 x i32> @ugt_7_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_8_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_8_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [8,8,8,8]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_8_v8i32:
@@ -4668,36 +4662,36 @@ define <8 x i32> @ult_8_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_8_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_8_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [8,8,8,8]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [8.0E+0,8.0E+0,8.0E+0,8.0E+0,8.0E+0,8.0E+0,8.0E+0,8.0E+0]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_8_v8i32:
@@ -4771,36 +4765,35 @@ define <8 x i32> @ugt_8_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_9_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_9_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [9,9,9,9]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_9_v8i32:
@@ -4874,36 +4867,36 @@ define <8 x i32> @ult_9_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_9_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_9_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [9,9,9,9]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [9.0E+0,9.0E+0,9.0E+0,9.0E+0,9.0E+0,9.0E+0,9.0E+0,9.0E+0]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_9_v8i32:
@@ -4977,36 +4970,35 @@ define <8 x i32> @ugt_9_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_10_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_10_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [10,10,10,10]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_10_v8i32:
@@ -5080,36 +5072,36 @@ define <8 x i32> @ult_10_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_10_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_10_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [10,10,10,10]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+1,1.0E+1,1.0E+1,1.0E+1,1.0E+1,1.0E+1,1.0E+1,1.0E+1]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_10_v8i32:
@@ -5183,36 +5175,35 @@ define <8 x i32> @ugt_10_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_11_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_11_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [11,11,11,11]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_11_v8i32:
@@ -5286,36 +5277,36 @@ define <8 x i32> @ult_11_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_11_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_11_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [11,11,11,11]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.1E+1,1.1E+1,1.1E+1,1.1E+1,1.1E+1,1.1E+1,1.1E+1,1.1E+1]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_11_v8i32:
@@ -5389,36 +5380,35 @@ define <8 x i32> @ugt_11_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_12_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_12_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [12,12,12,12]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_12_v8i32:
@@ -5492,36 +5482,36 @@ define <8 x i32> @ult_12_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_12_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_12_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [12,12,12,12]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.2E+1,1.2E+1,1.2E+1,1.2E+1,1.2E+1,1.2E+1,1.2E+1,1.2E+1]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_12_v8i32:
@@ -5591,40 +5581,39 @@ define <8 x i32> @ugt_12_v8i32(<8 x i32> %0) {
%4 = sext <8 x i1> %3 to <8 x i32>
ret <8 x i32> %4
}
-
-define <8 x i32> @ult_13_v8i32(<8 x i32> %0) {
-; AVX1-LABEL: ult_13_v8i32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+
+define <8 x i32> @ult_13_v8i32(<8 x i32> %0) {
+; AVX1-LABEL: ult_13_v8i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [13,13,13,13]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_13_v8i32:
@@ -5698,36 +5687,36 @@ define <8 x i32> @ult_13_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_13_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_13_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [13,13,13,13]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.3E+1,1.3E+1,1.3E+1,1.3E+1,1.3E+1,1.3E+1,1.3E+1,1.3E+1]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_13_v8i32:
@@ -5801,36 +5790,35 @@ define <8 x i32> @ugt_13_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_14_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_14_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [14,14,14,14]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_14_v8i32:
@@ -5904,36 +5892,36 @@ define <8 x i32> @ult_14_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_14_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_14_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [14,14,14,14]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.4E+1,1.4E+1,1.4E+1,1.4E+1,1.4E+1,1.4E+1,1.4E+1,1.4E+1]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_14_v8i32:
@@ -6007,36 +5995,35 @@ define <8 x i32> @ugt_14_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_15_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_15_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_15_v8i32:
@@ -6110,36 +6097,36 @@ define <8 x i32> @ult_15_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_15_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_15_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.5E+1,1.5E+1,1.5E+1,1.5E+1,1.5E+1,1.5E+1,1.5E+1,1.5E+1]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_15_v8i32:
@@ -6213,36 +6200,35 @@ define <8 x i32> @ugt_15_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_16_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_16_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [16,16,16,16]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_16_v8i32:
@@ -6316,36 +6302,36 @@ define <8 x i32> @ult_16_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_16_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_16_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [16,16,16,16]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.6E+1,1.6E+1,1.6E+1,1.6E+1,1.6E+1,1.6E+1,1.6E+1,1.6E+1]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_16_v8i32:
@@ -6419,36 +6405,35 @@ define <8 x i32> @ugt_16_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_17_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_17_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [17,17,17,17]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_17_v8i32:
@@ -6522,36 +6507,36 @@ define <8 x i32> @ult_17_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_17_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_17_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [17,17,17,17]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.7E+1,1.7E+1,1.7E+1,1.7E+1,1.7E+1,1.7E+1,1.7E+1,1.7E+1]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_17_v8i32:
@@ -6625,36 +6610,35 @@ define <8 x i32> @ugt_17_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_18_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_18_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [18,18,18,18]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_18_v8i32:
@@ -6728,36 +6712,36 @@ define <8 x i32> @ult_18_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_18_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_18_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [18,18,18,18]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.8E+1,1.8E+1,1.8E+1,1.8E+1,1.8E+1,1.8E+1,1.8E+1,1.8E+1]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_18_v8i32:
@@ -6831,36 +6815,35 @@ define <8 x i32> @ugt_18_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_19_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_19_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [19,19,19,19]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_19_v8i32:
@@ -6934,36 +6917,36 @@ define <8 x i32> @ult_19_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_19_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_19_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [19,19,19,19]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.9E+1,1.9E+1,1.9E+1,1.9E+1,1.9E+1,1.9E+1,1.9E+1,1.9E+1]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_19_v8i32:
@@ -7037,36 +7020,35 @@ define <8 x i32> @ugt_19_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_20_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_20_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [20,20,20,20]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_20_v8i32:
@@ -7140,36 +7122,36 @@ define <8 x i32> @ult_20_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_20_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_20_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [20,20,20,20]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.0E+1,2.0E+1,2.0E+1,2.0E+1,2.0E+1,2.0E+1,2.0E+1,2.0E+1]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_20_v8i32:
@@ -7243,36 +7225,35 @@ define <8 x i32> @ugt_20_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_21_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_21_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [21,21,21,21]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_21_v8i32:
@@ -7346,36 +7327,36 @@ define <8 x i32> @ult_21_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_21_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_21_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [21,21,21,21]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.1E+1,2.1E+1,2.1E+1,2.1E+1,2.1E+1,2.1E+1,2.1E+1,2.1E+1]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_21_v8i32:
@@ -7449,36 +7430,35 @@ define <8 x i32> @ugt_21_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_22_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_22_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [22,22,22,22]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_22_v8i32:
@@ -7552,36 +7532,36 @@ define <8 x i32> @ult_22_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_22_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_22_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [22,22,22,22]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.2E+1,2.2E+1,2.2E+1,2.2E+1,2.2E+1,2.2E+1,2.2E+1,2.2E+1]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_22_v8i32:
@@ -7655,36 +7635,35 @@ define <8 x i32> @ugt_22_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_23_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_23_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [23,23,23,23]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_23_v8i32:
@@ -7758,36 +7737,36 @@ define <8 x i32> @ult_23_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_23_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_23_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [23,23,23,23]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.3E+1,2.3E+1,2.3E+1,2.3E+1,2.3E+1,2.3E+1,2.3E+1,2.3E+1]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_23_v8i32:
@@ -7861,36 +7840,35 @@ define <8 x i32> @ugt_23_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_24_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_24_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [24,24,24,24]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_24_v8i32:
@@ -7964,36 +7942,36 @@ define <8 x i32> @ult_24_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_24_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_24_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [24,24,24,24]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.4E+1,2.4E+1,2.4E+1,2.4E+1,2.4E+1,2.4E+1,2.4E+1,2.4E+1]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_24_v8i32:
@@ -8067,36 +8045,35 @@ define <8 x i32> @ugt_24_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_25_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_25_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [25,25,25,25]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_25_v8i32:
@@ -8170,36 +8147,36 @@ define <8 x i32> @ult_25_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_25_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_25_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [25,25,25,25]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.5E+1,2.5E+1,2.5E+1,2.5E+1,2.5E+1,2.5E+1,2.5E+1,2.5E+1]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_25_v8i32:
@@ -8273,36 +8250,35 @@ define <8 x i32> @ugt_25_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_26_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_26_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [26,26,26,26]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_26_v8i32:
@@ -8376,36 +8352,36 @@ define <8 x i32> @ult_26_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_26_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_26_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [26,26,26,26]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.6E+1,2.6E+1,2.6E+1,2.6E+1,2.6E+1,2.6E+1,2.6E+1,2.6E+1]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_26_v8i32:
@@ -8479,36 +8455,35 @@ define <8 x i32> @ugt_26_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_27_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_27_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [27,27,27,27]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_27_v8i32:
@@ -8582,36 +8557,36 @@ define <8 x i32> @ult_27_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_27_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_27_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [27,27,27,27]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.7E+1,2.7E+1,2.7E+1,2.7E+1,2.7E+1,2.7E+1,2.7E+1,2.7E+1]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_27_v8i32:
@@ -8685,36 +8660,35 @@ define <8 x i32> @ugt_27_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_28_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_28_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [28,28,28,28]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_28_v8i32:
@@ -8788,36 +8762,36 @@ define <8 x i32> @ult_28_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_28_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_28_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [28,28,28,28]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.8E+1,2.8E+1,2.8E+1,2.8E+1,2.8E+1,2.8E+1,2.8E+1,2.8E+1]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_28_v8i32:
@@ -8891,36 +8865,35 @@ define <8 x i32> @ugt_28_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_29_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_29_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [29,29,29,29]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_29_v8i32:
@@ -8994,36 +8967,36 @@ define <8 x i32> @ult_29_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_29_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_29_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [29,29,29,29]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.9E+1,2.9E+1,2.9E+1,2.9E+1,2.9E+1,2.9E+1,2.9E+1,2.9E+1]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_29_v8i32:
@@ -9097,36 +9070,35 @@ define <8 x i32> @ugt_29_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_30_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_30_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [30,30,30,30]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_30_v8i32:
@@ -9200,36 +9172,36 @@ define <8 x i32> @ult_30_v8i32(<8 x i32> %0) {
define <8 x i32> @ugt_30_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ugt_30_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [30,30,30,30]
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [3.0E+1,3.0E+1,3.0E+1,3.0E+1,3.0E+1,3.0E+1,3.0E+1,3.0E+1]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ugt_30_v8i32:
@@ -9303,36 +9275,35 @@ define <8 x i32> @ugt_30_v8i32(<8 x i32> %0) {
define <8 x i32> @ult_31_v8i32(<8 x i32> %0) {
; AVX1-LABEL: ult_31_v8i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5
-; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm5
+; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [31,31,31,31]
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ult_31_v8i32:
diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll
index 85c1e25c29ed5..dc9e69137a8a7 100644
--- a/llvm/test/CodeGen/X86/vector-sext.ll
+++ b/llvm/test/CodeGen/X86/vector-sext.ll
@@ -2419,12 +2419,9 @@ define <8 x i32> @load_sext_8i1_to_8i32(ptr%ptr) {
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
-; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_sext_8i1_to_8i32:
diff --git a/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll b/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll
index 9fd5b9010b0cf..44bd58f9d4121 100644
--- a/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll
+++ b/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll
@@ -679,15 +679,16 @@ define void @PR54171(ptr %mask0, ptr %mask1, i64 %i) {
; AVX1-NEXT: # %bb.1: # %if.then
; AVX1-NEXT: vmovd %edx, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
-; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa %xmm2, (%rdi)
-; AVX1-NEXT: vmovdqa %xmm1, 16(%rdi)
-; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
-; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa %xmm0, (%rsi)
-; AVX1-NEXT: vmovdqa %xmm1, 16(%rsi)
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [0.0E+0,0.0E+0,1.0E+0,1.0E+0,2.0E+0,2.0E+0,3.0E+0,3.0E+0]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm1
+; AVX1-NEXT: vmovaps %ymm1, (%rdi)
+; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [4.0E+0,4.0E+0,5.0E+0,5.0E+0,6.0E+0,6.0E+0,7.0E+0,7.0E+0]
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovaps %ymm0, (%rsi)
; AVX1-NEXT: .LBB18_2: # %if.end
+; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: PR54171: